{ "best_metric": 0.016786308959126472, "best_model_checkpoint": "./deberta-v3-base_finetuned_bluegennx_run2.21_3e/checkpoint-48015", "epoch": 3.0, "eval_steps": 500, "global_step": 48015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.031240237425804437, "grad_norm": 1.9453787803649902, "learning_rate": 2.6033531188170364e-06, "loss": 2.1742, "step": 500 }, { "epoch": 0.062480474851608875, "grad_norm": 2.207657814025879, "learning_rate": 5.206706237634073e-06, "loss": 0.3611, "step": 1000 }, { "epoch": 0.09372071227741331, "grad_norm": 3.9118669033050537, "learning_rate": 7.81005935645111e-06, "loss": 0.1781, "step": 1500 }, { "epoch": 0.12496094970321775, "grad_norm": 1.7872343063354492, "learning_rate": 1.0413412475268146e-05, "loss": 0.1175, "step": 2000 }, { "epoch": 0.15620118712902217, "grad_norm": 0.5929828882217407, "learning_rate": 1.3016765594085182e-05, "loss": 0.0815, "step": 2500 }, { "epoch": 0.18744142455482662, "grad_norm": 0.8290306925773621, "learning_rate": 1.562011871290222e-05, "loss": 0.0766, "step": 3000 }, { "epoch": 0.21868166198063105, "grad_norm": 0.4442843198776245, "learning_rate": 1.8223471831719256e-05, "loss": 0.0637, "step": 3500 }, { "epoch": 0.2499218994064355, "grad_norm": 5.056885719299316, "learning_rate": 2.082682495053629e-05, "loss": 0.0588, "step": 4000 }, { "epoch": 0.28116213683223995, "grad_norm": 2.825920820236206, "learning_rate": 2.3430178069353328e-05, "loss": 0.0525, "step": 4500 }, { "epoch": 0.31240237425804435, "grad_norm": 1.8231415748596191, "learning_rate": 2.6033531188170364e-05, "loss": 0.0533, "step": 5000 }, { "epoch": 0.3436426116838488, "grad_norm": 0.9496334195137024, "learning_rate": 2.86368843069874e-05, "loss": 0.06, "step": 5500 }, { "epoch": 0.37488284910965325, "grad_norm": 0.6990556716918945, "learning_rate": 3.124023742580444e-05, "loss": 0.059, "step": 6000 }, { "epoch": 0.40612308653545764, "grad_norm": 1.7713068723678589, "learning_rate": 3.384359054462147e-05, "loss": 0.0465, "step": 6500 }, { "epoch": 0.4373633239612621, "grad_norm": 0.48251184821128845, "learning_rate": 3.644694366343851e-05, "loss": 0.0508, "step": 7000 }, { "epoch": 0.46860356138706655, "grad_norm": 0.4298129379749298, "learning_rate": 3.9050296782255544e-05, "loss": 0.0531, "step": 7500 }, { "epoch": 0.499843798812871, "grad_norm": 0.6740893125534058, "learning_rate": 4.165364990107258e-05, "loss": 0.0443, "step": 8000 }, { "epoch": 0.5310840362386754, "grad_norm": 0.515224277973175, "learning_rate": 4.425700301988962e-05, "loss": 0.0442, "step": 8500 }, { "epoch": 0.5623242736644799, "grad_norm": 0.07582231611013412, "learning_rate": 4.6860356138706655e-05, "loss": 0.0455, "step": 9000 }, { "epoch": 0.5935645110902843, "grad_norm": 0.13481149077415466, "learning_rate": 4.9463709257523695e-05, "loss": 0.0502, "step": 9500 }, { "epoch": 0.6248047485160887, "grad_norm": 0.06367824971675873, "learning_rate": 4.998682293232702e-05, "loss": 0.0497, "step": 10000 }, { "epoch": 0.6560449859418932, "grad_norm": 0.107658751308918, "learning_rate": 4.993275406892816e-05, "loss": 0.0429, "step": 10500 }, { "epoch": 0.6872852233676976, "grad_norm": 0.1800769716501236, "learning_rate": 4.9836996765456125e-05, "loss": 0.0425, "step": 11000 }, { "epoch": 0.718525460793502, "grad_norm": 0.3818538784980774, "learning_rate": 4.969971113148389e-05, "loss": 0.035, "step": 11500 }, { "epoch": 0.7497656982193065, "grad_norm": 0.8075230717658997, "learning_rate": 4.952112671341091e-05, 
"loss": 0.0378, "step": 12000 }, { "epoch": 0.7810059356451109, "grad_norm": 1.5522449016571045, "learning_rate": 4.9301542110653454e-05, "loss": 0.0434, "step": 12500 }, { "epoch": 0.8122461730709153, "grad_norm": 0.18798592686653137, "learning_rate": 4.904132447637588e-05, "loss": 0.0377, "step": 13000 }, { "epoch": 0.8434864104967198, "grad_norm": 0.46252405643463135, "learning_rate": 4.8740908903597714e-05, "loss": 0.0386, "step": 13500 }, { "epoch": 0.8747266479225242, "grad_norm": 1.5039823055267334, "learning_rate": 4.8400797697702785e-05, "loss": 0.036, "step": 14000 }, { "epoch": 0.9059668853483287, "grad_norm": 4.865286350250244, "learning_rate": 4.802155953656704e-05, "loss": 0.0332, "step": 14500 }, { "epoch": 0.9372071227741331, "grad_norm": 1.6047935485839844, "learning_rate": 4.760382851970915e-05, "loss": 0.0358, "step": 15000 }, { "epoch": 0.9684473601999375, "grad_norm": 0.28227582573890686, "learning_rate": 4.714830310805381e-05, "loss": 0.0338, "step": 15500 }, { "epoch": 0.999687597625742, "grad_norm": 1.1359831094741821, "learning_rate": 4.6655744956080655e-05, "loss": 0.0308, "step": 16000 }, { "epoch": 1.0, "eval_AADHAR_CARD_f1": 0.935032483758121, "eval_AGE_f1": 0.9315192743764172, "eval_CITY_f1": 0.965787821720025, "eval_COUNTRY_f1": 0.9705882352941178, "eval_CREDITCARDCVV_f1": 0.9630872483221476, "eval_CREDITCARDNUMBER_f1": 0.9281792224906655, "eval_DATEOFBIRTH_f1": 0.8429846212265047, "eval_DATE_f1": 0.9250582271544047, "eval_EMAIL_f1": 0.9719218537028623, "eval_EXPIRYDATE_f1": 0.9841930116472545, "eval_ORGANIZATION_f1": 0.9866225478610258, "eval_PAN_CARD_f1": 0.9696224758560141, "eval_PERSON_f1": 0.9771855010660981, "eval_PHONENUMBER_f1": 0.9502501389660922, "eval_PINCODE_f1": 0.9834710743801652, "eval_SECONDARYADDRESS_f1": 0.968014484007242, "eval_STATE_f1": 0.9814928300036769, "eval_TIME_f1": 0.9739364385404405, "eval_URL_f1": 0.9844536895117144, "eval_loss": 0.03587241843342781, "eval_overall_accuracy": 0.9919974872098162, "eval_overall_f1": 0.96261001579714, "eval_overall_precision": 0.9500096174288057, "eval_overall_recall": 0.9755491563835206, "eval_runtime": 307.0183, "eval_samples_per_second": 52.13, "eval_steps_per_second": 13.035, "step": 16005 }, { "epoch": 1.0309278350515463, "grad_norm": 2.9516384601593018, "learning_rate": 4.6126977638311145e-05, "loss": 0.0315, "step": 16500 }, { "epoch": 1.0621680724773508, "grad_norm": 0.9985421299934387, "learning_rate": 4.556288527226308e-05, "loss": 0.0258, "step": 17000 }, { "epoch": 1.0934083099031553, "grad_norm": 0.09413296729326248, "learning_rate": 4.496441104017508e-05, "loss": 0.0251, "step": 17500 }, { "epoch": 1.1246485473289598, "grad_norm": 0.5080928802490234, "learning_rate": 4.433255561197277e-05, "loss": 0.0375, "step": 18000 }, { "epoch": 1.155888784754764, "grad_norm": 0.6283013224601746, "learning_rate": 4.366837547211354e-05, "loss": 0.0238, "step": 18500 }, { "epoch": 1.1871290221805686, "grad_norm": 0.09778323024511337, "learning_rate": 4.297298115310743e-05, "loss": 0.0258, "step": 19000 }, { "epoch": 1.218369259606373, "grad_norm": 0.5504295825958252, "learning_rate": 4.224753537866777e-05, "loss": 0.0337, "step": 19500 }, { "epoch": 1.2496094970321774, "grad_norm": 0.045782480388879776, "learning_rate": 4.1493251119596374e-05, "loss": 0.0299, "step": 20000 }, { "epoch": 1.280849734457982, "grad_norm": 0.31934645771980286, "learning_rate": 4.071138956565373e-05, "loss": 0.0264, "step": 20500 }, { "epoch": 1.3120899718837864, "grad_norm": 0.09766834229230881, "learning_rate": 
3.990325801680546e-05, "loss": 0.0247, "step": 21000 }, { "epoch": 1.3433302093095907, "grad_norm": 1.4730592966079712, "learning_rate": 3.907020769737086e-05, "loss": 0.024, "step": 21500 }, { "epoch": 1.3745704467353952, "grad_norm": 0.22080545127391815, "learning_rate": 3.8213631496728466e-05, "loss": 0.0263, "step": 22000 }, { "epoch": 1.4058106841611997, "grad_norm": 1.110324501991272, "learning_rate": 3.733496164035603e-05, "loss": 0.0232, "step": 22500 }, { "epoch": 1.437050921587004, "grad_norm": 0.38805103302001953, "learning_rate": 3.643566729509932e-05, "loss": 0.0277, "step": 23000 }, { "epoch": 1.4682911590128085, "grad_norm": 0.02454444207251072, "learning_rate": 3.551725211267354e-05, "loss": 0.0227, "step": 23500 }, { "epoch": 1.499531396438613, "grad_norm": 1.0061655044555664, "learning_rate": 3.458125171550487e-05, "loss": 0.0258, "step": 24000 }, { "epoch": 1.5307716338644175, "grad_norm": 0.09139035642147064, "learning_rate": 3.362923112911588e-05, "loss": 0.0238, "step": 24500 }, { "epoch": 1.5620118712902218, "grad_norm": 1.6061333417892456, "learning_rate": 3.266278216534792e-05, "loss": 0.0228, "step": 25000 }, { "epoch": 1.5932521087160263, "grad_norm": 0.014178004115819931, "learning_rate": 3.1683520760795795e-05, "loss": 0.0253, "step": 25500 }, { "epoch": 1.6244923461418308, "grad_norm": 0.1629280000925064, "learning_rate": 3.069308427490513e-05, "loss": 0.0226, "step": 26000 }, { "epoch": 1.655732583567635, "grad_norm": 0.6675782203674316, "learning_rate": 2.969312875224981e-05, "loss": 0.0209, "step": 26500 }, { "epoch": 1.6869728209934396, "grad_norm": 0.20190191268920898, "learning_rate": 2.8685326153567404e-05, "loss": 0.0232, "step": 27000 }, { "epoch": 1.718213058419244, "grad_norm": 0.22869578003883362, "learning_rate": 2.7671361560182018e-05, "loss": 0.0205, "step": 27500 }, { "epoch": 1.7494532958450484, "grad_norm": 0.4046876132488251, "learning_rate": 2.6652930356489275e-05, "loss": 0.0235, "step": 28000 }, { "epoch": 1.780693533270853, "grad_norm": 0.08248839527368546, "learning_rate": 2.563173539521398e-05, "loss": 0.0212, "step": 28500 }, { "epoch": 1.8119337706966574, "grad_norm": 0.14581803977489471, "learning_rate": 2.460948415018067e-05, "loss": 0.022, "step": 29000 }, { "epoch": 1.8431740081224617, "grad_norm": 0.07922026515007019, "learning_rate": 2.3587885861357417e-05, "loss": 0.0201, "step": 29500 }, { "epoch": 1.8744142455482662, "grad_norm": 0.12380392849445343, "learning_rate": 2.256864867694655e-05, "loss": 0.0222, "step": 30000 }, { "epoch": 1.9056544829740707, "grad_norm": 0.01387234590947628, "learning_rate": 2.1553476797300967e-05, "loss": 0.0172, "step": 30500 }, { "epoch": 1.936894720399875, "grad_norm": 0.7150996923446655, "learning_rate": 2.054406762544126e-05, "loss": 0.0193, "step": 31000 }, { "epoch": 1.9681349578256795, "grad_norm": 0.12249834090471268, "learning_rate": 1.954210892893833e-05, "loss": 0.0162, "step": 31500 }, { "epoch": 1.999375195251484, "grad_norm": 0.20916196703910828, "learning_rate": 1.8549276017906707e-05, "loss": 0.0202, "step": 32000 }, { "epoch": 2.0, "eval_AADHAR_CARD_f1": 0.975609756097561, "eval_AGE_f1": 0.9585812623964532, "eval_CITY_f1": 0.9789817842129845, "eval_COUNTRY_f1": 0.9825646794150731, "eval_CREDITCARDCVV_f1": 0.9866327180140038, "eval_CREDITCARDNUMBER_f1": 0.9497256746165043, "eval_DATEOFBIRTH_f1": 0.9060173073052928, "eval_DATE_f1": 0.9592630501535313, "eval_EMAIL_f1": 0.9893244984354868, "eval_EXPIRYDATE_f1": 0.9871847871021083, "eval_ORGANIZATION_f1": 0.9902360413309318, 
"eval_PAN_CARD_f1": 0.9716031631919482, "eval_PERSON_f1": 0.9874935533780299, "eval_PHONENUMBER_f1": 0.9829537315570835, "eval_PINCODE_f1": 0.9939471169162153, "eval_SECONDARYADDRESS_f1": 0.9831296090514193, "eval_STATE_f1": 0.9871265484576147, "eval_TIME_f1": 0.9798612286342867, "eval_URL_f1": 0.9927456583864586, "eval_loss": 0.019483117386698723, "eval_overall_accuracy": 0.9950012756613735, "eval_overall_f1": 0.9786100681623069, "eval_overall_precision": 0.9736953143492266, "eval_overall_recall": 0.9835746883868889, "eval_runtime": 307.7213, "eval_samples_per_second": 52.011, "eval_steps_per_second": 13.005, "step": 32010 }, { "epoch": 2.0306154326772883, "grad_norm": 0.4832023084163666, "learning_rate": 1.7567228943827265e-05, "loss": 0.0189, "step": 32500 }, { "epoch": 2.0618556701030926, "grad_norm": 0.07361295074224472, "learning_rate": 1.6597609723882834e-05, "loss": 0.0177, "step": 33000 }, { "epoch": 2.0930959075288973, "grad_norm": 0.5370087623596191, "learning_rate": 1.5642039595447862e-05, "loss": 0.0108, "step": 33500 }, { "epoch": 2.1243361449547016, "grad_norm": 0.2966110408306122, "learning_rate": 1.4702116305322614e-05, "loss": 0.0122, "step": 34000 }, { "epoch": 2.155576382380506, "grad_norm": 0.25330406427383423, "learning_rate": 1.3779411438244385e-05, "loss": 0.0126, "step": 34500 }, { "epoch": 2.1868166198063106, "grad_norm": 0.023234376683831215, "learning_rate": 1.2875467789142694e-05, "loss": 0.0149, "step": 35000 }, { "epoch": 2.218056857232115, "grad_norm": 0.15632040798664093, "learning_rate": 1.1991796783531975e-05, "loss": 0.0118, "step": 35500 }, { "epoch": 2.2492970946579196, "grad_norm": 2.662566900253296, "learning_rate": 1.1129875950355095e-05, "loss": 0.0147, "step": 36000 }, { "epoch": 2.280537332083724, "grad_norm": 0.07179576903581619, "learning_rate": 1.0291146451503033e-05, "loss": 0.013, "step": 36500 }, { "epoch": 2.311777569509528, "grad_norm": 1.6363691091537476, "learning_rate": 9.477010672141675e-06, "loss": 0.0101, "step": 37000 }, { "epoch": 2.3430178069353325, "grad_norm": 0.0774739608168602, "learning_rate": 8.688829875874516e-06, "loss": 0.0175, "step": 37500 }, { "epoch": 2.374258044361137, "grad_norm": 0.07443530112504959, "learning_rate": 7.927921928662193e-06, "loss": 0.0104, "step": 38000 }, { "epoch": 2.4054982817869415, "grad_norm": 0.021464819088578224, "learning_rate": 7.195559095304283e-06, "loss": 0.0118, "step": 38500 }, { "epoch": 2.436738519212746, "grad_norm": 0.9025238752365112, "learning_rate": 6.492965912167923e-06, "loss": 0.0122, "step": 39000 }, { "epoch": 2.4679787566385505, "grad_norm": 0.19682565331459045, "learning_rate": 5.821317139720001e-06, "loss": 0.0118, "step": 39500 }, { "epoch": 2.4992189940643548, "grad_norm": 0.056999098509550095, "learning_rate": 5.1817357982864415e-06, "loss": 0.0113, "step": 40000 }, { "epoch": 2.530459231490159, "grad_norm": 0.016481826081871986, "learning_rate": 4.575291290322764e-06, "loss": 0.011, "step": 40500 }, { "epoch": 2.561699468915964, "grad_norm": 0.011844165623188019, "learning_rate": 4.0029976123356544e-06, "loss": 0.0108, "step": 41000 }, { "epoch": 2.592939706341768, "grad_norm": 0.02857145480811596, "learning_rate": 3.465811659445206e-06, "loss": 0.0134, "step": 41500 }, { "epoch": 2.624179943767573, "grad_norm": 0.03046867437660694, "learning_rate": 2.9646316254226786e-06, "loss": 0.0126, "step": 42000 }, { "epoch": 2.655420181193377, "grad_norm": 0.0065807169303298, "learning_rate": 2.5002955008790074e-06, "loss": 0.0089, "step": 42500 }, { "epoch": 
2.6866604186191814, "grad_norm": 0.16103477776050568, "learning_rate": 2.0735796721150447e-06, "loss": 0.0154, "step": 43000 }, { "epoch": 2.717900656044986, "grad_norm": 0.08998264372348785, "learning_rate": 1.6851976229764393e-06, "loss": 0.0123, "step": 43500 }, { "epoch": 2.7491408934707904, "grad_norm": 0.04546121135354042, "learning_rate": 1.3357987418835955e-06, "loss": 0.0092, "step": 44000 }, { "epoch": 2.7803811308965947, "grad_norm": 0.6218581199645996, "learning_rate": 1.0259672360314221e-06, "loss": 0.0101, "step": 44500 }, { "epoch": 2.8116213683223994, "grad_norm": 0.2742210030555725, "learning_rate": 7.562211545744102e-07, "loss": 0.009, "step": 45000 }, { "epoch": 2.8428616057482037, "grad_norm": 0.0064840721897780895, "learning_rate": 5.270115224302541e-07, "loss": 0.0109, "step": 45500 }, { "epoch": 2.874101843174008, "grad_norm": 0.033717311918735504, "learning_rate": 3.3872158615035067e-07, "loss": 0.0136, "step": 46000 }, { "epoch": 2.9053420805998127, "grad_norm": 0.08778509497642517, "learning_rate": 1.9166617311812407e-07, "loss": 0.0122, "step": 46500 }, { "epoch": 2.936582318025617, "grad_norm": 4.181912422180176, "learning_rate": 8.609116514657622e-08, "loss": 0.0105, "step": 47000 }, { "epoch": 2.9678225554514217, "grad_norm": 0.6781991124153137, "learning_rate": 2.2173087355270573e-08, "loss": 0.0129, "step": 47500 }, { "epoch": 2.999062792877226, "grad_norm": 0.332028865814209, "learning_rate": 1.8813014129870354e-11, "loss": 0.0107, "step": 48000 }, { "epoch": 3.0, "eval_AADHAR_CARD_f1": 0.9865537848605577, "eval_AGE_f1": 0.9707006369426752, "eval_CITY_f1": 0.9867601246105918, "eval_COUNTRY_f1": 0.9864864864864865, "eval_CREDITCARDCVV_f1": 0.9887830687830689, "eval_CREDITCARDNUMBER_f1": 0.9587166255328696, "eval_DATEOFBIRTH_f1": 0.9165348101265822, "eval_DATE_f1": 0.9642906194204092, "eval_EMAIL_f1": 0.9894194498113901, "eval_EXPIRYDATE_f1": 0.9921259842519685, "eval_ORGANIZATION_f1": 0.9917026219714571, "eval_PAN_CARD_f1": 0.9855999999999999, "eval_PERSON_f1": 0.9883056021325078, "eval_PHONENUMBER_f1": 0.9868345735546651, "eval_PINCODE_f1": 0.9936386768447837, "eval_SECONDARYADDRESS_f1": 0.9860662358642972, "eval_STATE_f1": 0.9901351845085861, "eval_TIME_f1": 0.9821322719959352, "eval_URL_f1": 0.994939493949395, "eval_loss": 0.016786308959126472, "eval_overall_accuracy": 0.9958519111859447, "eval_overall_f1": 0.9824886126865787, "eval_overall_precision": 0.9772792726075866, "eval_overall_recall": 0.9877537866580728, "eval_runtime": 309.3329, "eval_samples_per_second": 51.74, "eval_steps_per_second": 12.938, "step": 48015 }, { "epoch": 3.0, "step": 48015, "total_flos": 1.8885846596837596e+16, "train_loss": 0.0555913904110963, "train_runtime": 9844.7347, "train_samples_per_second": 19.509, "train_steps_per_second": 4.877 }, { "epoch": 3.0, "eval_AADHAR_CARD_f1": 0.9865537848605577, "eval_AGE_f1": 0.9707006369426752, "eval_CITY_f1": 0.9867601246105918, "eval_COUNTRY_f1": 0.9864864864864865, "eval_CREDITCARDCVV_f1": 0.9887830687830689, "eval_CREDITCARDNUMBER_f1": 0.9587166255328696, "eval_DATEOFBIRTH_f1": 0.9165348101265822, "eval_DATE_f1": 0.9642906194204092, "eval_EMAIL_f1": 0.9894194498113901, "eval_EXPIRYDATE_f1": 0.9921259842519685, "eval_ORGANIZATION_f1": 0.9917026219714571, "eval_PAN_CARD_f1": 0.9855999999999999, "eval_PERSON_f1": 0.9883056021325078, "eval_PHONENUMBER_f1": 0.9868345735546651, "eval_PINCODE_f1": 0.9936386768447837, "eval_SECONDARYADDRESS_f1": 0.9860662358642972, "eval_STATE_f1": 0.9901351845085861, "eval_TIME_f1": 0.9821322719959352, 
"eval_URL_f1": 0.994939493949395, "eval_loss": 0.016786308959126472, "eval_overall_accuracy": 0.9958519111859447, "eval_overall_f1": 0.9824886126865787, "eval_overall_precision": 0.9772792726075866, "eval_overall_recall": 0.9877537866580728, "eval_runtime": 267.5949, "eval_samples_per_second": 59.811, "eval_steps_per_second": 14.955, "step": 48015 } ], "logging_steps": 500, "max_steps": 48015, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8885846596837596e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }