{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.0, "eval_steps": 60, "global_step": 9920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24193548387096775, "eval_f1": 0.5851960435915661, "eval_loss": 0.8871086835861206, "eval_runtime": 0.2564, "eval_samples_per_second": 585.087, "eval_steps_per_second": 11.702, "step": 60 }, { "epoch": 0.4838709677419355, "eval_f1": 0.6617468749727405, "eval_loss": 0.7329094409942627, "eval_runtime": 0.2545, "eval_samples_per_second": 589.47, "eval_steps_per_second": 11.789, "step": 120 }, { "epoch": 0.7258064516129032, "eval_f1": 0.8160652328346837, "eval_loss": 0.46765777468681335, "eval_runtime": 0.2541, "eval_samples_per_second": 590.294, "eval_steps_per_second": 11.806, "step": 180 }, { "epoch": 0.967741935483871, "eval_f1": 0.8587881420000322, "eval_loss": 0.44318950176239014, "eval_runtime": 0.2554, "eval_samples_per_second": 587.306, "eval_steps_per_second": 11.746, "step": 240 }, { "epoch": 1.2096774193548387, "eval_f1": 0.809411799328179, "eval_loss": 0.5408520102500916, "eval_runtime": 0.2546, "eval_samples_per_second": 589.168, "eval_steps_per_second": 11.783, "step": 300 }, { "epoch": 1.4516129032258065, "eval_f1": 0.8382158435345435, "eval_loss": 0.5099673271179199, "eval_runtime": 0.2543, "eval_samples_per_second": 589.946, "eval_steps_per_second": 11.799, "step": 360 }, { "epoch": 1.6935483870967742, "eval_f1": 0.850649606409975, "eval_loss": 0.3776738941669464, "eval_runtime": 0.2587, "eval_samples_per_second": 579.748, "eval_steps_per_second": 11.595, "step": 420 }, { "epoch": 1.935483870967742, "eval_f1": 0.9060457516339869, "eval_loss": 0.3096984922885895, "eval_runtime": 0.2544, "eval_samples_per_second": 589.649, "eval_steps_per_second": 11.793, "step": 480 }, { "epoch": 2.0161290322580645, "grad_norm": 6.729659080505371, "learning_rate": 4.647087149462068e-05, "loss": 0.3881, "step": 500 }, { "epoch": 2.1774193548387095, "eval_f1": 0.9331478793742946, "eval_loss": 0.19595660269260406, "eval_runtime": 0.2551, "eval_samples_per_second": 587.93, "eval_steps_per_second": 11.759, "step": 540 }, { "epoch": 2.4193548387096775, "eval_f1": 0.9535178217421209, "eval_loss": 0.15902738273143768, "eval_runtime": 0.2546, "eval_samples_per_second": 589.066, "eval_steps_per_second": 11.781, "step": 600 }, { "epoch": 2.661290322580645, "eval_f1": 0.9534276314241835, "eval_loss": 0.1501321643590927, "eval_runtime": 0.2547, "eval_samples_per_second": 589.023, "eval_steps_per_second": 11.78, "step": 660 }, { "epoch": 2.903225806451613, "eval_f1": 0.9800105522453767, "eval_loss": 0.07951977103948593, "eval_runtime": 0.2544, "eval_samples_per_second": 589.701, "eval_steps_per_second": 11.794, "step": 720 }, { "epoch": 3.1451612903225805, "eval_f1": 0.9931623931623932, "eval_loss": 0.009822274558246136, "eval_runtime": 0.2546, "eval_samples_per_second": 589.081, "eval_steps_per_second": 11.782, "step": 780 }, { "epoch": 3.3870967741935485, "eval_f1": 0.9931623931623932, "eval_loss": 0.028039630502462387, "eval_runtime": 0.2548, "eval_samples_per_second": 588.756, "eval_steps_per_second": 11.775, "step": 840 }, { "epoch": 3.629032258064516, "eval_f1": 0.9805444220089162, "eval_loss": 0.048611294478178024, "eval_runtime": 0.2559, "eval_samples_per_second": 586.116, "eval_steps_per_second": 11.722, "step": 900 }, { "epoch": 3.870967741935484, "eval_f1": 0.9804415639480298, "eval_loss": 0.08277872949838638, "eval_runtime": 0.256, "eval_samples_per_second": 585.987, "eval_steps_per_second": 11.72, "step": 960 }, { "epoch": 4.032258064516129, "grad_norm": 0.015000954270362854, "learning_rate": 4.560063227687659e-05, "loss": 0.083, "step": 1000 }, { "epoch": 4.112903225806452, "eval_f1": 0.9804415639480298, "eval_loss": 0.05285938084125519, "eval_runtime": 0.2542, "eval_samples_per_second": 590.087, "eval_steps_per_second": 11.802, "step": 1020 }, { "epoch": 4.354838709677419, "eval_f1": 0.9931394168504323, "eval_loss": 0.042371999472379684, "eval_runtime": 0.2565, "eval_samples_per_second": 584.745, "eval_steps_per_second": 11.695, "step": 1080 }, { "epoch": 4.596774193548387, "eval_f1": 0.9931394168504323, "eval_loss": 0.020373299717903137, "eval_runtime": 0.2568, "eval_samples_per_second": 584.182, "eval_steps_per_second": 11.684, "step": 1140 }, { "epoch": 4.838709677419355, "eval_f1": 0.9873004354136429, "eval_loss": 0.01691700704395771, "eval_runtime": 0.2564, "eval_samples_per_second": 584.952, "eval_steps_per_second": 11.699, "step": 1200 }, { "epoch": 5.080645161290323, "eval_f1": 1.0, "eval_loss": 0.0007654120563529432, "eval_runtime": 0.2582, "eval_samples_per_second": 581.0, "eval_steps_per_second": 11.62, "step": 1260 }, { "epoch": 5.32258064516129, "eval_f1": 0.9863433667781494, "eval_loss": 0.026578061282634735, "eval_runtime": 0.2557, "eval_samples_per_second": 586.566, "eval_steps_per_second": 11.731, "step": 1320 }, { "epoch": 5.564516129032258, "eval_f1": 1.0, "eval_loss": 0.002019105711951852, "eval_runtime": 0.2547, "eval_samples_per_second": 588.941, "eval_steps_per_second": 11.779, "step": 1380 }, { "epoch": 5.806451612903226, "eval_f1": 1.0, "eval_loss": 0.00011336191528243944, "eval_runtime": 0.2554, "eval_samples_per_second": 587.273, "eval_steps_per_second": 11.745, "step": 1440 }, { "epoch": 6.048387096774194, "grad_norm": 0.0038932343013584614, "learning_rate": 4.4174447775380035e-05, "loss": 0.0341, "step": 1500 }, { "epoch": 6.048387096774194, "eval_f1": 0.9931394168504323, "eval_loss": 0.005218833684921265, "eval_runtime": 0.2547, "eval_samples_per_second": 588.864, "eval_steps_per_second": 11.777, "step": 1500 }, { "epoch": 6.290322580645161, "eval_f1": 1.0, "eval_loss": 0.001448934432119131, "eval_runtime": 0.2549, "eval_samples_per_second": 588.397, "eval_steps_per_second": 11.768, "step": 1560 }, { "epoch": 6.532258064516129, "eval_f1": 1.0, "eval_loss": 0.0035662834998220205, "eval_runtime": 0.2544, "eval_samples_per_second": 589.724, "eval_steps_per_second": 11.794, "step": 1620 }, { "epoch": 6.774193548387097, "eval_f1": 1.0, "eval_loss": 0.003718534717336297, "eval_runtime": 0.2544, "eval_samples_per_second": 589.734, "eval_steps_per_second": 11.795, "step": 1680 }, { "epoch": 7.016129032258065, "eval_f1": 1.0, "eval_loss": 0.00021605176152661443, "eval_runtime": 0.2549, "eval_samples_per_second": 588.428, "eval_steps_per_second": 11.769, "step": 1740 }, { "epoch": 7.258064516129032, "eval_f1": 0.9931623931623932, "eval_loss": 0.013021533377468586, "eval_runtime": 0.254, "eval_samples_per_second": 590.458, "eval_steps_per_second": 11.809, "step": 1800 }, { "epoch": 7.5, "eval_f1": 1.0, "eval_loss": 8.37745756143704e-05, "eval_runtime": 0.2567, "eval_samples_per_second": 584.393, "eval_steps_per_second": 11.688, "step": 1860 }, { "epoch": 7.741935483870968, "eval_f1": 1.0, "eval_loss": 7.913756417110562e-05, "eval_runtime": 0.2556, "eval_samples_per_second": 586.853, "eval_steps_per_second": 11.737, "step": 1920 }, { "epoch": 7.983870967741936, "eval_f1": 1.0, "eval_loss": 6.221562944119796e-05, "eval_runtime": 0.2559, "eval_samples_per_second": 586.146, "eval_steps_per_second": 11.723, "step": 1980 }, { "epoch": 8.064516129032258, "grad_norm": 0.0007441982161253691, "learning_rate": 4.222800289057154e-05, "loss": 0.017, "step": 2000 }, { "epoch": 8.225806451612904, "eval_f1": 1.0, "eval_loss": 7.838715828256682e-05, "eval_runtime": 0.2539, "eval_samples_per_second": 590.717, "eval_steps_per_second": 11.814, "step": 2040 }, { "epoch": 8.46774193548387, "eval_f1": 1.0, "eval_loss": 0.0009664014796726406, "eval_runtime": 0.2556, "eval_samples_per_second": 586.876, "eval_steps_per_second": 11.738, "step": 2100 }, { "epoch": 8.709677419354838, "eval_f1": 0.9936507936507937, "eval_loss": 0.00964354071766138, "eval_runtime": 0.2545, "eval_samples_per_second": 589.449, "eval_steps_per_second": 11.789, "step": 2160 }, { "epoch": 8.951612903225806, "eval_f1": 0.9661393494972598, "eval_loss": 0.08236207813024521, "eval_runtime": 0.2545, "eval_samples_per_second": 589.3, "eval_steps_per_second": 11.786, "step": 2220 }, { "epoch": 9.193548387096774, "eval_f1": 1.0, "eval_loss": 0.00091545470058918, "eval_runtime": 0.2554, "eval_samples_per_second": 587.389, "eval_steps_per_second": 11.748, "step": 2280 }, { "epoch": 9.435483870967742, "eval_f1": 1.0, "eval_loss": 0.0017120252596214414, "eval_runtime": 0.2536, "eval_samples_per_second": 591.372, "eval_steps_per_second": 11.827, "step": 2340 }, { "epoch": 9.67741935483871, "eval_f1": 1.0, "eval_loss": 0.0004082757805008441, "eval_runtime": 0.2565, "eval_samples_per_second": 584.858, "eval_steps_per_second": 11.697, "step": 2400 }, { "epoch": 9.919354838709678, "eval_f1": 0.9867521367521368, "eval_loss": 0.0258210189640522, "eval_runtime": 0.2589, "eval_samples_per_second": 579.307, "eval_steps_per_second": 11.586, "step": 2460 }, { "epoch": 10.080645161290322, "grad_norm": 0.001595139503479004, "learning_rate": 3.981000008125249e-05, "loss": 0.0278, "step": 2500 }, { "epoch": 10.161290322580646, "eval_f1": 0.9930800784733368, "eval_loss": 0.02786369062960148, "eval_runtime": 0.2601, "eval_samples_per_second": 576.62, "eval_steps_per_second": 11.532, "step": 2520 }, { "epoch": 10.403225806451612, "eval_f1": 0.9931007503823038, "eval_loss": 0.05510010942816734, "eval_runtime": 0.2554, "eval_samples_per_second": 587.317, "eval_steps_per_second": 11.746, "step": 2580 }, { "epoch": 10.64516129032258, "eval_f1": 1.0, "eval_loss": 5.751836943090893e-05, "eval_runtime": 0.2573, "eval_samples_per_second": 583.023, "eval_steps_per_second": 11.66, "step": 2640 }, { "epoch": 10.887096774193548, "eval_f1": 0.9936484892477996, "eval_loss": 0.031413882970809937, "eval_runtime": 0.2541, "eval_samples_per_second": 590.408, "eval_steps_per_second": 11.808, "step": 2700 }, { "epoch": 11.129032258064516, "eval_f1": 0.9930800784733368, "eval_loss": 0.034935660660266876, "eval_runtime": 0.2557, "eval_samples_per_second": 586.588, "eval_steps_per_second": 11.732, "step": 2760 }, { "epoch": 11.370967741935484, "eval_f1": 0.9930800784733368, "eval_loss": 0.005744210444390774, "eval_runtime": 0.2558, "eval_samples_per_second": 586.315, "eval_steps_per_second": 11.726, "step": 2820 }, { "epoch": 11.612903225806452, "eval_f1": 0.9930800784733368, "eval_loss": 0.044621676206588745, "eval_runtime": 0.255, "eval_samples_per_second": 588.201, "eval_steps_per_second": 11.764, "step": 2880 }, { "epoch": 11.85483870967742, "eval_f1": 1.0, "eval_loss": 5.108299956191331e-05, "eval_runtime": 0.2538, "eval_samples_per_second": 591.101, "eval_steps_per_second": 11.822, "step": 2940 }, { "epoch": 12.096774193548388, "grad_norm": 1.4287638664245605, "learning_rate": 3.698094076882026e-05, "loss": 0.0099, "step": 3000 }, { "epoch": 12.096774193548388, "eval_f1": 0.9867291604401759, "eval_loss": 0.09647814929485321, "eval_runtime": 0.2561, "eval_samples_per_second": 585.684, "eval_steps_per_second": 11.714, "step": 3000 }, { "epoch": 12.338709677419354, "eval_f1": 0.9936507936507937, "eval_loss": 0.0637097880244255, "eval_runtime": 0.256, "eval_samples_per_second": 586.002, "eval_steps_per_second": 11.72, "step": 3060 }, { "epoch": 12.580645161290322, "eval_f1": 0.9867291604401759, "eval_loss": 0.08844030648469925, "eval_runtime": 0.2555, "eval_samples_per_second": 587.048, "eval_steps_per_second": 11.741, "step": 3120 }, { "epoch": 12.82258064516129, "eval_f1": 0.9930800784733368, "eval_loss": 0.07371941953897476, "eval_runtime": 0.2568, "eval_samples_per_second": 584.175, "eval_steps_per_second": 11.684, "step": 3180 }, { "epoch": 13.064516129032258, "eval_f1": 0.9930800784733368, "eval_loss": 0.07483946532011032, "eval_runtime": 0.2538, "eval_samples_per_second": 590.92, "eval_steps_per_second": 11.818, "step": 3240 }, { "epoch": 13.306451612903226, "eval_f1": 0.9930800784733368, "eval_loss": 0.07480111718177795, "eval_runtime": 0.2557, "eval_samples_per_second": 586.667, "eval_steps_per_second": 11.733, "step": 3300 }, { "epoch": 13.548387096774194, "eval_f1": 1.0, "eval_loss": 2.3137790776672773e-05, "eval_runtime": 0.2547, "eval_samples_per_second": 589.018, "eval_steps_per_second": 11.78, "step": 3360 }, { "epoch": 13.790322580645162, "eval_f1": 1.0, "eval_loss": 2.158356255677063e-05, "eval_runtime": 0.2541, "eval_samples_per_second": 590.234, "eval_steps_per_second": 11.805, "step": 3420 }, { "epoch": 14.03225806451613, "eval_f1": 0.9659544901858607, "eval_loss": 0.15978649258613586, "eval_runtime": 0.2579, "eval_samples_per_second": 581.665, "eval_steps_per_second": 11.633, "step": 3480 }, { "epoch": 14.112903225806452, "grad_norm": 0.0005940676783211529, "learning_rate": 3.381161151686362e-05, "loss": 0.0169, "step": 3500 }, { "epoch": 14.274193548387096, "eval_f1": 1.0, "eval_loss": 0.0006047336501069367, "eval_runtime": 0.2554, "eval_samples_per_second": 587.341, "eval_steps_per_second": 11.747, "step": 3540 }, { "epoch": 14.516129032258064, "eval_f1": 1.0, "eval_loss": 7.13071713107638e-05, "eval_runtime": 0.2552, "eval_samples_per_second": 587.767, "eval_steps_per_second": 11.755, "step": 3600 }, { "epoch": 14.758064516129032, "eval_f1": 1.0, "eval_loss": 0.00021068091155029833, "eval_runtime": 0.2571, "eval_samples_per_second": 583.488, "eval_steps_per_second": 11.67, "step": 3660 }, { "epoch": 15.0, "eval_f1": 1.0, "eval_loss": 0.0005000072997063398, "eval_runtime": 0.2551, "eval_samples_per_second": 587.982, "eval_steps_per_second": 11.76, "step": 3720 }, { "epoch": 15.241935483870968, "eval_f1": 1.0, "eval_loss": 3.806134554906748e-05, "eval_runtime": 0.2554, "eval_samples_per_second": 587.376, "eval_steps_per_second": 11.748, "step": 3780 }, { "epoch": 15.483870967741936, "eval_f1": 1.0, "eval_loss": 2.8680979085038416e-05, "eval_runtime": 0.2548, "eval_samples_per_second": 588.673, "eval_steps_per_second": 11.773, "step": 3840 }, { "epoch": 15.725806451612904, "eval_f1": 1.0, "eval_loss": 0.0001551912137074396, "eval_runtime": 0.255, "eval_samples_per_second": 588.229, "eval_steps_per_second": 11.765, "step": 3900 }, { "epoch": 15.967741935483872, "eval_f1": 1.0, "eval_loss": 0.0014714256394654512, "eval_runtime": 0.2542, "eval_samples_per_second": 590.047, "eval_steps_per_second": 11.801, "step": 3960 }, { "epoch": 16.129032258064516, "grad_norm": 0.0009025487233884633, "learning_rate": 3.038131286377757e-05, "loss": 0.0155, "step": 4000 }, { "epoch": 16.20967741935484, "eval_f1": 1.0, "eval_loss": 2.558559936005622e-05, "eval_runtime": 0.2538, "eval_samples_per_second": 590.96, "eval_steps_per_second": 11.819, "step": 4020 }, { "epoch": 16.451612903225808, "eval_f1": 1.0, "eval_loss": 3.426855255383998e-05, "eval_runtime": 0.2563, "eval_samples_per_second": 585.204, "eval_steps_per_second": 11.704, "step": 4080 }, { "epoch": 16.693548387096776, "eval_f1": 1.0, "eval_loss": 0.0018926361808553338, "eval_runtime": 0.2547, "eval_samples_per_second": 588.923, "eval_steps_per_second": 11.778, "step": 4140 }, { "epoch": 16.93548387096774, "eval_f1": 0.9931394168504323, "eval_loss": 0.05736490711569786, "eval_runtime": 0.256, "eval_samples_per_second": 586.046, "eval_steps_per_second": 11.721, "step": 4200 }, { "epoch": 17.177419354838708, "eval_f1": 0.9931394168504323, "eval_loss": 0.05700461566448212, "eval_runtime": 0.2546, "eval_samples_per_second": 589.274, "eval_steps_per_second": 11.785, "step": 4260 }, { "epoch": 17.419354838709676, "eval_f1": 0.9931394168504323, "eval_loss": 0.05664278194308281, "eval_runtime": 0.254, "eval_samples_per_second": 590.639, "eval_steps_per_second": 11.813, "step": 4320 }, { "epoch": 17.661290322580644, "eval_f1": 1.0, "eval_loss": 0.00015866669127717614, "eval_runtime": 0.2545, "eval_samples_per_second": 589.42, "eval_steps_per_second": 11.788, "step": 4380 }, { "epoch": 17.903225806451612, "eval_f1": 1.0, "eval_loss": 8.310633711516857e-05, "eval_runtime": 0.3017, "eval_samples_per_second": 497.145, "eval_steps_per_second": 9.943, "step": 4440 }, { "epoch": 18.14516129032258, "grad_norm": 0.0018858049297705293, "learning_rate": 2.6775875125196684e-05, "loss": 0.0214, "step": 4500 }, { "epoch": 18.14516129032258, "eval_f1": 1.0, "eval_loss": 6.328391464194283e-05, "eval_runtime": 0.255, "eval_samples_per_second": 588.208, "eval_steps_per_second": 11.764, "step": 4500 }, { "epoch": 18.387096774193548, "eval_f1": 1.0, "eval_loss": 0.00011058977543143556, "eval_runtime": 0.2575, "eval_samples_per_second": 582.543, "eval_steps_per_second": 11.651, "step": 4560 }, { "epoch": 18.629032258064516, "eval_f1": 0.9936507936507937, "eval_loss": 0.0063561322167515755, "eval_runtime": 0.2546, "eval_samples_per_second": 589.057, "eval_steps_per_second": 11.781, "step": 4620 }, { "epoch": 18.870967741935484, "eval_f1": 0.9936484892477996, "eval_loss": 0.07243207842111588, "eval_runtime": 0.2542, "eval_samples_per_second": 590.195, "eval_steps_per_second": 11.804, "step": 4680 }, { "epoch": 19.112903225806452, "eval_f1": 1.0, "eval_loss": 3.163903966196813e-05, "eval_runtime": 0.2549, "eval_samples_per_second": 588.475, "eval_steps_per_second": 11.769, "step": 4740 }, { "epoch": 19.35483870967742, "eval_f1": 1.0, "eval_loss": 0.00013194132770877331, "eval_runtime": 0.2542, "eval_samples_per_second": 590.164, "eval_steps_per_second": 11.803, "step": 4800 }, { "epoch": 19.596774193548388, "eval_f1": 1.0, "eval_loss": 8.35082828416489e-05, "eval_runtime": 0.2555, "eval_samples_per_second": 587.138, "eval_steps_per_second": 11.743, "step": 4860 }, { "epoch": 19.838709677419356, "eval_f1": 1.0, "eval_loss": 0.00017465968267060816, "eval_runtime": 0.2551, "eval_samples_per_second": 588.086, "eval_steps_per_second": 11.762, "step": 4920 }, { "epoch": 20.080645161290324, "eval_f1": 1.0, "eval_loss": 0.00028180619119666517, "eval_runtime": 0.2551, "eval_samples_per_second": 588.014, "eval_steps_per_second": 11.76, "step": 4980 }, { "epoch": 20.161290322580644, "grad_norm": 0.0009010693174786866, "learning_rate": 2.308551081332452e-05, "loss": 0.0161, "step": 5000 }, { "epoch": 20.322580645161292, "eval_f1": 1.0, "eval_loss": 0.0002893185301218182, "eval_runtime": 0.2539, "eval_samples_per_second": 590.81, "eval_steps_per_second": 11.816, "step": 5040 }, { "epoch": 20.56451612903226, "eval_f1": 1.0, "eval_loss": 3.712088437168859e-05, "eval_runtime": 0.2569, "eval_samples_per_second": 583.889, "eval_steps_per_second": 11.678, "step": 5100 }, { "epoch": 20.806451612903224, "eval_f1": 1.0, "eval_loss": 3.2648320484440774e-05, "eval_runtime": 0.2548, "eval_samples_per_second": 588.741, "eval_steps_per_second": 11.775, "step": 5160 }, { "epoch": 21.048387096774192, "eval_f1": 1.0, "eval_loss": 2.866457543859724e-05, "eval_runtime": 0.2559, "eval_samples_per_second": 586.183, "eval_steps_per_second": 11.724, "step": 5220 }, { "epoch": 21.29032258064516, "eval_f1": 1.0, "eval_loss": 2.9972579795867205e-05, "eval_runtime": 0.2547, "eval_samples_per_second": 588.897, "eval_steps_per_second": 11.778, "step": 5280 }, { "epoch": 21.532258064516128, "eval_f1": 1.0, "eval_loss": 2.6008663553511724e-05, "eval_runtime": 0.2568, "eval_samples_per_second": 584.06, "eval_steps_per_second": 11.681, "step": 5340 }, { "epoch": 21.774193548387096, "eval_f1": 1.0, "eval_loss": 2.6406421966385096e-05, "eval_runtime": 0.2542, "eval_samples_per_second": 590.17, "eval_steps_per_second": 11.803, "step": 5400 }, { "epoch": 22.016129032258064, "eval_f1": 1.0, "eval_loss": 2.0519637473626062e-05, "eval_runtime": 0.2555, "eval_samples_per_second": 587.081, "eval_steps_per_second": 11.742, "step": 5460 }, { "epoch": 22.177419354838708, "grad_norm": 0.0008068878669291735, "learning_rate": 1.940255740828388e-05, "loss": 0.004, "step": 5500 }, { "epoch": 22.258064516129032, "eval_f1": 1.0, "eval_loss": 1.894849083328154e-05, "eval_runtime": 0.2549, "eval_samples_per_second": 588.573, "eval_steps_per_second": 11.771, "step": 5520 }, { "epoch": 22.5, "eval_f1": 1.0, "eval_loss": 1.6999869330902584e-05, "eval_runtime": 0.2551, "eval_samples_per_second": 587.891, "eval_steps_per_second": 11.758, "step": 5580 }, { "epoch": 22.741935483870968, "eval_f1": 1.0, "eval_loss": 1.7026062778313644e-05, "eval_runtime": 0.2555, "eval_samples_per_second": 587.094, "eval_steps_per_second": 11.742, "step": 5640 }, { "epoch": 22.983870967741936, "eval_f1": 1.0, "eval_loss": 1.6268664694507606e-05, "eval_runtime": 0.2555, "eval_samples_per_second": 587.126, "eval_steps_per_second": 11.743, "step": 5700 }, { "epoch": 23.225806451612904, "eval_f1": 1.0, "eval_loss": 1.5551027900073677e-05, "eval_runtime": 0.2557, "eval_samples_per_second": 586.707, "eval_steps_per_second": 11.734, "step": 5760 }, { "epoch": 23.467741935483872, "eval_f1": 1.0, "eval_loss": 1.4529051441058982e-05, "eval_runtime": 0.255, "eval_samples_per_second": 588.162, "eval_steps_per_second": 11.763, "step": 5820 }, { "epoch": 23.70967741935484, "eval_f1": 1.0, "eval_loss": 1.2737024917441886e-05, "eval_runtime": 0.2548, "eval_samples_per_second": 588.642, "eval_steps_per_second": 11.773, "step": 5880 }, { "epoch": 23.951612903225808, "eval_f1": 1.0, "eval_loss": 1.2093305485905148e-05, "eval_runtime": 0.2559, "eval_samples_per_second": 586.075, "eval_steps_per_second": 11.722, "step": 5940 }, { "epoch": 24.193548387096776, "grad_norm": 0.0002727832761593163, "learning_rate": 1.5819166960141247e-05, "loss": 0.0026, "step": 6000 }, { "epoch": 24.193548387096776, "eval_f1": 1.0, "eval_loss": 1.1829461072920822e-05, "eval_runtime": 0.255, "eval_samples_per_second": 588.148, "eval_steps_per_second": 11.763, "step": 6000 }, { "epoch": 24.43548387096774, "eval_f1": 1.0, "eval_loss": 1.2995285942452028e-05, "eval_runtime": 0.2539, "eval_samples_per_second": 590.855, "eval_steps_per_second": 11.817, "step": 6060 }, { "epoch": 24.677419354838708, "eval_f1": 1.0, "eval_loss": 1.1007723514921963e-05, "eval_runtime": 0.255, "eval_samples_per_second": 588.252, "eval_steps_per_second": 11.765, "step": 6120 }, { "epoch": 24.919354838709676, "eval_f1": 1.0, "eval_loss": 1.0587315045995638e-05, "eval_runtime": 0.255, "eval_samples_per_second": 588.297, "eval_steps_per_second": 11.766, "step": 6180 }, { "epoch": 25.161290322580644, "eval_f1": 1.0, "eval_loss": 1.0195521099376492e-05, "eval_runtime": 0.2548, "eval_samples_per_second": 588.807, "eval_steps_per_second": 11.776, "step": 6240 }, { "epoch": 25.403225806451612, "eval_f1": 1.0, "eval_loss": 9.860146747087128e-06, "eval_runtime": 0.2557, "eval_samples_per_second": 586.556, "eval_steps_per_second": 11.731, "step": 6300 }, { "epoch": 25.64516129032258, "eval_f1": 1.0, "eval_loss": 9.539876373310108e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.288, "eval_steps_per_second": 11.746, "step": 6360 }, { "epoch": 25.887096774193548, "eval_f1": 1.0, "eval_loss": 9.233906894223765e-06, "eval_runtime": 0.2561, "eval_samples_per_second": 585.792, "eval_steps_per_second": 11.716, "step": 6420 }, { "epoch": 26.129032258064516, "eval_f1": 1.0, "eval_loss": 8.902509762265254e-06, "eval_runtime": 0.2539, "eval_samples_per_second": 590.758, "eval_steps_per_second": 11.815, "step": 6480 }, { "epoch": 26.20967741935484, "grad_norm": 0.0002617554273456335, "learning_rate": 1.242500033062053e-05, "loss": 0.0009, "step": 6500 }, { "epoch": 26.370967741935484, "eval_f1": 1.0, "eval_loss": 8.575877473049331e-06, "eval_runtime": 0.2549, "eval_samples_per_second": 588.489, "eval_steps_per_second": 11.77, "step": 6540 }, { "epoch": 26.612903225806452, "eval_f1": 1.0, "eval_loss": 8.099837941699661e-06, "eval_runtime": 0.2547, "eval_samples_per_second": 588.925, "eval_steps_per_second": 11.778, "step": 6600 }, { "epoch": 26.85483870967742, "eval_f1": 1.0, "eval_loss": 7.677047506149393e-06, "eval_runtime": 0.2549, "eval_samples_per_second": 588.508, "eval_steps_per_second": 11.77, "step": 6660 }, { "epoch": 27.096774193548388, "eval_f1": 1.0, "eval_loss": 7.495055797335226e-06, "eval_runtime": 0.2547, "eval_samples_per_second": 588.85, "eval_steps_per_second": 11.777, "step": 6720 }, { "epoch": 27.338709677419356, "eval_f1": 1.0, "eval_loss": 7.2447178354195785e-06, "eval_runtime": 0.255, "eval_samples_per_second": 588.316, "eval_steps_per_second": 11.766, "step": 6780 }, { "epoch": 27.580645161290324, "eval_f1": 1.0, "eval_loss": 7.079415809130296e-06, "eval_runtime": 0.2558, "eval_samples_per_second": 586.414, "eval_steps_per_second": 11.728, "step": 6840 }, { "epoch": 27.822580645161292, "eval_f1": 1.0, "eval_loss": 7.096105036907829e-06, "eval_runtime": 0.2567, "eval_samples_per_second": 584.45, "eval_steps_per_second": 11.689, "step": 6900 }, { "epoch": 28.06451612903226, "eval_f1": 1.0, "eval_loss": 6.859276709292317e-06, "eval_runtime": 0.2543, "eval_samples_per_second": 589.912, "eval_steps_per_second": 11.798, "step": 6960 }, { "epoch": 28.225806451612904, "grad_norm": 0.00016708578914403915, "learning_rate": 9.304983767430839e-06, "loss": 0.0007, "step": 7000 }, { "epoch": 28.306451612903224, "eval_f1": 1.0, "eval_loss": 6.732917427143548e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.412, "eval_steps_per_second": 11.748, "step": 7020 }, { "epoch": 28.548387096774192, "eval_f1": 1.0, "eval_loss": 6.600993401661981e-06, "eval_runtime": 0.2602, "eval_samples_per_second": 576.473, "eval_steps_per_second": 11.529, "step": 7080 }, { "epoch": 28.79032258064516, "eval_f1": 1.0, "eval_loss": 6.482578555733198e-06, "eval_runtime": 0.2561, "eval_samples_per_second": 585.802, "eval_steps_per_second": 11.716, "step": 7140 }, { "epoch": 29.032258064516128, "eval_f1": 1.0, "eval_loss": 6.322838999039959e-06, "eval_runtime": 0.2545, "eval_samples_per_second": 589.379, "eval_steps_per_second": 11.788, "step": 7200 }, { "epoch": 29.274193548387096, "eval_f1": 1.0, "eval_loss": 6.2060144045972265e-06, "eval_runtime": 0.255, "eval_samples_per_second": 588.181, "eval_steps_per_second": 11.764, "step": 7260 }, { "epoch": 29.516129032258064, "eval_f1": 1.0, "eval_loss": 6.097932327975286e-06, "eval_runtime": 0.2575, "eval_samples_per_second": 582.488, "eval_steps_per_second": 11.65, "step": 7320 }, { "epoch": 29.758064516129032, "eval_f1": 1.0, "eval_loss": 5.999386758048786e-06, "eval_runtime": 0.2549, "eval_samples_per_second": 588.48, "eval_steps_per_second": 11.77, "step": 7380 }, { "epoch": 30.0, "eval_f1": 1.0, "eval_loss": 5.888920895813499e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.296, "eval_steps_per_second": 11.746, "step": 7440 }, { "epoch": 30.241935483870968, "grad_norm": 0.00016466749366372824, "learning_rate": 6.537183944493647e-06, "loss": 0.0009, "step": 7500 }, { "epoch": 30.241935483870968, "eval_f1": 1.0, "eval_loss": 5.9008411881222855e-06, "eval_runtime": 0.2545, "eval_samples_per_second": 589.276, "eval_steps_per_second": 11.786, "step": 7500 }, { "epoch": 30.483870967741936, "eval_f1": 1.0, "eval_loss": 6.41104315945995e-06, "eval_runtime": 0.254, "eval_samples_per_second": 590.649, "eval_steps_per_second": 11.813, "step": 7560 }, { "epoch": 30.725806451612904, "eval_f1": 1.0, "eval_loss": 5.6163312365242746e-06, "eval_runtime": 0.254, "eval_samples_per_second": 590.583, "eval_steps_per_second": 11.812, "step": 7620 }, { "epoch": 30.967741935483872, "eval_f1": 1.0, "eval_loss": 5.524143034563167e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.338, "eval_steps_per_second": 11.747, "step": 7680 }, { "epoch": 31.20967741935484, "eval_f1": 1.0, "eval_loss": 5.447849162010243e-06, "eval_runtime": 0.2559, "eval_samples_per_second": 586.279, "eval_steps_per_second": 11.726, "step": 7740 }, { "epoch": 31.451612903225808, "eval_f1": 1.0, "eval_loss": 5.366786808735924e-06, "eval_runtime": 0.2551, "eval_samples_per_second": 587.967, "eval_steps_per_second": 11.759, "step": 7800 }, { "epoch": 31.693548387096776, "eval_f1": 1.0, "eval_loss": 5.3008247959951404e-06, "eval_runtime": 0.2549, "eval_samples_per_second": 588.378, "eval_steps_per_second": 11.768, "step": 7860 }, { "epoch": 31.93548387096774, "eval_f1": 1.0, "eval_loss": 5.241220605967101e-06, "eval_runtime": 0.254, "eval_samples_per_second": 590.438, "eval_steps_per_second": 11.809, "step": 7920 }, { "epoch": 32.17741935483871, "eval_f1": 1.0, "eval_loss": 5.184000656299759e-06, "eval_runtime": 0.2548, "eval_samples_per_second": 588.705, "eval_steps_per_second": 11.774, "step": 7980 }, { "epoch": 32.25806451612903, "grad_norm": 0.0001813564304029569, "learning_rate": 4.190854637191562e-06, "loss": 0.0, "step": 8000 }, { "epoch": 32.41935483870968, "eval_f1": 1.0, "eval_loss": 5.115653948450927e-06, "eval_runtime": 0.2548, "eval_samples_per_second": 588.735, "eval_steps_per_second": 11.775, "step": 8040 }, { "epoch": 32.66129032258065, "eval_f1": 1.0, "eval_loss": 5.066380708740326e-06, "eval_runtime": 0.255, "eval_samples_per_second": 588.337, "eval_steps_per_second": 11.767, "step": 8100 }, { "epoch": 32.903225806451616, "eval_f1": 1.0, "eval_loss": 5.009161668567685e-06, "eval_runtime": 0.2543, "eval_samples_per_second": 589.863, "eval_steps_per_second": 11.797, "step": 8160 }, { "epoch": 33.145161290322584, "eval_f1": 1.0, "eval_loss": 4.943993189954199e-06, "eval_runtime": 0.2543, "eval_samples_per_second": 589.901, "eval_steps_per_second": 11.798, "step": 8220 }, { "epoch": 33.38709677419355, "eval_f1": 1.0, "eval_loss": 4.893926870863652e-06, "eval_runtime": 0.2545, "eval_samples_per_second": 589.333, "eval_steps_per_second": 11.787, "step": 8280 }, { "epoch": 33.62903225806452, "eval_f1": 1.0, "eval_loss": 4.85418968310114e-06, "eval_runtime": 0.2543, "eval_samples_per_second": 589.802, "eval_steps_per_second": 11.796, "step": 8340 }, { "epoch": 33.87096774193548, "eval_f1": 1.0, "eval_loss": 4.81445340483333e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.21, "eval_steps_per_second": 11.744, "step": 8400 }, { "epoch": 34.11290322580645, "eval_f1": 1.0, "eval_loss": 4.774717581312871e-06, "eval_runtime": 0.2556, "eval_samples_per_second": 586.894, "eval_steps_per_second": 11.738, "step": 8460 }, { "epoch": 34.274193548387096, "grad_norm": 0.00011297773016849533, "learning_rate": 2.3247039072402065e-06, "loss": 0.0, "step": 8500 }, { "epoch": 34.354838709677416, "eval_f1": 1.0, "eval_loss": 4.748492301587248e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.245, "eval_steps_per_second": 11.745, "step": 8520 }, { "epoch": 34.596774193548384, "eval_f1": 1.0, "eval_loss": 4.70478153147269e-06, "eval_runtime": 0.2545, "eval_samples_per_second": 589.454, "eval_steps_per_second": 11.789, "step": 8580 }, { "epoch": 34.83870967741935, "eval_f1": 1.0, "eval_loss": 4.678555796999717e-06, "eval_runtime": 0.2551, "eval_samples_per_second": 588.068, "eval_steps_per_second": 11.761, "step": 8640 }, { "epoch": 35.08064516129032, "eval_f1": 1.0, "eval_loss": 4.659482328861486e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.313, "eval_steps_per_second": 11.746, "step": 8700 }, { "epoch": 35.32258064516129, "eval_f1": 1.0, "eval_loss": 4.6300779104058165e-06, "eval_runtime": 0.2558, "eval_samples_per_second": 586.379, "eval_steps_per_second": 11.728, "step": 8760 }, { "epoch": 35.564516129032256, "eval_f1": 1.0, "eval_loss": 4.600673037202796e-06, "eval_runtime": 0.255, "eval_samples_per_second": 588.143, "eval_steps_per_second": 11.763, "step": 8820 }, { "epoch": 35.806451612903224, "eval_f1": 1.0, "eval_loss": 4.583984264172614e-06, "eval_runtime": 0.2548, "eval_samples_per_second": 588.693, "eval_steps_per_second": 11.774, "step": 8880 }, { "epoch": 36.04838709677419, "eval_f1": 1.0, "eval_loss": 4.560936304187635e-06, "eval_runtime": 0.2541, "eval_samples_per_second": 590.228, "eval_steps_per_second": 11.805, "step": 8940 }, { "epoch": 36.29032258064516, "grad_norm": 0.00010865663352888077, "learning_rate": 9.854251543613312e-07, "loss": 0.0, "step": 9000 }, { "epoch": 36.29032258064516, "eval_f1": 1.0, "eval_loss": 4.545041974779451e-06, "eval_runtime": 0.2628, "eval_samples_per_second": 570.734, "eval_steps_per_second": 11.415, "step": 9000 }, { "epoch": 36.53225806451613, "eval_f1": 1.0, "eval_loss": 4.527558303379919e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.354, "eval_steps_per_second": 11.747, "step": 9060 }, { "epoch": 36.774193548387096, "eval_f1": 1.0, "eval_loss": 4.5124579628463835e-06, "eval_runtime": 0.2575, "eval_samples_per_second": 582.554, "eval_steps_per_second": 11.651, "step": 9120 }, { "epoch": 37.016129032258064, "eval_f1": 1.0, "eval_loss": 4.502126103034243e-06, "eval_runtime": 0.2542, "eval_samples_per_second": 590.074, "eval_steps_per_second": 11.801, "step": 9180 }, { "epoch": 37.25806451612903, "eval_f1": 1.0, "eval_loss": 4.493384949455503e-06, "eval_runtime": 0.2554, "eval_samples_per_second": 587.314, "eval_steps_per_second": 11.746, "step": 9240 }, { "epoch": 37.5, "eval_f1": 1.0, "eval_loss": 4.461595381144434e-06, "eval_runtime": 0.2547, "eval_samples_per_second": 588.917, "eval_steps_per_second": 11.778, "step": 9300 }, { "epoch": 37.74193548387097, "eval_f1": 1.0, "eval_loss": 4.452854227565695e-06, "eval_runtime": 0.2549, "eval_samples_per_second": 588.51, "eval_steps_per_second": 11.77, "step": 9360 }, { "epoch": 37.983870967741936, "eval_f1": 1.0, "eval_loss": 4.44729084847495e-06, "eval_runtime": 0.2567, "eval_samples_per_second": 584.253, "eval_steps_per_second": 11.685, "step": 9420 }, { "epoch": 38.225806451612904, "eval_f1": 1.0, "eval_loss": 4.4433177208702546e-06, "eval_runtime": 0.2562, "eval_samples_per_second": 585.502, "eval_steps_per_second": 11.71, "step": 9480 }, { "epoch": 38.306451612903224, "grad_norm": 0.00010896463209064677, "learning_rate": 2.0652878966122916e-07, "loss": 0.0, "step": 9500 }, { "epoch": 38.46774193548387, "eval_f1": 1.0, "eval_loss": 4.434575657796813e-06, "eval_runtime": 0.2594, "eval_samples_per_second": 578.204, "eval_steps_per_second": 11.564, "step": 9540 }, { "epoch": 38.70967741935484, "eval_f1": 1.0, "eval_loss": 4.4282178350840695e-06, "eval_runtime": 0.2571, "eval_samples_per_second": 583.366, "eval_steps_per_second": 11.667, "step": 9600 }, { "epoch": 38.95161290322581, "eval_f1": 1.0, "eval_loss": 4.4266280383453704e-06, "eval_runtime": 0.261, "eval_samples_per_second": 574.674, "eval_steps_per_second": 11.493, "step": 9660 }, { "epoch": 39.193548387096776, "eval_f1": 1.0, "eval_loss": 4.425038696354022e-06, "eval_runtime": 0.2615, "eval_samples_per_second": 573.707, "eval_steps_per_second": 11.474, "step": 9720 }, { "epoch": 39.435483870967744, "eval_f1": 1.0, "eval_loss": 4.423449354362674e-06, "eval_runtime": 0.2582, "eval_samples_per_second": 580.933, "eval_steps_per_second": 11.619, "step": 9780 }, { "epoch": 39.67741935483871, "eval_f1": 1.0, "eval_loss": 4.4226544559933245e-06, "eval_runtime": 0.258, "eval_samples_per_second": 581.476, "eval_steps_per_second": 11.63, "step": 9840 }, { "epoch": 39.91935483870968, "eval_f1": 1.0, "eval_loss": 4.4226544559933245e-06, "eval_runtime": 0.257, "eval_samples_per_second": 583.764, "eval_steps_per_second": 11.675, "step": 9900 }, { "epoch": 40.0, "step": 9920, "total_flos": 1.041666020785152e+16, "train_loss": 0.03220151995762623, "train_runtime": 939.6593, "train_samples_per_second": 168.529, "train_steps_per_second": 10.557 } ], "logging_steps": 500, "max_steps": 9920, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.041666020785152e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }