|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 40.0, |
|
"eval_steps": 60, |
|
"global_step": 9920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24193548387096775, |
|
"eval_f1": 0.5851960435915661, |
|
"eval_loss": 0.8871086835861206, |
|
"eval_runtime": 0.2564, |
|
"eval_samples_per_second": 585.087, |
|
"eval_steps_per_second": 11.702, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4838709677419355, |
|
"eval_f1": 0.6617468749727405, |
|
"eval_loss": 0.7329094409942627, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.47, |
|
"eval_steps_per_second": 11.789, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7258064516129032, |
|
"eval_f1": 0.8160652328346837, |
|
"eval_loss": 0.46765777468681335, |
|
"eval_runtime": 0.2541, |
|
"eval_samples_per_second": 590.294, |
|
"eval_steps_per_second": 11.806, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"eval_f1": 0.8587881420000322, |
|
"eval_loss": 0.44318950176239014, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.306, |
|
"eval_steps_per_second": 11.746, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2096774193548387, |
|
"eval_f1": 0.809411799328179, |
|
"eval_loss": 0.5408520102500916, |
|
"eval_runtime": 0.2546, |
|
"eval_samples_per_second": 589.168, |
|
"eval_steps_per_second": 11.783, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4516129032258065, |
|
"eval_f1": 0.8382158435345435, |
|
"eval_loss": 0.5099673271179199, |
|
"eval_runtime": 0.2543, |
|
"eval_samples_per_second": 589.946, |
|
"eval_steps_per_second": 11.799, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6935483870967742, |
|
"eval_f1": 0.850649606409975, |
|
"eval_loss": 0.3776738941669464, |
|
"eval_runtime": 0.2587, |
|
"eval_samples_per_second": 579.748, |
|
"eval_steps_per_second": 11.595, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"eval_f1": 0.9060457516339869, |
|
"eval_loss": 0.3096984922885895, |
|
"eval_runtime": 0.2544, |
|
"eval_samples_per_second": 589.649, |
|
"eval_steps_per_second": 11.793, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0161290322580645, |
|
"grad_norm": 6.729659080505371, |
|
"learning_rate": 4.647087149462068e-05, |
|
"loss": 0.3881, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.1774193548387095, |
|
"eval_f1": 0.9331478793742946, |
|
"eval_loss": 0.19595660269260406, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 587.93, |
|
"eval_steps_per_second": 11.759, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.4193548387096775, |
|
"eval_f1": 0.9535178217421209, |
|
"eval_loss": 0.15902738273143768, |
|
"eval_runtime": 0.2546, |
|
"eval_samples_per_second": 589.066, |
|
"eval_steps_per_second": 11.781, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.661290322580645, |
|
"eval_f1": 0.9534276314241835, |
|
"eval_loss": 0.1501321643590927, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 589.023, |
|
"eval_steps_per_second": 11.78, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.903225806451613, |
|
"eval_f1": 0.9800105522453767, |
|
"eval_loss": 0.07951977103948593, |
|
"eval_runtime": 0.2544, |
|
"eval_samples_per_second": 589.701, |
|
"eval_steps_per_second": 11.794, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.1451612903225805, |
|
"eval_f1": 0.9931623931623932, |
|
"eval_loss": 0.009822274558246136, |
|
"eval_runtime": 0.2546, |
|
"eval_samples_per_second": 589.081, |
|
"eval_steps_per_second": 11.782, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.3870967741935485, |
|
"eval_f1": 0.9931623931623932, |
|
"eval_loss": 0.028039630502462387, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.756, |
|
"eval_steps_per_second": 11.775, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.629032258064516, |
|
"eval_f1": 0.9805444220089162, |
|
"eval_loss": 0.048611294478178024, |
|
"eval_runtime": 0.2559, |
|
"eval_samples_per_second": 586.116, |
|
"eval_steps_per_second": 11.722, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.870967741935484, |
|
"eval_f1": 0.9804415639480298, |
|
"eval_loss": 0.08277872949838638, |
|
"eval_runtime": 0.256, |
|
"eval_samples_per_second": 585.987, |
|
"eval_steps_per_second": 11.72, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.032258064516129, |
|
"grad_norm": 0.015000954270362854, |
|
"learning_rate": 4.560063227687659e-05, |
|
"loss": 0.083, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.112903225806452, |
|
"eval_f1": 0.9804415639480298, |
|
"eval_loss": 0.05285938084125519, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.087, |
|
"eval_steps_per_second": 11.802, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.354838709677419, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.042371999472379684, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 584.745, |
|
"eval_steps_per_second": 11.695, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.596774193548387, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.020373299717903137, |
|
"eval_runtime": 0.2568, |
|
"eval_samples_per_second": 584.182, |
|
"eval_steps_per_second": 11.684, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.838709677419355, |
|
"eval_f1": 0.9873004354136429, |
|
"eval_loss": 0.01691700704395771, |
|
"eval_runtime": 0.2564, |
|
"eval_samples_per_second": 584.952, |
|
"eval_steps_per_second": 11.699, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.080645161290323, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0007654120563529432, |
|
"eval_runtime": 0.2582, |
|
"eval_samples_per_second": 581.0, |
|
"eval_steps_per_second": 11.62, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 5.32258064516129, |
|
"eval_f1": 0.9863433667781494, |
|
"eval_loss": 0.026578061282634735, |
|
"eval_runtime": 0.2557, |
|
"eval_samples_per_second": 586.566, |
|
"eval_steps_per_second": 11.731, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.564516129032258, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.002019105711951852, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.941, |
|
"eval_steps_per_second": 11.779, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.806451612903226, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00011336191528243944, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.273, |
|
"eval_steps_per_second": 11.745, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 6.048387096774194, |
|
"grad_norm": 0.0038932343013584614, |
|
"learning_rate": 4.4174447775380035e-05, |
|
"loss": 0.0341, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.048387096774194, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.005218833684921265, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.864, |
|
"eval_steps_per_second": 11.777, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.290322580645161, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.001448934432119131, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.397, |
|
"eval_steps_per_second": 11.768, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.532258064516129, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0035662834998220205, |
|
"eval_runtime": 0.2544, |
|
"eval_samples_per_second": 589.724, |
|
"eval_steps_per_second": 11.794, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 6.774193548387097, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.003718534717336297, |
|
"eval_runtime": 0.2544, |
|
"eval_samples_per_second": 589.734, |
|
"eval_steps_per_second": 11.795, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.016129032258065, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00021605176152661443, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.428, |
|
"eval_steps_per_second": 11.769, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 7.258064516129032, |
|
"eval_f1": 0.9931623931623932, |
|
"eval_loss": 0.013021533377468586, |
|
"eval_runtime": 0.254, |
|
"eval_samples_per_second": 590.458, |
|
"eval_steps_per_second": 11.809, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.37745756143704e-05, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.393, |
|
"eval_steps_per_second": 11.688, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 7.741935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.913756417110562e-05, |
|
"eval_runtime": 0.2556, |
|
"eval_samples_per_second": 586.853, |
|
"eval_steps_per_second": 11.737, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 7.983870967741936, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.221562944119796e-05, |
|
"eval_runtime": 0.2559, |
|
"eval_samples_per_second": 586.146, |
|
"eval_steps_per_second": 11.723, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 8.064516129032258, |
|
"grad_norm": 0.0007441982161253691, |
|
"learning_rate": 4.222800289057154e-05, |
|
"loss": 0.017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.225806451612904, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.838715828256682e-05, |
|
"eval_runtime": 0.2539, |
|
"eval_samples_per_second": 590.717, |
|
"eval_steps_per_second": 11.814, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 8.46774193548387, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0009664014796726406, |
|
"eval_runtime": 0.2556, |
|
"eval_samples_per_second": 586.876, |
|
"eval_steps_per_second": 11.738, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.709677419354838, |
|
"eval_f1": 0.9936507936507937, |
|
"eval_loss": 0.00964354071766138, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.449, |
|
"eval_steps_per_second": 11.789, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 8.951612903225806, |
|
"eval_f1": 0.9661393494972598, |
|
"eval_loss": 0.08236207813024521, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.3, |
|
"eval_steps_per_second": 11.786, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 9.193548387096774, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00091545470058918, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.389, |
|
"eval_steps_per_second": 11.748, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 9.435483870967742, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0017120252596214414, |
|
"eval_runtime": 0.2536, |
|
"eval_samples_per_second": 591.372, |
|
"eval_steps_per_second": 11.827, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 9.67741935483871, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0004082757805008441, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 584.858, |
|
"eval_steps_per_second": 11.697, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.919354838709678, |
|
"eval_f1": 0.9867521367521368, |
|
"eval_loss": 0.0258210189640522, |
|
"eval_runtime": 0.2589, |
|
"eval_samples_per_second": 579.307, |
|
"eval_steps_per_second": 11.586, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 10.080645161290322, |
|
"grad_norm": 0.001595139503479004, |
|
"learning_rate": 3.981000008125249e-05, |
|
"loss": 0.0278, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.161290322580646, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.02786369062960148, |
|
"eval_runtime": 0.2601, |
|
"eval_samples_per_second": 576.62, |
|
"eval_steps_per_second": 11.532, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 10.403225806451612, |
|
"eval_f1": 0.9931007503823038, |
|
"eval_loss": 0.05510010942816734, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.317, |
|
"eval_steps_per_second": 11.746, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 10.64516129032258, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.751836943090893e-05, |
|
"eval_runtime": 0.2573, |
|
"eval_samples_per_second": 583.023, |
|
"eval_steps_per_second": 11.66, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 10.887096774193548, |
|
"eval_f1": 0.9936484892477996, |
|
"eval_loss": 0.031413882970809937, |
|
"eval_runtime": 0.2541, |
|
"eval_samples_per_second": 590.408, |
|
"eval_steps_per_second": 11.808, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.129032258064516, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.034935660660266876, |
|
"eval_runtime": 0.2557, |
|
"eval_samples_per_second": 586.588, |
|
"eval_steps_per_second": 11.732, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 11.370967741935484, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.005744210444390774, |
|
"eval_runtime": 0.2558, |
|
"eval_samples_per_second": 586.315, |
|
"eval_steps_per_second": 11.726, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 11.612903225806452, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.044621676206588745, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.201, |
|
"eval_steps_per_second": 11.764, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 11.85483870967742, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.108299956191331e-05, |
|
"eval_runtime": 0.2538, |
|
"eval_samples_per_second": 591.101, |
|
"eval_steps_per_second": 11.822, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 12.096774193548388, |
|
"grad_norm": 1.4287638664245605, |
|
"learning_rate": 3.698094076882026e-05, |
|
"loss": 0.0099, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.096774193548388, |
|
"eval_f1": 0.9867291604401759, |
|
"eval_loss": 0.09647814929485321, |
|
"eval_runtime": 0.2561, |
|
"eval_samples_per_second": 585.684, |
|
"eval_steps_per_second": 11.714, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.338709677419354, |
|
"eval_f1": 0.9936507936507937, |
|
"eval_loss": 0.0637097880244255, |
|
"eval_runtime": 0.256, |
|
"eval_samples_per_second": 586.002, |
|
"eval_steps_per_second": 11.72, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 12.580645161290322, |
|
"eval_f1": 0.9867291604401759, |
|
"eval_loss": 0.08844030648469925, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 587.048, |
|
"eval_steps_per_second": 11.741, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 12.82258064516129, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.07371941953897476, |
|
"eval_runtime": 0.2568, |
|
"eval_samples_per_second": 584.175, |
|
"eval_steps_per_second": 11.684, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 13.064516129032258, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.07483946532011032, |
|
"eval_runtime": 0.2538, |
|
"eval_samples_per_second": 590.92, |
|
"eval_steps_per_second": 11.818, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 13.306451612903226, |
|
"eval_f1": 0.9930800784733368, |
|
"eval_loss": 0.07480111718177795, |
|
"eval_runtime": 0.2557, |
|
"eval_samples_per_second": 586.667, |
|
"eval_steps_per_second": 11.733, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.548387096774194, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.3137790776672773e-05, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 589.018, |
|
"eval_steps_per_second": 11.78, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 13.790322580645162, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.158356255677063e-05, |
|
"eval_runtime": 0.2541, |
|
"eval_samples_per_second": 590.234, |
|
"eval_steps_per_second": 11.805, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 14.03225806451613, |
|
"eval_f1": 0.9659544901858607, |
|
"eval_loss": 0.15978649258613586, |
|
"eval_runtime": 0.2579, |
|
"eval_samples_per_second": 581.665, |
|
"eval_steps_per_second": 11.633, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 14.112903225806452, |
|
"grad_norm": 0.0005940676783211529, |
|
"learning_rate": 3.381161151686362e-05, |
|
"loss": 0.0169, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.274193548387096, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0006047336501069367, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.341, |
|
"eval_steps_per_second": 11.747, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 14.516129032258064, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.13071713107638e-05, |
|
"eval_runtime": 0.2552, |
|
"eval_samples_per_second": 587.767, |
|
"eval_steps_per_second": 11.755, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.758064516129032, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00021068091155029833, |
|
"eval_runtime": 0.2571, |
|
"eval_samples_per_second": 583.488, |
|
"eval_steps_per_second": 11.67, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0005000072997063398, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 587.982, |
|
"eval_steps_per_second": 11.76, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 15.241935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 3.806134554906748e-05, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.376, |
|
"eval_steps_per_second": 11.748, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 15.483870967741936, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.8680979085038416e-05, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.673, |
|
"eval_steps_per_second": 11.773, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 15.725806451612904, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0001551912137074396, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.229, |
|
"eval_steps_per_second": 11.765, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 15.967741935483872, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0014714256394654512, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.047, |
|
"eval_steps_per_second": 11.801, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 16.129032258064516, |
|
"grad_norm": 0.0009025487233884633, |
|
"learning_rate": 3.038131286377757e-05, |
|
"loss": 0.0155, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.20967741935484, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.558559936005622e-05, |
|
"eval_runtime": 0.2538, |
|
"eval_samples_per_second": 590.96, |
|
"eval_steps_per_second": 11.819, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 16.451612903225808, |
|
"eval_f1": 1.0, |
|
"eval_loss": 3.426855255383998e-05, |
|
"eval_runtime": 0.2563, |
|
"eval_samples_per_second": 585.204, |
|
"eval_steps_per_second": 11.704, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 16.693548387096776, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0018926361808553338, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.923, |
|
"eval_steps_per_second": 11.778, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 16.93548387096774, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.05736490711569786, |
|
"eval_runtime": 0.256, |
|
"eval_samples_per_second": 586.046, |
|
"eval_steps_per_second": 11.721, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.177419354838708, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.05700461566448212, |
|
"eval_runtime": 0.2546, |
|
"eval_samples_per_second": 589.274, |
|
"eval_steps_per_second": 11.785, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 17.419354838709676, |
|
"eval_f1": 0.9931394168504323, |
|
"eval_loss": 0.05664278194308281, |
|
"eval_runtime": 0.254, |
|
"eval_samples_per_second": 590.639, |
|
"eval_steps_per_second": 11.813, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 17.661290322580644, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00015866669127717614, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.42, |
|
"eval_steps_per_second": 11.788, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 17.903225806451612, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.310633711516857e-05, |
|
"eval_runtime": 0.3017, |
|
"eval_samples_per_second": 497.145, |
|
"eval_steps_per_second": 9.943, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 18.14516129032258, |
|
"grad_norm": 0.0018858049297705293, |
|
"learning_rate": 2.6775875125196684e-05, |
|
"loss": 0.0214, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.14516129032258, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.328391464194283e-05, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.208, |
|
"eval_steps_per_second": 11.764, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.387096774193548, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00011058977543143556, |
|
"eval_runtime": 0.2575, |
|
"eval_samples_per_second": 582.543, |
|
"eval_steps_per_second": 11.651, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 18.629032258064516, |
|
"eval_f1": 0.9936507936507937, |
|
"eval_loss": 0.0063561322167515755, |
|
"eval_runtime": 0.2546, |
|
"eval_samples_per_second": 589.057, |
|
"eval_steps_per_second": 11.781, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 18.870967741935484, |
|
"eval_f1": 0.9936484892477996, |
|
"eval_loss": 0.07243207842111588, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.195, |
|
"eval_steps_per_second": 11.804, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 19.112903225806452, |
|
"eval_f1": 1.0, |
|
"eval_loss": 3.163903966196813e-05, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.475, |
|
"eval_steps_per_second": 11.769, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 19.35483870967742, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00013194132770877331, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.164, |
|
"eval_steps_per_second": 11.803, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.596774193548388, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.35082828416489e-05, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 587.138, |
|
"eval_steps_per_second": 11.743, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 19.838709677419356, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00017465968267060816, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 588.086, |
|
"eval_steps_per_second": 11.762, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 20.080645161290324, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.00028180619119666517, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 588.014, |
|
"eval_steps_per_second": 11.76, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 20.161290322580644, |
|
"grad_norm": 0.0009010693174786866, |
|
"learning_rate": 2.308551081332452e-05, |
|
"loss": 0.0161, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.322580645161292, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0002893185301218182, |
|
"eval_runtime": 0.2539, |
|
"eval_samples_per_second": 590.81, |
|
"eval_steps_per_second": 11.816, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 20.56451612903226, |
|
"eval_f1": 1.0, |
|
"eval_loss": 3.712088437168859e-05, |
|
"eval_runtime": 0.2569, |
|
"eval_samples_per_second": 583.889, |
|
"eval_steps_per_second": 11.678, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.806451612903224, |
|
"eval_f1": 1.0, |
|
"eval_loss": 3.2648320484440774e-05, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.741, |
|
"eval_steps_per_second": 11.775, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 21.048387096774192, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.866457543859724e-05, |
|
"eval_runtime": 0.2559, |
|
"eval_samples_per_second": 586.183, |
|
"eval_steps_per_second": 11.724, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 21.29032258064516, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.9972579795867205e-05, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.897, |
|
"eval_steps_per_second": 11.778, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 21.532258064516128, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.6008663553511724e-05, |
|
"eval_runtime": 0.2568, |
|
"eval_samples_per_second": 584.06, |
|
"eval_steps_per_second": 11.681, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 21.774193548387096, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.6406421966385096e-05, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.17, |
|
"eval_steps_per_second": 11.803, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 22.016129032258064, |
|
"eval_f1": 1.0, |
|
"eval_loss": 2.0519637473626062e-05, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 587.081, |
|
"eval_steps_per_second": 11.742, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 22.177419354838708, |
|
"grad_norm": 0.0008068878669291735, |
|
"learning_rate": 1.940255740828388e-05, |
|
"loss": 0.004, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.258064516129032, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.894849083328154e-05, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.573, |
|
"eval_steps_per_second": 11.771, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.6999869330902584e-05, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 587.891, |
|
"eval_steps_per_second": 11.758, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 22.741935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.7026062778313644e-05, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 587.094, |
|
"eval_steps_per_second": 11.742, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 22.983870967741936, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.6268664694507606e-05, |
|
"eval_runtime": 0.2555, |
|
"eval_samples_per_second": 587.126, |
|
"eval_steps_per_second": 11.743, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.225806451612904, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.5551027900073677e-05, |
|
"eval_runtime": 0.2557, |
|
"eval_samples_per_second": 586.707, |
|
"eval_steps_per_second": 11.734, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 23.467741935483872, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.4529051441058982e-05, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.162, |
|
"eval_steps_per_second": 11.763, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 23.70967741935484, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.2737024917441886e-05, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.642, |
|
"eval_steps_per_second": 11.773, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 23.951612903225808, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.2093305485905148e-05, |
|
"eval_runtime": 0.2559, |
|
"eval_samples_per_second": 586.075, |
|
"eval_steps_per_second": 11.722, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 24.193548387096776, |
|
"grad_norm": 0.0002727832761593163, |
|
"learning_rate": 1.5819166960141247e-05, |
|
"loss": 0.0026, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.193548387096776, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.1829461072920822e-05, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.148, |
|
"eval_steps_per_second": 11.763, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.43548387096774, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.2995285942452028e-05, |
|
"eval_runtime": 0.2539, |
|
"eval_samples_per_second": 590.855, |
|
"eval_steps_per_second": 11.817, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 24.677419354838708, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.1007723514921963e-05, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.252, |
|
"eval_steps_per_second": 11.765, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 24.919354838709676, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.0587315045995638e-05, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.297, |
|
"eval_steps_per_second": 11.766, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 25.161290322580644, |
|
"eval_f1": 1.0, |
|
"eval_loss": 1.0195521099376492e-05, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.807, |
|
"eval_steps_per_second": 11.776, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 25.403225806451612, |
|
"eval_f1": 1.0, |
|
"eval_loss": 9.860146747087128e-06, |
|
"eval_runtime": 0.2557, |
|
"eval_samples_per_second": 586.556, |
|
"eval_steps_per_second": 11.731, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.64516129032258, |
|
"eval_f1": 1.0, |
|
"eval_loss": 9.539876373310108e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.288, |
|
"eval_steps_per_second": 11.746, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 25.887096774193548, |
|
"eval_f1": 1.0, |
|
"eval_loss": 9.233906894223765e-06, |
|
"eval_runtime": 0.2561, |
|
"eval_samples_per_second": 585.792, |
|
"eval_steps_per_second": 11.716, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 26.129032258064516, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.902509762265254e-06, |
|
"eval_runtime": 0.2539, |
|
"eval_samples_per_second": 590.758, |
|
"eval_steps_per_second": 11.815, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 26.20967741935484, |
|
"grad_norm": 0.0002617554273456335, |
|
"learning_rate": 1.242500033062053e-05, |
|
"loss": 0.0009, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.370967741935484, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.575877473049331e-06, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.489, |
|
"eval_steps_per_second": 11.77, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 26.612903225806452, |
|
"eval_f1": 1.0, |
|
"eval_loss": 8.099837941699661e-06, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.925, |
|
"eval_steps_per_second": 11.778, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 26.85483870967742, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.677047506149393e-06, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.508, |
|
"eval_steps_per_second": 11.77, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 27.096774193548388, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.495055797335226e-06, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.85, |
|
"eval_steps_per_second": 11.777, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 27.338709677419356, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.2447178354195785e-06, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.316, |
|
"eval_steps_per_second": 11.766, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 27.580645161290324, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.079415809130296e-06, |
|
"eval_runtime": 0.2558, |
|
"eval_samples_per_second": 586.414, |
|
"eval_steps_per_second": 11.728, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 27.822580645161292, |
|
"eval_f1": 1.0, |
|
"eval_loss": 7.096105036907829e-06, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.45, |
|
"eval_steps_per_second": 11.689, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 28.06451612903226, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.859276709292317e-06, |
|
"eval_runtime": 0.2543, |
|
"eval_samples_per_second": 589.912, |
|
"eval_steps_per_second": 11.798, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 28.225806451612904, |
|
"grad_norm": 0.00016708578914403915, |
|
"learning_rate": 9.304983767430839e-06, |
|
"loss": 0.0007, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.306451612903224, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.732917427143548e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.412, |
|
"eval_steps_per_second": 11.748, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 28.548387096774192, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.600993401661981e-06, |
|
"eval_runtime": 0.2602, |
|
"eval_samples_per_second": 576.473, |
|
"eval_steps_per_second": 11.529, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 28.79032258064516, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.482578555733198e-06, |
|
"eval_runtime": 0.2561, |
|
"eval_samples_per_second": 585.802, |
|
"eval_steps_per_second": 11.716, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 29.032258064516128, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.322838999039959e-06, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.379, |
|
"eval_steps_per_second": 11.788, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 29.274193548387096, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.2060144045972265e-06, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.181, |
|
"eval_steps_per_second": 11.764, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 29.516129032258064, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.097932327975286e-06, |
|
"eval_runtime": 0.2575, |
|
"eval_samples_per_second": 582.488, |
|
"eval_steps_per_second": 11.65, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 29.758064516129032, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.999386758048786e-06, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.48, |
|
"eval_steps_per_second": 11.77, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.888920895813499e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.296, |
|
"eval_steps_per_second": 11.746, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 30.241935483870968, |
|
"grad_norm": 0.00016466749366372824, |
|
"learning_rate": 6.537183944493647e-06, |
|
"loss": 0.0009, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.241935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.9008411881222855e-06, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.276, |
|
"eval_steps_per_second": 11.786, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.483870967741936, |
|
"eval_f1": 1.0, |
|
"eval_loss": 6.41104315945995e-06, |
|
"eval_runtime": 0.254, |
|
"eval_samples_per_second": 590.649, |
|
"eval_steps_per_second": 11.813, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 30.725806451612904, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.6163312365242746e-06, |
|
"eval_runtime": 0.254, |
|
"eval_samples_per_second": 590.583, |
|
"eval_steps_per_second": 11.812, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 30.967741935483872, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.524143034563167e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.338, |
|
"eval_steps_per_second": 11.747, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 31.20967741935484, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.447849162010243e-06, |
|
"eval_runtime": 0.2559, |
|
"eval_samples_per_second": 586.279, |
|
"eval_steps_per_second": 11.726, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 31.451612903225808, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.366786808735924e-06, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 587.967, |
|
"eval_steps_per_second": 11.759, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 31.693548387096776, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.3008247959951404e-06, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.378, |
|
"eval_steps_per_second": 11.768, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 31.93548387096774, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.241220605967101e-06, |
|
"eval_runtime": 0.254, |
|
"eval_samples_per_second": 590.438, |
|
"eval_steps_per_second": 11.809, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 32.17741935483871, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.184000656299759e-06, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.705, |
|
"eval_steps_per_second": 11.774, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 32.25806451612903, |
|
"grad_norm": 0.0001813564304029569, |
|
"learning_rate": 4.190854637191562e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.41935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.115653948450927e-06, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.735, |
|
"eval_steps_per_second": 11.775, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 32.66129032258065, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.066380708740326e-06, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.337, |
|
"eval_steps_per_second": 11.767, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 32.903225806451616, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.009161668567685e-06, |
|
"eval_runtime": 0.2543, |
|
"eval_samples_per_second": 589.863, |
|
"eval_steps_per_second": 11.797, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 33.145161290322584, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.943993189954199e-06, |
|
"eval_runtime": 0.2543, |
|
"eval_samples_per_second": 589.901, |
|
"eval_steps_per_second": 11.798, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 33.38709677419355, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.893926870863652e-06, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.333, |
|
"eval_steps_per_second": 11.787, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 33.62903225806452, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.85418968310114e-06, |
|
"eval_runtime": 0.2543, |
|
"eval_samples_per_second": 589.802, |
|
"eval_steps_per_second": 11.796, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 33.87096774193548, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.81445340483333e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.21, |
|
"eval_steps_per_second": 11.744, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 34.11290322580645, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.774717581312871e-06, |
|
"eval_runtime": 0.2556, |
|
"eval_samples_per_second": 586.894, |
|
"eval_steps_per_second": 11.738, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 34.274193548387096, |
|
"grad_norm": 0.00011297773016849533, |
|
"learning_rate": 2.3247039072402065e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 34.354838709677416, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.748492301587248e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.245, |
|
"eval_steps_per_second": 11.745, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 34.596774193548384, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.70478153147269e-06, |
|
"eval_runtime": 0.2545, |
|
"eval_samples_per_second": 589.454, |
|
"eval_steps_per_second": 11.789, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 34.83870967741935, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.678555796999717e-06, |
|
"eval_runtime": 0.2551, |
|
"eval_samples_per_second": 588.068, |
|
"eval_steps_per_second": 11.761, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 35.08064516129032, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.659482328861486e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.313, |
|
"eval_steps_per_second": 11.746, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 35.32258064516129, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.6300779104058165e-06, |
|
"eval_runtime": 0.2558, |
|
"eval_samples_per_second": 586.379, |
|
"eval_steps_per_second": 11.728, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 35.564516129032256, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.600673037202796e-06, |
|
"eval_runtime": 0.255, |
|
"eval_samples_per_second": 588.143, |
|
"eval_steps_per_second": 11.763, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 35.806451612903224, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.583984264172614e-06, |
|
"eval_runtime": 0.2548, |
|
"eval_samples_per_second": 588.693, |
|
"eval_steps_per_second": 11.774, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 36.04838709677419, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.560936304187635e-06, |
|
"eval_runtime": 0.2541, |
|
"eval_samples_per_second": 590.228, |
|
"eval_steps_per_second": 11.805, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 36.29032258064516, |
|
"grad_norm": 0.00010865663352888077, |
|
"learning_rate": 9.854251543613312e-07, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 36.29032258064516, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.545041974779451e-06, |
|
"eval_runtime": 0.2628, |
|
"eval_samples_per_second": 570.734, |
|
"eval_steps_per_second": 11.415, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 36.53225806451613, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.527558303379919e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.354, |
|
"eval_steps_per_second": 11.747, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 36.774193548387096, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.5124579628463835e-06, |
|
"eval_runtime": 0.2575, |
|
"eval_samples_per_second": 582.554, |
|
"eval_steps_per_second": 11.651, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 37.016129032258064, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.502126103034243e-06, |
|
"eval_runtime": 0.2542, |
|
"eval_samples_per_second": 590.074, |
|
"eval_steps_per_second": 11.801, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 37.25806451612903, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.493384949455503e-06, |
|
"eval_runtime": 0.2554, |
|
"eval_samples_per_second": 587.314, |
|
"eval_steps_per_second": 11.746, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.461595381144434e-06, |
|
"eval_runtime": 0.2547, |
|
"eval_samples_per_second": 588.917, |
|
"eval_steps_per_second": 11.778, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 37.74193548387097, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.452854227565695e-06, |
|
"eval_runtime": 0.2549, |
|
"eval_samples_per_second": 588.51, |
|
"eval_steps_per_second": 11.77, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 37.983870967741936, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.44729084847495e-06, |
|
"eval_runtime": 0.2567, |
|
"eval_samples_per_second": 584.253, |
|
"eval_steps_per_second": 11.685, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 38.225806451612904, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.4433177208702546e-06, |
|
"eval_runtime": 0.2562, |
|
"eval_samples_per_second": 585.502, |
|
"eval_steps_per_second": 11.71, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 38.306451612903224, |
|
"grad_norm": 0.00010896463209064677, |
|
"learning_rate": 2.0652878966122916e-07, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 38.46774193548387, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.434575657796813e-06, |
|
"eval_runtime": 0.2594, |
|
"eval_samples_per_second": 578.204, |
|
"eval_steps_per_second": 11.564, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 38.70967741935484, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.4282178350840695e-06, |
|
"eval_runtime": 0.2571, |
|
"eval_samples_per_second": 583.366, |
|
"eval_steps_per_second": 11.667, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 38.95161290322581, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.4266280383453704e-06, |
|
"eval_runtime": 0.261, |
|
"eval_samples_per_second": 574.674, |
|
"eval_steps_per_second": 11.493, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 39.193548387096776, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.425038696354022e-06, |
|
"eval_runtime": 0.2615, |
|
"eval_samples_per_second": 573.707, |
|
"eval_steps_per_second": 11.474, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 39.435483870967744, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.423449354362674e-06, |
|
"eval_runtime": 0.2582, |
|
"eval_samples_per_second": 580.933, |
|
"eval_steps_per_second": 11.619, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 39.67741935483871, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.4226544559933245e-06, |
|
"eval_runtime": 0.258, |
|
"eval_samples_per_second": 581.476, |
|
"eval_steps_per_second": 11.63, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 39.91935483870968, |
|
"eval_f1": 1.0, |
|
"eval_loss": 4.4226544559933245e-06, |
|
"eval_runtime": 0.257, |
|
"eval_samples_per_second": 583.764, |
|
"eval_steps_per_second": 11.675, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 9920, |
|
"total_flos": 1.041666020785152e+16, |
|
"train_loss": 0.03220151995762623, |
|
"train_runtime": 939.6593, |
|
"train_samples_per_second": 168.529, |
|
"train_steps_per_second": 10.557 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 1200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.041666020785152e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|