|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 200,
  "global_step": 491,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002036659877800407,
      "grad_norm": 42.73765709178892,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 4.2137,
      "step": 1
    },
    {
      "epoch": 0.010183299389002037,
      "grad_norm": 43.823126465212304,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 4.2378,
      "step": 5
    },
    {
      "epoch": 0.020366598778004074,
      "grad_norm": 42.78292182490666,
      "learning_rate": 4.000000000000001e-06,
      "loss": 4.1532,
      "step": 10
    },
    {
      "epoch": 0.03054989816700611,
      "grad_norm": 26.956865206535298,
      "learning_rate": 6e-06,
      "loss": 3.7244,
      "step": 15
    },
    {
      "epoch": 0.04073319755600815,
      "grad_norm": 12.021358068486181,
      "learning_rate": 8.000000000000001e-06,
      "loss": 3.0576,
      "step": 20
    },
    {
      "epoch": 0.05091649694501019,
      "grad_norm": 10.364947442111331,
      "learning_rate": 1e-05,
      "loss": 2.5815,
      "step": 25
    },
    {
      "epoch": 0.06109979633401222,
      "grad_norm": 4.530399924565578,
      "learning_rate": 1.2e-05,
      "loss": 2.2026,
      "step": 30
    },
    {
      "epoch": 0.07128309572301425,
      "grad_norm": 3.2260177605870783,
      "learning_rate": 1.4e-05,
      "loss": 1.9904,
      "step": 35
    },
    {
      "epoch": 0.0814663951120163,
      "grad_norm": 2.12005760250828,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.8236,
      "step": 40
    },
    {
      "epoch": 0.09164969450101833,
      "grad_norm": 1.8468068964995024,
      "learning_rate": 1.8e-05,
      "loss": 1.6897,
      "step": 45
    },
    {
      "epoch": 0.10183299389002037,
      "grad_norm": 1.5429747637898503,
      "learning_rate": 2e-05,
      "loss": 1.6026,
      "step": 50
    },
    {
      "epoch": 0.1120162932790224,
      "grad_norm": 1.1896025193659396,
      "learning_rate": 1.9993657117550972e-05,
      "loss": 1.5431,
      "step": 55
    },
    {
      "epoch": 0.12219959266802444,
      "grad_norm": 1.0406079144844387,
      "learning_rate": 1.9974636516635436e-05,
      "loss": 1.4948,
      "step": 60
    },
    {
      "epoch": 0.13238289205702647,
      "grad_norm": 0.9826731697259301,
      "learning_rate": 1.994296232634054e-05,
      "loss": 1.4551,
      "step": 65
    },
    {
      "epoch": 0.1425661914460285,
      "grad_norm": 0.8350404902447894,
      "learning_rate": 1.9898674727799418e-05,
      "loss": 1.4548,
      "step": 70
    },
    {
      "epoch": 0.15274949083503056,
      "grad_norm": 0.8022790771564833,
      "learning_rate": 1.9841829903218377e-05,
      "loss": 1.45,
      "step": 75
    },
    {
      "epoch": 0.1629327902240326,
      "grad_norm": 0.8137274794499317,
      "learning_rate": 1.977249996460544e-05,
      "loss": 1.4206,
      "step": 80
    },
    {
      "epoch": 0.17311608961303462,
      "grad_norm": 0.6702892273400702,
      "learning_rate": 1.969077286229078e-05,
      "loss": 1.4068,
      "step": 85
    },
    {
      "epoch": 0.18329938900203666,
      "grad_norm": 0.6712397854919697,
      "learning_rate": 1.959675227335497e-05,
      "loss": 1.4027,
      "step": 90
    },
    {
      "epoch": 0.1934826883910387,
      "grad_norm": 0.6017420494268232,
      "learning_rate": 1.949055747010669e-05,
      "loss": 1.4036,
      "step": 95
    },
    {
      "epoch": 0.20366598778004075,
      "grad_norm": 0.5946907262324206,
      "learning_rate": 1.937232316877668e-05,
      "loss": 1.4021,
      "step": 100
    },
    {
      "epoch": 0.21384928716904278,
      "grad_norm": 0.584049807273413,
      "learning_rate": 1.9242199358619897e-05,
      "loss": 1.3773,
      "step": 105
    },
    {
      "epoch": 0.2240325865580448,
      "grad_norm": 0.5468171068137498,
      "learning_rate": 1.9100351111642666e-05,
      "loss": 1.3824,
      "step": 110
    },
    {
      "epoch": 0.23421588594704684,
      "grad_norm": 0.5631810496676463,
      "learning_rate": 1.894695837319623e-05,
      "loss": 1.3716,
      "step": 115
    },
    {
      "epoch": 0.24439918533604887,
      "grad_norm": 0.5579171191759417,
      "learning_rate": 1.8782215733702286e-05,
      "loss": 1.3722,
      "step": 120
    },
    {
      "epoch": 0.2545824847250509,
      "grad_norm": 0.5126218869264142,
      "learning_rate": 1.8606332181800165e-05,
      "loss": 1.3523,
      "step": 125
    },
    {
      "epoch": 0.26476578411405294,
      "grad_norm": 0.5007973223598261,
      "learning_rate": 1.841953083922875e-05,
      "loss": 1.3451,
      "step": 130
    },
    {
      "epoch": 0.27494908350305497,
      "grad_norm": 0.5048172409656928,
      "learning_rate": 1.8222048677779495e-05,
      "loss": 1.3534,
      "step": 135
    },
    {
      "epoch": 0.285132382892057,
      "grad_norm": 0.4725169108639936,
      "learning_rate": 1.8014136218679566e-05,
      "loss": 1.3473,
      "step": 140
    },
    {
      "epoch": 0.2953156822810591,
      "grad_norm": 0.48611682020845765,
      "learning_rate": 1.779605721478652e-05,
      "loss": 1.3373,
      "step": 145
    },
    {
      "epoch": 0.3054989816700611,
      "grad_norm": 0.45300491076813515,
      "learning_rate": 1.756808831599762e-05,
      "loss": 1.3513,
      "step": 150
    },
    {
      "epoch": 0.31568228105906315,
      "grad_norm": 0.4382778646436873,
      "learning_rate": 1.7330518718298263e-05,
      "loss": 1.3342,
      "step": 155
    },
    {
      "epoch": 0.3258655804480652,
      "grad_norm": 0.4427941208449195,
      "learning_rate": 1.7083649796894798e-05,
      "loss": 1.3162,
      "step": 160
    },
    {
      "epoch": 0.3360488798370672,
      "grad_norm": 0.416296697128989,
      "learning_rate": 1.6827794723896968e-05,
      "loss": 1.3468,
      "step": 165
    },
    {
      "epoch": 0.34623217922606925,
      "grad_norm": 0.43327582549066207,
      "learning_rate": 1.6563278071035182e-05,
      "loss": 1.3413,
      "step": 170
    },
    {
      "epoch": 0.3564154786150713,
      "grad_norm": 0.39987863733708096,
      "learning_rate": 1.6290435397916426e-05,
      "loss": 1.3275,
      "step": 175
    },
    {
      "epoch": 0.3665987780040733,
      "grad_norm": 0.4107034128984309,
      "learning_rate": 1.6009612826341226e-05,
      "loss": 1.3165,
      "step": 180
    },
    {
      "epoch": 0.37678207739307534,
      "grad_norm": 0.39958981244827124,
      "learning_rate": 1.5721166601221697e-05,
      "loss": 1.3238,
      "step": 185
    },
    {
      "epoch": 0.3869653767820774,
      "grad_norm": 0.40620805351711664,
      "learning_rate": 1.5425462638657597e-05,
      "loss": 1.3265,
      "step": 190
    },
    {
      "epoch": 0.3971486761710794,
      "grad_norm": 0.3930588952515658,
      "learning_rate": 1.5122876061743772e-05,
      "loss": 1.3346,
      "step": 195
    },
    {
      "epoch": 0.4073319755600815,
      "grad_norm": 0.3455411843725847,
      "learning_rate": 1.4813790724697832e-05,
      "loss": 1.2996,
      "step": 200
    },
    {
      "epoch": 0.4073319755600815,
      "eval_loss": 1.3280781507492065,
      "eval_runtime": 58.3849,
      "eval_samples_per_second": 238.195,
      "eval_steps_per_second": 3.734,
      "step": 200
    },
    {
      "epoch": 0.4175152749490835,
      "grad_norm": 0.38779728978307215,
      "learning_rate": 1.4498598725911693e-05,
      "loss": 1.3155,
      "step": 205
    },
    {
      "epoch": 0.42769857433808556,
      "grad_norm": 0.34758190751014123,
      "learning_rate": 1.4177699910544793e-05,
      "loss": 1.3226,
      "step": 210
    },
    {
      "epoch": 0.4378818737270876,
      "grad_norm": 0.347052132116147,
      "learning_rate": 1.3851501363289907e-05,
      "loss": 1.3042,
      "step": 215
    },
    {
      "epoch": 0.4480651731160896,
      "grad_norm": 0.35329848914934636,
      "learning_rate": 1.3520416891955101e-05,
      "loss": 1.3198,
      "step": 220
    },
    {
      "epoch": 0.45824847250509165,
      "grad_norm": 0.3689489097870586,
      "learning_rate": 1.3184866502516846e-05,
      "loss": 1.3295,
      "step": 225
    },
    {
      "epoch": 0.4684317718940937,
      "grad_norm": 0.3591427089290735,
      "learning_rate": 1.2845275866310325e-05,
      "loss": 1.3026,
      "step": 230
    },
    {
      "epoch": 0.4786150712830957,
      "grad_norm": 0.33192490822778736,
      "learning_rate": 1.2502075780032792e-05,
      "loss": 1.3211,
      "step": 235
    },
    {
      "epoch": 0.48879837067209775,
      "grad_norm": 0.3314637232326215,
      "learning_rate": 1.2155701619244997e-05,
      "loss": 1.3146,
      "step": 240
    },
    {
      "epoch": 0.4989816700610998,
      "grad_norm": 0.33428121648270825,
      "learning_rate": 1.1806592786063991e-05,
      "loss": 1.2953,
      "step": 245
    },
    {
      "epoch": 0.5091649694501018,
      "grad_norm": 0.33140728066789105,
      "learning_rate": 1.1455192151747931e-05,
      "loss": 1.3083,
      "step": 250
    },
    {
      "epoch": 0.5193482688391039,
      "grad_norm": 0.34894619572153845,
      "learning_rate": 1.1101945494880013e-05,
      "loss": 1.3001,
      "step": 255
    },
    {
      "epoch": 0.5295315682281059,
      "grad_norm": 0.2936066479920224,
      "learning_rate": 1.0747300935864245e-05,
      "loss": 1.299,
      "step": 260
    },
    {
      "epoch": 0.539714867617108,
      "grad_norm": 0.34761064821495535,
      "learning_rate": 1.0391708368450429e-05,
      "loss": 1.2987,
      "step": 265
    },
    {
      "epoch": 0.5498981670061099,
      "grad_norm": 0.32443077572814916,
      "learning_rate": 1.0035618889009535e-05,
      "loss": 1.3154,
      "step": 270
    },
    {
      "epoch": 0.560081466395112,
      "grad_norm": 0.3573334196924316,
      "learning_rate": 9.67948422428345e-06,
      "loss": 1.3045,
      "step": 275
    },
    {
      "epoch": 0.570264765784114,
      "grad_norm": 0.2929859629265363,
      "learning_rate": 9.323756158335054e-06,
      "loss": 1.2995,
      "step": 280
    },
    {
      "epoch": 0.5804480651731161,
      "grad_norm": 0.3725484551984738,
      "learning_rate": 8.968885959425567e-06,
      "loss": 1.3006,
      "step": 285
    },
    {
      "epoch": 0.5906313645621182,
      "grad_norm": 0.2977272122499357,
      "learning_rate": 8.615323807546258e-06,
      "loss": 1.2785,
      "step": 290
    },
    {
      "epoch": 0.6008146639511202,
      "grad_norm": 0.2934843742212814,
      "learning_rate": 8.263518223330698e-06,
      "loss": 1.301,
      "step": 295
    },
    {
      "epoch": 0.6109979633401222,
      "grad_norm": 0.2851617487482263,
      "learning_rate": 7.913915499071994e-06,
      "loss": 1.2957,
      "step": 300
    },
    {
      "epoch": 0.6211812627291242,
      "grad_norm": 0.3280712460992494,
      "learning_rate": 7.566959132566914e-06,
      "loss": 1.3021,
      "step": 305
    },
    {
      "epoch": 0.6313645621181263,
      "grad_norm": 0.28660010388812523,
      "learning_rate": 7.223089264505001e-06,
      "loss": 1.2809,
      "step": 310
    },
    {
      "epoch": 0.6415478615071283,
      "grad_norm": 0.29055149959882093,
      "learning_rate": 6.882742120116419e-06,
      "loss": 1.2958,
      "step": 315
    },
    {
      "epoch": 0.6517311608961304,
      "grad_norm": 0.2868148780704892,
      "learning_rate": 6.546349455786926e-06,
      "loss": 1.2931,
      "step": 320
    },
    {
      "epoch": 0.6619144602851323,
      "grad_norm": 0.3021008995725944,
      "learning_rate": 6.214338011341825e-06,
      "loss": 1.2855,
      "step": 325
    },
    {
      "epoch": 0.6720977596741344,
      "grad_norm": 0.2840488188778215,
      "learning_rate": 5.887128968693887e-06,
      "loss": 1.3091,
      "step": 330
    },
    {
      "epoch": 0.6822810590631364,
      "grad_norm": 0.2978772611216691,
      "learning_rate": 5.565137417541866e-06,
      "loss": 1.2826,
      "step": 335
    },
    {
      "epoch": 0.6924643584521385,
      "grad_norm": 0.28796550430490037,
      "learning_rate": 5.248771828797474e-06,
      "loss": 1.3008,
      "step": 340
    },
    {
      "epoch": 0.7026476578411406,
      "grad_norm": 0.2791511099905026,
      "learning_rate": 4.938433536408771e-06,
      "loss": 1.2848,
      "step": 345
    },
    {
      "epoch": 0.7128309572301426,
      "grad_norm": 0.282696156100298,
      "learning_rate": 4.634516228237372e-06,
      "loss": 1.291,
      "step": 350
    },
    {
      "epoch": 0.7230142566191446,
      "grad_norm": 0.27007483942462107,
      "learning_rate": 4.337405446635264e-06,
      "loss": 1.2821,
      "step": 355
    },
    {
      "epoch": 0.7331975560081466,
      "grad_norm": 0.2885536779707833,
      "learning_rate": 4.047478099354857e-06,
      "loss": 1.3193,
      "step": 360
    },
    {
      "epoch": 0.7433808553971487,
      "grad_norm": 0.3194820894463116,
      "learning_rate": 3.7651019814126656e-06,
      "loss": 1.2993,
      "step": 365
    },
    {
      "epoch": 0.7535641547861507,
      "grad_norm": 0.28463406586484574,
      "learning_rate": 3.4906353085131917e-06,
      "loss": 1.297,
      "step": 370
    },
    {
      "epoch": 0.7637474541751528,
      "grad_norm": 0.2887353591273086,
      "learning_rate": 3.224426262624908e-06,
      "loss": 1.2887,
      "step": 375
    },
    {
      "epoch": 0.7739307535641547,
      "grad_norm": 0.28334983310000655,
      "learning_rate": 2.9668125502848035e-06,
      "loss": 1.2951,
      "step": 380
    },
    {
      "epoch": 0.7841140529531568,
      "grad_norm": 0.25257521474138683,
      "learning_rate": 2.7181209741918093e-06,
      "loss": 1.2937,
      "step": 385
    },
    {
      "epoch": 0.7942973523421588,
      "grad_norm": 0.2871884940474984,
      "learning_rate": 2.478667018632562e-06,
      "loss": 1.2858,
      "step": 390
    },
    {
      "epoch": 0.8044806517311609,
      "grad_norm": 0.25758574412430923,
      "learning_rate": 2.2487544492654832e-06,
      "loss": 1.3027,
      "step": 395
    },
    {
      "epoch": 0.814663951120163,
      "grad_norm": 0.2840318008095242,
      "learning_rate": 2.0286749277707783e-06,
      "loss": 1.2961,
      "step": 400
    },
    {
      "epoch": 0.814663951120163,
      "eval_loss": 1.297537922859192,
      "eval_runtime": 58.3016,
      "eval_samples_per_second": 238.536,
      "eval_steps_per_second": 3.739,
      "step": 400
    },
    {
      "epoch": 0.824847250509165,
      "grad_norm": 0.3101944505956513,
      "learning_rate": 1.8187076418552974e-06,
      "loss": 1.3014,
      "step": 405
    },
    {
      "epoch": 0.835030549898167,
      "grad_norm": 0.2791695231780548,
      "learning_rate": 1.6191189510815942e-06,
      "loss": 1.2931,
      "step": 410
    },
    {
      "epoch": 0.845213849287169,
      "grad_norm": 0.28415893155261507,
      "learning_rate": 1.4301620489704072e-06,
      "loss": 1.2988,
      "step": 415
    },
    {
      "epoch": 0.8553971486761711,
      "grad_norm": 0.2822535369305096,
      "learning_rate": 1.2520766418053408e-06,
      "loss": 1.2905,
      "step": 420
    },
    {
      "epoch": 0.8655804480651731,
      "grad_norm": 0.28168468277135167,
      "learning_rate": 1.0850886445471055e-06,
      "loss": 1.2817,
      "step": 425
    },
    {
      "epoch": 0.8757637474541752,
      "grad_norm": 0.2655425105216882,
      "learning_rate": 9.294098942430996e-07,
      "loss": 1.2843,
      "step": 430
    },
    {
      "epoch": 0.8859470468431772,
      "grad_norm": 0.2741436677332839,
      "learning_rate": 7.852378812959227e-07,
      "loss": 1.2924,
      "step": 435
    },
    {
      "epoch": 0.8961303462321792,
      "grad_norm": 0.26895584158358093,
      "learning_rate": 6.527554989316898e-07,
      "loss": 1.2935,
      "step": 440
    },
    {
      "epoch": 0.9063136456211812,
      "grad_norm": 0.266002518787797,
      "learning_rate": 5.321308111859791e-07,
      "loss": 1.2857,
      "step": 445
    },
    {
      "epoch": 0.9164969450101833,
      "grad_norm": 0.2650609910106162,
      "learning_rate": 4.235168397017542e-07,
      "loss": 1.2664,
      "step": 450
    },
    {
      "epoch": 0.9266802443991853,
      "grad_norm": 0.2647562856371479,
      "learning_rate": 3.2705136960970554e-07,
      "loss": 1.2988,
      "step": 455
    },
    {
      "epoch": 0.9368635437881874,
      "grad_norm": 0.29894331322023837,
      "learning_rate": 2.4285677473727123e-07,
      "loss": 1.2744,
      "step": 460
    },
    {
      "epoch": 0.9470468431771895,
      "grad_norm": 0.2719080220558317,
      "learning_rate": 1.7103986236807312e-07,
      "loss": 1.2803,
      "step": 465
    },
    {
      "epoch": 0.9572301425661914,
      "grad_norm": 0.2791229463614563,
      "learning_rate": 1.1169173774871478e-07,
      "loss": 1.2906,
      "step": 470
    },
    {
      "epoch": 0.9674134419551935,
      "grad_norm": 0.2987546290973262,
      "learning_rate": 6.488768851480087e-08,
      "loss": 1.2884,
      "step": 475
    },
    {
      "epoch": 0.9775967413441955,
      "grad_norm": 0.30009885175708245,
      "learning_rate": 3.0687089182819264e-08,
      "loss": 1.3051,
      "step": 480
    },
    {
      "epoch": 0.9877800407331976,
      "grad_norm": 0.2873283513110056,
      "learning_rate": 9.13332582901716e-09,
      "loss": 1.286,
      "step": 485
    },
    {
      "epoch": 0.9979633401221996,
      "grad_norm": 0.272339006461075,
      "learning_rate": 2.5374105085518297e-10,
      "loss": 1.2873,
      "step": 490
    },
    {
      "epoch": 1.0,
      "step": 491,
      "total_flos": 154920007237632.0,
      "train_loss": 1.4646558688760047,
      "train_runtime": 2127.1747,
      "train_samples_per_second": 59.07,
      "train_steps_per_second": 0.231
    }
  ],
  "logging_steps": 5,
  "max_steps": 491,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 154920007237632.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|