{
  "best_metric": 10.333786964416504,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.11312217194570136,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005656108597285068,
      "grad_norm": 0.062021128833293915,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 10.38,
      "step": 1
    },
    {
      "epoch": 0.0005656108597285068,
      "eval_loss": 10.380452156066895,
      "eval_runtime": 8.0865,
      "eval_samples_per_second": 2945.633,
      "eval_steps_per_second": 92.128,
      "step": 1
    },
    {
      "epoch": 0.0011312217194570137,
      "grad_norm": 0.053750570863485336,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 10.3795,
      "step": 2
    },
    {
      "epoch": 0.0016968325791855204,
      "grad_norm": 0.05637124180793762,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 10.3798,
      "step": 3
    },
    {
      "epoch": 0.0022624434389140274,
      "grad_norm": 0.05955745279788971,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 10.3783,
      "step": 4
    },
    {
      "epoch": 0.002828054298642534,
      "grad_norm": 0.05484645813703537,
      "learning_rate": 5.05e-05,
      "loss": 10.3809,
      "step": 5
    },
    {
      "epoch": 0.003393665158371041,
      "grad_norm": 0.0704905167222023,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 10.3812,
      "step": 6
    },
    {
      "epoch": 0.003959276018099547,
      "grad_norm": 0.06389003992080688,
      "learning_rate": 7.07e-05,
      "loss": 10.3791,
      "step": 7
    },
    {
      "epoch": 0.004524886877828055,
      "grad_norm": 0.05992201343178749,
      "learning_rate": 8.080000000000001e-05,
      "loss": 10.3783,
      "step": 8
    },
    {
      "epoch": 0.005090497737556561,
      "grad_norm": 0.06138139218091965,
      "learning_rate": 9.09e-05,
      "loss": 10.3794,
      "step": 9
    },
    {
      "epoch": 0.005656108597285068,
      "grad_norm": 0.06030235439538956,
      "learning_rate": 0.000101,
      "loss": 10.3799,
      "step": 10
    },
    {
      "epoch": 0.006221719457013574,
      "grad_norm": 0.06504027545452118,
      "learning_rate": 0.00010046842105263158,
      "loss": 10.3793,
      "step": 11
    },
    {
      "epoch": 0.006787330316742082,
      "grad_norm": 0.0788484662771225,
      "learning_rate": 9.993684210526315e-05,
      "loss": 10.3778,
      "step": 12
    },
    {
      "epoch": 0.007352941176470588,
      "grad_norm": 0.06486255675554276,
      "learning_rate": 9.940526315789473e-05,
      "loss": 10.377,
      "step": 13
    },
    {
      "epoch": 0.007918552036199095,
      "grad_norm": 0.07012791931629181,
      "learning_rate": 9.887368421052632e-05,
      "loss": 10.3765,
      "step": 14
    },
    {
      "epoch": 0.008484162895927601,
      "grad_norm": 0.07867205142974854,
      "learning_rate": 9.83421052631579e-05,
      "loss": 10.3772,
      "step": 15
    },
    {
      "epoch": 0.00904977375565611,
      "grad_norm": 0.07887768745422363,
      "learning_rate": 9.781052631578948e-05,
      "loss": 10.3753,
      "step": 16
    },
    {
      "epoch": 0.009615384615384616,
      "grad_norm": 0.08439292013645172,
      "learning_rate": 9.727894736842106e-05,
      "loss": 10.3755,
      "step": 17
    },
    {
      "epoch": 0.010180995475113122,
      "grad_norm": 0.09982705861330032,
      "learning_rate": 9.674736842105263e-05,
      "loss": 10.3748,
      "step": 18
    },
    {
      "epoch": 0.010746606334841629,
      "grad_norm": 0.0821399837732315,
      "learning_rate": 9.621578947368421e-05,
      "loss": 10.3751,
      "step": 19
    },
    {
      "epoch": 0.011312217194570135,
      "grad_norm": 0.10993044823408127,
      "learning_rate": 9.568421052631578e-05,
      "loss": 10.3726,
      "step": 20
    },
    {
      "epoch": 0.011877828054298642,
      "grad_norm": 0.09375250339508057,
      "learning_rate": 9.515263157894737e-05,
      "loss": 10.3728,
      "step": 21
    },
    {
      "epoch": 0.012443438914027148,
      "grad_norm": 0.10467270761728287,
      "learning_rate": 9.462105263157895e-05,
      "loss": 10.3732,
      "step": 22
    },
    {
      "epoch": 0.013009049773755657,
      "grad_norm": 0.11246021836996078,
      "learning_rate": 9.408947368421054e-05,
      "loss": 10.3718,
      "step": 23
    },
    {
      "epoch": 0.013574660633484163,
      "grad_norm": 0.1300632804632187,
      "learning_rate": 9.355789473684211e-05,
      "loss": 10.3706,
      "step": 24
    },
    {
      "epoch": 0.01414027149321267,
      "grad_norm": 0.16326434910297394,
      "learning_rate": 9.302631578947369e-05,
      "loss": 10.3704,
      "step": 25
    },
    {
      "epoch": 0.014705882352941176,
      "grad_norm": 0.13459563255310059,
      "learning_rate": 9.249473684210526e-05,
      "loss": 10.3687,
      "step": 26
    },
    {
      "epoch": 0.015271493212669683,
      "grad_norm": 0.12868914008140564,
      "learning_rate": 9.196315789473685e-05,
      "loss": 10.3698,
      "step": 27
    },
    {
      "epoch": 0.01583710407239819,
      "grad_norm": 0.14536577463150024,
      "learning_rate": 9.143157894736843e-05,
      "loss": 10.3672,
      "step": 28
    },
    {
      "epoch": 0.016402714932126698,
      "grad_norm": 0.14456918835639954,
      "learning_rate": 9.09e-05,
      "loss": 10.3665,
      "step": 29
    },
    {
      "epoch": 0.016968325791855202,
      "grad_norm": 0.15334004163742065,
      "learning_rate": 9.036842105263158e-05,
      "loss": 10.3661,
      "step": 30
    },
    {
      "epoch": 0.01753393665158371,
      "grad_norm": 0.18508021533489227,
      "learning_rate": 8.983684210526316e-05,
      "loss": 10.3633,
      "step": 31
    },
    {
      "epoch": 0.01809954751131222,
      "grad_norm": 0.15807212889194489,
      "learning_rate": 8.930526315789474e-05,
      "loss": 10.3645,
      "step": 32
    },
    {
      "epoch": 0.018665158371040724,
      "grad_norm": 0.15088380873203278,
      "learning_rate": 8.877368421052632e-05,
      "loss": 10.3629,
      "step": 33
    },
    {
      "epoch": 0.019230769230769232,
      "grad_norm": 0.1459580808877945,
      "learning_rate": 8.82421052631579e-05,
      "loss": 10.3616,
      "step": 34
    },
    {
      "epoch": 0.019796380090497737,
      "grad_norm": 0.14841324090957642,
      "learning_rate": 8.771052631578948e-05,
      "loss": 10.3609,
      "step": 35
    },
    {
      "epoch": 0.020361990950226245,
      "grad_norm": 0.15354986488819122,
      "learning_rate": 8.717894736842105e-05,
      "loss": 10.3602,
      "step": 36
    },
    {
      "epoch": 0.02092760180995475,
      "grad_norm": 0.17225728929042816,
      "learning_rate": 8.664736842105263e-05,
      "loss": 10.3598,
      "step": 37
    },
    {
      "epoch": 0.021493212669683258,
      "grad_norm": 0.1513800323009491,
      "learning_rate": 8.61157894736842e-05,
      "loss": 10.3605,
      "step": 38
    },
    {
      "epoch": 0.022058823529411766,
      "grad_norm": 0.14970330893993378,
      "learning_rate": 8.55842105263158e-05,
      "loss": 10.3568,
      "step": 39
    },
    {
      "epoch": 0.02262443438914027,
      "grad_norm": 0.14152267575263977,
      "learning_rate": 8.505263157894737e-05,
      "loss": 10.3571,
      "step": 40
    },
    {
      "epoch": 0.02319004524886878,
      "grad_norm": 0.11801969259977341,
      "learning_rate": 8.452105263157896e-05,
      "loss": 10.3565,
      "step": 41
    },
    {
      "epoch": 0.023755656108597284,
      "grad_norm": 0.13193698227405548,
      "learning_rate": 8.398947368421053e-05,
      "loss": 10.3548,
      "step": 42
    },
    {
      "epoch": 0.024321266968325792,
      "grad_norm": 0.12654702365398407,
      "learning_rate": 8.345789473684211e-05,
      "loss": 10.3526,
      "step": 43
    },
    {
      "epoch": 0.024886877828054297,
      "grad_norm": 0.12153801321983337,
      "learning_rate": 8.292631578947368e-05,
      "loss": 10.3572,
      "step": 44
    },
    {
      "epoch": 0.025452488687782805,
      "grad_norm": 0.11495444178581238,
      "learning_rate": 8.239473684210526e-05,
      "loss": 10.3546,
      "step": 45
    },
    {
      "epoch": 0.026018099547511313,
      "grad_norm": 0.09821103513240814,
      "learning_rate": 8.186315789473683e-05,
      "loss": 10.3542,
      "step": 46
    },
    {
      "epoch": 0.026583710407239818,
      "grad_norm": 0.09399436414241791,
      "learning_rate": 8.133157894736842e-05,
      "loss": 10.3539,
      "step": 47
    },
    {
      "epoch": 0.027149321266968326,
      "grad_norm": 0.10562458634376526,
      "learning_rate": 8.080000000000001e-05,
      "loss": 10.3534,
      "step": 48
    },
    {
      "epoch": 0.02771493212669683,
      "grad_norm": 0.10220655053853989,
      "learning_rate": 8.026842105263159e-05,
      "loss": 10.3517,
      "step": 49
    },
    {
      "epoch": 0.02828054298642534,
      "grad_norm": 0.13333728909492493,
      "learning_rate": 7.973684210526316e-05,
      "loss": 10.3467,
      "step": 50
    },
    {
      "epoch": 0.02828054298642534,
      "eval_loss": 10.350386619567871,
      "eval_runtime": 8.1717,
      "eval_samples_per_second": 2914.943,
      "eval_steps_per_second": 91.168,
      "step": 50
    },
    {
      "epoch": 0.028846153846153848,
      "grad_norm": 0.12691977620124817,
      "learning_rate": 7.920526315789474e-05,
      "loss": 10.3513,
      "step": 51
    },
    {
      "epoch": 0.029411764705882353,
      "grad_norm": 0.08717770874500275,
      "learning_rate": 7.867368421052631e-05,
      "loss": 10.3522,
      "step": 52
    },
    {
      "epoch": 0.02997737556561086,
      "grad_norm": 0.06944411247968674,
      "learning_rate": 7.814210526315789e-05,
      "loss": 10.3525,
      "step": 53
    },
    {
      "epoch": 0.030542986425339366,
      "grad_norm": 0.07963120192289352,
      "learning_rate": 7.761052631578946e-05,
      "loss": 10.3511,
      "step": 54
    },
    {
      "epoch": 0.031108597285067874,
      "grad_norm": 0.0897977203130722,
      "learning_rate": 7.707894736842105e-05,
      "loss": 10.3484,
      "step": 55
    },
    {
      "epoch": 0.03167420814479638,
      "grad_norm": 0.08346674591302872,
      "learning_rate": 7.654736842105264e-05,
      "loss": 10.3466,
      "step": 56
    },
    {
      "epoch": 0.03223981900452489,
      "grad_norm": 0.11057797819375992,
      "learning_rate": 7.601578947368422e-05,
      "loss": 10.3507,
      "step": 57
    },
    {
      "epoch": 0.032805429864253395,
      "grad_norm": 0.07332270592451096,
      "learning_rate": 7.548421052631579e-05,
      "loss": 10.3516,
      "step": 58
    },
    {
      "epoch": 0.0333710407239819,
      "grad_norm": 0.05284840986132622,
      "learning_rate": 7.495263157894737e-05,
      "loss": 10.3498,
      "step": 59
    },
    {
      "epoch": 0.033936651583710405,
      "grad_norm": 0.06752610951662064,
      "learning_rate": 7.442105263157894e-05,
      "loss": 10.3497,
      "step": 60
    },
    {
      "epoch": 0.034502262443438916,
      "grad_norm": 0.06512323766946793,
      "learning_rate": 7.388947368421053e-05,
      "loss": 10.3478,
      "step": 61
    },
    {
      "epoch": 0.03506787330316742,
      "grad_norm": 0.0671728327870369,
      "learning_rate": 7.335789473684211e-05,
      "loss": 10.3461,
      "step": 62
    },
    {
      "epoch": 0.035633484162895926,
      "grad_norm": 0.07600655406713486,
      "learning_rate": 7.282631578947368e-05,
      "loss": 10.3527,
      "step": 63
    },
    {
      "epoch": 0.03619909502262444,
      "grad_norm": 0.0868535041809082,
      "learning_rate": 7.229473684210527e-05,
      "loss": 10.3483,
      "step": 64
    },
    {
      "epoch": 0.03676470588235294,
      "grad_norm": 0.05451541021466255,
      "learning_rate": 7.176315789473685e-05,
      "loss": 10.347,
      "step": 65
    },
    {
      "epoch": 0.03733031674208145,
      "grad_norm": 0.0499880388379097,
      "learning_rate": 7.123157894736842e-05,
      "loss": 10.348,
      "step": 66
    },
    {
      "epoch": 0.03789592760180995,
      "grad_norm": 0.056676704436540604,
      "learning_rate": 7.07e-05,
      "loss": 10.3474,
      "step": 67
    },
    {
      "epoch": 0.038461538461538464,
      "grad_norm": 0.06582105159759521,
      "learning_rate": 7.016842105263159e-05,
      "loss": 10.3454,
      "step": 68
    },
    {
      "epoch": 0.03902714932126697,
      "grad_norm": 0.07939665019512177,
      "learning_rate": 6.963684210526316e-05,
      "loss": 10.3508,
      "step": 69
    },
    {
      "epoch": 0.03959276018099547,
      "grad_norm": 0.07071398943662643,
      "learning_rate": 6.910526315789474e-05,
      "loss": 10.3478,
      "step": 70
    },
    {
      "epoch": 0.040158371040723985,
      "grad_norm": 0.05655914917588234,
      "learning_rate": 6.857368421052631e-05,
      "loss": 10.3488,
      "step": 71
    },
    {
      "epoch": 0.04072398190045249,
      "grad_norm": 0.04718935862183571,
      "learning_rate": 6.80421052631579e-05,
      "loss": 10.3471,
      "step": 72
    },
    {
      "epoch": 0.041289592760180995,
      "grad_norm": 0.05402664840221405,
      "learning_rate": 6.751052631578948e-05,
      "loss": 10.3492,
      "step": 73
    },
    {
      "epoch": 0.0418552036199095,
      "grad_norm": 0.06643565744161606,
      "learning_rate": 6.697894736842105e-05,
      "loss": 10.3456,
      "step": 74
    },
    {
      "epoch": 0.04242081447963801,
      "grad_norm": 0.07058984041213989,
      "learning_rate": 6.644736842105264e-05,
      "loss": 10.3428,
      "step": 75
    },
    {
      "epoch": 0.042986425339366516,
      "grad_norm": 0.09027786552906036,
      "learning_rate": 6.591578947368422e-05,
      "loss": 10.3491,
      "step": 76
    },
    {
      "epoch": 0.04355203619909502,
      "grad_norm": 0.047901950776576996,
      "learning_rate": 6.538421052631579e-05,
      "loss": 10.349,
      "step": 77
    },
    {
      "epoch": 0.04411764705882353,
      "grad_norm": 0.04141535609960556,
      "learning_rate": 6.485263157894737e-05,
      "loss": 10.3488,
      "step": 78
    },
    {
      "epoch": 0.04468325791855204,
      "grad_norm": 0.04282054677605629,
      "learning_rate": 6.432105263157894e-05,
      "loss": 10.3473,
      "step": 79
    },
    {
      "epoch": 0.04524886877828054,
      "grad_norm": 0.05405720695853233,
      "learning_rate": 6.378947368421053e-05,
      "loss": 10.3454,
      "step": 80
    },
    {
      "epoch": 0.04581447963800905,
      "grad_norm": 0.06005045026540756,
      "learning_rate": 6.32578947368421e-05,
      "loss": 10.3433,
      "step": 81
    },
    {
      "epoch": 0.04638009049773756,
      "grad_norm": 0.10236992686986923,
      "learning_rate": 6.27263157894737e-05,
      "loss": 10.35,
      "step": 82
    },
    {
      "epoch": 0.04694570135746606,
      "grad_norm": 0.07769302278757095,
      "learning_rate": 6.219473684210527e-05,
      "loss": 10.3463,
      "step": 83
    },
    {
      "epoch": 0.04751131221719457,
      "grad_norm": 0.03876901790499687,
      "learning_rate": 6.166315789473685e-05,
      "loss": 10.3472,
      "step": 84
    },
    {
      "epoch": 0.04807692307692308,
      "grad_norm": 0.03502112627029419,
      "learning_rate": 6.113157894736842e-05,
      "loss": 10.3474,
      "step": 85
    },
    {
      "epoch": 0.048642533936651584,
      "grad_norm": 0.06084531545639038,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 10.3461,
      "step": 86
    },
    {
      "epoch": 0.04920814479638009,
      "grad_norm": 0.07850203663110733,
      "learning_rate": 6.006842105263158e-05,
      "loss": 10.3443,
      "step": 87
    },
    {
      "epoch": 0.049773755656108594,
      "grad_norm": 0.09611647576093674,
      "learning_rate": 5.953684210526315e-05,
      "loss": 10.3509,
      "step": 88
    },
    {
      "epoch": 0.050339366515837106,
      "grad_norm": 0.07283317297697067,
      "learning_rate": 5.900526315789474e-05,
      "loss": 10.3477,
      "step": 89
    },
    {
      "epoch": 0.05090497737556561,
      "grad_norm": 0.04070362076163292,
      "learning_rate": 5.847368421052632e-05,
      "loss": 10.3477,
      "step": 90
    },
    {
      "epoch": 0.051470588235294115,
      "grad_norm": 0.04328368604183197,
      "learning_rate": 5.79421052631579e-05,
      "loss": 10.3458,
      "step": 91
    },
    {
      "epoch": 0.05203619909502263,
      "grad_norm": 0.048710327595472336,
      "learning_rate": 5.7410526315789475e-05,
      "loss": 10.3459,
      "step": 92
    },
    {
      "epoch": 0.05260180995475113,
      "grad_norm": 0.07019612193107605,
      "learning_rate": 5.687894736842105e-05,
      "loss": 10.3449,
      "step": 93
    },
    {
      "epoch": 0.053167420814479636,
      "grad_norm": 0.07387306541204453,
      "learning_rate": 5.6347368421052625e-05,
      "loss": 10.3485,
      "step": 94
    },
    {
      "epoch": 0.05373303167420815,
      "grad_norm": 0.08147264271974564,
      "learning_rate": 5.5815789473684214e-05,
      "loss": 10.347,
      "step": 95
    },
    {
      "epoch": 0.05429864253393665,
      "grad_norm": 0.06585867702960968,
      "learning_rate": 5.5284210526315796e-05,
      "loss": 10.3463,
      "step": 96
    },
    {
      "epoch": 0.05486425339366516,
      "grad_norm": 0.046585794538259506,
      "learning_rate": 5.475263157894737e-05,
      "loss": 10.3451,
      "step": 97
    },
    {
      "epoch": 0.05542986425339366,
      "grad_norm": 0.05905357748270035,
      "learning_rate": 5.422105263157895e-05,
      "loss": 10.3458,
      "step": 98
    },
    {
      "epoch": 0.055995475113122174,
      "grad_norm": 0.06317008286714554,
      "learning_rate": 5.368947368421053e-05,
      "loss": 10.3458,
      "step": 99
    },
    {
      "epoch": 0.05656108597285068,
      "grad_norm": 0.08555221557617188,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 10.3401,
      "step": 100
    },
    {
      "epoch": 0.05656108597285068,
      "eval_loss": 10.344589233398438,
      "eval_runtime": 8.2091,
      "eval_samples_per_second": 2901.663,
      "eval_steps_per_second": 90.753,
      "step": 100
    },
    {
      "epoch": 0.057126696832579184,
      "grad_norm": 0.09945505112409592,
      "learning_rate": 5.262631578947368e-05,
      "loss": 10.3451,
      "step": 101
    },
    {
      "epoch": 0.057692307692307696,
      "grad_norm": 0.04811963438987732,
      "learning_rate": 5.209473684210527e-05,
      "loss": 10.3452,
      "step": 102
    },
    {
      "epoch": 0.0582579185520362,
      "grad_norm": 0.043795328587293625,
      "learning_rate": 5.1563157894736844e-05,
      "loss": 10.3436,
      "step": 103
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 0.05168459936976433,
      "learning_rate": 5.1031578947368426e-05,
      "loss": 10.3442,
      "step": 104
    },
    {
      "epoch": 0.05938914027149321,
      "grad_norm": 0.06889228522777557,
      "learning_rate": 5.05e-05,
      "loss": 10.344,
      "step": 105
    },
    {
      "epoch": 0.05995475113122172,
      "grad_norm": 0.07568846642971039,
      "learning_rate": 4.9968421052631576e-05,
      "loss": 10.3411,
      "step": 106
    },
    {
      "epoch": 0.060520361990950226,
      "grad_norm": 0.09742461889982224,
      "learning_rate": 4.943684210526316e-05,
      "loss": 10.3466,
      "step": 107
    },
    {
      "epoch": 0.06108597285067873,
      "grad_norm": 0.08516088128089905,
      "learning_rate": 4.890526315789474e-05,
      "loss": 10.3427,
      "step": 108
    },
    {
      "epoch": 0.06165158371040724,
      "grad_norm": 0.05197165533900261,
      "learning_rate": 4.8373684210526316e-05,
      "loss": 10.3429,
      "step": 109
    },
    {
      "epoch": 0.06221719457013575,
      "grad_norm": 0.05537767708301544,
      "learning_rate": 4.784210526315789e-05,
      "loss": 10.3442,
      "step": 110
    },
    {
      "epoch": 0.06278280542986425,
      "grad_norm": 0.06518259644508362,
      "learning_rate": 4.731052631578947e-05,
      "loss": 10.3444,
      "step": 111
    },
    {
      "epoch": 0.06334841628959276,
      "grad_norm": 0.06740637868642807,
      "learning_rate": 4.6778947368421055e-05,
      "loss": 10.3408,
      "step": 112
    },
    {
      "epoch": 0.06391402714932126,
      "grad_norm": 0.09538576751947403,
      "learning_rate": 4.624736842105263e-05,
      "loss": 10.3463,
      "step": 113
    },
    {
      "epoch": 0.06447963800904978,
      "grad_norm": 0.08066005259752274,
      "learning_rate": 4.571578947368421e-05,
      "loss": 10.3457,
      "step": 114
    },
    {
      "epoch": 0.06504524886877829,
      "grad_norm": 0.06505700945854187,
      "learning_rate": 4.518421052631579e-05,
      "loss": 10.3432,
      "step": 115
    },
    {
      "epoch": 0.06561085972850679,
      "grad_norm": 0.05407899618148804,
      "learning_rate": 4.465263157894737e-05,
      "loss": 10.3417,
      "step": 116
    },
    {
      "epoch": 0.0661764705882353,
      "grad_norm": 0.06977938115596771,
      "learning_rate": 4.412105263157895e-05,
      "loss": 10.3432,
      "step": 117
    },
    {
      "epoch": 0.0667420814479638,
      "grad_norm": 0.07747713476419449,
      "learning_rate": 4.358947368421053e-05,
      "loss": 10.3408,
      "step": 118
    },
    {
      "epoch": 0.0673076923076923,
      "grad_norm": 0.09030125290155411,
      "learning_rate": 4.30578947368421e-05,
      "loss": 10.3474,
      "step": 119
    },
    {
      "epoch": 0.06787330316742081,
      "grad_norm": 0.08575378358364105,
      "learning_rate": 4.2526315789473685e-05,
      "loss": 10.3429,
      "step": 120
    },
    {
      "epoch": 0.06843891402714933,
      "grad_norm": 0.06714797765016556,
      "learning_rate": 4.199473684210527e-05,
      "loss": 10.3433,
      "step": 121
    },
    {
      "epoch": 0.06900452488687783,
      "grad_norm": 0.05677073448896408,
      "learning_rate": 4.146315789473684e-05,
      "loss": 10.3432,
      "step": 122
    },
    {
      "epoch": 0.06957013574660634,
      "grad_norm": 0.059628356248140335,
      "learning_rate": 4.093157894736842e-05,
      "loss": 10.3412,
      "step": 123
    },
    {
      "epoch": 0.07013574660633484,
      "grad_norm": 0.06992416828870773,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 10.3418,
      "step": 124
    },
    {
      "epoch": 0.07070135746606335,
      "grad_norm": 0.09124021232128143,
      "learning_rate": 3.986842105263158e-05,
      "loss": 10.3367,
      "step": 125
    },
    {
      "epoch": 0.07126696832579185,
      "grad_norm": 0.10908302664756775,
      "learning_rate": 3.933684210526316e-05,
      "loss": 10.3426,
      "step": 126
    },
    {
      "epoch": 0.07183257918552036,
      "grad_norm": 0.08162763714790344,
      "learning_rate": 3.880526315789473e-05,
      "loss": 10.3418,
      "step": 127
    },
    {
      "epoch": 0.07239819004524888,
      "grad_norm": 0.05531831085681915,
      "learning_rate": 3.827368421052632e-05,
      "loss": 10.3389,
      "step": 128
    },
    {
      "epoch": 0.07296380090497738,
      "grad_norm": 0.06366059929132462,
      "learning_rate": 3.7742105263157896e-05,
      "loss": 10.338,
      "step": 129
    },
    {
      "epoch": 0.07352941176470588,
      "grad_norm": 0.06867145001888275,
      "learning_rate": 3.721052631578947e-05,
      "loss": 10.3417,
      "step": 130
    },
    {
      "epoch": 0.07409502262443439,
      "grad_norm": 0.10291559994220734,
      "learning_rate": 3.6678947368421054e-05,
      "loss": 10.3357,
      "step": 131
    },
    {
      "epoch": 0.0746606334841629,
      "grad_norm": 0.09882048517465591,
      "learning_rate": 3.6147368421052636e-05,
      "loss": 10.3433,
      "step": 132
    },
    {
      "epoch": 0.0752262443438914,
      "grad_norm": 0.07626540958881378,
      "learning_rate": 3.561578947368421e-05,
      "loss": 10.3397,
      "step": 133
    },
    {
      "epoch": 0.0757918552036199,
      "grad_norm": 0.04596826434135437,
      "learning_rate": 3.508421052631579e-05,
      "loss": 10.3394,
      "step": 134
    },
    {
      "epoch": 0.07635746606334842,
      "grad_norm": 0.0479527972638607,
      "learning_rate": 3.455263157894737e-05,
      "loss": 10.3388,
      "step": 135
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.07407166063785553,
      "learning_rate": 3.402105263157895e-05,
      "loss": 10.34,
      "step": 136
    },
    {
      "epoch": 0.07748868778280543,
      "grad_norm": 0.09090636670589447,
      "learning_rate": 3.3489473684210526e-05,
      "loss": 10.3363,
      "step": 137
    },
    {
      "epoch": 0.07805429864253394,
      "grad_norm": 0.08997251093387604,
      "learning_rate": 3.295789473684211e-05,
      "loss": 10.3424,
      "step": 138
    },
    {
      "epoch": 0.07861990950226244,
      "grad_norm": 0.07923514395952225,
      "learning_rate": 3.242631578947368e-05,
      "loss": 10.3398,
      "step": 139
    },
    {
      "epoch": 0.07918552036199095,
      "grad_norm": 0.07008980959653854,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 10.3375,
      "step": 140
    },
    {
      "epoch": 0.07975113122171945,
      "grad_norm": 0.049700357019901276,
      "learning_rate": 3.136315789473685e-05,
      "loss": 10.3388,
      "step": 141
    },
    {
      "epoch": 0.08031674208144797,
      "grad_norm": 0.06060990318655968,
      "learning_rate": 3.083157894736842e-05,
      "loss": 10.3363,
      "step": 142
    },
    {
      "epoch": 0.08088235294117647,
      "grad_norm": 0.08178900182247162,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 10.3372,
      "step": 143
    },
    {
      "epoch": 0.08144796380090498,
      "grad_norm": 0.09628574550151825,
      "learning_rate": 2.9768421052631577e-05,
      "loss": 10.3433,
      "step": 144
    },
    {
      "epoch": 0.08201357466063348,
      "grad_norm": 0.08116014301776886,
      "learning_rate": 2.923684210526316e-05,
      "loss": 10.3396,
      "step": 145
    },
    {
      "epoch": 0.08257918552036199,
      "grad_norm": 0.05855511128902435,
      "learning_rate": 2.8705263157894737e-05,
      "loss": 10.3388,
      "step": 146
    },
    {
      "epoch": 0.0831447963800905,
      "grad_norm": 0.045891135931015015,
      "learning_rate": 2.8173684210526313e-05,
      "loss": 10.3368,
      "step": 147
    },
    {
      "epoch": 0.083710407239819,
      "grad_norm": 0.06934593617916107,
      "learning_rate": 2.7642105263157898e-05,
      "loss": 10.3369,
      "step": 148
    },
    {
      "epoch": 0.08427601809954752,
      "grad_norm": 0.07505083084106445,
      "learning_rate": 2.7110526315789473e-05,
      "loss": 10.3353,
      "step": 149
    },
    {
      "epoch": 0.08484162895927602,
      "grad_norm": 0.09443920105695724,
      "learning_rate": 2.6578947368421052e-05,
      "loss": 10.3336,
      "step": 150
    },
    {
      "epoch": 0.08484162895927602,
      "eval_loss": 10.336673736572266,
      "eval_runtime": 8.6605,
      "eval_samples_per_second": 2750.409,
      "eval_steps_per_second": 86.022,
      "step": 150
    },
    {
      "epoch": 0.08540723981900453,
      "grad_norm": 0.10061606764793396,
      "learning_rate": 2.6047368421052634e-05,
      "loss": 10.3406,
      "step": 151
    },
    {
      "epoch": 0.08597285067873303,
      "grad_norm": 0.06028294190764427,
      "learning_rate": 2.5515789473684213e-05,
      "loss": 10.3384,
      "step": 152
    },
    {
      "epoch": 0.08653846153846154,
      "grad_norm": 0.0465410090982914,
      "learning_rate": 2.4984210526315788e-05,
      "loss": 10.3362,
      "step": 153
    },
    {
      "epoch": 0.08710407239819004,
      "grad_norm": 0.05667021870613098,
      "learning_rate": 2.445263157894737e-05,
      "loss": 10.3345,
      "step": 154
    },
    {
      "epoch": 0.08766968325791855,
      "grad_norm": 0.06780000030994415,
      "learning_rate": 2.3921052631578946e-05,
      "loss": 10.3364,
      "step": 155
    },
    {
      "epoch": 0.08823529411764706,
      "grad_norm": 0.09162377566099167,
      "learning_rate": 2.3389473684210528e-05,
      "loss": 10.3332,
      "step": 156
    },
    {
      "epoch": 0.08880090497737557,
      "grad_norm": 0.0900709331035614,
      "learning_rate": 2.2857894736842106e-05,
      "loss": 10.3398,
      "step": 157
    },
    {
      "epoch": 0.08936651583710407,
      "grad_norm": 0.06955718994140625,
      "learning_rate": 2.2326315789473685e-05,
      "loss": 10.338,
      "step": 158
    },
    {
      "epoch": 0.08993212669683258,
      "grad_norm": 0.0517783984541893,
      "learning_rate": 2.1794736842105264e-05,
      "loss": 10.3337,
      "step": 159
    },
    {
      "epoch": 0.09049773755656108,
      "grad_norm": 0.05297861248254776,
      "learning_rate": 2.1263157894736842e-05,
      "loss": 10.3348,
      "step": 160
    },
    {
      "epoch": 0.09106334841628959,
      "grad_norm": 0.08668467402458191,
      "learning_rate": 2.073157894736842e-05,
      "loss": 10.3373,
      "step": 161
    },
    {
      "epoch": 0.0916289592760181,
      "grad_norm": 0.08060705661773682,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 10.3338,
      "step": 162
    },
    {
      "epoch": 0.09219457013574661,
      "grad_norm": 0.08586835861206055,
      "learning_rate": 1.966842105263158e-05,
      "loss": 10.3389,
      "step": 163
    },
    {
      "epoch": 0.09276018099547512,
      "grad_norm": 0.07375074923038483,
      "learning_rate": 1.913684210526316e-05,
      "loss": 10.3386,
      "step": 164
    },
    {
      "epoch": 0.09332579185520362,
      "grad_norm": 0.046294908970594406,
      "learning_rate": 1.8605263157894736e-05,
      "loss": 10.3356,
      "step": 165
    },
    {
      "epoch": 0.09389140271493213,
      "grad_norm": 0.04964763671159744,
      "learning_rate": 1.8073684210526318e-05,
      "loss": 10.3364,
      "step": 166
    },
    {
      "epoch": 0.09445701357466063,
      "grad_norm": 0.06984888017177582,
      "learning_rate": 1.7542105263157897e-05,
      "loss": 10.3365,
      "step": 167
    },
    {
      "epoch": 0.09502262443438914,
      "grad_norm": 0.07992294430732727,
      "learning_rate": 1.7010526315789475e-05,
      "loss": 10.3328,
      "step": 168
    },
    {
      "epoch": 0.09558823529411764,
      "grad_norm": 0.08656508475542068,
      "learning_rate": 1.6478947368421054e-05,
      "loss": 10.3398,
      "step": 169
    },
    {
      "epoch": 0.09615384615384616,
      "grad_norm": 0.07822012156248093,
      "learning_rate": 1.5947368421052633e-05,
      "loss": 10.3356,
      "step": 170
    },
    {
      "epoch": 0.09671945701357466,
      "grad_norm": 0.05577806010842323,
      "learning_rate": 1.541578947368421e-05,
      "loss": 10.3354,
      "step": 171
    },
    {
      "epoch": 0.09728506787330317,
      "grad_norm": 0.05001489445567131,
      "learning_rate": 1.4884210526315788e-05,
      "loss": 10.335,
      "step": 172
    },
    {
      "epoch": 0.09785067873303167,
      "grad_norm": 0.07180096209049225,
      "learning_rate": 1.4352631578947369e-05,
      "loss": 10.3366,
      "step": 173
    },
    {
      "epoch": 0.09841628959276018,
      "grad_norm": 0.08262878656387329,
      "learning_rate": 1.3821052631578949e-05,
      "loss": 10.3344,
      "step": 174
    },
    {
      "epoch": 0.09898190045248868,
      "grad_norm": 0.08866975456476212,
      "learning_rate": 1.3289473684210526e-05,
      "loss": 10.3304,
      "step": 175
    },
    {
      "epoch": 0.09954751131221719,
      "grad_norm": 0.08908353745937347,
      "learning_rate": 1.2757894736842106e-05,
      "loss": 10.3387,
      "step": 176
    },
    {
      "epoch": 0.1001131221719457,
      "grad_norm": 0.07283877581357956,
      "learning_rate": 1.2226315789473685e-05,
      "loss": 10.3373,
      "step": 177
    },
    {
      "epoch": 0.10067873303167421,
      "grad_norm": 0.04923555254936218,
      "learning_rate": 1.1694736842105264e-05,
      "loss": 10.3338,
      "step": 178
    },
    {
      "epoch": 0.10124434389140272,
      "grad_norm": 0.056405432522296906,
      "learning_rate": 1.1163157894736842e-05,
      "loss": 10.3338,
      "step": 179
    },
    {
      "epoch": 0.10180995475113122,
      "grad_norm": 0.08115344494581223,
      "learning_rate": 1.0631578947368421e-05,
      "loss": 10.3357,
      "step": 180
    },
    {
      "epoch": 0.10237556561085973,
      "grad_norm": 0.09056730568408966,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 10.3305,
      "step": 181
    },
    {
      "epoch": 0.10294117647058823,
      "grad_norm": 0.0918162390589714,
      "learning_rate": 9.56842105263158e-06,
      "loss": 10.3387,
      "step": 182
    },
    {
      "epoch": 0.10350678733031674,
      "grad_norm": 0.06968989223241806,
      "learning_rate": 9.036842105263159e-06,
      "loss": 10.3346,
      "step": 183
    },
    {
      "epoch": 0.10407239819004525,
      "grad_norm": 0.04789090156555176,
      "learning_rate": 8.505263157894738e-06,
      "loss": 10.3325,
      "step": 184
    },
    {
      "epoch": 0.10463800904977376,
      "grad_norm": 0.05091318488121033,
      "learning_rate": 7.973684210526316e-06,
      "loss": 10.3329,
      "step": 185
    },
    {
      "epoch": 0.10520361990950226,
      "grad_norm": 0.0671328529715538,
      "learning_rate": 7.442105263157894e-06,
      "loss": 10.334,
      "step": 186
    },
    {
      "epoch": 0.10576923076923077,
      "grad_norm": 0.08944947272539139,
      "learning_rate": 6.9105263157894745e-06,
      "loss": 10.3309,
      "step": 187
    },
    {
      "epoch": 0.10633484162895927,
      "grad_norm": 0.09175018966197968,
      "learning_rate": 6.378947368421053e-06,
      "loss": 10.3368,
      "step": 188
    },
    {
      "epoch": 0.10690045248868778,
      "grad_norm": 0.07989748567342758,
      "learning_rate": 5.847368421052632e-06,
      "loss": 10.3345,
      "step": 189
    },
    {
      "epoch": 0.1074660633484163,
      "grad_norm": 0.05122963339090347,
      "learning_rate": 5.315789473684211e-06,
      "loss": 10.3373,
      "step": 190
    },
    {
      "epoch": 0.1080316742081448,
      "grad_norm": 0.04192551225423813,
      "learning_rate": 4.78421052631579e-06,
      "loss": 10.3345,
      "step": 191
    },
    {
      "epoch": 0.1085972850678733,
      "grad_norm": 0.06257504224777222,
      "learning_rate": 4.252631578947369e-06,
      "loss": 10.3357,
      "step": 192
    },
    {
      "epoch": 0.10916289592760181,
      "grad_norm": 0.07879539579153061,
      "learning_rate": 3.721052631578947e-06,
      "loss": 10.3345,
      "step": 193
    },
    {
      "epoch": 0.10972850678733032,
      "grad_norm": 0.09828386455774307,
      "learning_rate": 3.1894736842105266e-06,
      "loss": 10.3395,
      "step": 194
    },
    {
      "epoch": 0.11029411764705882,
      "grad_norm": 0.0789114385843277,
      "learning_rate": 2.6578947368421053e-06,
      "loss": 10.3362,
      "step": 195
    },
    {
      "epoch": 0.11085972850678733,
      "grad_norm": 0.05153012275695801,
      "learning_rate": 2.1263157894736844e-06,
      "loss": 10.3363,
      "step": 196
    },
    {
      "epoch": 0.11142533936651584,
      "grad_norm": 0.04866836592555046,
      "learning_rate": 1.5947368421052633e-06,
      "loss": 10.333,
      "step": 197
    },
    {
      "epoch": 0.11199095022624435,
      "grad_norm": 0.052815958857536316,
      "learning_rate": 1.0631578947368422e-06,
      "loss": 10.335,
      "step": 198
    },
    {
      "epoch": 0.11255656108597285,
      "grad_norm": 0.07986342906951904,
      "learning_rate": 5.315789473684211e-07,
      "loss": 10.3327,
      "step": 199
    },
    {
      "epoch": 0.11312217194570136,
      "grad_norm": 0.10749009251594543,
      "learning_rate": 0.0,
      "loss": 10.3305,
      "step": 200
    },
    {
      "epoch": 0.11312217194570136,
      "eval_loss": 10.333786964416504,
      "eval_runtime": 8.2124,
      "eval_samples_per_second": 2900.488,
      "eval_steps_per_second": 90.716,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 187990750789632.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}