{ "best_metric": 10.333786964416504, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.11312217194570136, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005656108597285068, "grad_norm": 0.062021128833293915, "learning_rate": 1.0100000000000002e-05, "loss": 10.38, "step": 1 }, { "epoch": 0.0005656108597285068, "eval_loss": 10.380452156066895, "eval_runtime": 8.0865, "eval_samples_per_second": 2945.633, "eval_steps_per_second": 92.128, "step": 1 }, { "epoch": 0.0011312217194570137, "grad_norm": 0.053750570863485336, "learning_rate": 2.0200000000000003e-05, "loss": 10.3795, "step": 2 }, { "epoch": 0.0016968325791855204, "grad_norm": 0.05637124180793762, "learning_rate": 3.0299999999999998e-05, "loss": 10.3798, "step": 3 }, { "epoch": 0.0022624434389140274, "grad_norm": 0.05955745279788971, "learning_rate": 4.0400000000000006e-05, "loss": 10.3783, "step": 4 }, { "epoch": 0.002828054298642534, "grad_norm": 0.05484645813703537, "learning_rate": 5.05e-05, "loss": 10.3809, "step": 5 }, { "epoch": 0.003393665158371041, "grad_norm": 0.0704905167222023, "learning_rate": 6.0599999999999996e-05, "loss": 10.3812, "step": 6 }, { "epoch": 0.003959276018099547, "grad_norm": 0.06389003992080688, "learning_rate": 7.07e-05, "loss": 10.3791, "step": 7 }, { "epoch": 0.004524886877828055, "grad_norm": 0.05992201343178749, "learning_rate": 8.080000000000001e-05, "loss": 10.3783, "step": 8 }, { "epoch": 0.005090497737556561, "grad_norm": 0.06138139218091965, "learning_rate": 9.09e-05, "loss": 10.3794, "step": 9 }, { "epoch": 0.005656108597285068, "grad_norm": 0.06030235439538956, "learning_rate": 0.000101, "loss": 10.3799, "step": 10 }, { "epoch": 0.006221719457013574, "grad_norm": 0.06504027545452118, "learning_rate": 0.00010046842105263158, "loss": 10.3793, "step": 11 }, { "epoch": 0.006787330316742082, "grad_norm": 0.0788484662771225, "learning_rate": 9.993684210526315e-05, "loss": 10.3778, "step": 12 }, { "epoch": 0.007352941176470588, "grad_norm": 0.06486255675554276, "learning_rate": 9.940526315789473e-05, "loss": 10.377, "step": 13 }, { "epoch": 0.007918552036199095, "grad_norm": 0.07012791931629181, "learning_rate": 9.887368421052632e-05, "loss": 10.3765, "step": 14 }, { "epoch": 0.008484162895927601, "grad_norm": 0.07867205142974854, "learning_rate": 9.83421052631579e-05, "loss": 10.3772, "step": 15 }, { "epoch": 0.00904977375565611, "grad_norm": 0.07887768745422363, "learning_rate": 9.781052631578948e-05, "loss": 10.3753, "step": 16 }, { "epoch": 0.009615384615384616, "grad_norm": 0.08439292013645172, "learning_rate": 9.727894736842106e-05, "loss": 10.3755, "step": 17 }, { "epoch": 0.010180995475113122, "grad_norm": 0.09982705861330032, "learning_rate": 9.674736842105263e-05, "loss": 10.3748, "step": 18 }, { "epoch": 0.010746606334841629, "grad_norm": 0.0821399837732315, "learning_rate": 9.621578947368421e-05, "loss": 10.3751, "step": 19 }, { "epoch": 0.011312217194570135, "grad_norm": 0.10993044823408127, "learning_rate": 9.568421052631578e-05, "loss": 10.3726, "step": 20 }, { "epoch": 0.011877828054298642, "grad_norm": 0.09375250339508057, "learning_rate": 9.515263157894737e-05, "loss": 10.3728, "step": 21 }, { "epoch": 0.012443438914027148, "grad_norm": 0.10467270761728287, "learning_rate": 9.462105263157895e-05, "loss": 10.3732, "step": 22 }, { "epoch": 0.013009049773755657, "grad_norm": 0.11246021836996078, "learning_rate": 9.408947368421054e-05, "loss": 10.3718, "step": 23 }, { "epoch": 0.013574660633484163, "grad_norm": 0.1300632804632187, "learning_rate": 9.355789473684211e-05, "loss": 10.3706, "step": 24 }, { "epoch": 0.01414027149321267, "grad_norm": 0.16326434910297394, "learning_rate": 9.302631578947369e-05, "loss": 10.3704, "step": 25 }, { "epoch": 0.014705882352941176, "grad_norm": 0.13459563255310059, "learning_rate": 9.249473684210526e-05, "loss": 10.3687, "step": 26 }, { "epoch": 0.015271493212669683, "grad_norm": 0.12868914008140564, "learning_rate": 9.196315789473685e-05, "loss": 10.3698, "step": 27 }, { "epoch": 0.01583710407239819, "grad_norm": 0.14536577463150024, "learning_rate": 9.143157894736843e-05, "loss": 10.3672, "step": 28 }, { "epoch": 0.016402714932126698, "grad_norm": 0.14456918835639954, "learning_rate": 9.09e-05, "loss": 10.3665, "step": 29 }, { "epoch": 0.016968325791855202, "grad_norm": 0.15334004163742065, "learning_rate": 9.036842105263158e-05, "loss": 10.3661, "step": 30 }, { "epoch": 0.01753393665158371, "grad_norm": 0.18508021533489227, "learning_rate": 8.983684210526316e-05, "loss": 10.3633, "step": 31 }, { "epoch": 0.01809954751131222, "grad_norm": 0.15807212889194489, "learning_rate": 8.930526315789474e-05, "loss": 10.3645, "step": 32 }, { "epoch": 0.018665158371040724, "grad_norm": 0.15088380873203278, "learning_rate": 8.877368421052632e-05, "loss": 10.3629, "step": 33 }, { "epoch": 0.019230769230769232, "grad_norm": 0.1459580808877945, "learning_rate": 8.82421052631579e-05, "loss": 10.3616, "step": 34 }, { "epoch": 0.019796380090497737, "grad_norm": 0.14841324090957642, "learning_rate": 8.771052631578948e-05, "loss": 10.3609, "step": 35 }, { "epoch": 0.020361990950226245, "grad_norm": 0.15354986488819122, "learning_rate": 8.717894736842105e-05, "loss": 10.3602, "step": 36 }, { "epoch": 0.02092760180995475, "grad_norm": 0.17225728929042816, "learning_rate": 8.664736842105263e-05, "loss": 10.3598, "step": 37 }, { "epoch": 0.021493212669683258, "grad_norm": 0.1513800323009491, "learning_rate": 8.61157894736842e-05, "loss": 10.3605, "step": 38 }, { "epoch": 0.022058823529411766, "grad_norm": 0.14970330893993378, "learning_rate": 8.55842105263158e-05, "loss": 10.3568, "step": 39 }, { "epoch": 0.02262443438914027, "grad_norm": 0.14152267575263977, "learning_rate": 8.505263157894737e-05, "loss": 10.3571, "step": 40 }, { "epoch": 0.02319004524886878, "grad_norm": 0.11801969259977341, "learning_rate": 8.452105263157896e-05, "loss": 10.3565, "step": 41 }, { "epoch": 0.023755656108597284, "grad_norm": 0.13193698227405548, "learning_rate": 8.398947368421053e-05, "loss": 10.3548, "step": 42 }, { "epoch": 0.024321266968325792, "grad_norm": 0.12654702365398407, "learning_rate": 8.345789473684211e-05, "loss": 10.3526, "step": 43 }, { "epoch": 0.024886877828054297, "grad_norm": 0.12153801321983337, "learning_rate": 8.292631578947368e-05, "loss": 10.3572, "step": 44 }, { "epoch": 0.025452488687782805, "grad_norm": 0.11495444178581238, "learning_rate": 8.239473684210526e-05, "loss": 10.3546, "step": 45 }, { "epoch": 0.026018099547511313, "grad_norm": 0.09821103513240814, "learning_rate": 8.186315789473683e-05, "loss": 10.3542, "step": 46 }, { "epoch": 0.026583710407239818, "grad_norm": 0.09399436414241791, "learning_rate": 8.133157894736842e-05, "loss": 10.3539, "step": 47 }, { "epoch": 0.027149321266968326, "grad_norm": 0.10562458634376526, "learning_rate": 8.080000000000001e-05, "loss": 10.3534, "step": 48 }, { "epoch": 0.02771493212669683, "grad_norm": 0.10220655053853989, "learning_rate": 8.026842105263159e-05, "loss": 10.3517, "step": 49 }, { "epoch": 0.02828054298642534, "grad_norm": 0.13333728909492493, "learning_rate": 7.973684210526316e-05, "loss": 10.3467, "step": 50 }, { "epoch": 0.02828054298642534, "eval_loss": 10.350386619567871, "eval_runtime": 8.1717, "eval_samples_per_second": 2914.943, "eval_steps_per_second": 91.168, "step": 50 }, { "epoch": 0.028846153846153848, "grad_norm": 0.12691977620124817, "learning_rate": 7.920526315789474e-05, "loss": 10.3513, "step": 51 }, { "epoch": 0.029411764705882353, "grad_norm": 0.08717770874500275, "learning_rate": 7.867368421052631e-05, "loss": 10.3522, "step": 52 }, { "epoch": 0.02997737556561086, "grad_norm": 0.06944411247968674, "learning_rate": 7.814210526315789e-05, "loss": 10.3525, "step": 53 }, { "epoch": 0.030542986425339366, "grad_norm": 0.07963120192289352, "learning_rate": 7.761052631578946e-05, "loss": 10.3511, "step": 54 }, { "epoch": 0.031108597285067874, "grad_norm": 0.0897977203130722, "learning_rate": 7.707894736842105e-05, "loss": 10.3484, "step": 55 }, { "epoch": 0.03167420814479638, "grad_norm": 0.08346674591302872, "learning_rate": 7.654736842105264e-05, "loss": 10.3466, "step": 56 }, { "epoch": 0.03223981900452489, "grad_norm": 0.11057797819375992, "learning_rate": 7.601578947368422e-05, "loss": 10.3507, "step": 57 }, { "epoch": 0.032805429864253395, "grad_norm": 0.07332270592451096, "learning_rate": 7.548421052631579e-05, "loss": 10.3516, "step": 58 }, { "epoch": 0.0333710407239819, "grad_norm": 0.05284840986132622, "learning_rate": 7.495263157894737e-05, "loss": 10.3498, "step": 59 }, { "epoch": 0.033936651583710405, "grad_norm": 0.06752610951662064, "learning_rate": 7.442105263157894e-05, "loss": 10.3497, "step": 60 }, { "epoch": 0.034502262443438916, "grad_norm": 0.06512323766946793, "learning_rate": 7.388947368421053e-05, "loss": 10.3478, "step": 61 }, { "epoch": 0.03506787330316742, "grad_norm": 0.0671728327870369, "learning_rate": 7.335789473684211e-05, "loss": 10.3461, "step": 62 }, { "epoch": 0.035633484162895926, "grad_norm": 0.07600655406713486, "learning_rate": 7.282631578947368e-05, "loss": 10.3527, "step": 63 }, { "epoch": 0.03619909502262444, "grad_norm": 0.0868535041809082, "learning_rate": 7.229473684210527e-05, "loss": 10.3483, "step": 64 }, { "epoch": 0.03676470588235294, "grad_norm": 0.05451541021466255, "learning_rate": 7.176315789473685e-05, "loss": 10.347, "step": 65 }, { "epoch": 0.03733031674208145, "grad_norm": 0.0499880388379097, "learning_rate": 7.123157894736842e-05, "loss": 10.348, "step": 66 }, { "epoch": 0.03789592760180995, "grad_norm": 0.056676704436540604, "learning_rate": 7.07e-05, "loss": 10.3474, "step": 67 }, { "epoch": 0.038461538461538464, "grad_norm": 0.06582105159759521, "learning_rate": 7.016842105263159e-05, "loss": 10.3454, "step": 68 }, { "epoch": 0.03902714932126697, "grad_norm": 0.07939665019512177, "learning_rate": 6.963684210526316e-05, "loss": 10.3508, "step": 69 }, { "epoch": 0.03959276018099547, "grad_norm": 0.07071398943662643, "learning_rate": 6.910526315789474e-05, "loss": 10.3478, "step": 70 }, { "epoch": 0.040158371040723985, "grad_norm": 0.05655914917588234, "learning_rate": 6.857368421052631e-05, "loss": 10.3488, "step": 71 }, { "epoch": 0.04072398190045249, "grad_norm": 0.04718935862183571, "learning_rate": 6.80421052631579e-05, "loss": 10.3471, "step": 72 }, { "epoch": 0.041289592760180995, "grad_norm": 0.05402664840221405, "learning_rate": 6.751052631578948e-05, "loss": 10.3492, "step": 73 }, { "epoch": 0.0418552036199095, "grad_norm": 0.06643565744161606, "learning_rate": 6.697894736842105e-05, "loss": 10.3456, "step": 74 }, { "epoch": 0.04242081447963801, "grad_norm": 0.07058984041213989, "learning_rate": 6.644736842105264e-05, "loss": 10.3428, "step": 75 }, { "epoch": 0.042986425339366516, "grad_norm": 0.09027786552906036, "learning_rate": 6.591578947368422e-05, "loss": 10.3491, "step": 76 }, { "epoch": 0.04355203619909502, "grad_norm": 0.047901950776576996, "learning_rate": 6.538421052631579e-05, "loss": 10.349, "step": 77 }, { "epoch": 0.04411764705882353, "grad_norm": 0.04141535609960556, "learning_rate": 6.485263157894737e-05, "loss": 10.3488, "step": 78 }, { "epoch": 0.04468325791855204, "grad_norm": 0.04282054677605629, "learning_rate": 6.432105263157894e-05, "loss": 10.3473, "step": 79 }, { "epoch": 0.04524886877828054, "grad_norm": 0.05405720695853233, "learning_rate": 6.378947368421053e-05, "loss": 10.3454, "step": 80 }, { "epoch": 0.04581447963800905, "grad_norm": 0.06005045026540756, "learning_rate": 6.32578947368421e-05, "loss": 10.3433, "step": 81 }, { "epoch": 0.04638009049773756, "grad_norm": 0.10236992686986923, "learning_rate": 6.27263157894737e-05, "loss": 10.35, "step": 82 }, { "epoch": 0.04694570135746606, "grad_norm": 0.07769302278757095, "learning_rate": 6.219473684210527e-05, "loss": 10.3463, "step": 83 }, { "epoch": 0.04751131221719457, "grad_norm": 0.03876901790499687, "learning_rate": 6.166315789473685e-05, "loss": 10.3472, "step": 84 }, { "epoch": 0.04807692307692308, "grad_norm": 0.03502112627029419, "learning_rate": 6.113157894736842e-05, "loss": 10.3474, "step": 85 }, { "epoch": 0.048642533936651584, "grad_norm": 0.06084531545639038, "learning_rate": 6.0599999999999996e-05, "loss": 10.3461, "step": 86 }, { "epoch": 0.04920814479638009, "grad_norm": 0.07850203663110733, "learning_rate": 6.006842105263158e-05, "loss": 10.3443, "step": 87 }, { "epoch": 0.049773755656108594, "grad_norm": 0.09611647576093674, "learning_rate": 5.953684210526315e-05, "loss": 10.3509, "step": 88 }, { "epoch": 0.050339366515837106, "grad_norm": 0.07283317297697067, "learning_rate": 5.900526315789474e-05, "loss": 10.3477, "step": 89 }, { "epoch": 0.05090497737556561, "grad_norm": 0.04070362076163292, "learning_rate": 5.847368421052632e-05, "loss": 10.3477, "step": 90 }, { "epoch": 0.051470588235294115, "grad_norm": 0.04328368604183197, "learning_rate": 5.79421052631579e-05, "loss": 10.3458, "step": 91 }, { "epoch": 0.05203619909502263, "grad_norm": 0.048710327595472336, "learning_rate": 5.7410526315789475e-05, "loss": 10.3459, "step": 92 }, { "epoch": 0.05260180995475113, "grad_norm": 0.07019612193107605, "learning_rate": 5.687894736842105e-05, "loss": 10.3449, "step": 93 }, { "epoch": 0.053167420814479636, "grad_norm": 0.07387306541204453, "learning_rate": 5.6347368421052625e-05, "loss": 10.3485, "step": 94 }, { "epoch": 0.05373303167420815, "grad_norm": 0.08147264271974564, "learning_rate": 5.5815789473684214e-05, "loss": 10.347, "step": 95 }, { "epoch": 0.05429864253393665, "grad_norm": 0.06585867702960968, "learning_rate": 5.5284210526315796e-05, "loss": 10.3463, "step": 96 }, { "epoch": 0.05486425339366516, "grad_norm": 0.046585794538259506, "learning_rate": 5.475263157894737e-05, "loss": 10.3451, "step": 97 }, { "epoch": 0.05542986425339366, "grad_norm": 0.05905357748270035, "learning_rate": 5.422105263157895e-05, "loss": 10.3458, "step": 98 }, { "epoch": 0.055995475113122174, "grad_norm": 0.06317008286714554, "learning_rate": 5.368947368421053e-05, "loss": 10.3458, "step": 99 }, { "epoch": 0.05656108597285068, "grad_norm": 0.08555221557617188, "learning_rate": 5.3157894736842104e-05, "loss": 10.3401, "step": 100 }, { "epoch": 0.05656108597285068, "eval_loss": 10.344589233398438, "eval_runtime": 8.2091, "eval_samples_per_second": 2901.663, "eval_steps_per_second": 90.753, "step": 100 }, { "epoch": 0.057126696832579184, "grad_norm": 0.09945505112409592, "learning_rate": 5.262631578947368e-05, "loss": 10.3451, "step": 101 }, { "epoch": 0.057692307692307696, "grad_norm": 0.04811963438987732, "learning_rate": 5.209473684210527e-05, "loss": 10.3452, "step": 102 }, { "epoch": 0.0582579185520362, "grad_norm": 0.043795328587293625, "learning_rate": 5.1563157894736844e-05, "loss": 10.3436, "step": 103 }, { "epoch": 0.058823529411764705, "grad_norm": 0.05168459936976433, "learning_rate": 5.1031578947368426e-05, "loss": 10.3442, "step": 104 }, { "epoch": 0.05938914027149321, "grad_norm": 0.06889228522777557, "learning_rate": 5.05e-05, "loss": 10.344, "step": 105 }, { "epoch": 0.05995475113122172, "grad_norm": 0.07568846642971039, "learning_rate": 4.9968421052631576e-05, "loss": 10.3411, "step": 106 }, { "epoch": 0.060520361990950226, "grad_norm": 0.09742461889982224, "learning_rate": 4.943684210526316e-05, "loss": 10.3466, "step": 107 }, { "epoch": 0.06108597285067873, "grad_norm": 0.08516088128089905, "learning_rate": 4.890526315789474e-05, "loss": 10.3427, "step": 108 }, { "epoch": 0.06165158371040724, "grad_norm": 0.05197165533900261, "learning_rate": 4.8373684210526316e-05, "loss": 10.3429, "step": 109 }, { "epoch": 0.06221719457013575, "grad_norm": 0.05537767708301544, "learning_rate": 4.784210526315789e-05, "loss": 10.3442, "step": 110 }, { "epoch": 0.06278280542986425, "grad_norm": 0.06518259644508362, "learning_rate": 4.731052631578947e-05, "loss": 10.3444, "step": 111 }, { "epoch": 0.06334841628959276, "grad_norm": 0.06740637868642807, "learning_rate": 4.6778947368421055e-05, "loss": 10.3408, "step": 112 }, { "epoch": 0.06391402714932126, "grad_norm": 0.09538576751947403, "learning_rate": 4.624736842105263e-05, "loss": 10.3463, "step": 113 }, { "epoch": 0.06447963800904978, "grad_norm": 0.08066005259752274, "learning_rate": 4.571578947368421e-05, "loss": 10.3457, "step": 114 }, { "epoch": 0.06504524886877829, "grad_norm": 0.06505700945854187, "learning_rate": 4.518421052631579e-05, "loss": 10.3432, "step": 115 }, { "epoch": 0.06561085972850679, "grad_norm": 0.05407899618148804, "learning_rate": 4.465263157894737e-05, "loss": 10.3417, "step": 116 }, { "epoch": 0.0661764705882353, "grad_norm": 0.06977938115596771, "learning_rate": 4.412105263157895e-05, "loss": 10.3432, "step": 117 }, { "epoch": 0.0667420814479638, "grad_norm": 0.07747713476419449, "learning_rate": 4.358947368421053e-05, "loss": 10.3408, "step": 118 }, { "epoch": 0.0673076923076923, "grad_norm": 0.09030125290155411, "learning_rate": 4.30578947368421e-05, "loss": 10.3474, "step": 119 }, { "epoch": 0.06787330316742081, "grad_norm": 0.08575378358364105, "learning_rate": 4.2526315789473685e-05, "loss": 10.3429, "step": 120 }, { "epoch": 0.06843891402714933, "grad_norm": 0.06714797765016556, "learning_rate": 4.199473684210527e-05, "loss": 10.3433, "step": 121 }, { "epoch": 0.06900452488687783, "grad_norm": 0.05677073448896408, "learning_rate": 4.146315789473684e-05, "loss": 10.3432, "step": 122 }, { "epoch": 0.06957013574660634, "grad_norm": 0.059628356248140335, "learning_rate": 4.093157894736842e-05, "loss": 10.3412, "step": 123 }, { "epoch": 0.07013574660633484, "grad_norm": 0.06992416828870773, "learning_rate": 4.0400000000000006e-05, "loss": 10.3418, "step": 124 }, { "epoch": 0.07070135746606335, "grad_norm": 0.09124021232128143, "learning_rate": 3.986842105263158e-05, "loss": 10.3367, "step": 125 }, { "epoch": 0.07126696832579185, "grad_norm": 0.10908302664756775, "learning_rate": 3.933684210526316e-05, "loss": 10.3426, "step": 126 }, { "epoch": 0.07183257918552036, "grad_norm": 0.08162763714790344, "learning_rate": 3.880526315789473e-05, "loss": 10.3418, "step": 127 }, { "epoch": 0.07239819004524888, "grad_norm": 0.05531831085681915, "learning_rate": 3.827368421052632e-05, "loss": 10.3389, "step": 128 }, { "epoch": 0.07296380090497738, "grad_norm": 0.06366059929132462, "learning_rate": 3.7742105263157896e-05, "loss": 10.338, "step": 129 }, { "epoch": 0.07352941176470588, "grad_norm": 0.06867145001888275, "learning_rate": 3.721052631578947e-05, "loss": 10.3417, "step": 130 }, { "epoch": 0.07409502262443439, "grad_norm": 0.10291559994220734, "learning_rate": 3.6678947368421054e-05, "loss": 10.3357, "step": 131 }, { "epoch": 0.0746606334841629, "grad_norm": 0.09882048517465591, "learning_rate": 3.6147368421052636e-05, "loss": 10.3433, "step": 132 }, { "epoch": 0.0752262443438914, "grad_norm": 0.07626540958881378, "learning_rate": 3.561578947368421e-05, "loss": 10.3397, "step": 133 }, { "epoch": 0.0757918552036199, "grad_norm": 0.04596826434135437, "learning_rate": 3.508421052631579e-05, "loss": 10.3394, "step": 134 }, { "epoch": 0.07635746606334842, "grad_norm": 0.0479527972638607, "learning_rate": 3.455263157894737e-05, "loss": 10.3388, "step": 135 }, { "epoch": 0.07692307692307693, "grad_norm": 0.07407166063785553, "learning_rate": 3.402105263157895e-05, "loss": 10.34, "step": 136 }, { "epoch": 0.07748868778280543, "grad_norm": 0.09090636670589447, "learning_rate": 3.3489473684210526e-05, "loss": 10.3363, "step": 137 }, { "epoch": 0.07805429864253394, "grad_norm": 0.08997251093387604, "learning_rate": 3.295789473684211e-05, "loss": 10.3424, "step": 138 }, { "epoch": 0.07861990950226244, "grad_norm": 0.07923514395952225, "learning_rate": 3.242631578947368e-05, "loss": 10.3398, "step": 139 }, { "epoch": 0.07918552036199095, "grad_norm": 0.07008980959653854, "learning_rate": 3.1894736842105265e-05, "loss": 10.3375, "step": 140 }, { "epoch": 0.07975113122171945, "grad_norm": 0.049700357019901276, "learning_rate": 3.136315789473685e-05, "loss": 10.3388, "step": 141 }, { "epoch": 0.08031674208144797, "grad_norm": 0.06060990318655968, "learning_rate": 3.083157894736842e-05, "loss": 10.3363, "step": 142 }, { "epoch": 0.08088235294117647, "grad_norm": 0.08178900182247162, "learning_rate": 3.0299999999999998e-05, "loss": 10.3372, "step": 143 }, { "epoch": 0.08144796380090498, "grad_norm": 0.09628574550151825, "learning_rate": 2.9768421052631577e-05, "loss": 10.3433, "step": 144 }, { "epoch": 0.08201357466063348, "grad_norm": 0.08116014301776886, "learning_rate": 2.923684210526316e-05, "loss": 10.3396, "step": 145 }, { "epoch": 0.08257918552036199, "grad_norm": 0.05855511128902435, "learning_rate": 2.8705263157894737e-05, "loss": 10.3388, "step": 146 }, { "epoch": 0.0831447963800905, "grad_norm": 0.045891135931015015, "learning_rate": 2.8173684210526313e-05, "loss": 10.3368, "step": 147 }, { "epoch": 0.083710407239819, "grad_norm": 0.06934593617916107, "learning_rate": 2.7642105263157898e-05, "loss": 10.3369, "step": 148 }, { "epoch": 0.08427601809954752, "grad_norm": 0.07505083084106445, "learning_rate": 2.7110526315789473e-05, "loss": 10.3353, "step": 149 }, { "epoch": 0.08484162895927602, "grad_norm": 0.09443920105695724, "learning_rate": 2.6578947368421052e-05, "loss": 10.3336, "step": 150 }, { "epoch": 0.08484162895927602, "eval_loss": 10.336673736572266, "eval_runtime": 8.6605, "eval_samples_per_second": 2750.409, "eval_steps_per_second": 86.022, "step": 150 }, { "epoch": 0.08540723981900453, "grad_norm": 0.10061606764793396, "learning_rate": 2.6047368421052634e-05, "loss": 10.3406, "step": 151 }, { "epoch": 0.08597285067873303, "grad_norm": 0.06028294190764427, "learning_rate": 2.5515789473684213e-05, "loss": 10.3384, "step": 152 }, { "epoch": 0.08653846153846154, "grad_norm": 0.0465410090982914, "learning_rate": 2.4984210526315788e-05, "loss": 10.3362, "step": 153 }, { "epoch": 0.08710407239819004, "grad_norm": 0.05667021870613098, "learning_rate": 2.445263157894737e-05, "loss": 10.3345, "step": 154 }, { "epoch": 0.08766968325791855, "grad_norm": 0.06780000030994415, "learning_rate": 2.3921052631578946e-05, "loss": 10.3364, "step": 155 }, { "epoch": 0.08823529411764706, "grad_norm": 0.09162377566099167, "learning_rate": 2.3389473684210528e-05, "loss": 10.3332, "step": 156 }, { "epoch": 0.08880090497737557, "grad_norm": 0.0900709331035614, "learning_rate": 2.2857894736842106e-05, "loss": 10.3398, "step": 157 }, { "epoch": 0.08936651583710407, "grad_norm": 0.06955718994140625, "learning_rate": 2.2326315789473685e-05, "loss": 10.338, "step": 158 }, { "epoch": 0.08993212669683258, "grad_norm": 0.0517783984541893, "learning_rate": 2.1794736842105264e-05, "loss": 10.3337, "step": 159 }, { "epoch": 0.09049773755656108, "grad_norm": 0.05297861248254776, "learning_rate": 2.1263157894736842e-05, "loss": 10.3348, "step": 160 }, { "epoch": 0.09106334841628959, "grad_norm": 0.08668467402458191, "learning_rate": 2.073157894736842e-05, "loss": 10.3373, "step": 161 }, { "epoch": 0.0916289592760181, "grad_norm": 0.08060705661773682, "learning_rate": 2.0200000000000003e-05, "loss": 10.3338, "step": 162 }, { "epoch": 0.09219457013574661, "grad_norm": 0.08586835861206055, "learning_rate": 1.966842105263158e-05, "loss": 10.3389, "step": 163 }, { "epoch": 0.09276018099547512, "grad_norm": 0.07375074923038483, "learning_rate": 1.913684210526316e-05, "loss": 10.3386, "step": 164 }, { "epoch": 0.09332579185520362, "grad_norm": 0.046294908970594406, "learning_rate": 1.8605263157894736e-05, "loss": 10.3356, "step": 165 }, { "epoch": 0.09389140271493213, "grad_norm": 0.04964763671159744, "learning_rate": 1.8073684210526318e-05, "loss": 10.3364, "step": 166 }, { "epoch": 0.09445701357466063, "grad_norm": 0.06984888017177582, "learning_rate": 1.7542105263157897e-05, "loss": 10.3365, "step": 167 }, { "epoch": 0.09502262443438914, "grad_norm": 0.07992294430732727, "learning_rate": 1.7010526315789475e-05, "loss": 10.3328, "step": 168 }, { "epoch": 0.09558823529411764, "grad_norm": 0.08656508475542068, "learning_rate": 1.6478947368421054e-05, "loss": 10.3398, "step": 169 }, { "epoch": 0.09615384615384616, "grad_norm": 0.07822012156248093, "learning_rate": 1.5947368421052633e-05, "loss": 10.3356, "step": 170 }, { "epoch": 0.09671945701357466, "grad_norm": 0.05577806010842323, "learning_rate": 1.541578947368421e-05, "loss": 10.3354, "step": 171 }, { "epoch": 0.09728506787330317, "grad_norm": 0.05001489445567131, "learning_rate": 1.4884210526315788e-05, "loss": 10.335, "step": 172 }, { "epoch": 0.09785067873303167, "grad_norm": 0.07180096209049225, "learning_rate": 1.4352631578947369e-05, "loss": 10.3366, "step": 173 }, { "epoch": 0.09841628959276018, "grad_norm": 0.08262878656387329, "learning_rate": 1.3821052631578949e-05, "loss": 10.3344, "step": 174 }, { "epoch": 0.09898190045248868, "grad_norm": 0.08866975456476212, "learning_rate": 1.3289473684210526e-05, "loss": 10.3304, "step": 175 }, { "epoch": 0.09954751131221719, "grad_norm": 0.08908353745937347, "learning_rate": 1.2757894736842106e-05, "loss": 10.3387, "step": 176 }, { "epoch": 0.1001131221719457, "grad_norm": 0.07283877581357956, "learning_rate": 1.2226315789473685e-05, "loss": 10.3373, "step": 177 }, { "epoch": 0.10067873303167421, "grad_norm": 0.04923555254936218, "learning_rate": 1.1694736842105264e-05, "loss": 10.3338, "step": 178 }, { "epoch": 0.10124434389140272, "grad_norm": 0.056405432522296906, "learning_rate": 1.1163157894736842e-05, "loss": 10.3338, "step": 179 }, { "epoch": 0.10180995475113122, "grad_norm": 0.08115344494581223, "learning_rate": 1.0631578947368421e-05, "loss": 10.3357, "step": 180 }, { "epoch": 0.10237556561085973, "grad_norm": 0.09056730568408966, "learning_rate": 1.0100000000000002e-05, "loss": 10.3305, "step": 181 }, { "epoch": 0.10294117647058823, "grad_norm": 0.0918162390589714, "learning_rate": 9.56842105263158e-06, "loss": 10.3387, "step": 182 }, { "epoch": 0.10350678733031674, "grad_norm": 0.06968989223241806, "learning_rate": 9.036842105263159e-06, "loss": 10.3346, "step": 183 }, { "epoch": 0.10407239819004525, "grad_norm": 0.04789090156555176, "learning_rate": 8.505263157894738e-06, "loss": 10.3325, "step": 184 }, { "epoch": 0.10463800904977376, "grad_norm": 0.05091318488121033, "learning_rate": 7.973684210526316e-06, "loss": 10.3329, "step": 185 }, { "epoch": 0.10520361990950226, "grad_norm": 0.0671328529715538, "learning_rate": 7.442105263157894e-06, "loss": 10.334, "step": 186 }, { "epoch": 0.10576923076923077, "grad_norm": 0.08944947272539139, "learning_rate": 6.9105263157894745e-06, "loss": 10.3309, "step": 187 }, { "epoch": 0.10633484162895927, "grad_norm": 0.09175018966197968, "learning_rate": 6.378947368421053e-06, "loss": 10.3368, "step": 188 }, { "epoch": 0.10690045248868778, "grad_norm": 0.07989748567342758, "learning_rate": 5.847368421052632e-06, "loss": 10.3345, "step": 189 }, { "epoch": 0.1074660633484163, "grad_norm": 0.05122963339090347, "learning_rate": 5.315789473684211e-06, "loss": 10.3373, "step": 190 }, { "epoch": 0.1080316742081448, "grad_norm": 0.04192551225423813, "learning_rate": 4.78421052631579e-06, "loss": 10.3345, "step": 191 }, { "epoch": 0.1085972850678733, "grad_norm": 0.06257504224777222, "learning_rate": 4.252631578947369e-06, "loss": 10.3357, "step": 192 }, { "epoch": 0.10916289592760181, "grad_norm": 0.07879539579153061, "learning_rate": 3.721052631578947e-06, "loss": 10.3345, "step": 193 }, { "epoch": 0.10972850678733032, "grad_norm": 0.09828386455774307, "learning_rate": 3.1894736842105266e-06, "loss": 10.3395, "step": 194 }, { "epoch": 0.11029411764705882, "grad_norm": 0.0789114385843277, "learning_rate": 2.6578947368421053e-06, "loss": 10.3362, "step": 195 }, { "epoch": 0.11085972850678733, "grad_norm": 0.05153012275695801, "learning_rate": 2.1263157894736844e-06, "loss": 10.3363, "step": 196 }, { "epoch": 0.11142533936651584, "grad_norm": 0.04866836592555046, "learning_rate": 1.5947368421052633e-06, "loss": 10.333, "step": 197 }, { "epoch": 0.11199095022624435, "grad_norm": 0.052815958857536316, "learning_rate": 1.0631578947368422e-06, "loss": 10.335, "step": 198 }, { "epoch": 0.11255656108597285, "grad_norm": 0.07986342906951904, "learning_rate": 5.315789473684211e-07, "loss": 10.3327, "step": 199 }, { "epoch": 0.11312217194570136, "grad_norm": 0.10749009251594543, "learning_rate": 0.0, "loss": 10.3305, "step": 200 }, { "epoch": 0.11312217194570136, "eval_loss": 10.333786964416504, "eval_runtime": 8.2124, "eval_samples_per_second": 2900.488, "eval_steps_per_second": 90.716, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 187990750789632.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }