{ "best_metric": 0.0018136479193344712, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.03170577045022194, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001585288522511097, "grad_norm": 12.169403076171875, "learning_rate": 1.013e-05, "loss": 4.2706, "step": 1 }, { "epoch": 0.0001585288522511097, "eval_loss": 4.04376220703125, "eval_runtime": 284.7456, "eval_samples_per_second": 9.328, "eval_steps_per_second": 2.332, "step": 1 }, { "epoch": 0.0003170577045022194, "grad_norm": 16.00709342956543, "learning_rate": 2.026e-05, "loss": 3.9804, "step": 2 }, { "epoch": 0.0004755865567533291, "grad_norm": 15.737310409545898, "learning_rate": 3.039e-05, "loss": 3.9673, "step": 3 }, { "epoch": 0.0006341154090044388, "grad_norm": 14.818962097167969, "learning_rate": 4.052e-05, "loss": 3.2126, "step": 4 }, { "epoch": 0.0007926442612555486, "grad_norm": 10.428650856018066, "learning_rate": 5.065e-05, "loss": 2.0077, "step": 5 }, { "epoch": 0.0009511731135066582, "grad_norm": 12.499900817871094, "learning_rate": 6.078e-05, "loss": 1.2136, "step": 6 }, { "epoch": 0.0011097019657577679, "grad_norm": 10.663622856140137, "learning_rate": 7.091e-05, "loss": 0.3359, "step": 7 }, { "epoch": 0.0012682308180088776, "grad_norm": 46.17512512207031, "learning_rate": 8.104e-05, "loss": 0.412, "step": 8 }, { "epoch": 0.0014267596702599874, "grad_norm": 1.8651701211929321, "learning_rate": 9.117e-05, "loss": 0.0131, "step": 9 }, { "epoch": 0.0015852885225110971, "grad_norm": 0.16528020799160004, "learning_rate": 0.0001013, "loss": 0.0008, "step": 10 }, { "epoch": 0.0017438173747622067, "grad_norm": 0.012765788473188877, "learning_rate": 0.00010076684210526316, "loss": 0.0001, "step": 11 }, { "epoch": 0.0019023462270133164, "grad_norm": 0.013106022030115128, "learning_rate": 0.0001002336842105263, "loss": 0.0001, "step": 12 }, { "epoch": 0.002060875079264426, "grad_norm": 0.024374201893806458, "learning_rate": 9.970052631578946e-05, "loss": 0.0001, "step": 13 }, { "epoch": 0.0022194039315155357, "grad_norm": 0.021921832114458084, "learning_rate": 9.916736842105263e-05, "loss": 0.0001, "step": 14 }, { "epoch": 0.0023779327837666455, "grad_norm": 0.04089897498488426, "learning_rate": 9.863421052631579e-05, "loss": 0.0001, "step": 15 }, { "epoch": 0.0025364616360177552, "grad_norm": 0.021697349846363068, "learning_rate": 9.810105263157895e-05, "loss": 0.0001, "step": 16 }, { "epoch": 0.002694990488268865, "grad_norm": 0.0019667658489197493, "learning_rate": 9.756789473684211e-05, "loss": 0.0, "step": 17 }, { "epoch": 0.0028535193405199747, "grad_norm": 0.0005555606330744922, "learning_rate": 9.703473684210525e-05, "loss": 0.0, "step": 18 }, { "epoch": 0.0030120481927710845, "grad_norm": 0.0005540283164009452, "learning_rate": 9.650157894736842e-05, "loss": 0.0, "step": 19 }, { "epoch": 0.0031705770450221942, "grad_norm": 0.0006892455858178437, "learning_rate": 9.596842105263158e-05, "loss": 0.0, "step": 20 }, { "epoch": 0.0033291058972733036, "grad_norm": 0.0022405856288969517, "learning_rate": 9.543526315789474e-05, "loss": 0.0, "step": 21 }, { "epoch": 0.0034876347495244133, "grad_norm": 0.0009618853800930083, "learning_rate": 9.49021052631579e-05, "loss": 0.0, "step": 22 }, { "epoch": 0.003646163601775523, "grad_norm": 0.0009560861508361995, "learning_rate": 9.436894736842105e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.003804692454026633, "grad_norm": 0.0029522525146603584, "learning_rate": 9.38357894736842e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.003963221306277742, "grad_norm": 0.0008082777494564652, "learning_rate": 9.330263157894737e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.004121750158528852, "grad_norm": 0.0015600634505972266, "learning_rate": 9.276947368421051e-05, "loss": 0.0, "step": 26 }, { "epoch": 0.004280279010779962, "grad_norm": 0.0023957917001098394, "learning_rate": 9.223631578947369e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.004438807863031071, "grad_norm": 0.0004076052864547819, "learning_rate": 9.170315789473684e-05, "loss": 0.0, "step": 28 }, { "epoch": 0.004597336715282181, "grad_norm": 0.00036426776205189526, "learning_rate": 9.117e-05, "loss": 0.0, "step": 29 }, { "epoch": 0.004755865567533291, "grad_norm": 0.0003226393018849194, "learning_rate": 9.063684210526316e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.004914394419784401, "grad_norm": 0.0004357333527877927, "learning_rate": 9.010368421052632e-05, "loss": 0.0, "step": 31 }, { "epoch": 0.0050729232720355105, "grad_norm": 0.00029580152477137744, "learning_rate": 8.957052631578946e-05, "loss": 0.0, "step": 32 }, { "epoch": 0.00523145212428662, "grad_norm": 0.00045763631351292133, "learning_rate": 8.903736842105263e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.00538998097653773, "grad_norm": 0.0003399694978725165, "learning_rate": 8.850421052631579e-05, "loss": 0.0, "step": 34 }, { "epoch": 0.00554850982878884, "grad_norm": 0.0003934537817258388, "learning_rate": 8.797105263157895e-05, "loss": 0.0, "step": 35 }, { "epoch": 0.0057070386810399495, "grad_norm": 0.0005533421644940972, "learning_rate": 8.743789473684211e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.005865567533291059, "grad_norm": 0.002100046258419752, "learning_rate": 8.690473684210526e-05, "loss": 0.0, "step": 37 }, { "epoch": 0.006024096385542169, "grad_norm": 0.0006316175567917526, "learning_rate": 8.637157894736842e-05, "loss": 0.0, "step": 38 }, { "epoch": 0.006182625237793279, "grad_norm": 0.0004144099948462099, "learning_rate": 8.583842105263158e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.0063411540900443885, "grad_norm": 0.002568572061136365, "learning_rate": 8.530526315789472e-05, "loss": 0.0, "step": 40 }, { "epoch": 0.006499682942295497, "grad_norm": 0.0009425367461517453, "learning_rate": 8.47721052631579e-05, "loss": 0.0, "step": 41 }, { "epoch": 0.006658211794546607, "grad_norm": 0.00028256585937924683, "learning_rate": 8.423894736842105e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.006816740646797717, "grad_norm": 0.000341152714099735, "learning_rate": 8.37057894736842e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.006975269499048827, "grad_norm": 0.00031369487987831235, "learning_rate": 8.317263157894737e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.007133798351299936, "grad_norm": 0.00039442809065803885, "learning_rate": 8.263947368421053e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.007292327203551046, "grad_norm": 0.00029302932671271265, "learning_rate": 8.210631578947368e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.007450856055802156, "grad_norm": 0.0003696536587085575, "learning_rate": 8.157315789473684e-05, "loss": 0.0, "step": 47 }, { "epoch": 0.007609384908053266, "grad_norm": 0.00028774861129932106, "learning_rate": 8.104e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.0077679137603043754, "grad_norm": 0.00027515244437381625, "learning_rate": 8.050684210526316e-05, "loss": 0.0, "step": 49 }, { "epoch": 0.007926442612555484, "grad_norm": 0.000243888032855466, "learning_rate": 7.997368421052632e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.007926442612555484, "eval_loss": 0.002942207735031843, "eval_runtime": 284.8103, "eval_samples_per_second": 9.326, "eval_steps_per_second": 2.331, "step": 50 }, { "epoch": 0.008084971464806594, "grad_norm": 9.338234901428223, "learning_rate": 7.944052631578947e-05, "loss": 0.3261, "step": 51 }, { "epoch": 0.008243500317057704, "grad_norm": 6.803124415455386e-05, "learning_rate": 7.890736842105263e-05, "loss": 0.0, "step": 52 }, { "epoch": 0.008402029169308814, "grad_norm": 0.00013048792607150972, "learning_rate": 7.837421052631579e-05, "loss": 0.0, "step": 53 }, { "epoch": 0.008560558021559923, "grad_norm": 0.0003305167774669826, "learning_rate": 7.784105263157893e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.008719086873811033, "grad_norm": 0.0002505861921235919, "learning_rate": 7.730789473684211e-05, "loss": 0.0, "step": 55 }, { "epoch": 0.008877615726062143, "grad_norm": 0.0005144188180565834, "learning_rate": 7.677473684210526e-05, "loss": 0.0, "step": 56 }, { "epoch": 0.009036144578313253, "grad_norm": 0.0003912892425432801, "learning_rate": 7.624157894736842e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.009194673430564362, "grad_norm": 0.0003942723269574344, "learning_rate": 7.570842105263158e-05, "loss": 0.0, "step": 58 }, { "epoch": 0.009353202282815472, "grad_norm": 0.0006460827426053584, "learning_rate": 7.517526315789474e-05, "loss": 0.0, "step": 59 }, { "epoch": 0.009511731135066582, "grad_norm": 0.0014212304959073663, "learning_rate": 7.464210526315789e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.009670259987317692, "grad_norm": 0.0020509830210357904, "learning_rate": 7.410894736842106e-05, "loss": 0.0, "step": 61 }, { "epoch": 0.009828788839568801, "grad_norm": 0.003535072784870863, "learning_rate": 7.35757894736842e-05, "loss": 0.0, "step": 62 }, { "epoch": 0.009987317691819911, "grad_norm": 0.0031283413991332054, "learning_rate": 7.304263157894737e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.010145846544071021, "grad_norm": 0.004257969092577696, "learning_rate": 7.250947368421053e-05, "loss": 0.0, "step": 64 }, { "epoch": 0.01030437539632213, "grad_norm": 0.0008224630146287382, "learning_rate": 7.197631578947368e-05, "loss": 0.0, "step": 65 }, { "epoch": 0.01046290424857324, "grad_norm": 0.00048312422586604953, "learning_rate": 7.144315789473684e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.01062143310082435, "grad_norm": 0.0006397454999387264, "learning_rate": 7.091e-05, "loss": 0.0, "step": 67 }, { "epoch": 0.01077996195307546, "grad_norm": 0.0005098761757835746, "learning_rate": 7.037684210526316e-05, "loss": 0.0, "step": 68 }, { "epoch": 0.01093849080532657, "grad_norm": 0.0004371613613329828, "learning_rate": 6.984368421052632e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.01109701965757768, "grad_norm": 0.0003893864050041884, "learning_rate": 6.931052631578947e-05, "loss": 0.0, "step": 70 }, { "epoch": 0.01125554850982879, "grad_norm": 0.000385530962375924, "learning_rate": 6.877736842105263e-05, "loss": 0.0, "step": 71 }, { "epoch": 0.011414077362079899, "grad_norm": 0.0004044200468342751, "learning_rate": 6.824421052631579e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.011572606214331009, "grad_norm": 0.0003012254892382771, "learning_rate": 6.771105263157895e-05, "loss": 0.0, "step": 73 }, { "epoch": 0.011731135066582118, "grad_norm": 0.0002749539853539318, "learning_rate": 6.71778947368421e-05, "loss": 0.0, "step": 74 }, { "epoch": 0.011889663918833228, "grad_norm": 0.00024228697293438017, "learning_rate": 6.664473684210527e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.012048192771084338, "grad_norm": 0.00025467583327554166, "learning_rate": 6.611157894736842e-05, "loss": 0.0, "step": 76 }, { "epoch": 0.012206721623335448, "grad_norm": 0.00024287324049510062, "learning_rate": 6.557842105263158e-05, "loss": 0.0, "step": 77 }, { "epoch": 0.012365250475586557, "grad_norm": 0.0002723891520872712, "learning_rate": 6.504526315789474e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.012523779327837667, "grad_norm": 0.0002199763839598745, "learning_rate": 6.451210526315789e-05, "loss": 0.0, "step": 79 }, { "epoch": 0.012682308180088777, "grad_norm": 0.0002384045801591128, "learning_rate": 6.397894736842105e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.012840837032339885, "grad_norm": 0.00017316907178610563, "learning_rate": 6.344578947368421e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.012999365884590995, "grad_norm": 0.00017045978165697306, "learning_rate": 6.291263157894737e-05, "loss": 0.0, "step": 82 }, { "epoch": 0.013157894736842105, "grad_norm": 0.00020900469098705798, "learning_rate": 6.237947368421053e-05, "loss": 0.0, "step": 83 }, { "epoch": 0.013316423589093214, "grad_norm": 0.00019530224381014705, "learning_rate": 6.184631578947368e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.013474952441344324, "grad_norm": 0.00019037234596908092, "learning_rate": 6.131315789473684e-05, "loss": 0.0, "step": 85 }, { "epoch": 0.013633481293595434, "grad_norm": 0.00016165118722710758, "learning_rate": 6.078e-05, "loss": 0.0, "step": 86 }, { "epoch": 0.013792010145846544, "grad_norm": 0.00014685910718981177, "learning_rate": 6.024684210526315e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.013950538998097653, "grad_norm": 0.00014423737593460828, "learning_rate": 5.9713684210526305e-05, "loss": 0.0, "step": 88 }, { "epoch": 0.014109067850348763, "grad_norm": 0.00018514647672418505, "learning_rate": 5.918052631578947e-05, "loss": 0.0, "step": 89 }, { "epoch": 0.014267596702599873, "grad_norm": 0.00020832290465477854, "learning_rate": 5.8647368421052634e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.014426125554850983, "grad_norm": 0.0001367575750919059, "learning_rate": 5.811421052631579e-05, "loss": 0.0, "step": 91 }, { "epoch": 0.014584654407102092, "grad_norm": 0.00012530827370937914, "learning_rate": 5.758105263157894e-05, "loss": 0.0, "step": 92 }, { "epoch": 0.014743183259353202, "grad_norm": 0.0001300648000324145, "learning_rate": 5.70478947368421e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.014901712111604312, "grad_norm": 0.00019942222570534796, "learning_rate": 5.6514736842105256e-05, "loss": 0.0, "step": 94 }, { "epoch": 0.015060240963855422, "grad_norm": 0.00024527753703296185, "learning_rate": 5.5981578947368424e-05, "loss": 0.0, "step": 95 }, { "epoch": 0.015218769816106531, "grad_norm": 0.00016720422718208283, "learning_rate": 5.544842105263158e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.015377298668357641, "grad_norm": 0.00019603196415118873, "learning_rate": 5.491526315789474e-05, "loss": 0.0, "step": 97 }, { "epoch": 0.015535827520608751, "grad_norm": 0.0002464319404680282, "learning_rate": 5.438210526315789e-05, "loss": 0.0, "step": 98 }, { "epoch": 0.01569435637285986, "grad_norm": 0.00019614743359852582, "learning_rate": 5.384894736842105e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.01585288522511097, "grad_norm": 0.00017342373030260205, "learning_rate": 5.331578947368421e-05, "loss": 0.0, "step": 100 }, { "epoch": 0.01585288522511097, "eval_loss": 0.0025951999705284834, "eval_runtime": 284.9449, "eval_samples_per_second": 9.321, "eval_steps_per_second": 2.33, "step": 100 }, { "epoch": 0.01601141407736208, "grad_norm": 0.0001433816651115194, "learning_rate": 5.278263157894736e-05, "loss": 0.0, "step": 101 }, { "epoch": 0.016169942929613188, "grad_norm": 0.00011838250065920874, "learning_rate": 5.224947368421053e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.0163284717818643, "grad_norm": 0.00010393361299065873, "learning_rate": 5.171631578947368e-05, "loss": 0.0, "step": 103 }, { "epoch": 0.016487000634115408, "grad_norm": 9.349365427624434e-05, "learning_rate": 5.1183157894736844e-05, "loss": 0.0, "step": 104 }, { "epoch": 0.01664552948636652, "grad_norm": 9.53435810515657e-05, "learning_rate": 5.065e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.016804058338617627, "grad_norm": 8.479709504172206e-05, "learning_rate": 5.011684210526315e-05, "loss": 0.0, "step": 106 }, { "epoch": 0.01696258719086874, "grad_norm": 7.610375905642286e-05, "learning_rate": 4.958368421052631e-05, "loss": 0.0, "step": 107 }, { "epoch": 0.017121116043119847, "grad_norm": 7.144361006794497e-05, "learning_rate": 4.9050526315789473e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.017279644895370958, "grad_norm": 6.864387250971049e-05, "learning_rate": 4.851736842105263e-05, "loss": 0.0, "step": 109 }, { "epoch": 0.017438173747622066, "grad_norm": 7.527913840021938e-05, "learning_rate": 4.798421052631579e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.017596702599873178, "grad_norm": 7.126829586923122e-05, "learning_rate": 4.745105263157895e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.017755231452124286, "grad_norm": 8.467756561003625e-05, "learning_rate": 4.69178947368421e-05, "loss": 0.0, "step": 112 }, { "epoch": 0.017913760304375397, "grad_norm": 7.283923332579434e-05, "learning_rate": 4.638473684210526e-05, "loss": 0.0, "step": 113 }, { "epoch": 0.018072289156626505, "grad_norm": 7.065037789288908e-05, "learning_rate": 4.585157894736842e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.018230818008877617, "grad_norm": 6.294441118370742e-05, "learning_rate": 4.531842105263158e-05, "loss": 0.0, "step": 115 }, { "epoch": 0.018389346861128725, "grad_norm": 6.32048977422528e-05, "learning_rate": 4.478526315789473e-05, "loss": 0.0, "step": 116 }, { "epoch": 0.018547875713379836, "grad_norm": 6.300484528765082e-05, "learning_rate": 4.425210526315789e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.018706404565630944, "grad_norm": 6.188850966282189e-05, "learning_rate": 4.3718947368421054e-05, "loss": 0.0, "step": 118 }, { "epoch": 0.018864933417882056, "grad_norm": 6.136750744190067e-05, "learning_rate": 4.318578947368421e-05, "loss": 0.0, "step": 119 }, { "epoch": 0.019023462270133164, "grad_norm": 5.3558338549919426e-05, "learning_rate": 4.265263157894736e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.019181991122384275, "grad_norm": 5.407687422120944e-05, "learning_rate": 4.211947368421052e-05, "loss": 0.0, "step": 121 }, { "epoch": 0.019340519974635383, "grad_norm": 6.248530553421006e-05, "learning_rate": 4.1586315789473684e-05, "loss": 0.0, "step": 122 }, { "epoch": 0.019499048826886495, "grad_norm": 5.69553958484903e-05, "learning_rate": 4.105315789473684e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.019657577679137603, "grad_norm": 5.165593756828457e-05, "learning_rate": 4.052e-05, "loss": 0.0, "step": 124 }, { "epoch": 0.019816106531388714, "grad_norm": 6.129803659860045e-05, "learning_rate": 3.998684210526316e-05, "loss": 0.0, "step": 125 }, { "epoch": 0.019974635383639822, "grad_norm": 5.8346176956547424e-05, "learning_rate": 3.945368421052631e-05, "loss": 0.0, "step": 126 }, { "epoch": 0.020133164235890934, "grad_norm": 5.828439680044539e-05, "learning_rate": 3.892052631578947e-05, "loss": 0.0, "step": 127 }, { "epoch": 0.020291693088142042, "grad_norm": 6.158412725199014e-05, "learning_rate": 3.838736842105263e-05, "loss": 0.0, "step": 128 }, { "epoch": 0.02045022194039315, "grad_norm": 5.588992280536331e-05, "learning_rate": 3.785421052631579e-05, "loss": 0.0, "step": 129 }, { "epoch": 0.02060875079264426, "grad_norm": 4.7499852371402085e-05, "learning_rate": 3.732105263157894e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.02076727964489537, "grad_norm": 5.2001607400598004e-05, "learning_rate": 3.67878947368421e-05, "loss": 0.0, "step": 131 }, { "epoch": 0.02092580849714648, "grad_norm": 5.1708582759601995e-05, "learning_rate": 3.6254736842105264e-05, "loss": 0.0, "step": 132 }, { "epoch": 0.02108433734939759, "grad_norm": 4.733254172606394e-05, "learning_rate": 3.572157894736842e-05, "loss": 0.0, "step": 133 }, { "epoch": 0.0212428662016487, "grad_norm": 5.3371717513073236e-05, "learning_rate": 3.518842105263158e-05, "loss": 0.0, "step": 134 }, { "epoch": 0.02140139505389981, "grad_norm": 4.716423063655384e-05, "learning_rate": 3.465526315789473e-05, "loss": 0.0, "step": 135 }, { "epoch": 0.02155992390615092, "grad_norm": 4.844993964070454e-05, "learning_rate": 3.4122105263157894e-05, "loss": 0.0, "step": 136 }, { "epoch": 0.021718452758402028, "grad_norm": 5.031727778259665e-05, "learning_rate": 3.358894736842105e-05, "loss": 0.0, "step": 137 }, { "epoch": 0.02187698161065314, "grad_norm": 4.5576842239825055e-05, "learning_rate": 3.305578947368421e-05, "loss": 0.0, "step": 138 }, { "epoch": 0.022035510462904247, "grad_norm": 5.910011168452911e-05, "learning_rate": 3.252263157894737e-05, "loss": 0.0, "step": 139 }, { "epoch": 0.02219403931515536, "grad_norm": 4.8003654228523374e-05, "learning_rate": 3.198947368421052e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.022352568167406467, "grad_norm": 4.5099386625224724e-05, "learning_rate": 3.1456315789473684e-05, "loss": 0.0, "step": 141 }, { "epoch": 0.02251109701965758, "grad_norm": 5.6082306400639936e-05, "learning_rate": 3.092315789473684e-05, "loss": 0.0, "step": 142 }, { "epoch": 0.022669625871908686, "grad_norm": 4.4970944145461544e-05, "learning_rate": 3.039e-05, "loss": 0.0, "step": 143 }, { "epoch": 0.022828154724159798, "grad_norm": 5.0454917072784156e-05, "learning_rate": 2.9856842105263153e-05, "loss": 0.0, "step": 144 }, { "epoch": 0.022986683576410906, "grad_norm": 8.930585318012163e-05, "learning_rate": 2.9323684210526317e-05, "loss": 0.0, "step": 145 }, { "epoch": 0.023145212428662017, "grad_norm": 6.606173701584339e-05, "learning_rate": 2.879052631578947e-05, "loss": 0.0, "step": 146 }, { "epoch": 0.023303741280913125, "grad_norm": 7.71412014728412e-05, "learning_rate": 2.8257368421052628e-05, "loss": 0.0, "step": 147 }, { "epoch": 0.023462270133164237, "grad_norm": 7.689122867304832e-05, "learning_rate": 2.772421052631579e-05, "loss": 0.0, "step": 148 }, { "epoch": 0.023620798985415345, "grad_norm": 7.011953857727349e-05, "learning_rate": 2.7191052631578946e-05, "loss": 0.0, "step": 149 }, { "epoch": 0.023779327837666456, "grad_norm": 8.041402179514989e-05, "learning_rate": 2.6657894736842104e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.023779327837666456, "eval_loss": 0.002579666208475828, "eval_runtime": 284.7051, "eval_samples_per_second": 9.329, "eval_steps_per_second": 2.332, "step": 150 }, { "epoch": 0.023937856689917564, "grad_norm": 4.936805248260498, "learning_rate": 2.6124736842105265e-05, "loss": 0.118, "step": 151 }, { "epoch": 0.024096385542168676, "grad_norm": 8.385646651731804e-05, "learning_rate": 2.5591578947368422e-05, "loss": 0.0, "step": 152 }, { "epoch": 0.024254914394419784, "grad_norm": 0.00011956329399254173, "learning_rate": 2.5058421052631576e-05, "loss": 0.0, "step": 153 }, { "epoch": 0.024413443246670895, "grad_norm": 0.00013739150017499924, "learning_rate": 2.4525263157894737e-05, "loss": 0.0, "step": 154 }, { "epoch": 0.024571972098922003, "grad_norm": 0.00016575689369346946, "learning_rate": 2.3992105263157894e-05, "loss": 0.0, "step": 155 }, { "epoch": 0.024730500951173115, "grad_norm": 0.0002567583287600428, "learning_rate": 2.345894736842105e-05, "loss": 0.0, "step": 156 }, { "epoch": 0.024889029803424223, "grad_norm": 0.0002865093993023038, "learning_rate": 2.292578947368421e-05, "loss": 0.0, "step": 157 }, { "epoch": 0.025047558655675334, "grad_norm": 0.0004684887535404414, "learning_rate": 2.2392631578947366e-05, "loss": 0.0, "step": 158 }, { "epoch": 0.025206087507926443, "grad_norm": 0.0003041870950255543, "learning_rate": 2.1859473684210527e-05, "loss": 0.0, "step": 159 }, { "epoch": 0.025364616360177554, "grad_norm": 0.0004387758672237396, "learning_rate": 2.132631578947368e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.025523145212428662, "grad_norm": 0.00046817571274004877, "learning_rate": 2.0793157894736842e-05, "loss": 0.0, "step": 161 }, { "epoch": 0.02568167406467977, "grad_norm": 0.0006510078674182296, "learning_rate": 2.026e-05, "loss": 0.0, "step": 162 }, { "epoch": 0.02584020291693088, "grad_norm": 0.00042408722219988704, "learning_rate": 1.9726842105263157e-05, "loss": 0.0, "step": 163 }, { "epoch": 0.02599873176918199, "grad_norm": 0.0006571735139004886, "learning_rate": 1.9193684210526314e-05, "loss": 0.0, "step": 164 }, { "epoch": 0.0261572606214331, "grad_norm": 0.0005432140314951539, "learning_rate": 1.866052631578947e-05, "loss": 0.0, "step": 165 }, { "epoch": 0.02631578947368421, "grad_norm": 0.0005207346403039992, "learning_rate": 1.8127368421052632e-05, "loss": 0.0, "step": 166 }, { "epoch": 0.02647431832593532, "grad_norm": 0.0007389848469756544, "learning_rate": 1.759421052631579e-05, "loss": 0.0, "step": 167 }, { "epoch": 0.02663284717818643, "grad_norm": 0.0006487572682090104, "learning_rate": 1.7061052631578947e-05, "loss": 0.0, "step": 168 }, { "epoch": 0.02679137603043754, "grad_norm": 0.000673374452162534, "learning_rate": 1.6527894736842104e-05, "loss": 0.0, "step": 169 }, { "epoch": 0.026949904882688648, "grad_norm": 0.0006955991266295314, "learning_rate": 1.599473684210526e-05, "loss": 0.0, "step": 170 }, { "epoch": 0.02710843373493976, "grad_norm": 0.0007812317926436663, "learning_rate": 1.546157894736842e-05, "loss": 0.0, "step": 171 }, { "epoch": 0.027266962587190868, "grad_norm": 0.0007272360380738974, "learning_rate": 1.4928421052631576e-05, "loss": 0.0, "step": 172 }, { "epoch": 0.02742549143944198, "grad_norm": 0.0007635234505869448, "learning_rate": 1.4395263157894735e-05, "loss": 0.0, "step": 173 }, { "epoch": 0.027584020291693087, "grad_norm": 0.0007117385393939912, "learning_rate": 1.3862105263157895e-05, "loss": 0.0, "step": 174 }, { "epoch": 0.0277425491439442, "grad_norm": 0.0005687863449566066, "learning_rate": 1.3328947368421052e-05, "loss": 0.0, "step": 175 }, { "epoch": 0.027901077996195307, "grad_norm": 0.0005387436249293387, "learning_rate": 1.2795789473684211e-05, "loss": 0.0, "step": 176 }, { "epoch": 0.028059606848446418, "grad_norm": 0.0005557397962547839, "learning_rate": 1.2262631578947368e-05, "loss": 0.0, "step": 177 }, { "epoch": 0.028218135700697526, "grad_norm": 0.0005531954229809344, "learning_rate": 1.1729473684210526e-05, "loss": 0.0, "step": 178 }, { "epoch": 0.028376664552948638, "grad_norm": 0.0010639647953212261, "learning_rate": 1.1196315789473683e-05, "loss": 0.0, "step": 179 }, { "epoch": 0.028535193405199746, "grad_norm": 0.0005514100193977356, "learning_rate": 1.066315789473684e-05, "loss": 0.0, "step": 180 }, { "epoch": 0.028693722257450857, "grad_norm": 0.000542394642252475, "learning_rate": 1.013e-05, "loss": 0.0, "step": 181 }, { "epoch": 0.028852251109701965, "grad_norm": 0.00043924085912294686, "learning_rate": 9.596842105263157e-06, "loss": 0.0, "step": 182 }, { "epoch": 0.029010779961953077, "grad_norm": 0.000603148655500263, "learning_rate": 9.063684210526316e-06, "loss": 0.0, "step": 183 }, { "epoch": 0.029169308814204185, "grad_norm": 0.00047173965140245855, "learning_rate": 8.530526315789473e-06, "loss": 0.0, "step": 184 }, { "epoch": 0.029327837666455296, "grad_norm": 0.0006395349046215415, "learning_rate": 7.99736842105263e-06, "loss": 0.0, "step": 185 }, { "epoch": 0.029486366518706404, "grad_norm": 0.0005980796995572746, "learning_rate": 7.464210526315788e-06, "loss": 0.0, "step": 186 }, { "epoch": 0.029644895370957516, "grad_norm": 0.0006005927571095526, "learning_rate": 6.931052631578947e-06, "loss": 0.0, "step": 187 }, { "epoch": 0.029803424223208624, "grad_norm": 0.00042718948679976165, "learning_rate": 6.3978947368421055e-06, "loss": 0.0, "step": 188 }, { "epoch": 0.029961953075459735, "grad_norm": 0.00037346952012740076, "learning_rate": 5.864736842105263e-06, "loss": 0.0, "step": 189 }, { "epoch": 0.030120481927710843, "grad_norm": 0.00046352826757356524, "learning_rate": 5.33157894736842e-06, "loss": 0.0, "step": 190 }, { "epoch": 0.030279010779961955, "grad_norm": 0.0003845041792374104, "learning_rate": 4.7984210526315785e-06, "loss": 0.0, "step": 191 }, { "epoch": 0.030437539632213063, "grad_norm": 0.00042385683627799153, "learning_rate": 4.265263157894737e-06, "loss": 0.0, "step": 192 }, { "epoch": 0.030596068484464174, "grad_norm": 0.0003921858442481607, "learning_rate": 3.732105263157894e-06, "loss": 0.0, "step": 193 }, { "epoch": 0.030754597336715282, "grad_norm": 0.0005148733034729958, "learning_rate": 3.1989473684210527e-06, "loss": 0.0, "step": 194 }, { "epoch": 0.03091312618896639, "grad_norm": 0.0004899385967291892, "learning_rate": 2.66578947368421e-06, "loss": 0.0, "step": 195 }, { "epoch": 0.031071655041217502, "grad_norm": 0.0006933041149750352, "learning_rate": 2.1326315789473684e-06, "loss": 0.0, "step": 196 }, { "epoch": 0.03123018389346861, "grad_norm": 0.0009382757125422359, "learning_rate": 1.5994736842105264e-06, "loss": 0.0, "step": 197 }, { "epoch": 0.03138871274571972, "grad_norm": 0.0007173538906499743, "learning_rate": 1.0663157894736842e-06, "loss": 0.0, "step": 198 }, { "epoch": 0.03154724159797083, "grad_norm": 0.0005706042284145951, "learning_rate": 5.331578947368421e-07, "loss": 0.0, "step": 199 }, { "epoch": 0.03170577045022194, "grad_norm": 0.0005148272030055523, "learning_rate": 0.0, "loss": 0.0, "step": 200 }, { "epoch": 0.03170577045022194, "eval_loss": 0.0018136479193344712, "eval_runtime": 284.2884, "eval_samples_per_second": 9.343, "eval_steps_per_second": 2.336, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.162764252413952e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }