|
{ |
|
"best_metric": 0.0022250961046665907, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.03170577045022194, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0001585288522511097, |
|
"grad_norm": 12.08696174621582, |
|
"learning_rate": 1.008e-05, |
|
"loss": 4.2706, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0001585288522511097, |
|
"eval_loss": 4.04376220703125, |
|
"eval_runtime": 285.4706, |
|
"eval_samples_per_second": 9.304, |
|
"eval_steps_per_second": 2.326, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003170577045022194, |
|
"grad_norm": 15.856035232543945, |
|
"learning_rate": 2.016e-05, |
|
"loss": 3.9804, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0004755865567533291, |
|
"grad_norm": 15.65820026397705, |
|
"learning_rate": 3.024e-05, |
|
"loss": 3.9763, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006341154090044388, |
|
"grad_norm": 14.708057403564453, |
|
"learning_rate": 4.032e-05, |
|
"loss": 3.2287, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0007926442612555486, |
|
"grad_norm": 10.415152549743652, |
|
"learning_rate": 5.04e-05, |
|
"loss": 2.0174, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0009511731135066582, |
|
"grad_norm": 12.43994140625, |
|
"learning_rate": 6.048e-05, |
|
"loss": 1.2254, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0011097019657577679, |
|
"grad_norm": 10.96854019165039, |
|
"learning_rate": 7.055999999999999e-05, |
|
"loss": 0.3292, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0012682308180088776, |
|
"grad_norm": 36.675350189208984, |
|
"learning_rate": 8.064e-05, |
|
"loss": 0.2307, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0014267596702599874, |
|
"grad_norm": 2.064239501953125, |
|
"learning_rate": 9.072e-05, |
|
"loss": 0.0138, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0015852885225110971, |
|
"grad_norm": 0.1330491453409195, |
|
"learning_rate": 0.0001008, |
|
"loss": 0.001, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0017438173747622067, |
|
"grad_norm": 0.01124438177794218, |
|
"learning_rate": 0.00010026947368421052, |
|
"loss": 0.0001, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0019023462270133164, |
|
"grad_norm": 0.004640920553356409, |
|
"learning_rate": 9.973894736842104e-05, |
|
"loss": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002060875079264426, |
|
"grad_norm": 0.007322155870497227, |
|
"learning_rate": 9.920842105263157e-05, |
|
"loss": 0.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0022194039315155357, |
|
"grad_norm": 0.012465717270970345, |
|
"learning_rate": 9.86778947368421e-05, |
|
"loss": 0.0001, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0023779327837666455, |
|
"grad_norm": 0.0506725050508976, |
|
"learning_rate": 9.814736842105264e-05, |
|
"loss": 0.0001, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0025364616360177552, |
|
"grad_norm": 0.00856021698564291, |
|
"learning_rate": 9.761684210526316e-05, |
|
"loss": 0.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.002694990488268865, |
|
"grad_norm": 0.001643276889808476, |
|
"learning_rate": 9.708631578947368e-05, |
|
"loss": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0028535193405199747, |
|
"grad_norm": 0.0009701295639388263, |
|
"learning_rate": 9.655578947368421e-05, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0030120481927710845, |
|
"grad_norm": 0.0005847598076798022, |
|
"learning_rate": 9.602526315789473e-05, |
|
"loss": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0031705770450221942, |
|
"grad_norm": 0.0006212879670783877, |
|
"learning_rate": 9.549473684210525e-05, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0033291058972733036, |
|
"grad_norm": 0.0008075927617028356, |
|
"learning_rate": 9.496421052631579e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0034876347495244133, |
|
"grad_norm": 0.0006020912551321089, |
|
"learning_rate": 9.443368421052631e-05, |
|
"loss": 0.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.003646163601775523, |
|
"grad_norm": 0.0005198498256504536, |
|
"learning_rate": 9.390315789473683e-05, |
|
"loss": 0.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.003804692454026633, |
|
"grad_norm": 0.0016288729384541512, |
|
"learning_rate": 9.337263157894737e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.003963221306277742, |
|
"grad_norm": 0.0007833559648133814, |
|
"learning_rate": 9.28421052631579e-05, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004121750158528852, |
|
"grad_norm": 0.0014727013185620308, |
|
"learning_rate": 9.231157894736842e-05, |
|
"loss": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004280279010779962, |
|
"grad_norm": 0.0016179227968677878, |
|
"learning_rate": 9.178105263157895e-05, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004438807863031071, |
|
"grad_norm": 0.0005032668123021722, |
|
"learning_rate": 9.125052631578948e-05, |
|
"loss": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.004597336715282181, |
|
"grad_norm": 0.0005934814107604325, |
|
"learning_rate": 9.072e-05, |
|
"loss": 0.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.004755865567533291, |
|
"grad_norm": 0.00047520003863610327, |
|
"learning_rate": 9.018947368421052e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004914394419784401, |
|
"grad_norm": 0.0007378292502835393, |
|
"learning_rate": 8.965894736842104e-05, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0050729232720355105, |
|
"grad_norm": 0.0007299358258023858, |
|
"learning_rate": 8.912842105263157e-05, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00523145212428662, |
|
"grad_norm": 0.0005630860105156898, |
|
"learning_rate": 8.85978947368421e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00538998097653773, |
|
"grad_norm": 0.0009994081920012832, |
|
"learning_rate": 8.806736842105264e-05, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.00554850982878884, |
|
"grad_norm": 0.0004544692055787891, |
|
"learning_rate": 8.753684210526316e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0057070386810399495, |
|
"grad_norm": 0.00069584691664204, |
|
"learning_rate": 8.700631578947369e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.005865567533291059, |
|
"grad_norm": 0.0019399194279685616, |
|
"learning_rate": 8.647578947368421e-05, |
|
"loss": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.006024096385542169, |
|
"grad_norm": 0.0005930989282205701, |
|
"learning_rate": 8.594526315789473e-05, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.006182625237793279, |
|
"grad_norm": 0.0007997532375156879, |
|
"learning_rate": 8.541473684210525e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0063411540900443885, |
|
"grad_norm": 0.0013047147076576948, |
|
"learning_rate": 8.488421052631578e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006499682942295497, |
|
"grad_norm": 0.00036864462890662253, |
|
"learning_rate": 8.435368421052631e-05, |
|
"loss": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.006658211794546607, |
|
"grad_norm": 0.0003495727141853422, |
|
"learning_rate": 8.382315789473684e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.006816740646797717, |
|
"grad_norm": 0.0003555091971065849, |
|
"learning_rate": 8.329263157894737e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.006975269499048827, |
|
"grad_norm": 0.0002377888304181397, |
|
"learning_rate": 8.27621052631579e-05, |
|
"loss": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.007133798351299936, |
|
"grad_norm": 0.0002825877454597503, |
|
"learning_rate": 8.223157894736842e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.007292327203551046, |
|
"grad_norm": 0.0002043453569058329, |
|
"learning_rate": 8.170105263157894e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.007450856055802156, |
|
"grad_norm": 0.0002539000706747174, |
|
"learning_rate": 8.117052631578946e-05, |
|
"loss": 0.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.007609384908053266, |
|
"grad_norm": 0.00020019305520690978, |
|
"learning_rate": 8.064e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0077679137603043754, |
|
"grad_norm": 0.00020320792100392282, |
|
"learning_rate": 8.010947368421052e-05, |
|
"loss": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.007926442612555484, |
|
"grad_norm": 0.00017714353452902287, |
|
"learning_rate": 7.957894736842105e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.007926442612555484, |
|
"eval_loss": 0.0037753561045974493, |
|
"eval_runtime": 285.8239, |
|
"eval_samples_per_second": 9.292, |
|
"eval_steps_per_second": 2.323, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008084971464806594, |
|
"grad_norm": 11.202030181884766, |
|
"learning_rate": 7.904842105263158e-05, |
|
"loss": 0.3948, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.008243500317057704, |
|
"grad_norm": 4.6145090891513973e-05, |
|
"learning_rate": 7.85178947368421e-05, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.008402029169308814, |
|
"grad_norm": 5.335342575563118e-05, |
|
"learning_rate": 7.798736842105263e-05, |
|
"loss": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.008560558021559923, |
|
"grad_norm": 6.71695961500518e-05, |
|
"learning_rate": 7.745684210526315e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.008719086873811033, |
|
"grad_norm": 7.083545642672107e-05, |
|
"learning_rate": 7.692631578947369e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.008877615726062143, |
|
"grad_norm": 0.00011363301746314391, |
|
"learning_rate": 7.639578947368421e-05, |
|
"loss": 0.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.009036144578313253, |
|
"grad_norm": 0.00015090873057488352, |
|
"learning_rate": 7.586526315789473e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.009194673430564362, |
|
"grad_norm": 0.00017162153380922973, |
|
"learning_rate": 7.533473684210526e-05, |
|
"loss": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.009353202282815472, |
|
"grad_norm": 0.00037030354724265635, |
|
"learning_rate": 7.480421052631578e-05, |
|
"loss": 0.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.009511731135066582, |
|
"grad_norm": 0.00045603603939525783, |
|
"learning_rate": 7.427368421052632e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.009670259987317692, |
|
"grad_norm": 0.000606843619607389, |
|
"learning_rate": 7.374315789473685e-05, |
|
"loss": 0.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.009828788839568801, |
|
"grad_norm": 0.0012902193702757359, |
|
"learning_rate": 7.321263157894737e-05, |
|
"loss": 0.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.009987317691819911, |
|
"grad_norm": 0.0015680071664974093, |
|
"learning_rate": 7.26821052631579e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.010145846544071021, |
|
"grad_norm": 0.0017102680867537856, |
|
"learning_rate": 7.215157894736842e-05, |
|
"loss": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01030437539632213, |
|
"grad_norm": 0.001034354092553258, |
|
"learning_rate": 7.162105263157894e-05, |
|
"loss": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01046290424857324, |
|
"grad_norm": 0.0008957475074566901, |
|
"learning_rate": 7.109052631578947e-05, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.01062143310082435, |
|
"grad_norm": 0.000811917707324028, |
|
"learning_rate": 7.055999999999999e-05, |
|
"loss": 0.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01077996195307546, |
|
"grad_norm": 0.0010698516853153706, |
|
"learning_rate": 7.002947368421052e-05, |
|
"loss": 0.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.01093849080532657, |
|
"grad_norm": 0.0007929237326607108, |
|
"learning_rate": 6.949894736842105e-05, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.01109701965757768, |
|
"grad_norm": 0.0005297662573866546, |
|
"learning_rate": 6.896842105263158e-05, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01125554850982879, |
|
"grad_norm": 0.0004970860900357366, |
|
"learning_rate": 6.843789473684211e-05, |
|
"loss": 0.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.011414077362079899, |
|
"grad_norm": 0.00042307560215704143, |
|
"learning_rate": 6.790736842105263e-05, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.011572606214331009, |
|
"grad_norm": 0.0003402529109735042, |
|
"learning_rate": 6.737684210526315e-05, |
|
"loss": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.011731135066582118, |
|
"grad_norm": 0.00037735735531896353, |
|
"learning_rate": 6.684631578947368e-05, |
|
"loss": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.011889663918833228, |
|
"grad_norm": 0.0004949852591380477, |
|
"learning_rate": 6.631578947368421e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.012048192771084338, |
|
"grad_norm": 0.00039665098302066326, |
|
"learning_rate": 6.578526315789473e-05, |
|
"loss": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.012206721623335448, |
|
"grad_norm": 0.0003759284154511988, |
|
"learning_rate": 6.525473684210526e-05, |
|
"loss": 0.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.012365250475586557, |
|
"grad_norm": 0.0007349163061007857, |
|
"learning_rate": 6.47242105263158e-05, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.012523779327837667, |
|
"grad_norm": 0.00038908098940737545, |
|
"learning_rate": 6.419368421052632e-05, |
|
"loss": 0.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.012682308180088777, |
|
"grad_norm": 0.00039207786903716624, |
|
"learning_rate": 6.366315789473684e-05, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.012840837032339885, |
|
"grad_norm": 0.00031621192465536296, |
|
"learning_rate": 6.313263157894736e-05, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.012999365884590995, |
|
"grad_norm": 0.001207337947562337, |
|
"learning_rate": 6.26021052631579e-05, |
|
"loss": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.013157894736842105, |
|
"grad_norm": 0.0003833776863757521, |
|
"learning_rate": 6.207157894736842e-05, |
|
"loss": 0.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.013316423589093214, |
|
"grad_norm": 0.0002807167184073478, |
|
"learning_rate": 6.154105263157894e-05, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.013474952441344324, |
|
"grad_norm": 0.00025957514299079776, |
|
"learning_rate": 6.1010526315789474e-05, |
|
"loss": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.013633481293595434, |
|
"grad_norm": 0.00022323857410810888, |
|
"learning_rate": 6.048e-05, |
|
"loss": 0.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.013792010145846544, |
|
"grad_norm": 0.00020459384541027248, |
|
"learning_rate": 5.994947368421052e-05, |
|
"loss": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.013950538998097653, |
|
"grad_norm": 0.00021896703401580453, |
|
"learning_rate": 5.941894736842104e-05, |
|
"loss": 0.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.014109067850348763, |
|
"grad_norm": 0.0003069478552788496, |
|
"learning_rate": 5.888842105263158e-05, |
|
"loss": 0.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.014267596702599873, |
|
"grad_norm": 0.0003965249052271247, |
|
"learning_rate": 5.835789473684211e-05, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.014426125554850983, |
|
"grad_norm": 0.00017867452697828412, |
|
"learning_rate": 5.782736842105263e-05, |
|
"loss": 0.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.014584654407102092, |
|
"grad_norm": 0.00016691711789462715, |
|
"learning_rate": 5.7296842105263154e-05, |
|
"loss": 0.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.014743183259353202, |
|
"grad_norm": 0.0001635922526475042, |
|
"learning_rate": 5.676631578947368e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.014901712111604312, |
|
"grad_norm": 0.00017515213403385133, |
|
"learning_rate": 5.623578947368421e-05, |
|
"loss": 0.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.015060240963855422, |
|
"grad_norm": 0.00022856853320263326, |
|
"learning_rate": 5.570526315789474e-05, |
|
"loss": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.015218769816106531, |
|
"grad_norm": 0.00013485149247571826, |
|
"learning_rate": 5.5174736842105266e-05, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.015377298668357641, |
|
"grad_norm": 0.00022567079577129334, |
|
"learning_rate": 5.464421052631579e-05, |
|
"loss": 0.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.015535827520608751, |
|
"grad_norm": 0.00031975016463547945, |
|
"learning_rate": 5.411368421052631e-05, |
|
"loss": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01569435637285986, |
|
"grad_norm": 0.00018277288472745568, |
|
"learning_rate": 5.358315789473684e-05, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.01585288522511097, |
|
"grad_norm": 0.00018042400188278407, |
|
"learning_rate": 5.3052631578947364e-05, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01585288522511097, |
|
"eval_loss": 0.0034066014923155308, |
|
"eval_runtime": 285.7064, |
|
"eval_samples_per_second": 9.296, |
|
"eval_steps_per_second": 2.324, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01601141407736208, |
|
"grad_norm": 0.0001420114713255316, |
|
"learning_rate": 5.252210526315789e-05, |
|
"loss": 0.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.016169942929613188, |
|
"grad_norm": 0.00012088767834939063, |
|
"learning_rate": 5.199157894736842e-05, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0163284717818643, |
|
"grad_norm": 0.00011178933345945552, |
|
"learning_rate": 5.1461052631578946e-05, |
|
"loss": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.016487000634115408, |
|
"grad_norm": 0.00010531868610996753, |
|
"learning_rate": 5.0930526315789476e-05, |
|
"loss": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01664552948636652, |
|
"grad_norm": 0.00011901999096153304, |
|
"learning_rate": 5.04e-05, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.016804058338617627, |
|
"grad_norm": 9.147950186161324e-05, |
|
"learning_rate": 4.986947368421052e-05, |
|
"loss": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01696258719086874, |
|
"grad_norm": 7.989244477357715e-05, |
|
"learning_rate": 4.933894736842105e-05, |
|
"loss": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.017121116043119847, |
|
"grad_norm": 7.46723817428574e-05, |
|
"learning_rate": 4.880842105263158e-05, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.017279644895370958, |
|
"grad_norm": 8.380914368899539e-05, |
|
"learning_rate": 4.8277894736842103e-05, |
|
"loss": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.017438173747622066, |
|
"grad_norm": 7.211839692899957e-05, |
|
"learning_rate": 4.7747368421052626e-05, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.017596702599873178, |
|
"grad_norm": 7.725647446932271e-05, |
|
"learning_rate": 4.7216842105263156e-05, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.017755231452124286, |
|
"grad_norm": 8.656168211018667e-05, |
|
"learning_rate": 4.6686315789473686e-05, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.017913760304375397, |
|
"grad_norm": 7.190388714661822e-05, |
|
"learning_rate": 4.615578947368421e-05, |
|
"loss": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.018072289156626505, |
|
"grad_norm": 7.352729880949482e-05, |
|
"learning_rate": 4.562526315789474e-05, |
|
"loss": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.018230818008877617, |
|
"grad_norm": 6.0772359574912116e-05, |
|
"learning_rate": 4.509473684210526e-05, |
|
"loss": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.018389346861128725, |
|
"grad_norm": 5.866353239980526e-05, |
|
"learning_rate": 4.4564210526315784e-05, |
|
"loss": 0.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.018547875713379836, |
|
"grad_norm": 6.619851046707481e-05, |
|
"learning_rate": 4.403368421052632e-05, |
|
"loss": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.018706404565630944, |
|
"grad_norm": 6.62534948787652e-05, |
|
"learning_rate": 4.350315789473684e-05, |
|
"loss": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.018864933417882056, |
|
"grad_norm": 6.304805719992146e-05, |
|
"learning_rate": 4.2972631578947366e-05, |
|
"loss": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.019023462270133164, |
|
"grad_norm": 6.0336358728818595e-05, |
|
"learning_rate": 4.244210526315789e-05, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.019181991122384275, |
|
"grad_norm": 5.7237852161051705e-05, |
|
"learning_rate": 4.191157894736842e-05, |
|
"loss": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.019340519974635383, |
|
"grad_norm": 5.5000280553940684e-05, |
|
"learning_rate": 4.138105263157895e-05, |
|
"loss": 0.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.019499048826886495, |
|
"grad_norm": 6.333758938126266e-05, |
|
"learning_rate": 4.085052631578947e-05, |
|
"loss": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.019657577679137603, |
|
"grad_norm": 6.331897748168558e-05, |
|
"learning_rate": 4.032e-05, |
|
"loss": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.019816106531388714, |
|
"grad_norm": 5.774655437562615e-05, |
|
"learning_rate": 3.978947368421052e-05, |
|
"loss": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.019974635383639822, |
|
"grad_norm": 5.671928738593124e-05, |
|
"learning_rate": 3.925894736842105e-05, |
|
"loss": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.020133164235890934, |
|
"grad_norm": 6.89297157805413e-05, |
|
"learning_rate": 3.8728421052631575e-05, |
|
"loss": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.020291693088142042, |
|
"grad_norm": 6.829660560470074e-05, |
|
"learning_rate": 3.8197894736842105e-05, |
|
"loss": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02045022194039315, |
|
"grad_norm": 5.680310641764663e-05, |
|
"learning_rate": 3.766736842105263e-05, |
|
"loss": 0.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02060875079264426, |
|
"grad_norm": 4.718761192634702e-05, |
|
"learning_rate": 3.713684210526316e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02076727964489537, |
|
"grad_norm": 5.037297523813322e-05, |
|
"learning_rate": 3.660631578947369e-05, |
|
"loss": 0.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.02092580849714648, |
|
"grad_norm": 5.18505803484004e-05, |
|
"learning_rate": 3.607578947368421e-05, |
|
"loss": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02108433734939759, |
|
"grad_norm": 5.0042519433191046e-05, |
|
"learning_rate": 3.554526315789473e-05, |
|
"loss": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0212428662016487, |
|
"grad_norm": 5.112058715894818e-05, |
|
"learning_rate": 3.501473684210526e-05, |
|
"loss": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.02140139505389981, |
|
"grad_norm": 5.2210583817213774e-05, |
|
"learning_rate": 3.448421052631579e-05, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02155992390615092, |
|
"grad_norm": 5.472183329402469e-05, |
|
"learning_rate": 3.3953684210526315e-05, |
|
"loss": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.021718452758402028, |
|
"grad_norm": 5.2417293773032725e-05, |
|
"learning_rate": 3.342315789473684e-05, |
|
"loss": 0.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.02187698161065314, |
|
"grad_norm": 4.7179757530102506e-05, |
|
"learning_rate": 3.289263157894737e-05, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.022035510462904247, |
|
"grad_norm": 5.0374997954349965e-05, |
|
"learning_rate": 3.23621052631579e-05, |
|
"loss": 0.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.02219403931515536, |
|
"grad_norm": 5.104386946186423e-05, |
|
"learning_rate": 3.183157894736842e-05, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.022352568167406467, |
|
"grad_norm": 4.212348358123563e-05, |
|
"learning_rate": 3.130105263157895e-05, |
|
"loss": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02251109701965758, |
|
"grad_norm": 6.773701898055151e-05, |
|
"learning_rate": 3.077052631578947e-05, |
|
"loss": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.022669625871908686, |
|
"grad_norm": 4.5040869736112654e-05, |
|
"learning_rate": 3.024e-05, |
|
"loss": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.022828154724159798, |
|
"grad_norm": 4.242918294039555e-05, |
|
"learning_rate": 2.970947368421052e-05, |
|
"loss": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.022986683576410906, |
|
"grad_norm": 7.274608651641756e-05, |
|
"learning_rate": 2.9178947368421054e-05, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.023145212428662017, |
|
"grad_norm": 7.292564259842038e-05, |
|
"learning_rate": 2.8648421052631577e-05, |
|
"loss": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.023303741280913125, |
|
"grad_norm": 8.248071389971301e-05, |
|
"learning_rate": 2.8117894736842103e-05, |
|
"loss": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.023462270133164237, |
|
"grad_norm": 7.596130308229476e-05, |
|
"learning_rate": 2.7587368421052633e-05, |
|
"loss": 0.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.023620798985415345, |
|
"grad_norm": 8.240217721322551e-05, |
|
"learning_rate": 2.7056842105263156e-05, |
|
"loss": 0.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.023779327837666456, |
|
"grad_norm": 0.0001047488913172856, |
|
"learning_rate": 2.6526315789473682e-05, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.023779327837666456, |
|
"eval_loss": 0.003376348875463009, |
|
"eval_runtime": 285.9072, |
|
"eval_samples_per_second": 9.29, |
|
"eval_steps_per_second": 2.322, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.023937856689917564, |
|
"grad_norm": 5.650425434112549, |
|
"learning_rate": 2.599578947368421e-05, |
|
"loss": 0.1783, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.024096385542168676, |
|
"grad_norm": 0.00012569170212373137, |
|
"learning_rate": 2.5465263157894738e-05, |
|
"loss": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.024254914394419784, |
|
"grad_norm": 0.00018095099949277937, |
|
"learning_rate": 2.493473684210526e-05, |
|
"loss": 0.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.024413443246670895, |
|
"grad_norm": 0.0001806170621421188, |
|
"learning_rate": 2.440421052631579e-05, |
|
"loss": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.024571972098922003, |
|
"grad_norm": 0.00021117427968420088, |
|
"learning_rate": 2.3873684210526313e-05, |
|
"loss": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.024730500951173115, |
|
"grad_norm": 0.00030483287991955876, |
|
"learning_rate": 2.3343157894736843e-05, |
|
"loss": 0.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.024889029803424223, |
|
"grad_norm": 0.00029481080127879977, |
|
"learning_rate": 2.281263157894737e-05, |
|
"loss": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.025047558655675334, |
|
"grad_norm": 0.0004884671652689576, |
|
"learning_rate": 2.2282105263157892e-05, |
|
"loss": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.025206087507926443, |
|
"grad_norm": 0.0002942118444480002, |
|
"learning_rate": 2.175157894736842e-05, |
|
"loss": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.025364616360177554, |
|
"grad_norm": 0.000287515576928854, |
|
"learning_rate": 2.1221052631578944e-05, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.025523145212428662, |
|
"grad_norm": 0.00042692935676313937, |
|
"learning_rate": 2.0690526315789474e-05, |
|
"loss": 0.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.02568167406467977, |
|
"grad_norm": 0.0005435315542854369, |
|
"learning_rate": 2.016e-05, |
|
"loss": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.02584020291693088, |
|
"grad_norm": 0.00035297736758366227, |
|
"learning_rate": 1.9629473684210526e-05, |
|
"loss": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.02599873176918199, |
|
"grad_norm": 0.0005012881010770798, |
|
"learning_rate": 1.9098947368421053e-05, |
|
"loss": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0261572606214331, |
|
"grad_norm": 0.00044648078619502485, |
|
"learning_rate": 1.856842105263158e-05, |
|
"loss": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.02631578947368421, |
|
"grad_norm": 0.0004886375973001122, |
|
"learning_rate": 1.8037894736842105e-05, |
|
"loss": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.02647431832593532, |
|
"grad_norm": 0.0005168926436454058, |
|
"learning_rate": 1.750736842105263e-05, |
|
"loss": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.02663284717818643, |
|
"grad_norm": 0.0004425595107022673, |
|
"learning_rate": 1.6976842105263157e-05, |
|
"loss": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.02679137603043754, |
|
"grad_norm": 0.0005173716926947236, |
|
"learning_rate": 1.6446315789473684e-05, |
|
"loss": 0.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.026949904882688648, |
|
"grad_norm": 0.00048417490324936807, |
|
"learning_rate": 1.591578947368421e-05, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02710843373493976, |
|
"grad_norm": 0.0006713734474033117, |
|
"learning_rate": 1.5385263157894736e-05, |
|
"loss": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.027266962587190868, |
|
"grad_norm": 0.0004995199851691723, |
|
"learning_rate": 1.485473684210526e-05, |
|
"loss": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.02742549143944198, |
|
"grad_norm": 0.0004266517935320735, |
|
"learning_rate": 1.4324210526315789e-05, |
|
"loss": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.027584020291693087, |
|
"grad_norm": 0.0004872908757533878, |
|
"learning_rate": 1.3793684210526316e-05, |
|
"loss": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.0277425491439442, |
|
"grad_norm": 0.00042201511678285897, |
|
"learning_rate": 1.3263157894736841e-05, |
|
"loss": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.027901077996195307, |
|
"grad_norm": 0.0004310183576308191, |
|
"learning_rate": 1.2732631578947369e-05, |
|
"loss": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.028059606848446418, |
|
"grad_norm": 0.00046641906374134123, |
|
"learning_rate": 1.2202105263157895e-05, |
|
"loss": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.028218135700697526, |
|
"grad_norm": 0.0004791380779352039, |
|
"learning_rate": 1.1671578947368421e-05, |
|
"loss": 0.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.028376664552948638, |
|
"grad_norm": 0.000661531463265419, |
|
"learning_rate": 1.1141052631578946e-05, |
|
"loss": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.028535193405199746, |
|
"grad_norm": 0.0005801509832963347, |
|
"learning_rate": 1.0610526315789472e-05, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.028693722257450857, |
|
"grad_norm": 0.00038415539893321693, |
|
"learning_rate": 1.008e-05, |
|
"loss": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.028852251109701965, |
|
"grad_norm": 0.00040587177500128746, |
|
"learning_rate": 9.549473684210526e-06, |
|
"loss": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.029010779961953077, |
|
"grad_norm": 0.00045143821625970304, |
|
"learning_rate": 9.018947368421052e-06, |
|
"loss": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.029169308814204185, |
|
"grad_norm": 0.0005063486169092357, |
|
"learning_rate": 8.488421052631579e-06, |
|
"loss": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.029327837666455296, |
|
"grad_norm": 0.0005101272254250944, |
|
"learning_rate": 7.957894736842105e-06, |
|
"loss": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.029486366518706404, |
|
"grad_norm": 0.00046848724014125764, |
|
"learning_rate": 7.42736842105263e-06, |
|
"loss": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.029644895370957516, |
|
"grad_norm": 0.000389144872315228, |
|
"learning_rate": 6.896842105263158e-06, |
|
"loss": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.029803424223208624, |
|
"grad_norm": 0.00045255443546921015, |
|
"learning_rate": 6.3663157894736845e-06, |
|
"loss": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.029961953075459735, |
|
"grad_norm": 0.00038248361670412123, |
|
"learning_rate": 5.835789473684211e-06, |
|
"loss": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.030120481927710843, |
|
"grad_norm": 0.00043523870408535004, |
|
"learning_rate": 5.305263157894736e-06, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.030279010779961955, |
|
"grad_norm": 0.00042310234857723117, |
|
"learning_rate": 4.774736842105263e-06, |
|
"loss": 0.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.030437539632213063, |
|
"grad_norm": 0.00040010226075537503, |
|
"learning_rate": 4.244210526315789e-06, |
|
"loss": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.030596068484464174, |
|
"grad_norm": 0.0003844479797407985, |
|
"learning_rate": 3.713684210526315e-06, |
|
"loss": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.030754597336715282, |
|
"grad_norm": 0.0004438844043761492, |
|
"learning_rate": 3.1831578947368422e-06, |
|
"loss": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03091312618896639, |
|
"grad_norm": 0.0004899434861727059, |
|
"learning_rate": 2.652631578947368e-06, |
|
"loss": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.031071655041217502, |
|
"grad_norm": 0.0006046644994057715, |
|
"learning_rate": 2.1221052631578947e-06, |
|
"loss": 0.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03123018389346861, |
|
"grad_norm": 0.0007566219428554177, |
|
"learning_rate": 1.5915789473684211e-06, |
|
"loss": 0.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03138871274571972, |
|
"grad_norm": 0.0006015272228978574, |
|
"learning_rate": 1.0610526315789473e-06, |
|
"loss": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03154724159797083, |
|
"grad_norm": 0.0004844815412070602, |
|
"learning_rate": 5.305263157894737e-07, |
|
"loss": 0.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03170577045022194, |
|
"grad_norm": 0.0004923184169456363, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03170577045022194, |
|
"eval_loss": 0.0022250961046665907, |
|
"eval_runtime": 285.0318, |
|
"eval_samples_per_second": 9.318, |
|
"eval_steps_per_second": 2.33, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.162764252413952e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|