{
  "best_metric": 1.3449122905731201,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 1.1450094161958568,
  "eval_steps": 25,
  "global_step": 133,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008609093354856066,
      "grad_norm": 0.6988075971603394,
      "learning_rate": 2e-05,
      "loss": 1.7571,
      "step": 1
    },
    {
      "epoch": 0.008609093354856066,
      "eval_loss": 3.2656900882720947,
      "eval_runtime": 1.2466,
      "eval_samples_per_second": 40.11,
      "eval_steps_per_second": 10.429,
      "step": 1
    },
    {
      "epoch": 0.017218186709712133,
      "grad_norm": 0.7088323831558228,
      "learning_rate": 4e-05,
      "loss": 2.0087,
      "step": 2
    },
    {
      "epoch": 0.0258272800645682,
      "grad_norm": 1.009822130203247,
      "learning_rate": 6e-05,
      "loss": 2.4702,
      "step": 3
    },
    {
      "epoch": 0.034436373419424265,
      "grad_norm": 0.9133837819099426,
      "learning_rate": 8e-05,
      "loss": 2.5742,
      "step": 4
    },
    {
      "epoch": 0.04304546677428033,
      "grad_norm": 1.0774202346801758,
      "learning_rate": 0.0001,
      "loss": 2.4651,
      "step": 5
    },
    {
      "epoch": 0.0516545601291364,
      "grad_norm": 1.940169095993042,
      "learning_rate": 9.99864468413292e-05,
      "loss": 2.7402,
      "step": 6
    },
    {
      "epoch": 0.060263653483992465,
      "grad_norm": 2.5114591121673584,
      "learning_rate": 9.994579552923277e-05,
      "loss": 2.36,
      "step": 7
    },
    {
      "epoch": 0.06887274683884853,
      "grad_norm": 2.1416642665863037,
      "learning_rate": 9.987807055054106e-05,
      "loss": 2.1412,
      "step": 8
    },
    {
      "epoch": 0.0774818401937046,
      "grad_norm": 2.493290424346924,
      "learning_rate": 9.978331270024886e-05,
      "loss": 2.134,
      "step": 9
    },
    {
      "epoch": 0.08609093354856066,
      "grad_norm": 2.5377631187438965,
      "learning_rate": 9.966157905694196e-05,
      "loss": 2.2798,
      "step": 10
    },
    {
      "epoch": 0.09470002690341674,
      "grad_norm": 5.070113658905029,
      "learning_rate": 9.951294294841516e-05,
      "loss": 2.0949,
      "step": 11
    },
    {
      "epoch": 0.1033091202582728,
      "grad_norm": 8.32091999053955,
      "learning_rate": 9.933749390750235e-05,
      "loss": 2.669,
      "step": 12
    },
    {
      "epoch": 0.11191821361312887,
      "grad_norm": 10.361486434936523,
      "learning_rate": 9.913533761814537e-05,
      "loss": 2.7766,
      "step": 13
    },
    {
      "epoch": 0.12052730696798493,
      "grad_norm": 5.311871528625488,
      "learning_rate": 9.890659585173379e-05,
      "loss": 1.494,
      "step": 14
    },
    {
      "epoch": 0.129136400322841,
      "grad_norm": 4.676233291625977,
      "learning_rate": 9.865140639375449e-05,
      "loss": 1.5863,
      "step": 15
    },
    {
      "epoch": 0.13774549367769706,
      "grad_norm": 3.6029069423675537,
      "learning_rate": 9.83699229607948e-05,
      "loss": 1.7019,
      "step": 16
    },
    {
      "epoch": 0.14635458703255314,
      "grad_norm": 1.6760152578353882,
      "learning_rate": 9.80623151079494e-05,
      "loss": 1.6638,
      "step": 17
    },
    {
      "epoch": 0.1549636803874092,
      "grad_norm": 1.4253727197647095,
      "learning_rate": 9.772876812668666e-05,
      "loss": 1.7801,
      "step": 18
    },
    {
      "epoch": 0.16357277374226525,
      "grad_norm": 1.3295799493789673,
      "learning_rate": 9.736948293323593e-05,
      "loss": 1.7038,
      "step": 19
    },
    {
      "epoch": 0.17218186709712133,
      "grad_norm": 0.7724640965461731,
      "learning_rate": 9.698467594756325e-05,
      "loss": 1.7746,
      "step": 20
    },
    {
      "epoch": 0.1807909604519774,
      "grad_norm": 0.8214246034622192,
      "learning_rate": 9.657457896300791e-05,
      "loss": 1.5971,
      "step": 21
    },
    {
      "epoch": 0.18940005380683347,
      "grad_norm": 1.1587809324264526,
      "learning_rate": 9.613943900665889e-05,
      "loss": 1.7559,
      "step": 22
    },
    {
      "epoch": 0.19800914716168955,
      "grad_norm": 1.8553802967071533,
      "learning_rate": 9.567951819055496e-05,
      "loss": 1.7612,
      "step": 23
    },
    {
      "epoch": 0.2066182405165456,
      "grad_norm": 2.004556655883789,
      "learning_rate": 9.519509355379818e-05,
      "loss": 1.5969,
      "step": 24
    },
    {
      "epoch": 0.21522733387140167,
      "grad_norm": 5.224137783050537,
      "learning_rate": 9.468645689567598e-05,
      "loss": 2.0711,
      "step": 25
    },
    {
      "epoch": 0.21522733387140167,
      "eval_loss": 1.508137822151184,
      "eval_runtime": 1.2723,
      "eval_samples_per_second": 39.3,
      "eval_steps_per_second": 10.218,
      "step": 25
    },
    {
      "epoch": 0.22383642722625774,
      "grad_norm": 1.341894268989563,
      "learning_rate": 9.415391459989203e-05,
      "loss": 1.2867,
      "step": 26
    },
    {
      "epoch": 0.2324455205811138,
      "grad_norm": 2.224653720855713,
      "learning_rate": 9.359778745001225e-05,
      "loss": 1.2927,
      "step": 27
    },
    {
      "epoch": 0.24105461393596986,
      "grad_norm": 1.4196522235870361,
      "learning_rate": 9.301841043623682e-05,
      "loss": 1.3711,
      "step": 28
    },
    {
      "epoch": 0.24966370729082593,
      "grad_norm": 1.2161178588867188,
      "learning_rate": 9.241613255361455e-05,
      "loss": 1.4576,
      "step": 29
    },
    {
      "epoch": 0.258272800645682,
      "grad_norm": 0.8192944526672363,
      "learning_rate": 9.179131659182127e-05,
      "loss": 1.6426,
      "step": 30
    },
    {
      "epoch": 0.2668818940005381,
      "grad_norm": 0.6419580578804016,
      "learning_rate": 9.114433891662902e-05,
      "loss": 1.7142,
      "step": 31
    },
    {
      "epoch": 0.2754909873553941,
      "grad_norm": 0.41701650619506836,
      "learning_rate": 9.047558924319729e-05,
      "loss": 1.6853,
      "step": 32
    },
    {
      "epoch": 0.2841000807102502,
      "grad_norm": 0.4797891080379486,
      "learning_rate": 8.978547040132317e-05,
      "loss": 1.6622,
      "step": 33
    },
    {
      "epoch": 0.29270917406510627,
      "grad_norm": 0.737724781036377,
      "learning_rate": 8.907439809279181e-05,
      "loss": 1.6734,
      "step": 34
    },
    {
      "epoch": 0.3013182674199623,
      "grad_norm": 1.0522786378860474,
      "learning_rate": 8.834280064097317e-05,
      "loss": 1.6301,
      "step": 35
    },
    {
      "epoch": 0.3099273607748184,
      "grad_norm": 1.4976811408996582,
      "learning_rate": 8.759111873281603e-05,
      "loss": 1.6521,
      "step": 36
    },
    {
      "epoch": 0.31853645412967446,
      "grad_norm": 3.2661497592926025,
      "learning_rate": 8.681980515339464e-05,
      "loss": 1.5304,
      "step": 37
    },
    {
      "epoch": 0.3271455474845305,
      "grad_norm": 2.8426661491394043,
      "learning_rate": 8.602932451316802e-05,
      "loss": 1.437,
      "step": 38
    },
    {
      "epoch": 0.3357546408393866,
      "grad_norm": 0.49562180042266846,
      "learning_rate": 8.522015296811584e-05,
      "loss": 1.2391,
      "step": 39
    },
    {
      "epoch": 0.34436373419424265,
      "grad_norm": 0.5359828472137451,
      "learning_rate": 8.439277793291995e-05,
      "loss": 1.2585,
      "step": 40
    },
    {
      "epoch": 0.35297282754909876,
      "grad_norm": 0.8592618107795715,
      "learning_rate": 8.354769778736406e-05,
      "loss": 1.3682,
      "step": 41
    },
    {
      "epoch": 0.3615819209039548,
      "grad_norm": 0.8251994848251343,
      "learning_rate": 8.268542157612821e-05,
      "loss": 1.548,
      "step": 42
    },
    {
      "epoch": 0.37019101425881085,
      "grad_norm": 0.9783174991607666,
      "learning_rate": 8.180646870215952e-05,
      "loss": 1.7041,
      "step": 43
    },
    {
      "epoch": 0.37880010761366695,
      "grad_norm": 0.894888699054718,
      "learning_rate": 8.091136861380305e-05,
      "loss": 1.8391,
      "step": 44
    },
    {
      "epoch": 0.387409200968523,
      "grad_norm": 0.5933730006217957,
      "learning_rate": 8.000066048588211e-05,
      "loss": 1.6974,
      "step": 45
    },
    {
      "epoch": 0.3960182943233791,
      "grad_norm": 0.7440256476402283,
      "learning_rate": 7.907489289491939e-05,
      "loss": 1.6231,
      "step": 46
    },
    {
      "epoch": 0.40462738767823514,
      "grad_norm": 0.7255629897117615,
      "learning_rate": 7.813462348869497e-05,
      "loss": 1.6172,
      "step": 47
    },
    {
      "epoch": 0.4132364810330912,
      "grad_norm": 1.1230436563491821,
      "learning_rate": 7.71804186503403e-05,
      "loss": 1.5745,
      "step": 48
    },
    {
      "epoch": 0.4218455743879473,
      "grad_norm": 1.6526938676834106,
      "learning_rate": 7.62128531571699e-05,
      "loss": 1.2586,
      "step": 49
    },
    {
      "epoch": 0.43045466774280333,
      "grad_norm": 5.730405330657959,
      "learning_rate": 7.523250983445731e-05,
      "loss": 1.7199,
      "step": 50
    },
    {
      "epoch": 0.43045466774280333,
      "eval_loss": 1.389930248260498,
      "eval_runtime": 1.2729,
      "eval_samples_per_second": 39.28,
      "eval_steps_per_second": 10.213,
      "step": 50
    },
    {
      "epoch": 0.4390637610976594,
      "grad_norm": 0.4678877890110016,
      "learning_rate": 7.42399792043627e-05,
      "loss": 1.2294,
      "step": 51
    },
    {
      "epoch": 0.4476728544525155,
      "grad_norm": 0.5002795457839966,
      "learning_rate": 7.323585913022454e-05,
      "loss": 1.2342,
      "step": 52
    },
    {
      "epoch": 0.4562819478073715,
      "grad_norm": 0.3534197509288788,
      "learning_rate": 7.222075445642904e-05,
      "loss": 1.2975,
      "step": 53
    },
    {
      "epoch": 0.4648910411622276,
      "grad_norm": 0.6102612018585205,
      "learning_rate": 7.119527664407447e-05,
      "loss": 1.4773,
      "step": 54
    },
    {
      "epoch": 0.47350013451708367,
      "grad_norm": 0.5064122080802917,
      "learning_rate": 7.01600434026499e-05,
      "loss": 1.5257,
      "step": 55
    },
    {
      "epoch": 0.4821092278719397,
      "grad_norm": 0.6477398872375488,
      "learning_rate": 6.911567831795013e-05,
      "loss": 1.7135,
      "step": 56
    },
    {
      "epoch": 0.4907183212267958,
      "grad_norm": 1.0539360046386719,
      "learning_rate": 6.80628104764508e-05,
      "loss": 1.8241,
      "step": 57
    },
    {
      "epoch": 0.49932741458165186,
      "grad_norm": 0.7702855467796326,
      "learning_rate": 6.700207408637044e-05,
      "loss": 1.7362,
      "step": 58
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 0.6455403566360474,
      "learning_rate": 6.593410809564689e-05,
      "loss": 1.5381,
      "step": 59
    },
    {
      "epoch": 0.516545601291364,
      "grad_norm": 0.6673574447631836,
      "learning_rate": 6.485955580705913e-05,
      "loss": 1.4796,
      "step": 60
    },
    {
      "epoch": 0.5251546946462201,
      "grad_norm": 0.8242542743682861,
      "learning_rate": 6.377906449072578e-05,
      "loss": 1.6654,
      "step": 61
    },
    {
      "epoch": 0.5337637880010762,
      "grad_norm": 1.4092378616333008,
      "learning_rate": 6.269328499421356e-05,
      "loss": 1.2351,
      "step": 62
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 2.419718027114868,
      "learning_rate": 6.160287135049127e-05,
      "loss": 1.4315,
      "step": 63
    },
    {
      "epoch": 0.5509819747107882,
      "grad_norm": 0.38671913743019104,
      "learning_rate": 6.050848038396473e-05,
      "loss": 1.2274,
      "step": 64
    },
    {
      "epoch": 0.5595910680656443,
      "grad_norm": 0.5623155832290649,
      "learning_rate": 5.941077131483025e-05,
      "loss": 1.3062,
      "step": 65
    },
    {
      "epoch": 0.5682001614205004,
      "grad_norm": 0.6458035111427307,
      "learning_rate": 5.831040536198504e-05,
      "loss": 1.4318,
      "step": 66
    },
    {
      "epoch": 0.5768092547753565,
      "grad_norm": 0.6504884958267212,
      "learning_rate": 5.720804534473382e-05,
      "loss": 1.3897,
      "step": 67
    },
    {
      "epoch": 0.5854183481302125,
      "grad_norm": 0.4223068356513977,
      "learning_rate": 5.610435528353106e-05,
      "loss": 1.5331,
      "step": 68
    },
    {
      "epoch": 0.5940274414850686,
      "grad_norm": 0.5046920776367188,
      "learning_rate": 5.500000000000001e-05,
      "loss": 1.6225,
      "step": 69
    },
    {
      "epoch": 0.6026365348399246,
      "grad_norm": 0.41651174426078796,
      "learning_rate": 5.389564471646895e-05,
      "loss": 1.7376,
      "step": 70
    },
    {
      "epoch": 0.6112456281947808,
      "grad_norm": 0.32924169301986694,
      "learning_rate": 5.27919546552662e-05,
      "loss": 1.5401,
      "step": 71
    },
    {
      "epoch": 0.6198547215496368,
      "grad_norm": 0.4280257821083069,
      "learning_rate": 5.168959463801497e-05,
      "loss": 1.5662,
      "step": 72
    },
    {
      "epoch": 0.6284638149044929,
      "grad_norm": 0.6656383275985718,
      "learning_rate": 5.058922868516978e-05,
      "loss": 1.4713,
      "step": 73
    },
    {
      "epoch": 0.6370729082593489,
      "grad_norm": 0.8646160960197449,
      "learning_rate": 4.9491519616035276e-05,
      "loss": 1.2566,
      "step": 74
    },
    {
      "epoch": 0.645682001614205,
      "grad_norm": 2.5206289291381836,
      "learning_rate": 4.839712864950873e-05,
      "loss": 1.7236,
      "step": 75
    },
    {
      "epoch": 0.645682001614205,
      "eval_loss": 1.349289894104004,
      "eval_runtime": 1.2727,
      "eval_samples_per_second": 39.288,
      "eval_steps_per_second": 10.215,
      "step": 75
    },
    {
      "epoch": 0.654291094969061,
      "grad_norm": 0.4360639750957489,
      "learning_rate": 4.730671500578645e-05,
      "loss": 1.1383,
      "step": 76
    },
    {
      "epoch": 0.6629001883239172,
      "grad_norm": 0.8731722235679626,
      "learning_rate": 4.6220935509274235e-05,
      "loss": 1.4032,
      "step": 77
    },
    {
      "epoch": 0.6715092816787732,
      "grad_norm": 0.7142120003700256,
      "learning_rate": 4.5140444192940864e-05,
      "loss": 1.1904,
      "step": 78
    },
    {
      "epoch": 0.6801183750336293,
      "grad_norm": 0.594018280506134,
      "learning_rate": 4.406589190435313e-05,
      "loss": 1.3872,
      "step": 79
    },
    {
      "epoch": 0.6887274683884853,
      "grad_norm": 0.6022002696990967,
      "learning_rate": 4.2997925913629577e-05,
      "loss": 1.5956,
      "step": 80
    },
    {
      "epoch": 0.6973365617433414,
      "grad_norm": 0.5471949577331543,
      "learning_rate": 4.19371895235492e-05,
      "loss": 1.6525,
      "step": 81
    },
    {
      "epoch": 0.7059456550981975,
      "grad_norm": 0.3283829391002655,
      "learning_rate": 4.0884321682049884e-05,
      "loss": 1.772,
      "step": 82
    },
    {
      "epoch": 0.7145547484530536,
      "grad_norm": 0.34206530451774597,
      "learning_rate": 3.98399565973501e-05,
      "loss": 1.6938,
      "step": 83
    },
    {
      "epoch": 0.7231638418079096,
      "grad_norm": 0.35002151131629944,
      "learning_rate": 3.880472335592553e-05,
      "loss": 1.418,
      "step": 84
    },
    {
      "epoch": 0.7317729351627656,
      "grad_norm": 0.7404176592826843,
      "learning_rate": 3.777924554357096e-05,
      "loss": 1.5774,
      "step": 85
    },
    {
      "epoch": 0.7403820285176217,
      "grad_norm": 0.8380143046379089,
      "learning_rate": 3.676414086977546e-05,
      "loss": 1.3188,
      "step": 86
    },
    {
      "epoch": 0.7489911218724778,
      "grad_norm": 2.1834990978240967,
      "learning_rate": 3.576002079563732e-05,
      "loss": 1.4621,
      "step": 87
    },
    {
      "epoch": 0.7576002152273339,
      "grad_norm": 2.3319005966186523,
      "learning_rate": 3.4767490165542704e-05,
      "loss": 1.5594,
      "step": 88
    },
    {
      "epoch": 0.7662093085821899,
      "grad_norm": 0.3592979311943054,
      "learning_rate": 3.378714684283011e-05,
      "loss": 1.1,
      "step": 89
    },
    {
      "epoch": 0.774818401937046,
      "grad_norm": 0.49761757254600525,
      "learning_rate": 3.281958134965972e-05,
      "loss": 1.3531,
      "step": 90
    },
    {
      "epoch": 0.783427495291902,
      "grad_norm": 0.3277381658554077,
      "learning_rate": 3.186537651130503e-05,
      "loss": 1.3467,
      "step": 91
    },
    {
      "epoch": 0.7920365886467582,
      "grad_norm": 0.3256728947162628,
      "learning_rate": 3.0925107105080636e-05,
      "loss": 1.5374,
      "step": 92
    },
    {
      "epoch": 0.8006456820016142,
      "grad_norm": 0.35263001918792725,
      "learning_rate": 2.9999339514117912e-05,
      "loss": 1.5367,
      "step": 93
    },
    {
      "epoch": 0.8092547753564703,
      "grad_norm": 0.3698779344558716,
      "learning_rate": 2.9088631386196964e-05,
      "loss": 1.7344,
      "step": 94
    },
    {
      "epoch": 0.8178638687113263,
      "grad_norm": 0.445311576128006,
      "learning_rate": 2.8193531297840503e-05,
      "loss": 1.7141,
      "step": 95
    },
    {
      "epoch": 0.8264729620661824,
      "grad_norm": 0.4353031814098358,
      "learning_rate": 2.73145784238718e-05,
      "loss": 1.5168,
      "step": 96
    },
    {
      "epoch": 0.8350820554210385,
      "grad_norm": 0.6268022060394287,
      "learning_rate": 2.645230221263596e-05,
      "loss": 1.4016,
      "step": 97
    },
    {
      "epoch": 0.8436911487758946,
      "grad_norm": 0.5284622311592102,
      "learning_rate": 2.560722206708006e-05,
      "loss": 1.5741,
      "step": 98
    },
    {
      "epoch": 0.8523002421307506,
      "grad_norm": 0.7828362584114075,
      "learning_rate": 2.4779847031884175e-05,
      "loss": 1.243,
      "step": 99
    },
    {
      "epoch": 0.8609093354856067,
      "grad_norm": 3.621532678604126,
      "learning_rate": 2.397067548683199e-05,
      "loss": 1.5976,
      "step": 100
    },
    {
      "epoch": 0.8609093354856067,
      "eval_loss": 1.3449122905731201,
      "eval_runtime": 1.2724,
      "eval_samples_per_second": 39.295,
      "eval_steps_per_second": 10.217,
      "step": 100
    },
    {
      "epoch": 0.8695184288404627,
      "grad_norm": 0.3012229800224304,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 1.176,
      "step": 101
    },
    {
      "epoch": 0.8781275221953188,
      "grad_norm": 0.4754287004470825,
      "learning_rate": 2.2408881267183997e-05,
      "loss": 1.1958,
      "step": 102
    },
    {
      "epoch": 0.8867366155501749,
      "grad_norm": 0.43265655636787415,
      "learning_rate": 2.165719935902685e-05,
      "loss": 1.3262,
      "step": 103
    },
    {
      "epoch": 0.895345708905031,
      "grad_norm": 0.5260616540908813,
      "learning_rate": 2.09256019072082e-05,
      "loss": 1.3721,
      "step": 104
    },
    {
      "epoch": 0.903954802259887,
      "grad_norm": 0.5602609515190125,
      "learning_rate": 2.0214529598676836e-05,
      "loss": 1.401,
      "step": 105
    },
    {
      "epoch": 0.912563895614743,
      "grad_norm": 0.29336562752723694,
      "learning_rate": 1.952441075680272e-05,
      "loss": 1.6924,
      "step": 106
    },
    {
      "epoch": 0.9211729889695991,
      "grad_norm": 0.9488304853439331,
      "learning_rate": 1.8855661083370986e-05,
      "loss": 1.8012,
      "step": 107
    },
    {
      "epoch": 0.9297820823244553,
      "grad_norm": 0.3932758867740631,
      "learning_rate": 1.820868340817874e-05,
      "loss": 1.6428,
      "step": 108
    },
    {
      "epoch": 0.9383911756793113,
      "grad_norm": 0.3379191756248474,
      "learning_rate": 1.758386744638546e-05,
      "loss": 1.3678,
      "step": 109
    },
    {
      "epoch": 0.9470002690341673,
      "grad_norm": 0.5376018285751343,
      "learning_rate": 1.698158956376318e-05,
      "loss": 1.6057,
      "step": 110
    },
    {
      "epoch": 0.9556093623890234,
      "grad_norm": 0.6705049872398376,
      "learning_rate": 1.6402212549987762e-05,
      "loss": 1.5497,
      "step": 111
    },
    {
      "epoch": 0.9642184557438794,
      "grad_norm": 1.5708343982696533,
      "learning_rate": 1.584608540010799e-05,
      "loss": 1.4589,
      "step": 112
    },
    {
      "epoch": 0.9728275490987356,
      "grad_norm": 2.8929443359375,
      "learning_rate": 1.531354310432403e-05,
      "loss": 1.5784,
      "step": 113
    },
    {
      "epoch": 0.9814366424535916,
      "grad_norm": 0.3657113313674927,
      "learning_rate": 1.4804906446201816e-05,
      "loss": 1.3912,
      "step": 114
    },
    {
      "epoch": 0.9900457358084477,
      "grad_norm": 0.3794941306114197,
      "learning_rate": 1.4320481809445051e-05,
      "loss": 1.5847,
      "step": 115
    },
    {
      "epoch": 0.9986548291633037,
      "grad_norm": 0.7362991571426392,
      "learning_rate": 1.386056099334112e-05,
      "loss": 1.399,
      "step": 116
    },
    {
      "epoch": 1.0072639225181599,
      "grad_norm": 0.8029009103775024,
      "learning_rate": 1.3425421036992098e-05,
      "loss": 1.2215,
      "step": 117
    },
    {
      "epoch": 1.0158730158730158,
      "grad_norm": 0.5080808997154236,
      "learning_rate": 1.3015324052436753e-05,
      "loss": 1.2015,
      "step": 118
    },
    {
      "epoch": 1.024482109227872,
      "grad_norm": 0.44496291875839233,
      "learning_rate": 1.2630517066764069e-05,
      "loss": 1.2138,
      "step": 119
    },
    {
      "epoch": 1.033091202582728,
      "grad_norm": 0.4348479211330414,
      "learning_rate": 1.227123187331335e-05,
      "loss": 1.2767,
      "step": 120
    },
    {
      "epoch": 1.041700295937584,
      "grad_norm": 0.37992164492607117,
      "learning_rate": 1.1937684892050604e-05,
      "loss": 1.5242,
      "step": 121
    },
    {
      "epoch": 1.0503093892924402,
      "grad_norm": 0.32971861958503723,
      "learning_rate": 1.1630077039205209e-05,
      "loss": 1.5498,
      "step": 122
    },
    {
      "epoch": 1.0589184826472962,
      "grad_norm": 0.5224172472953796,
      "learning_rate": 1.1348593606245522e-05,
      "loss": 1.6984,
      "step": 123
    },
    {
      "epoch": 1.0675275760021523,
      "grad_norm": 0.43070971965789795,
      "learning_rate": 1.109340414826622e-05,
      "loss": 1.5932,
      "step": 124
    },
    {
      "epoch": 1.0761366693570082,
      "grad_norm": 0.4774491786956787,
      "learning_rate": 1.0864662381854632e-05,
      "loss": 1.4308,
      "step": 125
    },
    {
      "epoch": 1.0761366693570082,
      "eval_loss": 1.3410676717758179,
      "eval_runtime": 1.2741,
      "eval_samples_per_second": 39.242,
      "eval_steps_per_second": 10.203,
      "step": 125
    },
    {
      "epoch": 1.0847457627118644,
      "grad_norm": 0.5184400677680969,
      "learning_rate": 1.0662506092497646e-05,
      "loss": 1.4641,
      "step": 126
    },
    {
      "epoch": 1.0933548560667206,
      "grad_norm": 0.5525245666503906,
      "learning_rate": 1.0487057051584856e-05,
      "loss": 1.5545,
      "step": 127
    },
    {
      "epoch": 1.1019639494215765,
      "grad_norm": 1.609927773475647,
      "learning_rate": 1.0338420943058053e-05,
      "loss": 1.3439,
      "step": 128
    },
    {
      "epoch": 1.1105730427764327,
      "grad_norm": 2.2938551902770996,
      "learning_rate": 1.0216687299751144e-05,
      "loss": 1.4817,
      "step": 129
    },
    {
      "epoch": 1.1191821361312886,
      "grad_norm": 0.45292142033576965,
      "learning_rate": 1.0121929449458941e-05,
      "loss": 1.1242,
      "step": 130
    },
    {
      "epoch": 1.1277912294861447,
      "grad_norm": 0.4423352777957916,
      "learning_rate": 1.0054204470767243e-05,
      "loss": 1.1672,
      "step": 131
    },
    {
      "epoch": 1.136400322841001,
      "grad_norm": 0.33851832151412964,
      "learning_rate": 1.0013553158670811e-05,
      "loss": 1.2433,
      "step": 132
    },
    {
      "epoch": 1.1450094161958568,
      "grad_norm": 0.3434777855873108,
      "learning_rate": 1e-05,
      "loss": 1.4094,
      "step": 133
    }
  ],
  "logging_steps": 1,
  "max_steps": 133,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8256510115053568e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}