{
  "best_metric": 0.696293830871582,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.03715745471435207,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00018578727357176033, "grad_norm": 4.046935558319092, "learning_rate": 8.000000000000001e-06, "loss": 1.3446, "step": 1},
    {"epoch": 0.00018578727357176033, "eval_loss": 1.5395567417144775, "eval_runtime": 210.244, "eval_samples_per_second": 43.121, "eval_steps_per_second": 10.783, "step": 1},
    {"epoch": 0.00037157454714352065, "grad_norm": 5.148815631866455, "learning_rate": 1.6000000000000003e-05, "loss": 1.5422, "step": 2},
    {"epoch": 0.000557361820715281, "grad_norm": 8.490533828735352, "learning_rate": 2.4e-05, "loss": 1.7292, "step": 3},
    {"epoch": 0.0007431490942870413, "grad_norm": 29.692520141601562, "learning_rate": 3.2000000000000005e-05, "loss": 1.6123, "step": 4},
    {"epoch": 0.0009289363678588017, "grad_norm": 23.357440948486328, "learning_rate": 4e-05, "loss": 1.7596, "step": 5},
    {"epoch": 0.001114723641430562, "grad_norm": 20.767066955566406, "learning_rate": 4.8e-05, "loss": 1.3862, "step": 6},
    {"epoch": 0.0013005109150023223, "grad_norm": 2.4082024097442627, "learning_rate": 5.6e-05, "loss": 1.1814, "step": 7},
    {"epoch": 0.0014862981885740826, "grad_norm": 2.5414106845855713, "learning_rate": 6.400000000000001e-05, "loss": 1.1491, "step": 8},
    {"epoch": 0.001672085462145843, "grad_norm": 1.9966752529144287, "learning_rate": 7.2e-05, "loss": 1.0002, "step": 9},
    {"epoch": 0.0018578727357176034, "grad_norm": 2.459282398223877, "learning_rate": 8e-05, "loss": 1.0005, "step": 10},
    {"epoch": 0.0020436600092893636, "grad_norm": 1.952752709388733, "learning_rate": 7.999453219969877e-05, "loss": 0.8618, "step": 11},
    {"epoch": 0.002229447282861124, "grad_norm": 1.8736774921417236, "learning_rate": 7.997813029363704e-05, "loss": 0.8635, "step": 12},
    {"epoch": 0.002415234556432884, "grad_norm": 2.54675555229187, "learning_rate": 7.99507987659322e-05, "loss": 0.8823, "step": 13},
    {"epoch": 0.0026010218300046447, "grad_norm": 2.0304737091064453, "learning_rate": 7.991254508875098e-05, "loss": 0.9139, "step": 14},
    {"epoch": 0.002786809103576405, "grad_norm": 1.8423620462417603, "learning_rate": 7.98633797202668e-05, "loss": 0.8128, "step": 15},
    {"epoch": 0.0029725963771481652, "grad_norm": 4.305455684661865, "learning_rate": 7.980331610180046e-05, "loss": 0.9064, "step": 16},
    {"epoch": 0.0031583836507199257, "grad_norm": 3.134892225265503, "learning_rate": 7.973237065414553e-05, "loss": 0.901, "step": 17},
    {"epoch": 0.003344170924291686, "grad_norm": 1.7530468702316284, "learning_rate": 7.965056277307902e-05, "loss": 0.8444, "step": 18},
    {"epoch": 0.0035299581978634463, "grad_norm": 1.8036566972732544, "learning_rate": 7.955791482405875e-05, "loss": 0.9215, "step": 19},
    {"epoch": 0.0037157454714352067, "grad_norm": 1.2477421760559082, "learning_rate": 7.94544521361089e-05, "loss": 0.7512, "step": 20},
    {"epoch": 0.0039015327450069672, "grad_norm": 1.4336917400360107, "learning_rate": 7.93402029948953e-05, "loss": 0.7466, "step": 21},
    {"epoch": 0.004087320018578727, "grad_norm": 1.7280688285827637, "learning_rate": 7.921519863499239e-05, "loss": 0.7877, "step": 22},
    {"epoch": 0.004273107292150488, "grad_norm": 1.2563620805740356, "learning_rate": 7.907947323134398e-05, "loss": 0.6979, "step": 23},
    {"epoch": 0.004458894565722248, "grad_norm": 1.380014181137085, "learning_rate": 7.893306388992023e-05, "loss": 0.7668, "step": 24},
    {"epoch": 0.004644681839294009, "grad_norm": 1.2533624172210693, "learning_rate": 7.877601063757323e-05, "loss": 0.7324, "step": 25},
    {"epoch": 0.004830469112865768, "grad_norm": 1.1784394979476929, "learning_rate": 7.860835641109395e-05, "loss": 0.595, "step": 26},
    {"epoch": 0.005016256386437529, "grad_norm": 1.1498676538467407, "learning_rate": 7.843014704547393e-05, "loss": 0.6538, "step": 27},
    {"epoch": 0.005202043660009289, "grad_norm": 1.195434331893921, "learning_rate": 7.824143126137431e-05, "loss": 0.6842, "step": 28},
    {"epoch": 0.00538783093358105, "grad_norm": 1.0709013938903809, "learning_rate": 7.804226065180615e-05, "loss": 0.611, "step": 29},
    {"epoch": 0.00557361820715281, "grad_norm": 1.418430209159851, "learning_rate": 7.783268966802539e-05, "loss": 0.9021, "step": 30},
    {"epoch": 0.00575940548072457, "grad_norm": 1.3018062114715576, "learning_rate": 7.761277560464645e-05, "loss": 0.7383, "step": 31},
    {"epoch": 0.0059451927542963304, "grad_norm": 1.2901121377944946, "learning_rate": 7.738257858397844e-05, "loss": 0.704, "step": 32},
    {"epoch": 0.006130980027868091, "grad_norm": 1.2701770067214966, "learning_rate": 7.71421615395883e-05, "loss": 0.8416, "step": 33},
    {"epoch": 0.006316767301439851, "grad_norm": 1.303396224975586, "learning_rate": 7.68915901990954e-05, "loss": 0.7444, "step": 34},
    {"epoch": 0.006502554575011612, "grad_norm": 1.3861703872680664, "learning_rate": 7.663093306620231e-05, "loss": 0.7191, "step": 35},
    {"epoch": 0.006688341848583372, "grad_norm": 1.503480315208435, "learning_rate": 7.636026140196651e-05, "loss": 0.8693, "step": 36},
    {"epoch": 0.006874129122155132, "grad_norm": 1.307245135307312, "learning_rate": 7.607964920531837e-05, "loss": 0.7712, "step": 37},
    {"epoch": 0.0070599163957268925, "grad_norm": 1.2363077402114868, "learning_rate": 7.578917319283055e-05, "loss": 0.7318, "step": 38},
    {"epoch": 0.007245703669298653, "grad_norm": 1.183120608329773, "learning_rate": 7.548891277774448e-05, "loss": 0.8098, "step": 39},
    {"epoch": 0.0074314909428704135, "grad_norm": 1.3283722400665283, "learning_rate": 7.517895004825956e-05, "loss": 0.7582, "step": 40},
    {"epoch": 0.007617278216442174, "grad_norm": 1.2799726724624634, "learning_rate": 7.48593697450911e-05, "loss": 0.8915, "step": 41},
    {"epoch": 0.0078030654900139345, "grad_norm": 1.4513181447982788, "learning_rate": 7.453025923830296e-05, "loss": 0.8696, "step": 42},
    {"epoch": 0.007988852763585694, "grad_norm": 1.2473115921020508, "learning_rate": 7.419170850342156e-05, "loss": 0.892, "step": 43},
    {"epoch": 0.008174640037157455, "grad_norm": 1.3187384605407715, "learning_rate": 7.384381009683742e-05, "loss": 0.9073, "step": 44},
    {"epoch": 0.008360427310729215, "grad_norm": 1.25960111618042, "learning_rate": 7.348665913050115e-05, "loss": 0.9463, "step": 45},
    {"epoch": 0.008546214584300976, "grad_norm": 1.1852588653564453, "learning_rate": 7.312035324592081e-05, "loss": 0.8179, "step": 46},
    {"epoch": 0.008732001857872736, "grad_norm": 1.2183889150619507, "learning_rate": 7.274499258746771e-05, "loss": 0.7668, "step": 47},
    {"epoch": 0.008917789131444497, "grad_norm": 1.3253717422485352, "learning_rate": 7.236067977499791e-05, "loss": 0.7596, "step": 48},
    {"epoch": 0.009103576405016257, "grad_norm": 1.3234570026397705, "learning_rate": 7.196751987579699e-05, "loss": 0.7846, "step": 49},
    {"epoch": 0.009289363678588018, "grad_norm": 1.4833699464797974, "learning_rate": 7.156562037585576e-05, "loss": 0.8507, "step": 50},
    {"epoch": 0.009289363678588018, "eval_loss": 0.7784072160720825, "eval_runtime": 210.9709, "eval_samples_per_second": 42.973, "eval_steps_per_second": 10.746, "step": 50},
    {"epoch": 0.009475150952159776, "grad_norm": 1.1448761224746704, "learning_rate": 7.11550911504845e-05, "loss": 0.7059, "step": 51},
    {"epoch": 0.009660938225731537, "grad_norm": 1.5964897871017456, "learning_rate": 7.073604443427437e-05, "loss": 1.0514, "step": 52},
    {"epoch": 0.009846725499303297, "grad_norm": 1.4827876091003418, "learning_rate": 7.03085947904134e-05, "loss": 1.0776, "step": 53},
    {"epoch": 0.010032512772875058, "grad_norm": 1.5401029586791992, "learning_rate": 6.987285907936617e-05, "loss": 1.0623, "step": 54},
    {"epoch": 0.010218300046446818, "grad_norm": 1.462876319885254, "learning_rate": 6.942895642692527e-05, "loss": 1.0345, "step": 55},
    {"epoch": 0.010404087320018579, "grad_norm": 1.5211715698242188, "learning_rate": 6.897700819164357e-05, "loss": 1.0968, "step": 56},
    {"epoch": 0.01058987459359034, "grad_norm": 1.2720927000045776, "learning_rate": 6.851713793165589e-05, "loss": 0.8471, "step": 57},
    {"epoch": 0.0107756618671621, "grad_norm": 1.2138237953186035, "learning_rate": 6.804947137089955e-05, "loss": 0.7272, "step": 58},
    {"epoch": 0.01096144914073386, "grad_norm": 1.2674663066864014, "learning_rate": 6.757413636474263e-05, "loss": 0.6568, "step": 59},
    {"epoch": 0.01114723641430562, "grad_norm": 0.9728902578353882, "learning_rate": 6.709126286502965e-05, "loss": 0.707, "step": 60},
    {"epoch": 0.011333023687877381, "grad_norm": 1.2180075645446777, "learning_rate": 6.660098288455393e-05, "loss": 0.8588, "step": 61},
    {"epoch": 0.01151881096144914, "grad_norm": 1.1713515520095825, "learning_rate": 6.610343046096674e-05, "loss": 0.7691, "step": 62},
    {"epoch": 0.0117045982350209, "grad_norm": 1.0399553775787354, "learning_rate": 6.559874162013267e-05, "loss": 0.7024, "step": 63},
    {"epoch": 0.011890385508592661, "grad_norm": 0.8005794286727905, "learning_rate": 6.508705433894149e-05, "loss": 0.5976, "step": 64},
    {"epoch": 0.012076172782164421, "grad_norm": 1.0212355852127075, "learning_rate": 6.456850850758673e-05, "loss": 0.7279, "step": 65},
    {"epoch": 0.012261960055736182, "grad_norm": 0.9839227795600891, "learning_rate": 6.404324589132101e-05, "loss": 0.714, "step": 66},
    {"epoch": 0.012447747329307942, "grad_norm": 0.9003210067749023, "learning_rate": 6.351141009169893e-05, "loss": 0.7392, "step": 67},
    {"epoch": 0.012633534602879703, "grad_norm": 0.9685829877853394, "learning_rate": 6.297314650731775e-05, "loss": 0.697, "step": 68},
    {"epoch": 0.012819321876451463, "grad_norm": 0.8940137624740601, "learning_rate": 6.242860229406692e-05, "loss": 0.5469, "step": 69},
    {"epoch": 0.013005109150023224, "grad_norm": 0.9139478206634521, "learning_rate": 6.18779263248971e-05, "loss": 0.6808, "step": 70},
    {"epoch": 0.013190896423594984, "grad_norm": 1.0126328468322754, "learning_rate": 6.132126914911976e-05, "loss": 0.6446, "step": 71},
    {"epoch": 0.013376683697166745, "grad_norm": 0.7734840512275696, "learning_rate": 6.075878295124861e-05, "loss": 0.6272, "step": 72},
    {"epoch": 0.013562470970738504, "grad_norm": 0.7684584856033325, "learning_rate": 6.019062150939376e-05, "loss": 0.5738, "step": 73},
    {"epoch": 0.013748258244310264, "grad_norm": 0.9124207496643066, "learning_rate": 5.9616940153220336e-05, "loss": 0.623, "step": 74},
    {"epoch": 0.013934045517882025, "grad_norm": 0.8309489488601685, "learning_rate": 5.903789572148295e-05, "loss": 0.5815, "step": 75},
    {"epoch": 0.014119832791453785, "grad_norm": 0.7745351791381836, "learning_rate": 5.845364651914752e-05, "loss": 0.6165, "step": 76},
    {"epoch": 0.014305620065025546, "grad_norm": 0.9630031585693359, "learning_rate": 5.786435227411227e-05, "loss": 0.6886, "step": 77},
    {"epoch": 0.014491407338597306, "grad_norm": 0.8420267105102539, "learning_rate": 5.727017409353971e-05, "loss": 0.6423, "step": 78},
    {"epoch": 0.014677194612169066, "grad_norm": 0.9119953513145447, "learning_rate": 5.667127441981162e-05, "loss": 0.7206, "step": 79},
    {"epoch": 0.014862981885740827, "grad_norm": 1.015648365020752, "learning_rate": 5.606781698611879e-05, "loss": 0.6322, "step": 80},
    {"epoch": 0.015048769159312587, "grad_norm": 0.9261860251426697, "learning_rate": 5.5459966771698096e-05, "loss": 0.6806, "step": 81},
    {"epoch": 0.015234556432884348, "grad_norm": 0.8736683130264282, "learning_rate": 5.4847889956728834e-05, "loss": 0.674, "step": 82},
    {"epoch": 0.015420343706456108, "grad_norm": 0.9856055378913879, "learning_rate": 5.423175387690067e-05, "loss": 0.8235, "step": 83},
    {"epoch": 0.015606130980027869, "grad_norm": 1.0767531394958496, "learning_rate": 5.361172697766573e-05, "loss": 0.778, "step": 84},
    {"epoch": 0.01579191825359963, "grad_norm": 0.8056624531745911, "learning_rate": 5.298797876818735e-05, "loss": 0.617, "step": 85},
    {"epoch": 0.015977705527171388, "grad_norm": 1.071303367614746, "learning_rate": 5.23606797749979e-05, "loss": 0.7675, "step": 86},
    {"epoch": 0.01616349280074315, "grad_norm": 0.9511001110076904, "learning_rate": 5.17300014953786e-05, "loss": 0.7353, "step": 87},
    {"epoch": 0.01634928007431491, "grad_norm": 0.9310784339904785, "learning_rate": 5.109611635047379e-05, "loss": 0.6954, "step": 88},
    {"epoch": 0.01653506734788667, "grad_norm": 1.0314819812774658, "learning_rate": 5.04591976381528e-05, "loss": 0.7053, "step": 89},
    {"epoch": 0.01672085462145843, "grad_norm": 0.9734024405479431, "learning_rate": 4.981941948563197e-05, "loss": 0.7357, "step": 90},
    {"epoch": 0.01690664189503019, "grad_norm": 0.9812660217285156, "learning_rate": 4.9176956801870065e-05, "loss": 0.705, "step": 91},
    {"epoch": 0.01709242916860195, "grad_norm": 1.061806082725525, "learning_rate": 4.853198522974988e-05, "loss": 0.7836, "step": 92},
    {"epoch": 0.01727821644217371, "grad_norm": 1.190076470375061, "learning_rate": 4.788468109805921e-05, "loss": 0.8644, "step": 93},
    {"epoch": 0.017464003715745472, "grad_norm": 1.0145090818405151, "learning_rate": 4.7235221373284407e-05, "loss": 0.7877, "step": 94},
    {"epoch": 0.01764979098931723, "grad_norm": 1.1526635885238647, "learning_rate": 4.658378361122936e-05, "loss": 0.8445, "step": 95},
    {"epoch": 0.017835578262888993, "grad_norm": 1.0449837446212769, "learning_rate": 4.593054590847368e-05, "loss": 0.8328, "step": 96},
    {"epoch": 0.018021365536460752, "grad_norm": 1.0370270013809204, "learning_rate": 4.5275686853682765e-05, "loss": 0.7437, "step": 97},
    {"epoch": 0.018207152810032514, "grad_norm": 1.0356221199035645, "learning_rate": 4.4619385478783456e-05, "loss": 0.7405, "step": 98},
    {"epoch": 0.018392940083604273, "grad_norm": 1.0345137119293213, "learning_rate": 4.396182121001852e-05, "loss": 0.6983, "step": 99},
    {"epoch": 0.018578727357176035, "grad_norm": 1.3793245553970337, "learning_rate": 4.33031738188933e-05, "loss": 0.9343, "step": 100},
    {"epoch": 0.018578727357176035, "eval_loss": 0.7299540638923645, "eval_runtime": 210.9599, "eval_samples_per_second": 42.975, "eval_steps_per_second": 10.746, "step": 100},
    {"epoch": 0.018764514630747794, "grad_norm": 1.1361013650894165, "learning_rate": 4.264362337302798e-05, "loss": 0.6842, "step": 101},
    {"epoch": 0.018950301904319553, "grad_norm": 1.120656132698059, "learning_rate": 4.1983350186928894e-05, "loss": 0.943, "step": 102},
    {"epoch": 0.019136089177891315, "grad_norm": 1.1091196537017822, "learning_rate": 4.132253477269233e-05, "loss": 0.8101, "step": 103},
    {"epoch": 0.019321876451463074, "grad_norm": 1.1574037075042725, "learning_rate": 4.0661357790654345e-05, "loss": 0.9537, "step": 104},
    {"epoch": 0.019507663725034836, "grad_norm": 1.3250733613967896, "learning_rate": 4e-05, "loss": 1.0247, "step": 105},
    {"epoch": 0.019693450998606594, "grad_norm": 1.2618422508239746, "learning_rate": 3.933864220934566e-05, "loss": 0.9238, "step": 106},
    {"epoch": 0.019879238272178357, "grad_norm": 1.1964272260665894, "learning_rate": 3.8677465227307676e-05, "loss": 0.8356, "step": 107},
    {"epoch": 0.020065025545750115, "grad_norm": 0.930941104888916, "learning_rate": 3.8016649813071106e-05, "loss": 0.7564, "step": 108},
    {"epoch": 0.020250812819321878, "grad_norm": 0.948442280292511, "learning_rate": 3.735637662697203e-05, "loss": 0.6836, "step": 109},
    {"epoch": 0.020436600092893636, "grad_norm": 0.9409064650535583, "learning_rate": 3.669682618110671e-05, "loss": 0.696, "step": 110},
    {"epoch": 0.0206223873664654, "grad_norm": 0.9388203024864197, "learning_rate": 3.6038178789981494e-05, "loss": 0.6616, "step": 111},
    {"epoch": 0.020808174640037157, "grad_norm": 0.8305265307426453, "learning_rate": 3.538061452121656e-05, "loss": 0.6802, "step": 112},
    {"epoch": 0.020993961913608916, "grad_norm": 0.8969584107398987, "learning_rate": 3.472431314631724e-05, "loss": 0.667, "step": 113},
    {"epoch": 0.02117974918718068, "grad_norm": 0.8265141844749451, "learning_rate": 3.406945409152632e-05, "loss": 0.5857, "step": 114},
    {"epoch": 0.021365536460752437, "grad_norm": 0.9097794890403748, "learning_rate": 3.341621638877064e-05, "loss": 0.7731, "step": 115},
    {"epoch": 0.0215513237343242, "grad_norm": 0.8234865665435791, "learning_rate": 3.276477862671562e-05, "loss": 0.6853, "step": 116},
    {"epoch": 0.021737111007895958, "grad_norm": 0.8503565788269043, "learning_rate": 3.21153189019408e-05, "loss": 0.5874, "step": 117},
    {"epoch": 0.02192289828146772, "grad_norm": 0.7727727890014648, "learning_rate": 3.146801477025013e-05, "loss": 0.6099, "step": 118},
    {"epoch": 0.02210868555503948, "grad_norm": 0.808000385761261, "learning_rate": 3.082304319812994e-05, "loss": 0.6345, "step": 119},
    {"epoch": 0.02229447282861124, "grad_norm": 0.7325134873390198, "learning_rate": 3.0180580514368037e-05, "loss": 0.5591, "step": 120},
    {"epoch": 0.022480260102183, "grad_norm": 0.7028451561927795, "learning_rate": 2.9540802361847212e-05, "loss": 0.512, "step": 121},
    {"epoch": 0.022666047375754762, "grad_norm": 0.7961578369140625, "learning_rate": 2.890388364952623e-05, "loss": 0.5754, "step": 122},
    {"epoch": 0.02285183464932652, "grad_norm": 0.7276977896690369, "learning_rate": 2.8269998504621416e-05, "loss": 0.5613, "step": 123},
    {"epoch": 0.02303762192289828, "grad_norm": 0.858888566493988, "learning_rate": 2.7639320225002108e-05, "loss": 0.655, "step": 124},
    {"epoch": 0.023223409196470042, "grad_norm": 0.9296563863754272, "learning_rate": 2.7012021231812666e-05, "loss": 0.6636, "step": 125},
    {"epoch": 0.0234091964700418, "grad_norm": 0.7812833189964294, "learning_rate": 2.638827302233428e-05, "loss": 0.6386, "step": 126},
    {"epoch": 0.023594983743613563, "grad_norm": 0.9057222604751587, "learning_rate": 2.576824612309934e-05, "loss": 0.638, "step": 127},
    {"epoch": 0.023780771017185322, "grad_norm": 0.8555361032485962, "learning_rate": 2.5152110043271166e-05, "loss": 0.6836, "step": 128},
    {"epoch": 0.023966558290757084, "grad_norm": 0.8539828062057495, "learning_rate": 2.454003322830192e-05, "loss": 0.7038, "step": 129},
    {"epoch": 0.024152345564328843, "grad_norm": 0.8139870166778564, "learning_rate": 2.393218301388123e-05, "loss": 0.4949, "step": 130},
    {"epoch": 0.024338132837900605, "grad_norm": 0.8350996375083923, "learning_rate": 2.3328725580188395e-05, "loss": 0.6847, "step": 131},
    {"epoch": 0.024523920111472364, "grad_norm": 0.8771671056747437, "learning_rate": 2.272982590646029e-05, "loss": 0.6576, "step": 132},
    {"epoch": 0.024709707385044126, "grad_norm": 0.9145622253417969, "learning_rate": 2.2135647725887744e-05, "loss": 0.6714, "step": 133},
    {"epoch": 0.024895494658615885, "grad_norm": 0.8157410621643066, "learning_rate": 2.1546353480852495e-05, "loss": 0.6085, "step": 134},
    {"epoch": 0.025081281932187643, "grad_norm": 0.860339879989624, "learning_rate": 2.096210427851706e-05, "loss": 0.5648, "step": 135},
    {"epoch": 0.025267069205759406, "grad_norm": 0.8176294565200806, "learning_rate": 2.038305984677969e-05, "loss": 0.6053, "step": 136},
    {"epoch": 0.025452856479331164, "grad_norm": 0.9252009391784668, "learning_rate": 1.9809378490606264e-05, "loss": 0.6228, "step": 137},
    {"epoch": 0.025638643752902927, "grad_norm": 0.8035367727279663, "learning_rate": 1.9241217048751406e-05, "loss": 0.6502, "step": 138},
    {"epoch": 0.025824431026474685, "grad_norm": 0.9396884441375732, "learning_rate": 1.867873085088026e-05, "loss": 0.6928, "step": 139},
    {"epoch": 0.026010218300046448, "grad_norm": 0.973107635974884, "learning_rate": 1.8122073675102935e-05, "loss": 0.7169, "step": 140},
    {"epoch": 0.026196005573618206, "grad_norm": 0.9451408982276917, "learning_rate": 1.75713977059331e-05, "loss": 0.6835, "step": 141},
    {"epoch": 0.02638179284718997, "grad_norm": 0.9551781415939331, "learning_rate": 1.702685349268226e-05, "loss": 0.7596, "step": 142},
    {"epoch": 0.026567580120761727, "grad_norm": 0.9721214175224304, "learning_rate": 1.648858990830108e-05, "loss": 0.804, "step": 143},
    {"epoch": 0.02675336739433349, "grad_norm": 1.019667148590088, "learning_rate": 1.5956754108678996e-05, "loss": 0.8623, "step": 144},
    {"epoch": 0.02693915466790525, "grad_norm": 1.0824493169784546, "learning_rate": 1.5431491492413288e-05, "loss": 0.8455, "step": 145},
    {"epoch": 0.027124941941477007, "grad_norm": 0.9800060987472534, "learning_rate": 1.491294566105852e-05, "loss": 0.8264, "step": 146},
    {"epoch": 0.02731072921504877, "grad_norm": 0.9747028350830078, "learning_rate": 1.4401258379867335e-05, "loss": 0.7024, "step": 147},
    {"epoch": 0.027496516488620528, "grad_norm": 0.8642198443412781, "learning_rate": 1.3896569539033253e-05, "loss": 0.7009, "step": 148},
    {"epoch": 0.02768230376219229, "grad_norm": 0.8279868960380554, "learning_rate": 1.3399017115446067e-05, "loss": 0.6983, "step": 149},
    {"epoch": 0.02786809103576405, "grad_norm": 1.2232890129089355, "learning_rate": 1.2908737134970367e-05, "loss": 0.8388, "step": 150},
    {"epoch": 0.02786809103576405, "eval_loss": 0.7057402729988098, "eval_runtime": 210.9537, "eval_samples_per_second": 42.976, "eval_steps_per_second": 10.746, "step": 150},
    {"epoch": 0.02805387830933581, "grad_norm": 0.8277557492256165, "learning_rate": 1.242586363525737e-05, "loss": 0.6744, "step": 151},
    {"epoch": 0.02823966558290757, "grad_norm": 1.129407286643982, "learning_rate": 1.1950528629100457e-05, "loss": 0.8988, "step": 152},
    {"epoch": 0.028425452856479332, "grad_norm": 1.1117703914642334, "learning_rate": 1.1482862068344121e-05, "loss": 0.8987, "step": 153},
    {"epoch": 0.02861124013005109, "grad_norm": 1.274944543838501, "learning_rate": 1.1022991808356442e-05, "loss": 0.9714, "step": 154},
    {"epoch": 0.028797027403622853, "grad_norm": 1.413684368133545, "learning_rate": 1.0571043573074737e-05, "loss": 1.0464, "step": 155},
    {"epoch": 0.028982814677194612, "grad_norm": 1.2533186674118042, "learning_rate": 1.0127140920633857e-05, "loss": 0.8553, "step": 156},
    {"epoch": 0.029168601950766374, "grad_norm": 0.9504323601722717, "learning_rate": 9.69140520958662e-06, "loss": 0.6454, "step": 157},
    {"epoch": 0.029354389224338133, "grad_norm": 0.9204007387161255, "learning_rate": 9.263955565725648e-06, "loss": 0.733, "step": 158},
    {"epoch": 0.02954017649790989, "grad_norm": 0.8115749359130859, "learning_rate": 8.844908849515509e-06, "loss": 0.6411, "step": 159},
    {"epoch": 0.029725963771481654, "grad_norm": 0.7680659294128418, "learning_rate": 8.434379624144261e-06, "loss": 0.6213, "step": 160},
    {"epoch": 0.029911751045053413, "grad_norm": 0.7348408699035645, "learning_rate": 8.032480124203013e-06, "loss": 0.608, "step": 161},
    {"epoch": 0.030097538318625175, "grad_norm": 0.690196692943573, "learning_rate": 7.639320225002106e-06, "loss": 0.5074, "step": 162},
    {"epoch": 0.030283325592196934, "grad_norm": 0.7912430167198181, "learning_rate": 7.255007412532307e-06, "loss": 0.6236, "step": 163},
    {"epoch": 0.030469112865768696, "grad_norm": 0.8454386591911316, "learning_rate": 6.8796467540791986e-06, "loss": 0.773, "step": 164},
    {"epoch": 0.030654900139340455, "grad_norm": 0.7565322518348694, "learning_rate": 6.513340869498859e-06, "loss": 0.5278, "step": 165},
    {"epoch": 0.030840687412912217, "grad_norm": 0.7427991032600403, "learning_rate": 6.1561899031625794e-06, "loss": 0.5895, "step": 166},
    {"epoch": 0.031026474686483976, "grad_norm": 0.72712242603302, "learning_rate": 5.808291496578435e-06, "loss": 0.554, "step": 167},
    {"epoch": 0.031212261960055738, "grad_norm": 0.8168418407440186, "learning_rate": 5.469740761697044e-06, "loss": 0.5795, "step": 168},
    {"epoch": 0.0313980492336275, "grad_norm": 0.7900062203407288, "learning_rate": 5.140630254908905e-06, "loss": 0.6155, "step": 169},
    {"epoch": 0.03158383650719926, "grad_norm": 0.7631322741508484, "learning_rate": 4.821049951740442e-06, "loss": 0.6395, "step": 170},
    {"epoch": 0.031769623780771014, "grad_norm": 0.8723105788230896, "learning_rate": 4.511087222255528e-06, "loss": 0.7083, "step": 171},
    {"epoch": 0.031955411054342776, "grad_norm": 0.8694934248924255, "learning_rate": 4.2108268071694616e-06, "loss": 0.6848, "step": 172},
    {"epoch": 0.03214119832791454, "grad_norm": 0.8055874109268188, "learning_rate": 3.9203507946816445e-06, "loss": 0.6301, "step": 173},
    {"epoch": 0.0323269856014863, "grad_norm": 0.782102644443512, "learning_rate": 3.6397385980335e-06, "loss": 0.5799, "step": 174},
    {"epoch": 0.032512772875058056, "grad_norm": 0.9123784303665161, "learning_rate": 3.3690669337977e-06, "loss": 0.6572, "step": 175},
    {"epoch": 0.03269856014862982, "grad_norm": 0.8065102100372314, "learning_rate": 3.1084098009046106e-06, "loss": 0.6309, "step": 176},
    {"epoch": 0.03288434742220158, "grad_norm": 0.7722126841545105, "learning_rate": 2.8578384604117217e-06, "loss": 0.5781, "step": 177},
    {"epoch": 0.03307013469577334, "grad_norm": 0.8441624641418457, "learning_rate": 2.6174214160215704e-06, "loss": 0.625, "step": 178},
    {"epoch": 0.0332559219693451, "grad_norm": 0.7183513045310974, "learning_rate": 2.3872243953535535e-06, "loss": 0.5938, "step": 179},
    {"epoch": 0.03344170924291686, "grad_norm": 0.8442609310150146, "learning_rate": 2.1673103319746146e-06, "loss": 0.6409, "step": 180},
    {"epoch": 0.03362749651648862, "grad_norm": 0.7444936633110046, "learning_rate": 1.957739348193859e-06, "loss": 0.6137, "step": 181},
    {"epoch": 0.03381328379006038, "grad_norm": 0.851841390132904, "learning_rate": 1.7585687386256944e-06, "loss": 0.5644, "step": 182},
    {"epoch": 0.03399907106363214, "grad_norm": 0.7763927578926086, "learning_rate": 1.5698529545260744e-06, "loss": 0.556, "step": 183},
    {"epoch": 0.0341848583372039, "grad_norm": 0.7218007445335388, "learning_rate": 1.3916435889060575e-06, "loss": 0.5211, "step": 184},
    {"epoch": 0.034370645610775664, "grad_norm": 0.8408937454223633, "learning_rate": 1.2239893624267852e-06, "loss": 0.6682, "step": 185},
    {"epoch": 0.03455643288434742, "grad_norm": 0.8628000020980835, "learning_rate": 1.0669361100797704e-06, "loss": 0.6878, "step": 186},
    {"epoch": 0.03474222015791918, "grad_norm": 0.8714439272880554, "learning_rate": 9.205267686560293e-07, "loss": 0.6547, "step": 187},
    {"epoch": 0.034928007431490944, "grad_norm": 0.7619119882583618, "learning_rate": 7.848013650076258e-07, "loss": 0.5769, "step": 188},
    {"epoch": 0.035113794705062706, "grad_norm": 0.9454699158668518, "learning_rate": 6.597970051047053e-07, "loss": 0.6948, "step": 189},
    {"epoch": 0.03529958197863446, "grad_norm": 0.7809498906135559, "learning_rate": 5.455478638911071e-07, "loss": 0.657, "step": 190},
    {"epoch": 0.035485369252206224, "grad_norm": 0.9400784373283386, "learning_rate": 4.420851759412603e-07, "loss": 0.723, "step": 191},
    {"epoch": 0.035671156525777986, "grad_norm": 1.0285460948944092, "learning_rate": 3.4943722692099224e-07, "loss": 0.8145, "step": 192},
    {"epoch": 0.03585694379934974, "grad_norm": 1.0024358034133911, "learning_rate": 2.676293458544743e-07, "loss": 0.8062, "step": 193},
    {"epoch": 0.036042731072921504, "grad_norm": 0.9210175275802612, "learning_rate": 1.9668389819954338e-07, "loss": 0.7595, "step": 194},
    {"epoch": 0.036228518346493266, "grad_norm": 1.0221508741378784, "learning_rate": 1.3662027973320614e-07, "loss": 0.7836, "step": 195},
    {"epoch": 0.03641430562006503, "grad_norm": 0.9825165271759033, "learning_rate": 8.745491124901861e-08, "loss": 0.8122, "step": 196},
    {"epoch": 0.03660009289363678, "grad_norm": 1.2274169921875, "learning_rate": 4.920123406781052e-08, "loss": 0.9055, "step": 197},
    {"epoch": 0.036785880167208546, "grad_norm": 1.0650230646133423, "learning_rate": 2.1869706362958044e-08, "loss": 0.7219, "step": 198},
    {"epoch": 0.03697166744078031, "grad_norm": 1.0528210401535034, "learning_rate": 5.467800301239834e-09, "loss": 0.6992, "step": 199},
    {"epoch": 0.03715745471435207, "grad_norm": 1.143416166305542, "learning_rate": 0.0, "loss": 0.808, "step": 200},
    {"epoch": 0.03715745471435207, "eval_loss": 0.696293830871582, "eval_runtime": 210.9668, "eval_samples_per_second": 42.974, "eval_steps_per_second": 10.746, "step": 200}
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 4,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.895574321423974e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}