{
  "best_metric": 11.93097972869873,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.004610632117663331,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.610632117663332e-05,
      "grad_norm": 0.007248171139508486,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 11.9313,
      "step": 1
    },
    {
      "epoch": 4.610632117663332e-05,
      "eval_loss": 11.930997848510742,
      "eval_runtime": 189.7787,
      "eval_samples_per_second": 48.119,
      "eval_steps_per_second": 6.018,
      "step": 1
    },
    {
      "epoch": 9.221264235326664e-05,
      "grad_norm": 0.011779400520026684,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 11.9347,
      "step": 2
    },
    {
      "epoch": 0.00013831896352989995,
      "grad_norm": 0.01022297516465187,
      "learning_rate": 5.000000000000001e-07,
      "loss": 11.9256,
      "step": 3
    },
    {
      "epoch": 0.00018442528470653328,
      "grad_norm": 0.009763536043465137,
      "learning_rate": 6.666666666666667e-07,
      "loss": 11.9332,
      "step": 4
    },
    {
      "epoch": 0.00023053160588316657,
      "grad_norm": 0.01042894832789898,
      "learning_rate": 8.333333333333333e-07,
      "loss": 11.9328,
      "step": 5
    },
    {
      "epoch": 0.0002766379270597999,
      "grad_norm": 0.019424298778176308,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 11.9306,
      "step": 6
    },
    {
      "epoch": 0.0003227442482364332,
      "grad_norm": 0.013701501302421093,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 11.9262,
      "step": 7
    },
    {
      "epoch": 0.00036885056941306655,
      "grad_norm": 0.013128082267940044,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 11.9268,
      "step": 8
    },
    {
      "epoch": 0.0004149568905896998,
      "grad_norm": 0.01037242729216814,
      "learning_rate": 1.5e-06,
      "loss": 11.9255,
      "step": 9
    },
    {
      "epoch": 0.00046106321176633315,
      "grad_norm": 0.013505612500011921,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 11.9283,
      "step": 10
    },
    {
      "epoch": 0.0005071695329429665,
      "grad_norm": 0.011518862098455429,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 11.9287,
      "step": 11
    },
    {
      "epoch": 0.0005532758541195998,
      "grad_norm": 0.012861590832471848,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 11.9328,
      "step": 12
    },
    {
      "epoch": 0.0005993821752962331,
      "grad_norm": 0.012600081041455269,
      "learning_rate": 2.166666666666667e-06,
      "loss": 11.9359,
      "step": 13
    },
    {
      "epoch": 0.0006454884964728665,
      "grad_norm": 0.014691936783492565,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 11.9251,
      "step": 14
    },
    {
      "epoch": 0.0006915948176494998,
      "grad_norm": 0.01612277142703533,
      "learning_rate": 2.5e-06,
      "loss": 11.9303,
      "step": 15
    },
    {
      "epoch": 0.0007377011388261331,
      "grad_norm": 0.019262641668319702,
      "learning_rate": 2.666666666666667e-06,
      "loss": 11.9292,
      "step": 16
    },
    {
      "epoch": 0.0007838074600027664,
      "grad_norm": 0.01421594899147749,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 11.9336,
      "step": 17
    },
    {
      "epoch": 0.0008299137811793996,
      "grad_norm": 0.021003536880016327,
      "learning_rate": 3e-06,
      "loss": 11.9341,
      "step": 18
    },
    {
      "epoch": 0.000876020102356033,
      "grad_norm": 0.01781291700899601,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 11.9307,
      "step": 19
    },
    {
      "epoch": 0.0009221264235326663,
      "grad_norm": 0.015007808804512024,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 11.9386,
      "step": 20
    },
    {
      "epoch": 0.0009682327447092996,
      "grad_norm": 0.01923958584666252,
      "learning_rate": 3.5e-06,
      "loss": 11.9358,
      "step": 21
    },
    {
      "epoch": 0.001014339065885933,
      "grad_norm": 0.01430001575499773,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 11.9321,
      "step": 22
    },
    {
      "epoch": 0.0010604453870625664,
      "grad_norm": 0.023019419983029366,
      "learning_rate": 3.833333333333334e-06,
      "loss": 11.9269,
      "step": 23
    },
    {
      "epoch": 0.0011065517082391996,
      "grad_norm": 0.014341956935822964,
      "learning_rate": 4.000000000000001e-06,
      "loss": 11.9333,
      "step": 24
    },
    {
      "epoch": 0.0011526580294158328,
      "grad_norm": 0.017692534253001213,
      "learning_rate": 4.166666666666667e-06,
      "loss": 11.9276,
      "step": 25
    },
    {
      "epoch": 0.0011987643505924662,
      "grad_norm": 0.018241120502352715,
      "learning_rate": 4.333333333333334e-06,
      "loss": 11.9286,
      "step": 26
    },
    {
      "epoch": 0.0012448706717690995,
      "grad_norm": 0.01712462306022644,
      "learning_rate": 4.5e-06,
      "loss": 11.9349,
      "step": 27
    },
    {
      "epoch": 0.001290976992945733,
      "grad_norm": 0.024579280987381935,
      "learning_rate": 4.666666666666667e-06,
      "loss": 11.9306,
      "step": 28
    },
    {
      "epoch": 0.0013370833141223661,
      "grad_norm": 0.014921668916940689,
      "learning_rate": 4.833333333333333e-06,
      "loss": 11.9297,
      "step": 29
    },
    {
      "epoch": 0.0013831896352989996,
      "grad_norm": 0.019532358273863792,
      "learning_rate": 5e-06,
      "loss": 11.9351,
      "step": 30
    },
    {
      "epoch": 0.0014292959564756328,
      "grad_norm": 0.023204781115055084,
      "learning_rate": 4.997482666353287e-06,
      "loss": 11.9232,
      "step": 31
    },
    {
      "epoch": 0.0014754022776522662,
      "grad_norm": 0.02387520670890808,
      "learning_rate": 4.989935734988098e-06,
      "loss": 11.9375,
      "step": 32
    },
    {
      "epoch": 0.0015215085988288994,
      "grad_norm": 0.0345429852604866,
      "learning_rate": 4.977374404419838e-06,
      "loss": 11.9248,
      "step": 33
    },
    {
      "epoch": 0.0015676149200055329,
      "grad_norm": 0.025636054575443268,
      "learning_rate": 4.959823971496575e-06,
      "loss": 11.9312,
      "step": 34
    },
    {
      "epoch": 0.001613721241182166,
      "grad_norm": 0.02809992991387844,
      "learning_rate": 4.937319780454559e-06,
      "loss": 11.9266,
      "step": 35
    },
    {
      "epoch": 0.0016598275623587993,
      "grad_norm": 0.031008386984467506,
      "learning_rate": 4.909907151739634e-06,
      "loss": 11.9359,
      "step": 36
    },
    {
      "epoch": 0.0017059338835354327,
      "grad_norm": 0.029894821345806122,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 11.9259,
      "step": 37
    },
    {
      "epoch": 0.001752040204712066,
      "grad_norm": 0.02281125821173191,
      "learning_rate": 4.8405871765993435e-06,
      "loss": 11.93,
      "step": 38
    },
    {
      "epoch": 0.0017981465258886994,
      "grad_norm": 0.03086579591035843,
      "learning_rate": 4.7988194313786275e-06,
      "loss": 11.926,
      "step": 39
    },
    {
      "epoch": 0.0018442528470653326,
      "grad_norm": 0.024276772513985634,
      "learning_rate": 4.752422169756048e-06,
      "loss": 11.9231,
      "step": 40
    },
    {
      "epoch": 0.001890359168241966,
      "grad_norm": 0.02484738826751709,
      "learning_rate": 4.701488829641845e-06,
      "loss": 11.9357,
      "step": 41
    },
    {
      "epoch": 0.0019364654894185992,
      "grad_norm": 0.04550924152135849,
      "learning_rate": 4.646121984004666e-06,
      "loss": 11.9309,
      "step": 42
    },
    {
      "epoch": 0.0019825718105952327,
      "grad_norm": 0.03542514890432358,
      "learning_rate": 4.586433134303257e-06,
      "loss": 11.9254,
      "step": 43
    },
    {
      "epoch": 0.002028678131771866,
      "grad_norm": 0.037729743868112564,
      "learning_rate": 4.522542485937369e-06,
      "loss": 11.9239,
      "step": 44
    },
    {
      "epoch": 0.002074784452948499,
      "grad_norm": 0.04585666581988335,
      "learning_rate": 4.454578706170075e-06,
      "loss": 11.9279,
      "step": 45
    },
    {
      "epoch": 0.0021208907741251328,
      "grad_norm": 0.05111996829509735,
      "learning_rate": 4.382678665009028e-06,
      "loss": 11.9454,
      "step": 46
    },
    {
      "epoch": 0.002166997095301766,
      "grad_norm": 0.03707089275121689,
      "learning_rate": 4.3069871595684795e-06,
      "loss": 11.9256,
      "step": 47
    },
    {
      "epoch": 0.002213103416478399,
      "grad_norm": 0.06370975077152252,
      "learning_rate": 4.227656622467162e-06,
      "loss": 11.9364,
      "step": 48
    },
    {
      "epoch": 0.0022592097376550324,
      "grad_norm": 0.0724567249417305,
      "learning_rate": 4.144846814849282e-06,
      "loss": 11.9393,
      "step": 49
    },
    {
      "epoch": 0.0023053160588316656,
      "grad_norm": 0.07536933571100235,
      "learning_rate": 4.058724504646834e-06,
      "loss": 11.9193,
      "step": 50
    },
    {
      "epoch": 0.0023053160588316656,
      "eval_loss": 11.930991172790527,
      "eval_runtime": 190.8971,
      "eval_samples_per_second": 47.837,
      "eval_steps_per_second": 5.982,
      "step": 50
    },
    {
      "epoch": 0.0023514223800082993,
      "grad_norm": 0.010148766450583935,
      "learning_rate": 3.969463130731183e-06,
      "loss": 11.9311,
      "step": 51
    },
    {
      "epoch": 0.0023975287011849325,
      "grad_norm": 0.011732972227036953,
      "learning_rate": 3.8772424536302565e-06,
      "loss": 11.93,
      "step": 52
    },
    {
      "epoch": 0.0024436350223615657,
      "grad_norm": 0.008542712777853012,
      "learning_rate": 3.782248193514766e-06,
      "loss": 11.9327,
      "step": 53
    },
    {
      "epoch": 0.002489741343538199,
      "grad_norm": 0.009703322313725948,
      "learning_rate": 3.684671656182497e-06,
      "loss": 11.9283,
      "step": 54
    },
    {
      "epoch": 0.0025358476647148326,
      "grad_norm": 0.013916654512286186,
      "learning_rate": 3.5847093477938955e-06,
      "loss": 11.9337,
      "step": 55
    },
    {
      "epoch": 0.002581953985891466,
      "grad_norm": 0.012623277492821217,
      "learning_rate": 3.4825625791348093e-06,
      "loss": 11.9281,
      "step": 56
    },
    {
      "epoch": 0.002628060307068099,
      "grad_norm": 0.013886111788451672,
      "learning_rate": 3.3784370602033572e-06,
      "loss": 11.9325,
      "step": 57
    },
    {
      "epoch": 0.0026741666282447322,
      "grad_norm": 0.010405468754470348,
      "learning_rate": 3.272542485937369e-06,
      "loss": 11.9322,
      "step": 58
    },
    {
      "epoch": 0.002720272949421366,
      "grad_norm": 0.010533565655350685,
      "learning_rate": 3.165092113916688e-06,
      "loss": 11.9338,
      "step": 59
    },
    {
      "epoch": 0.002766379270597999,
      "grad_norm": 0.015023970045149326,
      "learning_rate": 3.056302334890786e-06,
      "loss": 11.9285,
      "step": 60
    },
    {
      "epoch": 0.0028124855917746323,
      "grad_norm": 0.01545638870447874,
      "learning_rate": 2.946392236996592e-06,
      "loss": 11.93,
      "step": 61
    },
    {
      "epoch": 0.0028585919129512655,
      "grad_norm": 0.015377589501440525,
      "learning_rate": 2.835583164544139e-06,
      "loss": 11.9311,
      "step": 62
    },
    {
      "epoch": 0.0029046982341278988,
      "grad_norm": 0.01387741044163704,
      "learning_rate": 2.724098272258584e-06,
      "loss": 11.9288,
      "step": 63
    },
    {
      "epoch": 0.0029508045553045324,
      "grad_norm": 0.014318383298814297,
      "learning_rate": 2.6121620758762877e-06,
      "loss": 11.9313,
      "step": 64
    },
    {
      "epoch": 0.0029969108764811656,
      "grad_norm": 0.019190404564142227,
      "learning_rate": 2.5e-06,
      "loss": 11.9256,
      "step": 65
    },
    {
      "epoch": 0.003043017197657799,
      "grad_norm": 0.01856730319559574,
      "learning_rate": 2.3878379241237136e-06,
      "loss": 11.9317,
      "step": 66
    },
    {
      "epoch": 0.003089123518834432,
      "grad_norm": 0.01225450448691845,
      "learning_rate": 2.2759017277414165e-06,
      "loss": 11.9345,
      "step": 67
    },
    {
      "epoch": 0.0031352298400110657,
      "grad_norm": 0.01742597296833992,
      "learning_rate": 2.1644168354558623e-06,
      "loss": 11.9302,
      "step": 68
    },
    {
      "epoch": 0.003181336161187699,
      "grad_norm": 0.01626632548868656,
      "learning_rate": 2.053607763003409e-06,
      "loss": 11.9344,
      "step": 69
    },
    {
      "epoch": 0.003227442482364332,
      "grad_norm": 0.01698143593966961,
      "learning_rate": 1.9436976651092143e-06,
      "loss": 11.9246,
      "step": 70
    },
    {
      "epoch": 0.0032735488035409654,
      "grad_norm": 0.013797706924378872,
      "learning_rate": 1.8349078860833125e-06,
      "loss": 11.9347,
      "step": 71
    },
    {
      "epoch": 0.0033196551247175986,
      "grad_norm": 0.015473989769816399,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 11.9309,
      "step": 72
    },
    {
      "epoch": 0.0033657614458942322,
      "grad_norm": 0.017822301015257835,
      "learning_rate": 1.6215629397966432e-06,
      "loss": 11.9317,
      "step": 73
    },
    {
      "epoch": 0.0034118677670708654,
      "grad_norm": 0.017122985795140266,
      "learning_rate": 1.5174374208651913e-06,
      "loss": 11.9321,
      "step": 74
    },
    {
      "epoch": 0.0034579740882474987,
      "grad_norm": 0.02036653459072113,
      "learning_rate": 1.415290652206105e-06,
      "loss": 11.9375,
      "step": 75
    },
    {
      "epoch": 0.003504080409424132,
      "grad_norm": 0.017380934208631516,
      "learning_rate": 1.3153283438175036e-06,
      "loss": 11.939,
      "step": 76
    },
    {
      "epoch": 0.0035501867306007655,
      "grad_norm": 0.0208874624222517,
      "learning_rate": 1.217751806485235e-06,
      "loss": 11.926,
      "step": 77
    },
    {
      "epoch": 0.0035962930517773987,
      "grad_norm": 0.02793768234550953,
      "learning_rate": 1.122757546369744e-06,
      "loss": 11.9347,
      "step": 78
    },
    {
      "epoch": 0.003642399372954032,
      "grad_norm": 0.017211785539984703,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 11.9357,
      "step": 79
    },
    {
      "epoch": 0.003688505694130665,
      "grad_norm": 0.022742517292499542,
      "learning_rate": 9.412754953531664e-07,
      "loss": 11.9328,
      "step": 80
    },
    {
      "epoch": 0.003734612015307299,
      "grad_norm": 0.02240605652332306,
      "learning_rate": 8.551531851507186e-07,
      "loss": 11.9287,
      "step": 81
    },
    {
      "epoch": 0.003780718336483932,
      "grad_norm": 0.01831667870283127,
      "learning_rate": 7.723433775328385e-07,
      "loss": 11.936,
      "step": 82
    },
    {
      "epoch": 0.0038268246576605653,
      "grad_norm": 0.021101830527186394,
      "learning_rate": 6.930128404315214e-07,
      "loss": 11.9253,
      "step": 83
    },
    {
      "epoch": 0.0038729309788371985,
      "grad_norm": 0.02482517622411251,
      "learning_rate": 6.17321334990973e-07,
      "loss": 11.9261,
      "step": 84
    },
    {
      "epoch": 0.003919037300013832,
      "grad_norm": 0.02878445014357567,
      "learning_rate": 5.454212938299256e-07,
      "loss": 11.9309,
      "step": 85
    },
    {
      "epoch": 0.003965143621190465,
      "grad_norm": 0.034189388155937195,
      "learning_rate": 4.774575140626317e-07,
      "loss": 11.9306,
      "step": 86
    },
    {
      "epoch": 0.004011249942367098,
      "grad_norm": 0.02674616314470768,
      "learning_rate": 4.1356686569674344e-07,
      "loss": 11.9353,
      "step": 87
    },
    {
      "epoch": 0.004057356263543732,
      "grad_norm": 0.02872542478144169,
      "learning_rate": 3.538780159953348e-07,
      "loss": 11.9199,
      "step": 88
    },
    {
      "epoch": 0.0041034625847203654,
      "grad_norm": 0.03754565492272377,
      "learning_rate": 2.98511170358155e-07,
      "loss": 11.9285,
      "step": 89
    },
    {
      "epoch": 0.004149568905896998,
      "grad_norm": 0.043331414461135864,
      "learning_rate": 2.4757783024395244e-07,
      "loss": 11.9281,
      "step": 90
    },
    {
      "epoch": 0.004195675227073632,
      "grad_norm": 0.033205412328243256,
      "learning_rate": 2.0118056862137358e-07,
      "loss": 11.93,
      "step": 91
    },
    {
      "epoch": 0.0042417815482502655,
      "grad_norm": 0.03685666620731354,
      "learning_rate": 1.59412823400657e-07,
      "loss": 11.9276,
      "step": 92
    },
    {
      "epoch": 0.004287887869426898,
      "grad_norm": 0.04748856648802757,
      "learning_rate": 1.223587092621162e-07,
      "loss": 11.935,
      "step": 93
    },
    {
      "epoch": 0.004333994190603532,
      "grad_norm": 0.03603879362344742,
      "learning_rate": 9.00928482603669e-08,
      "loss": 11.9439,
      "step": 94
    },
    {
      "epoch": 0.004380100511780165,
      "grad_norm": 0.042884595692157745,
      "learning_rate": 6.268021954544095e-08,
      "loss": 11.9186,
      "step": 95
    },
    {
      "epoch": 0.004426206832956798,
      "grad_norm": 0.05015327408909798,
      "learning_rate": 4.017602850342584e-08,
      "loss": 11.933,
      "step": 96
    },
    {
      "epoch": 0.004472313154133432,
      "grad_norm": 0.04760831966996193,
      "learning_rate": 2.262559558016325e-08,
      "loss": 11.9286,
      "step": 97
    },
    {
      "epoch": 0.004518419475310065,
      "grad_norm": 0.06750523298978806,
      "learning_rate": 1.006426501190233e-08,
      "loss": 11.9305,
      "step": 98
    },
    {
      "epoch": 0.0045645257964866985,
      "grad_norm": 0.07084526866674423,
      "learning_rate": 2.5173336467135266e-09,
      "loss": 11.912,
      "step": 99
    },
    {
      "epoch": 0.004610632117663331,
      "grad_norm": 0.10393932461738586,
      "learning_rate": 0.0,
      "loss": 11.9212,
      "step": 100
    },
    {
      "epoch": 0.004610632117663331,
      "eval_loss": 11.93097972869873,
      "eval_runtime": 190.9735,
      "eval_samples_per_second": 47.818,
      "eval_steps_per_second": 5.98,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 23868604416.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}