{
  "best_metric": 0.8282029628753662,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 2.150537634408602,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010752688172043012,
      "grad_norm": 6.99516487121582,
      "learning_rate": 5e-06,
      "loss": 3.1175,
      "step": 1
    },
    {
      "epoch": 0.010752688172043012,
      "eval_loss": 4.535027027130127,
      "eval_runtime": 14.1563,
      "eval_samples_per_second": 11.09,
      "eval_steps_per_second": 5.581,
      "step": 1
    },
    {
      "epoch": 0.021505376344086023,
      "grad_norm": 10.445697784423828,
      "learning_rate": 1e-05,
      "loss": 3.5728,
      "step": 2
    },
    {
      "epoch": 0.03225806451612903,
      "grad_norm": 12.358306884765625,
      "learning_rate": 1.5e-05,
      "loss": 3.9348,
      "step": 3
    },
    {
      "epoch": 0.043010752688172046,
      "grad_norm": 10.28347396850586,
      "learning_rate": 2e-05,
      "loss": 3.2841,
      "step": 4
    },
    {
      "epoch": 0.053763440860215055,
      "grad_norm": 10.178171157836914,
      "learning_rate": 2.5e-05,
      "loss": 3.2135,
      "step": 5
    },
    {
      "epoch": 0.06451612903225806,
      "grad_norm": 7.281465530395508,
      "learning_rate": 3e-05,
      "loss": 2.7599,
      "step": 6
    },
    {
      "epoch": 0.07526881720430108,
      "grad_norm": 9.009861946105957,
      "learning_rate": 3.5e-05,
      "loss": 2.4959,
      "step": 7
    },
    {
      "epoch": 0.08602150537634409,
      "grad_norm": 13.042397499084473,
      "learning_rate": 4e-05,
      "loss": 2.2706,
      "step": 8
    },
    {
      "epoch": 0.0967741935483871,
      "grad_norm": 7.2071638107299805,
      "learning_rate": 4.5e-05,
      "loss": 1.94,
      "step": 9
    },
    {
      "epoch": 0.10752688172043011,
      "grad_norm": 8.721714973449707,
      "learning_rate": 5e-05,
      "loss": 1.6717,
      "step": 10
    },
    {
      "epoch": 0.11827956989247312,
      "grad_norm": 9.722580909729004,
      "learning_rate": 5.500000000000001e-05,
      "loss": 1.821,
      "step": 11
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 8.217576026916504,
      "learning_rate": 6e-05,
      "loss": 1.58,
      "step": 12
    },
    {
      "epoch": 0.13978494623655913,
      "grad_norm": 6.87364387512207,
      "learning_rate": 6.500000000000001e-05,
      "loss": 1.3209,
      "step": 13
    },
    {
      "epoch": 0.15053763440860216,
      "grad_norm": 8.912040710449219,
      "learning_rate": 7e-05,
      "loss": 1.7184,
      "step": 14
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 7.484908580780029,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.0966,
      "step": 15
    },
    {
      "epoch": 0.17204301075268819,
      "grad_norm": 9.977514266967773,
      "learning_rate": 8e-05,
      "loss": 1.86,
      "step": 16
    },
    {
      "epoch": 0.1827956989247312,
      "grad_norm": 6.212277412414551,
      "learning_rate": 8.5e-05,
      "loss": 1.2687,
      "step": 17
    },
    {
      "epoch": 0.1935483870967742,
      "grad_norm": 6.920554161071777,
      "learning_rate": 9e-05,
      "loss": 1.1078,
      "step": 18
    },
    {
      "epoch": 0.20430107526881722,
      "grad_norm": 6.519954681396484,
      "learning_rate": 9.5e-05,
      "loss": 1.0365,
      "step": 19
    },
    {
      "epoch": 0.21505376344086022,
      "grad_norm": 9.026784896850586,
      "learning_rate": 0.0001,
      "loss": 1.2932,
      "step": 20
    },
    {
      "epoch": 0.22580645161290322,
      "grad_norm": 8.984053611755371,
      "learning_rate": 9.999238475781957e-05,
      "loss": 1.0453,
      "step": 21
    },
    {
      "epoch": 0.23655913978494625,
      "grad_norm": 7.758922100067139,
      "learning_rate": 9.99695413509548e-05,
      "loss": 1.2444,
      "step": 22
    },
    {
      "epoch": 0.24731182795698925,
      "grad_norm": 7.274909496307373,
      "learning_rate": 9.99314767377287e-05,
      "loss": 0.9704,
      "step": 23
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 8.09979248046875,
      "learning_rate": 9.987820251299122e-05,
      "loss": 2.5905,
      "step": 24
    },
    {
      "epoch": 0.26881720430107525,
      "grad_norm": 5.225498676300049,
      "learning_rate": 9.980973490458728e-05,
      "loss": 2.3784,
      "step": 25
    },
    {
      "epoch": 0.27956989247311825,
      "grad_norm": 4.132240295410156,
      "learning_rate": 9.972609476841367e-05,
      "loss": 1.9521,
      "step": 26
    },
    {
      "epoch": 0.2903225806451613,
      "grad_norm": 4.1804656982421875,
      "learning_rate": 9.962730758206611e-05,
      "loss": 2.1076,
      "step": 27
    },
    {
      "epoch": 0.3010752688172043,
      "grad_norm": 3.6776556968688965,
      "learning_rate": 9.951340343707852e-05,
      "loss": 1.9513,
      "step": 28
    },
    {
      "epoch": 0.3118279569892473,
      "grad_norm": 7.695979595184326,
      "learning_rate": 9.938441702975689e-05,
      "loss": 1.6991,
      "step": 29
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 6.833826065063477,
      "learning_rate": 9.924038765061042e-05,
      "loss": 1.3824,
      "step": 30
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 3.845757007598877,
      "learning_rate": 9.908135917238321e-05,
      "loss": 1.4698,
      "step": 31
    },
    {
      "epoch": 0.34408602150537637,
      "grad_norm": 3.1820149421691895,
      "learning_rate": 9.890738003669029e-05,
      "loss": 1.1889,
      "step": 32
    },
    {
      "epoch": 0.3548387096774194,
      "grad_norm": 4.390655517578125,
      "learning_rate": 9.871850323926177e-05,
      "loss": 1.1593,
      "step": 33
    },
    {
      "epoch": 0.3655913978494624,
      "grad_norm": 4.404603958129883,
      "learning_rate": 9.851478631379982e-05,
      "loss": 0.9826,
      "step": 34
    },
    {
      "epoch": 0.3763440860215054,
      "grad_norm": 3.859941005706787,
      "learning_rate": 9.829629131445342e-05,
      "loss": 1.0486,
      "step": 35
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 7.225592613220215,
      "learning_rate": 9.806308479691595e-05,
      "loss": 1.0135,
      "step": 36
    },
    {
      "epoch": 0.3978494623655914,
      "grad_norm": 4.242331504821777,
      "learning_rate": 9.781523779815179e-05,
      "loss": 0.8687,
      "step": 37
    },
    {
      "epoch": 0.40860215053763443,
      "grad_norm": 4.794056415557861,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.9721,
      "step": 38
    },
    {
      "epoch": 0.41935483870967744,
      "grad_norm": 3.9935474395751953,
      "learning_rate": 9.727592877996585e-05,
      "loss": 0.8433,
      "step": 39
    },
    {
      "epoch": 0.43010752688172044,
      "grad_norm": 3.3857905864715576,
      "learning_rate": 9.698463103929542e-05,
      "loss": 0.7633,
      "step": 40
    },
    {
      "epoch": 0.44086021505376344,
      "grad_norm": 3.6143317222595215,
      "learning_rate": 9.667902132486009e-05,
      "loss": 1.0136,
      "step": 41
    },
    {
      "epoch": 0.45161290322580644,
      "grad_norm": 3.9374451637268066,
      "learning_rate": 9.635919272833938e-05,
      "loss": 0.915,
      "step": 42
    },
    {
      "epoch": 0.46236559139784944,
      "grad_norm": 6.313716411590576,
      "learning_rate": 9.602524267262203e-05,
      "loss": 1.191,
      "step": 43
    },
    {
      "epoch": 0.4731182795698925,
      "grad_norm": 3.7929749488830566,
      "learning_rate": 9.567727288213005e-05,
      "loss": 1.0602,
      "step": 44
    },
    {
      "epoch": 0.4838709677419355,
      "grad_norm": 5.400207996368408,
      "learning_rate": 9.53153893518325e-05,
      "loss": 0.8725,
      "step": 45
    },
    {
      "epoch": 0.4946236559139785,
      "grad_norm": 3.7726528644561768,
      "learning_rate": 9.493970231495835e-05,
      "loss": 0.5123,
      "step": 46
    },
    {
      "epoch": 0.5053763440860215,
      "grad_norm": 3.6227760314941406,
      "learning_rate": 9.45503262094184e-05,
      "loss": 2.2086,
      "step": 47
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 3.246999979019165,
      "learning_rate": 9.414737964294636e-05,
      "loss": 2.3014,
      "step": 48
    },
    {
      "epoch": 0.5268817204301075,
      "grad_norm": 4.355656623840332,
      "learning_rate": 9.373098535696979e-05,
      "loss": 2.0727,
      "step": 49
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 2.746245861053467,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.5312,
      "step": 50
    },
    {
      "epoch": 0.5376344086021505,
      "eval_loss": 1.3152074813842773,
      "eval_runtime": 14.4699,
      "eval_samples_per_second": 10.85,
      "eval_steps_per_second": 5.46,
      "step": 50
    },
    {
      "epoch": 0.5483870967741935,
      "grad_norm": 3.6186563968658447,
      "learning_rate": 9.285836503510562e-05,
      "loss": 1.7819,
      "step": 51
    },
    {
      "epoch": 0.5591397849462365,
      "grad_norm": 3.9859840869903564,
      "learning_rate": 9.24024048078213e-05,
      "loss": 1.2978,
      "step": 52
    },
    {
      "epoch": 0.5698924731182796,
      "grad_norm": 3.9932148456573486,
      "learning_rate": 9.193352839727121e-05,
      "loss": 1.447,
      "step": 53
    },
    {
      "epoch": 0.5806451612903226,
      "grad_norm": 3.5936036109924316,
      "learning_rate": 9.145187862775209e-05,
      "loss": 1.3967,
      "step": 54
    },
    {
      "epoch": 0.5913978494623656,
      "grad_norm": 3.375316858291626,
      "learning_rate": 9.09576022144496e-05,
      "loss": 1.0998,
      "step": 55
    },
    {
      "epoch": 0.6021505376344086,
      "grad_norm": 3.807166814804077,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.1404,
      "step": 56
    },
    {
      "epoch": 0.6129032258064516,
      "grad_norm": 3.084326982498169,
      "learning_rate": 8.993177550236464e-05,
      "loss": 0.988,
      "step": 57
    },
    {
      "epoch": 0.6236559139784946,
      "grad_norm": 3.7563459873199463,
      "learning_rate": 8.940053768033609e-05,
      "loss": 1.0191,
      "step": 58
    },
    {
      "epoch": 0.6344086021505376,
      "grad_norm": 4.25908899307251,
      "learning_rate": 8.885729807284856e-05,
      "loss": 0.8702,
      "step": 59
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 3.750455617904663,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.8746,
      "step": 60
    },
    {
      "epoch": 0.6559139784946236,
      "grad_norm": 3.6707611083984375,
      "learning_rate": 8.773547901113862e-05,
      "loss": 0.9337,
      "step": 61
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 3.612056016921997,
      "learning_rate": 8.715724127386972e-05,
      "loss": 1.1718,
      "step": 62
    },
    {
      "epoch": 0.6774193548387096,
      "grad_norm": 4.298011779785156,
      "learning_rate": 8.656768508095853e-05,
      "loss": 0.628,
      "step": 63
    },
    {
      "epoch": 0.6881720430107527,
      "grad_norm": 4.248507022857666,
      "learning_rate": 8.596699001693255e-05,
      "loss": 0.6906,
      "step": 64
    },
    {
      "epoch": 0.6989247311827957,
      "grad_norm": 3.890913963317871,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.0229,
      "step": 65
    },
    {
      "epoch": 0.7096774193548387,
      "grad_norm": 4.77676248550415,
      "learning_rate": 8.473291852294987e-05,
      "loss": 0.8174,
      "step": 66
    },
    {
      "epoch": 0.7204301075268817,
      "grad_norm": 4.285477161407471,
      "learning_rate": 8.409991800312493e-05,
      "loss": 0.8005,
      "step": 67
    },
    {
      "epoch": 0.7311827956989247,
      "grad_norm": 3.8125786781311035,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.5634,
      "step": 68
    },
    {
      "epoch": 0.7419354838709677,
      "grad_norm": 4.32075309753418,
      "learning_rate": 8.280295144952536e-05,
      "loss": 0.6793,
      "step": 69
    },
    {
      "epoch": 0.7526881720430108,
      "grad_norm": 3.8095755577087402,
      "learning_rate": 8.213938048432697e-05,
      "loss": 2.0449,
      "step": 70
    },
    {
      "epoch": 0.7634408602150538,
      "grad_norm": 3.9818124771118164,
      "learning_rate": 8.146601955249188e-05,
      "loss": 1.9486,
      "step": 71
    },
    {
      "epoch": 0.7741935483870968,
      "grad_norm": 3.4325692653656006,
      "learning_rate": 8.07830737662829e-05,
      "loss": 1.8445,
      "step": 72
    },
    {
      "epoch": 0.7849462365591398,
      "grad_norm": 3.9117205142974854,
      "learning_rate": 8.009075115760243e-05,
      "loss": 1.9626,
      "step": 73
    },
    {
      "epoch": 0.7956989247311828,
      "grad_norm": 3.40067195892334,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.3113,
      "step": 74
    },
    {
      "epoch": 0.8064516129032258,
      "grad_norm": 3.8196802139282227,
      "learning_rate": 7.86788218175523e-05,
      "loss": 1.7274,
      "step": 75
    },
    {
      "epoch": 0.8172043010752689,
      "grad_norm": 4.589756488800049,
      "learning_rate": 7.795964517353735e-05,
      "loss": 1.7595,
      "step": 76
    },
    {
      "epoch": 0.8279569892473119,
      "grad_norm": 4.156907081604004,
      "learning_rate": 7.723195175075136e-05,
      "loss": 1.5407,
      "step": 77
    },
    {
      "epoch": 0.8387096774193549,
      "grad_norm": 3.101147174835205,
      "learning_rate": 7.649596321166024e-05,
      "loss": 0.9117,
      "step": 78
    },
    {
      "epoch": 0.8494623655913979,
      "grad_norm": 2.96360182762146,
      "learning_rate": 7.575190374550272e-05,
      "loss": 1.1026,
      "step": 79
    },
    {
      "epoch": 0.8602150537634409,
      "grad_norm": 2.9797332286834717,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.8541,
      "step": 80
    },
    {
      "epoch": 0.8709677419354839,
      "grad_norm": 3.0931079387664795,
      "learning_rate": 7.424048101231686e-05,
      "loss": 0.8875,
      "step": 81
    },
    {
      "epoch": 0.8817204301075269,
      "grad_norm": 2.8962762355804443,
      "learning_rate": 7.347357813929454e-05,
      "loss": 0.9001,
      "step": 82
    },
    {
      "epoch": 0.8924731182795699,
      "grad_norm": 3.5031051635742188,
      "learning_rate": 7.269952498697734e-05,
      "loss": 0.8276,
      "step": 83
    },
    {
      "epoch": 0.9032258064516129,
      "grad_norm": 3.142409086227417,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.823,
      "step": 84
    },
    {
      "epoch": 0.9139784946236559,
      "grad_norm": 2.962707757949829,
      "learning_rate": 7.113091308703498e-05,
      "loss": 0.8578,
      "step": 85
    },
    {
      "epoch": 0.9247311827956989,
      "grad_norm": 3.3335912227630615,
      "learning_rate": 7.033683215379002e-05,
      "loss": 0.8609,
      "step": 86
    },
    {
      "epoch": 0.9354838709677419,
      "grad_norm": 4.589385509490967,
      "learning_rate": 6.953655642446368e-05,
      "loss": 1.0085,
      "step": 87
    },
    {
      "epoch": 0.946236559139785,
      "grad_norm": 3.5878801345825195,
      "learning_rate": 6.873032967079561e-05,
      "loss": 0.5649,
      "step": 88
    },
    {
      "epoch": 0.956989247311828,
      "grad_norm": 3.286903142929077,
      "learning_rate": 6.7918397477265e-05,
      "loss": 0.4883,
      "step": 89
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 3.4730312824249268,
      "learning_rate": 6.710100716628344e-05,
      "loss": 0.7835,
      "step": 90
    },
    {
      "epoch": 0.978494623655914,
      "grad_norm": 3.7954018115997314,
      "learning_rate": 6.627840772285784e-05,
      "loss": 0.7178,
      "step": 91
    },
    {
      "epoch": 0.989247311827957,
      "grad_norm": 3.0298449993133545,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.4597,
      "step": 92
    },
    {
      "epoch": 1.0,
      "grad_norm": 11.850226402282715,
      "learning_rate": 6.461858523613684e-05,
      "loss": 1.5025,
      "step": 93
    },
    {
      "epoch": 1.010752688172043,
      "grad_norm": 6.41492223739624,
      "learning_rate": 6.378186779084995e-05,
      "loss": 1.8111,
      "step": 94
    },
    {
      "epoch": 1.021505376344086,
      "grad_norm": 3.9378912448883057,
      "learning_rate": 6.294095225512603e-05,
      "loss": 1.5149,
      "step": 95
    },
    {
      "epoch": 1.032258064516129,
      "grad_norm": 3.533281087875366,
      "learning_rate": 6.209609477998338e-05,
      "loss": 1.4312,
      "step": 96
    },
    {
      "epoch": 1.043010752688172,
      "grad_norm": 3.780578136444092,
      "learning_rate": 6.124755271719325e-05,
      "loss": 1.2979,
      "step": 97
    },
    {
      "epoch": 1.053763440860215,
      "grad_norm": 2.7388339042663574,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 0.988,
      "step": 98
    },
    {
      "epoch": 1.064516129032258,
      "grad_norm": 2.726057767868042,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 1.0731,
      "step": 99
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 4.266610622406006,
      "learning_rate": 5.868240888334653e-05,
      "loss": 1.0392,
      "step": 100
    },
    {
      "epoch": 1.075268817204301,
      "eval_loss": 1.115802526473999,
      "eval_runtime": 14.4453,
      "eval_samples_per_second": 10.869,
      "eval_steps_per_second": 5.469,
      "step": 100
    },
    {
      "epoch": 1.086021505376344,
      "grad_norm": 3.880851984024048,
      "learning_rate": 5.782172325201155e-05,
      "loss": 0.7542,
      "step": 101
    },
    {
      "epoch": 1.096774193548387,
      "grad_norm": 2.7799625396728516,
      "learning_rate": 5.695865504800327e-05,
      "loss": 0.6733,
      "step": 102
    },
    {
      "epoch": 1.10752688172043,
      "grad_norm": 2.877377986907959,
      "learning_rate": 5.6093467170257374e-05,
      "loss": 0.7712,
      "step": 103
    },
    {
      "epoch": 1.118279569892473,
      "grad_norm": 2.6312854290008545,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.5234,
      "step": 104
    },
    {
      "epoch": 1.129032258064516,
      "grad_norm": 2.5732200145721436,
      "learning_rate": 5.435778713738292e-05,
      "loss": 0.4727,
      "step": 105
    },
    {
      "epoch": 1.139784946236559,
      "grad_norm": 2.3443665504455566,
      "learning_rate": 5.348782368720626e-05,
      "loss": 0.5915,
      "step": 106
    },
    {
      "epoch": 1.1505376344086022,
      "grad_norm": 2.900613784790039,
      "learning_rate": 5.26167978121472e-05,
      "loss": 0.4433,
      "step": 107
    },
    {
      "epoch": 1.1612903225806452,
      "grad_norm": 2.779599666595459,
      "learning_rate": 5.174497483512506e-05,
      "loss": 0.4696,
      "step": 108
    },
    {
      "epoch": 1.1720430107526882,
      "grad_norm": 3.2906293869018555,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 0.6608,
      "step": 109
    },
    {
      "epoch": 1.1827956989247312,
      "grad_norm": 3.181580066680908,
      "learning_rate": 5e-05,
      "loss": 0.5271,
      "step": 110
    },
    {
      "epoch": 1.1935483870967742,
      "grad_norm": 3.3558270931243896,
      "learning_rate": 4.912737967813583e-05,
      "loss": 0.4357,
      "step": 111
    },
    {
      "epoch": 1.2043010752688172,
      "grad_norm": 2.8602147102355957,
      "learning_rate": 4.825502516487497e-05,
      "loss": 0.4805,
      "step": 112
    },
    {
      "epoch": 1.2150537634408602,
      "grad_norm": 3.4753870964050293,
      "learning_rate": 4.738320218785281e-05,
      "loss": 0.5753,
      "step": 113
    },
    {
      "epoch": 1.2258064516129032,
      "grad_norm": 2.4663901329040527,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 0.319,
      "step": 114
    },
    {
      "epoch": 1.2365591397849462,
      "grad_norm": 4.225439548492432,
      "learning_rate": 4.564221286261709e-05,
      "loss": 0.5583,
      "step": 115
    },
    {
      "epoch": 1.2473118279569892,
      "grad_norm": 2.3677308559417725,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.3289,
      "step": 116
    },
    {
      "epoch": 1.2580645161290323,
      "grad_norm": 3.1596832275390625,
      "learning_rate": 4.390653282974264e-05,
      "loss": 1.6511,
      "step": 117
    },
    {
      "epoch": 1.2688172043010753,
      "grad_norm": 3.4493701457977295,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 1.6611,
      "step": 118
    },
    {
      "epoch": 1.2795698924731183,
      "grad_norm": 3.0767276287078857,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 1.4595,
      "step": 119
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 3.6605751514434814,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.9704,
      "step": 120
    },
    {
      "epoch": 1.3010752688172043,
      "grad_norm": 4.2374420166015625,
      "learning_rate": 4.045955023117276e-05,
      "loss": 1.2689,
      "step": 121
    },
    {
      "epoch": 1.3118279569892473,
      "grad_norm": 3.0131664276123047,
      "learning_rate": 3.960441545911204e-05,
      "loss": 0.9044,
      "step": 122
    },
    {
      "epoch": 1.3225806451612903,
      "grad_norm": 2.836139678955078,
      "learning_rate": 3.875244728280676e-05,
      "loss": 0.9114,
      "step": 123
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 3.379606246948242,
      "learning_rate": 3.790390522001662e-05,
      "loss": 1.2156,
      "step": 124
    },
    {
      "epoch": 1.3440860215053765,
      "grad_norm": 3.377495050430298,
      "learning_rate": 3.705904774487396e-05,
      "loss": 0.6492,
      "step": 125
    },
    {
      "epoch": 1.3548387096774195,
      "grad_norm": 3.2035529613494873,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 0.728,
      "step": 126
    },
    {
      "epoch": 1.3655913978494625,
      "grad_norm": 2.9157145023345947,
      "learning_rate": 3.5381414763863166e-05,
      "loss": 0.6313,
      "step": 127
    },
    {
      "epoch": 1.3763440860215055,
      "grad_norm": 2.234398126602173,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.6193,
      "step": 128
    },
    {
      "epoch": 1.3870967741935485,
      "grad_norm": 2.1972103118896484,
      "learning_rate": 3.372159227714218e-05,
      "loss": 0.5867,
      "step": 129
    },
    {
      "epoch": 1.3978494623655915,
      "grad_norm": 3.0726523399353027,
      "learning_rate": 3.289899283371657e-05,
      "loss": 0.8497,
      "step": 130
    },
    {
      "epoch": 1.4086021505376345,
      "grad_norm": 2.2349085807800293,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 0.5208,
      "step": 131
    },
    {
      "epoch": 1.4193548387096775,
      "grad_norm": 2.5332772731781006,
      "learning_rate": 3.12696703292044e-05,
      "loss": 0.4452,
      "step": 132
    },
    {
      "epoch": 1.4301075268817205,
      "grad_norm": 2.002074718475342,
      "learning_rate": 3.046344357553632e-05,
      "loss": 0.3896,
      "step": 133
    },
    {
      "epoch": 1.4408602150537635,
      "grad_norm": 2.316570281982422,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 0.4541,
      "step": 134
    },
    {
      "epoch": 1.4516129032258065,
      "grad_norm": 2.4266834259033203,
      "learning_rate": 2.886908691296504e-05,
      "loss": 0.3945,
      "step": 135
    },
    {
      "epoch": 1.4623655913978495,
      "grad_norm": 2.9699063301086426,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 0.6487,
      "step": 136
    },
    {
      "epoch": 1.4731182795698925,
      "grad_norm": 2.3638315200805664,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 0.2829,
      "step": 137
    },
    {
      "epoch": 1.4838709677419355,
      "grad_norm": 4.946100234985352,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 0.4617,
      "step": 138
    },
    {
      "epoch": 1.4946236559139785,
      "grad_norm": 2.5009000301361084,
      "learning_rate": 2.575951898768315e-05,
      "loss": 0.2181,
      "step": 139
    },
    {
      "epoch": 1.5053763440860215,
      "grad_norm": 2.8839311599731445,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.7407,
      "step": 140
    },
    {
      "epoch": 1.5161290322580645,
      "grad_norm": 2.9398391246795654,
      "learning_rate": 2.4248096254497288e-05,
      "loss": 1.4332,
      "step": 141
    },
    {
      "epoch": 1.5268817204301075,
      "grad_norm": 2.908121347427368,
      "learning_rate": 2.350403678833976e-05,
      "loss": 1.2238,
      "step": 142
    },
    {
      "epoch": 1.5376344086021505,
      "grad_norm": 3.3087892532348633,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 1.2163,
      "step": 143
    },
    {
      "epoch": 1.5483870967741935,
      "grad_norm": 2.738379716873169,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 0.9637,
      "step": 144
    },
    {
      "epoch": 1.5591397849462365,
      "grad_norm": 3.258429527282715,
      "learning_rate": 2.132117818244771e-05,
      "loss": 1.1101,
      "step": 145
    },
    {
      "epoch": 1.5698924731182795,
      "grad_norm": 2.7381410598754883,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.9134,
      "step": 146
    },
    {
      "epoch": 1.5806451612903225,
      "grad_norm": 2.7571890354156494,
      "learning_rate": 1.9909248842397584e-05,
      "loss": 0.7396,
      "step": 147
    },
    {
      "epoch": 1.5913978494623655,
      "grad_norm": 2.268134355545044,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 0.5299,
      "step": 148
    },
    {
      "epoch": 1.6021505376344085,
      "grad_norm": 2.5643818378448486,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 0.4746,
      "step": 149
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 2.6224515438079834,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 0.5465,
      "step": 150
    },
    {
      "epoch": 1.6129032258064515,
      "eval_loss": 0.868911862373352,
      "eval_runtime": 14.4083,
      "eval_samples_per_second": 10.896,
      "eval_steps_per_second": 5.483,
      "step": 150
    },
    {
      "epoch": 1.6236559139784945,
      "grad_norm": 3.0091564655303955,
      "learning_rate": 1.7197048550474643e-05,
      "loss": 0.7287,
      "step": 151
    },
    {
      "epoch": 1.6344086021505375,
      "grad_norm": 1.6763337850570679,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 0.2309,
      "step": 152
    },
    {
      "epoch": 1.6451612903225805,
      "grad_norm": 2.7991411685943604,
      "learning_rate": 1.5900081996875083e-05,
      "loss": 0.6103,
      "step": 153
    },
    {
      "epoch": 1.6559139784946235,
      "grad_norm": 2.1921868324279785,
      "learning_rate": 1.526708147705013e-05,
      "loss": 0.4928,
      "step": 154
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 3.5872364044189453,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.6464,
      "step": 155
    },
    {
      "epoch": 1.6774193548387095,
      "grad_norm": 1.8818094730377197,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 0.3707,
      "step": 156
    },
    {
      "epoch": 1.6881720430107527,
      "grad_norm": 2.5614068508148193,
      "learning_rate": 1.3432314919041478e-05,
      "loss": 0.428,
      "step": 157
    },
    {
      "epoch": 1.6989247311827957,
      "grad_norm": 2.164555311203003,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 0.408,
      "step": 158
    },
    {
      "epoch": 1.7096774193548387,
      "grad_norm": 2.022615432739258,
      "learning_rate": 1.22645209888614e-05,
      "loss": 0.2232,
      "step": 159
    },
    {
      "epoch": 1.7204301075268817,
      "grad_norm": 2.447100877761841,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 0.2983,
      "step": 160
    },
    {
      "epoch": 1.7311827956989247,
      "grad_norm": 4.865058898925781,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 0.7972,
      "step": 161
    },
    {
      "epoch": 1.7419354838709677,
      "grad_norm": 2.682640790939331,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 0.4589,
      "step": 162
    },
    {
      "epoch": 1.7526881720430108,
      "grad_norm": 2.2714807987213135,
      "learning_rate": 1.006822449763537e-05,
      "loss": 1.3321,
      "step": 163
    },
    {
      "epoch": 1.7634408602150538,
      "grad_norm": 2.663078784942627,
      "learning_rate": 9.549150281252633e-06,
      "loss": 1.4021,
      "step": 164
    },
    {
      "epoch": 1.7741935483870968,
      "grad_norm": 2.38264799118042,
      "learning_rate": 9.042397785550405e-06,
      "loss": 1.2299,
      "step": 165
    },
    {
      "epoch": 1.7849462365591398,
      "grad_norm": 2.5163092613220215,
      "learning_rate": 8.548121372247918e-06,
      "loss": 1.0744,
      "step": 166
    },
    {
      "epoch": 1.7956989247311828,
      "grad_norm": 3.2605020999908447,
      "learning_rate": 8.066471602728803e-06,
      "loss": 1.1439,
      "step": 167
    },
    {
      "epoch": 1.8064516129032258,
      "grad_norm": 2.8853485584259033,
      "learning_rate": 7.597595192178702e-06,
      "loss": 1.0157,
      "step": 168
    },
    {
      "epoch": 1.817204301075269,
      "grad_norm": 2.8386337757110596,
      "learning_rate": 7.1416349648943894e-06,
      "loss": 0.9278,
      "step": 169
    },
    {
      "epoch": 1.827956989247312,
      "grad_norm": 3.006211996078491,
      "learning_rate": 6.698729810778065e-06,
      "loss": 0.863,
      "step": 170
    },
    {
      "epoch": 1.838709677419355,
      "grad_norm": 3.339857339859009,
      "learning_rate": 6.269014643030213e-06,
      "loss": 0.7245,
      "step": 171
    },
    {
      "epoch": 1.849462365591398,
      "grad_norm": 2.269327402114868,
      "learning_rate": 5.852620357053651e-06,
      "loss": 0.3663,
      "step": 172
    },
    {
      "epoch": 1.860215053763441,
      "grad_norm": 2.1220529079437256,
      "learning_rate": 5.449673790581611e-06,
      "loss": 0.3854,
      "step": 173
    },
    {
      "epoch": 1.870967741935484,
      "grad_norm": 2.488945722579956,
      "learning_rate": 5.060297685041659e-06,
      "loss": 0.5545,
      "step": 174
    },
    {
      "epoch": 1.881720430107527,
      "grad_norm": 2.159728765487671,
      "learning_rate": 4.684610648167503e-06,
      "loss": 0.4613,
      "step": 175
    },
    {
      "epoch": 1.89247311827957,
      "grad_norm": 1.9596354961395264,
      "learning_rate": 4.322727117869951e-06,
      "loss": 0.3092,
      "step": 176
    },
    {
      "epoch": 1.903225806451613,
      "grad_norm": 2.403904914855957,
      "learning_rate": 3.974757327377981e-06,
      "loss": 0.4026,
      "step": 177
    },
    {
      "epoch": 1.913978494623656,
      "grad_norm": 2.790825605392456,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 0.5259,
      "step": 178
    },
    {
      "epoch": 1.924731182795699,
      "grad_norm": 1.891068696975708,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 0.2814,
      "step": 179
    },
    {
      "epoch": 1.935483870967742,
      "grad_norm": 2.383929967880249,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.4715,
      "step": 180
    },
    {
      "epoch": 1.946236559139785,
      "grad_norm": 1.4359225034713745,
      "learning_rate": 2.724071220034158e-06,
      "loss": 0.1781,
      "step": 181
    },
    {
      "epoch": 1.956989247311828,
      "grad_norm": 2.4820544719696045,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.4342,
      "step": 182
    },
    {
      "epoch": 1.967741935483871,
      "grad_norm": 1.628670573234558,
      "learning_rate": 2.1847622018482283e-06,
      "loss": 0.1903,
      "step": 183
    },
    {
      "epoch": 1.978494623655914,
      "grad_norm": 3.297879934310913,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 0.3394,
      "step": 184
    },
    {
      "epoch": 1.989247311827957,
      "grad_norm": 2.3827061653137207,
      "learning_rate": 1.70370868554659e-06,
      "loss": 0.3608,
      "step": 185
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.618680238723755,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 0.8033,
      "step": 186
    },
    {
      "epoch": 2.010752688172043,
      "grad_norm": 1.7273980379104614,
      "learning_rate": 1.2814967607382432e-06,
      "loss": 1.237,
      "step": 187
    },
    {
      "epoch": 2.021505376344086,
      "grad_norm": 2.0505175590515137,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 1.3056,
      "step": 188
    },
    {
      "epoch": 2.032258064516129,
      "grad_norm": 2.3544833660125732,
      "learning_rate": 9.186408276168013e-07,
      "loss": 1.1107,
      "step": 189
    },
    {
      "epoch": 2.043010752688172,
      "grad_norm": 1.91807222366333,
      "learning_rate": 7.596123493895991e-07,
      "loss": 0.7513,
      "step": 190
    },
    {
      "epoch": 2.053763440860215,
      "grad_norm": 2.319427728652954,
      "learning_rate": 6.15582970243117e-07,
      "loss": 0.8994,
      "step": 191
    },
    {
      "epoch": 2.064516129032258,
      "grad_norm": 2.4607162475585938,
      "learning_rate": 4.865965629214819e-07,
      "loss": 0.749,
      "step": 192
    },
    {
      "epoch": 2.075268817204301,
      "grad_norm": 2.2048518657684326,
      "learning_rate": 3.7269241793390085e-07,
      "loss": 0.6078,
      "step": 193
    },
    {
      "epoch": 2.086021505376344,
      "grad_norm": 2.24308443069458,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 0.6862,
      "step": 194
    },
    {
      "epoch": 2.096774193548387,
      "grad_norm": 1.80559504032135,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 0.4276,
      "step": 195
    },
    {
      "epoch": 2.10752688172043,
      "grad_norm": 1.79929518699646,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 0.3818,
      "step": 196
    },
    {
      "epoch": 2.118279569892473,
      "grad_norm": 1.762046456336975,
      "learning_rate": 6.852326227130834e-08,
      "loss": 0.3659,
      "step": 197
    },
    {
      "epoch": 2.129032258064516,
      "grad_norm": 1.7550525665283203,
      "learning_rate": 3.04586490452119e-08,
      "loss": 0.2808,
      "step": 198
    },
    {
      "epoch": 2.139784946236559,
      "grad_norm": 1.8459057807922363,
      "learning_rate": 7.615242180436522e-09,
      "loss": 0.325,
      "step": 199
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 1.6853699684143066,
      "learning_rate": 0.0,
      "loss": 0.2918,
      "step": 200
    },
    {
      "epoch": 2.150537634408602,
      "eval_loss": 0.8282029628753662,
      "eval_runtime": 14.4284,
      "eval_samples_per_second": 10.881,
      "eval_steps_per_second": 5.475,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.124610424184504e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}