|
{ |
|
"best_metric": 0.8921212121212121, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-woody_130epochs/checkpoint-7076", |
|
"epoch": 130.0, |
|
"global_step": 7540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 6.631299734748011e-07, |
|
"loss": 0.7042, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3262599469496022e-06, |
|
"loss": 0.6892, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.989389920424403e-06, |
|
"loss": 0.6959, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.6525198938992043e-06, |
|
"loss": 0.6807, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.315649867374006e-06, |
|
"loss": 0.6694, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6593939393939394, |
|
"eval_loss": 0.6369909048080444, |
|
"eval_runtime": 203.2039, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.128, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.978779840848806e-06, |
|
"loss": 0.6567, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.641909814323608e-06, |
|
"loss": 0.6483, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.305039787798409e-06, |
|
"loss": 0.6366, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5.968169761273209e-06, |
|
"loss": 0.6244, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.631299734748012e-06, |
|
"loss": 0.6198, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.294429708222812e-06, |
|
"loss": 0.6072, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.703030303030303, |
|
"eval_loss": 0.5813035368919373, |
|
"eval_runtime": 23.498, |
|
"eval_samples_per_second": 35.109, |
|
"eval_steps_per_second": 1.106, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 7.957559681697613e-06, |
|
"loss": 0.6064, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.620689655172414e-06, |
|
"loss": 0.6152, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.283819628647216e-06, |
|
"loss": 0.6144, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.946949602122016e-06, |
|
"loss": 0.5924, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.0610079575596817e-05, |
|
"loss": 0.598, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1273209549071619e-05, |
|
"loss": 0.6048, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.703030303030303, |
|
"eval_loss": 0.5645595192909241, |
|
"eval_runtime": 23.5837, |
|
"eval_samples_per_second": 34.982, |
|
"eval_steps_per_second": 1.102, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.1936339522546419e-05, |
|
"loss": 0.598, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.259946949602122e-05, |
|
"loss": 0.5828, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.3262599469496024e-05, |
|
"loss": 0.6087, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.3925729442970822e-05, |
|
"loss": 0.5831, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.4588859416445624e-05, |
|
"loss": 0.5798, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.5251989389920427e-05, |
|
"loss": 0.5849, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_loss": 0.5777620077133179, |
|
"eval_runtime": 23.555, |
|
"eval_samples_per_second": 35.024, |
|
"eval_steps_per_second": 1.104, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.5915119363395225e-05, |
|
"loss": 0.5964, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.657824933687003e-05, |
|
"loss": 0.5843, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 0.5875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.7904509283819628e-05, |
|
"loss": 0.6012, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.856763925729443e-05, |
|
"loss": 0.581, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.5671, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7236363636363636, |
|
"eval_loss": 0.5394322872161865, |
|
"eval_runtime": 23.629, |
|
"eval_samples_per_second": 34.915, |
|
"eval_steps_per_second": 1.1, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 1.989389920424403e-05, |
|
"loss": 0.5796, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.0557029177718835e-05, |
|
"loss": 0.5847, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 2.1220159151193635e-05, |
|
"loss": 0.5662, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.1883289124668434e-05, |
|
"loss": 0.5714, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 2.2546419098143238e-05, |
|
"loss": 0.5575, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7381818181818182, |
|
"eval_loss": 0.5211960673332214, |
|
"eval_runtime": 23.6621, |
|
"eval_samples_per_second": 34.866, |
|
"eval_steps_per_second": 1.099, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 2.3209549071618038e-05, |
|
"loss": 0.5516, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 2.3872679045092838e-05, |
|
"loss": 0.5536, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 2.453580901856764e-05, |
|
"loss": 0.5631, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 2.519893899204244e-05, |
|
"loss": 0.576, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.5862068965517244e-05, |
|
"loss": 0.5648, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 2.6525198938992047e-05, |
|
"loss": 0.568, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7357575757575757, |
|
"eval_loss": 0.5218449831008911, |
|
"eval_runtime": 23.9065, |
|
"eval_samples_per_second": 34.509, |
|
"eval_steps_per_second": 1.088, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.7188328912466844e-05, |
|
"loss": 0.558, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 2.7851458885941644e-05, |
|
"loss": 0.5434, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 2.8514588859416447e-05, |
|
"loss": 0.533, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 2.9177718832891247e-05, |
|
"loss": 0.5618, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 2.984084880636605e-05, |
|
"loss": 0.5408, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 3.0503978779840854e-05, |
|
"loss": 0.5607, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7527272727272727, |
|
"eval_loss": 0.5183489322662354, |
|
"eval_runtime": 23.6214, |
|
"eval_samples_per_second": 34.926, |
|
"eval_steps_per_second": 1.101, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 3.116710875331565e-05, |
|
"loss": 0.5342, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 3.183023872679045e-05, |
|
"loss": 0.5749, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.2493368700265253e-05, |
|
"loss": 0.5333, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 3.315649867374006e-05, |
|
"loss": 0.5616, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 3.381962864721486e-05, |
|
"loss": 0.5268, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 0.5351, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7466666666666667, |
|
"eval_loss": 0.5138289332389832, |
|
"eval_runtime": 23.7482, |
|
"eval_samples_per_second": 34.739, |
|
"eval_steps_per_second": 1.095, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 3.514588859416445e-05, |
|
"loss": 0.535, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.5809018567639256e-05, |
|
"loss": 0.5264, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 3.647214854111406e-05, |
|
"loss": 0.5518, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3.713527851458886e-05, |
|
"loss": 0.5468, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.7798408488063666e-05, |
|
"loss": 0.5671, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.5459, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7393939393939394, |
|
"eval_loss": 0.5290118455886841, |
|
"eval_runtime": 23.7174, |
|
"eval_samples_per_second": 34.785, |
|
"eval_steps_per_second": 1.096, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 3.912466843501326e-05, |
|
"loss": 0.5102, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 3.978779840848806e-05, |
|
"loss": 0.5154, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 4.0450928381962866e-05, |
|
"loss": 0.563, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 4.111405835543767e-05, |
|
"loss": 0.555, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 4.177718832891247e-05, |
|
"loss": 0.5454, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7345454545454545, |
|
"eval_loss": 0.5211759209632874, |
|
"eval_runtime": 23.7045, |
|
"eval_samples_per_second": 34.804, |
|
"eval_steps_per_second": 1.097, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 4.244031830238727e-05, |
|
"loss": 0.5732, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 4.3103448275862066e-05, |
|
"loss": 0.529, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 4.376657824933687e-05, |
|
"loss": 0.5451, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 4.442970822281167e-05, |
|
"loss": 0.5318, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 4.5092838196286476e-05, |
|
"loss": 0.5358, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 4.575596816976128e-05, |
|
"loss": 0.5291, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7575757575757576, |
|
"eval_loss": 0.5130247473716736, |
|
"eval_runtime": 23.7093, |
|
"eval_samples_per_second": 34.796, |
|
"eval_steps_per_second": 1.097, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 4.6419098143236075e-05, |
|
"loss": 0.5193, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 4.708222811671088e-05, |
|
"loss": 0.5553, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 4.7745358090185675e-05, |
|
"loss": 0.5157, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 4.840848806366048e-05, |
|
"loss": 0.5291, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 4.907161803713528e-05, |
|
"loss": 0.542, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 4.9734748010610085e-05, |
|
"loss": 0.5378, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7503030303030302, |
|
"eval_loss": 0.5372377634048462, |
|
"eval_runtime": 23.6749, |
|
"eval_samples_per_second": 34.847, |
|
"eval_steps_per_second": 1.098, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 4.995579133510168e-05, |
|
"loss": 0.5417, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 4.9882110226937815e-05, |
|
"loss": 0.5221, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 4.980842911877395e-05, |
|
"loss": 0.5311, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 4.9734748010610085e-05, |
|
"loss": 0.5242, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 4.9661066902446214e-05, |
|
"loss": 0.5314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 4.958738579428235e-05, |
|
"loss": 0.5264, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.686060606060606, |
|
"eval_loss": 0.6088762283325195, |
|
"eval_runtime": 23.6208, |
|
"eval_samples_per_second": 34.927, |
|
"eval_steps_per_second": 1.101, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 4.9513704686118484e-05, |
|
"loss": 0.5124, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 4.944002357795461e-05, |
|
"loss": 0.5396, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 4.936634246979075e-05, |
|
"loss": 0.5404, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 4.9292661361626876e-05, |
|
"loss": 0.5445, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 4.921898025346302e-05, |
|
"loss": 0.527, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.4909, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7636363636363637, |
|
"eval_loss": 0.4852340817451477, |
|
"eval_runtime": 23.7731, |
|
"eval_samples_per_second": 34.703, |
|
"eval_steps_per_second": 1.094, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 4.907161803713528e-05, |
|
"loss": 0.4962, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 4.899793692897142e-05, |
|
"loss": 0.4797, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 4.8924255820807546e-05, |
|
"loss": 0.5479, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 4.885057471264368e-05, |
|
"loss": 0.5333, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 4.877689360447981e-05, |
|
"loss": 0.5591, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.48167508840560913, |
|
"eval_runtime": 23.7604, |
|
"eval_samples_per_second": 34.722, |
|
"eval_steps_per_second": 1.094, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 4.8703212496315944e-05, |
|
"loss": 0.5216, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 4.862953138815208e-05, |
|
"loss": 0.5183, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 4.8555850279988215e-05, |
|
"loss": 0.5197, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 4.848216917182435e-05, |
|
"loss": 0.4966, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 4.840848806366048e-05, |
|
"loss": 0.5255, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 4.8334806955496614e-05, |
|
"loss": 0.4966, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6933333333333334, |
|
"eval_loss": 0.5673098564147949, |
|
"eval_runtime": 23.9258, |
|
"eval_samples_per_second": 34.482, |
|
"eval_steps_per_second": 1.087, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 4.826112584733274e-05, |
|
"loss": 0.5032, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 4.818744473916888e-05, |
|
"loss": 0.5385, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 4.811376363100501e-05, |
|
"loss": 0.4917, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 4.804008252284115e-05, |
|
"loss": 0.5167, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 4.796640141467728e-05, |
|
"loss": 0.5031, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 4.789272030651341e-05, |
|
"loss": 0.4988, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7418181818181818, |
|
"eval_loss": 0.5131048560142517, |
|
"eval_runtime": 23.8489, |
|
"eval_samples_per_second": 34.593, |
|
"eval_steps_per_second": 1.09, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 4.781903919834955e-05, |
|
"loss": 0.5213, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 4.7745358090185675e-05, |
|
"loss": 0.5008, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 4.767167698202181e-05, |
|
"loss": 0.5166, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 4.7597995873857946e-05, |
|
"loss": 0.503, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 4.752431476569408e-05, |
|
"loss": 0.4926, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 4.7450633657530216e-05, |
|
"loss": 0.5339, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7393939393939394, |
|
"eval_loss": 0.49982041120529175, |
|
"eval_runtime": 23.602, |
|
"eval_samples_per_second": 34.955, |
|
"eval_steps_per_second": 1.102, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"learning_rate": 4.7376952549366345e-05, |
|
"loss": 0.5236, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 4.730327144120248e-05, |
|
"loss": 0.4846, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.48, |
|
"learning_rate": 4.722959033303861e-05, |
|
"loss": 0.5075, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 4.7155909224874743e-05, |
|
"loss": 0.5092, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 4.708222811671088e-05, |
|
"loss": 0.474, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.4804, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7733333333333333, |
|
"eval_loss": 0.4654778242111206, |
|
"eval_runtime": 23.4849, |
|
"eval_samples_per_second": 35.129, |
|
"eval_steps_per_second": 1.107, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 4.693486590038315e-05, |
|
"loss": 0.4753, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 4.686118479221928e-05, |
|
"loss": 0.5073, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 20.52, |
|
"learning_rate": 4.678750368405541e-05, |
|
"loss": 0.4844, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 4.671382257589154e-05, |
|
"loss": 0.4988, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"learning_rate": 4.6640141467727676e-05, |
|
"loss": 0.503, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7684848484848484, |
|
"eval_loss": 0.4553923010826111, |
|
"eval_runtime": 23.6326, |
|
"eval_samples_per_second": 34.909, |
|
"eval_steps_per_second": 1.1, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 4.656646035956381e-05, |
|
"loss": 0.488, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 4.649277925139994e-05, |
|
"loss": 0.4745, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 4.6419098143236075e-05, |
|
"loss": 0.4962, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"learning_rate": 4.634541703507221e-05, |
|
"loss": 0.4775, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 21.72, |
|
"learning_rate": 4.6271735926908346e-05, |
|
"loss": 0.502, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 4.6198054818744474e-05, |
|
"loss": 0.4859, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.776969696969697, |
|
"eval_loss": 0.47131648659706116, |
|
"eval_runtime": 23.5898, |
|
"eval_samples_per_second": 34.973, |
|
"eval_steps_per_second": 1.102, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 4.612437371058061e-05, |
|
"loss": 0.4706, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 4.6050692602416745e-05, |
|
"loss": 0.4935, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"learning_rate": 4.597701149425287e-05, |
|
"loss": 0.4541, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"learning_rate": 4.590333038608901e-05, |
|
"loss": 0.4832, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 4.5829649277925144e-05, |
|
"loss": 0.4823, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 4.575596816976128e-05, |
|
"loss": 0.504, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7721212121212121, |
|
"eval_loss": 0.454452246427536, |
|
"eval_runtime": 23.5656, |
|
"eval_samples_per_second": 35.009, |
|
"eval_steps_per_second": 1.103, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"learning_rate": 4.568228706159741e-05, |
|
"loss": 0.4717, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 23.28, |
|
"learning_rate": 4.560860595343354e-05, |
|
"loss": 0.4759, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"learning_rate": 4.553492484526968e-05, |
|
"loss": 0.4699, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 4.5461243737105806e-05, |
|
"loss": 0.4549, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 23.79, |
|
"learning_rate": 4.538756262894194e-05, |
|
"loss": 0.4818, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 4.531388152077807e-05, |
|
"loss": 0.478, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7830303030303031, |
|
"eval_loss": 0.4657587707042694, |
|
"eval_runtime": 23.6314, |
|
"eval_samples_per_second": 34.911, |
|
"eval_steps_per_second": 1.1, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 4.524020041261421e-05, |
|
"loss": 0.4718, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 4.516651930445034e-05, |
|
"loss": 0.4978, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 4.5092838196286476e-05, |
|
"loss": 0.4881, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 24.66, |
|
"learning_rate": 4.501915708812261e-05, |
|
"loss": 0.4545, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 24.83, |
|
"learning_rate": 4.494547597995874e-05, |
|
"loss": 0.4764, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.4759, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8012121212121213, |
|
"eval_loss": 0.43650707602500916, |
|
"eval_runtime": 23.7193, |
|
"eval_samples_per_second": 34.782, |
|
"eval_steps_per_second": 1.096, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.17, |
|
"learning_rate": 4.4798113763631e-05, |
|
"loss": 0.4747, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 25.34, |
|
"learning_rate": 4.472443265546714e-05, |
|
"loss": 0.4661, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 4.465075154730327e-05, |
|
"loss": 0.4782, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 25.69, |
|
"learning_rate": 4.457707043913941e-05, |
|
"loss": 0.4668, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 25.86, |
|
"learning_rate": 4.4503389330975544e-05, |
|
"loss": 0.4686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7854545454545454, |
|
"eval_loss": 0.4451583921909332, |
|
"eval_runtime": 23.883, |
|
"eval_samples_per_second": 34.543, |
|
"eval_steps_per_second": 1.089, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 26.03, |
|
"learning_rate": 4.442970822281167e-05, |
|
"loss": 0.4793, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 4.435602711464781e-05, |
|
"loss": 0.4537, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 4.4282346006483936e-05, |
|
"loss": 0.4727, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 4.420866489832007e-05, |
|
"loss": 0.4496, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 4.4134983790156206e-05, |
|
"loss": 0.4499, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"learning_rate": 4.406130268199234e-05, |
|
"loss": 0.4668, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7878787878787878, |
|
"eval_loss": 0.4427114427089691, |
|
"eval_runtime": 23.8543, |
|
"eval_samples_per_second": 34.585, |
|
"eval_steps_per_second": 1.09, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 27.07, |
|
"learning_rate": 4.398762157382848e-05, |
|
"loss": 0.4921, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 4.3913940465664605e-05, |
|
"loss": 0.4512, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 27.41, |
|
"learning_rate": 4.384025935750074e-05, |
|
"loss": 0.4676, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 4.376657824933687e-05, |
|
"loss": 0.461, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 27.76, |
|
"learning_rate": 4.3692897141173004e-05, |
|
"loss": 0.4593, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 27.93, |
|
"learning_rate": 4.361921603300913e-05, |
|
"loss": 0.4615, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7684848484848484, |
|
"eval_loss": 0.4439120888710022, |
|
"eval_runtime": 23.7726, |
|
"eval_samples_per_second": 34.704, |
|
"eval_steps_per_second": 1.094, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"learning_rate": 4.3545534924845275e-05, |
|
"loss": 0.4587, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 28.28, |
|
"learning_rate": 4.347185381668141e-05, |
|
"loss": 0.4515, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"learning_rate": 4.339817270851754e-05, |
|
"loss": 0.4412, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 28.62, |
|
"learning_rate": 4.3324491600353674e-05, |
|
"loss": 0.4651, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 28.79, |
|
"learning_rate": 4.32508104921898e-05, |
|
"loss": 0.4595, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 4.317712938402594e-05, |
|
"loss": 0.4588, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7830303030303031, |
|
"eval_loss": 0.4377860128879547, |
|
"eval_runtime": 23.7726, |
|
"eval_samples_per_second": 34.704, |
|
"eval_steps_per_second": 1.094, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"learning_rate": 4.3103448275862066e-05, |
|
"loss": 0.4383, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 29.31, |
|
"learning_rate": 4.30297671676982e-05, |
|
"loss": 0.442, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 29.48, |
|
"learning_rate": 4.295608605953434e-05, |
|
"loss": 0.4427, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"learning_rate": 4.288240495137047e-05, |
|
"loss": 0.4761, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 4.2808723843206607e-05, |
|
"loss": 0.4376, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.4588, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7987878787878788, |
|
"eval_loss": 0.4229122996330261, |
|
"eval_runtime": 23.6895, |
|
"eval_samples_per_second": 34.825, |
|
"eval_steps_per_second": 1.098, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 30.17, |
|
"learning_rate": 4.266136162687887e-05, |
|
"loss": 0.4711, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 30.34, |
|
"learning_rate": 4.2587680518715e-05, |
|
"loss": 0.4687, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 30.52, |
|
"learning_rate": 4.2513999410551134e-05, |
|
"loss": 0.4489, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 30.69, |
|
"learning_rate": 4.244031830238727e-05, |
|
"loss": 0.4281, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"learning_rate": 4.2366637194223404e-05, |
|
"loss": 0.4296, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7975757575757576, |
|
"eval_loss": 0.41882261633872986, |
|
"eval_runtime": 23.7458, |
|
"eval_samples_per_second": 34.743, |
|
"eval_steps_per_second": 1.095, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 31.03, |
|
"learning_rate": 4.229295608605954e-05, |
|
"loss": 0.4495, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 31.21, |
|
"learning_rate": 4.221927497789567e-05, |
|
"loss": 0.427, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 31.38, |
|
"learning_rate": 4.21455938697318e-05, |
|
"loss": 0.4403, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 31.55, |
|
"learning_rate": 4.207191276156794e-05, |
|
"loss": 0.4552, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 31.72, |
|
"learning_rate": 4.199823165340407e-05, |
|
"loss": 0.4402, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 31.9, |
|
"learning_rate": 4.19245505452402e-05, |
|
"loss": 0.4208, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7890909090909091, |
|
"eval_loss": 0.4315713047981262, |
|
"eval_runtime": 23.5404, |
|
"eval_samples_per_second": 35.046, |
|
"eval_steps_per_second": 1.104, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 32.07, |
|
"learning_rate": 4.185086943707634e-05, |
|
"loss": 0.433, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"learning_rate": 4.177718832891247e-05, |
|
"loss": 0.422, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"learning_rate": 4.17035072207486e-05, |
|
"loss": 0.4428, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 32.59, |
|
"learning_rate": 4.1629826112584736e-05, |
|
"loss": 0.4484, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 32.76, |
|
"learning_rate": 4.155614500442087e-05, |
|
"loss": 0.4321, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 32.93, |
|
"learning_rate": 4.1482463896257e-05, |
|
"loss": 0.4481, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7890909090909091, |
|
"eval_loss": 0.4330705404281616, |
|
"eval_runtime": 23.4646, |
|
"eval_samples_per_second": 35.159, |
|
"eval_steps_per_second": 1.108, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 33.1, |
|
"learning_rate": 4.1408782788093135e-05, |
|
"loss": 0.4067, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"learning_rate": 4.1335101679929264e-05, |
|
"loss": 0.4299, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 33.45, |
|
"learning_rate": 4.1261420571765406e-05, |
|
"loss": 0.4456, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 33.62, |
|
"learning_rate": 4.1187739463601534e-05, |
|
"loss": 0.4536, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 33.79, |
|
"learning_rate": 4.111405835543767e-05, |
|
"loss": 0.4468, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 4.1040377247273805e-05, |
|
"loss": 0.4253, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7878787878787878, |
|
"eval_loss": 0.45240843296051025, |
|
"eval_runtime": 23.5889, |
|
"eval_samples_per_second": 34.974, |
|
"eval_steps_per_second": 1.102, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 34.14, |
|
"learning_rate": 4.096669613910993e-05, |
|
"loss": 0.429, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 34.31, |
|
"learning_rate": 4.089301503094607e-05, |
|
"loss": 0.4132, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"learning_rate": 4.08193339227822e-05, |
|
"loss": 0.4383, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 34.66, |
|
"learning_rate": 4.074565281461833e-05, |
|
"loss": 0.4453, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 34.83, |
|
"learning_rate": 4.067197170645447e-05, |
|
"loss": 0.4471, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.4117, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7951515151515152, |
|
"eval_loss": 0.4569982588291168, |
|
"eval_runtime": 23.745, |
|
"eval_samples_per_second": 34.744, |
|
"eval_steps_per_second": 1.095, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 35.17, |
|
"learning_rate": 4.052460949012674e-05, |
|
"loss": 0.4564, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 35.34, |
|
"learning_rate": 4.0450928381962866e-05, |
|
"loss": 0.43, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"learning_rate": 4.0377247273799e-05, |
|
"loss": 0.4316, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 4.030356616563513e-05, |
|
"loss": 0.4124, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 35.86, |
|
"learning_rate": 4.0229885057471265e-05, |
|
"loss": 0.4405, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7927272727272727, |
|
"eval_loss": 0.4307224452495575, |
|
"eval_runtime": 23.8059, |
|
"eval_samples_per_second": 34.655, |
|
"eval_steps_per_second": 1.092, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 36.03, |
|
"learning_rate": 4.01562039493074e-05, |
|
"loss": 0.4195, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"learning_rate": 4.0082522841143535e-05, |
|
"loss": 0.4157, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 4.000884173297967e-05, |
|
"loss": 0.4095, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"learning_rate": 3.99351606248158e-05, |
|
"loss": 0.433, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"learning_rate": 3.9861479516651934e-05, |
|
"loss": 0.4253, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"learning_rate": 3.978779840848806e-05, |
|
"loss": 0.4154, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8024242424242424, |
|
"eval_loss": 0.4256601929664612, |
|
"eval_runtime": 23.7856, |
|
"eval_samples_per_second": 34.685, |
|
"eval_steps_per_second": 1.093, |
|
"step": 2146 |
|
}, |
|
{ |
|
"epoch": 37.07, |
|
"learning_rate": 3.97141173003242e-05, |
|
"loss": 0.4281, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 37.24, |
|
"learning_rate": 3.9640436192160326e-05, |
|
"loss": 0.4143, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 37.41, |
|
"learning_rate": 3.956675508399647e-05, |
|
"loss": 0.4141, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 37.59, |
|
"learning_rate": 3.9493073975832604e-05, |
|
"loss": 0.4121, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 37.76, |
|
"learning_rate": 3.941939286766873e-05, |
|
"loss": 0.3822, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 37.93, |
|
"learning_rate": 3.934571175950487e-05, |
|
"loss": 0.3962, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7818181818181819, |
|
"eval_loss": 0.5076580047607422, |
|
"eval_runtime": 23.8225, |
|
"eval_samples_per_second": 34.631, |
|
"eval_steps_per_second": 1.091, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"learning_rate": 3.9272030651340996e-05, |
|
"loss": 0.4175, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 38.28, |
|
"learning_rate": 3.919834954317713e-05, |
|
"loss": 0.3774, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 38.45, |
|
"learning_rate": 3.912466843501326e-05, |
|
"loss": 0.4298, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 38.62, |
|
"learning_rate": 3.9050987326849395e-05, |
|
"loss": 0.4265, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 38.79, |
|
"learning_rate": 3.897730621868553e-05, |
|
"loss": 0.4221, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 38.97, |
|
"learning_rate": 3.8903625110521665e-05, |
|
"loss": 0.414, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8012121212121213, |
|
"eval_loss": 0.46023038029670715, |
|
"eval_runtime": 23.876, |
|
"eval_samples_per_second": 34.554, |
|
"eval_steps_per_second": 1.089, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 39.14, |
|
"learning_rate": 3.88299440023578e-05, |
|
"loss": 0.4109, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 39.31, |
|
"learning_rate": 3.875626289419393e-05, |
|
"loss": 0.417, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 39.48, |
|
"learning_rate": 3.8682581786030064e-05, |
|
"loss": 0.4014, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 39.66, |
|
"learning_rate": 3.860890067786619e-05, |
|
"loss": 0.4083, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"learning_rate": 3.853521956970233e-05, |
|
"loss": 0.4257, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.3937, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.776969696969697, |
|
"eval_loss": 0.47407081723213196, |
|
"eval_runtime": 23.7803, |
|
"eval_samples_per_second": 34.693, |
|
"eval_steps_per_second": 1.093, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 40.17, |
|
"learning_rate": 3.83878573533746e-05, |
|
"loss": 0.411, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 40.34, |
|
"learning_rate": 3.831417624521073e-05, |
|
"loss": 0.4009, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 40.52, |
|
"learning_rate": 3.824049513704686e-05, |
|
"loss": 0.3884, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 40.69, |
|
"learning_rate": 3.8166814028883e-05, |
|
"loss": 0.3872, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 3.8093132920719125e-05, |
|
"loss": 0.4186, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.4250431954860687, |
|
"eval_runtime": 23.787, |
|
"eval_samples_per_second": 34.683, |
|
"eval_steps_per_second": 1.093, |
|
"step": 2378 |
|
}, |
|
{ |
|
"epoch": 41.03, |
|
"learning_rate": 3.801945181255526e-05, |
|
"loss": 0.4068, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"learning_rate": 3.7945770704391396e-05, |
|
"loss": 0.4079, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 41.38, |
|
"learning_rate": 3.787208959622753e-05, |
|
"loss": 0.3819, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 41.55, |
|
"learning_rate": 3.7798408488063666e-05, |
|
"loss": 0.3746, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 41.72, |
|
"learning_rate": 3.7724727379899795e-05, |
|
"loss": 0.4099, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"learning_rate": 3.765104627173593e-05, |
|
"loss": 0.4076, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7987878787878788, |
|
"eval_loss": 0.43525293469429016, |
|
"eval_runtime": 23.6926, |
|
"eval_samples_per_second": 34.821, |
|
"eval_steps_per_second": 1.097, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 42.07, |
|
"learning_rate": 3.757736516357206e-05, |
|
"loss": 0.3984, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 42.24, |
|
"learning_rate": 3.7503684055408194e-05, |
|
"loss": 0.3825, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 42.41, |
|
"learning_rate": 3.743000294724433e-05, |
|
"loss": 0.3882, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 42.59, |
|
"learning_rate": 3.735632183908046e-05, |
|
"loss": 0.3785, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 42.76, |
|
"learning_rate": 3.728264073091659e-05, |
|
"loss": 0.3812, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 42.93, |
|
"learning_rate": 3.720895962275273e-05, |
|
"loss": 0.3777, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7878787878787878, |
|
"eval_loss": 0.44424664974212646, |
|
"eval_runtime": 23.6849, |
|
"eval_samples_per_second": 34.832, |
|
"eval_steps_per_second": 1.098, |
|
"step": 2494 |
|
}, |
|
{ |
|
"epoch": 43.1, |
|
"learning_rate": 3.713527851458886e-05, |
|
"loss": 0.4097, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 43.28, |
|
"learning_rate": 3.7061597406425e-05, |
|
"loss": 0.3877, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"learning_rate": 3.698791629826113e-05, |
|
"loss": 0.3915, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 43.62, |
|
"learning_rate": 3.691423519009726e-05, |
|
"loss": 0.381, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"learning_rate": 3.684055408193339e-05, |
|
"loss": 0.3747, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 43.97, |
|
"learning_rate": 3.6766872973769526e-05, |
|
"loss": 0.3968, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7878787878787878, |
|
"eval_loss": 0.45245206356048584, |
|
"eval_runtime": 23.6647, |
|
"eval_samples_per_second": 34.862, |
|
"eval_steps_per_second": 1.099, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 44.14, |
|
"learning_rate": 3.669319186560566e-05, |
|
"loss": 0.3708, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"learning_rate": 3.6619510757441796e-05, |
|
"loss": 0.3856, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 44.48, |
|
"learning_rate": 3.654582964927793e-05, |
|
"loss": 0.3636, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 44.66, |
|
"learning_rate": 3.647214854111406e-05, |
|
"loss": 0.3821, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 44.83, |
|
"learning_rate": 3.6398467432950195e-05, |
|
"loss": 0.3688, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.377, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7987878787878788, |
|
"eval_loss": 0.4198453724384308, |
|
"eval_runtime": 23.5811, |
|
"eval_samples_per_second": 34.986, |
|
"eval_steps_per_second": 1.103, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 45.17, |
|
"learning_rate": 3.625110521662246e-05, |
|
"loss": 0.3546, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 45.34, |
|
"learning_rate": 3.6177424108458594e-05, |
|
"loss": 0.4073, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 45.52, |
|
"learning_rate": 3.610374300029473e-05, |
|
"loss": 0.3891, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 45.69, |
|
"learning_rate": 3.6030061892130864e-05, |
|
"loss": 0.3791, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 45.86, |
|
"learning_rate": 3.595638078396699e-05, |
|
"loss": 0.378, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8096969696969697, |
|
"eval_loss": 0.429715096950531, |
|
"eval_runtime": 23.221, |
|
"eval_samples_per_second": 35.528, |
|
"eval_steps_per_second": 1.12, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 46.03, |
|
"learning_rate": 3.588269967580313e-05, |
|
"loss": 0.3568, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"learning_rate": 3.5809018567639256e-05, |
|
"loss": 0.3727, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 3.573533745947539e-05, |
|
"loss": 0.401, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"learning_rate": 3.566165635131152e-05, |
|
"loss": 0.3767, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"learning_rate": 3.558797524314766e-05, |
|
"loss": 0.3557, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 46.9, |
|
"learning_rate": 3.55142941349838e-05, |
|
"loss": 0.3675, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.8084848484848485, |
|
"eval_loss": 0.44349896907806396, |
|
"eval_runtime": 23.265, |
|
"eval_samples_per_second": 35.461, |
|
"eval_steps_per_second": 1.118, |
|
"step": 2726 |
|
}, |
|
{ |
|
"epoch": 47.07, |
|
"learning_rate": 3.5440613026819926e-05, |
|
"loss": 0.3693, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 47.24, |
|
"learning_rate": 3.536693191865606e-05, |
|
"loss": 0.363, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 47.41, |
|
"learning_rate": 3.529325081049219e-05, |
|
"loss": 0.3703, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 47.59, |
|
"learning_rate": 3.5219569702328325e-05, |
|
"loss": 0.3777, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 47.76, |
|
"learning_rate": 3.514588859416445e-05, |
|
"loss": 0.3932, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 47.93, |
|
"learning_rate": 3.507220748600059e-05, |
|
"loss": 0.3562, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7951515151515152, |
|
"eval_loss": 0.4477289617061615, |
|
"eval_runtime": 23.5088, |
|
"eval_samples_per_second": 35.093, |
|
"eval_steps_per_second": 1.106, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 48.1, |
|
"learning_rate": 3.4998526377836724e-05, |
|
"loss": 0.3805, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 48.28, |
|
"learning_rate": 3.492484526967286e-05, |
|
"loss": 0.3522, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 48.45, |
|
"learning_rate": 3.4851164161508994e-05, |
|
"loss": 0.3832, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"learning_rate": 3.477748305334512e-05, |
|
"loss": 0.341, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 48.79, |
|
"learning_rate": 3.470380194518126e-05, |
|
"loss": 0.3724, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 48.97, |
|
"learning_rate": 3.4630120837017386e-05, |
|
"loss": 0.381, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8254545454545454, |
|
"eval_loss": 0.4206140637397766, |
|
"eval_runtime": 23.5968, |
|
"eval_samples_per_second": 34.962, |
|
"eval_steps_per_second": 1.102, |
|
"step": 2842 |
|
}, |
|
{ |
|
"epoch": 49.14, |
|
"learning_rate": 3.455643972885352e-05, |
|
"loss": 0.3532, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 49.31, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 0.3457, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 49.48, |
|
"learning_rate": 3.440907751252579e-05, |
|
"loss": 0.3553, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 49.66, |
|
"learning_rate": 3.433539640436193e-05, |
|
"loss": 0.3458, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 49.83, |
|
"learning_rate": 3.4261715296198055e-05, |
|
"loss": 0.3468, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.3603, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8109090909090909, |
|
"eval_loss": 0.4136010706424713, |
|
"eval_runtime": 23.4516, |
|
"eval_samples_per_second": 35.179, |
|
"eval_steps_per_second": 1.109, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 50.17, |
|
"learning_rate": 3.411435307987032e-05, |
|
"loss": 0.3554, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 50.34, |
|
"learning_rate": 3.4040671971706454e-05, |
|
"loss": 0.3537, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 50.52, |
|
"learning_rate": 3.396699086354259e-05, |
|
"loss": 0.3603, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 50.69, |
|
"learning_rate": 3.3893309755378725e-05, |
|
"loss": 0.375, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 50.86, |
|
"learning_rate": 3.381962864721486e-05, |
|
"loss": 0.3331, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.823030303030303, |
|
"eval_loss": 0.414115846157074, |
|
"eval_runtime": 23.5379, |
|
"eval_samples_per_second": 35.05, |
|
"eval_steps_per_second": 1.105, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 51.03, |
|
"learning_rate": 3.374594753905099e-05, |
|
"loss": 0.3698, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 51.21, |
|
"learning_rate": 3.3672266430887124e-05, |
|
"loss": 0.3428, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 51.38, |
|
"learning_rate": 3.359858532272325e-05, |
|
"loss": 0.3563, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 51.55, |
|
"learning_rate": 3.352490421455939e-05, |
|
"loss": 0.346, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 51.72, |
|
"learning_rate": 3.345122310639552e-05, |
|
"loss": 0.3513, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 51.9, |
|
"learning_rate": 3.337754199823165e-05, |
|
"loss": 0.3471, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8109090909090909, |
|
"eval_loss": 0.42530202865600586, |
|
"eval_runtime": 23.5008, |
|
"eval_samples_per_second": 35.105, |
|
"eval_steps_per_second": 1.106, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 52.07, |
|
"learning_rate": 3.3303860890067786e-05, |
|
"loss": 0.3577, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 52.24, |
|
"learning_rate": 3.323017978190392e-05, |
|
"loss": 0.3557, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 52.41, |
|
"learning_rate": 3.315649867374006e-05, |
|
"loss": 0.341, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 52.59, |
|
"learning_rate": 3.3082817565576185e-05, |
|
"loss": 0.3387, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 52.76, |
|
"learning_rate": 3.300913645741232e-05, |
|
"loss": 0.3559, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 52.93, |
|
"learning_rate": 3.2935455349248456e-05, |
|
"loss": 0.346, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.8048484848484848, |
|
"eval_loss": 0.5203397870063782, |
|
"eval_runtime": 23.5051, |
|
"eval_samples_per_second": 35.099, |
|
"eval_steps_per_second": 1.106, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 53.1, |
|
"learning_rate": 3.2861774241084584e-05, |
|
"loss": 0.3432, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 3.278809313292072e-05, |
|
"loss": 0.3371, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 53.45, |
|
"learning_rate": 3.2714412024756855e-05, |
|
"loss": 0.3202, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 53.62, |
|
"learning_rate": 3.264073091659299e-05, |
|
"loss": 0.3356, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 53.79, |
|
"learning_rate": 3.256704980842912e-05, |
|
"loss": 0.34, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 53.97, |
|
"learning_rate": 3.2493368700265253e-05, |
|
"loss": 0.3481, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8242424242424242, |
|
"eval_loss": 0.428822785615921, |
|
"eval_runtime": 23.5443, |
|
"eval_samples_per_second": 35.04, |
|
"eval_steps_per_second": 1.104, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 54.14, |
|
"learning_rate": 3.241968759210139e-05, |
|
"loss": 0.3318, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 54.31, |
|
"learning_rate": 3.234600648393752e-05, |
|
"loss": 0.3512, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 54.48, |
|
"learning_rate": 3.227232537577365e-05, |
|
"loss": 0.3605, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 54.66, |
|
"learning_rate": 3.219864426760979e-05, |
|
"loss": 0.3304, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 54.83, |
|
"learning_rate": 3.212496315944592e-05, |
|
"loss": 0.2938, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.3411, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.8193939393939393, |
|
"eval_loss": 0.44161370396614075, |
|
"eval_runtime": 23.4762, |
|
"eval_samples_per_second": 35.142, |
|
"eval_steps_per_second": 1.108, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 55.17, |
|
"learning_rate": 3.1977600943118186e-05, |
|
"loss": 0.3197, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 55.34, |
|
"learning_rate": 3.190391983495432e-05, |
|
"loss": 0.3313, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 55.52, |
|
"learning_rate": 3.183023872679045e-05, |
|
"loss": 0.3374, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 55.69, |
|
"learning_rate": 3.1756557618626585e-05, |
|
"loss": 0.3252, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 55.86, |
|
"learning_rate": 3.1682876510462714e-05, |
|
"loss": 0.3275, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8290909090909091, |
|
"eval_loss": 0.41490283608436584, |
|
"eval_runtime": 23.5685, |
|
"eval_samples_per_second": 35.004, |
|
"eval_steps_per_second": 1.103, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 56.03, |
|
"learning_rate": 3.160919540229885e-05, |
|
"loss": 0.315, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 56.21, |
|
"learning_rate": 3.153551429413499e-05, |
|
"loss": 0.2995, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 56.38, |
|
"learning_rate": 3.146183318597112e-05, |
|
"loss": 0.309, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 56.55, |
|
"learning_rate": 3.1388152077807255e-05, |
|
"loss": 0.3324, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 56.72, |
|
"learning_rate": 3.131447096964338e-05, |
|
"loss": 0.3244, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 56.9, |
|
"learning_rate": 3.124078986147952e-05, |
|
"loss": 0.3067, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.8218181818181818, |
|
"eval_loss": 0.4623379111289978, |
|
"eval_runtime": 23.5221, |
|
"eval_samples_per_second": 35.073, |
|
"eval_steps_per_second": 1.105, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 57.07, |
|
"learning_rate": 3.116710875331565e-05, |
|
"loss": 0.331, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 57.24, |
|
"learning_rate": 3.109342764515178e-05, |
|
"loss": 0.3237, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 57.41, |
|
"learning_rate": 3.101974653698792e-05, |
|
"loss": 0.2951, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 57.59, |
|
"learning_rate": 3.094606542882405e-05, |
|
"loss": 0.3631, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 57.76, |
|
"learning_rate": 3.087238432066019e-05, |
|
"loss": 0.3232, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 57.93, |
|
"learning_rate": 3.0798703212496316e-05, |
|
"loss": 0.3166, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8254545454545454, |
|
"eval_loss": 0.44324037432670593, |
|
"eval_runtime": 23.5517, |
|
"eval_samples_per_second": 35.029, |
|
"eval_steps_per_second": 1.104, |
|
"step": 3364 |
|
}, |
|
{ |
|
"epoch": 58.1, |
|
"learning_rate": 3.072502210433245e-05, |
|
"loss": 0.3452, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 58.28, |
|
"learning_rate": 3.065134099616858e-05, |
|
"loss": 0.3119, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 58.45, |
|
"learning_rate": 3.0577659888004715e-05, |
|
"loss": 0.3145, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 58.62, |
|
"learning_rate": 3.0503978779840854e-05, |
|
"loss": 0.3023, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 58.79, |
|
"learning_rate": 3.0430297671676982e-05, |
|
"loss": 0.3252, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"learning_rate": 3.0356616563513117e-05, |
|
"loss": 0.3294, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8266666666666667, |
|
"eval_loss": 0.4598628282546997, |
|
"eval_runtime": 23.497, |
|
"eval_samples_per_second": 35.111, |
|
"eval_steps_per_second": 1.107, |
|
"step": 3422 |
|
}, |
|
{ |
|
"epoch": 59.14, |
|
"learning_rate": 3.028293545534925e-05, |
|
"loss": 0.3364, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 59.31, |
|
"learning_rate": 3.0209254347185384e-05, |
|
"loss": 0.3236, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 59.48, |
|
"learning_rate": 3.0135573239021513e-05, |
|
"loss": 0.3239, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 59.66, |
|
"learning_rate": 3.0061892130857648e-05, |
|
"loss": 0.302, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 59.83, |
|
"learning_rate": 2.9988211022693787e-05, |
|
"loss": 0.3141, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.3146, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8290909090909091, |
|
"eval_loss": 0.42662811279296875, |
|
"eval_runtime": 23.5987, |
|
"eval_samples_per_second": 34.96, |
|
"eval_steps_per_second": 1.102, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 60.17, |
|
"learning_rate": 2.984084880636605e-05, |
|
"loss": 0.3119, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 60.34, |
|
"learning_rate": 2.9767167698202182e-05, |
|
"loss": 0.3088, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 60.52, |
|
"learning_rate": 2.9693486590038317e-05, |
|
"loss": 0.3039, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 60.69, |
|
"learning_rate": 2.9619805481874446e-05, |
|
"loss": 0.289, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 60.86, |
|
"learning_rate": 2.954612437371058e-05, |
|
"loss": 0.3091, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8315151515151515, |
|
"eval_loss": 0.43179792165756226, |
|
"eval_runtime": 23.5411, |
|
"eval_samples_per_second": 35.045, |
|
"eval_steps_per_second": 1.104, |
|
"step": 3538 |
|
}, |
|
{ |
|
"epoch": 61.03, |
|
"learning_rate": 2.9472443265546716e-05, |
|
"loss": 0.3085, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 61.21, |
|
"learning_rate": 2.9398762157382848e-05, |
|
"loss": 0.3185, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 61.38, |
|
"learning_rate": 2.9325081049218983e-05, |
|
"loss": 0.3038, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 61.55, |
|
"learning_rate": 2.9251399941055112e-05, |
|
"loss": 0.319, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 61.72, |
|
"learning_rate": 2.9177718832891247e-05, |
|
"loss": 0.3186, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 61.9, |
|
"learning_rate": 2.910403772472738e-05, |
|
"loss": 0.3277, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8242424242424242, |
|
"eval_loss": 0.42517799139022827, |
|
"eval_runtime": 23.4517, |
|
"eval_samples_per_second": 35.179, |
|
"eval_steps_per_second": 1.109, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 62.07, |
|
"learning_rate": 2.9030356616563514e-05, |
|
"loss": 0.3066, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 62.24, |
|
"learning_rate": 2.895667550839965e-05, |
|
"loss": 0.304, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 62.41, |
|
"learning_rate": 2.888299440023578e-05, |
|
"loss": 0.2949, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 62.59, |
|
"learning_rate": 2.8809313292071916e-05, |
|
"loss": 0.317, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 62.76, |
|
"learning_rate": 2.8735632183908045e-05, |
|
"loss": 0.3023, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 62.93, |
|
"learning_rate": 2.866195107574418e-05, |
|
"loss": 0.296, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.8436363636363636, |
|
"eval_loss": 0.4332396686077118, |
|
"eval_runtime": 23.5392, |
|
"eval_samples_per_second": 35.048, |
|
"eval_steps_per_second": 1.105, |
|
"step": 3654 |
|
}, |
|
{ |
|
"epoch": 63.1, |
|
"learning_rate": 2.8588269967580312e-05, |
|
"loss": 0.282, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 63.28, |
|
"learning_rate": 2.8514588859416447e-05, |
|
"loss": 0.3217, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 63.45, |
|
"learning_rate": 2.8440907751252582e-05, |
|
"loss": 0.3216, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 63.62, |
|
"learning_rate": 2.836722664308871e-05, |
|
"loss": 0.3085, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 63.79, |
|
"learning_rate": 2.829354553492485e-05, |
|
"loss": 0.2937, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"learning_rate": 2.8219864426760978e-05, |
|
"loss": 0.3241, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8193939393939393, |
|
"eval_loss": 0.4729117155075073, |
|
"eval_runtime": 23.5182, |
|
"eval_samples_per_second": 35.079, |
|
"eval_steps_per_second": 1.106, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 64.14, |
|
"learning_rate": 2.8146183318597113e-05, |
|
"loss": 0.3252, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 64.31, |
|
"learning_rate": 2.8072502210433245e-05, |
|
"loss": 0.2944, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 2.799882110226938e-05, |
|
"loss": 0.2968, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 64.66, |
|
"learning_rate": 2.7925139994105515e-05, |
|
"loss": 0.2836, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 64.83, |
|
"learning_rate": 2.7851458885941644e-05, |
|
"loss": 0.2921, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3104, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8448484848484848, |
|
"eval_loss": 0.42283061146736145, |
|
"eval_runtime": 23.4657, |
|
"eval_samples_per_second": 35.158, |
|
"eval_steps_per_second": 1.108, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 65.17, |
|
"learning_rate": 2.770409666961391e-05, |
|
"loss": 0.3001, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 65.34, |
|
"learning_rate": 2.7630415561450046e-05, |
|
"loss": 0.2867, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 65.52, |
|
"learning_rate": 2.7556734453286175e-05, |
|
"loss": 0.3004, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"learning_rate": 2.7483053345122313e-05, |
|
"loss": 0.2981, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 65.86, |
|
"learning_rate": 2.740937223695845e-05, |
|
"loss": 0.2878, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8387878787878787, |
|
"eval_loss": 0.4173077940940857, |
|
"eval_runtime": 23.4471, |
|
"eval_samples_per_second": 35.186, |
|
"eval_steps_per_second": 1.109, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 66.03, |
|
"learning_rate": 2.7335691128794577e-05, |
|
"loss": 0.2834, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 66.21, |
|
"learning_rate": 2.7262010020630712e-05, |
|
"loss": 0.2924, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 66.38, |
|
"learning_rate": 2.7188328912466844e-05, |
|
"loss": 0.2827, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 66.55, |
|
"learning_rate": 2.711464780430298e-05, |
|
"loss": 0.3193, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"learning_rate": 2.7040966696139114e-05, |
|
"loss": 0.2804, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 66.9, |
|
"learning_rate": 2.6967285587975243e-05, |
|
"loss": 0.265, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8496969696969697, |
|
"eval_loss": 0.42103952169418335, |
|
"eval_runtime": 23.5916, |
|
"eval_samples_per_second": 34.97, |
|
"eval_steps_per_second": 1.102, |
|
"step": 3886 |
|
}, |
|
{ |
|
"epoch": 67.07, |
|
"learning_rate": 2.6893604479811378e-05, |
|
"loss": 0.2987, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 67.24, |
|
"learning_rate": 2.681992337164751e-05, |
|
"loss": 0.2778, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 67.41, |
|
"learning_rate": 2.6746242263483645e-05, |
|
"loss": 0.2719, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 67.59, |
|
"learning_rate": 2.6672561155319774e-05, |
|
"loss": 0.3142, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 67.76, |
|
"learning_rate": 2.6598880047155912e-05, |
|
"loss": 0.2752, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 67.93, |
|
"learning_rate": 2.6525198938992047e-05, |
|
"loss": 0.3011, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8436363636363636, |
|
"eval_loss": 0.42762354016304016, |
|
"eval_runtime": 23.5177, |
|
"eval_samples_per_second": 35.08, |
|
"eval_steps_per_second": 1.106, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 68.1, |
|
"learning_rate": 2.6451517830828176e-05, |
|
"loss": 0.2809, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 68.28, |
|
"learning_rate": 2.637783672266431e-05, |
|
"loss": 0.304, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 68.45, |
|
"learning_rate": 2.6304155614500443e-05, |
|
"loss": 0.276, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 68.62, |
|
"learning_rate": 2.6230474506336578e-05, |
|
"loss": 0.2789, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 68.79, |
|
"learning_rate": 2.6156793398172707e-05, |
|
"loss": 0.2942, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 68.97, |
|
"learning_rate": 2.6083112290008842e-05, |
|
"loss": 0.2861, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8315151515151515, |
|
"eval_loss": 0.49231433868408203, |
|
"eval_runtime": 23.5439, |
|
"eval_samples_per_second": 35.041, |
|
"eval_steps_per_second": 1.104, |
|
"step": 4002 |
|
}, |
|
{ |
|
"epoch": 69.14, |
|
"learning_rate": 2.6009431181844977e-05, |
|
"loss": 0.2987, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 69.31, |
|
"learning_rate": 2.593575007368111e-05, |
|
"loss": 0.2641, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 69.48, |
|
"learning_rate": 2.5862068965517244e-05, |
|
"loss": 0.2815, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 69.66, |
|
"learning_rate": 2.5788387857353376e-05, |
|
"loss": 0.2848, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 69.83, |
|
"learning_rate": 2.571470674918951e-05, |
|
"loss": 0.2739, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.2994, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.4471631944179535, |
|
"eval_runtime": 23.5918, |
|
"eval_samples_per_second": 34.97, |
|
"eval_steps_per_second": 1.102, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 70.17, |
|
"learning_rate": 2.5567344532861775e-05, |
|
"loss": 0.2589, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 70.34, |
|
"learning_rate": 2.549366342469791e-05, |
|
"loss": 0.2623, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 70.52, |
|
"learning_rate": 2.5419982316534042e-05, |
|
"loss": 0.3048, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 70.69, |
|
"learning_rate": 2.5346301208370177e-05, |
|
"loss": 0.2904, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 70.86, |
|
"learning_rate": 2.5272620100206306e-05, |
|
"loss": 0.276, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8315151515151515, |
|
"eval_loss": 0.45405274629592896, |
|
"eval_runtime": 23.4571, |
|
"eval_samples_per_second": 35.171, |
|
"eval_steps_per_second": 1.108, |
|
"step": 4118 |
|
}, |
|
{ |
|
"epoch": 71.03, |
|
"learning_rate": 2.519893899204244e-05, |
|
"loss": 0.2617, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 71.21, |
|
"learning_rate": 2.5125257883878573e-05, |
|
"loss": 0.2859, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 71.38, |
|
"learning_rate": 2.5051576775714708e-05, |
|
"loss": 0.2266, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 71.55, |
|
"learning_rate": 2.497789566755084e-05, |
|
"loss": 0.2747, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 71.72, |
|
"learning_rate": 2.4904214559386975e-05, |
|
"loss": 0.2633, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 71.9, |
|
"learning_rate": 2.4830533451223107e-05, |
|
"loss": 0.2796, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8521212121212122, |
|
"eval_loss": 0.42177069187164307, |
|
"eval_runtime": 23.6287, |
|
"eval_samples_per_second": 34.915, |
|
"eval_steps_per_second": 1.1, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 72.07, |
|
"learning_rate": 2.4756852343059242e-05, |
|
"loss": 0.2803, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 72.24, |
|
"learning_rate": 2.4683171234895374e-05, |
|
"loss": 0.2934, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 72.41, |
|
"learning_rate": 2.460949012673151e-05, |
|
"loss": 0.2875, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 72.59, |
|
"learning_rate": 2.453580901856764e-05, |
|
"loss": 0.2658, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 72.76, |
|
"learning_rate": 2.4462127910403773e-05, |
|
"loss": 0.2789, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 72.93, |
|
"learning_rate": 2.4388446802239905e-05, |
|
"loss": 0.2727, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.8448484848484848, |
|
"eval_loss": 0.40529289841651917, |
|
"eval_runtime": 23.537, |
|
"eval_samples_per_second": 35.051, |
|
"eval_steps_per_second": 1.105, |
|
"step": 4234 |
|
}, |
|
{ |
|
"epoch": 73.1, |
|
"learning_rate": 2.431476569407604e-05, |
|
"loss": 0.2701, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"learning_rate": 2.4241084585912175e-05, |
|
"loss": 0.2672, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 73.45, |
|
"learning_rate": 2.4167403477748307e-05, |
|
"loss": 0.2628, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 73.62, |
|
"learning_rate": 2.409372236958444e-05, |
|
"loss": 0.2631, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 73.79, |
|
"learning_rate": 2.4020041261420574e-05, |
|
"loss": 0.279, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 73.97, |
|
"learning_rate": 2.3946360153256706e-05, |
|
"loss": 0.255, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8375757575757575, |
|
"eval_loss": 0.4356235861778259, |
|
"eval_runtime": 23.5987, |
|
"eval_samples_per_second": 34.96, |
|
"eval_steps_per_second": 1.102, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 74.14, |
|
"learning_rate": 2.3872679045092838e-05, |
|
"loss": 0.2618, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 74.31, |
|
"learning_rate": 2.3798997936928973e-05, |
|
"loss": 0.2579, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 74.48, |
|
"learning_rate": 2.3725316828765108e-05, |
|
"loss": 0.2578, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 74.66, |
|
"learning_rate": 2.365163572060124e-05, |
|
"loss": 0.2463, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 74.83, |
|
"learning_rate": 2.3577954612437372e-05, |
|
"loss": 0.2835, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.276, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.8436363636363636, |
|
"eval_loss": 0.4193030595779419, |
|
"eval_runtime": 23.6479, |
|
"eval_samples_per_second": 34.887, |
|
"eval_steps_per_second": 1.099, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 75.17, |
|
"learning_rate": 2.343059239610964e-05, |
|
"loss": 0.251, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 75.34, |
|
"learning_rate": 2.335691128794577e-05, |
|
"loss": 0.2592, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 75.52, |
|
"learning_rate": 2.3283230179781906e-05, |
|
"loss": 0.2296, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 75.69, |
|
"learning_rate": 2.3209549071618038e-05, |
|
"loss": 0.2585, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 75.86, |
|
"learning_rate": 2.3135867963454173e-05, |
|
"loss": 0.261, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8533333333333334, |
|
"eval_loss": 0.44841518998146057, |
|
"eval_runtime": 23.4791, |
|
"eval_samples_per_second": 35.138, |
|
"eval_steps_per_second": 1.107, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 76.03, |
|
"learning_rate": 2.3062186855290305e-05, |
|
"loss": 0.2527, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 76.21, |
|
"learning_rate": 2.2988505747126437e-05, |
|
"loss": 0.2741, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 76.38, |
|
"learning_rate": 2.2914824638962572e-05, |
|
"loss": 0.2554, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 76.55, |
|
"learning_rate": 2.2841143530798704e-05, |
|
"loss": 0.2446, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 76.72, |
|
"learning_rate": 2.276746242263484e-05, |
|
"loss": 0.2483, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 76.9, |
|
"learning_rate": 2.269378131447097e-05, |
|
"loss": 0.2416, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.8193939393939393, |
|
"eval_loss": 0.47216886281967163, |
|
"eval_runtime": 23.9173, |
|
"eval_samples_per_second": 34.494, |
|
"eval_steps_per_second": 1.087, |
|
"step": 4466 |
|
}, |
|
{ |
|
"epoch": 77.07, |
|
"learning_rate": 2.2620100206307106e-05, |
|
"loss": 0.2914, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 77.24, |
|
"learning_rate": 2.2546419098143238e-05, |
|
"loss": 0.2398, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 77.41, |
|
"learning_rate": 2.247273798997937e-05, |
|
"loss": 0.2421, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 77.59, |
|
"learning_rate": 2.23990568818155e-05, |
|
"loss": 0.2533, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 77.76, |
|
"learning_rate": 2.2325375773651637e-05, |
|
"loss": 0.2599, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 77.93, |
|
"learning_rate": 2.2251694665487772e-05, |
|
"loss": 0.2602, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8533333333333334, |
|
"eval_loss": 0.4431077539920807, |
|
"eval_runtime": 23.3474, |
|
"eval_samples_per_second": 35.336, |
|
"eval_steps_per_second": 1.114, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 78.1, |
|
"learning_rate": 2.2178013557323904e-05, |
|
"loss": 0.2522, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 78.28, |
|
"learning_rate": 2.2104332449160036e-05, |
|
"loss": 0.2519, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 78.45, |
|
"learning_rate": 2.203065134099617e-05, |
|
"loss": 0.2607, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 78.62, |
|
"learning_rate": 2.1956970232832303e-05, |
|
"loss": 0.2658, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 78.79, |
|
"learning_rate": 2.1883289124668434e-05, |
|
"loss": 0.2396, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 78.97, |
|
"learning_rate": 2.1809608016504566e-05, |
|
"loss": 0.2591, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.8606060606060606, |
|
"eval_loss": 0.4269072413444519, |
|
"eval_runtime": 23.309, |
|
"eval_samples_per_second": 35.394, |
|
"eval_steps_per_second": 1.115, |
|
"step": 4582 |
|
}, |
|
{ |
|
"epoch": 79.14, |
|
"learning_rate": 2.1735926908340705e-05, |
|
"loss": 0.2482, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 79.31, |
|
"learning_rate": 2.1662245800176837e-05, |
|
"loss": 0.2687, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 79.48, |
|
"learning_rate": 2.158856469201297e-05, |
|
"loss": 0.2377, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 79.66, |
|
"learning_rate": 2.15148835838491e-05, |
|
"loss": 0.2482, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 79.83, |
|
"learning_rate": 2.1441202475685236e-05, |
|
"loss": 0.2567, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.2613, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8484848484848485, |
|
"eval_loss": 0.43348875641822815, |
|
"eval_runtime": 23.2174, |
|
"eval_samples_per_second": 35.534, |
|
"eval_steps_per_second": 1.12, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 80.17, |
|
"learning_rate": 2.12938402593575e-05, |
|
"loss": 0.2367, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 80.34, |
|
"learning_rate": 2.1220159151193635e-05, |
|
"loss": 0.2335, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 80.52, |
|
"learning_rate": 2.114647804302977e-05, |
|
"loss": 0.2416, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 80.69, |
|
"learning_rate": 2.10727969348659e-05, |
|
"loss": 0.2793, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 80.86, |
|
"learning_rate": 2.0999115826702033e-05, |
|
"loss": 0.2555, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8593939393939394, |
|
"eval_loss": 0.4268703758716583, |
|
"eval_runtime": 23.3646, |
|
"eval_samples_per_second": 35.31, |
|
"eval_steps_per_second": 1.113, |
|
"step": 4698 |
|
}, |
|
{ |
|
"epoch": 81.03, |
|
"learning_rate": 2.092543471853817e-05, |
|
"loss": 0.2395, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 81.21, |
|
"learning_rate": 2.08517536103743e-05, |
|
"loss": 0.2471, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 81.38, |
|
"learning_rate": 2.0778072502210436e-05, |
|
"loss": 0.2338, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 81.55, |
|
"learning_rate": 2.0704391394046568e-05, |
|
"loss": 0.2676, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 81.72, |
|
"learning_rate": 2.0630710285882703e-05, |
|
"loss": 0.2385, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 81.9, |
|
"learning_rate": 2.0557029177718835e-05, |
|
"loss": 0.2832, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8715151515151515, |
|
"eval_loss": 0.3967880308628082, |
|
"eval_runtime": 23.6669, |
|
"eval_samples_per_second": 34.859, |
|
"eval_steps_per_second": 1.099, |
|
"step": 4756 |
|
}, |
|
{ |
|
"epoch": 82.07, |
|
"learning_rate": 2.0483348069554966e-05, |
|
"loss": 0.2408, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 82.24, |
|
"learning_rate": 2.04096669613911e-05, |
|
"loss": 0.231, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 82.41, |
|
"learning_rate": 2.0335985853227234e-05, |
|
"loss": 0.2405, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 82.59, |
|
"learning_rate": 2.026230474506337e-05, |
|
"loss": 0.2497, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 82.76, |
|
"learning_rate": 2.01886236368995e-05, |
|
"loss": 0.263, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 82.93, |
|
"learning_rate": 2.0114942528735632e-05, |
|
"loss": 0.264, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.8703030303030304, |
|
"eval_loss": 0.41733846068382263, |
|
"eval_runtime": 23.4822, |
|
"eval_samples_per_second": 35.133, |
|
"eval_steps_per_second": 1.107, |
|
"step": 4814 |
|
}, |
|
{ |
|
"epoch": 83.1, |
|
"learning_rate": 2.0041261420571768e-05, |
|
"loss": 0.2537, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 83.28, |
|
"learning_rate": 1.99675803124079e-05, |
|
"loss": 0.2252, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 83.45, |
|
"learning_rate": 1.989389920424403e-05, |
|
"loss": 0.2644, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 83.62, |
|
"learning_rate": 1.9820218096080163e-05, |
|
"loss": 0.2271, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 83.79, |
|
"learning_rate": 1.9746536987916302e-05, |
|
"loss": 0.2352, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"learning_rate": 1.9672855879752434e-05, |
|
"loss": 0.2462, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8606060606060606, |
|
"eval_loss": 0.4150097966194153, |
|
"eval_runtime": 23.5895, |
|
"eval_samples_per_second": 34.973, |
|
"eval_steps_per_second": 1.102, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 84.14, |
|
"learning_rate": 1.9599174771588565e-05, |
|
"loss": 0.2265, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 84.31, |
|
"learning_rate": 1.9525493663424697e-05, |
|
"loss": 0.2392, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 84.48, |
|
"learning_rate": 1.9451812555260833e-05, |
|
"loss": 0.2288, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 84.66, |
|
"learning_rate": 1.9378131447096964e-05, |
|
"loss": 0.2294, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 84.83, |
|
"learning_rate": 1.9304450338933096e-05, |
|
"loss": 0.2169, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.2424, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.863030303030303, |
|
"eval_loss": 0.4377155900001526, |
|
"eval_runtime": 23.7231, |
|
"eval_samples_per_second": 34.776, |
|
"eval_steps_per_second": 1.096, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 85.17, |
|
"learning_rate": 1.9157088122605367e-05, |
|
"loss": 0.2428, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 85.34, |
|
"learning_rate": 1.90834070144415e-05, |
|
"loss": 0.2142, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 85.52, |
|
"learning_rate": 1.900972590627763e-05, |
|
"loss": 0.244, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 85.69, |
|
"learning_rate": 1.8936044798113766e-05, |
|
"loss": 0.2467, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 85.86, |
|
"learning_rate": 1.8862363689949897e-05, |
|
"loss": 0.2574, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8678787878787879, |
|
"eval_loss": 0.4119945466518402, |
|
"eval_runtime": 23.5185, |
|
"eval_samples_per_second": 35.079, |
|
"eval_steps_per_second": 1.106, |
|
"step": 4988 |
|
}, |
|
{ |
|
"epoch": 86.03, |
|
"learning_rate": 1.878868258178603e-05, |
|
"loss": 0.2465, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 86.21, |
|
"learning_rate": 1.8715001473622164e-05, |
|
"loss": 0.2459, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 86.38, |
|
"learning_rate": 1.8641320365458296e-05, |
|
"loss": 0.2172, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 86.55, |
|
"learning_rate": 1.856763925729443e-05, |
|
"loss": 0.2241, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 86.72, |
|
"learning_rate": 1.8493958149130563e-05, |
|
"loss": 0.2333, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 86.9, |
|
"learning_rate": 1.8420277040966695e-05, |
|
"loss": 0.2273, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8533333333333334, |
|
"eval_loss": 0.43929052352905273, |
|
"eval_runtime": 23.6051, |
|
"eval_samples_per_second": 34.95, |
|
"eval_steps_per_second": 1.101, |
|
"step": 5046 |
|
}, |
|
{ |
|
"epoch": 87.07, |
|
"learning_rate": 1.834659593280283e-05, |
|
"loss": 0.2036, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 87.24, |
|
"learning_rate": 1.8272914824638966e-05, |
|
"loss": 0.2232, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 87.41, |
|
"learning_rate": 1.8199233716475097e-05, |
|
"loss": 0.2386, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 87.59, |
|
"learning_rate": 1.812555260831123e-05, |
|
"loss": 0.2438, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 87.76, |
|
"learning_rate": 1.8051871500147365e-05, |
|
"loss": 0.2419, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 87.93, |
|
"learning_rate": 1.7978190391983496e-05, |
|
"loss": 0.2334, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.863030303030303, |
|
"eval_loss": 0.43655768036842346, |
|
"eval_runtime": 23.6741, |
|
"eval_samples_per_second": 34.848, |
|
"eval_steps_per_second": 1.098, |
|
"step": 5104 |
|
}, |
|
{ |
|
"epoch": 88.1, |
|
"learning_rate": 1.7904509283819628e-05, |
|
"loss": 0.2277, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 88.28, |
|
"learning_rate": 1.783082817565576e-05, |
|
"loss": 0.25, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 88.45, |
|
"learning_rate": 1.77571470674919e-05, |
|
"loss": 0.2106, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 88.62, |
|
"learning_rate": 1.768346595932803e-05, |
|
"loss": 0.2374, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 88.79, |
|
"learning_rate": 1.7609784851164162e-05, |
|
"loss": 0.233, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 88.97, |
|
"learning_rate": 1.7536103743000294e-05, |
|
"loss": 0.2258, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.863030303030303, |
|
"eval_loss": 0.4189322292804718, |
|
"eval_runtime": 23.6637, |
|
"eval_samples_per_second": 34.864, |
|
"eval_steps_per_second": 1.099, |
|
"step": 5162 |
|
}, |
|
{ |
|
"epoch": 89.14, |
|
"learning_rate": 1.746242263483643e-05, |
|
"loss": 0.2435, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 89.31, |
|
"learning_rate": 1.738874152667256e-05, |
|
"loss": 0.2318, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 89.48, |
|
"learning_rate": 1.7315060418508693e-05, |
|
"loss": 0.2427, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 89.66, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 0.2309, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 89.83, |
|
"learning_rate": 1.7167698202180964e-05, |
|
"loss": 0.2181, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.2153, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.863030303030303, |
|
"eval_loss": 0.44740307331085205, |
|
"eval_runtime": 23.8255, |
|
"eval_samples_per_second": 34.627, |
|
"eval_steps_per_second": 1.091, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 90.17, |
|
"learning_rate": 1.7020335985853227e-05, |
|
"loss": 0.2084, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 90.34, |
|
"learning_rate": 1.6946654877689362e-05, |
|
"loss": 0.2404, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 90.52, |
|
"learning_rate": 1.6872973769525494e-05, |
|
"loss": 0.2269, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 90.69, |
|
"learning_rate": 1.6799292661361626e-05, |
|
"loss": 0.2317, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 90.86, |
|
"learning_rate": 1.672561155319776e-05, |
|
"loss": 0.2462, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.8642424242424243, |
|
"eval_loss": 0.43616586923599243, |
|
"eval_runtime": 24.1183, |
|
"eval_samples_per_second": 34.206, |
|
"eval_steps_per_second": 1.078, |
|
"step": 5278 |
|
}, |
|
{ |
|
"epoch": 91.03, |
|
"learning_rate": 1.6651930445033893e-05, |
|
"loss": 0.228, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 91.21, |
|
"learning_rate": 1.657824933687003e-05, |
|
"loss": 0.2219, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 91.38, |
|
"learning_rate": 1.650456822870616e-05, |
|
"loss": 0.235, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 91.55, |
|
"learning_rate": 1.6430887120542292e-05, |
|
"loss": 0.2386, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 91.72, |
|
"learning_rate": 1.6357206012378427e-05, |
|
"loss": 0.2325, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 91.9, |
|
"learning_rate": 1.628352490421456e-05, |
|
"loss": 0.2356, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8715151515151515, |
|
"eval_loss": 0.4454053044319153, |
|
"eval_runtime": 24.1964, |
|
"eval_samples_per_second": 34.096, |
|
"eval_steps_per_second": 1.075, |
|
"step": 5336 |
|
}, |
|
{ |
|
"epoch": 92.07, |
|
"learning_rate": 1.6209843796050694e-05, |
|
"loss": 0.2114, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 92.24, |
|
"learning_rate": 1.6136162687886826e-05, |
|
"loss": 0.209, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 92.41, |
|
"learning_rate": 1.606248157972296e-05, |
|
"loss": 0.2502, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 92.59, |
|
"learning_rate": 1.5988800471559093e-05, |
|
"loss": 0.2532, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 92.76, |
|
"learning_rate": 1.5915119363395225e-05, |
|
"loss": 0.1978, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 92.93, |
|
"learning_rate": 1.5841438255231357e-05, |
|
"loss": 0.2019, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.88, |
|
"eval_loss": 0.4412510097026825, |
|
"eval_runtime": 24.2808, |
|
"eval_samples_per_second": 33.977, |
|
"eval_steps_per_second": 1.071, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 93.1, |
|
"learning_rate": 1.5767757147067496e-05, |
|
"loss": 0.2149, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 93.28, |
|
"learning_rate": 1.5694076038903627e-05, |
|
"loss": 0.2038, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 93.45, |
|
"learning_rate": 1.562039493073976e-05, |
|
"loss": 0.2159, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 93.62, |
|
"learning_rate": 1.554671382257589e-05, |
|
"loss": 0.2178, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 93.79, |
|
"learning_rate": 1.5473032714412026e-05, |
|
"loss": 0.2417, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 93.97, |
|
"learning_rate": 1.5399351606248158e-05, |
|
"loss": 0.209, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8703030303030304, |
|
"eval_loss": 0.44101423025131226, |
|
"eval_runtime": 24.0177, |
|
"eval_samples_per_second": 34.35, |
|
"eval_steps_per_second": 1.083, |
|
"step": 5452 |
|
}, |
|
{ |
|
"epoch": 94.14, |
|
"learning_rate": 1.532567049808429e-05, |
|
"loss": 0.2264, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 94.31, |
|
"learning_rate": 1.5251989389920427e-05, |
|
"loss": 0.2079, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 94.48, |
|
"learning_rate": 1.5178308281756559e-05, |
|
"loss": 0.2426, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 94.66, |
|
"learning_rate": 1.5104627173592692e-05, |
|
"loss": 0.2141, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 94.83, |
|
"learning_rate": 1.5030946065428824e-05, |
|
"loss": 0.2073, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.2201, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.8690909090909091, |
|
"eval_loss": 0.43230336904525757, |
|
"eval_runtime": 24.1464, |
|
"eval_samples_per_second": 34.167, |
|
"eval_steps_per_second": 1.077, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 95.17, |
|
"learning_rate": 1.4883583849101091e-05, |
|
"loss": 0.1933, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 95.34, |
|
"learning_rate": 1.4809902740937223e-05, |
|
"loss": 0.2169, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 95.52, |
|
"learning_rate": 1.4736221632773358e-05, |
|
"loss": 0.2193, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 95.69, |
|
"learning_rate": 1.4662540524609492e-05, |
|
"loss": 0.2055, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 95.86, |
|
"learning_rate": 1.4588859416445624e-05, |
|
"loss": 0.2245, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8618181818181818, |
|
"eval_loss": 0.4998823404312134, |
|
"eval_runtime": 24.505, |
|
"eval_samples_per_second": 33.667, |
|
"eval_steps_per_second": 1.061, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 96.03, |
|
"learning_rate": 1.4515178308281757e-05, |
|
"loss": 0.2189, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 96.21, |
|
"learning_rate": 1.444149720011789e-05, |
|
"loss": 0.207, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 96.38, |
|
"learning_rate": 1.4367816091954022e-05, |
|
"loss": 0.1915, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 96.55, |
|
"learning_rate": 1.4294134983790156e-05, |
|
"loss": 0.1947, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 96.72, |
|
"learning_rate": 1.4220453875626291e-05, |
|
"loss": 0.2485, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 96.9, |
|
"learning_rate": 1.4146772767462425e-05, |
|
"loss": 0.2178, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.8654545454545455, |
|
"eval_loss": 0.46123164892196655, |
|
"eval_runtime": 24.3912, |
|
"eval_samples_per_second": 33.824, |
|
"eval_steps_per_second": 1.066, |
|
"step": 5626 |
|
}, |
|
{ |
|
"epoch": 97.07, |
|
"learning_rate": 1.4073091659298557e-05, |
|
"loss": 0.2311, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 97.24, |
|
"learning_rate": 1.399941055113469e-05, |
|
"loss": 0.194, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 97.41, |
|
"learning_rate": 1.3925729442970822e-05, |
|
"loss": 0.2048, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 97.59, |
|
"learning_rate": 1.3852048334806955e-05, |
|
"loss": 0.2222, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 97.76, |
|
"learning_rate": 1.3778367226643087e-05, |
|
"loss": 0.2174, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 97.93, |
|
"learning_rate": 1.3704686118479224e-05, |
|
"loss": 0.2163, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8703030303030304, |
|
"eval_loss": 0.43402180075645447, |
|
"eval_runtime": 24.3613, |
|
"eval_samples_per_second": 33.865, |
|
"eval_steps_per_second": 1.067, |
|
"step": 5684 |
|
}, |
|
{ |
|
"epoch": 98.1, |
|
"learning_rate": 1.3631005010315356e-05, |
|
"loss": 0.2057, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 98.28, |
|
"learning_rate": 1.355732390215149e-05, |
|
"loss": 0.2012, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 98.45, |
|
"learning_rate": 1.3483642793987621e-05, |
|
"loss": 0.2232, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 98.62, |
|
"learning_rate": 1.3409961685823755e-05, |
|
"loss": 0.1861, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 98.79, |
|
"learning_rate": 1.3336280577659887e-05, |
|
"loss": 0.1866, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 98.97, |
|
"learning_rate": 1.3262599469496024e-05, |
|
"loss": 0.2228, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.8787878787878788, |
|
"eval_loss": 0.45044654607772827, |
|
"eval_runtime": 24.2353, |
|
"eval_samples_per_second": 34.041, |
|
"eval_steps_per_second": 1.073, |
|
"step": 5742 |
|
}, |
|
{ |
|
"epoch": 99.14, |
|
"learning_rate": 1.3188918361332156e-05, |
|
"loss": 0.2028, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 99.31, |
|
"learning_rate": 1.3115237253168289e-05, |
|
"loss": 0.2066, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 99.48, |
|
"learning_rate": 1.3041556145004421e-05, |
|
"loss": 0.2192, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 99.66, |
|
"learning_rate": 1.2967875036840554e-05, |
|
"loss": 0.2086, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 99.83, |
|
"learning_rate": 1.2894193928676688e-05, |
|
"loss": 0.2308, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.2151, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.8703030303030304, |
|
"eval_loss": 0.46019893884658813, |
|
"eval_runtime": 23.58, |
|
"eval_samples_per_second": 34.987, |
|
"eval_steps_per_second": 1.103, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 100.17, |
|
"learning_rate": 1.2746831712348955e-05, |
|
"loss": 0.1968, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 100.34, |
|
"learning_rate": 1.2673150604185089e-05, |
|
"loss": 0.1935, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 100.52, |
|
"learning_rate": 1.259946949602122e-05, |
|
"loss": 0.2059, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 100.69, |
|
"learning_rate": 1.2525788387857354e-05, |
|
"loss": 0.2214, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 100.86, |
|
"learning_rate": 1.2452107279693487e-05, |
|
"loss": 0.1988, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.8812121212121212, |
|
"eval_loss": 0.4414079785346985, |
|
"eval_runtime": 23.5588, |
|
"eval_samples_per_second": 35.019, |
|
"eval_steps_per_second": 1.104, |
|
"step": 5858 |
|
}, |
|
{ |
|
"epoch": 101.03, |
|
"learning_rate": 1.2378426171529621e-05, |
|
"loss": 0.1921, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 101.21, |
|
"learning_rate": 1.2304745063365755e-05, |
|
"loss": 0.1989, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 101.38, |
|
"learning_rate": 1.2231063955201886e-05, |
|
"loss": 0.2049, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 101.55, |
|
"learning_rate": 1.215738284703802e-05, |
|
"loss": 0.1953, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 101.72, |
|
"learning_rate": 1.2083701738874153e-05, |
|
"loss": 0.194, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 101.9, |
|
"learning_rate": 1.2010020630710287e-05, |
|
"loss": 0.2227, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.8824242424242424, |
|
"eval_loss": 0.43921926617622375, |
|
"eval_runtime": 23.6474, |
|
"eval_samples_per_second": 34.887, |
|
"eval_steps_per_second": 1.099, |
|
"step": 5916 |
|
}, |
|
{ |
|
"epoch": 102.07, |
|
"learning_rate": 1.1936339522546419e-05, |
|
"loss": 0.2001, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 102.24, |
|
"learning_rate": 1.1862658414382554e-05, |
|
"loss": 0.2004, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 102.41, |
|
"learning_rate": 1.1788977306218686e-05, |
|
"loss": 0.2067, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 102.59, |
|
"learning_rate": 1.171529619805482e-05, |
|
"loss": 0.1827, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 102.76, |
|
"learning_rate": 1.1641615089890953e-05, |
|
"loss": 0.2194, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 102.93, |
|
"learning_rate": 1.1567933981727086e-05, |
|
"loss": 0.1772, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.863030303030303, |
|
"eval_loss": 0.5069222450256348, |
|
"eval_runtime": 23.5634, |
|
"eval_samples_per_second": 35.012, |
|
"eval_steps_per_second": 1.103, |
|
"step": 5974 |
|
}, |
|
{ |
|
"epoch": 103.1, |
|
"learning_rate": 1.1494252873563218e-05, |
|
"loss": 0.1953, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 103.28, |
|
"learning_rate": 1.1420571765399352e-05, |
|
"loss": 0.227, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 103.45, |
|
"learning_rate": 1.1346890657235485e-05, |
|
"loss": 0.2066, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 103.62, |
|
"learning_rate": 1.1273209549071619e-05, |
|
"loss": 0.1985, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 103.79, |
|
"learning_rate": 1.119952844090775e-05, |
|
"loss": 0.215, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 103.97, |
|
"learning_rate": 1.1125847332743886e-05, |
|
"loss": 0.2199, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.4647994339466095, |
|
"eval_runtime": 23.6695, |
|
"eval_samples_per_second": 34.855, |
|
"eval_steps_per_second": 1.098, |
|
"step": 6032 |
|
}, |
|
{ |
|
"epoch": 104.14, |
|
"learning_rate": 1.1052166224580018e-05, |
|
"loss": 0.2171, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 104.31, |
|
"learning_rate": 1.0978485116416151e-05, |
|
"loss": 0.1818, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"learning_rate": 1.0904804008252283e-05, |
|
"loss": 0.1838, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 104.66, |
|
"learning_rate": 1.0831122900088418e-05, |
|
"loss": 0.1846, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 104.83, |
|
"learning_rate": 1.075744179192455e-05, |
|
"loss": 0.1844, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.1936, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.8690909090909091, |
|
"eval_loss": 0.48056602478027344, |
|
"eval_runtime": 23.728, |
|
"eval_samples_per_second": 34.769, |
|
"eval_steps_per_second": 1.096, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 105.17, |
|
"learning_rate": 1.0610079575596817e-05, |
|
"loss": 0.1872, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 105.34, |
|
"learning_rate": 1.053639846743295e-05, |
|
"loss": 0.2008, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 105.52, |
|
"learning_rate": 1.0462717359269084e-05, |
|
"loss": 0.1884, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 105.69, |
|
"learning_rate": 1.0389036251105218e-05, |
|
"loss": 0.1783, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 105.86, |
|
"learning_rate": 1.0315355142941351e-05, |
|
"loss": 0.199, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.8763636363636363, |
|
"eval_loss": 0.456930547952652, |
|
"eval_runtime": 23.6049, |
|
"eval_samples_per_second": 34.95, |
|
"eval_steps_per_second": 1.101, |
|
"step": 6148 |
|
}, |
|
{ |
|
"epoch": 106.03, |
|
"learning_rate": 1.0241674034777483e-05, |
|
"loss": 0.187, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 106.21, |
|
"learning_rate": 1.0167992926613617e-05, |
|
"loss": 0.1665, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 106.38, |
|
"learning_rate": 1.009431181844975e-05, |
|
"loss": 0.1757, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 106.55, |
|
"learning_rate": 1.0020630710285884e-05, |
|
"loss": 0.1844, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 106.72, |
|
"learning_rate": 9.946949602122016e-06, |
|
"loss": 0.1787, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 106.9, |
|
"learning_rate": 9.873268493958151e-06, |
|
"loss": 0.2149, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.8739393939393939, |
|
"eval_loss": 0.444536954164505, |
|
"eval_runtime": 23.7185, |
|
"eval_samples_per_second": 34.783, |
|
"eval_steps_per_second": 1.096, |
|
"step": 6206 |
|
}, |
|
{ |
|
"epoch": 107.07, |
|
"learning_rate": 9.799587385794283e-06, |
|
"loss": 0.2029, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 107.24, |
|
"learning_rate": 9.725906277630416e-06, |
|
"loss": 0.2019, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 107.41, |
|
"learning_rate": 9.652225169466548e-06, |
|
"loss": 0.1851, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 107.59, |
|
"learning_rate": 9.578544061302683e-06, |
|
"loss": 0.2086, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 107.76, |
|
"learning_rate": 9.504862953138815e-06, |
|
"loss": 0.2062, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 107.93, |
|
"learning_rate": 9.431181844974949e-06, |
|
"loss": 0.1917, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.8727272727272727, |
|
"eval_loss": 0.4444233477115631, |
|
"eval_runtime": 23.6319, |
|
"eval_samples_per_second": 34.91, |
|
"eval_steps_per_second": 1.1, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 108.1, |
|
"learning_rate": 9.357500736811082e-06, |
|
"loss": 0.1996, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 108.28, |
|
"learning_rate": 9.283819628647216e-06, |
|
"loss": 0.1945, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 108.45, |
|
"learning_rate": 9.210138520483348e-06, |
|
"loss": 0.176, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 108.62, |
|
"learning_rate": 9.136457412319483e-06, |
|
"loss": 0.1805, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 108.79, |
|
"learning_rate": 9.062776304155615e-06, |
|
"loss": 0.1892, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 108.97, |
|
"learning_rate": 8.989095195991748e-06, |
|
"loss": 0.201, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.8727272727272727, |
|
"eval_loss": 0.4593980014324188, |
|
"eval_runtime": 23.6531, |
|
"eval_samples_per_second": 34.879, |
|
"eval_steps_per_second": 1.099, |
|
"step": 6322 |
|
}, |
|
{ |
|
"epoch": 109.14, |
|
"learning_rate": 8.91541408782788e-06, |
|
"loss": 0.1884, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 109.31, |
|
"learning_rate": 8.841732979664015e-06, |
|
"loss": 0.1718, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 109.48, |
|
"learning_rate": 8.768051871500147e-06, |
|
"loss": 0.2099, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 109.66, |
|
"learning_rate": 8.69437076333628e-06, |
|
"loss": 0.1774, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 109.83, |
|
"learning_rate": 8.620689655172414e-06, |
|
"loss": 0.1868, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.1938, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.8763636363636363, |
|
"eval_loss": 0.4564373195171356, |
|
"eval_runtime": 23.6669, |
|
"eval_samples_per_second": 34.859, |
|
"eval_steps_per_second": 1.099, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 110.17, |
|
"learning_rate": 8.473327438844681e-06, |
|
"loss": 0.2119, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 110.34, |
|
"learning_rate": 8.399646330680813e-06, |
|
"loss": 0.2057, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 110.52, |
|
"learning_rate": 8.325965222516947e-06, |
|
"loss": 0.1808, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 110.69, |
|
"learning_rate": 8.25228411435308e-06, |
|
"loss": 0.212, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 110.86, |
|
"learning_rate": 8.178603006189214e-06, |
|
"loss": 0.1977, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.8739393939393939, |
|
"eval_loss": 0.4397640824317932, |
|
"eval_runtime": 23.6596, |
|
"eval_samples_per_second": 34.87, |
|
"eval_steps_per_second": 1.099, |
|
"step": 6438 |
|
}, |
|
{ |
|
"epoch": 111.03, |
|
"learning_rate": 8.104921898025347e-06, |
|
"loss": 0.2244, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 111.21, |
|
"learning_rate": 8.03124078986148e-06, |
|
"loss": 0.1996, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 111.38, |
|
"learning_rate": 7.957559681697613e-06, |
|
"loss": 0.2058, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 111.55, |
|
"learning_rate": 7.883878573533748e-06, |
|
"loss": 0.1746, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 111.72, |
|
"learning_rate": 7.81019746536988e-06, |
|
"loss": 0.1746, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 111.9, |
|
"learning_rate": 7.736516357206013e-06, |
|
"loss": 0.1776, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.88, |
|
"eval_loss": 0.4356410503387451, |
|
"eval_runtime": 23.6175, |
|
"eval_samples_per_second": 34.932, |
|
"eval_steps_per_second": 1.101, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 112.07, |
|
"learning_rate": 7.662835249042145e-06, |
|
"loss": 0.1675, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 112.24, |
|
"learning_rate": 7.589154140878279e-06, |
|
"loss": 0.19, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 112.41, |
|
"learning_rate": 7.515473032714412e-06, |
|
"loss": 0.2091, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 112.59, |
|
"learning_rate": 7.4417919245505456e-06, |
|
"loss": 0.1808, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 112.76, |
|
"learning_rate": 7.368110816386679e-06, |
|
"loss": 0.1915, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 112.93, |
|
"learning_rate": 7.294429708222812e-06, |
|
"loss": 0.1939, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.8848484848484849, |
|
"eval_loss": 0.4412323236465454, |
|
"eval_runtime": 23.6066, |
|
"eval_samples_per_second": 34.948, |
|
"eval_steps_per_second": 1.101, |
|
"step": 6554 |
|
}, |
|
{ |
|
"epoch": 113.1, |
|
"learning_rate": 7.220748600058945e-06, |
|
"loss": 0.1803, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 113.28, |
|
"learning_rate": 7.147067491895078e-06, |
|
"loss": 0.1853, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 113.45, |
|
"learning_rate": 7.073386383731212e-06, |
|
"loss": 0.1793, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 113.62, |
|
"learning_rate": 6.999705275567345e-06, |
|
"loss": 0.1863, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 113.79, |
|
"learning_rate": 6.926024167403478e-06, |
|
"loss": 0.1811, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 113.97, |
|
"learning_rate": 6.852343059239612e-06, |
|
"loss": 0.178, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.88, |
|
"eval_loss": 0.4372984766960144, |
|
"eval_runtime": 23.5914, |
|
"eval_samples_per_second": 34.97, |
|
"eval_steps_per_second": 1.102, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 114.14, |
|
"learning_rate": 6.778661951075745e-06, |
|
"loss": 0.1922, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 114.31, |
|
"learning_rate": 6.7049808429118775e-06, |
|
"loss": 0.1817, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 114.48, |
|
"learning_rate": 6.631299734748012e-06, |
|
"loss": 0.2044, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 114.66, |
|
"learning_rate": 6.5576186265841445e-06, |
|
"loss": 0.1683, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 114.83, |
|
"learning_rate": 6.483937518420277e-06, |
|
"loss": 0.19, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.1926, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.8812121212121212, |
|
"eval_loss": 0.4507855176925659, |
|
"eval_runtime": 23.5842, |
|
"eval_samples_per_second": 34.981, |
|
"eval_steps_per_second": 1.102, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 115.17, |
|
"learning_rate": 6.336575302092544e-06, |
|
"loss": 0.1599, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 115.34, |
|
"learning_rate": 6.262894193928677e-06, |
|
"loss": 0.1824, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 115.52, |
|
"learning_rate": 6.1892130857648105e-06, |
|
"loss": 0.1651, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 115.69, |
|
"learning_rate": 6.115531977600943e-06, |
|
"loss": 0.2179, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 115.86, |
|
"learning_rate": 6.041850869437077e-06, |
|
"loss": 0.1979, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.8848484848484849, |
|
"eval_loss": 0.44771116971969604, |
|
"eval_runtime": 23.6223, |
|
"eval_samples_per_second": 34.925, |
|
"eval_steps_per_second": 1.101, |
|
"step": 6728 |
|
}, |
|
{ |
|
"epoch": 116.03, |
|
"learning_rate": 5.968169761273209e-06, |
|
"loss": 0.2057, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 116.21, |
|
"learning_rate": 5.894488653109343e-06, |
|
"loss": 0.1843, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 116.38, |
|
"learning_rate": 5.8208075449454765e-06, |
|
"loss": 0.1637, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 116.55, |
|
"learning_rate": 5.747126436781609e-06, |
|
"loss": 0.1629, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 116.72, |
|
"learning_rate": 5.673445328617743e-06, |
|
"loss": 0.1782, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 116.9, |
|
"learning_rate": 5.599764220453875e-06, |
|
"loss": 0.1958, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.8896969696969697, |
|
"eval_loss": 0.44876348972320557, |
|
"eval_runtime": 23.6296, |
|
"eval_samples_per_second": 34.914, |
|
"eval_steps_per_second": 1.1, |
|
"step": 6786 |
|
}, |
|
{ |
|
"epoch": 117.07, |
|
"learning_rate": 5.526083112290009e-06, |
|
"loss": 0.1827, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 117.24, |
|
"learning_rate": 5.452402004126142e-06, |
|
"loss": 0.1769, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 117.41, |
|
"learning_rate": 5.378720895962275e-06, |
|
"loss": 0.1908, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 117.59, |
|
"learning_rate": 5.305039787798409e-06, |
|
"loss": 0.1763, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 117.76, |
|
"learning_rate": 5.231358679634542e-06, |
|
"loss": 0.1902, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 117.93, |
|
"learning_rate": 5.157677571470676e-06, |
|
"loss": 0.189, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.8836363636363637, |
|
"eval_loss": 0.4553094804286957, |
|
"eval_runtime": 23.6158, |
|
"eval_samples_per_second": 34.934, |
|
"eval_steps_per_second": 1.101, |
|
"step": 6844 |
|
}, |
|
{ |
|
"epoch": 118.1, |
|
"learning_rate": 5.083996463306808e-06, |
|
"loss": 0.1761, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 118.28, |
|
"learning_rate": 5.010315355142942e-06, |
|
"loss": 0.1876, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 118.45, |
|
"learning_rate": 4.9366342469790754e-06, |
|
"loss": 0.171, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 118.62, |
|
"learning_rate": 4.862953138815208e-06, |
|
"loss": 0.2059, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 118.79, |
|
"learning_rate": 4.789272030651342e-06, |
|
"loss": 0.1717, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 118.97, |
|
"learning_rate": 4.715590922487474e-06, |
|
"loss": 0.1838, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.8848484848484849, |
|
"eval_loss": 0.46053749322891235, |
|
"eval_runtime": 23.6485, |
|
"eval_samples_per_second": 34.886, |
|
"eval_steps_per_second": 1.099, |
|
"step": 6902 |
|
}, |
|
{ |
|
"epoch": 119.14, |
|
"learning_rate": 4.641909814323608e-06, |
|
"loss": 0.192, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 119.31, |
|
"learning_rate": 4.568228706159741e-06, |
|
"loss": 0.1723, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 119.48, |
|
"learning_rate": 4.494547597995874e-06, |
|
"loss": 0.1795, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 119.66, |
|
"learning_rate": 4.420866489832008e-06, |
|
"loss": 0.168, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 119.83, |
|
"learning_rate": 4.34718538166814e-06, |
|
"loss": 0.1829, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.1755, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.8836363636363637, |
|
"eval_loss": 0.4462817311286926, |
|
"eval_runtime": 23.7257, |
|
"eval_samples_per_second": 34.772, |
|
"eval_steps_per_second": 1.096, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 120.17, |
|
"learning_rate": 4.1998231653404065e-06, |
|
"loss": 0.1638, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 120.34, |
|
"learning_rate": 4.12614205717654e-06, |
|
"loss": 0.1938, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 120.52, |
|
"learning_rate": 4.052460949012674e-06, |
|
"loss": 0.1925, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 120.69, |
|
"learning_rate": 3.978779840848806e-06, |
|
"loss": 0.1799, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 120.86, |
|
"learning_rate": 3.90509873268494e-06, |
|
"loss": 0.1958, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.8860606060606061, |
|
"eval_loss": 0.4474049210548401, |
|
"eval_runtime": 23.5546, |
|
"eval_samples_per_second": 35.025, |
|
"eval_steps_per_second": 1.104, |
|
"step": 7018 |
|
}, |
|
{ |
|
"epoch": 121.03, |
|
"learning_rate": 3.8314176245210725e-06, |
|
"loss": 0.1876, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 121.21, |
|
"learning_rate": 3.757736516357206e-06, |
|
"loss": 0.1736, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 121.38, |
|
"learning_rate": 3.6840554081933395e-06, |
|
"loss": 0.1766, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 121.55, |
|
"learning_rate": 3.6103743000294727e-06, |
|
"loss": 0.1908, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 121.72, |
|
"learning_rate": 3.536693191865606e-06, |
|
"loss": 0.2085, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 121.9, |
|
"learning_rate": 3.463012083701739e-06, |
|
"loss": 0.1857, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.8921212121212121, |
|
"eval_loss": 0.4550405442714691, |
|
"eval_runtime": 23.7315, |
|
"eval_samples_per_second": 34.764, |
|
"eval_steps_per_second": 1.096, |
|
"step": 7076 |
|
}, |
|
{ |
|
"epoch": 122.07, |
|
"learning_rate": 3.3893309755378724e-06, |
|
"loss": 0.1568, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 122.24, |
|
"learning_rate": 3.315649867374006e-06, |
|
"loss": 0.1809, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 122.41, |
|
"learning_rate": 3.2419687592101386e-06, |
|
"loss": 0.17, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 122.59, |
|
"learning_rate": 3.168287651046272e-06, |
|
"loss": 0.1639, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 122.76, |
|
"learning_rate": 3.0946065428824053e-06, |
|
"loss": 0.1633, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 122.93, |
|
"learning_rate": 3.0209254347185384e-06, |
|
"loss": 0.1466, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.8884848484848484, |
|
"eval_loss": 0.4493720829486847, |
|
"eval_runtime": 23.8169, |
|
"eval_samples_per_second": 34.639, |
|
"eval_steps_per_second": 1.092, |
|
"step": 7134 |
|
}, |
|
{ |
|
"epoch": 123.1, |
|
"learning_rate": 2.9472443265546715e-06, |
|
"loss": 0.1799, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 123.28, |
|
"learning_rate": 2.8735632183908046e-06, |
|
"loss": 0.1993, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 123.45, |
|
"learning_rate": 2.7998821102269377e-06, |
|
"loss": 0.1722, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 123.62, |
|
"learning_rate": 2.726201002063071e-06, |
|
"loss": 0.169, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 123.79, |
|
"learning_rate": 2.6525198938992043e-06, |
|
"loss": 0.1883, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 123.97, |
|
"learning_rate": 2.578838785735338e-06, |
|
"loss": 0.1751, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.8872727272727273, |
|
"eval_loss": 0.4559504985809326, |
|
"eval_runtime": 23.5372, |
|
"eval_samples_per_second": 35.051, |
|
"eval_steps_per_second": 1.105, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 124.14, |
|
"learning_rate": 2.505157677571471e-06, |
|
"loss": 0.1663, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 124.31, |
|
"learning_rate": 2.431476569407604e-06, |
|
"loss": 0.1984, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 124.48, |
|
"learning_rate": 2.357795461243737e-06, |
|
"loss": 0.1946, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 124.66, |
|
"learning_rate": 2.2841143530798707e-06, |
|
"loss": 0.1734, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 124.83, |
|
"learning_rate": 2.210433244916004e-06, |
|
"loss": 0.1741, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.175, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.8896969696969697, |
|
"eval_loss": 0.4383449852466583, |
|
"eval_runtime": 23.3208, |
|
"eval_samples_per_second": 35.376, |
|
"eval_steps_per_second": 1.115, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 125.17, |
|
"learning_rate": 2.06307102858827e-06, |
|
"loss": 0.1755, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 125.34, |
|
"learning_rate": 1.989389920424403e-06, |
|
"loss": 0.1789, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 125.52, |
|
"learning_rate": 1.9157088122605362e-06, |
|
"loss": 0.1755, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 125.69, |
|
"learning_rate": 1.8420277040966698e-06, |
|
"loss": 0.1765, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 125.86, |
|
"learning_rate": 1.768346595932803e-06, |
|
"loss": 0.207, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.8872727272727273, |
|
"eval_loss": 0.4601348042488098, |
|
"eval_runtime": 23.3274, |
|
"eval_samples_per_second": 35.366, |
|
"eval_steps_per_second": 1.115, |
|
"step": 7308 |
|
}, |
|
{ |
|
"epoch": 126.03, |
|
"learning_rate": 1.6946654877689362e-06, |
|
"loss": 0.1855, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 126.21, |
|
"learning_rate": 1.6209843796050693e-06, |
|
"loss": 0.1811, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 126.38, |
|
"learning_rate": 1.5473032714412026e-06, |
|
"loss": 0.1738, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 126.55, |
|
"learning_rate": 1.4736221632773357e-06, |
|
"loss": 0.1742, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 126.72, |
|
"learning_rate": 1.3999410551134688e-06, |
|
"loss": 0.1721, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 126.9, |
|
"learning_rate": 1.3262599469496022e-06, |
|
"loss": 0.1756, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.8896969696969697, |
|
"eval_loss": 0.44251343607902527, |
|
"eval_runtime": 23.4218, |
|
"eval_samples_per_second": 35.224, |
|
"eval_steps_per_second": 1.11, |
|
"step": 7366 |
|
}, |
|
{ |
|
"epoch": 127.07, |
|
"learning_rate": 1.2525788387857355e-06, |
|
"loss": 0.1831, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 127.24, |
|
"learning_rate": 1.1788977306218686e-06, |
|
"loss": 0.1901, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 127.41, |
|
"learning_rate": 1.105216622458002e-06, |
|
"loss": 0.1649, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 127.59, |
|
"learning_rate": 1.031535514294135e-06, |
|
"loss": 0.1745, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 127.76, |
|
"learning_rate": 9.578544061302681e-07, |
|
"loss": 0.184, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 127.93, |
|
"learning_rate": 8.841732979664015e-07, |
|
"loss": 0.1695, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.8909090909090909, |
|
"eval_loss": 0.45327892899513245, |
|
"eval_runtime": 23.3943, |
|
"eval_samples_per_second": 35.265, |
|
"eval_steps_per_second": 1.111, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 128.1, |
|
"learning_rate": 8.104921898025347e-07, |
|
"loss": 0.192, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 128.28, |
|
"learning_rate": 7.368110816386679e-07, |
|
"loss": 0.1588, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 128.45, |
|
"learning_rate": 6.631299734748011e-07, |
|
"loss": 0.1642, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 128.62, |
|
"learning_rate": 5.894488653109343e-07, |
|
"loss": 0.1864, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 128.79, |
|
"learning_rate": 5.157677571470675e-07, |
|
"loss": 0.1947, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 128.97, |
|
"learning_rate": 4.4208664898320077e-07, |
|
"loss": 0.1873, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.8896969696969697, |
|
"eval_loss": 0.4509574770927429, |
|
"eval_runtime": 23.3432, |
|
"eval_samples_per_second": 35.342, |
|
"eval_steps_per_second": 1.114, |
|
"step": 7482 |
|
}, |
|
{ |
|
"epoch": 129.14, |
|
"learning_rate": 3.6840554081933393e-07, |
|
"loss": 0.1633, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 129.31, |
|
"learning_rate": 2.9472443265546715e-07, |
|
"loss": 0.1921, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 129.48, |
|
"learning_rate": 2.2104332449160039e-07, |
|
"loss": 0.1884, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 129.66, |
|
"learning_rate": 1.4736221632773357e-07, |
|
"loss": 0.1792, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 129.83, |
|
"learning_rate": 7.368110816386679e-08, |
|
"loss": 0.1913, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1726, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.8909090909090909, |
|
"eval_loss": 0.44632968306541443, |
|
"eval_runtime": 23.6475, |
|
"eval_samples_per_second": 34.887, |
|
"eval_steps_per_second": 1.099, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"step": 7540, |
|
"total_flos": 2.398578081146118e+19, |
|
"train_loss": 0.33043775234361544, |
|
"train_runtime": 43151.479, |
|
"train_samples_per_second": 22.363, |
|
"train_steps_per_second": 0.175 |
|
} |
|
], |
|
"max_steps": 7540, |
|
"num_train_epochs": 130, |
|
"total_flos": 2.398578081146118e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|