|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 25.53191489361702, |
|
"eval_steps": 300, |
|
"global_step": 4800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-07, |
|
"loss": 1.0874, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1e-06, |
|
"loss": 0.9487, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7586, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2e-06, |
|
"loss": 0.7225, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7364, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3e-06, |
|
"loss": 0.7265, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.7267, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4e-06, |
|
"loss": 0.5697, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.018, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5.5e-06, |
|
"loss": 0.8242, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6e-06, |
|
"loss": 0.8034, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.5e-06, |
|
"loss": 0.7717, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7e-06, |
|
"loss": 0.8337, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.6884, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8e-06, |
|
"loss": 0.9129, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 0.5637, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9e-06, |
|
"loss": 1.0458, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.9414, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6379, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.0500000000000001e-05, |
|
"loss": 0.9249, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.1e-05, |
|
"loss": 0.6944, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.15e-05, |
|
"loss": 0.9221, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.6475, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.7748, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.3e-05, |
|
"loss": 0.8705, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.35e-05, |
|
"loss": 0.7737, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.8643, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.4500000000000002e-05, |
|
"loss": 0.8428, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.6785, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7930460572242737, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0663, |
|
"eval_samples_per_second": 247.266, |
|
"eval_steps_per_second": 7.748, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.55e-05, |
|
"loss": 0.6076, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.6e-05, |
|
"loss": 0.5963, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.6626, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.8379, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 0.8851, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.7489, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.85e-05, |
|
"loss": 0.7573, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.8018, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.95e-05, |
|
"loss": 0.6645, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8677, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.05e-05, |
|
"loss": 0.7478, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.1000000000000002e-05, |
|
"loss": 0.8551, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.1499999999999997e-05, |
|
"loss": 0.9323, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2e-05, |
|
"loss": 0.7536, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 0.5336, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.3e-05, |
|
"loss": 0.8955, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.3500000000000002e-05, |
|
"loss": 0.7926, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.5713, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.4500000000000003e-05, |
|
"loss": 0.8568, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.55e-05, |
|
"loss": 0.6223, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.6e-05, |
|
"loss": 0.7579, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.65e-05, |
|
"loss": 0.6325, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.7e-05, |
|
"loss": 0.7276, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.75e-05, |
|
"loss": 0.8766, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.7107, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.85e-05, |
|
"loss": 0.5904, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.9000000000000004e-05, |
|
"loss": 0.7125, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.95e-05, |
|
"loss": 0.755, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5583, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_accuracy": 0.7613333333333333, |
|
"eval_combined_score": 0.7403762607674487, |
|
"eval_f1": 0.7316181693637903, |
|
"eval_loss": 0.6910097599029541, |
|
"eval_precision": 0.7072202070393374, |
|
"eval_recall": 0.7613333333333333, |
|
"eval_runtime": 6.0882, |
|
"eval_samples_per_second": 246.376, |
|
"eval_steps_per_second": 7.72, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.05e-05, |
|
"loss": 0.6527, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.7676, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.15e-05, |
|
"loss": 0.766, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.2e-05, |
|
"loss": 0.6217, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.5405, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.7951, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.35e-05, |
|
"loss": 0.552, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.6521, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.4500000000000005e-05, |
|
"loss": 0.6814, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.5000000000000004e-05, |
|
"loss": 0.3992, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.5499999999999996e-05, |
|
"loss": 0.7018, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.664, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.65e-05, |
|
"loss": 0.6891, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.6063, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 3.75e-05, |
|
"loss": 0.6543, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.5638, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.85e-05, |
|
"loss": 0.6984, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.9e-05, |
|
"loss": 0.7148, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.95e-05, |
|
"loss": 0.4915, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 4e-05, |
|
"loss": 0.648, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.4527, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.551, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 4.1500000000000006e-05, |
|
"loss": 0.5909, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.2000000000000004e-05, |
|
"loss": 0.603, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.25e-05, |
|
"loss": 1.0745, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.2999999999999995e-05, |
|
"loss": 0.6697, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.8374, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 4.4e-05, |
|
"loss": 0.4891, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.45e-05, |
|
"loss": 0.5956, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.7857, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.6514685153961182, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.9473, |
|
"eval_samples_per_second": 252.216, |
|
"eval_steps_per_second": 7.903, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 4.55e-05, |
|
"loss": 0.8566, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 4.6e-05, |
|
"loss": 0.4698, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.65e-05, |
|
"loss": 0.7224, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.7000000000000004e-05, |
|
"loss": 0.5879, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3592, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.513, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.85e-05, |
|
"loss": 0.6167, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 4.9000000000000005e-05, |
|
"loss": 0.5706, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 0.5523, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6565, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 5.05e-05, |
|
"loss": 0.4634, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 5.1e-05, |
|
"loss": 0.634, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 5.15e-05, |
|
"loss": 0.5472, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 5.2e-05, |
|
"loss": 0.595, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 5.25e-05, |
|
"loss": 0.4889, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 5.3e-05, |
|
"loss": 0.5884, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 5.35e-05, |
|
"loss": 0.4149, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 5.4e-05, |
|
"loss": 0.7573, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 5.45e-05, |
|
"loss": 0.599, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 5.5e-05, |
|
"loss": 0.366, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 5.55e-05, |
|
"loss": 0.4906, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 5.6e-05, |
|
"loss": 0.4378, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 5.6500000000000005e-05, |
|
"loss": 0.4886, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 5.7e-05, |
|
"loss": 0.342, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 5.75e-05, |
|
"loss": 0.7803, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 5.800000000000001e-05, |
|
"loss": 0.3435, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 5.8500000000000006e-05, |
|
"loss": 0.6149, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 5.9e-05, |
|
"loss": 0.3026, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 5.9499999999999996e-05, |
|
"loss": 0.9387, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 6e-05, |
|
"loss": 0.6309, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_accuracy": 0.848, |
|
"eval_combined_score": 0.8403047916631027, |
|
"eval_f1": 0.8270449473875959, |
|
"eval_loss": 0.5592399835586548, |
|
"eval_precision": 0.8381742192648148, |
|
"eval_recall": 0.848, |
|
"eval_runtime": 5.9701, |
|
"eval_samples_per_second": 251.251, |
|
"eval_steps_per_second": 7.873, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 6.05e-05, |
|
"loss": 0.4672, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 6.1e-05, |
|
"loss": 0.4909, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 6.15e-05, |
|
"loss": 0.5007, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.2e-05, |
|
"loss": 0.4318, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.3623, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 6.3e-05, |
|
"loss": 0.3294, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.35e-05, |
|
"loss": 0.7501, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 6.4e-05, |
|
"loss": 0.6436, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 6.450000000000001e-05, |
|
"loss": 0.525, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 0.6047, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 6.55e-05, |
|
"loss": 0.3636, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 6.6e-05, |
|
"loss": 0.5634, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 6.65e-05, |
|
"loss": 0.3846, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 6.7e-05, |
|
"loss": 0.8436, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 6.75e-05, |
|
"loss": 0.4762, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 6.800000000000001e-05, |
|
"loss": 0.5856, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 6.850000000000001e-05, |
|
"loss": 0.4042, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 6.900000000000001e-05, |
|
"loss": 0.475, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 6.950000000000001e-05, |
|
"loss": 0.3535, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 0.2399, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 7.049999999999999e-05, |
|
"loss": 0.316, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 7.095e-05, |
|
"loss": 0.438, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 7.145e-05, |
|
"loss": 0.2673, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 7.195e-05, |
|
"loss": 0.4286, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 7.245e-05, |
|
"loss": 0.3655, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 7.295e-05, |
|
"loss": 0.434, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 7.345e-05, |
|
"loss": 0.6143, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 7.395000000000001e-05, |
|
"loss": 0.5471, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 7.445000000000001e-05, |
|
"loss": 0.299, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 7.495e-05, |
|
"loss": 0.2216, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_accuracy": 0.8773333333333333, |
|
"eval_combined_score": 0.8618647537688802, |
|
"eval_f1": 0.8431508113173474, |
|
"eval_loss": 0.5708244442939758, |
|
"eval_precision": 0.8496415370915067, |
|
"eval_recall": 0.8773333333333333, |
|
"eval_runtime": 6.0286, |
|
"eval_samples_per_second": 248.816, |
|
"eval_steps_per_second": 7.796, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 7.545e-05, |
|
"loss": 0.5517, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 7.595e-05, |
|
"loss": 0.3237, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 7.645e-05, |
|
"loss": 0.3945, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 7.695e-05, |
|
"loss": 0.3996, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 7.745000000000001e-05, |
|
"loss": 0.3644, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 7.795000000000001e-05, |
|
"loss": 0.6767, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 7.845000000000001e-05, |
|
"loss": 0.434, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 7.895000000000001e-05, |
|
"loss": 0.4047, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 7.945e-05, |
|
"loss": 0.3844, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 7.994999999999999e-05, |
|
"loss": 0.2656, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 8.044999999999999e-05, |
|
"loss": 0.3318, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 8.095e-05, |
|
"loss": 0.4627, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 8.145e-05, |
|
"loss": 0.6657, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 8.195e-05, |
|
"loss": 0.353, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 8.245e-05, |
|
"loss": 0.4543, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 8.295e-05, |
|
"loss": 0.4121, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 8.340000000000001e-05, |
|
"loss": 0.2792, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 8.39e-05, |
|
"loss": 0.3618, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 8.44e-05, |
|
"loss": 0.4922, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 8.49e-05, |
|
"loss": 0.6822, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 8.54e-05, |
|
"loss": 0.4936, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 8.59e-05, |
|
"loss": 0.4652, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 8.640000000000001e-05, |
|
"loss": 0.3529, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 8.690000000000001e-05, |
|
"loss": 0.5115, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 8.740000000000001e-05, |
|
"loss": 0.5859, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 8.790000000000001e-05, |
|
"loss": 0.5069, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 8.840000000000001e-05, |
|
"loss": 0.2584, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 8.885e-05, |
|
"loss": 0.313, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 8.935e-05, |
|
"loss": 0.4103, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 8.985e-05, |
|
"loss": 0.3214, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_accuracy": 0.896, |
|
"eval_combined_score": 0.8819938675326909, |
|
"eval_f1": 0.8583787127091727, |
|
"eval_loss": 0.45495954155921936, |
|
"eval_precision": 0.8775967574215913, |
|
"eval_recall": 0.896, |
|
"eval_runtime": 6.1706, |
|
"eval_samples_per_second": 243.087, |
|
"eval_steps_per_second": 7.617, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 9.035e-05, |
|
"loss": 0.5097, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 9.085e-05, |
|
"loss": 0.4306, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 9.135e-05, |
|
"loss": 0.2573, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 9.185e-05, |
|
"loss": 0.3534, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 9.235000000000001e-05, |
|
"loss": 0.5012, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 9.285000000000001e-05, |
|
"loss": 0.7057, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 9.335e-05, |
|
"loss": 0.5385, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.385e-05, |
|
"loss": 0.2633, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 9.435e-05, |
|
"loss": 0.3853, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 9.485e-05, |
|
"loss": 0.4354, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 9.535e-05, |
|
"loss": 0.7459, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 9.585000000000001e-05, |
|
"loss": 0.2937, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 9.635000000000001e-05, |
|
"loss": 0.2932, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 9.685000000000001e-05, |
|
"loss": 0.4256, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 9.735000000000001e-05, |
|
"loss": 0.4336, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 9.785e-05, |
|
"loss": 0.4684, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 9.835e-05, |
|
"loss": 0.5437, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 9.884999999999999e-05, |
|
"loss": 0.3036, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 9.935e-05, |
|
"loss": 0.1528, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 9.985e-05, |
|
"loss": 0.8779, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.00010035, |
|
"loss": 0.53, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.00010085, |
|
"loss": 0.2656, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.00010135, |
|
"loss": 0.3895, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 0.00010185, |
|
"loss": 0.5943, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 0.00010235, |
|
"loss": 0.5021, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.00010284999999999999, |
|
"loss": 0.3804, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 0.00010335, |
|
"loss": 0.2204, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.00010385, |
|
"loss": 0.4421, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 0.0001043, |
|
"loss": 1.0409, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 0.00010480000000000001, |
|
"loss": 0.7521, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"eval_accuracy": 0.884, |
|
"eval_combined_score": 0.8540521114926303, |
|
"eval_f1": 0.8422803386258884, |
|
"eval_loss": 0.3819296956062317, |
|
"eval_precision": 0.8059281073446327, |
|
"eval_recall": 0.884, |
|
"eval_runtime": 6.0267, |
|
"eval_samples_per_second": 248.894, |
|
"eval_steps_per_second": 7.799, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00010530000000000001, |
|
"loss": 0.3117, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 0.00010580000000000001, |
|
"loss": 0.4548, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 0.00010630000000000001, |
|
"loss": 0.484, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 0.00010680000000000001, |
|
"loss": 0.4479, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 0.0001073, |
|
"loss": 0.3099, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.0001078, |
|
"loss": 0.4728, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.00010829999999999999, |
|
"loss": 0.5189, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.0001088, |
|
"loss": 0.5615, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 0.0001093, |
|
"loss": 0.536, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 0.0001098, |
|
"loss": 0.4329, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 0.0001103, |
|
"loss": 0.2304, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 0.0001108, |
|
"loss": 0.5139, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.0001113, |
|
"loss": 0.3353, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 0.0001118, |
|
"loss": 0.7493, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.0001123, |
|
"loss": 1.0483, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 0.0001128, |
|
"loss": 0.5422, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 0.0001133, |
|
"loss": 0.771, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 0.0001138, |
|
"loss": 0.841, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 0.0001143, |
|
"loss": 0.7316, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 0.0001148, |
|
"loss": 0.7394, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 0.0001153, |
|
"loss": 0.7084, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.0001158, |
|
"loss": 0.8033, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.00011630000000000001, |
|
"loss": 0.6616, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.0001168, |
|
"loss": 0.7601, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.0001173, |
|
"loss": 0.7986, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 0.0001178, |
|
"loss": 0.673, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 0.0001183, |
|
"loss": 0.6354, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 0.0001188, |
|
"loss": 0.617, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.0001193, |
|
"loss": 0.5532, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.00011980000000000001, |
|
"loss": 0.5048, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.6582097411155701, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.7153, |
|
"eval_samples_per_second": 262.451, |
|
"eval_steps_per_second": 8.223, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.00012030000000000001, |
|
"loss": 0.7655, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 0.00012080000000000001, |
|
"loss": 0.5875, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 0.00012130000000000001, |
|
"loss": 0.6008, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 0.0001218, |
|
"loss": 0.612, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 0.00012230000000000002, |
|
"loss": 0.6436, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.00012275, |
|
"loss": 0.3595, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.00012325000000000001, |
|
"loss": 0.4373, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.00012375, |
|
"loss": 0.8131, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.00012425, |
|
"loss": 0.7109, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 0.00012475, |
|
"loss": 0.5552, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 0.00012525, |
|
"loss": 0.4912, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.00012575, |
|
"loss": 0.5125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 0.00012625, |
|
"loss": 0.5537, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.00012675, |
|
"loss": 0.6721, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.00012725, |
|
"loss": 0.5703, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 0.00012775000000000002, |
|
"loss": 0.4654, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00012825, |
|
"loss": 0.4402, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 0.00012875, |
|
"loss": 0.4534, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.00012925, |
|
"loss": 0.5178, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 0.00012975, |
|
"loss": 0.5167, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 0.00013025, |
|
"loss": 0.4782, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.00013075, |
|
"loss": 0.4171, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.00013125000000000002, |
|
"loss": 0.3338, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 0.00013175, |
|
"loss": 0.523, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.00013225000000000002, |
|
"loss": 0.5211, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.00013275, |
|
"loss": 0.3958, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 0.00013325, |
|
"loss": 0.3883, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 0.00013375, |
|
"loss": 0.466, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 0.00013425, |
|
"loss": 0.563, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 0.00013475000000000002, |
|
"loss": 0.6435, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"eval_accuracy": 0.8466666666666667, |
|
"eval_combined_score": 0.820593120110759, |
|
"eval_f1": 0.8092415776379762, |
|
"eval_loss": 0.5365216135978699, |
|
"eval_precision": 0.7797975694717267, |
|
"eval_recall": 0.8466666666666667, |
|
"eval_runtime": 5.7927, |
|
"eval_samples_per_second": 258.944, |
|
"eval_steps_per_second": 8.114, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.00013525, |
|
"loss": 0.4351, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 0.00013575000000000002, |
|
"loss": 0.9678, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 0.00013625, |
|
"loss": 0.3769, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 0.00013675000000000002, |
|
"loss": 0.6706, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 0.00013725, |
|
"loss": 0.6711, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 0.00013775000000000001, |
|
"loss": 0.5355, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 0.00013825000000000003, |
|
"loss": 0.6193, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 0.00013875, |
|
"loss": 0.6431, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 0.00013925000000000002, |
|
"loss": 0.6239, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 0.00013975, |
|
"loss": 0.4647, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 0.00014025000000000002, |
|
"loss": 0.2059, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.00014074999999999998, |
|
"loss": 0.6128, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 0.00014125, |
|
"loss": 0.9306, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 0.00014175, |
|
"loss": 0.4907, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 0.00014225, |
|
"loss": 0.4597, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 0.00014275, |
|
"loss": 0.7004, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 0.00014324999999999999, |
|
"loss": 0.4713, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 0.00014375, |
|
"loss": 0.5127, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 0.00014424999999999998, |
|
"loss": 0.7406, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 0.00014475, |
|
"loss": 0.5231, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.00014524999999999998, |
|
"loss": 0.5013, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 0.00014575, |
|
"loss": 0.4772, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 0.00014625, |
|
"loss": 0.5269, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 0.00014675, |
|
"loss": 0.7292, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.00014725, |
|
"loss": 0.5534, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 0.00014774999999999999, |
|
"loss": 0.7513, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 0.00014825, |
|
"loss": 0.661, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 0.00014874999999999998, |
|
"loss": 0.8045, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 0.00014925, |
|
"loss": 0.8469, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 0.00014975, |
|
"loss": 0.9304, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6841020585693436, |
|
"eval_f1": 0.6288777491861415, |
|
"eval_loss": 0.757703959941864, |
|
"eval_precision": 0.6301971517578995, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.7335, |
|
"eval_samples_per_second": 261.622, |
|
"eval_steps_per_second": 8.197, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 0.00015025, |
|
"loss": 0.6005, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.00015075, |
|
"loss": 0.9853, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 0.00015125, |
|
"loss": 0.9841, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 0.00015175, |
|
"loss": 0.8429, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 0.00015225, |
|
"loss": 0.5311, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 0.00015275, |
|
"loss": 0.8929, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 0.00015325, |
|
"loss": 0.6568, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 0.00015375, |
|
"loss": 0.7808, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 0.00015425, |
|
"loss": 0.8071, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 0.00015475, |
|
"loss": 0.6082, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 0.00015525, |
|
"loss": 0.8183, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 0.00015575, |
|
"loss": 0.8457, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 0.00015625, |
|
"loss": 0.8111, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 0.00015675000000000002, |
|
"loss": 0.8024, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"learning_rate": 0.00015725, |
|
"loss": 0.5843, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.00015775, |
|
"loss": 0.7493, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.00015825, |
|
"loss": 0.6283, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 0.00015875, |
|
"loss": 0.7267, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 0.00015925, |
|
"loss": 0.8913, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 0.00015975, |
|
"loss": 0.8809, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 0.00016025000000000002, |
|
"loss": 0.7423, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 17.13, |
|
"learning_rate": 0.00016075, |
|
"loss": 0.7022, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 0.00016125000000000002, |
|
"loss": 0.672, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 0.00016175, |
|
"loss": 0.7071, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 0.00016225000000000001, |
|
"loss": 0.7613, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"learning_rate": 0.00016275, |
|
"loss": 0.7014, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.00016325, |
|
"loss": 0.7322, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 0.00016375000000000002, |
|
"loss": 0.8469, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 0.00016425, |
|
"loss": 0.647, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.00016475000000000002, |
|
"loss": 0.7902, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6841020585693436, |
|
"eval_f1": 0.6288777491861415, |
|
"eval_loss": 0.7684083580970764, |
|
"eval_precision": 0.6301971517578995, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.6811, |
|
"eval_samples_per_second": 264.032, |
|
"eval_steps_per_second": 8.273, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 0.00016525, |
|
"loss": 0.7717, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 0.00016575000000000002, |
|
"loss": 0.8894, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 0.00016625, |
|
"loss": 0.9062, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 0.00016675000000000001, |
|
"loss": 0.755, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 0.00016725000000000003, |
|
"loss": 0.6708, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 0.00016775, |
|
"loss": 0.8674, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 0.00016825000000000002, |
|
"loss": 0.7541, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 0.00016875, |
|
"loss": 0.8417, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 0.00016925000000000002, |
|
"loss": 0.8044, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 0.00016975, |
|
"loss": 0.7059, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 0.00017025000000000002, |
|
"loss": 0.7291, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 0.00017075, |
|
"loss": 0.8393, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 0.00017125000000000002, |
|
"loss": 0.6944, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 0.00017175000000000003, |
|
"loss": 0.7587, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 0.00017224999999999999, |
|
"loss": 0.8562, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.00017275, |
|
"loss": 0.8328, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 0.00017324999999999998, |
|
"loss": 0.7379, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 0.00017375, |
|
"loss": 0.8784, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.00017424999999999998, |
|
"loss": 0.8329, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 0.00017475, |
|
"loss": 0.6559, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 0.00017525, |
|
"loss": 0.8082, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 0.00017575, |
|
"loss": 0.7626, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 0.00017625, |
|
"loss": 0.7877, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 0.00017675, |
|
"loss": 0.7847, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 0.00017725, |
|
"loss": 0.5692, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 0.00017774999999999998, |
|
"loss": 0.8115, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.00017825, |
|
"loss": 0.7631, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 0.00017875, |
|
"loss": 0.7847, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 0.00017925, |
|
"loss": 0.7622, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 0.00017975, |
|
"loss": 0.6364, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6841020585693436, |
|
"eval_f1": 0.6288777491861415, |
|
"eval_loss": 0.7637550234794617, |
|
"eval_precision": 0.6301971517578995, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.9956, |
|
"eval_samples_per_second": 250.184, |
|
"eval_steps_per_second": 7.839, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.00018025, |
|
"loss": 0.8643, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 0.00018075, |
|
"loss": 0.656, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 0.00018125, |
|
"loss": 0.6252, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.00018175, |
|
"loss": 0.8049, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 0.00018225, |
|
"loss": 0.8622, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 0.00018275, |
|
"loss": 0.8585, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 0.00018325, |
|
"loss": 0.7333, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00018375, |
|
"loss": 0.7186, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 0.00018425, |
|
"loss": 0.7759, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 0.00018475, |
|
"loss": 0.7912, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 0.00018525, |
|
"loss": 0.8636, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.00018575000000000002, |
|
"loss": 0.8167, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 0.00018625, |
|
"loss": 0.5816, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 0.00018675, |
|
"loss": 0.8881, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 0.00018725, |
|
"loss": 0.8232, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00018775, |
|
"loss": 0.7109, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 0.00018825, |
|
"loss": 0.8016, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 0.00018875, |
|
"loss": 0.6983, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 0.00018925, |
|
"loss": 0.9032, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 0.00018975, |
|
"loss": 0.839, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 0.00019025000000000002, |
|
"loss": 0.8061, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 0.00019075, |
|
"loss": 0.7371, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 20.37, |
|
"learning_rate": 0.00019125000000000001, |
|
"loss": 0.7358, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 0.00019175, |
|
"loss": 0.6059, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 0.00019225, |
|
"loss": 0.8188, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 0.00019275, |
|
"loss": 0.7803, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 20.59, |
|
"learning_rate": 0.00019325, |
|
"loss": 0.7161, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 20.64, |
|
"learning_rate": 0.00019375000000000002, |
|
"loss": 0.6724, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 0.00019425, |
|
"loss": 0.7061, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 0.00019475000000000002, |
|
"loss": 0.6738, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"eval_accuracy": 0.7393333333333333, |
|
"eval_combined_score": 0.7056336725949668, |
|
"eval_f1": 0.6292089176424869, |
|
"eval_loss": 0.7768693566322327, |
|
"eval_precision": 0.7146591060707139, |
|
"eval_recall": 0.7393333333333333, |
|
"eval_runtime": 5.8568, |
|
"eval_samples_per_second": 256.113, |
|
"eval_steps_per_second": 8.025, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.00019525, |
|
"loss": 0.694, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 20.85, |
|
"learning_rate": 0.00019575000000000001, |
|
"loss": 1.0223, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"learning_rate": 0.00019625, |
|
"loss": 0.7801, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.00019675, |
|
"loss": 0.6794, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 0.00019725000000000002, |
|
"loss": 0.8515, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 0.00019775, |
|
"loss": 0.8019, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 0.00019825000000000002, |
|
"loss": 0.8918, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"learning_rate": 0.00019875, |
|
"loss": 0.7041, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 0.00019925000000000002, |
|
"loss": 0.7506, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 0.00019975, |
|
"loss": 0.9895, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 0.00020025000000000002, |
|
"loss": 0.6327, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 0.00020075000000000003, |
|
"loss": 0.7891, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 0.00020125, |
|
"loss": 0.6668, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 0.00020175000000000003, |
|
"loss": 0.7532, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 0.00020225, |
|
"loss": 0.7809, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.00020275000000000002, |
|
"loss": 0.6936, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"learning_rate": 0.00020324999999999998, |
|
"loss": 0.7121, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 0.00020375, |
|
"loss": 0.9925, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 0.00020425, |
|
"loss": 0.8201, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 0.00020475, |
|
"loss": 0.6287, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 0.00020525, |
|
"loss": 0.8232, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 21.91, |
|
"learning_rate": 0.00020575, |
|
"loss": 0.7285, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 0.00020625, |
|
"loss": 0.7564, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 0.00020674999999999998, |
|
"loss": 0.8331, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 0.00020725, |
|
"loss": 0.7732, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"learning_rate": 0.00020774999999999998, |
|
"loss": 0.7252, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"learning_rate": 0.00020825, |
|
"loss": 0.6347, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"learning_rate": 0.00020875, |
|
"loss": 0.7721, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 0.00020925, |
|
"loss": 0.7649, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 0.00020975, |
|
"loss": 0.8142, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"eval_accuracy": 0.7393333333333333, |
|
"eval_combined_score": 0.7056336725949668, |
|
"eval_f1": 0.6292089176424869, |
|
"eval_loss": 0.744327962398529, |
|
"eval_precision": 0.7146591060707139, |
|
"eval_recall": 0.7393333333333333, |
|
"eval_runtime": 5.7187, |
|
"eval_samples_per_second": 262.296, |
|
"eval_steps_per_second": 8.219, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 0.00021025, |
|
"loss": 0.7605, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"learning_rate": 0.00021075, |
|
"loss": 0.7106, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 0.00021124999999999998, |
|
"loss": 0.7094, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 0.00021175, |
|
"loss": 0.8416, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 0.00021225, |
|
"loss": 0.7271, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 22.66, |
|
"learning_rate": 0.00021275, |
|
"loss": 0.8397, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 22.71, |
|
"learning_rate": 0.00021325, |
|
"loss": 0.8801, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"learning_rate": 0.00021375, |
|
"loss": 0.8683, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"learning_rate": 0.00021425, |
|
"loss": 0.7017, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 22.87, |
|
"learning_rate": 0.00021475, |
|
"loss": 0.8673, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 0.00021525, |
|
"loss": 0.6106, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"learning_rate": 0.00021575, |
|
"loss": 0.7888, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 0.00021625, |
|
"loss": 0.7133, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 0.00021675, |
|
"loss": 0.7322, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 23.14, |
|
"learning_rate": 0.00021725, |
|
"loss": 0.5461, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 0.00021775, |
|
"loss": 0.7274, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 0.00021825, |
|
"loss": 1.1518, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 23.3, |
|
"learning_rate": 0.00021875, |
|
"loss": 0.8713, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 0.00021925000000000002, |
|
"loss": 0.8016, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 0.00021975, |
|
"loss": 0.7848, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"learning_rate": 0.00022025000000000001, |
|
"loss": 0.9202, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 0.00022075, |
|
"loss": 0.6641, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 0.00022125, |
|
"loss": 0.7703, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 0.00022175, |
|
"loss": 0.7311, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 0.00022225, |
|
"loss": 0.5956, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 0.00022275000000000002, |
|
"loss": 0.7111, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 23.78, |
|
"learning_rate": 0.00022325, |
|
"loss": 0.7672, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 23.83, |
|
"learning_rate": 0.00022375000000000002, |
|
"loss": 0.8842, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"learning_rate": 0.00022425, |
|
"loss": 0.8225, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 0.00022475000000000001, |
|
"loss": 0.8184, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"eval_accuracy": 0.7393333333333333, |
|
"eval_combined_score": 0.7056336725949668, |
|
"eval_f1": 0.6292089176424869, |
|
"eval_loss": 0.7635167837142944, |
|
"eval_precision": 0.7146591060707139, |
|
"eval_recall": 0.7393333333333333, |
|
"eval_runtime": 6.1889, |
|
"eval_samples_per_second": 242.371, |
|
"eval_steps_per_second": 7.594, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 0.00022525, |
|
"loss": 0.8185, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.00022575, |
|
"loss": 0.7461, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 24.1, |
|
"learning_rate": 0.00022625000000000002, |
|
"loss": 0.6885, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 24.15, |
|
"learning_rate": 0.00022675, |
|
"loss": 0.8025, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 0.00022725000000000002, |
|
"loss": 0.6607, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 0.00022775, |
|
"loss": 0.7011, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 0.00022825000000000002, |
|
"loss": 0.6394, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 0.00022875, |
|
"loss": 0.8336, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 0.00022925000000000002, |
|
"loss": 0.8099, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 24.47, |
|
"learning_rate": 0.00022975000000000003, |
|
"loss": 0.6199, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 0.00023025, |
|
"loss": 0.8079, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 24.57, |
|
"learning_rate": 0.00023075000000000003, |
|
"loss": 0.9195, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 24.63, |
|
"learning_rate": 0.00023125, |
|
"loss": 0.8569, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 24.68, |
|
"learning_rate": 0.00023175000000000002, |
|
"loss": 0.7448, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"learning_rate": 0.00023225, |
|
"loss": 0.7706, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 0.00023275000000000002, |
|
"loss": 0.6306, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 24.84, |
|
"learning_rate": 0.00023325, |
|
"loss": 0.6033, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 0.00023375000000000002, |
|
"loss": 0.9749, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 0.00023425000000000003, |
|
"loss": 0.7738, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.00023475, |
|
"loss": 0.8824, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.05, |
|
"learning_rate": 0.00023525, |
|
"loss": 0.8072, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 0.00023574999999999998, |
|
"loss": 0.7295, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 25.16, |
|
"learning_rate": 0.00023625, |
|
"loss": 0.8466, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 0.00023674999999999998, |
|
"loss": 0.7498, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 25.27, |
|
"learning_rate": 0.00023725, |
|
"loss": 0.7199, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 0.00023775, |
|
"loss": 0.7826, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 25.37, |
|
"learning_rate": 0.00023825, |
|
"loss": 0.7767, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 25.43, |
|
"learning_rate": 0.00023875, |
|
"loss": 0.71, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 0.00023925, |
|
"loss": 0.8495, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 0.00023975, |
|
"loss": 0.7562, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"eval_accuracy": 0.7393333333333333, |
|
"eval_combined_score": 0.7056336725949668, |
|
"eval_f1": 0.6292089176424869, |
|
"eval_loss": 0.7467172741889954, |
|
"eval_precision": 0.7146591060707139, |
|
"eval_recall": 0.7393333333333333, |
|
"eval_runtime": 5.7487, |
|
"eval_samples_per_second": 260.927, |
|
"eval_steps_per_second": 8.176, |
|
"step": 4800 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5640, |
|
"num_train_epochs": 30, |
|
"save_steps": 300, |
|
"total_flos": 1.00772435460096e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|