{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.53191489361702, "eval_steps": 300, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 5e-07, "loss": 1.0874, "step": 10 }, { "epoch": 0.11, "learning_rate": 1e-06, "loss": 0.9487, "step": 20 }, { "epoch": 0.16, "learning_rate": 1.5e-06, "loss": 0.7586, "step": 30 }, { "epoch": 0.21, "learning_rate": 2e-06, "loss": 0.7225, "step": 40 }, { "epoch": 0.27, "learning_rate": 2.5e-06, "loss": 0.7364, "step": 50 }, { "epoch": 0.32, "learning_rate": 3e-06, "loss": 0.7265, "step": 60 }, { "epoch": 0.37, "learning_rate": 3.5e-06, "loss": 0.7267, "step": 70 }, { "epoch": 0.43, "learning_rate": 4e-06, "loss": 0.5697, "step": 80 }, { "epoch": 0.48, "learning_rate": 4.5e-06, "loss": 1.018, "step": 90 }, { "epoch": 0.53, "learning_rate": 5e-06, "loss": 0.7875, "step": 100 }, { "epoch": 0.59, "learning_rate": 5.5e-06, "loss": 0.8242, "step": 110 }, { "epoch": 0.64, "learning_rate": 6e-06, "loss": 0.8034, "step": 120 }, { "epoch": 0.69, "learning_rate": 6.5e-06, "loss": 0.7717, "step": 130 }, { "epoch": 0.74, "learning_rate": 7e-06, "loss": 0.8337, "step": 140 }, { "epoch": 0.8, "learning_rate": 7.5e-06, "loss": 0.6884, "step": 150 }, { "epoch": 0.85, "learning_rate": 8e-06, "loss": 0.9129, "step": 160 }, { "epoch": 0.9, "learning_rate": 8.500000000000002e-06, "loss": 0.5637, "step": 170 }, { "epoch": 0.96, "learning_rate": 9e-06, "loss": 1.0458, "step": 180 }, { "epoch": 1.01, "learning_rate": 9.5e-06, "loss": 0.9414, "step": 190 }, { "epoch": 1.06, "learning_rate": 1e-05, "loss": 0.6379, "step": 200 }, { "epoch": 1.12, "learning_rate": 1.0500000000000001e-05, "loss": 0.9249, "step": 210 }, { "epoch": 1.17, "learning_rate": 1.1e-05, "loss": 0.6944, "step": 220 }, { "epoch": 1.22, "learning_rate": 1.15e-05, "loss": 0.9221, "step": 230 }, { "epoch": 1.28, "learning_rate": 1.2e-05, "loss": 0.6475, "step": 240 }, { "epoch": 1.33, "learning_rate": 1.25e-05, "loss": 0.7748, "step": 250 }, { "epoch": 1.38, "learning_rate": 1.3e-05, "loss": 0.8705, "step": 260 }, { "epoch": 1.44, "learning_rate": 1.35e-05, "loss": 0.7737, "step": 270 }, { "epoch": 1.49, "learning_rate": 1.4e-05, "loss": 0.8643, "step": 280 }, { "epoch": 1.54, "learning_rate": 1.4500000000000002e-05, "loss": 0.8428, "step": 290 }, { "epoch": 1.6, "learning_rate": 1.5e-05, "loss": 0.6785, "step": 300 }, { "epoch": 1.6, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7930460572242737, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0663, "eval_samples_per_second": 247.266, "eval_steps_per_second": 7.748, "step": 300 }, { "epoch": 1.65, "learning_rate": 1.55e-05, "loss": 0.6076, "step": 310 }, { "epoch": 1.7, "learning_rate": 1.6e-05, "loss": 0.5963, "step": 320 }, { "epoch": 1.76, "learning_rate": 1.65e-05, "loss": 0.6626, "step": 330 }, { "epoch": 1.81, "learning_rate": 1.7000000000000003e-05, "loss": 0.8379, "step": 340 }, { "epoch": 1.86, "learning_rate": 1.7500000000000002e-05, "loss": 0.8851, "step": 350 }, { "epoch": 1.91, "learning_rate": 1.8e-05, "loss": 0.7489, "step": 360 }, { "epoch": 1.97, "learning_rate": 1.85e-05, "loss": 0.7573, "step": 370 }, { "epoch": 2.02, "learning_rate": 1.9e-05, "loss": 0.8018, "step": 380 }, { "epoch": 2.07, "learning_rate": 1.95e-05, "loss": 0.6645, "step": 390 }, { "epoch": 2.13, "learning_rate": 2e-05, "loss": 0.8677, "step": 400 }, { "epoch": 2.18, "learning_rate": 2.05e-05, "loss": 0.7478, "step": 410 }, { "epoch": 2.23, "learning_rate": 2.1000000000000002e-05, "loss": 0.8551, "step": 420 }, { "epoch": 2.29, "learning_rate": 2.1499999999999997e-05, "loss": 0.9323, "step": 430 }, { "epoch": 2.34, "learning_rate": 2.2e-05, "loss": 0.7536, "step": 440 }, { "epoch": 2.39, "learning_rate": 2.2499999999999998e-05, "loss": 0.5336, "step": 450 }, { "epoch": 2.45, "learning_rate": 2.3e-05, "loss": 0.8955, "step": 460 }, { "epoch": 2.5, "learning_rate": 2.3500000000000002e-05, "loss": 0.7926, "step": 470 }, { "epoch": 2.55, "learning_rate": 2.4e-05, "loss": 0.5713, "step": 480 }, { "epoch": 2.61, "learning_rate": 2.4500000000000003e-05, "loss": 0.8568, "step": 490 }, { "epoch": 2.66, "learning_rate": 2.5e-05, "loss": 0.6348, "step": 500 }, { "epoch": 2.71, "learning_rate": 2.55e-05, "loss": 0.6223, "step": 510 }, { "epoch": 2.77, "learning_rate": 2.6e-05, "loss": 0.7579, "step": 520 }, { "epoch": 2.82, "learning_rate": 2.65e-05, "loss": 0.6325, "step": 530 }, { "epoch": 2.87, "learning_rate": 2.7e-05, "loss": 0.7276, "step": 540 }, { "epoch": 2.93, "learning_rate": 2.75e-05, "loss": 0.8766, "step": 550 }, { "epoch": 2.98, "learning_rate": 2.8e-05, "loss": 0.7107, "step": 560 }, { "epoch": 3.03, "learning_rate": 2.85e-05, "loss": 0.5904, "step": 570 }, { "epoch": 3.09, "learning_rate": 2.9000000000000004e-05, "loss": 0.7125, "step": 580 }, { "epoch": 3.14, "learning_rate": 2.95e-05, "loss": 0.755, "step": 590 }, { "epoch": 3.19, "learning_rate": 3e-05, "loss": 0.5583, "step": 600 }, { "epoch": 3.19, "eval_accuracy": 0.7613333333333333, "eval_combined_score": 0.7403762607674487, "eval_f1": 0.7316181693637903, "eval_loss": 0.6910097599029541, "eval_precision": 0.7072202070393374, "eval_recall": 0.7613333333333333, "eval_runtime": 6.0882, "eval_samples_per_second": 246.376, "eval_steps_per_second": 7.72, "step": 600 }, { "epoch": 3.24, "learning_rate": 3.05e-05, "loss": 0.6527, "step": 610 }, { "epoch": 3.3, "learning_rate": 3.1e-05, "loss": 0.7676, "step": 620 }, { "epoch": 3.35, "learning_rate": 3.15e-05, "loss": 0.766, "step": 630 }, { "epoch": 3.4, "learning_rate": 3.2e-05, "loss": 0.6217, "step": 640 }, { "epoch": 3.46, "learning_rate": 3.2500000000000004e-05, "loss": 0.5405, "step": 650 }, { "epoch": 3.51, "learning_rate": 3.3e-05, "loss": 0.7951, "step": 660 }, { "epoch": 3.56, "learning_rate": 3.35e-05, "loss": 0.552, "step": 670 }, { "epoch": 3.62, "learning_rate": 3.4000000000000007e-05, "loss": 0.6521, "step": 680 }, { "epoch": 3.67, "learning_rate": 3.4500000000000005e-05, "loss": 0.6814, "step": 690 }, { "epoch": 3.72, "learning_rate": 3.5000000000000004e-05, "loss": 0.3992, "step": 700 }, { "epoch": 3.78, "learning_rate": 3.5499999999999996e-05, "loss": 0.7018, "step": 710 }, { "epoch": 3.83, "learning_rate": 3.6e-05, "loss": 0.664, "step": 720 }, { "epoch": 3.88, "learning_rate": 3.65e-05, "loss": 0.6891, "step": 730 }, { "epoch": 3.94, "learning_rate": 3.7e-05, "loss": 0.6063, "step": 740 }, { "epoch": 3.99, "learning_rate": 3.75e-05, "loss": 0.6543, "step": 750 }, { "epoch": 4.04, "learning_rate": 3.8e-05, "loss": 0.5638, "step": 760 }, { "epoch": 4.1, "learning_rate": 3.85e-05, "loss": 0.6984, "step": 770 }, { "epoch": 4.15, "learning_rate": 3.9e-05, "loss": 0.7148, "step": 780 }, { "epoch": 4.2, "learning_rate": 3.95e-05, "loss": 0.4915, "step": 790 }, { "epoch": 4.26, "learning_rate": 4e-05, "loss": 0.648, "step": 800 }, { "epoch": 4.31, "learning_rate": 4.05e-05, "loss": 0.4527, "step": 810 }, { "epoch": 4.36, "learning_rate": 4.1e-05, "loss": 0.551, "step": 820 }, { "epoch": 4.41, "learning_rate": 4.1500000000000006e-05, "loss": 0.5909, "step": 830 }, { "epoch": 4.47, "learning_rate": 4.2000000000000004e-05, "loss": 0.603, "step": 840 }, { "epoch": 4.52, "learning_rate": 4.25e-05, "loss": 1.0745, "step": 850 }, { "epoch": 4.57, "learning_rate": 4.2999999999999995e-05, "loss": 0.6697, "step": 860 }, { "epoch": 4.63, "learning_rate": 4.35e-05, "loss": 0.8374, "step": 870 }, { "epoch": 4.68, "learning_rate": 4.4e-05, "loss": 0.4891, "step": 880 }, { "epoch": 4.73, "learning_rate": 4.45e-05, "loss": 0.5956, "step": 890 }, { "epoch": 4.79, "learning_rate": 4.4999999999999996e-05, "loss": 0.7857, "step": 900 }, { "epoch": 4.79, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.6514685153961182, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9473, "eval_samples_per_second": 252.216, "eval_steps_per_second": 7.903, "step": 900 }, { "epoch": 4.84, "learning_rate": 4.55e-05, "loss": 0.8566, "step": 910 }, { "epoch": 4.89, "learning_rate": 4.6e-05, "loss": 0.4698, "step": 920 }, { "epoch": 4.95, "learning_rate": 4.65e-05, "loss": 0.7224, "step": 930 }, { "epoch": 5.0, "learning_rate": 4.7000000000000004e-05, "loss": 0.5879, "step": 940 }, { "epoch": 5.05, "learning_rate": 4.75e-05, "loss": 0.3592, "step": 950 }, { "epoch": 5.11, "learning_rate": 4.8e-05, "loss": 0.513, "step": 960 }, { "epoch": 5.16, "learning_rate": 4.85e-05, "loss": 0.6167, "step": 970 }, { "epoch": 5.21, "learning_rate": 4.9000000000000005e-05, "loss": 0.5706, "step": 980 }, { "epoch": 5.27, "learning_rate": 4.9500000000000004e-05, "loss": 0.5523, "step": 990 }, { "epoch": 5.32, "learning_rate": 5e-05, "loss": 0.6565, "step": 1000 }, { "epoch": 5.37, "learning_rate": 5.05e-05, "loss": 0.4634, "step": 1010 }, { "epoch": 5.43, "learning_rate": 5.1e-05, "loss": 0.634, "step": 1020 }, { "epoch": 5.48, "learning_rate": 5.15e-05, "loss": 0.5472, "step": 1030 }, { "epoch": 5.53, "learning_rate": 5.2e-05, "loss": 0.595, "step": 1040 }, { "epoch": 5.59, "learning_rate": 5.25e-05, "loss": 0.4889, "step": 1050 }, { "epoch": 5.64, "learning_rate": 5.3e-05, "loss": 0.5884, "step": 1060 }, { "epoch": 5.69, "learning_rate": 5.35e-05, "loss": 0.4149, "step": 1070 }, { "epoch": 5.74, "learning_rate": 5.4e-05, "loss": 0.7573, "step": 1080 }, { "epoch": 5.8, "learning_rate": 5.45e-05, "loss": 0.599, "step": 1090 }, { "epoch": 5.85, "learning_rate": 5.5e-05, "loss": 0.366, "step": 1100 }, { "epoch": 5.9, "learning_rate": 5.55e-05, "loss": 0.4906, "step": 1110 }, { "epoch": 5.96, "learning_rate": 5.6e-05, "loss": 0.4378, "step": 1120 }, { "epoch": 6.01, "learning_rate": 5.6500000000000005e-05, "loss": 0.4886, "step": 1130 }, { "epoch": 6.06, "learning_rate": 5.7e-05, "loss": 0.342, "step": 1140 }, { "epoch": 6.12, "learning_rate": 5.75e-05, "loss": 0.7803, "step": 1150 }, { "epoch": 6.17, "learning_rate": 5.800000000000001e-05, "loss": 0.3435, "step": 1160 }, { "epoch": 6.22, "learning_rate": 5.8500000000000006e-05, "loss": 0.6149, "step": 1170 }, { "epoch": 6.28, "learning_rate": 5.9e-05, "loss": 0.3026, "step": 1180 }, { "epoch": 6.33, "learning_rate": 5.9499999999999996e-05, "loss": 0.9387, "step": 1190 }, { "epoch": 6.38, "learning_rate": 6e-05, "loss": 0.6309, "step": 1200 }, { "epoch": 6.38, "eval_accuracy": 0.848, "eval_combined_score": 0.8403047916631027, "eval_f1": 0.8270449473875959, "eval_loss": 0.5592399835586548, "eval_precision": 0.8381742192648148, "eval_recall": 0.848, "eval_runtime": 5.9701, "eval_samples_per_second": 251.251, "eval_steps_per_second": 7.873, "step": 1200 }, { "epoch": 6.44, "learning_rate": 6.05e-05, "loss": 0.4672, "step": 1210 }, { "epoch": 6.49, "learning_rate": 6.1e-05, "loss": 0.4909, "step": 1220 }, { "epoch": 6.54, "learning_rate": 6.15e-05, "loss": 0.5007, "step": 1230 }, { "epoch": 6.6, "learning_rate": 6.2e-05, "loss": 0.4318, "step": 1240 }, { "epoch": 6.65, "learning_rate": 6.25e-05, "loss": 0.3623, "step": 1250 }, { "epoch": 6.7, "learning_rate": 6.3e-05, "loss": 0.3294, "step": 1260 }, { "epoch": 6.76, "learning_rate": 6.35e-05, "loss": 0.7501, "step": 1270 }, { "epoch": 6.81, "learning_rate": 6.4e-05, "loss": 0.6436, "step": 1280 }, { "epoch": 6.86, "learning_rate": 6.450000000000001e-05, "loss": 0.525, "step": 1290 }, { "epoch": 6.91, "learning_rate": 6.500000000000001e-05, "loss": 0.6047, "step": 1300 }, { "epoch": 6.97, "learning_rate": 6.55e-05, "loss": 0.3636, "step": 1310 }, { "epoch": 7.02, "learning_rate": 6.6e-05, "loss": 0.5634, "step": 1320 }, { "epoch": 7.07, "learning_rate": 6.65e-05, "loss": 0.3846, "step": 1330 }, { "epoch": 7.13, "learning_rate": 6.7e-05, "loss": 0.8436, "step": 1340 }, { "epoch": 7.18, "learning_rate": 6.75e-05, "loss": 0.4762, "step": 1350 }, { "epoch": 7.23, "learning_rate": 6.800000000000001e-05, "loss": 0.5856, "step": 1360 }, { "epoch": 7.29, "learning_rate": 6.850000000000001e-05, "loss": 0.4042, "step": 1370 }, { "epoch": 7.34, "learning_rate": 6.900000000000001e-05, "loss": 0.475, "step": 1380 }, { "epoch": 7.39, "learning_rate": 6.950000000000001e-05, "loss": 0.3535, "step": 1390 }, { "epoch": 7.45, "learning_rate": 7.000000000000001e-05, "loss": 0.2399, "step": 1400 }, { "epoch": 7.5, "learning_rate": 7.049999999999999e-05, "loss": 0.316, "step": 1410 }, { "epoch": 7.55, "learning_rate": 7.095e-05, "loss": 0.438, "step": 1420 }, { "epoch": 7.61, "learning_rate": 7.145e-05, "loss": 0.2673, "step": 1430 }, { "epoch": 7.66, "learning_rate": 7.195e-05, "loss": 0.4286, "step": 1440 }, { "epoch": 7.71, "learning_rate": 7.245e-05, "loss": 0.3655, "step": 1450 }, { "epoch": 7.77, "learning_rate": 7.295e-05, "loss": 0.434, "step": 1460 }, { "epoch": 7.82, "learning_rate": 7.345e-05, "loss": 0.6143, "step": 1470 }, { "epoch": 7.87, "learning_rate": 7.395000000000001e-05, "loss": 0.5471, "step": 1480 }, { "epoch": 7.93, "learning_rate": 7.445000000000001e-05, "loss": 0.299, "step": 1490 }, { "epoch": 7.98, "learning_rate": 7.495e-05, "loss": 0.2216, "step": 1500 }, { "epoch": 7.98, "eval_accuracy": 0.8773333333333333, "eval_combined_score": 0.8618647537688802, "eval_f1": 0.8431508113173474, "eval_loss": 0.5708244442939758, "eval_precision": 0.8496415370915067, "eval_recall": 0.8773333333333333, "eval_runtime": 6.0286, "eval_samples_per_second": 248.816, "eval_steps_per_second": 7.796, "step": 1500 }, { "epoch": 8.03, "learning_rate": 7.545e-05, "loss": 0.5517, "step": 1510 }, { "epoch": 8.09, "learning_rate": 7.595e-05, "loss": 0.3237, "step": 1520 }, { "epoch": 8.14, "learning_rate": 7.645e-05, "loss": 0.3945, "step": 1530 }, { "epoch": 8.19, "learning_rate": 7.695e-05, "loss": 0.3996, "step": 1540 }, { "epoch": 8.24, "learning_rate": 7.745000000000001e-05, "loss": 0.3644, "step": 1550 }, { "epoch": 8.3, "learning_rate": 7.795000000000001e-05, "loss": 0.6767, "step": 1560 }, { "epoch": 8.35, "learning_rate": 7.845000000000001e-05, "loss": 0.434, "step": 1570 }, { "epoch": 8.4, "learning_rate": 7.895000000000001e-05, "loss": 0.4047, "step": 1580 }, { "epoch": 8.46, "learning_rate": 7.945e-05, "loss": 0.3844, "step": 1590 }, { "epoch": 8.51, "learning_rate": 7.994999999999999e-05, "loss": 0.2656, "step": 1600 }, { "epoch": 8.56, "learning_rate": 8.044999999999999e-05, "loss": 0.3318, "step": 1610 }, { "epoch": 8.62, "learning_rate": 8.095e-05, "loss": 0.4627, "step": 1620 }, { "epoch": 8.67, "learning_rate": 8.145e-05, "loss": 0.6657, "step": 1630 }, { "epoch": 8.72, "learning_rate": 8.195e-05, "loss": 0.353, "step": 1640 }, { "epoch": 8.78, "learning_rate": 8.245e-05, "loss": 0.4543, "step": 1650 }, { "epoch": 8.83, "learning_rate": 8.295e-05, "loss": 0.4121, "step": 1660 }, { "epoch": 8.88, "learning_rate": 8.340000000000001e-05, "loss": 0.2792, "step": 1670 }, { "epoch": 8.94, "learning_rate": 8.39e-05, "loss": 0.3618, "step": 1680 }, { "epoch": 8.99, "learning_rate": 8.44e-05, "loss": 0.4922, "step": 1690 }, { "epoch": 9.04, "learning_rate": 8.49e-05, "loss": 0.6822, "step": 1700 }, { "epoch": 9.1, "learning_rate": 8.54e-05, "loss": 0.4936, "step": 1710 }, { "epoch": 9.15, "learning_rate": 8.59e-05, "loss": 0.4652, "step": 1720 }, { "epoch": 9.2, "learning_rate": 8.640000000000001e-05, "loss": 0.3529, "step": 1730 }, { "epoch": 9.26, "learning_rate": 8.690000000000001e-05, "loss": 0.5115, "step": 1740 }, { "epoch": 9.31, "learning_rate": 8.740000000000001e-05, "loss": 0.5859, "step": 1750 }, { "epoch": 9.36, "learning_rate": 8.790000000000001e-05, "loss": 0.5069, "step": 1760 }, { "epoch": 9.41, "learning_rate": 8.840000000000001e-05, "loss": 0.2584, "step": 1770 }, { "epoch": 9.47, "learning_rate": 8.885e-05, "loss": 0.313, "step": 1780 }, { "epoch": 9.52, "learning_rate": 8.935e-05, "loss": 0.4103, "step": 1790 }, { "epoch": 9.57, "learning_rate": 8.985e-05, "loss": 0.3214, "step": 1800 }, { "epoch": 9.57, "eval_accuracy": 0.896, "eval_combined_score": 0.8819938675326909, "eval_f1": 0.8583787127091727, "eval_loss": 0.45495954155921936, "eval_precision": 0.8775967574215913, "eval_recall": 0.896, "eval_runtime": 6.1706, "eval_samples_per_second": 243.087, "eval_steps_per_second": 7.617, "step": 1800 }, { "epoch": 9.63, "learning_rate": 9.035e-05, "loss": 0.5097, "step": 1810 }, { "epoch": 9.68, "learning_rate": 9.085e-05, "loss": 0.4306, "step": 1820 }, { "epoch": 9.73, "learning_rate": 9.135e-05, "loss": 0.2573, "step": 1830 }, { "epoch": 9.79, "learning_rate": 9.185e-05, "loss": 0.3534, "step": 1840 }, { "epoch": 9.84, "learning_rate": 9.235000000000001e-05, "loss": 0.5012, "step": 1850 }, { "epoch": 9.89, "learning_rate": 9.285000000000001e-05, "loss": 0.7057, "step": 1860 }, { "epoch": 9.95, "learning_rate": 9.335e-05, "loss": 0.5385, "step": 1870 }, { "epoch": 10.0, "learning_rate": 9.385e-05, "loss": 0.2633, "step": 1880 }, { "epoch": 10.05, "learning_rate": 9.435e-05, "loss": 0.3853, "step": 1890 }, { "epoch": 10.11, "learning_rate": 9.485e-05, "loss": 0.4354, "step": 1900 }, { "epoch": 10.16, "learning_rate": 9.535e-05, "loss": 0.7459, "step": 1910 }, { "epoch": 10.21, "learning_rate": 9.585000000000001e-05, "loss": 0.2937, "step": 1920 }, { "epoch": 10.27, "learning_rate": 9.635000000000001e-05, "loss": 0.2932, "step": 1930 }, { "epoch": 10.32, "learning_rate": 9.685000000000001e-05, "loss": 0.4256, "step": 1940 }, { "epoch": 10.37, "learning_rate": 9.735000000000001e-05, "loss": 0.4336, "step": 1950 }, { "epoch": 10.43, "learning_rate": 9.785e-05, "loss": 0.4684, "step": 1960 }, { "epoch": 10.48, "learning_rate": 9.835e-05, "loss": 0.5437, "step": 1970 }, { "epoch": 10.53, "learning_rate": 9.884999999999999e-05, "loss": 0.3036, "step": 1980 }, { "epoch": 10.59, "learning_rate": 9.935e-05, "loss": 0.1528, "step": 1990 }, { "epoch": 10.64, "learning_rate": 9.985e-05, "loss": 0.8779, "step": 2000 }, { "epoch": 10.69, "learning_rate": 0.00010035, "loss": 0.53, "step": 2010 }, { "epoch": 10.74, "learning_rate": 0.00010085, "loss": 0.2656, "step": 2020 }, { "epoch": 10.8, "learning_rate": 0.00010135, "loss": 0.3895, "step": 2030 }, { "epoch": 10.85, "learning_rate": 0.00010185, "loss": 0.5943, "step": 2040 }, { "epoch": 10.9, "learning_rate": 0.00010235, "loss": 0.5021, "step": 2050 }, { "epoch": 10.96, "learning_rate": 0.00010284999999999999, "loss": 0.3804, "step": 2060 }, { "epoch": 11.01, "learning_rate": 0.00010335, "loss": 0.2204, "step": 2070 }, { "epoch": 11.06, "learning_rate": 0.00010385, "loss": 0.4421, "step": 2080 }, { "epoch": 11.12, "learning_rate": 0.0001043, "loss": 1.0409, "step": 2090 }, { "epoch": 11.17, "learning_rate": 0.00010480000000000001, "loss": 0.7521, "step": 2100 }, { "epoch": 11.17, "eval_accuracy": 0.884, "eval_combined_score": 0.8540521114926303, "eval_f1": 0.8422803386258884, "eval_loss": 0.3819296956062317, "eval_precision": 0.8059281073446327, "eval_recall": 0.884, "eval_runtime": 6.0267, "eval_samples_per_second": 248.894, "eval_steps_per_second": 7.799, "step": 2100 }, { "epoch": 11.22, "learning_rate": 0.00010530000000000001, "loss": 0.3117, "step": 2110 }, { "epoch": 11.28, "learning_rate": 0.00010580000000000001, "loss": 0.4548, "step": 2120 }, { "epoch": 11.33, "learning_rate": 0.00010630000000000001, "loss": 0.484, "step": 2130 }, { "epoch": 11.38, "learning_rate": 0.00010680000000000001, "loss": 0.4479, "step": 2140 }, { "epoch": 11.44, "learning_rate": 0.0001073, "loss": 0.3099, "step": 2150 }, { "epoch": 11.49, "learning_rate": 0.0001078, "loss": 0.4728, "step": 2160 }, { "epoch": 11.54, "learning_rate": 0.00010829999999999999, "loss": 0.5189, "step": 2170 }, { "epoch": 11.6, "learning_rate": 0.0001088, "loss": 0.5615, "step": 2180 }, { "epoch": 11.65, "learning_rate": 0.0001093, "loss": 0.536, "step": 2190 }, { "epoch": 11.7, "learning_rate": 0.0001098, "loss": 0.4329, "step": 2200 }, { "epoch": 11.76, "learning_rate": 0.0001103, "loss": 0.2304, "step": 2210 }, { "epoch": 11.81, "learning_rate": 0.0001108, "loss": 0.5139, "step": 2220 }, { "epoch": 11.86, "learning_rate": 0.0001113, "loss": 0.3353, "step": 2230 }, { "epoch": 11.91, "learning_rate": 0.0001118, "loss": 0.7493, "step": 2240 }, { "epoch": 11.97, "learning_rate": 0.0001123, "loss": 1.0483, "step": 2250 }, { "epoch": 12.02, "learning_rate": 0.0001128, "loss": 0.5422, "step": 2260 }, { "epoch": 12.07, "learning_rate": 0.0001133, "loss": 0.771, "step": 2270 }, { "epoch": 12.13, "learning_rate": 0.0001138, "loss": 0.841, "step": 2280 }, { "epoch": 12.18, "learning_rate": 0.0001143, "loss": 0.7316, "step": 2290 }, { "epoch": 12.23, "learning_rate": 0.0001148, "loss": 0.7394, "step": 2300 }, { "epoch": 12.29, "learning_rate": 0.0001153, "loss": 0.7084, "step": 2310 }, { "epoch": 12.34, "learning_rate": 0.0001158, "loss": 0.8033, "step": 2320 }, { "epoch": 12.39, "learning_rate": 0.00011630000000000001, "loss": 0.6616, "step": 2330 }, { "epoch": 12.45, "learning_rate": 0.0001168, "loss": 0.7601, "step": 2340 }, { "epoch": 12.5, "learning_rate": 0.0001173, "loss": 0.7986, "step": 2350 }, { "epoch": 12.55, "learning_rate": 0.0001178, "loss": 0.673, "step": 2360 }, { "epoch": 12.61, "learning_rate": 0.0001183, "loss": 0.6354, "step": 2370 }, { "epoch": 12.66, "learning_rate": 0.0001188, "loss": 0.617, "step": 2380 }, { "epoch": 12.71, "learning_rate": 0.0001193, "loss": 0.5532, "step": 2390 }, { "epoch": 12.77, "learning_rate": 0.00011980000000000001, "loss": 0.5048, "step": 2400 }, { "epoch": 12.77, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.6582097411155701, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.7153, "eval_samples_per_second": 262.451, "eval_steps_per_second": 8.223, "step": 2400 }, { "epoch": 12.82, "learning_rate": 0.00012030000000000001, "loss": 0.7655, "step": 2410 }, { "epoch": 12.87, "learning_rate": 0.00012080000000000001, "loss": 0.5875, "step": 2420 }, { "epoch": 12.93, "learning_rate": 0.00012130000000000001, "loss": 0.6008, "step": 2430 }, { "epoch": 12.98, "learning_rate": 0.0001218, "loss": 0.612, "step": 2440 }, { "epoch": 13.03, "learning_rate": 0.00012230000000000002, "loss": 0.6436, "step": 2450 }, { "epoch": 13.09, "learning_rate": 0.00012275, "loss": 0.3595, "step": 2460 }, { "epoch": 13.14, "learning_rate": 0.00012325000000000001, "loss": 0.4373, "step": 2470 }, { "epoch": 13.19, "learning_rate": 0.00012375, "loss": 0.8131, "step": 2480 }, { "epoch": 13.24, "learning_rate": 0.00012425, "loss": 0.7109, "step": 2490 }, { "epoch": 13.3, "learning_rate": 0.00012475, "loss": 0.5552, "step": 2500 }, { "epoch": 13.35, "learning_rate": 0.00012525, "loss": 0.4912, "step": 2510 }, { "epoch": 13.4, "learning_rate": 0.00012575, "loss": 0.5125, "step": 2520 }, { "epoch": 13.46, "learning_rate": 0.00012625, "loss": 0.5537, "step": 2530 }, { "epoch": 13.51, "learning_rate": 0.00012675, "loss": 0.6721, "step": 2540 }, { "epoch": 13.56, "learning_rate": 0.00012725, "loss": 0.5703, "step": 2550 }, { "epoch": 13.62, "learning_rate": 0.00012775000000000002, "loss": 0.4654, "step": 2560 }, { "epoch": 13.67, "learning_rate": 0.00012825, "loss": 0.4402, "step": 2570 }, { "epoch": 13.72, "learning_rate": 0.00012875, "loss": 0.4534, "step": 2580 }, { "epoch": 13.78, "learning_rate": 0.00012925, "loss": 0.5178, "step": 2590 }, { "epoch": 13.83, "learning_rate": 0.00012975, "loss": 0.5167, "step": 2600 }, { "epoch": 13.88, "learning_rate": 0.00013025, "loss": 0.4782, "step": 2610 }, { "epoch": 13.94, "learning_rate": 0.00013075, "loss": 0.4171, "step": 2620 }, { "epoch": 13.99, "learning_rate": 0.00013125000000000002, "loss": 0.3338, "step": 2630 }, { "epoch": 14.04, "learning_rate": 0.00013175, "loss": 0.523, "step": 2640 }, { "epoch": 14.1, "learning_rate": 0.00013225000000000002, "loss": 0.5211, "step": 2650 }, { "epoch": 14.15, "learning_rate": 0.00013275, "loss": 0.3958, "step": 2660 }, { "epoch": 14.2, "learning_rate": 0.00013325, "loss": 0.3883, "step": 2670 }, { "epoch": 14.26, "learning_rate": 0.00013375, "loss": 0.466, "step": 2680 }, { "epoch": 14.31, "learning_rate": 0.00013425, "loss": 0.563, "step": 2690 }, { "epoch": 14.36, "learning_rate": 0.00013475000000000002, "loss": 0.6435, "step": 2700 }, { "epoch": 14.36, "eval_accuracy": 0.8466666666666667, "eval_combined_score": 0.820593120110759, "eval_f1": 0.8092415776379762, "eval_loss": 0.5365216135978699, "eval_precision": 0.7797975694717267, "eval_recall": 0.8466666666666667, "eval_runtime": 5.7927, "eval_samples_per_second": 258.944, "eval_steps_per_second": 8.114, "step": 2700 }, { "epoch": 14.41, "learning_rate": 0.00013525, "loss": 0.4351, "step": 2710 }, { "epoch": 14.47, "learning_rate": 0.00013575000000000002, "loss": 0.9678, "step": 2720 }, { "epoch": 14.52, "learning_rate": 0.00013625, "loss": 0.3769, "step": 2730 }, { "epoch": 14.57, "learning_rate": 0.00013675000000000002, "loss": 0.6706, "step": 2740 }, { "epoch": 14.63, "learning_rate": 0.00013725, "loss": 0.6711, "step": 2750 }, { "epoch": 14.68, "learning_rate": 0.00013775000000000001, "loss": 0.5355, "step": 2760 }, { "epoch": 14.73, "learning_rate": 0.00013825000000000003, "loss": 0.6193, "step": 2770 }, { "epoch": 14.79, "learning_rate": 0.00013875, "loss": 0.6431, "step": 2780 }, { "epoch": 14.84, "learning_rate": 0.00013925000000000002, "loss": 0.6239, "step": 2790 }, { "epoch": 14.89, "learning_rate": 0.00013975, "loss": 0.4647, "step": 2800 }, { "epoch": 14.95, "learning_rate": 0.00014025000000000002, "loss": 0.2059, "step": 2810 }, { "epoch": 15.0, "learning_rate": 0.00014074999999999998, "loss": 0.6128, "step": 2820 }, { "epoch": 15.05, "learning_rate": 0.00014125, "loss": 0.9306, "step": 2830 }, { "epoch": 15.11, "learning_rate": 0.00014175, "loss": 0.4907, "step": 2840 }, { "epoch": 15.16, "learning_rate": 0.00014225, "loss": 0.4597, "step": 2850 }, { "epoch": 15.21, "learning_rate": 0.00014275, "loss": 0.7004, "step": 2860 }, { "epoch": 15.27, "learning_rate": 0.00014324999999999999, "loss": 0.4713, "step": 2870 }, { "epoch": 15.32, "learning_rate": 0.00014375, "loss": 0.5127, "step": 2880 }, { "epoch": 15.37, "learning_rate": 0.00014424999999999998, "loss": 0.7406, "step": 2890 }, { "epoch": 15.43, "learning_rate": 0.00014475, "loss": 0.5231, "step": 2900 }, { "epoch": 15.48, "learning_rate": 0.00014524999999999998, "loss": 0.5013, "step": 2910 }, { "epoch": 15.53, "learning_rate": 0.00014575, "loss": 0.4772, "step": 2920 }, { "epoch": 15.59, "learning_rate": 0.00014625, "loss": 0.5269, "step": 2930 }, { "epoch": 15.64, "learning_rate": 0.00014675, "loss": 0.7292, "step": 2940 }, { "epoch": 15.69, "learning_rate": 0.00014725, "loss": 0.5534, "step": 2950 }, { "epoch": 15.74, "learning_rate": 0.00014774999999999999, "loss": 0.7513, "step": 2960 }, { "epoch": 15.8, "learning_rate": 0.00014825, "loss": 0.661, "step": 2970 }, { "epoch": 15.85, "learning_rate": 0.00014874999999999998, "loss": 0.8045, "step": 2980 }, { "epoch": 15.9, "learning_rate": 0.00014925, "loss": 0.8469, "step": 2990 }, { "epoch": 15.96, "learning_rate": 0.00014975, "loss": 0.9304, "step": 3000 }, { "epoch": 15.96, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6841020585693436, "eval_f1": 0.6288777491861415, "eval_loss": 0.757703959941864, "eval_precision": 0.6301971517578995, "eval_recall": 0.7386666666666667, "eval_runtime": 5.7335, "eval_samples_per_second": 261.622, "eval_steps_per_second": 8.197, "step": 3000 }, { "epoch": 16.01, "learning_rate": 0.00015025, "loss": 0.6005, "step": 3010 }, { "epoch": 16.06, "learning_rate": 0.00015075, "loss": 0.9853, "step": 3020 }, { "epoch": 16.12, "learning_rate": 0.00015125, "loss": 0.9841, "step": 3030 }, { "epoch": 16.17, "learning_rate": 0.00015175, "loss": 0.8429, "step": 3040 }, { "epoch": 16.22, "learning_rate": 0.00015225, "loss": 0.5311, "step": 3050 }, { "epoch": 16.28, "learning_rate": 0.00015275, "loss": 0.8929, "step": 3060 }, { "epoch": 16.33, "learning_rate": 0.00015325, "loss": 0.6568, "step": 3070 }, { "epoch": 16.38, "learning_rate": 0.00015375, "loss": 0.7808, "step": 3080 }, { "epoch": 16.44, "learning_rate": 0.00015425, "loss": 0.8071, "step": 3090 }, { "epoch": 16.49, "learning_rate": 0.00015475, "loss": 0.6082, "step": 3100 }, { "epoch": 16.54, "learning_rate": 0.00015525, "loss": 0.8183, "step": 3110 }, { "epoch": 16.6, "learning_rate": 0.00015575, "loss": 0.8457, "step": 3120 }, { "epoch": 16.65, "learning_rate": 0.00015625, "loss": 0.8111, "step": 3130 }, { "epoch": 16.7, "learning_rate": 0.00015675000000000002, "loss": 0.8024, "step": 3140 }, { "epoch": 16.76, "learning_rate": 0.00015725, "loss": 0.5843, "step": 3150 }, { "epoch": 16.81, "learning_rate": 0.00015775, "loss": 0.7493, "step": 3160 }, { "epoch": 16.86, "learning_rate": 0.00015825, "loss": 0.6283, "step": 3170 }, { "epoch": 16.91, "learning_rate": 0.00015875, "loss": 0.7267, "step": 3180 }, { "epoch": 16.97, "learning_rate": 0.00015925, "loss": 0.8913, "step": 3190 }, { "epoch": 17.02, "learning_rate": 0.00015975, "loss": 0.8809, "step": 3200 }, { "epoch": 17.07, "learning_rate": 0.00016025000000000002, "loss": 0.7423, "step": 3210 }, { "epoch": 17.13, "learning_rate": 0.00016075, "loss": 0.7022, "step": 3220 }, { "epoch": 17.18, "learning_rate": 0.00016125000000000002, "loss": 0.672, "step": 3230 }, { "epoch": 17.23, "learning_rate": 0.00016175, "loss": 0.7071, "step": 3240 }, { "epoch": 17.29, "learning_rate": 0.00016225000000000001, "loss": 0.7613, "step": 3250 }, { "epoch": 17.34, "learning_rate": 0.00016275, "loss": 0.7014, "step": 3260 }, { "epoch": 17.39, "learning_rate": 0.00016325, "loss": 0.7322, "step": 3270 }, { "epoch": 17.45, "learning_rate": 0.00016375000000000002, "loss": 0.8469, "step": 3280 }, { "epoch": 17.5, "learning_rate": 0.00016425, "loss": 0.647, "step": 3290 }, { "epoch": 17.55, "learning_rate": 0.00016475000000000002, "loss": 0.7902, "step": 3300 }, { "epoch": 17.55, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6841020585693436, "eval_f1": 0.6288777491861415, "eval_loss": 0.7684083580970764, "eval_precision": 0.6301971517578995, "eval_recall": 0.7386666666666667, "eval_runtime": 5.6811, "eval_samples_per_second": 264.032, "eval_steps_per_second": 8.273, "step": 3300 }, { "epoch": 17.61, "learning_rate": 0.00016525, "loss": 0.7717, "step": 3310 }, { "epoch": 17.66, "learning_rate": 0.00016575000000000002, "loss": 0.8894, "step": 3320 }, { "epoch": 17.71, "learning_rate": 0.00016625, "loss": 0.9062, "step": 3330 }, { "epoch": 17.77, "learning_rate": 0.00016675000000000001, "loss": 0.755, "step": 3340 }, { "epoch": 17.82, "learning_rate": 0.00016725000000000003, "loss": 0.6708, "step": 3350 }, { "epoch": 17.87, "learning_rate": 0.00016775, "loss": 0.8674, "step": 3360 }, { "epoch": 17.93, "learning_rate": 0.00016825000000000002, "loss": 0.7541, "step": 3370 }, { "epoch": 17.98, "learning_rate": 0.00016875, "loss": 0.8417, "step": 3380 }, { "epoch": 18.03, "learning_rate": 0.00016925000000000002, "loss": 0.8044, "step": 3390 }, { "epoch": 18.09, "learning_rate": 0.00016975, "loss": 0.7059, "step": 3400 }, { "epoch": 18.14, "learning_rate": 0.00017025000000000002, "loss": 0.7291, "step": 3410 }, { "epoch": 18.19, "learning_rate": 0.00017075, "loss": 0.8393, "step": 3420 }, { "epoch": 18.24, "learning_rate": 0.00017125000000000002, "loss": 0.6944, "step": 3430 }, { "epoch": 18.3, "learning_rate": 0.00017175000000000003, "loss": 0.7587, "step": 3440 }, { "epoch": 18.35, "learning_rate": 0.00017224999999999999, "loss": 0.8562, "step": 3450 }, { "epoch": 18.4, "learning_rate": 0.00017275, "loss": 0.8328, "step": 3460 }, { "epoch": 18.46, "learning_rate": 0.00017324999999999998, "loss": 0.7379, "step": 3470 }, { "epoch": 18.51, "learning_rate": 0.00017375, "loss": 0.8784, "step": 3480 }, { "epoch": 18.56, "learning_rate": 0.00017424999999999998, "loss": 0.8329, "step": 3490 }, { "epoch": 18.62, "learning_rate": 0.00017475, "loss": 0.6559, "step": 3500 }, { "epoch": 18.67, "learning_rate": 0.00017525, "loss": 0.8082, "step": 3510 }, { "epoch": 18.72, "learning_rate": 0.00017575, "loss": 0.7626, "step": 3520 }, { "epoch": 18.78, "learning_rate": 0.00017625, "loss": 0.7877, "step": 3530 }, { "epoch": 18.83, "learning_rate": 0.00017675, "loss": 0.7847, "step": 3540 }, { "epoch": 18.88, "learning_rate": 0.00017725, "loss": 0.5692, "step": 3550 }, { "epoch": 18.94, "learning_rate": 0.00017774999999999998, "loss": 0.8115, "step": 3560 }, { "epoch": 18.99, "learning_rate": 0.00017825, "loss": 0.7631, "step": 3570 }, { "epoch": 19.04, "learning_rate": 0.00017875, "loss": 0.7847, "step": 3580 }, { "epoch": 19.1, "learning_rate": 0.00017925, "loss": 0.7622, "step": 3590 }, { "epoch": 19.15, "learning_rate": 0.00017975, "loss": 0.6364, "step": 3600 }, { "epoch": 19.15, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6841020585693436, "eval_f1": 0.6288777491861415, "eval_loss": 0.7637550234794617, "eval_precision": 0.6301971517578995, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9956, "eval_samples_per_second": 250.184, "eval_steps_per_second": 7.839, "step": 3600 }, { "epoch": 19.2, "learning_rate": 0.00018025, "loss": 0.8643, "step": 3610 }, { "epoch": 19.26, "learning_rate": 0.00018075, "loss": 0.656, "step": 3620 }, { "epoch": 19.31, "learning_rate": 0.00018125, "loss": 0.6252, "step": 3630 }, { "epoch": 19.36, "learning_rate": 0.00018175, "loss": 0.8049, "step": 3640 }, { "epoch": 19.41, "learning_rate": 0.00018225, "loss": 0.8622, "step": 3650 }, { "epoch": 19.47, "learning_rate": 0.00018275, "loss": 0.8585, "step": 3660 }, { "epoch": 19.52, "learning_rate": 0.00018325, "loss": 0.7333, "step": 3670 }, { "epoch": 19.57, "learning_rate": 0.00018375, "loss": 0.7186, "step": 3680 }, { "epoch": 19.63, "learning_rate": 0.00018425, "loss": 0.7759, "step": 3690 }, { "epoch": 19.68, "learning_rate": 0.00018475, "loss": 0.7912, "step": 3700 }, { "epoch": 19.73, "learning_rate": 0.00018525, "loss": 0.8636, "step": 3710 }, { "epoch": 19.79, "learning_rate": 0.00018575000000000002, "loss": 0.8167, "step": 3720 }, { "epoch": 19.84, "learning_rate": 0.00018625, "loss": 0.5816, "step": 3730 }, { "epoch": 19.89, "learning_rate": 0.00018675, "loss": 0.8881, "step": 3740 }, { "epoch": 19.95, "learning_rate": 0.00018725, "loss": 0.8232, "step": 3750 }, { "epoch": 20.0, "learning_rate": 0.00018775, "loss": 0.7109, "step": 3760 }, { "epoch": 20.05, "learning_rate": 0.00018825, "loss": 0.8016, "step": 3770 }, { "epoch": 20.11, "learning_rate": 0.00018875, "loss": 0.6983, "step": 3780 }, { "epoch": 20.16, "learning_rate": 0.00018925, "loss": 0.9032, "step": 3790 }, { "epoch": 20.21, "learning_rate": 0.00018975, "loss": 0.839, "step": 3800 }, { "epoch": 20.27, "learning_rate": 0.00019025000000000002, "loss": 0.8061, "step": 3810 }, { "epoch": 20.32, "learning_rate": 0.00019075, "loss": 0.7371, "step": 3820 }, { "epoch": 20.37, "learning_rate": 0.00019125000000000001, "loss": 0.7358, "step": 3830 }, { "epoch": 20.43, "learning_rate": 0.00019175, "loss": 0.6059, "step": 3840 }, { "epoch": 20.48, "learning_rate": 0.00019225, "loss": 0.8188, "step": 3850 }, { "epoch": 20.53, "learning_rate": 0.00019275, "loss": 0.7803, "step": 3860 }, { "epoch": 20.59, "learning_rate": 0.00019325, "loss": 0.7161, "step": 3870 }, { "epoch": 20.64, "learning_rate": 0.00019375000000000002, "loss": 0.6724, "step": 3880 }, { "epoch": 20.69, "learning_rate": 0.00019425, "loss": 0.7061, "step": 3890 }, { "epoch": 20.74, "learning_rate": 0.00019475000000000002, "loss": 0.6738, "step": 3900 }, { "epoch": 20.74, "eval_accuracy": 0.7393333333333333, "eval_combined_score": 0.7056336725949668, "eval_f1": 0.6292089176424869, "eval_loss": 0.7768693566322327, "eval_precision": 0.7146591060707139, "eval_recall": 0.7393333333333333, "eval_runtime": 5.8568, "eval_samples_per_second": 256.113, "eval_steps_per_second": 8.025, "step": 3900 }, { "epoch": 20.8, "learning_rate": 0.00019525, "loss": 0.694, "step": 3910 }, { "epoch": 20.85, "learning_rate": 0.00019575000000000001, "loss": 1.0223, "step": 3920 }, { "epoch": 20.9, "learning_rate": 0.00019625, "loss": 0.7801, "step": 3930 }, { "epoch": 20.96, "learning_rate": 0.00019675, "loss": 0.6794, "step": 3940 }, { "epoch": 21.01, "learning_rate": 0.00019725000000000002, "loss": 0.8515, "step": 3950 }, { "epoch": 21.06, "learning_rate": 0.00019775, "loss": 0.8019, "step": 3960 }, { "epoch": 21.12, "learning_rate": 0.00019825000000000002, "loss": 0.8918, "step": 3970 }, { "epoch": 21.17, "learning_rate": 0.00019875, "loss": 0.7041, "step": 3980 }, { "epoch": 21.22, "learning_rate": 0.00019925000000000002, "loss": 0.7506, "step": 3990 }, { "epoch": 21.28, "learning_rate": 0.00019975, "loss": 0.9895, "step": 4000 }, { "epoch": 21.33, "learning_rate": 0.00020025000000000002, "loss": 0.6327, "step": 4010 }, { "epoch": 21.38, "learning_rate": 0.00020075000000000003, "loss": 0.7891, "step": 4020 }, { "epoch": 21.44, "learning_rate": 0.00020125, "loss": 0.6668, "step": 4030 }, { "epoch": 21.49, "learning_rate": 0.00020175000000000003, "loss": 0.7532, "step": 4040 }, { "epoch": 21.54, "learning_rate": 0.00020225, "loss": 0.7809, "step": 4050 }, { "epoch": 21.6, "learning_rate": 0.00020275000000000002, "loss": 0.6936, "step": 4060 }, { "epoch": 21.65, "learning_rate": 0.00020324999999999998, "loss": 0.7121, "step": 4070 }, { "epoch": 21.7, "learning_rate": 0.00020375, "loss": 0.9925, "step": 4080 }, { "epoch": 21.76, "learning_rate": 0.00020425, "loss": 0.8201, "step": 4090 }, { "epoch": 21.81, "learning_rate": 0.00020475, "loss": 0.6287, "step": 4100 }, { "epoch": 21.86, "learning_rate": 0.00020525, "loss": 0.8232, "step": 4110 }, { "epoch": 21.91, "learning_rate": 0.00020575, "loss": 0.7285, "step": 4120 }, { "epoch": 21.97, "learning_rate": 0.00020625, "loss": 0.7564, "step": 4130 }, { "epoch": 22.02, "learning_rate": 0.00020674999999999998, "loss": 0.8331, "step": 4140 }, { "epoch": 22.07, "learning_rate": 0.00020725, "loss": 0.7732, "step": 4150 }, { "epoch": 22.13, "learning_rate": 0.00020774999999999998, "loss": 0.7252, "step": 4160 }, { "epoch": 22.18, "learning_rate": 0.00020825, "loss": 0.6347, "step": 4170 }, { "epoch": 22.23, "learning_rate": 0.00020875, "loss": 0.7721, "step": 4180 }, { "epoch": 22.29, "learning_rate": 0.00020925, "loss": 0.7649, "step": 4190 }, { "epoch": 22.34, "learning_rate": 0.00020975, "loss": 0.8142, "step": 4200 }, { "epoch": 22.34, "eval_accuracy": 0.7393333333333333, "eval_combined_score": 0.7056336725949668, "eval_f1": 0.6292089176424869, "eval_loss": 0.744327962398529, "eval_precision": 0.7146591060707139, "eval_recall": 0.7393333333333333, "eval_runtime": 5.7187, "eval_samples_per_second": 262.296, "eval_steps_per_second": 8.219, "step": 4200 }, { "epoch": 22.39, "learning_rate": 0.00021025, "loss": 0.7605, "step": 4210 }, { "epoch": 22.45, "learning_rate": 0.00021075, "loss": 0.7106, "step": 4220 }, { "epoch": 22.5, "learning_rate": 0.00021124999999999998, "loss": 0.7094, "step": 4230 }, { "epoch": 22.55, "learning_rate": 0.00021175, "loss": 0.8416, "step": 4240 }, { "epoch": 22.61, "learning_rate": 0.00021225, "loss": 0.7271, "step": 4250 }, { "epoch": 22.66, "learning_rate": 0.00021275, "loss": 0.8397, "step": 4260 }, { "epoch": 22.71, "learning_rate": 0.00021325, "loss": 0.8801, "step": 4270 }, { "epoch": 22.77, "learning_rate": 0.00021375, "loss": 0.8683, "step": 4280 }, { "epoch": 22.82, "learning_rate": 0.00021425, "loss": 0.7017, "step": 4290 }, { "epoch": 22.87, "learning_rate": 0.00021475, "loss": 0.8673, "step": 4300 }, { "epoch": 22.93, "learning_rate": 0.00021525, "loss": 0.6106, "step": 4310 }, { "epoch": 22.98, "learning_rate": 0.00021575, "loss": 0.7888, "step": 4320 }, { "epoch": 23.03, "learning_rate": 0.00021625, "loss": 0.7133, "step": 4330 }, { "epoch": 23.09, "learning_rate": 0.00021675, "loss": 0.7322, "step": 4340 }, { "epoch": 23.14, "learning_rate": 0.00021725, "loss": 0.5461, "step": 4350 }, { "epoch": 23.19, "learning_rate": 0.00021775, "loss": 0.7274, "step": 4360 }, { "epoch": 23.24, "learning_rate": 0.00021825, "loss": 1.1518, "step": 4370 }, { "epoch": 23.3, "learning_rate": 0.00021875, "loss": 0.8713, "step": 4380 }, { "epoch": 23.35, "learning_rate": 0.00021925000000000002, "loss": 0.8016, "step": 4390 }, { "epoch": 23.4, "learning_rate": 0.00021975, "loss": 0.7848, "step": 4400 }, { "epoch": 23.46, "learning_rate": 0.00022025000000000001, "loss": 0.9202, "step": 4410 }, { "epoch": 23.51, "learning_rate": 0.00022075, "loss": 0.6641, "step": 4420 }, { "epoch": 23.56, "learning_rate": 0.00022125, "loss": 0.7703, "step": 4430 }, { "epoch": 23.62, "learning_rate": 0.00022175, "loss": 0.7311, "step": 4440 }, { "epoch": 23.67, "learning_rate": 0.00022225, "loss": 0.5956, "step": 4450 }, { "epoch": 23.72, "learning_rate": 0.00022275000000000002, "loss": 0.7111, "step": 4460 }, { "epoch": 23.78, "learning_rate": 0.00022325, "loss": 0.7672, "step": 4470 }, { "epoch": 23.83, "learning_rate": 0.00022375000000000002, "loss": 0.8842, "step": 4480 }, { "epoch": 23.88, "learning_rate": 0.00022425, "loss": 0.8225, "step": 4490 }, { "epoch": 23.94, "learning_rate": 0.00022475000000000001, "loss": 0.8184, "step": 4500 }, { "epoch": 23.94, "eval_accuracy": 0.7393333333333333, "eval_combined_score": 0.7056336725949668, "eval_f1": 0.6292089176424869, "eval_loss": 0.7635167837142944, "eval_precision": 0.7146591060707139, "eval_recall": 0.7393333333333333, "eval_runtime": 6.1889, "eval_samples_per_second": 242.371, "eval_steps_per_second": 7.594, "step": 4500 }, { "epoch": 23.99, "learning_rate": 0.00022525, "loss": 0.8185, "step": 4510 }, { "epoch": 24.04, "learning_rate": 0.00022575, "loss": 0.7461, "step": 4520 }, { "epoch": 24.1, "learning_rate": 0.00022625000000000002, "loss": 0.6885, "step": 4530 }, { "epoch": 24.15, "learning_rate": 0.00022675, "loss": 0.8025, "step": 4540 }, { "epoch": 24.2, "learning_rate": 0.00022725000000000002, "loss": 0.6607, "step": 4550 }, { "epoch": 24.26, "learning_rate": 0.00022775, "loss": 0.7011, "step": 4560 }, { "epoch": 24.31, "learning_rate": 0.00022825000000000002, "loss": 0.6394, "step": 4570 }, { "epoch": 24.36, "learning_rate": 0.00022875, "loss": 0.8336, "step": 4580 }, { "epoch": 24.41, "learning_rate": 0.00022925000000000002, "loss": 0.8099, "step": 4590 }, { "epoch": 24.47, "learning_rate": 0.00022975000000000003, "loss": 0.6199, "step": 4600 }, { "epoch": 24.52, "learning_rate": 0.00023025, "loss": 0.8079, "step": 4610 }, { "epoch": 24.57, "learning_rate": 0.00023075000000000003, "loss": 0.9195, "step": 4620 }, { "epoch": 24.63, "learning_rate": 0.00023125, "loss": 0.8569, "step": 4630 }, { "epoch": 24.68, "learning_rate": 0.00023175000000000002, "loss": 0.7448, "step": 4640 }, { "epoch": 24.73, "learning_rate": 0.00023225, "loss": 0.7706, "step": 4650 }, { "epoch": 24.79, "learning_rate": 0.00023275000000000002, "loss": 0.6306, "step": 4660 }, { "epoch": 24.84, "learning_rate": 0.00023325, "loss": 0.6033, "step": 4670 }, { "epoch": 24.89, "learning_rate": 0.00023375000000000002, "loss": 0.9749, "step": 4680 }, { "epoch": 24.95, "learning_rate": 0.00023425000000000003, "loss": 0.7738, "step": 4690 }, { "epoch": 25.0, "learning_rate": 0.00023475, "loss": 0.8824, "step": 4700 }, { "epoch": 25.05, "learning_rate": 0.00023525, "loss": 0.8072, "step": 4710 }, { "epoch": 25.11, "learning_rate": 0.00023574999999999998, "loss": 0.7295, "step": 4720 }, { "epoch": 25.16, "learning_rate": 0.00023625, "loss": 0.8466, "step": 4730 }, { "epoch": 25.21, "learning_rate": 0.00023674999999999998, "loss": 0.7498, "step": 4740 }, { "epoch": 25.27, "learning_rate": 0.00023725, "loss": 0.7199, "step": 4750 }, { "epoch": 25.32, "learning_rate": 0.00023775, "loss": 0.7826, "step": 4760 }, { "epoch": 25.37, "learning_rate": 0.00023825, "loss": 0.7767, "step": 4770 }, { "epoch": 25.43, "learning_rate": 0.00023875, "loss": 0.71, "step": 4780 }, { "epoch": 25.48, "learning_rate": 0.00023925, "loss": 0.8495, "step": 4790 }, { "epoch": 25.53, "learning_rate": 0.00023975, "loss": 0.7562, "step": 4800 }, { "epoch": 25.53, "eval_accuracy": 0.7393333333333333, "eval_combined_score": 0.7056336725949668, "eval_f1": 0.6292089176424869, "eval_loss": 0.7467172741889954, "eval_precision": 0.7146591060707139, "eval_recall": 0.7393333333333333, "eval_runtime": 5.7487, "eval_samples_per_second": 260.927, "eval_steps_per_second": 8.176, "step": 4800 } ], "logging_steps": 10, "max_steps": 5640, "num_train_epochs": 30, "save_steps": 300, "total_flos": 1.00772435460096e+16, "trial_name": null, "trial_params": null }