{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.76595744680851, "eval_steps": 300, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 5e-07, "loss": 1.0874, "step": 10 }, { "epoch": 0.11, "learning_rate": 1e-06, "loss": 0.9487, "step": 20 }, { "epoch": 0.16, "learning_rate": 1.5e-06, "loss": 0.7586, "step": 30 }, { "epoch": 0.21, "learning_rate": 2e-06, "loss": 0.7225, "step": 40 }, { "epoch": 0.27, "learning_rate": 2.5e-06, "loss": 0.7364, "step": 50 }, { "epoch": 0.32, "learning_rate": 3e-06, "loss": 0.7265, "step": 60 }, { "epoch": 0.37, "learning_rate": 3.5e-06, "loss": 0.7267, "step": 70 }, { "epoch": 0.43, "learning_rate": 4e-06, "loss": 0.5697, "step": 80 }, { "epoch": 0.48, "learning_rate": 4.5e-06, "loss": 1.018, "step": 90 }, { "epoch": 0.53, "learning_rate": 5e-06, "loss": 0.7875, "step": 100 }, { "epoch": 0.59, "learning_rate": 5.5e-06, "loss": 0.8242, "step": 110 }, { "epoch": 0.64, "learning_rate": 6e-06, "loss": 0.8034, "step": 120 }, { "epoch": 0.69, "learning_rate": 6.5e-06, "loss": 0.7717, "step": 130 }, { "epoch": 0.74, "learning_rate": 7e-06, "loss": 0.8337, "step": 140 }, { "epoch": 0.8, "learning_rate": 7.5e-06, "loss": 0.6884, "step": 150 }, { "epoch": 0.85, "learning_rate": 8e-06, "loss": 0.9129, "step": 160 }, { "epoch": 0.9, "learning_rate": 8.500000000000002e-06, "loss": 0.5637, "step": 170 }, { "epoch": 0.96, "learning_rate": 9e-06, "loss": 1.0458, "step": 180 }, { "epoch": 1.01, "learning_rate": 9.5e-06, "loss": 0.9414, "step": 190 }, { "epoch": 1.06, "learning_rate": 1e-05, "loss": 0.6379, "step": 200 }, { "epoch": 1.12, "learning_rate": 1.0500000000000001e-05, "loss": 0.9249, "step": 210 }, { "epoch": 1.17, "learning_rate": 1.1e-05, "loss": 0.6944, "step": 220 }, { "epoch": 1.22, "learning_rate": 1.15e-05, "loss": 0.9221, "step": 230 }, { "epoch": 1.28, "learning_rate": 1.2e-05, "loss": 0.6475, "step": 240 }, { "epoch": 1.33, "learning_rate": 1.25e-05, "loss": 0.7748, "step": 250 }, { "epoch": 1.38, "learning_rate": 1.3e-05, "loss": 0.8705, "step": 260 }, { "epoch": 1.44, "learning_rate": 1.35e-05, "loss": 0.7737, "step": 270 }, { "epoch": 1.49, "learning_rate": 1.4e-05, "loss": 0.8643, "step": 280 }, { "epoch": 1.54, "learning_rate": 1.4500000000000002e-05, "loss": 0.8428, "step": 290 }, { "epoch": 1.6, "learning_rate": 1.5e-05, "loss": 0.6785, "step": 300 }, { "epoch": 1.6, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7930460572242737, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0663, "eval_samples_per_second": 247.266, "eval_steps_per_second": 7.748, "step": 300 }, { "epoch": 1.65, "learning_rate": 1.55e-05, "loss": 0.6076, "step": 310 }, { "epoch": 1.7, "learning_rate": 1.6e-05, "loss": 0.5963, "step": 320 }, { "epoch": 1.76, "learning_rate": 1.65e-05, "loss": 0.6626, "step": 330 }, { "epoch": 1.81, "learning_rate": 1.7000000000000003e-05, "loss": 0.8379, "step": 340 }, { "epoch": 1.86, "learning_rate": 1.7500000000000002e-05, "loss": 0.8851, "step": 350 }, { "epoch": 1.91, "learning_rate": 1.8e-05, "loss": 0.7489, "step": 360 }, { "epoch": 1.97, "learning_rate": 1.85e-05, "loss": 0.7573, "step": 370 }, { "epoch": 2.02, "learning_rate": 1.9e-05, "loss": 0.8018, "step": 380 }, { "epoch": 2.07, "learning_rate": 1.95e-05, "loss": 0.6645, "step": 390 }, { "epoch": 2.13, "learning_rate": 2e-05, "loss": 0.8677, "step": 400 }, { "epoch": 2.18, "learning_rate": 2.05e-05, "loss": 0.7478, "step": 410 }, { "epoch": 2.23, "learning_rate": 2.1000000000000002e-05, "loss": 0.8551, "step": 420 }, { "epoch": 2.29, "learning_rate": 2.1499999999999997e-05, "loss": 0.9323, "step": 430 }, { "epoch": 2.34, "learning_rate": 2.2e-05, "loss": 0.7536, "step": 440 }, { "epoch": 2.39, "learning_rate": 2.2499999999999998e-05, "loss": 0.5336, "step": 450 }, { "epoch": 2.45, "learning_rate": 2.3e-05, "loss": 0.8955, "step": 460 }, { "epoch": 2.5, "learning_rate": 2.3500000000000002e-05, "loss": 0.7926, "step": 470 }, { "epoch": 2.55, "learning_rate": 2.4e-05, "loss": 0.5713, "step": 480 }, { "epoch": 2.61, "learning_rate": 2.4500000000000003e-05, "loss": 0.8568, "step": 490 }, { "epoch": 2.66, "learning_rate": 2.5e-05, "loss": 0.6348, "step": 500 }, { "epoch": 2.71, "learning_rate": 2.55e-05, "loss": 0.6223, "step": 510 }, { "epoch": 2.77, "learning_rate": 2.6e-05, "loss": 0.7579, "step": 520 }, { "epoch": 2.82, "learning_rate": 2.65e-05, "loss": 0.6325, "step": 530 }, { "epoch": 2.87, "learning_rate": 2.7e-05, "loss": 0.7276, "step": 540 }, { "epoch": 2.93, "learning_rate": 2.75e-05, "loss": 0.8766, "step": 550 }, { "epoch": 2.98, "learning_rate": 2.8e-05, "loss": 0.7107, "step": 560 }, { "epoch": 3.03, "learning_rate": 2.85e-05, "loss": 0.5904, "step": 570 }, { "epoch": 3.09, "learning_rate": 2.9000000000000004e-05, "loss": 0.7125, "step": 580 }, { "epoch": 3.14, "learning_rate": 2.95e-05, "loss": 0.755, "step": 590 }, { "epoch": 3.19, "learning_rate": 3e-05, "loss": 0.5583, "step": 600 }, { "epoch": 3.19, "eval_accuracy": 0.7613333333333333, "eval_combined_score": 0.7403762607674487, "eval_f1": 0.7316181693637903, "eval_loss": 0.6910097599029541, "eval_precision": 0.7072202070393374, "eval_recall": 0.7613333333333333, "eval_runtime": 6.0882, "eval_samples_per_second": 246.376, "eval_steps_per_second": 7.72, "step": 600 }, { "epoch": 3.24, "learning_rate": 3.05e-05, "loss": 0.6527, "step": 610 }, { "epoch": 3.3, "learning_rate": 3.1e-05, "loss": 0.7676, "step": 620 }, { "epoch": 3.35, "learning_rate": 3.15e-05, "loss": 0.766, "step": 630 }, { "epoch": 3.4, "learning_rate": 3.2e-05, "loss": 0.6217, "step": 640 }, { "epoch": 3.46, "learning_rate": 3.2500000000000004e-05, "loss": 0.5405, "step": 650 }, { "epoch": 3.51, "learning_rate": 3.3e-05, "loss": 0.7951, "step": 660 }, { "epoch": 3.56, "learning_rate": 3.35e-05, "loss": 0.552, "step": 670 }, { "epoch": 3.62, "learning_rate": 3.4000000000000007e-05, "loss": 0.6521, "step": 680 }, { "epoch": 3.67, "learning_rate": 3.4500000000000005e-05, "loss": 0.6814, "step": 690 }, { "epoch": 3.72, "learning_rate": 3.5000000000000004e-05, "loss": 0.3992, "step": 700 }, { "epoch": 3.78, "learning_rate": 3.5499999999999996e-05, "loss": 0.7018, "step": 710 }, { "epoch": 3.83, "learning_rate": 3.6e-05, "loss": 0.664, "step": 720 }, { "epoch": 3.88, "learning_rate": 3.65e-05, "loss": 0.6891, "step": 730 }, { "epoch": 3.94, "learning_rate": 3.7e-05, "loss": 0.6063, "step": 740 }, { "epoch": 3.99, "learning_rate": 3.75e-05, "loss": 0.6543, "step": 750 }, { "epoch": 4.04, "learning_rate": 3.8e-05, "loss": 0.5638, "step": 760 }, { "epoch": 4.1, "learning_rate": 3.85e-05, "loss": 0.6984, "step": 770 }, { "epoch": 4.15, "learning_rate": 3.9e-05, "loss": 0.7148, "step": 780 }, { "epoch": 4.2, "learning_rate": 3.95e-05, "loss": 0.4915, "step": 790 }, { "epoch": 4.26, "learning_rate": 4e-05, "loss": 0.648, "step": 800 }, { "epoch": 4.31, "learning_rate": 4.05e-05, "loss": 0.4527, "step": 810 }, { "epoch": 4.36, "learning_rate": 4.1e-05, "loss": 0.551, "step": 820 }, { "epoch": 4.41, "learning_rate": 4.1500000000000006e-05, "loss": 0.5909, "step": 830 }, { "epoch": 4.47, "learning_rate": 4.2000000000000004e-05, "loss": 0.603, "step": 840 }, { "epoch": 4.52, "learning_rate": 4.25e-05, "loss": 1.0745, "step": 850 }, { "epoch": 4.57, "learning_rate": 4.2999999999999995e-05, "loss": 0.6697, "step": 860 }, { "epoch": 4.63, "learning_rate": 4.35e-05, "loss": 0.8374, "step": 870 }, { "epoch": 4.68, "learning_rate": 4.4e-05, "loss": 0.4891, "step": 880 }, { "epoch": 4.73, "learning_rate": 4.45e-05, "loss": 0.5956, "step": 890 }, { "epoch": 4.79, "learning_rate": 4.4999999999999996e-05, "loss": 0.7857, "step": 900 }, { "epoch": 4.79, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.6514685153961182, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9473, "eval_samples_per_second": 252.216, "eval_steps_per_second": 7.903, "step": 900 }, { "epoch": 4.84, "learning_rate": 4.55e-05, "loss": 0.8566, "step": 910 }, { "epoch": 4.89, "learning_rate": 4.6e-05, "loss": 0.4698, "step": 920 }, { "epoch": 4.95, "learning_rate": 4.65e-05, "loss": 0.7224, "step": 930 }, { "epoch": 5.0, "learning_rate": 4.7000000000000004e-05, "loss": 0.5879, "step": 940 }, { "epoch": 5.05, "learning_rate": 4.75e-05, "loss": 0.3592, "step": 950 }, { "epoch": 5.11, "learning_rate": 4.8e-05, "loss": 0.513, "step": 960 }, { "epoch": 5.16, "learning_rate": 4.85e-05, "loss": 0.6167, "step": 970 }, { "epoch": 5.21, "learning_rate": 4.9000000000000005e-05, "loss": 0.5706, "step": 980 }, { "epoch": 5.27, "learning_rate": 4.9500000000000004e-05, "loss": 0.5523, "step": 990 }, { "epoch": 5.32, "learning_rate": 5e-05, "loss": 0.6565, "step": 1000 }, { "epoch": 5.37, "learning_rate": 5.05e-05, "loss": 0.4634, "step": 1010 }, { "epoch": 5.43, "learning_rate": 5.1e-05, "loss": 0.634, "step": 1020 }, { "epoch": 5.48, "learning_rate": 5.15e-05, "loss": 0.5472, "step": 1030 }, { "epoch": 5.53, "learning_rate": 5.2e-05, "loss": 0.595, "step": 1040 }, { "epoch": 5.59, "learning_rate": 5.25e-05, "loss": 0.4889, "step": 1050 }, { "epoch": 5.64, "learning_rate": 5.3e-05, "loss": 0.5884, "step": 1060 }, { "epoch": 5.69, "learning_rate": 5.35e-05, "loss": 0.4149, "step": 1070 }, { "epoch": 5.74, "learning_rate": 5.4e-05, "loss": 0.7573, "step": 1080 }, { "epoch": 5.8, "learning_rate": 5.45e-05, "loss": 0.599, "step": 1090 }, { "epoch": 5.85, "learning_rate": 5.5e-05, "loss": 0.366, "step": 1100 }, { "epoch": 5.9, "learning_rate": 5.55e-05, "loss": 0.4906, "step": 1110 }, { "epoch": 5.96, "learning_rate": 5.6e-05, "loss": 0.4378, "step": 1120 }, { "epoch": 6.01, "learning_rate": 5.6500000000000005e-05, "loss": 0.4886, "step": 1130 }, { "epoch": 6.06, "learning_rate": 5.7e-05, "loss": 0.342, "step": 1140 }, { "epoch": 6.12, "learning_rate": 5.75e-05, "loss": 0.7803, "step": 1150 }, { "epoch": 6.17, "learning_rate": 5.800000000000001e-05, "loss": 0.3435, "step": 1160 }, { "epoch": 6.22, "learning_rate": 5.8500000000000006e-05, "loss": 0.6149, "step": 1170 }, { "epoch": 6.28, "learning_rate": 5.9e-05, "loss": 0.3026, "step": 1180 }, { "epoch": 6.33, "learning_rate": 5.9499999999999996e-05, "loss": 0.9387, "step": 1190 }, { "epoch": 6.38, "learning_rate": 6e-05, "loss": 0.6309, "step": 1200 }, { "epoch": 6.38, "eval_accuracy": 0.848, "eval_combined_score": 0.8403047916631027, "eval_f1": 0.8270449473875959, "eval_loss": 0.5592399835586548, "eval_precision": 0.8381742192648148, "eval_recall": 0.848, "eval_runtime": 5.9701, "eval_samples_per_second": 251.251, "eval_steps_per_second": 7.873, "step": 1200 }, { "epoch": 6.44, "learning_rate": 6.05e-05, "loss": 0.4672, "step": 1210 }, { "epoch": 6.49, "learning_rate": 6.1e-05, "loss": 0.4909, "step": 1220 }, { "epoch": 6.54, "learning_rate": 6.15e-05, "loss": 0.5007, "step": 1230 }, { "epoch": 6.6, "learning_rate": 6.2e-05, "loss": 0.4318, "step": 1240 }, { "epoch": 6.65, "learning_rate": 6.25e-05, "loss": 0.3623, "step": 1250 }, { "epoch": 6.7, "learning_rate": 6.3e-05, "loss": 0.3294, "step": 1260 }, { "epoch": 6.76, "learning_rate": 6.35e-05, "loss": 0.7501, "step": 1270 }, { "epoch": 6.81, "learning_rate": 6.4e-05, "loss": 0.6436, "step": 1280 }, { "epoch": 6.86, "learning_rate": 6.450000000000001e-05, "loss": 0.525, "step": 1290 }, { "epoch": 6.91, "learning_rate": 6.500000000000001e-05, "loss": 0.6047, "step": 1300 }, { "epoch": 6.97, "learning_rate": 6.55e-05, "loss": 0.3636, "step": 1310 }, { "epoch": 7.02, "learning_rate": 6.6e-05, "loss": 0.5634, "step": 1320 }, { "epoch": 7.07, "learning_rate": 6.65e-05, "loss": 0.3846, "step": 1330 }, { "epoch": 7.13, "learning_rate": 6.7e-05, "loss": 0.8436, "step": 1340 }, { "epoch": 7.18, "learning_rate": 6.75e-05, "loss": 0.4762, "step": 1350 }, { "epoch": 7.23, "learning_rate": 6.800000000000001e-05, "loss": 0.5856, "step": 1360 }, { "epoch": 7.29, "learning_rate": 6.850000000000001e-05, "loss": 0.4042, "step": 1370 }, { "epoch": 7.34, "learning_rate": 6.900000000000001e-05, "loss": 0.475, "step": 1380 }, { "epoch": 7.39, "learning_rate": 6.950000000000001e-05, "loss": 0.3535, "step": 1390 }, { "epoch": 7.45, "learning_rate": 7.000000000000001e-05, "loss": 0.2399, "step": 1400 }, { "epoch": 7.5, "learning_rate": 7.049999999999999e-05, "loss": 0.316, "step": 1410 }, { "epoch": 7.55, "learning_rate": 7.095e-05, "loss": 0.438, "step": 1420 }, { "epoch": 7.61, "learning_rate": 7.145e-05, "loss": 0.2673, "step": 1430 }, { "epoch": 7.66, "learning_rate": 7.195e-05, "loss": 0.4286, "step": 1440 }, { "epoch": 7.71, "learning_rate": 7.245e-05, "loss": 0.3655, "step": 1450 }, { "epoch": 7.77, "learning_rate": 7.295e-05, "loss": 0.434, "step": 1460 }, { "epoch": 7.82, "learning_rate": 7.345e-05, "loss": 0.6143, "step": 1470 }, { "epoch": 7.87, "learning_rate": 7.395000000000001e-05, "loss": 0.5471, "step": 1480 }, { "epoch": 7.93, "learning_rate": 7.445000000000001e-05, "loss": 0.299, "step": 1490 }, { "epoch": 7.98, "learning_rate": 7.495e-05, "loss": 0.2216, "step": 1500 }, { "epoch": 7.98, "eval_accuracy": 0.8773333333333333, "eval_combined_score": 0.8618647537688802, "eval_f1": 0.8431508113173474, "eval_loss": 0.5708244442939758, "eval_precision": 0.8496415370915067, "eval_recall": 0.8773333333333333, "eval_runtime": 6.0286, "eval_samples_per_second": 248.816, "eval_steps_per_second": 7.796, "step": 1500 }, { "epoch": 8.03, "learning_rate": 7.545e-05, "loss": 0.5517, "step": 1510 }, { "epoch": 8.09, "learning_rate": 7.595e-05, "loss": 0.3237, "step": 1520 }, { "epoch": 8.14, "learning_rate": 7.645e-05, "loss": 0.3945, "step": 1530 }, { "epoch": 8.19, "learning_rate": 7.695e-05, "loss": 0.3996, "step": 1540 }, { "epoch": 8.24, "learning_rate": 7.745000000000001e-05, "loss": 0.3644, "step": 1550 }, { "epoch": 8.3, "learning_rate": 7.795000000000001e-05, "loss": 0.6767, "step": 1560 }, { "epoch": 8.35, "learning_rate": 7.845000000000001e-05, "loss": 0.434, "step": 1570 }, { "epoch": 8.4, "learning_rate": 7.895000000000001e-05, "loss": 0.4047, "step": 1580 }, { "epoch": 8.46, "learning_rate": 7.945e-05, "loss": 0.3844, "step": 1590 }, { "epoch": 8.51, "learning_rate": 7.994999999999999e-05, "loss": 0.2656, "step": 1600 }, { "epoch": 8.56, "learning_rate": 8.044999999999999e-05, "loss": 0.3318, "step": 1610 }, { "epoch": 8.62, "learning_rate": 8.095e-05, "loss": 0.4627, "step": 1620 }, { "epoch": 8.67, "learning_rate": 8.145e-05, "loss": 0.6657, "step": 1630 }, { "epoch": 8.72, "learning_rate": 8.195e-05, "loss": 0.353, "step": 1640 }, { "epoch": 8.78, "learning_rate": 8.245e-05, "loss": 0.4543, "step": 1650 }, { "epoch": 8.83, "learning_rate": 8.295e-05, "loss": 0.4121, "step": 1660 }, { "epoch": 8.88, "learning_rate": 8.340000000000001e-05, "loss": 0.2792, "step": 1670 }, { "epoch": 8.94, "learning_rate": 8.39e-05, "loss": 0.3618, "step": 1680 }, { "epoch": 8.99, "learning_rate": 8.44e-05, "loss": 0.4922, "step": 1690 }, { "epoch": 9.04, "learning_rate": 8.49e-05, "loss": 0.6822, "step": 1700 }, { "epoch": 9.1, "learning_rate": 8.54e-05, "loss": 0.4936, "step": 1710 }, { "epoch": 9.15, "learning_rate": 8.59e-05, "loss": 0.4652, "step": 1720 }, { "epoch": 9.2, "learning_rate": 8.640000000000001e-05, "loss": 0.3529, "step": 1730 }, { "epoch": 9.26, "learning_rate": 8.690000000000001e-05, "loss": 0.5115, "step": 1740 }, { "epoch": 9.31, "learning_rate": 8.740000000000001e-05, "loss": 0.5859, "step": 1750 }, { "epoch": 9.36, "learning_rate": 8.790000000000001e-05, "loss": 0.5069, "step": 1760 }, { "epoch": 9.41, "learning_rate": 8.840000000000001e-05, "loss": 0.2584, "step": 1770 }, { "epoch": 9.47, "learning_rate": 8.885e-05, "loss": 0.313, "step": 1780 }, { "epoch": 9.52, "learning_rate": 8.935e-05, "loss": 0.4103, "step": 1790 }, { "epoch": 9.57, "learning_rate": 8.985e-05, "loss": 0.3214, "step": 1800 }, { "epoch": 9.57, "eval_accuracy": 0.896, "eval_combined_score": 0.8819938675326909, "eval_f1": 0.8583787127091727, "eval_loss": 0.45495954155921936, "eval_precision": 0.8775967574215913, "eval_recall": 0.896, "eval_runtime": 6.1706, "eval_samples_per_second": 243.087, "eval_steps_per_second": 7.617, "step": 1800 }, { "epoch": 9.63, "learning_rate": 9.035e-05, "loss": 0.5097, "step": 1810 }, { "epoch": 9.68, "learning_rate": 9.085e-05, "loss": 0.4306, "step": 1820 }, { "epoch": 9.73, "learning_rate": 9.135e-05, "loss": 0.2573, "step": 1830 }, { "epoch": 9.79, "learning_rate": 9.185e-05, "loss": 0.3534, "step": 1840 }, { "epoch": 9.84, "learning_rate": 9.235000000000001e-05, "loss": 0.5012, "step": 1850 }, { "epoch": 9.89, "learning_rate": 9.285000000000001e-05, "loss": 0.7057, "step": 1860 }, { "epoch": 9.95, "learning_rate": 9.335e-05, "loss": 0.5385, "step": 1870 }, { "epoch": 10.0, "learning_rate": 9.385e-05, "loss": 0.2633, "step": 1880 }, { "epoch": 10.05, "learning_rate": 9.435e-05, "loss": 0.3853, "step": 1890 }, { "epoch": 10.11, "learning_rate": 9.485e-05, "loss": 0.4354, "step": 1900 }, { "epoch": 10.16, "learning_rate": 9.535e-05, "loss": 0.7459, "step": 1910 }, { "epoch": 10.21, "learning_rate": 9.585000000000001e-05, "loss": 0.2937, "step": 1920 }, { "epoch": 10.27, "learning_rate": 9.635000000000001e-05, "loss": 0.2932, "step": 1930 }, { "epoch": 10.32, "learning_rate": 9.685000000000001e-05, "loss": 0.4256, "step": 1940 }, { "epoch": 10.37, "learning_rate": 9.735000000000001e-05, "loss": 0.4336, "step": 1950 }, { "epoch": 10.43, "learning_rate": 9.785e-05, "loss": 0.4684, "step": 1960 }, { "epoch": 10.48, "learning_rate": 9.835e-05, "loss": 0.5437, "step": 1970 }, { "epoch": 10.53, "learning_rate": 9.884999999999999e-05, "loss": 0.3036, "step": 1980 }, { "epoch": 10.59, "learning_rate": 9.935e-05, "loss": 0.1528, "step": 1990 }, { "epoch": 10.64, "learning_rate": 9.985e-05, "loss": 0.8779, "step": 2000 }, { "epoch": 10.69, "learning_rate": 0.00010035, "loss": 0.53, "step": 2010 }, { "epoch": 10.74, "learning_rate": 0.00010085, "loss": 0.2656, "step": 2020 }, { "epoch": 10.8, "learning_rate": 0.00010135, "loss": 0.3895, "step": 2030 }, { "epoch": 10.85, "learning_rate": 0.00010185, "loss": 0.5943, "step": 2040 }, { "epoch": 10.9, "learning_rate": 0.00010235, "loss": 0.5021, "step": 2050 }, { "epoch": 10.96, "learning_rate": 0.00010284999999999999, "loss": 0.3804, "step": 2060 }, { "epoch": 11.01, "learning_rate": 0.00010335, "loss": 0.2204, "step": 2070 }, { "epoch": 11.06, "learning_rate": 0.00010385, "loss": 0.4421, "step": 2080 }, { "epoch": 11.12, "learning_rate": 0.0001043, "loss": 1.0409, "step": 2090 }, { "epoch": 11.17, "learning_rate": 0.00010480000000000001, "loss": 0.7521, "step": 2100 }, { "epoch": 11.17, "eval_accuracy": 0.884, "eval_combined_score": 0.8540521114926303, "eval_f1": 0.8422803386258884, "eval_loss": 0.3819296956062317, "eval_precision": 0.8059281073446327, "eval_recall": 0.884, "eval_runtime": 6.0267, "eval_samples_per_second": 248.894, "eval_steps_per_second": 7.799, "step": 2100 }, { "epoch": 11.22, "learning_rate": 0.00010530000000000001, "loss": 0.3117, "step": 2110 }, { "epoch": 11.28, "learning_rate": 0.00010580000000000001, "loss": 0.4548, "step": 2120 }, { "epoch": 11.33, "learning_rate": 0.00010630000000000001, "loss": 0.484, "step": 2130 }, { "epoch": 11.38, "learning_rate": 0.00010680000000000001, "loss": 0.4479, "step": 2140 }, { "epoch": 11.44, "learning_rate": 0.0001073, "loss": 0.3099, "step": 2150 }, { "epoch": 11.49, "learning_rate": 0.0001078, "loss": 0.4728, "step": 2160 }, { "epoch": 11.54, "learning_rate": 0.00010829999999999999, "loss": 0.5189, "step": 2170 }, { "epoch": 11.6, "learning_rate": 0.0001088, "loss": 0.5615, "step": 2180 }, { "epoch": 11.65, "learning_rate": 0.0001093, "loss": 0.536, "step": 2190 }, { "epoch": 11.7, "learning_rate": 0.0001098, "loss": 0.4329, "step": 2200 }, { "epoch": 11.76, "learning_rate": 0.0001103, "loss": 0.2304, "step": 2210 }, { "epoch": 11.81, "learning_rate": 0.0001108, "loss": 0.5139, "step": 2220 }, { "epoch": 11.86, "learning_rate": 0.0001113, "loss": 0.3353, "step": 2230 }, { "epoch": 11.91, "learning_rate": 0.0001118, "loss": 0.7493, "step": 2240 }, { "epoch": 11.97, "learning_rate": 0.0001123, "loss": 1.0483, "step": 2250 }, { "epoch": 12.02, "learning_rate": 0.0001128, "loss": 0.5422, "step": 2260 }, { "epoch": 12.07, "learning_rate": 0.0001133, "loss": 0.771, "step": 2270 }, { "epoch": 12.13, "learning_rate": 0.0001138, "loss": 0.841, "step": 2280 }, { "epoch": 12.18, "learning_rate": 0.0001143, "loss": 0.7316, "step": 2290 }, { "epoch": 12.23, "learning_rate": 0.0001148, "loss": 0.7394, "step": 2300 }, { "epoch": 12.29, "learning_rate": 0.0001153, "loss": 0.7084, "step": 2310 }, { "epoch": 12.34, "learning_rate": 0.0001158, "loss": 0.8033, "step": 2320 }, { "epoch": 12.39, "learning_rate": 0.00011630000000000001, "loss": 0.6616, "step": 2330 }, { "epoch": 12.45, "learning_rate": 0.0001168, "loss": 0.7601, "step": 2340 }, { "epoch": 12.5, "learning_rate": 0.0001173, "loss": 0.7986, "step": 2350 }, { "epoch": 12.55, "learning_rate": 0.0001178, "loss": 0.673, "step": 2360 }, { "epoch": 12.61, "learning_rate": 0.0001183, "loss": 0.6354, "step": 2370 }, { "epoch": 12.66, "learning_rate": 0.0001188, "loss": 0.617, "step": 2380 }, { "epoch": 12.71, "learning_rate": 0.0001193, "loss": 0.5532, "step": 2390 }, { "epoch": 12.77, "learning_rate": 0.00011980000000000001, "loss": 0.5048, "step": 2400 }, { "epoch": 12.77, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.6582097411155701, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.7153, "eval_samples_per_second": 262.451, "eval_steps_per_second": 8.223, "step": 2400 } ], "logging_steps": 10, "max_steps": 5640, "num_train_epochs": 30, "save_steps": 300, "total_flos": 5039147999821824.0, "trial_name": null, "trial_params": null }