{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.9787234042553195, "eval_steps": 20, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.0004995563442768412, "loss": 1.3139, "step": 10 }, { "epoch": 0.11, "learning_rate": 0.0004986690328305235, "loss": 1.2812, "step": 20 }, { "epoch": 0.11, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8499402403831482, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 125.9807, "eval_samples_per_second": 11.907, "eval_steps_per_second": 0.373, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.0004977817213842058, "loss": 0.832, "step": 30 }, { "epoch": 0.21, "learning_rate": 0.0004968944099378882, "loss": 0.8156, "step": 40 }, { "epoch": 0.21, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8848057985305786, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 46.3365, "eval_samples_per_second": 32.372, "eval_steps_per_second": 1.014, "step": 40 }, { "epoch": 0.27, "learning_rate": 0.0004960070984915705, "loss": 0.7475, "step": 50 }, { "epoch": 0.32, "learning_rate": 0.0004951197870452529, "loss": 0.7478, "step": 60 }, { "epoch": 0.32, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7691774964332581, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0551, "eval_samples_per_second": 247.726, "eval_steps_per_second": 7.762, "step": 60 }, { "epoch": 0.37, "learning_rate": 0.0004942324755989353, "loss": 0.7663, "step": 70 }, { "epoch": 0.43, "learning_rate": 0.0004933451641526176, "loss": 0.6034, "step": 80 }, { "epoch": 0.43, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7553095817565918, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.025, "eval_samples_per_second": 248.962, "eval_steps_per_second": 7.801, "step": 80 }, { "epoch": 0.48, "learning_rate": 0.0004924578527063, "loss": 0.9511, "step": 90 }, { "epoch": 0.53, "learning_rate": 0.0004915705412599822, "loss": 0.7548, "step": 100 }, { "epoch": 0.53, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7768574357032776, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9796, "eval_samples_per_second": 250.851, "eval_steps_per_second": 7.86, "step": 100 }, { "epoch": 0.59, "learning_rate": 0.0004906832298136646, "loss": 0.858, "step": 110 }, { "epoch": 0.64, "learning_rate": 0.0004897959183673469, "loss": 0.8452, "step": 120 }, { "epoch": 0.64, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.745326817035675, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1179, "eval_samples_per_second": 245.183, "eval_steps_per_second": 7.682, "step": 120 }, { "epoch": 0.69, "learning_rate": 0.0004889086069210293, "loss": 0.8152, "step": 130 }, { "epoch": 0.74, "learning_rate": 0.00048802129547471164, "loss": 0.8972, "step": 140 }, { "epoch": 0.74, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7610827088356018, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0456, "eval_samples_per_second": 248.113, "eval_steps_per_second": 7.774, "step": 140 }, { "epoch": 0.8, "learning_rate": 0.000487133984028394, "loss": 0.7226, "step": 150 }, { "epoch": 0.85, "learning_rate": 0.0004862466725820763, "loss": 0.9377, "step": 160 }, { "epoch": 0.85, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7714303135871887, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9995, "eval_samples_per_second": 250.022, "eval_steps_per_second": 7.834, "step": 160 }, { "epoch": 0.9, "learning_rate": 0.0004853593611357587, "loss": 0.594, "step": 170 }, { "epoch": 0.96, "learning_rate": 0.00048447204968944104, "loss": 1.0968, "step": 180 }, { "epoch": 0.96, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.9954016804695129, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9871, "eval_samples_per_second": 250.537, "eval_steps_per_second": 7.85, "step": 180 }, { "epoch": 1.01, "learning_rate": 0.00048358473824312333, "loss": 0.9644, "step": 190 }, { "epoch": 1.06, "learning_rate": 0.00048269742679680566, "loss": 0.7502, "step": 200 }, { "epoch": 1.06, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7544593214988708, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0077, "eval_samples_per_second": 249.679, "eval_steps_per_second": 7.823, "step": 200 }, { "epoch": 1.12, "learning_rate": 0.000481810115350488, "loss": 0.7231, "step": 210 }, { "epoch": 1.17, "learning_rate": 0.0004809228039041704, "loss": 0.7902, "step": 220 }, { "epoch": 1.17, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7758567929267883, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0531, "eval_samples_per_second": 247.806, "eval_steps_per_second": 7.765, "step": 220 }, { "epoch": 1.22, "learning_rate": 0.00048003549245785273, "loss": 0.7591, "step": 230 }, { "epoch": 1.28, "learning_rate": 0.00047914818101153507, "loss": 0.817, "step": 240 }, { "epoch": 1.28, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.767899751663208, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1711, "eval_samples_per_second": 243.067, "eval_steps_per_second": 7.616, "step": 240 }, { "epoch": 1.33, "learning_rate": 0.0004782608695652174, "loss": 0.6681, "step": 250 }, { "epoch": 1.38, "learning_rate": 0.00047737355811889974, "loss": 0.9199, "step": 260 }, { "epoch": 1.38, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7462519407272339, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0331, "eval_samples_per_second": 248.63, "eval_steps_per_second": 7.79, "step": 260 }, { "epoch": 1.44, "learning_rate": 0.00047648624667258213, "loss": 0.7409, "step": 270 }, { "epoch": 1.49, "learning_rate": 0.0004755989352262644, "loss": 0.7246, "step": 280 }, { "epoch": 1.49, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7581822872161865, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1558, "eval_samples_per_second": 243.673, "eval_steps_per_second": 7.635, "step": 280 }, { "epoch": 1.54, "learning_rate": 0.00047471162377994675, "loss": 0.6876, "step": 290 }, { "epoch": 1.6, "learning_rate": 0.0004738243123336291, "loss": 0.7508, "step": 300 }, { "epoch": 1.6, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7464602589607239, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1548, "eval_samples_per_second": 243.711, "eval_steps_per_second": 7.636, "step": 300 }, { "epoch": 1.65, "learning_rate": 0.0004729370008873114, "loss": 0.8539, "step": 310 }, { "epoch": 1.7, "learning_rate": 0.0004720496894409938, "loss": 0.7011, "step": 320 }, { "epoch": 1.7, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.788334310054779, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1655, "eval_samples_per_second": 243.289, "eval_steps_per_second": 7.623, "step": 320 }, { "epoch": 1.76, "learning_rate": 0.00047116237799467615, "loss": 0.9111, "step": 330 }, { "epoch": 1.81, "learning_rate": 0.0004702750665483585, "loss": 0.8442, "step": 340 }, { "epoch": 1.81, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7656621336936951, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2177, "eval_samples_per_second": 241.249, "eval_steps_per_second": 7.559, "step": 340 }, { "epoch": 1.86, "learning_rate": 0.00046938775510204083, "loss": 0.7179, "step": 350 }, { "epoch": 1.91, "learning_rate": 0.00046850044365572317, "loss": 1.0021, "step": 360 }, { "epoch": 1.91, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7755117416381836, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0871, "eval_samples_per_second": 246.422, "eval_steps_per_second": 7.721, "step": 360 }, { "epoch": 1.97, "learning_rate": 0.0004676131322094055, "loss": 0.8331, "step": 370 }, { "epoch": 2.02, "learning_rate": 0.00046672582076308784, "loss": 0.7982, "step": 380 }, { "epoch": 2.02, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8436231017112732, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0719, "eval_samples_per_second": 247.04, "eval_steps_per_second": 7.741, "step": 380 }, { "epoch": 2.07, "learning_rate": 0.0004658385093167702, "loss": 0.8408, "step": 390 }, { "epoch": 2.13, "learning_rate": 0.0004649511978704525, "loss": 0.8299, "step": 400 }, { "epoch": 2.13, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7582988142967224, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2274, "eval_samples_per_second": 240.873, "eval_steps_per_second": 7.547, "step": 400 }, { "epoch": 2.18, "learning_rate": 0.0004640638864241349, "loss": 0.7852, "step": 410 }, { "epoch": 2.23, "learning_rate": 0.00046317657497781724, "loss": 0.8789, "step": 420 }, { "epoch": 2.23, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7546933889389038, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3059, "eval_samples_per_second": 237.872, "eval_steps_per_second": 7.453, "step": 420 }, { "epoch": 2.29, "learning_rate": 0.0004622892635314996, "loss": 0.6384, "step": 430 }, { "epoch": 2.34, "learning_rate": 0.0004614019520851819, "loss": 0.8853, "step": 440 }, { "epoch": 2.34, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7661357522010803, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3477, "eval_samples_per_second": 236.306, "eval_steps_per_second": 7.404, "step": 440 }, { "epoch": 2.39, "learning_rate": 0.0004605146406388642, "loss": 0.7721, "step": 450 }, { "epoch": 2.45, "learning_rate": 0.0004596273291925466, "loss": 0.7522, "step": 460 }, { "epoch": 2.45, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7484121322631836, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2677, "eval_samples_per_second": 239.321, "eval_steps_per_second": 7.499, "step": 460 }, { "epoch": 2.5, "learning_rate": 0.0004587400177462289, "loss": 0.8076, "step": 470 }, { "epoch": 2.55, "learning_rate": 0.00045785270629991126, "loss": 0.9035, "step": 480 }, { "epoch": 2.55, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7667799592018127, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.4501, "eval_samples_per_second": 232.554, "eval_steps_per_second": 7.287, "step": 480 }, { "epoch": 2.61, "learning_rate": 0.0004569653948535936, "loss": 0.7544, "step": 490 }, { "epoch": 2.66, "learning_rate": 0.000456078083407276, "loss": 0.8717, "step": 500 }, { "epoch": 2.66, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7541614770889282, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.4865, "eval_samples_per_second": 231.251, "eval_steps_per_second": 7.246, "step": 500 }, { "epoch": 2.71, "learning_rate": 0.00045519077196095833, "loss": 0.6823, "step": 510 }, { "epoch": 2.77, "learning_rate": 0.00045430346051464067, "loss": 0.8903, "step": 520 }, { "epoch": 2.77, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.801760733127594, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3567, "eval_samples_per_second": 235.972, "eval_steps_per_second": 7.394, "step": 520 }, { "epoch": 2.82, "learning_rate": 0.000453416149068323, "loss": 0.7565, "step": 530 }, { "epoch": 2.87, "learning_rate": 0.0004525288376220053, "loss": 0.7392, "step": 540 }, { "epoch": 2.87, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7826161980628967, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.5294, "eval_samples_per_second": 229.731, "eval_steps_per_second": 7.198, "step": 540 }, { "epoch": 2.93, "learning_rate": 0.0004516415261756877, "loss": 0.629, "step": 550 }, { "epoch": 2.98, "learning_rate": 0.00045075421472937, "loss": 0.8626, "step": 560 }, { "epoch": 2.98, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7489452958106995, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3521, "eval_samples_per_second": 236.143, "eval_steps_per_second": 7.399, "step": 560 }, { "epoch": 3.03, "learning_rate": 0.00044986690328305235, "loss": 0.7379, "step": 570 }, { "epoch": 3.09, "learning_rate": 0.0004489795918367347, "loss": 0.6063, "step": 580 }, { "epoch": 3.09, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.9793464541435242, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3272, "eval_samples_per_second": 237.071, "eval_steps_per_second": 7.428, "step": 580 }, { "epoch": 3.14, "learning_rate": 0.000448092280390417, "loss": 0.9408, "step": 590 }, { "epoch": 3.19, "learning_rate": 0.0004472049689440994, "loss": 0.8703, "step": 600 }, { "epoch": 3.19, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7475989460945129, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3253, "eval_samples_per_second": 237.143, "eval_steps_per_second": 7.43, "step": 600 }, { "epoch": 3.24, "learning_rate": 0.00044631765749778175, "loss": 0.8594, "step": 610 }, { "epoch": 3.3, "learning_rate": 0.0004454303460514641, "loss": 0.797, "step": 620 }, { "epoch": 3.3, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7692281603813171, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.6464, "eval_samples_per_second": 225.686, "eval_steps_per_second": 7.071, "step": 620 }, { "epoch": 3.35, "learning_rate": 0.0004445430346051464, "loss": 0.8531, "step": 630 }, { "epoch": 3.4, "learning_rate": 0.00044365572315882877, "loss": 0.7526, "step": 640 }, { "epoch": 3.4, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7548736929893494, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.5723, "eval_samples_per_second": 228.23, "eval_steps_per_second": 7.151, "step": 640 }, { "epoch": 3.46, "learning_rate": 0.0004427684117125111, "loss": 0.8442, "step": 650 }, { "epoch": 3.51, "learning_rate": 0.00044188110026619344, "loss": 0.9005, "step": 660 }, { "epoch": 3.51, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8103847503662109, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.4776, "eval_samples_per_second": 231.566, "eval_steps_per_second": 7.256, "step": 660 }, { "epoch": 3.56, "learning_rate": 0.0004409937888198758, "loss": 0.8343, "step": 670 }, { "epoch": 3.62, "learning_rate": 0.0004401064773735581, "loss": 0.7993, "step": 680 }, { "epoch": 3.62, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8053144812583923, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3285, "eval_samples_per_second": 237.021, "eval_steps_per_second": 7.427, "step": 680 }, { "epoch": 3.67, "learning_rate": 0.0004392191659272405, "loss": 0.8369, "step": 690 }, { "epoch": 3.72, "learning_rate": 0.00043833185448092284, "loss": 0.7966, "step": 700 }, { "epoch": 3.72, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7448229193687439, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3865, "eval_samples_per_second": 234.871, "eval_steps_per_second": 7.359, "step": 700 }, { "epoch": 3.78, "learning_rate": 0.0004374445430346052, "loss": 0.7612, "step": 710 }, { "epoch": 3.83, "learning_rate": 0.00043655723158828746, "loss": 0.7656, "step": 720 }, { "epoch": 3.83, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7586243748664856, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3473, "eval_samples_per_second": 236.32, "eval_steps_per_second": 7.405, "step": 720 }, { "epoch": 3.88, "learning_rate": 0.0004356699201419698, "loss": 0.6469, "step": 730 }, { "epoch": 3.94, "learning_rate": 0.0004347826086956522, "loss": 0.6889, "step": 740 }, { "epoch": 3.94, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7640272378921509, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3581, "eval_samples_per_second": 235.92, "eval_steps_per_second": 7.392, "step": 740 }, { "epoch": 3.99, "learning_rate": 0.0004338952972493345, "loss": 0.7086, "step": 750 }, { "epoch": 4.04, "learning_rate": 0.00043300798580301686, "loss": 0.8226, "step": 760 }, { "epoch": 4.04, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8118994235992432, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3396, "eval_samples_per_second": 236.61, "eval_steps_per_second": 7.414, "step": 760 }, { "epoch": 4.1, "learning_rate": 0.0004321206743566992, "loss": 1.0159, "step": 770 }, { "epoch": 4.15, "learning_rate": 0.0004312333629103816, "loss": 0.8907, "step": 780 }, { "epoch": 4.15, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7906585335731506, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.4149, "eval_samples_per_second": 233.829, "eval_steps_per_second": 7.327, "step": 780 }, { "epoch": 4.2, "learning_rate": 0.00043034605146406393, "loss": 0.6486, "step": 790 }, { "epoch": 4.26, "learning_rate": 0.00042945874001774627, "loss": 0.8749, "step": 800 }, { "epoch": 4.26, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7677525877952576, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.3008, "eval_samples_per_second": 238.066, "eval_steps_per_second": 7.459, "step": 800 }, { "epoch": 4.31, "learning_rate": 0.00042857142857142855, "loss": 0.6891, "step": 810 }, { "epoch": 4.36, "learning_rate": 0.0004276841171251109, "loss": 0.749, "step": 820 }, { "epoch": 4.36, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7736731767654419, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.384, "eval_samples_per_second": 234.964, "eval_steps_per_second": 7.362, "step": 820 }, { "epoch": 4.41, "learning_rate": 0.0004267968056787933, "loss": 0.7781, "step": 830 }, { "epoch": 4.47, "learning_rate": 0.0004259094942324756, "loss": 0.7333, "step": 840 }, { "epoch": 4.47, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8354427218437195, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1708, "eval_samples_per_second": 243.081, "eval_steps_per_second": 7.617, "step": 840 }, { "epoch": 4.52, "learning_rate": 0.00042502218278615795, "loss": 1.1763, "step": 850 }, { "epoch": 4.57, "learning_rate": 0.0004241348713398403, "loss": 0.9287, "step": 860 }, { "epoch": 4.57, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7903938889503479, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1862, "eval_samples_per_second": 242.475, "eval_steps_per_second": 7.598, "step": 860 }, { "epoch": 4.63, "learning_rate": 0.0004232475598935226, "loss": 0.7707, "step": 870 }, { "epoch": 4.68, "learning_rate": 0.000422360248447205, "loss": 0.8231, "step": 880 }, { "epoch": 4.68, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7689003944396973, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0616, "eval_samples_per_second": 247.459, "eval_steps_per_second": 7.754, "step": 880 }, { "epoch": 4.73, "learning_rate": 0.00042147293700088735, "loss": 0.723, "step": 890 }, { "epoch": 4.79, "learning_rate": 0.00042058562555456964, "loss": 0.7486, "step": 900 }, { "epoch": 4.79, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7458215951919556, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0863, "eval_samples_per_second": 246.454, "eval_steps_per_second": 7.722, "step": 900 }, { "epoch": 4.84, "learning_rate": 0.000419698314108252, "loss": 0.6488, "step": 910 }, { "epoch": 4.89, "learning_rate": 0.0004188110026619343, "loss": 0.8551, "step": 920 }, { "epoch": 4.89, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7851412892341614, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9684, "eval_samples_per_second": 251.326, "eval_steps_per_second": 7.875, "step": 920 }, { "epoch": 4.95, "learning_rate": 0.0004179236912156167, "loss": 0.7437, "step": 930 }, { "epoch": 5.0, "learning_rate": 0.00041703637976929904, "loss": 0.7452, "step": 940 }, { "epoch": 5.0, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7961435317993164, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0838, "eval_samples_per_second": 246.558, "eval_steps_per_second": 7.725, "step": 940 }, { "epoch": 5.05, "learning_rate": 0.0004161490683229814, "loss": 0.8069, "step": 950 }, { "epoch": 5.11, "learning_rate": 0.0004152617568766637, "loss": 0.7453, "step": 960 }, { "epoch": 5.11, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7462239861488342, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1042, "eval_samples_per_second": 245.731, "eval_steps_per_second": 7.7, "step": 960 }, { "epoch": 5.16, "learning_rate": 0.0004143744454303461, "loss": 0.5743, "step": 970 }, { "epoch": 5.21, "learning_rate": 0.00041348713398402844, "loss": 0.7784, "step": 980 }, { "epoch": 5.21, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7713678479194641, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0898, "eval_samples_per_second": 246.314, "eval_steps_per_second": 7.718, "step": 980 }, { "epoch": 5.27, "learning_rate": 0.0004125998225377107, "loss": 0.7916, "step": 990 }, { "epoch": 5.32, "learning_rate": 0.00041171251109139306, "loss": 0.7943, "step": 1000 }, { "epoch": 5.32, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7590846419334412, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0172, "eval_samples_per_second": 249.284, "eval_steps_per_second": 7.811, "step": 1000 }, { "epoch": 5.37, "learning_rate": 0.0004108251996450754, "loss": 0.7477, "step": 1010 }, { "epoch": 5.43, "learning_rate": 0.0004099378881987578, "loss": 0.8187, "step": 1020 }, { "epoch": 5.43, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7467174530029297, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0527, "eval_samples_per_second": 247.824, "eval_steps_per_second": 7.765, "step": 1020 }, { "epoch": 5.48, "learning_rate": 0.0004090505767524401, "loss": 0.724, "step": 1030 }, { "epoch": 5.53, "learning_rate": 0.00040816326530612246, "loss": 0.82, "step": 1040 }, { "epoch": 5.53, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7653962969779968, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0631, "eval_samples_per_second": 247.397, "eval_steps_per_second": 7.752, "step": 1040 }, { "epoch": 5.59, "learning_rate": 0.0004072759538598048, "loss": 0.7251, "step": 1050 }, { "epoch": 5.64, "learning_rate": 0.00040638864241348714, "loss": 0.8369, "step": 1060 }, { "epoch": 5.64, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7671139240264893, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9969, "eval_samples_per_second": 250.128, "eval_steps_per_second": 7.837, "step": 1060 }, { "epoch": 5.69, "learning_rate": 0.0004055013309671695, "loss": 0.8827, "step": 1070 }, { "epoch": 5.74, "learning_rate": 0.0004046140195208518, "loss": 0.8594, "step": 1080 }, { "epoch": 5.74, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7597693800926208, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1035, "eval_samples_per_second": 245.759, "eval_steps_per_second": 7.7, "step": 1080 }, { "epoch": 5.8, "learning_rate": 0.00040372670807453415, "loss": 0.6991, "step": 1090 }, { "epoch": 5.85, "learning_rate": 0.0004028393966282165, "loss": 0.763, "step": 1100 }, { "epoch": 5.85, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7626741528511047, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2099, "eval_samples_per_second": 241.55, "eval_steps_per_second": 7.569, "step": 1100 }, { "epoch": 5.9, "learning_rate": 0.0004019520851818989, "loss": 0.803, "step": 1110 }, { "epoch": 5.96, "learning_rate": 0.0004010647737355812, "loss": 0.8508, "step": 1120 }, { "epoch": 5.96, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7459200620651245, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1055, "eval_samples_per_second": 245.68, "eval_steps_per_second": 7.698, "step": 1120 }, { "epoch": 6.01, "learning_rate": 0.00040017746228926355, "loss": 0.655, "step": 1130 }, { "epoch": 6.06, "learning_rate": 0.0003992901508429459, "loss": 0.67, "step": 1140 }, { "epoch": 6.06, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7736058235168457, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0743, "eval_samples_per_second": 246.941, "eval_steps_per_second": 7.737, "step": 1140 }, { "epoch": 6.12, "learning_rate": 0.0003984028393966282, "loss": 0.9277, "step": 1150 }, { "epoch": 6.17, "learning_rate": 0.00039751552795031056, "loss": 0.7358, "step": 1160 }, { "epoch": 6.17, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7588059902191162, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0157, "eval_samples_per_second": 249.349, "eval_steps_per_second": 7.813, "step": 1160 }, { "epoch": 6.22, "learning_rate": 0.0003966282165039929, "loss": 0.6581, "step": 1170 }, { "epoch": 6.28, "learning_rate": 0.00039574090505767524, "loss": 0.6768, "step": 1180 }, { "epoch": 6.28, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7461269497871399, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0429, "eval_samples_per_second": 248.226, "eval_steps_per_second": 7.778, "step": 1180 }, { "epoch": 6.33, "learning_rate": 0.0003948535936113576, "loss": 0.6814, "step": 1190 }, { "epoch": 6.38, "learning_rate": 0.0003939662821650399, "loss": 0.7112, "step": 1200 }, { "epoch": 6.38, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7580690383911133, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.987, "eval_samples_per_second": 250.543, "eval_steps_per_second": 7.85, "step": 1200 }, { "epoch": 6.44, "learning_rate": 0.0003930789707187223, "loss": 0.78, "step": 1210 }, { "epoch": 6.49, "learning_rate": 0.00039219165927240464, "loss": 0.7991, "step": 1220 }, { "epoch": 6.49, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7527822256088257, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0389, "eval_samples_per_second": 248.39, "eval_steps_per_second": 7.783, "step": 1220 }, { "epoch": 6.54, "learning_rate": 0.000391304347826087, "loss": 0.833, "step": 1230 }, { "epoch": 6.6, "learning_rate": 0.0003904170363797693, "loss": 0.9258, "step": 1240 }, { "epoch": 6.6, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7517044544219971, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9942, "eval_samples_per_second": 250.24, "eval_steps_per_second": 7.841, "step": 1240 }, { "epoch": 6.65, "learning_rate": 0.00038952972493345165, "loss": 0.6751, "step": 1250 }, { "epoch": 6.7, "learning_rate": 0.000388642413487134, "loss": 0.7574, "step": 1260 }, { "epoch": 6.7, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7456061244010925, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1978, "eval_samples_per_second": 242.023, "eval_steps_per_second": 7.583, "step": 1260 }, { "epoch": 6.76, "learning_rate": 0.0003877551020408163, "loss": 0.8179, "step": 1270 }, { "epoch": 6.81, "learning_rate": 0.00038686779059449866, "loss": 0.7691, "step": 1280 }, { "epoch": 6.81, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7492356896400452, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0678, "eval_samples_per_second": 247.208, "eval_steps_per_second": 7.746, "step": 1280 }, { "epoch": 6.86, "learning_rate": 0.000385980479148181, "loss": 0.7528, "step": 1290 }, { "epoch": 6.91, "learning_rate": 0.0003850931677018634, "loss": 0.5853, "step": 1300 }, { "epoch": 6.91, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7996741533279419, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9805, "eval_samples_per_second": 250.815, "eval_steps_per_second": 7.859, "step": 1300 }, { "epoch": 6.97, "learning_rate": 0.0003842058562555457, "loss": 1.0142, "step": 1310 }, { "epoch": 7.02, "learning_rate": 0.00038331854480922806, "loss": 0.9782, "step": 1320 }, { "epoch": 7.02, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7786191701889038, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1561, "eval_samples_per_second": 243.66, "eval_steps_per_second": 7.635, "step": 1320 }, { "epoch": 7.07, "learning_rate": 0.0003824312333629104, "loss": 0.8774, "step": 1330 }, { "epoch": 7.13, "learning_rate": 0.0003815439219165927, "loss": 0.73, "step": 1340 }, { "epoch": 7.13, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7597090005874634, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.252, "eval_samples_per_second": 239.925, "eval_steps_per_second": 7.518, "step": 1340 }, { "epoch": 7.18, "learning_rate": 0.0003806566104702751, "loss": 0.6523, "step": 1350 }, { "epoch": 7.23, "learning_rate": 0.0003797692990239574, "loss": 0.5814, "step": 1360 }, { "epoch": 7.23, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8069555759429932, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0437, "eval_samples_per_second": 248.194, "eval_steps_per_second": 7.777, "step": 1360 }, { "epoch": 7.29, "learning_rate": 0.00037888198757763975, "loss": 0.8547, "step": 1370 }, { "epoch": 7.34, "learning_rate": 0.0003779946761313221, "loss": 0.8289, "step": 1380 }, { "epoch": 7.34, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7525202035903931, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0147, "eval_samples_per_second": 249.387, "eval_steps_per_second": 7.814, "step": 1380 }, { "epoch": 7.39, "learning_rate": 0.0003771073646850044, "loss": 0.7157, "step": 1390 }, { "epoch": 7.45, "learning_rate": 0.0003762200532386868, "loss": 0.7269, "step": 1400 }, { "epoch": 7.45, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7548522353172302, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1004, "eval_samples_per_second": 245.886, "eval_steps_per_second": 7.704, "step": 1400 }, { "epoch": 7.5, "learning_rate": 0.00037533274179236915, "loss": 0.8475, "step": 1410 }, { "epoch": 7.55, "learning_rate": 0.0003744454303460515, "loss": 0.7227, "step": 1420 }, { "epoch": 7.55, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7969293594360352, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0981, "eval_samples_per_second": 245.98, "eval_steps_per_second": 7.707, "step": 1420 }, { "epoch": 7.61, "learning_rate": 0.00037355811889973377, "loss": 0.9252, "step": 1430 }, { "epoch": 7.66, "learning_rate": 0.00037267080745341616, "loss": 0.71, "step": 1440 }, { "epoch": 7.66, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8190247416496277, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0637, "eval_samples_per_second": 247.376, "eval_steps_per_second": 7.751, "step": 1440 }, { "epoch": 7.71, "learning_rate": 0.0003717834960070985, "loss": 0.8722, "step": 1450 }, { "epoch": 7.77, "learning_rate": 0.00037089618456078084, "loss": 1.0148, "step": 1460 }, { "epoch": 7.77, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7832964062690735, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.091, "eval_samples_per_second": 246.267, "eval_steps_per_second": 7.716, "step": 1460 }, { "epoch": 7.82, "learning_rate": 0.0003700088731144632, "loss": 0.772, "step": 1470 }, { "epoch": 7.87, "learning_rate": 0.0003691215616681455, "loss": 0.6867, "step": 1480 }, { "epoch": 7.87, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8290088772773743, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1496, "eval_samples_per_second": 243.918, "eval_steps_per_second": 7.643, "step": 1480 }, { "epoch": 7.93, "learning_rate": 0.0003682342502218279, "loss": 0.8245, "step": 1490 }, { "epoch": 7.98, "learning_rate": 0.00036734693877551024, "loss": 0.8665, "step": 1500 }, { "epoch": 7.98, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7548300623893738, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2167, "eval_samples_per_second": 241.285, "eval_steps_per_second": 7.56, "step": 1500 } ], "logging_steps": 10, "max_steps": 5640, "num_train_epochs": 30, "save_steps": 100, "total_flos": 3149993726705664.0, "trial_name": null, "trial_params": null }