{ "best_metric": 0.7180555555555556, "best_model_checkpoint": "omarmomen/sf_babylm_1/finetune/qnli/checkpoint-3400", "epoch": 6.986899563318778, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "eval_accuracy": 0.6146106719970703, "eval_f1": 0.6907686907686907, "eval_loss": 0.6554794311523438, "eval_mcc": 0.22431521092119797, "eval_runtime": 3.8387, "eval_samples_per_second": 595.521, "eval_steps_per_second": 74.505, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.618985116481781, "eval_f1": 0.68130259787779, "eval_loss": 0.6422836780548096, "eval_mcc": 0.2304941478582267, "eval_runtime": 3.8459, "eval_samples_per_second": 594.401, "eval_steps_per_second": 74.365, "step": 400 }, { "epoch": 0.73, "learning_rate": 4.636098981077147e-05, "loss": 0.6654, "step": 500 }, { "epoch": 0.87, "eval_accuracy": 0.6412948369979858, "eval_f1": 0.6870229007633588, "eval_loss": 0.6372341513633728, "eval_mcc": 0.2760337210737091, "eval_runtime": 3.8293, "eval_samples_per_second": 596.975, "eval_steps_per_second": 74.687, "step": 600 }, { "epoch": 1.16, "eval_accuracy": 0.6404199600219727, "eval_f1": 0.7060085836909872, "eval_loss": 0.642782986164093, "eval_mcc": 0.278469065949597, "eval_runtime": 3.8437, "eval_samples_per_second": 594.741, "eval_steps_per_second": 74.408, "step": 800 }, { "epoch": 1.46, "learning_rate": 4.272197962154294e-05, "loss": 0.6069, "step": 1000 }, { "epoch": 1.46, "eval_accuracy": 0.6465441584587097, "eval_f1": 0.674979887369268, "eval_loss": 0.6376312971115112, "eval_mcc": 0.2884175233679133, "eval_runtime": 3.8632, "eval_samples_per_second": 591.738, "eval_steps_per_second": 74.032, "step": 1000 }, { "epoch": 1.75, "eval_accuracy": 0.636045515537262, "eval_f1": 0.7150684931506849, "eval_loss": 0.7253507971763611, "eval_mcc": 0.27745325857018305, "eval_runtime": 3.8079, "eval_samples_per_second": 600.333, "eval_steps_per_second": 75.107, "step": 1200 }, { "epoch": 2.04, "eval_accuracy": 0.6531058549880981, "eval_f1": 0.7024390243902439, "eval_loss": 0.7396670579910278, "eval_mcc": 0.3009260185152751, "eval_runtime": 3.8535, "eval_samples_per_second": 593.225, "eval_steps_per_second": 74.218, "step": 1400 }, { "epoch": 2.18, "learning_rate": 3.9082969432314415e-05, "loss": 0.5563, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.6452318429946899, "eval_f1": 0.7106671423474848, "eval_loss": 0.7774791121482849, "eval_mcc": 0.2892937037338759, "eval_runtime": 3.8484, "eval_samples_per_second": 594.009, "eval_steps_per_second": 74.316, "step": 1600 }, { "epoch": 2.62, "eval_accuracy": 0.6391075849533081, "eval_f1": 0.7114375655823715, "eval_loss": 0.7708711624145508, "eval_mcc": 0.2793701706219116, "eval_runtime": 3.8601, "eval_samples_per_second": 592.217, "eval_steps_per_second": 74.092, "step": 1800 }, { "epoch": 2.91, "learning_rate": 3.544395924308588e-05, "loss": 0.4828, "step": 2000 }, { "epoch": 2.91, "eval_accuracy": 0.6509186625480652, "eval_f1": 0.715203426124197, "eval_loss": 0.7866878509521484, "eval_mcc": 0.3016051235516822, "eval_runtime": 3.8684, "eval_samples_per_second": 590.949, "eval_steps_per_second": 73.933, "step": 2000 }, { "epoch": 3.2, "eval_accuracy": 0.6552931070327759, "eval_f1": 0.7068452380952381, "eval_loss": 0.8981339931488037, "eval_mcc": 0.306012753556783, "eval_runtime": 3.8311, "eval_samples_per_second": 596.695, "eval_steps_per_second": 74.652, "step": 2200 }, { "epoch": 3.49, "eval_accuracy": 0.6509186625480652, "eval_f1": 0.7156094084105489, "eval_loss": 0.8108872175216675, "eval_mcc": 0.3018512444456491, "eval_runtime": 3.8564, "eval_samples_per_second": 592.788, "eval_steps_per_second": 74.163, "step": 2400 }, { "epoch": 3.64, "learning_rate": 3.1804949053857355e-05, "loss": 0.3888, "step": 2500 }, { "epoch": 3.78, "eval_accuracy": 0.6614173054695129, "eval_f1": 0.7135455218356773, "eval_loss": 0.8783448338508606, "eval_mcc": 0.3192843740994599, "eval_runtime": 3.8493, "eval_samples_per_second": 593.875, "eval_steps_per_second": 74.299, "step": 2600 }, { "epoch": 4.08, "eval_accuracy": 0.658355176448822, "eval_f1": 0.7177448500180701, "eval_loss": 1.1642794609069824, "eval_mcc": 0.31565593750873305, "eval_runtime": 3.8931, "eval_samples_per_second": 587.192, "eval_steps_per_second": 73.463, "step": 2800 }, { "epoch": 4.37, "learning_rate": 2.816593886462882e-05, "loss": 0.3206, "step": 3000 }, { "epoch": 4.37, "eval_accuracy": 0.6596675515174866, "eval_f1": 0.7124907612712491, "eval_loss": 1.1600124835968018, "eval_mcc": 0.31574376011142535, "eval_runtime": 3.8489, "eval_samples_per_second": 593.935, "eval_steps_per_second": 74.307, "step": 3000 }, { "epoch": 4.66, "eval_accuracy": 0.6605424284934998, "eval_f1": 0.7058377558756633, "eval_loss": 1.1160058975219727, "eval_mcc": 0.31588676317672204, "eval_runtime": 3.841, "eval_samples_per_second": 595.161, "eval_steps_per_second": 74.46, "step": 3200 }, { "epoch": 4.95, "eval_accuracy": 0.6447944045066833, "eval_f1": 0.7180555555555556, "eval_loss": 1.1237967014312744, "eval_mcc": 0.2937641511860878, "eval_runtime": 3.8606, "eval_samples_per_second": 592.129, "eval_steps_per_second": 74.081, "step": 3400 }, { "epoch": 5.09, "learning_rate": 2.452692867540029e-05, "loss": 0.2694, "step": 3500 }, { "epoch": 5.24, "eval_accuracy": 0.6482939720153809, "eval_f1": 0.7059253840526701, "eval_loss": 1.3788362741470337, "eval_mcc": 0.29270582428613734, "eval_runtime": 3.8607, "eval_samples_per_second": 592.119, "eval_steps_per_second": 74.08, "step": 3600 }, { "epoch": 5.53, "eval_accuracy": 0.6526684165000916, "eval_f1": 0.7114825581395349, "eval_loss": 1.3927693367004395, "eval_mcc": 0.30274237117341346, "eval_runtime": 3.8807, "eval_samples_per_second": 589.066, "eval_steps_per_second": 73.698, "step": 3800 }, { "epoch": 5.82, "learning_rate": 2.088791848617176e-05, "loss": 0.1912, "step": 4000 }, { "epoch": 5.82, "eval_accuracy": 0.665354311466217, "eval_f1": 0.7125140924464487, "eval_loss": 1.3745672702789307, "eval_mcc": 0.32630912773750975, "eval_runtime": 3.8032, "eval_samples_per_second": 601.08, "eval_steps_per_second": 75.201, "step": 4000 }, { "epoch": 6.11, "eval_accuracy": 0.6509186625480652, "eval_f1": 0.7068332108743571, "eval_loss": 1.6758441925048828, "eval_mcc": 0.2978444022174176, "eval_runtime": 3.8421, "eval_samples_per_second": 594.992, "eval_steps_per_second": 74.439, "step": 4200 }, { "epoch": 6.4, "eval_accuracy": 0.6500437259674072, "eval_f1": 0.7084548104956269, "eval_loss": 1.665847659111023, "eval_mcc": 0.2968064603193092, "eval_runtime": 3.8248, "eval_samples_per_second": 597.682, "eval_steps_per_second": 74.776, "step": 4400 }, { "epoch": 6.55, "learning_rate": 1.7248908296943234e-05, "loss": 0.1529, "step": 4500 }, { "epoch": 6.7, "eval_accuracy": 0.6614173054695129, "eval_f1": 0.7098950524737632, "eval_loss": 1.642499566078186, "eval_mcc": 0.31828852494106735, "eval_runtime": 3.8133, "eval_samples_per_second": 599.486, "eval_steps_per_second": 75.001, "step": 4600 }, { "epoch": 6.99, "eval_accuracy": 0.6579177379608154, "eval_f1": 0.7086438152011922, "eval_loss": 1.563865303993225, "eval_mcc": 0.3113962299159553, "eval_runtime": 3.8944, "eval_samples_per_second": 587.003, "eval_steps_per_second": 73.44, "step": 4800 }, { "epoch": 6.99, "step": 4800, "total_flos": 7864613306661888.0, "train_loss": 0.3873964651425679, "train_runtime": 1112.1464, "train_samples_per_second": 394.885, "train_steps_per_second": 6.177 } ], "max_steps": 6870, "num_train_epochs": 10, "total_flos": 7864613306661888.0, "trial_name": null, "trial_params": null }