{ "best_metric": 1.606531023979187, "best_model_checkpoint": "distilbert_final_tweet_dataset_200_epoch/checkpoint-230", "epoch": 29.0, "global_step": 3335, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6327160493827161, "eval_f1": 0.6368645566163996, "eval_loss": 2.3408091068267822, "eval_runtime": 0.7378, "eval_samples_per_second": 439.117, "eval_steps_per_second": 55.567, "step": 115 }, { "epoch": 2.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6724357294978492, "eval_loss": 1.606531023979187, "eval_runtime": 0.7455, "eval_samples_per_second": 434.608, "eval_steps_per_second": 54.997, "step": 230 }, { "epoch": 3.0, "eval_accuracy": 0.6635802469135802, "eval_f1": 0.6659200702678963, "eval_loss": 2.096076250076294, "eval_runtime": 0.7198, "eval_samples_per_second": 450.123, "eval_steps_per_second": 56.96, "step": 345 }, { "epoch": 4.0, "eval_accuracy": 0.6296296296296297, "eval_f1": 0.6251138433515483, "eval_loss": 2.2798941135406494, "eval_runtime": 0.7194, "eval_samples_per_second": 450.35, "eval_steps_per_second": 56.989, "step": 460 }, { "epoch": 4.35, "learning_rate": 4.891304347826087e-05, "loss": 0.1025, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.6431049313635814, "eval_loss": 2.6507568359375, "eval_runtime": 0.7337, "eval_samples_per_second": 441.574, "eval_steps_per_second": 55.878, "step": 575 }, { "epoch": 6.0, "eval_accuracy": 0.6697530864197531, "eval_f1": 0.6715964982923955, "eval_loss": 2.2207276821136475, "eval_runtime": 0.8004, "eval_samples_per_second": 404.806, "eval_steps_per_second": 51.225, "step": 690 }, { "epoch": 7.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6722509202387013, "eval_loss": 2.337564706802368, "eval_runtime": 0.7663, "eval_samples_per_second": 422.784, "eval_steps_per_second": 53.5, "step": 805 }, { "epoch": 8.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6652170891831549, "eval_loss": 2.6886019706726074, "eval_runtime": 0.7308, "eval_samples_per_second": 443.343, "eval_steps_per_second": 56.102, "step": 920 }, { "epoch": 8.7, "learning_rate": 4.782608695652174e-05, "loss": 0.0591, "step": 1000 }, { "epoch": 9.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6669062733756546, "eval_loss": 2.4700207710266113, "eval_runtime": 0.7607, "eval_samples_per_second": 425.94, "eval_steps_per_second": 53.9, "step": 1035 }, { "epoch": 10.0, "eval_accuracy": 0.654320987654321, "eval_f1": 0.6600467030032732, "eval_loss": 2.461507797241211, "eval_runtime": 0.7355, "eval_samples_per_second": 440.496, "eval_steps_per_second": 55.742, "step": 1150 }, { "epoch": 11.0, "eval_accuracy": 0.6851851851851852, "eval_f1": 0.6826296938167441, "eval_loss": 2.5344278812408447, "eval_runtime": 0.7295, "eval_samples_per_second": 444.161, "eval_steps_per_second": 56.206, "step": 1265 }, { "epoch": 12.0, "eval_accuracy": 0.6820987654320988, "eval_f1": 0.6847534599635866, "eval_loss": 2.6750032901763916, "eval_runtime": 0.8174, "eval_samples_per_second": 396.364, "eval_steps_per_second": 50.157, "step": 1380 }, { "epoch": 13.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6621576835862549, "eval_loss": 2.416313648223877, "eval_runtime": 0.7489, "eval_samples_per_second": 432.618, "eval_steps_per_second": 54.745, "step": 1495 }, { "epoch": 13.04, "learning_rate": 4.673913043478261e-05, "loss": 0.0346, "step": 1500 }, { "epoch": 14.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6600444146104354, "eval_loss": 2.7329800128936768, "eval_runtime": 0.7311, "eval_samples_per_second": 443.144, "eval_steps_per_second": 56.077, "step": 1610 }, { "epoch": 15.0, "eval_accuracy": 0.654320987654321, "eval_f1": 0.6504462260925387, "eval_loss": 2.4202656745910645, "eval_runtime": 0.7386, "eval_samples_per_second": 438.686, "eval_steps_per_second": 55.513, "step": 1725 }, { "epoch": 16.0, "eval_accuracy": 0.7006172839506173, "eval_f1": 0.7004058172662825, "eval_loss": 2.3290841579437256, "eval_runtime": 0.7341, "eval_samples_per_second": 441.353, "eval_steps_per_second": 55.85, "step": 1840 }, { "epoch": 17.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6603541359368218, "eval_loss": 2.7727530002593994, "eval_runtime": 0.727, "eval_samples_per_second": 445.679, "eval_steps_per_second": 56.398, "step": 1955 }, { "epoch": 17.39, "learning_rate": 4.565217391304348e-05, "loss": 0.0247, "step": 2000 }, { "epoch": 18.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6600831890627571, "eval_loss": 2.668175458908081, "eval_runtime": 0.8752, "eval_samples_per_second": 370.214, "eval_steps_per_second": 46.848, "step": 2070 }, { "epoch": 19.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6703415017567717, "eval_loss": 2.8486380577087402, "eval_runtime": 0.7459, "eval_samples_per_second": 434.347, "eval_steps_per_second": 54.964, "step": 2185 }, { "epoch": 20.0, "eval_accuracy": 0.6635802469135802, "eval_f1": 0.6654895433909519, "eval_loss": 2.988004446029663, "eval_runtime": 0.7495, "eval_samples_per_second": 432.284, "eval_steps_per_second": 54.703, "step": 2300 }, { "epoch": 21.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6629249499004221, "eval_loss": 2.923362970352173, "eval_runtime": 0.7413, "eval_samples_per_second": 437.091, "eval_steps_per_second": 55.311, "step": 2415 }, { "epoch": 21.74, "learning_rate": 4.456521739130435e-05, "loss": 0.0264, "step": 2500 }, { "epoch": 22.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.6561078483445887, "eval_loss": 2.6863272190093994, "eval_runtime": 0.749, "eval_samples_per_second": 432.552, "eval_steps_per_second": 54.736, "step": 2530 }, { "epoch": 23.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6610722610722611, "eval_loss": 2.7173354625701904, "eval_runtime": 0.7384, "eval_samples_per_second": 438.801, "eval_steps_per_second": 55.527, "step": 2645 }, { "epoch": 24.0, "eval_accuracy": 0.6481481481481481, "eval_f1": 0.6504829054920129, "eval_loss": 3.2829830646514893, "eval_runtime": 0.8839, "eval_samples_per_second": 366.573, "eval_steps_per_second": 46.387, "step": 2760 }, { "epoch": 25.0, "eval_accuracy": 0.6851851851851852, "eval_f1": 0.6787650339814832, "eval_loss": 2.6303372383117676, "eval_runtime": 0.7774, "eval_samples_per_second": 416.749, "eval_steps_per_second": 52.737, "step": 2875 }, { "epoch": 26.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6618789271005957, "eval_loss": 2.580350875854492, "eval_runtime": 0.8987, "eval_samples_per_second": 360.512, "eval_steps_per_second": 45.62, "step": 2990 }, { "epoch": 26.09, "learning_rate": 4.347826086956522e-05, "loss": 0.0178, "step": 3000 }, { "epoch": 27.0, "eval_accuracy": 0.6296296296296297, "eval_f1": 0.631715246325266, "eval_loss": 3.3269691467285156, "eval_runtime": 0.7462, "eval_samples_per_second": 434.183, "eval_steps_per_second": 54.943, "step": 3105 }, { "epoch": 28.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.646016361656172, "eval_loss": 2.600759267807007, "eval_runtime": 0.754, "eval_samples_per_second": 429.696, "eval_steps_per_second": 54.375, "step": 3220 }, { "epoch": 29.0, "eval_accuracy": 0.6759259259259259, "eval_f1": 0.6760610574156681, "eval_loss": 2.3673853874206543, "eval_runtime": 0.7318, "eval_samples_per_second": 442.773, "eval_steps_per_second": 56.03, "step": 3335 } ], "max_steps": 23000, "num_train_epochs": 200, "total_flos": 1076269719459636.0, "trial_name": null, "trial_params": null }