{ "best_metric": 1.606531023979187, "best_model_checkpoint": "distilbert_final_tweet_dataset_200_epoch/checkpoint-230", "epoch": 43.0, "global_step": 4945, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6327160493827161, "eval_f1": 0.6368645566163996, "eval_loss": 2.3408091068267822, "eval_runtime": 0.7378, "eval_samples_per_second": 439.117, "eval_steps_per_second": 55.567, "step": 115 }, { "epoch": 2.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6724357294978492, "eval_loss": 1.606531023979187, "eval_runtime": 0.7455, "eval_samples_per_second": 434.608, "eval_steps_per_second": 54.997, "step": 230 }, { "epoch": 3.0, "eval_accuracy": 0.6635802469135802, "eval_f1": 0.6659200702678963, "eval_loss": 2.096076250076294, "eval_runtime": 0.7198, "eval_samples_per_second": 450.123, "eval_steps_per_second": 56.96, "step": 345 }, { "epoch": 4.0, "eval_accuracy": 0.6296296296296297, "eval_f1": 0.6251138433515483, "eval_loss": 2.2798941135406494, "eval_runtime": 0.7194, "eval_samples_per_second": 450.35, "eval_steps_per_second": 56.989, "step": 460 }, { "epoch": 4.35, "learning_rate": 4.891304347826087e-05, "loss": 0.1025, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.6431049313635814, "eval_loss": 2.6507568359375, "eval_runtime": 0.7337, "eval_samples_per_second": 441.574, "eval_steps_per_second": 55.878, "step": 575 }, { "epoch": 6.0, "eval_accuracy": 0.6697530864197531, "eval_f1": 0.6715964982923955, "eval_loss": 2.2207276821136475, "eval_runtime": 0.8004, "eval_samples_per_second": 404.806, "eval_steps_per_second": 51.225, "step": 690 }, { "epoch": 7.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6722509202387013, "eval_loss": 2.337564706802368, "eval_runtime": 0.7663, "eval_samples_per_second": 422.784, "eval_steps_per_second": 53.5, "step": 805 }, { "epoch": 8.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6652170891831549, "eval_loss": 2.6886019706726074, "eval_runtime": 0.7308, "eval_samples_per_second": 443.343, "eval_steps_per_second": 56.102, "step": 920 }, { "epoch": 8.7, "learning_rate": 4.782608695652174e-05, "loss": 0.0591, "step": 1000 }, { "epoch": 9.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6669062733756546, "eval_loss": 2.4700207710266113, "eval_runtime": 0.7607, "eval_samples_per_second": 425.94, "eval_steps_per_second": 53.9, "step": 1035 }, { "epoch": 10.0, "eval_accuracy": 0.654320987654321, "eval_f1": 0.6600467030032732, "eval_loss": 2.461507797241211, "eval_runtime": 0.7355, "eval_samples_per_second": 440.496, "eval_steps_per_second": 55.742, "step": 1150 }, { "epoch": 11.0, "eval_accuracy": 0.6851851851851852, "eval_f1": 0.6826296938167441, "eval_loss": 2.5344278812408447, "eval_runtime": 0.7295, "eval_samples_per_second": 444.161, "eval_steps_per_second": 56.206, "step": 1265 }, { "epoch": 12.0, "eval_accuracy": 0.6820987654320988, "eval_f1": 0.6847534599635866, "eval_loss": 2.6750032901763916, "eval_runtime": 0.8174, "eval_samples_per_second": 396.364, "eval_steps_per_second": 50.157, "step": 1380 }, { "epoch": 13.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6621576835862549, "eval_loss": 2.416313648223877, "eval_runtime": 0.7489, "eval_samples_per_second": 432.618, "eval_steps_per_second": 54.745, "step": 1495 }, { "epoch": 13.04, "learning_rate": 4.673913043478261e-05, "loss": 0.0346, "step": 1500 }, { "epoch": 14.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6600444146104354, "eval_loss": 2.7329800128936768, "eval_runtime": 0.7311, "eval_samples_per_second": 443.144, "eval_steps_per_second": 56.077, "step": 1610 }, { "epoch": 15.0, "eval_accuracy": 0.654320987654321, "eval_f1": 0.6504462260925387, "eval_loss": 2.4202656745910645, "eval_runtime": 0.7386, "eval_samples_per_second": 438.686, "eval_steps_per_second": 55.513, "step": 1725 }, { "epoch": 16.0, "eval_accuracy": 0.7006172839506173, "eval_f1": 0.7004058172662825, "eval_loss": 2.3290841579437256, "eval_runtime": 0.7341, "eval_samples_per_second": 441.353, "eval_steps_per_second": 55.85, "step": 1840 }, { "epoch": 17.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6603541359368218, "eval_loss": 2.7727530002593994, "eval_runtime": 0.727, "eval_samples_per_second": 445.679, "eval_steps_per_second": 56.398, "step": 1955 }, { "epoch": 17.39, "learning_rate": 4.565217391304348e-05, "loss": 0.0247, "step": 2000 }, { "epoch": 18.0, "eval_accuracy": 0.6574074074074074, "eval_f1": 0.6600831890627571, "eval_loss": 2.668175458908081, "eval_runtime": 0.8752, "eval_samples_per_second": 370.214, "eval_steps_per_second": 46.848, "step": 2070 }, { "epoch": 19.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6703415017567717, "eval_loss": 2.8486380577087402, "eval_runtime": 0.7459, "eval_samples_per_second": 434.347, "eval_steps_per_second": 54.964, "step": 2185 }, { "epoch": 20.0, "eval_accuracy": 0.6635802469135802, "eval_f1": 0.6654895433909519, "eval_loss": 2.988004446029663, "eval_runtime": 0.7495, "eval_samples_per_second": 432.284, "eval_steps_per_second": 54.703, "step": 2300 }, { "epoch": 21.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6629249499004221, "eval_loss": 2.923362970352173, "eval_runtime": 0.7413, "eval_samples_per_second": 437.091, "eval_steps_per_second": 55.311, "step": 2415 }, { "epoch": 21.74, "learning_rate": 4.456521739130435e-05, "loss": 0.0264, "step": 2500 }, { "epoch": 22.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.6561078483445887, "eval_loss": 2.6863272190093994, "eval_runtime": 0.749, "eval_samples_per_second": 432.552, "eval_steps_per_second": 54.736, "step": 2530 }, { "epoch": 23.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6610722610722611, "eval_loss": 2.7173354625701904, "eval_runtime": 0.7384, "eval_samples_per_second": 438.801, "eval_steps_per_second": 55.527, "step": 2645 }, { "epoch": 24.0, "eval_accuracy": 0.6481481481481481, "eval_f1": 0.6504829054920129, "eval_loss": 3.2829830646514893, "eval_runtime": 0.8839, "eval_samples_per_second": 366.573, "eval_steps_per_second": 46.387, "step": 2760 }, { "epoch": 25.0, "eval_accuracy": 0.6851851851851852, "eval_f1": 0.6787650339814832, "eval_loss": 2.6303372383117676, "eval_runtime": 0.7774, "eval_samples_per_second": 416.749, "eval_steps_per_second": 52.737, "step": 2875 }, { "epoch": 26.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6618789271005957, "eval_loss": 2.580350875854492, "eval_runtime": 0.8987, "eval_samples_per_second": 360.512, "eval_steps_per_second": 45.62, "step": 2990 }, { "epoch": 26.09, "learning_rate": 4.347826086956522e-05, "loss": 0.0178, "step": 3000 }, { "epoch": 27.0, "eval_accuracy": 0.6296296296296297, "eval_f1": 0.631715246325266, "eval_loss": 3.3269691467285156, "eval_runtime": 0.7462, "eval_samples_per_second": 434.183, "eval_steps_per_second": 54.943, "step": 3105 }, { "epoch": 28.0, "eval_accuracy": 0.6512345679012346, "eval_f1": 0.646016361656172, "eval_loss": 2.600759267807007, "eval_runtime": 0.754, "eval_samples_per_second": 429.696, "eval_steps_per_second": 54.375, "step": 3220 }, { "epoch": 29.0, "eval_accuracy": 0.6759259259259259, "eval_f1": 0.6760610574156681, "eval_loss": 2.3673853874206543, "eval_runtime": 0.7318, "eval_samples_per_second": 442.773, "eval_steps_per_second": 56.03, "step": 3335 }, { "epoch": 30.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6779632769589728, "eval_loss": 2.884216547012329, "eval_runtime": 0.8346, "eval_samples_per_second": 388.205, "eval_steps_per_second": 49.125, "step": 3450 }, { "epoch": 30.43, "learning_rate": 4.239130434782609e-05, "loss": 0.0265, "step": 3500 }, { "epoch": 31.0, "eval_accuracy": 0.691358024691358, "eval_f1": 0.6760321881467606, "eval_loss": 2.432506799697876, "eval_runtime": 0.7578, "eval_samples_per_second": 427.542, "eval_steps_per_second": 54.103, "step": 3565 }, { "epoch": 32.0, "eval_accuracy": 0.6759259259259259, "eval_f1": 0.6731011257710287, "eval_loss": 2.599379062652588, "eval_runtime": 0.883, "eval_samples_per_second": 366.946, "eval_steps_per_second": 46.434, "step": 3680 }, { "epoch": 33.0, "eval_accuracy": 0.7067901234567902, "eval_f1": 0.695461794954067, "eval_loss": 2.5045433044433594, "eval_runtime": 0.7331, "eval_samples_per_second": 441.973, "eval_steps_per_second": 55.929, "step": 3795 }, { "epoch": 34.0, "eval_accuracy": 0.6697530864197531, "eval_f1": 0.6731205747259303, "eval_loss": 3.0988411903381348, "eval_runtime": 0.7479, "eval_samples_per_second": 433.231, "eval_steps_per_second": 54.822, "step": 3910 }, { "epoch": 34.78, "learning_rate": 4.130434782608696e-05, "loss": 0.0161, "step": 4000 }, { "epoch": 35.0, "eval_accuracy": 0.6604938271604939, "eval_f1": 0.6623841608728461, "eval_loss": 3.1595075130462646, "eval_runtime": 0.7269, "eval_samples_per_second": 445.742, "eval_steps_per_second": 56.406, "step": 4025 }, { "epoch": 36.0, "eval_accuracy": 0.6820987654320988, "eval_f1": 0.6742414417588002, "eval_loss": 2.7971243858337402, "eval_runtime": 0.9256, "eval_samples_per_second": 350.06, "eval_steps_per_second": 44.298, "step": 4140 }, { "epoch": 37.0, "eval_accuracy": 0.6728395061728395, "eval_f1": 0.6738584667507038, "eval_loss": 2.8675856590270996, "eval_runtime": 0.7764, "eval_samples_per_second": 417.318, "eval_steps_per_second": 52.809, "step": 4255 }, { "epoch": 38.0, "eval_accuracy": 0.6759259259259259, "eval_f1": 0.678580671598335, "eval_loss": 3.1342642307281494, "eval_runtime": 0.7436, "eval_samples_per_second": 435.727, "eval_steps_per_second": 55.138, "step": 4370 }, { "epoch": 39.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.6685957490826878, "eval_loss": 3.1532750129699707, "eval_runtime": 0.736, "eval_samples_per_second": 440.237, "eval_steps_per_second": 55.709, "step": 4485 }, { "epoch": 39.13, "learning_rate": 4.021739130434783e-05, "loss": 0.0057, "step": 4500 }, { "epoch": 40.0, "eval_accuracy": 0.6697530864197531, "eval_f1": 0.6710645630658144, "eval_loss": 3.224386215209961, "eval_runtime": 0.7446, "eval_samples_per_second": 435.153, "eval_steps_per_second": 55.066, "step": 4600 }, { "epoch": 41.0, "eval_accuracy": 0.6697530864197531, "eval_f1": 0.6710645630658144, "eval_loss": 3.2694830894470215, "eval_runtime": 0.7256, "eval_samples_per_second": 446.497, "eval_steps_per_second": 56.501, "step": 4715 }, { "epoch": 42.0, "eval_accuracy": 0.654320987654321, "eval_f1": 0.6527479860813193, "eval_loss": 3.1891162395477295, "eval_runtime": 1.0318, "eval_samples_per_second": 314.002, "eval_steps_per_second": 39.735, "step": 4830 }, { "epoch": 43.0, "eval_accuracy": 0.6450617283950617, "eval_f1": 0.6456619146444574, "eval_loss": 3.0968563556671143, "eval_runtime": 0.7581, "eval_samples_per_second": 427.411, "eval_steps_per_second": 54.086, "step": 4945 } ], "max_steps": 23000, "num_train_epochs": 200, "total_flos": 1595848204716012.0, "trial_name": null, "trial_params": null }