|
{ |
|
"best_metric": 1.606531023979187, |
|
"best_model_checkpoint": "distilbert_final_tweet_dataset_200_epoch/checkpoint-230", |
|
"epoch": 43.0, |
|
"global_step": 4945, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6327160493827161, |
|
"eval_f1": 0.6368645566163996, |
|
"eval_loss": 2.3408091068267822, |
|
"eval_runtime": 0.7378, |
|
"eval_samples_per_second": 439.117, |
|
"eval_steps_per_second": 55.567, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6728395061728395, |
|
"eval_f1": 0.6724357294978492, |
|
"eval_loss": 1.606531023979187, |
|
"eval_runtime": 0.7455, |
|
"eval_samples_per_second": 434.608, |
|
"eval_steps_per_second": 54.997, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6635802469135802, |
|
"eval_f1": 0.6659200702678963, |
|
"eval_loss": 2.096076250076294, |
|
"eval_runtime": 0.7198, |
|
"eval_samples_per_second": 450.123, |
|
"eval_steps_per_second": 56.96, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6296296296296297, |
|
"eval_f1": 0.6251138433515483, |
|
"eval_loss": 2.2798941135406494, |
|
"eval_runtime": 0.7194, |
|
"eval_samples_per_second": 450.35, |
|
"eval_steps_per_second": 56.989, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.891304347826087e-05, |
|
"loss": 0.1025, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6512345679012346, |
|
"eval_f1": 0.6431049313635814, |
|
"eval_loss": 2.6507568359375, |
|
"eval_runtime": 0.7337, |
|
"eval_samples_per_second": 441.574, |
|
"eval_steps_per_second": 55.878, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6697530864197531, |
|
"eval_f1": 0.6715964982923955, |
|
"eval_loss": 2.2207276821136475, |
|
"eval_runtime": 0.8004, |
|
"eval_samples_per_second": 404.806, |
|
"eval_steps_per_second": 51.225, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6728395061728395, |
|
"eval_f1": 0.6722509202387013, |
|
"eval_loss": 2.337564706802368, |
|
"eval_runtime": 0.7663, |
|
"eval_samples_per_second": 422.784, |
|
"eval_steps_per_second": 53.5, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6604938271604939, |
|
"eval_f1": 0.6652170891831549, |
|
"eval_loss": 2.6886019706726074, |
|
"eval_runtime": 0.7308, |
|
"eval_samples_per_second": 443.343, |
|
"eval_steps_per_second": 56.102, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.782608695652174e-05, |
|
"loss": 0.0591, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6728395061728395, |
|
"eval_f1": 0.6669062733756546, |
|
"eval_loss": 2.4700207710266113, |
|
"eval_runtime": 0.7607, |
|
"eval_samples_per_second": 425.94, |
|
"eval_steps_per_second": 53.9, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.654320987654321, |
|
"eval_f1": 0.6600467030032732, |
|
"eval_loss": 2.461507797241211, |
|
"eval_runtime": 0.7355, |
|
"eval_samples_per_second": 440.496, |
|
"eval_steps_per_second": 55.742, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6851851851851852, |
|
"eval_f1": 0.6826296938167441, |
|
"eval_loss": 2.5344278812408447, |
|
"eval_runtime": 0.7295, |
|
"eval_samples_per_second": 444.161, |
|
"eval_steps_per_second": 56.206, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6820987654320988, |
|
"eval_f1": 0.6847534599635866, |
|
"eval_loss": 2.6750032901763916, |
|
"eval_runtime": 0.8174, |
|
"eval_samples_per_second": 396.364, |
|
"eval_steps_per_second": 50.157, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6574074074074074, |
|
"eval_f1": 0.6621576835862549, |
|
"eval_loss": 2.416313648223877, |
|
"eval_runtime": 0.7489, |
|
"eval_samples_per_second": 432.618, |
|
"eval_steps_per_second": 54.745, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 4.673913043478261e-05, |
|
"loss": 0.0346, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6574074074074074, |
|
"eval_f1": 0.6600444146104354, |
|
"eval_loss": 2.7329800128936768, |
|
"eval_runtime": 0.7311, |
|
"eval_samples_per_second": 443.144, |
|
"eval_steps_per_second": 56.077, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.654320987654321, |
|
"eval_f1": 0.6504462260925387, |
|
"eval_loss": 2.4202656745910645, |
|
"eval_runtime": 0.7386, |
|
"eval_samples_per_second": 438.686, |
|
"eval_steps_per_second": 55.513, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7006172839506173, |
|
"eval_f1": 0.7004058172662825, |
|
"eval_loss": 2.3290841579437256, |
|
"eval_runtime": 0.7341, |
|
"eval_samples_per_second": 441.353, |
|
"eval_steps_per_second": 55.85, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.6603541359368218, |
|
"eval_loss": 2.7727530002593994, |
|
"eval_runtime": 0.727, |
|
"eval_samples_per_second": 445.679, |
|
"eval_steps_per_second": 56.398, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 4.565217391304348e-05, |
|
"loss": 0.0247, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6574074074074074, |
|
"eval_f1": 0.6600831890627571, |
|
"eval_loss": 2.668175458908081, |
|
"eval_runtime": 0.8752, |
|
"eval_samples_per_second": 370.214, |
|
"eval_steps_per_second": 46.848, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.6703415017567717, |
|
"eval_loss": 2.8486380577087402, |
|
"eval_runtime": 0.7459, |
|
"eval_samples_per_second": 434.347, |
|
"eval_steps_per_second": 54.964, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6635802469135802, |
|
"eval_f1": 0.6654895433909519, |
|
"eval_loss": 2.988004446029663, |
|
"eval_runtime": 0.7495, |
|
"eval_samples_per_second": 432.284, |
|
"eval_steps_per_second": 54.703, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6604938271604939, |
|
"eval_f1": 0.6629249499004221, |
|
"eval_loss": 2.923362970352173, |
|
"eval_runtime": 0.7413, |
|
"eval_samples_per_second": 437.091, |
|
"eval_steps_per_second": 55.311, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 4.456521739130435e-05, |
|
"loss": 0.0264, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6512345679012346, |
|
"eval_f1": 0.6561078483445887, |
|
"eval_loss": 2.6863272190093994, |
|
"eval_runtime": 0.749, |
|
"eval_samples_per_second": 432.552, |
|
"eval_steps_per_second": 54.736, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6604938271604939, |
|
"eval_f1": 0.6610722610722611, |
|
"eval_loss": 2.7173354625701904, |
|
"eval_runtime": 0.7384, |
|
"eval_samples_per_second": 438.801, |
|
"eval_steps_per_second": 55.527, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6481481481481481, |
|
"eval_f1": 0.6504829054920129, |
|
"eval_loss": 3.2829830646514893, |
|
"eval_runtime": 0.8839, |
|
"eval_samples_per_second": 366.573, |
|
"eval_steps_per_second": 46.387, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6851851851851852, |
|
"eval_f1": 0.6787650339814832, |
|
"eval_loss": 2.6303372383117676, |
|
"eval_runtime": 0.7774, |
|
"eval_samples_per_second": 416.749, |
|
"eval_steps_per_second": 52.737, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6604938271604939, |
|
"eval_f1": 0.6618789271005957, |
|
"eval_loss": 2.580350875854492, |
|
"eval_runtime": 0.8987, |
|
"eval_samples_per_second": 360.512, |
|
"eval_steps_per_second": 45.62, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 4.347826086956522e-05, |
|
"loss": 0.0178, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6296296296296297, |
|
"eval_f1": 0.631715246325266, |
|
"eval_loss": 3.3269691467285156, |
|
"eval_runtime": 0.7462, |
|
"eval_samples_per_second": 434.183, |
|
"eval_steps_per_second": 54.943, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6512345679012346, |
|
"eval_f1": 0.646016361656172, |
|
"eval_loss": 2.600759267807007, |
|
"eval_runtime": 0.754, |
|
"eval_samples_per_second": 429.696, |
|
"eval_steps_per_second": 54.375, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6759259259259259, |
|
"eval_f1": 0.6760610574156681, |
|
"eval_loss": 2.3673853874206543, |
|
"eval_runtime": 0.7318, |
|
"eval_samples_per_second": 442.773, |
|
"eval_steps_per_second": 56.03, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6728395061728395, |
|
"eval_f1": 0.6779632769589728, |
|
"eval_loss": 2.884216547012329, |
|
"eval_runtime": 0.8346, |
|
"eval_samples_per_second": 388.205, |
|
"eval_steps_per_second": 49.125, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 4.239130434782609e-05, |
|
"loss": 0.0265, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.691358024691358, |
|
"eval_f1": 0.6760321881467606, |
|
"eval_loss": 2.432506799697876, |
|
"eval_runtime": 0.7578, |
|
"eval_samples_per_second": 427.542, |
|
"eval_steps_per_second": 54.103, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6759259259259259, |
|
"eval_f1": 0.6731011257710287, |
|
"eval_loss": 2.599379062652588, |
|
"eval_runtime": 0.883, |
|
"eval_samples_per_second": 366.946, |
|
"eval_steps_per_second": 46.434, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7067901234567902, |
|
"eval_f1": 0.695461794954067, |
|
"eval_loss": 2.5045433044433594, |
|
"eval_runtime": 0.7331, |
|
"eval_samples_per_second": 441.973, |
|
"eval_steps_per_second": 55.929, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6697530864197531, |
|
"eval_f1": 0.6731205747259303, |
|
"eval_loss": 3.0988411903381348, |
|
"eval_runtime": 0.7479, |
|
"eval_samples_per_second": 433.231, |
|
"eval_steps_per_second": 54.822, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 4.130434782608696e-05, |
|
"loss": 0.0161, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6604938271604939, |
|
"eval_f1": 0.6623841608728461, |
|
"eval_loss": 3.1595075130462646, |
|
"eval_runtime": 0.7269, |
|
"eval_samples_per_second": 445.742, |
|
"eval_steps_per_second": 56.406, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6820987654320988, |
|
"eval_f1": 0.6742414417588002, |
|
"eval_loss": 2.7971243858337402, |
|
"eval_runtime": 0.9256, |
|
"eval_samples_per_second": 350.06, |
|
"eval_steps_per_second": 44.298, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6728395061728395, |
|
"eval_f1": 0.6738584667507038, |
|
"eval_loss": 2.8675856590270996, |
|
"eval_runtime": 0.7764, |
|
"eval_samples_per_second": 417.318, |
|
"eval_steps_per_second": 52.809, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6759259259259259, |
|
"eval_f1": 0.678580671598335, |
|
"eval_loss": 3.1342642307281494, |
|
"eval_runtime": 0.7436, |
|
"eval_samples_per_second": 435.727, |
|
"eval_steps_per_second": 55.138, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.6685957490826878, |
|
"eval_loss": 3.1532750129699707, |
|
"eval_runtime": 0.736, |
|
"eval_samples_per_second": 440.237, |
|
"eval_steps_per_second": 55.709, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 4.021739130434783e-05, |
|
"loss": 0.0057, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6697530864197531, |
|
"eval_f1": 0.6710645630658144, |
|
"eval_loss": 3.224386215209961, |
|
"eval_runtime": 0.7446, |
|
"eval_samples_per_second": 435.153, |
|
"eval_steps_per_second": 55.066, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6697530864197531, |
|
"eval_f1": 0.6710645630658144, |
|
"eval_loss": 3.2694830894470215, |
|
"eval_runtime": 0.7256, |
|
"eval_samples_per_second": 446.497, |
|
"eval_steps_per_second": 56.501, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.654320987654321, |
|
"eval_f1": 0.6527479860813193, |
|
"eval_loss": 3.1891162395477295, |
|
"eval_runtime": 1.0318, |
|
"eval_samples_per_second": 314.002, |
|
"eval_steps_per_second": 39.735, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6450617283950617, |
|
"eval_f1": 0.6456619146444574, |
|
"eval_loss": 3.0968563556671143, |
|
"eval_runtime": 0.7581, |
|
"eval_samples_per_second": 427.411, |
|
"eval_steps_per_second": 54.086, |
|
"step": 4945 |
|
} |
|
], |
|
"max_steps": 23000, |
|
"num_train_epochs": 200, |
|
"total_flos": 1595848204716012.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|