cruiser's picture
Training in progress, epoch 43
1fcad30
raw
history blame
13.6 kB
{
"best_metric": 1.606531023979187,
"best_model_checkpoint": "distilbert_final_tweet_dataset_200_epoch/checkpoint-230",
"epoch": 43.0,
"global_step": 4945,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6327160493827161,
"eval_f1": 0.6368645566163996,
"eval_loss": 2.3408091068267822,
"eval_runtime": 0.7378,
"eval_samples_per_second": 439.117,
"eval_steps_per_second": 55.567,
"step": 115
},
{
"epoch": 2.0,
"eval_accuracy": 0.6728395061728395,
"eval_f1": 0.6724357294978492,
"eval_loss": 1.606531023979187,
"eval_runtime": 0.7455,
"eval_samples_per_second": 434.608,
"eval_steps_per_second": 54.997,
"step": 230
},
{
"epoch": 3.0,
"eval_accuracy": 0.6635802469135802,
"eval_f1": 0.6659200702678963,
"eval_loss": 2.096076250076294,
"eval_runtime": 0.7198,
"eval_samples_per_second": 450.123,
"eval_steps_per_second": 56.96,
"step": 345
},
{
"epoch": 4.0,
"eval_accuracy": 0.6296296296296297,
"eval_f1": 0.6251138433515483,
"eval_loss": 2.2798941135406494,
"eval_runtime": 0.7194,
"eval_samples_per_second": 450.35,
"eval_steps_per_second": 56.989,
"step": 460
},
{
"epoch": 4.35,
"learning_rate": 4.891304347826087e-05,
"loss": 0.1025,
"step": 500
},
{
"epoch": 5.0,
"eval_accuracy": 0.6512345679012346,
"eval_f1": 0.6431049313635814,
"eval_loss": 2.6507568359375,
"eval_runtime": 0.7337,
"eval_samples_per_second": 441.574,
"eval_steps_per_second": 55.878,
"step": 575
},
{
"epoch": 6.0,
"eval_accuracy": 0.6697530864197531,
"eval_f1": 0.6715964982923955,
"eval_loss": 2.2207276821136475,
"eval_runtime": 0.8004,
"eval_samples_per_second": 404.806,
"eval_steps_per_second": 51.225,
"step": 690
},
{
"epoch": 7.0,
"eval_accuracy": 0.6728395061728395,
"eval_f1": 0.6722509202387013,
"eval_loss": 2.337564706802368,
"eval_runtime": 0.7663,
"eval_samples_per_second": 422.784,
"eval_steps_per_second": 53.5,
"step": 805
},
{
"epoch": 8.0,
"eval_accuracy": 0.6604938271604939,
"eval_f1": 0.6652170891831549,
"eval_loss": 2.6886019706726074,
"eval_runtime": 0.7308,
"eval_samples_per_second": 443.343,
"eval_steps_per_second": 56.102,
"step": 920
},
{
"epoch": 8.7,
"learning_rate": 4.782608695652174e-05,
"loss": 0.0591,
"step": 1000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6728395061728395,
"eval_f1": 0.6669062733756546,
"eval_loss": 2.4700207710266113,
"eval_runtime": 0.7607,
"eval_samples_per_second": 425.94,
"eval_steps_per_second": 53.9,
"step": 1035
},
{
"epoch": 10.0,
"eval_accuracy": 0.654320987654321,
"eval_f1": 0.6600467030032732,
"eval_loss": 2.461507797241211,
"eval_runtime": 0.7355,
"eval_samples_per_second": 440.496,
"eval_steps_per_second": 55.742,
"step": 1150
},
{
"epoch": 11.0,
"eval_accuracy": 0.6851851851851852,
"eval_f1": 0.6826296938167441,
"eval_loss": 2.5344278812408447,
"eval_runtime": 0.7295,
"eval_samples_per_second": 444.161,
"eval_steps_per_second": 56.206,
"step": 1265
},
{
"epoch": 12.0,
"eval_accuracy": 0.6820987654320988,
"eval_f1": 0.6847534599635866,
"eval_loss": 2.6750032901763916,
"eval_runtime": 0.8174,
"eval_samples_per_second": 396.364,
"eval_steps_per_second": 50.157,
"step": 1380
},
{
"epoch": 13.0,
"eval_accuracy": 0.6574074074074074,
"eval_f1": 0.6621576835862549,
"eval_loss": 2.416313648223877,
"eval_runtime": 0.7489,
"eval_samples_per_second": 432.618,
"eval_steps_per_second": 54.745,
"step": 1495
},
{
"epoch": 13.04,
"learning_rate": 4.673913043478261e-05,
"loss": 0.0346,
"step": 1500
},
{
"epoch": 14.0,
"eval_accuracy": 0.6574074074074074,
"eval_f1": 0.6600444146104354,
"eval_loss": 2.7329800128936768,
"eval_runtime": 0.7311,
"eval_samples_per_second": 443.144,
"eval_steps_per_second": 56.077,
"step": 1610
},
{
"epoch": 15.0,
"eval_accuracy": 0.654320987654321,
"eval_f1": 0.6504462260925387,
"eval_loss": 2.4202656745910645,
"eval_runtime": 0.7386,
"eval_samples_per_second": 438.686,
"eval_steps_per_second": 55.513,
"step": 1725
},
{
"epoch": 16.0,
"eval_accuracy": 0.7006172839506173,
"eval_f1": 0.7004058172662825,
"eval_loss": 2.3290841579437256,
"eval_runtime": 0.7341,
"eval_samples_per_second": 441.353,
"eval_steps_per_second": 55.85,
"step": 1840
},
{
"epoch": 17.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.6603541359368218,
"eval_loss": 2.7727530002593994,
"eval_runtime": 0.727,
"eval_samples_per_second": 445.679,
"eval_steps_per_second": 56.398,
"step": 1955
},
{
"epoch": 17.39,
"learning_rate": 4.565217391304348e-05,
"loss": 0.0247,
"step": 2000
},
{
"epoch": 18.0,
"eval_accuracy": 0.6574074074074074,
"eval_f1": 0.6600831890627571,
"eval_loss": 2.668175458908081,
"eval_runtime": 0.8752,
"eval_samples_per_second": 370.214,
"eval_steps_per_second": 46.848,
"step": 2070
},
{
"epoch": 19.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.6703415017567717,
"eval_loss": 2.8486380577087402,
"eval_runtime": 0.7459,
"eval_samples_per_second": 434.347,
"eval_steps_per_second": 54.964,
"step": 2185
},
{
"epoch": 20.0,
"eval_accuracy": 0.6635802469135802,
"eval_f1": 0.6654895433909519,
"eval_loss": 2.988004446029663,
"eval_runtime": 0.7495,
"eval_samples_per_second": 432.284,
"eval_steps_per_second": 54.703,
"step": 2300
},
{
"epoch": 21.0,
"eval_accuracy": 0.6604938271604939,
"eval_f1": 0.6629249499004221,
"eval_loss": 2.923362970352173,
"eval_runtime": 0.7413,
"eval_samples_per_second": 437.091,
"eval_steps_per_second": 55.311,
"step": 2415
},
{
"epoch": 21.74,
"learning_rate": 4.456521739130435e-05,
"loss": 0.0264,
"step": 2500
},
{
"epoch": 22.0,
"eval_accuracy": 0.6512345679012346,
"eval_f1": 0.6561078483445887,
"eval_loss": 2.6863272190093994,
"eval_runtime": 0.749,
"eval_samples_per_second": 432.552,
"eval_steps_per_second": 54.736,
"step": 2530
},
{
"epoch": 23.0,
"eval_accuracy": 0.6604938271604939,
"eval_f1": 0.6610722610722611,
"eval_loss": 2.7173354625701904,
"eval_runtime": 0.7384,
"eval_samples_per_second": 438.801,
"eval_steps_per_second": 55.527,
"step": 2645
},
{
"epoch": 24.0,
"eval_accuracy": 0.6481481481481481,
"eval_f1": 0.6504829054920129,
"eval_loss": 3.2829830646514893,
"eval_runtime": 0.8839,
"eval_samples_per_second": 366.573,
"eval_steps_per_second": 46.387,
"step": 2760
},
{
"epoch": 25.0,
"eval_accuracy": 0.6851851851851852,
"eval_f1": 0.6787650339814832,
"eval_loss": 2.6303372383117676,
"eval_runtime": 0.7774,
"eval_samples_per_second": 416.749,
"eval_steps_per_second": 52.737,
"step": 2875
},
{
"epoch": 26.0,
"eval_accuracy": 0.6604938271604939,
"eval_f1": 0.6618789271005957,
"eval_loss": 2.580350875854492,
"eval_runtime": 0.8987,
"eval_samples_per_second": 360.512,
"eval_steps_per_second": 45.62,
"step": 2990
},
{
"epoch": 26.09,
"learning_rate": 4.347826086956522e-05,
"loss": 0.0178,
"step": 3000
},
{
"epoch": 27.0,
"eval_accuracy": 0.6296296296296297,
"eval_f1": 0.631715246325266,
"eval_loss": 3.3269691467285156,
"eval_runtime": 0.7462,
"eval_samples_per_second": 434.183,
"eval_steps_per_second": 54.943,
"step": 3105
},
{
"epoch": 28.0,
"eval_accuracy": 0.6512345679012346,
"eval_f1": 0.646016361656172,
"eval_loss": 2.600759267807007,
"eval_runtime": 0.754,
"eval_samples_per_second": 429.696,
"eval_steps_per_second": 54.375,
"step": 3220
},
{
"epoch": 29.0,
"eval_accuracy": 0.6759259259259259,
"eval_f1": 0.6760610574156681,
"eval_loss": 2.3673853874206543,
"eval_runtime": 0.7318,
"eval_samples_per_second": 442.773,
"eval_steps_per_second": 56.03,
"step": 3335
},
{
"epoch": 30.0,
"eval_accuracy": 0.6728395061728395,
"eval_f1": 0.6779632769589728,
"eval_loss": 2.884216547012329,
"eval_runtime": 0.8346,
"eval_samples_per_second": 388.205,
"eval_steps_per_second": 49.125,
"step": 3450
},
{
"epoch": 30.43,
"learning_rate": 4.239130434782609e-05,
"loss": 0.0265,
"step": 3500
},
{
"epoch": 31.0,
"eval_accuracy": 0.691358024691358,
"eval_f1": 0.6760321881467606,
"eval_loss": 2.432506799697876,
"eval_runtime": 0.7578,
"eval_samples_per_second": 427.542,
"eval_steps_per_second": 54.103,
"step": 3565
},
{
"epoch": 32.0,
"eval_accuracy": 0.6759259259259259,
"eval_f1": 0.6731011257710287,
"eval_loss": 2.599379062652588,
"eval_runtime": 0.883,
"eval_samples_per_second": 366.946,
"eval_steps_per_second": 46.434,
"step": 3680
},
{
"epoch": 33.0,
"eval_accuracy": 0.7067901234567902,
"eval_f1": 0.695461794954067,
"eval_loss": 2.5045433044433594,
"eval_runtime": 0.7331,
"eval_samples_per_second": 441.973,
"eval_steps_per_second": 55.929,
"step": 3795
},
{
"epoch": 34.0,
"eval_accuracy": 0.6697530864197531,
"eval_f1": 0.6731205747259303,
"eval_loss": 3.0988411903381348,
"eval_runtime": 0.7479,
"eval_samples_per_second": 433.231,
"eval_steps_per_second": 54.822,
"step": 3910
},
{
"epoch": 34.78,
"learning_rate": 4.130434782608696e-05,
"loss": 0.0161,
"step": 4000
},
{
"epoch": 35.0,
"eval_accuracy": 0.6604938271604939,
"eval_f1": 0.6623841608728461,
"eval_loss": 3.1595075130462646,
"eval_runtime": 0.7269,
"eval_samples_per_second": 445.742,
"eval_steps_per_second": 56.406,
"step": 4025
},
{
"epoch": 36.0,
"eval_accuracy": 0.6820987654320988,
"eval_f1": 0.6742414417588002,
"eval_loss": 2.7971243858337402,
"eval_runtime": 0.9256,
"eval_samples_per_second": 350.06,
"eval_steps_per_second": 44.298,
"step": 4140
},
{
"epoch": 37.0,
"eval_accuracy": 0.6728395061728395,
"eval_f1": 0.6738584667507038,
"eval_loss": 2.8675856590270996,
"eval_runtime": 0.7764,
"eval_samples_per_second": 417.318,
"eval_steps_per_second": 52.809,
"step": 4255
},
{
"epoch": 38.0,
"eval_accuracy": 0.6759259259259259,
"eval_f1": 0.678580671598335,
"eval_loss": 3.1342642307281494,
"eval_runtime": 0.7436,
"eval_samples_per_second": 435.727,
"eval_steps_per_second": 55.138,
"step": 4370
},
{
"epoch": 39.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.6685957490826878,
"eval_loss": 3.1532750129699707,
"eval_runtime": 0.736,
"eval_samples_per_second": 440.237,
"eval_steps_per_second": 55.709,
"step": 4485
},
{
"epoch": 39.13,
"learning_rate": 4.021739130434783e-05,
"loss": 0.0057,
"step": 4500
},
{
"epoch": 40.0,
"eval_accuracy": 0.6697530864197531,
"eval_f1": 0.6710645630658144,
"eval_loss": 3.224386215209961,
"eval_runtime": 0.7446,
"eval_samples_per_second": 435.153,
"eval_steps_per_second": 55.066,
"step": 4600
},
{
"epoch": 41.0,
"eval_accuracy": 0.6697530864197531,
"eval_f1": 0.6710645630658144,
"eval_loss": 3.2694830894470215,
"eval_runtime": 0.7256,
"eval_samples_per_second": 446.497,
"eval_steps_per_second": 56.501,
"step": 4715
},
{
"epoch": 42.0,
"eval_accuracy": 0.654320987654321,
"eval_f1": 0.6527479860813193,
"eval_loss": 3.1891162395477295,
"eval_runtime": 1.0318,
"eval_samples_per_second": 314.002,
"eval_steps_per_second": 39.735,
"step": 4830
},
{
"epoch": 43.0,
"eval_accuracy": 0.6450617283950617,
"eval_f1": 0.6456619146444574,
"eval_loss": 3.0968563556671143,
"eval_runtime": 0.7581,
"eval_samples_per_second": 427.411,
"eval_steps_per_second": 54.086,
"step": 4945
}
],
"max_steps": 23000,
"num_train_epochs": 200,
"total_flos": 1595848204716012.0,
"trial_name": null,
"trial_params": null
}