Melo1512's picture
End of training
e6f0d36 verified
{
"best_metric": 0.8854679802955665,
"best_model_checkpoint": "vit-msn-small-wbc-classifier-0316-cropped-cleaned-dataset-10/checkpoint-357",
"epoch": 25.0,
"eval_steps": 500,
"global_step": 425,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5882352941176471,
"grad_norm": 12.153236389160156,
"learning_rate": 1.1627906976744187e-05,
"loss": 1.3709,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.805008210180624,
"eval_loss": 0.6976552605628967,
"eval_runtime": 7.6934,
"eval_samples_per_second": 316.633,
"eval_steps_per_second": 5.069,
"step": 17
},
{
"epoch": 1.1764705882352942,
"grad_norm": 14.669098854064941,
"learning_rate": 2.3255813953488374e-05,
"loss": 0.7911,
"step": 20
},
{
"epoch": 1.7647058823529411,
"grad_norm": 5.835587024688721,
"learning_rate": 3.488372093023256e-05,
"loss": 0.5673,
"step": 30
},
{
"epoch": 2.0,
"eval_accuracy": 0.8099343185550082,
"eval_loss": 0.5948964357376099,
"eval_runtime": 7.6055,
"eval_samples_per_second": 320.293,
"eval_steps_per_second": 5.128,
"step": 34
},
{
"epoch": 2.3529411764705883,
"grad_norm": 10.460927963256836,
"learning_rate": 4.651162790697675e-05,
"loss": 0.5463,
"step": 40
},
{
"epoch": 2.9411764705882355,
"grad_norm": 9.529006004333496,
"learning_rate": 4.9083769633507855e-05,
"loss": 0.5227,
"step": 50
},
{
"epoch": 3.0,
"eval_accuracy": 0.7931034482758621,
"eval_loss": 0.6151543259620667,
"eval_runtime": 7.5675,
"eval_samples_per_second": 321.904,
"eval_steps_per_second": 5.154,
"step": 51
},
{
"epoch": 3.5294117647058822,
"grad_norm": 6.369431495666504,
"learning_rate": 4.7774869109947645e-05,
"loss": 0.4958,
"step": 60
},
{
"epoch": 4.0,
"eval_accuracy": 0.8435960591133005,
"eval_loss": 0.4351496994495392,
"eval_runtime": 7.5745,
"eval_samples_per_second": 321.605,
"eval_steps_per_second": 5.149,
"step": 68
},
{
"epoch": 4.117647058823529,
"grad_norm": 10.749176979064941,
"learning_rate": 4.6465968586387436e-05,
"loss": 0.483,
"step": 70
},
{
"epoch": 4.705882352941177,
"grad_norm": 11.76589584350586,
"learning_rate": 4.5157068062827226e-05,
"loss": 0.4402,
"step": 80
},
{
"epoch": 5.0,
"eval_accuracy": 0.8579638752052545,
"eval_loss": 0.3776942789554596,
"eval_runtime": 7.5784,
"eval_samples_per_second": 321.442,
"eval_steps_per_second": 5.146,
"step": 85
},
{
"epoch": 5.294117647058823,
"grad_norm": 4.830715656280518,
"learning_rate": 4.384816753926702e-05,
"loss": 0.418,
"step": 90
},
{
"epoch": 5.882352941176471,
"grad_norm": 7.1909027099609375,
"learning_rate": 4.253926701570681e-05,
"loss": 0.3878,
"step": 100
},
{
"epoch": 6.0,
"eval_accuracy": 0.8698686371100164,
"eval_loss": 0.39695626497268677,
"eval_runtime": 7.4679,
"eval_samples_per_second": 326.194,
"eval_steps_per_second": 5.222,
"step": 102
},
{
"epoch": 6.470588235294118,
"grad_norm": 4.031527042388916,
"learning_rate": 4.12303664921466e-05,
"loss": 0.3646,
"step": 110
},
{
"epoch": 7.0,
"eval_accuracy": 0.8641215106732348,
"eval_loss": 0.3792504668235779,
"eval_runtime": 7.7676,
"eval_samples_per_second": 313.611,
"eval_steps_per_second": 5.021,
"step": 119
},
{
"epoch": 7.0588235294117645,
"grad_norm": 5.431394100189209,
"learning_rate": 3.992146596858639e-05,
"loss": 0.3965,
"step": 120
},
{
"epoch": 7.647058823529412,
"grad_norm": 4.352228164672852,
"learning_rate": 3.861256544502618e-05,
"loss": 0.3452,
"step": 130
},
{
"epoch": 8.0,
"eval_accuracy": 0.8805418719211823,
"eval_loss": 0.3549965023994446,
"eval_runtime": 7.4981,
"eval_samples_per_second": 324.881,
"eval_steps_per_second": 5.201,
"step": 136
},
{
"epoch": 8.235294117647058,
"grad_norm": 13.60920524597168,
"learning_rate": 3.730366492146597e-05,
"loss": 0.3754,
"step": 140
},
{
"epoch": 8.823529411764707,
"grad_norm": 7.518331050872803,
"learning_rate": 3.599476439790576e-05,
"loss": 0.344,
"step": 150
},
{
"epoch": 9.0,
"eval_accuracy": 0.8735632183908046,
"eval_loss": 0.40034234523773193,
"eval_runtime": 7.6227,
"eval_samples_per_second": 319.572,
"eval_steps_per_second": 5.116,
"step": 153
},
{
"epoch": 9.411764705882353,
"grad_norm": 7.200953960418701,
"learning_rate": 3.468586387434556e-05,
"loss": 0.3418,
"step": 160
},
{
"epoch": 10.0,
"grad_norm": 7.663539886474609,
"learning_rate": 3.337696335078534e-05,
"loss": 0.3365,
"step": 170
},
{
"epoch": 10.0,
"eval_accuracy": 0.8830049261083743,
"eval_loss": 0.36542433500289917,
"eval_runtime": 7.6247,
"eval_samples_per_second": 319.489,
"eval_steps_per_second": 5.115,
"step": 170
},
{
"epoch": 10.588235294117647,
"grad_norm": 4.720971584320068,
"learning_rate": 3.206806282722513e-05,
"loss": 0.3223,
"step": 180
},
{
"epoch": 11.0,
"eval_accuracy": 0.8764367816091954,
"eval_loss": 0.3570525646209717,
"eval_runtime": 7.6607,
"eval_samples_per_second": 317.988,
"eval_steps_per_second": 5.091,
"step": 187
},
{
"epoch": 11.176470588235293,
"grad_norm": 4.508999824523926,
"learning_rate": 3.075916230366492e-05,
"loss": 0.3084,
"step": 190
},
{
"epoch": 11.764705882352942,
"grad_norm": 7.280440807342529,
"learning_rate": 2.9450261780104715e-05,
"loss": 0.2819,
"step": 200
},
{
"epoch": 12.0,
"eval_accuracy": 0.8788998357963875,
"eval_loss": 0.3665020167827606,
"eval_runtime": 7.4797,
"eval_samples_per_second": 325.683,
"eval_steps_per_second": 5.214,
"step": 204
},
{
"epoch": 12.352941176470589,
"grad_norm": 5.721529483795166,
"learning_rate": 2.8141361256544502e-05,
"loss": 0.3067,
"step": 210
},
{
"epoch": 12.941176470588236,
"grad_norm": 5.043443202972412,
"learning_rate": 2.6832460732984293e-05,
"loss": 0.2998,
"step": 220
},
{
"epoch": 13.0,
"eval_accuracy": 0.8838259441707718,
"eval_loss": 0.36087653040885925,
"eval_runtime": 7.7018,
"eval_samples_per_second": 316.291,
"eval_steps_per_second": 5.064,
"step": 221
},
{
"epoch": 13.529411764705882,
"grad_norm": 9.1295804977417,
"learning_rate": 2.5523560209424086e-05,
"loss": 0.2959,
"step": 230
},
{
"epoch": 14.0,
"eval_accuracy": 0.8719211822660099,
"eval_loss": 0.4335402846336365,
"eval_runtime": 7.5815,
"eval_samples_per_second": 321.307,
"eval_steps_per_second": 5.144,
"step": 238
},
{
"epoch": 14.117647058823529,
"grad_norm": 6.499159812927246,
"learning_rate": 2.4214659685863873e-05,
"loss": 0.2732,
"step": 240
},
{
"epoch": 14.705882352941176,
"grad_norm": 5.268309116363525,
"learning_rate": 2.2905759162303667e-05,
"loss": 0.2662,
"step": 250
},
{
"epoch": 15.0,
"eval_accuracy": 0.8784893267651889,
"eval_loss": 0.42450225353240967,
"eval_runtime": 7.7843,
"eval_samples_per_second": 312.939,
"eval_steps_per_second": 5.01,
"step": 255
},
{
"epoch": 15.294117647058824,
"grad_norm": 3.793020725250244,
"learning_rate": 2.1596858638743454e-05,
"loss": 0.2693,
"step": 260
},
{
"epoch": 15.882352941176471,
"grad_norm": 8.22383975982666,
"learning_rate": 2.0287958115183248e-05,
"loss": 0.2668,
"step": 270
},
{
"epoch": 16.0,
"eval_accuracy": 0.8846469622331691,
"eval_loss": 0.37603434920310974,
"eval_runtime": 7.5418,
"eval_samples_per_second": 322.999,
"eval_steps_per_second": 5.171,
"step": 272
},
{
"epoch": 16.470588235294116,
"grad_norm": 4.452105522155762,
"learning_rate": 1.8979057591623035e-05,
"loss": 0.2576,
"step": 280
},
{
"epoch": 17.0,
"eval_accuracy": 0.8830049261083743,
"eval_loss": 0.3727973699569702,
"eval_runtime": 7.5894,
"eval_samples_per_second": 320.973,
"eval_steps_per_second": 5.139,
"step": 289
},
{
"epoch": 17.058823529411764,
"grad_norm": 5.180832386016846,
"learning_rate": 1.767015706806283e-05,
"loss": 0.2578,
"step": 290
},
{
"epoch": 17.647058823529413,
"grad_norm": 6.889744758605957,
"learning_rate": 1.636125654450262e-05,
"loss": 0.2398,
"step": 300
},
{
"epoch": 18.0,
"eval_accuracy": 0.8813628899835796,
"eval_loss": 0.41920730471611023,
"eval_runtime": 7.5395,
"eval_samples_per_second": 323.1,
"eval_steps_per_second": 5.173,
"step": 306
},
{
"epoch": 18.235294117647058,
"grad_norm": 5.117687225341797,
"learning_rate": 1.505235602094241e-05,
"loss": 0.2353,
"step": 310
},
{
"epoch": 18.823529411764707,
"grad_norm": 5.613910675048828,
"learning_rate": 1.3743455497382199e-05,
"loss": 0.2278,
"step": 320
},
{
"epoch": 19.0,
"eval_accuracy": 0.8805418719211823,
"eval_loss": 0.41564103960990906,
"eval_runtime": 7.6556,
"eval_samples_per_second": 318.197,
"eval_steps_per_second": 5.094,
"step": 323
},
{
"epoch": 19.41176470588235,
"grad_norm": 5.283733367919922,
"learning_rate": 1.243455497382199e-05,
"loss": 0.2297,
"step": 330
},
{
"epoch": 20.0,
"grad_norm": 5.314838886260986,
"learning_rate": 1.1125654450261781e-05,
"loss": 0.2033,
"step": 340
},
{
"epoch": 20.0,
"eval_accuracy": 0.8850574712643678,
"eval_loss": 0.41589659452438354,
"eval_runtime": 7.4417,
"eval_samples_per_second": 327.342,
"eval_steps_per_second": 5.241,
"step": 340
},
{
"epoch": 20.58823529411765,
"grad_norm": 5.559089183807373,
"learning_rate": 9.816753926701572e-06,
"loss": 0.2037,
"step": 350
},
{
"epoch": 21.0,
"eval_accuracy": 0.8854679802955665,
"eval_loss": 0.3985845744609833,
"eval_runtime": 7.5913,
"eval_samples_per_second": 320.893,
"eval_steps_per_second": 5.137,
"step": 357
},
{
"epoch": 21.176470588235293,
"grad_norm": 4.5341596603393555,
"learning_rate": 8.507853403141362e-06,
"loss": 0.2106,
"step": 360
},
{
"epoch": 21.764705882352942,
"grad_norm": 4.3524580001831055,
"learning_rate": 7.1989528795811526e-06,
"loss": 0.1934,
"step": 370
},
{
"epoch": 22.0,
"eval_accuracy": 0.882183908045977,
"eval_loss": 0.42198774218559265,
"eval_runtime": 7.5092,
"eval_samples_per_second": 324.401,
"eval_steps_per_second": 5.194,
"step": 374
},
{
"epoch": 22.352941176470587,
"grad_norm": 6.747894763946533,
"learning_rate": 5.890052356020943e-06,
"loss": 0.2043,
"step": 380
},
{
"epoch": 22.941176470588236,
"grad_norm": 5.149715900421143,
"learning_rate": 4.5811518324607335e-06,
"loss": 0.1983,
"step": 390
},
{
"epoch": 23.0,
"eval_accuracy": 0.8854679802955665,
"eval_loss": 0.4159087836742401,
"eval_runtime": 7.4544,
"eval_samples_per_second": 326.786,
"eval_steps_per_second": 5.232,
"step": 391
},
{
"epoch": 23.529411764705884,
"grad_norm": 4.871130466461182,
"learning_rate": 3.272251308900524e-06,
"loss": 0.1746,
"step": 400
},
{
"epoch": 24.0,
"eval_accuracy": 0.8854679802955665,
"eval_loss": 0.41789308190345764,
"eval_runtime": 7.7744,
"eval_samples_per_second": 313.335,
"eval_steps_per_second": 5.016,
"step": 408
},
{
"epoch": 24.11764705882353,
"grad_norm": 4.728577136993408,
"learning_rate": 1.9633507853403143e-06,
"loss": 0.1925,
"step": 410
},
{
"epoch": 24.705882352941178,
"grad_norm": 5.179035663604736,
"learning_rate": 6.544502617801048e-07,
"loss": 0.1776,
"step": 420
},
{
"epoch": 25.0,
"eval_accuracy": 0.8834154351395731,
"eval_loss": 0.42465320229530334,
"eval_runtime": 7.6371,
"eval_samples_per_second": 318.97,
"eval_steps_per_second": 5.107,
"step": 425
},
{
"epoch": 25.0,
"step": 425,
"total_flos": 2.113465532645376e+18,
"train_loss": 0.34626551782383636,
"train_runtime": 1013.337,
"train_samples_per_second": 106.579,
"train_steps_per_second": 0.419
}
],
"logging_steps": 10,
"max_steps": 425,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.113465532645376e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}