|
{ |
|
"best_metric": 0.8854679802955665, |
|
"best_model_checkpoint": "vit-msn-small-wbc-classifier-0316-cropped-cleaned-dataset-10/checkpoint-357", |
|
"epoch": 25.0, |
|
"eval_steps": 500, |
|
"global_step": 425, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 12.153236389160156, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 1.3709, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.805008210180624, |
|
"eval_loss": 0.6976552605628967, |
|
"eval_runtime": 7.6934, |
|
"eval_samples_per_second": 316.633, |
|
"eval_steps_per_second": 5.069, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 14.669098854064941, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.7911, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 5.835587024688721, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.5673, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8099343185550082, |
|
"eval_loss": 0.5948964357376099, |
|
"eval_runtime": 7.6055, |
|
"eval_samples_per_second": 320.293, |
|
"eval_steps_per_second": 5.128, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 10.460927963256836, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.5463, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 9.529006004333496, |
|
"learning_rate": 4.9083769633507855e-05, |
|
"loss": 0.5227, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7931034482758621, |
|
"eval_loss": 0.6151543259620667, |
|
"eval_runtime": 7.5675, |
|
"eval_samples_per_second": 321.904, |
|
"eval_steps_per_second": 5.154, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.5294117647058822, |
|
"grad_norm": 6.369431495666504, |
|
"learning_rate": 4.7774869109947645e-05, |
|
"loss": 0.4958, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8435960591133005, |
|
"eval_loss": 0.4351496994495392, |
|
"eval_runtime": 7.5745, |
|
"eval_samples_per_second": 321.605, |
|
"eval_steps_per_second": 5.149, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.117647058823529, |
|
"grad_norm": 10.749176979064941, |
|
"learning_rate": 4.6465968586387436e-05, |
|
"loss": 0.483, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.705882352941177, |
|
"grad_norm": 11.76589584350586, |
|
"learning_rate": 4.5157068062827226e-05, |
|
"loss": 0.4402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8579638752052545, |
|
"eval_loss": 0.3776942789554596, |
|
"eval_runtime": 7.5784, |
|
"eval_samples_per_second": 321.442, |
|
"eval_steps_per_second": 5.146, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 5.294117647058823, |
|
"grad_norm": 4.830715656280518, |
|
"learning_rate": 4.384816753926702e-05, |
|
"loss": 0.418, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 7.1909027099609375, |
|
"learning_rate": 4.253926701570681e-05, |
|
"loss": 0.3878, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8698686371100164, |
|
"eval_loss": 0.39695626497268677, |
|
"eval_runtime": 7.4679, |
|
"eval_samples_per_second": 326.194, |
|
"eval_steps_per_second": 5.222, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 6.470588235294118, |
|
"grad_norm": 4.031527042388916, |
|
"learning_rate": 4.12303664921466e-05, |
|
"loss": 0.3646, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8641215106732348, |
|
"eval_loss": 0.3792504668235779, |
|
"eval_runtime": 7.7676, |
|
"eval_samples_per_second": 313.611, |
|
"eval_steps_per_second": 5.021, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 7.0588235294117645, |
|
"grad_norm": 5.431394100189209, |
|
"learning_rate": 3.992146596858639e-05, |
|
"loss": 0.3965, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.647058823529412, |
|
"grad_norm": 4.352228164672852, |
|
"learning_rate": 3.861256544502618e-05, |
|
"loss": 0.3452, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8805418719211823, |
|
"eval_loss": 0.3549965023994446, |
|
"eval_runtime": 7.4981, |
|
"eval_samples_per_second": 324.881, |
|
"eval_steps_per_second": 5.201, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 8.235294117647058, |
|
"grad_norm": 13.60920524597168, |
|
"learning_rate": 3.730366492146597e-05, |
|
"loss": 0.3754, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.823529411764707, |
|
"grad_norm": 7.518331050872803, |
|
"learning_rate": 3.599476439790576e-05, |
|
"loss": 0.344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8735632183908046, |
|
"eval_loss": 0.40034234523773193, |
|
"eval_runtime": 7.6227, |
|
"eval_samples_per_second": 319.572, |
|
"eval_steps_per_second": 5.116, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 7.200953960418701, |
|
"learning_rate": 3.468586387434556e-05, |
|
"loss": 0.3418, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 7.663539886474609, |
|
"learning_rate": 3.337696335078534e-05, |
|
"loss": 0.3365, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8830049261083743, |
|
"eval_loss": 0.36542433500289917, |
|
"eval_runtime": 7.6247, |
|
"eval_samples_per_second": 319.489, |
|
"eval_steps_per_second": 5.115, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 10.588235294117647, |
|
"grad_norm": 4.720971584320068, |
|
"learning_rate": 3.206806282722513e-05, |
|
"loss": 0.3223, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8764367816091954, |
|
"eval_loss": 0.3570525646209717, |
|
"eval_runtime": 7.6607, |
|
"eval_samples_per_second": 317.988, |
|
"eval_steps_per_second": 5.091, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 11.176470588235293, |
|
"grad_norm": 4.508999824523926, |
|
"learning_rate": 3.075916230366492e-05, |
|
"loss": 0.3084, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 7.280440807342529, |
|
"learning_rate": 2.9450261780104715e-05, |
|
"loss": 0.2819, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8788998357963875, |
|
"eval_loss": 0.3665020167827606, |
|
"eval_runtime": 7.4797, |
|
"eval_samples_per_second": 325.683, |
|
"eval_steps_per_second": 5.214, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 12.352941176470589, |
|
"grad_norm": 5.721529483795166, |
|
"learning_rate": 2.8141361256544502e-05, |
|
"loss": 0.3067, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 12.941176470588236, |
|
"grad_norm": 5.043443202972412, |
|
"learning_rate": 2.6832460732984293e-05, |
|
"loss": 0.2998, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8838259441707718, |
|
"eval_loss": 0.36087653040885925, |
|
"eval_runtime": 7.7018, |
|
"eval_samples_per_second": 316.291, |
|
"eval_steps_per_second": 5.064, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 13.529411764705882, |
|
"grad_norm": 9.1295804977417, |
|
"learning_rate": 2.5523560209424086e-05, |
|
"loss": 0.2959, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8719211822660099, |
|
"eval_loss": 0.4335402846336365, |
|
"eval_runtime": 7.5815, |
|
"eval_samples_per_second": 321.307, |
|
"eval_steps_per_second": 5.144, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 14.117647058823529, |
|
"grad_norm": 6.499159812927246, |
|
"learning_rate": 2.4214659685863873e-05, |
|
"loss": 0.2732, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 14.705882352941176, |
|
"grad_norm": 5.268309116363525, |
|
"learning_rate": 2.2905759162303667e-05, |
|
"loss": 0.2662, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8784893267651889, |
|
"eval_loss": 0.42450225353240967, |
|
"eval_runtime": 7.7843, |
|
"eval_samples_per_second": 312.939, |
|
"eval_steps_per_second": 5.01, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 15.294117647058824, |
|
"grad_norm": 3.793020725250244, |
|
"learning_rate": 2.1596858638743454e-05, |
|
"loss": 0.2693, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 15.882352941176471, |
|
"grad_norm": 8.22383975982666, |
|
"learning_rate": 2.0287958115183248e-05, |
|
"loss": 0.2668, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8846469622331691, |
|
"eval_loss": 0.37603434920310974, |
|
"eval_runtime": 7.5418, |
|
"eval_samples_per_second": 322.999, |
|
"eval_steps_per_second": 5.171, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 16.470588235294116, |
|
"grad_norm": 4.452105522155762, |
|
"learning_rate": 1.8979057591623035e-05, |
|
"loss": 0.2576, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8830049261083743, |
|
"eval_loss": 0.3727973699569702, |
|
"eval_runtime": 7.5894, |
|
"eval_samples_per_second": 320.973, |
|
"eval_steps_per_second": 5.139, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 17.058823529411764, |
|
"grad_norm": 5.180832386016846, |
|
"learning_rate": 1.767015706806283e-05, |
|
"loss": 0.2578, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 17.647058823529413, |
|
"grad_norm": 6.889744758605957, |
|
"learning_rate": 1.636125654450262e-05, |
|
"loss": 0.2398, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8813628899835796, |
|
"eval_loss": 0.41920730471611023, |
|
"eval_runtime": 7.5395, |
|
"eval_samples_per_second": 323.1, |
|
"eval_steps_per_second": 5.173, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 18.235294117647058, |
|
"grad_norm": 5.117687225341797, |
|
"learning_rate": 1.505235602094241e-05, |
|
"loss": 0.2353, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"grad_norm": 5.613910675048828, |
|
"learning_rate": 1.3743455497382199e-05, |
|
"loss": 0.2278, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8805418719211823, |
|
"eval_loss": 0.41564103960990906, |
|
"eval_runtime": 7.6556, |
|
"eval_samples_per_second": 318.197, |
|
"eval_steps_per_second": 5.094, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 19.41176470588235, |
|
"grad_norm": 5.283733367919922, |
|
"learning_rate": 1.243455497382199e-05, |
|
"loss": 0.2297, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.314838886260986, |
|
"learning_rate": 1.1125654450261781e-05, |
|
"loss": 0.2033, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8850574712643678, |
|
"eval_loss": 0.41589659452438354, |
|
"eval_runtime": 7.4417, |
|
"eval_samples_per_second": 327.342, |
|
"eval_steps_per_second": 5.241, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 20.58823529411765, |
|
"grad_norm": 5.559089183807373, |
|
"learning_rate": 9.816753926701572e-06, |
|
"loss": 0.2037, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8854679802955665, |
|
"eval_loss": 0.3985845744609833, |
|
"eval_runtime": 7.5913, |
|
"eval_samples_per_second": 320.893, |
|
"eval_steps_per_second": 5.137, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 21.176470588235293, |
|
"grad_norm": 4.5341596603393555, |
|
"learning_rate": 8.507853403141362e-06, |
|
"loss": 0.2106, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 21.764705882352942, |
|
"grad_norm": 4.3524580001831055, |
|
"learning_rate": 7.1989528795811526e-06, |
|
"loss": 0.1934, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.882183908045977, |
|
"eval_loss": 0.42198774218559265, |
|
"eval_runtime": 7.5092, |
|
"eval_samples_per_second": 324.401, |
|
"eval_steps_per_second": 5.194, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 22.352941176470587, |
|
"grad_norm": 6.747894763946533, |
|
"learning_rate": 5.890052356020943e-06, |
|
"loss": 0.2043, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 22.941176470588236, |
|
"grad_norm": 5.149715900421143, |
|
"learning_rate": 4.5811518324607335e-06, |
|
"loss": 0.1983, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8854679802955665, |
|
"eval_loss": 0.4159087836742401, |
|
"eval_runtime": 7.4544, |
|
"eval_samples_per_second": 326.786, |
|
"eval_steps_per_second": 5.232, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 23.529411764705884, |
|
"grad_norm": 4.871130466461182, |
|
"learning_rate": 3.272251308900524e-06, |
|
"loss": 0.1746, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8854679802955665, |
|
"eval_loss": 0.41789308190345764, |
|
"eval_runtime": 7.7744, |
|
"eval_samples_per_second": 313.335, |
|
"eval_steps_per_second": 5.016, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 24.11764705882353, |
|
"grad_norm": 4.728577136993408, |
|
"learning_rate": 1.9633507853403143e-06, |
|
"loss": 0.1925, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 24.705882352941178, |
|
"grad_norm": 5.179035663604736, |
|
"learning_rate": 6.544502617801048e-07, |
|
"loss": 0.1776, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8834154351395731, |
|
"eval_loss": 0.42465320229530334, |
|
"eval_runtime": 7.6371, |
|
"eval_samples_per_second": 318.97, |
|
"eval_steps_per_second": 5.107, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 425, |
|
"total_flos": 2.113465532645376e+18, |
|
"train_loss": 0.34626551782383636, |
|
"train_runtime": 1013.337, |
|
"train_samples_per_second": 106.579, |
|
"train_steps_per_second": 0.419 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 425, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.113465532645376e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|