{ "best_metric": 0.8854679802955665, "best_model_checkpoint": "vit-msn-small-wbc-classifier-0316-cropped-cleaned-dataset-10/checkpoint-357", "epoch": 25.0, "eval_steps": 500, "global_step": 425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5882352941176471, "grad_norm": 12.153236389160156, "learning_rate": 1.1627906976744187e-05, "loss": 1.3709, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.805008210180624, "eval_loss": 0.6976552605628967, "eval_runtime": 7.6934, "eval_samples_per_second": 316.633, "eval_steps_per_second": 5.069, "step": 17 }, { "epoch": 1.1764705882352942, "grad_norm": 14.669098854064941, "learning_rate": 2.3255813953488374e-05, "loss": 0.7911, "step": 20 }, { "epoch": 1.7647058823529411, "grad_norm": 5.835587024688721, "learning_rate": 3.488372093023256e-05, "loss": 0.5673, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.8099343185550082, "eval_loss": 0.5948964357376099, "eval_runtime": 7.6055, "eval_samples_per_second": 320.293, "eval_steps_per_second": 5.128, "step": 34 }, { "epoch": 2.3529411764705883, "grad_norm": 10.460927963256836, "learning_rate": 4.651162790697675e-05, "loss": 0.5463, "step": 40 }, { "epoch": 2.9411764705882355, "grad_norm": 9.529006004333496, "learning_rate": 4.9083769633507855e-05, "loss": 0.5227, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.7931034482758621, "eval_loss": 0.6151543259620667, "eval_runtime": 7.5675, "eval_samples_per_second": 321.904, "eval_steps_per_second": 5.154, "step": 51 }, { "epoch": 3.5294117647058822, "grad_norm": 6.369431495666504, "learning_rate": 4.7774869109947645e-05, "loss": 0.4958, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.8435960591133005, "eval_loss": 0.4351496994495392, "eval_runtime": 7.5745, "eval_samples_per_second": 321.605, "eval_steps_per_second": 5.149, "step": 68 }, { "epoch": 4.117647058823529, "grad_norm": 10.749176979064941, "learning_rate": 4.6465968586387436e-05, "loss": 0.483, "step": 70 }, { "epoch": 4.705882352941177, "grad_norm": 11.76589584350586, "learning_rate": 4.5157068062827226e-05, "loss": 0.4402, "step": 80 }, { "epoch": 5.0, "eval_accuracy": 0.8579638752052545, "eval_loss": 0.3776942789554596, "eval_runtime": 7.5784, "eval_samples_per_second": 321.442, "eval_steps_per_second": 5.146, "step": 85 }, { "epoch": 5.294117647058823, "grad_norm": 4.830715656280518, "learning_rate": 4.384816753926702e-05, "loss": 0.418, "step": 90 }, { "epoch": 5.882352941176471, "grad_norm": 7.1909027099609375, "learning_rate": 4.253926701570681e-05, "loss": 0.3878, "step": 100 }, { "epoch": 6.0, "eval_accuracy": 0.8698686371100164, "eval_loss": 0.39695626497268677, "eval_runtime": 7.4679, "eval_samples_per_second": 326.194, "eval_steps_per_second": 5.222, "step": 102 }, { "epoch": 6.470588235294118, "grad_norm": 4.031527042388916, "learning_rate": 4.12303664921466e-05, "loss": 0.3646, "step": 110 }, { "epoch": 7.0, "eval_accuracy": 0.8641215106732348, "eval_loss": 0.3792504668235779, "eval_runtime": 7.7676, "eval_samples_per_second": 313.611, "eval_steps_per_second": 5.021, "step": 119 }, { "epoch": 7.0588235294117645, "grad_norm": 5.431394100189209, "learning_rate": 3.992146596858639e-05, "loss": 0.3965, "step": 120 }, { "epoch": 7.647058823529412, "grad_norm": 4.352228164672852, "learning_rate": 3.861256544502618e-05, "loss": 0.3452, "step": 130 }, { "epoch": 8.0, "eval_accuracy": 0.8805418719211823, "eval_loss": 0.3549965023994446, "eval_runtime": 7.4981, "eval_samples_per_second": 324.881, "eval_steps_per_second": 5.201, "step": 136 }, { "epoch": 8.235294117647058, "grad_norm": 13.60920524597168, "learning_rate": 3.730366492146597e-05, "loss": 0.3754, "step": 140 }, { "epoch": 8.823529411764707, "grad_norm": 7.518331050872803, "learning_rate": 3.599476439790576e-05, "loss": 0.344, "step": 150 }, { "epoch": 9.0, "eval_accuracy": 0.8735632183908046, "eval_loss": 0.40034234523773193, "eval_runtime": 7.6227, "eval_samples_per_second": 319.572, "eval_steps_per_second": 5.116, "step": 153 }, { "epoch": 9.411764705882353, "grad_norm": 7.200953960418701, "learning_rate": 3.468586387434556e-05, "loss": 0.3418, "step": 160 }, { "epoch": 10.0, "grad_norm": 7.663539886474609, "learning_rate": 3.337696335078534e-05, "loss": 0.3365, "step": 170 }, { "epoch": 10.0, "eval_accuracy": 0.8830049261083743, "eval_loss": 0.36542433500289917, "eval_runtime": 7.6247, "eval_samples_per_second": 319.489, "eval_steps_per_second": 5.115, "step": 170 }, { "epoch": 10.588235294117647, "grad_norm": 4.720971584320068, "learning_rate": 3.206806282722513e-05, "loss": 0.3223, "step": 180 }, { "epoch": 11.0, "eval_accuracy": 0.8764367816091954, "eval_loss": 0.3570525646209717, "eval_runtime": 7.6607, "eval_samples_per_second": 317.988, "eval_steps_per_second": 5.091, "step": 187 }, { "epoch": 11.176470588235293, "grad_norm": 4.508999824523926, "learning_rate": 3.075916230366492e-05, "loss": 0.3084, "step": 190 }, { "epoch": 11.764705882352942, "grad_norm": 7.280440807342529, "learning_rate": 2.9450261780104715e-05, "loss": 0.2819, "step": 200 }, { "epoch": 12.0, "eval_accuracy": 0.8788998357963875, "eval_loss": 0.3665020167827606, "eval_runtime": 7.4797, "eval_samples_per_second": 325.683, "eval_steps_per_second": 5.214, "step": 204 }, { "epoch": 12.352941176470589, "grad_norm": 5.721529483795166, "learning_rate": 2.8141361256544502e-05, "loss": 0.3067, "step": 210 }, { "epoch": 12.941176470588236, "grad_norm": 5.043443202972412, "learning_rate": 2.6832460732984293e-05, "loss": 0.2998, "step": 220 }, { "epoch": 13.0, "eval_accuracy": 0.8838259441707718, "eval_loss": 0.36087653040885925, "eval_runtime": 7.7018, "eval_samples_per_second": 316.291, "eval_steps_per_second": 5.064, "step": 221 }, { "epoch": 13.529411764705882, "grad_norm": 9.1295804977417, "learning_rate": 2.5523560209424086e-05, "loss": 0.2959, "step": 230 }, { "epoch": 14.0, "eval_accuracy": 0.8719211822660099, "eval_loss": 0.4335402846336365, "eval_runtime": 7.5815, "eval_samples_per_second": 321.307, "eval_steps_per_second": 5.144, "step": 238 }, { "epoch": 14.117647058823529, "grad_norm": 6.499159812927246, "learning_rate": 2.4214659685863873e-05, "loss": 0.2732, "step": 240 }, { "epoch": 14.705882352941176, "grad_norm": 5.268309116363525, "learning_rate": 2.2905759162303667e-05, "loss": 0.2662, "step": 250 }, { "epoch": 15.0, "eval_accuracy": 0.8784893267651889, "eval_loss": 0.42450225353240967, "eval_runtime": 7.7843, "eval_samples_per_second": 312.939, "eval_steps_per_second": 5.01, "step": 255 }, { "epoch": 15.294117647058824, "grad_norm": 3.793020725250244, "learning_rate": 2.1596858638743454e-05, "loss": 0.2693, "step": 260 }, { "epoch": 15.882352941176471, "grad_norm": 8.22383975982666, "learning_rate": 2.0287958115183248e-05, "loss": 0.2668, "step": 270 }, { "epoch": 16.0, "eval_accuracy": 0.8846469622331691, "eval_loss": 0.37603434920310974, "eval_runtime": 7.5418, "eval_samples_per_second": 322.999, "eval_steps_per_second": 5.171, "step": 272 }, { "epoch": 16.470588235294116, "grad_norm": 4.452105522155762, "learning_rate": 1.8979057591623035e-05, "loss": 0.2576, "step": 280 }, { "epoch": 17.0, "eval_accuracy": 0.8830049261083743, "eval_loss": 0.3727973699569702, "eval_runtime": 7.5894, "eval_samples_per_second": 320.973, "eval_steps_per_second": 5.139, "step": 289 }, { "epoch": 17.058823529411764, "grad_norm": 5.180832386016846, "learning_rate": 1.767015706806283e-05, "loss": 0.2578, "step": 290 }, { "epoch": 17.647058823529413, "grad_norm": 6.889744758605957, "learning_rate": 1.636125654450262e-05, "loss": 0.2398, "step": 300 }, { "epoch": 18.0, "eval_accuracy": 0.8813628899835796, "eval_loss": 0.41920730471611023, "eval_runtime": 7.5395, "eval_samples_per_second": 323.1, "eval_steps_per_second": 5.173, "step": 306 }, { "epoch": 18.235294117647058, "grad_norm": 5.117687225341797, "learning_rate": 1.505235602094241e-05, "loss": 0.2353, "step": 310 }, { "epoch": 18.823529411764707, "grad_norm": 5.613910675048828, "learning_rate": 1.3743455497382199e-05, "loss": 0.2278, "step": 320 }, { "epoch": 19.0, "eval_accuracy": 0.8805418719211823, "eval_loss": 0.41564103960990906, "eval_runtime": 7.6556, "eval_samples_per_second": 318.197, "eval_steps_per_second": 5.094, "step": 323 }, { "epoch": 19.41176470588235, "grad_norm": 5.283733367919922, "learning_rate": 1.243455497382199e-05, "loss": 0.2297, "step": 330 }, { "epoch": 20.0, "grad_norm": 5.314838886260986, "learning_rate": 1.1125654450261781e-05, "loss": 0.2033, "step": 340 }, { "epoch": 20.0, "eval_accuracy": 0.8850574712643678, "eval_loss": 0.41589659452438354, "eval_runtime": 7.4417, "eval_samples_per_second": 327.342, "eval_steps_per_second": 5.241, "step": 340 }, { "epoch": 20.58823529411765, "grad_norm": 5.559089183807373, "learning_rate": 9.816753926701572e-06, "loss": 0.2037, "step": 350 }, { "epoch": 21.0, "eval_accuracy": 0.8854679802955665, "eval_loss": 0.3985845744609833, "eval_runtime": 7.5913, "eval_samples_per_second": 320.893, "eval_steps_per_second": 5.137, "step": 357 }, { "epoch": 21.176470588235293, "grad_norm": 4.5341596603393555, "learning_rate": 8.507853403141362e-06, "loss": 0.2106, "step": 360 }, { "epoch": 21.764705882352942, "grad_norm": 4.3524580001831055, "learning_rate": 7.1989528795811526e-06, "loss": 0.1934, "step": 370 }, { "epoch": 22.0, "eval_accuracy": 0.882183908045977, "eval_loss": 0.42198774218559265, "eval_runtime": 7.5092, "eval_samples_per_second": 324.401, "eval_steps_per_second": 5.194, "step": 374 }, { "epoch": 22.352941176470587, "grad_norm": 6.747894763946533, "learning_rate": 5.890052356020943e-06, "loss": 0.2043, "step": 380 }, { "epoch": 22.941176470588236, "grad_norm": 5.149715900421143, "learning_rate": 4.5811518324607335e-06, "loss": 0.1983, "step": 390 }, { "epoch": 23.0, "eval_accuracy": 0.8854679802955665, "eval_loss": 0.4159087836742401, "eval_runtime": 7.4544, "eval_samples_per_second": 326.786, "eval_steps_per_second": 5.232, "step": 391 }, { "epoch": 23.529411764705884, "grad_norm": 4.871130466461182, "learning_rate": 3.272251308900524e-06, "loss": 0.1746, "step": 400 }, { "epoch": 24.0, "eval_accuracy": 0.8854679802955665, "eval_loss": 0.41789308190345764, "eval_runtime": 7.7744, "eval_samples_per_second": 313.335, "eval_steps_per_second": 5.016, "step": 408 }, { "epoch": 24.11764705882353, "grad_norm": 4.728577136993408, "learning_rate": 1.9633507853403143e-06, "loss": 0.1925, "step": 410 }, { "epoch": 24.705882352941178, "grad_norm": 5.179035663604736, "learning_rate": 6.544502617801048e-07, "loss": 0.1776, "step": 420 }, { "epoch": 25.0, "eval_accuracy": 0.8834154351395731, "eval_loss": 0.42465320229530334, "eval_runtime": 7.6371, "eval_samples_per_second": 318.97, "eval_steps_per_second": 5.107, "step": 425 }, { "epoch": 25.0, "step": 425, "total_flos": 2.113465532645376e+18, "train_loss": 0.34626551782383636, "train_runtime": 1013.337, "train_samples_per_second": 106.579, "train_steps_per_second": 0.419 } ], "logging_steps": 10, "max_steps": 425, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.113465532645376e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }