{ "best_metric": 0.8571428571428571, "best_model_checkpoint": "distillBEiT/checkpoints/checkpoint-1953", "epoch": 31.0, "eval_steps": 500, "global_step": 1953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9126984126984125e-05, "loss": 6.699, "step": 63 }, { "epoch": 1.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 11.804450035095215, "eval_runtime": 2.7027, "eval_samples_per_second": 20.72, "eval_steps_per_second": 2.59, "step": 63 }, { "epoch": 2.0, "learning_rate": 4.812698412698413e-05, "loss": 4.176, "step": 126 }, { "epoch": 2.0, "eval_accuracy": 0.44642857142857145, "eval_loss": 8.514544486999512, "eval_runtime": 2.7591, "eval_samples_per_second": 20.297, "eval_steps_per_second": 2.537, "step": 126 }, { "epoch": 3.0, "learning_rate": 4.712698412698413e-05, "loss": 2.6846, "step": 189 }, { "epoch": 3.0, "eval_accuracy": 0.125, "eval_loss": 26.979318618774414, "eval_runtime": 2.7027, "eval_samples_per_second": 20.72, "eval_steps_per_second": 2.59, "step": 189 }, { "epoch": 4.0, "learning_rate": 4.612698412698413e-05, "loss": 2.2263, "step": 252 }, { "epoch": 4.0, "eval_accuracy": 0.08928571428571429, "eval_loss": 33.445133209228516, "eval_runtime": 2.6746, "eval_samples_per_second": 20.937, "eval_steps_per_second": 2.617, "step": 252 }, { "epoch": 5.0, "learning_rate": 4.512698412698413e-05, "loss": 1.8895, "step": 315 }, { "epoch": 5.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 8.057156562805176, "eval_runtime": 2.7714, "eval_samples_per_second": 20.207, "eval_steps_per_second": 2.526, "step": 315 }, { "epoch": 6.0, "learning_rate": 4.4126984126984126e-05, "loss": 1.525, "step": 378 }, { "epoch": 6.0, "eval_accuracy": 0.4107142857142857, "eval_loss": 8.990975379943848, "eval_runtime": 2.745, "eval_samples_per_second": 20.4, "eval_steps_per_second": 2.55, "step": 378 }, { "epoch": 7.0, "learning_rate": 4.312698412698413e-05, "loss": 1.4566, "step": 441 }, { "epoch": 7.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 6.924566745758057, "eval_runtime": 2.7577, "eval_samples_per_second": 20.307, "eval_steps_per_second": 2.538, "step": 441 }, { "epoch": 8.0, "learning_rate": 4.212698412698413e-05, "loss": 1.3691, "step": 504 }, { "epoch": 8.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 11.207305908203125, "eval_runtime": 2.6969, "eval_samples_per_second": 20.764, "eval_steps_per_second": 2.596, "step": 504 }, { "epoch": 9.0, "learning_rate": 4.112698412698413e-05, "loss": 1.168, "step": 567 }, { "epoch": 9.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 6.766927242279053, "eval_runtime": 2.8201, "eval_samples_per_second": 19.857, "eval_steps_per_second": 2.482, "step": 567 }, { "epoch": 10.0, "learning_rate": 4.012698412698413e-05, "loss": 1.1166, "step": 630 }, { "epoch": 10.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 4.691386699676514, "eval_runtime": 2.7695, "eval_samples_per_second": 20.22, "eval_steps_per_second": 2.528, "step": 630 }, { "epoch": 11.0, "learning_rate": 3.9126984126984126e-05, "loss": 1.0649, "step": 693 }, { "epoch": 11.0, "eval_accuracy": 0.7678571428571429, "eval_loss": 5.042652606964111, "eval_runtime": 2.6881, "eval_samples_per_second": 20.833, "eval_steps_per_second": 2.604, "step": 693 }, { "epoch": 12.0, "learning_rate": 3.812698412698413e-05, "loss": 0.97, "step": 756 }, { "epoch": 12.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 6.7766571044921875, "eval_runtime": 2.6425, "eval_samples_per_second": 21.192, "eval_steps_per_second": 2.649, "step": 756 }, { "epoch": 13.0, "learning_rate": 3.7126984126984125e-05, "loss": 1.0896, "step": 819 }, { "epoch": 13.0, "eval_accuracy": 0.8035714285714286, "eval_loss": 11.280374526977539, "eval_runtime": 2.7964, "eval_samples_per_second": 20.026, "eval_steps_per_second": 2.503, "step": 819 }, { "epoch": 14.0, "learning_rate": 3.6126984126984135e-05, "loss": 0.9401, "step": 882 }, { "epoch": 14.0, "eval_accuracy": 0.6607142857142857, "eval_loss": 11.328347206115723, "eval_runtime": 2.8029, "eval_samples_per_second": 19.979, "eval_steps_per_second": 2.497, "step": 882 }, { "epoch": 15.0, "learning_rate": 3.512698412698413e-05, "loss": 0.9331, "step": 945 }, { "epoch": 15.0, "eval_accuracy": 0.6964285714285714, "eval_loss": 12.213973999023438, "eval_runtime": 2.6558, "eval_samples_per_second": 21.086, "eval_steps_per_second": 2.636, "step": 945 }, { "epoch": 16.0, "learning_rate": 3.412698412698413e-05, "loss": 0.8237, "step": 1008 }, { "epoch": 16.0, "eval_accuracy": 0.7321428571428571, "eval_loss": 7.346871852874756, "eval_runtime": 2.7127, "eval_samples_per_second": 20.644, "eval_steps_per_second": 2.58, "step": 1008 }, { "epoch": 17.0, "learning_rate": 3.312698412698413e-05, "loss": 0.7515, "step": 1071 }, { "epoch": 17.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 4.754016399383545, "eval_runtime": 2.7003, "eval_samples_per_second": 20.738, "eval_steps_per_second": 2.592, "step": 1071 }, { "epoch": 18.0, "learning_rate": 3.2126984126984126e-05, "loss": 0.7622, "step": 1134 }, { "epoch": 18.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 4.091554641723633, "eval_runtime": 2.7272, "eval_samples_per_second": 20.534, "eval_steps_per_second": 2.567, "step": 1134 }, { "epoch": 19.0, "learning_rate": 3.112698412698413e-05, "loss": 0.853, "step": 1197 }, { "epoch": 19.0, "eval_accuracy": 0.16071428571428573, "eval_loss": 12.633813858032227, "eval_runtime": 2.6777, "eval_samples_per_second": 20.914, "eval_steps_per_second": 2.614, "step": 1197 }, { "epoch": 20.0, "learning_rate": 3.0126984126984124e-05, "loss": 0.6455, "step": 1260 }, { "epoch": 20.0, "eval_accuracy": 0.7678571428571429, "eval_loss": 9.073665618896484, "eval_runtime": 2.7224, "eval_samples_per_second": 20.57, "eval_steps_per_second": 2.571, "step": 1260 }, { "epoch": 21.0, "learning_rate": 2.912698412698413e-05, "loss": 0.6667, "step": 1323 }, { "epoch": 21.0, "eval_accuracy": 0.7321428571428571, "eval_loss": 4.806458950042725, "eval_runtime": 2.6589, "eval_samples_per_second": 21.062, "eval_steps_per_second": 2.633, "step": 1323 }, { "epoch": 22.0, "learning_rate": 2.812698412698413e-05, "loss": 0.689, "step": 1386 }, { "epoch": 22.0, "eval_accuracy": 0.48214285714285715, "eval_loss": 9.992805480957031, "eval_runtime": 2.6988, "eval_samples_per_second": 20.75, "eval_steps_per_second": 2.594, "step": 1386 }, { "epoch": 23.0, "learning_rate": 2.712698412698413e-05, "loss": 0.6914, "step": 1449 }, { "epoch": 23.0, "eval_accuracy": 0.5357142857142857, "eval_loss": 18.975879669189453, "eval_runtime": 2.6271, "eval_samples_per_second": 21.316, "eval_steps_per_second": 2.665, "step": 1449 }, { "epoch": 24.0, "learning_rate": 2.612698412698413e-05, "loss": 0.677, "step": 1512 }, { "epoch": 24.0, "eval_accuracy": 0.125, "eval_loss": 15.608841896057129, "eval_runtime": 2.674, "eval_samples_per_second": 20.942, "eval_steps_per_second": 2.618, "step": 1512 }, { "epoch": 25.0, "learning_rate": 2.5126984126984128e-05, "loss": 0.6575, "step": 1575 }, { "epoch": 25.0, "eval_accuracy": 0.625, "eval_loss": 13.633025169372559, "eval_runtime": 2.6744, "eval_samples_per_second": 20.94, "eval_steps_per_second": 2.617, "step": 1575 }, { "epoch": 26.0, "learning_rate": 2.4126984126984128e-05, "loss": 0.5564, "step": 1638 }, { "epoch": 26.0, "eval_accuracy": 0.375, "eval_loss": 9.142752647399902, "eval_runtime": 2.7086, "eval_samples_per_second": 20.675, "eval_steps_per_second": 2.584, "step": 1638 }, { "epoch": 27.0, "learning_rate": 2.3126984126984127e-05, "loss": 0.7167, "step": 1701 }, { "epoch": 27.0, "eval_accuracy": 0.75, "eval_loss": 12.48905086517334, "eval_runtime": 2.6896, "eval_samples_per_second": 20.821, "eval_steps_per_second": 2.603, "step": 1701 }, { "epoch": 28.0, "learning_rate": 2.212698412698413e-05, "loss": 0.5167, "step": 1764 }, { "epoch": 28.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 6.086201190948486, "eval_runtime": 2.6432, "eval_samples_per_second": 21.186, "eval_steps_per_second": 2.648, "step": 1764 }, { "epoch": 29.0, "learning_rate": 2.112698412698413e-05, "loss": 0.5433, "step": 1827 }, { "epoch": 29.0, "eval_accuracy": 0.375, "eval_loss": 10.653279304504395, "eval_runtime": 2.7065, "eval_samples_per_second": 20.691, "eval_steps_per_second": 2.586, "step": 1827 }, { "epoch": 30.0, "learning_rate": 2.012698412698413e-05, "loss": 0.4732, "step": 1890 }, { "epoch": 30.0, "eval_accuracy": 0.7321428571428571, "eval_loss": 5.170372009277344, "eval_runtime": 2.6871, "eval_samples_per_second": 20.841, "eval_steps_per_second": 2.605, "step": 1890 }, { "epoch": 31.0, "learning_rate": 1.9126984126984128e-05, "loss": 0.4945, "step": 1953 }, { "epoch": 31.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 3.8952624797821045, "eval_runtime": 2.7235, "eval_samples_per_second": 20.562, "eval_steps_per_second": 2.57, "step": 1953 } ], "logging_steps": 500, "max_steps": 3150, "num_train_epochs": 50, "save_steps": 500, "total_flos": 3.126627671021568e+16, "trial_name": null, "trial_params": null }