{ "best_metric": 0.09489229321479797, "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-273", "epoch": 40.0, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.45760300755500793, "eval_runtime": 9.802, "eval_samples_per_second": 8.06, "eval_steps_per_second": 0.102, "step": 7 }, { "epoch": 1.43, "learning_rate": 1.928571428571429e-05, "loss": 0.5021, "step": 10 }, { "epoch": 2.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.39525437355041504, "eval_runtime": 9.734, "eval_samples_per_second": 8.116, "eval_steps_per_second": 0.103, "step": 14 }, { "epoch": 2.86, "learning_rate": 1.8571428571428575e-05, "loss": 0.3595, "step": 20 }, { "epoch": 3.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.38087165355682373, "eval_runtime": 9.8033, "eval_samples_per_second": 8.059, "eval_steps_per_second": 0.102, "step": 21 }, { "epoch": 4.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.32862257957458496, "eval_runtime": 9.7791, "eval_samples_per_second": 8.078, "eval_steps_per_second": 0.102, "step": 28 }, { "epoch": 4.29, "learning_rate": 1.785714285714286e-05, "loss": 0.3009, "step": 30 }, { "epoch": 5.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.29453742504119873, "eval_runtime": 9.9491, "eval_samples_per_second": 7.94, "eval_steps_per_second": 0.101, "step": 35 }, { "epoch": 5.71, "learning_rate": 1.7142857142857142e-05, "loss": 0.2843, "step": 40 }, { "epoch": 6.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.35278087854385376, "eval_runtime": 9.8405, "eval_samples_per_second": 8.028, "eval_steps_per_second": 0.102, "step": 42 }, { "epoch": 7.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.23452825844287872, "eval_runtime": 9.7839, "eval_samples_per_second": 8.074, "eval_steps_per_second": 0.102, "step": 49 }, { "epoch": 7.14, "learning_rate": 1.642857142857143e-05, "loss": 0.266, "step": 50 }, { "epoch": 8.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.24986670911312103, "eval_runtime": 9.8907, "eval_samples_per_second": 7.987, "eval_steps_per_second": 0.101, "step": 56 }, { "epoch": 8.57, "learning_rate": 1.5714285714285715e-05, "loss": 0.222, "step": 60 }, { "epoch": 9.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.25441065430641174, "eval_runtime": 9.8794, "eval_samples_per_second": 7.996, "eval_steps_per_second": 0.101, "step": 63 }, { "epoch": 10.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.2018, "step": 70 }, { "epoch": 10.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.19540712237358093, "eval_runtime": 10.3358, "eval_samples_per_second": 7.643, "eval_steps_per_second": 0.097, "step": 70 }, { "epoch": 11.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.23508581519126892, "eval_runtime": 9.7761, "eval_samples_per_second": 8.081, "eval_steps_per_second": 0.102, "step": 77 }, { "epoch": 11.43, "learning_rate": 1.4285714285714287e-05, "loss": 0.1948, "step": 80 }, { "epoch": 12.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.17053687572479248, "eval_runtime": 9.7433, "eval_samples_per_second": 8.108, "eval_steps_per_second": 0.103, "step": 84 }, { "epoch": 12.86, "learning_rate": 1.3571428571428574e-05, "loss": 0.2053, "step": 90 }, { "epoch": 13.0, "eval_f1": 0.8734177215189873, "eval_loss": 0.16247014701366425, "eval_runtime": 9.7199, "eval_samples_per_second": 8.128, "eval_steps_per_second": 0.103, "step": 91 }, { "epoch": 14.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.17189449071884155, "eval_runtime": 9.7734, "eval_samples_per_second": 8.083, "eval_steps_per_second": 0.102, "step": 98 }, { "epoch": 14.29, "learning_rate": 1.2857142857142859e-05, "loss": 0.1729, "step": 100 }, { "epoch": 15.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.1488722860813141, "eval_runtime": 9.7192, "eval_samples_per_second": 8.128, "eval_steps_per_second": 0.103, "step": 105 }, { "epoch": 15.71, "learning_rate": 1.2142857142857142e-05, "loss": 0.1535, "step": 110 }, { "epoch": 16.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.14498455822467804, "eval_runtime": 9.7322, "eval_samples_per_second": 8.117, "eval_steps_per_second": 0.103, "step": 112 }, { "epoch": 17.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1749649941921234, "eval_runtime": 9.718, "eval_samples_per_second": 8.129, "eval_steps_per_second": 0.103, "step": 119 }, { "epoch": 17.14, "learning_rate": 1.1428571428571429e-05, "loss": 0.1492, "step": 120 }, { "epoch": 18.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.15143541991710663, "eval_runtime": 9.7273, "eval_samples_per_second": 8.121, "eval_steps_per_second": 0.103, "step": 126 }, { "epoch": 18.57, "learning_rate": 1.0714285714285714e-05, "loss": 0.1349, "step": 130 }, { "epoch": 19.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.1304464191198349, "eval_runtime": 9.7442, "eval_samples_per_second": 8.107, "eval_steps_per_second": 0.103, "step": 133 }, { "epoch": 20.0, "learning_rate": 1e-05, "loss": 0.1538, "step": 140 }, { "epoch": 20.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.12909165024757385, "eval_runtime": 9.7157, "eval_samples_per_second": 8.131, "eval_steps_per_second": 0.103, "step": 140 }, { "epoch": 21.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.13061794638633728, "eval_runtime": 9.7515, "eval_samples_per_second": 8.101, "eval_steps_per_second": 0.103, "step": 147 }, { "epoch": 21.43, "learning_rate": 9.285714285714288e-06, "loss": 0.1357, "step": 150 }, { "epoch": 22.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.12830054759979248, "eval_runtime": 9.7367, "eval_samples_per_second": 8.114, "eval_steps_per_second": 0.103, "step": 154 }, { "epoch": 22.86, "learning_rate": 8.571428571428571e-06, "loss": 0.147, "step": 160 }, { "epoch": 23.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.12891362607479095, "eval_runtime": 9.7565, "eval_samples_per_second": 8.097, "eval_steps_per_second": 0.102, "step": 161 }, { "epoch": 24.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.1338558942079544, "eval_runtime": 9.7379, "eval_samples_per_second": 8.113, "eval_steps_per_second": 0.103, "step": 168 }, { "epoch": 24.29, "learning_rate": 7.857142857142858e-06, "loss": 0.1388, "step": 170 }, { "epoch": 25.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.12436553090810776, "eval_runtime": 9.7614, "eval_samples_per_second": 8.093, "eval_steps_per_second": 0.102, "step": 175 }, { "epoch": 25.71, "learning_rate": 7.1428571428571436e-06, "loss": 0.1192, "step": 180 }, { "epoch": 26.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.11165592074394226, "eval_runtime": 9.7177, "eval_samples_per_second": 8.129, "eval_steps_per_second": 0.103, "step": 182 }, { "epoch": 27.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.11046960204839706, "eval_runtime": 9.7417, "eval_samples_per_second": 8.109, "eval_steps_per_second": 0.103, "step": 189 }, { "epoch": 27.14, "learning_rate": 6.4285714285714295e-06, "loss": 0.112, "step": 190 }, { "epoch": 28.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.1078834980726242, "eval_runtime": 9.7562, "eval_samples_per_second": 8.097, "eval_steps_per_second": 0.102, "step": 196 }, { "epoch": 28.57, "learning_rate": 5.7142857142857145e-06, "loss": 0.1215, "step": 200 }, { "epoch": 29.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.11511888355016708, "eval_runtime": 9.7361, "eval_samples_per_second": 8.114, "eval_steps_per_second": 0.103, "step": 203 }, { "epoch": 30.0, "learning_rate": 5e-06, "loss": 0.1139, "step": 210 }, { "epoch": 30.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.10075395554304123, "eval_runtime": 9.7351, "eval_samples_per_second": 8.115, "eval_steps_per_second": 0.103, "step": 210 }, { "epoch": 31.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.10330603271722794, "eval_runtime": 9.7699, "eval_samples_per_second": 8.086, "eval_steps_per_second": 0.102, "step": 217 }, { "epoch": 31.43, "learning_rate": 4.2857142857142855e-06, "loss": 0.1164, "step": 220 }, { "epoch": 32.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.0984945222735405, "eval_runtime": 9.7232, "eval_samples_per_second": 8.125, "eval_steps_per_second": 0.103, "step": 224 }, { "epoch": 32.86, "learning_rate": 3.5714285714285718e-06, "loss": 0.1192, "step": 230 }, { "epoch": 33.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.09554588049650192, "eval_runtime": 9.7164, "eval_samples_per_second": 8.131, "eval_steps_per_second": 0.103, "step": 231 }, { "epoch": 34.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.10772588849067688, "eval_runtime": 9.7297, "eval_samples_per_second": 8.119, "eval_steps_per_second": 0.103, "step": 238 }, { "epoch": 34.29, "learning_rate": 2.8571428571428573e-06, "loss": 0.1132, "step": 240 }, { "epoch": 35.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.11074268072843552, "eval_runtime": 9.7358, "eval_samples_per_second": 8.114, "eval_steps_per_second": 0.103, "step": 245 }, { "epoch": 35.71, "learning_rate": 2.1428571428571427e-06, "loss": 0.1021, "step": 250 }, { "epoch": 36.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.09575933963060379, "eval_runtime": 9.7306, "eval_samples_per_second": 8.119, "eval_steps_per_second": 0.103, "step": 252 }, { "epoch": 37.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.09573517739772797, "eval_runtime": 9.7256, "eval_samples_per_second": 8.123, "eval_steps_per_second": 0.103, "step": 259 }, { "epoch": 37.14, "learning_rate": 1.4285714285714286e-06, "loss": 0.0945, "step": 260 }, { "epoch": 38.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.09509044885635376, "eval_runtime": 9.7255, "eval_samples_per_second": 8.123, "eval_steps_per_second": 0.103, "step": 266 }, { "epoch": 38.57, "learning_rate": 7.142857142857143e-07, "loss": 0.1244, "step": 270 }, { "epoch": 39.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.09489229321479797, "eval_runtime": 9.7321, "eval_samples_per_second": 8.117, "eval_steps_per_second": 0.103, "step": 273 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.1012, "step": 280 }, { "epoch": 40.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.09554900228977203, "eval_runtime": 9.7342, "eval_samples_per_second": 8.116, "eval_steps_per_second": 0.103, "step": 280 }, { "epoch": 40.0, "step": 280, "total_flos": 1.4091487038849024e+18, "train_loss": 0.17715629466942379, "train_runtime": 2286.0002, "train_samples_per_second": 7.804, "train_steps_per_second": 0.122 } ], "max_steps": 280, "num_train_epochs": 40, "total_flos": 1.4091487038849024e+18, "trial_name": null, "trial_params": null }