|
{ |
|
"best_metric": 0.09489229321479797, |
|
"best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-273", |
|
"epoch": 40.0, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.45760300755500793, |
|
"eval_runtime": 9.802, |
|
"eval_samples_per_second": 8.06, |
|
"eval_steps_per_second": 0.102, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.928571428571429e-05, |
|
"loss": 0.5021, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.39525437355041504, |
|
"eval_runtime": 9.734, |
|
"eval_samples_per_second": 8.116, |
|
"eval_steps_per_second": 0.103, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8571428571428575e-05, |
|
"loss": 0.3595, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.38087165355682373, |
|
"eval_runtime": 9.8033, |
|
"eval_samples_per_second": 8.059, |
|
"eval_steps_per_second": 0.102, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.32862257957458496, |
|
"eval_runtime": 9.7791, |
|
"eval_samples_per_second": 8.078, |
|
"eval_steps_per_second": 0.102, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.3009, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.29453742504119873, |
|
"eval_runtime": 9.9491, |
|
"eval_samples_per_second": 7.94, |
|
"eval_steps_per_second": 0.101, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.2843, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.35278087854385376, |
|
"eval_runtime": 9.8405, |
|
"eval_samples_per_second": 8.028, |
|
"eval_steps_per_second": 0.102, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.23452825844287872, |
|
"eval_runtime": 9.7839, |
|
"eval_samples_per_second": 8.074, |
|
"eval_steps_per_second": 0.102, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.642857142857143e-05, |
|
"loss": 0.266, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.24986670911312103, |
|
"eval_runtime": 9.8907, |
|
"eval_samples_per_second": 7.987, |
|
"eval_steps_per_second": 0.101, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 0.222, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.25441065430641174, |
|
"eval_runtime": 9.8794, |
|
"eval_samples_per_second": 7.996, |
|
"eval_steps_per_second": 0.101, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.2018, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.19540712237358093, |
|
"eval_runtime": 10.3358, |
|
"eval_samples_per_second": 7.643, |
|
"eval_steps_per_second": 0.097, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.23508581519126892, |
|
"eval_runtime": 9.7761, |
|
"eval_samples_per_second": 8.081, |
|
"eval_steps_per_second": 0.102, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.1948, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.17053687572479248, |
|
"eval_runtime": 9.7433, |
|
"eval_samples_per_second": 8.108, |
|
"eval_steps_per_second": 0.103, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 1.3571428571428574e-05, |
|
"loss": 0.2053, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.8734177215189873, |
|
"eval_loss": 0.16247014701366425, |
|
"eval_runtime": 9.7199, |
|
"eval_samples_per_second": 8.128, |
|
"eval_steps_per_second": 0.103, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.17189449071884155, |
|
"eval_runtime": 9.7734, |
|
"eval_samples_per_second": 8.083, |
|
"eval_steps_per_second": 0.102, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 1.2857142857142859e-05, |
|
"loss": 0.1729, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.1488722860813141, |
|
"eval_runtime": 9.7192, |
|
"eval_samples_per_second": 8.128, |
|
"eval_steps_per_second": 0.103, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 1.2142857142857142e-05, |
|
"loss": 0.1535, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.14498455822467804, |
|
"eval_runtime": 9.7322, |
|
"eval_samples_per_second": 8.117, |
|
"eval_steps_per_second": 0.103, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1749649941921234, |
|
"eval_runtime": 9.718, |
|
"eval_samples_per_second": 8.129, |
|
"eval_steps_per_second": 0.103, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.1492, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.15143541991710663, |
|
"eval_runtime": 9.7273, |
|
"eval_samples_per_second": 8.121, |
|
"eval_steps_per_second": 0.103, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.1349, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.1304464191198349, |
|
"eval_runtime": 9.7442, |
|
"eval_samples_per_second": 8.107, |
|
"eval_steps_per_second": 0.103, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1538, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.12909165024757385, |
|
"eval_runtime": 9.7157, |
|
"eval_samples_per_second": 8.131, |
|
"eval_steps_per_second": 0.103, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.13061794638633728, |
|
"eval_runtime": 9.7515, |
|
"eval_samples_per_second": 8.101, |
|
"eval_steps_per_second": 0.103, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 9.285714285714288e-06, |
|
"loss": 0.1357, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.12830054759979248, |
|
"eval_runtime": 9.7367, |
|
"eval_samples_per_second": 8.114, |
|
"eval_steps_per_second": 0.103, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.147, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.12891362607479095, |
|
"eval_runtime": 9.7565, |
|
"eval_samples_per_second": 8.097, |
|
"eval_steps_per_second": 0.102, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.1338558942079544, |
|
"eval_runtime": 9.7379, |
|
"eval_samples_per_second": 8.113, |
|
"eval_steps_per_second": 0.103, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 0.1388, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.12436553090810776, |
|
"eval_runtime": 9.7614, |
|
"eval_samples_per_second": 8.093, |
|
"eval_steps_per_second": 0.102, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.1192, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.11165592074394226, |
|
"eval_runtime": 9.7177, |
|
"eval_samples_per_second": 8.129, |
|
"eval_steps_per_second": 0.103, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.11046960204839706, |
|
"eval_runtime": 9.7417, |
|
"eval_samples_per_second": 8.109, |
|
"eval_steps_per_second": 0.103, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"learning_rate": 6.4285714285714295e-06, |
|
"loss": 0.112, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.1078834980726242, |
|
"eval_runtime": 9.7562, |
|
"eval_samples_per_second": 8.097, |
|
"eval_steps_per_second": 0.102, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.1215, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.11511888355016708, |
|
"eval_runtime": 9.7361, |
|
"eval_samples_per_second": 8.114, |
|
"eval_steps_per_second": 0.103, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1139, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.10075395554304123, |
|
"eval_runtime": 9.7351, |
|
"eval_samples_per_second": 8.115, |
|
"eval_steps_per_second": 0.103, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.10330603271722794, |
|
"eval_runtime": 9.7699, |
|
"eval_samples_per_second": 8.086, |
|
"eval_steps_per_second": 0.102, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 31.43, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.1164, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.0984945222735405, |
|
"eval_runtime": 9.7232, |
|
"eval_samples_per_second": 8.125, |
|
"eval_steps_per_second": 0.103, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.1192, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.09554588049650192, |
|
"eval_runtime": 9.7164, |
|
"eval_samples_per_second": 8.131, |
|
"eval_steps_per_second": 0.103, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.10772588849067688, |
|
"eval_runtime": 9.7297, |
|
"eval_samples_per_second": 8.119, |
|
"eval_steps_per_second": 0.103, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 34.29, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.1132, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.11074268072843552, |
|
"eval_runtime": 9.7358, |
|
"eval_samples_per_second": 8.114, |
|
"eval_steps_per_second": 0.103, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.1021, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.09575933963060379, |
|
"eval_runtime": 9.7306, |
|
"eval_samples_per_second": 8.119, |
|
"eval_steps_per_second": 0.103, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.09573517739772797, |
|
"eval_runtime": 9.7256, |
|
"eval_samples_per_second": 8.123, |
|
"eval_steps_per_second": 0.103, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 37.14, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0945, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.09509044885635376, |
|
"eval_runtime": 9.7255, |
|
"eval_samples_per_second": 8.123, |
|
"eval_steps_per_second": 0.103, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.1244, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.09489229321479797, |
|
"eval_runtime": 9.7321, |
|
"eval_samples_per_second": 8.117, |
|
"eval_steps_per_second": 0.103, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1012, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.09554900228977203, |
|
"eval_runtime": 9.7342, |
|
"eval_samples_per_second": 8.116, |
|
"eval_steps_per_second": 0.103, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 280, |
|
"total_flos": 1.4091487038849024e+18, |
|
"train_loss": 0.17715629466942379, |
|
"train_runtime": 2286.0002, |
|
"train_samples_per_second": 7.804, |
|
"train_steps_per_second": 0.122 |
|
} |
|
], |
|
"max_steps": 280, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.4091487038849024e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|