|
{ |
|
"best_metric": 0.06278952211141586, |
|
"best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-217", |
|
"epoch": 40.0, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.4529457688331604, |
|
"eval_runtime": 8.6926, |
|
"eval_samples_per_second": 9.088, |
|
"eval_steps_per_second": 0.115, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.928571428571429e-05, |
|
"loss": 0.5024, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.39888718724250793, |
|
"eval_runtime": 9.0793, |
|
"eval_samples_per_second": 8.701, |
|
"eval_steps_per_second": 0.11, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8571428571428575e-05, |
|
"loss": 0.3533, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.37408992648124695, |
|
"eval_runtime": 9.0097, |
|
"eval_samples_per_second": 8.768, |
|
"eval_steps_per_second": 0.111, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.3160648047924042, |
|
"eval_runtime": 9.1428, |
|
"eval_samples_per_second": 8.641, |
|
"eval_steps_per_second": 0.109, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.285, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.282362163066864, |
|
"eval_runtime": 9.1103, |
|
"eval_samples_per_second": 8.671, |
|
"eval_steps_per_second": 0.11, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.2491, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.8607594936708861, |
|
"eval_loss": 0.2700817883014679, |
|
"eval_runtime": 8.8127, |
|
"eval_samples_per_second": 8.964, |
|
"eval_steps_per_second": 0.113, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9113924050632911, |
|
"eval_loss": 0.2062235176563263, |
|
"eval_runtime": 8.5646, |
|
"eval_samples_per_second": 9.224, |
|
"eval_steps_per_second": 0.117, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.642857142857143e-05, |
|
"loss": 0.2032, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.20497918128967285, |
|
"eval_runtime": 8.6022, |
|
"eval_samples_per_second": 9.184, |
|
"eval_steps_per_second": 0.116, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 0.157, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.20131482183933258, |
|
"eval_runtime": 8.5286, |
|
"eval_samples_per_second": 9.263, |
|
"eval_steps_per_second": 0.117, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.1127, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.19601519405841827, |
|
"eval_runtime": 8.52, |
|
"eval_samples_per_second": 9.272, |
|
"eval_steps_per_second": 0.117, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.14171478152275085, |
|
"eval_runtime": 8.4584, |
|
"eval_samples_per_second": 9.34, |
|
"eval_steps_per_second": 0.118, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.0903, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1306915283203125, |
|
"eval_runtime": 8.4328, |
|
"eval_samples_per_second": 9.368, |
|
"eval_steps_per_second": 0.119, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 1.3571428571428574e-05, |
|
"loss": 0.0922, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.08702569454908371, |
|
"eval_runtime": 8.8303, |
|
"eval_samples_per_second": 8.947, |
|
"eval_steps_per_second": 0.113, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9240506329113924, |
|
"eval_loss": 0.20480988919734955, |
|
"eval_runtime": 8.6551, |
|
"eval_samples_per_second": 9.128, |
|
"eval_steps_per_second": 0.116, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 1.2857142857142859e-05, |
|
"loss": 0.0595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9620253164556962, |
|
"eval_loss": 0.12036000937223434, |
|
"eval_runtime": 8.6389, |
|
"eval_samples_per_second": 9.145, |
|
"eval_steps_per_second": 0.116, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 1.2142857142857142e-05, |
|
"loss": 0.0527, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.2552852928638458, |
|
"eval_runtime": 8.4875, |
|
"eval_samples_per_second": 9.308, |
|
"eval_steps_per_second": 0.118, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.16753825545310974, |
|
"eval_runtime": 8.4252, |
|
"eval_samples_per_second": 9.377, |
|
"eval_steps_per_second": 0.119, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.0477, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.9240506329113924, |
|
"eval_loss": 0.22650040686130524, |
|
"eval_runtime": 8.392, |
|
"eval_samples_per_second": 9.414, |
|
"eval_steps_per_second": 0.119, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.0411, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.9367088607594937, |
|
"eval_loss": 0.1900627613067627, |
|
"eval_runtime": 8.4937, |
|
"eval_samples_per_second": 9.301, |
|
"eval_steps_per_second": 0.118, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0299, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.9240506329113924, |
|
"eval_loss": 0.2422873079776764, |
|
"eval_runtime": 8.7448, |
|
"eval_samples_per_second": 9.034, |
|
"eval_steps_per_second": 0.114, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.06394638121128082, |
|
"eval_runtime": 8.7204, |
|
"eval_samples_per_second": 9.059, |
|
"eval_steps_per_second": 0.115, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 9.285714285714288e-06, |
|
"loss": 0.0487, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.12548162043094635, |
|
"eval_runtime": 8.9171, |
|
"eval_samples_per_second": 8.859, |
|
"eval_steps_per_second": 0.112, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.0359, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.12127607315778732, |
|
"eval_runtime": 8.6147, |
|
"eval_samples_per_second": 9.17, |
|
"eval_steps_per_second": 0.116, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.07274330407381058, |
|
"eval_runtime": 8.5807, |
|
"eval_samples_per_second": 9.207, |
|
"eval_steps_per_second": 0.117, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 0.0302, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.11162865161895752, |
|
"eval_runtime": 8.6735, |
|
"eval_samples_per_second": 9.108, |
|
"eval_steps_per_second": 0.115, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.0304, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.10622164607048035, |
|
"eval_runtime": 8.8211, |
|
"eval_samples_per_second": 8.956, |
|
"eval_steps_per_second": 0.113, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_f1": 0.9240506329113924, |
|
"eval_loss": 0.20966486632823944, |
|
"eval_runtime": 8.7082, |
|
"eval_samples_per_second": 9.072, |
|
"eval_steps_per_second": 0.115, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"learning_rate": 6.4285714285714295e-06, |
|
"loss": 0.0274, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1276017129421234, |
|
"eval_runtime": 8.676, |
|
"eval_samples_per_second": 9.106, |
|
"eval_steps_per_second": 0.115, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.0291, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.09670199453830719, |
|
"eval_runtime": 8.5086, |
|
"eval_samples_per_second": 9.285, |
|
"eval_steps_per_second": 0.118, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0202, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_f1": 0.9746835443037974, |
|
"eval_loss": 0.07649976760149002, |
|
"eval_runtime": 8.9676, |
|
"eval_samples_per_second": 8.81, |
|
"eval_steps_per_second": 0.112, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_f1": 0.9873417721518988, |
|
"eval_loss": 0.06278952211141586, |
|
"eval_runtime": 8.7232, |
|
"eval_samples_per_second": 9.056, |
|
"eval_steps_per_second": 0.115, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 31.43, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0232, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.13882263004779816, |
|
"eval_runtime": 8.6516, |
|
"eval_samples_per_second": 9.131, |
|
"eval_steps_per_second": 0.116, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.0264, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.10616844147443771, |
|
"eval_runtime": 8.6952, |
|
"eval_samples_per_second": 9.085, |
|
"eval_steps_per_second": 0.115, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1320488601922989, |
|
"eval_runtime": 8.7896, |
|
"eval_samples_per_second": 8.988, |
|
"eval_steps_per_second": 0.114, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 34.29, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.0219, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1528194099664688, |
|
"eval_runtime": 8.8303, |
|
"eval_samples_per_second": 8.946, |
|
"eval_steps_per_second": 0.113, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.0194, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.1746273934841156, |
|
"eval_runtime": 8.7909, |
|
"eval_samples_per_second": 8.987, |
|
"eval_steps_per_second": 0.114, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.16089513897895813, |
|
"eval_runtime": 8.8367, |
|
"eval_samples_per_second": 8.94, |
|
"eval_steps_per_second": 0.113, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 37.14, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0204, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.14817634224891663, |
|
"eval_runtime": 8.8819, |
|
"eval_samples_per_second": 8.895, |
|
"eval_steps_per_second": 0.113, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.0217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.152223601937294, |
|
"eval_runtime": 8.8958, |
|
"eval_samples_per_second": 8.881, |
|
"eval_steps_per_second": 0.112, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0216, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_f1": 0.9493670886075949, |
|
"eval_loss": 0.14991530776023865, |
|
"eval_runtime": 8.6557, |
|
"eval_samples_per_second": 9.127, |
|
"eval_steps_per_second": 0.116, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 280, |
|
"total_flos": 1.4091487038849024e+18, |
|
"train_loss": 0.09473916946777276, |
|
"train_runtime": 2533.8499, |
|
"train_samples_per_second": 7.041, |
|
"train_steps_per_second": 0.111 |
|
} |
|
], |
|
"max_steps": 280, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.4091487038849024e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|