|
{ |
|
"best_metric": 0.8655030800821355, |
|
"best_model_checkpoint": "AnimeCharacterClassifierMark1/checkpoint-258", |
|
"epoch": 16.0, |
|
"eval_steps": 500, |
|
"global_step": 276, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 5.0145, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.009240246406570842, |
|
"eval_loss": 4.930349826812744, |
|
"eval_runtime": 9.5929, |
|
"eval_samples_per_second": 101.533, |
|
"eval_steps_per_second": 0.834, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 4.932, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 4.8416, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.028747433264887063, |
|
"eval_loss": 4.748697757720947, |
|
"eval_runtime": 8.8137, |
|
"eval_samples_per_second": 110.51, |
|
"eval_steps_per_second": 0.908, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 4.6652, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 4.4383, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.11704312114989733, |
|
"eval_loss": 4.359685897827148, |
|
"eval_runtime": 10.311, |
|
"eval_samples_per_second": 94.462, |
|
"eval_steps_per_second": 0.776, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 4.0762, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.32238193018480493, |
|
"eval_loss": 3.641871213912964, |
|
"eval_runtime": 9.02, |
|
"eval_samples_per_second": 107.982, |
|
"eval_steps_per_second": 0.887, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 3.6518, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 4.937694704049845e-05, |
|
"loss": 3.108, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.5246406570841889, |
|
"eval_loss": 2.857390880584717, |
|
"eval_runtime": 8.9304, |
|
"eval_samples_per_second": 109.065, |
|
"eval_steps_per_second": 0.896, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.85981308411215e-05, |
|
"loss": 2.606, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.781931464174455e-05, |
|
"loss": 2.1571, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_accuracy": 0.6652977412731006, |
|
"eval_loss": 2.2128942012786865, |
|
"eval_runtime": 8.8437, |
|
"eval_samples_per_second": 110.136, |
|
"eval_steps_per_second": 0.905, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.7040498442367604e-05, |
|
"loss": 1.7668, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 4.6261682242990654e-05, |
|
"loss": 1.4685, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_accuracy": 0.7494866529774127, |
|
"eval_loss": 1.7289572954177856, |
|
"eval_runtime": 9.9463, |
|
"eval_samples_per_second": 97.926, |
|
"eval_steps_per_second": 0.804, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 4.548286604361371e-05, |
|
"loss": 1.1649, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.797741273100616, |
|
"eval_loss": 1.3861801624298096, |
|
"eval_runtime": 8.8226, |
|
"eval_samples_per_second": 110.398, |
|
"eval_steps_per_second": 0.907, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 4.470404984423676e-05, |
|
"loss": 0.9897, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.392523364485982e-05, |
|
"loss": 0.7905, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8213552361396304, |
|
"eval_loss": 1.1588941812515259, |
|
"eval_runtime": 8.795, |
|
"eval_samples_per_second": 110.745, |
|
"eval_steps_per_second": 0.91, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 4.314641744548287e-05, |
|
"loss": 0.6727, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 4.236760124610592e-05, |
|
"loss": 0.5549, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.8295687885010267, |
|
"eval_loss": 1.0262539386749268, |
|
"eval_runtime": 8.8584, |
|
"eval_samples_per_second": 109.953, |
|
"eval_steps_per_second": 0.903, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 4.1588785046728974e-05, |
|
"loss": 0.4577, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_accuracy": 0.8367556468172485, |
|
"eval_loss": 0.8994325995445251, |
|
"eval_runtime": 8.7654, |
|
"eval_samples_per_second": 111.119, |
|
"eval_steps_per_second": 0.913, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.0809968847352024e-05, |
|
"loss": 0.3757, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 4.003115264797508e-05, |
|
"loss": 0.2964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8552361396303901, |
|
"eval_loss": 0.808638870716095, |
|
"eval_runtime": 11.2618, |
|
"eval_samples_per_second": 86.487, |
|
"eval_steps_per_second": 0.71, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 3.925233644859813e-05, |
|
"loss": 0.2592, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 3.847352024922119e-05, |
|
"loss": 0.194, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8583162217659137, |
|
"eval_loss": 0.744567334651947, |
|
"eval_runtime": 8.8124, |
|
"eval_samples_per_second": 110.526, |
|
"eval_steps_per_second": 0.908, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.769470404984424e-05, |
|
"loss": 0.1626, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 3.691588785046729e-05, |
|
"loss": 0.1358, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_accuracy": 0.8572895277207392, |
|
"eval_loss": 0.7063936591148376, |
|
"eval_runtime": 8.8917, |
|
"eval_samples_per_second": 109.54, |
|
"eval_steps_per_second": 0.9, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.6137071651090344e-05, |
|
"loss": 0.1116, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_accuracy": 0.8655030800821355, |
|
"eval_loss": 0.67196124792099, |
|
"eval_runtime": 9.0077, |
|
"eval_samples_per_second": 108.129, |
|
"eval_steps_per_second": 0.888, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 3.5358255451713394e-05, |
|
"loss": 0.0974, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 3.457943925233645e-05, |
|
"loss": 0.0811, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.864476386036961, |
|
"eval_loss": 0.6515084505081177, |
|
"eval_runtime": 9.3868, |
|
"eval_samples_per_second": 103.762, |
|
"eval_steps_per_second": 0.852, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 276, |
|
"total_flos": 1.087746145977493e+19, |
|
"train_loss": 1.7795628476617993, |
|
"train_runtime": 3092.1212, |
|
"train_samples_per_second": 119.027, |
|
"train_steps_per_second": 0.231 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 714, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 1.087746145977493e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|