|
{ |
|
"best_metric": 2.390516519546509, |
|
"best_model_checkpoint": "MIReAD_3e-05/checkpoint-13806", |
|
"epoch": 6.0, |
|
"global_step": 27612, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.94567579313342e-05, |
|
"loss": 4.5385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.8913515862668408e-05, |
|
"loss": 3.745, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.8370273794002608e-05, |
|
"loss": 3.3652, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.782703172533681e-05, |
|
"loss": 3.1532, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.728378965667101e-05, |
|
"loss": 2.9768, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.6740547588005218e-05, |
|
"loss": 2.8561, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.6197305519339418e-05, |
|
"loss": 2.7393, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.565406345067362e-05, |
|
"loss": 2.6236, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.511082138200782e-05, |
|
"loss": 2.6074, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3415979708306912, |
|
"eval_f1": 0.28057933815352265, |
|
"eval_loss": 2.6715340614318848, |
|
"eval_precision": 0.3280031388570244, |
|
"eval_recall": 0.30555570676656096, |
|
"eval_runtime": 128.8869, |
|
"eval_samples_per_second": 122.355, |
|
"eval_steps_per_second": 7.65, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.456757931334203e-05, |
|
"loss": 2.4343, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.402433724467623e-05, |
|
"loss": 2.2922, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.3481095176010432e-05, |
|
"loss": 2.2894, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.2937853107344632e-05, |
|
"loss": 2.2635, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.239461103867884e-05, |
|
"loss": 2.238, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.185136897001304e-05, |
|
"loss": 2.2425, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1308126901347242e-05, |
|
"loss": 2.1702, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.0764884832681442e-05, |
|
"loss": 2.1909, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0221642764015646e-05, |
|
"loss": 2.1209, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3929613189600507, |
|
"eval_f1": 0.3569292122063319, |
|
"eval_loss": 2.4266042709350586, |
|
"eval_precision": 0.37263788729318065, |
|
"eval_recall": 0.3854876449553655, |
|
"eval_runtime": 127.8545, |
|
"eval_samples_per_second": 123.343, |
|
"eval_steps_per_second": 7.712, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.967840069534985e-05, |
|
"loss": 1.9419, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.913515862668405e-05, |
|
"loss": 1.7981, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.8591916558018253e-05, |
|
"loss": 1.82, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.8048674489352456e-05, |
|
"loss": 1.7695, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.750543242068666e-05, |
|
"loss": 1.7817, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.696219035202086e-05, |
|
"loss": 1.7977, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.6418948283355063e-05, |
|
"loss": 1.7337, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.5875706214689266e-05, |
|
"loss": 1.7563, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.533246414602347e-05, |
|
"loss": 1.7413, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.40786303107165506, |
|
"eval_f1": 0.3830566278260409, |
|
"eval_loss": 2.390516519546509, |
|
"eval_precision": 0.40575163850397916, |
|
"eval_recall": 0.39817650401283294, |
|
"eval_runtime": 129.2884, |
|
"eval_samples_per_second": 121.975, |
|
"eval_steps_per_second": 7.626, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.478922207735767e-05, |
|
"loss": 1.6035, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.4245980008691873e-05, |
|
"loss": 1.4123, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.3702737940026075e-05, |
|
"loss": 1.3897, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.3159495871360279e-05, |
|
"loss": 1.3592, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.261625380269448e-05, |
|
"loss": 1.3673, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.2073011734028684e-05, |
|
"loss": 1.3792, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.1529769665362886e-05, |
|
"loss": 1.3385, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.0986527596697089e-05, |
|
"loss": 1.3258, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.044328552803129e-05, |
|
"loss": 1.3415, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4099556119213697, |
|
"eval_f1": 0.3978990080253614, |
|
"eval_loss": 2.46903133392334, |
|
"eval_precision": 0.4214347585145809, |
|
"eval_recall": 0.40462921661844276, |
|
"eval_runtime": 129.7625, |
|
"eval_samples_per_second": 121.53, |
|
"eval_steps_per_second": 7.598, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.900043459365494e-06, |
|
"loss": 1.2695, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 9.356801390699696e-06, |
|
"loss": 1.0401, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 8.8135593220339e-06, |
|
"loss": 1.0362, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 8.270317253368101e-06, |
|
"loss": 1.0133, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 7.727075184702305e-06, |
|
"loss": 1.0128, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 7.183833116036506e-06, |
|
"loss": 0.9873, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.640591047370709e-06, |
|
"loss": 0.9855, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 6.0973489787049115e-06, |
|
"loss": 1.0153, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.554106910039114e-06, |
|
"loss": 0.9783, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 5.010864841373317e-06, |
|
"loss": 0.9837, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4050095117311351, |
|
"eval_f1": 0.3983089847187763, |
|
"eval_loss": 2.622011184692383, |
|
"eval_precision": 0.411294379610805, |
|
"eval_recall": 0.40211336103710194, |
|
"eval_runtime": 129.5856, |
|
"eval_samples_per_second": 121.696, |
|
"eval_steps_per_second": 7.609, |
|
"step": 23010 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.467622772707519e-06, |
|
"loss": 0.7989, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.924380704041722e-06, |
|
"loss": 0.776, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 3.3811386353759236e-06, |
|
"loss": 0.7795, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 2.837896566710126e-06, |
|
"loss": 0.7417, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.2946544980443283e-06, |
|
"loss": 0.7566, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.7514124293785311e-06, |
|
"loss": 0.7571, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.2081703607127337e-06, |
|
"loss": 0.7666, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 6.649282920469362e-07, |
|
"loss": 0.7696, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 1.2168622338113866e-07, |
|
"loss": 0.7456, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.40107799619530754, |
|
"eval_f1": 0.39625451475720025, |
|
"eval_loss": 2.7117061614990234, |
|
"eval_precision": 0.4066294033650095, |
|
"eval_recall": 0.39986711618175386, |
|
"eval_runtime": 130.357, |
|
"eval_samples_per_second": 120.975, |
|
"eval_steps_per_second": 7.564, |
|
"step": 27612 |
|
} |
|
], |
|
"max_steps": 27612, |
|
"num_train_epochs": 6, |
|
"total_flos": 1.1642961087465062e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|