{
  "best_metric": 0.6232164195970928,
  "best_model_checkpoint": "./fine-tune/roberta-base/cola/checkpoint-5350",
  "epoch": 10.0,
  "global_step": 5350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.93,
      "learning_rate": 1.9288128852654602e-05,
      "loss": 0.5497,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5504242181777954,
      "eval_matthews_correlation": 0.4613462408273676,
      "eval_runtime": 1.7936,
      "eval_samples_per_second": 581.514,
      "eval_steps_per_second": 73.038,
      "step": 535
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.729966196062836e-05,
      "loss": 0.3786,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.48504745960235596,
      "eval_matthews_correlation": 0.5470285194988502,
      "eval_runtime": 1.8903,
      "eval_samples_per_second": 551.77,
      "eval_steps_per_second": 69.302,
      "step": 1070
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.531119506860211e-05,
      "loss": 0.2733,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.5036450624465942,
      "eval_matthews_correlation": 0.5791579814417055,
      "eval_runtime": 2.2071,
      "eval_samples_per_second": 472.565,
      "eval_steps_per_second": 59.354,
      "step": 1605
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.3322728176575861e-05,
      "loss": 0.2204,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.5532266497612,
      "eval_matthews_correlation": 0.6138670730002377,
      "eval_runtime": 1.8422,
      "eval_samples_per_second": 566.18,
      "eval_steps_per_second": 71.112,
      "step": 2140
    },
    {
      "epoch": 4.67,
      "learning_rate": 1.1334261284549613e-05,
      "loss": 0.164,
      "step": 2500
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.9516334533691406,
      "eval_matthews_correlation": 0.5934048733741295,
      "eval_runtime": 1.8822,
      "eval_samples_per_second": 554.136,
      "eval_steps_per_second": 69.599,
      "step": 2675
    },
    {
      "epoch": 5.61,
      "learning_rate": 9.345794392523365e-06,
      "loss": 0.1351,
      "step": 3000
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.9050658345222473,
      "eval_matthews_correlation": 0.5754342635449895,
      "eval_runtime": 1.846,
      "eval_samples_per_second": 565.019,
      "eval_steps_per_second": 70.966,
      "step": 3210
    },
    {
      "epoch": 6.54,
      "learning_rate": 7.357327500497118e-06,
      "loss": 0.1065,
      "step": 3500
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.9006233215332031,
      "eval_matthews_correlation": 0.6161067903576496,
      "eval_runtime": 1.877,
      "eval_samples_per_second": 555.681,
      "eval_steps_per_second": 69.793,
      "step": 3745
    },
    {
      "epoch": 7.48,
      "learning_rate": 5.36886060847087e-06,
      "loss": 0.0874,
      "step": 4000
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.9457036852836609,
      "eval_matthews_correlation": 0.6157170770507245,
      "eval_runtime": 1.8772,
      "eval_samples_per_second": 555.609,
      "eval_steps_per_second": 69.784,
      "step": 4280
    },
    {
      "epoch": 8.41,
      "learning_rate": 3.3803937164446215e-06,
      "loss": 0.0579,
      "step": 4500
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.0371503829956055,
      "eval_matthews_correlation": 0.6006530974238766,
      "eval_runtime": 1.8847,
      "eval_samples_per_second": 553.403,
      "eval_steps_per_second": 69.507,
      "step": 4815
    },
    {
      "epoch": 9.35,
      "learning_rate": 1.3919268244183738e-06,
      "loss": 0.0451,
      "step": 5000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.0570732355117798,
      "eval_matthews_correlation": 0.6232164195970928,
      "eval_runtime": 1.8418,
      "eval_samples_per_second": 566.299,
      "eval_steps_per_second": 71.127,
      "step": 5350
    },
    {
      "epoch": 10.0,
      "step": 5350,
      "total_flos": 5624656585958400.0,
      "train_loss": 0.191466952528909,
      "train_runtime": 617.5978,
      "train_samples_per_second": 138.456,
      "train_steps_per_second": 8.663
    }
  ],
  "max_steps": 5350,
  "num_train_epochs": 10,
  "total_flos": 5624656585958400.0,
  "trial_name": null,
  "trial_params": null
}