|
{ |
|
"best_metric": 0.8284272507637513, |
|
"best_model_checkpoint": "test-klue/ynat/run-1/checkpoint-500", |
|
"epoch": 3.4965034965034967, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.859444109925402e-05, |
|
"loss": 1.497, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_f1": 0.6734208042191661, |
|
"eval_loss": 1.089546799659729, |
|
"eval_runtime": 13.0037, |
|
"eval_samples_per_second": 700.339, |
|
"eval_steps_per_second": 1.384, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.418884402412214e-05, |
|
"loss": 0.5665, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_f1": 0.8012954608718272, |
|
"eval_loss": 0.6758251190185547, |
|
"eval_runtime": 13.0152, |
|
"eval_samples_per_second": 699.72, |
|
"eval_steps_per_second": 1.383, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.9783246948990256e-05, |
|
"loss": 0.5083, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_f1": 0.7895493614371508, |
|
"eval_loss": 0.6960522532463074, |
|
"eval_runtime": 12.9373, |
|
"eval_samples_per_second": 703.936, |
|
"eval_steps_per_second": 1.391, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.537764987385838e-05, |
|
"loss": 0.3865, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_f1": 0.8201909522421974, |
|
"eval_loss": 0.6086650490760803, |
|
"eval_runtime": 12.7238, |
|
"eval_samples_per_second": 715.746, |
|
"eval_steps_per_second": 1.415, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.09720527987265e-05, |
|
"loss": 0.385, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_f1": 0.8359538639496097, |
|
"eval_loss": 0.50257807970047, |
|
"eval_runtime": 12.8233, |
|
"eval_samples_per_second": 710.19, |
|
"eval_steps_per_second": 1.404, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.656645572359462e-05, |
|
"loss": 0.357, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_f1": 0.8294020659039051, |
|
"eval_loss": 0.5500179529190063, |
|
"eval_runtime": 12.8836, |
|
"eval_samples_per_second": 706.867, |
|
"eval_steps_per_second": 1.397, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.216085864846273e-05, |
|
"loss": 0.2337, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_f1": 0.8324393645838855, |
|
"eval_loss": 0.5851877927780151, |
|
"eval_runtime": 12.9246, |
|
"eval_samples_per_second": 704.625, |
|
"eval_steps_per_second": 1.393, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.775526157333085e-05, |
|
"loss": 0.2136, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_f1": 0.8449098012505971, |
|
"eval_loss": 0.5285059809684753, |
|
"eval_runtime": 12.7922, |
|
"eval_samples_per_second": 711.919, |
|
"eval_steps_per_second": 1.407, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.3349664498198968e-05, |
|
"loss": 0.1926, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_f1": 0.8313597336118761, |
|
"eval_loss": 0.6143582463264465, |
|
"eval_runtime": 12.8169, |
|
"eval_samples_per_second": 710.548, |
|
"eval_steps_per_second": 1.404, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.894406742306709e-05, |
|
"loss": 0.1359, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_f1": 0.8284272507637513, |
|
"eval_loss": 0.7059531211853027, |
|
"eval_runtime": 12.7876, |
|
"eval_samples_per_second": 712.173, |
|
"eval_steps_per_second": 1.408, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 715, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 178689205892400.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 5.859444109925402e-05, |
|
"num_train_epochs": 5, |
|
"per_device_train_batch_size": 32, |
|
"seed": 11 |
|
} |
|
} |
|
|