|
{ |
|
"best_metric": 0.863342821598053, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mnli_256/checkpoint-27612", |
|
"epoch": 14.0, |
|
"global_step": 42952, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.0008, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5404992358634743, |
|
"eval_loss": 0.9489795565605164, |
|
"eval_runtime": 22.1002, |
|
"eval_samples_per_second": 444.113, |
|
"eval_steps_per_second": 3.484, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.9205, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5674987264391238, |
|
"eval_loss": 0.9166129231452942, |
|
"eval_runtime": 22.1405, |
|
"eval_samples_per_second": 443.305, |
|
"eval_steps_per_second": 3.478, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.8928, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.578604177279674, |
|
"eval_loss": 0.902202844619751, |
|
"eval_runtime": 22.2149, |
|
"eval_samples_per_second": 441.82, |
|
"eval_steps_per_second": 3.466, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.872, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5967396841569027, |
|
"eval_loss": 0.8842912316322327, |
|
"eval_runtime": 22.1304, |
|
"eval_samples_per_second": 443.507, |
|
"eval_steps_per_second": 3.479, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.8531, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5959246051961283, |
|
"eval_loss": 0.8806653618812561, |
|
"eval_runtime": 22.1032, |
|
"eval_samples_per_second": 444.053, |
|
"eval_steps_per_second": 3.484, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.8359, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5998981151299032, |
|
"eval_loss": 0.8763103485107422, |
|
"eval_runtime": 22.114, |
|
"eval_samples_per_second": 443.837, |
|
"eval_steps_per_second": 3.482, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.8197, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6009169638308711, |
|
"eval_loss": 0.8814870119094849, |
|
"eval_runtime": 22.1114, |
|
"eval_samples_per_second": 443.889, |
|
"eval_steps_per_second": 3.482, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.8028, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5933774834437087, |
|
"eval_loss": 0.9012145400047302, |
|
"eval_runtime": 22.1167, |
|
"eval_samples_per_second": 443.782, |
|
"eval_steps_per_second": 3.482, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.786, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6190524707080999, |
|
"eval_loss": 0.863342821598053, |
|
"eval_runtime": 22.1033, |
|
"eval_samples_per_second": 444.052, |
|
"eval_steps_per_second": 3.484, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.769, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6097809475292919, |
|
"eval_loss": 0.8733872771263123, |
|
"eval_runtime": 22.175, |
|
"eval_samples_per_second": 442.616, |
|
"eval_steps_per_second": 3.472, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.752, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6220071319409067, |
|
"eval_loss": 0.8681850433349609, |
|
"eval_runtime": 22.0877, |
|
"eval_samples_per_second": 444.365, |
|
"eval_steps_per_second": 3.486, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.736, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.617524197656648, |
|
"eval_loss": 0.8740953803062439, |
|
"eval_runtime": 22.1771, |
|
"eval_samples_per_second": 442.573, |
|
"eval_steps_per_second": 3.472, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.7204, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6047885888945491, |
|
"eval_loss": 0.8993946313858032, |
|
"eval_runtime": 22.1027, |
|
"eval_samples_per_second": 444.063, |
|
"eval_steps_per_second": 3.484, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.7038, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6079470198675496, |
|
"eval_loss": 0.8940238356590271, |
|
"eval_runtime": 22.1071, |
|
"eval_samples_per_second": 443.976, |
|
"eval_steps_per_second": 3.483, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 42952, |
|
"total_flos": 1.4482128069931827e+17, |
|
"train_loss": 0.8189185247849343, |
|
"train_runtime": 37903.3591, |
|
"train_samples_per_second": 518.031, |
|
"train_steps_per_second": 4.047 |
|
} |
|
], |
|
"max_steps": 153400, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.4482128069931827e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|