|
{ |
|
"best_metric": 0.7034921608463985, |
|
"best_model_checkpoint": "omarmomen/sf_babylm_1/finetune/control_raising_relative_token_position/checkpoint-800", |
|
"epoch": 10.0, |
|
"global_step": 1280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.6759610176086426, |
|
"eval_f1": 0.6656290750010516, |
|
"eval_loss": 2.702744722366333, |
|
"eval_mcc": 0.3517766360451299, |
|
"eval_runtime": 31.4433, |
|
"eval_samples_per_second": 780.167, |
|
"eval_steps_per_second": 97.541, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.673270583152771, |
|
"eval_f1": 0.6733504503403024, |
|
"eval_loss": 3.024230718612671, |
|
"eval_mcc": 0.3467492602821547, |
|
"eval_runtime": 31.5737, |
|
"eval_samples_per_second": 776.943, |
|
"eval_steps_per_second": 97.138, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 3.0468750000000002e-05, |
|
"loss": 0.035, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_accuracy": 0.6791814565658569, |
|
"eval_f1": 0.6623476917796465, |
|
"eval_loss": 3.0073440074920654, |
|
"eval_mcc": 0.35869913049024865, |
|
"eval_runtime": 31.455, |
|
"eval_samples_per_second": 779.876, |
|
"eval_steps_per_second": 97.504, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_accuracy": 0.684684693813324, |
|
"eval_f1": 0.7034921608463985, |
|
"eval_loss": 3.041003942489624, |
|
"eval_mcc": 0.37473206132796, |
|
"eval_runtime": 31.3563, |
|
"eval_samples_per_second": 782.33, |
|
"eval_steps_per_second": 97.811, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 1.09375e-05, |
|
"loss": 0.0002, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_accuracy": 0.6741673946380615, |
|
"eval_f1": 0.6745388655889898, |
|
"eval_loss": 3.2542927265167236, |
|
"eval_mcc": 0.34857354071039653, |
|
"eval_runtime": 31.5138, |
|
"eval_samples_per_second": 778.42, |
|
"eval_steps_per_second": 97.322, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_accuracy": 0.6747788786888123, |
|
"eval_f1": 0.6763751419763102, |
|
"eval_loss": 3.2722206115722656, |
|
"eval_mcc": 0.34993760586669787, |
|
"eval_runtime": 31.3294, |
|
"eval_samples_per_second": 783.002, |
|
"eval_steps_per_second": 97.895, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1280, |
|
"total_flos": 2092658450872320.0, |
|
"train_loss": 0.013747621335642179, |
|
"train_runtime": 397.5816, |
|
"train_samples_per_second": 205.392, |
|
"train_steps_per_second": 3.219 |
|
} |
|
], |
|
"max_steps": 1280, |
|
"num_train_epochs": 10, |
|
"total_flos": 2092658450872320.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|