|
{ |
|
"best_metric": 2.3411779403686523, |
|
"best_model_checkpoint": "./outputs/checkpoint-1200", |
|
"epoch": 0.8743169398907104, |
|
"eval_steps": 100, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.7403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.642388343811035, |
|
"eval_runtime": 205.2517, |
|
"eval_samples_per_second": 30.567, |
|
"eval_steps_per_second": 3.825, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.606, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.5923426151275635, |
|
"eval_runtime": 208.3105, |
|
"eval_samples_per_second": 30.119, |
|
"eval_steps_per_second": 3.768, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5632, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.5577170848846436, |
|
"eval_runtime": 205.6257, |
|
"eval_samples_per_second": 30.512, |
|
"eval_steps_per_second": 3.818, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5369, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.5247888565063477, |
|
"eval_runtime": 205.7237, |
|
"eval_samples_per_second": 30.497, |
|
"eval_steps_per_second": 3.816, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.498934745788574, |
|
"eval_runtime": 207.1055, |
|
"eval_samples_per_second": 30.294, |
|
"eval_steps_per_second": 3.79, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.470606565475464, |
|
"eval_runtime": 205.8606, |
|
"eval_samples_per_second": 30.477, |
|
"eval_steps_per_second": 3.813, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4509, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.446812391281128, |
|
"eval_runtime": 205.2079, |
|
"eval_samples_per_second": 30.574, |
|
"eval_steps_per_second": 3.825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4284, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.42557954788208, |
|
"eval_runtime": 205.3478, |
|
"eval_samples_per_second": 30.553, |
|
"eval_steps_per_second": 3.823, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3916, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.403062343597412, |
|
"eval_runtime": 204.8842, |
|
"eval_samples_per_second": 30.622, |
|
"eval_steps_per_second": 3.831, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3883, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.384012222290039, |
|
"eval_runtime": 204.4165, |
|
"eval_samples_per_second": 30.692, |
|
"eval_steps_per_second": 3.84, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.382, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.361379384994507, |
|
"eval_runtime": 204.6076, |
|
"eval_samples_per_second": 30.664, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3372, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.3411779403686523, |
|
"eval_runtime": 204.8024, |
|
"eval_samples_per_second": 30.634, |
|
"eval_steps_per_second": 3.833, |
|
"step": 1200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 3.500881324867584e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|