|
{ |
|
"best_metric": 2.1539299488067627, |
|
"best_model_checkpoint": "./outputs/checkpoint-2400", |
|
"epoch": 1.748633879781421, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.7403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.642388343811035, |
|
"eval_runtime": 205.2517, |
|
"eval_samples_per_second": 30.567, |
|
"eval_steps_per_second": 3.825, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.606, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.5923426151275635, |
|
"eval_runtime": 208.3105, |
|
"eval_samples_per_second": 30.119, |
|
"eval_steps_per_second": 3.768, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5632, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.5577170848846436, |
|
"eval_runtime": 205.6257, |
|
"eval_samples_per_second": 30.512, |
|
"eval_steps_per_second": 3.818, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5369, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.5247888565063477, |
|
"eval_runtime": 205.7237, |
|
"eval_samples_per_second": 30.497, |
|
"eval_steps_per_second": 3.816, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.498934745788574, |
|
"eval_runtime": 207.1055, |
|
"eval_samples_per_second": 30.294, |
|
"eval_steps_per_second": 3.79, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.470606565475464, |
|
"eval_runtime": 205.8606, |
|
"eval_samples_per_second": 30.477, |
|
"eval_steps_per_second": 3.813, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4509, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.446812391281128, |
|
"eval_runtime": 205.2079, |
|
"eval_samples_per_second": 30.574, |
|
"eval_steps_per_second": 3.825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4284, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.42557954788208, |
|
"eval_runtime": 205.3478, |
|
"eval_samples_per_second": 30.553, |
|
"eval_steps_per_second": 3.823, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3916, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.403062343597412, |
|
"eval_runtime": 204.8842, |
|
"eval_samples_per_second": 30.622, |
|
"eval_steps_per_second": 3.831, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3883, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.384012222290039, |
|
"eval_runtime": 204.4165, |
|
"eval_samples_per_second": 30.692, |
|
"eval_steps_per_second": 3.84, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.382, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.361379384994507, |
|
"eval_runtime": 204.6076, |
|
"eval_samples_per_second": 30.664, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3372, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.3411779403686523, |
|
"eval_runtime": 204.8024, |
|
"eval_samples_per_second": 30.634, |
|
"eval_steps_per_second": 3.833, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3496, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.3248472213745117, |
|
"eval_runtime": 204.1277, |
|
"eval_samples_per_second": 30.736, |
|
"eval_steps_per_second": 3.846, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3001, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.301698684692383, |
|
"eval_runtime": 204.3213, |
|
"eval_samples_per_second": 30.707, |
|
"eval_steps_per_second": 3.842, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2598, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.284607172012329, |
|
"eval_runtime": 204.5306, |
|
"eval_samples_per_second": 30.675, |
|
"eval_steps_per_second": 3.838, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2518, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.2680861949920654, |
|
"eval_runtime": 204.3902, |
|
"eval_samples_per_second": 30.696, |
|
"eval_steps_per_second": 3.841, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2518, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.2509853839874268, |
|
"eval_runtime": 204.5985, |
|
"eval_samples_per_second": 30.665, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2139, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.2367281913757324, |
|
"eval_runtime": 206.3249, |
|
"eval_samples_per_second": 30.408, |
|
"eval_steps_per_second": 3.805, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1986, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.222033977508545, |
|
"eval_runtime": 205.0156, |
|
"eval_samples_per_second": 30.603, |
|
"eval_steps_per_second": 3.829, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1927, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.2064921855926514, |
|
"eval_runtime": 205.4048, |
|
"eval_samples_per_second": 30.545, |
|
"eval_steps_per_second": 3.822, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1762, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.1937856674194336, |
|
"eval_runtime": 204.8721, |
|
"eval_samples_per_second": 30.624, |
|
"eval_steps_per_second": 3.832, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1584, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.1781294345855713, |
|
"eval_runtime": 204.973, |
|
"eval_samples_per_second": 30.609, |
|
"eval_steps_per_second": 3.83, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1481, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.1652047634124756, |
|
"eval_runtime": 204.8186, |
|
"eval_samples_per_second": 30.632, |
|
"eval_steps_per_second": 3.833, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1553, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.1539299488067627, |
|
"eval_runtime": 204.9017, |
|
"eval_samples_per_second": 30.62, |
|
"eval_steps_per_second": 3.831, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 7.009593112553472e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|