|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.01876348625574632, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.381743127873159e-05, |
|
"eval_loss": 0.9324500560760498, |
|
"eval_runtime": 245.1562, |
|
"eval_samples_per_second": 9.153, |
|
"eval_steps_per_second": 4.577, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009381743127873159, |
|
"grad_norm": 1.0036320686340332, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5169, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0018763486255746317, |
|
"grad_norm": 0.7501591444015503, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1409, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0028145229383619475, |
|
"grad_norm": 0.8483043909072876, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9805, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0037526972511492634, |
|
"grad_norm": 0.5633870363235474, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1368, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00469087156393658, |
|
"grad_norm": 0.8526553511619568, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0437, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00469087156393658, |
|
"eval_loss": 0.5282271504402161, |
|
"eval_runtime": 245.0343, |
|
"eval_samples_per_second": 9.158, |
|
"eval_steps_per_second": 4.579, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005629045876723895, |
|
"grad_norm": 0.6110089421272278, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9785, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.006567220189511211, |
|
"grad_norm": 0.6589618921279907, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2064, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.007505394502298527, |
|
"grad_norm": 0.6540957093238831, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0105, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.008443568815085843, |
|
"grad_norm": 0.7634976506233215, |
|
"learning_rate": 0.0002, |
|
"loss": 0.97, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.00938174312787316, |
|
"grad_norm": 0.7584695816040039, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9857, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00938174312787316, |
|
"eval_loss": 0.5102854371070862, |
|
"eval_runtime": 245.1252, |
|
"eval_samples_per_second": 9.155, |
|
"eval_steps_per_second": 4.577, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.010319917440660475, |
|
"grad_norm": 0.5788394808769226, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0154, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01125809175344779, |
|
"grad_norm": 0.5604102611541748, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1134, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.012196266066235107, |
|
"grad_norm": 0.836513876914978, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1753, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.013134440379022422, |
|
"grad_norm": 1.2483829259872437, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0936, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.014072614691809739, |
|
"grad_norm": 0.7146385312080383, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0279, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.014072614691809739, |
|
"eval_loss": 0.5074164867401123, |
|
"eval_runtime": 245.1114, |
|
"eval_samples_per_second": 9.155, |
|
"eval_steps_per_second": 4.578, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.015010789004597054, |
|
"grad_norm": 0.8109822869300842, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0838, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01594896331738437, |
|
"grad_norm": 0.43211817741394043, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0516, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.016887137630171686, |
|
"grad_norm": 0.5124821662902832, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9819, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.017825311942959002, |
|
"grad_norm": 2.5899274349212646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.987, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01876348625574632, |
|
"grad_norm": 0.7276327610015869, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9722, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01876348625574632, |
|
"eval_loss": 0.4974622130393982, |
|
"eval_runtime": 245.2117, |
|
"eval_samples_per_second": 9.151, |
|
"eval_steps_per_second": 4.576, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.4339615678464e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|