|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.2753128555176336, |
|
"eval_steps": 1000, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11376564277588168, |
|
"grad_norm": 0.24430778622627258, |
|
"learning_rate": 9.99848701000714e-05, |
|
"loss": 1.1872, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22753128555176336, |
|
"grad_norm": 0.2623399794101715, |
|
"learning_rate": 9.945628442036389e-05, |
|
"loss": 1.0129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3412969283276451, |
|
"grad_norm": 0.27533629536628723, |
|
"learning_rate": 9.818033542127878e-05, |
|
"loss": 0.9522, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4550625711035267, |
|
"grad_norm": 0.3079286813735962, |
|
"learning_rate": 9.61763047267894e-05, |
|
"loss": 0.9455, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5688282138794084, |
|
"grad_norm": 0.30581459403038025, |
|
"learning_rate": 9.347447643626802e-05, |
|
"loss": 0.9141, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6825938566552902, |
|
"grad_norm": 0.3370199501514435, |
|
"learning_rate": 9.011567948345395e-05, |
|
"loss": 0.9212, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7963594994311718, |
|
"grad_norm": 0.3328758776187897, |
|
"learning_rate": 8.615067064615998e-05, |
|
"loss": 0.886, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9101251422070534, |
|
"grad_norm": 0.3751530647277832, |
|
"learning_rate": 8.16393675304241e-05, |
|
"loss": 0.8991, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.023890784982935, |
|
"grad_norm": 0.33782124519348145, |
|
"learning_rate": 7.66499431199391e-05, |
|
"loss": 0.8787, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1376564277588168, |
|
"grad_norm": 0.3481363356113434, |
|
"learning_rate": 7.125779557356193e-05, |
|
"loss": 0.8458, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2514220705346986, |
|
"grad_norm": 0.41307157278060913, |
|
"learning_rate": 6.554440883890547e-05, |
|
"loss": 0.8482, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.36518771331058, |
|
"grad_norm": 0.4020177721977234, |
|
"learning_rate": 5.959612129995763e-05, |
|
"loss": 0.8467, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4789533560864618, |
|
"grad_norm": 0.3888244926929474, |
|
"learning_rate": 5.3502821066426265e-05, |
|
"loss": 0.8151, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5927189988623436, |
|
"grad_norm": 0.391117662191391, |
|
"learning_rate": 4.7356587621068075e-05, |
|
"loss": 0.822, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7064846416382253, |
|
"grad_norm": 0.4161781668663025, |
|
"learning_rate": 4.125030035187653e-05, |
|
"loss": 0.7955, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.820250284414107, |
|
"grad_norm": 0.4105525314807892, |
|
"learning_rate": 3.527623499642595e-05, |
|
"loss": 0.8151, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9340159271899886, |
|
"grad_norm": 0.41017189621925354, |
|
"learning_rate": 2.952466920833622e-05, |
|
"loss": 0.824, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.04778156996587, |
|
"grad_norm": 0.41780519485473633, |
|
"learning_rate": 2.408251831797206e-05, |
|
"loss": 0.7866, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.161547212741752, |
|
"grad_norm": 0.4392172396183014, |
|
"learning_rate": 1.903202190320803e-05, |
|
"loss": 0.7672, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2753128555176336, |
|
"grad_norm": 0.45009684562683105, |
|
"learning_rate": 1.4449501018269717e-05, |
|
"loss": 0.763, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.2753128555176336, |
|
"eval_loss": 0.8412221074104309, |
|
"eval_runtime": 215.6811, |
|
"eval_samples_per_second": 0.923, |
|
"eval_steps_per_second": 0.923, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1317, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.7075818133428634e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|