robiulawaldev's picture
Training in progress, step 200, checkpoint
0c37f9b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.009028733945782453,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.5143669728912266e-05,
"eval_loss": 2.5635130405426025,
"eval_runtime": 298.4496,
"eval_samples_per_second": 31.252,
"eval_steps_per_second": 7.814,
"step": 1
},
{
"epoch": 0.00045143669728912265,
"grad_norm": 0.6829887628555298,
"learning_rate": 0.0002,
"loss": 2.3672,
"step": 10
},
{
"epoch": 0.0009028733945782453,
"grad_norm": 0.6255219578742981,
"learning_rate": 0.0002,
"loss": 2.174,
"step": 20
},
{
"epoch": 0.001354310091867368,
"grad_norm": 0.6357229948043823,
"learning_rate": 0.0002,
"loss": 2.1169,
"step": 30
},
{
"epoch": 0.0018057467891564906,
"grad_norm": 0.6617687940597534,
"learning_rate": 0.0002,
"loss": 2.2879,
"step": 40
},
{
"epoch": 0.0022571834864456132,
"grad_norm": 0.6272353529930115,
"learning_rate": 0.0002,
"loss": 2.219,
"step": 50
},
{
"epoch": 0.0022571834864456132,
"eval_loss": 2.1563453674316406,
"eval_runtime": 297.2114,
"eval_samples_per_second": 31.382,
"eval_steps_per_second": 7.846,
"step": 50
},
{
"epoch": 0.002708620183734736,
"grad_norm": 0.6150534749031067,
"learning_rate": 0.0002,
"loss": 2.0668,
"step": 60
},
{
"epoch": 0.0031600568810238585,
"grad_norm": 0.629759669303894,
"learning_rate": 0.0002,
"loss": 2.1185,
"step": 70
},
{
"epoch": 0.003611493578312981,
"grad_norm": 0.5716665983200073,
"learning_rate": 0.0002,
"loss": 2.1695,
"step": 80
},
{
"epoch": 0.004062930275602104,
"grad_norm": 0.8046770691871643,
"learning_rate": 0.0002,
"loss": 2.002,
"step": 90
},
{
"epoch": 0.0045143669728912265,
"grad_norm": 0.5744296312332153,
"learning_rate": 0.0002,
"loss": 2.1034,
"step": 100
},
{
"epoch": 0.0045143669728912265,
"eval_loss": 2.102687120437622,
"eval_runtime": 296.9029,
"eval_samples_per_second": 31.414,
"eval_steps_per_second": 7.854,
"step": 100
},
{
"epoch": 0.004965803670180349,
"grad_norm": 0.6123510599136353,
"learning_rate": 0.0002,
"loss": 2.0783,
"step": 110
},
{
"epoch": 0.005417240367469472,
"grad_norm": 0.6725351214408875,
"learning_rate": 0.0002,
"loss": 2.0282,
"step": 120
},
{
"epoch": 0.005868677064758594,
"grad_norm": 0.6317930221557617,
"learning_rate": 0.0002,
"loss": 2.1056,
"step": 130
},
{
"epoch": 0.006320113762047717,
"grad_norm": 0.6830048561096191,
"learning_rate": 0.0002,
"loss": 2.063,
"step": 140
},
{
"epoch": 0.00677155045933684,
"grad_norm": 0.6954776644706726,
"learning_rate": 0.0002,
"loss": 1.9812,
"step": 150
},
{
"epoch": 0.00677155045933684,
"eval_loss": 2.0730745792388916,
"eval_runtime": 296.9969,
"eval_samples_per_second": 31.404,
"eval_steps_per_second": 7.852,
"step": 150
},
{
"epoch": 0.007222987156625962,
"grad_norm": 0.6882342100143433,
"learning_rate": 0.0002,
"loss": 2.0299,
"step": 160
},
{
"epoch": 0.007674423853915085,
"grad_norm": 0.6809920072555542,
"learning_rate": 0.0002,
"loss": 2.1699,
"step": 170
},
{
"epoch": 0.008125860551204208,
"grad_norm": 0.6971638798713684,
"learning_rate": 0.0002,
"loss": 1.9678,
"step": 180
},
{
"epoch": 0.00857729724849333,
"grad_norm": 0.6451826691627502,
"learning_rate": 0.0002,
"loss": 2.0033,
"step": 190
},
{
"epoch": 0.009028733945782453,
"grad_norm": 0.739595890045166,
"learning_rate": 0.0002,
"loss": 2.037,
"step": 200
},
{
"epoch": 0.009028733945782453,
"eval_loss": 2.0487308502197266,
"eval_runtime": 297.0738,
"eval_samples_per_second": 31.396,
"eval_steps_per_second": 7.85,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.890390469967872e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}