Hanzalwi's picture
Training in progress, step 2200, checkpoint
5bf3f1f
raw
history blame
7.26 kB
{
"best_metric": 1.234089970588684,
"best_model_checkpoint": "./outputs/checkpoint-2200",
"epoch": 2.9333333333333336,
"eval_steps": 100,
"global_step": 2200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 0.0002,
"loss": 1.4897,
"step": 100
},
{
"epoch": 0.13,
"eval_loss": 1.3608959913253784,
"eval_runtime": 129.2301,
"eval_samples_per_second": 14.935,
"eval_steps_per_second": 1.873,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 0.0002,
"loss": 1.1623,
"step": 200
},
{
"epoch": 0.27,
"eval_loss": 1.3195548057556152,
"eval_runtime": 128.3462,
"eval_samples_per_second": 15.037,
"eval_steps_per_second": 1.886,
"step": 200
},
{
"epoch": 0.4,
"learning_rate": 0.0002,
"loss": 1.1372,
"step": 300
},
{
"epoch": 0.4,
"eval_loss": 1.3046056032180786,
"eval_runtime": 127.9116,
"eval_samples_per_second": 15.089,
"eval_steps_per_second": 1.892,
"step": 300
},
{
"epoch": 0.53,
"learning_rate": 0.0002,
"loss": 1.1256,
"step": 400
},
{
"epoch": 0.53,
"eval_loss": 1.2946586608886719,
"eval_runtime": 128.2992,
"eval_samples_per_second": 15.043,
"eval_steps_per_second": 1.886,
"step": 400
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 1.1119,
"step": 500
},
{
"epoch": 0.67,
"eval_loss": 1.2872602939605713,
"eval_runtime": 128.1326,
"eval_samples_per_second": 15.063,
"eval_steps_per_second": 1.889,
"step": 500
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.1139,
"step": 600
},
{
"epoch": 0.8,
"eval_loss": 1.2798593044281006,
"eval_runtime": 127.1516,
"eval_samples_per_second": 15.179,
"eval_steps_per_second": 1.903,
"step": 600
},
{
"epoch": 0.93,
"learning_rate": 0.0002,
"loss": 1.1079,
"step": 700
},
{
"epoch": 0.93,
"eval_loss": 1.2776304483413696,
"eval_runtime": 127.0627,
"eval_samples_per_second": 15.189,
"eval_steps_per_second": 1.905,
"step": 700
},
{
"epoch": 1.07,
"learning_rate": 0.0002,
"loss": 1.0976,
"step": 800
},
{
"epoch": 1.07,
"eval_loss": 1.2741276025772095,
"eval_runtime": 127.4814,
"eval_samples_per_second": 15.139,
"eval_steps_per_second": 1.898,
"step": 800
},
{
"epoch": 1.2,
"learning_rate": 0.0002,
"loss": 1.0956,
"step": 900
},
{
"epoch": 1.2,
"eval_loss": 1.2668750286102295,
"eval_runtime": 127.4471,
"eval_samples_per_second": 15.144,
"eval_steps_per_second": 1.899,
"step": 900
},
{
"epoch": 1.33,
"learning_rate": 0.0002,
"loss": 1.0927,
"step": 1000
},
{
"epoch": 1.33,
"eval_loss": 1.2661573886871338,
"eval_runtime": 127.7348,
"eval_samples_per_second": 15.109,
"eval_steps_per_second": 1.895,
"step": 1000
},
{
"epoch": 1.47,
"learning_rate": 0.0002,
"loss": 1.0881,
"step": 1100
},
{
"epoch": 1.47,
"eval_loss": 1.260953664779663,
"eval_runtime": 127.6819,
"eval_samples_per_second": 15.116,
"eval_steps_per_second": 1.895,
"step": 1100
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 1.0776,
"step": 1200
},
{
"epoch": 1.6,
"eval_loss": 1.2578165531158447,
"eval_runtime": 128.6774,
"eval_samples_per_second": 14.999,
"eval_steps_per_second": 1.881,
"step": 1200
},
{
"epoch": 1.73,
"learning_rate": 0.0002,
"loss": 1.0839,
"step": 1300
},
{
"epoch": 1.73,
"eval_loss": 1.2555885314941406,
"eval_runtime": 127.1086,
"eval_samples_per_second": 15.184,
"eval_steps_per_second": 1.904,
"step": 1300
},
{
"epoch": 1.87,
"learning_rate": 0.0002,
"loss": 1.0843,
"step": 1400
},
{
"epoch": 1.87,
"eval_loss": 1.2506641149520874,
"eval_runtime": 130.0402,
"eval_samples_per_second": 14.842,
"eval_steps_per_second": 1.861,
"step": 1400
},
{
"epoch": 2.0,
"learning_rate": 0.0002,
"loss": 1.0785,
"step": 1500
},
{
"epoch": 2.0,
"eval_loss": 1.2521893978118896,
"eval_runtime": 127.1994,
"eval_samples_per_second": 15.173,
"eval_steps_per_second": 1.903,
"step": 1500
},
{
"epoch": 2.13,
"learning_rate": 0.0002,
"loss": 1.0763,
"step": 1600
},
{
"epoch": 2.13,
"eval_loss": 1.247235655784607,
"eval_runtime": 127.1673,
"eval_samples_per_second": 15.177,
"eval_steps_per_second": 1.903,
"step": 1600
},
{
"epoch": 2.27,
"learning_rate": 0.0002,
"loss": 1.0725,
"step": 1700
},
{
"epoch": 2.27,
"eval_loss": 1.2424886226654053,
"eval_runtime": 127.0277,
"eval_samples_per_second": 15.194,
"eval_steps_per_second": 1.905,
"step": 1700
},
{
"epoch": 2.4,
"learning_rate": 0.0002,
"loss": 1.0668,
"step": 1800
},
{
"epoch": 2.4,
"eval_loss": 1.2424781322479248,
"eval_runtime": 127.5992,
"eval_samples_per_second": 15.125,
"eval_steps_per_second": 1.897,
"step": 1800
},
{
"epoch": 2.53,
"learning_rate": 0.0002,
"loss": 1.0603,
"step": 1900
},
{
"epoch": 2.53,
"eval_loss": 1.2405894994735718,
"eval_runtime": 127.1826,
"eval_samples_per_second": 15.175,
"eval_steps_per_second": 1.903,
"step": 1900
},
{
"epoch": 2.67,
"learning_rate": 0.0002,
"loss": 1.0716,
"step": 2000
},
{
"epoch": 2.67,
"eval_loss": 1.23755943775177,
"eval_runtime": 126.9688,
"eval_samples_per_second": 15.201,
"eval_steps_per_second": 1.906,
"step": 2000
},
{
"epoch": 2.8,
"learning_rate": 0.0002,
"loss": 1.0647,
"step": 2100
},
{
"epoch": 2.8,
"eval_loss": 1.2376608848571777,
"eval_runtime": 127.1015,
"eval_samples_per_second": 15.185,
"eval_steps_per_second": 1.904,
"step": 2100
},
{
"epoch": 2.93,
"learning_rate": 0.0002,
"loss": 1.0595,
"step": 2200
},
{
"epoch": 2.93,
"eval_loss": 1.234089970588684,
"eval_runtime": 126.9486,
"eval_samples_per_second": 15.203,
"eval_steps_per_second": 1.906,
"step": 2200
}
],
"logging_steps": 100,
"max_steps": 2250,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 7.800080571706982e+16,
"trial_name": null,
"trial_params": null
}