robiual-awal's picture
Training in progress, step 200, checkpoint
daa25ce verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.015601841017240034,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 7.800920508620017e-05,
"eval_loss": 2.6932764053344727,
"eval_runtime": 161.6543,
"eval_samples_per_second": 33.392,
"eval_steps_per_second": 16.696,
"step": 1
},
{
"epoch": 0.0007800920508620017,
"grad_norm": 1.6918610334396362,
"learning_rate": 0.0002,
"loss": 1.6264,
"step": 10
},
{
"epoch": 0.0015601841017240034,
"grad_norm": 0.8769293427467346,
"learning_rate": 0.0002,
"loss": 0.9108,
"step": 20
},
{
"epoch": 0.002340276152586005,
"grad_norm": 0.8490325808525085,
"learning_rate": 0.0002,
"loss": 0.8986,
"step": 30
},
{
"epoch": 0.003120368203448007,
"grad_norm": 0.7544106841087341,
"learning_rate": 0.0002,
"loss": 0.837,
"step": 40
},
{
"epoch": 0.0039004602543100085,
"grad_norm": 0.8223780989646912,
"learning_rate": 0.0002,
"loss": 0.9035,
"step": 50
},
{
"epoch": 0.0039004602543100085,
"eval_loss": 0.8136085271835327,
"eval_runtime": 160.6948,
"eval_samples_per_second": 33.592,
"eval_steps_per_second": 16.796,
"step": 50
},
{
"epoch": 0.00468055230517201,
"grad_norm": 1.2069497108459473,
"learning_rate": 0.0002,
"loss": 0.7518,
"step": 60
},
{
"epoch": 0.005460644356034012,
"grad_norm": 0.8096679449081421,
"learning_rate": 0.0002,
"loss": 0.8775,
"step": 70
},
{
"epoch": 0.006240736406896014,
"grad_norm": 0.6722874045372009,
"learning_rate": 0.0002,
"loss": 0.8393,
"step": 80
},
{
"epoch": 0.007020828457758015,
"grad_norm": 0.7288264036178589,
"learning_rate": 0.0002,
"loss": 0.8743,
"step": 90
},
{
"epoch": 0.007800920508620017,
"grad_norm": 0.756664514541626,
"learning_rate": 0.0002,
"loss": 0.8471,
"step": 100
},
{
"epoch": 0.007800920508620017,
"eval_loss": 0.787709653377533,
"eval_runtime": 160.5519,
"eval_samples_per_second": 33.622,
"eval_steps_per_second": 16.811,
"step": 100
},
{
"epoch": 0.008581012559482019,
"grad_norm": 0.6181501746177673,
"learning_rate": 0.0002,
"loss": 0.8125,
"step": 110
},
{
"epoch": 0.00936110461034402,
"grad_norm": 0.9252316355705261,
"learning_rate": 0.0002,
"loss": 0.8039,
"step": 120
},
{
"epoch": 0.010141196661206022,
"grad_norm": 0.8269910216331482,
"learning_rate": 0.0002,
"loss": 0.8983,
"step": 130
},
{
"epoch": 0.010921288712068024,
"grad_norm": 0.7751689553260803,
"learning_rate": 0.0002,
"loss": 0.7934,
"step": 140
},
{
"epoch": 0.011701380762930026,
"grad_norm": 0.7328248620033264,
"learning_rate": 0.0002,
"loss": 0.7806,
"step": 150
},
{
"epoch": 0.011701380762930026,
"eval_loss": 0.7735591530799866,
"eval_runtime": 160.5021,
"eval_samples_per_second": 33.632,
"eval_steps_per_second": 16.816,
"step": 150
},
{
"epoch": 0.012481472813792027,
"grad_norm": 0.7903790473937988,
"learning_rate": 0.0002,
"loss": 0.9333,
"step": 160
},
{
"epoch": 0.013261564864654029,
"grad_norm": 0.736691951751709,
"learning_rate": 0.0002,
"loss": 0.7328,
"step": 170
},
{
"epoch": 0.01404165691551603,
"grad_norm": 0.7038111090660095,
"learning_rate": 0.0002,
"loss": 0.7207,
"step": 180
},
{
"epoch": 0.014821748966378032,
"grad_norm": 0.7238824367523193,
"learning_rate": 0.0002,
"loss": 0.7662,
"step": 190
},
{
"epoch": 0.015601841017240034,
"grad_norm": 0.7471156120300293,
"learning_rate": 0.0002,
"loss": 0.7058,
"step": 200
},
{
"epoch": 0.015601841017240034,
"eval_loss": 0.7626497149467468,
"eval_runtime": 160.42,
"eval_samples_per_second": 33.649,
"eval_steps_per_second": 16.825,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.328602305200128e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}