csikasote's picture
End of training
bd94250 verified
raw
history blame
11.3 kB
{
"best_metric": Infinity,
"best_model_checkpoint": null,
"epoch": 3.8174178762414055,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15278838808250572,
"grad_norm": 10.146588325500488,
"learning_rate": 0.000285,
"loss": 14.705,
"step": 100
},
{
"epoch": 0.15278838808250572,
"eval_loss": Infinity,
"eval_runtime": 65.1562,
"eval_samples_per_second": 14.059,
"eval_steps_per_second": 3.515,
"eval_wer": 1.0003503035964503,
"step": 100
},
{
"epoch": 0.30557677616501144,
"grad_norm": 5.455625057220459,
"learning_rate": 0.00028812499999999997,
"loss": 6.0879,
"step": 200
},
{
"epoch": 0.30557677616501144,
"eval_loss": Infinity,
"eval_runtime": 65.1167,
"eval_samples_per_second": 14.067,
"eval_steps_per_second": 3.517,
"eval_wer": 1.0203176085941148,
"step": 200
},
{
"epoch": 0.45836516424751717,
"grad_norm": 5.808646202087402,
"learning_rate": 0.00027562499999999994,
"loss": 5.4042,
"step": 300
},
{
"epoch": 0.45836516424751717,
"eval_loss": Infinity,
"eval_runtime": 65.2917,
"eval_samples_per_second": 14.029,
"eval_steps_per_second": 3.507,
"eval_wer": 1.0159971975712283,
"step": 300
},
{
"epoch": 0.6111535523300229,
"grad_norm": 2.3630435466766357,
"learning_rate": 0.00026312499999999996,
"loss": 2.8156,
"step": 400
},
{
"epoch": 0.6111535523300229,
"eval_loss": Infinity,
"eval_runtime": 65.5406,
"eval_samples_per_second": 13.976,
"eval_steps_per_second": 3.494,
"eval_wer": 0.6057916861279776,
"step": 400
},
{
"epoch": 0.7639419404125286,
"grad_norm": 11.420530319213867,
"learning_rate": 0.000250625,
"loss": 1.9111,
"step": 500
},
{
"epoch": 0.7639419404125286,
"eval_loss": Infinity,
"eval_runtime": 65.6804,
"eval_samples_per_second": 13.946,
"eval_steps_per_second": 3.487,
"eval_wer": 0.5723960765997198,
"step": 500
},
{
"epoch": 0.9167303284950343,
"grad_norm": 6.228967189788818,
"learning_rate": 0.00023812499999999997,
"loss": 1.82,
"step": 600
},
{
"epoch": 0.9167303284950343,
"eval_loss": Infinity,
"eval_runtime": 65.0545,
"eval_samples_per_second": 14.081,
"eval_steps_per_second": 3.52,
"eval_wer": 0.5643390938813638,
"step": 600
},
{
"epoch": 1.0687547746371275,
"grad_norm": 5.563028812408447,
"learning_rate": 0.00022562499999999997,
"loss": 1.6294,
"step": 700
},
{
"epoch": 1.0687547746371275,
"eval_loss": Infinity,
"eval_runtime": 65.1677,
"eval_samples_per_second": 14.056,
"eval_steps_per_second": 3.514,
"eval_wer": 0.5685427370387669,
"step": 700
},
{
"epoch": 1.2215431627196334,
"grad_norm": 2.7232375144958496,
"learning_rate": 0.000213125,
"loss": 1.6856,
"step": 800
},
{
"epoch": 1.2215431627196334,
"eval_loss": Infinity,
"eval_runtime": 66.032,
"eval_samples_per_second": 13.872,
"eval_steps_per_second": 3.468,
"eval_wer": 0.5530126109294722,
"step": 800
},
{
"epoch": 1.374331550802139,
"grad_norm": 2.6046690940856934,
"learning_rate": 0.00020062499999999996,
"loss": 1.6363,
"step": 900
},
{
"epoch": 1.374331550802139,
"eval_loss": Infinity,
"eval_runtime": 65.9815,
"eval_samples_per_second": 13.883,
"eval_steps_per_second": 3.471,
"eval_wer": 0.5500934142923868,
"step": 900
},
{
"epoch": 1.5271199388846448,
"grad_norm": 2.4681789875030518,
"learning_rate": 0.00018812499999999998,
"loss": 1.5114,
"step": 1000
},
{
"epoch": 1.5271199388846448,
"eval_loss": Infinity,
"eval_runtime": 65.3495,
"eval_samples_per_second": 14.017,
"eval_steps_per_second": 3.504,
"eval_wer": 0.5416861279775805,
"step": 1000
},
{
"epoch": 1.6799083269671504,
"grad_norm": 2.219026803970337,
"learning_rate": 0.000175625,
"loss": 1.5417,
"step": 1100
},
{
"epoch": 1.6799083269671504,
"eval_loss": Infinity,
"eval_runtime": 65.2544,
"eval_samples_per_second": 14.037,
"eval_steps_per_second": 3.509,
"eval_wer": 0.5358477347034096,
"step": 1100
},
{
"epoch": 1.8326967150496563,
"grad_norm": 7.856187343597412,
"learning_rate": 0.00016312499999999997,
"loss": 1.6518,
"step": 1200
},
{
"epoch": 1.8326967150496563,
"eval_loss": Infinity,
"eval_runtime": 66.3581,
"eval_samples_per_second": 13.804,
"eval_steps_per_second": 3.451,
"eval_wer": 0.5337459131247081,
"step": 1200
},
{
"epoch": 1.985485103132162,
"grad_norm": 3.3855655193328857,
"learning_rate": 0.000150625,
"loss": 1.4795,
"step": 1300
},
{
"epoch": 1.985485103132162,
"eval_loss": Infinity,
"eval_runtime": 65.8957,
"eval_samples_per_second": 13.901,
"eval_steps_per_second": 3.475,
"eval_wer": 0.5291919663708547,
"step": 1300
},
{
"epoch": 2.137509549274255,
"grad_norm": 3.592418670654297,
"learning_rate": 0.00013812499999999998,
"loss": 1.5822,
"step": 1400
},
{
"epoch": 2.137509549274255,
"eval_loss": Infinity,
"eval_runtime": 65.4473,
"eval_samples_per_second": 13.996,
"eval_steps_per_second": 3.499,
"eval_wer": 0.5277907519850538,
"step": 1400
},
{
"epoch": 2.290297937356761,
"grad_norm": 5.870262622833252,
"learning_rate": 0.000125625,
"loss": 1.4938,
"step": 1500
},
{
"epoch": 2.290297937356761,
"eval_loss": Infinity,
"eval_runtime": 65.3633,
"eval_samples_per_second": 14.014,
"eval_steps_per_second": 3.503,
"eval_wer": 0.5193834656702475,
"step": 1500
},
{
"epoch": 2.4430863254392667,
"grad_norm": 2.455545663833618,
"learning_rate": 0.00011312499999999999,
"loss": 1.5701,
"step": 1600
},
{
"epoch": 2.4430863254392667,
"eval_loss": Infinity,
"eval_runtime": 66.1226,
"eval_samples_per_second": 13.853,
"eval_steps_per_second": 3.463,
"eval_wer": 0.5352638953759925,
"step": 1600
},
{
"epoch": 2.5958747135217726,
"grad_norm": 2.66916823387146,
"learning_rate": 0.00010062499999999998,
"loss": 1.47,
"step": 1700
},
{
"epoch": 2.5958747135217726,
"eval_loss": Infinity,
"eval_runtime": 66.22,
"eval_samples_per_second": 13.833,
"eval_steps_per_second": 3.458,
"eval_wer": 0.5183325548808968,
"step": 1700
},
{
"epoch": 2.748663101604278,
"grad_norm": 7.257411956787109,
"learning_rate": 8.8125e-05,
"loss": 1.4109,
"step": 1800
},
{
"epoch": 2.748663101604278,
"eval_loss": Infinity,
"eval_runtime": 65.3542,
"eval_samples_per_second": 14.016,
"eval_steps_per_second": 3.504,
"eval_wer": 0.5342129845866418,
"step": 1800
},
{
"epoch": 2.901451489686784,
"grad_norm": 3.3290886878967285,
"learning_rate": 7.5625e-05,
"loss": 1.3993,
"step": 1900
},
{
"epoch": 2.901451489686784,
"eval_loss": Infinity,
"eval_runtime": 65.5447,
"eval_samples_per_second": 13.975,
"eval_steps_per_second": 3.494,
"eval_wer": 0.5175151798225128,
"step": 1900
},
{
"epoch": 3.053475935828877,
"grad_norm": 2.5621745586395264,
"learning_rate": 6.312499999999999e-05,
"loss": 1.4848,
"step": 2000
},
{
"epoch": 3.053475935828877,
"eval_loss": Infinity,
"eval_runtime": 66.0221,
"eval_samples_per_second": 13.874,
"eval_steps_per_second": 3.469,
"eval_wer": 0.5177487155534797,
"step": 2000
},
{
"epoch": 3.2062643239113826,
"grad_norm": 5.282195568084717,
"learning_rate": 5.0625e-05,
"loss": 1.4331,
"step": 2100
},
{
"epoch": 3.2062643239113826,
"eval_loss": Infinity,
"eval_runtime": 66.1067,
"eval_samples_per_second": 13.856,
"eval_steps_per_second": 3.464,
"eval_wer": 0.5286081270434376,
"step": 2100
},
{
"epoch": 3.3590527119938884,
"grad_norm": 4.0164008140563965,
"learning_rate": 3.812499999999999e-05,
"loss": 1.4392,
"step": 2200
},
{
"epoch": 3.3590527119938884,
"eval_loss": Infinity,
"eval_runtime": 65.662,
"eval_samples_per_second": 13.95,
"eval_steps_per_second": 3.488,
"eval_wer": 0.5212517515179822,
"step": 2200
},
{
"epoch": 3.5118411000763943,
"grad_norm": 8.766704559326172,
"learning_rate": 2.5625e-05,
"loss": 1.3324,
"step": 2300
},
{
"epoch": 3.5118411000763943,
"eval_loss": Infinity,
"eval_runtime": 65.6317,
"eval_samples_per_second": 13.957,
"eval_steps_per_second": 3.489,
"eval_wer": 0.5161139654367118,
"step": 2300
},
{
"epoch": 3.6646294881588997,
"grad_norm": 2.9708027839660645,
"learning_rate": 1.3124999999999999e-05,
"loss": 1.501,
"step": 2400
},
{
"epoch": 3.6646294881588997,
"eval_loss": Infinity,
"eval_runtime": 66.0549,
"eval_samples_per_second": 13.867,
"eval_steps_per_second": 3.467,
"eval_wer": 0.5159971975712284,
"step": 2400
},
{
"epoch": 3.8174178762414055,
"grad_norm": 3.6458466053009033,
"learning_rate": 6.249999999999999e-07,
"loss": 1.3526,
"step": 2500
},
{
"epoch": 3.8174178762414055,
"eval_loss": Infinity,
"eval_runtime": 66.6457,
"eval_samples_per_second": 13.744,
"eval_steps_per_second": 3.436,
"eval_wer": 0.5163475011676787,
"step": 2500
},
{
"epoch": 3.8174178762414055,
"step": 2500,
"total_flos": 1.2788709685256737e+19,
"train_loss": 2.4539583984375,
"train_runtime": 5128.7055,
"train_samples_per_second": 3.9,
"train_steps_per_second": 0.487
}
],
"logging_steps": 100,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2788709685256737e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}