{
  "best_metric": Infinity,
  "best_model_checkpoint": null,
  "epoch": 3.8174178762414055,
  "eval_steps": 100,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15278838808250572,
      "grad_norm": 10.146588325500488,
      "learning_rate": 0.000285,
      "loss": 14.705,
      "step": 100
    },
    {
      "epoch": 0.15278838808250572,
      "eval_loss": Infinity,
      "eval_runtime": 65.1562,
      "eval_samples_per_second": 14.059,
      "eval_steps_per_second": 3.515,
      "eval_wer": 1.0003503035964503,
      "step": 100
    },
    {
      "epoch": 0.30557677616501144,
      "grad_norm": 5.455625057220459,
      "learning_rate": 0.00028812499999999997,
      "loss": 6.0879,
      "step": 200
    },
    {
      "epoch": 0.30557677616501144,
      "eval_loss": Infinity,
      "eval_runtime": 65.1167,
      "eval_samples_per_second": 14.067,
      "eval_steps_per_second": 3.517,
      "eval_wer": 1.0203176085941148,
      "step": 200
    },
    {
      "epoch": 0.45836516424751717,
      "grad_norm": 5.808646202087402,
      "learning_rate": 0.00027562499999999994,
      "loss": 5.4042,
      "step": 300
    },
    {
      "epoch": 0.45836516424751717,
      "eval_loss": Infinity,
      "eval_runtime": 65.2917,
      "eval_samples_per_second": 14.029,
      "eval_steps_per_second": 3.507,
      "eval_wer": 1.0159971975712283,
      "step": 300
    },
    {
      "epoch": 0.6111535523300229,
      "grad_norm": 2.3630435466766357,
      "learning_rate": 0.00026312499999999996,
      "loss": 2.8156,
      "step": 400
    },
    {
      "epoch": 0.6111535523300229,
      "eval_loss": Infinity,
      "eval_runtime": 65.5406,
      "eval_samples_per_second": 13.976,
      "eval_steps_per_second": 3.494,
      "eval_wer": 0.6057916861279776,
      "step": 400
    },
    {
      "epoch": 0.7639419404125286,
      "grad_norm": 11.420530319213867,
      "learning_rate": 0.000250625,
      "loss": 1.9111,
      "step": 500
    },
    {
      "epoch": 0.7639419404125286,
      "eval_loss": Infinity,
      "eval_runtime": 65.6804,
      "eval_samples_per_second": 13.946,
      "eval_steps_per_second": 3.487,
      "eval_wer": 0.5723960765997198,
      "step": 500
    },
    {
      "epoch": 0.9167303284950343,
      "grad_norm": 6.228967189788818,
      "learning_rate": 0.00023812499999999997,
      "loss": 1.82,
      "step": 600
    },
    {
      "epoch": 0.9167303284950343,
      "eval_loss": Infinity,
      "eval_runtime": 65.0545,
      "eval_samples_per_second": 14.081,
      "eval_steps_per_second": 3.52,
      "eval_wer": 0.5643390938813638,
      "step": 600
    },
    {
      "epoch": 1.0687547746371275,
      "grad_norm": 5.563028812408447,
      "learning_rate": 0.00022562499999999997,
      "loss": 1.6294,
      "step": 700
    },
    {
      "epoch": 1.0687547746371275,
      "eval_loss": Infinity,
      "eval_runtime": 65.1677,
      "eval_samples_per_second": 14.056,
      "eval_steps_per_second": 3.514,
      "eval_wer": 0.5685427370387669,
      "step": 700
    },
    {
      "epoch": 1.2215431627196334,
      "grad_norm": 2.7232375144958496,
      "learning_rate": 0.000213125,
      "loss": 1.6856,
      "step": 800
    },
    {
      "epoch": 1.2215431627196334,
      "eval_loss": Infinity,
      "eval_runtime": 66.032,
      "eval_samples_per_second": 13.872,
      "eval_steps_per_second": 3.468,
      "eval_wer": 0.5530126109294722,
      "step": 800
    },
    {
      "epoch": 1.374331550802139,
      "grad_norm": 2.6046690940856934,
      "learning_rate": 0.00020062499999999996,
      "loss": 1.6363,
      "step": 900
    },
    {
      "epoch": 1.374331550802139,
      "eval_loss": Infinity,
      "eval_runtime": 65.9815,
      "eval_samples_per_second": 13.883,
      "eval_steps_per_second": 3.471,
      "eval_wer": 0.5500934142923868,
      "step": 900
    },
    {
      "epoch": 1.5271199388846448,
      "grad_norm": 2.4681789875030518,
      "learning_rate": 0.00018812499999999998,
      "loss": 1.5114,
      "step": 1000
    },
    {
      "epoch": 1.5271199388846448,
      "eval_loss": Infinity,
      "eval_runtime": 65.3495,
      "eval_samples_per_second": 14.017,
      "eval_steps_per_second": 3.504,
      "eval_wer": 0.5416861279775805,
      "step": 1000
    },
    {
      "epoch": 1.6799083269671504,
      "grad_norm": 2.219026803970337,
      "learning_rate": 0.000175625,
      "loss": 1.5417,
      "step": 1100
    },
    {
      "epoch": 1.6799083269671504,
      "eval_loss": Infinity,
      "eval_runtime": 65.2544,
      "eval_samples_per_second": 14.037,
      "eval_steps_per_second": 3.509,
      "eval_wer": 0.5358477347034096,
      "step": 1100
    },
    {
      "epoch": 1.8326967150496563,
      "grad_norm": 7.856187343597412,
      "learning_rate": 0.00016312499999999997,
      "loss": 1.6518,
      "step": 1200
    },
    {
      "epoch": 1.8326967150496563,
      "eval_loss": Infinity,
      "eval_runtime": 66.3581,
      "eval_samples_per_second": 13.804,
      "eval_steps_per_second": 3.451,
      "eval_wer": 0.5337459131247081,
      "step": 1200
    },
    {
      "epoch": 1.985485103132162,
      "grad_norm": 3.3855655193328857,
      "learning_rate": 0.000150625,
      "loss": 1.4795,
      "step": 1300
    },
    {
      "epoch": 1.985485103132162,
      "eval_loss": Infinity,
      "eval_runtime": 65.8957,
      "eval_samples_per_second": 13.901,
      "eval_steps_per_second": 3.475,
      "eval_wer": 0.5291919663708547,
      "step": 1300
    },
    {
      "epoch": 2.137509549274255,
      "grad_norm": 3.592418670654297,
      "learning_rate": 0.00013812499999999998,
      "loss": 1.5822,
      "step": 1400
    },
    {
      "epoch": 2.137509549274255,
      "eval_loss": Infinity,
      "eval_runtime": 65.4473,
      "eval_samples_per_second": 13.996,
      "eval_steps_per_second": 3.499,
      "eval_wer": 0.5277907519850538,
      "step": 1400
    },
    {
      "epoch": 2.290297937356761,
      "grad_norm": 5.870262622833252,
      "learning_rate": 0.000125625,
      "loss": 1.4938,
      "step": 1500
    },
    {
      "epoch": 2.290297937356761,
      "eval_loss": Infinity,
      "eval_runtime": 65.3633,
      "eval_samples_per_second": 14.014,
      "eval_steps_per_second": 3.503,
      "eval_wer": 0.5193834656702475,
      "step": 1500
    },
    {
      "epoch": 2.4430863254392667,
      "grad_norm": 2.455545663833618,
      "learning_rate": 0.00011312499999999999,
      "loss": 1.5701,
      "step": 1600
    },
    {
      "epoch": 2.4430863254392667,
      "eval_loss": Infinity,
      "eval_runtime": 66.1226,
      "eval_samples_per_second": 13.853,
      "eval_steps_per_second": 3.463,
      "eval_wer": 0.5352638953759925,
      "step": 1600
    },
    {
      "epoch": 2.5958747135217726,
      "grad_norm": 2.66916823387146,
      "learning_rate": 0.00010062499999999998,
      "loss": 1.47,
      "step": 1700
    },
    {
      "epoch": 2.5958747135217726,
      "eval_loss": Infinity,
      "eval_runtime": 66.22,
      "eval_samples_per_second": 13.833,
      "eval_steps_per_second": 3.458,
      "eval_wer": 0.5183325548808968,
      "step": 1700
    },
    {
      "epoch": 2.748663101604278,
      "grad_norm": 7.257411956787109,
      "learning_rate": 8.8125e-05,
      "loss": 1.4109,
      "step": 1800
    },
    {
      "epoch": 2.748663101604278,
      "eval_loss": Infinity,
      "eval_runtime": 65.3542,
      "eval_samples_per_second": 14.016,
      "eval_steps_per_second": 3.504,
      "eval_wer": 0.5342129845866418,
      "step": 1800
    },
    {
      "epoch": 2.901451489686784,
      "grad_norm": 3.3290886878967285,
      "learning_rate": 7.5625e-05,
      "loss": 1.3993,
      "step": 1900
    },
    {
      "epoch": 2.901451489686784,
      "eval_loss": Infinity,
      "eval_runtime": 65.5447,
      "eval_samples_per_second": 13.975,
      "eval_steps_per_second": 3.494,
      "eval_wer": 0.5175151798225128,
      "step": 1900
    },
    {
      "epoch": 3.053475935828877,
      "grad_norm": 2.5621745586395264,
      "learning_rate": 6.312499999999999e-05,
      "loss": 1.4848,
      "step": 2000
    },
    {
      "epoch": 3.053475935828877,
      "eval_loss": Infinity,
      "eval_runtime": 66.0221,
      "eval_samples_per_second": 13.874,
      "eval_steps_per_second": 3.469,
      "eval_wer": 0.5177487155534797,
      "step": 2000
    },
    {
      "epoch": 3.2062643239113826,
      "grad_norm": 5.282195568084717,
      "learning_rate": 5.0625e-05,
      "loss": 1.4331,
      "step": 2100
    },
    {
      "epoch": 3.2062643239113826,
      "eval_loss": Infinity,
      "eval_runtime": 66.1067,
      "eval_samples_per_second": 13.856,
      "eval_steps_per_second": 3.464,
      "eval_wer": 0.5286081270434376,
      "step": 2100
    },
    {
      "epoch": 3.3590527119938884,
      "grad_norm": 4.0164008140563965,
      "learning_rate": 3.812499999999999e-05,
      "loss": 1.4392,
      "step": 2200
    },
    {
      "epoch": 3.3590527119938884,
      "eval_loss": Infinity,
      "eval_runtime": 65.662,
      "eval_samples_per_second": 13.95,
      "eval_steps_per_second": 3.488,
      "eval_wer": 0.5212517515179822,
      "step": 2200
    },
    {
      "epoch": 3.5118411000763943,
      "grad_norm": 8.766704559326172,
      "learning_rate": 2.5625e-05,
      "loss": 1.3324,
      "step": 2300
    },
    {
      "epoch": 3.5118411000763943,
      "eval_loss": Infinity,
      "eval_runtime": 65.6317,
      "eval_samples_per_second": 13.957,
      "eval_steps_per_second": 3.489,
      "eval_wer": 0.5161139654367118,
      "step": 2300
    },
    {
      "epoch": 3.6646294881588997,
      "grad_norm": 2.9708027839660645,
      "learning_rate": 1.3124999999999999e-05,
      "loss": 1.501,
      "step": 2400
    },
    {
      "epoch": 3.6646294881588997,
      "eval_loss": Infinity,
      "eval_runtime": 66.0549,
      "eval_samples_per_second": 13.867,
      "eval_steps_per_second": 3.467,
      "eval_wer": 0.5159971975712284,
      "step": 2400
    },
    {
      "epoch": 3.8174178762414055,
      "grad_norm": 3.6458466053009033,
      "learning_rate": 6.249999999999999e-07,
      "loss": 1.3526,
      "step": 2500
    },
    {
      "epoch": 3.8174178762414055,
      "eval_loss": Infinity,
      "eval_runtime": 66.6457,
      "eval_samples_per_second": 13.744,
      "eval_steps_per_second": 3.436,
      "eval_wer": 0.5163475011676787,
      "step": 2500
    },
    {
      "epoch": 3.8174178762414055,
      "step": 2500,
      "total_flos": 1.2788709685256737e+19,
      "train_loss": 2.4539583984375,
      "train_runtime": 5128.7055,
      "train_samples_per_second": 3.9,
      "train_steps_per_second": 0.487
    }
  ],
  "logging_steps": 100,
  "max_steps": 2500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2788709685256737e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}