{ "best_metric": Infinity, "best_model_checkpoint": null, "epoch": 3.8174178762414055, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15278838808250572, "grad_norm": 10.146588325500488, "learning_rate": 0.000285, "loss": 14.705, "step": 100 }, { "epoch": 0.15278838808250572, "eval_loss": Infinity, "eval_runtime": 65.1562, "eval_samples_per_second": 14.059, "eval_steps_per_second": 3.515, "eval_wer": 1.0003503035964503, "step": 100 }, { "epoch": 0.30557677616501144, "grad_norm": 5.455625057220459, "learning_rate": 0.00028812499999999997, "loss": 6.0879, "step": 200 }, { "epoch": 0.30557677616501144, "eval_loss": Infinity, "eval_runtime": 65.1167, "eval_samples_per_second": 14.067, "eval_steps_per_second": 3.517, "eval_wer": 1.0203176085941148, "step": 200 }, { "epoch": 0.45836516424751717, "grad_norm": 5.808646202087402, "learning_rate": 0.00027562499999999994, "loss": 5.4042, "step": 300 }, { "epoch": 0.45836516424751717, "eval_loss": Infinity, "eval_runtime": 65.2917, "eval_samples_per_second": 14.029, "eval_steps_per_second": 3.507, "eval_wer": 1.0159971975712283, "step": 300 }, { "epoch": 0.6111535523300229, "grad_norm": 2.3630435466766357, "learning_rate": 0.00026312499999999996, "loss": 2.8156, "step": 400 }, { "epoch": 0.6111535523300229, "eval_loss": Infinity, "eval_runtime": 65.5406, "eval_samples_per_second": 13.976, "eval_steps_per_second": 3.494, "eval_wer": 0.6057916861279776, "step": 400 }, { "epoch": 0.7639419404125286, "grad_norm": 11.420530319213867, "learning_rate": 0.000250625, "loss": 1.9111, "step": 500 }, { "epoch": 0.7639419404125286, "eval_loss": Infinity, "eval_runtime": 65.6804, "eval_samples_per_second": 13.946, "eval_steps_per_second": 3.487, "eval_wer": 0.5723960765997198, "step": 500 }, { "epoch": 0.9167303284950343, "grad_norm": 6.228967189788818, "learning_rate": 0.00023812499999999997, "loss": 1.82, "step": 600 }, { "epoch": 0.9167303284950343, "eval_loss": Infinity, "eval_runtime": 65.0545, "eval_samples_per_second": 14.081, "eval_steps_per_second": 3.52, "eval_wer": 0.5643390938813638, "step": 600 }, { "epoch": 1.0687547746371275, "grad_norm": 5.563028812408447, "learning_rate": 0.00022562499999999997, "loss": 1.6294, "step": 700 }, { "epoch": 1.0687547746371275, "eval_loss": Infinity, "eval_runtime": 65.1677, "eval_samples_per_second": 14.056, "eval_steps_per_second": 3.514, "eval_wer": 0.5685427370387669, "step": 700 }, { "epoch": 1.2215431627196334, "grad_norm": 2.7232375144958496, "learning_rate": 0.000213125, "loss": 1.6856, "step": 800 }, { "epoch": 1.2215431627196334, "eval_loss": Infinity, "eval_runtime": 66.032, "eval_samples_per_second": 13.872, "eval_steps_per_second": 3.468, "eval_wer": 0.5530126109294722, "step": 800 }, { "epoch": 1.374331550802139, "grad_norm": 2.6046690940856934, "learning_rate": 0.00020062499999999996, "loss": 1.6363, "step": 900 }, { "epoch": 1.374331550802139, "eval_loss": Infinity, "eval_runtime": 65.9815, "eval_samples_per_second": 13.883, "eval_steps_per_second": 3.471, "eval_wer": 0.5500934142923868, "step": 900 }, { "epoch": 1.5271199388846448, "grad_norm": 2.4681789875030518, "learning_rate": 0.00018812499999999998, "loss": 1.5114, "step": 1000 }, { "epoch": 1.5271199388846448, "eval_loss": Infinity, "eval_runtime": 65.3495, "eval_samples_per_second": 14.017, "eval_steps_per_second": 3.504, "eval_wer": 0.5416861279775805, "step": 1000 }, { "epoch": 1.6799083269671504, "grad_norm": 2.219026803970337, "learning_rate": 0.000175625, "loss": 1.5417, "step": 1100 }, { "epoch": 1.6799083269671504, "eval_loss": Infinity, "eval_runtime": 65.2544, "eval_samples_per_second": 14.037, "eval_steps_per_second": 3.509, "eval_wer": 0.5358477347034096, "step": 1100 }, { "epoch": 1.8326967150496563, "grad_norm": 7.856187343597412, "learning_rate": 0.00016312499999999997, "loss": 1.6518, "step": 1200 }, { "epoch": 1.8326967150496563, "eval_loss": Infinity, "eval_runtime": 66.3581, "eval_samples_per_second": 13.804, "eval_steps_per_second": 3.451, "eval_wer": 0.5337459131247081, "step": 1200 }, { "epoch": 1.985485103132162, "grad_norm": 3.3855655193328857, "learning_rate": 0.000150625, "loss": 1.4795, "step": 1300 }, { "epoch": 1.985485103132162, "eval_loss": Infinity, "eval_runtime": 65.8957, "eval_samples_per_second": 13.901, "eval_steps_per_second": 3.475, "eval_wer": 0.5291919663708547, "step": 1300 }, { "epoch": 2.137509549274255, "grad_norm": 3.592418670654297, "learning_rate": 0.00013812499999999998, "loss": 1.5822, "step": 1400 }, { "epoch": 2.137509549274255, "eval_loss": Infinity, "eval_runtime": 65.4473, "eval_samples_per_second": 13.996, "eval_steps_per_second": 3.499, "eval_wer": 0.5277907519850538, "step": 1400 }, { "epoch": 2.290297937356761, "grad_norm": 5.870262622833252, "learning_rate": 0.000125625, "loss": 1.4938, "step": 1500 }, { "epoch": 2.290297937356761, "eval_loss": Infinity, "eval_runtime": 65.3633, "eval_samples_per_second": 14.014, "eval_steps_per_second": 3.503, "eval_wer": 0.5193834656702475, "step": 1500 }, { "epoch": 2.4430863254392667, "grad_norm": 2.455545663833618, "learning_rate": 0.00011312499999999999, "loss": 1.5701, "step": 1600 }, { "epoch": 2.4430863254392667, "eval_loss": Infinity, "eval_runtime": 66.1226, "eval_samples_per_second": 13.853, "eval_steps_per_second": 3.463, "eval_wer": 0.5352638953759925, "step": 1600 }, { "epoch": 2.5958747135217726, "grad_norm": 2.66916823387146, "learning_rate": 0.00010062499999999998, "loss": 1.47, "step": 1700 }, { "epoch": 2.5958747135217726, "eval_loss": Infinity, "eval_runtime": 66.22, "eval_samples_per_second": 13.833, "eval_steps_per_second": 3.458, "eval_wer": 0.5183325548808968, "step": 1700 }, { "epoch": 2.748663101604278, "grad_norm": 7.257411956787109, "learning_rate": 8.8125e-05, "loss": 1.4109, "step": 1800 }, { "epoch": 2.748663101604278, "eval_loss": Infinity, "eval_runtime": 65.3542, "eval_samples_per_second": 14.016, "eval_steps_per_second": 3.504, "eval_wer": 0.5342129845866418, "step": 1800 }, { "epoch": 2.901451489686784, "grad_norm": 3.3290886878967285, "learning_rate": 7.5625e-05, "loss": 1.3993, "step": 1900 }, { "epoch": 2.901451489686784, "eval_loss": Infinity, "eval_runtime": 65.5447, "eval_samples_per_second": 13.975, "eval_steps_per_second": 3.494, "eval_wer": 0.5175151798225128, "step": 1900 }, { "epoch": 3.053475935828877, "grad_norm": 2.5621745586395264, "learning_rate": 6.312499999999999e-05, "loss": 1.4848, "step": 2000 }, { "epoch": 3.053475935828877, "eval_loss": Infinity, "eval_runtime": 66.0221, "eval_samples_per_second": 13.874, "eval_steps_per_second": 3.469, "eval_wer": 0.5177487155534797, "step": 2000 }, { "epoch": 3.2062643239113826, "grad_norm": 5.282195568084717, "learning_rate": 5.0625e-05, "loss": 1.4331, "step": 2100 }, { "epoch": 3.2062643239113826, "eval_loss": Infinity, "eval_runtime": 66.1067, "eval_samples_per_second": 13.856, "eval_steps_per_second": 3.464, "eval_wer": 0.5286081270434376, "step": 2100 }, { "epoch": 3.3590527119938884, "grad_norm": 4.0164008140563965, "learning_rate": 3.812499999999999e-05, "loss": 1.4392, "step": 2200 }, { "epoch": 3.3590527119938884, "eval_loss": Infinity, "eval_runtime": 65.662, "eval_samples_per_second": 13.95, "eval_steps_per_second": 3.488, "eval_wer": 0.5212517515179822, "step": 2200 }, { "epoch": 3.5118411000763943, "grad_norm": 8.766704559326172, "learning_rate": 2.5625e-05, "loss": 1.3324, "step": 2300 }, { "epoch": 3.5118411000763943, "eval_loss": Infinity, "eval_runtime": 65.6317, "eval_samples_per_second": 13.957, "eval_steps_per_second": 3.489, "eval_wer": 0.5161139654367118, "step": 2300 }, { "epoch": 3.6646294881588997, "grad_norm": 2.9708027839660645, "learning_rate": 1.3124999999999999e-05, "loss": 1.501, "step": 2400 }, { "epoch": 3.6646294881588997, "eval_loss": Infinity, "eval_runtime": 66.0549, "eval_samples_per_second": 13.867, "eval_steps_per_second": 3.467, "eval_wer": 0.5159971975712284, "step": 2400 }, { "epoch": 3.8174178762414055, "grad_norm": 3.6458466053009033, "learning_rate": 6.249999999999999e-07, "loss": 1.3526, "step": 2500 }, { "epoch": 3.8174178762414055, "eval_loss": Infinity, "eval_runtime": 66.6457, "eval_samples_per_second": 13.744, "eval_steps_per_second": 3.436, "eval_wer": 0.5163475011676787, "step": 2500 }, { "epoch": 3.8174178762414055, "step": 2500, "total_flos": 1.2788709685256737e+19, "train_loss": 2.4539583984375, "train_runtime": 5128.7055, "train_samples_per_second": 3.9, "train_steps_per_second": 0.487 } ], "logging_steps": 100, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2788709685256737e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }