{ "best_metric": Infinity, "best_model_checkpoint": null, "epoch": 1.5271838729383018, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06108735491753207, "grad_norm": 5.72606897354126, "learning_rate": 0.000285, "loss": 14.5485, "step": 100 }, { "epoch": 0.06108735491753207, "eval_loss": Infinity, "eval_runtime": 63.2459, "eval_samples_per_second": 14.483, "eval_steps_per_second": 3.621, "eval_wer": 1.0038533395609528, "step": 100 }, { "epoch": 0.12217470983506414, "grad_norm": 4.610683441162109, "learning_rate": 0.00028812499999999997, "loss": 6.1502, "step": 200 }, { "epoch": 0.12217470983506414, "eval_loss": Infinity, "eval_runtime": 63.1126, "eval_samples_per_second": 14.514, "eval_steps_per_second": 3.628, "eval_wer": 1.0674918262494162, "step": 200 }, { "epoch": 0.1832620647525962, "grad_norm": 6.0751118659973145, "learning_rate": 0.00027562499999999994, "loss": 5.1685, "step": 300 }, { "epoch": 0.1832620647525962, "eval_loss": Infinity, "eval_runtime": 62.7627, "eval_samples_per_second": 14.595, "eval_steps_per_second": 3.649, "eval_wer": 1.005254553946754, "step": 300 }, { "epoch": 0.24434941967012827, "grad_norm": 6.21919059753418, "learning_rate": 0.00026312499999999996, "loss": 2.0876, "step": 400 }, { "epoch": 0.24434941967012827, "eval_loss": Infinity, "eval_runtime": 63.0153, "eval_samples_per_second": 14.536, "eval_steps_per_second": 3.634, "eval_wer": 0.5857076132648296, "step": 400 }, { "epoch": 0.30543677458766033, "grad_norm": 3.409900188446045, "learning_rate": 0.000250625, "loss": 1.7116, "step": 500 }, { "epoch": 0.30543677458766033, "eval_loss": Infinity, "eval_runtime": 63.3982, "eval_samples_per_second": 14.448, "eval_steps_per_second": 3.612, "eval_wer": 0.5758991125642223, "step": 500 }, { "epoch": 0.3665241295051924, "grad_norm": 5.974458694458008, "learning_rate": 0.00023812499999999997, "loss": 1.6505, "step": 600 }, { "epoch": 0.3665241295051924, "eval_loss": Infinity, "eval_runtime": 62.8299, "eval_samples_per_second": 14.579, "eval_steps_per_second": 3.645, "eval_wer": 0.5579168612797758, "step": 600 }, { "epoch": 0.4276114844227245, "grad_norm": 4.58453369140625, "learning_rate": 0.00022562499999999997, "loss": 1.6573, "step": 700 }, { "epoch": 0.4276114844227245, "eval_loss": Infinity, "eval_runtime": 62.8462, "eval_samples_per_second": 14.575, "eval_steps_per_second": 3.644, "eval_wer": 0.5470574497898179, "step": 700 }, { "epoch": 0.48869883934025654, "grad_norm": 3.033734083175659, "learning_rate": 0.000213125, "loss": 1.4679, "step": 800 }, { "epoch": 0.48869883934025654, "eval_loss": Infinity, "eval_runtime": 63.0713, "eval_samples_per_second": 14.523, "eval_steps_per_second": 3.631, "eval_wer": 0.5527790751985053, "step": 800 }, { "epoch": 0.5497861942577886, "grad_norm": 4.104335308074951, "learning_rate": 0.00020062499999999996, "loss": 1.4955, "step": 900 }, { "epoch": 0.5497861942577886, "eval_loss": Infinity, "eval_runtime": 63.3962, "eval_samples_per_second": 14.449, "eval_steps_per_second": 3.612, "eval_wer": 0.5368986454927603, "step": 900 }, { "epoch": 0.6108735491753207, "grad_norm": 22.828868865966797, "learning_rate": 0.00018812499999999998, "loss": 1.664, "step": 1000 }, { "epoch": 0.6108735491753207, "eval_loss": Infinity, "eval_runtime": 63.1315, "eval_samples_per_second": 14.509, "eval_steps_per_second": 3.627, "eval_wer": 0.5328117702008407, "step": 1000 }, { "epoch": 0.6719609040928528, "grad_norm": 3.1733903884887695, "learning_rate": 0.000175625, "loss": 1.61, "step": 1100 }, { "epoch": 0.6719609040928528, "eval_loss": Infinity, "eval_runtime": 63.2714, "eval_samples_per_second": 14.477, "eval_steps_per_second": 3.619, "eval_wer": 0.5335123773937412, "step": 1100 }, { "epoch": 0.7330482590103848, "grad_norm": 4.8088698387146, "learning_rate": 0.00016312499999999997, "loss": 1.6414, "step": 1200 }, { "epoch": 0.7330482590103848, "eval_loss": Infinity, "eval_runtime": 63.0546, "eval_samples_per_second": 14.527, "eval_steps_per_second": 3.632, "eval_wer": 0.5293087342363382, "step": 1200 }, { "epoch": 0.7941356139279169, "grad_norm": 3.214020252227783, "learning_rate": 0.000150625, "loss": 1.6321, "step": 1300 }, { "epoch": 0.7941356139279169, "eval_loss": Infinity, "eval_runtime": 63.7075, "eval_samples_per_second": 14.378, "eval_steps_per_second": 3.595, "eval_wer": 0.5270901447921532, "step": 1300 }, { "epoch": 0.855222968845449, "grad_norm": 4.21952486038208, "learning_rate": 0.00013812499999999998, "loss": 1.4686, "step": 1400 }, { "epoch": 0.855222968845449, "eval_loss": Infinity, "eval_runtime": 63.2487, "eval_samples_per_second": 14.483, "eval_steps_per_second": 3.621, "eval_wer": 0.5296590378327884, "step": 1400 }, { "epoch": 0.916310323762981, "grad_norm": 15.665727615356445, "learning_rate": 0.000125625, "loss": 1.5073, "step": 1500 }, { "epoch": 0.916310323762981, "eval_loss": Infinity, "eval_runtime": 63.2373, "eval_samples_per_second": 14.485, "eval_steps_per_second": 3.621, "eval_wer": 0.5325782344698738, "step": 1500 }, { "epoch": 0.9773976786805131, "grad_norm": 5.875314712524414, "learning_rate": 0.00011312499999999999, "loss": 1.6164, "step": 1600 }, { "epoch": 0.9773976786805131, "eval_loss": Infinity, "eval_runtime": 63.2705, "eval_samples_per_second": 14.478, "eval_steps_per_second": 3.619, "eval_wer": 0.5234703409621672, "step": 1600 }, { "epoch": 1.0384850335980451, "grad_norm": 3.3934853076934814, "learning_rate": 0.00010062499999999998, "loss": 1.577, "step": 1700 }, { "epoch": 1.0384850335980451, "eval_loss": Infinity, "eval_runtime": 63.8167, "eval_samples_per_second": 14.354, "eval_steps_per_second": 3.588, "eval_wer": 0.5238206445586174, "step": 1700 }, { "epoch": 1.0995723885155773, "grad_norm": 1.4915640354156494, "learning_rate": 8.8125e-05, "loss": 1.383, "step": 1800 }, { "epoch": 1.0995723885155773, "eval_loss": Infinity, "eval_runtime": 63.3959, "eval_samples_per_second": 14.449, "eval_steps_per_second": 3.612, "eval_wer": 0.5217188229799159, "step": 1800 }, { "epoch": 1.1606597434331094, "grad_norm": 2.607821464538574, "learning_rate": 7.5625e-05, "loss": 1.4391, "step": 1900 }, { "epoch": 1.1606597434331094, "eval_loss": Infinity, "eval_runtime": 63.2533, "eval_samples_per_second": 14.481, "eval_steps_per_second": 3.62, "eval_wer": 0.5291919663708547, "step": 1900 }, { "epoch": 1.2217470983506413, "grad_norm": 2.207900285720825, "learning_rate": 6.312499999999999e-05, "loss": 1.5327, "step": 2000 }, { "epoch": 1.2217470983506413, "eval_loss": Infinity, "eval_runtime": 63.6941, "eval_samples_per_second": 14.381, "eval_steps_per_second": 3.595, "eval_wer": 0.5254553946753854, "step": 2000 }, { "epoch": 1.2828344532681735, "grad_norm": 1.4653774499893188, "learning_rate": 5.0625e-05, "loss": 1.3653, "step": 2100 }, { "epoch": 1.2828344532681735, "eval_loss": Infinity, "eval_runtime": 63.8557, "eval_samples_per_second": 14.345, "eval_steps_per_second": 3.586, "eval_wer": 0.5195002335357309, "step": 2100 }, { "epoch": 1.3439218081857056, "grad_norm": 2.36387038230896, "learning_rate": 3.812499999999999e-05, "loss": 1.4901, "step": 2200 }, { "epoch": 1.3439218081857056, "eval_loss": Infinity, "eval_runtime": 63.4906, "eval_samples_per_second": 14.427, "eval_steps_per_second": 3.607, "eval_wer": 0.5186828584773471, "step": 2200 }, { "epoch": 1.4050091631032378, "grad_norm": 12.051000595092773, "learning_rate": 2.5625e-05, "loss": 1.4263, "step": 2300 }, { "epoch": 1.4050091631032378, "eval_loss": Infinity, "eval_runtime": 63.7851, "eval_samples_per_second": 14.361, "eval_steps_per_second": 3.59, "eval_wer": 0.5169313404950957, "step": 2300 }, { "epoch": 1.4660965180207697, "grad_norm": 7.876661777496338, "learning_rate": 1.3124999999999999e-05, "loss": 1.4603, "step": 2400 }, { "epoch": 1.4660965180207697, "eval_loss": Infinity, "eval_runtime": 63.6972, "eval_samples_per_second": 14.381, "eval_steps_per_second": 3.595, "eval_wer": 0.5178654834189631, "step": 2400 }, { "epoch": 1.5271838729383018, "grad_norm": 3.0910115242004395, "learning_rate": 6.249999999999999e-07, "loss": 1.4802, "step": 2500 }, { "epoch": 1.5271838729383018, "eval_loss": Infinity, "eval_runtime": 64.3534, "eval_samples_per_second": 14.234, "eval_steps_per_second": 3.558, "eval_wer": 0.5155301261092947, "step": 2500 }, { "epoch": 1.5271838729383018, "step": 2500, "total_flos": 1.2858032865257505e+19, "train_loss": 2.409264056396484, "train_runtime": 4973.4846, "train_samples_per_second": 4.021, "train_steps_per_second": 0.503 } ], "logging_steps": 100, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2858032865257505e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }