{ "best_metric": Infinity, "best_model_checkpoint": null, "epoch": 1.8811136192626035, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07524454477050414, "grad_norm": 8.513701438903809, "learning_rate": 0.000285, "loss": 13.9528, "step": 100 }, { "epoch": 0.07524454477050414, "eval_loss": Infinity, "eval_runtime": 30.8079, "eval_samples_per_second": 15.418, "eval_steps_per_second": 3.863, "eval_wer": 1.043132803632236, "step": 100 }, { "epoch": 0.1504890895410083, "grad_norm": 6.828241348266602, "learning_rate": 0.00028812499999999997, "loss": 6.1846, "step": 200 }, { "epoch": 0.1504890895410083, "eval_loss": Infinity, "eval_runtime": 30.7299, "eval_samples_per_second": 15.457, "eval_steps_per_second": 3.872, "eval_wer": 1.0029511918274687, "step": 200 }, { "epoch": 0.22573363431151242, "grad_norm": 7.705227851867676, "learning_rate": 0.00027562499999999994, "loss": 5.4651, "step": 300 }, { "epoch": 0.22573363431151242, "eval_loss": Infinity, "eval_runtime": 30.7705, "eval_samples_per_second": 15.437, "eval_steps_per_second": 3.867, "eval_wer": 1.0385925085130534, "step": 300 }, { "epoch": 0.3009781790820166, "grad_norm": 6.132575988769531, "learning_rate": 0.00026312499999999996, "loss": 4.4356, "step": 400 }, { "epoch": 0.3009781790820166, "eval_loss": Infinity, "eval_runtime": 30.8056, "eval_samples_per_second": 15.419, "eval_steps_per_second": 3.863, "eval_wer": 0.8830874006810443, "step": 400 }, { "epoch": 0.3762227238525207, "grad_norm": 3.9721577167510986, "learning_rate": 0.000250625, "loss": 2.2016, "step": 500 }, { "epoch": 0.3762227238525207, "eval_loss": Infinity, "eval_runtime": 30.8903, "eval_samples_per_second": 15.377, "eval_steps_per_second": 3.852, "eval_wer": 0.6217934165720772, "step": 500 }, { "epoch": 0.45146726862302483, "grad_norm": 4.83721923828125, "learning_rate": 0.00023812499999999997, "loss": 1.8013, "step": 600 }, { "epoch": 0.45146726862302483, "eval_loss": Infinity, "eval_runtime": 30.8946, "eval_samples_per_second": 15.375, "eval_steps_per_second": 3.852, "eval_wer": 0.5745743473325766, "step": 600 }, { "epoch": 0.526711813393529, "grad_norm": 5.4762725830078125, "learning_rate": 0.00022562499999999997, "loss": 1.7499, "step": 700 }, { "epoch": 0.526711813393529, "eval_loss": Infinity, "eval_runtime": 30.6173, "eval_samples_per_second": 15.514, "eval_steps_per_second": 3.887, "eval_wer": 0.5793416572077185, "step": 700 }, { "epoch": 0.6019563581640331, "grad_norm": 5.545216083526611, "learning_rate": 0.000213125, "loss": 1.6979, "step": 800 }, { "epoch": 0.6019563581640331, "eval_loss": Infinity, "eval_runtime": 30.6746, "eval_samples_per_second": 15.485, "eval_steps_per_second": 3.879, "eval_wer": 0.5500567536889898, "step": 800 }, { "epoch": 0.6772009029345373, "grad_norm": 6.604818344116211, "learning_rate": 0.00020062499999999996, "loss": 1.5567, "step": 900 }, { "epoch": 0.6772009029345373, "eval_loss": Infinity, "eval_runtime": 31.0923, "eval_samples_per_second": 15.277, "eval_steps_per_second": 3.827, "eval_wer": 0.5439273552780931, "step": 900 }, { "epoch": 0.7524454477050414, "grad_norm": 5.870357036590576, "learning_rate": 0.00018824999999999997, "loss": 1.6301, "step": 1000 }, { "epoch": 0.7524454477050414, "eval_loss": Infinity, "eval_runtime": 30.7668, "eval_samples_per_second": 15.439, "eval_steps_per_second": 3.868, "eval_wer": 0.535527809307605, "step": 1000 }, { "epoch": 0.8276899924755455, "grad_norm": 20.379985809326172, "learning_rate": 0.00017575, "loss": 1.6362, "step": 1100 }, { "epoch": 0.8276899924755455, "eval_loss": Infinity, "eval_runtime": 30.7224, "eval_samples_per_second": 15.461, "eval_steps_per_second": 3.873, "eval_wer": 0.5366628830874007, "step": 1100 }, { "epoch": 0.9029345372460497, "grad_norm": 7.096904277801514, "learning_rate": 0.00016324999999999998, "loss": 1.5247, "step": 1200 }, { "epoch": 0.9029345372460497, "eval_loss": Infinity, "eval_runtime": 30.6145, "eval_samples_per_second": 15.516, "eval_steps_per_second": 3.887, "eval_wer": 0.5325766174801362, "step": 1200 }, { "epoch": 0.9781790820165538, "grad_norm": 6.06346321105957, "learning_rate": 0.00015074999999999998, "loss": 1.4012, "step": 1300 }, { "epoch": 0.9781790820165538, "eval_loss": Infinity, "eval_runtime": 30.9705, "eval_samples_per_second": 15.337, "eval_steps_per_second": 3.842, "eval_wer": 0.5346197502837684, "step": 1300 }, { "epoch": 1.053423626787058, "grad_norm": 5.044532775878906, "learning_rate": 0.00013824999999999997, "loss": 1.6397, "step": 1400 }, { "epoch": 1.053423626787058, "eval_loss": Infinity, "eval_runtime": 30.897, "eval_samples_per_second": 15.374, "eval_steps_per_second": 3.852, "eval_wer": 0.5300794551645857, "step": 1400 }, { "epoch": 1.1286681715575622, "grad_norm": 4.185507297515869, "learning_rate": 0.00012575, "loss": 1.5258, "step": 1500 }, { "epoch": 1.1286681715575622, "eval_loss": Infinity, "eval_runtime": 30.6085, "eval_samples_per_second": 15.519, "eval_steps_per_second": 3.888, "eval_wer": 0.5284903518728717, "step": 1500 }, { "epoch": 1.2039127163280663, "grad_norm": 2.0165092945098877, "learning_rate": 0.00011324999999999999, "loss": 1.4144, "step": 1600 }, { "epoch": 1.2039127163280663, "eval_loss": Infinity, "eval_runtime": 30.8112, "eval_samples_per_second": 15.416, "eval_steps_per_second": 3.862, "eval_wer": 0.5244040862656073, "step": 1600 }, { "epoch": 1.2791572610985704, "grad_norm": 6.2198486328125, "learning_rate": 0.00010074999999999998, "loss": 1.4363, "step": 1700 }, { "epoch": 1.2791572610985704, "eval_loss": Infinity, "eval_runtime": 31.0265, "eval_samples_per_second": 15.31, "eval_steps_per_second": 3.835, "eval_wer": 0.5144154370034052, "step": 1700 }, { "epoch": 1.3544018058690745, "grad_norm": 6.625217437744141, "learning_rate": 8.837499999999998e-05, "loss": 1.3733, "step": 1800 }, { "epoch": 1.3544018058690745, "eval_loss": Infinity, "eval_runtime": 31.0253, "eval_samples_per_second": 15.31, "eval_steps_per_second": 3.836, "eval_wer": 0.5357548240635641, "step": 1800 }, { "epoch": 1.4296463506395787, "grad_norm": 3.523829460144043, "learning_rate": 7.5875e-05, "loss": 1.4592, "step": 1900 }, { "epoch": 1.4296463506395787, "eval_loss": Infinity, "eval_runtime": 30.9124, "eval_samples_per_second": 15.366, "eval_steps_per_second": 3.85, "eval_wer": 0.5598183881952327, "step": 1900 }, { "epoch": 1.5048908954100828, "grad_norm": 8.395014762878418, "learning_rate": 6.3375e-05, "loss": 1.3499, "step": 2000 }, { "epoch": 1.5048908954100828, "eval_loss": Infinity, "eval_runtime": 30.7712, "eval_samples_per_second": 15.437, "eval_steps_per_second": 3.867, "eval_wer": 0.5191827468785472, "step": 2000 }, { "epoch": 1.580135440180587, "grad_norm": 2.0930612087249756, "learning_rate": 5.0874999999999997e-05, "loss": 1.4039, "step": 2100 }, { "epoch": 1.580135440180587, "eval_loss": Infinity, "eval_runtime": 31.265, "eval_samples_per_second": 15.193, "eval_steps_per_second": 3.806, "eval_wer": 0.5228149829738933, "step": 2100 }, { "epoch": 1.655379984951091, "grad_norm": 1.5763949155807495, "learning_rate": 3.8375e-05, "loss": 1.4057, "step": 2200 }, { "epoch": 1.655379984951091, "eval_loss": Infinity, "eval_runtime": 30.8864, "eval_samples_per_second": 15.379, "eval_steps_per_second": 3.853, "eval_wer": 0.52894438138479, "step": 2200 }, { "epoch": 1.7306245297215952, "grad_norm": 2.9080910682678223, "learning_rate": 2.5874999999999995e-05, "loss": 1.4961, "step": 2300 }, { "epoch": 1.7306245297215952, "eval_loss": Infinity, "eval_runtime": 31.0242, "eval_samples_per_second": 15.311, "eval_steps_per_second": 3.836, "eval_wer": 0.5323496027241771, "step": 2300 }, { "epoch": 1.8058690744920993, "grad_norm": 3.506730079650879, "learning_rate": 1.3375e-05, "loss": 1.3975, "step": 2400 }, { "epoch": 1.8058690744920993, "eval_loss": Infinity, "eval_runtime": 30.8502, "eval_samples_per_second": 15.397, "eval_steps_per_second": 3.857, "eval_wer": 0.5119182746878547, "step": 2400 }, { "epoch": 1.8811136192626035, "grad_norm": 12.351927757263184, "learning_rate": 8.75e-07, "loss": 1.4725, "step": 2500 }, { "epoch": 1.8811136192626035, "eval_loss": Infinity, "eval_runtime": 31.0235, "eval_samples_per_second": 15.311, "eval_steps_per_second": 3.836, "eval_wer": 0.5259931895573212, "step": 2500 }, { "epoch": 1.8811136192626035, "step": 2500, "total_flos": 1.2622443564523827e+19, "train_loss": 2.5044897521972658, "train_runtime": 3534.0194, "train_samples_per_second": 5.659, "train_steps_per_second": 0.707 } ], "logging_steps": 100, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2622443564523827e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }