{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.91614255765199, "eval_steps": 500, "global_step": 9500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0482180293501049, "grad_norm": 0.002079850761219859, "learning_rate": 4.99e-05, "loss": 0.0303, "step": 500 }, { "epoch": 1.0482180293501049, "eval_cer": 0.025307563615524685, "eval_loss": 0.021985899657011032, "eval_runtime": 51.7047, "eval_samples_per_second": 44.213, "eval_steps_per_second": 5.531, "eval_wer": 0.0908849221420433, "step": 500 }, { "epoch": 2.0964360587002098, "grad_norm": 0.5530248880386353, "learning_rate": 4.724004424778761e-05, "loss": 0.0594, "step": 1000 }, { "epoch": 2.0964360587002098, "eval_cer": 0.025889877898862178, "eval_loss": 0.020988421514630318, "eval_runtime": 51.2495, "eval_samples_per_second": 44.605, "eval_steps_per_second": 5.581, "eval_wer": 0.09160653247246486, "step": 1000 }, { "epoch": 3.1446540880503147, "grad_norm": 1.1215722560882568, "learning_rate": 4.448008849557522e-05, "loss": 0.0767, "step": 1500 }, { "epoch": 3.1446540880503147, "eval_cer": 0.025104215770549686, "eval_loss": 0.014869533479213715, "eval_runtime": 51.4984, "eval_samples_per_second": 44.39, "eval_steps_per_second": 5.554, "eval_wer": 0.09115077857956703, "step": 1500 }, { "epoch": 4.1928721174004195, "grad_norm": 4.590636253356934, "learning_rate": 4.172566371681416e-05, "loss": 0.062, "step": 2000 }, { "epoch": 4.1928721174004195, "eval_cer": 0.02556636996367468, "eval_loss": 0.017348747700452805, "eval_runtime": 51.1673, "eval_samples_per_second": 44.677, "eval_steps_per_second": 5.59, "eval_wer": 0.0916824914546145, "step": 2000 }, { "epoch": 5.241090146750524, "grad_norm": 1.3328059911727905, "learning_rate": 3.896017699115044e-05, "loss": 0.0687, "step": 2500 }, { "epoch": 5.241090146750524, "eval_cer": 0.025741988557062178, "eval_loss": 0.018996117636561394, "eval_runtime": 51.3668, "eval_samples_per_second": 44.503, "eval_steps_per_second": 5.568, "eval_wer": 0.09183440941891378, "step": 2500 }, { "epoch": 6.289308176100629, "grad_norm": 0.1056140810251236, "learning_rate": 3.6205752212389385e-05, "loss": 0.0617, "step": 3000 }, { "epoch": 6.289308176100629, "eval_cer": 0.025242862028487186, "eval_loss": 0.01558039989322424, "eval_runtime": 51.1418, "eval_samples_per_second": 44.699, "eval_steps_per_second": 5.592, "eval_wer": 0.09149259399924041, "step": 3000 }, { "epoch": 7.337526205450734, "grad_norm": 0.9397304654121399, "learning_rate": 3.3445796460176995e-05, "loss": 0.0673, "step": 3500 }, { "epoch": 7.337526205450734, "eval_cer": 0.02564031463457468, "eval_loss": 0.018212666735053062, "eval_runtime": 51.318, "eval_samples_per_second": 44.546, "eval_steps_per_second": 5.573, "eval_wer": 0.09141663501709077, "step": 3500 }, { "epoch": 8.385744234800839, "grad_norm": 2.7481186389923096, "learning_rate": 3.0680309734513276e-05, "loss": 0.0645, "step": 4000 }, { "epoch": 8.385744234800839, "eval_cer": 0.025215132776899685, "eval_loss": 0.014686434529721737, "eval_runtime": 51.1418, "eval_samples_per_second": 44.699, "eval_steps_per_second": 5.592, "eval_wer": 0.0916824914546145, "step": 4000 }, { "epoch": 9.433962264150944, "grad_norm": 1.0427154302597046, "learning_rate": 2.7914823008849562e-05, "loss": 0.0633, "step": 4500 }, { "epoch": 9.433962264150944, "eval_cer": 0.025427723705737183, "eval_loss": 0.016345607116818428, "eval_runtime": 51.3562, "eval_samples_per_second": 44.513, "eval_steps_per_second": 5.569, "eval_wer": 0.09130269654386632, "step": 4500 }, { "epoch": 10.482180293501049, "grad_norm": 0.39028722047805786, "learning_rate": 2.515486725663717e-05, "loss": 0.0507, "step": 5000 }, { "epoch": 10.482180293501049, "eval_cer": 0.02552015454436218, "eval_loss": 0.01751078851521015, "eval_runtime": 51.3306, "eval_samples_per_second": 44.535, "eval_steps_per_second": 5.572, "eval_wer": 0.09160653247246486, "step": 5000 }, { "epoch": 11.530398322851154, "grad_norm": 0.7193567752838135, "learning_rate": 2.2389380530973453e-05, "loss": 0.064, "step": 5500 }, { "epoch": 11.530398322851154, "eval_cer": 0.025215132776899685, "eval_loss": 0.014882609248161316, "eval_runtime": 51.3252, "eval_samples_per_second": 44.54, "eval_steps_per_second": 5.572, "eval_wer": 0.09118875807064185, "step": 5500 }, { "epoch": 12.578616352201259, "grad_norm": 0.4278053641319275, "learning_rate": 1.964048672566372e-05, "loss": 0.0508, "step": 6000 }, { "epoch": 12.578616352201259, "eval_cer": 0.025150431189862186, "eval_loss": 0.015342830680310726, "eval_runtime": 51.4039, "eval_samples_per_second": 44.471, "eval_steps_per_second": 5.564, "eval_wer": 0.09092290163311811, "step": 6000 }, { "epoch": 13.626834381551364, "grad_norm": 1.9797695875167847, "learning_rate": 1.6875000000000004e-05, "loss": 0.0486, "step": 6500 }, { "epoch": 13.626834381551364, "eval_cer": 0.025178160441449684, "eval_loss": 0.016490615904331207, "eval_runtime": 51.3445, "eval_samples_per_second": 44.523, "eval_steps_per_second": 5.57, "eval_wer": 0.0910748195974174, "step": 6500 }, { "epoch": 14.675052410901468, "grad_norm": 0.6631584763526917, "learning_rate": 1.4115044247787612e-05, "loss": 0.0435, "step": 7000 }, { "epoch": 14.675052410901468, "eval_cer": 0.025058000351237186, "eval_loss": 0.01397643517702818, "eval_runtime": 51.3713, "eval_samples_per_second": 44.5, "eval_steps_per_second": 5.567, "eval_wer": 0.09080896315989366, "step": 7000 }, { "epoch": 15.723270440251572, "grad_norm": 1.2671451568603516, "learning_rate": 1.1355088495575223e-05, "loss": 0.0457, "step": 7500 }, { "epoch": 15.723270440251572, "eval_cer": 0.025048757267374686, "eval_loss": 0.014803554862737656, "eval_runtime": 51.4523, "eval_samples_per_second": 44.43, "eval_steps_per_second": 5.559, "eval_wer": 0.09061906570451957, "step": 7500 }, { "epoch": 16.771488469601678, "grad_norm": 0.5452253818511963, "learning_rate": 8.606194690265488e-06, "loss": 0.0417, "step": 8000 }, { "epoch": 16.771488469601678, "eval_cer": 0.02506724343509969, "eval_loss": 0.0149121955037117, "eval_runtime": 51.4559, "eval_samples_per_second": 44.426, "eval_steps_per_second": 5.558, "eval_wer": 0.09092290163311811, "step": 8000 }, { "epoch": 17.81970649895178, "grad_norm": 0.47779375314712524, "learning_rate": 5.8517699115044245e-06, "loss": 0.047, "step": 8500 }, { "epoch": 17.81970649895178, "eval_cer": 0.025002541848062186, "eval_loss": 0.014238793402910233, "eval_runtime": 51.42, "eval_samples_per_second": 44.457, "eval_steps_per_second": 5.562, "eval_wer": 0.09069502468666919, "step": 8500 }, { "epoch": 18.867924528301888, "grad_norm": 0.3653850555419922, "learning_rate": 3.086283185840708e-06, "loss": 0.0521, "step": 9000 }, { "epoch": 18.867924528301888, "eval_cer": 0.02491935409329969, "eval_loss": 0.013357444666326046, "eval_runtime": 51.3616, "eval_samples_per_second": 44.508, "eval_steps_per_second": 5.568, "eval_wer": 0.09073300417774402, "step": 9000 }, { "epoch": 19.91614255765199, "grad_norm": 1.0006723403930664, "learning_rate": 3.263274336283186e-07, "loss": 0.0479, "step": 9500 }, { "epoch": 19.91614255765199, "eval_cer": 0.024974812596474688, "eval_loss": 0.013586528599262238, "eval_runtime": 51.4469, "eval_samples_per_second": 44.434, "eval_steps_per_second": 5.559, "eval_wer": 0.09073300417774402, "step": 9500 } ], "logging_steps": 500, "max_steps": 9540, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.082771650621779e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }