|
{ |
|
  "best_metric": 1e999,
|
"best_model_checkpoint": null, |
|
"epoch": 1.8811136192626035, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07524454477050414, |
|
"grad_norm": 8.513701438903809, |
|
"learning_rate": 0.000285, |
|
"loss": 13.9528, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07524454477050414, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8079, |
|
"eval_samples_per_second": 15.418, |
|
"eval_steps_per_second": 3.863, |
|
"eval_wer": 1.043132803632236, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1504890895410083, |
|
"grad_norm": 6.828241348266602, |
|
"learning_rate": 0.00028812499999999997, |
|
"loss": 6.1846, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1504890895410083, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.7299, |
|
"eval_samples_per_second": 15.457, |
|
"eval_steps_per_second": 3.872, |
|
"eval_wer": 1.0029511918274687, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22573363431151242, |
|
"grad_norm": 7.705227851867676, |
|
"learning_rate": 0.00027562499999999994, |
|
"loss": 5.4651, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22573363431151242, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.7705, |
|
"eval_samples_per_second": 15.437, |
|
"eval_steps_per_second": 3.867, |
|
"eval_wer": 1.0385925085130534, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3009781790820166, |
|
"grad_norm": 6.132575988769531, |
|
"learning_rate": 0.00026312499999999996, |
|
"loss": 4.4356, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3009781790820166, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8056, |
|
"eval_samples_per_second": 15.419, |
|
"eval_steps_per_second": 3.863, |
|
"eval_wer": 0.8830874006810443, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3762227238525207, |
|
"grad_norm": 3.9721577167510986, |
|
"learning_rate": 0.000250625, |
|
"loss": 2.2016, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3762227238525207, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8903, |
|
"eval_samples_per_second": 15.377, |
|
"eval_steps_per_second": 3.852, |
|
"eval_wer": 0.6217934165720772, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.45146726862302483, |
|
"grad_norm": 4.83721923828125, |
|
"learning_rate": 0.00023812499999999997, |
|
"loss": 1.8013, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45146726862302483, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8946, |
|
"eval_samples_per_second": 15.375, |
|
"eval_steps_per_second": 3.852, |
|
"eval_wer": 0.5745743473325766, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.526711813393529, |
|
"grad_norm": 5.4762725830078125, |
|
"learning_rate": 0.00022562499999999997, |
|
"loss": 1.7499, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.526711813393529, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.6173, |
|
"eval_samples_per_second": 15.514, |
|
"eval_steps_per_second": 3.887, |
|
"eval_wer": 0.5793416572077185, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6019563581640331, |
|
"grad_norm": 5.545216083526611, |
|
"learning_rate": 0.000213125, |
|
"loss": 1.6979, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6019563581640331, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.6746, |
|
"eval_samples_per_second": 15.485, |
|
"eval_steps_per_second": 3.879, |
|
"eval_wer": 0.5500567536889898, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6772009029345373, |
|
"grad_norm": 6.604818344116211, |
|
"learning_rate": 0.00020062499999999996, |
|
"loss": 1.5567, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6772009029345373, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.0923, |
|
"eval_samples_per_second": 15.277, |
|
"eval_steps_per_second": 3.827, |
|
"eval_wer": 0.5439273552780931, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7524454477050414, |
|
"grad_norm": 5.870357036590576, |
|
"learning_rate": 0.00018824999999999997, |
|
"loss": 1.6301, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7524454477050414, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.7668, |
|
"eval_samples_per_second": 15.439, |
|
"eval_steps_per_second": 3.868, |
|
"eval_wer": 0.535527809307605, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8276899924755455, |
|
"grad_norm": 20.379985809326172, |
|
"learning_rate": 0.00017575, |
|
"loss": 1.6362, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8276899924755455, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.7224, |
|
"eval_samples_per_second": 15.461, |
|
"eval_steps_per_second": 3.873, |
|
"eval_wer": 0.5366628830874007, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9029345372460497, |
|
"grad_norm": 7.096904277801514, |
|
"learning_rate": 0.00016324999999999998, |
|
"loss": 1.5247, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9029345372460497, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.6145, |
|
"eval_samples_per_second": 15.516, |
|
"eval_steps_per_second": 3.887, |
|
"eval_wer": 0.5325766174801362, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9781790820165538, |
|
"grad_norm": 6.06346321105957, |
|
"learning_rate": 0.00015074999999999998, |
|
"loss": 1.4012, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9781790820165538, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.9705, |
|
"eval_samples_per_second": 15.337, |
|
"eval_steps_per_second": 3.842, |
|
"eval_wer": 0.5346197502837684, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.053423626787058, |
|
"grad_norm": 5.044532775878906, |
|
"learning_rate": 0.00013824999999999997, |
|
"loss": 1.6397, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.053423626787058, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.897, |
|
"eval_samples_per_second": 15.374, |
|
"eval_steps_per_second": 3.852, |
|
"eval_wer": 0.5300794551645857, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1286681715575622, |
|
"grad_norm": 4.185507297515869, |
|
"learning_rate": 0.00012575, |
|
"loss": 1.5258, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1286681715575622, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.6085, |
|
"eval_samples_per_second": 15.519, |
|
"eval_steps_per_second": 3.888, |
|
"eval_wer": 0.5284903518728717, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.2039127163280663, |
|
"grad_norm": 2.0165092945098877, |
|
"learning_rate": 0.00011324999999999999, |
|
"loss": 1.4144, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.2039127163280663, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8112, |
|
"eval_samples_per_second": 15.416, |
|
"eval_steps_per_second": 3.862, |
|
"eval_wer": 0.5244040862656073, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.2791572610985704, |
|
"grad_norm": 6.2198486328125, |
|
"learning_rate": 0.00010074999999999998, |
|
"loss": 1.4363, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2791572610985704, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.0265, |
|
"eval_samples_per_second": 15.31, |
|
"eval_steps_per_second": 3.835, |
|
"eval_wer": 0.5144154370034052, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.3544018058690745, |
|
"grad_norm": 6.625217437744141, |
|
"learning_rate": 8.837499999999998e-05, |
|
"loss": 1.3733, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3544018058690745, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.0253, |
|
"eval_samples_per_second": 15.31, |
|
"eval_steps_per_second": 3.836, |
|
"eval_wer": 0.5357548240635641, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4296463506395787, |
|
"grad_norm": 3.523829460144043, |
|
"learning_rate": 7.5875e-05, |
|
"loss": 1.4592, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.4296463506395787, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.9124, |
|
"eval_samples_per_second": 15.366, |
|
"eval_steps_per_second": 3.85, |
|
"eval_wer": 0.5598183881952327, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5048908954100828, |
|
"grad_norm": 8.395014762878418, |
|
"learning_rate": 6.3375e-05, |
|
"loss": 1.3499, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5048908954100828, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.7712, |
|
"eval_samples_per_second": 15.437, |
|
"eval_steps_per_second": 3.867, |
|
"eval_wer": 0.5191827468785472, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.580135440180587, |
|
"grad_norm": 2.0930612087249756, |
|
"learning_rate": 5.0874999999999997e-05, |
|
"loss": 1.4039, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.580135440180587, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.265, |
|
"eval_samples_per_second": 15.193, |
|
"eval_steps_per_second": 3.806, |
|
"eval_wer": 0.5228149829738933, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.655379984951091, |
|
"grad_norm": 1.5763949155807495, |
|
"learning_rate": 3.8375e-05, |
|
"loss": 1.4057, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.655379984951091, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8864, |
|
"eval_samples_per_second": 15.379, |
|
"eval_steps_per_second": 3.853, |
|
"eval_wer": 0.52894438138479, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.7306245297215952, |
|
"grad_norm": 2.9080910682678223, |
|
"learning_rate": 2.5874999999999995e-05, |
|
"loss": 1.4961, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.7306245297215952, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.0242, |
|
"eval_samples_per_second": 15.311, |
|
"eval_steps_per_second": 3.836, |
|
"eval_wer": 0.5323496027241771, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.8058690744920993, |
|
"grad_norm": 3.506730079650879, |
|
"learning_rate": 1.3375e-05, |
|
"loss": 1.3975, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.8058690744920993, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 30.8502, |
|
"eval_samples_per_second": 15.397, |
|
"eval_steps_per_second": 3.857, |
|
"eval_wer": 0.5119182746878547, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.8811136192626035, |
|
"grad_norm": 12.351927757263184, |
|
"learning_rate": 8.75e-07, |
|
"loss": 1.4725, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8811136192626035, |
|
      "eval_loss": 1e999,
|
"eval_runtime": 31.0235, |
|
"eval_samples_per_second": 15.311, |
|
"eval_steps_per_second": 3.836, |
|
"eval_wer": 0.5259931895573212, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8811136192626035, |
|
"step": 2500, |
|
"total_flos": 1.2622443564523827e+19, |
|
"train_loss": 2.5044897521972658, |
|
"train_runtime": 3534.0194, |
|
"train_samples_per_second": 5.659, |
|
"train_steps_per_second": 0.707 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2622443564523827e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|