{ "best_metric": 19.095815786035196, "best_model_checkpoint": "./Graduation_Project_Whisper_base/checkpoint-2400", "epoch": 1.9994079336885733, "eval_steps": 400, "global_step": 2532, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03764351590438547, "grad_norm": 503138.9375, "learning_rate": 1.0000000000000002e-06, "loss": 0.5445, "step": 50 }, { "epoch": 0.07528703180877094, "grad_norm": 226777.984375, "learning_rate": 2.0000000000000003e-06, "loss": 0.1677, "step": 100 }, { "epoch": 0.11293054771315642, "grad_norm": 162510.234375, "learning_rate": 3e-06, "loss": 0.1051, "step": 150 }, { "epoch": 0.1505740636175419, "grad_norm": 163651.140625, "learning_rate": 4.000000000000001e-06, "loss": 0.0785, "step": 200 }, { "epoch": 0.18821757952192736, "grad_norm": 146978.328125, "learning_rate": 5e-06, "loss": 0.0657, "step": 250 }, { "epoch": 0.22586109542631283, "grad_norm": 131384.75, "learning_rate": 6e-06, "loss": 0.054, "step": 300 }, { "epoch": 0.2635046113306983, "grad_norm": 127696.390625, "learning_rate": 7e-06, "loss": 0.0488, "step": 350 }, { "epoch": 0.3011481272350838, "grad_norm": 138088.9375, "learning_rate": 8.000000000000001e-06, "loss": 0.0425, "step": 400 }, { "epoch": 0.3011481272350838, "eval_cer": 11.295171257365732, "eval_loss": 0.03295731544494629, "eval_runtime": 3971.8715, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.152, "eval_wer": 37.69008874996231, "step": 400 }, { "epoch": 0.33879164313946925, "grad_norm": 127706.0390625, "learning_rate": 9e-06, "loss": 0.0375, "step": 450 }, { "epoch": 0.3764351590438547, "grad_norm": 119614.671875, "learning_rate": 1e-05, "loss": 0.034, "step": 500 }, { "epoch": 0.4140786749482402, "grad_norm": 125161.125, "learning_rate": 9.91029509614553e-06, "loss": 0.0328, "step": 550 }, { "epoch": 0.45172219085262566, "grad_norm": 111814.9609375, "learning_rate": 9.644399172492337e-06, "loss": 0.0271, "step": 600 }, { "epoch": 0.48936570675701113, "grad_norm": 147685.515625, "learning_rate": 9.211853096347059e-06, "loss": 0.0298, "step": 650 }, { "epoch": 0.5270092226613966, "grad_norm": 169561.140625, "learning_rate": 8.628177469378995e-06, "loss": 0.0302, "step": 700 }, { "epoch": 0.564652738565782, "grad_norm": 72348.1328125, "learning_rate": 7.914315717987892e-06, "loss": 0.0235, "step": 750 }, { "epoch": 0.6022962544701675, "grad_norm": 95024.140625, "learning_rate": 7.095882602083321e-06, "loss": 0.0258, "step": 800 }, { "epoch": 0.6022962544701675, "eval_cer": 7.953864927074945, "eval_loss": 0.021815981715917587, "eval_runtime": 3808.1602, "eval_samples_per_second": 2.533, "eval_steps_per_second": 0.158, "eval_wer": 25.474153960580143, "step": 800 }, { "epoch": 0.639939770374553, "grad_norm": 77923.2734375, "learning_rate": 6.2022451072546926e-06, "loss": 0.0225, "step": 850 }, { "epoch": 0.6775832862789385, "grad_norm": 128029.6015625, "learning_rate": 5.265468699723748e-06, "loss": 0.024, "step": 900 }, { "epoch": 0.7152268021833239, "grad_norm": 132533.578125, "learning_rate": 4.319166754518768e-06, "loss": 0.0234, "step": 950 }, { "epoch": 0.7528703180877094, "grad_norm": 92432.9453125, "learning_rate": 3.397294441644515e-06, "loss": 0.0199, "step": 1000 }, { "epoch": 0.7905138339920948, "grad_norm": 94760.9453125, "learning_rate": 2.5329303479779855e-06, "loss": 0.0224, "step": 1050 }, { "epoch": 0.8281573498964804, "grad_norm": 75528.8984375, "learning_rate": 1.7570895526862202e-06, "loss": 0.0204, "step": 1100 }, { "epoch": 0.8658008658008658, "grad_norm": 125463.9296875, "learning_rate": 1.0976107453484314e-06, "loss": 0.0199, "step": 1150 }, { "epoch": 0.9034443817052513, "grad_norm": 139042.265625, "learning_rate": 5.781573191671386e-07, "loss": 0.0197, "step": 1200 }, { "epoch": 0.9034443817052513, "eval_cer": 7.018806706804402, "eval_loss": 0.01941610500216484, "eval_runtime": 3809.2886, "eval_samples_per_second": 2.532, "eval_steps_per_second": 0.158, "eval_wer": 22.336244760937955, "step": 1200 }, { "epoch": 0.9410878976096367, "grad_norm": 87157.8125, "learning_rate": 2.1736828200332628e-07, "loss": 0.0207, "step": 1250 }, { "epoch": 0.9787314135140223, "grad_norm": 90153.140625, "learning_rate": 2.8189452213207014e-08, "loss": 0.0195, "step": 1300 }, { "epoch": 1.0663114268798106, "grad_norm": 81696.2734375, "learning_rate": 6.269188506178019e-06, "loss": 0.0116, "step": 1350 }, { "epoch": 1.1057825143082691, "grad_norm": 81981.46875, "learning_rate": 5.891915195166075e-06, "loss": 0.0106, "step": 1400 }, { "epoch": 1.1452536017367279, "grad_norm": 76380.390625, "learning_rate": 5.509314676457187e-06, "loss": 0.0101, "step": 1450 }, { "epoch": 1.1847246891651866, "grad_norm": 70844.5625, "learning_rate": 5.123672136399975e-06, "loss": 0.0092, "step": 1500 }, { "epoch": 1.224195776593645, "grad_norm": 75742.8828125, "learning_rate": 4.737290930648428e-06, "loss": 0.0082, "step": 1550 }, { "epoch": 1.2636668640221038, "grad_norm": 55820.46484375, "learning_rate": 4.352478826739623e-06, "loss": 0.0083, "step": 1600 }, { "epoch": 1.2636668640221038, "eval_cer": 6.160355666025902, "eval_loss": 0.01826481893658638, "eval_runtime": 4336.2114, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.139, "eval_wer": 20.22252821806559, "step": 1600 }, { "epoch": 1.3031379514505623, "grad_norm": 73017.6015625, "learning_rate": 3.971534220320291e-06, "loss": 0.0085, "step": 1650 }, { "epoch": 1.342609038879021, "grad_norm": 47336.1015625, "learning_rate": 3.5967324073495363e-06, "loss": 0.0081, "step": 1700 }, { "epoch": 1.3820801263074798, "grad_norm": 79073.7421875, "learning_rate": 3.2303119942707794e-06, "loss": 0.0075, "step": 1750 }, { "epoch": 1.4215512137359383, "grad_norm": 68802.984375, "learning_rate": 2.8744615273218836e-06, "loss": 0.0074, "step": 1800 }, { "epoch": 1.461022301164397, "grad_norm": 60333.73828125, "learning_rate": 2.531306420843542e-06, "loss": 0.007, "step": 1850 }, { "epoch": 1.5004933885928557, "grad_norm": 52213.80859375, "learning_rate": 2.2028962626602346e-06, "loss": 0.007, "step": 1900 }, { "epoch": 1.5399644760213143, "grad_norm": 48230.20703125, "learning_rate": 1.8911925723557807e-06, "loss": 0.0068, "step": 1950 }, { "epoch": 1.5794355634497732, "grad_norm": 49029.17578125, "learning_rate": 1.5980570855606025e-06, "loss": 0.0066, "step": 2000 }, { "epoch": 1.5794355634497732, "eval_cer": 5.946899128741567, "eval_loss": 0.01755833439528942, "eval_runtime": 4292.0414, "eval_samples_per_second": 2.248, "eval_steps_per_second": 0.14, "eval_wer": 19.420461741027005, "step": 2000 }, { "epoch": 1.6189066508782317, "grad_norm": 70595.109375, "learning_rate": 1.3252406342259527e-06, "loss": 0.0069, "step": 2050 }, { "epoch": 1.6583777383066902, "grad_norm": 50325.43359375, "learning_rate": 1.0743726893007257e-06, "loss": 0.0065, "step": 2100 }, { "epoch": 1.6978488257351492, "grad_norm": 45970.44921875, "learning_rate": 8.469516282700979e-07, "loss": 0.0067, "step": 2150 }, { "epoch": 1.7373199131636077, "grad_norm": 70741.96875, "learning_rate": 6.443357856857563e-07, "loss": 0.0067, "step": 2200 }, { "epoch": 1.7767910005920662, "grad_norm": 51143.1953125, "learning_rate": 4.677353401408974e-07, "loss": 0.0063, "step": 2250 }, { "epoch": 1.816262088020525, "grad_norm": 58438.66796875, "learning_rate": 3.182050861472541e-07, "loss": 0.0067, "step": 2300 }, { "epoch": 1.8557331754489836, "grad_norm": 65700.25, "learning_rate": 1.9663813408607845e-07, "loss": 0.0065, "step": 2350 }, { "epoch": 1.8952042628774421, "grad_norm": 52580.59765625, "learning_rate": 1.0376057586187538e-07, "loss": 0.0065, "step": 2400 }, { "epoch": 1.8952042628774421, "eval_cer": 5.860923579002043, "eval_loss": 0.017402183264493942, "eval_runtime": 4269.4409, "eval_samples_per_second": 2.26, "eval_steps_per_second": 0.141, "eval_wer": 19.095815786035196, "step": 2400 }, { "epoch": 1.9346753503059009, "grad_norm": 53811.92578125, "learning_rate": 4.012714811970464e-08, "loss": 0.0059, "step": 2450 }, { "epoch": 1.9741464377343596, "grad_norm": 51623.82421875, "learning_rate": 6.117918928693623e-09, "loss": 0.0066, "step": 2500 }, { "epoch": 1.9994079336885733, "step": 2532, "total_flos": 1.051043265773568e+19, "train_loss": 0.003555825636332257, "train_runtime": 25310.9727, "train_samples_per_second": 6.405, "train_steps_per_second": 0.1 } ], "logging_steps": 50, "max_steps": 2532, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.051043265773568e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }