{ "best_metric": 0.9392894506454468, "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-toigen-male-model/checkpoint-400", "epoch": 9.437054631828978, "eval_steps": 200, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2375296912114014, "grad_norm": 150.66383361816406, "learning_rate": 4.0000000000000003e-07, "loss": 14.4659, "step": 25 }, { "epoch": 0.4750593824228028, "grad_norm": 96.092529296875, "learning_rate": 9.000000000000001e-07, "loss": 11.6879, "step": 50 }, { "epoch": 0.7125890736342043, "grad_norm": 94.01177215576172, "learning_rate": 1.3800000000000001e-06, "loss": 9.6203, "step": 75 }, { "epoch": 0.9501187648456056, "grad_norm": 88.87047576904297, "learning_rate": 1.8800000000000002e-06, "loss": 7.6311, "step": 100 }, { "epoch": 1.180522565320665, "grad_norm": 77.90989685058594, "learning_rate": 2.38e-06, "loss": 6.1323, "step": 125 }, { "epoch": 1.4180522565320666, "grad_norm": 63.12504959106445, "learning_rate": 2.88e-06, "loss": 5.5927, "step": 150 }, { "epoch": 1.655581947743468, "grad_norm": 63.86695861816406, "learning_rate": 3.3800000000000007e-06, "loss": 5.0682, "step": 175 }, { "epoch": 1.8931116389548692, "grad_norm": 75.34732055664062, "learning_rate": 3.88e-06, "loss": 4.343, "step": 200 }, { "epoch": 1.8931116389548692, "eval_loss": 1.066441535949707, "eval_runtime": 125.6362, "eval_samples_per_second": 1.679, "eval_steps_per_second": 0.844, "eval_wer": 0.606701030927835, "step": 200 }, { "epoch": 2.1235154394299287, "grad_norm": 74.82162475585938, "learning_rate": 4.38e-06, "loss": 3.9738, "step": 225 }, { "epoch": 2.36104513064133, "grad_norm": 60.270687103271484, "learning_rate": 4.880000000000001e-06, "loss": 3.4967, "step": 250 }, { "epoch": 2.598574821852732, "grad_norm": 72.7863998413086, "learning_rate": 5.380000000000001e-06, "loss": 3.4653, "step": 275 }, { "epoch": 2.836104513064133, "grad_norm": 42.029544830322266, "learning_rate": 5.8800000000000005e-06, "loss": 3.0671, "step": 300 }, { "epoch": 3.0665083135391926, "grad_norm": 58.11145782470703, "learning_rate": 6.380000000000001e-06, "loss": 2.7671, "step": 325 }, { "epoch": 3.304038004750594, "grad_norm": 72.86565399169922, "learning_rate": 6.88e-06, "loss": 2.1218, "step": 350 }, { "epoch": 3.5415676959619953, "grad_norm": 29.798490524291992, "learning_rate": 7.3800000000000005e-06, "loss": 1.8895, "step": 375 }, { "epoch": 3.7790973871733966, "grad_norm": 62.0257453918457, "learning_rate": 7.88e-06, "loss": 1.9496, "step": 400 }, { "epoch": 3.7790973871733966, "eval_loss": 0.9392894506454468, "eval_runtime": 129.8196, "eval_samples_per_second": 1.625, "eval_steps_per_second": 0.817, "eval_wer": 0.5695876288659794, "step": 400 }, { "epoch": 4.009501187648456, "grad_norm": 36.066246032714844, "learning_rate": 8.380000000000001e-06, "loss": 1.9781, "step": 425 }, { "epoch": 4.247030878859857, "grad_norm": 42.63113021850586, "learning_rate": 8.880000000000001e-06, "loss": 1.0441, "step": 450 }, { "epoch": 4.484560570071259, "grad_norm": 33.34630584716797, "learning_rate": 9.38e-06, "loss": 1.1374, "step": 475 }, { "epoch": 4.72209026128266, "grad_norm": 61.29568862915039, "learning_rate": 9.88e-06, "loss": 1.0169, "step": 500 }, { "epoch": 4.959619952494061, "grad_norm": 49.31779861450195, "learning_rate": 9.957777777777779e-06, "loss": 1.3693, "step": 525 }, { "epoch": 5.190023752969121, "grad_norm": 35.42589569091797, "learning_rate": 9.902222222222223e-06, "loss": 0.7001, "step": 550 }, { "epoch": 5.427553444180522, "grad_norm": 22.584501266479492, "learning_rate": 9.846666666666668e-06, "loss": 0.5772, "step": 575 }, { "epoch": 5.665083135391924, "grad_norm": 21.677404403686523, "learning_rate": 9.791111111111112e-06, "loss": 0.702, "step": 600 }, { "epoch": 5.665083135391924, "eval_loss": 0.9410276412963867, "eval_runtime": 124.8464, "eval_samples_per_second": 1.69, "eval_steps_per_second": 0.849, "eval_wer": 0.4814432989690722, "step": 600 }, { "epoch": 5.902612826603326, "grad_norm": 27.595264434814453, "learning_rate": 9.735555555555556e-06, "loss": 0.6341, "step": 625 }, { "epoch": 6.133016627078385, "grad_norm": 15.151907920837402, "learning_rate": 9.68e-06, "loss": 0.39, "step": 650 }, { "epoch": 6.370546318289787, "grad_norm": 63.603759765625, "learning_rate": 9.624444444444445e-06, "loss": 0.3234, "step": 675 }, { "epoch": 6.608076009501188, "grad_norm": 33.50586700439453, "learning_rate": 9.56888888888889e-06, "loss": 0.2813, "step": 700 }, { "epoch": 6.845605700712589, "grad_norm": 29.78474235534668, "learning_rate": 9.513333333333334e-06, "loss": 0.3366, "step": 725 }, { "epoch": 7.076009501187649, "grad_norm": 13.300681114196777, "learning_rate": 9.457777777777778e-06, "loss": 0.2659, "step": 750 }, { "epoch": 7.31353919239905, "grad_norm": 26.387012481689453, "learning_rate": 9.402222222222222e-06, "loss": 0.1854, "step": 775 }, { "epoch": 7.551068883610451, "grad_norm": 20.7415771484375, "learning_rate": 9.346666666666666e-06, "loss": 0.2108, "step": 800 }, { "epoch": 7.551068883610451, "eval_loss": 0.9733582735061646, "eval_runtime": 125.0244, "eval_samples_per_second": 1.688, "eval_steps_per_second": 0.848, "eval_wer": 0.4551546391752577, "step": 800 }, { "epoch": 7.788598574821853, "grad_norm": 16.81197166442871, "learning_rate": 9.291111111111112e-06, "loss": 0.211, "step": 825 }, { "epoch": 8.019002375296912, "grad_norm": 8.114081382751465, "learning_rate": 9.235555555555556e-06, "loss": 0.2144, "step": 850 }, { "epoch": 8.256532066508314, "grad_norm": 7.230250835418701, "learning_rate": 9.180000000000002e-06, "loss": 0.1472, "step": 875 }, { "epoch": 8.494061757719715, "grad_norm": 34.53820037841797, "learning_rate": 9.124444444444444e-06, "loss": 0.1506, "step": 900 }, { "epoch": 8.731591448931116, "grad_norm": 20.564693450927734, "learning_rate": 9.06888888888889e-06, "loss": 0.1414, "step": 925 }, { "epoch": 8.969121140142517, "grad_norm": 24.549924850463867, "learning_rate": 9.013333333333334e-06, "loss": 0.1398, "step": 950 }, { "epoch": 9.199524940617577, "grad_norm": 9.88586139678955, "learning_rate": 8.957777777777778e-06, "loss": 0.0744, "step": 975 }, { "epoch": 9.437054631828978, "grad_norm": 7.651462554931641, "learning_rate": 8.902222222222224e-06, "loss": 0.1073, "step": 1000 }, { "epoch": 9.437054631828978, "eval_loss": 1.0236197710037231, "eval_runtime": 125.7559, "eval_samples_per_second": 1.678, "eval_steps_per_second": 0.843, "eval_wer": 0.44484536082474224, "step": 1000 }, { "epoch": 9.437054631828978, "step": 1000, "total_flos": 8.10972659515392e+18, "train_loss": 2.490273738861084, "train_runtime": 2581.6643, "train_samples_per_second": 15.494, "train_steps_per_second": 1.937 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 48, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.10972659515392e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }