{ "best_metric": 0.2479187548160553, "best_model_checkpoint": "./mms-1b-bem-genbed-all/checkpoint-3600", "epoch": 5.0, "eval_steps": 200, "global_step": 3640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27472527472527475, "eval_loss": 0.6311588883399963, "eval_runtime": 129.4588, "eval_samples_per_second": 14.978, "eval_steps_per_second": 1.877, "eval_wer": 0.6736216216216216, "step": 200 }, { "epoch": 0.5494505494505495, "eval_loss": 0.3198663890361786, "eval_runtime": 127.9369, "eval_samples_per_second": 15.156, "eval_steps_per_second": 1.899, "eval_wer": 0.49194594594594593, "step": 400 }, { "epoch": 0.6868131868131868, "grad_norm": 1.675441861152649, "learning_rate": 0.00029699999999999996, "loss": 2.9901, "step": 500 }, { "epoch": 0.8241758241758241, "eval_loss": 0.3013916313648224, "eval_runtime": 128.0261, "eval_samples_per_second": 15.145, "eval_steps_per_second": 1.898, "eval_wer": 0.4612972972972973, "step": 600 }, { "epoch": 1.098901098901099, "eval_loss": 0.28253573179244995, "eval_runtime": 127.3129, "eval_samples_per_second": 15.23, "eval_steps_per_second": 1.909, "eval_wer": 0.4432972972972973, "step": 800 }, { "epoch": 1.3736263736263736, "grad_norm": 0.6800592541694641, "learning_rate": 0.00025280254777070065, "loss": 0.3968, "step": 1000 }, { "epoch": 1.3736263736263736, "eval_loss": 0.2782987058162689, "eval_runtime": 127.2721, "eval_samples_per_second": 15.235, "eval_steps_per_second": 1.909, "eval_wer": 0.4541081081081081, "step": 1000 }, { "epoch": 1.6483516483516483, "eval_loss": 0.27315396070480347, "eval_runtime": 128.0724, "eval_samples_per_second": 15.14, "eval_steps_per_second": 1.897, "eval_wer": 0.4294054054054054, "step": 1200 }, { "epoch": 1.9230769230769231, "eval_loss": 0.26492610573768616, "eval_runtime": 127.9148, "eval_samples_per_second": 15.159, "eval_steps_per_second": 1.9, "eval_wer": 0.4243783783783784, "step": 1400 }, { "epoch": 2.0604395604395602, "grad_norm": 0.9411349296569824, "learning_rate": 0.00020503184713375794, "loss": 0.3766, "step": 1500 }, { "epoch": 2.197802197802198, "eval_loss": 0.26205211877822876, "eval_runtime": 128.7617, "eval_samples_per_second": 15.059, "eval_steps_per_second": 1.887, "eval_wer": 0.42043243243243245, "step": 1600 }, { "epoch": 2.4725274725274726, "eval_loss": 0.262787789106369, "eval_runtime": 128.5209, "eval_samples_per_second": 15.087, "eval_steps_per_second": 1.891, "eval_wer": 0.4170810810810811, "step": 1800 }, { "epoch": 2.7472527472527473, "grad_norm": 1.4841364622116089, "learning_rate": 0.00015726114649681526, "loss": 0.3537, "step": 2000 }, { "epoch": 2.7472527472527473, "eval_loss": 0.2579393982887268, "eval_runtime": 128.5847, "eval_samples_per_second": 15.08, "eval_steps_per_second": 1.89, "eval_wer": 0.4187027027027027, "step": 2000 }, { "epoch": 3.021978021978022, "eval_loss": 0.25568732619285583, "eval_runtime": 128.193, "eval_samples_per_second": 15.126, "eval_steps_per_second": 1.896, "eval_wer": 0.40335135135135136, "step": 2200 }, { "epoch": 3.2967032967032965, "eval_loss": 0.252371609210968, "eval_runtime": 128.8222, "eval_samples_per_second": 15.052, "eval_steps_per_second": 1.886, "eval_wer": 0.4090810810810811, "step": 2400 }, { "epoch": 3.4340659340659343, "grad_norm": 2.9654958248138428, "learning_rate": 0.00010949044585987259, "loss": 0.3529, "step": 2500 }, { "epoch": 3.571428571428571, "eval_loss": 0.25349992513656616, "eval_runtime": 128.7325, "eval_samples_per_second": 15.062, "eval_steps_per_second": 1.888, "eval_wer": 0.4061081081081081, "step": 2600 }, { "epoch": 3.8461538461538463, "eval_loss": 0.24949324131011963, "eval_runtime": 129.6607, "eval_samples_per_second": 14.954, "eval_steps_per_second": 1.874, "eval_wer": 0.4034054054054054, "step": 2800 }, { "epoch": 4.1208791208791204, "grad_norm": 0.9809963703155518, "learning_rate": 6.171974522292994e-05, "loss": 0.3393, "step": 3000 }, { "epoch": 4.1208791208791204, "eval_loss": 0.24937787652015686, "eval_runtime": 130.0963, "eval_samples_per_second": 14.904, "eval_steps_per_second": 1.868, "eval_wer": 0.4064864864864865, "step": 3000 }, { "epoch": 4.395604395604396, "eval_loss": 0.24878770112991333, "eval_runtime": 129.7286, "eval_samples_per_second": 14.947, "eval_steps_per_second": 1.873, "eval_wer": 0.40664864864864864, "step": 3200 }, { "epoch": 4.670329670329671, "eval_loss": 0.2481740117073059, "eval_runtime": 129.3443, "eval_samples_per_second": 14.991, "eval_steps_per_second": 1.879, "eval_wer": 0.40275675675675676, "step": 3400 }, { "epoch": 4.8076923076923075, "grad_norm": 0.8750921487808228, "learning_rate": 1.394904458598726e-05, "loss": 0.3332, "step": 3500 }, { "epoch": 4.945054945054945, "eval_loss": 0.2479187548160553, "eval_runtime": 129.4727, "eval_samples_per_second": 14.976, "eval_steps_per_second": 1.877, "eval_wer": 0.40616216216216217, "step": 3600 }, { "epoch": 5.0, "step": 3640, "total_flos": 1.737871213031041e+19, "train_loss": 0.719452987922417, "train_runtime": 6513.7768, "train_samples_per_second": 4.465, "train_steps_per_second": 0.559 } ], "logging_steps": 500, "max_steps": 3640, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.737871213031041e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }