{ "best_metric": 0.5309441685676575, "best_model_checkpoint": "/scratch/skscla001/results/mms-1b-all-bem-natbed-n-model/checkpoint-3000", "epoch": 9.269662921348315, "eval_steps": 100, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2808988764044944, "grad_norm": 6.5729265213012695, "learning_rate": 0.00028799999999999995, "loss": 7.3623, "step": 100 }, { "epoch": 0.2808988764044944, "eval_loss": 0.9286547899246216, "eval_runtime": 51.6813, "eval_samples_per_second": 12.577, "eval_steps_per_second": 1.587, "eval_wer": 0.7282935315514787, "step": 100 }, { "epoch": 0.5617977528089888, "grad_norm": 1.9608774185180664, "learning_rate": 0.00029727788279773154, "loss": 0.9213, "step": 200 }, { "epoch": 0.5617977528089888, "eval_loss": 0.651074230670929, "eval_runtime": 51.0267, "eval_samples_per_second": 12.738, "eval_steps_per_second": 1.607, "eval_wer": 0.593072908429543, "step": 200 }, { "epoch": 0.8426966292134831, "grad_norm": 3.582282066345215, "learning_rate": 0.0002944423440453686, "loss": 0.7224, "step": 300 }, { "epoch": 0.8426966292134831, "eval_loss": 0.6386900544166565, "eval_runtime": 51.4556, "eval_samples_per_second": 12.632, "eval_steps_per_second": 1.594, "eval_wer": 0.5434129368970425, "step": 300 }, { "epoch": 1.1235955056179776, "grad_norm": 1.1070303916931152, "learning_rate": 0.00029160680529300565, "loss": 0.7132, "step": 400 }, { "epoch": 1.1235955056179776, "eval_loss": 0.6140071749687195, "eval_runtime": 51.4993, "eval_samples_per_second": 12.622, "eval_steps_per_second": 1.592, "eval_wer": 0.5212715483156729, "step": 400 }, { "epoch": 1.404494382022472, "grad_norm": 2.090729236602783, "learning_rate": 0.0002887712665406427, "loss": 0.7195, "step": 500 }, { "epoch": 1.404494382022472, "eval_loss": 0.6097270250320435, "eval_runtime": 51.1603, "eval_samples_per_second": 12.705, "eval_steps_per_second": 1.603, "eval_wer": 0.514629131741262, "step": 500 }, { "epoch": 1.6853932584269664, "grad_norm": 1.059830904006958, "learning_rate": 0.00028593572778827975, "loss": 0.7054, "step": 600 }, { "epoch": 1.6853932584269664, "eval_loss": 0.6144998669624329, "eval_runtime": 51.4827, "eval_samples_per_second": 12.626, "eval_steps_per_second": 1.593, "eval_wer": 0.5125731456587063, "step": 600 }, { "epoch": 1.9662921348314608, "grad_norm": 1.5953254699707031, "learning_rate": 0.0002831285444234404, "loss": 0.7417, "step": 700 }, { "epoch": 1.9662921348314608, "eval_loss": 0.606216311454773, "eval_runtime": 51.7647, "eval_samples_per_second": 12.557, "eval_steps_per_second": 1.584, "eval_wer": 0.5256998260319469, "step": 700 }, { "epoch": 2.247191011235955, "grad_norm": 0.7664604187011719, "learning_rate": 0.00028029300567107747, "loss": 0.7029, "step": 800 }, { "epoch": 2.247191011235955, "eval_loss": 0.6021894812583923, "eval_runtime": 52.1384, "eval_samples_per_second": 12.467, "eval_steps_per_second": 1.573, "eval_wer": 0.4947018820180294, "step": 800 }, { "epoch": 2.5280898876404496, "grad_norm": 1.113645076751709, "learning_rate": 0.0002774574669187145, "loss": 0.6845, "step": 900 }, { "epoch": 2.5280898876404496, "eval_loss": 0.5886064767837524, "eval_runtime": 51.3267, "eval_samples_per_second": 12.664, "eval_steps_per_second": 1.598, "eval_wer": 0.5022932152459275, "step": 900 }, { "epoch": 2.808988764044944, "grad_norm": 1.8644826412200928, "learning_rate": 0.00027462192816635157, "loss": 0.663, "step": 1000 }, { "epoch": 2.808988764044944, "eval_loss": 0.5914585590362549, "eval_runtime": 51.3494, "eval_samples_per_second": 12.658, "eval_steps_per_second": 1.597, "eval_wer": 0.49264589593547364, "step": 1000 }, { "epoch": 3.0898876404494384, "grad_norm": 0.9567739963531494, "learning_rate": 0.0002717863894139887, "loss": 0.7129, "step": 1100 }, { "epoch": 3.0898876404494384, "eval_loss": 0.5832971930503845, "eval_runtime": 51.7503, "eval_samples_per_second": 12.56, "eval_steps_per_second": 1.585, "eval_wer": 0.4920132848331488, "step": 1100 }, { "epoch": 3.370786516853933, "grad_norm": 2.428119659423828, "learning_rate": 0.00026895085066162567, "loss": 0.6735, "step": 1200 }, { "epoch": 3.370786516853933, "eval_loss": 0.5876715779304504, "eval_runtime": 51.7607, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.584, "eval_wer": 0.483156729400601, "step": 1200 }, { "epoch": 3.6516853932584272, "grad_norm": 10.181241989135742, "learning_rate": 0.0002661153119092627, "loss": 0.672, "step": 1300 }, { "epoch": 3.6516853932584272, "eval_loss": 0.5862510204315186, "eval_runtime": 51.4111, "eval_samples_per_second": 12.643, "eval_steps_per_second": 1.595, "eval_wer": 0.5151035900680057, "step": 1300 }, { "epoch": 3.932584269662921, "grad_norm": 2.6570065021514893, "learning_rate": 0.0002632797731568998, "loss": 0.6494, "step": 1400 }, { "epoch": 3.932584269662921, "eval_loss": 0.579518735408783, "eval_runtime": 51.3192, "eval_samples_per_second": 12.666, "eval_steps_per_second": 1.598, "eval_wer": 0.48442195160525064, "step": 1400 }, { "epoch": 4.213483146067416, "grad_norm": 2.2995524406433105, "learning_rate": 0.0002604442344045368, "loss": 0.7049, "step": 1500 }, { "epoch": 4.213483146067416, "eval_loss": 0.5723974704742432, "eval_runtime": 51.7136, "eval_samples_per_second": 12.569, "eval_steps_per_second": 1.586, "eval_wer": 0.47161157678317256, "step": 1500 }, { "epoch": 4.49438202247191, "grad_norm": 1.1362881660461426, "learning_rate": 0.0002576086956521739, "loss": 0.5898, "step": 1600 }, { "epoch": 4.49438202247191, "eval_loss": 0.5640456676483154, "eval_runtime": 51.8126, "eval_samples_per_second": 12.545, "eval_steps_per_second": 1.583, "eval_wer": 0.4761980072750277, "step": 1600 }, { "epoch": 4.775280898876405, "grad_norm": 1.0056232213974, "learning_rate": 0.00025477315689981093, "loss": 0.6581, "step": 1700 }, { "epoch": 4.775280898876405, "eval_loss": 0.5581757426261902, "eval_runtime": 51.7242, "eval_samples_per_second": 12.567, "eval_steps_per_second": 1.585, "eval_wer": 0.4724023406610786, "step": 1700 }, { "epoch": 5.056179775280899, "grad_norm": 0.7103342413902283, "learning_rate": 0.000251937618147448, "loss": 0.6262, "step": 1800 }, { "epoch": 5.056179775280899, "eval_loss": 0.5446608066558838, "eval_runtime": 51.6418, "eval_samples_per_second": 12.587, "eval_steps_per_second": 1.588, "eval_wer": 0.4750909378459592, "step": 1800 }, { "epoch": 5.337078651685394, "grad_norm": 1.2600806951522827, "learning_rate": 0.00024910207939508503, "loss": 0.6179, "step": 1900 }, { "epoch": 5.337078651685394, "eval_loss": 0.549724280834198, "eval_runtime": 51.7411, "eval_samples_per_second": 12.563, "eval_steps_per_second": 1.585, "eval_wer": 0.4656017713110865, "step": 1900 }, { "epoch": 5.617977528089888, "grad_norm": 14.24419116973877, "learning_rate": 0.0002462665406427221, "loss": 0.5896, "step": 2000 }, { "epoch": 5.617977528089888, "eval_loss": 0.5444263815879822, "eval_runtime": 51.8052, "eval_samples_per_second": 12.547, "eval_steps_per_second": 1.583, "eval_wer": 0.477937687806421, "step": 2000 }, { "epoch": 5.898876404494382, "grad_norm": 0.867696225643158, "learning_rate": 0.00024343100189035916, "loss": 0.6438, "step": 2100 }, { "epoch": 5.898876404494382, "eval_loss": 0.5399273037910461, "eval_runtime": 51.4899, "eval_samples_per_second": 12.624, "eval_steps_per_second": 1.593, "eval_wer": 0.47003004902736045, "step": 2100 }, { "epoch": 6.179775280898877, "grad_norm": 1.4865626096725464, "learning_rate": 0.0002405954631379962, "loss": 0.6086, "step": 2200 }, { "epoch": 6.179775280898877, "eval_loss": 0.5520233511924744, "eval_runtime": 51.3367, "eval_samples_per_second": 12.662, "eval_steps_per_second": 1.597, "eval_wer": 0.4597501186145817, "step": 2200 }, { "epoch": 6.460674157303371, "grad_norm": 1.7858107089996338, "learning_rate": 0.00023775992438563324, "loss": 0.6226, "step": 2300 }, { "epoch": 6.460674157303371, "eval_loss": 0.5385509133338928, "eval_runtime": 51.8468, "eval_samples_per_second": 12.537, "eval_steps_per_second": 1.582, "eval_wer": 0.4796773683378143, "step": 2300 }, { "epoch": 6.741573033707866, "grad_norm": 3.9033384323120117, "learning_rate": 0.0002349243856332703, "loss": 0.6148, "step": 2400 }, { "epoch": 6.741573033707866, "eval_loss": 0.5573983788490295, "eval_runtime": 51.5238, "eval_samples_per_second": 12.616, "eval_steps_per_second": 1.591, "eval_wer": 0.4679740629448047, "step": 2400 }, { "epoch": 7.022471910112359, "grad_norm": 1.0407049655914307, "learning_rate": 0.00023208884688090737, "loss": 0.5838, "step": 2500 }, { "epoch": 7.022471910112359, "eval_loss": 0.5497230887413025, "eval_runtime": 51.2462, "eval_samples_per_second": 12.684, "eval_steps_per_second": 1.6, "eval_wer": 0.4638620907796932, "step": 2500 }, { "epoch": 7.303370786516854, "grad_norm": 0.836614191532135, "learning_rate": 0.00022925330812854442, "loss": 0.5407, "step": 2600 }, { "epoch": 7.303370786516854, "eval_loss": 0.5377057790756226, "eval_runtime": 51.5353, "eval_samples_per_second": 12.613, "eval_steps_per_second": 1.591, "eval_wer": 0.46307132690178715, "step": 2600 }, { "epoch": 7.584269662921348, "grad_norm": 2.6512720584869385, "learning_rate": 0.00022641776937618147, "loss": 0.6186, "step": 2700 }, { "epoch": 7.584269662921348, "eval_loss": 0.5403843522071838, "eval_runtime": 51.536, "eval_samples_per_second": 12.613, "eval_steps_per_second": 1.591, "eval_wer": 0.4714534240075913, "step": 2700 }, { "epoch": 7.865168539325842, "grad_norm": 2.884155035018921, "learning_rate": 0.0002235822306238185, "loss": 0.5922, "step": 2800 }, { "epoch": 7.865168539325842, "eval_loss": 0.5381007790565491, "eval_runtime": 51.5402, "eval_samples_per_second": 12.612, "eval_steps_per_second": 1.591, "eval_wer": 0.46085718804365017, "step": 2800 }, { "epoch": 8.146067415730338, "grad_norm": 1.4212076663970947, "learning_rate": 0.00022074669187145554, "loss": 0.5799, "step": 2900 }, { "epoch": 8.146067415730338, "eval_loss": 0.5311689376831055, "eval_runtime": 51.4207, "eval_samples_per_second": 12.641, "eval_steps_per_second": 1.595, "eval_wer": 0.46196425747271863, "step": 2900 }, { "epoch": 8.426966292134832, "grad_norm": 1.6221522092819214, "learning_rate": 0.0002179111531190926, "loss": 0.5914, "step": 3000 }, { "epoch": 8.426966292134832, "eval_loss": 0.5309441685676575, "eval_runtime": 51.2813, "eval_samples_per_second": 12.675, "eval_steps_per_second": 1.599, "eval_wer": 0.46307132690178715, "step": 3000 }, { "epoch": 8.707865168539326, "grad_norm": 0.46160590648651123, "learning_rate": 0.00021507561436672967, "loss": 0.6194, "step": 3100 }, { "epoch": 8.707865168539326, "eval_loss": 0.5316519141197205, "eval_runtime": 51.9102, "eval_samples_per_second": 12.522, "eval_steps_per_second": 1.58, "eval_wer": 0.46781591016922347, "step": 3100 }, { "epoch": 8.98876404494382, "grad_norm": 0.4698401093482971, "learning_rate": 0.00021224007561436672, "loss": 0.5851, "step": 3200 }, { "epoch": 8.98876404494382, "eval_loss": 0.5388666987419128, "eval_runtime": 51.8653, "eval_samples_per_second": 12.532, "eval_steps_per_second": 1.581, "eval_wer": 0.4575359797564447, "step": 3200 }, { "epoch": 9.269662921348315, "grad_norm": 2.0234947204589844, "learning_rate": 0.00020940453686200375, "loss": 0.5764, "step": 3300 }, { "epoch": 9.269662921348315, "eval_loss": 0.5578745603561401, "eval_runtime": 51.3849, "eval_samples_per_second": 12.65, "eval_steps_per_second": 1.596, "eval_wer": 0.45500553534714533, "step": 3300 }, { "epoch": 9.269662921348315, "step": 3300, "total_flos": 1.8863577270010868e+19, "train_loss": 0.8547844314575195, "train_runtime": 5284.1307, "train_samples_per_second": 16.152, "train_steps_per_second": 2.021 } ], "logging_steps": 100, "max_steps": 10680, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8863577270010868e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }