{ "best_metric": 0.6179416179656982, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-female-model/checkpoint-1500", "epoch": 15.652173913043478, "eval_steps": 100, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 3.3569300174713135, "learning_rate": 0.000285, "loss": 6.7809, "step": 100 }, { "epoch": 0.8695652173913043, "eval_loss": 3.1491334438323975, "eval_runtime": 19.3955, "eval_samples_per_second": 12.838, "eval_steps_per_second": 3.248, "eval_wer": 0.9758556024378809, "step": 100 }, { "epoch": 1.7391304347826086, "grad_norm": 3.8020479679107666, "learning_rate": 0.0002914925373134328, "loss": 2.6957, "step": 200 }, { "epoch": 1.7391304347826086, "eval_loss": 2.001054286956787, "eval_runtime": 19.3287, "eval_samples_per_second": 12.882, "eval_steps_per_second": 3.259, "eval_wer": 0.9278012189404594, "step": 200 }, { "epoch": 2.608695652173913, "grad_norm": 1.5241467952728271, "learning_rate": 0.0002825373134328358, "loss": 1.1829, "step": 300 }, { "epoch": 2.608695652173913, "eval_loss": 0.806449294090271, "eval_runtime": 19.2752, "eval_samples_per_second": 12.918, "eval_steps_per_second": 3.268, "eval_wer": 0.6420534458509142, "step": 300 }, { "epoch": 3.4782608695652173, "grad_norm": 1.655979871749878, "learning_rate": 0.0002735820895522388, "loss": 0.8527, "step": 400 }, { "epoch": 3.4782608695652173, "eval_loss": 0.7470424175262451, "eval_runtime": 19.1645, "eval_samples_per_second": 12.993, "eval_steps_per_second": 3.287, "eval_wer": 0.53328645100797, "step": 400 }, { "epoch": 4.3478260869565215, "grad_norm": 5.353499412536621, "learning_rate": 0.00026462686567164175, "loss": 0.8376, "step": 500 }, { "epoch": 4.3478260869565215, "eval_loss": 0.69962078332901, "eval_runtime": 19.293, "eval_samples_per_second": 12.906, "eval_steps_per_second": 3.265, "eval_wer": 0.49226441631504925, "step": 500 }, { "epoch": 5.217391304347826, "grad_norm": 2.5344457626342773, "learning_rate": 0.00025567164179104475, "loss": 0.7567, "step": 600 }, { "epoch": 5.217391304347826, "eval_loss": 0.6791635751724243, "eval_runtime": 19.2801, "eval_samples_per_second": 12.915, "eval_steps_per_second": 3.268, "eval_wer": 0.46858884200656353, "step": 600 }, { "epoch": 6.086956521739131, "grad_norm": 0.9048321843147278, "learning_rate": 0.00024671641791044774, "loss": 0.7014, "step": 700 }, { "epoch": 6.086956521739131, "eval_loss": 0.6882011890411377, "eval_runtime": 19.2193, "eval_samples_per_second": 12.956, "eval_steps_per_second": 3.278, "eval_wer": 0.45546179090482886, "step": 700 }, { "epoch": 6.956521739130435, "grad_norm": 2.4095747470855713, "learning_rate": 0.00023776119402985074, "loss": 0.7242, "step": 800 }, { "epoch": 6.956521739130435, "eval_loss": 0.6556691527366638, "eval_runtime": 19.2162, "eval_samples_per_second": 12.958, "eval_steps_per_second": 3.278, "eval_wer": 0.4578059071729958, "step": 800 }, { "epoch": 7.826086956521739, "grad_norm": 1.2900141477584839, "learning_rate": 0.0002288059701492537, "loss": 0.6895, "step": 900 }, { "epoch": 7.826086956521739, "eval_loss": 0.6531555652618408, "eval_runtime": 19.3114, "eval_samples_per_second": 12.894, "eval_steps_per_second": 3.262, "eval_wer": 0.4409282700421941, "step": 900 }, { "epoch": 8.695652173913043, "grad_norm": 1.196593999862671, "learning_rate": 0.00021985074626865668, "loss": 0.6543, "step": 1000 }, { "epoch": 8.695652173913043, "eval_loss": 0.6477147340774536, "eval_runtime": 19.299, "eval_samples_per_second": 12.902, "eval_steps_per_second": 3.264, "eval_wer": 0.42780121894045947, "step": 1000 }, { "epoch": 9.565217391304348, "grad_norm": 5.2444281578063965, "learning_rate": 0.00021089552238805968, "loss": 0.6386, "step": 1100 }, { "epoch": 9.565217391304348, "eval_loss": 0.6591968536376953, "eval_runtime": 19.2958, "eval_samples_per_second": 12.904, "eval_steps_per_second": 3.265, "eval_wer": 0.4219409282700422, "step": 1100 }, { "epoch": 10.434782608695652, "grad_norm": 1.0960686206817627, "learning_rate": 0.00020194029850746268, "loss": 0.6145, "step": 1200 }, { "epoch": 10.434782608695652, "eval_loss": 0.6434625387191772, "eval_runtime": 19.1713, "eval_samples_per_second": 12.988, "eval_steps_per_second": 3.286, "eval_wer": 0.42569151429910923, "step": 1200 }, { "epoch": 11.304347826086957, "grad_norm": 1.0903316736221313, "learning_rate": 0.00019298507462686568, "loss": 0.6513, "step": 1300 }, { "epoch": 11.304347826086957, "eval_loss": 0.6570419073104858, "eval_runtime": 19.3251, "eval_samples_per_second": 12.885, "eval_steps_per_second": 3.26, "eval_wer": 0.4146741678387248, "step": 1300 }, { "epoch": 12.173913043478262, "grad_norm": 1.123496413230896, "learning_rate": 0.00018402985074626862, "loss": 0.5788, "step": 1400 }, { "epoch": 12.173913043478262, "eval_loss": 0.6258613467216492, "eval_runtime": 19.2989, "eval_samples_per_second": 12.902, "eval_steps_per_second": 3.264, "eval_wer": 0.4085794655414909, "step": 1400 }, { "epoch": 13.043478260869565, "grad_norm": 0.7355613708496094, "learning_rate": 0.00017507462686567162, "loss": 0.6061, "step": 1500 }, { "epoch": 13.043478260869565, "eval_loss": 0.6179416179656982, "eval_runtime": 19.2693, "eval_samples_per_second": 12.922, "eval_steps_per_second": 3.269, "eval_wer": 0.4006094702297234, "step": 1500 }, { "epoch": 13.91304347826087, "grad_norm": 2.3467090129852295, "learning_rate": 0.00016611940298507462, "loss": 0.5647, "step": 1600 }, { "epoch": 13.91304347826087, "eval_loss": 0.6186335682868958, "eval_runtime": 19.1768, "eval_samples_per_second": 12.984, "eval_steps_per_second": 3.285, "eval_wer": 0.4006094702297234, "step": 1600 }, { "epoch": 14.782608695652174, "grad_norm": 1.0731009244918823, "learning_rate": 0.00015716417910447762, "loss": 0.5715, "step": 1700 }, { "epoch": 14.782608695652174, "eval_loss": 0.627108633518219, "eval_runtime": 19.405, "eval_samples_per_second": 12.832, "eval_steps_per_second": 3.247, "eval_wer": 0.3984997655883732, "step": 1700 }, { "epoch": 15.652173913043478, "grad_norm": 0.8772910237312317, "learning_rate": 0.0001482089552238806, "loss": 0.5502, "step": 1800 }, { "epoch": 15.652173913043478, "eval_loss": 0.6218141317367554, "eval_runtime": 19.3309, "eval_samples_per_second": 12.881, "eval_steps_per_second": 3.259, "eval_wer": 0.3961556493202063, "step": 1800 }, { "epoch": 15.652173913043478, "step": 1800, "total_flos": 5.624155339355839e+18, "train_loss": 1.1473070081075032, "train_runtime": 1548.2979, "train_samples_per_second": 8.874, "train_steps_per_second": 2.228 } ], "logging_steps": 100, "max_steps": 3450, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.624155339355839e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }