|
{ |
|
"best_metric": 0.2257038652896881, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-swagen-combined-30hrs-model/checkpoint-2900", |
|
"epoch": 1.315852442671984, |
|
"eval_steps": 100, |
|
"global_step": 3300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03988035892323031, |
|
"grad_norm": 51.95917510986328, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 16.9208, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03988035892323031, |
|
"eval_loss": 4.268084526062012, |
|
"eval_runtime": 62.3338, |
|
"eval_samples_per_second": 18.16, |
|
"eval_steps_per_second": 4.54, |
|
"eval_wer": 0.9999006754072308, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07976071784646062, |
|
"grad_norm": 27.331029891967773, |
|
"learning_rate": 0.00029962055651710823, |
|
"loss": 7.2021, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07976071784646062, |
|
"eval_loss": 3.4067752361297607, |
|
"eval_runtime": 62.1258, |
|
"eval_samples_per_second": 18.221, |
|
"eval_steps_per_second": 4.555, |
|
"eval_wer": 1.0026817640047676, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11964107676969092, |
|
"grad_norm": 32.389076232910156, |
|
"learning_rate": 0.0002992211423245906, |
|
"loss": 6.725, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11964107676969092, |
|
"eval_loss": 3.1580252647399902, |
|
"eval_runtime": 62.3765, |
|
"eval_samples_per_second": 18.148, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 1.014600715137068, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15952143569292124, |
|
"grad_norm": 34.745182037353516, |
|
"learning_rate": 0.0002988217281320729, |
|
"loss": 6.2214, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15952143569292124, |
|
"eval_loss": 3.043919324874878, |
|
"eval_runtime": 62.1466, |
|
"eval_samples_per_second": 18.215, |
|
"eval_steps_per_second": 4.554, |
|
"eval_wer": 1.0026817640047676, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19940179461615154, |
|
"grad_norm": 25.307340621948242, |
|
"learning_rate": 0.0002984223139395553, |
|
"loss": 5.9996, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19940179461615154, |
|
"eval_loss": 3.004955768585205, |
|
"eval_runtime": 62.9799, |
|
"eval_samples_per_second": 17.974, |
|
"eval_steps_per_second": 4.493, |
|
"eval_wer": 1.0015891934843066, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23928215353938184, |
|
"grad_norm": 6.2275800704956055, |
|
"learning_rate": 0.00029802289974703767, |
|
"loss": 5.8133, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23928215353938184, |
|
"eval_loss": 2.9414522647857666, |
|
"eval_runtime": 62.5714, |
|
"eval_samples_per_second": 18.091, |
|
"eval_steps_per_second": 4.523, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27916251246261214, |
|
"grad_norm": 6.14309549331665, |
|
"learning_rate": 0.00029762348555452, |
|
"loss": 5.805, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.27916251246261214, |
|
"eval_loss": 2.8951313495635986, |
|
"eval_runtime": 62.4303, |
|
"eval_samples_per_second": 18.132, |
|
"eval_steps_per_second": 4.533, |
|
"eval_wer": 0.9997020262216925, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3190428713858425, |
|
"grad_norm": 14.912216186523438, |
|
"learning_rate": 0.00029722407136200237, |
|
"loss": 5.7621, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3190428713858425, |
|
"eval_loss": 2.905362844467163, |
|
"eval_runtime": 62.3651, |
|
"eval_samples_per_second": 18.151, |
|
"eval_steps_per_second": 4.538, |
|
"eval_wer": 0.9996027016289233, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3589232303090728, |
|
"grad_norm": 3.54911470413208, |
|
"learning_rate": 0.00029682465716948474, |
|
"loss": 5.6973, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3589232303090728, |
|
"eval_loss": 2.810933828353882, |
|
"eval_runtime": 63.0608, |
|
"eval_samples_per_second": 17.951, |
|
"eval_steps_per_second": 4.488, |
|
"eval_wer": 0.9908621374652364, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3988035892323031, |
|
"grad_norm": 5.402283668518066, |
|
"learning_rate": 0.00029642524297696706, |
|
"loss": 5.6541, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3988035892323031, |
|
"eval_loss": 2.835520029067993, |
|
"eval_runtime": 62.7046, |
|
"eval_samples_per_second": 18.053, |
|
"eval_steps_per_second": 4.513, |
|
"eval_wer": 0.9910607866507747, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4386839481555334, |
|
"grad_norm": 5.038330554962158, |
|
"learning_rate": 0.00029602582878444943, |
|
"loss": 5.4159, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4386839481555334, |
|
"eval_loss": 2.7631733417510986, |
|
"eval_runtime": 62.482, |
|
"eval_samples_per_second": 18.117, |
|
"eval_steps_per_second": 4.529, |
|
"eval_wer": 0.978347238776321, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4785643070787637, |
|
"grad_norm": 10.235861778259277, |
|
"learning_rate": 0.0002956264145919318, |
|
"loss": 5.4112, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4785643070787637, |
|
"eval_loss": 2.6498703956604004, |
|
"eval_runtime": 62.6482, |
|
"eval_samples_per_second": 18.069, |
|
"eval_steps_per_second": 4.517, |
|
"eval_wer": 0.9793404847040127, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.518444666001994, |
|
"grad_norm": 15.807747840881348, |
|
"learning_rate": 0.0002952270003994142, |
|
"loss": 4.2059, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.518444666001994, |
|
"eval_loss": 0.36752405762672424, |
|
"eval_runtime": 63.4538, |
|
"eval_samples_per_second": 17.84, |
|
"eval_steps_per_second": 4.46, |
|
"eval_wer": 0.261720301946762, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5583250249252243, |
|
"grad_norm": 3.0918314456939697, |
|
"learning_rate": 0.0002948275862068965, |
|
"loss": 0.6493, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5583250249252243, |
|
"eval_loss": 0.27466678619384766, |
|
"eval_runtime": 62.8469, |
|
"eval_samples_per_second": 18.012, |
|
"eval_steps_per_second": 4.503, |
|
"eval_wer": 0.20768772348033374, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5982053838484547, |
|
"grad_norm": 3.152963638305664, |
|
"learning_rate": 0.0002944281720143789, |
|
"loss": 0.5624, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5982053838484547, |
|
"eval_loss": 0.2648875117301941, |
|
"eval_runtime": 63.2142, |
|
"eval_samples_per_second": 17.907, |
|
"eval_steps_per_second": 4.477, |
|
"eval_wer": 0.20232419547079858, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.638085742771685, |
|
"grad_norm": 2.553431749343872, |
|
"learning_rate": 0.00029402875782186125, |
|
"loss": 0.5197, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.638085742771685, |
|
"eval_loss": 0.2619471848011017, |
|
"eval_runtime": 63.2314, |
|
"eval_samples_per_second": 17.902, |
|
"eval_steps_per_second": 4.476, |
|
"eval_wer": 0.19884783472387763, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 11.26433277130127, |
|
"learning_rate": 0.00029363333777126876, |
|
"loss": 0.4715, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"eval_loss": 0.2589423656463623, |
|
"eval_runtime": 63.8104, |
|
"eval_samples_per_second": 17.74, |
|
"eval_steps_per_second": 4.435, |
|
"eval_wer": 0.19815256257449346, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7178464606181456, |
|
"grad_norm": 2.320037603378296, |
|
"learning_rate": 0.00029323392357875113, |
|
"loss": 0.5126, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7178464606181456, |
|
"eval_loss": 0.2518182694911957, |
|
"eval_runtime": 63.1237, |
|
"eval_samples_per_second": 17.933, |
|
"eval_steps_per_second": 4.483, |
|
"eval_wer": 0.19785458879618595, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7577268195413759, |
|
"grad_norm": 2.7758800983428955, |
|
"learning_rate": 0.0002928345093862335, |
|
"loss": 0.4916, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7577268195413759, |
|
"eval_loss": 0.254902720451355, |
|
"eval_runtime": 63.0039, |
|
"eval_samples_per_second": 17.967, |
|
"eval_steps_per_second": 4.492, |
|
"eval_wer": 0.19576877234803336, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7976071784646062, |
|
"grad_norm": 3.670294761657715, |
|
"learning_rate": 0.0002924350951937158, |
|
"loss": 0.4667, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7976071784646062, |
|
"eval_loss": 0.2501005530357361, |
|
"eval_runtime": 63.1402, |
|
"eval_samples_per_second": 17.928, |
|
"eval_steps_per_second": 4.482, |
|
"eval_wer": 0.1946762018275725, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8374875373878365, |
|
"grad_norm": 3.5203053951263428, |
|
"learning_rate": 0.0002920356810011982, |
|
"loss": 0.4713, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.8374875373878365, |
|
"eval_loss": 0.24789908528327942, |
|
"eval_runtime": 63.6613, |
|
"eval_samples_per_second": 17.782, |
|
"eval_steps_per_second": 4.445, |
|
"eval_wer": 0.19427890345649582, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.8773678963110668, |
|
"grad_norm": 3.342277765274048, |
|
"learning_rate": 0.0002916362668086806, |
|
"loss": 0.4875, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8773678963110668, |
|
"eval_loss": 0.24493974447250366, |
|
"eval_runtime": 63.0142, |
|
"eval_samples_per_second": 17.964, |
|
"eval_steps_per_second": 4.491, |
|
"eval_wer": 0.19308700834326578, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9172482552342971, |
|
"grad_norm": 4.0236711502075195, |
|
"learning_rate": 0.0002912368526161629, |
|
"loss": 0.4611, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9172482552342971, |
|
"eval_loss": 0.24361559748649597, |
|
"eval_runtime": 62.9448, |
|
"eval_samples_per_second": 17.984, |
|
"eval_steps_per_second": 4.496, |
|
"eval_wer": 0.19348430671434247, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9571286141575274, |
|
"grad_norm": 2.9615795612335205, |
|
"learning_rate": 0.00029083743842364527, |
|
"loss": 0.4587, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9571286141575274, |
|
"eval_loss": 0.24336393177509308, |
|
"eval_runtime": 63.2457, |
|
"eval_samples_per_second": 17.898, |
|
"eval_steps_per_second": 4.475, |
|
"eval_wer": 0.19278903456495827, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9970089730807578, |
|
"grad_norm": 2.3204922676086426, |
|
"learning_rate": 0.00029043802423112764, |
|
"loss": 0.4679, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9970089730807578, |
|
"eval_loss": 0.24085378646850586, |
|
"eval_runtime": 63.8549, |
|
"eval_samples_per_second": 17.728, |
|
"eval_steps_per_second": 4.432, |
|
"eval_wer": 0.18951132300357568, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.036689930209372, |
|
"grad_norm": 1.1865150928497314, |
|
"learning_rate": 0.00029003861003861, |
|
"loss": 0.4141, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.036689930209372, |
|
"eval_loss": 0.23312124609947205, |
|
"eval_runtime": 63.2647, |
|
"eval_samples_per_second": 17.893, |
|
"eval_steps_per_second": 4.473, |
|
"eval_wer": 0.18961064759634486, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.076570289132602, |
|
"grad_norm": 1.3028316497802734, |
|
"learning_rate": 0.0002896391958460924, |
|
"loss": 0.4263, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.076570289132602, |
|
"eval_loss": 0.23293058574199677, |
|
"eval_runtime": 62.8596, |
|
"eval_samples_per_second": 18.008, |
|
"eval_steps_per_second": 4.502, |
|
"eval_wer": 0.19199443782280493, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1164506480558325, |
|
"grad_norm": 7.91491174697876, |
|
"learning_rate": 0.0002892397816535747, |
|
"loss": 0.4142, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1164506480558325, |
|
"eval_loss": 0.23239396512508392, |
|
"eval_runtime": 63.0701, |
|
"eval_samples_per_second": 17.948, |
|
"eval_steps_per_second": 4.487, |
|
"eval_wer": 0.1917957886372666, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1563310069790629, |
|
"grad_norm": 1.7904026508331299, |
|
"learning_rate": 0.0002888403674610571, |
|
"loss": 0.4606, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.1563310069790629, |
|
"eval_loss": 0.2257038652896881, |
|
"eval_runtime": 63.835, |
|
"eval_samples_per_second": 17.733, |
|
"eval_steps_per_second": 4.433, |
|
"eval_wer": 0.19427890345649582, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.196211365902293, |
|
"grad_norm": 2.778794527053833, |
|
"learning_rate": 0.00028844095326853946, |
|
"loss": 0.4048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.196211365902293, |
|
"eval_loss": 0.2288675457239151, |
|
"eval_runtime": 63.4733, |
|
"eval_samples_per_second": 17.834, |
|
"eval_steps_per_second": 4.459, |
|
"eval_wer": 0.19278903456495827, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2360917248255234, |
|
"grad_norm": 1.2311575412750244, |
|
"learning_rate": 0.00028804153907602183, |
|
"loss": 0.4172, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.2360917248255234, |
|
"eval_loss": 0.2326343059539795, |
|
"eval_runtime": 63.2571, |
|
"eval_samples_per_second": 17.895, |
|
"eval_steps_per_second": 4.474, |
|
"eval_wer": 0.19378228049264998, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.2759720837487538, |
|
"grad_norm": 1.667809247970581, |
|
"learning_rate": 0.00028764212488350415, |
|
"loss": 0.4294, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.2759720837487538, |
|
"eval_loss": 0.232680082321167, |
|
"eval_runtime": 63.7285, |
|
"eval_samples_per_second": 17.763, |
|
"eval_steps_per_second": 4.441, |
|
"eval_wer": 0.1940802542709575, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.315852442671984, |
|
"grad_norm": 2.067225456237793, |
|
"learning_rate": 0.00028724271069098653, |
|
"loss": 0.468, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.315852442671984, |
|
"eval_loss": 0.22773675620555878, |
|
"eval_runtime": 63.9749, |
|
"eval_samples_per_second": 17.694, |
|
"eval_steps_per_second": 4.424, |
|
"eval_wer": 0.19219308700834326, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.315852442671984, |
|
"step": 3300, |
|
"total_flos": 1.351677187667784e+19, |
|
"train_loss": 2.917839215596517, |
|
"train_runtime": 6068.548, |
|
"train_samples_per_second": 99.162, |
|
"train_steps_per_second": 12.393 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 75210, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.351677187667784e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|