mms-1b-bigcgen-female-20hrs-model / trainer_state.json
csikasote's picture
End of training
08137cc verified
{
"best_metric": Infinity,
"best_model_checkpoint": null,
"epoch": 1.8811136192626035,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07524454477050414,
"grad_norm": 8.513701438903809,
"learning_rate": 0.000285,
"loss": 13.9528,
"step": 100
},
{
"epoch": 0.07524454477050414,
"eval_loss": Infinity,
"eval_runtime": 30.8079,
"eval_samples_per_second": 15.418,
"eval_steps_per_second": 3.863,
"eval_wer": 1.043132803632236,
"step": 100
},
{
"epoch": 0.1504890895410083,
"grad_norm": 6.828241348266602,
"learning_rate": 0.00028812499999999997,
"loss": 6.1846,
"step": 200
},
{
"epoch": 0.1504890895410083,
"eval_loss": Infinity,
"eval_runtime": 30.7299,
"eval_samples_per_second": 15.457,
"eval_steps_per_second": 3.872,
"eval_wer": 1.0029511918274687,
"step": 200
},
{
"epoch": 0.22573363431151242,
"grad_norm": 7.705227851867676,
"learning_rate": 0.00027562499999999994,
"loss": 5.4651,
"step": 300
},
{
"epoch": 0.22573363431151242,
"eval_loss": Infinity,
"eval_runtime": 30.7705,
"eval_samples_per_second": 15.437,
"eval_steps_per_second": 3.867,
"eval_wer": 1.0385925085130534,
"step": 300
},
{
"epoch": 0.3009781790820166,
"grad_norm": 6.132575988769531,
"learning_rate": 0.00026312499999999996,
"loss": 4.4356,
"step": 400
},
{
"epoch": 0.3009781790820166,
"eval_loss": Infinity,
"eval_runtime": 30.8056,
"eval_samples_per_second": 15.419,
"eval_steps_per_second": 3.863,
"eval_wer": 0.8830874006810443,
"step": 400
},
{
"epoch": 0.3762227238525207,
"grad_norm": 3.9721577167510986,
"learning_rate": 0.000250625,
"loss": 2.2016,
"step": 500
},
{
"epoch": 0.3762227238525207,
"eval_loss": Infinity,
"eval_runtime": 30.8903,
"eval_samples_per_second": 15.377,
"eval_steps_per_second": 3.852,
"eval_wer": 0.6217934165720772,
"step": 500
},
{
"epoch": 0.45146726862302483,
"grad_norm": 4.83721923828125,
"learning_rate": 0.00023812499999999997,
"loss": 1.8013,
"step": 600
},
{
"epoch": 0.45146726862302483,
"eval_loss": Infinity,
"eval_runtime": 30.8946,
"eval_samples_per_second": 15.375,
"eval_steps_per_second": 3.852,
"eval_wer": 0.5745743473325766,
"step": 600
},
{
"epoch": 0.526711813393529,
"grad_norm": 5.4762725830078125,
"learning_rate": 0.00022562499999999997,
"loss": 1.7499,
"step": 700
},
{
"epoch": 0.526711813393529,
"eval_loss": Infinity,
"eval_runtime": 30.6173,
"eval_samples_per_second": 15.514,
"eval_steps_per_second": 3.887,
"eval_wer": 0.5793416572077185,
"step": 700
},
{
"epoch": 0.6019563581640331,
"grad_norm": 5.545216083526611,
"learning_rate": 0.000213125,
"loss": 1.6979,
"step": 800
},
{
"epoch": 0.6019563581640331,
"eval_loss": Infinity,
"eval_runtime": 30.6746,
"eval_samples_per_second": 15.485,
"eval_steps_per_second": 3.879,
"eval_wer": 0.5500567536889898,
"step": 800
},
{
"epoch": 0.6772009029345373,
"grad_norm": 6.604818344116211,
"learning_rate": 0.00020062499999999996,
"loss": 1.5567,
"step": 900
},
{
"epoch": 0.6772009029345373,
"eval_loss": Infinity,
"eval_runtime": 31.0923,
"eval_samples_per_second": 15.277,
"eval_steps_per_second": 3.827,
"eval_wer": 0.5439273552780931,
"step": 900
},
{
"epoch": 0.7524454477050414,
"grad_norm": 5.870357036590576,
"learning_rate": 0.00018824999999999997,
"loss": 1.6301,
"step": 1000
},
{
"epoch": 0.7524454477050414,
"eval_loss": Infinity,
"eval_runtime": 30.7668,
"eval_samples_per_second": 15.439,
"eval_steps_per_second": 3.868,
"eval_wer": 0.535527809307605,
"step": 1000
},
{
"epoch": 0.8276899924755455,
"grad_norm": 20.379985809326172,
"learning_rate": 0.00017575,
"loss": 1.6362,
"step": 1100
},
{
"epoch": 0.8276899924755455,
"eval_loss": Infinity,
"eval_runtime": 30.7224,
"eval_samples_per_second": 15.461,
"eval_steps_per_second": 3.873,
"eval_wer": 0.5366628830874007,
"step": 1100
},
{
"epoch": 0.9029345372460497,
"grad_norm": 7.096904277801514,
"learning_rate": 0.00016324999999999998,
"loss": 1.5247,
"step": 1200
},
{
"epoch": 0.9029345372460497,
"eval_loss": Infinity,
"eval_runtime": 30.6145,
"eval_samples_per_second": 15.516,
"eval_steps_per_second": 3.887,
"eval_wer": 0.5325766174801362,
"step": 1200
},
{
"epoch": 0.9781790820165538,
"grad_norm": 6.06346321105957,
"learning_rate": 0.00015074999999999998,
"loss": 1.4012,
"step": 1300
},
{
"epoch": 0.9781790820165538,
"eval_loss": Infinity,
"eval_runtime": 30.9705,
"eval_samples_per_second": 15.337,
"eval_steps_per_second": 3.842,
"eval_wer": 0.5346197502837684,
"step": 1300
},
{
"epoch": 1.053423626787058,
"grad_norm": 5.044532775878906,
"learning_rate": 0.00013824999999999997,
"loss": 1.6397,
"step": 1400
},
{
"epoch": 1.053423626787058,
"eval_loss": Infinity,
"eval_runtime": 30.897,
"eval_samples_per_second": 15.374,
"eval_steps_per_second": 3.852,
"eval_wer": 0.5300794551645857,
"step": 1400
},
{
"epoch": 1.1286681715575622,
"grad_norm": 4.185507297515869,
"learning_rate": 0.00012575,
"loss": 1.5258,
"step": 1500
},
{
"epoch": 1.1286681715575622,
"eval_loss": Infinity,
"eval_runtime": 30.6085,
"eval_samples_per_second": 15.519,
"eval_steps_per_second": 3.888,
"eval_wer": 0.5284903518728717,
"step": 1500
},
{
"epoch": 1.2039127163280663,
"grad_norm": 2.0165092945098877,
"learning_rate": 0.00011324999999999999,
"loss": 1.4144,
"step": 1600
},
{
"epoch": 1.2039127163280663,
"eval_loss": Infinity,
"eval_runtime": 30.8112,
"eval_samples_per_second": 15.416,
"eval_steps_per_second": 3.862,
"eval_wer": 0.5244040862656073,
"step": 1600
},
{
"epoch": 1.2791572610985704,
"grad_norm": 6.2198486328125,
"learning_rate": 0.00010074999999999998,
"loss": 1.4363,
"step": 1700
},
{
"epoch": 1.2791572610985704,
"eval_loss": Infinity,
"eval_runtime": 31.0265,
"eval_samples_per_second": 15.31,
"eval_steps_per_second": 3.835,
"eval_wer": 0.5144154370034052,
"step": 1700
},
{
"epoch": 1.3544018058690745,
"grad_norm": 6.625217437744141,
"learning_rate": 8.837499999999998e-05,
"loss": 1.3733,
"step": 1800
},
{
"epoch": 1.3544018058690745,
"eval_loss": Infinity,
"eval_runtime": 31.0253,
"eval_samples_per_second": 15.31,
"eval_steps_per_second": 3.836,
"eval_wer": 0.5357548240635641,
"step": 1800
},
{
"epoch": 1.4296463506395787,
"grad_norm": 3.523829460144043,
"learning_rate": 7.5875e-05,
"loss": 1.4592,
"step": 1900
},
{
"epoch": 1.4296463506395787,
"eval_loss": Infinity,
"eval_runtime": 30.9124,
"eval_samples_per_second": 15.366,
"eval_steps_per_second": 3.85,
"eval_wer": 0.5598183881952327,
"step": 1900
},
{
"epoch": 1.5048908954100828,
"grad_norm": 8.395014762878418,
"learning_rate": 6.3375e-05,
"loss": 1.3499,
"step": 2000
},
{
"epoch": 1.5048908954100828,
"eval_loss": Infinity,
"eval_runtime": 30.7712,
"eval_samples_per_second": 15.437,
"eval_steps_per_second": 3.867,
"eval_wer": 0.5191827468785472,
"step": 2000
},
{
"epoch": 1.580135440180587,
"grad_norm": 2.0930612087249756,
"learning_rate": 5.0874999999999997e-05,
"loss": 1.4039,
"step": 2100
},
{
"epoch": 1.580135440180587,
"eval_loss": Infinity,
"eval_runtime": 31.265,
"eval_samples_per_second": 15.193,
"eval_steps_per_second": 3.806,
"eval_wer": 0.5228149829738933,
"step": 2100
},
{
"epoch": 1.655379984951091,
"grad_norm": 1.5763949155807495,
"learning_rate": 3.8375e-05,
"loss": 1.4057,
"step": 2200
},
{
"epoch": 1.655379984951091,
"eval_loss": Infinity,
"eval_runtime": 30.8864,
"eval_samples_per_second": 15.379,
"eval_steps_per_second": 3.853,
"eval_wer": 0.52894438138479,
"step": 2200
},
{
"epoch": 1.7306245297215952,
"grad_norm": 2.9080910682678223,
"learning_rate": 2.5874999999999995e-05,
"loss": 1.4961,
"step": 2300
},
{
"epoch": 1.7306245297215952,
"eval_loss": Infinity,
"eval_runtime": 31.0242,
"eval_samples_per_second": 15.311,
"eval_steps_per_second": 3.836,
"eval_wer": 0.5323496027241771,
"step": 2300
},
{
"epoch": 1.8058690744920993,
"grad_norm": 3.506730079650879,
"learning_rate": 1.3375e-05,
"loss": 1.3975,
"step": 2400
},
{
"epoch": 1.8058690744920993,
"eval_loss": Infinity,
"eval_runtime": 30.8502,
"eval_samples_per_second": 15.397,
"eval_steps_per_second": 3.857,
"eval_wer": 0.5119182746878547,
"step": 2400
},
{
"epoch": 1.8811136192626035,
"grad_norm": 12.351927757263184,
"learning_rate": 8.75e-07,
"loss": 1.4725,
"step": 2500
},
{
"epoch": 1.8811136192626035,
"eval_loss": Infinity,
"eval_runtime": 31.0235,
"eval_samples_per_second": 15.311,
"eval_steps_per_second": 3.836,
"eval_wer": 0.5259931895573212,
"step": 2500
},
{
"epoch": 1.8811136192626035,
"step": 2500,
"total_flos": 1.2622443564523827e+19,
"train_loss": 2.5044897521972658,
"train_runtime": 3534.0194,
"train_samples_per_second": 5.659,
"train_steps_per_second": 0.707
}
],
"logging_steps": 100,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2622443564523827e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}