wav2vec2-xls-r-2b-ft-btb-cy / trainer_state.json
DewiBrynJones's picture
End of training
ed087b1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.536067892503536,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14144271570014144,
"eval_loss": 1.2105046510696411,
"eval_runtime": 283.0228,
"eval_samples_per_second": 19.984,
"eval_steps_per_second": 2.498,
"eval_wer": 0.8709347154900737,
"step": 100
},
{
"epoch": 0.2828854314002829,
"eval_loss": 0.9787197113037109,
"eval_runtime": 281.4976,
"eval_samples_per_second": 20.093,
"eval_steps_per_second": 2.512,
"eval_wer": 0.6986290412887332,
"step": 200
},
{
"epoch": 0.4243281471004243,
"eval_loss": 1.190703272819519,
"eval_runtime": 281.4959,
"eval_samples_per_second": 20.093,
"eval_steps_per_second": 2.512,
"eval_wer": 0.7126745692758074,
"step": 300
},
{
"epoch": 0.5657708628005658,
"eval_loss": 1.0559463500976562,
"eval_runtime": 280.2535,
"eval_samples_per_second": 20.182,
"eval_steps_per_second": 2.523,
"eval_wer": 0.7169298203516181,
"step": 400
},
{
"epoch": 0.7072135785007072,
"grad_norm": 5.1558146476745605,
"learning_rate": 0.00029699999999999996,
"loss": 1.4456,
"step": 500
},
{
"epoch": 0.7072135785007072,
"eval_loss": 1.2105939388275146,
"eval_runtime": 281.5867,
"eval_samples_per_second": 20.086,
"eval_steps_per_second": 2.511,
"eval_wer": 0.794404184863464,
"step": 500
},
{
"epoch": 0.8486562942008486,
"eval_loss": 1.0231719017028809,
"eval_runtime": 280.7447,
"eval_samples_per_second": 20.146,
"eval_steps_per_second": 2.518,
"eval_wer": 0.7033002191614276,
"step": 600
},
{
"epoch": 0.9900990099009901,
"eval_loss": 1.038698673248291,
"eval_runtime": 281.8275,
"eval_samples_per_second": 20.069,
"eval_steps_per_second": 2.509,
"eval_wer": 0.7335988865959591,
"step": 700
},
{
"epoch": 1.1315417256011315,
"eval_loss": 0.7234079837799072,
"eval_runtime": 280.5616,
"eval_samples_per_second": 20.16,
"eval_steps_per_second": 2.52,
"eval_wer": 0.5223240709635104,
"step": 800
},
{
"epoch": 1.272984441301273,
"eval_loss": 0.7242198586463928,
"eval_runtime": 286.25,
"eval_samples_per_second": 19.759,
"eval_steps_per_second": 2.47,
"eval_wer": 0.5566220345219242,
"step": 900
},
{
"epoch": 1.4144271570014144,
"grad_norm": 1.7350859642028809,
"learning_rate": 0.0002259,
"loss": 0.9155,
"step": 1000
},
{
"epoch": 1.4144271570014144,
"eval_loss": 0.7096899151802063,
"eval_runtime": 280.6062,
"eval_samples_per_second": 20.156,
"eval_steps_per_second": 2.52,
"eval_wer": 0.5258754459215178,
"step": 1000
},
{
"epoch": 1.5558698727015559,
"eval_loss": 0.6368164420127869,
"eval_runtime": 282.6165,
"eval_samples_per_second": 20.013,
"eval_steps_per_second": 2.502,
"eval_wer": 0.47965957991393515,
"step": 1100
},
{
"epoch": 1.6973125884016973,
"eval_loss": 0.6065136194229126,
"eval_runtime": 280.4494,
"eval_samples_per_second": 20.168,
"eval_steps_per_second": 2.521,
"eval_wer": 0.4652941082369503,
"step": 1200
},
{
"epoch": 1.8387553041018387,
"eval_loss": 0.6206967830657959,
"eval_runtime": 282.4382,
"eval_samples_per_second": 20.026,
"eval_steps_per_second": 2.503,
"eval_wer": 0.4716929820351618,
"step": 1300
},
{
"epoch": 1.9801980198019802,
"eval_loss": 0.5924867987632751,
"eval_runtime": 281.7785,
"eval_samples_per_second": 20.073,
"eval_steps_per_second": 2.509,
"eval_wer": 0.470669162227448,
"step": 1400
},
{
"epoch": 2.1216407355021216,
"grad_norm": 1.3663442134857178,
"learning_rate": 0.0001512,
"loss": 0.7436,
"step": 1500
},
{
"epoch": 2.1216407355021216,
"eval_loss": 0.5382007360458374,
"eval_runtime": 284.0884,
"eval_samples_per_second": 19.909,
"eval_steps_per_second": 2.489,
"eval_wer": 0.40460079026091406,
"step": 1500
},
{
"epoch": 2.263083451202263,
"eval_loss": 0.5200654864311218,
"eval_runtime": 282.3736,
"eval_samples_per_second": 20.03,
"eval_steps_per_second": 2.504,
"eval_wer": 0.3995616771448225,
"step": 1600
},
{
"epoch": 2.4045261669024045,
"eval_loss": 0.4883446991443634,
"eval_runtime": 283.5276,
"eval_samples_per_second": 19.949,
"eval_steps_per_second": 2.494,
"eval_wer": 0.36979091679864345,
"step": 1700
},
{
"epoch": 2.545968882602546,
"eval_loss": 0.47043663263320923,
"eval_runtime": 282.1263,
"eval_samples_per_second": 20.048,
"eval_steps_per_second": 2.506,
"eval_wer": 0.36585560941274337,
"step": 1800
},
{
"epoch": 2.6874115983026874,
"eval_loss": 0.4443446099758148,
"eval_runtime": 282.7688,
"eval_samples_per_second": 20.002,
"eval_steps_per_second": 2.5,
"eval_wer": 0.3521460223000752,
"step": 1900
},
{
"epoch": 2.828854314002829,
"grad_norm": 1.0020660161972046,
"learning_rate": 7.635e-05,
"loss": 0.5645,
"step": 2000
},
{
"epoch": 2.828854314002829,
"eval_loss": 0.4469930827617645,
"eval_runtime": 282.9465,
"eval_samples_per_second": 19.99,
"eval_steps_per_second": 2.499,
"eval_wer": 0.34761881908784054,
"step": 2000
},
{
"epoch": 2.9702970297029703,
"eval_loss": 0.41922062635421753,
"eval_runtime": 281.1649,
"eval_samples_per_second": 20.116,
"eval_steps_per_second": 2.515,
"eval_wer": 0.3241669466173953,
"step": 2100
},
{
"epoch": 3.1117397454031117,
"eval_loss": 0.41775766015052795,
"eval_runtime": 282.3459,
"eval_samples_per_second": 20.032,
"eval_steps_per_second": 2.504,
"eval_wer": 0.3160883684471533,
"step": 2200
},
{
"epoch": 3.253182461103253,
"eval_loss": 0.4122001826763153,
"eval_runtime": 282.9107,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.499,
"eval_wer": 0.305370254835149,
"step": 2300
},
{
"epoch": 3.3946251768033946,
"eval_loss": 0.396011620759964,
"eval_runtime": 280.8684,
"eval_samples_per_second": 20.138,
"eval_steps_per_second": 2.517,
"eval_wer": 0.2990353697749196,
"step": 2400
},
{
"epoch": 3.536067892503536,
"grad_norm": 0.5122537612915039,
"learning_rate": 1.9499999999999995e-06,
"loss": 0.4232,
"step": 2500
},
{
"epoch": 3.536067892503536,
"eval_loss": 0.3903259038925171,
"eval_runtime": 282.302,
"eval_samples_per_second": 20.035,
"eval_steps_per_second": 2.504,
"eval_wer": 0.2956919582153541,
"step": 2500
},
{
"epoch": 3.536067892503536,
"step": 2500,
"total_flos": 6.538015641955614e+19,
"train_loss": 0.8184805297851563,
"train_runtime": 14471.5924,
"train_samples_per_second": 5.528,
"train_steps_per_second": 0.173
}
],
"logging_steps": 500,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"total_flos": 6.538015641955614e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}