|
{ |
|
"best_metric": 13.366213870443335, |
|
"best_model_checkpoint": "./Whisper_tiny_fine_tune_Quran/checkpoint-8000", |
|
"epoch": 2.999911979579262, |
|
"eval_steps": 400, |
|
"global_step": 8520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017604084147522226, |
|
"grad_norm": 216358.46875, |
|
"learning_rate": 1e-05, |
|
"loss": 3.1564, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03520816829504445, |
|
"grad_norm": 176759.03125, |
|
"learning_rate": 2e-05, |
|
"loss": 0.089, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.052812252442566675, |
|
"grad_norm": 188886.625, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0535, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0704163365900889, |
|
"grad_norm": 126744.6484375, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0411, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08802042073761113, |
|
"grad_norm": 98751.015625, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0333, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.10562450488513335, |
|
"grad_norm": 137741.75, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0311, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12322858903265557, |
|
"grad_norm": 101993.84375, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0267, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1408326731801778, |
|
"grad_norm": 113272.96875, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0258, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1408326731801778, |
|
"eval_cer": 52.221755807134876, |
|
"eval_loss": 0.024616289883852005, |
|
"eval_runtime": 1237.0383, |
|
"eval_samples_per_second": 3.126, |
|
"eval_steps_per_second": 0.196, |
|
"eval_wer": 104.93483591877222, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15843675732770002, |
|
"grad_norm": 132086.53125, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0248, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.17604084147522225, |
|
"grad_norm": 93074.5546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1936449256227445, |
|
"grad_norm": 96403.0390625, |
|
"learning_rate": 9.988738792578126e-05, |
|
"loss": 0.0196, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2112490097702667, |
|
"grad_norm": 72429.7890625, |
|
"learning_rate": 9.955005896229543e-05, |
|
"loss": 0.0205, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.22885309391778894, |
|
"grad_norm": 79151.7109375, |
|
"learning_rate": 9.898953260211338e-05, |
|
"loss": 0.0187, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.24645717806531114, |
|
"grad_norm": 87347.1171875, |
|
"learning_rate": 9.820833372667812e-05, |
|
"loss": 0.0172, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2640612622128334, |
|
"grad_norm": 107349.203125, |
|
"learning_rate": 9.720998123301923e-05, |
|
"loss": 0.0182, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2816653463603556, |
|
"grad_norm": 79326.1875, |
|
"learning_rate": 9.599897218294122e-05, |
|
"loss": 0.0177, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2816653463603556, |
|
"eval_cer": 10.263303778282646, |
|
"eval_loss": 0.018445400521159172, |
|
"eval_runtime": 1170.4993, |
|
"eval_samples_per_second": 3.304, |
|
"eval_steps_per_second": 0.207, |
|
"eval_wer": 26.20891080924696, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.29926943050787785, |
|
"grad_norm": 52984.33203125, |
|
"learning_rate": 9.458076154608515e-05, |
|
"loss": 0.0152, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.31687351465540003, |
|
"grad_norm": 68339.46875, |
|
"learning_rate": 9.296173762811085e-05, |
|
"loss": 0.016, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.33447759880292227, |
|
"grad_norm": 87585.4921875, |
|
"learning_rate": 9.114919329468282e-05, |
|
"loss": 0.0145, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3520816829504445, |
|
"grad_norm": 105782.3515625, |
|
"learning_rate": 8.915129312088112e-05, |
|
"loss": 0.0141, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36968576709796674, |
|
"grad_norm": 57010.09765625, |
|
"learning_rate": 8.697703661401186e-05, |
|
"loss": 0.0134, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.387289851245489, |
|
"grad_norm": 50487.80078125, |
|
"learning_rate": 8.463621767547998e-05, |
|
"loss": 0.0132, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.40489393539301116, |
|
"grad_norm": 67209.078125, |
|
"learning_rate": 8.213938048432697e-05, |
|
"loss": 0.0133, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.4224980195405334, |
|
"grad_norm": 51471.0, |
|
"learning_rate": 7.949777200115616e-05, |
|
"loss": 0.0116, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4224980195405334, |
|
"eval_cer": 7.321043473802604, |
|
"eval_loss": 0.016037294641137123, |
|
"eval_runtime": 1114.0654, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 20.951698674675555, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.44010210368805563, |
|
"grad_norm": 47404.3046875, |
|
"learning_rate": 7.672329130639005e-05, |
|
"loss": 0.0126, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.45770618783557787, |
|
"grad_norm": 45749.5, |
|
"learning_rate": 7.38284360010654e-05, |
|
"loss": 0.0118, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.47531027198310005, |
|
"grad_norm": 41610.55078125, |
|
"learning_rate": 7.082624591160201e-05, |
|
"loss": 0.0118, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4929143561306223, |
|
"grad_norm": 47256.76953125, |
|
"learning_rate": 6.773024435212678e-05, |
|
"loss": 0.0113, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5105184402781445, |
|
"grad_norm": 41286.640625, |
|
"learning_rate": 6.455437720893564e-05, |
|
"loss": 0.0107, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5281225244256668, |
|
"grad_norm": 36747.9375, |
|
"learning_rate": 6.131295012148612e-05, |
|
"loss": 0.0093, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.545726608573189, |
|
"grad_norm": 35157.80859375, |
|
"learning_rate": 5.8020564042888015e-05, |
|
"loss": 0.01, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5633306927207112, |
|
"grad_norm": 50015.47265625, |
|
"learning_rate": 5.469204947015897e-05, |
|
"loss": 0.0101, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5633306927207112, |
|
"eval_cer": 5.822676104629366, |
|
"eval_loss": 0.014131452888250351, |
|
"eval_runtime": 1123.1179, |
|
"eval_samples_per_second": 3.443, |
|
"eval_steps_per_second": 0.215, |
|
"eval_wer": 17.501997630397046, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5809347768682335, |
|
"grad_norm": 51103.59765625, |
|
"learning_rate": 5.134239964050307e-05, |
|
"loss": 0.0093, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5985388610157557, |
|
"grad_norm": 32847.37109375, |
|
"learning_rate": 4.798670299452926e-05, |
|
"loss": 0.0084, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6161429451632778, |
|
"grad_norm": 29846.638671875, |
|
"learning_rate": 4.4640075210627615e-05, |
|
"loss": 0.0092, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6337470293108001, |
|
"grad_norm": 41928.12109375, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 0.0075, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6513511134583223, |
|
"grad_norm": 45941.98046875, |
|
"learning_rate": 3.803421678562213e-05, |
|
"loss": 0.0078, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6689551976058445, |
|
"grad_norm": 52671.2109375, |
|
"learning_rate": 3.480474212128766e-05, |
|
"loss": 0.0073, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6865592817533668, |
|
"grad_norm": 26375.0625, |
|
"learning_rate": 3.164371423727362e-05, |
|
"loss": 0.0083, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.704163365900889, |
|
"grad_norm": 63857.80078125, |
|
"learning_rate": 2.8565371929847284e-05, |
|
"loss": 0.008, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.704163365900889, |
|
"eval_cer": 5.123480724061357, |
|
"eval_loss": 0.012732591480016708, |
|
"eval_runtime": 1120.5002, |
|
"eval_samples_per_second": 3.451, |
|
"eval_steps_per_second": 0.216, |
|
"eval_wer": 16.3695478466922, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7217674500484113, |
|
"grad_norm": 28659.234375, |
|
"learning_rate": 2.5583581539504464e-05, |
|
"loss": 0.0078, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7393715341959335, |
|
"grad_norm": 39795.39453125, |
|
"learning_rate": 2.2711774490274766e-05, |
|
"loss": 0.0078, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7569756183434557, |
|
"grad_norm": 51697.984375, |
|
"learning_rate": 1.996288678810105e-05, |
|
"loss": 0.0068, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.774579702490978, |
|
"grad_norm": 46724.51953125, |
|
"learning_rate": 1.734930075082076e-05, |
|
"loss": 0.0072, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7921837866385001, |
|
"grad_norm": 28872.095703125, |
|
"learning_rate": 1.4882789232226125e-05, |
|
"loss": 0.0055, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8097878707860223, |
|
"grad_norm": 48174.0859375, |
|
"learning_rate": 1.257446259144494e-05, |
|
"loss": 0.0075, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8273919549335446, |
|
"grad_norm": 70300.703125, |
|
"learning_rate": 1.0434718646516917e-05, |
|
"loss": 0.0063, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8449960390810668, |
|
"grad_norm": 27386.814453125, |
|
"learning_rate": 8.473195837599418e-06, |
|
"loss": 0.0057, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8449960390810668, |
|
"eval_cer": 4.816821705177955, |
|
"eval_loss": 0.0119090611115098, |
|
"eval_runtime": 1122.3369, |
|
"eval_samples_per_second": 3.445, |
|
"eval_steps_per_second": 0.216, |
|
"eval_wer": 15.234342710715564, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.862600123228589, |
|
"grad_norm": 26832.78515625, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.0067, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8802042073761113, |
|
"grad_norm": 46090.953125, |
|
"learning_rate": 5.1193136180493095e-06, |
|
"loss": 0.0054, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8978082915236335, |
|
"grad_norm": 34130.36328125, |
|
"learning_rate": 3.7420617127538248e-06, |
|
"loss": 0.0064, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9154123756711557, |
|
"grad_norm": 60160.4296875, |
|
"learning_rate": 2.573177902642726e-06, |
|
"loss": 0.0058, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.933016459818678, |
|
"grad_norm": 23166.681640625, |
|
"learning_rate": 1.6179274049310966e-06, |
|
"loss": 0.0058, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9506205439662001, |
|
"grad_norm": 22671.916015625, |
|
"learning_rate": 8.806131292167618e-07, |
|
"loss": 0.0054, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9682246281137223, |
|
"grad_norm": 36751.93359375, |
|
"learning_rate": 3.6455629509730136e-07, |
|
"loss": 0.0056, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.9858287122612446, |
|
"grad_norm": 34122.0859375, |
|
"learning_rate": 7.208147179291192e-08, |
|
"loss": 0.0056, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9858287122612446, |
|
"eval_cer": 4.667818106660942, |
|
"eval_loss": 0.01161186769604683, |
|
"eval_runtime": 1115.7233, |
|
"eval_samples_per_second": 3.466, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 14.63643126773758, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0035208168295044, |
|
"grad_norm": 41080.359375, |
|
"learning_rate": 5.7252149230569694e-05, |
|
"loss": 0.0048, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0211249009770267, |
|
"grad_norm": 49550.32421875, |
|
"learning_rate": 5.574886573911056e-05, |
|
"loss": 0.0058, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.038728985124549, |
|
"grad_norm": 45467.51171875, |
|
"learning_rate": 5.4240296223775465e-05, |
|
"loss": 0.0064, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.0563330692720712, |
|
"grad_norm": 57751.12890625, |
|
"learning_rate": 5.272782779896898e-05, |
|
"loss": 0.0068, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0739371534195934, |
|
"grad_norm": 42728.49609375, |
|
"learning_rate": 5.1212851164103436e-05, |
|
"loss": 0.0068, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.0915412375671156, |
|
"grad_norm": 55132.671875, |
|
"learning_rate": 4.969675932486503e-05, |
|
"loss": 0.0057, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1091453217146379, |
|
"grad_norm": 20835.39453125, |
|
"learning_rate": 4.8180946312358995e-05, |
|
"loss": 0.0057, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.12674940586216, |
|
"grad_norm": 29240.0234375, |
|
"learning_rate": 4.666680590131225e-05, |
|
"loss": 0.0071, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.12674940586216, |
|
"eval_cer": 5.304207669359413, |
|
"eval_loss": 0.013532178476452827, |
|
"eval_runtime": 1162.1054, |
|
"eval_samples_per_second": 3.328, |
|
"eval_steps_per_second": 0.208, |
|
"eval_wer": 15.892871903672884, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.1443534900096823, |
|
"grad_norm": 38637.1328125, |
|
"learning_rate": 4.515573032851158e-05, |
|
"loss": 0.0062, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.1619575741572046, |
|
"grad_norm": 35056.9765625, |
|
"learning_rate": 4.364910901265606e-05, |
|
"loss": 0.0058, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.1795616583047268, |
|
"grad_norm": 33209.58203125, |
|
"learning_rate": 4.214832727680054e-05, |
|
"loss": 0.0062, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.1971657424522488, |
|
"grad_norm": 36293.5625, |
|
"learning_rate": 4.0654765074565124e-05, |
|
"loss": 0.0058, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.214769826599771, |
|
"grad_norm": 45826.09765625, |
|
"learning_rate": 3.9169795721281845e-05, |
|
"loss": 0.0057, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2323739107472933, |
|
"grad_norm": 57588.5078125, |
|
"learning_rate": 3.769478463124507e-05, |
|
"loss": 0.0063, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2499779948948155, |
|
"grad_norm": 54418.765625, |
|
"learning_rate": 3.623108806222684e-05, |
|
"loss": 0.0055, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.2675820790423378, |
|
"grad_norm": 69616.1875, |
|
"learning_rate": 3.4780051868411675e-05, |
|
"loss": 0.0059, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.2675820790423378, |
|
"eval_cer": 5.043691700339344, |
|
"eval_loss": 0.01317252404987812, |
|
"eval_runtime": 1129.5416, |
|
"eval_samples_per_second": 3.424, |
|
"eval_steps_per_second": 0.214, |
|
"eval_wer": 15.716529358278455, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.28518616318986, |
|
"grad_norm": 38242.609375, |
|
"learning_rate": 3.334301026289712e-05, |
|
"loss": 0.0061, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.3027902473373822, |
|
"grad_norm": 31986.978515625, |
|
"learning_rate": 3.1921284590898456e-05, |
|
"loss": 0.0045, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.3203943314849045, |
|
"grad_norm": 41348.12890625, |
|
"learning_rate": 3.051618211478504e-05, |
|
"loss": 0.0043, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.3379984156324267, |
|
"grad_norm": 37051.91796875, |
|
"learning_rate": 2.912899481206582e-05, |
|
"loss": 0.0047, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.355602499779949, |
|
"grad_norm": 42654.66796875, |
|
"learning_rate": 2.7760998187429067e-05, |
|
"loss": 0.0046, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.3732065839274712, |
|
"grad_norm": 32946.38671875, |
|
"learning_rate": 2.6413450099928783e-05, |
|
"loss": 0.0056, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.3908106680749934, |
|
"grad_norm": 58651.94921875, |
|
"learning_rate": 2.508758960639599e-05, |
|
"loss": 0.0057, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.4084147522225157, |
|
"grad_norm": 40705.35546875, |
|
"learning_rate": 2.3784635822138424e-05, |
|
"loss": 0.0056, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4084147522225157, |
|
"eval_cer": 5.364770422305037, |
|
"eval_loss": 0.01243683509528637, |
|
"eval_runtime": 1130.3251, |
|
"eval_samples_per_second": 3.421, |
|
"eval_steps_per_second": 0.214, |
|
"eval_wer": 14.575813517758245, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.426018836370038, |
|
"grad_norm": 23855.8359375, |
|
"learning_rate": 2.2505786799976525e-05, |
|
"loss": 0.0043, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.4436229205175601, |
|
"grad_norm": 62266.87890625, |
|
"learning_rate": 2.1252218428645846e-05, |
|
"loss": 0.0048, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.4612270046650824, |
|
"grad_norm": 19551.705078125, |
|
"learning_rate": 2.0025083351579337e-05, |
|
"loss": 0.0048, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.4788310888126044, |
|
"grad_norm": 33841.171875, |
|
"learning_rate": 1.8825509907063327e-05, |
|
"loss": 0.0046, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.4964351729601266, |
|
"grad_norm": 50253.11328125, |
|
"learning_rate": 1.765460109074188e-05, |
|
"loss": 0.0048, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.5140392571076489, |
|
"grad_norm": 34579.1875, |
|
"learning_rate": 1.6513433541423528e-05, |
|
"loss": 0.0042, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.531643341255171, |
|
"grad_norm": 21523.380859375, |
|
"learning_rate": 1.5403056551122697e-05, |
|
"loss": 0.0047, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.5492474254026933, |
|
"grad_norm": 14405.34375, |
|
"learning_rate": 1.4324491100246385e-05, |
|
"loss": 0.0041, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.5492474254026933, |
|
"eval_cer": 4.753054573769599, |
|
"eval_loss": 0.012177330441772938, |
|
"eval_runtime": 1116.052, |
|
"eval_samples_per_second": 3.465, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 14.225883779241174, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.5668515095502156, |
|
"grad_norm": 32692.677734375, |
|
"learning_rate": 1.3278728918812978e-05, |
|
"loss": 0.0045, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.5844555936977378, |
|
"grad_norm": 27757.576171875, |
|
"learning_rate": 1.2266731574566536e-05, |
|
"loss": 0.0044, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.60205967784526, |
|
"grad_norm": 43189.07421875, |
|
"learning_rate": 1.1289429588824962e-05, |
|
"loss": 0.0047, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.6196637619927823, |
|
"grad_norm": 34926.53515625, |
|
"learning_rate": 1.0347721580875126e-05, |
|
"loss": 0.0045, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6372678461403045, |
|
"grad_norm": 25656.78125, |
|
"learning_rate": 9.442473441701422e-06, |
|
"loss": 0.0045, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.6548719302878268, |
|
"grad_norm": 36669.20703125, |
|
"learning_rate": 8.574517537807897e-06, |
|
"loss": 0.0033, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.672476014435349, |
|
"grad_norm": 36220.40625, |
|
"learning_rate": 7.744651945865571e-06, |
|
"loss": 0.0041, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.6900800985828712, |
|
"grad_norm": 19782.095703125, |
|
"learning_rate": 6.953639718889076e-06, |
|
"loss": 0.0038, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.6900800985828712, |
|
"eval_cer": 4.743121000535131, |
|
"eval_loss": 0.012049088254570961, |
|
"eval_runtime": 1111.599, |
|
"eval_samples_per_second": 3.479, |
|
"eval_steps_per_second": 0.218, |
|
"eval_wer": 13.80431488165762, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.7076841827303935, |
|
"grad_norm": 36681.79296875, |
|
"learning_rate": 6.202208184617064e-06, |
|
"loss": 0.0034, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.7252882668779157, |
|
"grad_norm": 40895.7734375, |
|
"learning_rate": 5.491048276741784e-06, |
|
"loss": 0.0046, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.742892351025438, |
|
"grad_norm": 13767.77734375, |
|
"learning_rate": 4.820813899602506e-06, |
|
"loss": 0.0045, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.7604964351729602, |
|
"grad_norm": 39934.8203125, |
|
"learning_rate": 4.192121326927073e-06, |
|
"loss": 0.0042, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7781005193204824, |
|
"grad_norm": 94029.875, |
|
"learning_rate": 3.605548635174533e-06, |
|
"loss": 0.0036, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.7957046034680046, |
|
"grad_norm": 13603.7431640625, |
|
"learning_rate": 3.061635171999566e-06, |
|
"loss": 0.0038, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.8133086876155269, |
|
"grad_norm": 12118.0068359375, |
|
"learning_rate": 2.5608810603278634e-06, |
|
"loss": 0.004, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.8309127717630491, |
|
"grad_norm": 19234.5703125, |
|
"learning_rate": 2.1037467384981026e-06, |
|
"loss": 0.004, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.8309127717630491, |
|
"eval_cer": 4.95685304399932, |
|
"eval_loss": 0.01188047043979168, |
|
"eval_runtime": 1212.5658, |
|
"eval_samples_per_second": 3.189, |
|
"eval_steps_per_second": 0.2, |
|
"eval_wer": 14.181798142892568, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.8485168559105714, |
|
"grad_norm": 19712.021484375, |
|
"learning_rate": 1.6906525368936055e-06, |
|
"loss": 0.0034, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.8661209400580936, |
|
"grad_norm": 8896.380859375, |
|
"learning_rate": 1.3219782914527634e-06, |
|
"loss": 0.0039, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.8837250242056158, |
|
"grad_norm": 20239.08984375, |
|
"learning_rate": 9.980629944137509e-07, |
|
"loss": 0.0037, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.901329108353138, |
|
"grad_norm": 22119.759765625, |
|
"learning_rate": 7.192044826145771e-07, |
|
"loss": 0.0035, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.9189331925006603, |
|
"grad_norm": 23297.744140625, |
|
"learning_rate": 4.856591636351604e-07, |
|
"loss": 0.0039, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.9365372766481825, |
|
"grad_norm": 8485.7099609375, |
|
"learning_rate": 2.976417800331144e-07, |
|
"loss": 0.0047, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9541413607957046, |
|
"grad_norm": 28440.11328125, |
|
"learning_rate": 1.5532521189017267e-07, |
|
"loss": 0.0036, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.9717454449432268, |
|
"grad_norm": 29707.8125, |
|
"learning_rate": 5.8840317850683555e-08, |
|
"loss": 0.0036, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.9717454449432268, |
|
"eval_cer": 4.91711875106145, |
|
"eval_loss": 0.011817782185971737, |
|
"eval_runtime": 1130.1756, |
|
"eval_samples_per_second": 3.422, |
|
"eval_steps_per_second": 0.214, |
|
"eval_wer": 14.074339404292838, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.989349529090749, |
|
"grad_norm": 46225.79296875, |
|
"learning_rate": 8.275814798408554e-09, |
|
"loss": 0.0037, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.007041633659009, |
|
"grad_norm": 14135.712890625, |
|
"learning_rate": 2.7527634528122847e-05, |
|
"loss": 0.004, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.024645717806531, |
|
"grad_norm": 22203.5390625, |
|
"learning_rate": 2.6657185738447493e-05, |
|
"loss": 0.0042, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.0422498019540534, |
|
"grad_norm": 21549.859375, |
|
"learning_rate": 2.5795691222129982e-05, |
|
"loss": 0.004, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.0598538861015756, |
|
"grad_norm": 8696.861328125, |
|
"learning_rate": 2.4943481447352425e-05, |
|
"loss": 0.0038, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.077457970249098, |
|
"grad_norm": 15930.8994140625, |
|
"learning_rate": 2.410088332068215e-05, |
|
"loss": 0.0031, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.09506205439662, |
|
"grad_norm": 24799.234375, |
|
"learning_rate": 2.326822006167068e-05, |
|
"loss": 0.0033, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.1126661385441423, |
|
"grad_norm": 57942.671875, |
|
"learning_rate": 2.2445811078867185e-05, |
|
"loss": 0.0033, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.1126661385441423, |
|
"eval_cer": 5.04529388957071, |
|
"eval_loss": 0.01286122016608715, |
|
"eval_runtime": 1149.1813, |
|
"eval_samples_per_second": 3.365, |
|
"eval_steps_per_second": 0.211, |
|
"eval_wer": 15.082798335767228, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.1302702226916646, |
|
"grad_norm": 27342.32421875, |
|
"learning_rate": 2.1633971847293678e-05, |
|
"loss": 0.0032, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.147874306839187, |
|
"grad_norm": 9434.23828125, |
|
"learning_rate": 2.0833013787429323e-05, |
|
"loss": 0.0034, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.165478390986709, |
|
"grad_norm": 49190.4453125, |
|
"learning_rate": 2.0043244145749896e-05, |
|
"loss": 0.0041, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.1830824751342313, |
|
"grad_norm": 29072.685546875, |
|
"learning_rate": 1.9264965876868396e-05, |
|
"loss": 0.0029, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.2006865592817535, |
|
"grad_norm": 26559.041015625, |
|
"learning_rate": 1.8498477527322123e-05, |
|
"loss": 0.0036, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.2182906434292757, |
|
"grad_norm": 37322.6953125, |
|
"learning_rate": 1.774407312105051e-05, |
|
"loss": 0.0033, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.235894727576798, |
|
"grad_norm": 35458.96875, |
|
"learning_rate": 1.7002042046608017e-05, |
|
"loss": 0.0033, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.25349881172432, |
|
"grad_norm": 32754.345703125, |
|
"learning_rate": 1.627266894615504e-05, |
|
"loss": 0.0033, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.25349881172432, |
|
"eval_cer": 5.1423865569914735, |
|
"eval_loss": 0.012781694531440735, |
|
"eval_runtime": 1113.544, |
|
"eval_samples_per_second": 3.473, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 14.93400931309068, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.2711028958718424, |
|
"grad_norm": 28290.462890625, |
|
"learning_rate": 1.55562336062695e-05, |
|
"loss": 0.0031, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.2887069800193647, |
|
"grad_norm": 34798.16796875, |
|
"learning_rate": 1.4853010850621062e-05, |
|
"loss": 0.0037, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.306311064166887, |
|
"grad_norm": 15387.654296875, |
|
"learning_rate": 1.4163270434549237e-05, |
|
"loss": 0.0025, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.323915148314409, |
|
"grad_norm": 42429.45703125, |
|
"learning_rate": 1.348727694158547e-05, |
|
"loss": 0.0036, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.3415192324619314, |
|
"grad_norm": 62645.078125, |
|
"learning_rate": 1.2825289681959313e-05, |
|
"loss": 0.0037, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.3591233166094536, |
|
"grad_norm": 37475.56640625, |
|
"learning_rate": 1.2177562593127274e-05, |
|
"loss": 0.0035, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.376727400756976, |
|
"grad_norm": 21763.126953125, |
|
"learning_rate": 1.154434414236284e-05, |
|
"loss": 0.0032, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.3943314849044977, |
|
"grad_norm": 23169.912109375, |
|
"learning_rate": 1.0925877231444687e-05, |
|
"loss": 0.0033, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.3943314849044977, |
|
"eval_cer": 5.017095359098672, |
|
"eval_loss": 0.012343456037342548, |
|
"eval_runtime": 1116.3074, |
|
"eval_samples_per_second": 3.464, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 14.73286859725016, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.41193556905202, |
|
"grad_norm": 22481.037109375, |
|
"learning_rate": 1.0322399103479884e-05, |
|
"loss": 0.0028, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.429539653199542, |
|
"grad_norm": 51517.1015625, |
|
"learning_rate": 9.734141251897733e-06, |
|
"loss": 0.0029, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.4471437373470644, |
|
"grad_norm": 34979.6171875, |
|
"learning_rate": 9.16132933164922e-06, |
|
"loss": 0.003, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.4647478214945866, |
|
"grad_norm": 15414.427734375, |
|
"learning_rate": 8.604183072646055e-06, |
|
"loss": 0.0026, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.482351905642109, |
|
"grad_norm": 22982.21875, |
|
"learning_rate": 8.06291619547253e-06, |
|
"loss": 0.0025, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.499955989789631, |
|
"grad_norm": 32445.208984375, |
|
"learning_rate": 7.5377363294026505e-06, |
|
"loss": 0.0031, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.5175600739371533, |
|
"grad_norm": 38354.1640625, |
|
"learning_rate": 7.028844932753714e-06, |
|
"loss": 0.0033, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.5351641580846755, |
|
"grad_norm": 13522.6416015625, |
|
"learning_rate": 6.536437215607261e-06, |
|
"loss": 0.0033, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.5351641580846755, |
|
"eval_cer": 4.367567844703002, |
|
"eval_loss": 0.012211957015097141, |
|
"eval_runtime": 1109.9647, |
|
"eval_samples_per_second": 3.484, |
|
"eval_steps_per_second": 0.218, |
|
"eval_wer": 13.674813324883587, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.552768242232198, |
|
"grad_norm": 19421.33203125, |
|
"learning_rate": 6.060702064926682e-06, |
|
"loss": 0.003, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.57037232637972, |
|
"grad_norm": 12878.841796875, |
|
"learning_rate": 5.601821972100435e-06, |
|
"loss": 0.0032, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.5879764105272423, |
|
"grad_norm": 16374.8388671875, |
|
"learning_rate": 5.159972962938581e-06, |
|
"loss": 0.0021, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.6055804946747645, |
|
"grad_norm": 19888.9140625, |
|
"learning_rate": 4.735324530149521e-06, |
|
"loss": 0.0032, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.6231845788222867, |
|
"grad_norm": 25133.033203125, |
|
"learning_rate": 4.328039568322817e-06, |
|
"loss": 0.0031, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.640788662969809, |
|
"grad_norm": 26627.578125, |
|
"learning_rate": 3.938274311443019e-06, |
|
"loss": 0.003, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.658392747117331, |
|
"grad_norm": 27871.9375, |
|
"learning_rate": 3.5661782729585146e-06, |
|
"loss": 0.0021, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.6759968312648534, |
|
"grad_norm": 42111.23828125, |
|
"learning_rate": 3.2118941884283825e-06, |
|
"loss": 0.0034, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.6759968312648534, |
|
"eval_cer": 4.530029832763488, |
|
"eval_loss": 0.012235511094331741, |
|
"eval_runtime": 1115.0261, |
|
"eval_samples_per_second": 3.468, |
|
"eval_steps_per_second": 0.217, |
|
"eval_wer": 13.561843881740279, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.6936009154123757, |
|
"grad_norm": 41923.59765625, |
|
"learning_rate": 2.8755579607691928e-06, |
|
"loss": 0.0032, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.711204999559898, |
|
"grad_norm": 18917.880859375, |
|
"learning_rate": 2.557298608122849e-06, |
|
"loss": 0.0027, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.72880908370742, |
|
"grad_norm": 20727.607421875, |
|
"learning_rate": 2.257238214365365e-06, |
|
"loss": 0.0025, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.7464131678549424, |
|
"grad_norm": 8564.83984375, |
|
"learning_rate": 1.975491882275665e-06, |
|
"loss": 0.0031, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.7640172520024646, |
|
"grad_norm": 38865.1953125, |
|
"learning_rate": 1.7121676893823213e-06, |
|
"loss": 0.0028, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.781621336149987, |
|
"grad_norm": 9149.9501953125, |
|
"learning_rate": 1.467366646505125e-06, |
|
"loss": 0.0026, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.799225420297509, |
|
"grad_norm": 33032.7890625, |
|
"learning_rate": 1.241182659007495e-06, |
|
"loss": 0.0025, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.8168295044450313, |
|
"grad_norm": 39062.0859375, |
|
"learning_rate": 1.0337024907744576e-06, |
|
"loss": 0.0025, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.8168295044450313, |
|
"eval_cer": 4.469787517664137, |
|
"eval_loss": 0.012169728055596352, |
|
"eval_runtime": 1112.2456, |
|
"eval_samples_per_second": 3.477, |
|
"eval_steps_per_second": 0.218, |
|
"eval_wer": 13.366213870443335, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.8344335885925536, |
|
"grad_norm": 21345.421875, |
|
"learning_rate": 8.450057309301873e-07, |
|
"loss": 0.003, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.852037672740076, |
|
"grad_norm": 21389.115234375, |
|
"learning_rate": 6.751647633076952e-07, |
|
"loss": 0.0027, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.869641756887598, |
|
"grad_norm": 40714.12890625, |
|
"learning_rate": 5.242447386824833e-07, |
|
"loss": 0.0032, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.8872458410351203, |
|
"grad_norm": 13599.498046875, |
|
"learning_rate": 3.9230354978077586e-07, |
|
"loss": 0.0024, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.9048499251826425, |
|
"grad_norm": 18847.646484375, |
|
"learning_rate": 2.7939180907197717e-07, |
|
"loss": 0.0029, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.9224540093301647, |
|
"grad_norm": 25907.548828125, |
|
"learning_rate": 1.855528293537856e-07, |
|
"loss": 0.0031, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.940058093477687, |
|
"grad_norm": 36510.4765625, |
|
"learning_rate": 1.108226071374452e-07, |
|
"loss": 0.0027, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.9576621776252088, |
|
"grad_norm": 34380.96875, |
|
"learning_rate": 5.522980883952422e-08, |
|
"loss": 0.0028, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.9576621776252088, |
|
"eval_cer": 4.579377261089553, |
|
"eval_loss": 0.0121904406696558, |
|
"eval_runtime": 1111.3761, |
|
"eval_samples_per_second": 3.479, |
|
"eval_steps_per_second": 0.218, |
|
"eval_wer": 13.553577824924917, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.975266261772731, |
|
"grad_norm": 29869.33984375, |
|
"learning_rate": 1.8795759785555922e-08, |
|
"loss": 0.0027, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.9928703459202532, |
|
"grad_norm": 46153.02734375, |
|
"learning_rate": 1.5344360296265513e-09, |
|
"loss": 0.003, |
|
"step": 8500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 8520, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.34241856782336e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|