{
"best_metric": 13.366213870443335,
"best_model_checkpoint": "./Whisper_tiny_fine_tune_Quran/checkpoint-8000",
"epoch": 2.999911979579262,
"eval_steps": 400,
"global_step": 8520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017604084147522226,
"grad_norm": 216358.46875,
"learning_rate": 1e-05,
"loss": 3.1564,
"step": 50
},
{
"epoch": 0.03520816829504445,
"grad_norm": 176759.03125,
"learning_rate": 2e-05,
"loss": 0.089,
"step": 100
},
{
"epoch": 0.052812252442566675,
"grad_norm": 188886.625,
"learning_rate": 3e-05,
"loss": 0.0535,
"step": 150
},
{
"epoch": 0.0704163365900889,
"grad_norm": 126744.6484375,
"learning_rate": 4e-05,
"loss": 0.0411,
"step": 200
},
{
"epoch": 0.08802042073761113,
"grad_norm": 98751.015625,
"learning_rate": 5e-05,
"loss": 0.0333,
"step": 250
},
{
"epoch": 0.10562450488513335,
"grad_norm": 137741.75,
"learning_rate": 6e-05,
"loss": 0.0311,
"step": 300
},
{
"epoch": 0.12322858903265557,
"grad_norm": 101993.84375,
"learning_rate": 7e-05,
"loss": 0.0267,
"step": 350
},
{
"epoch": 0.1408326731801778,
"grad_norm": 113272.96875,
"learning_rate": 8e-05,
"loss": 0.0258,
"step": 400
},
{
"epoch": 0.1408326731801778,
"eval_cer": 52.221755807134876,
"eval_loss": 0.024616289883852005,
"eval_runtime": 1237.0383,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.196,
"eval_wer": 104.93483591877222,
"step": 400
},
{
"epoch": 0.15843675732770002,
"grad_norm": 132086.53125,
"learning_rate": 9e-05,
"loss": 0.0248,
"step": 450
},
{
"epoch": 0.17604084147522225,
"grad_norm": 93074.5546875,
"learning_rate": 0.0001,
"loss": 0.0208,
"step": 500
},
{
"epoch": 0.1936449256227445,
"grad_norm": 96403.0390625,
"learning_rate": 9.988738792578126e-05,
"loss": 0.0196,
"step": 550
},
{
"epoch": 0.2112490097702667,
"grad_norm": 72429.7890625,
"learning_rate": 9.955005896229543e-05,
"loss": 0.0205,
"step": 600
},
{
"epoch": 0.22885309391778894,
"grad_norm": 79151.7109375,
"learning_rate": 9.898953260211338e-05,
"loss": 0.0187,
"step": 650
},
{
"epoch": 0.24645717806531114,
"grad_norm": 87347.1171875,
"learning_rate": 9.820833372667812e-05,
"loss": 0.0172,
"step": 700
},
{
"epoch": 0.2640612622128334,
"grad_norm": 107349.203125,
"learning_rate": 9.720998123301923e-05,
"loss": 0.0182,
"step": 750
},
{
"epoch": 0.2816653463603556,
"grad_norm": 79326.1875,
"learning_rate": 9.599897218294122e-05,
"loss": 0.0177,
"step": 800
},
{
"epoch": 0.2816653463603556,
"eval_cer": 10.263303778282646,
"eval_loss": 0.018445400521159172,
"eval_runtime": 1170.4993,
"eval_samples_per_second": 3.304,
"eval_steps_per_second": 0.207,
"eval_wer": 26.20891080924696,
"step": 800
},
{
"epoch": 0.29926943050787785,
"grad_norm": 52984.33203125,
"learning_rate": 9.458076154608515e-05,
"loss": 0.0152,
"step": 850
},
{
"epoch": 0.31687351465540003,
"grad_norm": 68339.46875,
"learning_rate": 9.296173762811085e-05,
"loss": 0.016,
"step": 900
},
{
"epoch": 0.33447759880292227,
"grad_norm": 87585.4921875,
"learning_rate": 9.114919329468282e-05,
"loss": 0.0145,
"step": 950
},
{
"epoch": 0.3520816829504445,
"grad_norm": 105782.3515625,
"learning_rate": 8.915129312088112e-05,
"loss": 0.0141,
"step": 1000
},
{
"epoch": 0.36968576709796674,
"grad_norm": 57010.09765625,
"learning_rate": 8.697703661401186e-05,
"loss": 0.0134,
"step": 1050
},
{
"epoch": 0.387289851245489,
"grad_norm": 50487.80078125,
"learning_rate": 8.463621767547998e-05,
"loss": 0.0132,
"step": 1100
},
{
"epoch": 0.40489393539301116,
"grad_norm": 67209.078125,
"learning_rate": 8.213938048432697e-05,
"loss": 0.0133,
"step": 1150
},
{
"epoch": 0.4224980195405334,
"grad_norm": 51471.0,
"learning_rate": 7.949777200115616e-05,
"loss": 0.0116,
"step": 1200
},
{
"epoch": 0.4224980195405334,
"eval_cer": 7.321043473802604,
"eval_loss": 0.016037294641137123,
"eval_runtime": 1114.0654,
"eval_samples_per_second": 3.471,
"eval_steps_per_second": 0.217,
"eval_wer": 20.951698674675555,
"step": 1200
},
{
"epoch": 0.44010210368805563,
"grad_norm": 47404.3046875,
"learning_rate": 7.672329130639005e-05,
"loss": 0.0126,
"step": 1250
},
{
"epoch": 0.45770618783557787,
"grad_norm": 45749.5,
"learning_rate": 7.38284360010654e-05,
"loss": 0.0118,
"step": 1300
},
{
"epoch": 0.47531027198310005,
"grad_norm": 41610.55078125,
"learning_rate": 7.082624591160201e-05,
"loss": 0.0118,
"step": 1350
},
{
"epoch": 0.4929143561306223,
"grad_norm": 47256.76953125,
"learning_rate": 6.773024435212678e-05,
"loss": 0.0113,
"step": 1400
},
{
"epoch": 0.5105184402781445,
"grad_norm": 41286.640625,
"learning_rate": 6.455437720893564e-05,
"loss": 0.0107,
"step": 1450
},
{
"epoch": 0.5281225244256668,
"grad_norm": 36747.9375,
"learning_rate": 6.131295012148612e-05,
"loss": 0.0093,
"step": 1500
},
{
"epoch": 0.545726608573189,
"grad_norm": 35157.80859375,
"learning_rate": 5.8020564042888015e-05,
"loss": 0.01,
"step": 1550
},
{
"epoch": 0.5633306927207112,
"grad_norm": 50015.47265625,
"learning_rate": 5.469204947015897e-05,
"loss": 0.0101,
"step": 1600
},
{
"epoch": 0.5633306927207112,
"eval_cer": 5.822676104629366,
"eval_loss": 0.014131452888250351,
"eval_runtime": 1123.1179,
"eval_samples_per_second": 3.443,
"eval_steps_per_second": 0.215,
"eval_wer": 17.501997630397046,
"step": 1600
},
{
"epoch": 0.5809347768682335,
"grad_norm": 51103.59765625,
"learning_rate": 5.134239964050307e-05,
"loss": 0.0093,
"step": 1650
},
{
"epoch": 0.5985388610157557,
"grad_norm": 32847.37109375,
"learning_rate": 4.798670299452926e-05,
"loss": 0.0084,
"step": 1700
},
{
"epoch": 0.6161429451632778,
"grad_norm": 29846.638671875,
"learning_rate": 4.4640075210627615e-05,
"loss": 0.0092,
"step": 1750
},
{
"epoch": 0.6337470293108001,
"grad_norm": 41928.12109375,
"learning_rate": 4.131759111665349e-05,
"loss": 0.0075,
"step": 1800
},
{
"epoch": 0.6513511134583223,
"grad_norm": 45941.98046875,
"learning_rate": 3.803421678562213e-05,
"loss": 0.0078,
"step": 1850
},
{
"epoch": 0.6689551976058445,
"grad_norm": 52671.2109375,
"learning_rate": 3.480474212128766e-05,
"loss": 0.0073,
"step": 1900
},
{
"epoch": 0.6865592817533668,
"grad_norm": 26375.0625,
"learning_rate": 3.164371423727362e-05,
"loss": 0.0083,
"step": 1950
},
{
"epoch": 0.704163365900889,
"grad_norm": 63857.80078125,
"learning_rate": 2.8565371929847284e-05,
"loss": 0.008,
"step": 2000
},
{
"epoch": 0.704163365900889,
"eval_cer": 5.123480724061357,
"eval_loss": 0.012732591480016708,
"eval_runtime": 1120.5002,
"eval_samples_per_second": 3.451,
"eval_steps_per_second": 0.216,
"eval_wer": 16.3695478466922,
"step": 2000
},
{
"epoch": 0.7217674500484113,
"grad_norm": 28659.234375,
"learning_rate": 2.5583581539504464e-05,
"loss": 0.0078,
"step": 2050
},
{
"epoch": 0.7393715341959335,
"grad_norm": 39795.39453125,
"learning_rate": 2.2711774490274766e-05,
"loss": 0.0078,
"step": 2100
},
{
"epoch": 0.7569756183434557,
"grad_norm": 51697.984375,
"learning_rate": 1.996288678810105e-05,
"loss": 0.0068,
"step": 2150
},
{
"epoch": 0.774579702490978,
"grad_norm": 46724.51953125,
"learning_rate": 1.734930075082076e-05,
"loss": 0.0072,
"step": 2200
},
{
"epoch": 0.7921837866385001,
"grad_norm": 28872.095703125,
"learning_rate": 1.4882789232226125e-05,
"loss": 0.0055,
"step": 2250
},
{
"epoch": 0.8097878707860223,
"grad_norm": 48174.0859375,
"learning_rate": 1.257446259144494e-05,
"loss": 0.0075,
"step": 2300
},
{
"epoch": 0.8273919549335446,
"grad_norm": 70300.703125,
"learning_rate": 1.0434718646516917e-05,
"loss": 0.0063,
"step": 2350
},
{
"epoch": 0.8449960390810668,
"grad_norm": 27386.814453125,
"learning_rate": 8.473195837599418e-06,
"loss": 0.0057,
"step": 2400
},
{
"epoch": 0.8449960390810668,
"eval_cer": 4.816821705177955,
"eval_loss": 0.0119090611115098,
"eval_runtime": 1122.3369,
"eval_samples_per_second": 3.445,
"eval_steps_per_second": 0.216,
"eval_wer": 15.234342710715564,
"step": 2400
},
{
"epoch": 0.862600123228589,
"grad_norm": 26832.78515625,
"learning_rate": 6.698729810778065e-06,
"loss": 0.0067,
"step": 2450
},
{
"epoch": 0.8802042073761113,
"grad_norm": 46090.953125,
"learning_rate": 5.1193136180493095e-06,
"loss": 0.0054,
"step": 2500
},
{
"epoch": 0.8978082915236335,
"grad_norm": 34130.36328125,
"learning_rate": 3.7420617127538248e-06,
"loss": 0.0064,
"step": 2550
},
{
"epoch": 0.9154123756711557,
"grad_norm": 60160.4296875,
"learning_rate": 2.573177902642726e-06,
"loss": 0.0058,
"step": 2600
},
{
"epoch": 0.933016459818678,
"grad_norm": 23166.681640625,
"learning_rate": 1.6179274049310966e-06,
"loss": 0.0058,
"step": 2650
},
{
"epoch": 0.9506205439662001,
"grad_norm": 22671.916015625,
"learning_rate": 8.806131292167618e-07,
"loss": 0.0054,
"step": 2700
},
{
"epoch": 0.9682246281137223,
"grad_norm": 36751.93359375,
"learning_rate": 3.6455629509730136e-07,
"loss": 0.0056,
"step": 2750
},
{
"epoch": 0.9858287122612446,
"grad_norm": 34122.0859375,
"learning_rate": 7.208147179291192e-08,
"loss": 0.0056,
"step": 2800
},
{
"epoch": 0.9858287122612446,
"eval_cer": 4.667818106660942,
"eval_loss": 0.01161186769604683,
"eval_runtime": 1115.7233,
"eval_samples_per_second": 3.466,
"eval_steps_per_second": 0.217,
"eval_wer": 14.63643126773758,
"step": 2800
},
{
"epoch": 1.0035208168295044,
"grad_norm": 41080.359375,
"learning_rate": 5.7252149230569694e-05,
"loss": 0.0048,
"step": 2850
},
{
"epoch": 1.0211249009770267,
"grad_norm": 49550.32421875,
"learning_rate": 5.574886573911056e-05,
"loss": 0.0058,
"step": 2900
},
{
"epoch": 1.038728985124549,
"grad_norm": 45467.51171875,
"learning_rate": 5.4240296223775465e-05,
"loss": 0.0064,
"step": 2950
},
{
"epoch": 1.0563330692720712,
"grad_norm": 57751.12890625,
"learning_rate": 5.272782779896898e-05,
"loss": 0.0068,
"step": 3000
},
{
"epoch": 1.0739371534195934,
"grad_norm": 42728.49609375,
"learning_rate": 5.1212851164103436e-05,
"loss": 0.0068,
"step": 3050
},
{
"epoch": 1.0915412375671156,
"grad_norm": 55132.671875,
"learning_rate": 4.969675932486503e-05,
"loss": 0.0057,
"step": 3100
},
{
"epoch": 1.1091453217146379,
"grad_norm": 20835.39453125,
"learning_rate": 4.8180946312358995e-05,
"loss": 0.0057,
"step": 3150
},
{
"epoch": 1.12674940586216,
"grad_norm": 29240.0234375,
"learning_rate": 4.666680590131225e-05,
"loss": 0.0071,
"step": 3200
},
{
"epoch": 1.12674940586216,
"eval_cer": 5.304207669359413,
"eval_loss": 0.013532178476452827,
"eval_runtime": 1162.1054,
"eval_samples_per_second": 3.328,
"eval_steps_per_second": 0.208,
"eval_wer": 15.892871903672884,
"step": 3200
},
{
"epoch": 1.1443534900096823,
"grad_norm": 38637.1328125,
"learning_rate": 4.515573032851158e-05,
"loss": 0.0062,
"step": 3250
},
{
"epoch": 1.1619575741572046,
"grad_norm": 35056.9765625,
"learning_rate": 4.364910901265606e-05,
"loss": 0.0058,
"step": 3300
},
{
"epoch": 1.1795616583047268,
"grad_norm": 33209.58203125,
"learning_rate": 4.214832727680054e-05,
"loss": 0.0062,
"step": 3350
},
{
"epoch": 1.1971657424522488,
"grad_norm": 36293.5625,
"learning_rate": 4.0654765074565124e-05,
"loss": 0.0058,
"step": 3400
},
{
"epoch": 1.214769826599771,
"grad_norm": 45826.09765625,
"learning_rate": 3.9169795721281845e-05,
"loss": 0.0057,
"step": 3450
},
{
"epoch": 1.2323739107472933,
"grad_norm": 57588.5078125,
"learning_rate": 3.769478463124507e-05,
"loss": 0.0063,
"step": 3500
},
{
"epoch": 1.2499779948948155,
"grad_norm": 54418.765625,
"learning_rate": 3.623108806222684e-05,
"loss": 0.0055,
"step": 3550
},
{
"epoch": 1.2675820790423378,
"grad_norm": 69616.1875,
"learning_rate": 3.4780051868411675e-05,
"loss": 0.0059,
"step": 3600
},
{
"epoch": 1.2675820790423378,
"eval_cer": 5.043691700339344,
"eval_loss": 0.01317252404987812,
"eval_runtime": 1129.5416,
"eval_samples_per_second": 3.424,
"eval_steps_per_second": 0.214,
"eval_wer": 15.716529358278455,
"step": 3600
},
{
"epoch": 1.28518616318986,
"grad_norm": 38242.609375,
"learning_rate": 3.334301026289712e-05,
"loss": 0.0061,
"step": 3650
},
{
"epoch": 1.3027902473373822,
"grad_norm": 31986.978515625,
"learning_rate": 3.1921284590898456e-05,
"loss": 0.0045,
"step": 3700
},
{
"epoch": 1.3203943314849045,
"grad_norm": 41348.12890625,
"learning_rate": 3.051618211478504e-05,
"loss": 0.0043,
"step": 3750
},
{
"epoch": 1.3379984156324267,
"grad_norm": 37051.91796875,
"learning_rate": 2.912899481206582e-05,
"loss": 0.0047,
"step": 3800
},
{
"epoch": 1.355602499779949,
"grad_norm": 42654.66796875,
"learning_rate": 2.7760998187429067e-05,
"loss": 0.0046,
"step": 3850
},
{
"epoch": 1.3732065839274712,
"grad_norm": 32946.38671875,
"learning_rate": 2.6413450099928783e-05,
"loss": 0.0056,
"step": 3900
},
{
"epoch": 1.3908106680749934,
"grad_norm": 58651.94921875,
"learning_rate": 2.508758960639599e-05,
"loss": 0.0057,
"step": 3950
},
{
"epoch": 1.4084147522225157,
"grad_norm": 40705.35546875,
"learning_rate": 2.3784635822138424e-05,
"loss": 0.0056,
"step": 4000
},
{
"epoch": 1.4084147522225157,
"eval_cer": 5.364770422305037,
"eval_loss": 0.01243683509528637,
"eval_runtime": 1130.3251,
"eval_samples_per_second": 3.421,
"eval_steps_per_second": 0.214,
"eval_wer": 14.575813517758245,
"step": 4000
},
{
"epoch": 1.426018836370038,
"grad_norm": 23855.8359375,
"learning_rate": 2.2505786799976525e-05,
"loss": 0.0043,
"step": 4050
},
{
"epoch": 1.4436229205175601,
"grad_norm": 62266.87890625,
"learning_rate": 2.1252218428645846e-05,
"loss": 0.0048,
"step": 4100
},
{
"epoch": 1.4612270046650824,
"grad_norm": 19551.705078125,
"learning_rate": 2.0025083351579337e-05,
"loss": 0.0048,
"step": 4150
},
{
"epoch": 1.4788310888126044,
"grad_norm": 33841.171875,
"learning_rate": 1.8825509907063327e-05,
"loss": 0.0046,
"step": 4200
},
{
"epoch": 1.4964351729601266,
"grad_norm": 50253.11328125,
"learning_rate": 1.765460109074188e-05,
"loss": 0.0048,
"step": 4250
},
{
"epoch": 1.5140392571076489,
"grad_norm": 34579.1875,
"learning_rate": 1.6513433541423528e-05,
"loss": 0.0042,
"step": 4300
},
{
"epoch": 1.531643341255171,
"grad_norm": 21523.380859375,
"learning_rate": 1.5403056551122697e-05,
"loss": 0.0047,
"step": 4350
},
{
"epoch": 1.5492474254026933,
"grad_norm": 14405.34375,
"learning_rate": 1.4324491100246385e-05,
"loss": 0.0041,
"step": 4400
},
{
"epoch": 1.5492474254026933,
"eval_cer": 4.753054573769599,
"eval_loss": 0.012177330441772938,
"eval_runtime": 1116.052,
"eval_samples_per_second": 3.465,
"eval_steps_per_second": 0.217,
"eval_wer": 14.225883779241174,
"step": 4400
},
{
"epoch": 1.5668515095502156,
"grad_norm": 32692.677734375,
"learning_rate": 1.3278728918812978e-05,
"loss": 0.0045,
"step": 4450
},
{
"epoch": 1.5844555936977378,
"grad_norm": 27757.576171875,
"learning_rate": 1.2266731574566536e-05,
"loss": 0.0044,
"step": 4500
},
{
"epoch": 1.60205967784526,
"grad_norm": 43189.07421875,
"learning_rate": 1.1289429588824962e-05,
"loss": 0.0047,
"step": 4550
},
{
"epoch": 1.6196637619927823,
"grad_norm": 34926.53515625,
"learning_rate": 1.0347721580875126e-05,
"loss": 0.0045,
"step": 4600
},
{
"epoch": 1.6372678461403045,
"grad_norm": 25656.78125,
"learning_rate": 9.442473441701422e-06,
"loss": 0.0045,
"step": 4650
},
{
"epoch": 1.6548719302878268,
"grad_norm": 36669.20703125,
"learning_rate": 8.574517537807897e-06,
"loss": 0.0033,
"step": 4700
},
{
"epoch": 1.672476014435349,
"grad_norm": 36220.40625,
"learning_rate": 7.744651945865571e-06,
"loss": 0.0041,
"step": 4750
},
{
"epoch": 1.6900800985828712,
"grad_norm": 19782.095703125,
"learning_rate": 6.953639718889076e-06,
"loss": 0.0038,
"step": 4800
},
{
"epoch": 1.6900800985828712,
"eval_cer": 4.743121000535131,
"eval_loss": 0.012049088254570961,
"eval_runtime": 1111.599,
"eval_samples_per_second": 3.479,
"eval_steps_per_second": 0.218,
"eval_wer": 13.80431488165762,
"step": 4800
},
{
"epoch": 1.7076841827303935,
"grad_norm": 36681.79296875,
"learning_rate": 6.202208184617064e-06,
"loss": 0.0034,
"step": 4850
},
{
"epoch": 1.7252882668779157,
"grad_norm": 40895.7734375,
"learning_rate": 5.491048276741784e-06,
"loss": 0.0046,
"step": 4900
},
{
"epoch": 1.742892351025438,
"grad_norm": 13767.77734375,
"learning_rate": 4.820813899602506e-06,
"loss": 0.0045,
"step": 4950
},
{
"epoch": 1.7604964351729602,
"grad_norm": 39934.8203125,
"learning_rate": 4.192121326927073e-06,
"loss": 0.0042,
"step": 5000
},
{
"epoch": 1.7781005193204824,
"grad_norm": 94029.875,
"learning_rate": 3.605548635174533e-06,
"loss": 0.0036,
"step": 5050
},
{
"epoch": 1.7957046034680046,
"grad_norm": 13603.7431640625,
"learning_rate": 3.061635171999566e-06,
"loss": 0.0038,
"step": 5100
},
{
"epoch": 1.8133086876155269,
"grad_norm": 12118.0068359375,
"learning_rate": 2.5608810603278634e-06,
"loss": 0.004,
"step": 5150
},
{
"epoch": 1.8309127717630491,
"grad_norm": 19234.5703125,
"learning_rate": 2.1037467384981026e-06,
"loss": 0.004,
"step": 5200
},
{
"epoch": 1.8309127717630491,
"eval_cer": 4.95685304399932,
"eval_loss": 0.01188047043979168,
"eval_runtime": 1212.5658,
"eval_samples_per_second": 3.189,
"eval_steps_per_second": 0.2,
"eval_wer": 14.181798142892568,
"step": 5200
},
{
"epoch": 1.8485168559105714,
"grad_norm": 19712.021484375,
"learning_rate": 1.6906525368936055e-06,
"loss": 0.0034,
"step": 5250
},
{
"epoch": 1.8661209400580936,
"grad_norm": 8896.380859375,
"learning_rate": 1.3219782914527634e-06,
"loss": 0.0039,
"step": 5300
},
{
"epoch": 1.8837250242056158,
"grad_norm": 20239.08984375,
"learning_rate": 9.980629944137509e-07,
"loss": 0.0037,
"step": 5350
},
{
"epoch": 1.901329108353138,
"grad_norm": 22119.759765625,
"learning_rate": 7.192044826145771e-07,
"loss": 0.0035,
"step": 5400
},
{
"epoch": 1.9189331925006603,
"grad_norm": 23297.744140625,
"learning_rate": 4.856591636351604e-07,
"loss": 0.0039,
"step": 5450
},
{
"epoch": 1.9365372766481825,
"grad_norm": 8485.7099609375,
"learning_rate": 2.976417800331144e-07,
"loss": 0.0047,
"step": 5500
},
{
"epoch": 1.9541413607957046,
"grad_norm": 28440.11328125,
"learning_rate": 1.5532521189017267e-07,
"loss": 0.0036,
"step": 5550
},
{
"epoch": 1.9717454449432268,
"grad_norm": 29707.8125,
"learning_rate": 5.8840317850683555e-08,
"loss": 0.0036,
"step": 5600
},
{
"epoch": 1.9717454449432268,
"eval_cer": 4.91711875106145,
"eval_loss": 0.011817782185971737,
"eval_runtime": 1130.1756,
"eval_samples_per_second": 3.422,
"eval_steps_per_second": 0.214,
"eval_wer": 14.074339404292838,
"step": 5600
},
{
"epoch": 1.989349529090749,
"grad_norm": 46225.79296875,
"learning_rate": 8.275814798408554e-09,
"loss": 0.0037,
"step": 5650
},
{
"epoch": 2.007041633659009,
"grad_norm": 14135.712890625,
"learning_rate": 2.7527634528122847e-05,
"loss": 0.004,
"step": 5700
},
{
"epoch": 2.024645717806531,
"grad_norm": 22203.5390625,
"learning_rate": 2.6657185738447493e-05,
"loss": 0.0042,
"step": 5750
},
{
"epoch": 2.0422498019540534,
"grad_norm": 21549.859375,
"learning_rate": 2.5795691222129982e-05,
"loss": 0.004,
"step": 5800
},
{
"epoch": 2.0598538861015756,
"grad_norm": 8696.861328125,
"learning_rate": 2.4943481447352425e-05,
"loss": 0.0038,
"step": 5850
},
{
"epoch": 2.077457970249098,
"grad_norm": 15930.8994140625,
"learning_rate": 2.410088332068215e-05,
"loss": 0.0031,
"step": 5900
},
{
"epoch": 2.09506205439662,
"grad_norm": 24799.234375,
"learning_rate": 2.326822006167068e-05,
"loss": 0.0033,
"step": 5950
},
{
"epoch": 2.1126661385441423,
"grad_norm": 57942.671875,
"learning_rate": 2.2445811078867185e-05,
"loss": 0.0033,
"step": 6000
},
{
"epoch": 2.1126661385441423,
"eval_cer": 5.04529388957071,
"eval_loss": 0.01286122016608715,
"eval_runtime": 1149.1813,
"eval_samples_per_second": 3.365,
"eval_steps_per_second": 0.211,
"eval_wer": 15.082798335767228,
"step": 6000
},
{
"epoch": 2.1302702226916646,
"grad_norm": 27342.32421875,
"learning_rate": 2.1633971847293678e-05,
"loss": 0.0032,
"step": 6050
},
{
"epoch": 2.147874306839187,
"grad_norm": 9434.23828125,
"learning_rate": 2.0833013787429323e-05,
"loss": 0.0034,
"step": 6100
},
{
"epoch": 2.165478390986709,
"grad_norm": 49190.4453125,
"learning_rate": 2.0043244145749896e-05,
"loss": 0.0041,
"step": 6150
},
{
"epoch": 2.1830824751342313,
"grad_norm": 29072.685546875,
"learning_rate": 1.9264965876868396e-05,
"loss": 0.0029,
"step": 6200
},
{
"epoch": 2.2006865592817535,
"grad_norm": 26559.041015625,
"learning_rate": 1.8498477527322123e-05,
"loss": 0.0036,
"step": 6250
},
{
"epoch": 2.2182906434292757,
"grad_norm": 37322.6953125,
"learning_rate": 1.774407312105051e-05,
"loss": 0.0033,
"step": 6300
},
{
"epoch": 2.235894727576798,
"grad_norm": 35458.96875,
"learning_rate": 1.7002042046608017e-05,
"loss": 0.0033,
"step": 6350
},
{
"epoch": 2.25349881172432,
"grad_norm": 32754.345703125,
"learning_rate": 1.627266894615504e-05,
"loss": 0.0033,
"step": 6400
},
{
"epoch": 2.25349881172432,
"eval_cer": 5.1423865569914735,
"eval_loss": 0.012781694531440735,
"eval_runtime": 1113.544,
"eval_samples_per_second": 3.473,
"eval_steps_per_second": 0.217,
"eval_wer": 14.93400931309068,
"step": 6400
},
{
"epoch": 2.2711028958718424,
"grad_norm": 28290.462890625,
"learning_rate": 1.55562336062695e-05,
"loss": 0.0031,
"step": 6450
},
{
"epoch": 2.2887069800193647,
"grad_norm": 34798.16796875,
"learning_rate": 1.4853010850621062e-05,
"loss": 0.0037,
"step": 6500
},
{
"epoch": 2.306311064166887,
"grad_norm": 15387.654296875,
"learning_rate": 1.4163270434549237e-05,
"loss": 0.0025,
"step": 6550
},
{
"epoch": 2.323915148314409,
"grad_norm": 42429.45703125,
"learning_rate": 1.348727694158547e-05,
"loss": 0.0036,
"step": 6600
},
{
"epoch": 2.3415192324619314,
"grad_norm": 62645.078125,
"learning_rate": 1.2825289681959313e-05,
"loss": 0.0037,
"step": 6650
},
{
"epoch": 2.3591233166094536,
"grad_norm": 37475.56640625,
"learning_rate": 1.2177562593127274e-05,
"loss": 0.0035,
"step": 6700
},
{
"epoch": 2.376727400756976,
"grad_norm": 21763.126953125,
"learning_rate": 1.154434414236284e-05,
"loss": 0.0032,
"step": 6750
},
{
"epoch": 2.3943314849044977,
"grad_norm": 23169.912109375,
"learning_rate": 1.0925877231444687e-05,
"loss": 0.0033,
"step": 6800
},
{
"epoch": 2.3943314849044977,
"eval_cer": 5.017095359098672,
"eval_loss": 0.012343456037342548,
"eval_runtime": 1116.3074,
"eval_samples_per_second": 3.464,
"eval_steps_per_second": 0.217,
"eval_wer": 14.73286859725016,
"step": 6800
},
{
"epoch": 2.41193556905202,
"grad_norm": 22481.037109375,
"learning_rate": 1.0322399103479884e-05,
"loss": 0.0028,
"step": 6850
},
{
"epoch": 2.429539653199542,
"grad_norm": 51517.1015625,
"learning_rate": 9.734141251897733e-06,
"loss": 0.0029,
"step": 6900
},
{
"epoch": 2.4471437373470644,
"grad_norm": 34979.6171875,
"learning_rate": 9.16132933164922e-06,
"loss": 0.003,
"step": 6950
},
{
"epoch": 2.4647478214945866,
"grad_norm": 15414.427734375,
"learning_rate": 8.604183072646055e-06,
"loss": 0.0026,
"step": 7000
},
{
"epoch": 2.482351905642109,
"grad_norm": 22982.21875,
"learning_rate": 8.06291619547253e-06,
"loss": 0.0025,
"step": 7050
},
{
"epoch": 2.499955989789631,
"grad_norm": 32445.208984375,
"learning_rate": 7.5377363294026505e-06,
"loss": 0.0031,
"step": 7100
},
{
"epoch": 2.5175600739371533,
"grad_norm": 38354.1640625,
"learning_rate": 7.028844932753714e-06,
"loss": 0.0033,
"step": 7150
},
{
"epoch": 2.5351641580846755,
"grad_norm": 13522.6416015625,
"learning_rate": 6.536437215607261e-06,
"loss": 0.0033,
"step": 7200
},
{
"epoch": 2.5351641580846755,
"eval_cer": 4.367567844703002,
"eval_loss": 0.012211957015097141,
"eval_runtime": 1109.9647,
"eval_samples_per_second": 3.484,
"eval_steps_per_second": 0.218,
"eval_wer": 13.674813324883587,
"step": 7200
},
{
"epoch": 2.552768242232198,
"grad_norm": 19421.33203125,
"learning_rate": 6.060702064926682e-06,
"loss": 0.003,
"step": 7250
},
{
"epoch": 2.57037232637972,
"grad_norm": 12878.841796875,
"learning_rate": 5.601821972100435e-06,
"loss": 0.0032,
"step": 7300
},
{
"epoch": 2.5879764105272423,
"grad_norm": 16374.8388671875,
"learning_rate": 5.159972962938581e-06,
"loss": 0.0021,
"step": 7350
},
{
"epoch": 2.6055804946747645,
"grad_norm": 19888.9140625,
"learning_rate": 4.735324530149521e-06,
"loss": 0.0032,
"step": 7400
},
{
"epoch": 2.6231845788222867,
"grad_norm": 25133.033203125,
"learning_rate": 4.328039568322817e-06,
"loss": 0.0031,
"step": 7450
},
{
"epoch": 2.640788662969809,
"grad_norm": 26627.578125,
"learning_rate": 3.938274311443019e-06,
"loss": 0.003,
"step": 7500
},
{
"epoch": 2.658392747117331,
"grad_norm": 27871.9375,
"learning_rate": 3.5661782729585146e-06,
"loss": 0.0021,
"step": 7550
},
{
"epoch": 2.6759968312648534,
"grad_norm": 42111.23828125,
"learning_rate": 3.2118941884283825e-06,
"loss": 0.0034,
"step": 7600
},
{
"epoch": 2.6759968312648534,
"eval_cer": 4.530029832763488,
"eval_loss": 0.012235511094331741,
"eval_runtime": 1115.0261,
"eval_samples_per_second": 3.468,
"eval_steps_per_second": 0.217,
"eval_wer": 13.561843881740279,
"step": 7600
},
{
"epoch": 2.6936009154123757,
"grad_norm": 41923.59765625,
"learning_rate": 2.8755579607691928e-06,
"loss": 0.0032,
"step": 7650
},
{
"epoch": 2.711204999559898,
"grad_norm": 18917.880859375,
"learning_rate": 2.557298608122849e-06,
"loss": 0.0027,
"step": 7700
},
{
"epoch": 2.72880908370742,
"grad_norm": 20727.607421875,
"learning_rate": 2.257238214365365e-06,
"loss": 0.0025,
"step": 7750
},
{
"epoch": 2.7464131678549424,
"grad_norm": 8564.83984375,
"learning_rate": 1.975491882275665e-06,
"loss": 0.0031,
"step": 7800
},
{
"epoch": 2.7640172520024646,
"grad_norm": 38865.1953125,
"learning_rate": 1.7121676893823213e-06,
"loss": 0.0028,
"step": 7850
},
{
"epoch": 2.781621336149987,
"grad_norm": 9149.9501953125,
"learning_rate": 1.467366646505125e-06,
"loss": 0.0026,
"step": 7900
},
{
"epoch": 2.799225420297509,
"grad_norm": 33032.7890625,
"learning_rate": 1.241182659007495e-06,
"loss": 0.0025,
"step": 7950
},
{
"epoch": 2.8168295044450313,
"grad_norm": 39062.0859375,
"learning_rate": 1.0337024907744576e-06,
"loss": 0.0025,
"step": 8000
},
{
"epoch": 2.8168295044450313,
"eval_cer": 4.469787517664137,
"eval_loss": 0.012169728055596352,
"eval_runtime": 1112.2456,
"eval_samples_per_second": 3.477,
"eval_steps_per_second": 0.218,
"eval_wer": 13.366213870443335,
"step": 8000
},
{
"epoch": 2.8344335885925536,
"grad_norm": 21345.421875,
"learning_rate": 8.450057309301873e-07,
"loss": 0.003,
"step": 8050
},
{
"epoch": 2.852037672740076,
"grad_norm": 21389.115234375,
"learning_rate": 6.751647633076952e-07,
"loss": 0.0027,
"step": 8100
},
{
"epoch": 2.869641756887598,
"grad_norm": 40714.12890625,
"learning_rate": 5.242447386824833e-07,
"loss": 0.0032,
"step": 8150
},
{
"epoch": 2.8872458410351203,
"grad_norm": 13599.498046875,
"learning_rate": 3.9230354978077586e-07,
"loss": 0.0024,
"step": 8200
},
{
"epoch": 2.9048499251826425,
"grad_norm": 18847.646484375,
"learning_rate": 2.7939180907197717e-07,
"loss": 0.0029,
"step": 8250
},
{
"epoch": 2.9224540093301647,
"grad_norm": 25907.548828125,
"learning_rate": 1.855528293537856e-07,
"loss": 0.0031,
"step": 8300
},
{
"epoch": 2.940058093477687,
"grad_norm": 36510.4765625,
"learning_rate": 1.108226071374452e-07,
"loss": 0.0027,
"step": 8350
},
{
"epoch": 2.9576621776252088,
"grad_norm": 34380.96875,
"learning_rate": 5.522980883952422e-08,
"loss": 0.0028,
"step": 8400
},
{
"epoch": 2.9576621776252088,
"eval_cer": 4.579377261089553,
"eval_loss": 0.0121904406696558,
"eval_runtime": 1111.3761,
"eval_samples_per_second": 3.479,
"eval_steps_per_second": 0.218,
"eval_wer": 13.553577824924917,
"step": 8400
},
{
"epoch": 2.975266261772731,
"grad_norm": 29869.33984375,
"learning_rate": 1.8795759785555922e-08,
"loss": 0.0027,
"step": 8450
},
{
"epoch": 2.9928703459202532,
"grad_norm": 46153.02734375,
"learning_rate": 1.5344360296265513e-09,
"loss": 0.003,
"step": 8500
}
],
"logging_steps": 50,
"max_steps": 8520,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.34241856782336e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}