{ "best_metric": 13.366213870443335, "best_model_checkpoint": "./Whisper_tiny_fine_tune_Quran/checkpoint-8000", "epoch": 2.9576621776252088, "eval_steps": 400, "global_step": 8400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017604084147522226, "grad_norm": 216358.46875, "learning_rate": 1e-05, "loss": 3.1564, "step": 50 }, { "epoch": 0.03520816829504445, "grad_norm": 176759.03125, "learning_rate": 2e-05, "loss": 0.089, "step": 100 }, { "epoch": 0.052812252442566675, "grad_norm": 188886.625, "learning_rate": 3e-05, "loss": 0.0535, "step": 150 }, { "epoch": 0.0704163365900889, "grad_norm": 126744.6484375, "learning_rate": 4e-05, "loss": 0.0411, "step": 200 }, { "epoch": 0.08802042073761113, "grad_norm": 98751.015625, "learning_rate": 5e-05, "loss": 0.0333, "step": 250 }, { "epoch": 0.10562450488513335, "grad_norm": 137741.75, "learning_rate": 6e-05, "loss": 0.0311, "step": 300 }, { "epoch": 0.12322858903265557, "grad_norm": 101993.84375, "learning_rate": 7e-05, "loss": 0.0267, "step": 350 }, { "epoch": 0.1408326731801778, "grad_norm": 113272.96875, "learning_rate": 8e-05, "loss": 0.0258, "step": 400 }, { "epoch": 0.1408326731801778, "eval_cer": 52.221755807134876, "eval_loss": 0.024616289883852005, "eval_runtime": 1237.0383, "eval_samples_per_second": 3.126, "eval_steps_per_second": 0.196, "eval_wer": 104.93483591877222, "step": 400 }, { "epoch": 0.15843675732770002, "grad_norm": 132086.53125, "learning_rate": 9e-05, "loss": 0.0248, "step": 450 }, { "epoch": 0.17604084147522225, "grad_norm": 93074.5546875, "learning_rate": 0.0001, "loss": 0.0208, "step": 500 }, { "epoch": 0.1936449256227445, "grad_norm": 96403.0390625, "learning_rate": 9.988738792578126e-05, "loss": 0.0196, "step": 550 }, { "epoch": 0.2112490097702667, "grad_norm": 72429.7890625, "learning_rate": 9.955005896229543e-05, "loss": 0.0205, "step": 600 }, { "epoch": 0.22885309391778894, "grad_norm": 79151.7109375, "learning_rate": 9.898953260211338e-05, "loss": 0.0187, "step": 650 }, { "epoch": 0.24645717806531114, "grad_norm": 87347.1171875, "learning_rate": 9.820833372667812e-05, "loss": 0.0172, "step": 700 }, { "epoch": 0.2640612622128334, "grad_norm": 107349.203125, "learning_rate": 9.720998123301923e-05, "loss": 0.0182, "step": 750 }, { "epoch": 0.2816653463603556, "grad_norm": 79326.1875, "learning_rate": 9.599897218294122e-05, "loss": 0.0177, "step": 800 }, { "epoch": 0.2816653463603556, "eval_cer": 10.263303778282646, "eval_loss": 0.018445400521159172, "eval_runtime": 1170.4993, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.207, "eval_wer": 26.20891080924696, "step": 800 }, { "epoch": 0.29926943050787785, "grad_norm": 52984.33203125, "learning_rate": 9.458076154608515e-05, "loss": 0.0152, "step": 850 }, { "epoch": 0.31687351465540003, "grad_norm": 68339.46875, "learning_rate": 9.296173762811085e-05, "loss": 0.016, "step": 900 }, { "epoch": 0.33447759880292227, "grad_norm": 87585.4921875, "learning_rate": 9.114919329468282e-05, "loss": 0.0145, "step": 950 }, { "epoch": 0.3520816829504445, "grad_norm": 105782.3515625, "learning_rate": 8.915129312088112e-05, "loss": 0.0141, "step": 1000 }, { "epoch": 0.36968576709796674, "grad_norm": 57010.09765625, "learning_rate": 8.697703661401186e-05, "loss": 0.0134, "step": 1050 }, { "epoch": 0.387289851245489, "grad_norm": 50487.80078125, "learning_rate": 8.463621767547998e-05, "loss": 0.0132, "step": 1100 }, { "epoch": 0.40489393539301116, "grad_norm": 67209.078125, "learning_rate": 
8.213938048432697e-05, "loss": 0.0133, "step": 1150 }, { "epoch": 0.4224980195405334, "grad_norm": 51471.0, "learning_rate": 7.949777200115616e-05, "loss": 0.0116, "step": 1200 }, { "epoch": 0.4224980195405334, "eval_cer": 7.321043473802604, "eval_loss": 0.016037294641137123, "eval_runtime": 1114.0654, "eval_samples_per_second": 3.471, "eval_steps_per_second": 0.217, "eval_wer": 20.951698674675555, "step": 1200 }, { "epoch": 0.44010210368805563, "grad_norm": 47404.3046875, "learning_rate": 7.672329130639005e-05, "loss": 0.0126, "step": 1250 }, { "epoch": 0.45770618783557787, "grad_norm": 45749.5, "learning_rate": 7.38284360010654e-05, "loss": 0.0118, "step": 1300 }, { "epoch": 0.47531027198310005, "grad_norm": 41610.55078125, "learning_rate": 7.082624591160201e-05, "loss": 0.0118, "step": 1350 }, { "epoch": 0.4929143561306223, "grad_norm": 47256.76953125, "learning_rate": 6.773024435212678e-05, "loss": 0.0113, "step": 1400 }, { "epoch": 0.5105184402781445, "grad_norm": 41286.640625, "learning_rate": 6.455437720893564e-05, "loss": 0.0107, "step": 1450 }, { "epoch": 0.5281225244256668, "grad_norm": 36747.9375, "learning_rate": 6.131295012148612e-05, "loss": 0.0093, "step": 1500 }, { "epoch": 0.545726608573189, "grad_norm": 35157.80859375, "learning_rate": 5.8020564042888015e-05, "loss": 0.01, "step": 1550 }, { "epoch": 0.5633306927207112, "grad_norm": 50015.47265625, "learning_rate": 5.469204947015897e-05, "loss": 0.0101, "step": 1600 }, { "epoch": 0.5633306927207112, "eval_cer": 5.822676104629366, "eval_loss": 0.014131452888250351, "eval_runtime": 1123.1179, "eval_samples_per_second": 3.443, "eval_steps_per_second": 0.215, "eval_wer": 17.501997630397046, "step": 1600 }, { "epoch": 0.5809347768682335, "grad_norm": 51103.59765625, "learning_rate": 5.134239964050307e-05, "loss": 0.0093, "step": 1650 }, { "epoch": 0.5985388610157557, "grad_norm": 32847.37109375, "learning_rate": 4.798670299452926e-05, "loss": 0.0084, "step": 1700 }, { "epoch": 0.6161429451632778, "grad_norm": 29846.638671875, "learning_rate": 4.4640075210627615e-05, "loss": 0.0092, "step": 1750 }, { "epoch": 0.6337470293108001, "grad_norm": 41928.12109375, "learning_rate": 4.131759111665349e-05, "loss": 0.0075, "step": 1800 }, { "epoch": 0.6513511134583223, "grad_norm": 45941.98046875, "learning_rate": 3.803421678562213e-05, "loss": 0.0078, "step": 1850 }, { "epoch": 0.6689551976058445, "grad_norm": 52671.2109375, "learning_rate": 3.480474212128766e-05, "loss": 0.0073, "step": 1900 }, { "epoch": 0.6865592817533668, "grad_norm": 26375.0625, "learning_rate": 3.164371423727362e-05, "loss": 0.0083, "step": 1950 }, { "epoch": 0.704163365900889, "grad_norm": 63857.80078125, "learning_rate": 2.8565371929847284e-05, "loss": 0.008, "step": 2000 }, { "epoch": 0.704163365900889, "eval_cer": 5.123480724061357, "eval_loss": 0.012732591480016708, "eval_runtime": 1120.5002, "eval_samples_per_second": 3.451, "eval_steps_per_second": 0.216, "eval_wer": 16.3695478466922, "step": 2000 }, { "epoch": 0.7217674500484113, "grad_norm": 28659.234375, "learning_rate": 2.5583581539504464e-05, "loss": 0.0078, "step": 2050 }, { "epoch": 0.7393715341959335, "grad_norm": 39795.39453125, "learning_rate": 2.2711774490274766e-05, "loss": 0.0078, "step": 2100 }, { "epoch": 0.7569756183434557, "grad_norm": 51697.984375, "learning_rate": 1.996288678810105e-05, "loss": 0.0068, "step": 2150 }, { "epoch": 0.774579702490978, "grad_norm": 46724.51953125, "learning_rate": 1.734930075082076e-05, "loss": 0.0072, "step": 2200 }, { "epoch": 0.7921837866385001, "grad_norm": 
28872.095703125, "learning_rate": 1.4882789232226125e-05, "loss": 0.0055, "step": 2250 }, { "epoch": 0.8097878707860223, "grad_norm": 48174.0859375, "learning_rate": 1.257446259144494e-05, "loss": 0.0075, "step": 2300 }, { "epoch": 0.8273919549335446, "grad_norm": 70300.703125, "learning_rate": 1.0434718646516917e-05, "loss": 0.0063, "step": 2350 }, { "epoch": 0.8449960390810668, "grad_norm": 27386.814453125, "learning_rate": 8.473195837599418e-06, "loss": 0.0057, "step": 2400 }, { "epoch": 0.8449960390810668, "eval_cer": 4.816821705177955, "eval_loss": 0.0119090611115098, "eval_runtime": 1122.3369, "eval_samples_per_second": 3.445, "eval_steps_per_second": 0.216, "eval_wer": 15.234342710715564, "step": 2400 }, { "epoch": 0.862600123228589, "grad_norm": 26832.78515625, "learning_rate": 6.698729810778065e-06, "loss": 0.0067, "step": 2450 }, { "epoch": 0.8802042073761113, "grad_norm": 46090.953125, "learning_rate": 5.1193136180493095e-06, "loss": 0.0054, "step": 2500 }, { "epoch": 0.8978082915236335, "grad_norm": 34130.36328125, "learning_rate": 3.7420617127538248e-06, "loss": 0.0064, "step": 2550 }, { "epoch": 0.9154123756711557, "grad_norm": 60160.4296875, "learning_rate": 2.573177902642726e-06, "loss": 0.0058, "step": 2600 }, { "epoch": 0.933016459818678, "grad_norm": 23166.681640625, "learning_rate": 1.6179274049310966e-06, "loss": 0.0058, "step": 2650 }, { "epoch": 0.9506205439662001, "grad_norm": 22671.916015625, "learning_rate": 8.806131292167618e-07, "loss": 0.0054, "step": 2700 }, { "epoch": 0.9682246281137223, "grad_norm": 36751.93359375, "learning_rate": 3.6455629509730136e-07, "loss": 0.0056, "step": 2750 }, { "epoch": 0.9858287122612446, "grad_norm": 34122.0859375, "learning_rate": 7.208147179291192e-08, "loss": 0.0056, "step": 2800 }, { "epoch": 0.9858287122612446, "eval_cer": 4.667818106660942, "eval_loss": 0.01161186769604683, "eval_runtime": 1115.7233, "eval_samples_per_second": 3.466, "eval_steps_per_second": 0.217, "eval_wer": 14.63643126773758, "step": 2800 }, { "epoch": 1.0035208168295044, "grad_norm": 41080.359375, "learning_rate": 5.7252149230569694e-05, "loss": 0.0048, "step": 2850 }, { "epoch": 1.0211249009770267, "grad_norm": 49550.32421875, "learning_rate": 5.574886573911056e-05, "loss": 0.0058, "step": 2900 }, { "epoch": 1.038728985124549, "grad_norm": 45467.51171875, "learning_rate": 5.4240296223775465e-05, "loss": 0.0064, "step": 2950 }, { "epoch": 1.0563330692720712, "grad_norm": 57751.12890625, "learning_rate": 5.272782779896898e-05, "loss": 0.0068, "step": 3000 }, { "epoch": 1.0739371534195934, "grad_norm": 42728.49609375, "learning_rate": 5.1212851164103436e-05, "loss": 0.0068, "step": 3050 }, { "epoch": 1.0915412375671156, "grad_norm": 55132.671875, "learning_rate": 4.969675932486503e-05, "loss": 0.0057, "step": 3100 }, { "epoch": 1.1091453217146379, "grad_norm": 20835.39453125, "learning_rate": 4.8180946312358995e-05, "loss": 0.0057, "step": 3150 }, { "epoch": 1.12674940586216, "grad_norm": 29240.0234375, "learning_rate": 4.666680590131225e-05, "loss": 0.0071, "step": 3200 }, { "epoch": 1.12674940586216, "eval_cer": 5.304207669359413, "eval_loss": 0.013532178476452827, "eval_runtime": 1162.1054, "eval_samples_per_second": 3.328, "eval_steps_per_second": 0.208, "eval_wer": 15.892871903672884, "step": 3200 }, { "epoch": 1.1443534900096823, "grad_norm": 38637.1328125, "learning_rate": 4.515573032851158e-05, "loss": 0.0062, "step": 3250 }, { "epoch": 1.1619575741572046, "grad_norm": 35056.9765625, "learning_rate": 4.364910901265606e-05, "loss": 0.0058, "step": 
3300 }, { "epoch": 1.1795616583047268, "grad_norm": 33209.58203125, "learning_rate": 4.214832727680054e-05, "loss": 0.0062, "step": 3350 }, { "epoch": 1.1971657424522488, "grad_norm": 36293.5625, "learning_rate": 4.0654765074565124e-05, "loss": 0.0058, "step": 3400 }, { "epoch": 1.214769826599771, "grad_norm": 45826.09765625, "learning_rate": 3.9169795721281845e-05, "loss": 0.0057, "step": 3450 }, { "epoch": 1.2323739107472933, "grad_norm": 57588.5078125, "learning_rate": 3.769478463124507e-05, "loss": 0.0063, "step": 3500 }, { "epoch": 1.2499779948948155, "grad_norm": 54418.765625, "learning_rate": 3.623108806222684e-05, "loss": 0.0055, "step": 3550 }, { "epoch": 1.2675820790423378, "grad_norm": 69616.1875, "learning_rate": 3.4780051868411675e-05, "loss": 0.0059, "step": 3600 }, { "epoch": 1.2675820790423378, "eval_cer": 5.043691700339344, "eval_loss": 0.01317252404987812, "eval_runtime": 1129.5416, "eval_samples_per_second": 3.424, "eval_steps_per_second": 0.214, "eval_wer": 15.716529358278455, "step": 3600 }, { "epoch": 1.28518616318986, "grad_norm": 38242.609375, "learning_rate": 3.334301026289712e-05, "loss": 0.0061, "step": 3650 }, { "epoch": 1.3027902473373822, "grad_norm": 31986.978515625, "learning_rate": 3.1921284590898456e-05, "loss": 0.0045, "step": 3700 }, { "epoch": 1.3203943314849045, "grad_norm": 41348.12890625, "learning_rate": 3.051618211478504e-05, "loss": 0.0043, "step": 3750 }, { "epoch": 1.3379984156324267, "grad_norm": 37051.91796875, "learning_rate": 2.912899481206582e-05, "loss": 0.0047, "step": 3800 }, { "epoch": 1.355602499779949, "grad_norm": 42654.66796875, "learning_rate": 2.7760998187429067e-05, "loss": 0.0046, "step": 3850 }, { "epoch": 1.3732065839274712, "grad_norm": 32946.38671875, "learning_rate": 2.6413450099928783e-05, "loss": 0.0056, "step": 3900 }, { "epoch": 1.3908106680749934, "grad_norm": 58651.94921875, "learning_rate": 2.508758960639599e-05, "loss": 0.0057, "step": 3950 }, { "epoch": 1.4084147522225157, "grad_norm": 40705.35546875, "learning_rate": 2.3784635822138424e-05, "loss": 0.0056, "step": 4000 }, { "epoch": 1.4084147522225157, "eval_cer": 5.364770422305037, "eval_loss": 0.01243683509528637, "eval_runtime": 1130.3251, "eval_samples_per_second": 3.421, "eval_steps_per_second": 0.214, "eval_wer": 14.575813517758245, "step": 4000 }, { "epoch": 1.426018836370038, "grad_norm": 23855.8359375, "learning_rate": 2.2505786799976525e-05, "loss": 0.0043, "step": 4050 }, { "epoch": 1.4436229205175601, "grad_norm": 62266.87890625, "learning_rate": 2.1252218428645846e-05, "loss": 0.0048, "step": 4100 }, { "epoch": 1.4612270046650824, "grad_norm": 19551.705078125, "learning_rate": 2.0025083351579337e-05, "loss": 0.0048, "step": 4150 }, { "epoch": 1.4788310888126044, "grad_norm": 33841.171875, "learning_rate": 1.8825509907063327e-05, "loss": 0.0046, "step": 4200 }, { "epoch": 1.4964351729601266, "grad_norm": 50253.11328125, "learning_rate": 1.765460109074188e-05, "loss": 0.0048, "step": 4250 }, { "epoch": 1.5140392571076489, "grad_norm": 34579.1875, "learning_rate": 1.6513433541423528e-05, "loss": 0.0042, "step": 4300 }, { "epoch": 1.531643341255171, "grad_norm": 21523.380859375, "learning_rate": 1.5403056551122697e-05, "loss": 0.0047, "step": 4350 }, { "epoch": 1.5492474254026933, "grad_norm": 14405.34375, "learning_rate": 1.4324491100246385e-05, "loss": 0.0041, "step": 4400 }, { "epoch": 1.5492474254026933, "eval_cer": 4.753054573769599, "eval_loss": 0.012177330441772938, "eval_runtime": 1116.052, "eval_samples_per_second": 3.465, 
"eval_steps_per_second": 0.217, "eval_wer": 14.225883779241174, "step": 4400 }, { "epoch": 1.5668515095502156, "grad_norm": 32692.677734375, "learning_rate": 1.3278728918812978e-05, "loss": 0.0045, "step": 4450 }, { "epoch": 1.5844555936977378, "grad_norm": 27757.576171875, "learning_rate": 1.2266731574566536e-05, "loss": 0.0044, "step": 4500 }, { "epoch": 1.60205967784526, "grad_norm": 43189.07421875, "learning_rate": 1.1289429588824962e-05, "loss": 0.0047, "step": 4550 }, { "epoch": 1.6196637619927823, "grad_norm": 34926.53515625, "learning_rate": 1.0347721580875126e-05, "loss": 0.0045, "step": 4600 }, { "epoch": 1.6372678461403045, "grad_norm": 25656.78125, "learning_rate": 9.442473441701422e-06, "loss": 0.0045, "step": 4650 }, { "epoch": 1.6548719302878268, "grad_norm": 36669.20703125, "learning_rate": 8.574517537807897e-06, "loss": 0.0033, "step": 4700 }, { "epoch": 1.672476014435349, "grad_norm": 36220.40625, "learning_rate": 7.744651945865571e-06, "loss": 0.0041, "step": 4750 }, { "epoch": 1.6900800985828712, "grad_norm": 19782.095703125, "learning_rate": 6.953639718889076e-06, "loss": 0.0038, "step": 4800 }, { "epoch": 1.6900800985828712, "eval_cer": 4.743121000535131, "eval_loss": 0.012049088254570961, "eval_runtime": 1111.599, "eval_samples_per_second": 3.479, "eval_steps_per_second": 0.218, "eval_wer": 13.80431488165762, "step": 4800 }, { "epoch": 1.7076841827303935, "grad_norm": 36681.79296875, "learning_rate": 6.202208184617064e-06, "loss": 0.0034, "step": 4850 }, { "epoch": 1.7252882668779157, "grad_norm": 40895.7734375, "learning_rate": 5.491048276741784e-06, "loss": 0.0046, "step": 4900 }, { "epoch": 1.742892351025438, "grad_norm": 13767.77734375, "learning_rate": 4.820813899602506e-06, "loss": 0.0045, "step": 4950 }, { "epoch": 1.7604964351729602, "grad_norm": 39934.8203125, "learning_rate": 4.192121326927073e-06, "loss": 0.0042, "step": 5000 }, { "epoch": 1.7781005193204824, "grad_norm": 94029.875, "learning_rate": 3.605548635174533e-06, "loss": 0.0036, "step": 5050 }, { "epoch": 1.7957046034680046, "grad_norm": 13603.7431640625, "learning_rate": 3.061635171999566e-06, "loss": 0.0038, "step": 5100 }, { "epoch": 1.8133086876155269, "grad_norm": 12118.0068359375, "learning_rate": 2.5608810603278634e-06, "loss": 0.004, "step": 5150 }, { "epoch": 1.8309127717630491, "grad_norm": 19234.5703125, "learning_rate": 2.1037467384981026e-06, "loss": 0.004, "step": 5200 }, { "epoch": 1.8309127717630491, "eval_cer": 4.95685304399932, "eval_loss": 0.01188047043979168, "eval_runtime": 1212.5658, "eval_samples_per_second": 3.189, "eval_steps_per_second": 0.2, "eval_wer": 14.181798142892568, "step": 5200 }, { "epoch": 1.8485168559105714, "grad_norm": 19712.021484375, "learning_rate": 1.6906525368936055e-06, "loss": 0.0034, "step": 5250 }, { "epoch": 1.8661209400580936, "grad_norm": 8896.380859375, "learning_rate": 1.3219782914527634e-06, "loss": 0.0039, "step": 5300 }, { "epoch": 1.8837250242056158, "grad_norm": 20239.08984375, "learning_rate": 9.980629944137509e-07, "loss": 0.0037, "step": 5350 }, { "epoch": 1.901329108353138, "grad_norm": 22119.759765625, "learning_rate": 7.192044826145771e-07, "loss": 0.0035, "step": 5400 }, { "epoch": 1.9189331925006603, "grad_norm": 23297.744140625, "learning_rate": 4.856591636351604e-07, "loss": 0.0039, "step": 5450 }, { "epoch": 1.9365372766481825, "grad_norm": 8485.7099609375, "learning_rate": 2.976417800331144e-07, "loss": 0.0047, "step": 5500 }, { "epoch": 1.9541413607957046, "grad_norm": 28440.11328125, "learning_rate": 1.5532521189017267e-07, 
"loss": 0.0036, "step": 5550 }, { "epoch": 1.9717454449432268, "grad_norm": 29707.8125, "learning_rate": 5.8840317850683555e-08, "loss": 0.0036, "step": 5600 }, { "epoch": 1.9717454449432268, "eval_cer": 4.91711875106145, "eval_loss": 0.011817782185971737, "eval_runtime": 1130.1756, "eval_samples_per_second": 3.422, "eval_steps_per_second": 0.214, "eval_wer": 14.074339404292838, "step": 5600 }, { "epoch": 1.989349529090749, "grad_norm": 46225.79296875, "learning_rate": 8.275814798408554e-09, "loss": 0.0037, "step": 5650 }, { "epoch": 2.007041633659009, "grad_norm": 14135.712890625, "learning_rate": 2.7527634528122847e-05, "loss": 0.004, "step": 5700 }, { "epoch": 2.024645717806531, "grad_norm": 22203.5390625, "learning_rate": 2.6657185738447493e-05, "loss": 0.0042, "step": 5750 }, { "epoch": 2.0422498019540534, "grad_norm": 21549.859375, "learning_rate": 2.5795691222129982e-05, "loss": 0.004, "step": 5800 }, { "epoch": 2.0598538861015756, "grad_norm": 8696.861328125, "learning_rate": 2.4943481447352425e-05, "loss": 0.0038, "step": 5850 }, { "epoch": 2.077457970249098, "grad_norm": 15930.8994140625, "learning_rate": 2.410088332068215e-05, "loss": 0.0031, "step": 5900 }, { "epoch": 2.09506205439662, "grad_norm": 24799.234375, "learning_rate": 2.326822006167068e-05, "loss": 0.0033, "step": 5950 }, { "epoch": 2.1126661385441423, "grad_norm": 57942.671875, "learning_rate": 2.2445811078867185e-05, "loss": 0.0033, "step": 6000 }, { "epoch": 2.1126661385441423, "eval_cer": 5.04529388957071, "eval_loss": 0.01286122016608715, "eval_runtime": 1149.1813, "eval_samples_per_second": 3.365, "eval_steps_per_second": 0.211, "eval_wer": 15.082798335767228, "step": 6000 }, { "epoch": 2.1302702226916646, "grad_norm": 27342.32421875, "learning_rate": 2.1633971847293678e-05, "loss": 0.0032, "step": 6050 }, { "epoch": 2.147874306839187, "grad_norm": 9434.23828125, "learning_rate": 2.0833013787429323e-05, "loss": 0.0034, "step": 6100 }, { "epoch": 2.165478390986709, "grad_norm": 49190.4453125, "learning_rate": 2.0043244145749896e-05, "loss": 0.0041, "step": 6150 }, { "epoch": 2.1830824751342313, "grad_norm": 29072.685546875, "learning_rate": 1.9264965876868396e-05, "loss": 0.0029, "step": 6200 }, { "epoch": 2.2006865592817535, "grad_norm": 26559.041015625, "learning_rate": 1.8498477527322123e-05, "loss": 0.0036, "step": 6250 }, { "epoch": 2.2182906434292757, "grad_norm": 37322.6953125, "learning_rate": 1.774407312105051e-05, "loss": 0.0033, "step": 6300 }, { "epoch": 2.235894727576798, "grad_norm": 35458.96875, "learning_rate": 1.7002042046608017e-05, "loss": 0.0033, "step": 6350 }, { "epoch": 2.25349881172432, "grad_norm": 32754.345703125, "learning_rate": 1.627266894615504e-05, "loss": 0.0033, "step": 6400 }, { "epoch": 2.25349881172432, "eval_cer": 5.1423865569914735, "eval_loss": 0.012781694531440735, "eval_runtime": 1113.544, "eval_samples_per_second": 3.473, "eval_steps_per_second": 0.217, "eval_wer": 14.93400931309068, "step": 6400 }, { "epoch": 2.2711028958718424, "grad_norm": 28290.462890625, "learning_rate": 1.55562336062695e-05, "loss": 0.0031, "step": 6450 }, { "epoch": 2.2887069800193647, "grad_norm": 34798.16796875, "learning_rate": 1.4853010850621062e-05, "loss": 0.0037, "step": 6500 }, { "epoch": 2.306311064166887, "grad_norm": 15387.654296875, "learning_rate": 1.4163270434549237e-05, "loss": 0.0025, "step": 6550 }, { "epoch": 2.323915148314409, "grad_norm": 42429.45703125, "learning_rate": 1.348727694158547e-05, "loss": 0.0036, "step": 6600 }, { "epoch": 2.3415192324619314, "grad_norm": 
62645.078125, "learning_rate": 1.2825289681959313e-05, "loss": 0.0037, "step": 6650 }, { "epoch": 2.3591233166094536, "grad_norm": 37475.56640625, "learning_rate": 1.2177562593127274e-05, "loss": 0.0035, "step": 6700 }, { "epoch": 2.376727400756976, "grad_norm": 21763.126953125, "learning_rate": 1.154434414236284e-05, "loss": 0.0032, "step": 6750 }, { "epoch": 2.3943314849044977, "grad_norm": 23169.912109375, "learning_rate": 1.0925877231444687e-05, "loss": 0.0033, "step": 6800 }, { "epoch": 2.3943314849044977, "eval_cer": 5.017095359098672, "eval_loss": 0.012343456037342548, "eval_runtime": 1116.3074, "eval_samples_per_second": 3.464, "eval_steps_per_second": 0.217, "eval_wer": 14.73286859725016, "step": 6800 }, { "epoch": 2.41193556905202, "grad_norm": 22481.037109375, "learning_rate": 1.0322399103479884e-05, "loss": 0.0028, "step": 6850 }, { "epoch": 2.429539653199542, "grad_norm": 51517.1015625, "learning_rate": 9.734141251897733e-06, "loss": 0.0029, "step": 6900 }, { "epoch": 2.4471437373470644, "grad_norm": 34979.6171875, "learning_rate": 9.16132933164922e-06, "loss": 0.003, "step": 6950 }, { "epoch": 2.4647478214945866, "grad_norm": 15414.427734375, "learning_rate": 8.604183072646055e-06, "loss": 0.0026, "step": 7000 }, { "epoch": 2.482351905642109, "grad_norm": 22982.21875, "learning_rate": 8.06291619547253e-06, "loss": 0.0025, "step": 7050 }, { "epoch": 2.499955989789631, "grad_norm": 32445.208984375, "learning_rate": 7.5377363294026505e-06, "loss": 0.0031, "step": 7100 }, { "epoch": 2.5175600739371533, "grad_norm": 38354.1640625, "learning_rate": 7.028844932753714e-06, "loss": 0.0033, "step": 7150 }, { "epoch": 2.5351641580846755, "grad_norm": 13522.6416015625, "learning_rate": 6.536437215607261e-06, "loss": 0.0033, "step": 7200 }, { "epoch": 2.5351641580846755, "eval_cer": 4.367567844703002, "eval_loss": 0.012211957015097141, "eval_runtime": 1109.9647, "eval_samples_per_second": 3.484, "eval_steps_per_second": 0.218, "eval_wer": 13.674813324883587, "step": 7200 }, { "epoch": 2.552768242232198, "grad_norm": 19421.33203125, "learning_rate": 6.060702064926682e-06, "loss": 0.003, "step": 7250 }, { "epoch": 2.57037232637972, "grad_norm": 12878.841796875, "learning_rate": 5.601821972100435e-06, "loss": 0.0032, "step": 7300 }, { "epoch": 2.5879764105272423, "grad_norm": 16374.8388671875, "learning_rate": 5.159972962938581e-06, "loss": 0.0021, "step": 7350 }, { "epoch": 2.6055804946747645, "grad_norm": 19888.9140625, "learning_rate": 4.735324530149521e-06, "loss": 0.0032, "step": 7400 }, { "epoch": 2.6231845788222867, "grad_norm": 25133.033203125, "learning_rate": 4.328039568322817e-06, "loss": 0.0031, "step": 7450 }, { "epoch": 2.640788662969809, "grad_norm": 26627.578125, "learning_rate": 3.938274311443019e-06, "loss": 0.003, "step": 7500 }, { "epoch": 2.658392747117331, "grad_norm": 27871.9375, "learning_rate": 3.5661782729585146e-06, "loss": 0.0021, "step": 7550 }, { "epoch": 2.6759968312648534, "grad_norm": 42111.23828125, "learning_rate": 3.2118941884283825e-06, "loss": 0.0034, "step": 7600 }, { "epoch": 2.6759968312648534, "eval_cer": 4.530029832763488, "eval_loss": 0.012235511094331741, "eval_runtime": 1115.0261, "eval_samples_per_second": 3.468, "eval_steps_per_second": 0.217, "eval_wer": 13.561843881740279, "step": 7600 }, { "epoch": 2.6936009154123757, "grad_norm": 41923.59765625, "learning_rate": 2.8755579607691928e-06, "loss": 0.0032, "step": 7650 }, { "epoch": 2.711204999559898, "grad_norm": 18917.880859375, "learning_rate": 2.557298608122849e-06, "loss": 0.0027, "step": 
7700 }, { "epoch": 2.72880908370742, "grad_norm": 20727.607421875, "learning_rate": 2.257238214365365e-06, "loss": 0.0025, "step": 7750 }, { "epoch": 2.7464131678549424, "grad_norm": 8564.83984375, "learning_rate": 1.975491882275665e-06, "loss": 0.0031, "step": 7800 }, { "epoch": 2.7640172520024646, "grad_norm": 38865.1953125, "learning_rate": 1.7121676893823213e-06, "loss": 0.0028, "step": 7850 }, { "epoch": 2.781621336149987, "grad_norm": 9149.9501953125, "learning_rate": 1.467366646505125e-06, "loss": 0.0026, "step": 7900 }, { "epoch": 2.799225420297509, "grad_norm": 33032.7890625, "learning_rate": 1.241182659007495e-06, "loss": 0.0025, "step": 7950 }, { "epoch": 2.8168295044450313, "grad_norm": 39062.0859375, "learning_rate": 1.0337024907744576e-06, "loss": 0.0025, "step": 8000 }, { "epoch": 2.8168295044450313, "eval_cer": 4.469787517664137, "eval_loss": 0.012169728055596352, "eval_runtime": 1112.2456, "eval_samples_per_second": 3.477, "eval_steps_per_second": 0.218, "eval_wer": 13.366213870443335, "step": 8000 }, { "epoch": 2.8344335885925536, "grad_norm": 21345.421875, "learning_rate": 8.450057309301873e-07, "loss": 0.003, "step": 8050 }, { "epoch": 2.852037672740076, "grad_norm": 21389.115234375, "learning_rate": 6.751647633076952e-07, "loss": 0.0027, "step": 8100 }, { "epoch": 2.869641756887598, "grad_norm": 40714.12890625, "learning_rate": 5.242447386824833e-07, "loss": 0.0032, "step": 8150 }, { "epoch": 2.8872458410351203, "grad_norm": 13599.498046875, "learning_rate": 3.9230354978077586e-07, "loss": 0.0024, "step": 8200 }, { "epoch": 2.9048499251826425, "grad_norm": 18847.646484375, "learning_rate": 2.7939180907197717e-07, "loss": 0.0029, "step": 8250 }, { "epoch": 2.9224540093301647, "grad_norm": 25907.548828125, "learning_rate": 1.855528293537856e-07, "loss": 0.0031, "step": 8300 }, { "epoch": 2.940058093477687, "grad_norm": 36510.4765625, "learning_rate": 1.108226071374452e-07, "loss": 0.0027, "step": 8350 }, { "epoch": 2.9576621776252088, "grad_norm": 34380.96875, "learning_rate": 5.522980883952422e-08, "loss": 0.0028, "step": 8400 }, { "epoch": 2.9576621776252088, "eval_cer": 4.579377261089553, "eval_loss": 0.0121904406696558, "eval_runtime": 1111.3761, "eval_samples_per_second": 3.479, "eval_steps_per_second": 0.218, "eval_wer": 13.553577824924917, "step": 8400 } ], "logging_steps": 50, "max_steps": 8520, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3235112640512e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }