{
"best_metric": 51.82156133828997,
"best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
"epoch": 1.6009605763458075,
"eval_steps": 1000,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010006003602161296,
"grad_norm": 197.02195739746094,
"learning_rate": 5.000000000000001e-07,
"loss": 7.3864,
"step": 25
},
{
"epoch": 0.020012007204322592,
"grad_norm": 41.90484619140625,
"learning_rate": 1.0000000000000002e-06,
"loss": 5.5537,
"step": 50
},
{
"epoch": 0.03001801080648389,
"grad_norm": 32.08101272583008,
"learning_rate": 1.5e-06,
"loss": 3.7704,
"step": 75
},
{
"epoch": 0.040024014408645184,
"grad_norm": 28.216585159301758,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.6296,
"step": 100
},
{
"epoch": 0.05003001801080648,
"grad_norm": 23.683828353881836,
"learning_rate": 2.5e-06,
"loss": 2.4474,
"step": 125
},
{
"epoch": 0.06003602161296778,
"grad_norm": 21.883520126342773,
"learning_rate": 3e-06,
"loss": 2.2054,
"step": 150
},
{
"epoch": 0.07004202521512908,
"grad_norm": 24.772098541259766,
"learning_rate": 3.5e-06,
"loss": 2.0695,
"step": 175
},
{
"epoch": 0.08004802881729037,
"grad_norm": 24.105548858642578,
"learning_rate": 4.000000000000001e-06,
"loss": 1.9116,
"step": 200
},
{
"epoch": 0.09005403241945167,
"grad_norm": 18.805519104003906,
"learning_rate": 4.5e-06,
"loss": 1.7643,
"step": 225
},
{
"epoch": 0.10006003602161297,
"grad_norm": 15.599541664123535,
"learning_rate": 5e-06,
"loss": 1.6394,
"step": 250
},
{
"epoch": 0.11006603962377426,
"grad_norm": 15.514196395874023,
"learning_rate": 5.500000000000001e-06,
"loss": 1.6016,
"step": 275
},
{
"epoch": 0.12007204322593557,
"grad_norm": 15.5431547164917,
"learning_rate": 6e-06,
"loss": 1.5851,
"step": 300
},
{
"epoch": 0.13007804682809687,
"grad_norm": 16.450502395629883,
"learning_rate": 6.5000000000000004e-06,
"loss": 1.5076,
"step": 325
},
{
"epoch": 0.14008405043025815,
"grad_norm": 16.393997192382812,
"learning_rate": 7e-06,
"loss": 1.5487,
"step": 350
},
{
"epoch": 0.15009005403241946,
"grad_norm": 14.165709495544434,
"learning_rate": 7.500000000000001e-06,
"loss": 1.5365,
"step": 375
},
{
"epoch": 0.16009605763458074,
"grad_norm": 15.929381370544434,
"learning_rate": 8.000000000000001e-06,
"loss": 1.5023,
"step": 400
},
{
"epoch": 0.17010206123674204,
"grad_norm": 14.422001838684082,
"learning_rate": 8.5e-06,
"loss": 1.3558,
"step": 425
},
{
"epoch": 0.18010806483890335,
"grad_norm": 13.510339736938477,
"learning_rate": 9e-06,
"loss": 1.3898,
"step": 450
},
{
"epoch": 0.19011406844106463,
"grad_norm": 14.485660552978516,
"learning_rate": 9.5e-06,
"loss": 1.4279,
"step": 475
},
{
"epoch": 0.20012007204322593,
"grad_norm": 14.117327690124512,
"learning_rate": 1e-05,
"loss": 1.3455,
"step": 500
},
{
"epoch": 0.21012607564538724,
"grad_norm": 16.12464714050293,
"learning_rate": 9.944395017793596e-06,
"loss": 1.4,
"step": 525
},
{
"epoch": 0.22013207924754852,
"grad_norm": 15.304022789001465,
"learning_rate": 9.888790035587188e-06,
"loss": 1.4159,
"step": 550
},
{
"epoch": 0.23013808284970982,
"grad_norm": 14.668664932250977,
"learning_rate": 9.833185053380784e-06,
"loss": 1.3445,
"step": 575
},
{
"epoch": 0.24014408645187113,
"grad_norm": 13.041420936584473,
"learning_rate": 9.777580071174379e-06,
"loss": 1.3622,
"step": 600
},
{
"epoch": 0.25015009005403244,
"grad_norm": 15.908055305480957,
"learning_rate": 9.721975088967973e-06,
"loss": 1.3234,
"step": 625
},
{
"epoch": 0.26015609365619374,
"grad_norm": 13.73078727722168,
"learning_rate": 9.666370106761567e-06,
"loss": 1.2332,
"step": 650
},
{
"epoch": 0.270162097258355,
"grad_norm": 14.327301979064941,
"learning_rate": 9.610765124555162e-06,
"loss": 1.3042,
"step": 675
},
{
"epoch": 0.2801681008605163,
"grad_norm": 14.390907287597656,
"learning_rate": 9.555160142348756e-06,
"loss": 1.3216,
"step": 700
},
{
"epoch": 0.2901741044626776,
"grad_norm": 13.917515754699707,
"learning_rate": 9.49955516014235e-06,
"loss": 1.2931,
"step": 725
},
{
"epoch": 0.3001801080648389,
"grad_norm": 15.108023643493652,
"learning_rate": 9.443950177935945e-06,
"loss": 1.3286,
"step": 750
},
{
"epoch": 0.3101861116670002,
"grad_norm": 13.692678451538086,
"learning_rate": 9.388345195729539e-06,
"loss": 1.3057,
"step": 775
},
{
"epoch": 0.32019211526916147,
"grad_norm": 13.685354232788086,
"learning_rate": 9.332740213523132e-06,
"loss": 1.2402,
"step": 800
},
{
"epoch": 0.3301981188713228,
"grad_norm": 14.591761589050293,
"learning_rate": 9.277135231316726e-06,
"loss": 1.2688,
"step": 825
},
{
"epoch": 0.3402041224734841,
"grad_norm": 15.677751541137695,
"learning_rate": 9.221530249110321e-06,
"loss": 1.3076,
"step": 850
},
{
"epoch": 0.3502101260756454,
"grad_norm": 15.109577178955078,
"learning_rate": 9.165925266903915e-06,
"loss": 1.2141,
"step": 875
},
{
"epoch": 0.3602161296778067,
"grad_norm": 10.552845001220703,
"learning_rate": 9.110320284697509e-06,
"loss": 1.2393,
"step": 900
},
{
"epoch": 0.370222133279968,
"grad_norm": 12.321894645690918,
"learning_rate": 9.054715302491104e-06,
"loss": 1.2417,
"step": 925
},
{
"epoch": 0.38022813688212925,
"grad_norm": 13.729790687561035,
"learning_rate": 8.999110320284698e-06,
"loss": 1.2082,
"step": 950
},
{
"epoch": 0.39023414048429056,
"grad_norm": 13.137016296386719,
"learning_rate": 8.943505338078292e-06,
"loss": 1.2048,
"step": 975
},
{
"epoch": 0.40024014408645187,
"grad_norm": 12.194613456726074,
"learning_rate": 8.887900355871887e-06,
"loss": 1.2739,
"step": 1000
},
{
"epoch": 0.40024014408645187,
"eval_cer": 61.346116219917825,
"eval_loss": 1.169872522354126,
"eval_runtime": 1744.6409,
"eval_samples_per_second": 2.274,
"eval_steps_per_second": 0.284,
"step": 1000
},
{
"epoch": 0.41024614768861317,
"grad_norm": 14.949773788452148,
"learning_rate": 8.832295373665481e-06,
"loss": 1.2059,
"step": 1025
},
{
"epoch": 0.4202521512907745,
"grad_norm": 13.134845733642578,
"learning_rate": 8.776690391459075e-06,
"loss": 1.206,
"step": 1050
},
{
"epoch": 0.4302581548929358,
"grad_norm": 15.212950706481934,
"learning_rate": 8.72108540925267e-06,
"loss": 1.2332,
"step": 1075
},
{
"epoch": 0.44026415849509704,
"grad_norm": 14.196298599243164,
"learning_rate": 8.665480427046264e-06,
"loss": 1.1585,
"step": 1100
},
{
"epoch": 0.45027016209725834,
"grad_norm": 14.354203224182129,
"learning_rate": 8.609875444839858e-06,
"loss": 1.1982,
"step": 1125
},
{
"epoch": 0.46027616569941965,
"grad_norm": 14.843274116516113,
"learning_rate": 8.554270462633453e-06,
"loss": 1.1914,
"step": 1150
},
{
"epoch": 0.47028216930158095,
"grad_norm": 13.450782775878906,
"learning_rate": 8.498665480427047e-06,
"loss": 1.1773,
"step": 1175
},
{
"epoch": 0.48028817290374226,
"grad_norm": 12.50664234161377,
"learning_rate": 8.44306049822064e-06,
"loss": 1.277,
"step": 1200
},
{
"epoch": 0.49029417650590357,
"grad_norm": 11.999517440795898,
"learning_rate": 8.387455516014236e-06,
"loss": 1.09,
"step": 1225
},
{
"epoch": 0.5003001801080649,
"grad_norm": 14.905177116394043,
"learning_rate": 8.33185053380783e-06,
"loss": 1.1566,
"step": 1250
},
{
"epoch": 0.5103061837102262,
"grad_norm": 14.532490730285645,
"learning_rate": 8.276245551601423e-06,
"loss": 1.1516,
"step": 1275
},
{
"epoch": 0.5203121873123875,
"grad_norm": 13.656341552734375,
"learning_rate": 8.220640569395019e-06,
"loss": 1.1212,
"step": 1300
},
{
"epoch": 0.5303181909145487,
"grad_norm": 12.786870956420898,
"learning_rate": 8.165035587188612e-06,
"loss": 1.0919,
"step": 1325
},
{
"epoch": 0.54032419451671,
"grad_norm": 14.387174606323242,
"learning_rate": 8.109430604982206e-06,
"loss": 1.1252,
"step": 1350
},
{
"epoch": 0.5503301981188713,
"grad_norm": 13.124211311340332,
"learning_rate": 8.053825622775802e-06,
"loss": 1.1736,
"step": 1375
},
{
"epoch": 0.5603362017210326,
"grad_norm": 14.467448234558105,
"learning_rate": 7.998220640569395e-06,
"loss": 1.0995,
"step": 1400
},
{
"epoch": 0.5703422053231939,
"grad_norm": 13.015649795532227,
"learning_rate": 7.94261565836299e-06,
"loss": 1.2058,
"step": 1425
},
{
"epoch": 0.5803482089253552,
"grad_norm": 12.927563667297363,
"learning_rate": 7.887010676156584e-06,
"loss": 1.1048,
"step": 1450
},
{
"epoch": 0.5903542125275165,
"grad_norm": 14.437759399414062,
"learning_rate": 7.831405693950178e-06,
"loss": 1.1138,
"step": 1475
},
{
"epoch": 0.6003602161296778,
"grad_norm": 13.162938117980957,
"learning_rate": 7.775800711743774e-06,
"loss": 1.08,
"step": 1500
},
{
"epoch": 0.6103662197318391,
"grad_norm": 11.896222114562988,
"learning_rate": 7.720195729537367e-06,
"loss": 1.1819,
"step": 1525
},
{
"epoch": 0.6203722233340004,
"grad_norm": 17.318538665771484,
"learning_rate": 7.664590747330961e-06,
"loss": 1.124,
"step": 1550
},
{
"epoch": 0.6303782269361617,
"grad_norm": 12.138945579528809,
"learning_rate": 7.608985765124556e-06,
"loss": 1.2297,
"step": 1575
},
{
"epoch": 0.6403842305383229,
"grad_norm": 14.958952903747559,
"learning_rate": 7.55338078291815e-06,
"loss": 1.1257,
"step": 1600
},
{
"epoch": 0.6503902341404842,
"grad_norm": 12.975215911865234,
"learning_rate": 7.497775800711744e-06,
"loss": 1.0724,
"step": 1625
},
{
"epoch": 0.6603962377426456,
"grad_norm": 14.213400840759277,
"learning_rate": 7.4421708185053385e-06,
"loss": 1.186,
"step": 1650
},
{
"epoch": 0.6704022413448069,
"grad_norm": 13.799762725830078,
"learning_rate": 7.386565836298933e-06,
"loss": 1.0768,
"step": 1675
},
{
"epoch": 0.6804082449469682,
"grad_norm": 11.595632553100586,
"learning_rate": 7.330960854092527e-06,
"loss": 1.1245,
"step": 1700
},
{
"epoch": 0.6904142485491295,
"grad_norm": 13.799065589904785,
"learning_rate": 7.275355871886121e-06,
"loss": 1.1161,
"step": 1725
},
{
"epoch": 0.7004202521512908,
"grad_norm": 12.326783180236816,
"learning_rate": 7.219750889679717e-06,
"loss": 1.0904,
"step": 1750
},
{
"epoch": 0.7104262557534521,
"grad_norm": 13.207780838012695,
"learning_rate": 7.16414590747331e-06,
"loss": 1.1031,
"step": 1775
},
{
"epoch": 0.7204322593556134,
"grad_norm": 13.019478797912598,
"learning_rate": 7.108540925266904e-06,
"loss": 1.1205,
"step": 1800
},
{
"epoch": 0.7304382629577747,
"grad_norm": 12.38670825958252,
"learning_rate": 7.0529359430605e-06,
"loss": 1.0972,
"step": 1825
},
{
"epoch": 0.740444266559936,
"grad_norm": 13.468473434448242,
"learning_rate": 6.9973309608540925e-06,
"loss": 0.9878,
"step": 1850
},
{
"epoch": 0.7504502701620973,
"grad_norm": 12.52755069732666,
"learning_rate": 6.941725978647688e-06,
"loss": 1.0955,
"step": 1875
},
{
"epoch": 0.7604562737642585,
"grad_norm": 13.126026153564453,
"learning_rate": 6.886120996441281e-06,
"loss": 1.1019,
"step": 1900
},
{
"epoch": 0.7704622773664198,
"grad_norm": 12.999829292297363,
"learning_rate": 6.830516014234876e-06,
"loss": 1.0055,
"step": 1925
},
{
"epoch": 0.7804682809685811,
"grad_norm": 11.212359428405762,
"learning_rate": 6.774911032028471e-06,
"loss": 0.9664,
"step": 1950
},
{
"epoch": 0.7904742845707424,
"grad_norm": 11.80057144165039,
"learning_rate": 6.7193060498220645e-06,
"loss": 0.9562,
"step": 1975
},
{
"epoch": 0.8004802881729037,
"grad_norm": 13.41251277923584,
"learning_rate": 6.663701067615659e-06,
"loss": 1.0283,
"step": 2000
},
{
"epoch": 0.8004802881729037,
"eval_cer": 55.51946781451771,
"eval_loss": 1.0144044160842896,
"eval_runtime": 1745.0611,
"eval_samples_per_second": 2.273,
"eval_steps_per_second": 0.284,
"step": 2000
},
{
"epoch": 0.810486291775065,
"grad_norm": 14.396341323852539,
"learning_rate": 6.608096085409254e-06,
"loss": 1.0206,
"step": 2025
},
{
"epoch": 0.8204922953772263,
"grad_norm": 12.45606517791748,
"learning_rate": 6.552491103202847e-06,
"loss": 1.0119,
"step": 2050
},
{
"epoch": 0.8304982989793877,
"grad_norm": 13.013134002685547,
"learning_rate": 6.496886120996442e-06,
"loss": 1.102,
"step": 2075
},
{
"epoch": 0.840504302581549,
"grad_norm": 14.00875473022461,
"learning_rate": 6.4412811387900366e-06,
"loss": 1.0198,
"step": 2100
},
{
"epoch": 0.8505103061837103,
"grad_norm": 13.691847801208496,
"learning_rate": 6.38567615658363e-06,
"loss": 1.1056,
"step": 2125
},
{
"epoch": 0.8605163097858716,
"grad_norm": 13.617313385009766,
"learning_rate": 6.330071174377225e-06,
"loss": 1.0866,
"step": 2150
},
{
"epoch": 0.8705223133880328,
"grad_norm": 14.322003364562988,
"learning_rate": 6.2744661921708194e-06,
"loss": 1.0566,
"step": 2175
},
{
"epoch": 0.8805283169901941,
"grad_norm": 13.71176528930664,
"learning_rate": 6.218861209964413e-06,
"loss": 1.1146,
"step": 2200
},
{
"epoch": 0.8905343205923554,
"grad_norm": 12.21072006225586,
"learning_rate": 6.163256227758008e-06,
"loss": 1.1476,
"step": 2225
},
{
"epoch": 0.9005403241945167,
"grad_norm": 14.359560012817383,
"learning_rate": 6.107651245551602e-06,
"loss": 1.0357,
"step": 2250
},
{
"epoch": 0.910546327796678,
"grad_norm": 13.458608627319336,
"learning_rate": 6.052046263345196e-06,
"loss": 1.0524,
"step": 2275
},
{
"epoch": 0.9205523313988393,
"grad_norm": 13.808725357055664,
"learning_rate": 5.996441281138791e-06,
"loss": 0.9287,
"step": 2300
},
{
"epoch": 0.9305583350010006,
"grad_norm": 14.640447616577148,
"learning_rate": 5.940836298932385e-06,
"loss": 1.0611,
"step": 2325
},
{
"epoch": 0.9405643386031619,
"grad_norm": 12.251474380493164,
"learning_rate": 5.885231316725979e-06,
"loss": 1.0569,
"step": 2350
},
{
"epoch": 0.9505703422053232,
"grad_norm": 10.18205451965332,
"learning_rate": 5.8296263345195735e-06,
"loss": 1.0099,
"step": 2375
},
{
"epoch": 0.9605763458074845,
"grad_norm": 12.422398567199707,
"learning_rate": 5.774021352313167e-06,
"loss": 1.119,
"step": 2400
},
{
"epoch": 0.9705823494096458,
"grad_norm": 15.4508056640625,
"learning_rate": 5.718416370106762e-06,
"loss": 1.0657,
"step": 2425
},
{
"epoch": 0.9805883530118071,
"grad_norm": 13.556448936462402,
"learning_rate": 5.662811387900356e-06,
"loss": 1.0186,
"step": 2450
},
{
"epoch": 0.9905943566139683,
"grad_norm": 17.107561111450195,
"learning_rate": 5.60720640569395e-06,
"loss": 0.971,
"step": 2475
},
{
"epoch": 1.0006003602161297,
"grad_norm": 11.349823951721191,
"learning_rate": 5.551601423487545e-06,
"loss": 1.0679,
"step": 2500
},
{
"epoch": 1.010606363818291,
"grad_norm": 11.253293991088867,
"learning_rate": 5.495996441281139e-06,
"loss": 0.7139,
"step": 2525
},
{
"epoch": 1.0206123674204524,
"grad_norm": 10.042984008789062,
"learning_rate": 5.440391459074733e-06,
"loss": 0.774,
"step": 2550
},
{
"epoch": 1.0306183710226136,
"grad_norm": 11.355831146240234,
"learning_rate": 5.3847864768683275e-06,
"loss": 0.7949,
"step": 2575
},
{
"epoch": 1.040624374624775,
"grad_norm": 11.004257202148438,
"learning_rate": 5.329181494661922e-06,
"loss": 0.7722,
"step": 2600
},
{
"epoch": 1.0506303782269362,
"grad_norm": 11.717720031738281,
"learning_rate": 5.273576512455516e-06,
"loss": 0.7796,
"step": 2625
},
{
"epoch": 1.0606363818290974,
"grad_norm": 12.214451789855957,
"learning_rate": 5.21797153024911e-06,
"loss": 0.7728,
"step": 2650
},
{
"epoch": 1.0706423854312588,
"grad_norm": 11.615367889404297,
"learning_rate": 5.162366548042706e-06,
"loss": 0.7563,
"step": 2675
},
{
"epoch": 1.08064838903342,
"grad_norm": 13.19363021850586,
"learning_rate": 5.106761565836299e-06,
"loss": 0.7958,
"step": 2700
},
{
"epoch": 1.0906543926355814,
"grad_norm": 14.007135391235352,
"learning_rate": 5.051156583629894e-06,
"loss": 0.8891,
"step": 2725
},
{
"epoch": 1.1006603962377426,
"grad_norm": 10.497177124023438,
"learning_rate": 4.995551601423488e-06,
"loss": 0.7156,
"step": 2750
},
{
"epoch": 1.110666399839904,
"grad_norm": 12.282913208007812,
"learning_rate": 4.939946619217082e-06,
"loss": 0.7262,
"step": 2775
},
{
"epoch": 1.1206724034420652,
"grad_norm": 14.421050071716309,
"learning_rate": 4.884341637010677e-06,
"loss": 0.7824,
"step": 2800
},
{
"epoch": 1.1306784070442266,
"grad_norm": 13.261626243591309,
"learning_rate": 4.828736654804271e-06,
"loss": 0.7993,
"step": 2825
},
{
"epoch": 1.1406844106463878,
"grad_norm": 11.418912887573242,
"learning_rate": 4.773131672597865e-06,
"loss": 0.7521,
"step": 2850
},
{
"epoch": 1.1506904142485492,
"grad_norm": 14.025506019592285,
"learning_rate": 4.71752669039146e-06,
"loss": 0.8405,
"step": 2875
},
{
"epoch": 1.1606964178507104,
"grad_norm": 12.407489776611328,
"learning_rate": 4.661921708185054e-06,
"loss": 0.7985,
"step": 2900
},
{
"epoch": 1.1707024214528716,
"grad_norm": 11.83797550201416,
"learning_rate": 4.606316725978648e-06,
"loss": 0.799,
"step": 2925
},
{
"epoch": 1.180708425055033,
"grad_norm": 10.337922096252441,
"learning_rate": 4.550711743772243e-06,
"loss": 0.7965,
"step": 2950
},
{
"epoch": 1.1907144286571942,
"grad_norm": 11.562491416931152,
"learning_rate": 4.4951067615658365e-06,
"loss": 0.7836,
"step": 2975
},
{
"epoch": 1.2007204322593557,
"grad_norm": 11.886847496032715,
"learning_rate": 4.439501779359431e-06,
"loss": 0.7925,
"step": 3000
},
{
"epoch": 1.2007204322593557,
"eval_cer": 51.82938759538251,
"eval_loss": 0.9463357329368591,
"eval_runtime": 1801.523,
"eval_samples_per_second": 2.202,
"eval_steps_per_second": 0.275,
"step": 3000
},
{
"epoch": 1.2107264358615168,
"grad_norm": 10.6748628616333,
"learning_rate": 4.383896797153026e-06,
"loss": 0.7877,
"step": 3025
},
{
"epoch": 1.2207324394636783,
"grad_norm": 11.652464866638184,
"learning_rate": 4.328291814946619e-06,
"loss": 0.7794,
"step": 3050
},
{
"epoch": 1.2307384430658395,
"grad_norm": 11.998939514160156,
"learning_rate": 4.272686832740214e-06,
"loss": 0.8183,
"step": 3075
},
{
"epoch": 1.2407444466680009,
"grad_norm": 13.142699241638184,
"learning_rate": 4.217081850533808e-06,
"loss": 0.7834,
"step": 3100
},
{
"epoch": 1.250750450270162,
"grad_norm": 11.67496395111084,
"learning_rate": 4.161476868327402e-06,
"loss": 0.7594,
"step": 3125
},
{
"epoch": 1.2607564538723235,
"grad_norm": 11.429244995117188,
"learning_rate": 4.105871886120997e-06,
"loss": 0.7963,
"step": 3150
},
{
"epoch": 1.2707624574744847,
"grad_norm": 12.160046577453613,
"learning_rate": 4.0502669039145905e-06,
"loss": 0.7441,
"step": 3175
},
{
"epoch": 1.2807684610766459,
"grad_norm": 12.606410026550293,
"learning_rate": 3.994661921708186e-06,
"loss": 0.8733,
"step": 3200
},
{
"epoch": 1.2907744646788073,
"grad_norm": 14.263989448547363,
"learning_rate": 3.93905693950178e-06,
"loss": 0.7433,
"step": 3225
},
{
"epoch": 1.3007804682809687,
"grad_norm": 11.299212455749512,
"learning_rate": 3.883451957295374e-06,
"loss": 0.789,
"step": 3250
},
{
"epoch": 1.31078647188313,
"grad_norm": 12.999605178833008,
"learning_rate": 3.827846975088969e-06,
"loss": 0.7868,
"step": 3275
},
{
"epoch": 1.320792475485291,
"grad_norm": 10.38305950164795,
"learning_rate": 3.7722419928825625e-06,
"loss": 0.7433,
"step": 3300
},
{
"epoch": 1.3307984790874525,
"grad_norm": 13.05246639251709,
"learning_rate": 3.7166370106761567e-06,
"loss": 0.7547,
"step": 3325
},
{
"epoch": 1.3408044826896137,
"grad_norm": 11.195088386535645,
"learning_rate": 3.661032028469751e-06,
"loss": 0.8126,
"step": 3350
},
{
"epoch": 1.3508104862917751,
"grad_norm": 11.096240997314453,
"learning_rate": 3.605427046263346e-06,
"loss": 0.743,
"step": 3375
},
{
"epoch": 1.3608164898939363,
"grad_norm": 13.594226837158203,
"learning_rate": 3.5498220640569395e-06,
"loss": 0.7965,
"step": 3400
},
{
"epoch": 1.3708224934960977,
"grad_norm": 15.316413879394531,
"learning_rate": 3.4942170818505337e-06,
"loss": 0.7956,
"step": 3425
},
{
"epoch": 1.380828497098259,
"grad_norm": 12.78977108001709,
"learning_rate": 3.4386120996441287e-06,
"loss": 0.7693,
"step": 3450
},
{
"epoch": 1.3908345007004201,
"grad_norm": 12.662712097167969,
"learning_rate": 3.383007117437723e-06,
"loss": 0.7768,
"step": 3475
},
{
"epoch": 1.4008405043025816,
"grad_norm": 8.803949356079102,
"learning_rate": 3.327402135231317e-06,
"loss": 0.7622,
"step": 3500
},
{
"epoch": 1.410846507904743,
"grad_norm": 13.736053466796875,
"learning_rate": 3.2717971530249116e-06,
"loss": 0.8152,
"step": 3525
},
{
"epoch": 1.4208525115069042,
"grad_norm": 12.255024909973145,
"learning_rate": 3.2161921708185057e-06,
"loss": 0.8149,
"step": 3550
},
{
"epoch": 1.4308585151090654,
"grad_norm": 12.75201416015625,
"learning_rate": 3.1605871886121e-06,
"loss": 0.7471,
"step": 3575
},
{
"epoch": 1.4408645187112268,
"grad_norm": 13.30036449432373,
"learning_rate": 3.1049822064056944e-06,
"loss": 0.6892,
"step": 3600
},
{
"epoch": 1.450870522313388,
"grad_norm": 10.946511268615723,
"learning_rate": 3.0493772241992886e-06,
"loss": 0.7916,
"step": 3625
},
{
"epoch": 1.4608765259155494,
"grad_norm": 10.852522850036621,
"learning_rate": 2.9937722419928827e-06,
"loss": 0.7329,
"step": 3650
},
{
"epoch": 1.4708825295177106,
"grad_norm": 11.466883659362793,
"learning_rate": 2.938167259786477e-06,
"loss": 0.7588,
"step": 3675
},
{
"epoch": 1.480888533119872,
"grad_norm": 12.728093147277832,
"learning_rate": 2.8825622775800715e-06,
"loss": 0.7527,
"step": 3700
},
{
"epoch": 1.4908945367220332,
"grad_norm": 9.343868255615234,
"learning_rate": 2.8269572953736656e-06,
"loss": 0.7451,
"step": 3725
},
{
"epoch": 1.5009005403241944,
"grad_norm": 14.047112464904785,
"learning_rate": 2.7713523131672598e-06,
"loss": 0.7905,
"step": 3750
},
{
"epoch": 1.5109065439263558,
"grad_norm": 10.599916458129883,
"learning_rate": 2.7157473309608543e-06,
"loss": 0.7819,
"step": 3775
},
{
"epoch": 1.5209125475285172,
"grad_norm": 14.341135025024414,
"learning_rate": 2.6601423487544485e-06,
"loss": 0.7166,
"step": 3800
},
{
"epoch": 1.5309185511306784,
"grad_norm": 11.71387767791748,
"learning_rate": 2.6045373665480426e-06,
"loss": 0.7386,
"step": 3825
},
{
"epoch": 1.5409245547328396,
"grad_norm": 12.604011535644531,
"learning_rate": 2.5489323843416376e-06,
"loss": 0.6969,
"step": 3850
},
{
"epoch": 1.550930558335001,
"grad_norm": 9.848773002624512,
"learning_rate": 2.4933274021352318e-06,
"loss": 0.8356,
"step": 3875
},
{
"epoch": 1.5609365619371625,
"grad_norm": 14.05534553527832,
"learning_rate": 2.437722419928826e-06,
"loss": 0.7282,
"step": 3900
},
{
"epoch": 1.5709425655393234,
"grad_norm": 12.9791259765625,
"learning_rate": 2.38211743772242e-06,
"loss": 0.7059,
"step": 3925
},
{
"epoch": 1.5809485691414848,
"grad_norm": 10.708452224731445,
"learning_rate": 2.3265124555160142e-06,
"loss": 0.7712,
"step": 3950
},
{
"epoch": 1.5909545727436463,
"grad_norm": 13.361218452453613,
"learning_rate": 2.270907473309609e-06,
"loss": 0.719,
"step": 3975
},
{
"epoch": 1.6009605763458075,
"grad_norm": 11.740647315979004,
"learning_rate": 2.215302491103203e-06,
"loss": 0.8262,
"step": 4000
},
{
"epoch": 1.6009605763458075,
"eval_cer": 51.82156133828997,
"eval_loss": 0.9109482169151306,
"eval_runtime": 1918.0204,
"eval_samples_per_second": 2.068,
"eval_steps_per_second": 0.259,
"step": 4000
}
],
"logging_steps": 25,
"max_steps": 4996,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 1.846744552267776e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}