{
  "best_metric": 51.82156133828997,
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
  "epoch": 1.6009605763458075,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010006003602161296,
      "grad_norm": 197.02195739746094,
      "learning_rate": 5.000000000000001e-07,
      "loss": 7.3864,
      "step": 25
    },
    {
      "epoch": 0.020012007204322592,
      "grad_norm": 41.90484619140625,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 5.5537,
      "step": 50
    },
    {
      "epoch": 0.03001801080648389,
      "grad_norm": 32.08101272583008,
      "learning_rate": 1.5e-06,
      "loss": 3.7704,
      "step": 75
    },
    {
      "epoch": 0.040024014408645184,
      "grad_norm": 28.216585159301758,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.6296,
      "step": 100
    },
    {
      "epoch": 0.05003001801080648,
      "grad_norm": 23.683828353881836,
      "learning_rate": 2.5e-06,
      "loss": 2.4474,
      "step": 125
    },
    {
      "epoch": 0.06003602161296778,
      "grad_norm": 21.883520126342773,
      "learning_rate": 3e-06,
      "loss": 2.2054,
      "step": 150
    },
    {
      "epoch": 0.07004202521512908,
      "grad_norm": 24.772098541259766,
      "learning_rate": 3.5e-06,
      "loss": 2.0695,
      "step": 175
    },
    {
      "epoch": 0.08004802881729037,
      "grad_norm": 24.105548858642578,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.9116,
      "step": 200
    },
    {
      "epoch": 0.09005403241945167,
      "grad_norm": 18.805519104003906,
      "learning_rate": 4.5e-06,
      "loss": 1.7643,
      "step": 225
    },
    {
      "epoch": 0.10006003602161297,
      "grad_norm": 15.599541664123535,
      "learning_rate": 5e-06,
      "loss": 1.6394,
      "step": 250
    },
    {
      "epoch": 0.11006603962377426,
      "grad_norm": 15.514196395874023,
      "learning_rate": 5.500000000000001e-06,
      "loss": 1.6016,
      "step": 275
    },
    {
      "epoch": 0.12007204322593557,
      "grad_norm": 15.5431547164917,
      "learning_rate": 6e-06,
      "loss": 1.5851,
      "step": 300
    },
    {
      "epoch": 0.13007804682809687,
      "grad_norm": 16.450502395629883,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 1.5076,
      "step": 325
    },
    {
      "epoch": 0.14008405043025815,
      "grad_norm": 16.393997192382812,
      "learning_rate": 7e-06,
      "loss": 1.5487,
      "step": 350
    },
    {
      "epoch": 0.15009005403241946,
      "grad_norm": 14.165709495544434,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.5365,
      "step": 375
    },
    {
      "epoch": 0.16009605763458074,
      "grad_norm": 15.929381370544434,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.5023,
      "step": 400
    },
    {
      "epoch": 0.17010206123674204,
      "grad_norm": 14.422001838684082,
      "learning_rate": 8.5e-06,
      "loss": 1.3558,
      "step": 425
    },
    {
      "epoch": 0.18010806483890335,
      "grad_norm": 13.510339736938477,
      "learning_rate": 9e-06,
      "loss": 1.3898,
      "step": 450
    },
    {
      "epoch": 0.19011406844106463,
      "grad_norm": 14.485660552978516,
      "learning_rate": 9.5e-06,
      "loss": 1.4279,
      "step": 475
    },
    {
      "epoch": 0.20012007204322593,
      "grad_norm": 14.117327690124512,
      "learning_rate": 1e-05,
      "loss": 1.3455,
      "step": 500
    },
    {
      "epoch": 0.21012607564538724,
      "grad_norm": 16.12464714050293,
      "learning_rate": 9.944395017793596e-06,
      "loss": 1.4,
      "step": 525
    },
    {
      "epoch": 0.22013207924754852,
      "grad_norm": 15.304022789001465,
      "learning_rate": 9.888790035587188e-06,
      "loss": 1.4159,
      "step": 550
    },
    {
      "epoch": 0.23013808284970982,
      "grad_norm": 14.668664932250977,
      "learning_rate": 9.833185053380784e-06,
      "loss": 1.3445,
      "step": 575
    },
    {
      "epoch": 0.24014408645187113,
      "grad_norm": 13.041420936584473,
      "learning_rate": 9.777580071174379e-06,
      "loss": 1.3622,
      "step": 600
    },
    {
      "epoch": 0.25015009005403244,
      "grad_norm": 15.908055305480957,
      "learning_rate": 9.721975088967973e-06,
      "loss": 1.3234,
      "step": 625
    },
    {
      "epoch": 0.26015609365619374,
      "grad_norm": 13.73078727722168,
      "learning_rate": 9.666370106761567e-06,
      "loss": 1.2332,
      "step": 650
    },
    {
      "epoch": 0.270162097258355,
      "grad_norm": 14.327301979064941,
      "learning_rate": 9.610765124555162e-06,
      "loss": 1.3042,
      "step": 675
    },
    {
      "epoch": 0.2801681008605163,
      "grad_norm": 14.390907287597656,
      "learning_rate": 9.555160142348756e-06,
      "loss": 1.3216,
      "step": 700
    },
    {
      "epoch": 0.2901741044626776,
      "grad_norm": 13.917515754699707,
      "learning_rate": 9.49955516014235e-06,
      "loss": 1.2931,
      "step": 725
    },
    {
      "epoch": 0.3001801080648389,
      "grad_norm": 15.108023643493652,
      "learning_rate": 9.443950177935945e-06,
      "loss": 1.3286,
      "step": 750
    },
    {
      "epoch": 0.3101861116670002,
      "grad_norm": 13.692678451538086,
      "learning_rate": 9.388345195729539e-06,
      "loss": 1.3057,
      "step": 775
    },
    {
      "epoch": 0.32019211526916147,
      "grad_norm": 13.685354232788086,
      "learning_rate": 9.332740213523132e-06,
      "loss": 1.2402,
      "step": 800
    },
    {
      "epoch": 0.3301981188713228,
      "grad_norm": 14.591761589050293,
      "learning_rate": 9.277135231316726e-06,
      "loss": 1.2688,
      "step": 825
    },
    {
      "epoch": 0.3402041224734841,
      "grad_norm": 15.677751541137695,
      "learning_rate": 9.221530249110321e-06,
      "loss": 1.3076,
      "step": 850
    },
    {
      "epoch": 0.3502101260756454,
      "grad_norm": 15.109577178955078,
      "learning_rate": 9.165925266903915e-06,
      "loss": 1.2141,
      "step": 875
    },
    {
      "epoch": 0.3602161296778067,
      "grad_norm": 10.552845001220703,
      "learning_rate": 9.110320284697509e-06,
      "loss": 1.2393,
      "step": 900
    },
    {
      "epoch": 0.370222133279968,
      "grad_norm": 12.321894645690918,
      "learning_rate": 9.054715302491104e-06,
      "loss": 1.2417,
      "step": 925
    },
    {
      "epoch": 0.38022813688212925,
      "grad_norm": 13.729790687561035,
      "learning_rate": 8.999110320284698e-06,
      "loss": 1.2082,
      "step": 950
    },
    {
      "epoch": 0.39023414048429056,
      "grad_norm": 13.137016296386719,
      "learning_rate": 8.943505338078292e-06,
      "loss": 1.2048,
      "step": 975
    },
    {
      "epoch": 0.40024014408645187,
      "grad_norm": 12.194613456726074,
      "learning_rate": 8.887900355871887e-06,
      "loss": 1.2739,
      "step": 1000
    },
    {
      "epoch": 0.40024014408645187,
      "eval_cer": 61.346116219917825,
      "eval_loss": 1.169872522354126,
      "eval_runtime": 1744.6409,
      "eval_samples_per_second": 2.274,
      "eval_steps_per_second": 0.284,
      "step": 1000
    },
    {
      "epoch": 0.41024614768861317,
      "grad_norm": 14.949773788452148,
      "learning_rate": 8.832295373665481e-06,
      "loss": 1.2059,
      "step": 1025
    },
    {
      "epoch": 0.4202521512907745,
      "grad_norm": 13.134845733642578,
      "learning_rate": 8.776690391459075e-06,
      "loss": 1.206,
      "step": 1050
    },
    {
      "epoch": 0.4302581548929358,
      "grad_norm": 15.212950706481934,
      "learning_rate": 8.72108540925267e-06,
      "loss": 1.2332,
      "step": 1075
    },
    {
      "epoch": 0.44026415849509704,
      "grad_norm": 14.196298599243164,
      "learning_rate": 8.665480427046264e-06,
      "loss": 1.1585,
      "step": 1100
    },
    {
      "epoch": 0.45027016209725834,
      "grad_norm": 14.354203224182129,
      "learning_rate": 8.609875444839858e-06,
      "loss": 1.1982,
      "step": 1125
    },
    {
      "epoch": 0.46027616569941965,
      "grad_norm": 14.843274116516113,
      "learning_rate": 8.554270462633453e-06,
      "loss": 1.1914,
      "step": 1150
    },
    {
      "epoch": 0.47028216930158095,
      "grad_norm": 13.450782775878906,
      "learning_rate": 8.498665480427047e-06,
      "loss": 1.1773,
      "step": 1175
    },
    {
      "epoch": 0.48028817290374226,
      "grad_norm": 12.50664234161377,
      "learning_rate": 8.44306049822064e-06,
      "loss": 1.277,
      "step": 1200
    },
    {
      "epoch": 0.49029417650590357,
      "grad_norm": 11.999517440795898,
      "learning_rate": 8.387455516014236e-06,
      "loss": 1.09,
      "step": 1225
    },
    {
      "epoch": 0.5003001801080649,
      "grad_norm": 14.905177116394043,
      "learning_rate": 8.33185053380783e-06,
      "loss": 1.1566,
      "step": 1250
    },
    {
      "epoch": 0.5103061837102262,
      "grad_norm": 14.532490730285645,
      "learning_rate": 8.276245551601423e-06,
      "loss": 1.1516,
      "step": 1275
    },
    {
      "epoch": 0.5203121873123875,
      "grad_norm": 13.656341552734375,
      "learning_rate": 8.220640569395019e-06,
      "loss": 1.1212,
      "step": 1300
    },
    {
      "epoch": 0.5303181909145487,
      "grad_norm": 12.786870956420898,
      "learning_rate": 8.165035587188612e-06,
      "loss": 1.0919,
      "step": 1325
    },
    {
      "epoch": 0.54032419451671,
      "grad_norm": 14.387174606323242,
      "learning_rate": 8.109430604982206e-06,
      "loss": 1.1252,
      "step": 1350
    },
    {
      "epoch": 0.5503301981188713,
      "grad_norm": 13.124211311340332,
      "learning_rate": 8.053825622775802e-06,
      "loss": 1.1736,
      "step": 1375
    },
    {
      "epoch": 0.5603362017210326,
      "grad_norm": 14.467448234558105,
      "learning_rate": 7.998220640569395e-06,
      "loss": 1.0995,
      "step": 1400
    },
    {
      "epoch": 0.5703422053231939,
      "grad_norm": 13.015649795532227,
      "learning_rate": 7.94261565836299e-06,
      "loss": 1.2058,
      "step": 1425
    },
    {
      "epoch": 0.5803482089253552,
      "grad_norm": 12.927563667297363,
      "learning_rate": 7.887010676156584e-06,
      "loss": 1.1048,
      "step": 1450
    },
    {
      "epoch": 0.5903542125275165,
      "grad_norm": 14.437759399414062,
      "learning_rate": 7.831405693950178e-06,
      "loss": 1.1138,
      "step": 1475
    },
    {
      "epoch": 0.6003602161296778,
      "grad_norm": 13.162938117980957,
      "learning_rate": 7.775800711743774e-06,
      "loss": 1.08,
      "step": 1500
    },
    {
      "epoch": 0.6103662197318391,
      "grad_norm": 11.896222114562988,
      "learning_rate": 7.720195729537367e-06,
      "loss": 1.1819,
      "step": 1525
    },
    {
      "epoch": 0.6203722233340004,
      "grad_norm": 17.318538665771484,
      "learning_rate": 7.664590747330961e-06,
      "loss": 1.124,
      "step": 1550
    },
    {
      "epoch": 0.6303782269361617,
      "grad_norm": 12.138945579528809,
      "learning_rate": 7.608985765124556e-06,
      "loss": 1.2297,
      "step": 1575
    },
    {
      "epoch": 0.6403842305383229,
      "grad_norm": 14.958952903747559,
      "learning_rate": 7.55338078291815e-06,
      "loss": 1.1257,
      "step": 1600
    },
    {
      "epoch": 0.6503902341404842,
      "grad_norm": 12.975215911865234,
      "learning_rate": 7.497775800711744e-06,
      "loss": 1.0724,
      "step": 1625
    },
    {
      "epoch": 0.6603962377426456,
      "grad_norm": 14.213400840759277,
      "learning_rate": 7.4421708185053385e-06,
      "loss": 1.186,
      "step": 1650
    },
    {
      "epoch": 0.6704022413448069,
      "grad_norm": 13.799762725830078,
      "learning_rate": 7.386565836298933e-06,
      "loss": 1.0768,
      "step": 1675
    },
    {
      "epoch": 0.6804082449469682,
      "grad_norm": 11.595632553100586,
      "learning_rate": 7.330960854092527e-06,
      "loss": 1.1245,
      "step": 1700
    },
    {
      "epoch": 0.6904142485491295,
      "grad_norm": 13.799065589904785,
      "learning_rate": 7.275355871886121e-06,
      "loss": 1.1161,
      "step": 1725
    },
    {
      "epoch": 0.7004202521512908,
      "grad_norm": 12.326783180236816,
      "learning_rate": 7.219750889679717e-06,
      "loss": 1.0904,
      "step": 1750
    },
    {
      "epoch": 0.7104262557534521,
      "grad_norm": 13.207780838012695,
      "learning_rate": 7.16414590747331e-06,
      "loss": 1.1031,
      "step": 1775
    },
    {
      "epoch": 0.7204322593556134,
      "grad_norm": 13.019478797912598,
      "learning_rate": 7.108540925266904e-06,
      "loss": 1.1205,
      "step": 1800
    },
    {
      "epoch": 0.7304382629577747,
      "grad_norm": 12.38670825958252,
      "learning_rate": 7.0529359430605e-06,
      "loss": 1.0972,
      "step": 1825
    },
    {
      "epoch": 0.740444266559936,
      "grad_norm": 13.468473434448242,
      "learning_rate": 6.9973309608540925e-06,
      "loss": 0.9878,
      "step": 1850
    },
    {
      "epoch": 0.7504502701620973,
      "grad_norm": 12.52755069732666,
      "learning_rate": 6.941725978647688e-06,
      "loss": 1.0955,
      "step": 1875
    },
    {
      "epoch": 0.7604562737642585,
      "grad_norm": 13.126026153564453,
      "learning_rate": 6.886120996441281e-06,
      "loss": 1.1019,
      "step": 1900
    },
    {
      "epoch": 0.7704622773664198,
      "grad_norm": 12.999829292297363,
      "learning_rate": 6.830516014234876e-06,
      "loss": 1.0055,
      "step": 1925
    },
    {
      "epoch": 0.7804682809685811,
      "grad_norm": 11.212359428405762,
      "learning_rate": 6.774911032028471e-06,
      "loss": 0.9664,
      "step": 1950
    },
    {
      "epoch": 0.7904742845707424,
      "grad_norm": 11.80057144165039,
      "learning_rate": 6.7193060498220645e-06,
      "loss": 0.9562,
      "step": 1975
    },
    {
      "epoch": 0.8004802881729037,
      "grad_norm": 13.41251277923584,
      "learning_rate": 6.663701067615659e-06,
      "loss": 1.0283,
      "step": 2000
    },
    {
      "epoch": 0.8004802881729037,
      "eval_cer": 55.51946781451771,
      "eval_loss": 1.0144044160842896,
      "eval_runtime": 1745.0611,
      "eval_samples_per_second": 2.273,
      "eval_steps_per_second": 0.284,
      "step": 2000
    },
    {
      "epoch": 0.810486291775065,
      "grad_norm": 14.396341323852539,
      "learning_rate": 6.608096085409254e-06,
      "loss": 1.0206,
      "step": 2025
    },
    {
      "epoch": 0.8204922953772263,
      "grad_norm": 12.45606517791748,
      "learning_rate": 6.552491103202847e-06,
      "loss": 1.0119,
      "step": 2050
    },
    {
      "epoch": 0.8304982989793877,
      "grad_norm": 13.013134002685547,
      "learning_rate": 6.496886120996442e-06,
      "loss": 1.102,
      "step": 2075
    },
    {
      "epoch": 0.840504302581549,
      "grad_norm": 14.00875473022461,
      "learning_rate": 6.4412811387900366e-06,
      "loss": 1.0198,
      "step": 2100
    },
    {
      "epoch": 0.8505103061837103,
      "grad_norm": 13.691847801208496,
      "learning_rate": 6.38567615658363e-06,
      "loss": 1.1056,
      "step": 2125
    },
    {
      "epoch": 0.8605163097858716,
      "grad_norm": 13.617313385009766,
      "learning_rate": 6.330071174377225e-06,
      "loss": 1.0866,
      "step": 2150
    },
    {
      "epoch": 0.8705223133880328,
      "grad_norm": 14.322003364562988,
      "learning_rate": 6.2744661921708194e-06,
      "loss": 1.0566,
      "step": 2175
    },
    {
      "epoch": 0.8805283169901941,
      "grad_norm": 13.71176528930664,
      "learning_rate": 6.218861209964413e-06,
      "loss": 1.1146,
      "step": 2200
    },
    {
      "epoch": 0.8905343205923554,
      "grad_norm": 12.21072006225586,
      "learning_rate": 6.163256227758008e-06,
      "loss": 1.1476,
      "step": 2225
    },
    {
      "epoch": 0.9005403241945167,
      "grad_norm": 14.359560012817383,
      "learning_rate": 6.107651245551602e-06,
      "loss": 1.0357,
      "step": 2250
    },
    {
      "epoch": 0.910546327796678,
      "grad_norm": 13.458608627319336,
      "learning_rate": 6.052046263345196e-06,
      "loss": 1.0524,
      "step": 2275
    },
    {
      "epoch": 0.9205523313988393,
      "grad_norm": 13.808725357055664,
      "learning_rate": 5.996441281138791e-06,
      "loss": 0.9287,
      "step": 2300
    },
    {
      "epoch": 0.9305583350010006,
      "grad_norm": 14.640447616577148,
      "learning_rate": 5.940836298932385e-06,
      "loss": 1.0611,
      "step": 2325
    },
    {
      "epoch": 0.9405643386031619,
      "grad_norm": 12.251474380493164,
      "learning_rate": 5.885231316725979e-06,
      "loss": 1.0569,
      "step": 2350
    },
    {
      "epoch": 0.9505703422053232,
      "grad_norm": 10.18205451965332,
      "learning_rate": 5.8296263345195735e-06,
      "loss": 1.0099,
      "step": 2375
    },
    {
      "epoch": 0.9605763458074845,
      "grad_norm": 12.422398567199707,
      "learning_rate": 5.774021352313167e-06,
      "loss": 1.119,
      "step": 2400
    },
    {
      "epoch": 0.9705823494096458,
      "grad_norm": 15.4508056640625,
      "learning_rate": 5.718416370106762e-06,
      "loss": 1.0657,
      "step": 2425
    },
    {
      "epoch": 0.9805883530118071,
      "grad_norm": 13.556448936462402,
      "learning_rate": 5.662811387900356e-06,
      "loss": 1.0186,
      "step": 2450
    },
    {
      "epoch": 0.9905943566139683,
      "grad_norm": 17.107561111450195,
      "learning_rate": 5.60720640569395e-06,
      "loss": 0.971,
      "step": 2475
    },
    {
      "epoch": 1.0006003602161297,
      "grad_norm": 11.349823951721191,
      "learning_rate": 5.551601423487545e-06,
      "loss": 1.0679,
      "step": 2500
    },
    {
      "epoch": 1.010606363818291,
      "grad_norm": 11.253293991088867,
      "learning_rate": 5.495996441281139e-06,
      "loss": 0.7139,
      "step": 2525
    },
    {
      "epoch": 1.0206123674204524,
      "grad_norm": 10.042984008789062,
      "learning_rate": 5.440391459074733e-06,
      "loss": 0.774,
      "step": 2550
    },
    {
      "epoch": 1.0306183710226136,
      "grad_norm": 11.355831146240234,
      "learning_rate": 5.3847864768683275e-06,
      "loss": 0.7949,
      "step": 2575
    },
    {
      "epoch": 1.040624374624775,
      "grad_norm": 11.004257202148438,
      "learning_rate": 5.329181494661922e-06,
      "loss": 0.7722,
      "step": 2600
    },
    {
      "epoch": 1.0506303782269362,
      "grad_norm": 11.717720031738281,
      "learning_rate": 5.273576512455516e-06,
      "loss": 0.7796,
      "step": 2625
    },
    {
      "epoch": 1.0606363818290974,
      "grad_norm": 12.214451789855957,
      "learning_rate": 5.21797153024911e-06,
      "loss": 0.7728,
      "step": 2650
    },
    {
      "epoch": 1.0706423854312588,
      "grad_norm": 11.615367889404297,
      "learning_rate": 5.162366548042706e-06,
      "loss": 0.7563,
      "step": 2675
    },
    {
      "epoch": 1.08064838903342,
      "grad_norm": 13.19363021850586,
      "learning_rate": 5.106761565836299e-06,
      "loss": 0.7958,
      "step": 2700
    },
    {
      "epoch": 1.0906543926355814,
      "grad_norm": 14.007135391235352,
      "learning_rate": 5.051156583629894e-06,
      "loss": 0.8891,
      "step": 2725
    },
    {
      "epoch": 1.1006603962377426,
      "grad_norm": 10.497177124023438,
      "learning_rate": 4.995551601423488e-06,
      "loss": 0.7156,
      "step": 2750
    },
    {
      "epoch": 1.110666399839904,
      "grad_norm": 12.282913208007812,
      "learning_rate": 4.939946619217082e-06,
      "loss": 0.7262,
      "step": 2775
    },
    {
      "epoch": 1.1206724034420652,
      "grad_norm": 14.421050071716309,
      "learning_rate": 4.884341637010677e-06,
      "loss": 0.7824,
      "step": 2800
    },
    {
      "epoch": 1.1306784070442266,
      "grad_norm": 13.261626243591309,
      "learning_rate": 4.828736654804271e-06,
      "loss": 0.7993,
      "step": 2825
    },
    {
      "epoch": 1.1406844106463878,
      "grad_norm": 11.418912887573242,
      "learning_rate": 4.773131672597865e-06,
      "loss": 0.7521,
      "step": 2850
    },
    {
      "epoch": 1.1506904142485492,
      "grad_norm": 14.025506019592285,
      "learning_rate": 4.71752669039146e-06,
      "loss": 0.8405,
      "step": 2875
    },
    {
      "epoch": 1.1606964178507104,
      "grad_norm": 12.407489776611328,
      "learning_rate": 4.661921708185054e-06,
      "loss": 0.7985,
      "step": 2900
    },
    {
      "epoch": 1.1707024214528716,
      "grad_norm": 11.83797550201416,
      "learning_rate": 4.606316725978648e-06,
      "loss": 0.799,
      "step": 2925
    },
    {
      "epoch": 1.180708425055033,
      "grad_norm": 10.337922096252441,
      "learning_rate": 4.550711743772243e-06,
      "loss": 0.7965,
      "step": 2950
    },
    {
      "epoch": 1.1907144286571942,
      "grad_norm": 11.562491416931152,
      "learning_rate": 4.4951067615658365e-06,
      "loss": 0.7836,
      "step": 2975
    },
    {
      "epoch": 1.2007204322593557,
      "grad_norm": 11.886847496032715,
      "learning_rate": 4.439501779359431e-06,
      "loss": 0.7925,
      "step": 3000
    },
    {
      "epoch": 1.2007204322593557,
      "eval_cer": 51.82938759538251,
      "eval_loss": 0.9463357329368591,
      "eval_runtime": 1801.523,
      "eval_samples_per_second": 2.202,
      "eval_steps_per_second": 0.275,
      "step": 3000
    },
    {
      "epoch": 1.2107264358615168,
      "grad_norm": 10.6748628616333,
      "learning_rate": 4.383896797153026e-06,
      "loss": 0.7877,
      "step": 3025
    },
    {
      "epoch": 1.2207324394636783,
      "grad_norm": 11.652464866638184,
      "learning_rate": 4.328291814946619e-06,
      "loss": 0.7794,
      "step": 3050
    },
    {
      "epoch": 1.2307384430658395,
      "grad_norm": 11.998939514160156,
      "learning_rate": 4.272686832740214e-06,
      "loss": 0.8183,
      "step": 3075
    },
    {
      "epoch": 1.2407444466680009,
      "grad_norm": 13.142699241638184,
      "learning_rate": 4.217081850533808e-06,
      "loss": 0.7834,
      "step": 3100
    },
    {
      "epoch": 1.250750450270162,
      "grad_norm": 11.67496395111084,
      "learning_rate": 4.161476868327402e-06,
      "loss": 0.7594,
      "step": 3125
    },
    {
      "epoch": 1.2607564538723235,
      "grad_norm": 11.429244995117188,
      "learning_rate": 4.105871886120997e-06,
      "loss": 0.7963,
      "step": 3150
    },
    {
      "epoch": 1.2707624574744847,
      "grad_norm": 12.160046577453613,
      "learning_rate": 4.0502669039145905e-06,
      "loss": 0.7441,
      "step": 3175
    },
    {
      "epoch": 1.2807684610766459,
      "grad_norm": 12.606410026550293,
      "learning_rate": 3.994661921708186e-06,
      "loss": 0.8733,
      "step": 3200
    },
    {
      "epoch": 1.2907744646788073,
      "grad_norm": 14.263989448547363,
      "learning_rate": 3.93905693950178e-06,
      "loss": 0.7433,
      "step": 3225
    },
    {
      "epoch": 1.3007804682809687,
      "grad_norm": 11.299212455749512,
      "learning_rate": 3.883451957295374e-06,
      "loss": 0.789,
      "step": 3250
    },
    {
      "epoch": 1.31078647188313,
      "grad_norm": 12.999605178833008,
      "learning_rate": 3.827846975088969e-06,
      "loss": 0.7868,
      "step": 3275
    },
    {
      "epoch": 1.320792475485291,
      "grad_norm": 10.38305950164795,
      "learning_rate": 3.7722419928825625e-06,
      "loss": 0.7433,
      "step": 3300
    },
    {
      "epoch": 1.3307984790874525,
      "grad_norm": 13.05246639251709,
      "learning_rate": 3.7166370106761567e-06,
      "loss": 0.7547,
      "step": 3325
    },
    {
      "epoch": 1.3408044826896137,
      "grad_norm": 11.195088386535645,
      "learning_rate": 3.661032028469751e-06,
      "loss": 0.8126,
      "step": 3350
    },
    {
      "epoch": 1.3508104862917751,
      "grad_norm": 11.096240997314453,
      "learning_rate": 3.605427046263346e-06,
      "loss": 0.743,
      "step": 3375
    },
    {
      "epoch": 1.3608164898939363,
      "grad_norm": 13.594226837158203,
      "learning_rate": 3.5498220640569395e-06,
      "loss": 0.7965,
      "step": 3400
    },
    {
      "epoch": 1.3708224934960977,
      "grad_norm": 15.316413879394531,
      "learning_rate": 3.4942170818505337e-06,
      "loss": 0.7956,
      "step": 3425
    },
    {
      "epoch": 1.380828497098259,
      "grad_norm": 12.78977108001709,
      "learning_rate": 3.4386120996441287e-06,
      "loss": 0.7693,
      "step": 3450
    },
    {
      "epoch": 1.3908345007004201,
      "grad_norm": 12.662712097167969,
      "learning_rate": 3.383007117437723e-06,
      "loss": 0.7768,
      "step": 3475
    },
    {
      "epoch": 1.4008405043025816,
      "grad_norm": 8.803949356079102,
      "learning_rate": 3.327402135231317e-06,
      "loss": 0.7622,
      "step": 3500
    },
    {
      "epoch": 1.410846507904743,
      "grad_norm": 13.736053466796875,
      "learning_rate": 3.2717971530249116e-06,
      "loss": 0.8152,
      "step": 3525
    },
    {
      "epoch": 1.4208525115069042,
      "grad_norm": 12.255024909973145,
      "learning_rate": 3.2161921708185057e-06,
      "loss": 0.8149,
      "step": 3550
    },
    {
      "epoch": 1.4308585151090654,
      "grad_norm": 12.75201416015625,
      "learning_rate": 3.1605871886121e-06,
      "loss": 0.7471,
      "step": 3575
    },
    {
      "epoch": 1.4408645187112268,
      "grad_norm": 13.30036449432373,
      "learning_rate": 3.1049822064056944e-06,
      "loss": 0.6892,
      "step": 3600
    },
    {
      "epoch": 1.450870522313388,
      "grad_norm": 10.946511268615723,
      "learning_rate": 3.0493772241992886e-06,
      "loss": 0.7916,
      "step": 3625
    },
    {
      "epoch": 1.4608765259155494,
      "grad_norm": 10.852522850036621,
      "learning_rate": 2.9937722419928827e-06,
      "loss": 0.7329,
      "step": 3650
    },
    {
      "epoch": 1.4708825295177106,
      "grad_norm": 11.466883659362793,
      "learning_rate": 2.938167259786477e-06,
      "loss": 0.7588,
      "step": 3675
    },
    {
      "epoch": 1.480888533119872,
      "grad_norm": 12.728093147277832,
      "learning_rate": 2.8825622775800715e-06,
      "loss": 0.7527,
      "step": 3700
    },
    {
      "epoch": 1.4908945367220332,
      "grad_norm": 9.343868255615234,
      "learning_rate": 2.8269572953736656e-06,
      "loss": 0.7451,
      "step": 3725
    },
    {
      "epoch": 1.5009005403241944,
      "grad_norm": 14.047112464904785,
      "learning_rate": 2.7713523131672598e-06,
      "loss": 0.7905,
      "step": 3750
    },
    {
      "epoch": 1.5109065439263558,
      "grad_norm": 10.599916458129883,
      "learning_rate": 2.7157473309608543e-06,
      "loss": 0.7819,
      "step": 3775
    },
    {
      "epoch": 1.5209125475285172,
      "grad_norm": 14.341135025024414,
      "learning_rate": 2.6601423487544485e-06,
      "loss": 0.7166,
      "step": 3800
    },
    {
      "epoch": 1.5309185511306784,
      "grad_norm": 11.71387767791748,
      "learning_rate": 2.6045373665480426e-06,
      "loss": 0.7386,
      "step": 3825
    },
    {
      "epoch": 1.5409245547328396,
      "grad_norm": 12.604011535644531,
      "learning_rate": 2.5489323843416376e-06,
      "loss": 0.6969,
      "step": 3850
    },
    {
      "epoch": 1.550930558335001,
      "grad_norm": 9.848773002624512,
      "learning_rate": 2.4933274021352318e-06,
      "loss": 0.8356,
      "step": 3875
    },
    {
      "epoch": 1.5609365619371625,
      "grad_norm": 14.05534553527832,
      "learning_rate": 2.437722419928826e-06,
      "loss": 0.7282,
      "step": 3900
    },
    {
      "epoch": 1.5709425655393234,
      "grad_norm": 12.9791259765625,
      "learning_rate": 2.38211743772242e-06,
      "loss": 0.7059,
      "step": 3925
    },
    {
      "epoch": 1.5809485691414848,
      "grad_norm": 10.708452224731445,
      "learning_rate": 2.3265124555160142e-06,
      "loss": 0.7712,
      "step": 3950
    },
    {
      "epoch": 1.5909545727436463,
      "grad_norm": 13.361218452453613,
      "learning_rate": 2.270907473309609e-06,
      "loss": 0.719,
      "step": 3975
    },
    {
      "epoch": 1.6009605763458075,
      "grad_norm": 11.740647315979004,
      "learning_rate": 2.215302491103203e-06,
      "loss": 0.8262,
      "step": 4000
    },
    {
      "epoch": 1.6009605763458075,
      "eval_cer": 51.82156133828997,
      "eval_loss": 0.9109482169151306,
      "eval_runtime": 1918.0204,
      "eval_samples_per_second": 2.068,
      "eval_steps_per_second": 0.259,
      "step": 4000
    }
  ],
  "logging_steps": 25,
  "max_steps": 4996,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "total_flos": 1.846744552267776e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}