|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.21892377074302727, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00043784754148605456, |
|
"grad_norm": 11.96724490271355, |
|
"learning_rate": 7.519033870117711e-06, |
|
"loss": 0.682, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0008756950829721091, |
|
"grad_norm": 16.377854891876865, |
|
"learning_rate": 9.782488603436574e-06, |
|
"loss": 0.5773, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0013135426244581636, |
|
"grad_norm": 19.097945880780852, |
|
"learning_rate": 1.1106524744507912e-05, |
|
"loss": 0.4648, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0017513901659442182, |
|
"grad_norm": 17.780105205349074, |
|
"learning_rate": 1.2045943336755435e-05, |
|
"loss": 0.5231, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.002189237707430273, |
|
"grad_norm": 11.341588174257303, |
|
"learning_rate": 1.2774613006916558e-05, |
|
"loss": 0.373, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.002627085248916327, |
|
"grad_norm": 12.468061614573642, |
|
"learning_rate": 1.3369979477826773e-05, |
|
"loss": 0.4499, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.003064932790402382, |
|
"grad_norm": 14.844241870962383, |
|
"learning_rate": 1.3873354656555003e-05, |
|
"loss": 0.4481, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0035027803318884365, |
|
"grad_norm": 13.17618773461328, |
|
"learning_rate": 1.4268322372265782e-05, |
|
"loss": 0.4385, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.003940627873374491, |
|
"grad_norm": 13.959456456486652, |
|
"learning_rate": 1.4657529486032087e-05, |
|
"loss": 0.5157, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.004378475414860546, |
|
"grad_norm": 9.088640068563528, |
|
"learning_rate": 1.5005248620577926e-05, |
|
"loss": 0.5372, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0048163229563466005, |
|
"grad_norm": 8.985973448159925, |
|
"learning_rate": 1.531947884589086e-05, |
|
"loss": 0.3981, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.005254170497832654, |
|
"grad_norm": 16.03083595499672, |
|
"learning_rate": 1.5606107901730336e-05, |
|
"loss": 0.4008, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.005692018039318709, |
|
"grad_norm": 23.2563822200454, |
|
"learning_rate": 1.586959551766198e-05, |
|
"loss": 0.4434, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.006129865580804764, |
|
"grad_norm": 13.106170808319108, |
|
"learning_rate": 1.611340086727408e-05, |
|
"loss": 0.4264, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.006567713122290818, |
|
"grad_norm": 10.892983486327262, |
|
"learning_rate": 1.634026115826661e-05, |
|
"loss": 0.538, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.007005560663776873, |
|
"grad_norm": 15.273372562202105, |
|
"learning_rate": 1.6531777105584646e-05, |
|
"loss": 0.4921, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.007443408205262928, |
|
"grad_norm": 12.078764233569435, |
|
"learning_rate": 1.6732175860784077e-05, |
|
"loss": 0.4219, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.007881255746748982, |
|
"grad_norm": 18.9218120999161, |
|
"learning_rate": 1.692098421935095e-05, |
|
"loss": 0.51, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.008319103288235036, |
|
"grad_norm": 11.062242407951706, |
|
"learning_rate": 1.7099469894607657e-05, |
|
"loss": 0.4021, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.008756950829721092, |
|
"grad_norm": 10.4039117059008, |
|
"learning_rate": 1.726870335389679e-05, |
|
"loss": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.009194798371207145, |
|
"grad_norm": 13.360369800315105, |
|
"learning_rate": 1.742959672866302e-05, |
|
"loss": 0.3934, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.009632645912693201, |
|
"grad_norm": 17.885935179522868, |
|
"learning_rate": 1.7582933579209726e-05, |
|
"loss": 0.4299, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.010070493454179255, |
|
"grad_norm": 15.613293466188615, |
|
"learning_rate": 1.7729391978127236e-05, |
|
"loss": 0.4348, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.010508340995665309, |
|
"grad_norm": 8.671536275760825, |
|
"learning_rate": 1.7869562635049198e-05, |
|
"loss": 0.4612, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.010946188537151364, |
|
"grad_norm": 16.1819762006921, |
|
"learning_rate": 1.8003963288492603e-05, |
|
"loss": 0.3756, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.011384036078637418, |
|
"grad_norm": 13.36774911152187, |
|
"learning_rate": 1.813305025098084e-05, |
|
"loss": 0.4427, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.011821883620123473, |
|
"grad_norm": 10.629955258208, |
|
"learning_rate": 1.8257227757513754e-05, |
|
"loss": 0.4213, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.012259731161609527, |
|
"grad_norm": 19.332750684355226, |
|
"learning_rate": 1.8376855600592943e-05, |
|
"loss": 0.346, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.012697578703095583, |
|
"grad_norm": 16.066310801594668, |
|
"learning_rate": 1.8492255415374714e-05, |
|
"loss": 0.4522, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.013135426244581637, |
|
"grad_norm": 14.844884825041587, |
|
"learning_rate": 1.860371589158547e-05, |
|
"loss": 0.4505, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01357327378606769, |
|
"grad_norm": 16.539413080837956, |
|
"learning_rate": 1.8711497124872535e-05, |
|
"loss": 0.4296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.014011121327553746, |
|
"grad_norm": 11.178304803994907, |
|
"learning_rate": 1.8815834272664066e-05, |
|
"loss": 0.4404, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0144489688690398, |
|
"grad_norm": 9.171001542204067, |
|
"learning_rate": 1.8916940643811347e-05, |
|
"loss": 0.3785, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.014886816410525855, |
|
"grad_norm": 18.255820822315396, |
|
"learning_rate": 1.9015010324094007e-05, |
|
"loss": 0.4524, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.015324663952011909, |
|
"grad_norm": 13.044007567571079, |
|
"learning_rate": 1.911022041882251e-05, |
|
"loss": 0.4876, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.015762511493497965, |
|
"grad_norm": 13.758756866189971, |
|
"learning_rate": 1.9202732977654023e-05, |
|
"loss": 0.5539, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.01620035903498402, |
|
"grad_norm": 15.903726373816992, |
|
"learning_rate": 1.929269665417383e-05, |
|
"loss": 0.4976, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.016638206576470072, |
|
"grad_norm": 15.248268904693534, |
|
"learning_rate": 1.938024814292676e-05, |
|
"loss": 0.5259, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.017076054117956128, |
|
"grad_norm": 10.722522838992456, |
|
"learning_rate": 1.9465513428778125e-05, |
|
"loss": 0.489, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.017513901659442183, |
|
"grad_norm": 16.61965558187522, |
|
"learning_rate": 1.9548608877267744e-05, |
|
"loss": 0.439, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.017951749200928235, |
|
"grad_norm": 19.548934962148113, |
|
"learning_rate": 1.9629642189639832e-05, |
|
"loss": 0.4282, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.01838959674241429, |
|
"grad_norm": 15.936747454659427, |
|
"learning_rate": 1.9708713242215694e-05, |
|
"loss": 0.3974, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.018827444283900346, |
|
"grad_norm": 17.53178788446236, |
|
"learning_rate": 1.9785914826520243e-05, |
|
"loss": 0.4517, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.019265291825386402, |
|
"grad_norm": 15.309413053796602, |
|
"learning_rate": 1.9861333303919378e-05, |
|
"loss": 0.4155, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.019703139366872454, |
|
"grad_norm": 13.248113533897241, |
|
"learning_rate": 1.9935049186350462e-05, |
|
"loss": 0.4589, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02014098690835851, |
|
"grad_norm": 18.437663863148952, |
|
"learning_rate": 2e-05, |
|
"loss": 0.388, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.020578834449844565, |
|
"grad_norm": 14.089094860711976, |
|
"learning_rate": 1.9995577276044317e-05, |
|
"loss": 0.4637, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.021016681991330617, |
|
"grad_norm": 50.22724968906943, |
|
"learning_rate": 1.9991154552088635e-05, |
|
"loss": 0.3967, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.021454529532816673, |
|
"grad_norm": 9.448811402831021, |
|
"learning_rate": 1.998673182813295e-05, |
|
"loss": 0.405, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.021892377074302728, |
|
"grad_norm": 12.511886438347087, |
|
"learning_rate": 1.9982309104177265e-05, |
|
"loss": 0.4245, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.022330224615788784, |
|
"grad_norm": 26.23559407530874, |
|
"learning_rate": 1.997788638022158e-05, |
|
"loss": 0.4486, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.022768072157274836, |
|
"grad_norm": 10.162620448000466, |
|
"learning_rate": 1.9973463656265898e-05, |
|
"loss": 0.4898, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.02320591969876089, |
|
"grad_norm": 10.505426367087482, |
|
"learning_rate": 1.996904093231021e-05, |
|
"loss": 0.4011, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.023643767240246947, |
|
"grad_norm": 7.19153886182727, |
|
"learning_rate": 1.9964618208354528e-05, |
|
"loss": 0.4566, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.024081614781733, |
|
"grad_norm": 16.625591066795565, |
|
"learning_rate": 1.9960195484398842e-05, |
|
"loss": 0.3865, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.024519462323219055, |
|
"grad_norm": 21.196277881857508, |
|
"learning_rate": 1.995577276044316e-05, |
|
"loss": 0.3797, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.02495730986470511, |
|
"grad_norm": 19.530616395972775, |
|
"learning_rate": 1.9951350036487472e-05, |
|
"loss": 0.4596, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.025395157406191166, |
|
"grad_norm": 10.52123656050975, |
|
"learning_rate": 1.994692731253179e-05, |
|
"loss": 0.3836, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.025833004947677218, |
|
"grad_norm": 14.373796230168502, |
|
"learning_rate": 1.9942504588576105e-05, |
|
"loss": 0.4916, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.026270852489163273, |
|
"grad_norm": 11.25970447321859, |
|
"learning_rate": 1.9938081864620424e-05, |
|
"loss": 0.3832, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02670870003064933, |
|
"grad_norm": 16.21224392326194, |
|
"learning_rate": 1.9933659140664735e-05, |
|
"loss": 0.4749, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.02714654757213538, |
|
"grad_norm": 14.71066366579976, |
|
"learning_rate": 1.9929236416709053e-05, |
|
"loss": 0.3882, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.027584395113621436, |
|
"grad_norm": 11.37877349304637, |
|
"learning_rate": 1.992481369275337e-05, |
|
"loss": 0.4396, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.028022242655107492, |
|
"grad_norm": 16.754438580812046, |
|
"learning_rate": 1.9920390968797683e-05, |
|
"loss": 0.4539, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.028460090196593547, |
|
"grad_norm": 11.479142887670093, |
|
"learning_rate": 1.9915968244841998e-05, |
|
"loss": 0.451, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0288979377380796, |
|
"grad_norm": 8.382864831937072, |
|
"learning_rate": 1.9911545520886316e-05, |
|
"loss": 0.4716, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.029335785279565655, |
|
"grad_norm": 19.710550432188437, |
|
"learning_rate": 1.990712279693063e-05, |
|
"loss": 0.3545, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.02977363282105171, |
|
"grad_norm": 17.903211831860908, |
|
"learning_rate": 1.9902700072974946e-05, |
|
"loss": 0.4335, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.030211480362537766, |
|
"grad_norm": 16.390743229733264, |
|
"learning_rate": 1.989827734901926e-05, |
|
"loss": 0.3806, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.030649327904023818, |
|
"grad_norm": 14.980896037745874, |
|
"learning_rate": 1.989385462506358e-05, |
|
"loss": 0.3767, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.031087175445509874, |
|
"grad_norm": 11.397175060743534, |
|
"learning_rate": 1.9889431901107894e-05, |
|
"loss": 0.4746, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.03152502298699593, |
|
"grad_norm": 19.04061884539536, |
|
"learning_rate": 1.988500917715221e-05, |
|
"loss": 0.413, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.031962870528481985, |
|
"grad_norm": 13.947751231087285, |
|
"learning_rate": 1.9880586453196527e-05, |
|
"loss": 0.3386, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.03240071806996804, |
|
"grad_norm": 41.64839133720008, |
|
"learning_rate": 1.987616372924084e-05, |
|
"loss": 0.4444, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.03283856561145409, |
|
"grad_norm": 13.185903732811664, |
|
"learning_rate": 1.9871741005285157e-05, |
|
"loss": 0.4383, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.033276413152940144, |
|
"grad_norm": 15.456378713797612, |
|
"learning_rate": 1.9867318281329472e-05, |
|
"loss": 0.4087, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.0337142606944262, |
|
"grad_norm": 13.751468649371544, |
|
"learning_rate": 1.986289555737379e-05, |
|
"loss": 0.4245, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.034152108235912255, |
|
"grad_norm": 15.975026190651212, |
|
"learning_rate": 1.9858472833418102e-05, |
|
"loss": 0.4263, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.03458995577739831, |
|
"grad_norm": 11.294733923572386, |
|
"learning_rate": 1.985405010946242e-05, |
|
"loss": 0.5273, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.03502780331888437, |
|
"grad_norm": 22.907563592109977, |
|
"learning_rate": 1.9849627385506735e-05, |
|
"loss": 0.3766, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03546565086037042, |
|
"grad_norm": 19.465013466925168, |
|
"learning_rate": 1.9845204661551053e-05, |
|
"loss": 0.3739, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.03590349840185647, |
|
"grad_norm": 13.553846560729426, |
|
"learning_rate": 1.9840781937595365e-05, |
|
"loss": 0.481, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.036341345943342526, |
|
"grad_norm": 22.910094642393723, |
|
"learning_rate": 1.9836359213639683e-05, |
|
"loss": 0.4659, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.03677919348482858, |
|
"grad_norm": 16.26462964503755, |
|
"learning_rate": 1.9831936489683998e-05, |
|
"loss": 0.381, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.03721704102631464, |
|
"grad_norm": 9.94980804411553, |
|
"learning_rate": 1.9827513765728313e-05, |
|
"loss": 0.4003, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.03765488856780069, |
|
"grad_norm": 13.328592093222, |
|
"learning_rate": 1.9823091041772628e-05, |
|
"loss": 0.3755, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.03809273610928675, |
|
"grad_norm": 18.322055654010875, |
|
"learning_rate": 1.9818668317816946e-05, |
|
"loss": 0.494, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.038530583650772804, |
|
"grad_norm": 12.090527797691786, |
|
"learning_rate": 1.981424559386126e-05, |
|
"loss": 0.4751, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.03896843119225885, |
|
"grad_norm": 19.070570141131398, |
|
"learning_rate": 1.9809822869905576e-05, |
|
"loss": 0.4483, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.03940627873374491, |
|
"grad_norm": 18.183543820420173, |
|
"learning_rate": 1.980540014594989e-05, |
|
"loss": 0.3881, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.039844126275230964, |
|
"grad_norm": 11.47870821074828, |
|
"learning_rate": 1.980097742199421e-05, |
|
"loss": 0.465, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.04028197381671702, |
|
"grad_norm": 4.662342565910864, |
|
"learning_rate": 1.9796554698038524e-05, |
|
"loss": 0.3553, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.040719821358203075, |
|
"grad_norm": 11.599443461780693, |
|
"learning_rate": 1.979213197408284e-05, |
|
"loss": 0.4284, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.04115766889968913, |
|
"grad_norm": 20.899400532717515, |
|
"learning_rate": 1.9787709250127154e-05, |
|
"loss": 0.3849, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.041595516441175186, |
|
"grad_norm": 13.83963592226824, |
|
"learning_rate": 1.978328652617147e-05, |
|
"loss": 0.4228, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.042033363982661234, |
|
"grad_norm": 10.129058777896109, |
|
"learning_rate": 1.9778863802215787e-05, |
|
"loss": 0.4608, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.04247121152414729, |
|
"grad_norm": 13.525629087473678, |
|
"learning_rate": 1.9774441078260102e-05, |
|
"loss": 0.4168, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.042909059065633345, |
|
"grad_norm": 12.286627992129988, |
|
"learning_rate": 1.9770018354304417e-05, |
|
"loss": 0.3782, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.0433469066071194, |
|
"grad_norm": 15.201916543698117, |
|
"learning_rate": 1.976559563034873e-05, |
|
"loss": 0.4614, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.043784754148605456, |
|
"grad_norm": 12.218728770361992, |
|
"learning_rate": 1.976117290639305e-05, |
|
"loss": 0.3442, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04422260169009151, |
|
"grad_norm": 15.316130787659379, |
|
"learning_rate": 1.9756750182437365e-05, |
|
"loss": 0.424, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.04466044923157757, |
|
"grad_norm": 18.53714384430074, |
|
"learning_rate": 1.975276973087725e-05, |
|
"loss": 0.44, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.045098296773063616, |
|
"grad_norm": 10.148359314556405, |
|
"learning_rate": 1.9748347006921564e-05, |
|
"loss": 0.4016, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.04553614431454967, |
|
"grad_norm": 10.835539686055602, |
|
"learning_rate": 1.974392428296588e-05, |
|
"loss": 0.3899, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.04597399185603573, |
|
"grad_norm": 13.894807990285942, |
|
"learning_rate": 1.9739501559010197e-05, |
|
"loss": 0.4385, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.04641183939752178, |
|
"grad_norm": 13.530858128888262, |
|
"learning_rate": 1.9735078835054512e-05, |
|
"loss": 0.4324, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.04684968693900784, |
|
"grad_norm": 12.011714939023786, |
|
"learning_rate": 1.9730656111098827e-05, |
|
"loss": 0.4168, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.047287534480493894, |
|
"grad_norm": 11.289218951662432, |
|
"learning_rate": 1.9726233387143142e-05, |
|
"loss": 0.4438, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.04772538202197995, |
|
"grad_norm": 13.657029328270559, |
|
"learning_rate": 1.972181066318746e-05, |
|
"loss": 0.3996, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.048163229563466, |
|
"grad_norm": 8.72748000086665, |
|
"learning_rate": 1.9717387939231775e-05, |
|
"loss": 0.4578, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04860107710495205, |
|
"grad_norm": 10.975240759500627, |
|
"learning_rate": 1.971296521527609e-05, |
|
"loss": 0.4165, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.04903892464643811, |
|
"grad_norm": 11.29698373153932, |
|
"learning_rate": 1.9708542491320405e-05, |
|
"loss": 0.3545, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.049476772187924165, |
|
"grad_norm": 20.506288633859523, |
|
"learning_rate": 1.9704119767364723e-05, |
|
"loss": 0.3568, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.04991461972941022, |
|
"grad_norm": 11.698600515801893, |
|
"learning_rate": 1.9699697043409035e-05, |
|
"loss": 0.4479, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.050352467270896276, |
|
"grad_norm": 13.862971410140013, |
|
"learning_rate": 1.9695274319453353e-05, |
|
"loss": 0.3974, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.05079031481238233, |
|
"grad_norm": 12.469196053464175, |
|
"learning_rate": 1.9690851595497668e-05, |
|
"loss": 0.4402, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.05122816235386838, |
|
"grad_norm": 11.3264424935876, |
|
"learning_rate": 1.9686428871541986e-05, |
|
"loss": 0.3643, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.051666009895354435, |
|
"grad_norm": 19.974826992464948, |
|
"learning_rate": 1.9682006147586298e-05, |
|
"loss": 0.3762, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.05210385743684049, |
|
"grad_norm": 10.918494229304311, |
|
"learning_rate": 1.9677583423630616e-05, |
|
"loss": 0.3545, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.052541704978326546, |
|
"grad_norm": 11.568181801830196, |
|
"learning_rate": 1.967316069967493e-05, |
|
"loss": 0.4007, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0529795525198126, |
|
"grad_norm": 17.075179869568725, |
|
"learning_rate": 1.966873797571925e-05, |
|
"loss": 0.3989, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.05341740006129866, |
|
"grad_norm": 15.560201574750726, |
|
"learning_rate": 1.9664315251763564e-05, |
|
"loss": 0.4096, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.05385524760278471, |
|
"grad_norm": 11.91165895325058, |
|
"learning_rate": 1.965989252780788e-05, |
|
"loss": 0.436, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.05429309514427076, |
|
"grad_norm": 11.127002664395162, |
|
"learning_rate": 1.9655469803852194e-05, |
|
"loss": 0.3146, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.05473094268575682, |
|
"grad_norm": 17.968378053215634, |
|
"learning_rate": 1.965104707989651e-05, |
|
"loss": 0.444, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.05516879022724287, |
|
"grad_norm": 15.278957202738837, |
|
"learning_rate": 1.9646624355940827e-05, |
|
"loss": 0.461, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.05560663776872893, |
|
"grad_norm": 14.312647123288594, |
|
"learning_rate": 1.9642201631985142e-05, |
|
"loss": 0.398, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.056044485310214984, |
|
"grad_norm": 12.143629725906578, |
|
"learning_rate": 1.9637778908029457e-05, |
|
"loss": 0.3818, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.05648233285170104, |
|
"grad_norm": 10.960126916192301, |
|
"learning_rate": 1.9633356184073772e-05, |
|
"loss": 0.4273, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.056920180393187095, |
|
"grad_norm": 14.260248079643242, |
|
"learning_rate": 1.962893346011809e-05, |
|
"loss": 0.469, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05735802793467314, |
|
"grad_norm": 9.509984040352219, |
|
"learning_rate": 1.9624510736162405e-05, |
|
"loss": 0.3732, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.0577958754761592, |
|
"grad_norm": 12.88397674204099, |
|
"learning_rate": 1.962008801220672e-05, |
|
"loss": 0.4473, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.058233723017645254, |
|
"grad_norm": 13.708354791471775, |
|
"learning_rate": 1.9615665288251035e-05, |
|
"loss": 0.4093, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.05867157055913131, |
|
"grad_norm": 11.009628243610226, |
|
"learning_rate": 1.9611242564295353e-05, |
|
"loss": 0.4258, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.059109418100617366, |
|
"grad_norm": 15.94608664981347, |
|
"learning_rate": 1.9606819840339664e-05, |
|
"loss": 0.4117, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.05954726564210342, |
|
"grad_norm": 8.631681646096013, |
|
"learning_rate": 1.9602397116383983e-05, |
|
"loss": 0.4384, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.05998511318358948, |
|
"grad_norm": 12.838858533847487, |
|
"learning_rate": 1.9597974392428298e-05, |
|
"loss": 0.3764, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.06042296072507553, |
|
"grad_norm": 41.39191821545224, |
|
"learning_rate": 1.9593551668472616e-05, |
|
"loss": 0.3166, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.06086080826656158, |
|
"grad_norm": 13.630700275122859, |
|
"learning_rate": 1.9589128944516927e-05, |
|
"loss": 0.4142, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.061298655808047636, |
|
"grad_norm": 10.941493062958882, |
|
"learning_rate": 1.9584706220561246e-05, |
|
"loss": 0.393, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.06173650334953369, |
|
"grad_norm": 12.209335407537546, |
|
"learning_rate": 1.958028349660556e-05, |
|
"loss": 0.3882, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.06217435089101975, |
|
"grad_norm": 11.160854212292483, |
|
"learning_rate": 1.957586077264988e-05, |
|
"loss": 0.3571, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.0626121984325058, |
|
"grad_norm": 15.758848096703037, |
|
"learning_rate": 1.957143804869419e-05, |
|
"loss": 0.4188, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.06305004597399186, |
|
"grad_norm": 14.95914949335199, |
|
"learning_rate": 1.956701532473851e-05, |
|
"loss": 0.3811, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.06348789351547791, |
|
"grad_norm": 12.602990028799342, |
|
"learning_rate": 1.9562592600782824e-05, |
|
"loss": 0.351, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.06392574105696397, |
|
"grad_norm": 11.138632661742742, |
|
"learning_rate": 1.955816987682714e-05, |
|
"loss": 0.3217, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.06436358859845003, |
|
"grad_norm": 10.073650366260493, |
|
"learning_rate": 1.9553747152871457e-05, |
|
"loss": 0.3915, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.06480143613993608, |
|
"grad_norm": 11.607027332443641, |
|
"learning_rate": 1.954932442891577e-05, |
|
"loss": 0.4501, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.06523928368142212, |
|
"grad_norm": 11.6963320729176, |
|
"learning_rate": 1.9544901704960087e-05, |
|
"loss": 0.3876, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.06567713122290818, |
|
"grad_norm": 12.978277397409164, |
|
"learning_rate": 1.95404789810044e-05, |
|
"loss": 0.4058, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06611497876439423, |
|
"grad_norm": 10.230356140798722, |
|
"learning_rate": 1.953605625704872e-05, |
|
"loss": 0.3503, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.06655282630588029, |
|
"grad_norm": 10.411279694304964, |
|
"learning_rate": 1.953163353309303e-05, |
|
"loss": 0.4545, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.06699067384736634, |
|
"grad_norm": 12.48524397593703, |
|
"learning_rate": 1.952721080913735e-05, |
|
"loss": 0.4056, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.0674285213888524, |
|
"grad_norm": 9.054498757739506, |
|
"learning_rate": 1.9522788085181664e-05, |
|
"loss": 0.422, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.06786636893033846, |
|
"grad_norm": 12.468833215574444, |
|
"learning_rate": 1.9518365361225983e-05, |
|
"loss": 0.3668, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.06830421647182451, |
|
"grad_norm": 7.37855108081838, |
|
"learning_rate": 1.9513942637270294e-05, |
|
"loss": 0.3418, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.06874206401331057, |
|
"grad_norm": 18.087926793945318, |
|
"learning_rate": 1.9509519913314612e-05, |
|
"loss": 0.4362, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.06917991155479662, |
|
"grad_norm": 9.463333040806022, |
|
"learning_rate": 1.9505097189358927e-05, |
|
"loss": 0.351, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.06961775909628268, |
|
"grad_norm": 13.249219763473633, |
|
"learning_rate": 1.9500674465403246e-05, |
|
"loss": 0.3334, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.07005560663776873, |
|
"grad_norm": 9.494154085119547, |
|
"learning_rate": 1.9496251741447557e-05, |
|
"loss": 0.3766, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07049345417925479, |
|
"grad_norm": 12.84431192695635, |
|
"learning_rate": 1.9491829017491875e-05, |
|
"loss": 0.3904, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.07093130172074084, |
|
"grad_norm": 10.983543223419419, |
|
"learning_rate": 1.948740629353619e-05, |
|
"loss": 0.4181, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.07136914926222689, |
|
"grad_norm": 17.280817340311735, |
|
"learning_rate": 1.948298356958051e-05, |
|
"loss": 0.3994, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.07180699680371294, |
|
"grad_norm": 11.179902123311116, |
|
"learning_rate": 1.947856084562482e-05, |
|
"loss": 0.41, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.072244844345199, |
|
"grad_norm": 7.022304150684218, |
|
"learning_rate": 1.947413812166914e-05, |
|
"loss": 0.3959, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.07268269188668505, |
|
"grad_norm": 11.715877252052287, |
|
"learning_rate": 1.9469715397713453e-05, |
|
"loss": 0.3781, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.07312053942817111, |
|
"grad_norm": 10.5240917837852, |
|
"learning_rate": 1.9465292673757768e-05, |
|
"loss": 0.4285, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.07355838696965716, |
|
"grad_norm": 13.043269992216052, |
|
"learning_rate": 1.9460869949802083e-05, |
|
"loss": 0.3953, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.07399623451114322, |
|
"grad_norm": 10.473522043776144, |
|
"learning_rate": 1.94564472258464e-05, |
|
"loss": 0.3795, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.07443408205262927, |
|
"grad_norm": 12.81124231632124, |
|
"learning_rate": 1.9452024501890716e-05, |
|
"loss": 0.4329, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.07487192959411533, |
|
"grad_norm": 9.570293367327814, |
|
"learning_rate": 1.944760177793503e-05, |
|
"loss": 0.4536, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.07530977713560139, |
|
"grad_norm": 13.781425492257062, |
|
"learning_rate": 1.944317905397935e-05, |
|
"loss": 0.3918, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.07574762467708744, |
|
"grad_norm": 13.427510367449685, |
|
"learning_rate": 1.943875633002366e-05, |
|
"loss": 0.3379, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.0761854722185735, |
|
"grad_norm": 12.599200486748096, |
|
"learning_rate": 1.943433360606798e-05, |
|
"loss": 0.3939, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.07662331976005955, |
|
"grad_norm": 14.384131674124605, |
|
"learning_rate": 1.9429910882112294e-05, |
|
"loss": 0.405, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.07706116730154561, |
|
"grad_norm": 9.414388690536624, |
|
"learning_rate": 1.9425488158156612e-05, |
|
"loss": 0.4003, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.07749901484303165, |
|
"grad_norm": 18.045324270495772, |
|
"learning_rate": 1.9421065434200924e-05, |
|
"loss": 0.4676, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.0779368623845177, |
|
"grad_norm": 11.920491989530506, |
|
"learning_rate": 1.9416642710245242e-05, |
|
"loss": 0.3452, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.07837470992600376, |
|
"grad_norm": 13.25199040272023, |
|
"learning_rate": 1.9412219986289557e-05, |
|
"loss": 0.3281, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.07881255746748982, |
|
"grad_norm": 18.052479006801686, |
|
"learning_rate": 1.9407797262333875e-05, |
|
"loss": 0.393, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07925040500897587, |
|
"grad_norm": 9.88776702327391, |
|
"learning_rate": 1.9403374538378187e-05, |
|
"loss": 0.3531, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.07968825255046193, |
|
"grad_norm": 8.54763684547313, |
|
"learning_rate": 1.9398951814422505e-05, |
|
"loss": 0.4018, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.08012610009194798, |
|
"grad_norm": 12.109050524886657, |
|
"learning_rate": 1.939452909046682e-05, |
|
"loss": 0.4658, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.08056394763343404, |
|
"grad_norm": 7.7518635631951485, |
|
"learning_rate": 1.9390106366511138e-05, |
|
"loss": 0.4297, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.0810017951749201, |
|
"grad_norm": 14.594279048895539, |
|
"learning_rate": 1.938568364255545e-05, |
|
"loss": 0.355, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.08143964271640615, |
|
"grad_norm": 11.417142166667903, |
|
"learning_rate": 1.9381260918599768e-05, |
|
"loss": 0.392, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.0818774902578922, |
|
"grad_norm": 11.637581528522489, |
|
"learning_rate": 1.9376838194644083e-05, |
|
"loss": 0.3802, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.08231533779937826, |
|
"grad_norm": 11.85655948956895, |
|
"learning_rate": 1.9372415470688398e-05, |
|
"loss": 0.3977, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.08275318534086432, |
|
"grad_norm": 10.54522592721261, |
|
"learning_rate": 1.9367992746732713e-05, |
|
"loss": 0.3971, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.08319103288235037, |
|
"grad_norm": 11.259013994047795, |
|
"learning_rate": 1.936357002277703e-05, |
|
"loss": 0.4662, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.08362888042383643, |
|
"grad_norm": 10.413681904734188, |
|
"learning_rate": 1.9359147298821346e-05, |
|
"loss": 0.2992, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.08406672796532247, |
|
"grad_norm": 16.01143047206335, |
|
"learning_rate": 1.935472457486566e-05, |
|
"loss": 0.4038, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.08450457550680852, |
|
"grad_norm": 8.758483697657311, |
|
"learning_rate": 1.9350301850909976e-05, |
|
"loss": 0.3402, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.08494242304829458, |
|
"grad_norm": 13.47768507552826, |
|
"learning_rate": 1.934587912695429e-05, |
|
"loss": 0.3971, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.08538027058978064, |
|
"grad_norm": 14.13100123568219, |
|
"learning_rate": 1.934145640299861e-05, |
|
"loss": 0.3485, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.08581811813126669, |
|
"grad_norm": 11.367957313924562, |
|
"learning_rate": 1.9337033679042924e-05, |
|
"loss": 0.3561, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.08625596567275275, |
|
"grad_norm": 10.362775839894933, |
|
"learning_rate": 1.933261095508724e-05, |
|
"loss": 0.3037, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.0866938132142388, |
|
"grad_norm": 14.232027197294531, |
|
"learning_rate": 1.9328188231131554e-05, |
|
"loss": 0.3772, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.08713166075572486, |
|
"grad_norm": 7.416354083499904, |
|
"learning_rate": 1.9323765507175872e-05, |
|
"loss": 0.3444, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.08756950829721091, |
|
"grad_norm": 11.816638685730455, |
|
"learning_rate": 1.9319342783220187e-05, |
|
"loss": 0.4283, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08800735583869697, |
|
"grad_norm": 7.26923187311744, |
|
"learning_rate": 1.9314920059264505e-05, |
|
"loss": 0.3865, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.08844520338018302, |
|
"grad_norm": 15.434817185278508, |
|
"learning_rate": 1.9310497335308817e-05, |
|
"loss": 0.4109, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.08888305092166908, |
|
"grad_norm": 10.681457596084588, |
|
"learning_rate": 1.9306074611353135e-05, |
|
"loss": 0.3436, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.08932089846315514, |
|
"grad_norm": 11.348178640084303, |
|
"learning_rate": 1.930165188739745e-05, |
|
"loss": 0.3667, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.08975874600464119, |
|
"grad_norm": 14.026197173621947, |
|
"learning_rate": 1.9297229163441768e-05, |
|
"loss": 0.4237, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.09019659354612723, |
|
"grad_norm": 13.112088492075678, |
|
"learning_rate": 1.929280643948608e-05, |
|
"loss": 0.4262, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.09063444108761329, |
|
"grad_norm": 9.622691088092534, |
|
"learning_rate": 1.9288383715530398e-05, |
|
"loss": 0.3801, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.09107228862909934, |
|
"grad_norm": 9.697640310926039, |
|
"learning_rate": 1.9283960991574713e-05, |
|
"loss": 0.39, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.0915101361705854, |
|
"grad_norm": 12.500317378783329, |
|
"learning_rate": 1.9279538267619028e-05, |
|
"loss": 0.4054, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.09194798371207145, |
|
"grad_norm": 10.789036689302037, |
|
"learning_rate": 1.9275115543663342e-05, |
|
"loss": 0.4074, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.09238583125355751, |
|
"grad_norm": 12.509375756268714, |
|
"learning_rate": 1.927069281970766e-05, |
|
"loss": 0.3667, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.09282367879504357, |
|
"grad_norm": 9.389386341327063, |
|
"learning_rate": 1.9266270095751976e-05, |
|
"loss": 0.4499, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.09326152633652962, |
|
"grad_norm": 15.390016743215236, |
|
"learning_rate": 1.926184737179629e-05, |
|
"loss": 0.4473, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.09369937387801568, |
|
"grad_norm": 19.17657099807316, |
|
"learning_rate": 1.9257424647840605e-05, |
|
"loss": 0.3094, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.09413722141950173, |
|
"grad_norm": 14.244069789562317, |
|
"learning_rate": 1.925300192388492e-05, |
|
"loss": 0.426, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.09457506896098779, |
|
"grad_norm": 14.851204953612982, |
|
"learning_rate": 1.924857919992924e-05, |
|
"loss": 0.4369, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.09501291650247384, |
|
"grad_norm": 8.66142168035377, |
|
"learning_rate": 1.9244156475973554e-05, |
|
"loss": 0.3666, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.0954507640439599, |
|
"grad_norm": 10.22720519072457, |
|
"learning_rate": 1.923973375201787e-05, |
|
"loss": 0.4175, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.09588861158544595, |
|
"grad_norm": 9.595268402718855, |
|
"learning_rate": 1.9235311028062183e-05, |
|
"loss": 0.4074, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.096326459126932, |
|
"grad_norm": 11.423439263182056, |
|
"learning_rate": 1.92308883041065e-05, |
|
"loss": 0.3714, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09676430666841805, |
|
"grad_norm": 12.195829235128782, |
|
"learning_rate": 1.9226465580150816e-05, |
|
"loss": 0.3617, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.0972021542099041, |
|
"grad_norm": 13.401702180614619, |
|
"learning_rate": 1.922204285619513e-05, |
|
"loss": 0.3476, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.09764000175139016, |
|
"grad_norm": 14.400413274132806, |
|
"learning_rate": 1.9217620132239446e-05, |
|
"loss": 0.3654, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.09807784929287622, |
|
"grad_norm": 13.728323756706255, |
|
"learning_rate": 1.9213197408283765e-05, |
|
"loss": 0.3755, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.09851569683436227, |
|
"grad_norm": 9.537467954489292, |
|
"learning_rate": 1.920877468432808e-05, |
|
"loss": 0.4267, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.09895354437584833, |
|
"grad_norm": 12.562863169113138, |
|
"learning_rate": 1.9204351960372394e-05, |
|
"loss": 0.3631, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.09939139191733438, |
|
"grad_norm": 8.916130073539824, |
|
"learning_rate": 1.919992923641671e-05, |
|
"loss": 0.3976, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.09982923945882044, |
|
"grad_norm": 10.835660087923376, |
|
"learning_rate": 1.9195506512461028e-05, |
|
"loss": 0.344, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.1002670870003065, |
|
"grad_norm": 14.089519398644, |
|
"learning_rate": 1.9191083788505342e-05, |
|
"loss": 0.3957, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.10070493454179255, |
|
"grad_norm": 13.631563897871652, |
|
"learning_rate": 1.9186661064549657e-05, |
|
"loss": 0.4412, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.1011427820832786, |
|
"grad_norm": 11.687133130522081, |
|
"learning_rate": 1.9182238340593972e-05, |
|
"loss": 0.4246, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.10158062962476466, |
|
"grad_norm": 11.012593429809936, |
|
"learning_rate": 1.917781561663829e-05, |
|
"loss": 0.3929, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.10201847716625072, |
|
"grad_norm": 11.437466647406971, |
|
"learning_rate": 1.9173392892682605e-05, |
|
"loss": 0.4196, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.10245632470773676, |
|
"grad_norm": 13.09895341492618, |
|
"learning_rate": 1.916897016872692e-05, |
|
"loss": 0.3513, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.10289417224922282, |
|
"grad_norm": 12.293757546388127, |
|
"learning_rate": 1.9164547444771235e-05, |
|
"loss": 0.3992, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.10333201979070887, |
|
"grad_norm": 10.261487153006069, |
|
"learning_rate": 1.916012472081555e-05, |
|
"loss": 0.3066, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.10376986733219493, |
|
"grad_norm": 14.709144519485733, |
|
"learning_rate": 1.915570199685987e-05, |
|
"loss": 0.4397, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.10420771487368098, |
|
"grad_norm": 10.834726410330754, |
|
"learning_rate": 1.9151279272904183e-05, |
|
"loss": 0.3525, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.10464556241516704, |
|
"grad_norm": 14.674602673430272, |
|
"learning_rate": 1.9146856548948498e-05, |
|
"loss": 0.3694, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.10508340995665309, |
|
"grad_norm": 16.41260572575191, |
|
"learning_rate": 1.9142433824992813e-05, |
|
"loss": 0.3565, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.10552125749813915, |
|
"grad_norm": 10.31589706418058, |
|
"learning_rate": 1.913801110103713e-05, |
|
"loss": 0.4554, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.1059591050396252, |
|
"grad_norm": 8.228303991240578, |
|
"learning_rate": 1.9133588377081446e-05, |
|
"loss": 0.4028, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.10639695258111126, |
|
"grad_norm": 13.474103036444346, |
|
"learning_rate": 1.912916565312576e-05, |
|
"loss": 0.4172, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.10683480012259731, |
|
"grad_norm": 11.893594097933823, |
|
"learning_rate": 1.9124742929170076e-05, |
|
"loss": 0.3997, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.10727264766408337, |
|
"grad_norm": 15.503153818507785, |
|
"learning_rate": 1.9120320205214394e-05, |
|
"loss": 0.4038, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.10771049520556943, |
|
"grad_norm": 10.232230065068523, |
|
"learning_rate": 1.911589748125871e-05, |
|
"loss": 0.4035, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.10814834274705548, |
|
"grad_norm": 12.04140450127315, |
|
"learning_rate": 1.9111474757303024e-05, |
|
"loss": 0.3076, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.10858619028854152, |
|
"grad_norm": 15.468189206217254, |
|
"learning_rate": 1.910705203334734e-05, |
|
"loss": 0.3361, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.10902403783002758, |
|
"grad_norm": 17.902774600013995, |
|
"learning_rate": 1.9102629309391657e-05, |
|
"loss": 0.4081, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.10946188537151363, |
|
"grad_norm": 24.91450836462123, |
|
"learning_rate": 1.9098206585435972e-05, |
|
"loss": 0.4367, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10989973291299969, |
|
"grad_norm": 11.744904407487343, |
|
"learning_rate": 1.9094226133875857e-05, |
|
"loss": 0.4292, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.11033758045448575, |
|
"grad_norm": 13.899816366396042, |
|
"learning_rate": 1.908980340992017e-05, |
|
"loss": 0.4321, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.1107754279959718, |
|
"grad_norm": 12.102738226657959, |
|
"learning_rate": 1.9085380685964486e-05, |
|
"loss": 0.3108, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.11121327553745786, |
|
"grad_norm": 14.16084103947617, |
|
"learning_rate": 1.9080957962008805e-05, |
|
"loss": 0.335, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.11165112307894391, |
|
"grad_norm": 11.115420213753518, |
|
"learning_rate": 1.9076535238053116e-05, |
|
"loss": 0.3912, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.11208897062042997, |
|
"grad_norm": 13.023671826026302, |
|
"learning_rate": 1.9072112514097434e-05, |
|
"loss": 0.4168, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.11252681816191602, |
|
"grad_norm": 11.952192442175816, |
|
"learning_rate": 1.906768979014175e-05, |
|
"loss": 0.4028, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.11296466570340208, |
|
"grad_norm": 9.592598016519975, |
|
"learning_rate": 1.9063267066186068e-05, |
|
"loss": 0.4134, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.11340251324488813, |
|
"grad_norm": 9.083352138488156, |
|
"learning_rate": 1.905884434223038e-05, |
|
"loss": 0.3328, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.11384036078637419, |
|
"grad_norm": 18.60156824207177, |
|
"learning_rate": 1.9054421618274697e-05, |
|
"loss": 0.3479, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.11427820832786025, |
|
"grad_norm": 9.845963292845576, |
|
"learning_rate": 1.9049998894319012e-05, |
|
"loss": 0.3555, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.11471605586934629, |
|
"grad_norm": 10.182726906192356, |
|
"learning_rate": 1.904557617036333e-05, |
|
"loss": 0.4251, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.11515390341083234, |
|
"grad_norm": 7.335163697208383, |
|
"learning_rate": 1.9041153446407642e-05, |
|
"loss": 0.3376, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.1155917509523184, |
|
"grad_norm": 11.37657070843338, |
|
"learning_rate": 1.903673072245196e-05, |
|
"loss": 0.3645, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.11602959849380445, |
|
"grad_norm": 10.877548094642906, |
|
"learning_rate": 1.9032307998496275e-05, |
|
"loss": 0.3726, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.11646744603529051, |
|
"grad_norm": 15.333882252915895, |
|
"learning_rate": 1.902788527454059e-05, |
|
"loss": 0.385, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.11690529357677656, |
|
"grad_norm": 12.823331692401915, |
|
"learning_rate": 1.9023462550584905e-05, |
|
"loss": 0.3943, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.11734314111826262, |
|
"grad_norm": 12.692981757202359, |
|
"learning_rate": 1.9019039826629223e-05, |
|
"loss": 0.4218, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.11778098865974868, |
|
"grad_norm": 12.550522897473236, |
|
"learning_rate": 1.9014617102673538e-05, |
|
"loss": 0.4077, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.11821883620123473, |
|
"grad_norm": 11.727829165225376, |
|
"learning_rate": 1.9010194378717853e-05, |
|
"loss": 0.4516, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.11865668374272079, |
|
"grad_norm": 9.239870866145635, |
|
"learning_rate": 1.900577165476217e-05, |
|
"loss": 0.3879, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.11909453128420684, |
|
"grad_norm": 10.991158486727915, |
|
"learning_rate": 1.9001348930806486e-05, |
|
"loss": 0.4201, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.1195323788256929, |
|
"grad_norm": 10.775808182785527, |
|
"learning_rate": 1.89969262068508e-05, |
|
"loss": 0.3612, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.11997022636717895, |
|
"grad_norm": 8.786090643394553, |
|
"learning_rate": 1.8992503482895116e-05, |
|
"loss": 0.4029, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.12040807390866501, |
|
"grad_norm": 10.644438789736052, |
|
"learning_rate": 1.8988080758939434e-05, |
|
"loss": 0.3642, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.12084592145015106, |
|
"grad_norm": 10.429045582422866, |
|
"learning_rate": 1.8983658034983746e-05, |
|
"loss": 0.3972, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.1212837689916371, |
|
"grad_norm": 19.068393684191452, |
|
"learning_rate": 1.8979235311028064e-05, |
|
"loss": 0.4214, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.12172161653312316, |
|
"grad_norm": 6.510885660190795, |
|
"learning_rate": 1.897481258707238e-05, |
|
"loss": 0.3927, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.12215946407460922, |
|
"grad_norm": 13.320869214905798, |
|
"learning_rate": 1.8970389863116697e-05, |
|
"loss": 0.3801, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.12259731161609527, |
|
"grad_norm": 11.91416431821247, |
|
"learning_rate": 1.896596713916101e-05, |
|
"loss": 0.3617, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.12303515915758133, |
|
"grad_norm": 16.39803737899181, |
|
"learning_rate": 1.8961544415205327e-05, |
|
"loss": 0.4252, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.12347300669906738, |
|
"grad_norm": 18.444202304052418, |
|
"learning_rate": 1.8957121691249642e-05, |
|
"loss": 0.3753, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.12391085424055344, |
|
"grad_norm": 11.295343530031559, |
|
"learning_rate": 1.895269896729396e-05, |
|
"loss": 0.3921, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.1243487017820395, |
|
"grad_norm": 9.710371030089025, |
|
"learning_rate": 1.8948276243338272e-05, |
|
"loss": 0.3506, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.12478654932352555, |
|
"grad_norm": 13.077018882435036, |
|
"learning_rate": 1.894385351938259e-05, |
|
"loss": 0.3025, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.1252243968650116, |
|
"grad_norm": 9.815328591239517, |
|
"learning_rate": 1.8939430795426905e-05, |
|
"loss": 0.3919, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.12566224440649765, |
|
"grad_norm": 10.956422665808212, |
|
"learning_rate": 1.893500807147122e-05, |
|
"loss": 0.3592, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.12610009194798372, |
|
"grad_norm": 7.24660201847797, |
|
"learning_rate": 1.8930585347515535e-05, |
|
"loss": 0.4198, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.12653793948946976, |
|
"grad_norm": 7.084606579805614, |
|
"learning_rate": 1.8926162623559853e-05, |
|
"loss": 0.4077, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.12697578703095583, |
|
"grad_norm": 16.831228496029244, |
|
"learning_rate": 1.8921739899604168e-05, |
|
"loss": 0.3456, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.12741363457244187, |
|
"grad_norm": 12.587652277854042, |
|
"learning_rate": 1.8917317175648483e-05, |
|
"loss": 0.4014, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.12785148211392794, |
|
"grad_norm": 9.243117375162731, |
|
"learning_rate": 1.8912894451692798e-05, |
|
"loss": 0.3575, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.12828932965541398, |
|
"grad_norm": 11.159631895058327, |
|
"learning_rate": 1.8908471727737116e-05, |
|
"loss": 0.3241, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.12872717719690005, |
|
"grad_norm": 10.310646954134011, |
|
"learning_rate": 1.890404900378143e-05, |
|
"loss": 0.3682, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.1291650247383861, |
|
"grad_norm": 10.173470601980204, |
|
"learning_rate": 1.8899626279825746e-05, |
|
"loss": 0.4251, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.12960287227987216, |
|
"grad_norm": 12.609142050783468, |
|
"learning_rate": 1.889520355587006e-05, |
|
"loss": 0.394, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.1300407198213582, |
|
"grad_norm": 9.635654645850565, |
|
"learning_rate": 1.8890780831914376e-05, |
|
"loss": 0.4089, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.13047856736284424, |
|
"grad_norm": 11.436287904310273, |
|
"learning_rate": 1.8886358107958694e-05, |
|
"loss": 0.3826, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.1309164149043303, |
|
"grad_norm": 16.392761382159563, |
|
"learning_rate": 1.888193538400301e-05, |
|
"loss": 0.3276, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.13135426244581636, |
|
"grad_norm": 17.239395198967156, |
|
"learning_rate": 1.8877512660047327e-05, |
|
"loss": 0.4094, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13179210998730242, |
|
"grad_norm": 14.545360777292585, |
|
"learning_rate": 1.887308993609164e-05, |
|
"loss": 0.3779, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.13222995752878847, |
|
"grad_norm": 14.366498738758244, |
|
"learning_rate": 1.8868667212135957e-05, |
|
"loss": 0.4288, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.13266780507027454, |
|
"grad_norm": 17.039699201481028, |
|
"learning_rate": 1.886424448818027e-05, |
|
"loss": 0.36, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.13310565261176058, |
|
"grad_norm": 9.226512886191754, |
|
"learning_rate": 1.885982176422459e-05, |
|
"loss": 0.3765, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.13354350015324665, |
|
"grad_norm": 9.028526449867499, |
|
"learning_rate": 1.88553990402689e-05, |
|
"loss": 0.4395, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.1339813476947327, |
|
"grad_norm": 13.6426618700454, |
|
"learning_rate": 1.885097631631322e-05, |
|
"loss": 0.3723, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.13441919523621876, |
|
"grad_norm": 13.305857832591359, |
|
"learning_rate": 1.8846553592357535e-05, |
|
"loss": 0.399, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.1348570427777048, |
|
"grad_norm": 7.874733814403194, |
|
"learning_rate": 1.884213086840185e-05, |
|
"loss": 0.4011, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.13529489031919087, |
|
"grad_norm": 13.430051646641026, |
|
"learning_rate": 1.8837708144446164e-05, |
|
"loss": 0.3906, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.1357327378606769, |
|
"grad_norm": 8.662733163489246, |
|
"learning_rate": 1.8833285420490483e-05, |
|
"loss": 0.4138, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.13617058540216298, |
|
"grad_norm": 9.801119868851403, |
|
"learning_rate": 1.8828862696534798e-05, |
|
"loss": 0.3641, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.13660843294364902, |
|
"grad_norm": 11.778191693148269, |
|
"learning_rate": 1.8824439972579112e-05, |
|
"loss": 0.3762, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.13704628048513506, |
|
"grad_norm": 8.279726053764739, |
|
"learning_rate": 1.8820017248623427e-05, |
|
"loss": 0.3693, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.13748412802662113, |
|
"grad_norm": 10.69124033655974, |
|
"learning_rate": 1.8815594524667746e-05, |
|
"loss": 0.3369, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.13792197556810717, |
|
"grad_norm": 6.139622574926641, |
|
"learning_rate": 1.881117180071206e-05, |
|
"loss": 0.3526, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.13835982310959324, |
|
"grad_norm": 8.884320160504993, |
|
"learning_rate": 1.8806749076756375e-05, |
|
"loss": 0.321, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.13879767065107929, |
|
"grad_norm": 11.836052882069614, |
|
"learning_rate": 1.880232635280069e-05, |
|
"loss": 0.3988, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.13923551819256536, |
|
"grad_norm": 9.920797262455972, |
|
"learning_rate": 1.8797903628845005e-05, |
|
"loss": 0.5179, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.1396733657340514, |
|
"grad_norm": 14.813362193063755, |
|
"learning_rate": 1.8793480904889324e-05, |
|
"loss": 0.377, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.14011121327553747, |
|
"grad_norm": 6.695253814831782, |
|
"learning_rate": 1.878905818093364e-05, |
|
"loss": 0.3863, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.1405490608170235, |
|
"grad_norm": 11.085686646540799, |
|
"learning_rate": 1.8784635456977953e-05, |
|
"loss": 0.3708, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.14098690835850958, |
|
"grad_norm": 13.830245136728134, |
|
"learning_rate": 1.8780212733022268e-05, |
|
"loss": 0.3151, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.14142475589999562, |
|
"grad_norm": 12.120622926321499, |
|
"learning_rate": 1.8775790009066586e-05, |
|
"loss": 0.3459, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.1418626034414817, |
|
"grad_norm": 16.30580487007426, |
|
"learning_rate": 1.87713672851109e-05, |
|
"loss": 0.3745, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.14230045098296773, |
|
"grad_norm": 9.992654009584912, |
|
"learning_rate": 1.876694456115522e-05, |
|
"loss": 0.3993, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.14273829852445377, |
|
"grad_norm": 8.666190158753487, |
|
"learning_rate": 1.876252183719953e-05, |
|
"loss": 0.3774, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.14317614606593984, |
|
"grad_norm": 10.386340090996436, |
|
"learning_rate": 1.875809911324385e-05, |
|
"loss": 0.3806, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.14361399360742588, |
|
"grad_norm": 22.19356554965066, |
|
"learning_rate": 1.8753676389288164e-05, |
|
"loss": 0.3824, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.14405184114891195, |
|
"grad_norm": 9.21049973448166, |
|
"learning_rate": 1.874925366533248e-05, |
|
"loss": 0.3353, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.144489688690398, |
|
"grad_norm": 12.072168142150097, |
|
"learning_rate": 1.8744830941376794e-05, |
|
"loss": 0.4011, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.14492753623188406, |
|
"grad_norm": 12.59324716931968, |
|
"learning_rate": 1.8740408217421112e-05, |
|
"loss": 0.4021, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.1453653837733701, |
|
"grad_norm": 10.05214029931775, |
|
"learning_rate": 1.8735985493465427e-05, |
|
"loss": 0.3553, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.14580323131485617, |
|
"grad_norm": 11.803155664591488, |
|
"learning_rate": 1.8731562769509742e-05, |
|
"loss": 0.356, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.14624107885634222, |
|
"grad_norm": 14.92368689915674, |
|
"learning_rate": 1.8727140045554057e-05, |
|
"loss": 0.3801, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.14667892639782829, |
|
"grad_norm": 17.079776945779418, |
|
"learning_rate": 1.8722717321598375e-05, |
|
"loss": 0.3477, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.14711677393931433, |
|
"grad_norm": 13.097782069556722, |
|
"learning_rate": 1.871829459764269e-05, |
|
"loss": 0.3671, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.1475546214808004, |
|
"grad_norm": 8.15632623512624, |
|
"learning_rate": 1.8713871873687005e-05, |
|
"loss": 0.3389, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.14799246902228644, |
|
"grad_norm": 13.960404123834712, |
|
"learning_rate": 1.870944914973132e-05, |
|
"loss": 0.3485, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.1484303165637725, |
|
"grad_norm": 11.071464397882252, |
|
"learning_rate": 1.8705026425775635e-05, |
|
"loss": 0.3416, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.14886816410525855, |
|
"grad_norm": 13.277501194270975, |
|
"learning_rate": 1.8700603701819953e-05, |
|
"loss": 0.3789, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1493060116467446, |
|
"grad_norm": 10.804325254909127, |
|
"learning_rate": 1.8696180977864268e-05, |
|
"loss": 0.4302, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.14974385918823066, |
|
"grad_norm": 13.368363204432509, |
|
"learning_rate": 1.8691758253908583e-05, |
|
"loss": 0.3827, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.1501817067297167, |
|
"grad_norm": 10.0824595511505, |
|
"learning_rate": 1.8687335529952898e-05, |
|
"loss": 0.3658, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.15061955427120277, |
|
"grad_norm": 12.416220706039892, |
|
"learning_rate": 1.8682912805997216e-05, |
|
"loss": 0.3614, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.1510574018126888, |
|
"grad_norm": 16.7125572642998, |
|
"learning_rate": 1.867849008204153e-05, |
|
"loss": 0.4461, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.15149524935417488, |
|
"grad_norm": 12.811501366379437, |
|
"learning_rate": 1.8674067358085846e-05, |
|
"loss": 0.4033, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.15193309689566092, |
|
"grad_norm": 17.173250485174975, |
|
"learning_rate": 1.866964463413016e-05, |
|
"loss": 0.3277, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.152370944437147, |
|
"grad_norm": 8.979558831403725, |
|
"learning_rate": 1.866522191017448e-05, |
|
"loss": 0.3373, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.15280879197863304, |
|
"grad_norm": 7.797438606883994, |
|
"learning_rate": 1.8660799186218794e-05, |
|
"loss": 0.3954, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.1532466395201191, |
|
"grad_norm": 16.155288520109096, |
|
"learning_rate": 1.865637646226311e-05, |
|
"loss": 0.385, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.15368448706160515, |
|
"grad_norm": 16.957081191616158, |
|
"learning_rate": 1.8651953738307424e-05, |
|
"loss": 0.3742, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.15412233460309122, |
|
"grad_norm": 9.531468523365854, |
|
"learning_rate": 1.8647531014351742e-05, |
|
"loss": 0.3326, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.15456018214457726, |
|
"grad_norm": 25.31643246845194, |
|
"learning_rate": 1.8643108290396057e-05, |
|
"loss": 0.39, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.1549980296860633, |
|
"grad_norm": 10.078617054363939, |
|
"learning_rate": 1.8638685566440372e-05, |
|
"loss": 0.4025, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.15543587722754937, |
|
"grad_norm": 11.02217779389865, |
|
"learning_rate": 1.8634262842484687e-05, |
|
"loss": 0.4015, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.1558737247690354, |
|
"grad_norm": 14.098789980216388, |
|
"learning_rate": 1.8629840118529005e-05, |
|
"loss": 0.3344, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.15631157231052148, |
|
"grad_norm": 15.721719692602761, |
|
"learning_rate": 1.862541739457332e-05, |
|
"loss": 0.3715, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.15674941985200752, |
|
"grad_norm": 12.716040770267046, |
|
"learning_rate": 1.8620994670617635e-05, |
|
"loss": 0.3651, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.1571872673934936, |
|
"grad_norm": 12.965209183423223, |
|
"learning_rate": 1.861657194666195e-05, |
|
"loss": 0.4063, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.15762511493497963, |
|
"grad_norm": 8.666927431536255, |
|
"learning_rate": 1.8612149222706265e-05, |
|
"loss": 0.3274, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.1580629624764657, |
|
"grad_norm": 14.516286713758953, |
|
"learning_rate": 1.8607726498750583e-05, |
|
"loss": 0.4267, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.15850081001795174, |
|
"grad_norm": 9.395231262901326, |
|
"learning_rate": 1.8603303774794898e-05, |
|
"loss": 0.3378, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.1589386575594378, |
|
"grad_norm": 15.580920245081602, |
|
"learning_rate": 1.8598881050839213e-05, |
|
"loss": 0.3727, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.15937650510092385, |
|
"grad_norm": 9.421511818637732, |
|
"learning_rate": 1.8594458326883528e-05, |
|
"loss": 0.4091, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.15981435264240992, |
|
"grad_norm": 10.903593860274887, |
|
"learning_rate": 1.8590035602927846e-05, |
|
"loss": 0.3852, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.16025220018389597, |
|
"grad_norm": 15.079459239057043, |
|
"learning_rate": 1.858561287897216e-05, |
|
"loss": 0.3161, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.16069004772538203, |
|
"grad_norm": 11.906906790652602, |
|
"learning_rate": 1.8581632427412045e-05, |
|
"loss": 0.4032, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.16112789526686808, |
|
"grad_norm": 7.299516154937353, |
|
"learning_rate": 1.857720970345636e-05, |
|
"loss": 0.4608, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.16156574280835412, |
|
"grad_norm": 10.405574742308731, |
|
"learning_rate": 1.8572786979500675e-05, |
|
"loss": 0.3277, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.1620035903498402, |
|
"grad_norm": 12.45192353736054, |
|
"learning_rate": 1.8568364255544993e-05, |
|
"loss": 0.4101, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.16244143789132623, |
|
"grad_norm": 17.784864115352764, |
|
"learning_rate": 1.8563941531589308e-05, |
|
"loss": 0.4311, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.1628792854328123, |
|
"grad_norm": 12.704077503112067, |
|
"learning_rate": 1.8559518807633623e-05, |
|
"loss": 0.3409, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.16331713297429834, |
|
"grad_norm": 26.63374521687372, |
|
"learning_rate": 1.8555096083677938e-05, |
|
"loss": 0.3658, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.1637549805157844, |
|
"grad_norm": 10.121501930571688, |
|
"learning_rate": 1.8550673359722256e-05, |
|
"loss": 0.3402, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.16419282805727045, |
|
"grad_norm": 14.094860227716598, |
|
"learning_rate": 1.854625063576657e-05, |
|
"loss": 0.4277, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.16463067559875652, |
|
"grad_norm": 9.994141579233393, |
|
"learning_rate": 1.8541827911810886e-05, |
|
"loss": 0.4692, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.16506852314024256, |
|
"grad_norm": 8.67508870280096, |
|
"learning_rate": 1.85374051878552e-05, |
|
"loss": 0.4016, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.16550637068172863, |
|
"grad_norm": 14.502486443265836, |
|
"learning_rate": 1.853298246389952e-05, |
|
"loss": 0.4075, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.16594421822321467, |
|
"grad_norm": 11.515960895499763, |
|
"learning_rate": 1.852855973994383e-05, |
|
"loss": 0.3435, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.16638206576470074, |
|
"grad_norm": 9.745079168132195, |
|
"learning_rate": 1.852413701598815e-05, |
|
"loss": 0.3109, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.16681991330618678, |
|
"grad_norm": 8.495004869159922, |
|
"learning_rate": 1.8519714292032464e-05, |
|
"loss": 0.3922, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.16725776084767285, |
|
"grad_norm": 8.988712438049474, |
|
"learning_rate": 1.8515291568076782e-05, |
|
"loss": 0.4012, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.1676956083891589, |
|
"grad_norm": 12.853240059584076, |
|
"learning_rate": 1.8510868844121094e-05, |
|
"loss": 0.4125, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.16813345593064494, |
|
"grad_norm": 9.825049278388068, |
|
"learning_rate": 1.8506446120165412e-05, |
|
"loss": 0.3032, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.168571303472131, |
|
"grad_norm": 7.994348437089401, |
|
"learning_rate": 1.8502023396209727e-05, |
|
"loss": 0.3636, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.16900915101361705, |
|
"grad_norm": 12.765237118303892, |
|
"learning_rate": 1.8497600672254045e-05, |
|
"loss": 0.3478, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.16944699855510312, |
|
"grad_norm": 12.546641498449894, |
|
"learning_rate": 1.8493177948298357e-05, |
|
"loss": 0.3, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.16988484609658916, |
|
"grad_norm": 23.911174431535404, |
|
"learning_rate": 1.8488755224342675e-05, |
|
"loss": 0.451, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.17032269363807523, |
|
"grad_norm": 10.520515381695157, |
|
"learning_rate": 1.848433250038699e-05, |
|
"loss": 0.358, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.17076054117956127, |
|
"grad_norm": 11.224917445909368, |
|
"learning_rate": 1.8479909776431305e-05, |
|
"loss": 0.3162, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.17119838872104734, |
|
"grad_norm": 18.902395009875733, |
|
"learning_rate": 1.847548705247562e-05, |
|
"loss": 0.4223, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.17163623626253338, |
|
"grad_norm": 11.55198595552304, |
|
"learning_rate": 1.8471064328519938e-05, |
|
"loss": 0.3808, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.17207408380401945, |
|
"grad_norm": 11.00362301896321, |
|
"learning_rate": 1.8466641604564253e-05, |
|
"loss": 0.3252, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.1725119313455055, |
|
"grad_norm": 10.336995942120799, |
|
"learning_rate": 1.8462218880608568e-05, |
|
"loss": 0.364, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.17294977888699156, |
|
"grad_norm": 10.710989141583203, |
|
"learning_rate": 1.8457796156652883e-05, |
|
"loss": 0.4231, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.1733876264284776, |
|
"grad_norm": 12.229225448822383, |
|
"learning_rate": 1.84533734326972e-05, |
|
"loss": 0.4117, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.17382547396996365, |
|
"grad_norm": 14.915171514555029, |
|
"learning_rate": 1.8448950708741516e-05, |
|
"loss": 0.4032, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.17426332151144971, |
|
"grad_norm": 12.229642807866787, |
|
"learning_rate": 1.844452798478583e-05, |
|
"loss": 0.4361, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.17470116905293576, |
|
"grad_norm": 7.010608264256624, |
|
"learning_rate": 1.844010526083015e-05, |
|
"loss": 0.3638, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.17513901659442183, |
|
"grad_norm": 15.912932485500148, |
|
"learning_rate": 1.843568253687446e-05, |
|
"loss": 0.455, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17557686413590787, |
|
"grad_norm": 6.924969603544103, |
|
"learning_rate": 1.843125981291878e-05, |
|
"loss": 0.2625, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.17601471167739394, |
|
"grad_norm": 15.139219047531501, |
|
"learning_rate": 1.8426837088963094e-05, |
|
"loss": 0.4141, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.17645255921887998, |
|
"grad_norm": 11.17823518693945, |
|
"learning_rate": 1.8422414365007412e-05, |
|
"loss": 0.3224, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.17689040676036605, |
|
"grad_norm": 15.7161064579388, |
|
"learning_rate": 1.8417991641051723e-05, |
|
"loss": 0.3664, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.1773282543018521, |
|
"grad_norm": 13.792070427062033, |
|
"learning_rate": 1.841356891709604e-05, |
|
"loss": 0.3284, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.17776610184333816, |
|
"grad_norm": 9.000617959741795, |
|
"learning_rate": 1.8409146193140357e-05, |
|
"loss": 0.344, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.1782039493848242, |
|
"grad_norm": 12.402957661971133, |
|
"learning_rate": 1.8404723469184675e-05, |
|
"loss": 0.3871, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.17864179692631027, |
|
"grad_norm": 12.032781330103441, |
|
"learning_rate": 1.8400300745228986e-05, |
|
"loss": 0.3787, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.1790796444677963, |
|
"grad_norm": 11.222860194370364, |
|
"learning_rate": 1.8395878021273305e-05, |
|
"loss": 0.3077, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.17951749200928238, |
|
"grad_norm": 13.7968119601476, |
|
"learning_rate": 1.839145529731762e-05, |
|
"loss": 0.4786, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.17995533955076842, |
|
"grad_norm": 11.921299671774793, |
|
"learning_rate": 1.8387032573361934e-05, |
|
"loss": 0.4037, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.18039318709225446, |
|
"grad_norm": 8.879545954875514, |
|
"learning_rate": 1.838260984940625e-05, |
|
"loss": 0.4584, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.18083103463374053, |
|
"grad_norm": 14.982597428617236, |
|
"learning_rate": 1.8378187125450568e-05, |
|
"loss": 0.3815, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.18126888217522658, |
|
"grad_norm": 9.147231103073068, |
|
"learning_rate": 1.8373764401494882e-05, |
|
"loss": 0.4014, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.18170672971671264, |
|
"grad_norm": 12.294983152398926, |
|
"learning_rate": 1.8369341677539197e-05, |
|
"loss": 0.4019, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.1821445772581987, |
|
"grad_norm": 9.927235418370907, |
|
"learning_rate": 1.8364918953583512e-05, |
|
"loss": 0.3566, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.18258242479968476, |
|
"grad_norm": 11.734461472925389, |
|
"learning_rate": 1.8360496229627827e-05, |
|
"loss": 0.4055, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.1830202723411708, |
|
"grad_norm": 17.077445949446012, |
|
"learning_rate": 1.8356073505672145e-05, |
|
"loss": 0.3583, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.18345811988265687, |
|
"grad_norm": 12.654896604913636, |
|
"learning_rate": 1.835165078171646e-05, |
|
"loss": 0.3492, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.1838959674241429, |
|
"grad_norm": 16.58090166573664, |
|
"learning_rate": 1.8347228057760775e-05, |
|
"loss": 0.4069, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.18433381496562898, |
|
"grad_norm": 12.423854991570149, |
|
"learning_rate": 1.834280533380509e-05, |
|
"loss": 0.3649, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.18477166250711502, |
|
"grad_norm": 8.687455211496507, |
|
"learning_rate": 1.833838260984941e-05, |
|
"loss": 0.3713, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.1852095100486011, |
|
"grad_norm": 10.328403486459152, |
|
"learning_rate": 1.8333959885893723e-05, |
|
"loss": 0.3679, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.18564735759008713, |
|
"grad_norm": 12.579913013442551, |
|
"learning_rate": 1.832953716193804e-05, |
|
"loss": 0.3538, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.18608520513157317, |
|
"grad_norm": 10.7018660870028, |
|
"learning_rate": 1.8325114437982353e-05, |
|
"loss": 0.3401, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.18652305267305924, |
|
"grad_norm": 7.833366421027678, |
|
"learning_rate": 1.832069171402667e-05, |
|
"loss": 0.4208, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.18696090021454528, |
|
"grad_norm": 13.791401625254139, |
|
"learning_rate": 1.8316268990070986e-05, |
|
"loss": 0.3355, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.18739874775603135, |
|
"grad_norm": 15.146634353101764, |
|
"learning_rate": 1.8311846266115305e-05, |
|
"loss": 0.4045, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.1878365952975174, |
|
"grad_norm": 11.438151690151432, |
|
"learning_rate": 1.8307423542159616e-05, |
|
"loss": 0.392, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.18827444283900346, |
|
"grad_norm": 12.907194015301421, |
|
"learning_rate": 1.8303000818203934e-05, |
|
"loss": 0.4075, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.1887122903804895, |
|
"grad_norm": 13.004770430694967, |
|
"learning_rate": 1.829857809424825e-05, |
|
"loss": 0.3703, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.18915013792197558, |
|
"grad_norm": 13.767940400122603, |
|
"learning_rate": 1.8294155370292564e-05, |
|
"loss": 0.3479, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.18958798546346162, |
|
"grad_norm": 13.598058539546074, |
|
"learning_rate": 1.828973264633688e-05, |
|
"loss": 0.3426, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.1900258330049477, |
|
"grad_norm": 15.83770263349553, |
|
"learning_rate": 1.8285309922381197e-05, |
|
"loss": 0.3301, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.19046368054643373, |
|
"grad_norm": 9.739199794350526, |
|
"learning_rate": 1.8280887198425512e-05, |
|
"loss": 0.3444, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.1909015280879198, |
|
"grad_norm": 11.924698121670366, |
|
"learning_rate": 1.8276464474469827e-05, |
|
"loss": 0.3052, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.19133937562940584, |
|
"grad_norm": 12.922426444929348, |
|
"learning_rate": 1.8272041750514142e-05, |
|
"loss": 0.3838, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.1917772231708919, |
|
"grad_norm": 10.139430182884723, |
|
"learning_rate": 1.8267619026558457e-05, |
|
"loss": 0.339, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.19221507071237795, |
|
"grad_norm": 16.77456639919075, |
|
"learning_rate": 1.8263196302602775e-05, |
|
"loss": 0.3662, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.192652918253864, |
|
"grad_norm": 16.923778423361814, |
|
"learning_rate": 1.825877357864709e-05, |
|
"loss": 0.4345, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.19309076579535006, |
|
"grad_norm": 9.79791653342669, |
|
"learning_rate": 1.8254350854691405e-05, |
|
"loss": 0.374, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.1935286133368361, |
|
"grad_norm": 14.223990277487626, |
|
"learning_rate": 1.824992813073572e-05, |
|
"loss": 0.3418, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.19396646087832217, |
|
"grad_norm": 10.280657812067675, |
|
"learning_rate": 1.8245505406780038e-05, |
|
"loss": 0.4258, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.1944043084198082, |
|
"grad_norm": 11.454773579977681, |
|
"learning_rate": 1.8241082682824353e-05, |
|
"loss": 0.3558, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.19484215596129428, |
|
"grad_norm": 10.05797176474998, |
|
"learning_rate": 1.8236659958868668e-05, |
|
"loss": 0.4195, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.19528000350278032, |
|
"grad_norm": 17.413968742737737, |
|
"learning_rate": 1.8232237234912983e-05, |
|
"loss": 0.4842, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.1957178510442664, |
|
"grad_norm": 7.594947751878203, |
|
"learning_rate": 1.82278145109573e-05, |
|
"loss": 0.376, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.19615569858575244, |
|
"grad_norm": 7.752555917921395, |
|
"learning_rate": 1.8223391787001616e-05, |
|
"loss": 0.3874, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.1965935461272385, |
|
"grad_norm": 11.095188960924157, |
|
"learning_rate": 1.821896906304593e-05, |
|
"loss": 0.3199, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.19703139366872455, |
|
"grad_norm": 8.85243989748995, |
|
"learning_rate": 1.8214546339090246e-05, |
|
"loss": 0.3514, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19746924121021062, |
|
"grad_norm": 10.110803517936658, |
|
"learning_rate": 1.8210123615134564e-05, |
|
"loss": 0.3794, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.19790708875169666, |
|
"grad_norm": 10.373451209228024, |
|
"learning_rate": 1.820570089117888e-05, |
|
"loss": 0.408, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.19834493629318273, |
|
"grad_norm": 13.162706254319437, |
|
"learning_rate": 1.8201278167223194e-05, |
|
"loss": 0.3425, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.19878278383466877, |
|
"grad_norm": 15.282957890404685, |
|
"learning_rate": 1.819685544326751e-05, |
|
"loss": 0.3852, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.1992206313761548, |
|
"grad_norm": 13.370558527120265, |
|
"learning_rate": 1.8192432719311827e-05, |
|
"loss": 0.3652, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.19965847891764088, |
|
"grad_norm": 10.259778080672527, |
|
"learning_rate": 1.8188009995356142e-05, |
|
"loss": 0.356, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.20009632645912692, |
|
"grad_norm": 10.736808667558975, |
|
"learning_rate": 1.8183587271400457e-05, |
|
"loss": 0.3329, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.200534174000613, |
|
"grad_norm": 8.501933158502284, |
|
"learning_rate": 1.817916454744477e-05, |
|
"loss": 0.3397, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.20097202154209903, |
|
"grad_norm": 28.24241657929224, |
|
"learning_rate": 1.8174741823489087e-05, |
|
"loss": 0.393, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.2014098690835851, |
|
"grad_norm": 13.727765781245198, |
|
"learning_rate": 1.8170319099533405e-05, |
|
"loss": 0.306, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.20184771662507114, |
|
"grad_norm": 9.98011595817274, |
|
"learning_rate": 1.816589637557772e-05, |
|
"loss": 0.3357, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.2022855641665572, |
|
"grad_norm": 10.817739639396102, |
|
"learning_rate": 1.8161473651622035e-05, |
|
"loss": 0.3352, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.20272341170804326, |
|
"grad_norm": 17.39150590199315, |
|
"learning_rate": 1.815705092766635e-05, |
|
"loss": 0.453, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.20316125924952932, |
|
"grad_norm": 15.480462835918628, |
|
"learning_rate": 1.8152628203710668e-05, |
|
"loss": 0.3469, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.20359910679101537, |
|
"grad_norm": 14.206320760863697, |
|
"learning_rate": 1.8148205479754983e-05, |
|
"loss": 0.3593, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.20403695433250144, |
|
"grad_norm": 10.767887226596823, |
|
"learning_rate": 1.8143782755799298e-05, |
|
"loss": 0.3401, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.20447480187398748, |
|
"grad_norm": 12.432796643275674, |
|
"learning_rate": 1.8139360031843613e-05, |
|
"loss": 0.3875, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.20491264941547352, |
|
"grad_norm": 9.006109071644179, |
|
"learning_rate": 1.813493730788793e-05, |
|
"loss": 0.3813, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.2053504969569596, |
|
"grad_norm": 15.616401442667728, |
|
"learning_rate": 1.8130514583932246e-05, |
|
"loss": 0.3465, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.20578834449844563, |
|
"grad_norm": 10.59820948899193, |
|
"learning_rate": 1.812609185997656e-05, |
|
"loss": 0.382, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.2062261920399317, |
|
"grad_norm": 14.208386612211477, |
|
"learning_rate": 1.8121669136020875e-05, |
|
"loss": 0.3476, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.20666403958141774, |
|
"grad_norm": 12.589160774020666, |
|
"learning_rate": 1.8117246412065194e-05, |
|
"loss": 0.3421, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.2071018871229038, |
|
"grad_norm": 7.568667332444265, |
|
"learning_rate": 1.811282368810951e-05, |
|
"loss": 0.3727, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.20753973466438985, |
|
"grad_norm": 10.812577263881279, |
|
"learning_rate": 1.8108400964153824e-05, |
|
"loss": 0.3881, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.20797758220587592, |
|
"grad_norm": 13.018455316703584, |
|
"learning_rate": 1.810397824019814e-05, |
|
"loss": 0.3337, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.20841542974736196, |
|
"grad_norm": 13.969299442144436, |
|
"learning_rate": 1.8099555516242457e-05, |
|
"loss": 0.4362, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.20885327728884803, |
|
"grad_norm": 14.510644097153572, |
|
"learning_rate": 1.809513279228677e-05, |
|
"loss": 0.3534, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.20929112483033407, |
|
"grad_norm": 6.43215364830629, |
|
"learning_rate": 1.8090710068331087e-05, |
|
"loss": 0.3813, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.20972897237182014, |
|
"grad_norm": 9.94692028441624, |
|
"learning_rate": 1.80862873443754e-05, |
|
"loss": 0.3787, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.21016681991330619, |
|
"grad_norm": 14.658374754366635, |
|
"learning_rate": 1.8081864620419716e-05, |
|
"loss": 0.332, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.21060466745479225, |
|
"grad_norm": 12.603661359425915, |
|
"learning_rate": 1.8077441896464035e-05, |
|
"loss": 0.5098, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.2110425149962783, |
|
"grad_norm": 8.069510317555627, |
|
"learning_rate": 1.807301917250835e-05, |
|
"loss": 0.3345, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.21148036253776434, |
|
"grad_norm": 15.50460058047792, |
|
"learning_rate": 1.8068596448552664e-05, |
|
"loss": 0.4389, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.2119182100792504, |
|
"grad_norm": 7.479416895287082, |
|
"learning_rate": 1.806417372459698e-05, |
|
"loss": 0.4058, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.21235605762073645, |
|
"grad_norm": 10.414700579430006, |
|
"learning_rate": 1.8059751000641298e-05, |
|
"loss": 0.3713, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.21279390516222252, |
|
"grad_norm": 11.290754293107446, |
|
"learning_rate": 1.8055328276685612e-05, |
|
"loss": 0.3759, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.21323175270370856, |
|
"grad_norm": 11.549866408859454, |
|
"learning_rate": 1.8050905552729927e-05, |
|
"loss": 0.3288, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.21366960024519463, |
|
"grad_norm": 18.046144465733533, |
|
"learning_rate": 1.8046482828774242e-05, |
|
"loss": 0.2962, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.21410744778668067, |
|
"grad_norm": 10.63709064776316, |
|
"learning_rate": 1.804206010481856e-05, |
|
"loss": 0.3126, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.21454529532816674, |
|
"grad_norm": 13.401948678962873, |
|
"learning_rate": 1.8037637380862875e-05, |
|
"loss": 0.335, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.21498314286965278, |
|
"grad_norm": 28.923645525898227, |
|
"learning_rate": 1.803321465690719e-05, |
|
"loss": 0.3323, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.21542099041113885, |
|
"grad_norm": 12.023966063146357, |
|
"learning_rate": 1.8028791932951505e-05, |
|
"loss": 0.3436, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.2158588379526249, |
|
"grad_norm": 16.66651723158919, |
|
"learning_rate": 1.8024369208995823e-05, |
|
"loss": 0.3434, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.21629668549411096, |
|
"grad_norm": 11.447028188385977, |
|
"learning_rate": 1.801994648504014e-05, |
|
"loss": 0.3734, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.216734533035597, |
|
"grad_norm": 9.454223770724575, |
|
"learning_rate": 1.8015523761084453e-05, |
|
"loss": 0.3928, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.21717238057708305, |
|
"grad_norm": 8.902373316783942, |
|
"learning_rate": 1.8011101037128768e-05, |
|
"loss": 0.3856, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.21761022811856912, |
|
"grad_norm": 18.191989619833368, |
|
"learning_rate": 1.8006678313173086e-05, |
|
"loss": 0.4122, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.21804807566005516, |
|
"grad_norm": 10.46306034169101, |
|
"learning_rate": 1.80022555892174e-05, |
|
"loss": 0.3303, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.21848592320154123, |
|
"grad_norm": 14.018809081253679, |
|
"learning_rate": 1.7997832865261716e-05, |
|
"loss": 0.3167, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.21892377074302727, |
|
"grad_norm": 9.557186071382484, |
|
"learning_rate": 1.799341014130603e-05, |
|
"loss": 0.4131, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 45678, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|