openhermes-phi-1_5-sft-qlora / trainer_state.json
Ritvik19's picture
Upload 15 files
7d7e60e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9993739889922224,
"eval_steps": 500,
"global_step": 1559,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006410352719642222,
"grad_norm": 0.10498046875,
"learning_rate": 1.282051282051282e-06,
"loss": 1.8493,
"step": 1
},
{
"epoch": 0.003205176359821111,
"grad_norm": 0.1103515625,
"learning_rate": 6.41025641025641e-06,
"loss": 1.8865,
"step": 5
},
{
"epoch": 0.006410352719642222,
"grad_norm": 0.1005859375,
"learning_rate": 1.282051282051282e-05,
"loss": 1.8383,
"step": 10
},
{
"epoch": 0.009615529079463333,
"grad_norm": 0.10693359375,
"learning_rate": 1.923076923076923e-05,
"loss": 1.8385,
"step": 15
},
{
"epoch": 0.012820705439284444,
"grad_norm": 0.1103515625,
"learning_rate": 2.564102564102564e-05,
"loss": 1.8346,
"step": 20
},
{
"epoch": 0.016025881799105555,
"grad_norm": 0.1298828125,
"learning_rate": 3.205128205128206e-05,
"loss": 1.8127,
"step": 25
},
{
"epoch": 0.019231058158926666,
"grad_norm": 0.1435546875,
"learning_rate": 3.846153846153846e-05,
"loss": 1.7981,
"step": 30
},
{
"epoch": 0.022436234518747777,
"grad_norm": 0.1494140625,
"learning_rate": 4.4871794871794874e-05,
"loss": 1.7907,
"step": 35
},
{
"epoch": 0.025641410878568888,
"grad_norm": 0.1416015625,
"learning_rate": 5.128205128205128e-05,
"loss": 1.7468,
"step": 40
},
{
"epoch": 0.02884658723839,
"grad_norm": 0.1328125,
"learning_rate": 5.769230769230769e-05,
"loss": 1.7105,
"step": 45
},
{
"epoch": 0.03205176359821111,
"grad_norm": 0.126953125,
"learning_rate": 6.410256410256412e-05,
"loss": 1.6887,
"step": 50
},
{
"epoch": 0.035256939958032224,
"grad_norm": 0.107421875,
"learning_rate": 7.051282051282052e-05,
"loss": 1.6757,
"step": 55
},
{
"epoch": 0.03846211631785333,
"grad_norm": 0.10009765625,
"learning_rate": 7.692307692307693e-05,
"loss": 1.6736,
"step": 60
},
{
"epoch": 0.041667292677674446,
"grad_norm": 0.078125,
"learning_rate": 8.333333333333334e-05,
"loss": 1.6252,
"step": 65
},
{
"epoch": 0.04487246903749555,
"grad_norm": 0.06201171875,
"learning_rate": 8.974358974358975e-05,
"loss": 1.5655,
"step": 70
},
{
"epoch": 0.04807764539731667,
"grad_norm": 0.049072265625,
"learning_rate": 9.615384615384617e-05,
"loss": 1.5646,
"step": 75
},
{
"epoch": 0.051282821757137775,
"grad_norm": 0.04345703125,
"learning_rate": 0.00010256410256410256,
"loss": 1.5861,
"step": 80
},
{
"epoch": 0.05448799811695889,
"grad_norm": 0.039794921875,
"learning_rate": 0.00010897435897435896,
"loss": 1.5379,
"step": 85
},
{
"epoch": 0.05769317447678,
"grad_norm": 0.0400390625,
"learning_rate": 0.00011538461538461538,
"loss": 1.5557,
"step": 90
},
{
"epoch": 0.06089835083660111,
"grad_norm": 0.037841796875,
"learning_rate": 0.00012179487179487179,
"loss": 1.5102,
"step": 95
},
{
"epoch": 0.06410352719642222,
"grad_norm": 0.038330078125,
"learning_rate": 0.00012820512820512823,
"loss": 1.5048,
"step": 100
},
{
"epoch": 0.06730870355624333,
"grad_norm": 0.033935546875,
"learning_rate": 0.00013461538461538464,
"loss": 1.5127,
"step": 105
},
{
"epoch": 0.07051387991606445,
"grad_norm": 0.03173828125,
"learning_rate": 0.00014102564102564104,
"loss": 1.5161,
"step": 110
},
{
"epoch": 0.07371905627588556,
"grad_norm": 0.0301513671875,
"learning_rate": 0.00014743589743589745,
"loss": 1.4948,
"step": 115
},
{
"epoch": 0.07692423263570666,
"grad_norm": 0.03125,
"learning_rate": 0.00015384615384615385,
"loss": 1.4584,
"step": 120
},
{
"epoch": 0.08012940899552777,
"grad_norm": 0.029052734375,
"learning_rate": 0.00016025641025641028,
"loss": 1.4704,
"step": 125
},
{
"epoch": 0.08333458535534889,
"grad_norm": 0.0279541015625,
"learning_rate": 0.0001666666666666667,
"loss": 1.4411,
"step": 130
},
{
"epoch": 0.08653976171517,
"grad_norm": 0.0263671875,
"learning_rate": 0.0001730769230769231,
"loss": 1.4723,
"step": 135
},
{
"epoch": 0.0897449380749911,
"grad_norm": 0.02685546875,
"learning_rate": 0.0001794871794871795,
"loss": 1.4505,
"step": 140
},
{
"epoch": 0.09295011443481223,
"grad_norm": 0.0291748046875,
"learning_rate": 0.0001858974358974359,
"loss": 1.4367,
"step": 145
},
{
"epoch": 0.09615529079463334,
"grad_norm": 0.0262451171875,
"learning_rate": 0.00019230769230769233,
"loss": 1.4291,
"step": 150
},
{
"epoch": 0.09936046715445444,
"grad_norm": 0.0390625,
"learning_rate": 0.00019871794871794874,
"loss": 1.4075,
"step": 155
},
{
"epoch": 0.10256564351427555,
"grad_norm": 0.03857421875,
"learning_rate": 0.00019999598882613538,
"loss": 1.4203,
"step": 160
},
{
"epoch": 0.10577081987409667,
"grad_norm": 0.029541015625,
"learning_rate": 0.00019997969398381457,
"loss": 1.4188,
"step": 165
},
{
"epoch": 0.10897599623391778,
"grad_norm": 0.025146484375,
"learning_rate": 0.00019995086681563726,
"loss": 1.4512,
"step": 170
},
{
"epoch": 0.11218117259373889,
"grad_norm": 0.025146484375,
"learning_rate": 0.0001999095109350519,
"loss": 1.417,
"step": 175
},
{
"epoch": 0.11538634895356,
"grad_norm": 0.02734375,
"learning_rate": 0.0001998556315259648,
"loss": 1.4309,
"step": 180
},
{
"epoch": 0.11859152531338112,
"grad_norm": 0.0255126953125,
"learning_rate": 0.00019978923534209054,
"loss": 1.4201,
"step": 185
},
{
"epoch": 0.12179670167320222,
"grad_norm": 0.0286865234375,
"learning_rate": 0.00019971033070610518,
"loss": 1.4187,
"step": 190
},
{
"epoch": 0.12500187803302334,
"grad_norm": 0.030517578125,
"learning_rate": 0.0001996189275086033,
"loss": 1.4153,
"step": 195
},
{
"epoch": 0.12820705439284444,
"grad_norm": 0.0272216796875,
"learning_rate": 0.00019951503720685784,
"loss": 1.4279,
"step": 200
},
{
"epoch": 0.13141223075266556,
"grad_norm": 0.0267333984375,
"learning_rate": 0.0001993986728233844,
"loss": 1.4052,
"step": 205
},
{
"epoch": 0.13461740711248665,
"grad_norm": 0.0264892578125,
"learning_rate": 0.0001992698489443085,
"loss": 1.3943,
"step": 210
},
{
"epoch": 0.13782258347230777,
"grad_norm": 0.032470703125,
"learning_rate": 0.0001991285817175375,
"loss": 1.3931,
"step": 215
},
{
"epoch": 0.1410277598321289,
"grad_norm": 0.0291748046875,
"learning_rate": 0.0001989748888507363,
"loss": 1.3931,
"step": 220
},
{
"epoch": 0.14423293619195,
"grad_norm": 0.03125,
"learning_rate": 0.00019880878960910772,
"loss": 1.3899,
"step": 225
},
{
"epoch": 0.1474381125517711,
"grad_norm": 0.0322265625,
"learning_rate": 0.0001986303048129778,
"loss": 1.4305,
"step": 230
},
{
"epoch": 0.15064328891159223,
"grad_norm": 0.033203125,
"learning_rate": 0.0001984394568351858,
"loss": 1.4028,
"step": 235
},
{
"epoch": 0.15384846527141333,
"grad_norm": 0.03369140625,
"learning_rate": 0.00019823626959827997,
"loss": 1.3758,
"step": 240
},
{
"epoch": 0.15705364163123445,
"grad_norm": 0.041015625,
"learning_rate": 0.0001980207685715186,
"loss": 1.407,
"step": 245
},
{
"epoch": 0.16025881799105554,
"grad_norm": 0.034912109375,
"learning_rate": 0.00019779298076767795,
"loss": 1.3923,
"step": 250
},
{
"epoch": 0.16346399435087666,
"grad_norm": 0.047119140625,
"learning_rate": 0.00019755293473966572,
"loss": 1.3967,
"step": 255
},
{
"epoch": 0.16666917071069778,
"grad_norm": 0.043701171875,
"learning_rate": 0.00019730066057694235,
"loss": 1.4007,
"step": 260
},
{
"epoch": 0.16987434707051888,
"grad_norm": 0.050048828125,
"learning_rate": 0.00019703618990174918,
"loss": 1.3978,
"step": 265
},
{
"epoch": 0.17307952343034,
"grad_norm": 0.048095703125,
"learning_rate": 0.00019675955586514468,
"loss": 1.3744,
"step": 270
},
{
"epoch": 0.17628469979016112,
"grad_norm": 0.033935546875,
"learning_rate": 0.00019647079314284897,
"loss": 1.3929,
"step": 275
},
{
"epoch": 0.1794898761499822,
"grad_norm": 0.033935546875,
"learning_rate": 0.0001961699379308974,
"loss": 1.4031,
"step": 280
},
{
"epoch": 0.18269505250980334,
"grad_norm": 0.04052734375,
"learning_rate": 0.0001958570279411032,
"loss": 1.3813,
"step": 285
},
{
"epoch": 0.18590022886962446,
"grad_norm": 0.052734375,
"learning_rate": 0.00019553210239633056,
"loss": 1.3956,
"step": 290
},
{
"epoch": 0.18910540522944555,
"grad_norm": 0.048095703125,
"learning_rate": 0.00019519520202557797,
"loss": 1.3988,
"step": 295
},
{
"epoch": 0.19231058158926667,
"grad_norm": 0.037109375,
"learning_rate": 0.00019484636905887296,
"loss": 1.3925,
"step": 300
},
{
"epoch": 0.19551575794908777,
"grad_norm": 0.036865234375,
"learning_rate": 0.00019448564722197853,
"loss": 1.376,
"step": 305
},
{
"epoch": 0.1987209343089089,
"grad_norm": 0.04052734375,
"learning_rate": 0.00019411308173091228,
"loss": 1.3974,
"step": 310
},
{
"epoch": 0.20192611066873,
"grad_norm": 0.052490234375,
"learning_rate": 0.0001937287192862787,
"loss": 1.3765,
"step": 315
},
{
"epoch": 0.2051312870285511,
"grad_norm": 0.059326171875,
"learning_rate": 0.00019333260806741502,
"loss": 1.3769,
"step": 320
},
{
"epoch": 0.20833646338837222,
"grad_norm": 0.052490234375,
"learning_rate": 0.00019292479772635237,
"loss": 1.3792,
"step": 325
},
{
"epoch": 0.21154163974819334,
"grad_norm": 0.048583984375,
"learning_rate": 0.00019250533938159166,
"loss": 1.3968,
"step": 330
},
{
"epoch": 0.21474681610801444,
"grad_norm": 0.040283203125,
"learning_rate": 0.00019207428561169608,
"loss": 1.38,
"step": 335
},
{
"epoch": 0.21795199246783556,
"grad_norm": 0.043701171875,
"learning_rate": 0.0001916316904487005,
"loss": 1.3737,
"step": 340
},
{
"epoch": 0.22115716882765665,
"grad_norm": 0.03759765625,
"learning_rate": 0.00019117760937133844,
"loss": 1.4065,
"step": 345
},
{
"epoch": 0.22436234518747777,
"grad_norm": 0.038330078125,
"learning_rate": 0.00019071209929808806,
"loss": 1.4012,
"step": 350
},
{
"epoch": 0.2275675215472989,
"grad_norm": 0.041748046875,
"learning_rate": 0.00019023521858003742,
"loss": 1.3941,
"step": 355
},
{
"epoch": 0.23077269790712,
"grad_norm": 0.037841796875,
"learning_rate": 0.00018974702699357029,
"loss": 1.4072,
"step": 360
},
{
"epoch": 0.2339778742669411,
"grad_norm": 0.03759765625,
"learning_rate": 0.00018924758573287315,
"loss": 1.3531,
"step": 365
},
{
"epoch": 0.23718305062676223,
"grad_norm": 0.03662109375,
"learning_rate": 0.00018873695740226468,
"loss": 1.3682,
"step": 370
},
{
"epoch": 0.24038822698658333,
"grad_norm": 0.047607421875,
"learning_rate": 0.0001882152060083484,
"loss": 1.3796,
"step": 375
},
{
"epoch": 0.24359340334640445,
"grad_norm": 0.041015625,
"learning_rate": 0.00018768239695198945,
"loss": 1.3835,
"step": 380
},
{
"epoch": 0.24679857970622554,
"grad_norm": 0.04541015625,
"learning_rate": 0.0001871385970201168,
"loss": 1.3678,
"step": 385
},
{
"epoch": 0.2500037560660467,
"grad_norm": 0.04345703125,
"learning_rate": 0.00018658387437735135,
"loss": 1.3778,
"step": 390
},
{
"epoch": 0.2532089324258678,
"grad_norm": 0.06396484375,
"learning_rate": 0.00018601829855746185,
"loss": 1.3811,
"step": 395
},
{
"epoch": 0.2564141087856889,
"grad_norm": 0.057373046875,
"learning_rate": 0.00018544194045464886,
"loss": 1.3851,
"step": 400
},
{
"epoch": 0.25961928514551,
"grad_norm": 0.0458984375,
"learning_rate": 0.0001848548723146581,
"loss": 1.3865,
"step": 405
},
{
"epoch": 0.2628244615053311,
"grad_norm": 0.047119140625,
"learning_rate": 0.00018425716772572473,
"loss": 1.3638,
"step": 410
},
{
"epoch": 0.2660296378651522,
"grad_norm": 0.04443359375,
"learning_rate": 0.00018364890160934904,
"loss": 1.3918,
"step": 415
},
{
"epoch": 0.2692348142249733,
"grad_norm": 0.042236328125,
"learning_rate": 0.00018303015021090525,
"loss": 1.3794,
"step": 420
},
{
"epoch": 0.27243999058479446,
"grad_norm": 0.06005859375,
"learning_rate": 0.00018240099109008412,
"loss": 1.3836,
"step": 425
},
{
"epoch": 0.27564516694461555,
"grad_norm": 0.05419921875,
"learning_rate": 0.000181761503111171,
"loss": 1.3676,
"step": 430
},
{
"epoch": 0.27885034330443664,
"grad_norm": 0.04443359375,
"learning_rate": 0.0001811117664331604,
"loss": 1.3513,
"step": 435
},
{
"epoch": 0.2820555196642578,
"grad_norm": 0.047607421875,
"learning_rate": 0.00018045186249970784,
"loss": 1.3602,
"step": 440
},
{
"epoch": 0.2852606960240789,
"grad_norm": 0.043212890625,
"learning_rate": 0.00017978187402892148,
"loss": 1.3468,
"step": 445
},
{
"epoch": 0.2884658723839,
"grad_norm": 0.05078125,
"learning_rate": 0.00017910188500299304,
"loss": 1.3651,
"step": 450
},
{
"epoch": 0.29167104874372113,
"grad_norm": 0.04296875,
"learning_rate": 0.00017841198065767107,
"loss": 1.3763,
"step": 455
},
{
"epoch": 0.2948762251035422,
"grad_norm": 0.044921875,
"learning_rate": 0.00017771224747157652,
"loss": 1.3597,
"step": 460
},
{
"epoch": 0.2980814014633633,
"grad_norm": 0.0654296875,
"learning_rate": 0.00017700277315536305,
"loss": 1.3558,
"step": 465
},
{
"epoch": 0.30128657782318446,
"grad_norm": 0.052978515625,
"learning_rate": 0.00017628364664072218,
"loss": 1.3534,
"step": 470
},
{
"epoch": 0.30449175418300556,
"grad_norm": 0.04248046875,
"learning_rate": 0.00017555495806923635,
"loss": 1.3525,
"step": 475
},
{
"epoch": 0.30769693054282665,
"grad_norm": 0.044189453125,
"learning_rate": 0.00017481679878107926,
"loss": 1.3715,
"step": 480
},
{
"epoch": 0.3109021069026478,
"grad_norm": 0.058837890625,
"learning_rate": 0.00017406926130356692,
"loss": 1.3689,
"step": 485
},
{
"epoch": 0.3141072832624689,
"grad_norm": 0.095703125,
"learning_rate": 0.00017331243933955918,
"loss": 1.3686,
"step": 490
},
{
"epoch": 0.31731245962229,
"grad_norm": 0.059326171875,
"learning_rate": 0.00017254642775571438,
"loss": 1.3784,
"step": 495
},
{
"epoch": 0.3205176359821111,
"grad_norm": 0.07373046875,
"learning_rate": 0.00017177132257059787,
"loss": 1.3488,
"step": 500
},
{
"epoch": 0.32372281234193223,
"grad_norm": 0.0439453125,
"learning_rate": 0.00017098722094264617,
"loss": 1.3789,
"step": 505
},
{
"epoch": 0.3269279887017533,
"grad_norm": 0.052490234375,
"learning_rate": 0.00017019422115798833,
"loss": 1.3414,
"step": 510
},
{
"epoch": 0.3301331650615744,
"grad_norm": 0.0458984375,
"learning_rate": 0.0001693924226181259,
"loss": 1.3667,
"step": 515
},
{
"epoch": 0.33333834142139557,
"grad_norm": 0.05322265625,
"learning_rate": 0.00016858192582747304,
"loss": 1.3749,
"step": 520
},
{
"epoch": 0.33654351778121666,
"grad_norm": 0.0634765625,
"learning_rate": 0.00016776283238075851,
"loss": 1.3929,
"step": 525
},
{
"epoch": 0.33974869414103775,
"grad_norm": 0.050537109375,
"learning_rate": 0.00016693524495029068,
"loss": 1.3527,
"step": 530
},
{
"epoch": 0.3429538705008589,
"grad_norm": 0.059814453125,
"learning_rate": 0.00016609926727308806,
"loss": 1.3577,
"step": 535
},
{
"epoch": 0.34615904686068,
"grad_norm": 0.07861328125,
"learning_rate": 0.00016525500413787554,
"loss": 1.3639,
"step": 540
},
{
"epoch": 0.3493642232205011,
"grad_norm": 0.0595703125,
"learning_rate": 0.00016440256137194965,
"loss": 1.3608,
"step": 545
},
{
"epoch": 0.35256939958032224,
"grad_norm": 0.052978515625,
"learning_rate": 0.0001635420458279131,
"loss": 1.3324,
"step": 550
},
{
"epoch": 0.35577457594014333,
"grad_norm": 0.062255859375,
"learning_rate": 0.0001626735653702809,
"loss": 1.3283,
"step": 555
},
{
"epoch": 0.3589797522999644,
"grad_norm": 0.04931640625,
"learning_rate": 0.00016179722886195967,
"loss": 1.3287,
"step": 560
},
{
"epoch": 0.3621849286597856,
"grad_norm": 0.0703125,
"learning_rate": 0.00016091314615060195,
"loss": 1.3799,
"step": 565
},
{
"epoch": 0.36539010501960667,
"grad_norm": 0.051025390625,
"learning_rate": 0.00016002142805483685,
"loss": 1.3399,
"step": 570
},
{
"epoch": 0.36859528137942776,
"grad_norm": 0.05908203125,
"learning_rate": 0.00015912218635037896,
"loss": 1.3698,
"step": 575
},
{
"epoch": 0.3718004577392489,
"grad_norm": 0.05078125,
"learning_rate": 0.0001582155337560177,
"loss": 1.3378,
"step": 580
},
{
"epoch": 0.37500563409907,
"grad_norm": 0.051025390625,
"learning_rate": 0.00015730158391948784,
"loss": 1.337,
"step": 585
},
{
"epoch": 0.3782108104588911,
"grad_norm": 0.0498046875,
"learning_rate": 0.0001563804514032242,
"loss": 1.3527,
"step": 590
},
{
"epoch": 0.3814159868187122,
"grad_norm": 0.052734375,
"learning_rate": 0.0001554522516700011,
"loss": 1.3583,
"step": 595
},
{
"epoch": 0.38462116317853334,
"grad_norm": 0.06201171875,
"learning_rate": 0.00015451710106845955,
"loss": 1.3421,
"step": 600
},
{
"epoch": 0.38782633953835444,
"grad_norm": 0.050537109375,
"learning_rate": 0.0001535751168185228,
"loss": 1.3577,
"step": 605
},
{
"epoch": 0.39103151589817553,
"grad_norm": 0.05517578125,
"learning_rate": 0.00015262641699670328,
"loss": 1.3706,
"step": 610
},
{
"epoch": 0.3942366922579967,
"grad_norm": 0.054931640625,
"learning_rate": 0.0001516711205213016,
"loss": 1.3439,
"step": 615
},
{
"epoch": 0.3974418686178178,
"grad_norm": 0.0478515625,
"learning_rate": 0.00015070934713750042,
"loss": 1.3353,
"step": 620
},
{
"epoch": 0.40064704497763887,
"grad_norm": 0.048583984375,
"learning_rate": 0.00014974121740235456,
"loss": 1.3489,
"step": 625
},
{
"epoch": 0.40385222133746,
"grad_norm": 0.057373046875,
"learning_rate": 0.00014876685266967924,
"loss": 1.3481,
"step": 630
},
{
"epoch": 0.4070573976972811,
"grad_norm": 0.053466796875,
"learning_rate": 0.00014778637507483866,
"loss": 1.3533,
"step": 635
},
{
"epoch": 0.4102625740571022,
"grad_norm": 0.06494140625,
"learning_rate": 0.0001467999075194363,
"loss": 1.3522,
"step": 640
},
{
"epoch": 0.41346775041692335,
"grad_norm": 0.06689453125,
"learning_rate": 0.00014580757365590963,
"loss": 1.3712,
"step": 645
},
{
"epoch": 0.41667292677674445,
"grad_norm": 0.053955078125,
"learning_rate": 0.00014480949787203014,
"loss": 1.3606,
"step": 650
},
{
"epoch": 0.41987810313656554,
"grad_norm": 0.046875,
"learning_rate": 0.0001438058052753118,
"loss": 1.3488,
"step": 655
},
{
"epoch": 0.4230832794963867,
"grad_norm": 0.058837890625,
"learning_rate": 0.00014279662167732867,
"loss": 1.342,
"step": 660
},
{
"epoch": 0.4262884558562078,
"grad_norm": 0.07080078125,
"learning_rate": 0.00014178207357794486,
"loss": 1.3712,
"step": 665
},
{
"epoch": 0.4294936322160289,
"grad_norm": 0.05029296875,
"learning_rate": 0.00014076228814945778,
"loss": 1.3227,
"step": 670
},
{
"epoch": 0.43269880857585,
"grad_norm": 0.06982421875,
"learning_rate": 0.00013973739322065728,
"loss": 1.3201,
"step": 675
},
{
"epoch": 0.4359039849356711,
"grad_norm": 0.05029296875,
"learning_rate": 0.00013870751726080256,
"loss": 1.3406,
"step": 680
},
{
"epoch": 0.4391091612954922,
"grad_norm": 0.06201171875,
"learning_rate": 0.00013767278936351854,
"loss": 1.3636,
"step": 685
},
{
"epoch": 0.4423143376553133,
"grad_norm": 0.0458984375,
"learning_rate": 0.0001366333392306143,
"loss": 1.3576,
"step": 690
},
{
"epoch": 0.44551951401513445,
"grad_norm": 0.06005859375,
"learning_rate": 0.00013558929715582515,
"loss": 1.3517,
"step": 695
},
{
"epoch": 0.44872469037495555,
"grad_norm": 0.05126953125,
"learning_rate": 0.00013454079400848027,
"loss": 1.3376,
"step": 700
},
{
"epoch": 0.45192986673477664,
"grad_norm": 0.059326171875,
"learning_rate": 0.00013348796121709862,
"loss": 1.3633,
"step": 705
},
{
"epoch": 0.4551350430945978,
"grad_norm": 0.05078125,
"learning_rate": 0.00013243093075291444,
"loss": 1.3217,
"step": 710
},
{
"epoch": 0.4583402194544189,
"grad_norm": 0.056884765625,
"learning_rate": 0.00013136983511333482,
"loss": 1.3265,
"step": 715
},
{
"epoch": 0.46154539581424,
"grad_norm": 0.05859375,
"learning_rate": 0.00013030480730533145,
"loss": 1.3451,
"step": 720
},
{
"epoch": 0.4647505721740611,
"grad_norm": 0.054443359375,
"learning_rate": 0.00012923598082876812,
"loss": 1.376,
"step": 725
},
{
"epoch": 0.4679557485338822,
"grad_norm": 0.058349609375,
"learning_rate": 0.0001281634896596669,
"loss": 1.3524,
"step": 730
},
{
"epoch": 0.4711609248937033,
"grad_norm": 0.0634765625,
"learning_rate": 0.00012708746823341446,
"loss": 1.3599,
"step": 735
},
{
"epoch": 0.47436610125352446,
"grad_norm": 0.053466796875,
"learning_rate": 0.00012600805142791042,
"loss": 1.3416,
"step": 740
},
{
"epoch": 0.47757127761334556,
"grad_norm": 0.055419921875,
"learning_rate": 0.000124925374546661,
"loss": 1.3574,
"step": 745
},
{
"epoch": 0.48077645397316665,
"grad_norm": 0.052978515625,
"learning_rate": 0.0001238395733018187,
"loss": 1.3574,
"step": 750
},
{
"epoch": 0.4839816303329878,
"grad_norm": 0.053466796875,
"learning_rate": 0.00012275078379717089,
"loss": 1.3341,
"step": 755
},
{
"epoch": 0.4871868066928089,
"grad_norm": 0.0556640625,
"learning_rate": 0.00012165914251107952,
"loss": 1.3241,
"step": 760
},
{
"epoch": 0.49039198305263,
"grad_norm": 0.054443359375,
"learning_rate": 0.00012056478627937365,
"loss": 1.3788,
"step": 765
},
{
"epoch": 0.4935971594124511,
"grad_norm": 0.049560546875,
"learning_rate": 0.00011946785227819726,
"loss": 1.3581,
"step": 770
},
{
"epoch": 0.49680233577227223,
"grad_norm": 0.05615234375,
"learning_rate": 0.00011836847800681443,
"loss": 1.3328,
"step": 775
},
{
"epoch": 0.5000075121320934,
"grad_norm": 0.0556640625,
"learning_rate": 0.00011726680127037401,
"loss": 1.3533,
"step": 780
},
{
"epoch": 0.5032126884919145,
"grad_norm": 0.05419921875,
"learning_rate": 0.00011616296016263582,
"loss": 1.3622,
"step": 785
},
{
"epoch": 0.5064178648517356,
"grad_norm": 0.049072265625,
"learning_rate": 0.00011505709304866084,
"loss": 1.3446,
"step": 790
},
{
"epoch": 0.5096230412115567,
"grad_norm": 0.0712890625,
"learning_rate": 0.00011394933854746733,
"loss": 1.3384,
"step": 795
},
{
"epoch": 0.5128282175713778,
"grad_norm": 0.055908203125,
"learning_rate": 0.00011283983551465511,
"loss": 1.3378,
"step": 800
},
{
"epoch": 0.5160333939311988,
"grad_norm": 0.060791015625,
"learning_rate": 0.00011172872302500017,
"loss": 1.3656,
"step": 805
},
{
"epoch": 0.51923857029102,
"grad_norm": 0.0791015625,
"learning_rate": 0.00011061614035502193,
"loss": 1.3521,
"step": 810
},
{
"epoch": 0.5224437466508411,
"grad_norm": 0.05859375,
"learning_rate": 0.00010950222696552486,
"loss": 1.3614,
"step": 815
},
{
"epoch": 0.5256489230106622,
"grad_norm": 0.08203125,
"learning_rate": 0.00010838712248411753,
"loss": 1.3314,
"step": 820
},
{
"epoch": 0.5288540993704833,
"grad_norm": 0.05322265625,
"learning_rate": 0.00010727096668771036,
"loss": 1.338,
"step": 825
},
{
"epoch": 0.5320592757303044,
"grad_norm": 0.0556640625,
"learning_rate": 0.0001061538994849946,
"loss": 1.3611,
"step": 830
},
{
"epoch": 0.5352644520901255,
"grad_norm": 0.06201171875,
"learning_rate": 0.00010503606089890529,
"loss": 1.3175,
"step": 835
},
{
"epoch": 0.5384696284499466,
"grad_norm": 0.05712890625,
"learning_rate": 0.00010391759104906928,
"loss": 1.3525,
"step": 840
},
{
"epoch": 0.5416748048097678,
"grad_norm": 0.0498046875,
"learning_rate": 0.00010279863013424154,
"loss": 1.3313,
"step": 845
},
{
"epoch": 0.5448799811695889,
"grad_norm": 0.051025390625,
"learning_rate": 0.00010167931841473142,
"loss": 1.3349,
"step": 850
},
{
"epoch": 0.54808515752941,
"grad_norm": 0.06298828125,
"learning_rate": 0.00010055979619482112,
"loss": 1.3408,
"step": 855
},
{
"epoch": 0.5512903338892311,
"grad_norm": 0.058837890625,
"learning_rate": 9.944020380517889e-05,
"loss": 1.3175,
"step": 860
},
{
"epoch": 0.5544955102490522,
"grad_norm": 0.050048828125,
"learning_rate": 9.832068158526862e-05,
"loss": 1.3375,
"step": 865
},
{
"epoch": 0.5577006866088733,
"grad_norm": 0.0498046875,
"learning_rate": 9.720136986575848e-05,
"loss": 1.3475,
"step": 870
},
{
"epoch": 0.5609058629686945,
"grad_norm": 0.051513671875,
"learning_rate": 9.608240895093076e-05,
"loss": 1.3295,
"step": 875
},
{
"epoch": 0.5641110393285156,
"grad_norm": 0.046142578125,
"learning_rate": 9.496393910109472e-05,
"loss": 1.3429,
"step": 880
},
{
"epoch": 0.5673162156883367,
"grad_norm": 0.04443359375,
"learning_rate": 9.384610051500545e-05,
"loss": 1.3293,
"step": 885
},
{
"epoch": 0.5705213920481578,
"grad_norm": 0.052734375,
"learning_rate": 9.272903331228968e-05,
"loss": 1.3498,
"step": 890
},
{
"epoch": 0.5737265684079789,
"grad_norm": 0.062255859375,
"learning_rate": 9.161287751588248e-05,
"loss": 1.3351,
"step": 895
},
{
"epoch": 0.5769317447678,
"grad_norm": 0.064453125,
"learning_rate": 9.049777303447516e-05,
"loss": 1.353,
"step": 900
},
{
"epoch": 0.5801369211276212,
"grad_norm": 0.0556640625,
"learning_rate": 8.938385964497808e-05,
"loss": 1.3363,
"step": 905
},
{
"epoch": 0.5833420974874423,
"grad_norm": 0.06201171875,
"learning_rate": 8.827127697499984e-05,
"loss": 1.3696,
"step": 910
},
{
"epoch": 0.5865472738472634,
"grad_norm": 0.080078125,
"learning_rate": 8.71601644853449e-05,
"loss": 1.3481,
"step": 915
},
{
"epoch": 0.5897524502070844,
"grad_norm": 0.06884765625,
"learning_rate": 8.605066145253268e-05,
"loss": 1.3256,
"step": 920
},
{
"epoch": 0.5929576265669055,
"grad_norm": 0.051513671875,
"learning_rate": 8.494290695133917e-05,
"loss": 1.3544,
"step": 925
},
{
"epoch": 0.5961628029267266,
"grad_norm": 0.05810546875,
"learning_rate": 8.383703983736419e-05,
"loss": 1.3443,
"step": 930
},
{
"epoch": 0.5993679792865477,
"grad_norm": 0.06103515625,
"learning_rate": 8.2733198729626e-05,
"loss": 1.3816,
"step": 935
},
{
"epoch": 0.6025731556463689,
"grad_norm": 0.046142578125,
"learning_rate": 8.163152199318558e-05,
"loss": 1.3247,
"step": 940
},
{
"epoch": 0.60577833200619,
"grad_norm": 0.053466796875,
"learning_rate": 8.053214772180277e-05,
"loss": 1.3532,
"step": 945
},
{
"epoch": 0.6089835083660111,
"grad_norm": 0.05419921875,
"learning_rate": 7.94352137206264e-05,
"loss": 1.3443,
"step": 950
},
{
"epoch": 0.6121886847258322,
"grad_norm": 0.047119140625,
"learning_rate": 7.83408574889205e-05,
"loss": 1.3327,
"step": 955
},
{
"epoch": 0.6153938610856533,
"grad_norm": 0.0537109375,
"learning_rate": 7.724921620282916e-05,
"loss": 1.334,
"step": 960
},
{
"epoch": 0.6185990374454744,
"grad_norm": 0.0703125,
"learning_rate": 7.616042669818133e-05,
"loss": 1.3572,
"step": 965
},
{
"epoch": 0.6218042138052956,
"grad_norm": 0.055419921875,
"learning_rate": 7.507462545333903e-05,
"loss": 1.3322,
"step": 970
},
{
"epoch": 0.6250093901651167,
"grad_norm": 0.07958984375,
"learning_rate": 7.399194857208961e-05,
"loss": 1.3222,
"step": 975
},
{
"epoch": 0.6282145665249378,
"grad_norm": 0.05078125,
"learning_rate": 7.291253176658561e-05,
"loss": 1.3375,
"step": 980
},
{
"epoch": 0.6314197428847589,
"grad_norm": 0.08251953125,
"learning_rate": 7.183651034033313e-05,
"loss": 1.3397,
"step": 985
},
{
"epoch": 0.63462491924458,
"grad_norm": 0.04931640625,
"learning_rate": 7.07640191712319e-05,
"loss": 1.34,
"step": 990
},
{
"epoch": 0.6378300956044011,
"grad_norm": 0.049072265625,
"learning_rate": 6.969519269466857e-05,
"loss": 1.3344,
"step": 995
},
{
"epoch": 0.6410352719642222,
"grad_norm": 0.052490234375,
"learning_rate": 6.863016488666517e-05,
"loss": 1.3475,
"step": 1000
},
{
"epoch": 0.6442404483240434,
"grad_norm": 0.04736328125,
"learning_rate": 6.756906924708558e-05,
"loss": 1.3317,
"step": 1005
},
{
"epoch": 0.6474456246838645,
"grad_norm": 0.050537109375,
"learning_rate": 6.651203878290139e-05,
"loss": 1.3243,
"step": 1010
},
{
"epoch": 0.6506508010436856,
"grad_norm": 0.053955078125,
"learning_rate": 6.545920599151975e-05,
"loss": 1.3351,
"step": 1015
},
{
"epoch": 0.6538559774035066,
"grad_norm": 0.058837890625,
"learning_rate": 6.441070284417487e-05,
"loss": 1.3536,
"step": 1020
},
{
"epoch": 0.6570611537633277,
"grad_norm": 0.060791015625,
"learning_rate": 6.336666076938572e-05,
"loss": 1.3064,
"step": 1025
},
{
"epoch": 0.6602663301231488,
"grad_norm": 0.056396484375,
"learning_rate": 6.232721063648148e-05,
"loss": 1.3496,
"step": 1030
},
{
"epoch": 0.66347150648297,
"grad_norm": 0.0478515625,
"learning_rate": 6.12924827391975e-05,
"loss": 1.3487,
"step": 1035
},
{
"epoch": 0.6666766828427911,
"grad_norm": 0.05126953125,
"learning_rate": 6.026260677934272e-05,
"loss": 1.3241,
"step": 1040
},
{
"epoch": 0.6698818592026122,
"grad_norm": 0.0478515625,
"learning_rate": 5.9237711850542246e-05,
"loss": 1.3454,
"step": 1045
},
{
"epoch": 0.6730870355624333,
"grad_norm": 0.046142578125,
"learning_rate": 5.8217926422055126e-05,
"loss": 1.3364,
"step": 1050
},
{
"epoch": 0.6762922119222544,
"grad_norm": 0.054443359375,
"learning_rate": 5.7203378322671355e-05,
"loss": 1.3152,
"step": 1055
},
{
"epoch": 0.6794973882820755,
"grad_norm": 0.0546875,
"learning_rate": 5.619419472468823e-05,
"loss": 1.3486,
"step": 1060
},
{
"epoch": 0.6827025646418967,
"grad_norm": 0.05029296875,
"learning_rate": 5.519050212796986e-05,
"loss": 1.3301,
"step": 1065
},
{
"epoch": 0.6859077410017178,
"grad_norm": 0.051513671875,
"learning_rate": 5.419242634409039e-05,
"loss": 1.3279,
"step": 1070
},
{
"epoch": 0.6891129173615389,
"grad_norm": 0.0478515625,
"learning_rate": 5.32000924805637e-05,
"loss": 1.3415,
"step": 1075
},
{
"epoch": 0.69231809372136,
"grad_norm": 0.04638671875,
"learning_rate": 5.2213624925161386e-05,
"loss": 1.3449,
"step": 1080
},
{
"epoch": 0.6955232700811811,
"grad_norm": 0.04541015625,
"learning_rate": 5.123314733032074e-05,
"loss": 1.3442,
"step": 1085
},
{
"epoch": 0.6987284464410022,
"grad_norm": 0.04736328125,
"learning_rate": 5.0258782597645446e-05,
"loss": 1.3309,
"step": 1090
},
{
"epoch": 0.7019336228008233,
"grad_norm": 0.0478515625,
"learning_rate": 4.929065286249959e-05,
"loss": 1.3564,
"step": 1095
},
{
"epoch": 0.7051387991606445,
"grad_norm": 0.048095703125,
"learning_rate": 4.832887947869841e-05,
"loss": 1.3578,
"step": 1100
},
{
"epoch": 0.7083439755204656,
"grad_norm": 0.047119140625,
"learning_rate": 4.737358300329673e-05,
"loss": 1.3417,
"step": 1105
},
{
"epoch": 0.7115491518802867,
"grad_norm": 0.05029296875,
"learning_rate": 4.642488318147723e-05,
"loss": 1.3259,
"step": 1110
},
{
"epoch": 0.7147543282401078,
"grad_norm": 0.052001953125,
"learning_rate": 4.548289893154051e-05,
"loss": 1.3568,
"step": 1115
},
{
"epoch": 0.7179595045999289,
"grad_norm": 0.047607421875,
"learning_rate": 4.4547748329998925e-05,
"loss": 1.3211,
"step": 1120
},
{
"epoch": 0.72116468095975,
"grad_norm": 0.05126953125,
"learning_rate": 4.361954859677584e-05,
"loss": 1.3398,
"step": 1125
},
{
"epoch": 0.7243698573195712,
"grad_norm": 0.048095703125,
"learning_rate": 4.2698416080512204e-05,
"loss": 1.3266,
"step": 1130
},
{
"epoch": 0.7275750336793922,
"grad_norm": 0.050048828125,
"learning_rate": 4.1784466243982324e-05,
"loss": 1.3447,
"step": 1135
},
{
"epoch": 0.7307802100392133,
"grad_norm": 0.052001953125,
"learning_rate": 4.0877813649621076e-05,
"loss": 1.3385,
"step": 1140
},
{
"epoch": 0.7339853863990344,
"grad_norm": 0.04638671875,
"learning_rate": 3.997857194516319e-05,
"loss": 1.3403,
"step": 1145
},
{
"epoch": 0.7371905627588555,
"grad_norm": 0.05078125,
"learning_rate": 3.9086853849398065e-05,
"loss": 1.3503,
"step": 1150
},
{
"epoch": 0.7403957391186766,
"grad_norm": 0.06396484375,
"learning_rate": 3.8202771138040336e-05,
"loss": 1.3354,
"step": 1155
},
{
"epoch": 0.7436009154784978,
"grad_norm": 0.05078125,
"learning_rate": 3.732643462971912e-05,
"loss": 1.3258,
"step": 1160
},
{
"epoch": 0.7468060918383189,
"grad_norm": 0.049560546875,
"learning_rate": 3.6457954172086896e-05,
"loss": 1.3493,
"step": 1165
},
{
"epoch": 0.75001126819814,
"grad_norm": 0.046875,
"learning_rate": 3.559743862805034e-05,
"loss": 1.3275,
"step": 1170
},
{
"epoch": 0.7532164445579611,
"grad_norm": 0.045654296875,
"learning_rate": 3.47449958621245e-05,
"loss": 1.3148,
"step": 1175
},
{
"epoch": 0.7564216209177822,
"grad_norm": 0.051513671875,
"learning_rate": 3.390073272691198e-05,
"loss": 1.3338,
"step": 1180
},
{
"epoch": 0.7596267972776033,
"grad_norm": 0.049072265625,
"learning_rate": 3.306475504970931e-05,
"loss": 1.2935,
"step": 1185
},
{
"epoch": 0.7628319736374244,
"grad_norm": 0.04833984375,
"learning_rate": 3.2237167619241495e-05,
"loss": 1.3275,
"step": 1190
},
{
"epoch": 0.7660371499972456,
"grad_norm": 0.056396484375,
"learning_rate": 3.141807417252697e-05,
"loss": 1.3461,
"step": 1195
},
{
"epoch": 0.7692423263570667,
"grad_norm": 0.04345703125,
"learning_rate": 3.060757738187409e-05,
"loss": 1.3394,
"step": 1200
},
{
"epoch": 0.7724475027168878,
"grad_norm": 0.053955078125,
"learning_rate": 2.980577884201169e-05,
"loss": 1.3511,
"step": 1205
},
{
"epoch": 0.7756526790767089,
"grad_norm": 0.04736328125,
"learning_rate": 2.9012779057353855e-05,
"loss": 1.3213,
"step": 1210
},
{
"epoch": 0.77885785543653,
"grad_norm": 0.0576171875,
"learning_rate": 2.822867742940214e-05,
"loss": 1.3384,
"step": 1215
},
{
"epoch": 0.7820630317963511,
"grad_norm": 0.04833984375,
"learning_rate": 2.745357224428563e-05,
"loss": 1.343,
"step": 1220
},
{
"epoch": 0.7852682081561723,
"grad_norm": 0.049560546875,
"learning_rate": 2.6687560660440858e-05,
"loss": 1.3541,
"step": 1225
},
{
"epoch": 0.7884733845159934,
"grad_norm": 0.047607421875,
"learning_rate": 2.593073869643312e-05,
"loss": 1.3491,
"step": 1230
},
{
"epoch": 0.7916785608758145,
"grad_norm": 0.04248046875,
"learning_rate": 2.518320121892076e-05,
"loss": 1.3439,
"step": 1235
},
{
"epoch": 0.7948837372356355,
"grad_norm": 0.04736328125,
"learning_rate": 2.4445041930763678e-05,
"loss": 1.3236,
"step": 1240
},
{
"epoch": 0.7980889135954566,
"grad_norm": 0.0478515625,
"learning_rate": 2.371635335927781e-05,
"loss": 1.3505,
"step": 1245
},
{
"epoch": 0.8012940899552777,
"grad_norm": 0.0517578125,
"learning_rate": 2.2997226844636977e-05,
"loss": 1.3223,
"step": 1250
},
{
"epoch": 0.8044992663150989,
"grad_norm": 0.046630859375,
"learning_rate": 2.2287752528423468e-05,
"loss": 1.3282,
"step": 1255
},
{
"epoch": 0.80770444267492,
"grad_norm": 0.046875,
"learning_rate": 2.1588019342328968e-05,
"loss": 1.3294,
"step": 1260
},
{
"epoch": 0.8109096190347411,
"grad_norm": 0.0439453125,
"learning_rate": 2.089811499700699e-05,
"loss": 1.3356,
"step": 1265
},
{
"epoch": 0.8141147953945622,
"grad_norm": 0.045654296875,
"learning_rate": 2.021812597107855e-05,
"loss": 1.3486,
"step": 1270
},
{
"epoch": 0.8173199717543833,
"grad_norm": 0.04931640625,
"learning_rate": 1.954813750029216e-05,
"loss": 1.3492,
"step": 1275
},
{
"epoch": 0.8205251481142044,
"grad_norm": 0.05126953125,
"learning_rate": 1.8888233566839653e-05,
"loss": 1.329,
"step": 1280
},
{
"epoch": 0.8237303244740255,
"grad_norm": 0.048095703125,
"learning_rate": 1.8238496888828982e-05,
"loss": 1.317,
"step": 1285
},
{
"epoch": 0.8269355008338467,
"grad_norm": 0.051513671875,
"learning_rate": 1.759900890991589e-05,
"loss": 1.3177,
"step": 1290
},
{
"epoch": 0.8301406771936678,
"grad_norm": 0.0458984375,
"learning_rate": 1.696984978909476e-05,
"loss": 1.323,
"step": 1295
},
{
"epoch": 0.8333458535534889,
"grad_norm": 0.0439453125,
"learning_rate": 1.6351098390650966e-05,
"loss": 1.3517,
"step": 1300
},
{
"epoch": 0.83655102991331,
"grad_norm": 0.052978515625,
"learning_rate": 1.5742832274275288e-05,
"loss": 1.35,
"step": 1305
},
{
"epoch": 0.8397562062731311,
"grad_norm": 0.049072265625,
"learning_rate": 1.514512768534193e-05,
"loss": 1.3614,
"step": 1310
},
{
"epoch": 0.8429613826329522,
"grad_norm": 0.0439453125,
"learning_rate": 1.4558059545351143e-05,
"loss": 1.3389,
"step": 1315
},
{
"epoch": 0.8461665589927734,
"grad_norm": 0.04541015625,
"learning_rate": 1.3981701442538153e-05,
"loss": 1.3272,
"step": 1320
},
{
"epoch": 0.8493717353525945,
"grad_norm": 0.048583984375,
"learning_rate": 1.3416125622648668e-05,
"loss": 1.3324,
"step": 1325
},
{
"epoch": 0.8525769117124156,
"grad_norm": 0.04541015625,
"learning_rate": 1.286140297988323e-05,
"loss": 1.3352,
"step": 1330
},
{
"epoch": 0.8557820880722367,
"grad_norm": 0.04443359375,
"learning_rate": 1.231760304801054e-05,
"loss": 1.3361,
"step": 1335
},
{
"epoch": 0.8589872644320578,
"grad_norm": 0.047119140625,
"learning_rate": 1.1784793991651621e-05,
"loss": 1.3252,
"step": 1340
},
{
"epoch": 0.8621924407918788,
"grad_norm": 0.044189453125,
"learning_rate": 1.1263042597735362e-05,
"loss": 1.3468,
"step": 1345
},
{
"epoch": 0.8653976171517,
"grad_norm": 0.046630859375,
"learning_rate": 1.0752414267126875e-05,
"loss": 1.3301,
"step": 1350
},
{
"epoch": 0.8686027935115211,
"grad_norm": 0.05029296875,
"learning_rate": 1.0252973006429733e-05,
"loss": 1.36,
"step": 1355
},
{
"epoch": 0.8718079698713422,
"grad_norm": 0.047119140625,
"learning_rate": 9.764781419962577e-06,
"loss": 1.3482,
"step": 1360
},
{
"epoch": 0.8750131462311633,
"grad_norm": 0.04638671875,
"learning_rate": 9.287900701911944e-06,
"loss": 1.3232,
"step": 1365
},
{
"epoch": 0.8782183225909844,
"grad_norm": 0.04931640625,
"learning_rate": 8.822390628661582e-06,
"loss": 1.3571,
"step": 1370
},
{
"epoch": 0.8814234989508055,
"grad_norm": 0.044921875,
"learning_rate": 8.368309551299536e-06,
"loss": 1.3274,
"step": 1375
},
{
"epoch": 0.8846286753106266,
"grad_norm": 0.04541015625,
"learning_rate": 7.92571438830394e-06,
"loss": 1.3656,
"step": 1380
},
{
"epoch": 0.8878338516704478,
"grad_norm": 0.046142578125,
"learning_rate": 7.494660618408378e-06,
"loss": 1.3659,
"step": 1385
},
{
"epoch": 0.8910390280302689,
"grad_norm": 0.04541015625,
"learning_rate": 7.075202273647652e-06,
"loss": 1.3305,
"step": 1390
},
{
"epoch": 0.89424420439009,
"grad_norm": 0.046875,
"learning_rate": 6.667391932584999e-06,
"loss": 1.36,
"step": 1395
},
{
"epoch": 0.8974493807499111,
"grad_norm": 0.0458984375,
"learning_rate": 6.271280713721317e-06,
"loss": 1.3382,
"step": 1400
},
{
"epoch": 0.9006545571097322,
"grad_norm": 0.04638671875,
"learning_rate": 5.886918269087716e-06,
"loss": 1.326,
"step": 1405
},
{
"epoch": 0.9038597334695533,
"grad_norm": 0.046875,
"learning_rate": 5.514352778021492e-06,
"loss": 1.3602,
"step": 1410
},
{
"epoch": 0.9070649098293745,
"grad_norm": 0.046142578125,
"learning_rate": 5.153630941127063e-06,
"loss": 1.3407,
"step": 1415
},
{
"epoch": 0.9102700861891956,
"grad_norm": 0.046875,
"learning_rate": 4.804797974422026e-06,
"loss": 1.3241,
"step": 1420
},
{
"epoch": 0.9134752625490167,
"grad_norm": 0.050537109375,
"learning_rate": 4.4678976036694355e-06,
"loss": 1.3324,
"step": 1425
},
{
"epoch": 0.9166804389088378,
"grad_norm": 0.04443359375,
"learning_rate": 4.142972058896811e-06,
"loss": 1.3267,
"step": 1430
},
{
"epoch": 0.9198856152686589,
"grad_norm": 0.044921875,
"learning_rate": 3.830062069102602e-06,
"loss": 1.3496,
"step": 1435
},
{
"epoch": 0.92309079162848,
"grad_norm": 0.046630859375,
"learning_rate": 3.529206857151035e-06,
"loss": 1.3481,
"step": 1440
},
{
"epoch": 0.9262959679883012,
"grad_norm": 0.04345703125,
"learning_rate": 3.240444134855347e-06,
"loss": 1.3433,
"step": 1445
},
{
"epoch": 0.9295011443481223,
"grad_norm": 0.045654296875,
"learning_rate": 2.963810098250841e-06,
"loss": 1.3555,
"step": 1450
},
{
"epoch": 0.9327063207079433,
"grad_norm": 0.044921875,
"learning_rate": 2.6993394230576674e-06,
"loss": 1.3218,
"step": 1455
},
{
"epoch": 0.9359114970677644,
"grad_norm": 0.04638671875,
"learning_rate": 2.4470652603343023e-06,
"loss": 1.346,
"step": 1460
},
{
"epoch": 0.9391166734275855,
"grad_norm": 0.044677734375,
"learning_rate": 2.2070192323220607e-06,
"loss": 1.3551,
"step": 1465
},
{
"epoch": 0.9423218497874066,
"grad_norm": 0.0439453125,
"learning_rate": 1.9792314284813986e-06,
"loss": 1.3262,
"step": 1470
},
{
"epoch": 0.9455270261472277,
"grad_norm": 0.04736328125,
"learning_rate": 1.763730401720065e-06,
"loss": 1.3257,
"step": 1475
},
{
"epoch": 0.9487322025070489,
"grad_norm": 0.046142578125,
"learning_rate": 1.5605431648141878e-06,
"loss": 1.3158,
"step": 1480
},
{
"epoch": 0.95193737886687,
"grad_norm": 0.044677734375,
"learning_rate": 1.3696951870222018e-06,
"loss": 1.3637,
"step": 1485
},
{
"epoch": 0.9551425552266911,
"grad_norm": 0.053466796875,
"learning_rate": 1.1912103908922945e-06,
"loss": 1.3337,
"step": 1490
},
{
"epoch": 0.9583477315865122,
"grad_norm": 0.050048828125,
"learning_rate": 1.0251111492637244e-06,
"loss": 1.3557,
"step": 1495
},
{
"epoch": 0.9615529079463333,
"grad_norm": 0.05126953125,
"learning_rate": 8.714182824624883e-07,
"loss": 1.3373,
"step": 1500
},
{
"epoch": 0.9647580843061544,
"grad_norm": 0.0458984375,
"learning_rate": 7.301510556914859e-07,
"loss": 1.3274,
"step": 1505
},
{
"epoch": 0.9679632606659756,
"grad_norm": 0.05224609375,
"learning_rate": 6.01327176615607e-07,
"loss": 1.3894,
"step": 1510
},
{
"epoch": 0.9711684370257967,
"grad_norm": 0.045166015625,
"learning_rate": 4.84962793142163e-07,
"loss": 1.3419,
"step": 1515
},
{
"epoch": 0.9743736133856178,
"grad_norm": 0.044677734375,
"learning_rate": 3.8107249139672783e-07,
"loss": 1.3321,
"step": 1520
},
{
"epoch": 0.9775787897454389,
"grad_norm": 0.0478515625,
"learning_rate": 2.89669293894812e-07,
"loss": 1.3497,
"step": 1525
},
{
"epoch": 0.98078396610526,
"grad_norm": 0.049072265625,
"learning_rate": 2.1076465790946798e-07,
"loss": 1.3518,
"step": 1530
},
{
"epoch": 0.9839891424650811,
"grad_norm": 0.04638671875,
"learning_rate": 1.443684740351947e-07,
"loss": 1.3224,
"step": 1535
},
{
"epoch": 0.9871943188249022,
"grad_norm": 0.049072265625,
"learning_rate": 9.048906494811826e-08,
"loss": 1.3513,
"step": 1540
},
{
"epoch": 0.9903994951847234,
"grad_norm": 0.050048828125,
"learning_rate": 4.9133184362748497e-08,
"loss": 1.3494,
"step": 1545
},
{
"epoch": 0.9936046715445445,
"grad_norm": 0.04443359375,
"learning_rate": 2.0306016185456244e-08,
"loss": 1.3344,
"step": 1550
},
{
"epoch": 0.9968098479043656,
"grad_norm": 0.047119140625,
"learning_rate": 4.011173864637563e-09,
"loss": 1.3662,
"step": 1555
},
{
"epoch": 0.9993739889922224,
"eval_loss": 1.4191993474960327,
"eval_runtime": 1938.5869,
"eval_samples_per_second": 7.3,
"eval_steps_per_second": 7.3,
"step": 1559
},
{
"epoch": 0.9993739889922224,
"step": 1559,
"total_flos": 3.232184148701479e+18,
"train_loss": 0.016414370117774753,
"train_runtime": 2971.8566,
"train_samples_per_second": 67.189,
"train_steps_per_second": 0.525
}
],
"logging_steps": 5,
"max_steps": 1559,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"total_flos": 3.232184148701479e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}