{
  "best_metric": 10.3283109664917,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.16703786191536749,
  "eval_steps": 25,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011135857461024498,
      "grad_norm": 0.021220365539193153,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 10.3743,
      "step": 1
    },
    {
      "epoch": 0.0011135857461024498,
      "eval_loss": 10.371726036071777,
      "eval_runtime": 0.0507,
      "eval_samples_per_second": 985.944,
      "eval_steps_per_second": 39.438,
      "step": 1
    },
    {
      "epoch": 0.0022271714922048997,
      "grad_norm": 0.02089685946702957,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 10.3748,
      "step": 2
    },
    {
      "epoch": 0.0033407572383073497,
      "grad_norm": 0.022434862330555916,
      "learning_rate": 8.999999999999999e-05,
      "loss": 10.3751,
      "step": 3
    },
    {
      "epoch": 0.004454342984409799,
      "grad_norm": 0.022230561822652817,
      "learning_rate": 0.00011999999999999999,
      "loss": 10.3744,
      "step": 4
    },
    {
      "epoch": 0.005567928730512249,
      "grad_norm": 0.022404590621590614,
      "learning_rate": 0.00015,
      "loss": 10.3725,
      "step": 5
    },
    {
      "epoch": 0.0066815144766146995,
      "grad_norm": 0.023340720683336258,
      "learning_rate": 0.00017999999999999998,
      "loss": 10.3725,
      "step": 6
    },
    {
      "epoch": 0.0077951002227171495,
      "grad_norm": 0.02542109414935112,
      "learning_rate": 0.00020999999999999998,
      "loss": 10.3724,
      "step": 7
    },
    {
      "epoch": 0.008908685968819599,
      "grad_norm": 0.027613935992121696,
      "learning_rate": 0.00023999999999999998,
      "loss": 10.3733,
      "step": 8
    },
    {
      "epoch": 0.01002227171492205,
      "grad_norm": 0.027721751481294632,
      "learning_rate": 0.00027,
      "loss": 10.3718,
      "step": 9
    },
    {
      "epoch": 0.011135857461024499,
      "grad_norm": 0.031006429344415665,
      "learning_rate": 0.0003,
      "loss": 10.3713,
      "step": 10
    },
    {
      "epoch": 0.012249443207126948,
      "grad_norm": 0.03272189199924469,
      "learning_rate": 0.0002999794957488703,
      "loss": 10.3694,
      "step": 11
    },
    {
      "epoch": 0.013363028953229399,
      "grad_norm": 0.03870847821235657,
      "learning_rate": 0.0002999179886011389,
      "loss": 10.3682,
      "step": 12
    },
    {
      "epoch": 0.014476614699331848,
      "grad_norm": 0.026634380221366882,
      "learning_rate": 0.0002998154953722457,
      "loss": 10.3716,
      "step": 13
    },
    {
      "epoch": 0.015590200445434299,
      "grad_norm": 0.030647795647382736,
      "learning_rate": 0.00029967204408281613,
      "loss": 10.3724,
      "step": 14
    },
    {
      "epoch": 0.01670378619153675,
      "grad_norm": 0.03259807825088501,
      "learning_rate": 0.00029948767395100045,
      "loss": 10.3729,
      "step": 15
    },
    {
      "epoch": 0.017817371937639197,
      "grad_norm": 0.03251082822680473,
      "learning_rate": 0.0002992624353817517,
      "loss": 10.3723,
      "step": 16
    },
    {
      "epoch": 0.01893095768374165,
      "grad_norm": 0.03374587371945381,
      "learning_rate": 0.0002989963899530457,
      "loss": 10.3709,
      "step": 17
    },
    {
      "epoch": 0.0200445434298441,
      "grad_norm": 0.03536859527230263,
      "learning_rate": 0.00029868961039904624,
      "loss": 10.3695,
      "step": 18
    },
    {
      "epoch": 0.021158129175946547,
      "grad_norm": 0.04094082862138748,
      "learning_rate": 0.00029834218059022024,
      "loss": 10.3676,
      "step": 19
    },
    {
      "epoch": 0.022271714922048998,
      "grad_norm": 0.04486091434955597,
      "learning_rate": 0.00029795419551040833,
      "loss": 10.3681,
      "step": 20
    },
    {
      "epoch": 0.02338530066815145,
      "grad_norm": 0.04667678475379944,
      "learning_rate": 0.00029752576123085736,
      "loss": 10.3666,
      "step": 21
    },
    {
      "epoch": 0.024498886414253896,
      "grad_norm": 0.049042511731386185,
      "learning_rate": 0.0002970569948812214,
      "loss": 10.3668,
      "step": 22
    },
    {
      "epoch": 0.025612472160356347,
      "grad_norm": 0.05766143649816513,
      "learning_rate": 0.0002965480246175399,
      "loss": 10.364,
      "step": 23
    },
    {
      "epoch": 0.026726057906458798,
      "grad_norm": 0.063766248524189,
      "learning_rate": 0.0002959989895872009,
      "loss": 10.362,
      "step": 24
    },
    {
      "epoch": 0.02783964365256125,
      "grad_norm": 0.06413163244724274,
      "learning_rate": 0.0002954100398908995,
      "loss": 10.3616,
      "step": 25
    },
    {
      "epoch": 0.02783964365256125,
      "eval_loss": 10.362093925476074,
      "eval_runtime": 0.0468,
      "eval_samples_per_second": 1068.689,
      "eval_steps_per_second": 42.748,
      "step": 25
    },
    {
      "epoch": 0.028953229398663696,
      "grad_norm": 0.05297641456127167,
      "learning_rate": 0.0002947813365416023,
      "loss": 10.3688,
      "step": 26
    },
    {
      "epoch": 0.030066815144766147,
      "grad_norm": 0.059603746980428696,
      "learning_rate": 0.0002941130514205272,
      "loss": 10.3657,
      "step": 27
    },
    {
      "epoch": 0.031180400890868598,
      "grad_norm": 0.05739077180624008,
      "learning_rate": 0.0002934053672301536,
      "loss": 10.3648,
      "step": 28
    },
    {
      "epoch": 0.03229398663697105,
      "grad_norm": 0.06212243810296059,
      "learning_rate": 0.00029265847744427303,
      "loss": 10.3629,
      "step": 29
    },
    {
      "epoch": 0.0334075723830735,
      "grad_norm": 0.0610235333442688,
      "learning_rate": 0.00029187258625509513,
      "loss": 10.3626,
      "step": 30
    },
    {
      "epoch": 0.034521158129175944,
      "grad_norm": 0.05962621048092842,
      "learning_rate": 0.00029104790851742417,
      "loss": 10.3607,
      "step": 31
    },
    {
      "epoch": 0.035634743875278395,
      "grad_norm": 0.06733676046133041,
      "learning_rate": 0.0002901846696899191,
      "loss": 10.3592,
      "step": 32
    },
    {
      "epoch": 0.036748329621380846,
      "grad_norm": 0.06607528775930405,
      "learning_rate": 0.00028928310577345606,
      "loss": 10.3575,
      "step": 33
    },
    {
      "epoch": 0.0378619153674833,
      "grad_norm": 0.06120593473315239,
      "learning_rate": 0.0002883434632466077,
      "loss": 10.3574,
      "step": 34
    },
    {
      "epoch": 0.03897550111358575,
      "grad_norm": 0.06261507421731949,
      "learning_rate": 0.00028736599899825856,
      "loss": 10.3548,
      "step": 35
    },
    {
      "epoch": 0.0400890868596882,
      "grad_norm": 0.05739546939730644,
      "learning_rate": 0.00028635098025737434,
      "loss": 10.3533,
      "step": 36
    },
    {
      "epoch": 0.04120267260579064,
      "grad_norm": 0.05693826824426651,
      "learning_rate": 0.00028529868451994384,
      "loss": 10.3503,
      "step": 37
    },
    {
      "epoch": 0.042316258351893093,
      "grad_norm": 0.05451498553156853,
      "learning_rate": 0.0002842093994731145,
      "loss": 10.3581,
      "step": 38
    },
    {
      "epoch": 0.043429844097995544,
      "grad_norm": 0.055710602551698685,
      "learning_rate": 0.00028308342291654174,
      "loss": 10.3566,
      "step": 39
    },
    {
      "epoch": 0.044543429844097995,
      "grad_norm": 0.05021951347589493,
      "learning_rate": 0.00028192106268097334,
      "loss": 10.3548,
      "step": 40
    },
    {
      "epoch": 0.045657015590200446,
      "grad_norm": 0.04118210822343826,
      "learning_rate": 0.00028072263654409154,
      "loss": 10.3563,
      "step": 41
    },
    {
      "epoch": 0.0467706013363029,
      "grad_norm": 0.03563275188207626,
      "learning_rate": 0.0002794884721436361,
      "loss": 10.3531,
      "step": 42
    },
    {
      "epoch": 0.04788418708240535,
      "grad_norm": 0.036302387714385986,
      "learning_rate": 0.00027821890688783083,
      "loss": 10.3524,
      "step": 43
    },
    {
      "epoch": 0.04899777282850779,
      "grad_norm": 0.03217688947916031,
      "learning_rate": 0.0002769142878631403,
      "loss": 10.3522,
      "step": 44
    },
    {
      "epoch": 0.05011135857461024,
      "grad_norm": 0.035605840384960175,
      "learning_rate": 0.00027557497173937923,
      "loss": 10.3514,
      "step": 45
    },
    {
      "epoch": 0.051224944320712694,
      "grad_norm": 0.03646039217710495,
      "learning_rate": 0.000274201324672203,
      "loss": 10.3483,
      "step": 46
    },
    {
      "epoch": 0.052338530066815145,
      "grad_norm": 0.03975389897823334,
      "learning_rate": 0.00027279372220300385,
      "loss": 10.3468,
      "step": 47
    },
    {
      "epoch": 0.053452115812917596,
      "grad_norm": 0.04297318309545517,
      "learning_rate": 0.0002713525491562421,
      "loss": 10.3462,
      "step": 48
    },
    {
      "epoch": 0.05456570155902005,
      "grad_norm": 0.05375898629426956,
      "learning_rate": 0.00026987819953423867,
      "loss": 10.3476,
      "step": 49
    },
    {
      "epoch": 0.0556792873051225,
      "grad_norm": 0.06830257922410965,
      "learning_rate": 0.00026837107640945905,
      "loss": 10.3447,
      "step": 50
    },
    {
      "epoch": 0.0556792873051225,
      "eval_loss": 10.347752571105957,
      "eval_runtime": 0.0506,
      "eval_samples_per_second": 988.519,
      "eval_steps_per_second": 39.541,
      "step": 50
    },
    {
      "epoch": 0.05679287305122494,
      "grad_norm": 0.036631014198064804,
      "learning_rate": 0.0002668315918143169,
      "loss": 10.3538,
      "step": 51
    },
    {
      "epoch": 0.05790645879732739,
      "grad_norm": 0.03295717015862465,
      "learning_rate": 0.00026526016662852886,
      "loss": 10.3509,
      "step": 52
    },
    {
      "epoch": 0.05902004454342984,
      "grad_norm": 0.033492058515548706,
      "learning_rate": 0.00026365723046405023,
      "loss": 10.351,
      "step": 53
    },
    {
      "epoch": 0.060133630289532294,
      "grad_norm": 0.031195539981126785,
      "learning_rate": 0.0002620232215476231,
      "loss": 10.3488,
      "step": 54
    },
    {
      "epoch": 0.061247216035634745,
      "grad_norm": 0.034528084099292755,
      "learning_rate": 0.0002603585866009697,
      "loss": 10.348,
      "step": 55
    },
    {
      "epoch": 0.062360801781737196,
      "grad_norm": 0.03689737245440483,
      "learning_rate": 0.00025866378071866334,
      "loss": 10.3479,
      "step": 56
    },
    {
      "epoch": 0.06347438752783964,
      "grad_norm": 0.040240008383989334,
      "learning_rate": 0.00025693926724370956,
      "loss": 10.346,
      "step": 57
    },
    {
      "epoch": 0.0645879732739421,
      "grad_norm": 0.035155221819877625,
      "learning_rate": 0.00025518551764087326,
      "loss": 10.3445,
      "step": 58
    },
    {
      "epoch": 0.06570155902004454,
      "grad_norm": 0.04302839934825897,
      "learning_rate": 0.00025340301136778483,
      "loss": 10.3426,
      "step": 59
    },
    {
      "epoch": 0.066815144766147,
      "grad_norm": 0.04797535389661789,
      "learning_rate": 0.00025159223574386114,
      "loss": 10.3419,
      "step": 60
    },
    {
      "epoch": 0.06792873051224944,
      "grad_norm": 0.05042591318488121,
      "learning_rate": 0.0002497536858170772,
      "loss": 10.3424,
      "step": 61
    },
    {
      "epoch": 0.06904231625835189,
      "grad_norm": 0.06060624122619629,
      "learning_rate": 0.00024788786422862526,
      "loss": 10.3386,
      "step": 62
    },
    {
      "epoch": 0.07015590200445435,
      "grad_norm": 0.03394079953432083,
      "learning_rate": 0.00024599528107549745,
      "loss": 10.3505,
      "step": 63
    },
    {
      "epoch": 0.07126948775055679,
      "grad_norm": 0.03250078111886978,
      "learning_rate": 0.00024407645377103054,
      "loss": 10.3476,
      "step": 64
    },
    {
      "epoch": 0.07238307349665925,
      "grad_norm": 0.03402571752667427,
      "learning_rate": 0.00024213190690345018,
      "loss": 10.3451,
      "step": 65
    },
    {
      "epoch": 0.07349665924276169,
      "grad_norm": 0.03165813162922859,
      "learning_rate": 0.00024016217209245374,
      "loss": 10.3455,
      "step": 66
    },
    {
      "epoch": 0.07461024498886415,
      "grad_norm": 0.033986710011959076,
      "learning_rate": 0.00023816778784387094,
      "loss": 10.3431,
      "step": 67
    },
    {
      "epoch": 0.0757238307349666,
      "grad_norm": 0.03550755977630615,
      "learning_rate": 0.0002361492994024415,
      "loss": 10.3431,
      "step": 68
    },
    {
      "epoch": 0.07683741648106904,
      "grad_norm": 0.03433714807033539,
      "learning_rate": 0.0002341072586027509,
      "loss": 10.3426,
      "step": 69
    },
    {
      "epoch": 0.0779510022271715,
      "grad_norm": 0.03356612101197243,
      "learning_rate": 0.00023204222371836405,
      "loss": 10.3421,
      "step": 70
    },
    {
      "epoch": 0.07906458797327394,
      "grad_norm": 0.03245285525918007,
      "learning_rate": 0.00022995475930919905,
      "loss": 10.3419,
      "step": 71
    },
    {
      "epoch": 0.0801781737193764,
      "grad_norm": 0.035388920456171036,
      "learning_rate": 0.00022784543606718227,
      "loss": 10.3396,
      "step": 72
    },
    {
      "epoch": 0.08129175946547884,
      "grad_norm": 0.038405828177928925,
      "learning_rate": 0.00022571483066022657,
      "loss": 10.3362,
      "step": 73
    },
    {
      "epoch": 0.08240534521158129,
      "grad_norm": 0.04556138068437576,
      "learning_rate": 0.0002235635255745762,
      "loss": 10.3345,
      "step": 74
    },
    {
      "epoch": 0.08351893095768374,
      "grad_norm": 0.05725998058915138,
      "learning_rate": 0.00022139210895556104,
      "loss": 10.3327,
      "step": 75
    },
    {
      "epoch": 0.08351893095768374,
      "eval_loss": 10.338118553161621,
      "eval_runtime": 0.0504,
      "eval_samples_per_second": 991.693,
      "eval_steps_per_second": 39.668,
      "step": 75
    },
    {
      "epoch": 0.08463251670378619,
      "grad_norm": 0.03490576148033142,
      "learning_rate": 0.00021920117444680317,
      "loss": 10.3483,
      "step": 76
    },
    {
      "epoch": 0.08574610244988864,
      "grad_norm": 0.03555111214518547,
      "learning_rate": 0.00021699132102792097,
      "loss": 10.3443,
      "step": 77
    },
    {
      "epoch": 0.08685968819599109,
      "grad_norm": 0.0329761877655983,
      "learning_rate": 0.0002147631528507739,
      "loss": 10.3436,
      "step": 78
    },
    {
      "epoch": 0.08797327394209355,
      "grad_norm": 0.03092610463500023,
      "learning_rate": 0.00021251727907429355,
      "loss": 10.3443,
      "step": 79
    },
    {
      "epoch": 0.08908685968819599,
      "grad_norm": 0.03112075664103031,
      "learning_rate": 0.0002102543136979454,
      "loss": 10.3411,
      "step": 80
    },
    {
      "epoch": 0.09020044543429843,
      "grad_norm": 0.028758404776453972,
      "learning_rate": 0.0002079748753938678,
      "loss": 10.3401,
      "step": 81
    },
    {
      "epoch": 0.09131403118040089,
      "grad_norm": 0.023690655827522278,
      "learning_rate": 0.0002056795873377331,
      "loss": 10.3377,
      "step": 82
    },
    {
      "epoch": 0.09242761692650334,
      "grad_norm": 0.02615417167544365,
      "learning_rate": 0.00020336907703837748,
      "loss": 10.339,
      "step": 83
    },
    {
      "epoch": 0.0935412026726058,
      "grad_norm": 0.02577822096645832,
      "learning_rate": 0.00020104397616624645,
      "loss": 10.3357,
      "step": 84
    },
    {
      "epoch": 0.09465478841870824,
      "grad_norm": 0.032399583607912064,
      "learning_rate": 0.00019870492038070252,
      "loss": 10.3349,
      "step": 85
    },
    {
      "epoch": 0.0957683741648107,
      "grad_norm": 0.03822220861911774,
      "learning_rate": 0.0001963525491562421,
      "loss": 10.3314,
      "step": 86
    },
    {
      "epoch": 0.09688195991091314,
      "grad_norm": 0.05163590610027313,
      "learning_rate": 0.0001939875056076697,
      "loss": 10.3299,
      "step": 87
    },
    {
      "epoch": 0.09799554565701558,
      "grad_norm": 0.038402412086725235,
      "learning_rate": 0.00019161043631427666,
      "loss": 10.3455,
      "step": 88
    },
    {
      "epoch": 0.09910913140311804,
      "grad_norm": 0.03179153427481651,
      "learning_rate": 0.00018922199114307294,
      "loss": 10.3438,
      "step": 89
    },
    {
      "epoch": 0.10022271714922049,
      "grad_norm": 0.031388312578201294,
      "learning_rate": 0.00018682282307111987,
      "loss": 10.343,
      "step": 90
    },
    {
      "epoch": 0.10133630289532294,
      "grad_norm": 0.025438381358981133,
      "learning_rate": 0.00018441358800701273,
      "loss": 10.3417,
      "step": 91
    },
    {
      "epoch": 0.10244988864142539,
      "grad_norm": 0.025364622473716736,
      "learning_rate": 0.00018199494461156203,
      "loss": 10.3393,
      "step": 92
    },
    {
      "epoch": 0.10356347438752785,
      "grad_norm": 0.024232415482401848,
      "learning_rate": 0.000179567554117722,
      "loss": 10.337,
      "step": 93
    },
    {
      "epoch": 0.10467706013363029,
      "grad_norm": 0.028822433203458786,
      "learning_rate": 0.00017713208014981648,
      "loss": 10.3379,
      "step": 94
    },
    {
      "epoch": 0.10579064587973273,
      "grad_norm": 0.022030413150787354,
      "learning_rate": 0.00017468918854211007,
      "loss": 10.337,
      "step": 95
    },
    {
      "epoch": 0.10690423162583519,
      "grad_norm": 0.0319385826587677,
      "learning_rate": 0.00017223954715677627,
      "loss": 10.3355,
      "step": 96
    },
    {
      "epoch": 0.10801781737193764,
      "grad_norm": 0.029396483674645424,
      "learning_rate": 0.00016978382570131034,
      "loss": 10.3319,
      "step": 97
    },
    {
      "epoch": 0.1091314031180401,
      "grad_norm": 0.03641531616449356,
      "learning_rate": 0.00016732269554543794,
      "loss": 10.3319,
      "step": 98
    },
    {
      "epoch": 0.11024498886414254,
      "grad_norm": 0.045224692672491074,
      "learning_rate": 0.00016485682953756942,
      "loss": 10.3314,
      "step": 99
    },
    {
      "epoch": 0.111358574610245,
      "grad_norm": 0.05978056415915489,
      "learning_rate": 0.00016238690182084986,
      "loss": 10.3267,
      "step": 100
    },
    {
      "epoch": 0.111358574610245,
      "eval_loss": 10.334311485290527,
      "eval_runtime": 0.0448,
      "eval_samples_per_second": 1116.397,
      "eval_steps_per_second": 44.656,
      "step": 100
    },
    {
      "epoch": 0.11247216035634744,
      "grad_norm": 0.04336141422390938,
      "learning_rate": 0.0001599135876488549,
      "loss": 10.3448,
      "step": 101
    },
    {
      "epoch": 0.11358574610244988,
      "grad_norm": 0.029405318200588226,
      "learning_rate": 0.00015743756320098332,
      "loss": 10.3418,
      "step": 102
    },
    {
      "epoch": 0.11469933184855234,
      "grad_norm": 0.024563252925872803,
      "learning_rate": 0.0001549595053975962,
      "loss": 10.3404,
      "step": 103
    },
    {
      "epoch": 0.11581291759465479,
      "grad_norm": 0.032005004584789276,
      "learning_rate": 0.00015248009171495378,
      "loss": 10.3378,
      "step": 104
    },
    {
      "epoch": 0.11692650334075724,
      "grad_norm": 0.027886446565389633,
      "learning_rate": 0.00015,
      "loss": 10.3384,
      "step": 105
    },
    {
      "epoch": 0.11804008908685969,
      "grad_norm": 0.027392663061618805,
      "learning_rate": 0.00014751990828504622,
      "loss": 10.3359,
      "step": 106
    },
    {
      "epoch": 0.11915367483296214,
      "grad_norm": 0.029057586565613747,
      "learning_rate": 0.00014504049460240375,
      "loss": 10.3357,
      "step": 107
    },
    {
      "epoch": 0.12026726057906459,
      "grad_norm": 0.02602003701031208,
      "learning_rate": 0.00014256243679901663,
      "loss": 10.334,
      "step": 108
    },
    {
      "epoch": 0.12138084632516703,
      "grad_norm": 0.025092778727412224,
      "learning_rate": 0.00014008641235114508,
      "loss": 10.3295,
      "step": 109
    },
    {
      "epoch": 0.12249443207126949,
      "grad_norm": 0.0306704044342041,
      "learning_rate": 0.00013761309817915014,
      "loss": 10.3304,
      "step": 110
    },
    {
      "epoch": 0.12360801781737193,
      "grad_norm": 0.03935825452208519,
      "learning_rate": 0.00013514317046243058,
      "loss": 10.3302,
      "step": 111
    },
    {
      "epoch": 0.12472160356347439,
      "grad_norm": 0.05053841695189476,
      "learning_rate": 0.00013267730445456208,
      "loss": 10.3247,
      "step": 112
    },
    {
      "epoch": 0.12583518930957685,
      "grad_norm": 0.03532646968960762,
      "learning_rate": 0.00013021617429868963,
      "loss": 10.3419,
      "step": 113
    },
    {
      "epoch": 0.12694877505567928,
      "grad_norm": 0.03398346155881882,
      "learning_rate": 0.00012776045284322368,
      "loss": 10.341,
      "step": 114
    },
    {
      "epoch": 0.12806236080178174,
      "grad_norm": 0.031128259375691414,
      "learning_rate": 0.00012531081145788987,
      "loss": 10.3399,
      "step": 115
    },
    {
      "epoch": 0.1291759465478842,
      "grad_norm": 0.02975599467754364,
      "learning_rate": 0.00012286791985018355,
      "loss": 10.3378,
      "step": 116
    },
    {
      "epoch": 0.13028953229398663,
      "grad_norm": 0.025660140439867973,
      "learning_rate": 0.00012043244588227796,
      "loss": 10.3369,
      "step": 117
    },
    {
      "epoch": 0.13140311804008908,
      "grad_norm": 0.023060623556375504,
      "learning_rate": 0.00011800505538843798,
      "loss": 10.3345,
      "step": 118
    },
    {
      "epoch": 0.13251670378619154,
      "grad_norm": 0.02799002081155777,
      "learning_rate": 0.00011558641199298727,
      "loss": 10.3328,
      "step": 119
    },
    {
      "epoch": 0.133630289532294,
      "grad_norm": 0.024050775915384293,
      "learning_rate": 0.00011317717692888012,
      "loss": 10.3321,
      "step": 120
    },
    {
      "epoch": 0.13474387527839643,
      "grad_norm": 0.02656245231628418,
      "learning_rate": 0.00011077800885692702,
      "loss": 10.3342,
      "step": 121
    },
    {
      "epoch": 0.1358574610244989,
      "grad_norm": 0.02685682475566864,
      "learning_rate": 0.00010838956368572334,
      "loss": 10.3297,
      "step": 122
    },
    {
      "epoch": 0.13697104677060135,
      "grad_norm": 0.03605301305651665,
      "learning_rate": 0.0001060124943923303,
      "loss": 10.3265,
      "step": 123
    },
    {
      "epoch": 0.13808463251670378,
      "grad_norm": 0.04310872033238411,
      "learning_rate": 0.0001036474508437579,
      "loss": 10.325,
      "step": 124
    },
    {
      "epoch": 0.13919821826280623,
      "grad_norm": 0.052310604602098465,
      "learning_rate": 0.00010129507961929748,
      "loss": 10.3216,
      "step": 125
    },
    {
      "epoch": 0.13919821826280623,
      "eval_loss": 10.330706596374512,
      "eval_runtime": 0.0484,
      "eval_samples_per_second": 1033.941,
      "eval_steps_per_second": 41.358,
      "step": 125
    },
    {
      "epoch": 0.1403118040089087,
      "grad_norm": 0.03873632475733757,
      "learning_rate": 9.895602383375353e-05,
      "loss": 10.343,
      "step": 126
    },
    {
      "epoch": 0.14142538975501115,
      "grad_norm": 0.03256813436746597,
      "learning_rate": 9.663092296162251e-05,
      "loss": 10.3388,
      "step": 127
    },
    {
      "epoch": 0.14253897550111358,
      "grad_norm": 0.026527272537350655,
      "learning_rate": 9.432041266226686e-05,
      "loss": 10.3378,
      "step": 128
    },
    {
      "epoch": 0.14365256124721604,
      "grad_norm": 0.022660735994577408,
      "learning_rate": 9.202512460613219e-05,
      "loss": 10.3365,
      "step": 129
    },
    {
      "epoch": 0.1447661469933185,
      "grad_norm": 0.0257252287119627,
      "learning_rate": 8.97456863020546e-05,
      "loss": 10.3343,
      "step": 130
    },
    {
      "epoch": 0.14587973273942093,
      "grad_norm": 0.02294372208416462,
      "learning_rate": 8.748272092570646e-05,
      "loss": 10.3345,
      "step": 131
    },
    {
      "epoch": 0.14699331848552338,
      "grad_norm": 0.022150015458464622,
      "learning_rate": 8.523684714922608e-05,
      "loss": 10.33,
      "step": 132
    },
    {
      "epoch": 0.14810690423162584,
      "grad_norm": 0.023904934525489807,
      "learning_rate": 8.300867897207903e-05,
      "loss": 10.3325,
      "step": 133
    },
    {
      "epoch": 0.1492204899777283,
      "grad_norm": 0.025626808404922485,
      "learning_rate": 8.079882555319684e-05,
      "loss": 10.329,
      "step": 134
    },
    {
      "epoch": 0.15033407572383073,
      "grad_norm": 0.031538087874650955,
      "learning_rate": 7.860789104443896e-05,
      "loss": 10.3266,
      "step": 135
    },
    {
      "epoch": 0.1514476614699332,
      "grad_norm": 0.03154623880982399,
      "learning_rate": 7.643647442542382e-05,
      "loss": 10.3253,
      "step": 136
    },
    {
      "epoch": 0.15256124721603564,
      "grad_norm": 0.04129767417907715,
      "learning_rate": 7.428516933977347e-05,
      "loss": 10.3231,
      "step": 137
    },
    {
      "epoch": 0.15367483296213807,
      "grad_norm": 0.03668758645653725,
      "learning_rate": 7.215456393281776e-05,
      "loss": 10.3407,
      "step": 138
    },
    {
      "epoch": 0.15478841870824053,
      "grad_norm": 0.03435783460736275,
      "learning_rate": 7.004524069080096e-05,
      "loss": 10.3408,
      "step": 139
    },
    {
      "epoch": 0.155902004454343,
      "grad_norm": 0.03316853567957878,
      "learning_rate": 6.795777628163599e-05,
      "loss": 10.3389,
      "step": 140
    },
    {
      "epoch": 0.15701559020044542,
      "grad_norm": 0.025575635954737663,
      "learning_rate": 6.58927413972491e-05,
      "loss": 10.3364,
      "step": 141
    },
    {
      "epoch": 0.15812917594654788,
      "grad_norm": 0.021810244768857956,
      "learning_rate": 6.385070059755846e-05,
      "loss": 10.3354,
      "step": 142
    },
    {
      "epoch": 0.15924276169265034,
      "grad_norm": 0.021209685131907463,
      "learning_rate": 6.183221215612904e-05,
      "loss": 10.3342,
      "step": 143
    },
    {
      "epoch": 0.1603563474387528,
      "grad_norm": 0.02310410887002945,
      "learning_rate": 5.983782790754623e-05,
      "loss": 10.3325,
      "step": 144
    },
    {
      "epoch": 0.16146993318485522,
      "grad_norm": 0.021886780858039856,
      "learning_rate": 5.786809309654982e-05,
      "loss": 10.3306,
      "step": 145
    },
    {
      "epoch": 0.16258351893095768,
      "grad_norm": 0.025645460933446884,
      "learning_rate": 5.592354622896944e-05,
      "loss": 10.3291,
      "step": 146
    },
    {
      "epoch": 0.16369710467706014,
      "grad_norm": 0.028998544439673424,
      "learning_rate": 5.40047189245025e-05,
      "loss": 10.3274,
      "step": 147
    },
    {
      "epoch": 0.16481069042316257,
      "grad_norm": 0.03274320811033249,
      "learning_rate": 5.211213577137469e-05,
      "loss": 10.3251,
      "step": 148
    },
    {
      "epoch": 0.16592427616926503,
      "grad_norm": 0.041816357523202896,
      "learning_rate": 5.024631418292274e-05,
      "loss": 10.3252,
      "step": 149
    },
    {
      "epoch": 0.16703786191536749,
      "grad_norm": 0.05579761043190956,
      "learning_rate": 4.840776425613886e-05,
      "loss": 10.321,
      "step": 150
    },
    {
      "epoch": 0.16703786191536749,
      "eval_loss": 10.3283109664917,
      "eval_runtime": 0.0477,
      "eval_samples_per_second": 1048.482,
      "eval_steps_per_second": 41.939,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 69380829872128.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}