diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15793 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 20.0, + "global_step": 65700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 3.998599695585997e-05, + "loss": 3.8582, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 3.997138508371385e-05, + "loss": 3.284, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.995616438356165e-05, + "loss": 3.1009, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 3.994094368340944e-05, + "loss": 3.1079, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 3.9925722983257234e-05, + "loss": 3.1098, + "step": 125 + }, + { + "epoch": 0.05, + "learning_rate": 3.991050228310503e-05, + "loss": 3.1987, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 3.989528158295282e-05, + "loss": 2.9982, + "step": 175 + }, + { + "epoch": 0.06, + "learning_rate": 3.9880060882800616e-05, + "loss": 2.9164, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 3.9864840182648406e-05, + "loss": 2.9484, + "step": 225 + }, + { + "epoch": 0.08, + "learning_rate": 3.9849619482496195e-05, + "loss": 2.8611, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 3.983439878234399e-05, + "loss": 2.9438, + "step": 275 + }, + { + "epoch": 0.09, + "learning_rate": 3.981917808219178e-05, + "loss": 3.0905, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 3.980395738203958e-05, + "loss": 3.0313, + "step": 325 + }, + { + "epoch": 0.11, + "learning_rate": 3.978873668188737e-05, + "loss": 2.9305, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 3.977351598173516e-05, + "loss": 2.9258, + "step": 375 + }, + { + "epoch": 0.12, + "learning_rate": 3.975829528158296e-05, + "loss": 2.9008, + "step": 400 + }, + { + "epoch": 0.13, + "learning_rate": 3.974307458143075e-05, + "loss": 2.7581, + "step": 425 + }, + { + "epoch": 0.14, + "learning_rate": 3.9727853881278545e-05, + "loss": 2.8324, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 3.9712633181126334e-05, + "loss": 2.6921, + "step": 475 + }, + { + "epoch": 0.15, + "learning_rate": 3.9697412480974123e-05, + "loss": 2.8431, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 3.968219178082192e-05, + "loss": 2.7828, + "step": 525 + }, + { + "epoch": 0.17, + "learning_rate": 3.9666971080669716e-05, + "loss": 2.8666, + "step": 550 + }, + { + "epoch": 0.18, + "learning_rate": 3.9651750380517505e-05, + "loss": 2.8547, + "step": 575 + }, + { + "epoch": 0.18, + "learning_rate": 3.9637138508371385e-05, + "loss": 2.6909, + "step": 600 + }, + { + "epoch": 0.19, + "learning_rate": 3.962191780821918e-05, + "loss": 2.7679, + "step": 625 + }, + { + "epoch": 0.2, + "learning_rate": 3.960669710806698e-05, + "loss": 2.7284, + "step": 650 + }, + { + "epoch": 0.21, + "learning_rate": 3.959147640791477e-05, + "loss": 2.902, + "step": 675 + }, + { + "epoch": 0.21, + "learning_rate": 3.957625570776256e-05, + "loss": 2.8097, + "step": 700 + }, + { + "epoch": 0.22, + "learning_rate": 3.956103500761035e-05, + "loss": 2.7846, + "step": 725 + }, + { + "epoch": 0.23, + "learning_rate": 3.954581430745815e-05, + "loss": 2.7917, + "step": 750 + }, + { + "epoch": 0.24, + "learning_rate": 3.953059360730594e-05, + "loss": 2.7978, + "step": 775 + }, + { + "epoch": 0.24, + "learning_rate": 3.951537290715373e-05, + "loss": 2.7402, + "step": 800 + }, + { + "epoch": 0.25, + "learning_rate": 3.9500152207001524e-05, + "loss": 2.7104, + "step": 825 + }, + { + "epoch": 0.26, + "learning_rate": 3.948493150684932e-05, + "loss": 2.7742, + "step": 850 + }, + { + "epoch": 0.27, + "learning_rate": 3.946971080669711e-05, + "loss": 2.6598, + "step": 875 + }, + { + "epoch": 0.27, + "learning_rate": 3.9454490106544906e-05, + "loss": 2.6493, + "step": 900 + }, + { + "epoch": 0.28, + "learning_rate": 3.9439269406392695e-05, + "loss": 2.6935, + "step": 925 + }, + { + "epoch": 0.29, + "learning_rate": 3.942404870624049e-05, + "loss": 2.6664, + "step": 950 + }, + { + "epoch": 0.3, + "learning_rate": 3.940882800608828e-05, + "loss": 2.7125, + "step": 975 + }, + { + "epoch": 0.3, + "learning_rate": 3.939360730593608e-05, + "loss": 2.77, + "step": 1000 + }, + { + "epoch": 0.31, + "learning_rate": 3.9378386605783867e-05, + "loss": 2.7562, + "step": 1025 + }, + { + "epoch": 0.32, + "learning_rate": 3.936316590563166e-05, + "loss": 2.699, + "step": 1050 + }, + { + "epoch": 0.33, + "learning_rate": 3.934794520547946e-05, + "loss": 2.8319, + "step": 1075 + }, + { + "epoch": 0.33, + "learning_rate": 3.933272450532725e-05, + "loss": 2.6888, + "step": 1100 + }, + { + "epoch": 0.34, + "learning_rate": 3.9317503805175045e-05, + "loss": 2.7843, + "step": 1125 + }, + { + "epoch": 0.35, + "learning_rate": 3.9302283105022834e-05, + "loss": 2.6738, + "step": 1150 + }, + { + "epoch": 0.36, + "learning_rate": 3.9287062404870624e-05, + "loss": 2.7412, + "step": 1175 + }, + { + "epoch": 0.37, + "learning_rate": 3.927184170471842e-05, + "loss": 2.6732, + "step": 1200 + }, + { + "epoch": 0.37, + "learning_rate": 3.925662100456621e-05, + "loss": 2.7467, + "step": 1225 + }, + { + "epoch": 0.38, + "learning_rate": 3.9241400304414005e-05, + "loss": 2.7866, + "step": 1250 + }, + { + "epoch": 0.39, + "learning_rate": 3.92261796042618e-05, + "loss": 2.8271, + "step": 1275 + }, + { + "epoch": 0.4, + "learning_rate": 3.921095890410959e-05, + "loss": 2.6714, + "step": 1300 + }, + { + "epoch": 0.4, + "learning_rate": 3.919573820395739e-05, + "loss": 2.6909, + "step": 1325 + }, + { + "epoch": 0.41, + "learning_rate": 3.918051750380518e-05, + "loss": 2.7683, + "step": 1350 + }, + { + "epoch": 0.42, + "learning_rate": 3.916529680365297e-05, + "loss": 2.7829, + "step": 1375 + }, + { + "epoch": 0.43, + "learning_rate": 3.915007610350076e-05, + "loss": 2.6544, + "step": 1400 + }, + { + "epoch": 0.43, + "learning_rate": 3.913485540334855e-05, + "loss": 2.6491, + "step": 1425 + }, + { + "epoch": 0.44, + "learning_rate": 3.9119634703196355e-05, + "loss": 2.7998, + "step": 1450 + }, + { + "epoch": 0.45, + "learning_rate": 3.9104414003044144e-05, + "loss": 2.6747, + "step": 1475 + }, + { + "epoch": 0.46, + "learning_rate": 3.9089193302891934e-05, + "loss": 2.8246, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 3.907397260273973e-05, + "loss": 2.6864, + "step": 1525 + }, + { + "epoch": 0.47, + "learning_rate": 3.905875190258752e-05, + "loss": 2.7112, + "step": 1550 + }, + { + "epoch": 0.48, + "learning_rate": 3.9043531202435316e-05, + "loss": 2.641, + "step": 1575 + }, + { + "epoch": 0.49, + "learning_rate": 3.9028310502283105e-05, + "loss": 2.7254, + "step": 1600 + }, + { + "epoch": 0.49, + "learning_rate": 3.90130898021309e-05, + "loss": 2.6282, + "step": 1625 + }, + { + "epoch": 0.5, + "learning_rate": 3.89978691019787e-05, + "loss": 2.6655, + "step": 1650 + }, + { + "epoch": 0.51, + "learning_rate": 3.898264840182649e-05, + "loss": 2.646, + "step": 1675 + }, + { + "epoch": 0.52, + "learning_rate": 3.896742770167428e-05, + "loss": 2.6284, + "step": 1700 + }, + { + "epoch": 0.53, + "learning_rate": 3.895220700152207e-05, + "loss": 2.6256, + "step": 1725 + }, + { + "epoch": 0.53, + "learning_rate": 3.893698630136987e-05, + "loss": 2.5699, + "step": 1750 + }, + { + "epoch": 0.54, + "learning_rate": 3.892176560121766e-05, + "loss": 2.6774, + "step": 1775 + }, + { + "epoch": 0.55, + "learning_rate": 3.890654490106545e-05, + "loss": 2.7368, + "step": 1800 + }, + { + "epoch": 0.56, + "learning_rate": 3.8891324200913244e-05, + "loss": 2.6614, + "step": 1825 + }, + { + "epoch": 0.56, + "learning_rate": 3.887610350076104e-05, + "loss": 2.6495, + "step": 1850 + }, + { + "epoch": 0.57, + "learning_rate": 3.886088280060883e-05, + "loss": 2.6243, + "step": 1875 + }, + { + "epoch": 0.58, + "learning_rate": 3.8845662100456626e-05, + "loss": 2.5812, + "step": 1900 + }, + { + "epoch": 0.59, + "learning_rate": 3.8830441400304416e-05, + "loss": 2.6791, + "step": 1925 + }, + { + "epoch": 0.59, + "learning_rate": 3.881522070015221e-05, + "loss": 2.5731, + "step": 1950 + }, + { + "epoch": 0.6, + "learning_rate": 3.88e-05, + "loss": 2.5387, + "step": 1975 + }, + { + "epoch": 0.61, + "learning_rate": 3.87847792998478e-05, + "loss": 2.6743, + "step": 2000 + }, + { + "epoch": 0.62, + "learning_rate": 3.876955859969559e-05, + "loss": 2.5973, + "step": 2025 + }, + { + "epoch": 0.62, + "learning_rate": 3.875433789954338e-05, + "loss": 2.6038, + "step": 2050 + }, + { + "epoch": 0.63, + "learning_rate": 3.873911719939118e-05, + "loss": 2.7553, + "step": 2075 + }, + { + "epoch": 0.64, + "learning_rate": 3.872389649923897e-05, + "loss": 2.7192, + "step": 2100 + }, + { + "epoch": 0.65, + "learning_rate": 3.870867579908676e-05, + "loss": 2.6565, + "step": 2125 + }, + { + "epoch": 0.65, + "learning_rate": 3.8693455098934554e-05, + "loss": 2.5753, + "step": 2150 + }, + { + "epoch": 0.66, + "learning_rate": 3.8678234398782344e-05, + "loss": 2.501, + "step": 2175 + }, + { + "epoch": 0.67, + "learning_rate": 3.866301369863014e-05, + "loss": 2.72, + "step": 2200 + }, + { + "epoch": 0.68, + "learning_rate": 3.864779299847793e-05, + "loss": 2.5979, + "step": 2225 + }, + { + "epoch": 0.68, + "learning_rate": 3.8632572298325726e-05, + "loss": 2.587, + "step": 2250 + }, + { + "epoch": 0.69, + "learning_rate": 3.861735159817352e-05, + "loss": 2.7236, + "step": 2275 + }, + { + "epoch": 0.7, + "learning_rate": 3.860213089802131e-05, + "loss": 2.6234, + "step": 2300 + }, + { + "epoch": 0.71, + "learning_rate": 3.858691019786911e-05, + "loss": 2.652, + "step": 2325 + }, + { + "epoch": 0.72, + "learning_rate": 3.85716894977169e-05, + "loss": 2.5885, + "step": 2350 + }, + { + "epoch": 0.72, + "learning_rate": 3.8556468797564693e-05, + "loss": 2.6084, + "step": 2375 + }, + { + "epoch": 0.73, + "learning_rate": 3.854124809741248e-05, + "loss": 2.693, + "step": 2400 + }, + { + "epoch": 0.74, + "learning_rate": 3.852602739726027e-05, + "loss": 2.6778, + "step": 2425 + }, + { + "epoch": 0.75, + "learning_rate": 3.851080669710807e-05, + "loss": 2.591, + "step": 2450 + }, + { + "epoch": 0.75, + "learning_rate": 3.8495585996955865e-05, + "loss": 2.5932, + "step": 2475 + }, + { + "epoch": 0.76, + "learning_rate": 3.8480365296803654e-05, + "loss": 2.5999, + "step": 2500 + }, + { + "epoch": 0.77, + "learning_rate": 3.846514459665145e-05, + "loss": 2.5795, + "step": 2525 + }, + { + "epoch": 0.78, + "learning_rate": 3.844992389649924e-05, + "loss": 2.543, + "step": 2550 + }, + { + "epoch": 0.78, + "learning_rate": 3.8434703196347036e-05, + "loss": 2.5534, + "step": 2575 + }, + { + "epoch": 0.79, + "learning_rate": 3.8419482496194826e-05, + "loss": 2.6031, + "step": 2600 + }, + { + "epoch": 0.8, + "learning_rate": 3.840426179604262e-05, + "loss": 2.6382, + "step": 2625 + }, + { + "epoch": 0.81, + "learning_rate": 3.838904109589042e-05, + "loss": 2.5381, + "step": 2650 + }, + { + "epoch": 0.81, + "learning_rate": 3.837382039573821e-05, + "loss": 2.4354, + "step": 2675 + }, + { + "epoch": 0.82, + "learning_rate": 3.8358599695586004e-05, + "loss": 2.6449, + "step": 2700 + }, + { + "epoch": 0.83, + "learning_rate": 3.834337899543379e-05, + "loss": 2.5669, + "step": 2725 + }, + { + "epoch": 0.84, + "learning_rate": 3.832815829528158e-05, + "loss": 2.6498, + "step": 2750 + }, + { + "epoch": 0.84, + "learning_rate": 3.831293759512938e-05, + "loss": 2.6078, + "step": 2775 + }, + { + "epoch": 0.85, + "learning_rate": 3.829771689497717e-05, + "loss": 2.6951, + "step": 2800 + }, + { + "epoch": 0.86, + "learning_rate": 3.8282496194824965e-05, + "loss": 2.5454, + "step": 2825 + }, + { + "epoch": 0.87, + "learning_rate": 3.826727549467276e-05, + "loss": 2.6792, + "step": 2850 + }, + { + "epoch": 0.88, + "learning_rate": 3.825205479452055e-05, + "loss": 2.5385, + "step": 2875 + }, + { + "epoch": 0.88, + "learning_rate": 3.8236834094368346e-05, + "loss": 2.502, + "step": 2900 + }, + { + "epoch": 0.89, + "learning_rate": 3.8221613394216136e-05, + "loss": 2.5359, + "step": 2925 + }, + { + "epoch": 0.9, + "learning_rate": 3.820639269406393e-05, + "loss": 2.5942, + "step": 2950 + }, + { + "epoch": 0.91, + "learning_rate": 3.819117199391172e-05, + "loss": 2.5167, + "step": 2975 + }, + { + "epoch": 0.91, + "learning_rate": 3.817595129375952e-05, + "loss": 2.4514, + "step": 3000 + }, + { + "epoch": 0.92, + "learning_rate": 3.816073059360731e-05, + "loss": 2.6363, + "step": 3025 + }, + { + "epoch": 0.93, + "learning_rate": 3.8145509893455103e-05, + "loss": 2.39, + "step": 3050 + }, + { + "epoch": 0.94, + "learning_rate": 3.813028919330289e-05, + "loss": 2.5625, + "step": 3075 + }, + { + "epoch": 0.94, + "learning_rate": 3.811506849315069e-05, + "loss": 2.6096, + "step": 3100 + }, + { + "epoch": 0.95, + "learning_rate": 3.809984779299848e-05, + "loss": 2.7414, + "step": 3125 + }, + { + "epoch": 0.96, + "learning_rate": 3.8084627092846275e-05, + "loss": 2.6735, + "step": 3150 + }, + { + "epoch": 0.97, + "learning_rate": 3.8069406392694064e-05, + "loss": 2.6245, + "step": 3175 + }, + { + "epoch": 0.97, + "learning_rate": 3.805418569254186e-05, + "loss": 2.5353, + "step": 3200 + }, + { + "epoch": 0.98, + "learning_rate": 3.803896499238965e-05, + "loss": 2.4748, + "step": 3225 + }, + { + "epoch": 0.99, + "learning_rate": 3.8023744292237446e-05, + "loss": 2.6444, + "step": 3250 + }, + { + "epoch": 1.0, + "learning_rate": 3.800852359208524e-05, + "loss": 2.6216, + "step": 3275 + }, + { + "epoch": 1.0, + "learning_rate": 3.799330289193303e-05, + "loss": 2.6074, + "step": 3300 + }, + { + "epoch": 1.01, + "learning_rate": 3.797808219178083e-05, + "loss": 2.5025, + "step": 3325 + }, + { + "epoch": 1.02, + "learning_rate": 3.796286149162862e-05, + "loss": 2.62, + "step": 3350 + }, + { + "epoch": 1.03, + "learning_rate": 3.794764079147641e-05, + "loss": 2.521, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 3.79324200913242e-05, + "loss": 2.4202, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 3.791719939117199e-05, + "loss": 2.5935, + "step": 3425 + }, + { + "epoch": 1.05, + "learning_rate": 3.790197869101979e-05, + "loss": 2.5516, + "step": 3450 + }, + { + "epoch": 1.06, + "learning_rate": 3.7886757990867585e-05, + "loss": 2.5356, + "step": 3475 + }, + { + "epoch": 1.07, + "learning_rate": 3.7871537290715375e-05, + "loss": 2.6087, + "step": 3500 + }, + { + "epoch": 1.07, + "learning_rate": 3.785631659056317e-05, + "loss": 2.4974, + "step": 3525 + }, + { + "epoch": 1.08, + "learning_rate": 3.784109589041096e-05, + "loss": 2.4709, + "step": 3550 + }, + { + "epoch": 1.09, + "learning_rate": 3.7825875190258757e-05, + "loss": 2.5459, + "step": 3575 + }, + { + "epoch": 1.1, + "learning_rate": 3.7810654490106546e-05, + "loss": 2.5447, + "step": 3600 + }, + { + "epoch": 1.1, + "learning_rate": 3.779543378995434e-05, + "loss": 2.5014, + "step": 3625 + }, + { + "epoch": 1.11, + "learning_rate": 3.778021308980214e-05, + "loss": 2.56, + "step": 3650 + }, + { + "epoch": 1.12, + "learning_rate": 3.776499238964993e-05, + "loss": 2.5625, + "step": 3675 + }, + { + "epoch": 1.13, + "learning_rate": 3.774977168949772e-05, + "loss": 2.5072, + "step": 3700 + }, + { + "epoch": 1.13, + "learning_rate": 3.7734550989345514e-05, + "loss": 2.5928, + "step": 3725 + }, + { + "epoch": 1.14, + "learning_rate": 3.77193302891933e-05, + "loss": 2.4591, + "step": 3750 + }, + { + "epoch": 1.15, + "learning_rate": 3.77041095890411e-05, + "loss": 2.3873, + "step": 3775 + }, + { + "epoch": 1.16, + "learning_rate": 3.768888888888889e-05, + "loss": 2.468, + "step": 3800 + }, + { + "epoch": 1.16, + "learning_rate": 3.7673668188736685e-05, + "loss": 2.5052, + "step": 3825 + }, + { + "epoch": 1.17, + "learning_rate": 3.765844748858448e-05, + "loss": 2.5028, + "step": 3850 + }, + { + "epoch": 1.18, + "learning_rate": 3.764322678843227e-05, + "loss": 2.516, + "step": 3875 + }, + { + "epoch": 1.19, + "learning_rate": 3.762800608828007e-05, + "loss": 2.514, + "step": 3900 + }, + { + "epoch": 1.19, + "learning_rate": 3.7612785388127856e-05, + "loss": 2.5943, + "step": 3925 + }, + { + "epoch": 1.2, + "learning_rate": 3.759756468797565e-05, + "loss": 2.4263, + "step": 3950 + }, + { + "epoch": 1.21, + "learning_rate": 3.758234398782344e-05, + "loss": 2.3991, + "step": 3975 + }, + { + "epoch": 1.22, + "learning_rate": 3.756712328767123e-05, + "loss": 2.3838, + "step": 4000 + }, + { + "epoch": 1.23, + "learning_rate": 3.755190258751903e-05, + "loss": 2.4018, + "step": 4025 + }, + { + "epoch": 1.23, + "learning_rate": 3.7536681887366824e-05, + "loss": 2.4734, + "step": 4050 + }, + { + "epoch": 1.24, + "learning_rate": 3.752146118721461e-05, + "loss": 2.4167, + "step": 4075 + }, + { + "epoch": 1.25, + "learning_rate": 3.750624048706241e-05, + "loss": 2.3863, + "step": 4100 + }, + { + "epoch": 1.26, + "learning_rate": 3.74910197869102e-05, + "loss": 2.5197, + "step": 4125 + }, + { + "epoch": 1.26, + "learning_rate": 3.7475799086757995e-05, + "loss": 2.4539, + "step": 4150 + }, + { + "epoch": 1.27, + "learning_rate": 3.7460578386605785e-05, + "loss": 2.4153, + "step": 4175 + }, + { + "epoch": 1.28, + "learning_rate": 3.744535768645358e-05, + "loss": 2.5164, + "step": 4200 + }, + { + "epoch": 1.29, + "learning_rate": 3.743013698630137e-05, + "loss": 2.4194, + "step": 4225 + }, + { + "epoch": 1.29, + "learning_rate": 3.7414916286149167e-05, + "loss": 2.4861, + "step": 4250 + }, + { + "epoch": 1.3, + "learning_rate": 3.739969558599696e-05, + "loss": 2.4588, + "step": 4275 + }, + { + "epoch": 1.31, + "learning_rate": 3.738447488584475e-05, + "loss": 2.5212, + "step": 4300 + }, + { + "epoch": 1.32, + "learning_rate": 3.736925418569254e-05, + "loss": 2.3907, + "step": 4325 + }, + { + "epoch": 1.32, + "learning_rate": 3.735403348554034e-05, + "loss": 2.4816, + "step": 4350 + }, + { + "epoch": 1.33, + "learning_rate": 3.733881278538813e-05, + "loss": 2.5161, + "step": 4375 + }, + { + "epoch": 1.34, + "learning_rate": 3.7323592085235924e-05, + "loss": 2.5058, + "step": 4400 + }, + { + "epoch": 1.35, + "learning_rate": 3.730837138508371e-05, + "loss": 2.4269, + "step": 4425 + }, + { + "epoch": 1.35, + "learning_rate": 3.729315068493151e-05, + "loss": 2.5356, + "step": 4450 + }, + { + "epoch": 1.36, + "learning_rate": 3.7277929984779306e-05, + "loss": 2.509, + "step": 4475 + }, + { + "epoch": 1.37, + "learning_rate": 3.7262709284627095e-05, + "loss": 2.397, + "step": 4500 + }, + { + "epoch": 1.38, + "learning_rate": 3.724748858447489e-05, + "loss": 2.5031, + "step": 4525 + }, + { + "epoch": 1.39, + "learning_rate": 3.723226788432268e-05, + "loss": 2.4124, + "step": 4550 + }, + { + "epoch": 1.39, + "learning_rate": 3.721704718417048e-05, + "loss": 2.5102, + "step": 4575 + }, + { + "epoch": 1.4, + "learning_rate": 3.7201826484018266e-05, + "loss": 2.486, + "step": 4600 + }, + { + "epoch": 1.41, + "learning_rate": 3.7186605783866056e-05, + "loss": 2.407, + "step": 4625 + }, + { + "epoch": 1.42, + "learning_rate": 3.717138508371386e-05, + "loss": 2.464, + "step": 4650 + }, + { + "epoch": 1.42, + "learning_rate": 3.715616438356165e-05, + "loss": 2.5452, + "step": 4675 + }, + { + "epoch": 1.43, + "learning_rate": 3.714094368340944e-05, + "loss": 2.4468, + "step": 4700 + }, + { + "epoch": 1.44, + "learning_rate": 3.7125722983257234e-05, + "loss": 2.4068, + "step": 4725 + }, + { + "epoch": 1.45, + "learning_rate": 3.711050228310502e-05, + "loss": 2.4681, + "step": 4750 + }, + { + "epoch": 1.45, + "learning_rate": 3.709528158295282e-05, + "loss": 2.3566, + "step": 4775 + }, + { + "epoch": 1.46, + "learning_rate": 3.70806697108067e-05, + "loss": 2.4723, + "step": 4800 + }, + { + "epoch": 1.47, + "learning_rate": 3.7065449010654495e-05, + "loss": 2.4191, + "step": 4825 + }, + { + "epoch": 1.48, + "learning_rate": 3.7050228310502285e-05, + "loss": 2.5294, + "step": 4850 + }, + { + "epoch": 1.48, + "learning_rate": 3.703500761035008e-05, + "loss": 2.7049, + "step": 4875 + }, + { + "epoch": 1.49, + "learning_rate": 3.701978691019787e-05, + "loss": 2.5428, + "step": 4900 + }, + { + "epoch": 1.5, + "learning_rate": 3.700456621004567e-05, + "loss": 2.4997, + "step": 4925 + }, + { + "epoch": 1.51, + "learning_rate": 3.6989345509893456e-05, + "loss": 2.3452, + "step": 4950 + }, + { + "epoch": 1.51, + "learning_rate": 3.697412480974125e-05, + "loss": 2.5259, + "step": 4975 + }, + { + "epoch": 1.52, + "learning_rate": 3.695890410958904e-05, + "loss": 2.3965, + "step": 5000 + }, + { + "epoch": 1.53, + "learning_rate": 3.694368340943684e-05, + "loss": 2.501, + "step": 5025 + }, + { + "epoch": 1.54, + "learning_rate": 3.692846270928463e-05, + "loss": 2.4793, + "step": 5050 + }, + { + "epoch": 1.54, + "learning_rate": 3.6913242009132424e-05, + "loss": 2.5491, + "step": 5075 + }, + { + "epoch": 1.55, + "learning_rate": 3.689802130898021e-05, + "loss": 2.4435, + "step": 5100 + }, + { + "epoch": 1.56, + "learning_rate": 3.688280060882801e-05, + "loss": 2.4705, + "step": 5125 + }, + { + "epoch": 1.57, + "learning_rate": 3.6867579908675806e-05, + "loss": 2.475, + "step": 5150 + }, + { + "epoch": 1.58, + "learning_rate": 3.6852359208523595e-05, + "loss": 2.5521, + "step": 5175 + }, + { + "epoch": 1.58, + "learning_rate": 3.683713850837139e-05, + "loss": 2.5303, + "step": 5200 + }, + { + "epoch": 1.59, + "learning_rate": 3.682191780821918e-05, + "loss": 2.4951, + "step": 5225 + }, + { + "epoch": 1.6, + "learning_rate": 3.680669710806697e-05, + "loss": 2.4559, + "step": 5250 + }, + { + "epoch": 1.61, + "learning_rate": 3.6791476407914766e-05, + "loss": 2.5772, + "step": 5275 + }, + { + "epoch": 1.61, + "learning_rate": 3.6776255707762556e-05, + "loss": 2.4943, + "step": 5300 + }, + { + "epoch": 1.62, + "learning_rate": 3.676164383561644e-05, + "loss": 2.4351, + "step": 5325 + }, + { + "epoch": 1.63, + "learning_rate": 3.674642313546423e-05, + "loss": 2.5454, + "step": 5350 + }, + { + "epoch": 1.64, + "learning_rate": 3.673120243531203e-05, + "loss": 2.492, + "step": 5375 + }, + { + "epoch": 1.64, + "learning_rate": 3.6715981735159824e-05, + "loss": 2.5228, + "step": 5400 + }, + { + "epoch": 1.65, + "learning_rate": 3.6700761035007614e-05, + "loss": 2.4511, + "step": 5425 + }, + { + "epoch": 1.66, + "learning_rate": 3.668554033485541e-05, + "loss": 2.4755, + "step": 5450 + }, + { + "epoch": 1.67, + "learning_rate": 3.66703196347032e-05, + "loss": 2.4568, + "step": 5475 + }, + { + "epoch": 1.67, + "learning_rate": 3.6655098934550995e-05, + "loss": 2.4765, + "step": 5500 + }, + { + "epoch": 1.68, + "learning_rate": 3.6639878234398785e-05, + "loss": 2.4977, + "step": 5525 + }, + { + "epoch": 1.69, + "learning_rate": 3.6624657534246574e-05, + "loss": 2.5427, + "step": 5550 + }, + { + "epoch": 1.7, + "learning_rate": 3.660943683409437e-05, + "loss": 2.6176, + "step": 5575 + }, + { + "epoch": 1.7, + "learning_rate": 3.659421613394217e-05, + "loss": 2.5213, + "step": 5600 + }, + { + "epoch": 1.71, + "learning_rate": 3.6578995433789956e-05, + "loss": 2.5476, + "step": 5625 + }, + { + "epoch": 1.72, + "learning_rate": 3.656377473363775e-05, + "loss": 2.4277, + "step": 5650 + }, + { + "epoch": 1.73, + "learning_rate": 3.654855403348554e-05, + "loss": 2.5481, + "step": 5675 + }, + { + "epoch": 1.74, + "learning_rate": 3.653333333333334e-05, + "loss": 2.3107, + "step": 5700 + }, + { + "epoch": 1.74, + "learning_rate": 3.651811263318113e-05, + "loss": 2.4638, + "step": 5725 + }, + { + "epoch": 1.75, + "learning_rate": 3.6502891933028924e-05, + "loss": 2.4764, + "step": 5750 + }, + { + "epoch": 1.76, + "learning_rate": 3.648767123287671e-05, + "loss": 2.5169, + "step": 5775 + }, + { + "epoch": 1.77, + "learning_rate": 3.647245053272451e-05, + "loss": 2.5419, + "step": 5800 + }, + { + "epoch": 1.77, + "learning_rate": 3.6457229832572306e-05, + "loss": 2.469, + "step": 5825 + }, + { + "epoch": 1.78, + "learning_rate": 3.6442009132420095e-05, + "loss": 2.5355, + "step": 5850 + }, + { + "epoch": 1.79, + "learning_rate": 3.6426788432267885e-05, + "loss": 2.4811, + "step": 5875 + }, + { + "epoch": 1.8, + "learning_rate": 3.641156773211568e-05, + "loss": 2.3855, + "step": 5900 + }, + { + "epoch": 1.8, + "learning_rate": 3.639634703196347e-05, + "loss": 2.5192, + "step": 5925 + }, + { + "epoch": 1.81, + "learning_rate": 3.6381126331811267e-05, + "loss": 2.5408, + "step": 5950 + }, + { + "epoch": 1.82, + "learning_rate": 3.6365905631659056e-05, + "loss": 2.4392, + "step": 5975 + }, + { + "epoch": 1.83, + "learning_rate": 3.635068493150685e-05, + "loss": 2.33, + "step": 6000 + }, + { + "epoch": 1.83, + "learning_rate": 3.633546423135465e-05, + "loss": 2.3823, + "step": 6025 + }, + { + "epoch": 1.84, + "learning_rate": 3.632024353120244e-05, + "loss": 2.5043, + "step": 6050 + }, + { + "epoch": 1.85, + "learning_rate": 3.6305022831050234e-05, + "loss": 2.5216, + "step": 6075 + }, + { + "epoch": 1.86, + "learning_rate": 3.6289802130898024e-05, + "loss": 2.5068, + "step": 6100 + }, + { + "epoch": 1.86, + "learning_rate": 3.627458143074582e-05, + "loss": 2.4264, + "step": 6125 + }, + { + "epoch": 1.87, + "learning_rate": 3.625936073059361e-05, + "loss": 2.3439, + "step": 6150 + }, + { + "epoch": 1.88, + "learning_rate": 3.62441400304414e-05, + "loss": 2.467, + "step": 6175 + }, + { + "epoch": 1.89, + "learning_rate": 3.62289193302892e-05, + "loss": 2.4853, + "step": 6200 + }, + { + "epoch": 1.89, + "learning_rate": 3.621369863013699e-05, + "loss": 2.4064, + "step": 6225 + }, + { + "epoch": 1.9, + "learning_rate": 3.619847792998478e-05, + "loss": 2.4467, + "step": 6250 + }, + { + "epoch": 1.91, + "learning_rate": 3.618325722983258e-05, + "loss": 2.4435, + "step": 6275 + }, + { + "epoch": 1.92, + "learning_rate": 3.6168036529680366e-05, + "loss": 2.4629, + "step": 6300 + }, + { + "epoch": 1.93, + "learning_rate": 3.615281582952816e-05, + "loss": 2.5577, + "step": 6325 + }, + { + "epoch": 1.93, + "learning_rate": 3.613759512937595e-05, + "loss": 2.5897, + "step": 6350 + }, + { + "epoch": 1.94, + "learning_rate": 3.612237442922375e-05, + "loss": 2.4605, + "step": 6375 + }, + { + "epoch": 1.95, + "learning_rate": 3.6107153729071544e-05, + "loss": 2.4198, + "step": 6400 + }, + { + "epoch": 1.96, + "learning_rate": 3.6091933028919334e-05, + "loss": 2.5914, + "step": 6425 + }, + { + "epoch": 1.96, + "learning_rate": 3.607671232876713e-05, + "loss": 2.4329, + "step": 6450 + }, + { + "epoch": 1.97, + "learning_rate": 3.606149162861492e-05, + "loss": 2.3896, + "step": 6475 + }, + { + "epoch": 1.98, + "learning_rate": 3.604627092846271e-05, + "loss": 2.4753, + "step": 6500 + }, + { + "epoch": 1.99, + "learning_rate": 3.6031050228310505e-05, + "loss": 2.4698, + "step": 6525 + }, + { + "epoch": 1.99, + "learning_rate": 3.6015829528158295e-05, + "loss": 2.4075, + "step": 6550 + }, + { + "epoch": 2.0, + "learning_rate": 3.600060882800609e-05, + "loss": 2.4441, + "step": 6575 + }, + { + "epoch": 2.01, + "learning_rate": 3.598538812785389e-05, + "loss": 2.3363, + "step": 6600 + }, + { + "epoch": 2.02, + "learning_rate": 3.597016742770168e-05, + "loss": 2.3688, + "step": 6625 + }, + { + "epoch": 2.02, + "learning_rate": 3.595494672754947e-05, + "loss": 2.362, + "step": 6650 + }, + { + "epoch": 2.03, + "learning_rate": 3.593972602739726e-05, + "loss": 2.4022, + "step": 6675 + }, + { + "epoch": 2.04, + "learning_rate": 3.592450532724506e-05, + "loss": 2.295, + "step": 6700 + }, + { + "epoch": 2.05, + "learning_rate": 3.590928462709285e-05, + "loss": 2.4906, + "step": 6725 + }, + { + "epoch": 2.05, + "learning_rate": 3.5894063926940644e-05, + "loss": 2.3863, + "step": 6750 + }, + { + "epoch": 2.06, + "learning_rate": 3.5878843226788434e-05, + "loss": 2.4918, + "step": 6775 + }, + { + "epoch": 2.07, + "learning_rate": 3.586362252663623e-05, + "loss": 2.3376, + "step": 6800 + }, + { + "epoch": 2.08, + "learning_rate": 3.5848401826484026e-05, + "loss": 2.4073, + "step": 6825 + }, + { + "epoch": 2.09, + "learning_rate": 3.5833181126331816e-05, + "loss": 2.4083, + "step": 6850 + }, + { + "epoch": 2.09, + "learning_rate": 3.5817960426179605e-05, + "loss": 2.3808, + "step": 6875 + }, + { + "epoch": 2.1, + "learning_rate": 3.58027397260274e-05, + "loss": 2.389, + "step": 6900 + }, + { + "epoch": 2.11, + "learning_rate": 3.578751902587519e-05, + "loss": 2.4459, + "step": 6925 + }, + { + "epoch": 2.12, + "learning_rate": 3.577229832572299e-05, + "loss": 2.3535, + "step": 6950 + }, + { + "epoch": 2.12, + "learning_rate": 3.5757077625570776e-05, + "loss": 2.3977, + "step": 6975 + }, + { + "epoch": 2.13, + "learning_rate": 3.574185692541857e-05, + "loss": 2.4717, + "step": 7000 + }, + { + "epoch": 2.14, + "learning_rate": 3.572663622526637e-05, + "loss": 2.4242, + "step": 7025 + }, + { + "epoch": 2.15, + "learning_rate": 3.571141552511416e-05, + "loss": 2.5278, + "step": 7050 + }, + { + "epoch": 2.15, + "learning_rate": 3.5696194824961955e-05, + "loss": 2.3942, + "step": 7075 + }, + { + "epoch": 2.16, + "learning_rate": 3.5680974124809744e-05, + "loss": 2.4924, + "step": 7100 + }, + { + "epoch": 2.17, + "learning_rate": 3.5665753424657533e-05, + "loss": 2.4798, + "step": 7125 + }, + { + "epoch": 2.18, + "learning_rate": 3.565053272450533e-05, + "loss": 2.4777, + "step": 7150 + }, + { + "epoch": 2.18, + "learning_rate": 3.563531202435312e-05, + "loss": 2.4038, + "step": 7175 + }, + { + "epoch": 2.19, + "learning_rate": 3.5620091324200915e-05, + "loss": 2.4612, + "step": 7200 + }, + { + "epoch": 2.2, + "learning_rate": 3.560487062404871e-05, + "loss": 2.4008, + "step": 7225 + }, + { + "epoch": 2.21, + "learning_rate": 3.55896499238965e-05, + "loss": 2.4106, + "step": 7250 + }, + { + "epoch": 2.21, + "learning_rate": 3.55744292237443e-05, + "loss": 2.3472, + "step": 7275 + }, + { + "epoch": 2.22, + "learning_rate": 3.555920852359209e-05, + "loss": 2.3121, + "step": 7300 + }, + { + "epoch": 2.23, + "learning_rate": 3.554398782343988e-05, + "loss": 2.3585, + "step": 7325 + }, + { + "epoch": 2.24, + "learning_rate": 3.552876712328767e-05, + "loss": 2.421, + "step": 7350 + }, + { + "epoch": 2.25, + "learning_rate": 3.551354642313547e-05, + "loss": 2.3866, + "step": 7375 + }, + { + "epoch": 2.25, + "learning_rate": 3.5498325722983265e-05, + "loss": 2.3897, + "step": 7400 + }, + { + "epoch": 2.26, + "learning_rate": 3.5483105022831054e-05, + "loss": 2.3006, + "step": 7425 + }, + { + "epoch": 2.27, + "learning_rate": 3.546788432267885e-05, + "loss": 2.4119, + "step": 7450 + }, + { + "epoch": 2.28, + "learning_rate": 3.545266362252664e-05, + "loss": 2.3586, + "step": 7475 + }, + { + "epoch": 2.28, + "learning_rate": 3.543744292237443e-05, + "loss": 2.4986, + "step": 7500 + }, + { + "epoch": 2.29, + "learning_rate": 3.5422222222222226e-05, + "loss": 2.401, + "step": 7525 + }, + { + "epoch": 2.3, + "learning_rate": 3.5407001522070015e-05, + "loss": 2.4442, + "step": 7550 + }, + { + "epoch": 2.31, + "learning_rate": 3.539178082191781e-05, + "loss": 2.4115, + "step": 7575 + }, + { + "epoch": 2.31, + "learning_rate": 3.537656012176561e-05, + "loss": 2.3774, + "step": 7600 + }, + { + "epoch": 2.32, + "learning_rate": 3.53613394216134e-05, + "loss": 2.4354, + "step": 7625 + }, + { + "epoch": 2.33, + "learning_rate": 3.534611872146119e-05, + "loss": 2.4084, + "step": 7650 + }, + { + "epoch": 2.34, + "learning_rate": 3.533089802130898e-05, + "loss": 2.3302, + "step": 7675 + }, + { + "epoch": 2.34, + "learning_rate": 3.531567732115678e-05, + "loss": 2.4794, + "step": 7700 + }, + { + "epoch": 2.35, + "learning_rate": 3.530045662100457e-05, + "loss": 2.5226, + "step": 7725 + }, + { + "epoch": 2.36, + "learning_rate": 3.528523592085236e-05, + "loss": 2.4108, + "step": 7750 + }, + { + "epoch": 2.37, + "learning_rate": 3.5270015220700154e-05, + "loss": 2.3983, + "step": 7775 + }, + { + "epoch": 2.37, + "learning_rate": 3.525479452054795e-05, + "loss": 2.355, + "step": 7800 + }, + { + "epoch": 2.38, + "learning_rate": 3.523957382039574e-05, + "loss": 2.4036, + "step": 7825 + }, + { + "epoch": 2.39, + "learning_rate": 3.5224353120243536e-05, + "loss": 2.3806, + "step": 7850 + }, + { + "epoch": 2.4, + "learning_rate": 3.5209132420091325e-05, + "loss": 2.397, + "step": 7875 + }, + { + "epoch": 2.4, + "learning_rate": 3.519391171993912e-05, + "loss": 2.4013, + "step": 7900 + }, + { + "epoch": 2.41, + "learning_rate": 3.517869101978691e-05, + "loss": 2.3221, + "step": 7925 + }, + { + "epoch": 2.42, + "learning_rate": 3.516347031963471e-05, + "loss": 2.4978, + "step": 7950 + }, + { + "epoch": 2.43, + "learning_rate": 3.51482496194825e-05, + "loss": 2.3336, + "step": 7975 + }, + { + "epoch": 2.44, + "learning_rate": 3.513302891933029e-05, + "loss": 2.3407, + "step": 8000 + }, + { + "epoch": 2.44, + "learning_rate": 3.511780821917809e-05, + "loss": 2.3885, + "step": 8025 + }, + { + "epoch": 2.45, + "learning_rate": 3.510258751902588e-05, + "loss": 2.3823, + "step": 8050 + }, + { + "epoch": 2.46, + "learning_rate": 3.5087366818873675e-05, + "loss": 2.3793, + "step": 8075 + }, + { + "epoch": 2.47, + "learning_rate": 3.5072146118721464e-05, + "loss": 2.5151, + "step": 8100 + }, + { + "epoch": 2.47, + "learning_rate": 3.5056925418569254e-05, + "loss": 2.4551, + "step": 8125 + }, + { + "epoch": 2.48, + "learning_rate": 3.504170471841705e-05, + "loss": 2.2755, + "step": 8150 + }, + { + "epoch": 2.49, + "learning_rate": 3.502648401826484e-05, + "loss": 2.3501, + "step": 8175 + }, + { + "epoch": 2.5, + "learning_rate": 3.5011263318112636e-05, + "loss": 2.1867, + "step": 8200 + }, + { + "epoch": 2.5, + "learning_rate": 3.499604261796043e-05, + "loss": 2.4121, + "step": 8225 + }, + { + "epoch": 2.51, + "learning_rate": 3.498082191780822e-05, + "loss": 2.3982, + "step": 8250 + }, + { + "epoch": 2.52, + "learning_rate": 3.496560121765602e-05, + "loss": 2.3759, + "step": 8275 + }, + { + "epoch": 2.53, + "learning_rate": 3.495038051750381e-05, + "loss": 2.392, + "step": 8300 + }, + { + "epoch": 2.53, + "learning_rate": 3.49351598173516e-05, + "loss": 2.3885, + "step": 8325 + }, + { + "epoch": 2.54, + "learning_rate": 3.491993911719939e-05, + "loss": 2.385, + "step": 8350 + }, + { + "epoch": 2.55, + "learning_rate": 3.490471841704718e-05, + "loss": 2.3473, + "step": 8375 + }, + { + "epoch": 2.56, + "learning_rate": 3.4889497716894985e-05, + "loss": 2.3964, + "step": 8400 + }, + { + "epoch": 2.56, + "learning_rate": 3.4874277016742775e-05, + "loss": 2.4064, + "step": 8425 + }, + { + "epoch": 2.57, + "learning_rate": 3.4859056316590564e-05, + "loss": 2.418, + "step": 8450 + }, + { + "epoch": 2.58, + "learning_rate": 3.484383561643836e-05, + "loss": 2.3923, + "step": 8475 + }, + { + "epoch": 2.59, + "learning_rate": 3.482861491628615e-05, + "loss": 2.3131, + "step": 8500 + }, + { + "epoch": 2.6, + "learning_rate": 3.4813394216133946e-05, + "loss": 2.28, + "step": 8525 + }, + { + "epoch": 2.6, + "learning_rate": 3.4798173515981735e-05, + "loss": 2.2658, + "step": 8550 + }, + { + "epoch": 2.61, + "learning_rate": 3.478295281582953e-05, + "loss": 2.3469, + "step": 8575 + }, + { + "epoch": 2.62, + "learning_rate": 3.476773211567733e-05, + "loss": 2.3742, + "step": 8600 + }, + { + "epoch": 2.63, + "learning_rate": 3.475251141552512e-05, + "loss": 2.2876, + "step": 8625 + }, + { + "epoch": 2.63, + "learning_rate": 3.4737290715372914e-05, + "loss": 2.3574, + "step": 8650 + }, + { + "epoch": 2.64, + "learning_rate": 3.47220700152207e-05, + "loss": 2.3034, + "step": 8675 + }, + { + "epoch": 2.65, + "learning_rate": 3.47068493150685e-05, + "loss": 2.4737, + "step": 8700 + }, + { + "epoch": 2.66, + "learning_rate": 3.469162861491629e-05, + "loss": 2.4021, + "step": 8725 + }, + { + "epoch": 2.66, + "learning_rate": 3.467640791476408e-05, + "loss": 2.3528, + "step": 8750 + }, + { + "epoch": 2.67, + "learning_rate": 3.4661187214611874e-05, + "loss": 2.3844, + "step": 8775 + }, + { + "epoch": 2.68, + "learning_rate": 3.464596651445967e-05, + "loss": 2.3548, + "step": 8800 + }, + { + "epoch": 2.69, + "learning_rate": 3.463074581430746e-05, + "loss": 2.3506, + "step": 8825 + }, + { + "epoch": 2.69, + "learning_rate": 3.4615525114155256e-05, + "loss": 2.2949, + "step": 8850 + }, + { + "epoch": 2.7, + "learning_rate": 3.4600304414003046e-05, + "loss": 2.4181, + "step": 8875 + }, + { + "epoch": 2.71, + "learning_rate": 3.458508371385084e-05, + "loss": 2.3763, + "step": 8900 + }, + { + "epoch": 2.72, + "learning_rate": 3.456986301369863e-05, + "loss": 2.3273, + "step": 8925 + }, + { + "epoch": 2.72, + "learning_rate": 3.455464231354643e-05, + "loss": 2.2915, + "step": 8950 + }, + { + "epoch": 2.73, + "learning_rate": 3.453942161339422e-05, + "loss": 2.2817, + "step": 8975 + }, + { + "epoch": 2.74, + "learning_rate": 3.452420091324201e-05, + "loss": 2.4281, + "step": 9000 + }, + { + "epoch": 2.75, + "learning_rate": 3.450898021308981e-05, + "loss": 2.3455, + "step": 9025 + }, + { + "epoch": 2.75, + "learning_rate": 3.44937595129376e-05, + "loss": 2.3443, + "step": 9050 + }, + { + "epoch": 2.76, + "learning_rate": 3.447853881278539e-05, + "loss": 2.3764, + "step": 9075 + }, + { + "epoch": 2.77, + "learning_rate": 3.4463318112633185e-05, + "loss": 2.3281, + "step": 9100 + }, + { + "epoch": 2.78, + "learning_rate": 3.4448097412480974e-05, + "loss": 2.3462, + "step": 9125 + }, + { + "epoch": 2.79, + "learning_rate": 3.443287671232877e-05, + "loss": 2.3027, + "step": 9150 + }, + { + "epoch": 2.79, + "learning_rate": 3.441765601217656e-05, + "loss": 2.3474, + "step": 9175 + }, + { + "epoch": 2.8, + "learning_rate": 3.4402435312024356e-05, + "loss": 2.4758, + "step": 9200 + }, + { + "epoch": 2.81, + "learning_rate": 3.438721461187215e-05, + "loss": 2.4488, + "step": 9225 + }, + { + "epoch": 2.82, + "learning_rate": 3.437199391171994e-05, + "loss": 2.2771, + "step": 9250 + }, + { + "epoch": 2.82, + "learning_rate": 3.435677321156774e-05, + "loss": 2.3559, + "step": 9275 + }, + { + "epoch": 2.83, + "learning_rate": 3.434155251141553e-05, + "loss": 2.4645, + "step": 9300 + }, + { + "epoch": 2.84, + "learning_rate": 3.4326331811263324e-05, + "loss": 2.3265, + "step": 9325 + }, + { + "epoch": 2.85, + "learning_rate": 3.431111111111111e-05, + "loss": 2.3063, + "step": 9350 + }, + { + "epoch": 2.85, + "learning_rate": 3.42958904109589e-05, + "loss": 2.3427, + "step": 9375 + }, + { + "epoch": 2.86, + "learning_rate": 3.42806697108067e-05, + "loss": 2.372, + "step": 9400 + }, + { + "epoch": 2.87, + "learning_rate": 3.4265449010654495e-05, + "loss": 2.4457, + "step": 9425 + }, + { + "epoch": 2.88, + "learning_rate": 3.4250228310502284e-05, + "loss": 2.4012, + "step": 9450 + }, + { + "epoch": 2.88, + "learning_rate": 3.423500761035008e-05, + "loss": 2.4501, + "step": 9475 + }, + { + "epoch": 2.89, + "learning_rate": 3.421978691019787e-05, + "loss": 2.4032, + "step": 9500 + }, + { + "epoch": 2.9, + "learning_rate": 3.4204566210045666e-05, + "loss": 2.2623, + "step": 9525 + }, + { + "epoch": 2.91, + "learning_rate": 3.4189345509893456e-05, + "loss": 2.3914, + "step": 9550 + }, + { + "epoch": 2.91, + "learning_rate": 3.417412480974125e-05, + "loss": 2.2566, + "step": 9575 + }, + { + "epoch": 2.92, + "learning_rate": 3.415890410958904e-05, + "loss": 2.3409, + "step": 9600 + }, + { + "epoch": 2.93, + "learning_rate": 3.414368340943684e-05, + "loss": 2.4972, + "step": 9625 + }, + { + "epoch": 2.94, + "learning_rate": 3.4128462709284634e-05, + "loss": 2.4752, + "step": 9650 + }, + { + "epoch": 2.95, + "learning_rate": 3.4113242009132423e-05, + "loss": 2.3794, + "step": 9675 + }, + { + "epoch": 2.95, + "learning_rate": 3.40986301369863e-05, + "loss": 2.3816, + "step": 9700 + }, + { + "epoch": 2.96, + "learning_rate": 3.40834094368341e-05, + "loss": 2.3872, + "step": 9725 + }, + { + "epoch": 2.97, + "learning_rate": 3.406818873668189e-05, + "loss": 2.3142, + "step": 9750 + }, + { + "epoch": 2.98, + "learning_rate": 3.4052968036529685e-05, + "loss": 2.3174, + "step": 9775 + }, + { + "epoch": 2.98, + "learning_rate": 3.4037747336377474e-05, + "loss": 2.2765, + "step": 9800 + }, + { + "epoch": 2.99, + "learning_rate": 3.402252663622527e-05, + "loss": 2.3761, + "step": 9825 + }, + { + "epoch": 3.0, + "learning_rate": 3.400730593607306e-05, + "loss": 2.2079, + "step": 9850 + }, + { + "epoch": 3.01, + "learning_rate": 3.3992085235920856e-05, + "loss": 2.4002, + "step": 9875 + }, + { + "epoch": 3.01, + "learning_rate": 3.397686453576865e-05, + "loss": 2.3128, + "step": 9900 + }, + { + "epoch": 3.02, + "learning_rate": 3.396164383561644e-05, + "loss": 2.3319, + "step": 9925 + }, + { + "epoch": 3.03, + "learning_rate": 3.394642313546424e-05, + "loss": 2.3102, + "step": 9950 + }, + { + "epoch": 3.04, + "learning_rate": 3.393120243531203e-05, + "loss": 2.3264, + "step": 9975 + }, + { + "epoch": 3.04, + "learning_rate": 3.391598173515982e-05, + "loss": 2.285, + "step": 10000 + }, + { + "epoch": 3.05, + "learning_rate": 3.390076103500761e-05, + "loss": 2.3575, + "step": 10025 + }, + { + "epoch": 3.06, + "learning_rate": 3.38855403348554e-05, + "loss": 2.3238, + "step": 10050 + }, + { + "epoch": 3.07, + "learning_rate": 3.38703196347032e-05, + "loss": 2.3473, + "step": 10075 + }, + { + "epoch": 3.07, + "learning_rate": 3.3855098934550995e-05, + "loss": 2.3622, + "step": 10100 + }, + { + "epoch": 3.08, + "learning_rate": 3.3839878234398785e-05, + "loss": 2.3262, + "step": 10125 + }, + { + "epoch": 3.09, + "learning_rate": 3.382465753424658e-05, + "loss": 2.2765, + "step": 10150 + }, + { + "epoch": 3.1, + "learning_rate": 3.380943683409437e-05, + "loss": 2.3726, + "step": 10175 + }, + { + "epoch": 3.11, + "learning_rate": 3.3794216133942167e-05, + "loss": 2.3951, + "step": 10200 + }, + { + "epoch": 3.11, + "learning_rate": 3.3778995433789956e-05, + "loss": 2.2942, + "step": 10225 + }, + { + "epoch": 3.12, + "learning_rate": 3.3763774733637745e-05, + "loss": 2.2152, + "step": 10250 + }, + { + "epoch": 3.13, + "learning_rate": 3.374855403348554e-05, + "loss": 2.2103, + "step": 10275 + }, + { + "epoch": 3.14, + "learning_rate": 3.373333333333334e-05, + "loss": 2.3017, + "step": 10300 + }, + { + "epoch": 3.14, + "learning_rate": 3.371811263318113e-05, + "loss": 2.34, + "step": 10325 + }, + { + "epoch": 3.15, + "learning_rate": 3.3702891933028924e-05, + "loss": 2.411, + "step": 10350 + }, + { + "epoch": 3.16, + "learning_rate": 3.368767123287671e-05, + "loss": 2.3578, + "step": 10375 + }, + { + "epoch": 3.17, + "learning_rate": 3.367245053272451e-05, + "loss": 2.3459, + "step": 10400 + }, + { + "epoch": 3.17, + "learning_rate": 3.36572298325723e-05, + "loss": 2.37, + "step": 10425 + }, + { + "epoch": 3.18, + "learning_rate": 3.3642009132420095e-05, + "loss": 2.2468, + "step": 10450 + }, + { + "epoch": 3.19, + "learning_rate": 3.3626788432267884e-05, + "loss": 2.347, + "step": 10475 + }, + { + "epoch": 3.2, + "learning_rate": 3.361156773211568e-05, + "loss": 2.2949, + "step": 10500 + }, + { + "epoch": 3.2, + "learning_rate": 3.359634703196348e-05, + "loss": 2.2398, + "step": 10525 + }, + { + "epoch": 3.21, + "learning_rate": 3.3581126331811266e-05, + "loss": 2.3733, + "step": 10550 + }, + { + "epoch": 3.22, + "learning_rate": 3.356590563165906e-05, + "loss": 2.3245, + "step": 10575 + }, + { + "epoch": 3.23, + "learning_rate": 3.355068493150685e-05, + "loss": 2.3472, + "step": 10600 + }, + { + "epoch": 3.23, + "learning_rate": 3.353546423135464e-05, + "loss": 2.2422, + "step": 10625 + }, + { + "epoch": 3.24, + "learning_rate": 3.352024353120244e-05, + "loss": 2.2771, + "step": 10650 + }, + { + "epoch": 3.25, + "learning_rate": 3.350502283105023e-05, + "loss": 2.3022, + "step": 10675 + }, + { + "epoch": 3.26, + "learning_rate": 3.348980213089802e-05, + "loss": 2.363, + "step": 10700 + }, + { + "epoch": 3.26, + "learning_rate": 3.347458143074582e-05, + "loss": 2.3812, + "step": 10725 + }, + { + "epoch": 3.27, + "learning_rate": 3.345936073059361e-05, + "loss": 2.3246, + "step": 10750 + }, + { + "epoch": 3.28, + "learning_rate": 3.3444140030441405e-05, + "loss": 2.2674, + "step": 10775 + }, + { + "epoch": 3.29, + "learning_rate": 3.3428919330289195e-05, + "loss": 2.2517, + "step": 10800 + }, + { + "epoch": 3.3, + "learning_rate": 3.341369863013699e-05, + "loss": 2.3662, + "step": 10825 + }, + { + "epoch": 3.3, + "learning_rate": 3.339847792998478e-05, + "loss": 2.3391, + "step": 10850 + }, + { + "epoch": 3.31, + "learning_rate": 3.338325722983257e-05, + "loss": 2.3473, + "step": 10875 + }, + { + "epoch": 3.32, + "learning_rate": 3.336803652968037e-05, + "loss": 2.34, + "step": 10900 + }, + { + "epoch": 3.33, + "learning_rate": 3.335281582952816e-05, + "loss": 2.3697, + "step": 10925 + }, + { + "epoch": 3.33, + "learning_rate": 3.333759512937595e-05, + "loss": 2.3801, + "step": 10950 + }, + { + "epoch": 3.34, + "learning_rate": 3.332237442922375e-05, + "loss": 2.2578, + "step": 10975 + }, + { + "epoch": 3.35, + "learning_rate": 3.330715372907154e-05, + "loss": 2.287, + "step": 11000 + }, + { + "epoch": 3.36, + "learning_rate": 3.3291933028919334e-05, + "loss": 2.3065, + "step": 11025 + }, + { + "epoch": 3.36, + "learning_rate": 3.327671232876712e-05, + "loss": 2.3242, + "step": 11050 + }, + { + "epoch": 3.37, + "learning_rate": 3.326149162861492e-05, + "loss": 2.381, + "step": 11075 + }, + { + "epoch": 3.38, + "learning_rate": 3.3246270928462716e-05, + "loss": 2.3044, + "step": 11100 + }, + { + "epoch": 3.39, + "learning_rate": 3.3231050228310505e-05, + "loss": 2.3529, + "step": 11125 + }, + { + "epoch": 3.39, + "learning_rate": 3.32158295281583e-05, + "loss": 2.3205, + "step": 11150 + }, + { + "epoch": 3.4, + "learning_rate": 3.320060882800609e-05, + "loss": 2.2909, + "step": 11175 + }, + { + "epoch": 3.41, + "learning_rate": 3.318538812785389e-05, + "loss": 2.2733, + "step": 11200 + }, + { + "epoch": 3.42, + "learning_rate": 3.3170167427701676e-05, + "loss": 2.3524, + "step": 11225 + }, + { + "epoch": 3.42, + "learning_rate": 3.3154946727549466e-05, + "loss": 2.4237, + "step": 11250 + }, + { + "epoch": 3.43, + "learning_rate": 3.313972602739726e-05, + "loss": 2.3506, + "step": 11275 + }, + { + "epoch": 3.44, + "learning_rate": 3.312450532724506e-05, + "loss": 2.239, + "step": 11300 + }, + { + "epoch": 3.45, + "learning_rate": 3.310928462709285e-05, + "loss": 2.2446, + "step": 11325 + }, + { + "epoch": 3.46, + "learning_rate": 3.3094063926940644e-05, + "loss": 2.4092, + "step": 11350 + }, + { + "epoch": 3.46, + "learning_rate": 3.307884322678843e-05, + "loss": 2.2258, + "step": 11375 + }, + { + "epoch": 3.47, + "learning_rate": 3.306362252663623e-05, + "loss": 2.3767, + "step": 11400 + }, + { + "epoch": 3.48, + "learning_rate": 3.304840182648402e-05, + "loss": 2.3656, + "step": 11425 + }, + { + "epoch": 3.49, + "learning_rate": 3.3033181126331815e-05, + "loss": 2.3565, + "step": 11450 + }, + { + "epoch": 3.49, + "learning_rate": 3.3017960426179605e-05, + "loss": 2.3451, + "step": 11475 + }, + { + "epoch": 3.5, + "learning_rate": 3.30027397260274e-05, + "loss": 2.3693, + "step": 11500 + }, + { + "epoch": 3.51, + "learning_rate": 3.29875190258752e-05, + "loss": 2.2994, + "step": 11525 + }, + { + "epoch": 3.52, + "learning_rate": 3.297229832572299e-05, + "loss": 2.3294, + "step": 11550 + }, + { + "epoch": 3.52, + "learning_rate": 3.2957077625570776e-05, + "loss": 2.3281, + "step": 11575 + }, + { + "epoch": 3.53, + "learning_rate": 3.294185692541857e-05, + "loss": 2.2991, + "step": 11600 + }, + { + "epoch": 3.54, + "learning_rate": 3.292663622526636e-05, + "loss": 2.4062, + "step": 11625 + }, + { + "epoch": 3.55, + "learning_rate": 3.291141552511416e-05, + "loss": 2.2678, + "step": 11650 + }, + { + "epoch": 3.55, + "learning_rate": 3.289619482496195e-05, + "loss": 2.3024, + "step": 11675 + }, + { + "epoch": 3.56, + "learning_rate": 3.2880974124809744e-05, + "loss": 2.2003, + "step": 11700 + }, + { + "epoch": 3.57, + "learning_rate": 3.286575342465754e-05, + "loss": 2.3133, + "step": 11725 + }, + { + "epoch": 3.58, + "learning_rate": 3.285053272450533e-05, + "loss": 2.347, + "step": 11750 + }, + { + "epoch": 3.58, + "learning_rate": 3.2835312024353126e-05, + "loss": 2.2283, + "step": 11775 + }, + { + "epoch": 3.59, + "learning_rate": 3.2820091324200915e-05, + "loss": 2.293, + "step": 11800 + }, + { + "epoch": 3.6, + "learning_rate": 3.280487062404871e-05, + "loss": 2.3133, + "step": 11825 + }, + { + "epoch": 3.61, + "learning_rate": 3.27896499238965e-05, + "loss": 2.306, + "step": 11850 + }, + { + "epoch": 3.61, + "learning_rate": 3.277442922374429e-05, + "loss": 2.3111, + "step": 11875 + }, + { + "epoch": 3.62, + "learning_rate": 3.275920852359209e-05, + "loss": 2.2691, + "step": 11900 + }, + { + "epoch": 3.63, + "learning_rate": 3.274398782343988e-05, + "loss": 2.4566, + "step": 11925 + }, + { + "epoch": 3.64, + "learning_rate": 3.272876712328767e-05, + "loss": 2.3288, + "step": 11950 + }, + { + "epoch": 3.65, + "learning_rate": 3.271354642313547e-05, + "loss": 2.2988, + "step": 11975 + }, + { + "epoch": 3.65, + "learning_rate": 3.269832572298326e-05, + "loss": 2.2849, + "step": 12000 + }, + { + "epoch": 3.66, + "learning_rate": 3.2683105022831054e-05, + "loss": 2.2855, + "step": 12025 + }, + { + "epoch": 3.67, + "learning_rate": 3.2667884322678843e-05, + "loss": 2.3684, + "step": 12050 + }, + { + "epoch": 3.68, + "learning_rate": 3.265266362252664e-05, + "loss": 2.2872, + "step": 12075 + }, + { + "epoch": 3.68, + "learning_rate": 3.2637442922374436e-05, + "loss": 2.2523, + "step": 12100 + }, + { + "epoch": 3.69, + "learning_rate": 3.2622222222222225e-05, + "loss": 2.1726, + "step": 12125 + }, + { + "epoch": 3.7, + "learning_rate": 3.260700152207002e-05, + "loss": 2.4131, + "step": 12150 + }, + { + "epoch": 3.71, + "learning_rate": 3.259178082191781e-05, + "loss": 2.1719, + "step": 12175 + }, + { + "epoch": 3.71, + "learning_rate": 3.25765601217656e-05, + "loss": 2.3665, + "step": 12200 + }, + { + "epoch": 3.72, + "learning_rate": 3.25613394216134e-05, + "loss": 2.3097, + "step": 12225 + }, + { + "epoch": 3.73, + "learning_rate": 3.2546118721461186e-05, + "loss": 2.3808, + "step": 12250 + }, + { + "epoch": 3.74, + "learning_rate": 3.253089802130898e-05, + "loss": 2.3309, + "step": 12275 + }, + { + "epoch": 3.74, + "learning_rate": 3.251567732115678e-05, + "loss": 2.2463, + "step": 12300 + }, + { + "epoch": 3.75, + "learning_rate": 3.250045662100457e-05, + "loss": 2.2689, + "step": 12325 + }, + { + "epoch": 3.76, + "learning_rate": 3.2485235920852364e-05, + "loss": 2.301, + "step": 12350 + }, + { + "epoch": 3.77, + "learning_rate": 3.2470015220700154e-05, + "loss": 2.2508, + "step": 12375 + }, + { + "epoch": 3.77, + "learning_rate": 3.245479452054795e-05, + "loss": 2.2932, + "step": 12400 + }, + { + "epoch": 3.78, + "learning_rate": 3.243957382039574e-05, + "loss": 2.2948, + "step": 12425 + }, + { + "epoch": 3.79, + "learning_rate": 3.2424353120243536e-05, + "loss": 2.3646, + "step": 12450 + }, + { + "epoch": 3.8, + "learning_rate": 3.2409132420091325e-05, + "loss": 2.279, + "step": 12475 + }, + { + "epoch": 3.81, + "learning_rate": 3.239391171993912e-05, + "loss": 2.2907, + "step": 12500 + }, + { + "epoch": 3.81, + "learning_rate": 3.2379299847793e-05, + "loss": 2.2119, + "step": 12525 + }, + { + "epoch": 3.82, + "learning_rate": 3.236407914764079e-05, + "loss": 2.24, + "step": 12550 + }, + { + "epoch": 3.83, + "learning_rate": 3.2348858447488587e-05, + "loss": 2.3259, + "step": 12575 + }, + { + "epoch": 3.84, + "learning_rate": 3.233363774733638e-05, + "loss": 2.2823, + "step": 12600 + }, + { + "epoch": 3.84, + "learning_rate": 3.231841704718417e-05, + "loss": 2.3152, + "step": 12625 + }, + { + "epoch": 3.85, + "learning_rate": 3.230319634703197e-05, + "loss": 2.2235, + "step": 12650 + }, + { + "epoch": 3.86, + "learning_rate": 3.228797564687976e-05, + "loss": 2.2921, + "step": 12675 + }, + { + "epoch": 3.87, + "learning_rate": 3.2272754946727554e-05, + "loss": 2.2987, + "step": 12700 + }, + { + "epoch": 3.87, + "learning_rate": 3.2257534246575344e-05, + "loss": 2.2525, + "step": 12725 + }, + { + "epoch": 3.88, + "learning_rate": 3.224231354642313e-05, + "loss": 2.31, + "step": 12750 + }, + { + "epoch": 3.89, + "learning_rate": 3.2227092846270936e-05, + "loss": 2.2679, + "step": 12775 + }, + { + "epoch": 3.9, + "learning_rate": 3.2211872146118725e-05, + "loss": 2.3119, + "step": 12800 + }, + { + "epoch": 3.9, + "learning_rate": 3.2196651445966515e-05, + "loss": 2.2647, + "step": 12825 + }, + { + "epoch": 3.91, + "learning_rate": 3.218143074581431e-05, + "loss": 2.2724, + "step": 12850 + }, + { + "epoch": 3.92, + "learning_rate": 3.21662100456621e-05, + "loss": 2.1599, + "step": 12875 + }, + { + "epoch": 3.93, + "learning_rate": 3.21509893455099e-05, + "loss": 2.307, + "step": 12900 + }, + { + "epoch": 3.93, + "learning_rate": 3.2135768645357686e-05, + "loss": 2.282, + "step": 12925 + }, + { + "epoch": 3.94, + "learning_rate": 3.212054794520548e-05, + "loss": 2.2976, + "step": 12950 + }, + { + "epoch": 3.95, + "learning_rate": 3.210532724505328e-05, + "loss": 2.2577, + "step": 12975 + }, + { + "epoch": 3.96, + "learning_rate": 3.209010654490107e-05, + "loss": 2.3745, + "step": 13000 + }, + { + "epoch": 3.96, + "learning_rate": 3.2074885844748864e-05, + "loss": 2.3284, + "step": 13025 + }, + { + "epoch": 3.97, + "learning_rate": 3.2059665144596654e-05, + "loss": 2.2928, + "step": 13050 + }, + { + "epoch": 3.98, + "learning_rate": 3.204444444444445e-05, + "loss": 2.3184, + "step": 13075 + }, + { + "epoch": 3.99, + "learning_rate": 3.202922374429224e-05, + "loss": 2.377, + "step": 13100 + }, + { + "epoch": 4.0, + "learning_rate": 3.201400304414003e-05, + "loss": 2.2414, + "step": 13125 + }, + { + "epoch": 4.0, + "learning_rate": 3.1998782343987825e-05, + "loss": 2.2791, + "step": 13150 + }, + { + "epoch": 4.01, + "learning_rate": 3.198356164383562e-05, + "loss": 2.2645, + "step": 13175 + }, + { + "epoch": 4.02, + "learning_rate": 3.196834094368341e-05, + "loss": 2.2783, + "step": 13200 + }, + { + "epoch": 4.03, + "learning_rate": 3.195312024353121e-05, + "loss": 2.2995, + "step": 13225 + }, + { + "epoch": 4.03, + "learning_rate": 3.1937899543378997e-05, + "loss": 2.3162, + "step": 13250 + }, + { + "epoch": 4.04, + "learning_rate": 3.192267884322679e-05, + "loss": 2.273, + "step": 13275 + }, + { + "epoch": 4.05, + "learning_rate": 3.190745814307458e-05, + "loss": 2.1421, + "step": 13300 + }, + { + "epoch": 4.06, + "learning_rate": 3.189223744292238e-05, + "loss": 2.2324, + "step": 13325 + }, + { + "epoch": 4.06, + "learning_rate": 3.187701674277017e-05, + "loss": 2.148, + "step": 13350 + }, + { + "epoch": 4.07, + "learning_rate": 3.1861796042617964e-05, + "loss": 2.2015, + "step": 13375 + }, + { + "epoch": 4.08, + "learning_rate": 3.184657534246576e-05, + "loss": 2.2374, + "step": 13400 + }, + { + "epoch": 4.09, + "learning_rate": 3.183135464231355e-05, + "loss": 2.172, + "step": 13425 + }, + { + "epoch": 4.09, + "learning_rate": 3.181613394216134e-05, + "loss": 2.2591, + "step": 13450 + }, + { + "epoch": 4.1, + "learning_rate": 3.1800913242009136e-05, + "loss": 2.3048, + "step": 13475 + }, + { + "epoch": 4.11, + "learning_rate": 3.1785692541856925e-05, + "loss": 2.2699, + "step": 13500 + }, + { + "epoch": 4.12, + "learning_rate": 3.177047184170472e-05, + "loss": 2.2023, + "step": 13525 + }, + { + "epoch": 4.12, + "learning_rate": 3.175525114155251e-05, + "loss": 2.2619, + "step": 13550 + }, + { + "epoch": 4.13, + "learning_rate": 3.174003044140031e-05, + "loss": 2.2799, + "step": 13575 + }, + { + "epoch": 4.14, + "learning_rate": 3.17248097412481e-05, + "loss": 2.2879, + "step": 13600 + }, + { + "epoch": 4.15, + "learning_rate": 3.170958904109589e-05, + "loss": 2.2305, + "step": 13625 + }, + { + "epoch": 4.16, + "learning_rate": 3.169436834094369e-05, + "loss": 2.2701, + "step": 13650 + }, + { + "epoch": 4.16, + "learning_rate": 3.167914764079148e-05, + "loss": 2.1813, + "step": 13675 + }, + { + "epoch": 4.17, + "learning_rate": 3.1663926940639274e-05, + "loss": 2.239, + "step": 13700 + }, + { + "epoch": 4.18, + "learning_rate": 3.1648706240487064e-05, + "loss": 2.1826, + "step": 13725 + }, + { + "epoch": 4.19, + "learning_rate": 3.1633485540334853e-05, + "loss": 2.2252, + "step": 13750 + }, + { + "epoch": 4.19, + "learning_rate": 3.1618264840182656e-05, + "loss": 2.1614, + "step": 13775 + }, + { + "epoch": 4.2, + "learning_rate": 3.1603044140030446e-05, + "loss": 2.2616, + "step": 13800 + }, + { + "epoch": 4.21, + "learning_rate": 3.1587823439878235e-05, + "loss": 2.174, + "step": 13825 + }, + { + "epoch": 4.22, + "learning_rate": 3.157260273972603e-05, + "loss": 2.176, + "step": 13850 + }, + { + "epoch": 4.22, + "learning_rate": 3.155738203957382e-05, + "loss": 2.3032, + "step": 13875 + }, + { + "epoch": 4.23, + "learning_rate": 3.154216133942162e-05, + "loss": 2.3267, + "step": 13900 + }, + { + "epoch": 4.24, + "learning_rate": 3.152694063926941e-05, + "loss": 2.2344, + "step": 13925 + }, + { + "epoch": 4.25, + "learning_rate": 3.15117199391172e-05, + "loss": 2.2329, + "step": 13950 + }, + { + "epoch": 4.25, + "learning_rate": 3.1496499238965e-05, + "loss": 2.2776, + "step": 13975 + }, + { + "epoch": 4.26, + "learning_rate": 3.148127853881279e-05, + "loss": 2.2616, + "step": 14000 + }, + { + "epoch": 4.27, + "learning_rate": 3.1466057838660585e-05, + "loss": 2.2726, + "step": 14025 + }, + { + "epoch": 4.28, + "learning_rate": 3.1450837138508374e-05, + "loss": 2.2973, + "step": 14050 + }, + { + "epoch": 4.28, + "learning_rate": 3.1435616438356164e-05, + "loss": 2.4038, + "step": 14075 + }, + { + "epoch": 4.29, + "learning_rate": 3.142039573820396e-05, + "loss": 2.1422, + "step": 14100 + }, + { + "epoch": 4.3, + "learning_rate": 3.140517503805175e-05, + "loss": 2.2385, + "step": 14125 + }, + { + "epoch": 4.31, + "learning_rate": 3.1389954337899546e-05, + "loss": 2.3466, + "step": 14150 + }, + { + "epoch": 4.32, + "learning_rate": 3.137473363774734e-05, + "loss": 2.3624, + "step": 14175 + }, + { + "epoch": 4.32, + "learning_rate": 3.135951293759513e-05, + "loss": 2.1896, + "step": 14200 + }, + { + "epoch": 4.33, + "learning_rate": 3.134429223744293e-05, + "loss": 2.2879, + "step": 14225 + }, + { + "epoch": 4.34, + "learning_rate": 3.132907153729072e-05, + "loss": 2.2648, + "step": 14250 + }, + { + "epoch": 4.35, + "learning_rate": 3.131385083713851e-05, + "loss": 2.1587, + "step": 14275 + }, + { + "epoch": 4.35, + "learning_rate": 3.12986301369863e-05, + "loss": 2.2501, + "step": 14300 + }, + { + "epoch": 4.36, + "learning_rate": 3.12834094368341e-05, + "loss": 2.3226, + "step": 14325 + }, + { + "epoch": 4.37, + "learning_rate": 3.126818873668189e-05, + "loss": 2.2835, + "step": 14350 + }, + { + "epoch": 4.38, + "learning_rate": 3.1252968036529685e-05, + "loss": 2.1606, + "step": 14375 + }, + { + "epoch": 4.38, + "learning_rate": 3.123774733637748e-05, + "loss": 2.3472, + "step": 14400 + }, + { + "epoch": 4.39, + "learning_rate": 3.122252663622527e-05, + "loss": 2.3695, + "step": 14425 + }, + { + "epoch": 4.4, + "learning_rate": 3.120730593607306e-05, + "loss": 2.3861, + "step": 14450 + }, + { + "epoch": 4.41, + "learning_rate": 3.1192085235920856e-05, + "loss": 2.169, + "step": 14475 + }, + { + "epoch": 4.41, + "learning_rate": 3.1176864535768645e-05, + "loss": 2.2452, + "step": 14500 + }, + { + "epoch": 4.42, + "learning_rate": 3.116164383561644e-05, + "loss": 2.3446, + "step": 14525 + }, + { + "epoch": 4.43, + "learning_rate": 3.114642313546423e-05, + "loss": 2.265, + "step": 14550 + }, + { + "epoch": 4.44, + "learning_rate": 3.113120243531203e-05, + "loss": 2.3011, + "step": 14575 + }, + { + "epoch": 4.44, + "learning_rate": 3.1115981735159824e-05, + "loss": 2.1554, + "step": 14600 + }, + { + "epoch": 4.45, + "learning_rate": 3.110076103500761e-05, + "loss": 2.3185, + "step": 14625 + }, + { + "epoch": 4.46, + "learning_rate": 3.108554033485541e-05, + "loss": 2.1786, + "step": 14650 + }, + { + "epoch": 4.47, + "learning_rate": 3.10703196347032e-05, + "loss": 2.3914, + "step": 14675 + }, + { + "epoch": 4.47, + "learning_rate": 3.105509893455099e-05, + "loss": 2.2981, + "step": 14700 + }, + { + "epoch": 4.48, + "learning_rate": 3.1039878234398784e-05, + "loss": 2.1446, + "step": 14725 + }, + { + "epoch": 4.49, + "learning_rate": 3.1024657534246574e-05, + "loss": 2.2635, + "step": 14750 + }, + { + "epoch": 4.5, + "learning_rate": 3.100943683409437e-05, + "loss": 2.3167, + "step": 14775 + }, + { + "epoch": 4.51, + "learning_rate": 3.0994216133942166e-05, + "loss": 2.161, + "step": 14800 + }, + { + "epoch": 4.51, + "learning_rate": 3.0978995433789956e-05, + "loss": 2.2632, + "step": 14825 + }, + { + "epoch": 4.52, + "learning_rate": 3.096377473363775e-05, + "loss": 2.2193, + "step": 14850 + }, + { + "epoch": 4.53, + "learning_rate": 3.094855403348554e-05, + "loss": 2.2079, + "step": 14875 + }, + { + "epoch": 4.54, + "learning_rate": 3.093333333333334e-05, + "loss": 2.2739, + "step": 14900 + }, + { + "epoch": 4.54, + "learning_rate": 3.091811263318113e-05, + "loss": 2.2497, + "step": 14925 + }, + { + "epoch": 4.55, + "learning_rate": 3.090289193302892e-05, + "loss": 2.3314, + "step": 14950 + }, + { + "epoch": 4.56, + "learning_rate": 3.088767123287672e-05, + "loss": 2.14, + "step": 14975 + }, + { + "epoch": 4.57, + "learning_rate": 3.087245053272451e-05, + "loss": 2.2944, + "step": 15000 + }, + { + "epoch": 4.57, + "learning_rate": 3.0857229832572305e-05, + "loss": 2.2942, + "step": 15025 + }, + { + "epoch": 4.58, + "learning_rate": 3.0842009132420095e-05, + "loss": 2.2938, + "step": 15050 + }, + { + "epoch": 4.59, + "learning_rate": 3.0826788432267884e-05, + "loss": 2.2439, + "step": 15075 + }, + { + "epoch": 4.6, + "learning_rate": 3.081156773211568e-05, + "loss": 2.3085, + "step": 15100 + }, + { + "epoch": 4.6, + "learning_rate": 3.079634703196347e-05, + "loss": 2.2249, + "step": 15125 + }, + { + "epoch": 4.61, + "learning_rate": 3.0781126331811266e-05, + "loss": 2.2429, + "step": 15150 + }, + { + "epoch": 4.62, + "learning_rate": 3.076590563165906e-05, + "loss": 2.2207, + "step": 15175 + }, + { + "epoch": 4.63, + "learning_rate": 3.075068493150685e-05, + "loss": 2.2028, + "step": 15200 + }, + { + "epoch": 4.63, + "learning_rate": 3.073546423135465e-05, + "loss": 2.1323, + "step": 15225 + }, + { + "epoch": 4.64, + "learning_rate": 3.072024353120244e-05, + "loss": 2.2831, + "step": 15250 + }, + { + "epoch": 4.65, + "learning_rate": 3.0705022831050234e-05, + "loss": 2.2963, + "step": 15275 + }, + { + "epoch": 4.66, + "learning_rate": 3.068980213089802e-05, + "loss": 2.29, + "step": 15300 + }, + { + "epoch": 4.67, + "learning_rate": 3.067458143074581e-05, + "loss": 2.2412, + "step": 15325 + }, + { + "epoch": 4.67, + "learning_rate": 3.065936073059361e-05, + "loss": 2.3399, + "step": 15350 + }, + { + "epoch": 4.68, + "learning_rate": 3.0644140030441405e-05, + "loss": 2.1743, + "step": 15375 + }, + { + "epoch": 4.69, + "learning_rate": 3.0628919330289194e-05, + "loss": 2.2421, + "step": 15400 + }, + { + "epoch": 4.7, + "learning_rate": 3.061369863013699e-05, + "loss": 2.337, + "step": 15425 + }, + { + "epoch": 4.7, + "learning_rate": 3.059847792998478e-05, + "loss": 2.3268, + "step": 15450 + }, + { + "epoch": 4.71, + "learning_rate": 3.0583257229832576e-05, + "loss": 2.2685, + "step": 15475 + }, + { + "epoch": 4.72, + "learning_rate": 3.0568036529680366e-05, + "loss": 2.2797, + "step": 15500 + }, + { + "epoch": 4.73, + "learning_rate": 3.055281582952816e-05, + "loss": 2.27, + "step": 15525 + }, + { + "epoch": 4.73, + "learning_rate": 3.053759512937595e-05, + "loss": 2.2245, + "step": 15550 + }, + { + "epoch": 4.74, + "learning_rate": 3.052237442922375e-05, + "loss": 2.25, + "step": 15575 + }, + { + "epoch": 4.75, + "learning_rate": 3.050715372907154e-05, + "loss": 2.1833, + "step": 15600 + }, + { + "epoch": 4.76, + "learning_rate": 3.0491933028919333e-05, + "loss": 2.2166, + "step": 15625 + }, + { + "epoch": 4.76, + "learning_rate": 3.0476712328767126e-05, + "loss": 2.3001, + "step": 15650 + }, + { + "epoch": 4.77, + "learning_rate": 3.046149162861492e-05, + "loss": 2.2848, + "step": 15675 + }, + { + "epoch": 4.78, + "learning_rate": 3.0446270928462712e-05, + "loss": 2.1487, + "step": 15700 + }, + { + "epoch": 4.79, + "learning_rate": 3.0431050228310505e-05, + "loss": 2.1833, + "step": 15725 + }, + { + "epoch": 4.79, + "learning_rate": 3.0415829528158298e-05, + "loss": 2.1392, + "step": 15750 + }, + { + "epoch": 4.8, + "learning_rate": 3.0400608828006094e-05, + "loss": 2.2441, + "step": 15775 + }, + { + "epoch": 4.81, + "learning_rate": 3.0385388127853887e-05, + "loss": 2.2952, + "step": 15800 + }, + { + "epoch": 4.82, + "learning_rate": 3.037016742770168e-05, + "loss": 2.1949, + "step": 15825 + }, + { + "epoch": 4.82, + "learning_rate": 3.035494672754947e-05, + "loss": 2.2957, + "step": 15850 + }, + { + "epoch": 4.83, + "learning_rate": 3.0339726027397262e-05, + "loss": 2.1833, + "step": 15875 + }, + { + "epoch": 4.84, + "learning_rate": 3.0324505327245055e-05, + "loss": 2.2777, + "step": 15900 + }, + { + "epoch": 4.85, + "learning_rate": 3.0309284627092847e-05, + "loss": 2.3056, + "step": 15925 + }, + { + "epoch": 4.86, + "learning_rate": 3.029406392694064e-05, + "loss": 2.2749, + "step": 15950 + }, + { + "epoch": 4.86, + "learning_rate": 3.0278843226788436e-05, + "loss": 2.3683, + "step": 15975 + }, + { + "epoch": 4.87, + "learning_rate": 3.026362252663623e-05, + "loss": 2.3215, + "step": 16000 + }, + { + "epoch": 4.88, + "learning_rate": 3.0248401826484022e-05, + "loss": 2.1942, + "step": 16025 + }, + { + "epoch": 4.89, + "learning_rate": 3.0233181126331815e-05, + "loss": 2.17, + "step": 16050 + }, + { + "epoch": 4.89, + "learning_rate": 3.0217960426179608e-05, + "loss": 2.2755, + "step": 16075 + }, + { + "epoch": 4.9, + "learning_rate": 3.02027397260274e-05, + "loss": 2.2263, + "step": 16100 + }, + { + "epoch": 4.91, + "learning_rate": 3.018751902587519e-05, + "loss": 2.2477, + "step": 16125 + }, + { + "epoch": 4.92, + "learning_rate": 3.0172298325722983e-05, + "loss": 2.3542, + "step": 16150 + }, + { + "epoch": 4.92, + "learning_rate": 3.0157077625570783e-05, + "loss": 2.2229, + "step": 16175 + }, + { + "epoch": 4.93, + "learning_rate": 3.0141856925418572e-05, + "loss": 2.1218, + "step": 16200 + }, + { + "epoch": 4.94, + "learning_rate": 3.0126636225266365e-05, + "loss": 2.2416, + "step": 16225 + }, + { + "epoch": 4.95, + "learning_rate": 3.0112024353120244e-05, + "loss": 2.2834, + "step": 16250 + }, + { + "epoch": 4.95, + "learning_rate": 3.009680365296804e-05, + "loss": 2.2788, + "step": 16275 + }, + { + "epoch": 4.96, + "learning_rate": 3.0081582952815833e-05, + "loss": 2.1984, + "step": 16300 + }, + { + "epoch": 4.97, + "learning_rate": 3.0066362252663626e-05, + "loss": 2.2963, + "step": 16325 + }, + { + "epoch": 4.98, + "learning_rate": 3.005114155251142e-05, + "loss": 2.2688, + "step": 16350 + }, + { + "epoch": 4.98, + "learning_rate": 3.0035920852359212e-05, + "loss": 2.2477, + "step": 16375 + }, + { + "epoch": 4.99, + "learning_rate": 3.0020700152207e-05, + "loss": 2.2984, + "step": 16400 + }, + { + "epoch": 5.0, + "learning_rate": 3.0005479452054794e-05, + "loss": 2.1656, + "step": 16425 + }, + { + "epoch": 5.01, + "learning_rate": 2.9990258751902587e-05, + "loss": 2.2591, + "step": 16450 + }, + { + "epoch": 5.02, + "learning_rate": 2.9975038051750383e-05, + "loss": 2.129, + "step": 16475 + }, + { + "epoch": 5.02, + "learning_rate": 2.9959817351598176e-05, + "loss": 2.2228, + "step": 16500 + }, + { + "epoch": 5.03, + "learning_rate": 2.994459665144597e-05, + "loss": 2.2753, + "step": 16525 + }, + { + "epoch": 5.04, + "learning_rate": 2.9929375951293762e-05, + "loss": 2.1261, + "step": 16550 + }, + { + "epoch": 5.05, + "learning_rate": 2.9914155251141555e-05, + "loss": 2.194, + "step": 16575 + }, + { + "epoch": 5.05, + "learning_rate": 2.9898934550989348e-05, + "loss": 2.2243, + "step": 16600 + }, + { + "epoch": 5.06, + "learning_rate": 2.988371385083714e-05, + "loss": 2.1936, + "step": 16625 + }, + { + "epoch": 5.07, + "learning_rate": 2.9868493150684933e-05, + "loss": 2.1148, + "step": 16650 + }, + { + "epoch": 5.08, + "learning_rate": 2.985327245053273e-05, + "loss": 2.215, + "step": 16675 + }, + { + "epoch": 5.08, + "learning_rate": 2.9838051750380522e-05, + "loss": 2.192, + "step": 16700 + }, + { + "epoch": 5.09, + "learning_rate": 2.9822831050228315e-05, + "loss": 2.1374, + "step": 16725 + }, + { + "epoch": 5.1, + "learning_rate": 2.9807610350076105e-05, + "loss": 2.202, + "step": 16750 + }, + { + "epoch": 5.11, + "learning_rate": 2.9792389649923897e-05, + "loss": 2.3239, + "step": 16775 + }, + { + "epoch": 5.11, + "learning_rate": 2.977716894977169e-05, + "loss": 2.2086, + "step": 16800 + }, + { + "epoch": 5.12, + "learning_rate": 2.9761948249619483e-05, + "loss": 2.2127, + "step": 16825 + }, + { + "epoch": 5.13, + "learning_rate": 2.9746727549467276e-05, + "loss": 2.1751, + "step": 16850 + }, + { + "epoch": 5.14, + "learning_rate": 2.9731506849315072e-05, + "loss": 2.2261, + "step": 16875 + }, + { + "epoch": 5.14, + "learning_rate": 2.9716286149162865e-05, + "loss": 2.3385, + "step": 16900 + }, + { + "epoch": 5.15, + "learning_rate": 2.9701065449010658e-05, + "loss": 2.2147, + "step": 16925 + }, + { + "epoch": 5.16, + "learning_rate": 2.968584474885845e-05, + "loss": 2.2131, + "step": 16950 + }, + { + "epoch": 5.17, + "learning_rate": 2.9670624048706244e-05, + "loss": 2.1689, + "step": 16975 + }, + { + "epoch": 5.18, + "learning_rate": 2.9655403348554036e-05, + "loss": 2.1745, + "step": 17000 + }, + { + "epoch": 5.18, + "learning_rate": 2.9640182648401826e-05, + "loss": 2.1453, + "step": 17025 + }, + { + "epoch": 5.19, + "learning_rate": 2.962496194824962e-05, + "loss": 2.2432, + "step": 17050 + }, + { + "epoch": 5.2, + "learning_rate": 2.9609741248097418e-05, + "loss": 2.2764, + "step": 17075 + }, + { + "epoch": 5.21, + "learning_rate": 2.9594520547945208e-05, + "loss": 2.2671, + "step": 17100 + }, + { + "epoch": 5.21, + "learning_rate": 2.9579299847793e-05, + "loss": 2.2056, + "step": 17125 + }, + { + "epoch": 5.22, + "learning_rate": 2.9564079147640793e-05, + "loss": 2.2265, + "step": 17150 + }, + { + "epoch": 5.23, + "learning_rate": 2.9549467275494676e-05, + "loss": 2.2748, + "step": 17175 + }, + { + "epoch": 5.24, + "learning_rate": 2.953424657534247e-05, + "loss": 2.1414, + "step": 17200 + }, + { + "epoch": 5.24, + "learning_rate": 2.9519025875190262e-05, + "loss": 2.218, + "step": 17225 + }, + { + "epoch": 5.25, + "learning_rate": 2.9503805175038055e-05, + "loss": 2.2895, + "step": 17250 + }, + { + "epoch": 5.26, + "learning_rate": 2.9488584474885848e-05, + "loss": 2.2456, + "step": 17275 + }, + { + "epoch": 5.27, + "learning_rate": 2.9473363774733637e-05, + "loss": 2.1579, + "step": 17300 + }, + { + "epoch": 5.27, + "learning_rate": 2.945814307458143e-05, + "loss": 2.1798, + "step": 17325 + }, + { + "epoch": 5.28, + "learning_rate": 2.944292237442923e-05, + "loss": 2.2155, + "step": 17350 + }, + { + "epoch": 5.29, + "learning_rate": 2.942770167427702e-05, + "loss": 2.2606, + "step": 17375 + }, + { + "epoch": 5.3, + "learning_rate": 2.9412480974124812e-05, + "loss": 2.1556, + "step": 17400 + }, + { + "epoch": 5.3, + "learning_rate": 2.9397260273972605e-05, + "loss": 2.2074, + "step": 17425 + }, + { + "epoch": 5.31, + "learning_rate": 2.9382039573820398e-05, + "loss": 2.2237, + "step": 17450 + }, + { + "epoch": 5.32, + "learning_rate": 2.936681887366819e-05, + "loss": 2.1979, + "step": 17475 + }, + { + "epoch": 5.33, + "learning_rate": 2.9351598173515983e-05, + "loss": 2.2213, + "step": 17500 + }, + { + "epoch": 5.33, + "learning_rate": 2.9336377473363776e-05, + "loss": 2.2229, + "step": 17525 + }, + { + "epoch": 5.34, + "learning_rate": 2.9321156773211572e-05, + "loss": 2.2467, + "step": 17550 + }, + { + "epoch": 5.35, + "learning_rate": 2.9305936073059365e-05, + "loss": 2.3175, + "step": 17575 + }, + { + "epoch": 5.36, + "learning_rate": 2.9290715372907158e-05, + "loss": 2.105, + "step": 17600 + }, + { + "epoch": 5.37, + "learning_rate": 2.927549467275495e-05, + "loss": 2.2018, + "step": 17625 + }, + { + "epoch": 5.37, + "learning_rate": 2.926027397260274e-05, + "loss": 2.0804, + "step": 17650 + }, + { + "epoch": 5.38, + "learning_rate": 2.9245053272450533e-05, + "loss": 2.2365, + "step": 17675 + }, + { + "epoch": 5.39, + "learning_rate": 2.9229832572298326e-05, + "loss": 2.1185, + "step": 17700 + }, + { + "epoch": 5.4, + "learning_rate": 2.921461187214612e-05, + "loss": 2.1344, + "step": 17725 + }, + { + "epoch": 5.4, + "learning_rate": 2.9199391171993915e-05, + "loss": 2.2028, + "step": 17750 + }, + { + "epoch": 5.41, + "learning_rate": 2.9184170471841708e-05, + "loss": 2.2562, + "step": 17775 + }, + { + "epoch": 5.42, + "learning_rate": 2.91689497716895e-05, + "loss": 2.2123, + "step": 17800 + }, + { + "epoch": 5.43, + "learning_rate": 2.9153729071537294e-05, + "loss": 2.2453, + "step": 17825 + }, + { + "epoch": 5.43, + "learning_rate": 2.9138508371385086e-05, + "loss": 2.2053, + "step": 17850 + }, + { + "epoch": 5.44, + "learning_rate": 2.912328767123288e-05, + "loss": 2.1562, + "step": 17875 + }, + { + "epoch": 5.45, + "learning_rate": 2.9108066971080672e-05, + "loss": 2.0943, + "step": 17900 + }, + { + "epoch": 5.46, + "learning_rate": 2.909284627092846e-05, + "loss": 2.1485, + "step": 17925 + }, + { + "epoch": 5.46, + "learning_rate": 2.907762557077626e-05, + "loss": 2.2019, + "step": 17950 + }, + { + "epoch": 5.47, + "learning_rate": 2.9062404870624054e-05, + "loss": 2.1521, + "step": 17975 + }, + { + "epoch": 5.48, + "learning_rate": 2.9047184170471843e-05, + "loss": 2.2207, + "step": 18000 + }, + { + "epoch": 5.49, + "learning_rate": 2.9031963470319636e-05, + "loss": 2.1452, + "step": 18025 + }, + { + "epoch": 5.49, + "learning_rate": 2.901674277016743e-05, + "loss": 2.1539, + "step": 18050 + }, + { + "epoch": 5.5, + "learning_rate": 2.9001522070015222e-05, + "loss": 2.1437, + "step": 18075 + }, + { + "epoch": 5.51, + "learning_rate": 2.8986301369863015e-05, + "loss": 2.1719, + "step": 18100 + }, + { + "epoch": 5.52, + "learning_rate": 2.8971080669710808e-05, + "loss": 2.1572, + "step": 18125 + }, + { + "epoch": 5.53, + "learning_rate": 2.8955859969558604e-05, + "loss": 2.2169, + "step": 18150 + }, + { + "epoch": 5.53, + "learning_rate": 2.8940639269406397e-05, + "loss": 2.2541, + "step": 18175 + }, + { + "epoch": 5.54, + "learning_rate": 2.892541856925419e-05, + "loss": 2.2111, + "step": 18200 + }, + { + "epoch": 5.55, + "learning_rate": 2.8910197869101982e-05, + "loss": 2.2755, + "step": 18225 + }, + { + "epoch": 5.56, + "learning_rate": 2.8894977168949775e-05, + "loss": 2.131, + "step": 18250 + }, + { + "epoch": 5.56, + "learning_rate": 2.8879756468797565e-05, + "loss": 2.1832, + "step": 18275 + }, + { + "epoch": 5.57, + "learning_rate": 2.8864535768645357e-05, + "loss": 2.2637, + "step": 18300 + }, + { + "epoch": 5.58, + "learning_rate": 2.884931506849315e-05, + "loss": 2.3163, + "step": 18325 + }, + { + "epoch": 5.59, + "learning_rate": 2.8834094368340947e-05, + "loss": 2.2512, + "step": 18350 + }, + { + "epoch": 5.59, + "learning_rate": 2.881887366818874e-05, + "loss": 2.2196, + "step": 18375 + }, + { + "epoch": 5.6, + "learning_rate": 2.8803652968036532e-05, + "loss": 2.1694, + "step": 18400 + }, + { + "epoch": 5.61, + "learning_rate": 2.8788432267884325e-05, + "loss": 2.1062, + "step": 18425 + }, + { + "epoch": 5.62, + "learning_rate": 2.8773211567732118e-05, + "loss": 2.2445, + "step": 18450 + }, + { + "epoch": 5.62, + "learning_rate": 2.875799086757991e-05, + "loss": 2.2507, + "step": 18475 + }, + { + "epoch": 5.63, + "learning_rate": 2.8742770167427704e-05, + "loss": 2.2063, + "step": 18500 + }, + { + "epoch": 5.64, + "learning_rate": 2.8727549467275496e-05, + "loss": 2.3017, + "step": 18525 + }, + { + "epoch": 5.65, + "learning_rate": 2.8712328767123293e-05, + "loss": 2.1666, + "step": 18550 + }, + { + "epoch": 5.65, + "learning_rate": 2.8697108066971085e-05, + "loss": 2.0839, + "step": 18575 + }, + { + "epoch": 5.66, + "learning_rate": 2.868188736681888e-05, + "loss": 2.2795, + "step": 18600 + }, + { + "epoch": 5.67, + "learning_rate": 2.8666666666666668e-05, + "loss": 2.2939, + "step": 18625 + }, + { + "epoch": 5.68, + "learning_rate": 2.865144596651446e-05, + "loss": 2.2818, + "step": 18650 + }, + { + "epoch": 5.68, + "learning_rate": 2.8636225266362253e-05, + "loss": 2.2006, + "step": 18675 + }, + { + "epoch": 5.69, + "learning_rate": 2.8621004566210046e-05, + "loss": 2.055, + "step": 18700 + }, + { + "epoch": 5.7, + "learning_rate": 2.860578386605784e-05, + "loss": 2.217, + "step": 18725 + }, + { + "epoch": 5.71, + "learning_rate": 2.8590563165905635e-05, + "loss": 2.2419, + "step": 18750 + }, + { + "epoch": 5.72, + "learning_rate": 2.8575342465753428e-05, + "loss": 2.2466, + "step": 18775 + }, + { + "epoch": 5.72, + "learning_rate": 2.856012176560122e-05, + "loss": 2.2655, + "step": 18800 + }, + { + "epoch": 5.73, + "learning_rate": 2.8544901065449014e-05, + "loss": 2.1704, + "step": 18825 + }, + { + "epoch": 5.74, + "learning_rate": 2.8529680365296807e-05, + "loss": 2.2642, + "step": 18850 + }, + { + "epoch": 5.75, + "learning_rate": 2.85144596651446e-05, + "loss": 2.1253, + "step": 18875 + }, + { + "epoch": 5.75, + "learning_rate": 2.849923896499239e-05, + "loss": 2.1265, + "step": 18900 + }, + { + "epoch": 5.76, + "learning_rate": 2.8484018264840182e-05, + "loss": 2.2404, + "step": 18925 + }, + { + "epoch": 5.77, + "learning_rate": 2.846879756468798e-05, + "loss": 2.2131, + "step": 18950 + }, + { + "epoch": 5.78, + "learning_rate": 2.845357686453577e-05, + "loss": 2.2115, + "step": 18975 + }, + { + "epoch": 5.78, + "learning_rate": 2.8438356164383564e-05, + "loss": 2.1654, + "step": 19000 + }, + { + "epoch": 5.79, + "learning_rate": 2.8423135464231357e-05, + "loss": 2.1543, + "step": 19025 + }, + { + "epoch": 5.8, + "learning_rate": 2.840791476407915e-05, + "loss": 2.1774, + "step": 19050 + }, + { + "epoch": 5.81, + "learning_rate": 2.8392694063926942e-05, + "loss": 2.348, + "step": 19075 + }, + { + "epoch": 5.81, + "learning_rate": 2.8377473363774735e-05, + "loss": 2.1481, + "step": 19100 + }, + { + "epoch": 5.82, + "learning_rate": 2.8362252663622528e-05, + "loss": 2.2803, + "step": 19125 + }, + { + "epoch": 5.83, + "learning_rate": 2.8347031963470324e-05, + "loss": 2.1804, + "step": 19150 + }, + { + "epoch": 5.84, + "learning_rate": 2.8331811263318117e-05, + "loss": 2.1929, + "step": 19175 + }, + { + "epoch": 5.84, + "learning_rate": 2.831659056316591e-05, + "loss": 2.1959, + "step": 19200 + }, + { + "epoch": 5.85, + "learning_rate": 2.8301369863013703e-05, + "loss": 2.2637, + "step": 19225 + }, + { + "epoch": 5.86, + "learning_rate": 2.8286149162861492e-05, + "loss": 2.2302, + "step": 19250 + }, + { + "epoch": 5.87, + "learning_rate": 2.8270928462709285e-05, + "loss": 2.2437, + "step": 19275 + }, + { + "epoch": 5.88, + "learning_rate": 2.8255707762557078e-05, + "loss": 2.2423, + "step": 19300 + }, + { + "epoch": 5.88, + "learning_rate": 2.824048706240487e-05, + "loss": 2.2753, + "step": 19325 + }, + { + "epoch": 5.89, + "learning_rate": 2.8225266362252667e-05, + "loss": 2.1957, + "step": 19350 + }, + { + "epoch": 5.9, + "learning_rate": 2.821004566210046e-05, + "loss": 2.2732, + "step": 19375 + }, + { + "epoch": 5.91, + "learning_rate": 2.8194824961948253e-05, + "loss": 2.2094, + "step": 19400 + }, + { + "epoch": 5.91, + "learning_rate": 2.8179604261796045e-05, + "loss": 2.106, + "step": 19425 + }, + { + "epoch": 5.92, + "learning_rate": 2.8164383561643838e-05, + "loss": 2.2468, + "step": 19450 + }, + { + "epoch": 5.93, + "learning_rate": 2.814916286149163e-05, + "loss": 2.1399, + "step": 19475 + }, + { + "epoch": 5.94, + "learning_rate": 2.8133942161339424e-05, + "loss": 2.2894, + "step": 19500 + }, + { + "epoch": 5.94, + "learning_rate": 2.8118721461187213e-05, + "loss": 2.2403, + "step": 19525 + }, + { + "epoch": 5.95, + "learning_rate": 2.8103500761035013e-05, + "loss": 2.1764, + "step": 19550 + }, + { + "epoch": 5.96, + "learning_rate": 2.8088280060882806e-05, + "loss": 2.2491, + "step": 19575 + }, + { + "epoch": 5.97, + "learning_rate": 2.8073059360730595e-05, + "loss": 2.2154, + "step": 19600 + }, + { + "epoch": 5.97, + "learning_rate": 2.8057838660578388e-05, + "loss": 2.1643, + "step": 19625 + }, + { + "epoch": 5.98, + "learning_rate": 2.804261796042618e-05, + "loss": 2.1644, + "step": 19650 + }, + { + "epoch": 5.99, + "learning_rate": 2.8027397260273974e-05, + "loss": 2.1413, + "step": 19675 + }, + { + "epoch": 6.0, + "learning_rate": 2.8012176560121767e-05, + "loss": 2.1563, + "step": 19700 + }, + { + "epoch": 6.0, + "learning_rate": 2.799695585996956e-05, + "loss": 2.2266, + "step": 19725 + }, + { + "epoch": 6.01, + "learning_rate": 2.7981735159817356e-05, + "loss": 2.1458, + "step": 19750 + }, + { + "epoch": 6.02, + "learning_rate": 2.796651445966515e-05, + "loss": 2.1858, + "step": 19775 + }, + { + "epoch": 6.03, + "learning_rate": 2.795129375951294e-05, + "loss": 2.1965, + "step": 19800 + }, + { + "epoch": 6.04, + "learning_rate": 2.7936073059360734e-05, + "loss": 2.1685, + "step": 19825 + }, + { + "epoch": 6.04, + "learning_rate": 2.7920852359208527e-05, + "loss": 2.1751, + "step": 19850 + }, + { + "epoch": 6.05, + "learning_rate": 2.7905631659056317e-05, + "loss": 2.0926, + "step": 19875 + }, + { + "epoch": 6.06, + "learning_rate": 2.789041095890411e-05, + "loss": 2.2655, + "step": 19900 + }, + { + "epoch": 6.07, + "learning_rate": 2.7875190258751902e-05, + "loss": 2.1804, + "step": 19925 + }, + { + "epoch": 6.07, + "learning_rate": 2.78599695585997e-05, + "loss": 2.1363, + "step": 19950 + }, + { + "epoch": 6.08, + "learning_rate": 2.784474885844749e-05, + "loss": 2.2423, + "step": 19975 + }, + { + "epoch": 6.09, + "learning_rate": 2.7829528158295284e-05, + "loss": 2.0739, + "step": 20000 + }, + { + "epoch": 6.1, + "learning_rate": 2.7814307458143077e-05, + "loss": 2.2207, + "step": 20025 + }, + { + "epoch": 6.1, + "learning_rate": 2.779908675799087e-05, + "loss": 2.1476, + "step": 20050 + }, + { + "epoch": 6.11, + "learning_rate": 2.7783866057838663e-05, + "loss": 2.3169, + "step": 20075 + }, + { + "epoch": 6.12, + "learning_rate": 2.7768645357686455e-05, + "loss": 2.1508, + "step": 20100 + }, + { + "epoch": 6.13, + "learning_rate": 2.775342465753425e-05, + "loss": 2.1237, + "step": 20125 + }, + { + "epoch": 6.13, + "learning_rate": 2.7738203957382045e-05, + "loss": 2.2374, + "step": 20150 + }, + { + "epoch": 6.14, + "learning_rate": 2.7722983257229837e-05, + "loss": 2.2625, + "step": 20175 + }, + { + "epoch": 6.15, + "learning_rate": 2.770776255707763e-05, + "loss": 2.1413, + "step": 20200 + }, + { + "epoch": 6.16, + "learning_rate": 2.769254185692542e-05, + "loss": 2.2093, + "step": 20225 + }, + { + "epoch": 6.16, + "learning_rate": 2.7677321156773213e-05, + "loss": 2.1778, + "step": 20250 + }, + { + "epoch": 6.17, + "learning_rate": 2.7662100456621005e-05, + "loss": 2.1181, + "step": 20275 + }, + { + "epoch": 6.18, + "learning_rate": 2.7646879756468798e-05, + "loss": 2.2382, + "step": 20300 + }, + { + "epoch": 6.19, + "learning_rate": 2.763165905631659e-05, + "loss": 2.2121, + "step": 20325 + }, + { + "epoch": 6.19, + "learning_rate": 2.7616438356164387e-05, + "loss": 2.1685, + "step": 20350 + }, + { + "epoch": 6.2, + "learning_rate": 2.760121765601218e-05, + "loss": 2.1297, + "step": 20375 + }, + { + "epoch": 6.21, + "learning_rate": 2.7585996955859973e-05, + "loss": 2.0985, + "step": 20400 + }, + { + "epoch": 6.22, + "learning_rate": 2.7570776255707766e-05, + "loss": 2.2109, + "step": 20425 + }, + { + "epoch": 6.23, + "learning_rate": 2.755555555555556e-05, + "loss": 2.1285, + "step": 20450 + }, + { + "epoch": 6.23, + "learning_rate": 2.754033485540335e-05, + "loss": 2.1805, + "step": 20475 + }, + { + "epoch": 6.24, + "learning_rate": 2.752511415525114e-05, + "loss": 2.2301, + "step": 20500 + }, + { + "epoch": 6.25, + "learning_rate": 2.7509893455098934e-05, + "loss": 2.0494, + "step": 20525 + }, + { + "epoch": 6.26, + "learning_rate": 2.7494672754946733e-05, + "loss": 2.2462, + "step": 20550 + }, + { + "epoch": 6.26, + "learning_rate": 2.7479452054794523e-05, + "loss": 2.174, + "step": 20575 + }, + { + "epoch": 6.27, + "learning_rate": 2.7464231354642316e-05, + "loss": 2.2102, + "step": 20600 + }, + { + "epoch": 6.28, + "learning_rate": 2.744901065449011e-05, + "loss": 2.1673, + "step": 20625 + }, + { + "epoch": 6.29, + "learning_rate": 2.74337899543379e-05, + "loss": 2.1535, + "step": 20650 + }, + { + "epoch": 6.29, + "learning_rate": 2.7418569254185694e-05, + "loss": 2.1412, + "step": 20675 + }, + { + "epoch": 6.3, + "learning_rate": 2.7403348554033487e-05, + "loss": 2.1825, + "step": 20700 + }, + { + "epoch": 6.31, + "learning_rate": 2.738812785388128e-05, + "loss": 2.1337, + "step": 20725 + }, + { + "epoch": 6.32, + "learning_rate": 2.7372907153729076e-05, + "loss": 2.1748, + "step": 20750 + }, + { + "epoch": 6.32, + "learning_rate": 2.735768645357687e-05, + "loss": 2.2003, + "step": 20775 + }, + { + "epoch": 6.33, + "learning_rate": 2.7342465753424662e-05, + "loss": 2.1943, + "step": 20800 + }, + { + "epoch": 6.34, + "learning_rate": 2.7327245053272455e-05, + "loss": 2.188, + "step": 20825 + }, + { + "epoch": 6.35, + "learning_rate": 2.7312024353120244e-05, + "loss": 2.1538, + "step": 20850 + }, + { + "epoch": 6.35, + "learning_rate": 2.7296803652968037e-05, + "loss": 2.1763, + "step": 20875 + }, + { + "epoch": 6.36, + "learning_rate": 2.728158295281583e-05, + "loss": 2.202, + "step": 20900 + }, + { + "epoch": 6.37, + "learning_rate": 2.7266362252663623e-05, + "loss": 2.0576, + "step": 20925 + }, + { + "epoch": 6.38, + "learning_rate": 2.725114155251142e-05, + "loss": 2.0509, + "step": 20950 + }, + { + "epoch": 6.39, + "learning_rate": 2.723592085235921e-05, + "loss": 2.1629, + "step": 20975 + }, + { + "epoch": 6.39, + "learning_rate": 2.7220700152207005e-05, + "loss": 2.2119, + "step": 21000 + }, + { + "epoch": 6.4, + "learning_rate": 2.7205479452054797e-05, + "loss": 2.1668, + "step": 21025 + }, + { + "epoch": 6.41, + "learning_rate": 2.719025875190259e-05, + "loss": 2.1514, + "step": 21050 + }, + { + "epoch": 6.42, + "learning_rate": 2.7175038051750383e-05, + "loss": 2.1272, + "step": 21075 + }, + { + "epoch": 6.42, + "learning_rate": 2.7159817351598176e-05, + "loss": 2.1029, + "step": 21100 + }, + { + "epoch": 6.43, + "learning_rate": 2.7144596651445965e-05, + "loss": 2.112, + "step": 21125 + }, + { + "epoch": 6.44, + "learning_rate": 2.7129375951293765e-05, + "loss": 2.1119, + "step": 21150 + }, + { + "epoch": 6.45, + "learning_rate": 2.7114155251141558e-05, + "loss": 2.0902, + "step": 21175 + }, + { + "epoch": 6.45, + "learning_rate": 2.7098934550989347e-05, + "loss": 2.1778, + "step": 21200 + }, + { + "epoch": 6.46, + "learning_rate": 2.708371385083714e-05, + "loss": 2.0972, + "step": 21225 + }, + { + "epoch": 6.47, + "learning_rate": 2.7068493150684933e-05, + "loss": 2.1768, + "step": 21250 + }, + { + "epoch": 6.48, + "learning_rate": 2.7053272450532726e-05, + "loss": 2.178, + "step": 21275 + }, + { + "epoch": 6.48, + "learning_rate": 2.703805175038052e-05, + "loss": 2.1795, + "step": 21300 + }, + { + "epoch": 6.49, + "learning_rate": 2.702283105022831e-05, + "loss": 2.0706, + "step": 21325 + }, + { + "epoch": 6.5, + "learning_rate": 2.7007610350076108e-05, + "loss": 2.241, + "step": 21350 + }, + { + "epoch": 6.51, + "learning_rate": 2.69923896499239e-05, + "loss": 2.102, + "step": 21375 + }, + { + "epoch": 6.51, + "learning_rate": 2.6977168949771693e-05, + "loss": 2.2229, + "step": 21400 + }, + { + "epoch": 6.52, + "learning_rate": 2.6961948249619486e-05, + "loss": 2.083, + "step": 21425 + }, + { + "epoch": 6.53, + "learning_rate": 2.694672754946728e-05, + "loss": 2.0917, + "step": 21450 + }, + { + "epoch": 6.54, + "learning_rate": 2.693150684931507e-05, + "loss": 2.256, + "step": 21475 + }, + { + "epoch": 6.54, + "learning_rate": 2.691628614916286e-05, + "loss": 2.1306, + "step": 21500 + }, + { + "epoch": 6.55, + "learning_rate": 2.6901065449010654e-05, + "loss": 2.1752, + "step": 21525 + }, + { + "epoch": 6.56, + "learning_rate": 2.688584474885845e-05, + "loss": 2.1803, + "step": 21550 + }, + { + "epoch": 6.57, + "learning_rate": 2.6870624048706243e-05, + "loss": 2.1966, + "step": 21575 + }, + { + "epoch": 6.58, + "learning_rate": 2.6855403348554036e-05, + "loss": 2.193, + "step": 21600 + }, + { + "epoch": 6.58, + "learning_rate": 2.684018264840183e-05, + "loss": 2.1957, + "step": 21625 + }, + { + "epoch": 6.59, + "learning_rate": 2.6824961948249622e-05, + "loss": 2.2211, + "step": 21650 + }, + { + "epoch": 6.6, + "learning_rate": 2.6809741248097415e-05, + "loss": 2.1052, + "step": 21675 + }, + { + "epoch": 6.61, + "learning_rate": 2.6794520547945207e-05, + "loss": 2.1593, + "step": 21700 + }, + { + "epoch": 6.61, + "learning_rate": 2.6779299847793e-05, + "loss": 2.0678, + "step": 21725 + }, + { + "epoch": 6.62, + "learning_rate": 2.6764079147640796e-05, + "loss": 2.2496, + "step": 21750 + }, + { + "epoch": 6.63, + "learning_rate": 2.674885844748859e-05, + "loss": 2.1239, + "step": 21775 + }, + { + "epoch": 6.64, + "learning_rate": 2.6733637747336382e-05, + "loss": 2.1516, + "step": 21800 + }, + { + "epoch": 6.64, + "learning_rate": 2.671841704718417e-05, + "loss": 2.0752, + "step": 21825 + }, + { + "epoch": 6.65, + "learning_rate": 2.6703196347031964e-05, + "loss": 2.2453, + "step": 21850 + }, + { + "epoch": 6.66, + "learning_rate": 2.6687975646879757e-05, + "loss": 2.3072, + "step": 21875 + }, + { + "epoch": 6.67, + "learning_rate": 2.667275494672755e-05, + "loss": 2.1156, + "step": 21900 + }, + { + "epoch": 6.67, + "learning_rate": 2.6657534246575343e-05, + "loss": 2.1962, + "step": 21925 + }, + { + "epoch": 6.68, + "learning_rate": 2.664231354642314e-05, + "loss": 2.0963, + "step": 21950 + }, + { + "epoch": 6.69, + "learning_rate": 2.6627092846270932e-05, + "loss": 2.191, + "step": 21975 + }, + { + "epoch": 6.7, + "learning_rate": 2.6611872146118725e-05, + "loss": 2.2095, + "step": 22000 + }, + { + "epoch": 6.7, + "learning_rate": 2.6596651445966518e-05, + "loss": 2.1171, + "step": 22025 + }, + { + "epoch": 6.71, + "learning_rate": 2.658143074581431e-05, + "loss": 2.0649, + "step": 22050 + }, + { + "epoch": 6.72, + "learning_rate": 2.6566210045662103e-05, + "loss": 2.1147, + "step": 22075 + }, + { + "epoch": 6.73, + "learning_rate": 2.6550989345509893e-05, + "loss": 2.2051, + "step": 22100 + }, + { + "epoch": 6.74, + "learning_rate": 2.6535768645357686e-05, + "loss": 2.2622, + "step": 22125 + }, + { + "epoch": 6.74, + "learning_rate": 2.6520547945205485e-05, + "loss": 2.1057, + "step": 22150 + }, + { + "epoch": 6.75, + "learning_rate": 2.6505327245053275e-05, + "loss": 2.2136, + "step": 22175 + }, + { + "epoch": 6.76, + "learning_rate": 2.6490106544901068e-05, + "loss": 2.1146, + "step": 22200 + }, + { + "epoch": 6.77, + "learning_rate": 2.647488584474886e-05, + "loss": 2.1073, + "step": 22225 + }, + { + "epoch": 6.77, + "learning_rate": 2.6459665144596653e-05, + "loss": 2.2008, + "step": 22250 + }, + { + "epoch": 6.78, + "learning_rate": 2.6444444444444446e-05, + "loss": 2.1423, + "step": 22275 + }, + { + "epoch": 6.79, + "learning_rate": 2.642922374429224e-05, + "loss": 2.1471, + "step": 22300 + }, + { + "epoch": 6.8, + "learning_rate": 2.6414003044140032e-05, + "loss": 2.2099, + "step": 22325 + }, + { + "epoch": 6.8, + "learning_rate": 2.6398782343987828e-05, + "loss": 2.1029, + "step": 22350 + }, + { + "epoch": 6.81, + "learning_rate": 2.638356164383562e-05, + "loss": 2.1722, + "step": 22375 + }, + { + "epoch": 6.82, + "learning_rate": 2.6368340943683414e-05, + "loss": 2.2043, + "step": 22400 + }, + { + "epoch": 6.83, + "learning_rate": 2.6353120243531207e-05, + "loss": 2.144, + "step": 22425 + }, + { + "epoch": 6.83, + "learning_rate": 2.6337899543378996e-05, + "loss": 2.1838, + "step": 22450 + }, + { + "epoch": 6.84, + "learning_rate": 2.632267884322679e-05, + "loss": 2.2831, + "step": 22475 + }, + { + "epoch": 6.85, + "learning_rate": 2.630745814307458e-05, + "loss": 2.1283, + "step": 22500 + }, + { + "epoch": 6.86, + "learning_rate": 2.6292237442922375e-05, + "loss": 2.1583, + "step": 22525 + }, + { + "epoch": 6.86, + "learning_rate": 2.627701674277017e-05, + "loss": 2.1836, + "step": 22550 + }, + { + "epoch": 6.87, + "learning_rate": 2.6261796042617964e-05, + "loss": 2.1956, + "step": 22575 + }, + { + "epoch": 6.88, + "learning_rate": 2.6246575342465756e-05, + "loss": 2.1679, + "step": 22600 + }, + { + "epoch": 6.89, + "learning_rate": 2.623135464231355e-05, + "loss": 2.1463, + "step": 22625 + }, + { + "epoch": 6.89, + "learning_rate": 2.6216133942161342e-05, + "loss": 2.2765, + "step": 22650 + }, + { + "epoch": 6.9, + "learning_rate": 2.6200913242009135e-05, + "loss": 2.094, + "step": 22675 + }, + { + "epoch": 6.91, + "learning_rate": 2.6185692541856928e-05, + "loss": 2.0926, + "step": 22700 + }, + { + "epoch": 6.92, + "learning_rate": 2.6170471841704717e-05, + "loss": 2.0845, + "step": 22725 + }, + { + "epoch": 6.93, + "learning_rate": 2.6155251141552517e-05, + "loss": 2.2262, + "step": 22750 + }, + { + "epoch": 6.93, + "learning_rate": 2.614003044140031e-05, + "loss": 2.1352, + "step": 22775 + }, + { + "epoch": 6.94, + "learning_rate": 2.61248097412481e-05, + "loss": 2.1186, + "step": 22800 + }, + { + "epoch": 6.95, + "learning_rate": 2.6109589041095892e-05, + "loss": 2.1048, + "step": 22825 + }, + { + "epoch": 6.96, + "learning_rate": 2.6094368340943685e-05, + "loss": 2.1266, + "step": 22850 + }, + { + "epoch": 6.96, + "learning_rate": 2.6079147640791478e-05, + "loss": 2.2109, + "step": 22875 + }, + { + "epoch": 6.97, + "learning_rate": 2.606392694063927e-05, + "loss": 2.1962, + "step": 22900 + }, + { + "epoch": 6.98, + "learning_rate": 2.6048706240487063e-05, + "loss": 2.2317, + "step": 22925 + }, + { + "epoch": 6.99, + "learning_rate": 2.603348554033486e-05, + "loss": 2.1549, + "step": 22950 + }, + { + "epoch": 6.99, + "learning_rate": 2.6018264840182652e-05, + "loss": 2.2, + "step": 22975 + }, + { + "epoch": 7.0, + "learning_rate": 2.6003044140030445e-05, + "loss": 2.1342, + "step": 23000 + }, + { + "epoch": 7.01, + "learning_rate": 2.5987823439878238e-05, + "loss": 2.0854, + "step": 23025 + }, + { + "epoch": 7.02, + "learning_rate": 2.597260273972603e-05, + "loss": 2.0935, + "step": 23050 + }, + { + "epoch": 7.02, + "learning_rate": 2.595738203957382e-05, + "loss": 2.0792, + "step": 23075 + }, + { + "epoch": 7.03, + "learning_rate": 2.5942161339421613e-05, + "loss": 2.1502, + "step": 23100 + }, + { + "epoch": 7.04, + "learning_rate": 2.5926940639269406e-05, + "loss": 2.1767, + "step": 23125 + }, + { + "epoch": 7.05, + "learning_rate": 2.5911719939117202e-05, + "loss": 2.2009, + "step": 23150 + }, + { + "epoch": 7.05, + "learning_rate": 2.5896499238964995e-05, + "loss": 2.1147, + "step": 23175 + }, + { + "epoch": 7.06, + "learning_rate": 2.5881278538812788e-05, + "loss": 2.1359, + "step": 23200 + }, + { + "epoch": 7.07, + "learning_rate": 2.586605783866058e-05, + "loss": 2.0548, + "step": 23225 + }, + { + "epoch": 7.08, + "learning_rate": 2.5850837138508374e-05, + "loss": 2.0974, + "step": 23250 + }, + { + "epoch": 7.09, + "learning_rate": 2.5835616438356166e-05, + "loss": 2.0914, + "step": 23275 + }, + { + "epoch": 7.09, + "learning_rate": 2.582039573820396e-05, + "loss": 2.0525, + "step": 23300 + }, + { + "epoch": 7.1, + "learning_rate": 2.5805175038051752e-05, + "loss": 2.1788, + "step": 23325 + }, + { + "epoch": 7.11, + "learning_rate": 2.578995433789955e-05, + "loss": 2.0718, + "step": 23350 + }, + { + "epoch": 7.12, + "learning_rate": 2.577473363774734e-05, + "loss": 2.191, + "step": 23375 + }, + { + "epoch": 7.12, + "learning_rate": 2.5759512937595134e-05, + "loss": 2.0822, + "step": 23400 + }, + { + "epoch": 7.13, + "learning_rate": 2.5744292237442924e-05, + "loss": 2.1245, + "step": 23425 + }, + { + "epoch": 7.14, + "learning_rate": 2.5729071537290716e-05, + "loss": 2.1894, + "step": 23450 + }, + { + "epoch": 7.15, + "learning_rate": 2.571385083713851e-05, + "loss": 2.1632, + "step": 23475 + }, + { + "epoch": 7.15, + "learning_rate": 2.5698630136986302e-05, + "loss": 2.2474, + "step": 23500 + }, + { + "epoch": 7.16, + "learning_rate": 2.5683409436834095e-05, + "loss": 2.1482, + "step": 23525 + }, + { + "epoch": 7.17, + "learning_rate": 2.566818873668189e-05, + "loss": 2.164, + "step": 23550 + }, + { + "epoch": 7.18, + "learning_rate": 2.5652968036529684e-05, + "loss": 2.0679, + "step": 23575 + }, + { + "epoch": 7.18, + "learning_rate": 2.5637747336377477e-05, + "loss": 2.0875, + "step": 23600 + }, + { + "epoch": 7.19, + "learning_rate": 2.562252663622527e-05, + "loss": 2.1351, + "step": 23625 + }, + { + "epoch": 7.2, + "learning_rate": 2.5607305936073062e-05, + "loss": 2.137, + "step": 23650 + }, + { + "epoch": 7.21, + "learning_rate": 2.5592085235920855e-05, + "loss": 2.1051, + "step": 23675 + }, + { + "epoch": 7.21, + "learning_rate": 2.5576864535768645e-05, + "loss": 2.0808, + "step": 23700 + }, + { + "epoch": 7.22, + "learning_rate": 2.5561643835616438e-05, + "loss": 2.1877, + "step": 23725 + }, + { + "epoch": 7.23, + "learning_rate": 2.5546423135464237e-05, + "loss": 2.16, + "step": 23750 + }, + { + "epoch": 7.24, + "learning_rate": 2.5531202435312027e-05, + "loss": 2.1346, + "step": 23775 + }, + { + "epoch": 7.25, + "learning_rate": 2.551598173515982e-05, + "loss": 2.0598, + "step": 23800 + }, + { + "epoch": 7.25, + "learning_rate": 2.5500761035007612e-05, + "loss": 2.1666, + "step": 23825 + }, + { + "epoch": 7.26, + "learning_rate": 2.5485540334855405e-05, + "loss": 2.1896, + "step": 23850 + }, + { + "epoch": 7.27, + "learning_rate": 2.5470319634703198e-05, + "loss": 2.1093, + "step": 23875 + }, + { + "epoch": 7.28, + "learning_rate": 2.545509893455099e-05, + "loss": 2.0752, + "step": 23900 + }, + { + "epoch": 7.28, + "learning_rate": 2.5439878234398784e-05, + "loss": 2.1268, + "step": 23925 + }, + { + "epoch": 7.29, + "learning_rate": 2.542465753424658e-05, + "loss": 2.1263, + "step": 23950 + }, + { + "epoch": 7.3, + "learning_rate": 2.5409436834094373e-05, + "loss": 2.1407, + "step": 23975 + }, + { + "epoch": 7.31, + "learning_rate": 2.539482496194825e-05, + "loss": 2.1309, + "step": 24000 + }, + { + "epoch": 7.31, + "learning_rate": 2.5379604261796042e-05, + "loss": 2.093, + "step": 24025 + }, + { + "epoch": 7.32, + "learning_rate": 2.5364383561643838e-05, + "loss": 2.1654, + "step": 24050 + }, + { + "epoch": 7.33, + "learning_rate": 2.534916286149163e-05, + "loss": 2.1689, + "step": 24075 + }, + { + "epoch": 7.34, + "learning_rate": 2.5333942161339424e-05, + "loss": 2.1654, + "step": 24100 + }, + { + "epoch": 7.34, + "learning_rate": 2.5318721461187216e-05, + "loss": 2.2091, + "step": 24125 + }, + { + "epoch": 7.35, + "learning_rate": 2.53041095890411e-05, + "loss": 2.0029, + "step": 24150 + }, + { + "epoch": 7.36, + "learning_rate": 2.5288888888888892e-05, + "loss": 2.1621, + "step": 24175 + }, + { + "epoch": 7.37, + "learning_rate": 2.5273668188736685e-05, + "loss": 2.1024, + "step": 24200 + }, + { + "epoch": 7.37, + "learning_rate": 2.5258447488584478e-05, + "loss": 2.0642, + "step": 24225 + }, + { + "epoch": 7.38, + "learning_rate": 2.5243226788432267e-05, + "loss": 2.1373, + "step": 24250 + }, + { + "epoch": 7.39, + "learning_rate": 2.522800608828006e-05, + "loss": 2.1484, + "step": 24275 + }, + { + "epoch": 7.4, + "learning_rate": 2.5212785388127853e-05, + "loss": 2.1251, + "step": 24300 + }, + { + "epoch": 7.4, + "learning_rate": 2.519756468797565e-05, + "loss": 2.1474, + "step": 24325 + }, + { + "epoch": 7.41, + "learning_rate": 2.5182343987823442e-05, + "loss": 2.173, + "step": 24350 + }, + { + "epoch": 7.42, + "learning_rate": 2.5167123287671235e-05, + "loss": 2.0536, + "step": 24375 + }, + { + "epoch": 7.43, + "learning_rate": 2.5151902587519028e-05, + "loss": 2.1659, + "step": 24400 + }, + { + "epoch": 7.44, + "learning_rate": 2.513668188736682e-05, + "loss": 2.1325, + "step": 24425 + }, + { + "epoch": 7.44, + "learning_rate": 2.5121461187214613e-05, + "loss": 2.1678, + "step": 24450 + }, + { + "epoch": 7.45, + "learning_rate": 2.5106240487062406e-05, + "loss": 2.1031, + "step": 24475 + }, + { + "epoch": 7.46, + "learning_rate": 2.50910197869102e-05, + "loss": 2.1283, + "step": 24500 + }, + { + "epoch": 7.47, + "learning_rate": 2.5075799086757995e-05, + "loss": 2.0485, + "step": 24525 + }, + { + "epoch": 7.47, + "learning_rate": 2.5060578386605788e-05, + "loss": 2.1137, + "step": 24550 + }, + { + "epoch": 7.48, + "learning_rate": 2.504535768645358e-05, + "loss": 2.043, + "step": 24575 + }, + { + "epoch": 7.49, + "learning_rate": 2.503013698630137e-05, + "loss": 2.128, + "step": 24600 + }, + { + "epoch": 7.5, + "learning_rate": 2.5014916286149163e-05, + "loss": 2.1445, + "step": 24625 + }, + { + "epoch": 7.5, + "learning_rate": 2.4999695585996956e-05, + "loss": 2.1807, + "step": 24650 + }, + { + "epoch": 7.51, + "learning_rate": 2.498447488584475e-05, + "loss": 2.1145, + "step": 24675 + }, + { + "epoch": 7.52, + "learning_rate": 2.4969254185692542e-05, + "loss": 2.1025, + "step": 24700 + }, + { + "epoch": 7.53, + "learning_rate": 2.4954033485540338e-05, + "loss": 2.2375, + "step": 24725 + }, + { + "epoch": 7.53, + "learning_rate": 2.493881278538813e-05, + "loss": 2.0273, + "step": 24750 + }, + { + "epoch": 7.54, + "learning_rate": 2.4923592085235924e-05, + "loss": 2.202, + "step": 24775 + }, + { + "epoch": 7.55, + "learning_rate": 2.4908371385083717e-05, + "loss": 2.0888, + "step": 24800 + }, + { + "epoch": 7.56, + "learning_rate": 2.48937595129376e-05, + "loss": 2.1503, + "step": 24825 + }, + { + "epoch": 7.56, + "learning_rate": 2.4878538812785392e-05, + "loss": 1.9954, + "step": 24850 + }, + { + "epoch": 7.57, + "learning_rate": 2.4863318112633185e-05, + "loss": 2.1307, + "step": 24875 + }, + { + "epoch": 7.58, + "learning_rate": 2.4848097412480975e-05, + "loss": 2.0709, + "step": 24900 + }, + { + "epoch": 7.59, + "learning_rate": 2.4832876712328767e-05, + "loss": 2.0874, + "step": 24925 + }, + { + "epoch": 7.6, + "learning_rate": 2.481765601217656e-05, + "loss": 2.0786, + "step": 24950 + }, + { + "epoch": 7.6, + "learning_rate": 2.4802435312024353e-05, + "loss": 2.1518, + "step": 24975 + }, + { + "epoch": 7.61, + "learning_rate": 2.478721461187215e-05, + "loss": 2.1586, + "step": 25000 + }, + { + "epoch": 7.62, + "learning_rate": 2.4771993911719942e-05, + "loss": 2.0567, + "step": 25025 + }, + { + "epoch": 7.63, + "learning_rate": 2.4756773211567735e-05, + "loss": 2.1463, + "step": 25050 + }, + { + "epoch": 7.63, + "learning_rate": 2.4741552511415528e-05, + "loss": 2.1937, + "step": 25075 + }, + { + "epoch": 7.64, + "learning_rate": 2.472633181126332e-05, + "loss": 2.0548, + "step": 25100 + }, + { + "epoch": 7.65, + "learning_rate": 2.4711111111111114e-05, + "loss": 2.2335, + "step": 25125 + }, + { + "epoch": 7.66, + "learning_rate": 2.4695890410958906e-05, + "loss": 2.0733, + "step": 25150 + }, + { + "epoch": 7.66, + "learning_rate": 2.4680669710806696e-05, + "loss": 2.057, + "step": 25175 + }, + { + "epoch": 7.67, + "learning_rate": 2.4665449010654495e-05, + "loss": 2.0907, + "step": 25200 + }, + { + "epoch": 7.68, + "learning_rate": 2.465083713850837e-05, + "loss": 2.124, + "step": 25225 + }, + { + "epoch": 7.69, + "learning_rate": 2.4635616438356164e-05, + "loss": 2.1252, + "step": 25250 + }, + { + "epoch": 7.69, + "learning_rate": 2.462039573820396e-05, + "loss": 2.135, + "step": 25275 + }, + { + "epoch": 7.7, + "learning_rate": 2.4605175038051753e-05, + "loss": 2.0997, + "step": 25300 + }, + { + "epoch": 7.71, + "learning_rate": 2.4589954337899546e-05, + "loss": 2.1866, + "step": 25325 + }, + { + "epoch": 7.72, + "learning_rate": 2.457473363774734e-05, + "loss": 2.1311, + "step": 25350 + }, + { + "epoch": 7.72, + "learning_rate": 2.4559512937595132e-05, + "loss": 2.1598, + "step": 25375 + }, + { + "epoch": 7.73, + "learning_rate": 2.4544292237442925e-05, + "loss": 1.9998, + "step": 25400 + }, + { + "epoch": 7.74, + "learning_rate": 2.4529071537290718e-05, + "loss": 2.1786, + "step": 25425 + }, + { + "epoch": 7.75, + "learning_rate": 2.4513850837138507e-05, + "loss": 2.1348, + "step": 25450 + }, + { + "epoch": 7.75, + "learning_rate": 2.4498630136986307e-05, + "loss": 2.1772, + "step": 25475 + }, + { + "epoch": 7.76, + "learning_rate": 2.44834094368341e-05, + "loss": 2.1718, + "step": 25500 + }, + { + "epoch": 7.77, + "learning_rate": 2.446818873668189e-05, + "loss": 2.0588, + "step": 25525 + }, + { + "epoch": 7.78, + "learning_rate": 2.4452968036529682e-05, + "loss": 2.1257, + "step": 25550 + }, + { + "epoch": 7.79, + "learning_rate": 2.4437747336377475e-05, + "loss": 2.0751, + "step": 25575 + }, + { + "epoch": 7.79, + "learning_rate": 2.4422526636225268e-05, + "loss": 2.2192, + "step": 25600 + }, + { + "epoch": 7.8, + "learning_rate": 2.440730593607306e-05, + "loss": 2.125, + "step": 25625 + }, + { + "epoch": 7.81, + "learning_rate": 2.4392085235920853e-05, + "loss": 2.0945, + "step": 25650 + }, + { + "epoch": 7.82, + "learning_rate": 2.437686453576865e-05, + "loss": 2.2376, + "step": 25675 + }, + { + "epoch": 7.82, + "learning_rate": 2.4361643835616442e-05, + "loss": 2.1176, + "step": 25700 + }, + { + "epoch": 7.83, + "learning_rate": 2.4346423135464235e-05, + "loss": 2.0887, + "step": 25725 + }, + { + "epoch": 7.84, + "learning_rate": 2.4331202435312028e-05, + "loss": 2.1796, + "step": 25750 + }, + { + "epoch": 7.85, + "learning_rate": 2.431598173515982e-05, + "loss": 2.1368, + "step": 25775 + }, + { + "epoch": 7.85, + "learning_rate": 2.430076103500761e-05, + "loss": 2.2211, + "step": 25800 + }, + { + "epoch": 7.86, + "learning_rate": 2.4285540334855403e-05, + "loss": 2.147, + "step": 25825 + }, + { + "epoch": 7.87, + "learning_rate": 2.4270319634703196e-05, + "loss": 2.2212, + "step": 25850 + }, + { + "epoch": 7.88, + "learning_rate": 2.4255098934550992e-05, + "loss": 2.0163, + "step": 25875 + }, + { + "epoch": 7.88, + "learning_rate": 2.4239878234398785e-05, + "loss": 2.1679, + "step": 25900 + }, + { + "epoch": 7.89, + "learning_rate": 2.4224657534246578e-05, + "loss": 2.1631, + "step": 25925 + }, + { + "epoch": 7.9, + "learning_rate": 2.420943683409437e-05, + "loss": 2.2289, + "step": 25950 + }, + { + "epoch": 7.91, + "learning_rate": 2.4194216133942164e-05, + "loss": 2.1687, + "step": 25975 + }, + { + "epoch": 7.91, + "learning_rate": 2.4178995433789956e-05, + "loss": 2.1701, + "step": 26000 + }, + { + "epoch": 7.92, + "learning_rate": 2.416377473363775e-05, + "loss": 2.1613, + "step": 26025 + }, + { + "epoch": 7.93, + "learning_rate": 2.4148554033485542e-05, + "loss": 2.0793, + "step": 26050 + }, + { + "epoch": 7.94, + "learning_rate": 2.413333333333334e-05, + "loss": 2.2479, + "step": 26075 + }, + { + "epoch": 7.95, + "learning_rate": 2.411811263318113e-05, + "loss": 2.1886, + "step": 26100 + }, + { + "epoch": 7.95, + "learning_rate": 2.4102891933028924e-05, + "loss": 2.2579, + "step": 26125 + }, + { + "epoch": 7.96, + "learning_rate": 2.4087671232876713e-05, + "loss": 2.1238, + "step": 26150 + }, + { + "epoch": 7.97, + "learning_rate": 2.4072450532724506e-05, + "loss": 2.121, + "step": 26175 + }, + { + "epoch": 7.98, + "learning_rate": 2.40572298325723e-05, + "loss": 2.0184, + "step": 26200 + }, + { + "epoch": 7.98, + "learning_rate": 2.4042009132420092e-05, + "loss": 2.0781, + "step": 26225 + }, + { + "epoch": 7.99, + "learning_rate": 2.4026788432267885e-05, + "loss": 2.1665, + "step": 26250 + }, + { + "epoch": 8.0, + "learning_rate": 2.401156773211568e-05, + "loss": 2.078, + "step": 26275 + }, + { + "epoch": 8.01, + "learning_rate": 2.3996347031963474e-05, + "loss": 2.0936, + "step": 26300 + }, + { + "epoch": 8.01, + "learning_rate": 2.3981126331811267e-05, + "loss": 2.1457, + "step": 26325 + }, + { + "epoch": 8.02, + "learning_rate": 2.396590563165906e-05, + "loss": 2.218, + "step": 26350 + }, + { + "epoch": 8.03, + "learning_rate": 2.3950684931506852e-05, + "loss": 2.1591, + "step": 26375 + }, + { + "epoch": 8.04, + "learning_rate": 2.3935464231354645e-05, + "loss": 2.0221, + "step": 26400 + }, + { + "epoch": 8.04, + "learning_rate": 2.3920243531202435e-05, + "loss": 2.0926, + "step": 26425 + }, + { + "epoch": 8.05, + "learning_rate": 2.3905022831050228e-05, + "loss": 2.0845, + "step": 26450 + }, + { + "epoch": 8.06, + "learning_rate": 2.3889802130898027e-05, + "loss": 2.1087, + "step": 26475 + }, + { + "epoch": 8.07, + "learning_rate": 2.3874581430745817e-05, + "loss": 2.1333, + "step": 26500 + }, + { + "epoch": 8.07, + "learning_rate": 2.385936073059361e-05, + "loss": 2.0067, + "step": 26525 + }, + { + "epoch": 8.08, + "learning_rate": 2.3844140030441402e-05, + "loss": 2.1004, + "step": 26550 + }, + { + "epoch": 8.09, + "learning_rate": 2.3828919330289195e-05, + "loss": 2.1132, + "step": 26575 + }, + { + "epoch": 8.1, + "learning_rate": 2.3813698630136988e-05, + "loss": 2.1223, + "step": 26600 + }, + { + "epoch": 8.11, + "learning_rate": 2.379847792998478e-05, + "loss": 2.054, + "step": 26625 + }, + { + "epoch": 8.11, + "learning_rate": 2.3783257229832574e-05, + "loss": 2.0263, + "step": 26650 + }, + { + "epoch": 8.12, + "learning_rate": 2.376803652968037e-05, + "loss": 2.102, + "step": 26675 + }, + { + "epoch": 8.13, + "learning_rate": 2.3752815829528163e-05, + "loss": 2.0161, + "step": 26700 + }, + { + "epoch": 8.14, + "learning_rate": 2.3737595129375956e-05, + "loss": 2.1884, + "step": 26725 + }, + { + "epoch": 8.14, + "learning_rate": 2.372237442922375e-05, + "loss": 2.0326, + "step": 26750 + }, + { + "epoch": 8.15, + "learning_rate": 2.3707153729071538e-05, + "loss": 2.0053, + "step": 26775 + }, + { + "epoch": 8.16, + "learning_rate": 2.369193302891933e-05, + "loss": 2.101, + "step": 26800 + }, + { + "epoch": 8.17, + "learning_rate": 2.3676712328767124e-05, + "loss": 2.1225, + "step": 26825 + }, + { + "epoch": 8.17, + "learning_rate": 2.3661491628614916e-05, + "loss": 2.165, + "step": 26850 + }, + { + "epoch": 8.18, + "learning_rate": 2.3646270928462713e-05, + "loss": 2.0627, + "step": 26875 + }, + { + "epoch": 8.19, + "learning_rate": 2.3631050228310505e-05, + "loss": 2.1096, + "step": 26900 + }, + { + "epoch": 8.2, + "learning_rate": 2.3615829528158298e-05, + "loss": 2.1333, + "step": 26925 + }, + { + "epoch": 8.2, + "learning_rate": 2.360060882800609e-05, + "loss": 2.0718, + "step": 26950 + }, + { + "epoch": 8.21, + "learning_rate": 2.3585388127853884e-05, + "loss": 2.0667, + "step": 26975 + }, + { + "epoch": 8.22, + "learning_rate": 2.3570167427701677e-05, + "loss": 2.0372, + "step": 27000 + }, + { + "epoch": 8.23, + "learning_rate": 2.355494672754947e-05, + "loss": 2.1661, + "step": 27025 + }, + { + "epoch": 8.23, + "learning_rate": 2.353972602739726e-05, + "loss": 2.1163, + "step": 27050 + }, + { + "epoch": 8.24, + "learning_rate": 2.3524505327245052e-05, + "loss": 2.0357, + "step": 27075 + }, + { + "epoch": 8.25, + "learning_rate": 2.350928462709285e-05, + "loss": 2.0945, + "step": 27100 + }, + { + "epoch": 8.26, + "learning_rate": 2.349406392694064e-05, + "loss": 2.2122, + "step": 27125 + }, + { + "epoch": 8.26, + "learning_rate": 2.3478843226788434e-05, + "loss": 2.2343, + "step": 27150 + }, + { + "epoch": 8.27, + "learning_rate": 2.3463622526636227e-05, + "loss": 2.083, + "step": 27175 + }, + { + "epoch": 8.28, + "learning_rate": 2.344840182648402e-05, + "loss": 2.1022, + "step": 27200 + }, + { + "epoch": 8.29, + "learning_rate": 2.3433181126331812e-05, + "loss": 2.0508, + "step": 27225 + }, + { + "epoch": 8.3, + "learning_rate": 2.3417960426179605e-05, + "loss": 2.0839, + "step": 27250 + }, + { + "epoch": 8.3, + "learning_rate": 2.3402739726027398e-05, + "loss": 2.0814, + "step": 27275 + }, + { + "epoch": 8.31, + "learning_rate": 2.3387519025875194e-05, + "loss": 2.0378, + "step": 27300 + }, + { + "epoch": 8.32, + "learning_rate": 2.3372298325722987e-05, + "loss": 2.0934, + "step": 27325 + }, + { + "epoch": 8.33, + "learning_rate": 2.335707762557078e-05, + "loss": 2.0811, + "step": 27350 + }, + { + "epoch": 8.33, + "learning_rate": 2.3341856925418573e-05, + "loss": 2.0548, + "step": 27375 + }, + { + "epoch": 8.34, + "learning_rate": 2.3326636225266362e-05, + "loss": 2.0106, + "step": 27400 + }, + { + "epoch": 8.35, + "learning_rate": 2.3311415525114155e-05, + "loss": 2.0514, + "step": 27425 + }, + { + "epoch": 8.36, + "learning_rate": 2.3296194824961948e-05, + "loss": 2.0765, + "step": 27450 + }, + { + "epoch": 8.36, + "learning_rate": 2.328097412480974e-05, + "loss": 2.2477, + "step": 27475 + }, + { + "epoch": 8.37, + "learning_rate": 2.3265753424657537e-05, + "loss": 2.0045, + "step": 27500 + }, + { + "epoch": 8.38, + "learning_rate": 2.325053272450533e-05, + "loss": 2.1184, + "step": 27525 + }, + { + "epoch": 8.39, + "learning_rate": 2.3235312024353123e-05, + "loss": 2.143, + "step": 27550 + }, + { + "epoch": 8.39, + "learning_rate": 2.3220091324200915e-05, + "loss": 2.0317, + "step": 27575 + }, + { + "epoch": 8.4, + "learning_rate": 2.320487062404871e-05, + "loss": 2.0431, + "step": 27600 + }, + { + "epoch": 8.41, + "learning_rate": 2.31896499238965e-05, + "loss": 2.0331, + "step": 27625 + }, + { + "epoch": 8.42, + "learning_rate": 2.3174429223744294e-05, + "loss": 1.9947, + "step": 27650 + }, + { + "epoch": 8.42, + "learning_rate": 2.3159208523592083e-05, + "loss": 2.143, + "step": 27675 + }, + { + "epoch": 8.43, + "learning_rate": 2.3143987823439883e-05, + "loss": 2.2113, + "step": 27700 + }, + { + "epoch": 8.44, + "learning_rate": 2.3128767123287676e-05, + "loss": 2.0961, + "step": 27725 + }, + { + "epoch": 8.45, + "learning_rate": 2.3113546423135465e-05, + "loss": 2.0829, + "step": 27750 + }, + { + "epoch": 8.46, + "learning_rate": 2.3098325722983258e-05, + "loss": 2.1045, + "step": 27775 + }, + { + "epoch": 8.46, + "learning_rate": 2.308310502283105e-05, + "loss": 2.2277, + "step": 27800 + }, + { + "epoch": 8.47, + "learning_rate": 2.3067884322678844e-05, + "loss": 1.9936, + "step": 27825 + }, + { + "epoch": 8.48, + "learning_rate": 2.3052663622526637e-05, + "loss": 2.1255, + "step": 27850 + }, + { + "epoch": 8.49, + "learning_rate": 2.303744292237443e-05, + "loss": 2.1072, + "step": 27875 + }, + { + "epoch": 8.49, + "learning_rate": 2.3022222222222226e-05, + "loss": 2.0673, + "step": 27900 + }, + { + "epoch": 8.5, + "learning_rate": 2.300700152207002e-05, + "loss": 2.1319, + "step": 27925 + }, + { + "epoch": 8.51, + "learning_rate": 2.299178082191781e-05, + "loss": 2.1209, + "step": 27950 + }, + { + "epoch": 8.52, + "learning_rate": 2.2977168949771694e-05, + "loss": 2.1048, + "step": 27975 + }, + { + "epoch": 8.52, + "learning_rate": 2.2961948249619487e-05, + "loss": 2.0485, + "step": 28000 + }, + { + "epoch": 8.53, + "learning_rate": 2.2946727549467277e-05, + "loss": 2.0914, + "step": 28025 + }, + { + "epoch": 8.54, + "learning_rate": 2.293150684931507e-05, + "loss": 2.0312, + "step": 28050 + }, + { + "epoch": 8.55, + "learning_rate": 2.2916286149162862e-05, + "loss": 2.1114, + "step": 28075 + }, + { + "epoch": 8.55, + "learning_rate": 2.2901065449010655e-05, + "loss": 2.0711, + "step": 28100 + }, + { + "epoch": 8.56, + "learning_rate": 2.2885844748858448e-05, + "loss": 2.182, + "step": 28125 + }, + { + "epoch": 8.57, + "learning_rate": 2.287062404870624e-05, + "loss": 2.1511, + "step": 28150 + }, + { + "epoch": 8.58, + "learning_rate": 2.2855403348554037e-05, + "loss": 2.0635, + "step": 28175 + }, + { + "epoch": 8.58, + "learning_rate": 2.284018264840183e-05, + "loss": 2.0999, + "step": 28200 + }, + { + "epoch": 8.59, + "learning_rate": 2.2824961948249623e-05, + "loss": 2.0751, + "step": 28225 + }, + { + "epoch": 8.6, + "learning_rate": 2.2809741248097416e-05, + "loss": 2.0463, + "step": 28250 + }, + { + "epoch": 8.61, + "learning_rate": 2.279452054794521e-05, + "loss": 2.0961, + "step": 28275 + }, + { + "epoch": 8.61, + "learning_rate": 2.2779299847792998e-05, + "loss": 2.1299, + "step": 28300 + }, + { + "epoch": 8.62, + "learning_rate": 2.276407914764079e-05, + "loss": 2.0544, + "step": 28325 + }, + { + "epoch": 8.63, + "learning_rate": 2.2748858447488584e-05, + "loss": 2.0825, + "step": 28350 + }, + { + "epoch": 8.64, + "learning_rate": 2.273363774733638e-05, + "loss": 2.1132, + "step": 28375 + }, + { + "epoch": 8.65, + "learning_rate": 2.2718417047184173e-05, + "loss": 2.1715, + "step": 28400 + }, + { + "epoch": 8.65, + "learning_rate": 2.2703196347031965e-05, + "loss": 2.0798, + "step": 28425 + }, + { + "epoch": 8.66, + "learning_rate": 2.268797564687976e-05, + "loss": 2.0872, + "step": 28450 + }, + { + "epoch": 8.67, + "learning_rate": 2.267275494672755e-05, + "loss": 2.0075, + "step": 28475 + }, + { + "epoch": 8.68, + "learning_rate": 2.2657534246575344e-05, + "loss": 2.029, + "step": 28500 + }, + { + "epoch": 8.68, + "learning_rate": 2.2642313546423137e-05, + "loss": 2.1095, + "step": 28525 + }, + { + "epoch": 8.69, + "learning_rate": 2.262709284627093e-05, + "loss": 2.144, + "step": 28550 + }, + { + "epoch": 8.7, + "learning_rate": 2.2611872146118726e-05, + "loss": 2.1777, + "step": 28575 + }, + { + "epoch": 8.71, + "learning_rate": 2.259665144596652e-05, + "loss": 2.1214, + "step": 28600 + }, + { + "epoch": 8.71, + "learning_rate": 2.258143074581431e-05, + "loss": 2.1533, + "step": 28625 + }, + { + "epoch": 8.72, + "learning_rate": 2.25662100456621e-05, + "loss": 2.2198, + "step": 28650 + }, + { + "epoch": 8.73, + "learning_rate": 2.2550989345509894e-05, + "loss": 2.1757, + "step": 28675 + }, + { + "epoch": 8.74, + "learning_rate": 2.2535768645357687e-05, + "loss": 1.9718, + "step": 28700 + }, + { + "epoch": 8.74, + "learning_rate": 2.252054794520548e-05, + "loss": 2.06, + "step": 28725 + }, + { + "epoch": 8.75, + "learning_rate": 2.2505327245053272e-05, + "loss": 2.113, + "step": 28750 + }, + { + "epoch": 8.76, + "learning_rate": 2.249010654490107e-05, + "loss": 1.9986, + "step": 28775 + }, + { + "epoch": 8.77, + "learning_rate": 2.247488584474886e-05, + "loss": 2.0891, + "step": 28800 + }, + { + "epoch": 8.77, + "learning_rate": 2.2459665144596654e-05, + "loss": 2.1643, + "step": 28825 + }, + { + "epoch": 8.78, + "learning_rate": 2.2444444444444447e-05, + "loss": 2.1331, + "step": 28850 + }, + { + "epoch": 8.79, + "learning_rate": 2.242922374429224e-05, + "loss": 2.1478, + "step": 28875 + }, + { + "epoch": 8.8, + "learning_rate": 2.2414003044140033e-05, + "loss": 2.1363, + "step": 28900 + }, + { + "epoch": 8.81, + "learning_rate": 2.2398782343987822e-05, + "loss": 2.092, + "step": 28925 + }, + { + "epoch": 8.81, + "learning_rate": 2.2383561643835615e-05, + "loss": 2.1163, + "step": 28950 + }, + { + "epoch": 8.82, + "learning_rate": 2.2368340943683415e-05, + "loss": 2.0726, + "step": 28975 + }, + { + "epoch": 8.83, + "learning_rate": 2.2353120243531204e-05, + "loss": 2.1177, + "step": 29000 + }, + { + "epoch": 8.84, + "learning_rate": 2.2337899543378997e-05, + "loss": 2.1109, + "step": 29025 + }, + { + "epoch": 8.84, + "learning_rate": 2.232267884322679e-05, + "loss": 1.9997, + "step": 29050 + }, + { + "epoch": 8.85, + "learning_rate": 2.2307458143074583e-05, + "loss": 2.1331, + "step": 29075 + }, + { + "epoch": 8.86, + "learning_rate": 2.2292237442922376e-05, + "loss": 2.1436, + "step": 29100 + }, + { + "epoch": 8.87, + "learning_rate": 2.227701674277017e-05, + "loss": 1.897, + "step": 29125 + }, + { + "epoch": 8.87, + "learning_rate": 2.226179604261796e-05, + "loss": 2.1544, + "step": 29150 + }, + { + "epoch": 8.88, + "learning_rate": 2.2246575342465757e-05, + "loss": 2.0765, + "step": 29175 + }, + { + "epoch": 8.89, + "learning_rate": 2.223135464231355e-05, + "loss": 2.0012, + "step": 29200 + }, + { + "epoch": 8.9, + "learning_rate": 2.2216133942161343e-05, + "loss": 2.1186, + "step": 29225 + }, + { + "epoch": 8.9, + "learning_rate": 2.2200913242009136e-05, + "loss": 2.0887, + "step": 29250 + }, + { + "epoch": 8.91, + "learning_rate": 2.2185692541856925e-05, + "loss": 2.109, + "step": 29275 + }, + { + "epoch": 8.92, + "learning_rate": 2.2170471841704718e-05, + "loss": 2.119, + "step": 29300 + }, + { + "epoch": 8.93, + "learning_rate": 2.215525114155251e-05, + "loss": 2.0025, + "step": 29325 + }, + { + "epoch": 8.93, + "learning_rate": 2.2140030441400304e-05, + "loss": 2.0606, + "step": 29350 + }, + { + "epoch": 8.94, + "learning_rate": 2.21248097412481e-05, + "loss": 2.1352, + "step": 29375 + }, + { + "epoch": 8.95, + "learning_rate": 2.2109589041095893e-05, + "loss": 2.1215, + "step": 29400 + }, + { + "epoch": 8.96, + "learning_rate": 2.2094368340943686e-05, + "loss": 2.0059, + "step": 29425 + }, + { + "epoch": 8.96, + "learning_rate": 2.207914764079148e-05, + "loss": 2.1267, + "step": 29450 + }, + { + "epoch": 8.97, + "learning_rate": 2.206392694063927e-05, + "loss": 2.1622, + "step": 29475 + }, + { + "epoch": 8.98, + "learning_rate": 2.2048706240487064e-05, + "loss": 2.1535, + "step": 29500 + }, + { + "epoch": 8.99, + "learning_rate": 2.2033485540334857e-05, + "loss": 2.0931, + "step": 29525 + }, + { + "epoch": 9.0, + "learning_rate": 2.2018264840182647e-05, + "loss": 2.0341, + "step": 29550 + }, + { + "epoch": 9.0, + "learning_rate": 2.2003044140030446e-05, + "loss": 2.0635, + "step": 29575 + }, + { + "epoch": 9.01, + "learning_rate": 2.198782343987824e-05, + "loss": 2.1759, + "step": 29600 + }, + { + "epoch": 9.02, + "learning_rate": 2.197260273972603e-05, + "loss": 1.9937, + "step": 29625 + }, + { + "epoch": 9.03, + "learning_rate": 2.195738203957382e-05, + "loss": 1.9897, + "step": 29650 + }, + { + "epoch": 9.03, + "learning_rate": 2.1942161339421614e-05, + "loss": 2.0304, + "step": 29675 + }, + { + "epoch": 9.04, + "learning_rate": 2.1926940639269407e-05, + "loss": 2.0477, + "step": 29700 + }, + { + "epoch": 9.05, + "learning_rate": 2.19117199391172e-05, + "loss": 1.9738, + "step": 29725 + }, + { + "epoch": 9.06, + "learning_rate": 2.1896499238964993e-05, + "loss": 1.9111, + "step": 29750 + }, + { + "epoch": 9.06, + "learning_rate": 2.188127853881279e-05, + "loss": 1.9978, + "step": 29775 + }, + { + "epoch": 9.07, + "learning_rate": 2.1866057838660582e-05, + "loss": 2.134, + "step": 29800 + }, + { + "epoch": 9.08, + "learning_rate": 2.1850837138508375e-05, + "loss": 2.059, + "step": 29825 + }, + { + "epoch": 9.09, + "learning_rate": 2.1835616438356168e-05, + "loss": 2.0287, + "step": 29850 + }, + { + "epoch": 9.09, + "learning_rate": 2.182039573820396e-05, + "loss": 2.0847, + "step": 29875 + }, + { + "epoch": 9.1, + "learning_rate": 2.180517503805175e-05, + "loss": 1.9959, + "step": 29900 + }, + { + "epoch": 9.11, + "learning_rate": 2.1789954337899543e-05, + "loss": 2.0193, + "step": 29925 + }, + { + "epoch": 9.12, + "learning_rate": 2.1774733637747335e-05, + "loss": 2.0612, + "step": 29950 + }, + { + "epoch": 9.12, + "learning_rate": 2.1759512937595132e-05, + "loss": 2.0862, + "step": 29975 + }, + { + "epoch": 9.13, + "learning_rate": 2.1744292237442925e-05, + "loss": 2.0891, + "step": 30000 + }, + { + "epoch": 9.14, + "learning_rate": 2.1729071537290717e-05, + "loss": 2.0906, + "step": 30025 + }, + { + "epoch": 9.15, + "learning_rate": 2.171385083713851e-05, + "loss": 1.9966, + "step": 30050 + }, + { + "epoch": 9.16, + "learning_rate": 2.1698630136986303e-05, + "loss": 2.0964, + "step": 30075 + }, + { + "epoch": 9.16, + "learning_rate": 2.1683409436834096e-05, + "loss": 2.0682, + "step": 30100 + }, + { + "epoch": 9.17, + "learning_rate": 2.166818873668189e-05, + "loss": 2.0812, + "step": 30125 + }, + { + "epoch": 9.18, + "learning_rate": 2.165296803652968e-05, + "loss": 2.0623, + "step": 30150 + }, + { + "epoch": 9.19, + "learning_rate": 2.1637747336377478e-05, + "loss": 2.1535, + "step": 30175 + }, + { + "epoch": 9.19, + "learning_rate": 2.162252663622527e-05, + "loss": 2.1764, + "step": 30200 + }, + { + "epoch": 9.2, + "learning_rate": 2.1607305936073064e-05, + "loss": 1.9868, + "step": 30225 + }, + { + "epoch": 9.21, + "learning_rate": 2.1592085235920853e-05, + "loss": 2.0232, + "step": 30250 + }, + { + "epoch": 9.22, + "learning_rate": 2.1576864535768646e-05, + "loss": 2.102, + "step": 30275 + }, + { + "epoch": 9.22, + "learning_rate": 2.156164383561644e-05, + "loss": 2.0973, + "step": 30300 + }, + { + "epoch": 9.23, + "learning_rate": 2.154642313546423e-05, + "loss": 2.0408, + "step": 30325 + }, + { + "epoch": 9.24, + "learning_rate": 2.1531202435312024e-05, + "loss": 2.1228, + "step": 30350 + }, + { + "epoch": 9.25, + "learning_rate": 2.151598173515982e-05, + "loss": 2.0236, + "step": 30375 + }, + { + "epoch": 9.25, + "learning_rate": 2.1500761035007613e-05, + "loss": 2.0122, + "step": 30400 + }, + { + "epoch": 9.26, + "learning_rate": 2.1485540334855406e-05, + "loss": 2.0017, + "step": 30425 + }, + { + "epoch": 9.27, + "learning_rate": 2.14703196347032e-05, + "loss": 2.0903, + "step": 30450 + }, + { + "epoch": 9.28, + "learning_rate": 2.1455098934550992e-05, + "loss": 2.0511, + "step": 30475 + }, + { + "epoch": 9.28, + "learning_rate": 2.1439878234398785e-05, + "loss": 2.0603, + "step": 30500 + }, + { + "epoch": 9.29, + "learning_rate": 2.1424657534246574e-05, + "loss": 2.0815, + "step": 30525 + }, + { + "epoch": 9.3, + "learning_rate": 2.1409436834094367e-05, + "loss": 2.0313, + "step": 30550 + }, + { + "epoch": 9.31, + "learning_rate": 2.1394216133942167e-05, + "loss": 1.9726, + "step": 30575 + }, + { + "epoch": 9.32, + "learning_rate": 2.1378995433789956e-05, + "loss": 2.0626, + "step": 30600 + }, + { + "epoch": 9.32, + "learning_rate": 2.136377473363775e-05, + "loss": 2.1151, + "step": 30625 + }, + { + "epoch": 9.33, + "learning_rate": 2.1348554033485542e-05, + "loss": 2.0992, + "step": 30650 + }, + { + "epoch": 9.34, + "learning_rate": 2.1333333333333335e-05, + "loss": 2.0547, + "step": 30675 + }, + { + "epoch": 9.35, + "learning_rate": 2.1318112633181127e-05, + "loss": 2.0273, + "step": 30700 + }, + { + "epoch": 9.35, + "learning_rate": 2.130289193302892e-05, + "loss": 2.0666, + "step": 30725 + }, + { + "epoch": 9.36, + "learning_rate": 2.1287671232876713e-05, + "loss": 2.1278, + "step": 30750 + }, + { + "epoch": 9.37, + "learning_rate": 2.127245053272451e-05, + "loss": 2.08, + "step": 30775 + }, + { + "epoch": 9.38, + "learning_rate": 2.1257229832572302e-05, + "loss": 2.1754, + "step": 30800 + }, + { + "epoch": 9.38, + "learning_rate": 2.1242009132420095e-05, + "loss": 2.0204, + "step": 30825 + }, + { + "epoch": 9.39, + "learning_rate": 2.1226788432267888e-05, + "loss": 2.0768, + "step": 30850 + }, + { + "epoch": 9.4, + "learning_rate": 2.1211567732115677e-05, + "loss": 2.1745, + "step": 30875 + }, + { + "epoch": 9.41, + "learning_rate": 2.119634703196347e-05, + "loss": 2.0624, + "step": 30900 + }, + { + "epoch": 9.41, + "learning_rate": 2.1181126331811263e-05, + "loss": 2.0544, + "step": 30925 + }, + { + "epoch": 9.42, + "learning_rate": 2.1165905631659056e-05, + "loss": 1.9643, + "step": 30950 + }, + { + "epoch": 9.43, + "learning_rate": 2.1150684931506852e-05, + "loss": 2.0692, + "step": 30975 + }, + { + "epoch": 9.44, + "learning_rate": 2.1135464231354645e-05, + "loss": 2.0349, + "step": 31000 + }, + { + "epoch": 9.44, + "learning_rate": 2.1120243531202438e-05, + "loss": 2.0648, + "step": 31025 + }, + { + "epoch": 9.45, + "learning_rate": 2.110502283105023e-05, + "loss": 2.0094, + "step": 31050 + }, + { + "epoch": 9.46, + "learning_rate": 2.1089802130898023e-05, + "loss": 2.118, + "step": 31075 + }, + { + "epoch": 9.47, + "learning_rate": 2.1074581430745816e-05, + "loss": 2.0851, + "step": 31100 + }, + { + "epoch": 9.47, + "learning_rate": 2.105936073059361e-05, + "loss": 2.0231, + "step": 31125 + }, + { + "epoch": 9.48, + "learning_rate": 2.10441400304414e-05, + "loss": 2.1099, + "step": 31150 + }, + { + "epoch": 9.49, + "learning_rate": 2.1028919330289198e-05, + "loss": 2.1271, + "step": 31175 + }, + { + "epoch": 9.5, + "learning_rate": 2.101369863013699e-05, + "loss": 2.0688, + "step": 31200 + }, + { + "epoch": 9.51, + "learning_rate": 2.099847792998478e-05, + "loss": 2.0647, + "step": 31225 + }, + { + "epoch": 9.51, + "learning_rate": 2.0983257229832573e-05, + "loss": 2.1555, + "step": 31250 + }, + { + "epoch": 9.52, + "learning_rate": 2.0968036529680366e-05, + "loss": 2.0468, + "step": 31275 + }, + { + "epoch": 9.53, + "learning_rate": 2.095281582952816e-05, + "loss": 2.1865, + "step": 31300 + }, + { + "epoch": 9.54, + "learning_rate": 2.0937595129375952e-05, + "loss": 2.1422, + "step": 31325 + }, + { + "epoch": 9.54, + "learning_rate": 2.0922374429223745e-05, + "loss": 2.1395, + "step": 31350 + }, + { + "epoch": 9.55, + "learning_rate": 2.090715372907154e-05, + "loss": 2.09, + "step": 31375 + }, + { + "epoch": 9.56, + "learning_rate": 2.0891933028919334e-05, + "loss": 2.0477, + "step": 31400 + }, + { + "epoch": 9.57, + "learning_rate": 2.0876712328767127e-05, + "loss": 2.0753, + "step": 31425 + }, + { + "epoch": 9.57, + "learning_rate": 2.086149162861492e-05, + "loss": 2.0948, + "step": 31450 + }, + { + "epoch": 9.58, + "learning_rate": 2.0846270928462712e-05, + "loss": 2.0595, + "step": 31475 + }, + { + "epoch": 9.59, + "learning_rate": 2.0831050228310502e-05, + "loss": 2.01, + "step": 31500 + }, + { + "epoch": 9.6, + "learning_rate": 2.0815829528158295e-05, + "loss": 2.1305, + "step": 31525 + }, + { + "epoch": 9.6, + "learning_rate": 2.0800608828006087e-05, + "loss": 2.1159, + "step": 31550 + }, + { + "epoch": 9.61, + "learning_rate": 2.0785388127853884e-05, + "loss": 2.0244, + "step": 31575 + }, + { + "epoch": 9.62, + "learning_rate": 2.0770167427701676e-05, + "loss": 1.9978, + "step": 31600 + }, + { + "epoch": 9.63, + "learning_rate": 2.075494672754947e-05, + "loss": 2.0019, + "step": 31625 + }, + { + "epoch": 9.63, + "learning_rate": 2.0739726027397262e-05, + "loss": 2.1559, + "step": 31650 + }, + { + "epoch": 9.64, + "learning_rate": 2.0724505327245055e-05, + "loss": 2.0498, + "step": 31675 + }, + { + "epoch": 9.65, + "learning_rate": 2.0709284627092848e-05, + "loss": 2.0949, + "step": 31700 + }, + { + "epoch": 9.66, + "learning_rate": 2.069406392694064e-05, + "loss": 2.0069, + "step": 31725 + }, + { + "epoch": 9.67, + "learning_rate": 2.0678843226788434e-05, + "loss": 2.1079, + "step": 31750 + }, + { + "epoch": 9.67, + "learning_rate": 2.066362252663623e-05, + "loss": 2.0984, + "step": 31775 + }, + { + "epoch": 9.68, + "learning_rate": 2.0648401826484023e-05, + "loss": 2.0458, + "step": 31800 + }, + { + "epoch": 9.69, + "learning_rate": 2.0633181126331815e-05, + "loss": 2.0529, + "step": 31825 + }, + { + "epoch": 9.7, + "learning_rate": 2.0617960426179605e-05, + "loss": 2.1335, + "step": 31850 + }, + { + "epoch": 9.7, + "learning_rate": 2.0602739726027398e-05, + "loss": 2.0454, + "step": 31875 + }, + { + "epoch": 9.71, + "learning_rate": 2.058751902587519e-05, + "loss": 2.0767, + "step": 31900 + }, + { + "epoch": 9.72, + "learning_rate": 2.0572298325722983e-05, + "loss": 2.0955, + "step": 31925 + }, + { + "epoch": 9.73, + "learning_rate": 2.0557077625570776e-05, + "loss": 2.0321, + "step": 31950 + }, + { + "epoch": 9.73, + "learning_rate": 2.0541856925418572e-05, + "loss": 1.9845, + "step": 31975 + }, + { + "epoch": 9.74, + "learning_rate": 2.0526636225266365e-05, + "loss": 1.9996, + "step": 32000 + }, + { + "epoch": 9.75, + "learning_rate": 2.0511415525114158e-05, + "loss": 2.1175, + "step": 32025 + }, + { + "epoch": 9.76, + "learning_rate": 2.049619482496195e-05, + "loss": 2.1869, + "step": 32050 + }, + { + "epoch": 9.76, + "learning_rate": 2.0480974124809744e-05, + "loss": 2.0169, + "step": 32075 + }, + { + "epoch": 9.77, + "learning_rate": 2.0465753424657537e-05, + "loss": 2.0489, + "step": 32100 + }, + { + "epoch": 9.78, + "learning_rate": 2.0450532724505326e-05, + "loss": 2.1809, + "step": 32125 + }, + { + "epoch": 9.79, + "learning_rate": 2.043531202435312e-05, + "loss": 2.1068, + "step": 32150 + }, + { + "epoch": 9.79, + "learning_rate": 2.042009132420092e-05, + "loss": 2.1649, + "step": 32175 + }, + { + "epoch": 9.8, + "learning_rate": 2.0404870624048708e-05, + "loss": 2.1867, + "step": 32200 + }, + { + "epoch": 9.81, + "learning_rate": 2.03896499238965e-05, + "loss": 2.1665, + "step": 32225 + }, + { + "epoch": 9.82, + "learning_rate": 2.0374429223744294e-05, + "loss": 2.1239, + "step": 32250 + }, + { + "epoch": 9.82, + "learning_rate": 2.0359208523592087e-05, + "loss": 2.0871, + "step": 32275 + }, + { + "epoch": 9.83, + "learning_rate": 2.034398782343988e-05, + "loss": 2.0232, + "step": 32300 + }, + { + "epoch": 9.84, + "learning_rate": 2.0328767123287672e-05, + "loss": 2.028, + "step": 32325 + }, + { + "epoch": 9.85, + "learning_rate": 2.0313546423135465e-05, + "loss": 2.0638, + "step": 32350 + }, + { + "epoch": 9.86, + "learning_rate": 2.029832572298326e-05, + "loss": 2.0474, + "step": 32375 + }, + { + "epoch": 9.86, + "learning_rate": 2.0283105022831054e-05, + "loss": 2.0768, + "step": 32400 + }, + { + "epoch": 9.87, + "learning_rate": 2.0267884322678847e-05, + "loss": 2.0103, + "step": 32425 + }, + { + "epoch": 9.88, + "learning_rate": 2.025266362252664e-05, + "loss": 2.0039, + "step": 32450 + }, + { + "epoch": 9.89, + "learning_rate": 2.023744292237443e-05, + "loss": 2.0592, + "step": 32475 + }, + { + "epoch": 9.89, + "learning_rate": 2.0222222222222222e-05, + "loss": 1.9841, + "step": 32500 + }, + { + "epoch": 9.9, + "learning_rate": 2.0207001522070015e-05, + "loss": 2.0784, + "step": 32525 + }, + { + "epoch": 9.91, + "learning_rate": 2.0191780821917808e-05, + "loss": 2.1935, + "step": 32550 + }, + { + "epoch": 9.92, + "learning_rate": 2.0176560121765604e-05, + "loss": 2.0357, + "step": 32575 + }, + { + "epoch": 9.92, + "learning_rate": 2.0161339421613397e-05, + "loss": 2.1061, + "step": 32600 + }, + { + "epoch": 9.93, + "learning_rate": 2.014611872146119e-05, + "loss": 2.1107, + "step": 32625 + }, + { + "epoch": 9.94, + "learning_rate": 2.0130898021308983e-05, + "loss": 2.1319, + "step": 32650 + }, + { + "epoch": 9.95, + "learning_rate": 2.0115677321156775e-05, + "loss": 2.059, + "step": 32675 + }, + { + "epoch": 9.95, + "learning_rate": 2.0100456621004568e-05, + "loss": 2.1054, + "step": 32700 + }, + { + "epoch": 9.96, + "learning_rate": 2.008523592085236e-05, + "loss": 2.1039, + "step": 32725 + }, + { + "epoch": 9.97, + "learning_rate": 2.007001522070015e-05, + "loss": 2.0244, + "step": 32750 + }, + { + "epoch": 9.98, + "learning_rate": 2.005479452054795e-05, + "loss": 2.0507, + "step": 32775 + }, + { + "epoch": 9.98, + "learning_rate": 2.0039573820395743e-05, + "loss": 2.0643, + "step": 32800 + }, + { + "epoch": 9.99, + "learning_rate": 2.0024353120243532e-05, + "loss": 2.0597, + "step": 32825 + }, + { + "epoch": 10.0, + "learning_rate": 2.0009132420091325e-05, + "loss": 2.0781, + "step": 32850 + }, + { + "epoch": 10.01, + "learning_rate": 1.9993911719939118e-05, + "loss": 2.039, + "step": 32875 + }, + { + "epoch": 10.02, + "learning_rate": 1.997869101978691e-05, + "loss": 2.0825, + "step": 32900 + }, + { + "epoch": 10.02, + "learning_rate": 1.9963470319634707e-05, + "loss": 2.0215, + "step": 32925 + }, + { + "epoch": 10.03, + "learning_rate": 1.9948249619482497e-05, + "loss": 1.9872, + "step": 32950 + }, + { + "epoch": 10.04, + "learning_rate": 1.993302891933029e-05, + "loss": 1.9938, + "step": 32975 + }, + { + "epoch": 10.05, + "learning_rate": 1.9917808219178082e-05, + "loss": 2.0155, + "step": 33000 + }, + { + "epoch": 10.05, + "learning_rate": 1.990258751902588e-05, + "loss": 1.9574, + "step": 33025 + }, + { + "epoch": 10.06, + "learning_rate": 1.988736681887367e-05, + "loss": 1.9415, + "step": 33050 + }, + { + "epoch": 10.07, + "learning_rate": 1.9872146118721464e-05, + "loss": 2.1311, + "step": 33075 + }, + { + "epoch": 10.08, + "learning_rate": 1.9856925418569254e-05, + "loss": 2.0027, + "step": 33100 + }, + { + "epoch": 10.08, + "learning_rate": 1.984170471841705e-05, + "loss": 2.0285, + "step": 33125 + }, + { + "epoch": 10.09, + "learning_rate": 1.9826484018264843e-05, + "loss": 2.0183, + "step": 33150 + }, + { + "epoch": 10.1, + "learning_rate": 1.9811263318112636e-05, + "loss": 2.0641, + "step": 33175 + }, + { + "epoch": 10.11, + "learning_rate": 1.979604261796043e-05, + "loss": 2.0903, + "step": 33200 + }, + { + "epoch": 10.11, + "learning_rate": 1.978082191780822e-05, + "loss": 1.9673, + "step": 33225 + }, + { + "epoch": 10.12, + "learning_rate": 1.9765601217656014e-05, + "loss": 2.0883, + "step": 33250 + }, + { + "epoch": 10.13, + "learning_rate": 1.9750380517503807e-05, + "loss": 2.0343, + "step": 33275 + }, + { + "epoch": 10.14, + "learning_rate": 1.97351598173516e-05, + "loss": 2.0388, + "step": 33300 + }, + { + "epoch": 10.14, + "learning_rate": 1.9719939117199393e-05, + "loss": 1.9403, + "step": 33325 + }, + { + "epoch": 10.15, + "learning_rate": 1.9704718417047185e-05, + "loss": 2.1293, + "step": 33350 + }, + { + "epoch": 10.16, + "learning_rate": 1.9689497716894978e-05, + "loss": 1.9008, + "step": 33375 + }, + { + "epoch": 10.17, + "learning_rate": 1.967427701674277e-05, + "loss": 2.0903, + "step": 33400 + }, + { + "epoch": 10.18, + "learning_rate": 1.9659056316590567e-05, + "loss": 2.1492, + "step": 33425 + }, + { + "epoch": 10.18, + "learning_rate": 1.9643835616438357e-05, + "loss": 2.0181, + "step": 33450 + }, + { + "epoch": 10.19, + "learning_rate": 1.962861491628615e-05, + "loss": 2.1131, + "step": 33475 + }, + { + "epoch": 10.2, + "learning_rate": 1.9613394216133942e-05, + "loss": 1.9928, + "step": 33500 + }, + { + "epoch": 10.21, + "learning_rate": 1.959817351598174e-05, + "loss": 2.091, + "step": 33525 + }, + { + "epoch": 10.21, + "learning_rate": 1.958295281582953e-05, + "loss": 2.0378, + "step": 33550 + }, + { + "epoch": 10.22, + "learning_rate": 1.956773211567732e-05, + "loss": 2.0189, + "step": 33575 + }, + { + "epoch": 10.23, + "learning_rate": 1.9552511415525114e-05, + "loss": 2.1016, + "step": 33600 + }, + { + "epoch": 10.24, + "learning_rate": 1.953729071537291e-05, + "loss": 2.0928, + "step": 33625 + }, + { + "epoch": 10.24, + "learning_rate": 1.9522070015220703e-05, + "loss": 2.1194, + "step": 33650 + }, + { + "epoch": 10.25, + "learning_rate": 1.9506849315068496e-05, + "loss": 2.0628, + "step": 33675 + }, + { + "epoch": 10.26, + "learning_rate": 1.949162861491629e-05, + "loss": 2.0574, + "step": 33700 + }, + { + "epoch": 10.27, + "learning_rate": 1.947640791476408e-05, + "loss": 1.9877, + "step": 33725 + }, + { + "epoch": 10.27, + "learning_rate": 1.9461187214611874e-05, + "loss": 1.959, + "step": 33750 + }, + { + "epoch": 10.28, + "learning_rate": 1.9445966514459667e-05, + "loss": 2.0333, + "step": 33775 + }, + { + "epoch": 10.29, + "learning_rate": 1.943074581430746e-05, + "loss": 2.0887, + "step": 33800 + }, + { + "epoch": 10.3, + "learning_rate": 1.9415525114155253e-05, + "loss": 2.0249, + "step": 33825 + }, + { + "epoch": 10.3, + "learning_rate": 1.9400304414003046e-05, + "loss": 2.0648, + "step": 33850 + }, + { + "epoch": 10.31, + "learning_rate": 1.938508371385084e-05, + "loss": 2.1138, + "step": 33875 + }, + { + "epoch": 10.32, + "learning_rate": 1.936986301369863e-05, + "loss": 2.0822, + "step": 33900 + }, + { + "epoch": 10.33, + "learning_rate": 1.9354642313546424e-05, + "loss": 2.1693, + "step": 33925 + }, + { + "epoch": 10.33, + "learning_rate": 1.9339421613394217e-05, + "loss": 2.0125, + "step": 33950 + }, + { + "epoch": 10.34, + "learning_rate": 1.932420091324201e-05, + "loss": 2.0564, + "step": 33975 + }, + { + "epoch": 10.35, + "learning_rate": 1.9308980213089803e-05, + "loss": 2.1638, + "step": 34000 + }, + { + "epoch": 10.36, + "learning_rate": 1.92937595129376e-05, + "loss": 2.0393, + "step": 34025 + }, + { + "epoch": 10.37, + "learning_rate": 1.9278538812785392e-05, + "loss": 2.0248, + "step": 34050 + }, + { + "epoch": 10.37, + "learning_rate": 1.926331811263318e-05, + "loss": 1.9676, + "step": 34075 + }, + { + "epoch": 10.38, + "learning_rate": 1.9248097412480974e-05, + "loss": 2.0428, + "step": 34100 + }, + { + "epoch": 10.39, + "learning_rate": 1.923287671232877e-05, + "loss": 2.0268, + "step": 34125 + }, + { + "epoch": 10.4, + "learning_rate": 1.9217656012176563e-05, + "loss": 2.1396, + "step": 34150 + }, + { + "epoch": 10.4, + "learning_rate": 1.9202435312024356e-05, + "loss": 2.0436, + "step": 34175 + }, + { + "epoch": 10.41, + "learning_rate": 1.9187214611872145e-05, + "loss": 2.1294, + "step": 34200 + }, + { + "epoch": 10.42, + "learning_rate": 1.917199391171994e-05, + "loss": 2.0051, + "step": 34225 + }, + { + "epoch": 10.43, + "learning_rate": 1.9156773211567734e-05, + "loss": 2.0874, + "step": 34250 + }, + { + "epoch": 10.43, + "learning_rate": 1.9141552511415527e-05, + "loss": 2.0328, + "step": 34275 + }, + { + "epoch": 10.44, + "learning_rate": 1.912633181126332e-05, + "loss": 2.0851, + "step": 34300 + }, + { + "epoch": 10.45, + "learning_rate": 1.9111111111111113e-05, + "loss": 2.051, + "step": 34325 + }, + { + "epoch": 10.46, + "learning_rate": 1.9095890410958906e-05, + "loss": 2.1141, + "step": 34350 + }, + { + "epoch": 10.46, + "learning_rate": 1.90806697108067e-05, + "loss": 2.0351, + "step": 34375 + }, + { + "epoch": 10.47, + "learning_rate": 1.906544901065449e-05, + "loss": 1.9976, + "step": 34400 + }, + { + "epoch": 10.48, + "learning_rate": 1.9050228310502284e-05, + "loss": 2.0309, + "step": 34425 + }, + { + "epoch": 10.49, + "learning_rate": 1.9035007610350077e-05, + "loss": 2.0315, + "step": 34450 + }, + { + "epoch": 10.49, + "learning_rate": 1.901978691019787e-05, + "loss": 2.1068, + "step": 34475 + }, + { + "epoch": 10.5, + "learning_rate": 1.9004566210045663e-05, + "loss": 1.9746, + "step": 34500 + }, + { + "epoch": 10.51, + "learning_rate": 1.898934550989346e-05, + "loss": 2.0455, + "step": 34525 + }, + { + "epoch": 10.52, + "learning_rate": 1.897412480974125e-05, + "loss": 2.1544, + "step": 34550 + }, + { + "epoch": 10.53, + "learning_rate": 1.895890410958904e-05, + "loss": 2.0103, + "step": 34575 + }, + { + "epoch": 10.53, + "learning_rate": 1.8943683409436834e-05, + "loss": 2.0323, + "step": 34600 + }, + { + "epoch": 10.54, + "learning_rate": 1.892846270928463e-05, + "loss": 2.138, + "step": 34625 + }, + { + "epoch": 10.55, + "learning_rate": 1.8913242009132423e-05, + "loss": 2.0743, + "step": 34650 + }, + { + "epoch": 10.56, + "learning_rate": 1.8898021308980216e-05, + "loss": 2.097, + "step": 34675 + }, + { + "epoch": 10.56, + "learning_rate": 1.8882800608828006e-05, + "loss": 2.1001, + "step": 34700 + }, + { + "epoch": 10.57, + "learning_rate": 1.8867579908675802e-05, + "loss": 2.1197, + "step": 34725 + }, + { + "epoch": 10.58, + "learning_rate": 1.8852359208523595e-05, + "loss": 2.0936, + "step": 34750 + }, + { + "epoch": 10.59, + "learning_rate": 1.8837138508371387e-05, + "loss": 2.0947, + "step": 34775 + }, + { + "epoch": 10.59, + "learning_rate": 1.882191780821918e-05, + "loss": 1.9545, + "step": 34800 + }, + { + "epoch": 10.6, + "learning_rate": 1.8806697108066973e-05, + "loss": 1.9313, + "step": 34825 + }, + { + "epoch": 10.61, + "learning_rate": 1.8791476407914766e-05, + "loss": 1.9543, + "step": 34850 + }, + { + "epoch": 10.62, + "learning_rate": 1.877625570776256e-05, + "loss": 1.8876, + "step": 34875 + }, + { + "epoch": 10.62, + "learning_rate": 1.876103500761035e-05, + "loss": 2.0014, + "step": 34900 + }, + { + "epoch": 10.63, + "learning_rate": 1.8745814307458145e-05, + "loss": 2.0244, + "step": 34925 + }, + { + "epoch": 10.64, + "learning_rate": 1.8730593607305937e-05, + "loss": 2.033, + "step": 34950 + }, + { + "epoch": 10.65, + "learning_rate": 1.871537290715373e-05, + "loss": 2.0249, + "step": 34975 + }, + { + "epoch": 10.65, + "learning_rate": 1.8700761035007613e-05, + "loss": 2.052, + "step": 35000 + }, + { + "epoch": 10.66, + "learning_rate": 1.8685540334855406e-05, + "loss": 1.9771, + "step": 35025 + }, + { + "epoch": 10.67, + "learning_rate": 1.86703196347032e-05, + "loss": 2.036, + "step": 35050 + }, + { + "epoch": 10.68, + "learning_rate": 1.865509893455099e-05, + "loss": 2.0739, + "step": 35075 + }, + { + "epoch": 10.68, + "learning_rate": 1.8639878234398784e-05, + "loss": 1.9556, + "step": 35100 + }, + { + "epoch": 10.69, + "learning_rate": 1.8624657534246577e-05, + "loss": 2.0215, + "step": 35125 + }, + { + "epoch": 10.7, + "learning_rate": 1.860943683409437e-05, + "loss": 1.9275, + "step": 35150 + }, + { + "epoch": 10.71, + "learning_rate": 1.8594216133942163e-05, + "loss": 2.0273, + "step": 35175 + }, + { + "epoch": 10.72, + "learning_rate": 1.8578995433789956e-05, + "loss": 2.0919, + "step": 35200 + }, + { + "epoch": 10.72, + "learning_rate": 1.856377473363775e-05, + "loss": 2.0538, + "step": 35225 + }, + { + "epoch": 10.73, + "learning_rate": 1.854855403348554e-05, + "loss": 2.0061, + "step": 35250 + }, + { + "epoch": 10.74, + "learning_rate": 1.8533333333333334e-05, + "loss": 2.0814, + "step": 35275 + }, + { + "epoch": 10.75, + "learning_rate": 1.851811263318113e-05, + "loss": 2.1047, + "step": 35300 + }, + { + "epoch": 10.75, + "learning_rate": 1.850289193302892e-05, + "loss": 1.9969, + "step": 35325 + }, + { + "epoch": 10.76, + "learning_rate": 1.8487671232876713e-05, + "loss": 2.0814, + "step": 35350 + }, + { + "epoch": 10.77, + "learning_rate": 1.8472450532724506e-05, + "loss": 1.9949, + "step": 35375 + }, + { + "epoch": 10.78, + "learning_rate": 1.8457229832572302e-05, + "loss": 2.037, + "step": 35400 + }, + { + "epoch": 10.78, + "learning_rate": 1.8442009132420095e-05, + "loss": 2.0312, + "step": 35425 + }, + { + "epoch": 10.79, + "learning_rate": 1.8426788432267884e-05, + "loss": 1.987, + "step": 35450 + }, + { + "epoch": 10.8, + "learning_rate": 1.8411567732115677e-05, + "loss": 2.1242, + "step": 35475 + }, + { + "epoch": 10.81, + "learning_rate": 1.839634703196347e-05, + "loss": 2.0262, + "step": 35500 + }, + { + "epoch": 10.81, + "learning_rate": 1.8381126331811266e-05, + "loss": 2.0393, + "step": 35525 + }, + { + "epoch": 10.82, + "learning_rate": 1.836590563165906e-05, + "loss": 2.0485, + "step": 35550 + }, + { + "epoch": 10.83, + "learning_rate": 1.8350684931506852e-05, + "loss": 2.0249, + "step": 35575 + }, + { + "epoch": 10.84, + "learning_rate": 1.833546423135464e-05, + "loss": 1.9595, + "step": 35600 + }, + { + "epoch": 10.84, + "learning_rate": 1.8320243531202437e-05, + "loss": 2.0546, + "step": 35625 + }, + { + "epoch": 10.85, + "learning_rate": 1.830502283105023e-05, + "loss": 2.0052, + "step": 35650 + }, + { + "epoch": 10.86, + "learning_rate": 1.8289802130898023e-05, + "loss": 2.0484, + "step": 35675 + }, + { + "epoch": 10.87, + "learning_rate": 1.8274581430745816e-05, + "loss": 2.0976, + "step": 35700 + }, + { + "epoch": 10.88, + "learning_rate": 1.825936073059361e-05, + "loss": 2.0822, + "step": 35725 + }, + { + "epoch": 10.88, + "learning_rate": 1.82441400304414e-05, + "loss": 2.0432, + "step": 35750 + }, + { + "epoch": 10.89, + "learning_rate": 1.8228919330289195e-05, + "loss": 1.9908, + "step": 35775 + }, + { + "epoch": 10.9, + "learning_rate": 1.8213698630136987e-05, + "loss": 2.1414, + "step": 35800 + }, + { + "epoch": 10.91, + "learning_rate": 1.819847792998478e-05, + "loss": 1.9891, + "step": 35825 + }, + { + "epoch": 10.91, + "learning_rate": 1.8183257229832573e-05, + "loss": 2.054, + "step": 35850 + }, + { + "epoch": 10.92, + "learning_rate": 1.8168036529680366e-05, + "loss": 2.1285, + "step": 35875 + }, + { + "epoch": 10.93, + "learning_rate": 1.815281582952816e-05, + "loss": 2.0289, + "step": 35900 + }, + { + "epoch": 10.94, + "learning_rate": 1.8137595129375955e-05, + "loss": 2.0027, + "step": 35925 + }, + { + "epoch": 10.94, + "learning_rate": 1.8122374429223744e-05, + "loss": 2.0613, + "step": 35950 + }, + { + "epoch": 10.95, + "learning_rate": 1.8107153729071537e-05, + "loss": 2.0867, + "step": 35975 + }, + { + "epoch": 10.96, + "learning_rate": 1.809193302891933e-05, + "loss": 2.0042, + "step": 36000 + }, + { + "epoch": 10.97, + "learning_rate": 1.8076712328767126e-05, + "loss": 1.9788, + "step": 36025 + }, + { + "epoch": 10.97, + "learning_rate": 1.806149162861492e-05, + "loss": 2.1272, + "step": 36050 + }, + { + "epoch": 10.98, + "learning_rate": 1.804627092846271e-05, + "loss": 2.1209, + "step": 36075 + }, + { + "epoch": 10.99, + "learning_rate": 1.80310502283105e-05, + "loss": 2.0478, + "step": 36100 + }, + { + "epoch": 11.0, + "learning_rate": 1.8015829528158298e-05, + "loss": 2.0026, + "step": 36125 + }, + { + "epoch": 11.0, + "learning_rate": 1.800060882800609e-05, + "loss": 1.9632, + "step": 36150 + }, + { + "epoch": 11.01, + "learning_rate": 1.7985388127853883e-05, + "loss": 1.9789, + "step": 36175 + }, + { + "epoch": 11.02, + "learning_rate": 1.7970167427701676e-05, + "loss": 2.006, + "step": 36200 + }, + { + "epoch": 11.03, + "learning_rate": 1.795494672754947e-05, + "loss": 1.9146, + "step": 36225 + }, + { + "epoch": 11.04, + "learning_rate": 1.7939726027397262e-05, + "loss": 2.0145, + "step": 36250 + }, + { + "epoch": 11.04, + "learning_rate": 1.7924505327245055e-05, + "loss": 1.9779, + "step": 36275 + }, + { + "epoch": 11.05, + "learning_rate": 1.7909284627092848e-05, + "loss": 2.0325, + "step": 36300 + }, + { + "epoch": 11.06, + "learning_rate": 1.789406392694064e-05, + "loss": 2.1302, + "step": 36325 + }, + { + "epoch": 11.07, + "learning_rate": 1.7878843226788433e-05, + "loss": 2.035, + "step": 36350 + }, + { + "epoch": 11.07, + "learning_rate": 1.7863622526636226e-05, + "loss": 2.0235, + "step": 36375 + }, + { + "epoch": 11.08, + "learning_rate": 1.784840182648402e-05, + "loss": 2.0755, + "step": 36400 + }, + { + "epoch": 11.09, + "learning_rate": 1.7833181126331812e-05, + "loss": 2.1273, + "step": 36425 + }, + { + "epoch": 11.1, + "learning_rate": 1.7817960426179605e-05, + "loss": 1.8809, + "step": 36450 + }, + { + "epoch": 11.1, + "learning_rate": 1.7802739726027397e-05, + "loss": 2.0696, + "step": 36475 + }, + { + "epoch": 11.11, + "learning_rate": 1.778751902587519e-05, + "loss": 2.0482, + "step": 36500 + }, + { + "epoch": 11.12, + "learning_rate": 1.7772298325722986e-05, + "loss": 1.9742, + "step": 36525 + }, + { + "epoch": 11.13, + "learning_rate": 1.775707762557078e-05, + "loss": 2.0024, + "step": 36550 + }, + { + "epoch": 11.13, + "learning_rate": 1.774185692541857e-05, + "loss": 2.13, + "step": 36575 + }, + { + "epoch": 11.14, + "learning_rate": 1.772663622526636e-05, + "loss": 2.055, + "step": 36600 + }, + { + "epoch": 11.15, + "learning_rate": 1.7711415525114158e-05, + "loss": 2.0081, + "step": 36625 + }, + { + "epoch": 11.16, + "learning_rate": 1.769619482496195e-05, + "loss": 2.083, + "step": 36650 + }, + { + "epoch": 11.16, + "learning_rate": 1.7680974124809744e-05, + "loss": 1.9954, + "step": 36675 + }, + { + "epoch": 11.17, + "learning_rate": 1.7665753424657533e-05, + "loss": 1.9231, + "step": 36700 + }, + { + "epoch": 11.18, + "learning_rate": 1.765053272450533e-05, + "loss": 2.0633, + "step": 36725 + }, + { + "epoch": 11.19, + "learning_rate": 1.7635312024353122e-05, + "loss": 2.0269, + "step": 36750 + }, + { + "epoch": 11.19, + "learning_rate": 1.7620091324200915e-05, + "loss": 2.0619, + "step": 36775 + }, + { + "epoch": 11.2, + "learning_rate": 1.7604870624048708e-05, + "loss": 2.1641, + "step": 36800 + }, + { + "epoch": 11.21, + "learning_rate": 1.75896499238965e-05, + "loss": 2.0819, + "step": 36825 + }, + { + "epoch": 11.22, + "learning_rate": 1.7574429223744293e-05, + "loss": 2.0132, + "step": 36850 + }, + { + "epoch": 11.23, + "learning_rate": 1.7559208523592086e-05, + "loss": 2.0833, + "step": 36875 + }, + { + "epoch": 11.23, + "learning_rate": 1.754398782343988e-05, + "loss": 1.9813, + "step": 36900 + }, + { + "epoch": 11.24, + "learning_rate": 1.7528767123287672e-05, + "loss": 1.9838, + "step": 36925 + }, + { + "epoch": 11.25, + "learning_rate": 1.7513546423135465e-05, + "loss": 1.9377, + "step": 36950 + }, + { + "epoch": 11.26, + "learning_rate": 1.7498325722983258e-05, + "loss": 2.0033, + "step": 36975 + }, + { + "epoch": 11.26, + "learning_rate": 1.748310502283105e-05, + "loss": 2.0323, + "step": 37000 + }, + { + "epoch": 11.27, + "learning_rate": 1.7467884322678847e-05, + "loss": 1.8743, + "step": 37025 + }, + { + "epoch": 11.28, + "learning_rate": 1.7452663622526636e-05, + "loss": 2.021, + "step": 37050 + }, + { + "epoch": 11.29, + "learning_rate": 1.743744292237443e-05, + "loss": 1.995, + "step": 37075 + }, + { + "epoch": 11.29, + "learning_rate": 1.7422222222222222e-05, + "loss": 2.0027, + "step": 37100 + }, + { + "epoch": 11.3, + "learning_rate": 1.7407001522070018e-05, + "loss": 1.9754, + "step": 37125 + }, + { + "epoch": 11.31, + "learning_rate": 1.739178082191781e-05, + "loss": 2.042, + "step": 37150 + }, + { + "epoch": 11.32, + "learning_rate": 1.7376560121765604e-05, + "loss": 1.9945, + "step": 37175 + }, + { + "epoch": 11.32, + "learning_rate": 1.7361339421613393e-05, + "loss": 1.9659, + "step": 37200 + }, + { + "epoch": 11.33, + "learning_rate": 1.734611872146119e-05, + "loss": 2.083, + "step": 37225 + }, + { + "epoch": 11.34, + "learning_rate": 1.7330898021308982e-05, + "loss": 1.9871, + "step": 37250 + }, + { + "epoch": 11.35, + "learning_rate": 1.7315677321156775e-05, + "loss": 1.9931, + "step": 37275 + }, + { + "epoch": 11.35, + "learning_rate": 1.7300456621004568e-05, + "loss": 2.0554, + "step": 37300 + }, + { + "epoch": 11.36, + "learning_rate": 1.728523592085236e-05, + "loss": 2.118, + "step": 37325 + }, + { + "epoch": 11.37, + "learning_rate": 1.7270015220700154e-05, + "loss": 2.0515, + "step": 37350 + }, + { + "epoch": 11.38, + "learning_rate": 1.7254794520547946e-05, + "loss": 2.0505, + "step": 37375 + }, + { + "epoch": 11.39, + "learning_rate": 1.723957382039574e-05, + "loss": 2.1095, + "step": 37400 + }, + { + "epoch": 11.39, + "learning_rate": 1.7224353120243532e-05, + "loss": 2.0882, + "step": 37425 + }, + { + "epoch": 11.4, + "learning_rate": 1.7209132420091325e-05, + "loss": 1.9593, + "step": 37450 + }, + { + "epoch": 11.41, + "learning_rate": 1.7193911719939118e-05, + "loss": 2.0295, + "step": 37475 + }, + { + "epoch": 11.42, + "learning_rate": 1.717869101978691e-05, + "loss": 2.0128, + "step": 37500 + }, + { + "epoch": 11.42, + "learning_rate": 1.7163470319634707e-05, + "loss": 2.0363, + "step": 37525 + }, + { + "epoch": 11.43, + "learning_rate": 1.7148249619482496e-05, + "loss": 2.0494, + "step": 37550 + }, + { + "epoch": 11.44, + "learning_rate": 1.713302891933029e-05, + "loss": 2.0278, + "step": 37575 + }, + { + "epoch": 11.45, + "learning_rate": 1.7117808219178082e-05, + "loss": 1.9859, + "step": 37600 + }, + { + "epoch": 11.45, + "learning_rate": 1.7102587519025878e-05, + "loss": 2.0568, + "step": 37625 + }, + { + "epoch": 11.46, + "learning_rate": 1.708736681887367e-05, + "loss": 1.9878, + "step": 37650 + }, + { + "epoch": 11.47, + "learning_rate": 1.707214611872146e-05, + "loss": 2.0664, + "step": 37675 + }, + { + "epoch": 11.48, + "learning_rate": 1.7056925418569253e-05, + "loss": 1.9954, + "step": 37700 + }, + { + "epoch": 11.48, + "learning_rate": 1.704170471841705e-05, + "loss": 1.9442, + "step": 37725 + }, + { + "epoch": 11.49, + "learning_rate": 1.7026484018264842e-05, + "loss": 1.9689, + "step": 37750 + }, + { + "epoch": 11.5, + "learning_rate": 1.7011263318112635e-05, + "loss": 1.9743, + "step": 37775 + }, + { + "epoch": 11.51, + "learning_rate": 1.6996042617960428e-05, + "loss": 2.0726, + "step": 37800 + }, + { + "epoch": 11.51, + "learning_rate": 1.698082191780822e-05, + "loss": 1.9607, + "step": 37825 + }, + { + "epoch": 11.52, + "learning_rate": 1.6965601217656014e-05, + "loss": 2.0583, + "step": 37850 + }, + { + "epoch": 11.53, + "learning_rate": 1.6950380517503807e-05, + "loss": 2.0146, + "step": 37875 + }, + { + "epoch": 11.54, + "learning_rate": 1.69351598173516e-05, + "loss": 2.01, + "step": 37900 + }, + { + "epoch": 11.54, + "learning_rate": 1.6919939117199392e-05, + "loss": 2.0699, + "step": 37925 + }, + { + "epoch": 11.55, + "learning_rate": 1.6904718417047185e-05, + "loss": 2.0734, + "step": 37950 + }, + { + "epoch": 11.56, + "learning_rate": 1.6889497716894978e-05, + "loss": 2.0048, + "step": 37975 + }, + { + "epoch": 11.57, + "learning_rate": 1.687427701674277e-05, + "loss": 1.9044, + "step": 38000 + }, + { + "epoch": 11.58, + "learning_rate": 1.6859056316590564e-05, + "loss": 2.0226, + "step": 38025 + }, + { + "epoch": 11.58, + "learning_rate": 1.6843835616438357e-05, + "loss": 1.9519, + "step": 38050 + }, + { + "epoch": 11.59, + "learning_rate": 1.682861491628615e-05, + "loss": 2.03, + "step": 38075 + }, + { + "epoch": 11.6, + "learning_rate": 1.6813394216133942e-05, + "loss": 2.0279, + "step": 38100 + }, + { + "epoch": 11.61, + "learning_rate": 1.679817351598174e-05, + "loss": 1.9242, + "step": 38125 + }, + { + "epoch": 11.61, + "learning_rate": 1.678295281582953e-05, + "loss": 2.0287, + "step": 38150 + }, + { + "epoch": 11.62, + "learning_rate": 1.676773211567732e-05, + "loss": 1.9837, + "step": 38175 + }, + { + "epoch": 11.63, + "learning_rate": 1.6752511415525114e-05, + "loss": 2.1662, + "step": 38200 + }, + { + "epoch": 11.64, + "learning_rate": 1.673729071537291e-05, + "loss": 2.0756, + "step": 38225 + }, + { + "epoch": 11.64, + "learning_rate": 1.6722070015220703e-05, + "loss": 2.0991, + "step": 38250 + }, + { + "epoch": 11.65, + "learning_rate": 1.6706849315068495e-05, + "loss": 2.0585, + "step": 38275 + }, + { + "epoch": 11.66, + "learning_rate": 1.6691628614916285e-05, + "loss": 2.0222, + "step": 38300 + }, + { + "epoch": 11.67, + "learning_rate": 1.667640791476408e-05, + "loss": 2.1262, + "step": 38325 + }, + { + "epoch": 11.67, + "learning_rate": 1.6661187214611874e-05, + "loss": 2.0557, + "step": 38350 + }, + { + "epoch": 11.68, + "learning_rate": 1.6645966514459667e-05, + "loss": 2.0179, + "step": 38375 + }, + { + "epoch": 11.69, + "learning_rate": 1.663074581430746e-05, + "loss": 2.0048, + "step": 38400 + }, + { + "epoch": 11.7, + "learning_rate": 1.6615525114155252e-05, + "loss": 1.9947, + "step": 38425 + }, + { + "epoch": 11.7, + "learning_rate": 1.6600304414003045e-05, + "loss": 1.9683, + "step": 38450 + }, + { + "epoch": 11.71, + "learning_rate": 1.6585083713850838e-05, + "loss": 1.9387, + "step": 38475 + }, + { + "epoch": 11.72, + "learning_rate": 1.656986301369863e-05, + "loss": 2.0681, + "step": 38500 + }, + { + "epoch": 11.73, + "learning_rate": 1.6554642313546424e-05, + "loss": 2.0681, + "step": 38525 + }, + { + "epoch": 11.74, + "learning_rate": 1.6539421613394217e-05, + "loss": 2.0351, + "step": 38550 + }, + { + "epoch": 11.74, + "learning_rate": 1.652420091324201e-05, + "loss": 2.0481, + "step": 38575 + }, + { + "epoch": 11.75, + "learning_rate": 1.6508980213089802e-05, + "loss": 1.9093, + "step": 38600 + }, + { + "epoch": 11.76, + "learning_rate": 1.64937595129376e-05, + "loss": 1.9843, + "step": 38625 + }, + { + "epoch": 11.77, + "learning_rate": 1.6478538812785388e-05, + "loss": 1.958, + "step": 38650 + }, + { + "epoch": 11.77, + "learning_rate": 1.646331811263318e-05, + "loss": 2.0893, + "step": 38675 + }, + { + "epoch": 11.78, + "learning_rate": 1.6448097412480974e-05, + "loss": 1.9919, + "step": 38700 + }, + { + "epoch": 11.79, + "learning_rate": 1.643287671232877e-05, + "loss": 1.906, + "step": 38725 + }, + { + "epoch": 11.8, + "learning_rate": 1.6417656012176563e-05, + "loss": 2.062, + "step": 38750 + }, + { + "epoch": 11.8, + "learning_rate": 1.6402435312024356e-05, + "loss": 2.1367, + "step": 38775 + }, + { + "epoch": 11.81, + "learning_rate": 1.6387214611872145e-05, + "loss": 2.022, + "step": 38800 + }, + { + "epoch": 11.82, + "learning_rate": 1.637199391171994e-05, + "loss": 1.9896, + "step": 38825 + }, + { + "epoch": 11.83, + "learning_rate": 1.6356773211567734e-05, + "loss": 1.9913, + "step": 38850 + }, + { + "epoch": 11.83, + "learning_rate": 1.6341552511415527e-05, + "loss": 2.0554, + "step": 38875 + }, + { + "epoch": 11.84, + "learning_rate": 1.632633181126332e-05, + "loss": 2.0311, + "step": 38900 + }, + { + "epoch": 11.85, + "learning_rate": 1.6311111111111113e-05, + "loss": 2.0395, + "step": 38925 + }, + { + "epoch": 11.86, + "learning_rate": 1.6295890410958906e-05, + "loss": 2.0641, + "step": 38950 + }, + { + "epoch": 11.86, + "learning_rate": 1.62806697108067e-05, + "loss": 1.9859, + "step": 38975 + }, + { + "epoch": 11.87, + "learning_rate": 1.626544901065449e-05, + "loss": 1.9978, + "step": 39000 + }, + { + "epoch": 11.88, + "learning_rate": 1.6250228310502284e-05, + "loss": 2.042, + "step": 39025 + }, + { + "epoch": 11.89, + "learning_rate": 1.6235007610350077e-05, + "loss": 1.9892, + "step": 39050 + }, + { + "epoch": 11.89, + "learning_rate": 1.621978691019787e-05, + "loss": 2.0111, + "step": 39075 + }, + { + "epoch": 11.9, + "learning_rate": 1.6204566210045663e-05, + "loss": 2.0407, + "step": 39100 + }, + { + "epoch": 11.91, + "learning_rate": 1.618934550989346e-05, + "loss": 2.0049, + "step": 39125 + }, + { + "epoch": 11.92, + "learning_rate": 1.6174124809741248e-05, + "loss": 1.9931, + "step": 39150 + }, + { + "epoch": 11.93, + "learning_rate": 1.615890410958904e-05, + "loss": 2.0479, + "step": 39175 + }, + { + "epoch": 11.93, + "learning_rate": 1.6143683409436834e-05, + "loss": 2.1207, + "step": 39200 + }, + { + "epoch": 11.94, + "learning_rate": 1.612846270928463e-05, + "loss": 2.0321, + "step": 39225 + }, + { + "epoch": 11.95, + "learning_rate": 1.6113242009132423e-05, + "loss": 2.0861, + "step": 39250 + }, + { + "epoch": 11.96, + "learning_rate": 1.6098021308980212e-05, + "loss": 2.0689, + "step": 39275 + }, + { + "epoch": 11.96, + "learning_rate": 1.6082800608828005e-05, + "loss": 2.0498, + "step": 39300 + }, + { + "epoch": 11.97, + "learning_rate": 1.60675799086758e-05, + "loss": 2.0761, + "step": 39325 + }, + { + "epoch": 11.98, + "learning_rate": 1.6052359208523594e-05, + "loss": 1.9528, + "step": 39350 + }, + { + "epoch": 11.99, + "learning_rate": 1.6037138508371387e-05, + "loss": 2.0626, + "step": 39375 + }, + { + "epoch": 11.99, + "learning_rate": 1.602191780821918e-05, + "loss": 2.0268, + "step": 39400 + }, + { + "epoch": 12.0, + "learning_rate": 1.6006697108066973e-05, + "loss": 1.9873, + "step": 39425 + }, + { + "epoch": 12.01, + "learning_rate": 1.5991476407914766e-05, + "loss": 1.9849, + "step": 39450 + }, + { + "epoch": 12.02, + "learning_rate": 1.597625570776256e-05, + "loss": 1.9636, + "step": 39475 + }, + { + "epoch": 12.02, + "learning_rate": 1.596103500761035e-05, + "loss": 2.0264, + "step": 39500 + }, + { + "epoch": 12.03, + "learning_rate": 1.5945814307458144e-05, + "loss": 2.0667, + "step": 39525 + }, + { + "epoch": 12.04, + "learning_rate": 1.5930593607305937e-05, + "loss": 2.0723, + "step": 39550 + }, + { + "epoch": 12.05, + "learning_rate": 1.591537290715373e-05, + "loss": 1.923, + "step": 39575 + }, + { + "epoch": 12.05, + "learning_rate": 1.5900152207001523e-05, + "loss": 2.0104, + "step": 39600 + }, + { + "epoch": 12.06, + "learning_rate": 1.5884931506849316e-05, + "loss": 1.9761, + "step": 39625 + }, + { + "epoch": 12.07, + "learning_rate": 1.586971080669711e-05, + "loss": 1.9907, + "step": 39650 + }, + { + "epoch": 12.08, + "learning_rate": 1.58544901065449e-05, + "loss": 1.988, + "step": 39675 + }, + { + "epoch": 12.09, + "learning_rate": 1.5839878234398784e-05, + "loss": 1.8632, + "step": 39700 + }, + { + "epoch": 12.09, + "learning_rate": 1.5824657534246577e-05, + "loss": 2.0041, + "step": 39725 + }, + { + "epoch": 12.1, + "learning_rate": 1.580943683409437e-05, + "loss": 1.9654, + "step": 39750 + }, + { + "epoch": 12.11, + "learning_rate": 1.5794216133942163e-05, + "loss": 2.0408, + "step": 39775 + }, + { + "epoch": 12.12, + "learning_rate": 1.5778995433789956e-05, + "loss": 1.991, + "step": 39800 + }, + { + "epoch": 12.12, + "learning_rate": 1.576377473363775e-05, + "loss": 2.0496, + "step": 39825 + }, + { + "epoch": 12.13, + "learning_rate": 1.574855403348554e-05, + "loss": 1.9504, + "step": 39850 + }, + { + "epoch": 12.14, + "learning_rate": 1.5733333333333334e-05, + "loss": 1.982, + "step": 39875 + }, + { + "epoch": 12.15, + "learning_rate": 1.5718112633181127e-05, + "loss": 1.9645, + "step": 39900 + }, + { + "epoch": 12.15, + "learning_rate": 1.570289193302892e-05, + "loss": 1.9775, + "step": 39925 + }, + { + "epoch": 12.16, + "learning_rate": 1.5687671232876713e-05, + "loss": 2.0309, + "step": 39950 + }, + { + "epoch": 12.17, + "learning_rate": 1.5672450532724505e-05, + "loss": 1.94, + "step": 39975 + }, + { + "epoch": 12.18, + "learning_rate": 1.56572298325723e-05, + "loss": 1.9667, + "step": 40000 + }, + { + "epoch": 12.18, + "learning_rate": 1.5642009132420094e-05, + "loss": 1.9854, + "step": 40025 + }, + { + "epoch": 12.19, + "learning_rate": 1.5626788432267884e-05, + "loss": 2.031, + "step": 40050 + }, + { + "epoch": 12.2, + "learning_rate": 1.5611567732115677e-05, + "loss": 2.0135, + "step": 40075 + }, + { + "epoch": 12.21, + "learning_rate": 1.5596347031963473e-05, + "loss": 2.0855, + "step": 40100 + }, + { + "epoch": 12.21, + "learning_rate": 1.5581126331811266e-05, + "loss": 1.8975, + "step": 40125 + }, + { + "epoch": 12.22, + "learning_rate": 1.556590563165906e-05, + "loss": 2.0706, + "step": 40150 + }, + { + "epoch": 12.23, + "learning_rate": 1.5550684931506848e-05, + "loss": 2.043, + "step": 40175 + }, + { + "epoch": 12.24, + "learning_rate": 1.5535464231354644e-05, + "loss": 1.9185, + "step": 40200 + }, + { + "epoch": 12.25, + "learning_rate": 1.5520243531202437e-05, + "loss": 2.03, + "step": 40225 + }, + { + "epoch": 12.25, + "learning_rate": 1.5505631659056317e-05, + "loss": 2.0385, + "step": 40250 + }, + { + "epoch": 12.26, + "learning_rate": 1.5490410958904113e-05, + "loss": 2.0019, + "step": 40275 + }, + { + "epoch": 12.27, + "learning_rate": 1.5475190258751906e-05, + "loss": 1.9028, + "step": 40300 + }, + { + "epoch": 12.28, + "learning_rate": 1.5459969558599695e-05, + "loss": 2.0015, + "step": 40325 + }, + { + "epoch": 12.28, + "learning_rate": 1.5444748858447488e-05, + "loss": 1.94, + "step": 40350 + }, + { + "epoch": 12.29, + "learning_rate": 1.5429528158295284e-05, + "loss": 2.0226, + "step": 40375 + }, + { + "epoch": 12.3, + "learning_rate": 1.5414307458143077e-05, + "loss": 2.0047, + "step": 40400 + }, + { + "epoch": 12.31, + "learning_rate": 1.539908675799087e-05, + "loss": 2.0061, + "step": 40425 + }, + { + "epoch": 12.31, + "learning_rate": 1.5383866057838663e-05, + "loss": 2.0525, + "step": 40450 + }, + { + "epoch": 12.32, + "learning_rate": 1.5368645357686456e-05, + "loss": 1.9416, + "step": 40475 + }, + { + "epoch": 12.33, + "learning_rate": 1.535342465753425e-05, + "loss": 1.8657, + "step": 40500 + }, + { + "epoch": 12.34, + "learning_rate": 1.533820395738204e-05, + "loss": 1.9785, + "step": 40525 + }, + { + "epoch": 12.34, + "learning_rate": 1.5322983257229834e-05, + "loss": 1.9967, + "step": 40550 + }, + { + "epoch": 12.35, + "learning_rate": 1.5307762557077627e-05, + "loss": 2.1364, + "step": 40575 + }, + { + "epoch": 12.36, + "learning_rate": 1.529254185692542e-05, + "loss": 2.0049, + "step": 40600 + }, + { + "epoch": 12.37, + "learning_rate": 1.5277321156773213e-05, + "loss": 1.9425, + "step": 40625 + }, + { + "epoch": 12.37, + "learning_rate": 1.5262100456621006e-05, + "loss": 1.9966, + "step": 40650 + }, + { + "epoch": 12.38, + "learning_rate": 1.52468797564688e-05, + "loss": 2.0006, + "step": 40675 + }, + { + "epoch": 12.39, + "learning_rate": 1.5231659056316593e-05, + "loss": 2.0976, + "step": 40700 + }, + { + "epoch": 12.4, + "learning_rate": 1.5216438356164384e-05, + "loss": 2.0786, + "step": 40725 + }, + { + "epoch": 12.4, + "learning_rate": 1.5201217656012177e-05, + "loss": 2.013, + "step": 40750 + }, + { + "epoch": 12.41, + "learning_rate": 1.5185996955859971e-05, + "loss": 2.027, + "step": 40775 + }, + { + "epoch": 12.42, + "learning_rate": 1.5170776255707764e-05, + "loss": 2.0121, + "step": 40800 + }, + { + "epoch": 12.43, + "learning_rate": 1.5155555555555557e-05, + "loss": 2.0365, + "step": 40825 + }, + { + "epoch": 12.44, + "learning_rate": 1.514033485540335e-05, + "loss": 1.9879, + "step": 40850 + }, + { + "epoch": 12.44, + "learning_rate": 1.5125114155251144e-05, + "loss": 1.9805, + "step": 40875 + }, + { + "epoch": 12.45, + "learning_rate": 1.5109893455098936e-05, + "loss": 1.9679, + "step": 40900 + }, + { + "epoch": 12.46, + "learning_rate": 1.5094672754946728e-05, + "loss": 2.0051, + "step": 40925 + }, + { + "epoch": 12.47, + "learning_rate": 1.5079452054794521e-05, + "loss": 1.9057, + "step": 40950 + }, + { + "epoch": 12.47, + "learning_rate": 1.5064231354642316e-05, + "loss": 2.0421, + "step": 40975 + }, + { + "epoch": 12.48, + "learning_rate": 1.5049010654490109e-05, + "loss": 1.9747, + "step": 41000 + }, + { + "epoch": 12.49, + "learning_rate": 1.5033789954337901e-05, + "loss": 1.9291, + "step": 41025 + }, + { + "epoch": 12.5, + "learning_rate": 1.5018569254185693e-05, + "loss": 1.9702, + "step": 41050 + }, + { + "epoch": 12.5, + "learning_rate": 1.5003348554033487e-05, + "loss": 1.9764, + "step": 41075 + }, + { + "epoch": 12.51, + "learning_rate": 1.498812785388128e-05, + "loss": 2.0485, + "step": 41100 + }, + { + "epoch": 12.52, + "learning_rate": 1.4972907153729073e-05, + "loss": 2.0413, + "step": 41125 + }, + { + "epoch": 12.53, + "learning_rate": 1.4957686453576866e-05, + "loss": 1.9439, + "step": 41150 + }, + { + "epoch": 12.53, + "learning_rate": 1.494246575342466e-05, + "loss": 1.9991, + "step": 41175 + }, + { + "epoch": 12.54, + "learning_rate": 1.4927245053272451e-05, + "loss": 1.9862, + "step": 41200 + }, + { + "epoch": 12.55, + "learning_rate": 1.4912024353120244e-05, + "loss": 2.0129, + "step": 41225 + }, + { + "epoch": 12.56, + "learning_rate": 1.4896803652968037e-05, + "loss": 2.0102, + "step": 41250 + }, + { + "epoch": 12.56, + "learning_rate": 1.4881582952815832e-05, + "loss": 1.9498, + "step": 41275 + }, + { + "epoch": 12.57, + "learning_rate": 1.4866362252663624e-05, + "loss": 2.0143, + "step": 41300 + }, + { + "epoch": 12.58, + "learning_rate": 1.4851141552511417e-05, + "loss": 2.1354, + "step": 41325 + }, + { + "epoch": 12.59, + "learning_rate": 1.4835920852359208e-05, + "loss": 1.9088, + "step": 41350 + }, + { + "epoch": 12.6, + "learning_rate": 1.4820700152207003e-05, + "loss": 1.9757, + "step": 41375 + }, + { + "epoch": 12.6, + "learning_rate": 1.4805479452054796e-05, + "loss": 2.0642, + "step": 41400 + }, + { + "epoch": 12.61, + "learning_rate": 1.4790258751902589e-05, + "loss": 1.9588, + "step": 41425 + }, + { + "epoch": 12.62, + "learning_rate": 1.4775038051750381e-05, + "loss": 2.0616, + "step": 41450 + }, + { + "epoch": 12.63, + "learning_rate": 1.4759817351598176e-05, + "loss": 2.0629, + "step": 41475 + }, + { + "epoch": 12.63, + "learning_rate": 1.4744596651445969e-05, + "loss": 2.0032, + "step": 41500 + }, + { + "epoch": 12.64, + "learning_rate": 1.472937595129376e-05, + "loss": 2.0258, + "step": 41525 + }, + { + "epoch": 12.65, + "learning_rate": 1.4714155251141553e-05, + "loss": 2.0051, + "step": 41550 + }, + { + "epoch": 12.66, + "learning_rate": 1.4698934550989347e-05, + "loss": 1.9466, + "step": 41575 + }, + { + "epoch": 12.66, + "learning_rate": 1.468371385083714e-05, + "loss": 1.9509, + "step": 41600 + }, + { + "epoch": 12.67, + "learning_rate": 1.4668493150684933e-05, + "loss": 1.9443, + "step": 41625 + }, + { + "epoch": 12.68, + "learning_rate": 1.4653272450532726e-05, + "loss": 2.09, + "step": 41650 + }, + { + "epoch": 12.69, + "learning_rate": 1.463805175038052e-05, + "loss": 1.9646, + "step": 41675 + }, + { + "epoch": 12.69, + "learning_rate": 1.4622831050228312e-05, + "loss": 1.9612, + "step": 41700 + }, + { + "epoch": 12.7, + "learning_rate": 1.4607610350076104e-05, + "loss": 2.0115, + "step": 41725 + }, + { + "epoch": 12.71, + "learning_rate": 1.4592389649923897e-05, + "loss": 1.9786, + "step": 41750 + }, + { + "epoch": 12.72, + "learning_rate": 1.4577168949771692e-05, + "loss": 1.9919, + "step": 41775 + }, + { + "epoch": 12.72, + "learning_rate": 1.4561948249619485e-05, + "loss": 1.9885, + "step": 41800 + }, + { + "epoch": 12.73, + "learning_rate": 1.4546727549467277e-05, + "loss": 2.0261, + "step": 41825 + }, + { + "epoch": 12.74, + "learning_rate": 1.4531506849315069e-05, + "loss": 2.0076, + "step": 41850 + }, + { + "epoch": 12.75, + "learning_rate": 1.4516286149162863e-05, + "loss": 2.1131, + "step": 41875 + }, + { + "epoch": 12.75, + "learning_rate": 1.4501065449010656e-05, + "loss": 1.9584, + "step": 41900 + }, + { + "epoch": 12.76, + "learning_rate": 1.4485844748858449e-05, + "loss": 2.0659, + "step": 41925 + }, + { + "epoch": 12.77, + "learning_rate": 1.4470624048706242e-05, + "loss": 1.9934, + "step": 41950 + }, + { + "epoch": 12.78, + "learning_rate": 1.4455403348554036e-05, + "loss": 2.0764, + "step": 41975 + }, + { + "epoch": 12.79, + "learning_rate": 1.4440182648401827e-05, + "loss": 1.9509, + "step": 42000 + }, + { + "epoch": 12.79, + "learning_rate": 1.442496194824962e-05, + "loss": 1.952, + "step": 42025 + }, + { + "epoch": 12.8, + "learning_rate": 1.4409741248097413e-05, + "loss": 1.9733, + "step": 42050 + }, + { + "epoch": 12.81, + "learning_rate": 1.4394520547945208e-05, + "loss": 2.055, + "step": 42075 + }, + { + "epoch": 12.82, + "learning_rate": 1.4379299847793e-05, + "loss": 2.0173, + "step": 42100 + }, + { + "epoch": 12.82, + "learning_rate": 1.4364079147640793e-05, + "loss": 2.0792, + "step": 42125 + }, + { + "epoch": 12.83, + "learning_rate": 1.4348858447488584e-05, + "loss": 2.0051, + "step": 42150 + }, + { + "epoch": 12.84, + "learning_rate": 1.4334246575342467e-05, + "loss": 2.1026, + "step": 42175 + }, + { + "epoch": 12.85, + "learning_rate": 1.431902587519026e-05, + "loss": 1.9983, + "step": 42200 + }, + { + "epoch": 12.85, + "learning_rate": 1.4303805175038053e-05, + "loss": 2.0111, + "step": 42225 + }, + { + "epoch": 12.86, + "learning_rate": 1.4288584474885844e-05, + "loss": 1.9746, + "step": 42250 + }, + { + "epoch": 12.87, + "learning_rate": 1.427336377473364e-05, + "loss": 1.9585, + "step": 42275 + }, + { + "epoch": 12.88, + "learning_rate": 1.4258143074581431e-05, + "loss": 1.9328, + "step": 42300 + }, + { + "epoch": 12.88, + "learning_rate": 1.4242922374429224e-05, + "loss": 2.0076, + "step": 42325 + }, + { + "epoch": 12.89, + "learning_rate": 1.4227701674277017e-05, + "loss": 1.9502, + "step": 42350 + }, + { + "epoch": 12.9, + "learning_rate": 1.4212480974124812e-05, + "loss": 2.0057, + "step": 42375 + }, + { + "epoch": 12.91, + "learning_rate": 1.4197260273972605e-05, + "loss": 2.0274, + "step": 42400 + }, + { + "epoch": 12.91, + "learning_rate": 1.4182039573820396e-05, + "loss": 2.0714, + "step": 42425 + }, + { + "epoch": 12.92, + "learning_rate": 1.4166818873668189e-05, + "loss": 2.0538, + "step": 42450 + }, + { + "epoch": 12.93, + "learning_rate": 1.4151598173515983e-05, + "loss": 2.0079, + "step": 42475 + }, + { + "epoch": 12.94, + "learning_rate": 1.4136377473363776e-05, + "loss": 2.012, + "step": 42500 + }, + { + "epoch": 12.95, + "learning_rate": 1.4121156773211569e-05, + "loss": 2.0648, + "step": 42525 + }, + { + "epoch": 12.95, + "learning_rate": 1.4105936073059362e-05, + "loss": 1.9938, + "step": 42550 + }, + { + "epoch": 12.96, + "learning_rate": 1.4090715372907156e-05, + "loss": 2.0337, + "step": 42575 + }, + { + "epoch": 12.97, + "learning_rate": 1.4075494672754947e-05, + "loss": 1.9989, + "step": 42600 + }, + { + "epoch": 12.98, + "learning_rate": 1.406027397260274e-05, + "loss": 2.0122, + "step": 42625 + }, + { + "epoch": 12.98, + "learning_rate": 1.4045053272450533e-05, + "loss": 2.0362, + "step": 42650 + }, + { + "epoch": 12.99, + "learning_rate": 1.4029832572298327e-05, + "loss": 2.0769, + "step": 42675 + }, + { + "epoch": 13.0, + "learning_rate": 1.401461187214612e-05, + "loss": 1.9409, + "step": 42700 + }, + { + "epoch": 13.01, + "learning_rate": 1.3999391171993913e-05, + "loss": 2.0088, + "step": 42725 + }, + { + "epoch": 13.01, + "learning_rate": 1.3984170471841704e-05, + "loss": 1.9523, + "step": 42750 + }, + { + "epoch": 13.02, + "learning_rate": 1.3968949771689499e-05, + "loss": 2.0154, + "step": 42775 + }, + { + "epoch": 13.03, + "learning_rate": 1.3953729071537292e-05, + "loss": 1.8923, + "step": 42800 + }, + { + "epoch": 13.04, + "learning_rate": 1.3938508371385084e-05, + "loss": 1.9403, + "step": 42825 + }, + { + "epoch": 13.04, + "learning_rate": 1.3923287671232877e-05, + "loss": 1.9734, + "step": 42850 + }, + { + "epoch": 13.05, + "learning_rate": 1.3908066971080672e-05, + "loss": 1.9603, + "step": 42875 + }, + { + "epoch": 13.06, + "learning_rate": 1.3892846270928465e-05, + "loss": 2.0133, + "step": 42900 + }, + { + "epoch": 13.07, + "learning_rate": 1.3877625570776256e-05, + "loss": 1.9425, + "step": 42925 + }, + { + "epoch": 13.07, + "learning_rate": 1.3862404870624049e-05, + "loss": 2.0419, + "step": 42950 + }, + { + "epoch": 13.08, + "learning_rate": 1.3847184170471843e-05, + "loss": 2.0259, + "step": 42975 + }, + { + "epoch": 13.09, + "learning_rate": 1.3831963470319636e-05, + "loss": 2.0865, + "step": 43000 + }, + { + "epoch": 13.1, + "learning_rate": 1.3816742770167429e-05, + "loss": 1.939, + "step": 43025 + }, + { + "epoch": 13.11, + "learning_rate": 1.380152207001522e-05, + "loss": 1.9798, + "step": 43050 + }, + { + "epoch": 13.11, + "learning_rate": 1.3786301369863016e-05, + "loss": 1.9552, + "step": 43075 + }, + { + "epoch": 13.12, + "learning_rate": 1.3771080669710807e-05, + "loss": 2.015, + "step": 43100 + }, + { + "epoch": 13.13, + "learning_rate": 1.37558599695586e-05, + "loss": 2.0089, + "step": 43125 + }, + { + "epoch": 13.14, + "learning_rate": 1.3740639269406393e-05, + "loss": 2.0603, + "step": 43150 + }, + { + "epoch": 13.14, + "learning_rate": 1.3725418569254188e-05, + "loss": 2.0267, + "step": 43175 + }, + { + "epoch": 13.15, + "learning_rate": 1.371019786910198e-05, + "loss": 1.9773, + "step": 43200 + }, + { + "epoch": 13.16, + "learning_rate": 1.3694977168949772e-05, + "loss": 2.0409, + "step": 43225 + }, + { + "epoch": 13.17, + "learning_rate": 1.3679756468797564e-05, + "loss": 2.0045, + "step": 43250 + }, + { + "epoch": 13.17, + "learning_rate": 1.3664535768645359e-05, + "loss": 1.9808, + "step": 43275 + }, + { + "epoch": 13.18, + "learning_rate": 1.3649315068493152e-05, + "loss": 1.9854, + "step": 43300 + }, + { + "epoch": 13.19, + "learning_rate": 1.3634094368340945e-05, + "loss": 2.0598, + "step": 43325 + }, + { + "epoch": 13.2, + "learning_rate": 1.3618873668188738e-05, + "loss": 1.9223, + "step": 43350 + }, + { + "epoch": 13.2, + "learning_rate": 1.3603652968036532e-05, + "loss": 1.917, + "step": 43375 + }, + { + "epoch": 13.21, + "learning_rate": 1.3588432267884323e-05, + "loss": 2.0412, + "step": 43400 + }, + { + "epoch": 13.22, + "learning_rate": 1.3573211567732116e-05, + "loss": 1.9624, + "step": 43425 + }, + { + "epoch": 13.23, + "learning_rate": 1.3557990867579909e-05, + "loss": 2.0322, + "step": 43450 + }, + { + "epoch": 13.23, + "learning_rate": 1.3543378995433792e-05, + "loss": 1.9836, + "step": 43475 + }, + { + "epoch": 13.24, + "learning_rate": 1.3528158295281583e-05, + "loss": 2.0068, + "step": 43500 + }, + { + "epoch": 13.25, + "learning_rate": 1.3512937595129376e-05, + "loss": 2.0092, + "step": 43525 + }, + { + "epoch": 13.26, + "learning_rate": 1.349771689497717e-05, + "loss": 2.0502, + "step": 43550 + }, + { + "epoch": 13.26, + "learning_rate": 1.3482496194824963e-05, + "loss": 1.9953, + "step": 43575 + }, + { + "epoch": 13.27, + "learning_rate": 1.3467275494672756e-05, + "loss": 1.9734, + "step": 43600 + }, + { + "epoch": 13.28, + "learning_rate": 1.3452054794520549e-05, + "loss": 1.9861, + "step": 43625 + }, + { + "epoch": 13.29, + "learning_rate": 1.3436834094368343e-05, + "loss": 1.9144, + "step": 43650 + }, + { + "epoch": 13.3, + "learning_rate": 1.3421613394216134e-05, + "loss": 1.8742, + "step": 43675 + }, + { + "epoch": 13.3, + "learning_rate": 1.3406392694063927e-05, + "loss": 2.1363, + "step": 43700 + }, + { + "epoch": 13.31, + "learning_rate": 1.339117199391172e-05, + "loss": 1.9555, + "step": 43725 + }, + { + "epoch": 13.32, + "learning_rate": 1.3375951293759515e-05, + "loss": 1.8268, + "step": 43750 + }, + { + "epoch": 13.33, + "learning_rate": 1.3360730593607308e-05, + "loss": 1.9572, + "step": 43775 + }, + { + "epoch": 13.33, + "learning_rate": 1.33455098934551e-05, + "loss": 2.0172, + "step": 43800 + }, + { + "epoch": 13.34, + "learning_rate": 1.3330289193302892e-05, + "loss": 1.9264, + "step": 43825 + }, + { + "epoch": 13.35, + "learning_rate": 1.3315068493150686e-05, + "loss": 1.9078, + "step": 43850 + }, + { + "epoch": 13.36, + "learning_rate": 1.3299847792998479e-05, + "loss": 1.9868, + "step": 43875 + }, + { + "epoch": 13.36, + "learning_rate": 1.3284627092846272e-05, + "loss": 2.0821, + "step": 43900 + }, + { + "epoch": 13.37, + "learning_rate": 1.3269406392694065e-05, + "loss": 2.0153, + "step": 43925 + }, + { + "epoch": 13.38, + "learning_rate": 1.3254185692541859e-05, + "loss": 1.9711, + "step": 43950 + }, + { + "epoch": 13.39, + "learning_rate": 1.3238964992389652e-05, + "loss": 1.9948, + "step": 43975 + }, + { + "epoch": 13.39, + "learning_rate": 1.3223744292237443e-05, + "loss": 2.0175, + "step": 44000 + }, + { + "epoch": 13.4, + "learning_rate": 1.3208523592085236e-05, + "loss": 2.0479, + "step": 44025 + }, + { + "epoch": 13.41, + "learning_rate": 1.319330289193303e-05, + "loss": 1.97, + "step": 44050 + }, + { + "epoch": 13.42, + "learning_rate": 1.3178082191780823e-05, + "loss": 2.1167, + "step": 44075 + }, + { + "epoch": 13.42, + "learning_rate": 1.3162861491628616e-05, + "loss": 1.9078, + "step": 44100 + }, + { + "epoch": 13.43, + "learning_rate": 1.3147640791476409e-05, + "loss": 1.9969, + "step": 44125 + }, + { + "epoch": 13.44, + "learning_rate": 1.3132420091324204e-05, + "loss": 2.076, + "step": 44150 + }, + { + "epoch": 13.45, + "learning_rate": 1.3117199391171995e-05, + "loss": 2.0344, + "step": 44175 + }, + { + "epoch": 13.46, + "learning_rate": 1.3101978691019788e-05, + "loss": 1.9706, + "step": 44200 + }, + { + "epoch": 13.46, + "learning_rate": 1.308675799086758e-05, + "loss": 1.9537, + "step": 44225 + }, + { + "epoch": 13.47, + "learning_rate": 1.3071537290715375e-05, + "loss": 1.9367, + "step": 44250 + }, + { + "epoch": 13.48, + "learning_rate": 1.3056316590563168e-05, + "loss": 1.9292, + "step": 44275 + }, + { + "epoch": 13.49, + "learning_rate": 1.3041095890410959e-05, + "loss": 1.9613, + "step": 44300 + }, + { + "epoch": 13.49, + "learning_rate": 1.3025875190258752e-05, + "loss": 1.9972, + "step": 44325 + }, + { + "epoch": 13.5, + "learning_rate": 1.3010654490106546e-05, + "loss": 2.0244, + "step": 44350 + }, + { + "epoch": 13.51, + "learning_rate": 1.2995433789954339e-05, + "loss": 1.8689, + "step": 44375 + }, + { + "epoch": 13.52, + "learning_rate": 1.2980213089802132e-05, + "loss": 2.0505, + "step": 44400 + }, + { + "epoch": 13.52, + "learning_rate": 1.2964992389649925e-05, + "loss": 1.9141, + "step": 44425 + }, + { + "epoch": 13.53, + "learning_rate": 1.294977168949772e-05, + "loss": 1.9494, + "step": 44450 + }, + { + "epoch": 13.54, + "learning_rate": 1.293455098934551e-05, + "loss": 2.0679, + "step": 44475 + }, + { + "epoch": 13.55, + "learning_rate": 1.2919330289193303e-05, + "loss": 2.0277, + "step": 44500 + }, + { + "epoch": 13.55, + "learning_rate": 1.2904109589041096e-05, + "loss": 1.979, + "step": 44525 + }, + { + "epoch": 13.56, + "learning_rate": 1.288888888888889e-05, + "loss": 1.9098, + "step": 44550 + }, + { + "epoch": 13.57, + "learning_rate": 1.2873668188736683e-05, + "loss": 1.944, + "step": 44575 + }, + { + "epoch": 13.58, + "learning_rate": 1.2858447488584476e-05, + "loss": 2.0591, + "step": 44600 + }, + { + "epoch": 13.58, + "learning_rate": 1.2843226788432267e-05, + "loss": 2.0085, + "step": 44625 + }, + { + "epoch": 13.59, + "learning_rate": 1.2828006088280062e-05, + "loss": 1.9236, + "step": 44650 + }, + { + "epoch": 13.6, + "learning_rate": 1.2812785388127855e-05, + "loss": 1.9142, + "step": 44675 + }, + { + "epoch": 13.61, + "learning_rate": 1.2797564687975648e-05, + "loss": 1.9177, + "step": 44700 + }, + { + "epoch": 13.61, + "learning_rate": 1.278234398782344e-05, + "loss": 1.9706, + "step": 44725 + }, + { + "epoch": 13.62, + "learning_rate": 1.2767123287671235e-05, + "loss": 1.8988, + "step": 44750 + }, + { + "epoch": 13.63, + "learning_rate": 1.2751902587519028e-05, + "loss": 1.9589, + "step": 44775 + }, + { + "epoch": 13.64, + "learning_rate": 1.2736681887366819e-05, + "loss": 2.0487, + "step": 44800 + }, + { + "epoch": 13.65, + "learning_rate": 1.2721461187214612e-05, + "loss": 2.066, + "step": 44825 + }, + { + "epoch": 13.65, + "learning_rate": 1.2706240487062406e-05, + "loss": 2.0137, + "step": 44850 + }, + { + "epoch": 13.66, + "learning_rate": 1.26910197869102e-05, + "loss": 2.0245, + "step": 44875 + }, + { + "epoch": 13.67, + "learning_rate": 1.2675799086757992e-05, + "loss": 2.0623, + "step": 44900 + }, + { + "epoch": 13.68, + "learning_rate": 1.2660578386605783e-05, + "loss": 1.9067, + "step": 44925 + }, + { + "epoch": 13.68, + "learning_rate": 1.264535768645358e-05, + "loss": 2.0114, + "step": 44950 + }, + { + "epoch": 13.69, + "learning_rate": 1.263013698630137e-05, + "loss": 1.9816, + "step": 44975 + }, + { + "epoch": 13.7, + "learning_rate": 1.2614916286149163e-05, + "loss": 2.0646, + "step": 45000 + }, + { + "epoch": 13.71, + "learning_rate": 1.2599695585996956e-05, + "loss": 1.9261, + "step": 45025 + }, + { + "epoch": 13.71, + "learning_rate": 1.258447488584475e-05, + "loss": 2.0998, + "step": 45050 + }, + { + "epoch": 13.72, + "learning_rate": 1.2569254185692544e-05, + "loss": 2.0812, + "step": 45075 + }, + { + "epoch": 13.73, + "learning_rate": 1.2554033485540335e-05, + "loss": 1.9178, + "step": 45100 + }, + { + "epoch": 13.74, + "learning_rate": 1.2538812785388128e-05, + "loss": 1.9155, + "step": 45125 + }, + { + "epoch": 13.74, + "learning_rate": 1.2523592085235922e-05, + "loss": 1.9267, + "step": 45150 + }, + { + "epoch": 13.75, + "learning_rate": 1.2508371385083715e-05, + "loss": 2.039, + "step": 45175 + }, + { + "epoch": 13.76, + "learning_rate": 1.2493150684931508e-05, + "loss": 2.0541, + "step": 45200 + }, + { + "epoch": 13.77, + "learning_rate": 1.24779299847793e-05, + "loss": 1.9726, + "step": 45225 + }, + { + "epoch": 13.77, + "learning_rate": 1.2462709284627095e-05, + "loss": 1.9593, + "step": 45250 + }, + { + "epoch": 13.78, + "learning_rate": 1.2447488584474886e-05, + "loss": 1.9423, + "step": 45275 + }, + { + "epoch": 13.79, + "learning_rate": 1.243226788432268e-05, + "loss": 1.9559, + "step": 45300 + }, + { + "epoch": 13.8, + "learning_rate": 1.2417047184170472e-05, + "loss": 1.9699, + "step": 45325 + }, + { + "epoch": 13.81, + "learning_rate": 1.2401826484018267e-05, + "loss": 1.9827, + "step": 45350 + }, + { + "epoch": 13.81, + "learning_rate": 1.238660578386606e-05, + "loss": 1.9052, + "step": 45375 + }, + { + "epoch": 13.82, + "learning_rate": 1.2371385083713852e-05, + "loss": 1.962, + "step": 45400 + }, + { + "epoch": 13.83, + "learning_rate": 1.2356164383561643e-05, + "loss": 1.9576, + "step": 45425 + }, + { + "epoch": 13.84, + "learning_rate": 1.2340943683409438e-05, + "loss": 2.0912, + "step": 45450 + }, + { + "epoch": 13.84, + "learning_rate": 1.232572298325723e-05, + "loss": 1.9244, + "step": 45475 + }, + { + "epoch": 13.85, + "learning_rate": 1.2310502283105024e-05, + "loss": 1.9651, + "step": 45500 + }, + { + "epoch": 13.86, + "learning_rate": 1.2295281582952816e-05, + "loss": 2.1149, + "step": 45525 + }, + { + "epoch": 13.87, + "learning_rate": 1.2280060882800611e-05, + "loss": 1.9963, + "step": 45550 + }, + { + "epoch": 13.87, + "learning_rate": 1.2264840182648404e-05, + "loss": 1.9967, + "step": 45575 + }, + { + "epoch": 13.88, + "learning_rate": 1.2249619482496195e-05, + "loss": 1.9299, + "step": 45600 + }, + { + "epoch": 13.89, + "learning_rate": 1.2234398782343988e-05, + "loss": 1.936, + "step": 45625 + }, + { + "epoch": 13.9, + "learning_rate": 1.2219178082191782e-05, + "loss": 2.0121, + "step": 45650 + }, + { + "epoch": 13.9, + "learning_rate": 1.2203957382039575e-05, + "loss": 1.9676, + "step": 45675 + }, + { + "epoch": 13.91, + "learning_rate": 1.2188736681887368e-05, + "loss": 1.9345, + "step": 45700 + }, + { + "epoch": 13.92, + "learning_rate": 1.217351598173516e-05, + "loss": 1.9886, + "step": 45725 + }, + { + "epoch": 13.93, + "learning_rate": 1.2158295281582955e-05, + "loss": 1.9737, + "step": 45750 + }, + { + "epoch": 13.93, + "learning_rate": 1.2143074581430747e-05, + "loss": 1.9502, + "step": 45775 + }, + { + "epoch": 13.94, + "learning_rate": 1.212785388127854e-05, + "loss": 1.925, + "step": 45800 + }, + { + "epoch": 13.95, + "learning_rate": 1.2112633181126332e-05, + "loss": 2.0506, + "step": 45825 + }, + { + "epoch": 13.96, + "learning_rate": 1.2097412480974127e-05, + "loss": 1.9704, + "step": 45850 + }, + { + "epoch": 13.96, + "learning_rate": 1.208219178082192e-05, + "loss": 1.9452, + "step": 45875 + }, + { + "epoch": 13.97, + "learning_rate": 1.206697108066971e-05, + "loss": 2.022, + "step": 45900 + }, + { + "epoch": 13.98, + "learning_rate": 1.2051750380517504e-05, + "loss": 1.9387, + "step": 45925 + }, + { + "epoch": 13.99, + "learning_rate": 1.2036529680365298e-05, + "loss": 1.9663, + "step": 45950 + }, + { + "epoch": 14.0, + "learning_rate": 1.2021308980213091e-05, + "loss": 2.0126, + "step": 45975 + }, + { + "epoch": 14.0, + "learning_rate": 1.2006088280060884e-05, + "loss": 2.0184, + "step": 46000 + }, + { + "epoch": 14.01, + "learning_rate": 1.1990867579908677e-05, + "loss": 1.893, + "step": 46025 + }, + { + "epoch": 14.02, + "learning_rate": 1.1975646879756471e-05, + "loss": 2.0362, + "step": 46050 + }, + { + "epoch": 14.03, + "learning_rate": 1.1960426179604262e-05, + "loss": 1.9939, + "step": 46075 + }, + { + "epoch": 14.03, + "learning_rate": 1.1945205479452055e-05, + "loss": 2.0096, + "step": 46100 + }, + { + "epoch": 14.04, + "learning_rate": 1.1929984779299848e-05, + "loss": 2.0146, + "step": 46125 + }, + { + "epoch": 14.05, + "learning_rate": 1.1914764079147643e-05, + "loss": 1.9037, + "step": 46150 + }, + { + "epoch": 14.06, + "learning_rate": 1.1899543378995435e-05, + "loss": 2.0156, + "step": 46175 + }, + { + "epoch": 14.06, + "learning_rate": 1.1884322678843228e-05, + "loss": 1.9033, + "step": 46200 + }, + { + "epoch": 14.07, + "learning_rate": 1.186910197869102e-05, + "loss": 2.0248, + "step": 46225 + }, + { + "epoch": 14.08, + "learning_rate": 1.1853881278538814e-05, + "loss": 2.0205, + "step": 46250 + }, + { + "epoch": 14.09, + "learning_rate": 1.1838660578386607e-05, + "loss": 1.991, + "step": 46275 + }, + { + "epoch": 14.09, + "learning_rate": 1.18234398782344e-05, + "loss": 1.888, + "step": 46300 + }, + { + "epoch": 14.1, + "learning_rate": 1.1808219178082192e-05, + "loss": 1.8724, + "step": 46325 + }, + { + "epoch": 14.11, + "learning_rate": 1.1792998477929987e-05, + "loss": 1.8538, + "step": 46350 + }, + { + "epoch": 14.12, + "learning_rate": 1.177777777777778e-05, + "loss": 1.9678, + "step": 46375 + }, + { + "epoch": 14.12, + "learning_rate": 1.1762557077625571e-05, + "loss": 1.9731, + "step": 46400 + }, + { + "epoch": 14.13, + "learning_rate": 1.1747336377473364e-05, + "loss": 1.9184, + "step": 46425 + }, + { + "epoch": 14.14, + "learning_rate": 1.1732115677321158e-05, + "loss": 2.0107, + "step": 46450 + }, + { + "epoch": 14.15, + "learning_rate": 1.1716894977168951e-05, + "loss": 1.8769, + "step": 46475 + }, + { + "epoch": 14.16, + "learning_rate": 1.1701674277016744e-05, + "loss": 1.9563, + "step": 46500 + }, + { + "epoch": 14.16, + "learning_rate": 1.1686453576864535e-05, + "loss": 1.9436, + "step": 46525 + }, + { + "epoch": 14.17, + "learning_rate": 1.1671232876712331e-05, + "loss": 1.943, + "step": 46550 + }, + { + "epoch": 14.18, + "learning_rate": 1.1656012176560123e-05, + "loss": 1.9551, + "step": 46575 + }, + { + "epoch": 14.19, + "learning_rate": 1.1640791476407915e-05, + "loss": 2.0068, + "step": 46600 + }, + { + "epoch": 14.19, + "learning_rate": 1.1625570776255708e-05, + "loss": 2.0303, + "step": 46625 + }, + { + "epoch": 14.2, + "learning_rate": 1.1610350076103503e-05, + "loss": 1.9128, + "step": 46650 + }, + { + "epoch": 14.21, + "learning_rate": 1.1595129375951296e-05, + "loss": 2.0005, + "step": 46675 + }, + { + "epoch": 14.22, + "learning_rate": 1.1579908675799087e-05, + "loss": 1.9755, + "step": 46700 + }, + { + "epoch": 14.22, + "learning_rate": 1.156468797564688e-05, + "loss": 1.9389, + "step": 46725 + }, + { + "epoch": 14.23, + "learning_rate": 1.1549467275494674e-05, + "loss": 1.9299, + "step": 46750 + }, + { + "epoch": 14.24, + "learning_rate": 1.1534246575342467e-05, + "loss": 2.0026, + "step": 46775 + }, + { + "epoch": 14.25, + "learning_rate": 1.151902587519026e-05, + "loss": 1.9436, + "step": 46800 + }, + { + "epoch": 14.25, + "learning_rate": 1.1503805175038053e-05, + "loss": 1.929, + "step": 46825 + }, + { + "epoch": 14.26, + "learning_rate": 1.1488584474885847e-05, + "loss": 2.0119, + "step": 46850 + }, + { + "epoch": 14.27, + "learning_rate": 1.1473363774733638e-05, + "loss": 1.9605, + "step": 46875 + }, + { + "epoch": 14.28, + "learning_rate": 1.1458143074581431e-05, + "loss": 1.9182, + "step": 46900 + }, + { + "epoch": 14.28, + "learning_rate": 1.1442922374429224e-05, + "loss": 2.0203, + "step": 46925 + }, + { + "epoch": 14.29, + "learning_rate": 1.1427701674277019e-05, + "loss": 1.9565, + "step": 46950 + }, + { + "epoch": 14.3, + "learning_rate": 1.1412480974124811e-05, + "loss": 2.0476, + "step": 46975 + }, + { + "epoch": 14.31, + "learning_rate": 1.1397260273972604e-05, + "loss": 1.9167, + "step": 47000 + }, + { + "epoch": 14.32, + "learning_rate": 1.1382039573820395e-05, + "loss": 1.8304, + "step": 47025 + }, + { + "epoch": 14.32, + "learning_rate": 1.136681887366819e-05, + "loss": 2.0416, + "step": 47050 + }, + { + "epoch": 14.33, + "learning_rate": 1.1351598173515983e-05, + "loss": 2.0166, + "step": 47075 + }, + { + "epoch": 14.34, + "learning_rate": 1.1336377473363776e-05, + "loss": 1.9663, + "step": 47100 + }, + { + "epoch": 14.35, + "learning_rate": 1.1321156773211568e-05, + "loss": 2.0136, + "step": 47125 + }, + { + "epoch": 14.35, + "learning_rate": 1.1305936073059363e-05, + "loss": 1.936, + "step": 47150 + }, + { + "epoch": 14.36, + "learning_rate": 1.1290715372907156e-05, + "loss": 1.9158, + "step": 47175 + }, + { + "epoch": 14.37, + "learning_rate": 1.1275494672754947e-05, + "loss": 1.9207, + "step": 47200 + }, + { + "epoch": 14.38, + "learning_rate": 1.126027397260274e-05, + "loss": 2.0042, + "step": 47225 + }, + { + "epoch": 14.38, + "learning_rate": 1.1245053272450534e-05, + "loss": 2.0697, + "step": 47250 + }, + { + "epoch": 14.39, + "learning_rate": 1.1229832572298327e-05, + "loss": 1.9097, + "step": 47275 + }, + { + "epoch": 14.4, + "learning_rate": 1.121461187214612e-05, + "loss": 1.9998, + "step": 47300 + }, + { + "epoch": 14.41, + "learning_rate": 1.1199391171993911e-05, + "loss": 1.9464, + "step": 47325 + }, + { + "epoch": 14.41, + "learning_rate": 1.1184170471841707e-05, + "loss": 1.9361, + "step": 47350 + }, + { + "epoch": 14.42, + "learning_rate": 1.1168949771689499e-05, + "loss": 2.0586, + "step": 47375 + }, + { + "epoch": 14.43, + "learning_rate": 1.1153729071537291e-05, + "loss": 1.9005, + "step": 47400 + }, + { + "epoch": 14.44, + "learning_rate": 1.1138508371385084e-05, + "loss": 1.969, + "step": 47425 + }, + { + "epoch": 14.44, + "learning_rate": 1.1123287671232879e-05, + "loss": 2.015, + "step": 47450 + }, + { + "epoch": 14.45, + "learning_rate": 1.1108066971080672e-05, + "loss": 2.03, + "step": 47475 + }, + { + "epoch": 14.46, + "learning_rate": 1.1092846270928463e-05, + "loss": 1.9863, + "step": 47500 + }, + { + "epoch": 14.47, + "learning_rate": 1.1077625570776256e-05, + "loss": 1.9524, + "step": 47525 + }, + { + "epoch": 14.47, + "learning_rate": 1.106240487062405e-05, + "loss": 1.9094, + "step": 47550 + }, + { + "epoch": 14.48, + "learning_rate": 1.1047184170471843e-05, + "loss": 1.9733, + "step": 47575 + }, + { + "epoch": 14.49, + "learning_rate": 1.1031963470319636e-05, + "loss": 1.9335, + "step": 47600 + }, + { + "epoch": 14.5, + "learning_rate": 1.1016742770167429e-05, + "loss": 1.9351, + "step": 47625 + }, + { + "epoch": 14.51, + "learning_rate": 1.1001522070015223e-05, + "loss": 1.8879, + "step": 47650 + }, + { + "epoch": 14.51, + "learning_rate": 1.0986301369863014e-05, + "loss": 1.956, + "step": 47675 + }, + { + "epoch": 14.52, + "learning_rate": 1.0971080669710807e-05, + "loss": 1.9061, + "step": 47700 + }, + { + "epoch": 14.53, + "learning_rate": 1.09558599695586e-05, + "loss": 2.0037, + "step": 47725 + }, + { + "epoch": 14.54, + "learning_rate": 1.0940639269406395e-05, + "loss": 2.0163, + "step": 47750 + }, + { + "epoch": 14.54, + "learning_rate": 1.0925418569254187e-05, + "loss": 2.1089, + "step": 47775 + }, + { + "epoch": 14.55, + "learning_rate": 1.091019786910198e-05, + "loss": 1.9014, + "step": 47800 + }, + { + "epoch": 14.56, + "learning_rate": 1.0894977168949771e-05, + "loss": 2.033, + "step": 47825 + }, + { + "epoch": 14.57, + "learning_rate": 1.0879756468797566e-05, + "loss": 1.9819, + "step": 47850 + }, + { + "epoch": 14.57, + "learning_rate": 1.0864535768645359e-05, + "loss": 1.9444, + "step": 47875 + }, + { + "epoch": 14.58, + "learning_rate": 1.0849315068493152e-05, + "loss": 1.9933, + "step": 47900 + }, + { + "epoch": 14.59, + "learning_rate": 1.0834094368340944e-05, + "loss": 2.0139, + "step": 47925 + }, + { + "epoch": 14.6, + "learning_rate": 1.0818873668188739e-05, + "loss": 2.0485, + "step": 47950 + }, + { + "epoch": 14.6, + "learning_rate": 1.0803652968036532e-05, + "loss": 1.9591, + "step": 47975 + }, + { + "epoch": 14.61, + "learning_rate": 1.0788432267884323e-05, + "loss": 1.954, + "step": 48000 + }, + { + "epoch": 14.62, + "learning_rate": 1.0773211567732116e-05, + "loss": 1.9854, + "step": 48025 + }, + { + "epoch": 14.63, + "learning_rate": 1.075799086757991e-05, + "loss": 1.8831, + "step": 48050 + }, + { + "epoch": 14.63, + "learning_rate": 1.0742770167427703e-05, + "loss": 2.074, + "step": 48075 + }, + { + "epoch": 14.64, + "learning_rate": 1.0727549467275496e-05, + "loss": 1.9053, + "step": 48100 + }, + { + "epoch": 14.65, + "learning_rate": 1.0712328767123287e-05, + "loss": 2.0317, + "step": 48125 + }, + { + "epoch": 14.66, + "learning_rate": 1.0697108066971083e-05, + "loss": 1.9007, + "step": 48150 + }, + { + "epoch": 14.67, + "learning_rate": 1.0681887366818874e-05, + "loss": 1.9001, + "step": 48175 + }, + { + "epoch": 14.67, + "learning_rate": 1.0666666666666667e-05, + "loss": 1.936, + "step": 48200 + }, + { + "epoch": 14.68, + "learning_rate": 1.065144596651446e-05, + "loss": 1.9031, + "step": 48225 + }, + { + "epoch": 14.69, + "learning_rate": 1.0636225266362255e-05, + "loss": 2.0159, + "step": 48250 + }, + { + "epoch": 14.7, + "learning_rate": 1.0621004566210048e-05, + "loss": 1.9913, + "step": 48275 + }, + { + "epoch": 14.7, + "learning_rate": 1.0605783866057839e-05, + "loss": 2.0991, + "step": 48300 + }, + { + "epoch": 14.71, + "learning_rate": 1.0590563165905632e-05, + "loss": 2.0389, + "step": 48325 + }, + { + "epoch": 14.72, + "learning_rate": 1.0575342465753426e-05, + "loss": 1.9573, + "step": 48350 + }, + { + "epoch": 14.73, + "learning_rate": 1.0560121765601219e-05, + "loss": 1.9287, + "step": 48375 + }, + { + "epoch": 14.73, + "learning_rate": 1.0544901065449012e-05, + "loss": 1.9629, + "step": 48400 + }, + { + "epoch": 14.74, + "learning_rate": 1.0529680365296805e-05, + "loss": 2.0366, + "step": 48425 + }, + { + "epoch": 14.75, + "learning_rate": 1.0514459665144599e-05, + "loss": 2.0515, + "step": 48450 + }, + { + "epoch": 14.76, + "learning_rate": 1.049923896499239e-05, + "loss": 1.949, + "step": 48475 + }, + { + "epoch": 14.76, + "learning_rate": 1.0484018264840183e-05, + "loss": 2.0248, + "step": 48500 + }, + { + "epoch": 14.77, + "learning_rate": 1.0468797564687976e-05, + "loss": 1.9764, + "step": 48525 + }, + { + "epoch": 14.78, + "learning_rate": 1.045357686453577e-05, + "loss": 1.8845, + "step": 48550 + }, + { + "epoch": 14.79, + "learning_rate": 1.0438356164383563e-05, + "loss": 2.0516, + "step": 48575 + }, + { + "epoch": 14.79, + "learning_rate": 1.0423135464231356e-05, + "loss": 1.9194, + "step": 48600 + }, + { + "epoch": 14.8, + "learning_rate": 1.0407914764079147e-05, + "loss": 2.0027, + "step": 48625 + }, + { + "epoch": 14.81, + "learning_rate": 1.0392694063926942e-05, + "loss": 1.9999, + "step": 48650 + }, + { + "epoch": 14.82, + "learning_rate": 1.0377473363774735e-05, + "loss": 1.9374, + "step": 48675 + }, + { + "epoch": 14.82, + "learning_rate": 1.0362252663622528e-05, + "loss": 2.0, + "step": 48700 + }, + { + "epoch": 14.83, + "learning_rate": 1.034703196347032e-05, + "loss": 2.0698, + "step": 48725 + }, + { + "epoch": 14.84, + "learning_rate": 1.0331811263318115e-05, + "loss": 1.9073, + "step": 48750 + }, + { + "epoch": 14.85, + "learning_rate": 1.0316590563165908e-05, + "loss": 1.879, + "step": 48775 + }, + { + "epoch": 14.86, + "learning_rate": 1.0301369863013699e-05, + "loss": 2.0149, + "step": 48800 + }, + { + "epoch": 14.86, + "learning_rate": 1.0286149162861492e-05, + "loss": 1.9765, + "step": 48825 + }, + { + "epoch": 14.87, + "learning_rate": 1.0270928462709286e-05, + "loss": 2.0193, + "step": 48850 + }, + { + "epoch": 14.88, + "learning_rate": 1.0255707762557079e-05, + "loss": 2.0319, + "step": 48875 + }, + { + "epoch": 14.89, + "learning_rate": 1.0240487062404872e-05, + "loss": 2.0198, + "step": 48900 + }, + { + "epoch": 14.89, + "learning_rate": 1.0225266362252663e-05, + "loss": 1.9011, + "step": 48925 + }, + { + "epoch": 14.9, + "learning_rate": 1.021004566210046e-05, + "loss": 1.943, + "step": 48950 + }, + { + "epoch": 14.91, + "learning_rate": 1.019482496194825e-05, + "loss": 1.9647, + "step": 48975 + }, + { + "epoch": 14.92, + "learning_rate": 1.0179604261796043e-05, + "loss": 1.9477, + "step": 49000 + }, + { + "epoch": 14.92, + "learning_rate": 1.0164383561643836e-05, + "loss": 1.9735, + "step": 49025 + }, + { + "epoch": 14.93, + "learning_rate": 1.014916286149163e-05, + "loss": 1.9634, + "step": 49050 + }, + { + "epoch": 14.94, + "learning_rate": 1.0133942161339423e-05, + "loss": 2.0279, + "step": 49075 + }, + { + "epoch": 14.95, + "learning_rate": 1.0118721461187215e-05, + "loss": 1.9043, + "step": 49100 + }, + { + "epoch": 14.95, + "learning_rate": 1.0103500761035007e-05, + "loss": 1.9345, + "step": 49125 + }, + { + "epoch": 14.96, + "learning_rate": 1.0088280060882802e-05, + "loss": 1.9, + "step": 49150 + }, + { + "epoch": 14.97, + "learning_rate": 1.0073059360730595e-05, + "loss": 1.9883, + "step": 49175 + }, + { + "epoch": 14.98, + "learning_rate": 1.0057838660578388e-05, + "loss": 1.9939, + "step": 49200 + }, + { + "epoch": 14.98, + "learning_rate": 1.004261796042618e-05, + "loss": 1.8871, + "step": 49225 + }, + { + "epoch": 14.99, + "learning_rate": 1.0027397260273975e-05, + "loss": 2.009, + "step": 49250 + }, + { + "epoch": 15.0, + "learning_rate": 1.0012176560121766e-05, + "loss": 2.0039, + "step": 49275 + }, + { + "epoch": 15.01, + "learning_rate": 9.996955859969559e-06, + "loss": 2.0019, + "step": 49300 + }, + { + "epoch": 15.02, + "learning_rate": 9.981735159817354e-06, + "loss": 1.9455, + "step": 49325 + }, + { + "epoch": 15.02, + "learning_rate": 9.966514459665145e-06, + "loss": 1.9116, + "step": 49350 + }, + { + "epoch": 15.03, + "learning_rate": 9.95129375951294e-06, + "loss": 1.9623, + "step": 49375 + }, + { + "epoch": 15.04, + "learning_rate": 9.936073059360732e-06, + "loss": 2.0, + "step": 49400 + }, + { + "epoch": 15.05, + "learning_rate": 9.920852359208525e-06, + "loss": 1.9419, + "step": 49425 + }, + { + "epoch": 15.05, + "learning_rate": 9.905631659056318e-06, + "loss": 1.9811, + "step": 49450 + }, + { + "epoch": 15.06, + "learning_rate": 9.89041095890411e-06, + "loss": 1.9864, + "step": 49475 + }, + { + "epoch": 15.07, + "learning_rate": 9.875190258751903e-06, + "loss": 1.8739, + "step": 49500 + }, + { + "epoch": 15.08, + "learning_rate": 9.859969558599696e-06, + "loss": 2.0509, + "step": 49525 + }, + { + "epoch": 15.08, + "learning_rate": 9.844748858447489e-06, + "loss": 2.0082, + "step": 49550 + }, + { + "epoch": 15.09, + "learning_rate": 9.829528158295284e-06, + "loss": 1.961, + "step": 49575 + }, + { + "epoch": 15.1, + "learning_rate": 9.814307458143075e-06, + "loss": 1.9278, + "step": 49600 + }, + { + "epoch": 15.11, + "learning_rate": 9.79908675799087e-06, + "loss": 1.9934, + "step": 49625 + }, + { + "epoch": 15.11, + "learning_rate": 9.78386605783866e-06, + "loss": 1.8641, + "step": 49650 + }, + { + "epoch": 15.12, + "learning_rate": 9.768645357686455e-06, + "loss": 1.9328, + "step": 49675 + }, + { + "epoch": 15.13, + "learning_rate": 9.753424657534248e-06, + "loss": 1.991, + "step": 49700 + }, + { + "epoch": 15.14, + "learning_rate": 9.73820395738204e-06, + "loss": 1.9905, + "step": 49725 + }, + { + "epoch": 15.14, + "learning_rate": 9.722983257229834e-06, + "loss": 2.0744, + "step": 49750 + }, + { + "epoch": 15.15, + "learning_rate": 9.707762557077626e-06, + "loss": 1.8815, + "step": 49775 + }, + { + "epoch": 15.16, + "learning_rate": 9.69254185692542e-06, + "loss": 1.8839, + "step": 49800 + }, + { + "epoch": 15.17, + "learning_rate": 9.677321156773212e-06, + "loss": 1.9512, + "step": 49825 + }, + { + "epoch": 15.18, + "learning_rate": 9.662100456621005e-06, + "loss": 1.9877, + "step": 49850 + }, + { + "epoch": 15.18, + "learning_rate": 9.6468797564688e-06, + "loss": 2.0149, + "step": 49875 + }, + { + "epoch": 15.19, + "learning_rate": 9.63165905631659e-06, + "loss": 1.9746, + "step": 49900 + }, + { + "epoch": 15.2, + "learning_rate": 9.616438356164385e-06, + "loss": 1.9746, + "step": 49925 + }, + { + "epoch": 15.21, + "learning_rate": 9.601217656012178e-06, + "loss": 1.9566, + "step": 49950 + }, + { + "epoch": 15.21, + "learning_rate": 9.58599695585997e-06, + "loss": 1.889, + "step": 49975 + }, + { + "epoch": 15.22, + "learning_rate": 9.570776255707764e-06, + "loss": 1.978, + "step": 50000 + }, + { + "epoch": 15.23, + "learning_rate": 9.555555555555556e-06, + "loss": 1.9767, + "step": 50025 + }, + { + "epoch": 15.24, + "learning_rate": 9.54033485540335e-06, + "loss": 1.9758, + "step": 50050 + }, + { + "epoch": 15.24, + "learning_rate": 9.525114155251142e-06, + "loss": 2.0169, + "step": 50075 + }, + { + "epoch": 15.25, + "learning_rate": 9.509893455098935e-06, + "loss": 1.9795, + "step": 50100 + }, + { + "epoch": 15.26, + "learning_rate": 9.49467275494673e-06, + "loss": 1.9105, + "step": 50125 + }, + { + "epoch": 15.27, + "learning_rate": 9.47945205479452e-06, + "loss": 1.9358, + "step": 50150 + }, + { + "epoch": 15.27, + "learning_rate": 9.464231354642315e-06, + "loss": 1.9197, + "step": 50175 + }, + { + "epoch": 15.28, + "learning_rate": 9.449010654490108e-06, + "loss": 1.9679, + "step": 50200 + }, + { + "epoch": 15.29, + "learning_rate": 9.433789954337901e-06, + "loss": 2.0478, + "step": 50225 + }, + { + "epoch": 15.3, + "learning_rate": 9.418569254185694e-06, + "loss": 1.9561, + "step": 50250 + }, + { + "epoch": 15.3, + "learning_rate": 9.403348554033487e-06, + "loss": 2.0571, + "step": 50275 + }, + { + "epoch": 15.31, + "learning_rate": 9.38812785388128e-06, + "loss": 1.8852, + "step": 50300 + }, + { + "epoch": 15.32, + "learning_rate": 9.372907153729072e-06, + "loss": 1.8964, + "step": 50325 + }, + { + "epoch": 15.33, + "learning_rate": 9.357686453576865e-06, + "loss": 1.9007, + "step": 50350 + }, + { + "epoch": 15.33, + "learning_rate": 9.342465753424658e-06, + "loss": 1.9047, + "step": 50375 + }, + { + "epoch": 15.34, + "learning_rate": 9.32724505327245e-06, + "loss": 1.8826, + "step": 50400 + }, + { + "epoch": 15.35, + "learning_rate": 9.312024353120245e-06, + "loss": 2.0846, + "step": 50425 + }, + { + "epoch": 15.36, + "learning_rate": 9.296803652968036e-06, + "loss": 1.8743, + "step": 50450 + }, + { + "epoch": 15.37, + "learning_rate": 9.281582952815831e-06, + "loss": 1.8567, + "step": 50475 + }, + { + "epoch": 15.37, + "learning_rate": 9.266362252663624e-06, + "loss": 1.9336, + "step": 50500 + }, + { + "epoch": 15.38, + "learning_rate": 9.251141552511417e-06, + "loss": 1.9018, + "step": 50525 + }, + { + "epoch": 15.39, + "learning_rate": 9.23592085235921e-06, + "loss": 1.9833, + "step": 50550 + }, + { + "epoch": 15.4, + "learning_rate": 9.220700152207002e-06, + "loss": 1.9243, + "step": 50575 + }, + { + "epoch": 15.4, + "learning_rate": 9.205479452054795e-06, + "loss": 1.999, + "step": 50600 + }, + { + "epoch": 15.41, + "learning_rate": 9.190258751902588e-06, + "loss": 2.07, + "step": 50625 + }, + { + "epoch": 15.42, + "learning_rate": 9.17564687975647e-06, + "loss": 2.0052, + "step": 50650 + }, + { + "epoch": 15.43, + "learning_rate": 9.160426179604262e-06, + "loss": 1.9705, + "step": 50675 + }, + { + "epoch": 15.43, + "learning_rate": 9.145205479452055e-06, + "loss": 1.9387, + "step": 50700 + }, + { + "epoch": 15.44, + "learning_rate": 9.12998477929985e-06, + "loss": 2.0054, + "step": 50725 + }, + { + "epoch": 15.45, + "learning_rate": 9.11476407914764e-06, + "loss": 1.947, + "step": 50750 + }, + { + "epoch": 15.46, + "learning_rate": 9.099543378995435e-06, + "loss": 1.9877, + "step": 50775 + }, + { + "epoch": 15.46, + "learning_rate": 9.084322678843226e-06, + "loss": 1.9343, + "step": 50800 + }, + { + "epoch": 15.47, + "learning_rate": 9.06910197869102e-06, + "loss": 1.977, + "step": 50825 + }, + { + "epoch": 15.48, + "learning_rate": 9.053881278538814e-06, + "loss": 2.0358, + "step": 50850 + }, + { + "epoch": 15.49, + "learning_rate": 9.038660578386606e-06, + "loss": 1.9384, + "step": 50875 + }, + { + "epoch": 15.49, + "learning_rate": 9.0234398782344e-06, + "loss": 1.9109, + "step": 50900 + }, + { + "epoch": 15.5, + "learning_rate": 9.008219178082192e-06, + "loss": 1.9731, + "step": 50925 + }, + { + "epoch": 15.51, + "learning_rate": 8.992998477929985e-06, + "loss": 1.9435, + "step": 50950 + }, + { + "epoch": 15.52, + "learning_rate": 8.977777777777778e-06, + "loss": 1.8879, + "step": 50975 + }, + { + "epoch": 15.53, + "learning_rate": 8.96255707762557e-06, + "loss": 1.9415, + "step": 51000 + }, + { + "epoch": 15.53, + "learning_rate": 8.947336377473365e-06, + "loss": 1.9765, + "step": 51025 + }, + { + "epoch": 15.54, + "learning_rate": 8.932115677321156e-06, + "loss": 1.9545, + "step": 51050 + }, + { + "epoch": 15.55, + "learning_rate": 8.916894977168951e-06, + "loss": 1.8787, + "step": 51075 + }, + { + "epoch": 15.56, + "learning_rate": 8.901674277016744e-06, + "loss": 1.9561, + "step": 51100 + }, + { + "epoch": 15.56, + "learning_rate": 8.886453576864537e-06, + "loss": 2.0651, + "step": 51125 + }, + { + "epoch": 15.57, + "learning_rate": 8.87123287671233e-06, + "loss": 1.8878, + "step": 51150 + }, + { + "epoch": 15.58, + "learning_rate": 8.856012176560122e-06, + "loss": 1.9929, + "step": 51175 + }, + { + "epoch": 15.59, + "learning_rate": 8.840791476407915e-06, + "loss": 1.9261, + "step": 51200 + }, + { + "epoch": 15.59, + "learning_rate": 8.825570776255708e-06, + "loss": 2.0192, + "step": 51225 + }, + { + "epoch": 15.6, + "learning_rate": 8.8103500761035e-06, + "loss": 1.9563, + "step": 51250 + }, + { + "epoch": 15.61, + "learning_rate": 8.795129375951295e-06, + "loss": 1.9667, + "step": 51275 + }, + { + "epoch": 15.62, + "learning_rate": 8.779908675799086e-06, + "loss": 1.9906, + "step": 51300 + }, + { + "epoch": 15.62, + "learning_rate": 8.764687975646881e-06, + "loss": 1.9746, + "step": 51325 + }, + { + "epoch": 15.63, + "learning_rate": 8.749467275494674e-06, + "loss": 1.9885, + "step": 51350 + }, + { + "epoch": 15.64, + "learning_rate": 8.734246575342467e-06, + "loss": 1.9304, + "step": 51375 + }, + { + "epoch": 15.65, + "learning_rate": 8.71902587519026e-06, + "loss": 1.9789, + "step": 51400 + }, + { + "epoch": 15.65, + "learning_rate": 8.703805175038052e-06, + "loss": 1.9598, + "step": 51425 + }, + { + "epoch": 15.66, + "learning_rate": 8.688584474885845e-06, + "loss": 1.9788, + "step": 51450 + }, + { + "epoch": 15.67, + "learning_rate": 8.673363774733638e-06, + "loss": 1.9393, + "step": 51475 + }, + { + "epoch": 15.68, + "learning_rate": 8.658143074581431e-06, + "loss": 1.933, + "step": 51500 + }, + { + "epoch": 15.68, + "learning_rate": 8.642922374429224e-06, + "loss": 1.9745, + "step": 51525 + }, + { + "epoch": 15.69, + "learning_rate": 8.627701674277017e-06, + "loss": 1.9297, + "step": 51550 + }, + { + "epoch": 15.7, + "learning_rate": 8.612480974124811e-06, + "loss": 1.8979, + "step": 51575 + }, + { + "epoch": 15.71, + "learning_rate": 8.597260273972602e-06, + "loss": 1.9345, + "step": 51600 + }, + { + "epoch": 15.72, + "learning_rate": 8.582039573820397e-06, + "loss": 1.9887, + "step": 51625 + }, + { + "epoch": 15.72, + "learning_rate": 8.56681887366819e-06, + "loss": 2.0398, + "step": 51650 + }, + { + "epoch": 15.73, + "learning_rate": 8.551598173515982e-06, + "loss": 1.9723, + "step": 51675 + }, + { + "epoch": 15.74, + "learning_rate": 8.536377473363775e-06, + "loss": 1.9411, + "step": 51700 + }, + { + "epoch": 15.75, + "learning_rate": 8.521156773211568e-06, + "loss": 1.91, + "step": 51725 + }, + { + "epoch": 15.75, + "learning_rate": 8.505936073059361e-06, + "loss": 1.9958, + "step": 51750 + }, + { + "epoch": 15.76, + "learning_rate": 8.490715372907154e-06, + "loss": 1.9466, + "step": 51775 + }, + { + "epoch": 15.77, + "learning_rate": 8.475494672754947e-06, + "loss": 1.8703, + "step": 51800 + }, + { + "epoch": 15.78, + "learning_rate": 8.460273972602741e-06, + "loss": 1.9106, + "step": 51825 + }, + { + "epoch": 15.78, + "learning_rate": 8.445053272450532e-06, + "loss": 1.9092, + "step": 51850 + }, + { + "epoch": 15.79, + "learning_rate": 8.429832572298327e-06, + "loss": 1.9051, + "step": 51875 + }, + { + "epoch": 15.8, + "learning_rate": 8.41461187214612e-06, + "loss": 1.9716, + "step": 51900 + }, + { + "epoch": 15.81, + "learning_rate": 8.399391171993913e-06, + "loss": 1.8972, + "step": 51925 + }, + { + "epoch": 15.81, + "learning_rate": 8.384170471841705e-06, + "loss": 2.0439, + "step": 51950 + }, + { + "epoch": 15.82, + "learning_rate": 8.368949771689498e-06, + "loss": 1.9814, + "step": 51975 + }, + { + "epoch": 15.83, + "learning_rate": 8.353729071537291e-06, + "loss": 1.9067, + "step": 52000 + }, + { + "epoch": 15.84, + "learning_rate": 8.338508371385084e-06, + "loss": 1.9205, + "step": 52025 + }, + { + "epoch": 15.84, + "learning_rate": 8.323287671232877e-06, + "loss": 1.9346, + "step": 52050 + }, + { + "epoch": 15.85, + "learning_rate": 8.308066971080671e-06, + "loss": 1.9421, + "step": 52075 + }, + { + "epoch": 15.86, + "learning_rate": 8.292846270928462e-06, + "loss": 1.8881, + "step": 52100 + }, + { + "epoch": 15.87, + "learning_rate": 8.277625570776257e-06, + "loss": 2.0199, + "step": 52125 + }, + { + "epoch": 15.88, + "learning_rate": 8.26240487062405e-06, + "loss": 1.8355, + "step": 52150 + }, + { + "epoch": 15.88, + "learning_rate": 8.247184170471843e-06, + "loss": 1.9857, + "step": 52175 + }, + { + "epoch": 15.89, + "learning_rate": 8.231963470319635e-06, + "loss": 1.9821, + "step": 52200 + }, + { + "epoch": 15.9, + "learning_rate": 8.216742770167428e-06, + "loss": 1.9458, + "step": 52225 + }, + { + "epoch": 15.91, + "learning_rate": 8.201522070015221e-06, + "loss": 1.9139, + "step": 52250 + }, + { + "epoch": 15.91, + "learning_rate": 8.186301369863014e-06, + "loss": 1.9435, + "step": 52275 + }, + { + "epoch": 15.92, + "learning_rate": 8.171080669710807e-06, + "loss": 2.0494, + "step": 52300 + }, + { + "epoch": 15.93, + "learning_rate": 8.1558599695586e-06, + "loss": 2.0148, + "step": 52325 + }, + { + "epoch": 15.94, + "learning_rate": 8.140639269406393e-06, + "loss": 1.9855, + "step": 52350 + }, + { + "epoch": 15.94, + "learning_rate": 8.125418569254187e-06, + "loss": 1.8551, + "step": 52375 + }, + { + "epoch": 15.95, + "learning_rate": 8.110197869101978e-06, + "loss": 2.0901, + "step": 52400 + }, + { + "epoch": 15.96, + "learning_rate": 8.094977168949773e-06, + "loss": 1.9605, + "step": 52425 + }, + { + "epoch": 15.97, + "learning_rate": 8.079756468797566e-06, + "loss": 2.009, + "step": 52450 + }, + { + "epoch": 15.97, + "learning_rate": 8.064535768645358e-06, + "loss": 1.9351, + "step": 52475 + }, + { + "epoch": 15.98, + "learning_rate": 8.049315068493151e-06, + "loss": 1.9546, + "step": 52500 + }, + { + "epoch": 15.99, + "learning_rate": 8.034094368340944e-06, + "loss": 1.9336, + "step": 52525 + }, + { + "epoch": 16.0, + "learning_rate": 8.018873668188737e-06, + "loss": 1.9764, + "step": 52550 + }, + { + "epoch": 16.0, + "learning_rate": 8.00365296803653e-06, + "loss": 1.9384, + "step": 52575 + }, + { + "epoch": 16.01, + "learning_rate": 7.988432267884323e-06, + "loss": 1.8792, + "step": 52600 + }, + { + "epoch": 16.02, + "learning_rate": 7.973211567732117e-06, + "loss": 1.8954, + "step": 52625 + }, + { + "epoch": 16.03, + "learning_rate": 7.957990867579908e-06, + "loss": 1.887, + "step": 52650 + }, + { + "epoch": 16.04, + "learning_rate": 7.942770167427703e-06, + "loss": 1.8549, + "step": 52675 + }, + { + "epoch": 16.04, + "learning_rate": 7.927549467275496e-06, + "loss": 2.0322, + "step": 52700 + }, + { + "epoch": 16.05, + "learning_rate": 7.912328767123288e-06, + "loss": 1.931, + "step": 52725 + }, + { + "epoch": 16.06, + "learning_rate": 7.897108066971081e-06, + "loss": 1.9956, + "step": 52750 + }, + { + "epoch": 16.07, + "learning_rate": 7.881887366818874e-06, + "loss": 1.9696, + "step": 52775 + }, + { + "epoch": 16.07, + "learning_rate": 7.866666666666667e-06, + "loss": 1.9958, + "step": 52800 + }, + { + "epoch": 16.08, + "learning_rate": 7.85144596651446e-06, + "loss": 1.9175, + "step": 52825 + }, + { + "epoch": 16.09, + "learning_rate": 7.836225266362253e-06, + "loss": 1.8408, + "step": 52850 + }, + { + "epoch": 16.1, + "learning_rate": 7.821004566210047e-06, + "loss": 1.8441, + "step": 52875 + }, + { + "epoch": 16.1, + "learning_rate": 7.805783866057838e-06, + "loss": 1.9698, + "step": 52900 + }, + { + "epoch": 16.11, + "learning_rate": 7.790563165905633e-06, + "loss": 1.8653, + "step": 52925 + }, + { + "epoch": 16.12, + "learning_rate": 7.775342465753424e-06, + "loss": 2.0227, + "step": 52950 + }, + { + "epoch": 16.13, + "learning_rate": 7.760121765601219e-06, + "loss": 1.9209, + "step": 52975 + }, + { + "epoch": 16.13, + "learning_rate": 7.744901065449011e-06, + "loss": 1.9751, + "step": 53000 + }, + { + "epoch": 16.14, + "learning_rate": 7.729680365296804e-06, + "loss": 1.9758, + "step": 53025 + }, + { + "epoch": 16.15, + "learning_rate": 7.714459665144597e-06, + "loss": 2.0452, + "step": 53050 + }, + { + "epoch": 16.16, + "learning_rate": 7.69923896499239e-06, + "loss": 1.9115, + "step": 53075 + }, + { + "epoch": 16.16, + "learning_rate": 7.684018264840183e-06, + "loss": 1.961, + "step": 53100 + }, + { + "epoch": 16.17, + "learning_rate": 7.668797564687976e-06, + "loss": 1.9015, + "step": 53125 + }, + { + "epoch": 16.18, + "learning_rate": 7.653576864535768e-06, + "loss": 1.9459, + "step": 53150 + }, + { + "epoch": 16.19, + "learning_rate": 7.638356164383563e-06, + "loss": 1.9937, + "step": 53175 + }, + { + "epoch": 16.19, + "learning_rate": 7.623135464231355e-06, + "loss": 1.9711, + "step": 53200 + }, + { + "epoch": 16.2, + "learning_rate": 7.607914764079149e-06, + "loss": 1.9129, + "step": 53225 + }, + { + "epoch": 16.21, + "learning_rate": 7.592694063926941e-06, + "loss": 1.9291, + "step": 53250 + }, + { + "epoch": 16.22, + "learning_rate": 7.577473363774734e-06, + "loss": 2.0245, + "step": 53275 + }, + { + "epoch": 16.23, + "learning_rate": 7.562252663622527e-06, + "loss": 1.9558, + "step": 53300 + }, + { + "epoch": 16.23, + "learning_rate": 7.54703196347032e-06, + "loss": 1.9083, + "step": 53325 + }, + { + "epoch": 16.24, + "learning_rate": 7.531811263318113e-06, + "loss": 1.9875, + "step": 53350 + }, + { + "epoch": 16.25, + "learning_rate": 7.5165905631659066e-06, + "loss": 1.9424, + "step": 53375 + }, + { + "epoch": 16.26, + "learning_rate": 7.5013698630136986e-06, + "loss": 1.9449, + "step": 53400 + }, + { + "epoch": 16.26, + "learning_rate": 7.486149162861492e-06, + "loss": 1.8812, + "step": 53425 + }, + { + "epoch": 16.27, + "learning_rate": 7.470928462709285e-06, + "loss": 1.9874, + "step": 53450 + }, + { + "epoch": 16.28, + "learning_rate": 7.455707762557079e-06, + "loss": 2.0413, + "step": 53475 + }, + { + "epoch": 16.29, + "learning_rate": 7.440487062404871e-06, + "loss": 1.9994, + "step": 53500 + }, + { + "epoch": 16.29, + "learning_rate": 7.4252663622526645e-06, + "loss": 1.9185, + "step": 53525 + }, + { + "epoch": 16.3, + "learning_rate": 7.4100456621004565e-06, + "loss": 1.8973, + "step": 53550 + }, + { + "epoch": 16.31, + "learning_rate": 7.39482496194825e-06, + "loss": 1.9159, + "step": 53575 + }, + { + "epoch": 16.32, + "learning_rate": 7.379604261796043e-06, + "loss": 1.9243, + "step": 53600 + }, + { + "epoch": 16.32, + "learning_rate": 7.364383561643837e-06, + "loss": 1.9837, + "step": 53625 + }, + { + "epoch": 16.33, + "learning_rate": 7.349162861491629e-06, + "loss": 1.9124, + "step": 53650 + }, + { + "epoch": 16.34, + "learning_rate": 7.333942161339422e-06, + "loss": 1.9815, + "step": 53675 + }, + { + "epoch": 16.35, + "learning_rate": 7.318721461187215e-06, + "loss": 1.911, + "step": 53700 + }, + { + "epoch": 16.35, + "learning_rate": 7.303500761035008e-06, + "loss": 1.934, + "step": 53725 + }, + { + "epoch": 16.36, + "learning_rate": 7.288280060882801e-06, + "loss": 1.9157, + "step": 53750 + }, + { + "epoch": 16.37, + "learning_rate": 7.2730593607305946e-06, + "loss": 1.9445, + "step": 53775 + }, + { + "epoch": 16.38, + "learning_rate": 7.2578386605783865e-06, + "loss": 1.9126, + "step": 53800 + }, + { + "epoch": 16.39, + "learning_rate": 7.24261796042618e-06, + "loss": 1.9318, + "step": 53825 + }, + { + "epoch": 16.39, + "learning_rate": 7.227397260273973e-06, + "loss": 1.9675, + "step": 53850 + }, + { + "epoch": 16.4, + "learning_rate": 7.212176560121767e-06, + "loss": 2.0658, + "step": 53875 + }, + { + "epoch": 16.41, + "learning_rate": 7.196955859969559e-06, + "loss": 1.9834, + "step": 53900 + }, + { + "epoch": 16.42, + "learning_rate": 7.1817351598173524e-06, + "loss": 1.9381, + "step": 53925 + }, + { + "epoch": 16.42, + "learning_rate": 7.1665144596651444e-06, + "loss": 1.9349, + "step": 53950 + }, + { + "epoch": 16.43, + "learning_rate": 7.151293759512938e-06, + "loss": 1.981, + "step": 53975 + }, + { + "epoch": 16.44, + "learning_rate": 7.136073059360731e-06, + "loss": 1.9228, + "step": 54000 + }, + { + "epoch": 16.45, + "learning_rate": 7.120852359208525e-06, + "loss": 1.9982, + "step": 54025 + }, + { + "epoch": 16.45, + "learning_rate": 7.105631659056317e-06, + "loss": 1.9066, + "step": 54050 + }, + { + "epoch": 16.46, + "learning_rate": 7.09041095890411e-06, + "loss": 1.8969, + "step": 54075 + }, + { + "epoch": 16.47, + "learning_rate": 7.075190258751903e-06, + "loss": 1.9243, + "step": 54100 + }, + { + "epoch": 16.48, + "learning_rate": 7.059969558599696e-06, + "loss": 2.0025, + "step": 54125 + }, + { + "epoch": 16.48, + "learning_rate": 7.044748858447489e-06, + "loss": 1.9632, + "step": 54150 + }, + { + "epoch": 16.49, + "learning_rate": 7.0295281582952825e-06, + "loss": 1.9642, + "step": 54175 + }, + { + "epoch": 16.5, + "learning_rate": 7.0143074581430745e-06, + "loss": 1.8765, + "step": 54200 + }, + { + "epoch": 16.51, + "learning_rate": 6.999086757990868e-06, + "loss": 1.8405, + "step": 54225 + }, + { + "epoch": 16.51, + "learning_rate": 6.983866057838661e-06, + "loss": 1.9473, + "step": 54250 + }, + { + "epoch": 16.52, + "learning_rate": 6.968645357686455e-06, + "loss": 1.9706, + "step": 54275 + }, + { + "epoch": 16.53, + "learning_rate": 6.953424657534247e-06, + "loss": 1.9083, + "step": 54300 + }, + { + "epoch": 16.54, + "learning_rate": 6.93820395738204e-06, + "loss": 1.9204, + "step": 54325 + }, + { + "epoch": 16.54, + "learning_rate": 6.922983257229832e-06, + "loss": 1.9436, + "step": 54350 + }, + { + "epoch": 16.55, + "learning_rate": 6.907762557077626e-06, + "loss": 2.0207, + "step": 54375 + }, + { + "epoch": 16.56, + "learning_rate": 6.892541856925419e-06, + "loss": 1.961, + "step": 54400 + }, + { + "epoch": 16.57, + "learning_rate": 6.877321156773213e-06, + "loss": 1.8648, + "step": 54425 + }, + { + "epoch": 16.58, + "learning_rate": 6.862100456621005e-06, + "loss": 1.9194, + "step": 54450 + }, + { + "epoch": 16.58, + "learning_rate": 6.846879756468798e-06, + "loss": 1.9261, + "step": 54475 + }, + { + "epoch": 16.59, + "learning_rate": 6.831659056316591e-06, + "loss": 2.0225, + "step": 54500 + }, + { + "epoch": 16.6, + "learning_rate": 6.816438356164384e-06, + "loss": 1.9711, + "step": 54525 + }, + { + "epoch": 16.61, + "learning_rate": 6.801217656012177e-06, + "loss": 2.0096, + "step": 54550 + }, + { + "epoch": 16.61, + "learning_rate": 6.7859969558599705e-06, + "loss": 1.9443, + "step": 54575 + }, + { + "epoch": 16.62, + "learning_rate": 6.7707762557077625e-06, + "loss": 1.944, + "step": 54600 + }, + { + "epoch": 16.63, + "learning_rate": 6.755555555555556e-06, + "loss": 2.0549, + "step": 54625 + }, + { + "epoch": 16.64, + "learning_rate": 6.740334855403349e-06, + "loss": 1.9064, + "step": 54650 + }, + { + "epoch": 16.64, + "learning_rate": 6.72572298325723e-06, + "loss": 1.9491, + "step": 54675 + }, + { + "epoch": 16.65, + "learning_rate": 6.710502283105024e-06, + "loss": 1.9886, + "step": 54700 + }, + { + "epoch": 16.66, + "learning_rate": 6.695281582952816e-06, + "loss": 1.9576, + "step": 54725 + }, + { + "epoch": 16.67, + "learning_rate": 6.68006088280061e-06, + "loss": 1.921, + "step": 54750 + }, + { + "epoch": 16.67, + "learning_rate": 6.6648401826484024e-06, + "loss": 1.9427, + "step": 54775 + }, + { + "epoch": 16.68, + "learning_rate": 6.649619482496196e-06, + "loss": 1.9386, + "step": 54800 + }, + { + "epoch": 16.69, + "learning_rate": 6.634398782343988e-06, + "loss": 1.9491, + "step": 54825 + }, + { + "epoch": 16.7, + "learning_rate": 6.619178082191782e-06, + "loss": 1.9421, + "step": 54850 + }, + { + "epoch": 16.7, + "learning_rate": 6.603957382039574e-06, + "loss": 1.9423, + "step": 54875 + }, + { + "epoch": 16.71, + "learning_rate": 6.5887366818873675e-06, + "loss": 1.9382, + "step": 54900 + }, + { + "epoch": 16.72, + "learning_rate": 6.57351598173516e-06, + "loss": 1.9891, + "step": 54925 + }, + { + "epoch": 16.73, + "learning_rate": 6.558295281582954e-06, + "loss": 1.881, + "step": 54950 + }, + { + "epoch": 16.74, + "learning_rate": 6.543074581430746e-06, + "loss": 1.9859, + "step": 54975 + }, + { + "epoch": 16.74, + "learning_rate": 6.52785388127854e-06, + "loss": 1.8991, + "step": 55000 + }, + { + "epoch": 16.75, + "learning_rate": 6.5126331811263325e-06, + "loss": 1.9316, + "step": 55025 + }, + { + "epoch": 16.76, + "learning_rate": 6.497412480974125e-06, + "loss": 1.9139, + "step": 55050 + }, + { + "epoch": 16.77, + "learning_rate": 6.482191780821918e-06, + "loss": 1.8429, + "step": 55075 + }, + { + "epoch": 16.77, + "learning_rate": 6.466971080669712e-06, + "loss": 1.9872, + "step": 55100 + }, + { + "epoch": 16.78, + "learning_rate": 6.451750380517504e-06, + "loss": 1.8164, + "step": 55125 + }, + { + "epoch": 16.79, + "learning_rate": 6.4365296803652976e-06, + "loss": 1.9674, + "step": 55150 + }, + { + "epoch": 16.8, + "learning_rate": 6.42130898021309e-06, + "loss": 1.9245, + "step": 55175 + }, + { + "epoch": 16.8, + "learning_rate": 6.406088280060884e-06, + "loss": 1.9864, + "step": 55200 + }, + { + "epoch": 16.81, + "learning_rate": 6.390867579908676e-06, + "loss": 1.9186, + "step": 55225 + }, + { + "epoch": 16.82, + "learning_rate": 6.37564687975647e-06, + "loss": 1.9924, + "step": 55250 + }, + { + "epoch": 16.83, + "learning_rate": 6.360426179604262e-06, + "loss": 1.9816, + "step": 55275 + }, + { + "epoch": 16.83, + "learning_rate": 6.3452054794520555e-06, + "loss": 1.9403, + "step": 55300 + }, + { + "epoch": 16.84, + "learning_rate": 6.3305936073059375e-06, + "loss": 1.9687, + "step": 55325 + }, + { + "epoch": 16.85, + "learning_rate": 6.3153729071537295e-06, + "loss": 1.9345, + "step": 55350 + }, + { + "epoch": 16.86, + "learning_rate": 6.300152207001523e-06, + "loss": 2.0326, + "step": 55375 + }, + { + "epoch": 16.86, + "learning_rate": 6.284931506849315e-06, + "loss": 1.9578, + "step": 55400 + }, + { + "epoch": 16.87, + "learning_rate": 6.269710806697109e-06, + "loss": 1.8891, + "step": 55425 + }, + { + "epoch": 16.88, + "learning_rate": 6.254490106544902e-06, + "loss": 1.9388, + "step": 55450 + }, + { + "epoch": 16.89, + "learning_rate": 6.239269406392695e-06, + "loss": 1.8948, + "step": 55475 + }, + { + "epoch": 16.89, + "learning_rate": 6.224048706240487e-06, + "loss": 2.0185, + "step": 55500 + }, + { + "epoch": 16.9, + "learning_rate": 6.208828006088281e-06, + "loss": 2.0193, + "step": 55525 + }, + { + "epoch": 16.91, + "learning_rate": 6.193607305936074e-06, + "loss": 1.9999, + "step": 55550 + }, + { + "epoch": 16.92, + "learning_rate": 6.178386605783867e-06, + "loss": 1.9439, + "step": 55575 + }, + { + "epoch": 16.93, + "learning_rate": 6.16316590563166e-06, + "loss": 2.025, + "step": 55600 + }, + { + "epoch": 16.93, + "learning_rate": 6.147945205479453e-06, + "loss": 1.9343, + "step": 55625 + }, + { + "epoch": 16.94, + "learning_rate": 6.132724505327245e-06, + "loss": 1.982, + "step": 55650 + }, + { + "epoch": 16.95, + "learning_rate": 6.117503805175039e-06, + "loss": 1.9344, + "step": 55675 + }, + { + "epoch": 16.96, + "learning_rate": 6.102283105022832e-06, + "loss": 1.8833, + "step": 55700 + }, + { + "epoch": 16.96, + "learning_rate": 6.087062404870625e-06, + "loss": 1.9786, + "step": 55725 + }, + { + "epoch": 16.97, + "learning_rate": 6.0718417047184175e-06, + "loss": 1.8689, + "step": 55750 + }, + { + "epoch": 16.98, + "learning_rate": 6.056621004566211e-06, + "loss": 1.9735, + "step": 55775 + }, + { + "epoch": 16.99, + "learning_rate": 6.041400304414003e-06, + "loss": 1.9081, + "step": 55800 + }, + { + "epoch": 16.99, + "learning_rate": 6.026179604261797e-06, + "loss": 1.9836, + "step": 55825 + }, + { + "epoch": 17.0, + "learning_rate": 6.01095890410959e-06, + "loss": 1.9072, + "step": 55850 + }, + { + "epoch": 17.01, + "learning_rate": 5.995738203957383e-06, + "loss": 1.9328, + "step": 55875 + }, + { + "epoch": 17.02, + "learning_rate": 5.980517503805175e-06, + "loss": 1.8841, + "step": 55900 + }, + { + "epoch": 17.02, + "learning_rate": 5.965296803652969e-06, + "loss": 1.8813, + "step": 55925 + }, + { + "epoch": 17.03, + "learning_rate": 5.950076103500762e-06, + "loss": 1.8905, + "step": 55950 + }, + { + "epoch": 17.04, + "learning_rate": 5.934855403348555e-06, + "loss": 1.9598, + "step": 55975 + }, + { + "epoch": 17.05, + "learning_rate": 5.9196347031963476e-06, + "loss": 1.9439, + "step": 56000 + }, + { + "epoch": 17.05, + "learning_rate": 5.904414003044141e-06, + "loss": 1.8998, + "step": 56025 + }, + { + "epoch": 17.06, + "learning_rate": 5.889193302891933e-06, + "loss": 1.8348, + "step": 56050 + }, + { + "epoch": 17.07, + "learning_rate": 5.873972602739727e-06, + "loss": 1.8576, + "step": 56075 + }, + { + "epoch": 17.08, + "learning_rate": 5.85875190258752e-06, + "loss": 1.9066, + "step": 56100 + }, + { + "epoch": 17.09, + "learning_rate": 5.843531202435313e-06, + "loss": 1.9717, + "step": 56125 + }, + { + "epoch": 17.09, + "learning_rate": 5.8283105022831055e-06, + "loss": 1.9157, + "step": 56150 + }, + { + "epoch": 17.1, + "learning_rate": 5.813089802130899e-06, + "loss": 1.9095, + "step": 56175 + }, + { + "epoch": 17.11, + "learning_rate": 5.797869101978691e-06, + "loss": 1.9038, + "step": 56200 + }, + { + "epoch": 17.12, + "learning_rate": 5.782648401826485e-06, + "loss": 1.9096, + "step": 56225 + }, + { + "epoch": 17.12, + "learning_rate": 5.767427701674278e-06, + "loss": 1.9724, + "step": 56250 + }, + { + "epoch": 17.13, + "learning_rate": 5.752207001522071e-06, + "loss": 1.9575, + "step": 56275 + }, + { + "epoch": 17.14, + "learning_rate": 5.736986301369863e-06, + "loss": 1.9643, + "step": 56300 + }, + { + "epoch": 17.15, + "learning_rate": 5.721765601217657e-06, + "loss": 1.9695, + "step": 56325 + }, + { + "epoch": 17.15, + "learning_rate": 5.70654490106545e-06, + "loss": 1.9695, + "step": 56350 + }, + { + "epoch": 17.16, + "learning_rate": 5.691324200913243e-06, + "loss": 1.8842, + "step": 56375 + }, + { + "epoch": 17.17, + "learning_rate": 5.6761035007610355e-06, + "loss": 1.9068, + "step": 56400 + }, + { + "epoch": 17.18, + "learning_rate": 5.660882800608829e-06, + "loss": 1.8677, + "step": 56425 + }, + { + "epoch": 17.18, + "learning_rate": 5.645662100456621e-06, + "loss": 1.8634, + "step": 56450 + }, + { + "epoch": 17.19, + "learning_rate": 5.630441400304415e-06, + "loss": 1.9824, + "step": 56475 + }, + { + "epoch": 17.2, + "learning_rate": 5.615220700152208e-06, + "loss": 1.9314, + "step": 56500 + }, + { + "epoch": 17.21, + "learning_rate": 5.600000000000001e-06, + "loss": 1.9477, + "step": 56525 + }, + { + "epoch": 17.21, + "learning_rate": 5.5847792998477934e-06, + "loss": 1.9666, + "step": 56550 + }, + { + "epoch": 17.22, + "learning_rate": 5.569558599695587e-06, + "loss": 1.8937, + "step": 56575 + }, + { + "epoch": 17.23, + "learning_rate": 5.554337899543379e-06, + "loss": 1.9245, + "step": 56600 + }, + { + "epoch": 17.24, + "learning_rate": 5.539117199391173e-06, + "loss": 1.864, + "step": 56625 + }, + { + "epoch": 17.25, + "learning_rate": 5.523896499238966e-06, + "loss": 1.9335, + "step": 56650 + }, + { + "epoch": 17.25, + "learning_rate": 5.508675799086759e-06, + "loss": 1.9818, + "step": 56675 + }, + { + "epoch": 17.26, + "learning_rate": 5.493455098934551e-06, + "loss": 1.8967, + "step": 56700 + }, + { + "epoch": 17.27, + "learning_rate": 5.478234398782345e-06, + "loss": 1.8585, + "step": 56725 + }, + { + "epoch": 17.28, + "learning_rate": 5.463013698630137e-06, + "loss": 1.9773, + "step": 56750 + }, + { + "epoch": 17.28, + "learning_rate": 5.447792998477931e-06, + "loss": 1.9371, + "step": 56775 + }, + { + "epoch": 17.29, + "learning_rate": 5.4325722983257235e-06, + "loss": 1.9483, + "step": 56800 + }, + { + "epoch": 17.3, + "learning_rate": 5.417351598173517e-06, + "loss": 1.8756, + "step": 56825 + }, + { + "epoch": 17.31, + "learning_rate": 5.402130898021309e-06, + "loss": 1.7767, + "step": 56850 + }, + { + "epoch": 17.31, + "learning_rate": 5.386910197869103e-06, + "loss": 1.952, + "step": 56875 + }, + { + "epoch": 17.32, + "learning_rate": 5.371689497716896e-06, + "loss": 1.9177, + "step": 56900 + }, + { + "epoch": 17.33, + "learning_rate": 5.3564687975646886e-06, + "loss": 1.9437, + "step": 56925 + }, + { + "epoch": 17.34, + "learning_rate": 5.341248097412481e-06, + "loss": 1.9826, + "step": 56950 + }, + { + "epoch": 17.34, + "learning_rate": 5.326027397260275e-06, + "loss": 1.944, + "step": 56975 + }, + { + "epoch": 17.35, + "learning_rate": 5.310806697108067e-06, + "loss": 1.9131, + "step": 57000 + }, + { + "epoch": 17.36, + "learning_rate": 5.295585996955861e-06, + "loss": 1.917, + "step": 57025 + }, + { + "epoch": 17.37, + "learning_rate": 5.280365296803654e-06, + "loss": 1.9166, + "step": 57050 + }, + { + "epoch": 17.37, + "learning_rate": 5.265144596651447e-06, + "loss": 1.8458, + "step": 57075 + }, + { + "epoch": 17.38, + "learning_rate": 5.249923896499239e-06, + "loss": 1.9775, + "step": 57100 + }, + { + "epoch": 17.39, + "learning_rate": 5.234703196347033e-06, + "loss": 1.9948, + "step": 57125 + }, + { + "epoch": 17.4, + "learning_rate": 5.219482496194825e-06, + "loss": 1.9398, + "step": 57150 + }, + { + "epoch": 17.4, + "learning_rate": 5.204261796042619e-06, + "loss": 1.9874, + "step": 57175 + }, + { + "epoch": 17.41, + "learning_rate": 5.1890410958904115e-06, + "loss": 1.9431, + "step": 57200 + }, + { + "epoch": 17.42, + "learning_rate": 5.1738203957382035e-06, + "loss": 1.9826, + "step": 57225 + }, + { + "epoch": 17.43, + "learning_rate": 5.158599695585997e-06, + "loss": 1.8329, + "step": 57250 + }, + { + "epoch": 17.44, + "learning_rate": 5.14337899543379e-06, + "loss": 1.9459, + "step": 57275 + }, + { + "epoch": 17.44, + "learning_rate": 5.128158295281584e-06, + "loss": 1.8475, + "step": 57300 + }, + { + "epoch": 17.45, + "learning_rate": 5.112937595129376e-06, + "loss": 1.9633, + "step": 57325 + }, + { + "epoch": 17.46, + "learning_rate": 5.097716894977169e-06, + "loss": 1.879, + "step": 57350 + }, + { + "epoch": 17.47, + "learning_rate": 5.082496194824962e-06, + "loss": 1.9553, + "step": 57375 + }, + { + "epoch": 17.47, + "learning_rate": 5.067275494672755e-06, + "loss": 1.9861, + "step": 57400 + }, + { + "epoch": 17.48, + "learning_rate": 5.052054794520548e-06, + "loss": 1.9381, + "step": 57425 + }, + { + "epoch": 17.49, + "learning_rate": 5.036834094368342e-06, + "loss": 1.8685, + "step": 57450 + }, + { + "epoch": 17.5, + "learning_rate": 5.021613394216134e-06, + "loss": 1.8962, + "step": 57475 + }, + { + "epoch": 17.5, + "learning_rate": 5.006392694063927e-06, + "loss": 1.879, + "step": 57500 + }, + { + "epoch": 17.51, + "learning_rate": 4.99117199391172e-06, + "loss": 2.0326, + "step": 57525 + }, + { + "epoch": 17.52, + "learning_rate": 4.975951293759513e-06, + "loss": 1.9831, + "step": 57550 + }, + { + "epoch": 17.53, + "learning_rate": 4.960730593607307e-06, + "loss": 2.0538, + "step": 57575 + }, + { + "epoch": 17.53, + "learning_rate": 4.9455098934550995e-06, + "loss": 1.9686, + "step": 57600 + }, + { + "epoch": 17.54, + "learning_rate": 4.930289193302892e-06, + "loss": 1.9546, + "step": 57625 + }, + { + "epoch": 17.55, + "learning_rate": 4.915068493150685e-06, + "loss": 2.0175, + "step": 57650 + }, + { + "epoch": 17.56, + "learning_rate": 4.899847792998478e-06, + "loss": 1.9396, + "step": 57675 + }, + { + "epoch": 17.56, + "learning_rate": 4.884627092846272e-06, + "loss": 1.8997, + "step": 57700 + }, + { + "epoch": 17.57, + "learning_rate": 4.8694063926940645e-06, + "loss": 1.9828, + "step": 57725 + }, + { + "epoch": 17.58, + "learning_rate": 4.854185692541857e-06, + "loss": 2.0147, + "step": 57750 + }, + { + "epoch": 17.59, + "learning_rate": 4.83896499238965e-06, + "loss": 2.0053, + "step": 57775 + }, + { + "epoch": 17.6, + "learning_rate": 4.823744292237443e-06, + "loss": 1.9466, + "step": 57800 + }, + { + "epoch": 17.6, + "learning_rate": 4.808523592085237e-06, + "loss": 2.0544, + "step": 57825 + }, + { + "epoch": 17.61, + "learning_rate": 4.793911719939118e-06, + "loss": 2.0409, + "step": 57850 + }, + { + "epoch": 17.62, + "learning_rate": 4.778691019786911e-06, + "loss": 1.9398, + "step": 57875 + }, + { + "epoch": 17.63, + "learning_rate": 4.763470319634704e-06, + "loss": 1.961, + "step": 57900 + }, + { + "epoch": 17.63, + "learning_rate": 4.7482496194824965e-06, + "loss": 1.9101, + "step": 57925 + }, + { + "epoch": 17.64, + "learning_rate": 4.73302891933029e-06, + "loss": 1.9418, + "step": 57950 + }, + { + "epoch": 17.65, + "learning_rate": 4.717808219178083e-06, + "loss": 2.0025, + "step": 57975 + }, + { + "epoch": 17.66, + "learning_rate": 4.702587519025876e-06, + "loss": 1.8889, + "step": 58000 + }, + { + "epoch": 17.66, + "learning_rate": 4.687366818873669e-06, + "loss": 1.9201, + "step": 58025 + }, + { + "epoch": 17.67, + "learning_rate": 4.6721461187214615e-06, + "loss": 1.8512, + "step": 58050 + }, + { + "epoch": 17.68, + "learning_rate": 4.656925418569254e-06, + "loss": 1.9532, + "step": 58075 + }, + { + "epoch": 17.69, + "learning_rate": 4.641704718417048e-06, + "loss": 1.9218, + "step": 58100 + }, + { + "epoch": 17.69, + "learning_rate": 4.626484018264841e-06, + "loss": 1.8991, + "step": 58125 + }, + { + "epoch": 17.7, + "learning_rate": 4.611263318112634e-06, + "loss": 1.8098, + "step": 58150 + }, + { + "epoch": 17.71, + "learning_rate": 4.5960426179604265e-06, + "loss": 1.8689, + "step": 58175 + }, + { + "epoch": 17.72, + "learning_rate": 4.580821917808219e-06, + "loss": 1.9573, + "step": 58200 + }, + { + "epoch": 17.72, + "learning_rate": 4.565601217656013e-06, + "loss": 1.9308, + "step": 58225 + }, + { + "epoch": 17.73, + "learning_rate": 4.550380517503806e-06, + "loss": 1.9812, + "step": 58250 + }, + { + "epoch": 17.74, + "learning_rate": 4.535159817351599e-06, + "loss": 1.9183, + "step": 58275 + }, + { + "epoch": 17.75, + "learning_rate": 4.519939117199392e-06, + "loss": 1.9293, + "step": 58300 + }, + { + "epoch": 17.75, + "learning_rate": 4.5047184170471844e-06, + "loss": 1.8619, + "step": 58325 + }, + { + "epoch": 17.76, + "learning_rate": 4.489497716894978e-06, + "loss": 1.9365, + "step": 58350 + }, + { + "epoch": 17.77, + "learning_rate": 4.474277016742771e-06, + "loss": 1.961, + "step": 58375 + }, + { + "epoch": 17.78, + "learning_rate": 4.459056316590564e-06, + "loss": 1.9869, + "step": 58400 + }, + { + "epoch": 17.79, + "learning_rate": 4.443835616438357e-06, + "loss": 1.9984, + "step": 58425 + }, + { + "epoch": 17.79, + "learning_rate": 4.4286149162861495e-06, + "loss": 1.9806, + "step": 58450 + }, + { + "epoch": 17.8, + "learning_rate": 4.413394216133942e-06, + "loss": 1.9724, + "step": 58475 + }, + { + "epoch": 17.81, + "learning_rate": 4.398173515981736e-06, + "loss": 1.9639, + "step": 58500 + }, + { + "epoch": 17.82, + "learning_rate": 4.382952815829529e-06, + "loss": 2.0249, + "step": 58525 + }, + { + "epoch": 17.82, + "learning_rate": 4.367732115677322e-06, + "loss": 1.9433, + "step": 58550 + }, + { + "epoch": 17.83, + "learning_rate": 4.3525114155251145e-06, + "loss": 1.9502, + "step": 58575 + }, + { + "epoch": 17.84, + "learning_rate": 4.337290715372907e-06, + "loss": 1.996, + "step": 58600 + }, + { + "epoch": 17.85, + "learning_rate": 4.322070015220701e-06, + "loss": 1.9632, + "step": 58625 + }, + { + "epoch": 17.85, + "learning_rate": 4.306849315068494e-06, + "loss": 1.9467, + "step": 58650 + }, + { + "epoch": 17.86, + "learning_rate": 4.291628614916287e-06, + "loss": 1.915, + "step": 58675 + }, + { + "epoch": 17.87, + "learning_rate": 4.2764079147640796e-06, + "loss": 1.9257, + "step": 58700 + }, + { + "epoch": 17.88, + "learning_rate": 4.261187214611872e-06, + "loss": 1.9646, + "step": 58725 + }, + { + "epoch": 17.88, + "learning_rate": 4.245966514459665e-06, + "loss": 1.9155, + "step": 58750 + }, + { + "epoch": 17.89, + "learning_rate": 4.230745814307459e-06, + "loss": 1.8798, + "step": 58775 + }, + { + "epoch": 17.9, + "learning_rate": 4.215525114155252e-06, + "loss": 1.9897, + "step": 58800 + }, + { + "epoch": 17.91, + "learning_rate": 4.200304414003045e-06, + "loss": 1.9645, + "step": 58825 + }, + { + "epoch": 17.91, + "learning_rate": 4.1850837138508375e-06, + "loss": 1.9438, + "step": 58850 + }, + { + "epoch": 17.92, + "learning_rate": 4.16986301369863e-06, + "loss": 1.986, + "step": 58875 + }, + { + "epoch": 17.93, + "learning_rate": 4.154642313546424e-06, + "loss": 1.9541, + "step": 58900 + }, + { + "epoch": 17.94, + "learning_rate": 4.139421613394217e-06, + "loss": 1.9109, + "step": 58925 + }, + { + "epoch": 17.95, + "learning_rate": 4.12420091324201e-06, + "loss": 1.9129, + "step": 58950 + }, + { + "epoch": 17.95, + "learning_rate": 4.1089802130898025e-06, + "loss": 1.9034, + "step": 58975 + }, + { + "epoch": 17.96, + "learning_rate": 4.093759512937595e-06, + "loss": 2.0131, + "step": 59000 + }, + { + "epoch": 17.97, + "learning_rate": 4.078538812785389e-06, + "loss": 1.8497, + "step": 59025 + }, + { + "epoch": 17.98, + "learning_rate": 4.063318112633182e-06, + "loss": 1.9809, + "step": 59050 + }, + { + "epoch": 17.98, + "learning_rate": 4.048097412480975e-06, + "loss": 1.9217, + "step": 59075 + }, + { + "epoch": 17.99, + "learning_rate": 4.0328767123287676e-06, + "loss": 1.8929, + "step": 59100 + }, + { + "epoch": 18.0, + "learning_rate": 4.01765601217656e-06, + "loss": 1.9193, + "step": 59125 + }, + { + "epoch": 18.01, + "learning_rate": 4.002435312024353e-06, + "loss": 1.9315, + "step": 59150 + }, + { + "epoch": 18.01, + "learning_rate": 3.987214611872147e-06, + "loss": 1.8903, + "step": 59175 + }, + { + "epoch": 18.02, + "learning_rate": 3.97199391171994e-06, + "loss": 1.9688, + "step": 59200 + }, + { + "epoch": 18.03, + "learning_rate": 3.956773211567733e-06, + "loss": 1.9036, + "step": 59225 + }, + { + "epoch": 18.04, + "learning_rate": 3.9415525114155254e-06, + "loss": 1.9615, + "step": 59250 + }, + { + "epoch": 18.04, + "learning_rate": 3.926331811263318e-06, + "loss": 1.8522, + "step": 59275 + }, + { + "epoch": 18.05, + "learning_rate": 3.911111111111112e-06, + "loss": 1.9418, + "step": 59300 + }, + { + "epoch": 18.06, + "learning_rate": 3.895890410958905e-06, + "loss": 1.8732, + "step": 59325 + }, + { + "epoch": 18.07, + "learning_rate": 3.880669710806698e-06, + "loss": 1.9244, + "step": 59350 + }, + { + "epoch": 18.07, + "learning_rate": 3.8654490106544905e-06, + "loss": 1.9065, + "step": 59375 + }, + { + "epoch": 18.08, + "learning_rate": 3.850228310502283e-06, + "loss": 1.9452, + "step": 59400 + }, + { + "epoch": 18.09, + "learning_rate": 3.835007610350077e-06, + "loss": 1.9752, + "step": 59425 + }, + { + "epoch": 18.1, + "learning_rate": 3.81978691019787e-06, + "loss": 1.9594, + "step": 59450 + }, + { + "epoch": 18.11, + "learning_rate": 3.8045662100456627e-06, + "loss": 1.9121, + "step": 59475 + }, + { + "epoch": 18.11, + "learning_rate": 3.7893455098934555e-06, + "loss": 1.975, + "step": 59500 + }, + { + "epoch": 18.12, + "learning_rate": 3.774124809741249e-06, + "loss": 1.8229, + "step": 59525 + }, + { + "epoch": 18.13, + "learning_rate": 3.7589041095890416e-06, + "loss": 1.9183, + "step": 59550 + }, + { + "epoch": 18.14, + "learning_rate": 3.7436834094368345e-06, + "loss": 1.824, + "step": 59575 + }, + { + "epoch": 18.14, + "learning_rate": 3.7284627092846277e-06, + "loss": 1.9862, + "step": 59600 + }, + { + "epoch": 18.15, + "learning_rate": 3.7132420091324206e-06, + "loss": 1.9525, + "step": 59625 + }, + { + "epoch": 18.16, + "learning_rate": 3.6980213089802134e-06, + "loss": 1.9417, + "step": 59650 + }, + { + "epoch": 18.17, + "learning_rate": 3.6828006088280067e-06, + "loss": 1.9509, + "step": 59675 + }, + { + "epoch": 18.17, + "learning_rate": 3.6675799086757995e-06, + "loss": 1.9492, + "step": 59700 + }, + { + "epoch": 18.18, + "learning_rate": 3.652359208523593e-06, + "loss": 1.947, + "step": 59725 + }, + { + "epoch": 18.19, + "learning_rate": 3.6371385083713856e-06, + "loss": 1.8975, + "step": 59750 + }, + { + "epoch": 18.2, + "learning_rate": 3.6219178082191785e-06, + "loss": 1.8844, + "step": 59775 + }, + { + "epoch": 18.2, + "learning_rate": 3.6066971080669717e-06, + "loss": 1.9304, + "step": 59800 + }, + { + "epoch": 18.21, + "learning_rate": 3.5914764079147646e-06, + "loss": 2.0124, + "step": 59825 + }, + { + "epoch": 18.22, + "learning_rate": 3.5762557077625574e-06, + "loss": 1.876, + "step": 59850 + }, + { + "epoch": 18.23, + "learning_rate": 3.5610350076103507e-06, + "loss": 1.893, + "step": 59875 + }, + { + "epoch": 18.23, + "learning_rate": 3.5458143074581435e-06, + "loss": 1.9703, + "step": 59900 + }, + { + "epoch": 18.24, + "learning_rate": 3.5305936073059368e-06, + "loss": 1.9334, + "step": 59925 + }, + { + "epoch": 18.25, + "learning_rate": 3.5153729071537296e-06, + "loss": 1.9834, + "step": 59950 + }, + { + "epoch": 18.26, + "learning_rate": 3.5001522070015225e-06, + "loss": 1.868, + "step": 59975 + }, + { + "epoch": 18.26, + "learning_rate": 3.4849315068493157e-06, + "loss": 1.9806, + "step": 60000 + }, + { + "epoch": 18.27, + "learning_rate": 3.4697108066971086e-06, + "loss": 1.9378, + "step": 60025 + }, + { + "epoch": 18.28, + "learning_rate": 3.4544901065449014e-06, + "loss": 1.8521, + "step": 60050 + }, + { + "epoch": 18.29, + "learning_rate": 3.4392694063926947e-06, + "loss": 1.9511, + "step": 60075 + }, + { + "epoch": 18.3, + "learning_rate": 3.4240487062404875e-06, + "loss": 1.8839, + "step": 60100 + }, + { + "epoch": 18.3, + "learning_rate": 3.4088280060882808e-06, + "loss": 1.954, + "step": 60125 + }, + { + "epoch": 18.31, + "learning_rate": 3.3936073059360736e-06, + "loss": 1.9526, + "step": 60150 + }, + { + "epoch": 18.32, + "learning_rate": 3.3783866057838664e-06, + "loss": 1.8578, + "step": 60175 + }, + { + "epoch": 18.33, + "learning_rate": 3.3631659056316597e-06, + "loss": 1.9272, + "step": 60200 + }, + { + "epoch": 18.33, + "learning_rate": 3.3479452054794526e-06, + "loss": 1.8606, + "step": 60225 + }, + { + "epoch": 18.34, + "learning_rate": 3.3327245053272454e-06, + "loss": 1.8964, + "step": 60250 + }, + { + "epoch": 18.35, + "learning_rate": 3.3175038051750387e-06, + "loss": 1.9205, + "step": 60275 + }, + { + "epoch": 18.36, + "learning_rate": 3.3022831050228315e-06, + "loss": 1.8762, + "step": 60300 + }, + { + "epoch": 18.36, + "learning_rate": 3.2870624048706248e-06, + "loss": 1.93, + "step": 60325 + }, + { + "epoch": 18.37, + "learning_rate": 3.2718417047184176e-06, + "loss": 2.0007, + "step": 60350 + }, + { + "epoch": 18.38, + "learning_rate": 3.2566210045662104e-06, + "loss": 1.935, + "step": 60375 + }, + { + "epoch": 18.39, + "learning_rate": 3.2414003044140037e-06, + "loss": 1.9675, + "step": 60400 + }, + { + "epoch": 18.39, + "learning_rate": 3.2261796042617965e-06, + "loss": 2.0031, + "step": 60425 + }, + { + "epoch": 18.4, + "learning_rate": 3.2109589041095894e-06, + "loss": 1.9624, + "step": 60450 + }, + { + "epoch": 18.41, + "learning_rate": 3.1957382039573826e-06, + "loss": 1.9126, + "step": 60475 + }, + { + "epoch": 18.42, + "learning_rate": 3.1805175038051755e-06, + "loss": 1.9268, + "step": 60500 + }, + { + "epoch": 18.42, + "learning_rate": 3.1652968036529687e-06, + "loss": 1.8126, + "step": 60525 + }, + { + "epoch": 18.43, + "learning_rate": 3.1500761035007616e-06, + "loss": 1.9809, + "step": 60550 + }, + { + "epoch": 18.44, + "learning_rate": 3.1348554033485544e-06, + "loss": 1.8864, + "step": 60575 + }, + { + "epoch": 18.45, + "learning_rate": 3.1196347031963477e-06, + "loss": 1.9743, + "step": 60600 + }, + { + "epoch": 18.46, + "learning_rate": 3.1044140030441405e-06, + "loss": 2.0185, + "step": 60625 + }, + { + "epoch": 18.46, + "learning_rate": 3.0891933028919334e-06, + "loss": 1.9321, + "step": 60650 + }, + { + "epoch": 18.47, + "learning_rate": 3.0739726027397266e-06, + "loss": 1.9233, + "step": 60675 + }, + { + "epoch": 18.48, + "learning_rate": 3.0587519025875195e-06, + "loss": 2.0277, + "step": 60700 + }, + { + "epoch": 18.49, + "learning_rate": 3.0435312024353123e-06, + "loss": 1.98, + "step": 60725 + }, + { + "epoch": 18.49, + "learning_rate": 3.0283105022831056e-06, + "loss": 1.9194, + "step": 60750 + }, + { + "epoch": 18.5, + "learning_rate": 3.0130898021308984e-06, + "loss": 1.9477, + "step": 60775 + }, + { + "epoch": 18.51, + "learning_rate": 2.9978691019786917e-06, + "loss": 1.881, + "step": 60800 + }, + { + "epoch": 18.52, + "learning_rate": 2.9826484018264845e-06, + "loss": 1.9989, + "step": 60825 + }, + { + "epoch": 18.52, + "learning_rate": 2.9680365296803653e-06, + "loss": 1.941, + "step": 60850 + }, + { + "epoch": 18.53, + "learning_rate": 2.9528158295281586e-06, + "loss": 1.9003, + "step": 60875 + }, + { + "epoch": 18.54, + "learning_rate": 2.9375951293759514e-06, + "loss": 1.8749, + "step": 60900 + }, + { + "epoch": 18.55, + "learning_rate": 2.9223744292237442e-06, + "loss": 1.8517, + "step": 60925 + }, + { + "epoch": 18.55, + "learning_rate": 2.9071537290715375e-06, + "loss": 1.9296, + "step": 60950 + }, + { + "epoch": 18.56, + "learning_rate": 2.8919330289193303e-06, + "loss": 1.8825, + "step": 60975 + }, + { + "epoch": 18.57, + "learning_rate": 2.876712328767123e-06, + "loss": 1.9291, + "step": 61000 + }, + { + "epoch": 18.58, + "learning_rate": 2.8614916286149164e-06, + "loss": 1.9779, + "step": 61025 + }, + { + "epoch": 18.58, + "learning_rate": 2.8462709284627093e-06, + "loss": 1.9288, + "step": 61050 + }, + { + "epoch": 18.59, + "learning_rate": 2.8310502283105025e-06, + "loss": 1.9261, + "step": 61075 + }, + { + "epoch": 18.6, + "learning_rate": 2.8158295281582954e-06, + "loss": 1.939, + "step": 61100 + }, + { + "epoch": 18.61, + "learning_rate": 2.8006088280060882e-06, + "loss": 2.0491, + "step": 61125 + }, + { + "epoch": 18.61, + "learning_rate": 2.7853881278538815e-06, + "loss": 1.9455, + "step": 61150 + }, + { + "epoch": 18.62, + "learning_rate": 2.7701674277016743e-06, + "loss": 1.913, + "step": 61175 + }, + { + "epoch": 18.63, + "learning_rate": 2.754946727549467e-06, + "loss": 1.9188, + "step": 61200 + }, + { + "epoch": 18.64, + "learning_rate": 2.7397260273972604e-06, + "loss": 1.9576, + "step": 61225 + }, + { + "epoch": 18.65, + "learning_rate": 2.7245053272450533e-06, + "loss": 1.8853, + "step": 61250 + }, + { + "epoch": 18.65, + "learning_rate": 2.7092846270928465e-06, + "loss": 1.9414, + "step": 61275 + }, + { + "epoch": 18.66, + "learning_rate": 2.6940639269406394e-06, + "loss": 1.8567, + "step": 61300 + }, + { + "epoch": 18.67, + "learning_rate": 2.6788432267884322e-06, + "loss": 1.9412, + "step": 61325 + }, + { + "epoch": 18.68, + "learning_rate": 2.6636225266362255e-06, + "loss": 1.9092, + "step": 61350 + }, + { + "epoch": 18.68, + "learning_rate": 2.6484018264840183e-06, + "loss": 1.9909, + "step": 61375 + }, + { + "epoch": 18.69, + "learning_rate": 2.633181126331811e-06, + "loss": 1.9695, + "step": 61400 + }, + { + "epoch": 18.7, + "learning_rate": 2.6179604261796044e-06, + "loss": 1.9589, + "step": 61425 + }, + { + "epoch": 18.71, + "learning_rate": 2.6027397260273973e-06, + "loss": 1.9191, + "step": 61450 + }, + { + "epoch": 18.71, + "learning_rate": 2.5875190258751905e-06, + "loss": 1.893, + "step": 61475 + }, + { + "epoch": 18.72, + "learning_rate": 2.5722983257229834e-06, + "loss": 1.9381, + "step": 61500 + }, + { + "epoch": 18.73, + "learning_rate": 2.557077625570776e-06, + "loss": 1.9433, + "step": 61525 + }, + { + "epoch": 18.74, + "learning_rate": 2.5418569254185695e-06, + "loss": 1.8897, + "step": 61550 + }, + { + "epoch": 18.74, + "learning_rate": 2.5266362252663623e-06, + "loss": 1.977, + "step": 61575 + }, + { + "epoch": 18.75, + "learning_rate": 2.511415525114155e-06, + "loss": 1.8949, + "step": 61600 + }, + { + "epoch": 18.76, + "learning_rate": 2.4961948249619484e-06, + "loss": 1.9551, + "step": 61625 + }, + { + "epoch": 18.77, + "learning_rate": 2.4809741248097413e-06, + "loss": 1.8667, + "step": 61650 + }, + { + "epoch": 18.77, + "learning_rate": 2.4657534246575345e-06, + "loss": 1.8624, + "step": 61675 + }, + { + "epoch": 18.78, + "learning_rate": 2.4505327245053274e-06, + "loss": 1.9746, + "step": 61700 + }, + { + "epoch": 18.79, + "learning_rate": 2.43531202435312e-06, + "loss": 1.9408, + "step": 61725 + }, + { + "epoch": 18.8, + "learning_rate": 2.4200913242009135e-06, + "loss": 1.9824, + "step": 61750 + }, + { + "epoch": 18.81, + "learning_rate": 2.4048706240487063e-06, + "loss": 1.8111, + "step": 61775 + }, + { + "epoch": 18.81, + "learning_rate": 2.389649923896499e-06, + "loss": 2.0112, + "step": 61800 + }, + { + "epoch": 18.82, + "learning_rate": 2.3744292237442924e-06, + "loss": 1.8705, + "step": 61825 + }, + { + "epoch": 18.83, + "learning_rate": 2.3592085235920852e-06, + "loss": 1.9911, + "step": 61850 + }, + { + "epoch": 18.84, + "learning_rate": 2.343987823439878e-06, + "loss": 1.9958, + "step": 61875 + }, + { + "epoch": 18.84, + "learning_rate": 2.3287671232876713e-06, + "loss": 1.9827, + "step": 61900 + }, + { + "epoch": 18.85, + "learning_rate": 2.313546423135464e-06, + "loss": 1.8441, + "step": 61925 + }, + { + "epoch": 18.86, + "learning_rate": 2.2983257229832575e-06, + "loss": 1.9146, + "step": 61950 + }, + { + "epoch": 18.87, + "learning_rate": 2.2831050228310503e-06, + "loss": 1.9028, + "step": 61975 + }, + { + "epoch": 18.87, + "learning_rate": 2.267884322678843e-06, + "loss": 1.9987, + "step": 62000 + }, + { + "epoch": 18.88, + "learning_rate": 2.2526636225266364e-06, + "loss": 1.8399, + "step": 62025 + }, + { + "epoch": 18.89, + "learning_rate": 2.2374429223744292e-06, + "loss": 1.9116, + "step": 62050 + }, + { + "epoch": 18.9, + "learning_rate": 2.222222222222222e-06, + "loss": 1.9387, + "step": 62075 + }, + { + "epoch": 18.9, + "learning_rate": 2.2070015220700153e-06, + "loss": 1.9983, + "step": 62100 + }, + { + "epoch": 18.91, + "learning_rate": 2.191780821917808e-06, + "loss": 1.9453, + "step": 62125 + }, + { + "epoch": 18.92, + "learning_rate": 2.1765601217656014e-06, + "loss": 1.952, + "step": 62150 + }, + { + "epoch": 18.93, + "learning_rate": 2.1613394216133943e-06, + "loss": 1.9109, + "step": 62175 + }, + { + "epoch": 18.93, + "learning_rate": 2.146118721461187e-06, + "loss": 1.8966, + "step": 62200 + }, + { + "epoch": 18.94, + "learning_rate": 2.1308980213089804e-06, + "loss": 1.9123, + "step": 62225 + }, + { + "epoch": 18.95, + "learning_rate": 2.1156773211567732e-06, + "loss": 1.8517, + "step": 62250 + }, + { + "epoch": 18.96, + "learning_rate": 2.100456621004566e-06, + "loss": 1.9353, + "step": 62275 + }, + { + "epoch": 18.96, + "learning_rate": 2.0852359208523593e-06, + "loss": 1.8875, + "step": 62300 + }, + { + "epoch": 18.97, + "learning_rate": 2.070015220700152e-06, + "loss": 1.9581, + "step": 62325 + }, + { + "epoch": 18.98, + "learning_rate": 2.0547945205479454e-06, + "loss": 1.925, + "step": 62350 + }, + { + "epoch": 18.99, + "learning_rate": 2.0395738203957383e-06, + "loss": 1.9363, + "step": 62375 + }, + { + "epoch": 19.0, + "learning_rate": 2.024353120243531e-06, + "loss": 1.9514, + "step": 62400 + }, + { + "epoch": 19.0, + "learning_rate": 2.0091324200913244e-06, + "loss": 1.8421, + "step": 62425 + }, + { + "epoch": 19.01, + "learning_rate": 1.9939117199391172e-06, + "loss": 2.0036, + "step": 62450 + }, + { + "epoch": 19.02, + "learning_rate": 1.97869101978691e-06, + "loss": 1.8492, + "step": 62475 + }, + { + "epoch": 19.03, + "learning_rate": 1.9634703196347033e-06, + "loss": 1.8921, + "step": 62500 + }, + { + "epoch": 19.03, + "learning_rate": 1.948249619482496e-06, + "loss": 1.9476, + "step": 62525 + }, + { + "epoch": 19.04, + "learning_rate": 1.9330289193302894e-06, + "loss": 1.8792, + "step": 62550 + }, + { + "epoch": 19.05, + "learning_rate": 1.9178082191780823e-06, + "loss": 1.8571, + "step": 62575 + }, + { + "epoch": 19.06, + "learning_rate": 1.9025875190258753e-06, + "loss": 1.9357, + "step": 62600 + }, + { + "epoch": 19.06, + "learning_rate": 1.8873668188736682e-06, + "loss": 1.8947, + "step": 62625 + }, + { + "epoch": 19.07, + "learning_rate": 1.8721461187214612e-06, + "loss": 1.8989, + "step": 62650 + }, + { + "epoch": 19.08, + "learning_rate": 1.8569254185692543e-06, + "loss": 1.997, + "step": 62675 + }, + { + "epoch": 19.09, + "learning_rate": 1.8417047184170473e-06, + "loss": 1.9384, + "step": 62700 + }, + { + "epoch": 19.09, + "learning_rate": 1.8264840182648401e-06, + "loss": 1.8858, + "step": 62725 + }, + { + "epoch": 19.1, + "learning_rate": 1.8112633181126332e-06, + "loss": 1.8676, + "step": 62750 + }, + { + "epoch": 19.11, + "learning_rate": 1.7960426179604263e-06, + "loss": 1.9975, + "step": 62775 + }, + { + "epoch": 19.12, + "learning_rate": 1.7808219178082193e-06, + "loss": 1.9733, + "step": 62800 + }, + { + "epoch": 19.12, + "learning_rate": 1.7656012176560121e-06, + "loss": 1.9497, + "step": 62825 + }, + { + "epoch": 19.13, + "learning_rate": 1.7503805175038052e-06, + "loss": 1.8918, + "step": 62850 + }, + { + "epoch": 19.14, + "learning_rate": 1.7351598173515982e-06, + "loss": 1.9333, + "step": 62875 + }, + { + "epoch": 19.15, + "learning_rate": 1.7199391171993913e-06, + "loss": 1.9743, + "step": 62900 + }, + { + "epoch": 19.16, + "learning_rate": 1.7047184170471841e-06, + "loss": 1.8864, + "step": 62925 + }, + { + "epoch": 19.16, + "learning_rate": 1.6894977168949772e-06, + "loss": 1.9328, + "step": 62950 + }, + { + "epoch": 19.17, + "learning_rate": 1.6742770167427702e-06, + "loss": 1.9335, + "step": 62975 + }, + { + "epoch": 19.18, + "learning_rate": 1.6596651445966516e-06, + "loss": 1.9752, + "step": 63000 + }, + { + "epoch": 19.19, + "learning_rate": 1.6444444444444447e-06, + "loss": 1.9317, + "step": 63025 + }, + { + "epoch": 19.19, + "learning_rate": 1.6292237442922375e-06, + "loss": 1.9205, + "step": 63050 + }, + { + "epoch": 19.2, + "learning_rate": 1.6140030441400306e-06, + "loss": 1.9616, + "step": 63075 + }, + { + "epoch": 19.21, + "learning_rate": 1.5987823439878236e-06, + "loss": 2.047, + "step": 63100 + }, + { + "epoch": 19.22, + "learning_rate": 1.5835616438356167e-06, + "loss": 1.8741, + "step": 63125 + }, + { + "epoch": 19.22, + "learning_rate": 1.5683409436834095e-06, + "loss": 1.9207, + "step": 63150 + }, + { + "epoch": 19.23, + "learning_rate": 1.5531202435312026e-06, + "loss": 1.8698, + "step": 63175 + }, + { + "epoch": 19.24, + "learning_rate": 1.5378995433789956e-06, + "loss": 2.013, + "step": 63200 + }, + { + "epoch": 19.25, + "learning_rate": 1.5226788432267887e-06, + "loss": 1.8767, + "step": 63225 + }, + { + "epoch": 19.25, + "learning_rate": 1.5074581430745815e-06, + "loss": 1.9414, + "step": 63250 + }, + { + "epoch": 19.26, + "learning_rate": 1.4922374429223746e-06, + "loss": 2.025, + "step": 63275 + }, + { + "epoch": 19.27, + "learning_rate": 1.4770167427701676e-06, + "loss": 1.9642, + "step": 63300 + }, + { + "epoch": 19.28, + "learning_rate": 1.4617960426179607e-06, + "loss": 1.9146, + "step": 63325 + }, + { + "epoch": 19.28, + "learning_rate": 1.4465753424657535e-06, + "loss": 1.8681, + "step": 63350 + }, + { + "epoch": 19.29, + "learning_rate": 1.4313546423135466e-06, + "loss": 1.8663, + "step": 63375 + }, + { + "epoch": 19.3, + "learning_rate": 1.4161339421613396e-06, + "loss": 1.9674, + "step": 63400 + }, + { + "epoch": 19.31, + "learning_rate": 1.4009132420091327e-06, + "loss": 1.9506, + "step": 63425 + }, + { + "epoch": 19.32, + "learning_rate": 1.3856925418569255e-06, + "loss": 1.9306, + "step": 63450 + }, + { + "epoch": 19.32, + "learning_rate": 1.3704718417047186e-06, + "loss": 1.9052, + "step": 63475 + }, + { + "epoch": 19.33, + "learning_rate": 1.3552511415525116e-06, + "loss": 1.9244, + "step": 63500 + }, + { + "epoch": 19.34, + "learning_rate": 1.3400304414003047e-06, + "loss": 1.9635, + "step": 63525 + }, + { + "epoch": 19.35, + "learning_rate": 1.3248097412480975e-06, + "loss": 1.9551, + "step": 63550 + }, + { + "epoch": 19.35, + "learning_rate": 1.3095890410958906e-06, + "loss": 1.9475, + "step": 63575 + }, + { + "epoch": 19.36, + "learning_rate": 1.2943683409436836e-06, + "loss": 1.9417, + "step": 63600 + }, + { + "epoch": 19.37, + "learning_rate": 1.2791476407914767e-06, + "loss": 1.9187, + "step": 63625 + }, + { + "epoch": 19.38, + "learning_rate": 1.2639269406392695e-06, + "loss": 2.0175, + "step": 63650 + }, + { + "epoch": 19.38, + "learning_rate": 1.2487062404870626e-06, + "loss": 1.9486, + "step": 63675 + }, + { + "epoch": 19.39, + "learning_rate": 1.2334855403348556e-06, + "loss": 1.8935, + "step": 63700 + }, + { + "epoch": 19.4, + "learning_rate": 1.2182648401826487e-06, + "loss": 1.9383, + "step": 63725 + }, + { + "epoch": 19.41, + "learning_rate": 1.2030441400304415e-06, + "loss": 1.9645, + "step": 63750 + }, + { + "epoch": 19.41, + "learning_rate": 1.1878234398782346e-06, + "loss": 1.9119, + "step": 63775 + }, + { + "epoch": 19.42, + "learning_rate": 1.1726027397260276e-06, + "loss": 1.8621, + "step": 63800 + }, + { + "epoch": 19.43, + "learning_rate": 1.1573820395738207e-06, + "loss": 1.7967, + "step": 63825 + }, + { + "epoch": 19.44, + "learning_rate": 1.1421613394216135e-06, + "loss": 1.9894, + "step": 63850 + }, + { + "epoch": 19.44, + "learning_rate": 1.1269406392694063e-06, + "loss": 1.8729, + "step": 63875 + }, + { + "epoch": 19.45, + "learning_rate": 1.1117199391171994e-06, + "loss": 1.9467, + "step": 63900 + }, + { + "epoch": 19.46, + "learning_rate": 1.0964992389649924e-06, + "loss": 1.9585, + "step": 63925 + }, + { + "epoch": 19.47, + "learning_rate": 1.0812785388127855e-06, + "loss": 1.9316, + "step": 63950 + }, + { + "epoch": 19.47, + "learning_rate": 1.0660578386605783e-06, + "loss": 1.8996, + "step": 63975 + }, + { + "epoch": 19.48, + "learning_rate": 1.0508371385083714e-06, + "loss": 1.8568, + "step": 64000 + }, + { + "epoch": 19.49, + "learning_rate": 1.0356164383561644e-06, + "loss": 1.924, + "step": 64025 + }, + { + "epoch": 19.5, + "learning_rate": 1.0203957382039575e-06, + "loss": 1.8947, + "step": 64050 + }, + { + "epoch": 19.51, + "learning_rate": 1.0051750380517503e-06, + "loss": 2.0517, + "step": 64075 + }, + { + "epoch": 19.51, + "learning_rate": 9.899543378995434e-07, + "loss": 2.0314, + "step": 64100 + }, + { + "epoch": 19.52, + "learning_rate": 9.747336377473364e-07, + "loss": 1.8407, + "step": 64125 + }, + { + "epoch": 19.53, + "learning_rate": 9.595129375951295e-07, + "loss": 2.0457, + "step": 64150 + }, + { + "epoch": 19.54, + "learning_rate": 9.442922374429224e-07, + "loss": 1.9252, + "step": 64175 + }, + { + "epoch": 19.54, + "learning_rate": 9.290715372907154e-07, + "loss": 1.9663, + "step": 64200 + }, + { + "epoch": 19.55, + "learning_rate": 9.138508371385084e-07, + "loss": 2.0035, + "step": 64225 + }, + { + "epoch": 19.56, + "learning_rate": 8.986301369863014e-07, + "loss": 1.8369, + "step": 64250 + }, + { + "epoch": 19.57, + "learning_rate": 8.834094368340944e-07, + "loss": 1.89, + "step": 64275 + }, + { + "epoch": 19.57, + "learning_rate": 8.681887366818874e-07, + "loss": 1.813, + "step": 64300 + }, + { + "epoch": 19.58, + "learning_rate": 8.529680365296804e-07, + "loss": 1.9395, + "step": 64325 + }, + { + "epoch": 19.59, + "learning_rate": 8.377473363774734e-07, + "loss": 1.9558, + "step": 64350 + }, + { + "epoch": 19.6, + "learning_rate": 8.225266362252664e-07, + "loss": 1.9236, + "step": 64375 + }, + { + "epoch": 19.6, + "learning_rate": 8.073059360730594e-07, + "loss": 1.8632, + "step": 64400 + }, + { + "epoch": 19.61, + "learning_rate": 7.920852359208524e-07, + "loss": 1.8458, + "step": 64425 + }, + { + "epoch": 19.62, + "learning_rate": 7.768645357686454e-07, + "loss": 1.8997, + "step": 64450 + }, + { + "epoch": 19.63, + "learning_rate": 7.616438356164384e-07, + "loss": 1.9626, + "step": 64475 + }, + { + "epoch": 19.63, + "learning_rate": 7.464231354642314e-07, + "loss": 1.8967, + "step": 64500 + }, + { + "epoch": 19.64, + "learning_rate": 7.312024353120244e-07, + "loss": 1.961, + "step": 64525 + }, + { + "epoch": 19.65, + "learning_rate": 7.159817351598174e-07, + "loss": 1.8946, + "step": 64550 + }, + { + "epoch": 19.66, + "learning_rate": 7.007610350076104e-07, + "loss": 1.9476, + "step": 64575 + }, + { + "epoch": 19.67, + "learning_rate": 6.855403348554034e-07, + "loss": 1.9353, + "step": 64600 + }, + { + "epoch": 19.67, + "learning_rate": 6.703196347031964e-07, + "loss": 1.9945, + "step": 64625 + }, + { + "epoch": 19.68, + "learning_rate": 6.550989345509894e-07, + "loss": 1.9124, + "step": 64650 + }, + { + "epoch": 19.69, + "learning_rate": 6.398782343987824e-07, + "loss": 2.0121, + "step": 64675 + }, + { + "epoch": 19.7, + "learning_rate": 6.246575342465755e-07, + "loss": 1.9495, + "step": 64700 + }, + { + "epoch": 19.7, + "learning_rate": 6.094368340943684e-07, + "loss": 1.9243, + "step": 64725 + }, + { + "epoch": 19.71, + "learning_rate": 5.942161339421615e-07, + "loss": 1.9793, + "step": 64750 + }, + { + "epoch": 19.72, + "learning_rate": 5.789954337899544e-07, + "loss": 1.8846, + "step": 64775 + }, + { + "epoch": 19.73, + "learning_rate": 5.637747336377475e-07, + "loss": 1.8872, + "step": 64800 + }, + { + "epoch": 19.73, + "learning_rate": 5.485540334855404e-07, + "loss": 1.9736, + "step": 64825 + }, + { + "epoch": 19.74, + "learning_rate": 5.333333333333335e-07, + "loss": 1.8818, + "step": 64850 + }, + { + "epoch": 19.75, + "learning_rate": 5.181126331811263e-07, + "loss": 1.9747, + "step": 64875 + }, + { + "epoch": 19.76, + "learning_rate": 5.028919330289193e-07, + "loss": 1.9198, + "step": 64900 + }, + { + "epoch": 19.76, + "learning_rate": 4.876712328767123e-07, + "loss": 1.8793, + "step": 64925 + }, + { + "epoch": 19.77, + "learning_rate": 4.7245053272450534e-07, + "loss": 1.9494, + "step": 64950 + }, + { + "epoch": 19.78, + "learning_rate": 4.5722983257229834e-07, + "loss": 1.9567, + "step": 64975 + }, + { + "epoch": 19.79, + "learning_rate": 4.4200913242009134e-07, + "loss": 1.8885, + "step": 65000 + }, + { + "epoch": 19.79, + "learning_rate": 4.2678843226788434e-07, + "loss": 1.946, + "step": 65025 + }, + { + "epoch": 19.8, + "learning_rate": 4.1156773211567733e-07, + "loss": 1.9554, + "step": 65050 + }, + { + "epoch": 19.81, + "learning_rate": 3.9634703196347033e-07, + "loss": 1.8447, + "step": 65075 + }, + { + "epoch": 19.82, + "learning_rate": 3.8112633181126333e-07, + "loss": 1.9562, + "step": 65100 + }, + { + "epoch": 19.82, + "learning_rate": 3.6590563165905633e-07, + "loss": 1.9531, + "step": 65125 + }, + { + "epoch": 19.83, + "learning_rate": 3.5068493150684933e-07, + "loss": 1.9695, + "step": 65150 + }, + { + "epoch": 19.84, + "learning_rate": 3.3546423135464233e-07, + "loss": 1.9478, + "step": 65175 + }, + { + "epoch": 19.85, + "learning_rate": 3.2024353120243533e-07, + "loss": 2.0431, + "step": 65200 + }, + { + "epoch": 19.86, + "learning_rate": 3.050228310502283e-07, + "loss": 1.8231, + "step": 65225 + }, + { + "epoch": 19.86, + "learning_rate": 2.898021308980213e-07, + "loss": 1.8677, + "step": 65250 + }, + { + "epoch": 19.87, + "learning_rate": 2.745814307458143e-07, + "loss": 2.0174, + "step": 65275 + }, + { + "epoch": 19.88, + "learning_rate": 2.593607305936073e-07, + "loss": 1.9269, + "step": 65300 + }, + { + "epoch": 19.89, + "learning_rate": 2.441400304414003e-07, + "loss": 1.8943, + "step": 65325 + }, + { + "epoch": 19.89, + "learning_rate": 2.2891933028919335e-07, + "loss": 1.9661, + "step": 65350 + }, + { + "epoch": 19.9, + "learning_rate": 2.1369863013698635e-07, + "loss": 1.9616, + "step": 65375 + }, + { + "epoch": 19.91, + "learning_rate": 1.984779299847793e-07, + "loss": 1.8594, + "step": 65400 + }, + { + "epoch": 19.92, + "learning_rate": 1.832572298325723e-07, + "loss": 1.8842, + "step": 65425 + }, + { + "epoch": 19.92, + "learning_rate": 1.680365296803653e-07, + "loss": 2.032, + "step": 65450 + }, + { + "epoch": 19.93, + "learning_rate": 1.5281582952815831e-07, + "loss": 1.8597, + "step": 65475 + }, + { + "epoch": 19.94, + "learning_rate": 1.375951293759513e-07, + "loss": 1.9084, + "step": 65500 + }, + { + "epoch": 19.95, + "learning_rate": 1.223744292237443e-07, + "loss": 1.8235, + "step": 65525 + }, + { + "epoch": 19.95, + "learning_rate": 1.0715372907153731e-07, + "loss": 1.8212, + "step": 65550 + }, + { + "epoch": 19.96, + "learning_rate": 9.193302891933031e-08, + "loss": 1.8892, + "step": 65575 + }, + { + "epoch": 19.97, + "learning_rate": 7.67123287671233e-08, + "loss": 1.9391, + "step": 65600 + }, + { + "epoch": 19.98, + "learning_rate": 6.149162861491629e-08, + "loss": 1.922, + "step": 65625 + }, + { + "epoch": 19.98, + "learning_rate": 4.687975646879757e-08, + "loss": 1.8743, + "step": 65650 + }, + { + "epoch": 19.99, + "learning_rate": 3.1659056316590565e-08, + "loss": 1.8869, + "step": 65675 + }, + { + "epoch": 20.0, + "learning_rate": 1.6438356164383564e-08, + "loss": 2.0741, + "step": 65700 + }, + { + "epoch": 20.0, + "step": 65700, + "total_flos": 1.138795714927657e+18, + "train_loss": 2.126255558368096, + "train_runtime": 26711.3051, + "train_samples_per_second": 14.755, + "train_steps_per_second": 2.46 + } + ], + "max_steps": 65700, + "num_train_epochs": 20, + "total_flos": 1.138795714927657e+18, + "trial_name": null, + "trial_params": null +}