{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 141057,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 4.982276668297213e-05, "loss": 2.4577, "step": 500 },
    { "epoch": 0.02, "learning_rate": 4.964553336594426e-05, "loss": 2.4076, "step": 1000 },
    { "epoch": 0.03, "learning_rate": 4.94683000489164e-05, "loss": 2.3823, "step": 1500 },
    { "epoch": 0.04, "learning_rate": 4.929106673188853e-05, "loss": 2.3764, "step": 2000 },
    { "epoch": 0.05, "learning_rate": 4.911383341486066e-05, "loss": 2.3482, "step": 2500 },
    { "epoch": 0.06, "learning_rate": 4.893660009783279e-05, "loss": 2.3238, "step": 3000 },
    { "epoch": 0.07, "learning_rate": 4.8759366780804925e-05, "loss": 2.3344, "step": 3500 },
    { "epoch": 0.09, "learning_rate": 4.8582133463777055e-05, "loss": 2.3137, "step": 4000 },
    { "epoch": 0.1, "learning_rate": 4.840490014674919e-05, "loss": 2.3036, "step": 4500 },
    { "epoch": 0.11, "learning_rate": 4.822766682972132e-05, "loss": 2.2799, "step": 5000 },
    { "epoch": 0.12, "learning_rate": 4.805043351269346e-05, "loss": 2.283, "step": 5500 },
    { "epoch": 0.13, "learning_rate": 4.787320019566559e-05, "loss": 2.271, "step": 6000 },
    { "epoch": 0.14, "learning_rate": 4.769596687863772e-05, "loss": 2.267, "step": 6500 },
    { "epoch": 0.15, "learning_rate": 4.751873356160985e-05, "loss": 2.2637, "step": 7000 },
    { "epoch": 0.16, "learning_rate": 4.7341500244581984e-05, "loss": 2.2565, "step": 7500 },
    { "epoch": 0.17, "learning_rate": 4.7164266927554114e-05, "loss": 2.2501, "step": 8000 },
    { "epoch": 0.18, "learning_rate": 4.6987033610526244e-05, "loss": 2.2233, "step": 8500 },
    { "epoch": 0.19, "learning_rate": 4.6809800293498373e-05, "loss": 2.2321, "step": 9000 },
    { "epoch": 0.2, "learning_rate": 4.663256697647051e-05, "loss": 2.221, "step": 9500 },
    { "epoch": 0.21, "learning_rate": 4.645533365944264e-05, "loss": 2.2343, "step": 10000 },
    { "epoch": 0.22, "learning_rate": 4.627810034241477e-05, "loss": 2.2038, "step": 10500 },
    { "epoch": 0.23, "learning_rate": 4.61008670253869e-05, "loss": 2.2013, "step": 11000 },
    { "epoch": 0.24, "learning_rate": 4.592363370835903e-05, "loss": 2.2105, "step": 11500 },
    { "epoch": 0.26, "learning_rate": 4.5746400391331166e-05, "loss": 2.1881, "step": 12000 },
    { "epoch": 0.27, "learning_rate": 4.5569167074303296e-05, "loss": 2.1832, "step": 12500 },
    { "epoch": 0.28, "learning_rate": 4.5391933757275426e-05, "loss": 2.1774, "step": 13000 },
    { "epoch": 0.29, "learning_rate": 4.521470044024756e-05, "loss": 2.1714, "step": 13500 },
    { "epoch": 0.3, "learning_rate": 4.503746712321969e-05, "loss": 2.1602, "step": 14000 },
    { "epoch": 0.31, "learning_rate": 4.486023380619183e-05, "loss": 2.1583, "step": 14500 },
    { "epoch": 0.32, "learning_rate": 4.468300048916396e-05, "loss": 2.1707, "step": 15000 },
    { "epoch": 0.33, "learning_rate": 4.450576717213609e-05, "loss": 2.1689, "step": 15500 },
    { "epoch": 0.34, "learning_rate": 4.4328533855108225e-05, "loss": 2.1479, "step": 16000 },
    { "epoch": 0.35, "learning_rate": 4.4151300538080355e-05, "loss": 2.1554, "step": 16500 },
    { "epoch": 0.36, "learning_rate": 4.3974067221052485e-05, "loss": 2.1309, "step": 17000 },
    { "epoch": 0.37, "learning_rate": 4.3796833904024615e-05, "loss": 2.1328, "step": 17500 },
    { "epoch": 0.38, "learning_rate": 4.361960058699675e-05, "loss": 2.1469, "step": 18000 },
    { "epoch": 0.39, "learning_rate": 4.344236726996888e-05, "loss": 2.1316, "step": 18500 },
    { "epoch": 0.4, "learning_rate": 4.326513395294101e-05, "loss": 2.1387, "step": 19000 },
    { "epoch": 0.41, "learning_rate": 4.308790063591314e-05, "loss": 2.1143, "step": 19500 },
    { "epoch": 0.43, "learning_rate": 4.291066731888528e-05, "loss": 2.1301, "step": 20000 },
    { "epoch": 0.44, "learning_rate": 4.273343400185741e-05, "loss": 2.1275, "step": 20500 },
    { "epoch": 0.45, "learning_rate": 4.255620068482954e-05, "loss": 2.1172, "step": 21000 },
    { "epoch": 0.46, "learning_rate": 4.237896736780167e-05, "loss": 2.118, "step": 21500 },
    { "epoch": 0.47, "learning_rate": 4.2201734050773804e-05, "loss": 2.1287, "step": 22000 },
    { "epoch": 0.48, "learning_rate": 4.202450073374593e-05, "loss": 2.1148, "step": 22500 },
    { "epoch": 0.49, "learning_rate": 4.184726741671807e-05, "loss": 2.1019, "step": 23000 },
    { "epoch": 0.5, "learning_rate": 4.16700340996902e-05, "loss": 2.1088, "step": 23500 },
    { "epoch": 0.51, "learning_rate": 4.149280078266233e-05, "loss": 2.1092, "step": 24000 },
    { "epoch": 0.52, "learning_rate": 4.1315567465634466e-05, "loss": 2.0987, "step": 24500 },
    { "epoch": 0.53, "learning_rate": 4.1138334148606596e-05, "loss": 2.0778, "step": 25000 },
    { "epoch": 0.54, "learning_rate": 4.0961100831578726e-05, "loss": 2.0963, "step": 25500 },
    { "epoch": 0.55, "learning_rate": 4.0783867514550856e-05, "loss": 2.0868, "step": 26000 },
    { "epoch": 0.56, "learning_rate": 4.060663419752299e-05, "loss": 2.0881, "step": 26500 },
    { "epoch": 0.57, "learning_rate": 4.042940088049512e-05, "loss": 2.0887, "step": 27000 },
    { "epoch": 0.58, "learning_rate": 4.025216756346725e-05, "loss": 2.0921, "step": 27500 },
    { "epoch": 0.6, "learning_rate": 4.007493424643938e-05, "loss": 2.0782, "step": 28000 },
    { "epoch": 0.61, "learning_rate": 3.989770092941152e-05, "loss": 2.0626, "step": 28500 },
    { "epoch": 0.62, "learning_rate": 3.972046761238365e-05, "loss": 2.077, "step": 29000 },
    { "epoch": 0.63, "learning_rate": 3.954323429535578e-05, "loss": 2.0728, "step": 29500 },
    { "epoch": 0.64, "learning_rate": 3.936600097832791e-05, "loss": 2.0528, "step": 30000 },
    { "epoch": 0.65, "learning_rate": 3.918876766130004e-05, "loss": 2.0661, "step": 30500 },
    { "epoch": 0.66, "learning_rate": 3.9011534344272175e-05, "loss": 2.0639, "step": 31000 },
    { "epoch": 0.67, "learning_rate": 3.8834301027244304e-05, "loss": 2.063, "step": 31500 },
    { "epoch": 0.68, "learning_rate": 3.865706771021644e-05, "loss": 2.0445, "step": 32000 },
    { "epoch": 0.69, "learning_rate": 3.847983439318857e-05, "loss": 2.0514, "step": 32500 },
    { "epoch": 0.7, "learning_rate": 3.830260107616071e-05, "loss": 2.0538, "step": 33000 },
    { "epoch": 0.71, "learning_rate": 3.812536775913284e-05, "loss": 2.041, "step": 33500 },
    { "epoch": 0.72, "learning_rate": 3.794813444210497e-05, "loss": 2.0441, "step": 34000 },
    { "epoch": 0.73, "learning_rate": 3.77709011250771e-05, "loss": 2.044, "step": 34500 },
    { "epoch": 0.74, "learning_rate": 3.7593667808049234e-05, "loss": 2.0449, "step": 35000 },
    { "epoch": 0.76, "learning_rate": 3.7416434491021363e-05, "loss": 2.0381, "step": 35500 },
    { "epoch": 0.77, "learning_rate": 3.723920117399349e-05, "loss": 2.0302, "step": 36000 },
    { "epoch": 0.78, "learning_rate": 3.706196785696562e-05, "loss": 2.028, "step": 36500 },
    { "epoch": 0.79, "learning_rate": 3.688473453993776e-05, "loss": 2.0335, "step": 37000 },
    { "epoch": 0.8, "learning_rate": 3.670750122290989e-05, "loss": 2.0289, "step": 37500 },
    { "epoch": 0.81, "learning_rate": 3.653026790588202e-05, "loss": 2.0072, "step": 38000 },
    { "epoch": 0.82, "learning_rate": 3.635303458885415e-05, "loss": 2.0341, "step": 38500 },
    { "epoch": 0.83, "learning_rate": 3.6175801271826286e-05, "loss": 2.0233, "step": 39000 },
    { "epoch": 0.84, "learning_rate": 3.5998567954798416e-05, "loss": 2.0146, "step": 39500 },
    { "epoch": 0.85, "learning_rate": 3.5821334637770546e-05, "loss": 2.0205, "step": 40000 },
    { "epoch": 0.86, "learning_rate": 3.5644101320742675e-05, "loss": 2.009, "step": 40500 },
    { "epoch": 0.87, "learning_rate": 3.546686800371481e-05, "loss": 2.0023, "step": 41000 },
    { "epoch": 0.88, "learning_rate": 3.528963468668695e-05, "loss": 2.0102, "step": 41500 },
    { "epoch": 0.89, "learning_rate": 3.511240136965908e-05, "loss": 2.0074, "step": 42000 },
    { "epoch": 0.9, "learning_rate": 3.493516805263121e-05, "loss": 1.9968, "step": 42500 },
    { "epoch": 0.91, "learning_rate": 3.475793473560334e-05, "loss": 1.9968, "step": 43000 },
    { "epoch": 0.93, "learning_rate": 3.4580701418575475e-05, "loss": 2.0042, "step": 43500 },
    { "epoch": 0.94, "learning_rate": 3.4403468101547605e-05, "loss": 1.9947, "step": 44000 },
    { "epoch": 0.95, "learning_rate": 3.4226234784519735e-05, "loss": 1.9995, "step": 44500 },
    { "epoch": 0.96, "learning_rate": 3.4049001467491864e-05, "loss": 1.9929, "step": 45000 },
    { "epoch": 0.97, "learning_rate": 3.3871768150464e-05, "loss": 1.9935, "step": 45500 },
    { "epoch": 0.98, "learning_rate": 3.369453483343613e-05, "loss": 1.9931, "step": 46000 },
    { "epoch": 0.99, "learning_rate": 3.351730151640826e-05, "loss": 1.9944, "step": 46500 },
    { "epoch": 1.0, "learning_rate": 3.334006819938039e-05, "loss": 1.9779, "step": 47000 },
    { "epoch": 1.01, "learning_rate": 3.316283488235253e-05, "loss": 1.9819, "step": 47500 },
    { "epoch": 1.02, "learning_rate": 3.298560156532466e-05, "loss": 1.9798, "step": 48000 },
    { "epoch": 1.03, "learning_rate": 3.280836824829679e-05, "loss": 1.9767, "step": 48500 },
    { "epoch": 1.04, "learning_rate": 3.263113493126892e-05, "loss": 1.963, "step": 49000 },
    { "epoch": 1.05, "learning_rate": 3.245390161424105e-05, "loss": 1.9686, "step": 49500 },
    { "epoch": 1.06, "learning_rate": 3.227666829721318e-05, "loss": 1.9529, "step": 50000 },
    { "epoch": 1.07, "learning_rate": 3.209943498018532e-05, "loss": 1.96, "step": 50500 },
    { "epoch": 1.08, "learning_rate": 3.192220166315745e-05, "loss": 1.9683, "step": 51000 },
    { "epoch": 1.1, "learning_rate": 3.174496834612958e-05, "loss": 1.9574, "step": 51500 },
    { "epoch": 1.11, "learning_rate": 3.1567735029101716e-05, "loss": 1.9573, "step": 52000 },
    { "epoch": 1.12, "learning_rate": 3.1390501712073846e-05, "loss": 1.9582, "step": 52500 },
    { "epoch": 1.13, "learning_rate": 3.1213268395045976e-05, "loss": 1.9559, "step": 53000 },
    { "epoch": 1.14, "learning_rate": 3.1036035078018106e-05, "loss": 1.9464, "step": 53500 },
    { "epoch": 1.15, "learning_rate": 3.085880176099024e-05, "loss": 1.9512, "step": 54000 },
    { "epoch": 1.16, "learning_rate": 3.068156844396237e-05, "loss": 1.9676, "step": 54500 },
    { "epoch": 1.17, "learning_rate": 3.0504335126934502e-05, "loss": 1.946, "step": 55000 },
    { "epoch": 1.18, "learning_rate": 3.0327101809906632e-05, "loss": 1.9586, "step": 55500 },
    { "epoch": 1.19, "learning_rate": 3.014986849287877e-05, "loss": 1.9553, "step": 56000 },
    { "epoch": 1.2, "learning_rate": 2.9972635175850898e-05, "loss": 1.9424, "step": 56500 },
    { "epoch": 1.21, "learning_rate": 2.979540185882303e-05, "loss": 1.9431, "step": 57000 },
    { "epoch": 1.22, "learning_rate": 2.961816854179516e-05, "loss": 1.9562, "step": 57500 },
    { "epoch": 1.23, "learning_rate": 2.9440935224767298e-05, "loss": 1.9376, "step": 58000 },
    { "epoch": 1.24, "learning_rate": 2.9263701907739428e-05, "loss": 1.9313, "step": 58500 },
    { "epoch": 1.25, "learning_rate": 2.9086468590711558e-05, "loss": 1.9492, "step": 59000 },
    { "epoch": 1.27, "learning_rate": 2.8909235273683687e-05, "loss": 1.9396, "step": 59500 },
    { "epoch": 1.28, "learning_rate": 2.8732001956655824e-05, "loss": 1.9343, "step": 60000 },
    { "epoch": 1.29, "learning_rate": 2.8554768639627954e-05, "loss": 1.9446, "step": 60500 },
    { "epoch": 1.3, "learning_rate": 2.8377535322600084e-05, "loss": 1.9325, "step": 61000 },
    { "epoch": 1.31, "learning_rate": 2.8200302005572217e-05, "loss": 1.9321, "step": 61500 },
    { "epoch": 1.32, "learning_rate": 2.8023068688544347e-05, "loss": 1.9295, "step": 62000 },
    { "epoch": 1.33, "learning_rate": 2.7845835371516483e-05, "loss": 1.928, "step": 62500 },
    { "epoch": 1.34, "learning_rate": 2.7668602054488613e-05, "loss": 1.939, "step": 63000 },
    { "epoch": 1.35, "learning_rate": 2.7491368737460743e-05, "loss": 1.9172, "step": 63500 },
    { "epoch": 1.36, "learning_rate": 2.7314135420432873e-05, "loss": 1.9185, "step": 64000 },
    { "epoch": 1.37, "learning_rate": 2.713690210340501e-05, "loss": 1.9299, "step": 64500 },
    { "epoch": 1.38, "learning_rate": 2.695966878637714e-05, "loss": 1.9301, "step": 65000 },
    { "epoch": 1.39, "learning_rate": 2.678243546934927e-05, "loss": 1.9189, "step": 65500 },
    { "epoch": 1.4, "learning_rate": 2.6605202152321402e-05, "loss": 1.9303, "step": 66000 },
    { "epoch": 1.41, "learning_rate": 2.6427968835293536e-05, "loss": 1.9151, "step": 66500 },
    { "epoch": 1.42, "learning_rate": 2.625073551826567e-05, "loss": 1.9236, "step": 67000 },
    { "epoch": 1.44, "learning_rate": 2.60735022012378e-05, "loss": 1.9198, "step": 67500 },
    { "epoch": 1.45, "learning_rate": 2.589626888420993e-05, "loss": 1.91, "step": 68000 },
    { "epoch": 1.46, "learning_rate": 2.5719035567182065e-05, "loss": 1.8999, "step": 68500 },
    { "epoch": 1.47, "learning_rate": 2.5541802250154195e-05, "loss": 1.9141, "step": 69000 },
    { "epoch": 1.48, "learning_rate": 2.5364568933126325e-05, "loss": 1.9012, "step": 69500 },
    { "epoch": 1.49, "learning_rate": 2.5187335616098455e-05, "loss": 1.8999, "step": 70000 },
    { "epoch": 1.5, "learning_rate": 2.501010229907059e-05, "loss": 1.8923, "step": 70500 },
    { "epoch": 1.51, "learning_rate": 2.483286898204272e-05, "loss": 1.9068, "step": 71000 },
    { "epoch": 1.52, "learning_rate": 2.4655635665014854e-05, "loss": 1.9013, "step": 71500 },
    { "epoch": 1.53, "learning_rate": 2.4478402347986984e-05, "loss": 1.9018, "step": 72000 },
    { "epoch": 1.54, "learning_rate": 2.4301169030959117e-05, "loss": 1.8892, "step": 72500 },
    { "epoch": 1.55, "learning_rate": 2.4123935713931247e-05, "loss": 1.9016, "step": 73000 },
    { "epoch": 1.56, "learning_rate": 2.394670239690338e-05, "loss": 1.9058, "step": 73500 },
    { "epoch": 1.57, "learning_rate": 2.376946907987551e-05, "loss": 1.91, "step": 74000 },
    { "epoch": 1.58, "learning_rate": 2.3592235762847644e-05, "loss": 1.889, "step": 74500 },
    { "epoch": 1.6, "learning_rate": 2.3415002445819777e-05, "loss": 1.8988, "step": 75000 },
    { "epoch": 1.61, "learning_rate": 2.323776912879191e-05, "loss": 1.8957, "step": 75500 },
    { "epoch": 1.62, "learning_rate": 2.306053581176404e-05, "loss": 1.8938, "step": 76000 },
    { "epoch": 1.63, "learning_rate": 2.2883302494736173e-05, "loss": 1.8979, "step": 76500 },
    { "epoch": 1.64, "learning_rate": 2.2706069177708303e-05, "loss": 1.8898, "step": 77000 },
    { "epoch": 1.65, "learning_rate": 2.2528835860680436e-05, "loss": 1.891, "step": 77500 },
    { "epoch": 1.66, "learning_rate": 2.2351602543652566e-05, "loss": 1.8858, "step": 78000 },
    { "epoch": 1.67, "learning_rate": 2.21743692266247e-05, "loss": 1.8749, "step": 78500 },
    { "epoch": 1.68, "learning_rate": 2.199713590959683e-05, "loss": 1.8833, "step": 79000 },
    { "epoch": 1.69, "learning_rate": 2.1819902592568962e-05, "loss": 1.8788, "step": 79500 },
    { "epoch": 1.7, "learning_rate": 2.1642669275541096e-05, "loss": 1.8809, "step": 80000 },
    { "epoch": 1.71, "learning_rate": 2.146543595851323e-05, "loss": 1.8796, "step": 80500 },
    { "epoch": 1.72, "learning_rate": 2.128820264148536e-05, "loss": 1.8863, "step": 81000 },
    { "epoch": 1.73, "learning_rate": 2.111096932445749e-05, "loss": 1.8814, "step": 81500 },
    { "epoch": 1.74, "learning_rate": 2.0933736007429622e-05, "loss": 1.8814, "step": 82000 },
    { "epoch": 1.75, "learning_rate": 2.075650269040175e-05, "loss": 1.8654, "step": 82500 },
    { "epoch": 1.77, "learning_rate": 2.0579269373373885e-05, "loss": 1.8806, "step": 83000 },
    { "epoch": 1.78, "learning_rate": 2.0402036056346015e-05, "loss": 1.8659, "step": 83500 },
    { "epoch": 1.79, "learning_rate": 2.0224802739318148e-05, "loss": 1.8837, "step": 84000 },
    { "epoch": 1.8, "learning_rate": 2.004756942229028e-05, "loss": 1.8608, "step": 84500 },
    { "epoch": 1.81, "learning_rate": 1.9870336105262414e-05, "loss": 1.8754, "step": 85000 },
    { "epoch": 1.82, "learning_rate": 1.9693102788234544e-05, "loss": 1.8703, "step": 85500 },
    { "epoch": 1.83, "learning_rate": 1.9515869471206677e-05, "loss": 1.8642, "step": 86000 },
    { "epoch": 1.84, "learning_rate": 1.9338636154178807e-05, "loss": 1.8701, "step": 86500 },
    { "epoch": 1.85, "learning_rate": 1.916140283715094e-05, "loss": 1.863, "step": 87000 },
    { "epoch": 1.86, "learning_rate": 1.898416952012307e-05, "loss": 1.8672, "step": 87500 },
    { "epoch": 1.87, "learning_rate": 1.8806936203095204e-05, "loss": 1.8638, "step": 88000 },
    { "epoch": 1.88, "learning_rate": 1.8629702886067333e-05, "loss": 1.8709, "step": 88500 },
    { "epoch": 1.89, "learning_rate": 1.845246956903947e-05, "loss": 1.8496, "step": 89000 },
    { "epoch": 1.9, "learning_rate": 1.82752362520116e-05, "loss": 1.8601, "step": 89500 },
    { "epoch": 1.91, "learning_rate": 1.8098002934983733e-05, "loss": 1.8628, "step": 90000 },
    { "epoch": 1.92, "learning_rate": 1.7920769617955863e-05, "loss": 1.8551, "step": 90500 },
    { "epoch": 1.94, "learning_rate": 1.7743536300927993e-05, "loss": 1.8493, "step": 91000 },
    { "epoch": 1.95, "learning_rate": 1.7566302983900126e-05, "loss": 1.866, "step": 91500 },
    { "epoch": 1.96, "learning_rate": 1.7389069666872256e-05, "loss": 1.8527, "step": 92000 },
    { "epoch": 1.97, "learning_rate": 1.721183634984439e-05, "loss": 1.8541, "step": 92500 },
    { "epoch": 1.98, "learning_rate": 1.7034603032816522e-05, "loss": 1.8483, "step": 93000 },
    { "epoch": 1.99, "learning_rate": 1.6857369715788656e-05, "loss": 1.8555, "step": 93500 },
    { "epoch": 2.0, "learning_rate": 1.6680136398760785e-05, "loss": 1.8524, "step": 94000 },
    { "epoch": 2.01, "learning_rate": 1.650290308173292e-05, "loss": 1.8502, "step": 94500 },
    { "epoch": 2.02, "learning_rate": 1.632566976470505e-05, "loss": 1.8332, "step": 95000 },
    { "epoch": 2.03, "learning_rate": 1.614843644767718e-05, "loss": 1.8336, "step": 95500 },
    { "epoch": 2.04, "learning_rate": 1.597120313064931e-05, "loss": 1.8362, "step": 96000 },
    { "epoch": 2.05, "learning_rate": 1.5793969813621445e-05, "loss": 1.845, "step": 96500 },
    { "epoch": 2.06, "learning_rate": 1.5616736496593575e-05, "loss": 1.8313, "step": 97000 },
    { "epoch": 2.07, "learning_rate": 1.5439503179565708e-05, "loss": 1.8447, "step": 97500 },
    { "epoch": 2.08, "learning_rate": 1.526226986253784e-05, "loss": 1.8309, "step": 98000 },
    { "epoch": 2.09, "learning_rate": 1.5085036545509973e-05, "loss": 1.8321, "step": 98500 },
    { "epoch": 2.11, "learning_rate": 1.4907803228482104e-05, "loss": 1.8326, "step": 99000 },
    { "epoch": 2.12, "learning_rate": 1.4730569911454237e-05, "loss": 1.8386, "step": 99500 },
    { "epoch": 2.13, "learning_rate": 1.4553336594426367e-05, "loss": 1.8316, "step": 100000 },
    { "epoch": 2.14, "learning_rate": 1.43761032773985e-05, "loss": 1.8337, "step": 100500 },
    { "epoch": 2.15, "learning_rate": 1.419886996037063e-05, "loss": 1.8395, "step": 101000 },
    { "epoch": 2.16, "learning_rate": 1.4021636643342762e-05, "loss": 1.8345, "step": 101500 },
    { "epoch": 2.17, "learning_rate": 1.3844403326314895e-05, "loss": 1.8324, "step": 102000 },
    { "epoch": 2.18, "learning_rate": 1.3667170009287025e-05, "loss": 1.8222, "step": 102500 },
    { "epoch": 2.19, "learning_rate": 1.3489936692259158e-05, "loss": 1.8077, "step": 103000 },
    { "epoch": 2.2, "learning_rate": 1.331270337523129e-05, "loss": 1.8202, "step": 103500 },
    { "epoch": 2.21, "learning_rate": 1.3135470058203423e-05, "loss": 1.8052, "step": 104000 },
    { "epoch": 2.22, "learning_rate": 1.2958236741175553e-05, "loss": 1.8069, "step": 104500 },
    { "epoch": 2.23, "learning_rate": 1.2781003424147686e-05, "loss": 1.8158, "step": 105000 },
    { "epoch": 2.24, "learning_rate": 1.2603770107119817e-05, "loss": 1.8183, "step": 105500 },
    { "epoch": 2.25, "learning_rate": 1.242653679009195e-05, "loss": 1.8116, "step": 106000 },
    { "epoch": 2.27, "learning_rate": 1.224930347306408e-05, "loss": 1.8095, "step": 106500 },
    { "epoch": 2.28, "learning_rate": 1.2072070156036212e-05, "loss": 1.8207, "step": 107000 },
    { "epoch": 2.29, "learning_rate": 1.1894836839008344e-05, "loss": 1.8184, "step": 107500 },
    { "epoch": 2.3, "learning_rate": 1.1717603521980477e-05, "loss": 1.8278, "step": 108000 },
    { "epoch": 2.31, "learning_rate": 1.1540370204952608e-05, "loss": 1.8164, "step": 108500 },
    { "epoch": 2.32, "learning_rate": 1.136313688792474e-05, "loss": 1.8244, "step": 109000 },
    { "epoch": 2.33, "learning_rate": 1.1185903570896871e-05, "loss": 1.8199, "step": 109500 },
    { "epoch": 2.34, "learning_rate": 1.1008670253869003e-05, "loss": 1.8252, "step": 110000 },
    { "epoch": 2.35, "learning_rate": 1.0831436936841136e-05, "loss": 1.808, "step": 110500 },
    { "epoch": 2.36, "learning_rate": 1.0654203619813268e-05, "loss": 1.8097, "step": 111000 },
    { "epoch": 2.37, "learning_rate": 1.04769703027854e-05, "loss": 1.8049, "step": 111500 },
    { "epoch": 2.38, "learning_rate": 1.029973698575753e-05, "loss": 1.8081, "step": 112000 },
    { "epoch": 2.39, "learning_rate": 1.0122503668729662e-05, "loss": 1.8134, "step": 112500 },
    { "epoch": 2.4, "learning_rate": 9.945270351701796e-06, "loss": 1.7976, "step": 113000 },
    { "epoch": 2.41, "learning_rate": 9.768037034673927e-06, "loss": 1.8036, "step": 113500 },
    { "epoch": 2.42, "learning_rate": 9.590803717646059e-06, "loss": 1.7979, "step": 114000 },
    { "epoch": 2.44, "learning_rate": 9.41357040061819e-06, "loss": 1.8165, "step": 114500 },
    { "epoch": 2.45, "learning_rate": 9.236337083590323e-06, "loss": 1.8069, "step": 115000 },
    { "epoch": 2.46, "learning_rate": 9.059103766562455e-06, "loss": 1.7922, "step": 115500 },
    { "epoch": 2.47, "learning_rate": 8.881870449534587e-06, "loss": 1.7981, "step": 116000 },
    { "epoch": 2.48, "learning_rate": 8.704637132506716e-06, "loss": 1.7998, "step": 116500 },
    { "epoch": 2.49, "learning_rate": 8.52740381547885e-06, "loss": 1.7938, "step": 117000 },
    { "epoch": 2.5, "learning_rate": 8.350170498450981e-06, "loss": 1.8041, "step": 117500 },
    { "epoch": 2.51, "learning_rate": 8.172937181423113e-06, "loss": 1.8037, "step": 118000 },
    { "epoch": 2.52, "learning_rate": 7.995703864395244e-06, "loss": 1.7945, "step": 118500 },
    { "epoch": 2.53, "learning_rate": 7.818470547367376e-06, "loss": 1.8138, "step": 119000 },
    { "epoch": 2.54, "learning_rate": 7.641237230339509e-06, "loss": 1.792, "step": 119500 },
    { "epoch": 2.55, "learning_rate": 7.4640039133116405e-06, "loss": 1.7963, "step": 120000 },
    { "epoch": 2.56, "learning_rate": 7.286770596283772e-06, "loss": 1.7906, "step": 120500 },
    { "epoch": 2.57, "learning_rate": 7.1095372792559036e-06, "loss": 1.7933, "step": 121000 },
    { "epoch": 2.58, "learning_rate": 6.932303962228036e-06, "loss": 1.7877, "step": 121500 },
    { "epoch": 2.59, "learning_rate": 6.7550706452001675e-06, "loss": 1.7999, "step": 122000 },
    { "epoch": 2.61, "learning_rate": 6.5778373281723e-06, "loss": 1.7839, "step": 122500 },
    { "epoch": 2.62, "learning_rate": 6.400604011144431e-06, "loss": 1.7907, "step": 123000 },
    { "epoch": 2.63, "learning_rate": 6.223370694116564e-06, "loss": 1.7869, "step": 123500 },
    { "epoch": 2.64, "learning_rate": 6.0461373770886945e-06, "loss": 1.7736, "step": 124000 },
    { "epoch": 2.65, "learning_rate": 5.868904060060827e-06, "loss": 1.7825, "step": 124500 },
    { "epoch": 2.66, "learning_rate": 5.691670743032958e-06, "loss": 1.7831, "step": 125000 },
    { "epoch": 2.67, "learning_rate": 5.51443742600509e-06, "loss": 1.7831, "step": 125500 },
    { "epoch": 2.68, "learning_rate": 5.337204108977222e-06, "loss": 1.7848, "step": 126000 },
    { "epoch": 2.69, "learning_rate": 5.159970791949354e-06, "loss": 1.793, "step": 126500 },
    { "epoch": 2.7, "learning_rate": 4.982737474921486e-06, "loss": 1.7887, "step": 127000 },
    { "epoch": 2.71, "learning_rate": 4.805504157893618e-06, "loss": 1.7824, "step": 127500 },
    { "epoch": 2.72, "learning_rate": 4.62827084086575e-06, "loss": 1.7885, "step": 128000 },
    { "epoch": 2.73, "learning_rate": 4.451037523837882e-06, "loss": 1.7892, "step": 128500 },
    { "epoch": 2.74, "learning_rate": 4.273804206810013e-06, "loss": 1.7832, "step": 129000 },
    { "epoch": 2.75, "learning_rate": 4.096570889782145e-06, "loss": 1.7705, "step": 129500 },
    { "epoch": 2.76, "learning_rate": 3.919337572754276e-06, "loss": 1.7775, "step": 130000 },
    { "epoch": 2.78, "learning_rate": 3.7421042557264087e-06, "loss": 1.7737, "step": 130500 },
    { "epoch": 2.79, "learning_rate": 3.5648709386985406e-06, "loss": 1.7858, "step": 131000 },
    { "epoch": 2.8, "learning_rate": 3.387637621670672e-06, "loss": 1.784, "step": 131500 },
    { "epoch": 2.81, "learning_rate": 3.210404304642804e-06, "loss": 1.7766, "step": 132000 },
    { "epoch": 2.82, "learning_rate": 3.033170987614936e-06, "loss": 1.7806, "step": 132500 },
    { "epoch": 2.83, "learning_rate": 2.8559376705870676e-06, "loss": 1.7574, "step": 133000 },
    { "epoch": 2.84, "learning_rate": 2.6787043535591996e-06, "loss": 1.7697, "step": 133500 },
    { "epoch": 2.85, "learning_rate": 2.5014710365313316e-06, "loss": 1.7834, "step": 134000 },
    { "epoch": 2.86, "learning_rate": 2.3242377195034635e-06, "loss": 1.7644, "step": 134500 },
    { "epoch": 2.87, "learning_rate": 2.147004402475595e-06, "loss": 1.7794, "step": 135000 },
    { "epoch": 2.88, "learning_rate": 1.9697710854477266e-06, "loss": 1.7657, "step": 135500 },
    { "epoch": 2.89, "learning_rate": 1.7925377684198588e-06, "loss": 1.7786, "step": 136000 },
    { "epoch": 2.9, "learning_rate": 1.6153044513919905e-06, "loss": 1.7756, "step": 136500 },
    { "epoch": 2.91, "learning_rate": 1.4380711343641223e-06, "loss": 1.7742, "step": 137000 },
    { "epoch": 2.92, "learning_rate": 1.2608378173362542e-06, "loss": 1.7711, "step": 137500 },
    { "epoch": 2.93, "learning_rate": 1.0836045003083862e-06, "loss": 1.7596, "step": 138000 },
    { "epoch": 2.95, "learning_rate": 9.063711832805178e-07, "loss": 1.7777, "step": 138500 },
    { "epoch": 2.96, "learning_rate": 7.291378662526497e-07, "loss": 1.7767, "step": 139000 },
    { "epoch": 2.97, "learning_rate": 5.519045492247815e-07, "loss": 1.7738, "step": 139500 },
    { "epoch": 2.98, "learning_rate": 3.746712321969133e-07, "loss": 1.787, "step": 140000 },
    { "epoch": 2.99, "learning_rate": 1.9743791516904515e-07, "loss": 1.7725, "step": 140500 },
    { "epoch": 3.0, "learning_rate": 2.0204598141176972e-08, "loss": 1.7788, "step": 141000 },
    {
      "epoch": 3.0,
      "step": 141057,
      "total_flos": 3.722159267218719e+17,
      "train_loss": 1.9451130962921488,
      "train_runtime": 85182.9847,
      "train_samples_per_second": 16.559,
      "train_steps_per_second": 1.656
    }
  ],
  "max_steps": 141057,
  "num_train_epochs": 3,
  "total_flos": 3.722159267218719e+17,
  "trial_name": null,
  "trial_params": null
}