{ "best_metric": null, "best_model_checkpoint": null, "epoch": 500.0, "eval_steps": 500, "global_step": 6500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.003992, "loss": 10.6341, "step": 13 }, { "epoch": 2.0, "learning_rate": 0.003984, "loss": 8.0261, "step": 26 }, { "epoch": 3.0, "learning_rate": 0.003976, "loss": 7.6356, "step": 39 }, { "epoch": 4.0, "learning_rate": 0.003968, "loss": 7.489, "step": 52 }, { "epoch": 5.0, "learning_rate": 0.00396, "loss": 7.3955, "step": 65 }, { "epoch": 6.0, "learning_rate": 0.003952, "loss": 7.3814, "step": 78 }, { "epoch": 7.0, "learning_rate": 0.0039440000000000005, "loss": 7.3919, "step": 91 }, { "epoch": 8.0, "learning_rate": 0.003936, "loss": 7.2877, "step": 104 }, { "epoch": 9.0, "learning_rate": 0.003928, "loss": 7.0588, "step": 117 }, { "epoch": 10.0, "learning_rate": 0.00392, "loss": 6.9853, "step": 130 }, { "epoch": 11.0, "learning_rate": 0.003912, "loss": 6.9981, "step": 143 }, { "epoch": 12.0, "learning_rate": 0.003904, "loss": 6.8759, "step": 156 }, { "epoch": 13.0, "learning_rate": 0.003896, "loss": 6.8897, "step": 169 }, { "epoch": 14.0, "learning_rate": 0.003888, "loss": 7.1851, "step": 182 }, { "epoch": 15.0, "learning_rate": 0.0038799999999999998, "loss": 7.3121, "step": 195 }, { "epoch": 16.0, "learning_rate": 0.003872, "loss": 7.2602, "step": 208 }, { "epoch": 17.0, "learning_rate": 0.003864, "loss": 7.2026, "step": 221 }, { "epoch": 18.0, "learning_rate": 0.003856, "loss": 7.2713, "step": 234 }, { "epoch": 19.0, "learning_rate": 0.003848, "loss": 7.1885, "step": 247 }, { "epoch": 20.0, "learning_rate": 0.00384, "loss": 7.2042, "step": 260 }, { "epoch": 21.0, "learning_rate": 0.003832, "loss": 7.1744, "step": 273 }, { "epoch": 22.0, "learning_rate": 0.0038239999999999997, "loss": 7.0481, "step": 286 }, { "epoch": 23.0, "learning_rate": 0.003816, "loss": 6.8698, "step": 299 }, { "epoch": 24.0, "learning_rate": 0.0038079999999999998, "loss": 6.7722, "step": 312 }, { "epoch": 25.0, "learning_rate": 0.0038, "loss": 6.7018, "step": 325 }, { "epoch": 26.0, "learning_rate": 0.003792, "loss": 6.6881, "step": 338 }, { "epoch": 27.0, "learning_rate": 0.003784, "loss": 6.7485, "step": 351 }, { "epoch": 28.0, "learning_rate": 0.003776, "loss": 6.5876, "step": 364 }, { "epoch": 29.0, "learning_rate": 0.003768, "loss": 6.5597, "step": 377 }, { "epoch": 30.0, "learning_rate": 0.00376, "loss": 6.5379, "step": 390 }, { "epoch": 31.0, "learning_rate": 0.0037519999999999997, "loss": 6.3772, "step": 403 }, { "epoch": 32.0, "learning_rate": 0.0037440000000000004, "loss": 6.3651, "step": 416 }, { "epoch": 33.0, "learning_rate": 0.003736, "loss": 6.305, "step": 429 }, { "epoch": 34.0, "learning_rate": 0.0037280000000000004, "loss": 6.2724, "step": 442 }, { "epoch": 35.0, "learning_rate": 0.00372, "loss": 6.183, "step": 455 }, { "epoch": 36.0, "learning_rate": 0.0037120000000000005, "loss": 6.2141, "step": 468 }, { "epoch": 37.0, "learning_rate": 0.0037040000000000003, "loss": 6.1447, "step": 481 }, { "epoch": 38.0, "learning_rate": 0.003696, "loss": 6.3683, "step": 494 }, { "epoch": 39.0, "learning_rate": 0.0036880000000000003, "loss": 6.2738, "step": 507 }, { "epoch": 40.0, "learning_rate": 0.00368, "loss": 6.0499, "step": 520 }, { "epoch": 41.0, "learning_rate": 0.0036720000000000004, "loss": 5.9005, "step": 533 }, { "epoch": 42.0, "learning_rate": 0.003664, "loss": 5.8533, "step": 546 }, { "epoch": 43.0, "learning_rate": 0.0036560000000000004, "loss": 5.8199, "step": 559 }, { "epoch": 44.0, "learning_rate": 0.003648, "loss": 6.051, "step": 572 }, { "epoch": 45.0, "learning_rate": 0.00364, "loss": 5.8496, "step": 585 }, { "epoch": 46.0, "learning_rate": 0.0036320000000000002, "loss": 5.7252, "step": 598 }, { "epoch": 47.0, "learning_rate": 0.003624, "loss": 5.6958, "step": 611 }, { "epoch": 48.0, "learning_rate": 0.0036160000000000003, "loss": 5.7218, "step": 624 }, { "epoch": 49.0, "learning_rate": 0.003608, "loss": 5.6656, "step": 637 }, { "epoch": 50.0, "learning_rate": 0.0036000000000000003, "loss": 5.612, "step": 650 }, { "epoch": 51.0, "learning_rate": 0.003592, "loss": 5.5532, "step": 663 }, { "epoch": 52.0, "learning_rate": 0.003584, "loss": 5.4327, "step": 676 }, { "epoch": 53.0, "learning_rate": 0.003576, "loss": 5.3979, "step": 689 }, { "epoch": 54.0, "learning_rate": 0.003568, "loss": 5.2903, "step": 702 }, { "epoch": 55.0, "learning_rate": 0.0035600000000000002, "loss": 5.4521, "step": 715 }, { "epoch": 56.0, "learning_rate": 0.003552, "loss": 5.6021, "step": 728 }, { "epoch": 57.0, "learning_rate": 0.0035440000000000003, "loss": 5.5058, "step": 741 }, { "epoch": 58.0, "learning_rate": 0.003536, "loss": 5.2167, "step": 754 }, { "epoch": 59.0, "learning_rate": 0.003528, "loss": 5.2102, "step": 767 }, { "epoch": 60.0, "learning_rate": 0.00352, "loss": 5.2617, "step": 780 }, { "epoch": 61.0, "learning_rate": 0.003512, "loss": 5.3012, "step": 793 }, { "epoch": 62.0, "learning_rate": 0.003504, "loss": 5.2158, "step": 806 }, { "epoch": 63.0, "learning_rate": 0.003496, "loss": 5.1959, "step": 819 }, { "epoch": 64.0, "learning_rate": 0.003488, "loss": 5.1716, "step": 832 }, { "epoch": 65.0, "learning_rate": 0.00348, "loss": 5.0796, "step": 845 }, { "epoch": 66.0, "learning_rate": 0.0034720000000000003, "loss": 4.9764, "step": 858 }, { "epoch": 67.0, "learning_rate": 0.003464, "loss": 4.974, "step": 871 }, { "epoch": 68.0, "learning_rate": 0.003456, "loss": 4.876, "step": 884 }, { "epoch": 69.0, "learning_rate": 0.003448, "loss": 4.8596, "step": 897 }, { "epoch": 70.0, "learning_rate": 0.00344, "loss": 4.7792, "step": 910 }, { "epoch": 71.0, "learning_rate": 0.003432, "loss": 4.765, "step": 923 }, { "epoch": 72.0, "learning_rate": 0.003424, "loss": 4.7933, "step": 936 }, { "epoch": 73.0, "learning_rate": 0.003416, "loss": 4.7636, "step": 949 }, { "epoch": 74.0, "learning_rate": 0.003408, "loss": 4.7114, "step": 962 }, { "epoch": 75.0, "learning_rate": 0.0034, "loss": 4.7079, "step": 975 }, { "epoch": 76.0, "learning_rate": 0.003392, "loss": 4.6745, "step": 988 }, { "epoch": 77.0, "learning_rate": 0.003384, "loss": 4.6765, "step": 1001 }, { "epoch": 78.0, "learning_rate": 0.003376, "loss": 4.5913, "step": 1014 }, { "epoch": 79.0, "learning_rate": 0.003368, "loss": 4.7949, "step": 1027 }, { "epoch": 80.0, "learning_rate": 0.00336, "loss": 4.6311, "step": 1040 }, { "epoch": 81.0, "learning_rate": 0.003352, "loss": 4.4818, "step": 1053 }, { "epoch": 82.0, "learning_rate": 0.0033439999999999998, "loss": 4.4462, "step": 1066 }, { "epoch": 83.0, "learning_rate": 0.003336, "loss": 4.5129, "step": 1079 }, { "epoch": 84.0, "learning_rate": 0.003328, "loss": 4.4626, "step": 1092 }, { "epoch": 85.0, "learning_rate": 0.00332, "loss": 4.3505, "step": 1105 }, { "epoch": 86.0, "learning_rate": 0.003312, "loss": 4.3377, "step": 1118 }, { "epoch": 87.0, "learning_rate": 0.003304, "loss": 4.4076, "step": 1131 }, { "epoch": 88.0, "learning_rate": 0.003296, "loss": 4.3765, "step": 1144 }, { "epoch": 89.0, "learning_rate": 0.0032879999999999997, "loss": 4.2473, "step": 1157 }, { "epoch": 90.0, "learning_rate": 0.00328, "loss": 4.2142, "step": 1170 }, { "epoch": 91.0, "learning_rate": 0.0032719999999999997, "loss": 4.1567, "step": 1183 }, { "epoch": 92.0, "learning_rate": 0.003264, "loss": 4.1569, "step": 1196 }, { "epoch": 93.0, "learning_rate": 0.0032559999999999998, "loss": 4.1347, "step": 1209 }, { "epoch": 94.0, "learning_rate": 0.0032480000000000005, "loss": 4.0786, "step": 1222 }, { "epoch": 95.0, "learning_rate": 0.0032400000000000003, "loss": 4.0796, "step": 1235 }, { "epoch": 96.0, "learning_rate": 0.003232, "loss": 4.0432, "step": 1248 }, { "epoch": 97.0, "learning_rate": 0.0032240000000000003, "loss": 4.033, "step": 1261 }, { "epoch": 98.0, "learning_rate": 0.003216, "loss": 3.952, "step": 1274 }, { "epoch": 99.0, "learning_rate": 0.0032080000000000003, "loss": 4.0043, "step": 1287 }, { "epoch": 100.0, "learning_rate": 0.0032, "loss": 4.2161, "step": 1300 }, { "epoch": 101.0, "learning_rate": 0.0031920000000000004, "loss": 4.1006, "step": 1313 }, { "epoch": 102.0, "learning_rate": 0.003184, "loss": 4.1527, "step": 1326 }, { "epoch": 103.0, "learning_rate": 0.0031760000000000004, "loss": 3.9791, "step": 1339 }, { "epoch": 104.0, "learning_rate": 0.0031680000000000002, "loss": 3.9599, "step": 1352 }, { "epoch": 105.0, "learning_rate": 0.00316, "loss": 3.9998, "step": 1365 }, { "epoch": 106.0, "learning_rate": 0.0031520000000000003, "loss": 3.973, "step": 1378 }, { "epoch": 107.0, "learning_rate": 0.003144, "loss": 3.9976, "step": 1391 }, { "epoch": 108.0, "learning_rate": 0.0031360000000000003, "loss": 3.9862, "step": 1404 }, { "epoch": 109.0, "learning_rate": 0.003128, "loss": 3.8562, "step": 1417 }, { "epoch": 110.0, "learning_rate": 0.0031200000000000004, "loss": 3.8322, "step": 1430 }, { "epoch": 111.0, "learning_rate": 0.003112, "loss": 3.8451, "step": 1443 }, { "epoch": 112.0, "learning_rate": 0.003104, "loss": 3.8274, "step": 1456 }, { "epoch": 113.0, "learning_rate": 0.0030960000000000002, "loss": 3.8483, "step": 1469 }, { "epoch": 114.0, "learning_rate": 0.003088, "loss": 3.7911, "step": 1482 }, { "epoch": 115.0, "learning_rate": 0.0030800000000000003, "loss": 3.8203, "step": 1495 }, { "epoch": 116.0, "learning_rate": 0.003072, "loss": 3.7111, "step": 1508 }, { "epoch": 117.0, "learning_rate": 0.0030640000000000003, "loss": 3.7186, "step": 1521 }, { "epoch": 118.0, "learning_rate": 0.003056, "loss": 3.6357, "step": 1534 }, { "epoch": 119.0, "learning_rate": 0.003048, "loss": 3.6484, "step": 1547 }, { "epoch": 120.0, "learning_rate": 0.00304, "loss": 3.7188, "step": 1560 }, { "epoch": 121.0, "learning_rate": 0.003032, "loss": 3.6217, "step": 1573 }, { "epoch": 122.0, "learning_rate": 0.003024, "loss": 3.5853, "step": 1586 }, { "epoch": 123.0, "learning_rate": 0.003016, "loss": 3.6381, "step": 1599 }, { "epoch": 124.0, "learning_rate": 0.0030080000000000003, "loss": 3.6051, "step": 1612 }, { "epoch": 125.0, "learning_rate": 0.003, "loss": 3.6293, "step": 1625 }, { "epoch": 126.0, "learning_rate": 0.002992, "loss": 3.626, "step": 1638 }, { "epoch": 127.0, "learning_rate": 0.002984, "loss": 3.6121, "step": 1651 }, { "epoch": 128.0, "learning_rate": 0.002976, "loss": 3.5777, "step": 1664 }, { "epoch": 129.0, "learning_rate": 0.002968, "loss": 3.551, "step": 1677 }, { "epoch": 130.0, "learning_rate": 0.00296, "loss": 3.534, "step": 1690 }, { "epoch": 131.0, "learning_rate": 0.002952, "loss": 3.5946, "step": 1703 }, { "epoch": 132.0, "learning_rate": 0.002944, "loss": 3.6511, "step": 1716 }, { "epoch": 133.0, "learning_rate": 0.002936, "loss": 3.5556, "step": 1729 }, { "epoch": 134.0, "learning_rate": 0.002928, "loss": 3.5453, "step": 1742 }, { "epoch": 135.0, "learning_rate": 0.00292, "loss": 3.5641, "step": 1755 }, { "epoch": 136.0, "learning_rate": 0.002912, "loss": 3.5357, "step": 1768 }, { "epoch": 137.0, "learning_rate": 0.002904, "loss": 3.5738, "step": 1781 }, { "epoch": 138.0, "learning_rate": 0.002896, "loss": 3.4697, "step": 1794 }, { "epoch": 139.0, "learning_rate": 0.002888, "loss": 3.4405, "step": 1807 }, { "epoch": 140.0, "learning_rate": 0.0028799999999999997, "loss": 3.3998, "step": 1820 }, { "epoch": 141.0, "learning_rate": 0.002872, "loss": 3.4035, "step": 1833 }, { "epoch": 142.0, "learning_rate": 0.002864, "loss": 3.4335, "step": 1846 }, { "epoch": 143.0, "learning_rate": 0.002856, "loss": 3.4105, "step": 1859 }, { "epoch": 144.0, "learning_rate": 0.002848, "loss": 3.3161, "step": 1872 }, { "epoch": 145.0, "learning_rate": 0.00284, "loss": 3.2802, "step": 1885 }, { "epoch": 146.0, "learning_rate": 0.002832, "loss": 3.2573, "step": 1898 }, { "epoch": 147.0, "learning_rate": 0.0028239999999999997, "loss": 3.265, "step": 1911 }, { "epoch": 148.0, "learning_rate": 0.002816, "loss": 3.3362, "step": 1924 }, { "epoch": 149.0, "learning_rate": 0.0028079999999999997, "loss": 3.2085, "step": 1937 }, { "epoch": 150.0, "learning_rate": 0.0028, "loss": 3.2445, "step": 1950 }, { "epoch": 151.0, "learning_rate": 0.0027919999999999998, "loss": 3.2212, "step": 1963 }, { "epoch": 152.0, "learning_rate": 0.002784, "loss": 3.2135, "step": 1976 }, { "epoch": 153.0, "learning_rate": 0.002776, "loss": 3.173, "step": 1989 }, { "epoch": 154.0, "learning_rate": 0.002768, "loss": 3.1946, "step": 2002 }, { "epoch": 155.0, "learning_rate": 0.00276, "loss": 3.1739, "step": 2015 }, { "epoch": 156.0, "learning_rate": 0.0027519999999999997, "loss": 3.1975, "step": 2028 }, { "epoch": 157.0, "learning_rate": 0.0027440000000000003, "loss": 3.148, "step": 2041 }, { "epoch": 158.0, "learning_rate": 0.002736, "loss": 3.1124, "step": 2054 }, { "epoch": 159.0, "learning_rate": 0.0027280000000000004, "loss": 3.1101, "step": 2067 }, { "epoch": 160.0, "learning_rate": 0.00272, "loss": 3.155, "step": 2080 }, { "epoch": 161.0, "learning_rate": 0.0027120000000000004, "loss": 3.091, "step": 2093 }, { "epoch": 162.0, "learning_rate": 0.0027040000000000002, "loss": 3.0156, "step": 2106 }, { "epoch": 163.0, "learning_rate": 0.002696, "loss": 3.031, "step": 2119 }, { "epoch": 164.0, "learning_rate": 0.0026880000000000003, "loss": 3.0426, "step": 2132 }, { "epoch": 165.0, "learning_rate": 0.00268, "loss": 2.9667, "step": 2145 }, { "epoch": 166.0, "learning_rate": 0.0026720000000000003, "loss": 2.9496, "step": 2158 }, { "epoch": 167.0, "learning_rate": 0.002664, "loss": 3.0151, "step": 2171 }, { "epoch": 168.0, "learning_rate": 0.0026560000000000004, "loss": 3.0202, "step": 2184 }, { "epoch": 169.0, "learning_rate": 0.002648, "loss": 3.1202, "step": 2197 }, { "epoch": 170.0, "learning_rate": 0.00264, "loss": 3.0814, "step": 2210 }, { "epoch": 171.0, "learning_rate": 0.0026320000000000002, "loss": 2.9501, "step": 2223 }, { "epoch": 172.0, "learning_rate": 0.002624, "loss": 2.8994, "step": 2236 }, { "epoch": 173.0, "learning_rate": 0.0026160000000000003, "loss": 2.8437, "step": 2249 }, { "epoch": 174.0, "learning_rate": 0.002608, "loss": 2.8867, "step": 2262 }, { "epoch": 175.0, "learning_rate": 0.0026000000000000003, "loss": 2.8977, "step": 2275 }, { "epoch": 176.0, "learning_rate": 0.002592, "loss": 2.8601, "step": 2288 }, { "epoch": 177.0, "learning_rate": 0.002584, "loss": 2.9511, "step": 2301 }, { "epoch": 178.0, "learning_rate": 0.002576, "loss": 2.8396, "step": 2314 }, { "epoch": 179.0, "learning_rate": 0.002568, "loss": 2.8238, "step": 2327 }, { "epoch": 180.0, "learning_rate": 0.00256, "loss": 2.8048, "step": 2340 }, { "epoch": 181.0, "learning_rate": 0.002552, "loss": 2.7583, "step": 2353 }, { "epoch": 182.0, "learning_rate": 0.0025440000000000003, "loss": 2.7443, "step": 2366 }, { "epoch": 183.0, "learning_rate": 0.002536, "loss": 2.7362, "step": 2379 }, { "epoch": 184.0, "learning_rate": 0.002528, "loss": 2.7878, "step": 2392 }, { "epoch": 185.0, "learning_rate": 0.00252, "loss": 2.7811, "step": 2405 }, { "epoch": 186.0, "learning_rate": 0.002512, "loss": 2.7213, "step": 2418 }, { "epoch": 187.0, "learning_rate": 0.002504, "loss": 2.7716, "step": 2431 }, { "epoch": 188.0, "learning_rate": 0.002496, "loss": 2.7761, "step": 2444 }, { "epoch": 189.0, "learning_rate": 0.002488, "loss": 2.7456, "step": 2457 }, { "epoch": 190.0, "learning_rate": 0.00248, "loss": 2.9211, "step": 2470 }, { "epoch": 191.0, "learning_rate": 0.0024720000000000002, "loss": 2.9644, "step": 2483 }, { "epoch": 192.0, "learning_rate": 0.002464, "loss": 2.7444, "step": 2496 }, { "epoch": 193.0, "learning_rate": 0.002456, "loss": 2.7094, "step": 2509 }, { "epoch": 194.0, "learning_rate": 0.002448, "loss": 2.6593, "step": 2522 }, { "epoch": 195.0, "learning_rate": 0.00244, "loss": 2.6424, "step": 2535 }, { "epoch": 196.0, "learning_rate": 0.002432, "loss": 2.5913, "step": 2548 }, { "epoch": 197.0, "learning_rate": 0.002424, "loss": 2.6003, "step": 2561 }, { "epoch": 198.0, "learning_rate": 0.002416, "loss": 2.6317, "step": 2574 }, { "epoch": 199.0, "learning_rate": 0.002408, "loss": 2.6468, "step": 2587 }, { "epoch": 200.0, "learning_rate": 0.0024, "loss": 2.5951, "step": 2600 }, { "epoch": 201.0, "learning_rate": 0.002392, "loss": 2.5915, "step": 2613 }, { "epoch": 202.0, "learning_rate": 0.002384, "loss": 2.568, "step": 2626 }, { "epoch": 203.0, "learning_rate": 0.002376, "loss": 2.5466, "step": 2639 }, { "epoch": 204.0, "learning_rate": 0.002368, "loss": 2.6858, "step": 2652 }, { "epoch": 205.0, "learning_rate": 0.00236, "loss": 2.5551, "step": 2665 }, { "epoch": 206.0, "learning_rate": 0.002352, "loss": 2.5618, "step": 2678 }, { "epoch": 207.0, "learning_rate": 0.0023439999999999997, "loss": 2.5309, "step": 2691 }, { "epoch": 208.0, "learning_rate": 0.002336, "loss": 2.5307, "step": 2704 }, { "epoch": 209.0, "learning_rate": 0.0023279999999999998, "loss": 2.5008, "step": 2717 }, { "epoch": 210.0, "learning_rate": 0.00232, "loss": 2.5485, "step": 2730 }, { "epoch": 211.0, "learning_rate": 0.002312, "loss": 2.547, "step": 2743 }, { "epoch": 212.0, "learning_rate": 0.002304, "loss": 2.461, "step": 2756 }, { "epoch": 213.0, "learning_rate": 0.002296, "loss": 2.4375, "step": 2769 }, { "epoch": 214.0, "learning_rate": 0.0022879999999999997, "loss": 2.4417, "step": 2782 }, { "epoch": 215.0, "learning_rate": 0.00228, "loss": 2.4427, "step": 2795 }, { "epoch": 216.0, "learning_rate": 0.0022719999999999997, "loss": 2.4756, "step": 2808 }, { "epoch": 217.0, "learning_rate": 0.002264, "loss": 2.4662, "step": 2821 }, { "epoch": 218.0, "learning_rate": 0.0022559999999999998, "loss": 2.4931, "step": 2834 }, { "epoch": 219.0, "learning_rate": 0.0022480000000000004, "loss": 2.4438, "step": 2847 }, { "epoch": 220.0, "learning_rate": 0.0022400000000000002, "loss": 2.3834, "step": 2860 }, { "epoch": 221.0, "learning_rate": 0.002232, "loss": 2.4078, "step": 2873 }, { "epoch": 222.0, "learning_rate": 0.0022240000000000003, "loss": 2.3813, "step": 2886 }, { "epoch": 223.0, "learning_rate": 0.002216, "loss": 2.382, "step": 2899 }, { "epoch": 224.0, "learning_rate": 0.0022080000000000003, "loss": 2.361, "step": 2912 }, { "epoch": 225.0, "learning_rate": 0.0022, "loss": 2.3106, "step": 2925 }, { "epoch": 226.0, "learning_rate": 0.0021920000000000004, "loss": 2.2991, "step": 2938 }, { "epoch": 227.0, "learning_rate": 0.002184, "loss": 2.231, "step": 2951 }, { "epoch": 228.0, "learning_rate": 0.0021760000000000004, "loss": 2.2748, "step": 2964 }, { "epoch": 229.0, "learning_rate": 0.0021680000000000002, "loss": 2.2974, "step": 2977 }, { "epoch": 230.0, "learning_rate": 0.00216, "loss": 2.2974, "step": 2990 }, { "epoch": 231.0, "learning_rate": 0.0021520000000000003, "loss": 2.2755, "step": 3003 }, { "epoch": 232.0, "learning_rate": 0.002144, "loss": 2.287, "step": 3016 }, { "epoch": 233.0, "learning_rate": 0.0021360000000000003, "loss": 2.2462, "step": 3029 }, { "epoch": 234.0, "learning_rate": 0.002128, "loss": 2.2528, "step": 3042 }, { "epoch": 235.0, "learning_rate": 0.0021200000000000004, "loss": 2.2052, "step": 3055 }, { "epoch": 236.0, "learning_rate": 0.002112, "loss": 2.2461, "step": 3068 }, { "epoch": 237.0, "learning_rate": 0.002104, "loss": 2.2099, "step": 3081 }, { "epoch": 238.0, "learning_rate": 0.002096, "loss": 2.1273, "step": 3094 }, { "epoch": 239.0, "learning_rate": 0.002088, "loss": 2.1668, "step": 3107 }, { "epoch": 240.0, "learning_rate": 0.0020800000000000003, "loss": 2.1719, "step": 3120 }, { "epoch": 241.0, "learning_rate": 0.002072, "loss": 2.171, "step": 3133 }, { "epoch": 242.0, "learning_rate": 0.0020640000000000003, "loss": 2.1436, "step": 3146 }, { "epoch": 243.0, "learning_rate": 0.002056, "loss": 2.1698, "step": 3159 }, { "epoch": 244.0, "learning_rate": 0.002048, "loss": 2.1576, "step": 3172 }, { "epoch": 245.0, "learning_rate": 0.00204, "loss": 2.1641, "step": 3185 }, { "epoch": 246.0, "learning_rate": 0.002032, "loss": 2.1721, "step": 3198 }, { "epoch": 247.0, "learning_rate": 0.002024, "loss": 2.1615, "step": 3211 }, { "epoch": 248.0, "learning_rate": 0.002016, "loss": 2.0983, "step": 3224 }, { "epoch": 249.0, "learning_rate": 0.0020080000000000002, "loss": 2.108, "step": 3237 }, { "epoch": 250.0, "learning_rate": 0.002, "loss": 2.1167, "step": 3250 }, { "epoch": 251.0, "learning_rate": 0.001992, "loss": 2.0951, "step": 3263 }, { "epoch": 252.0, "learning_rate": 0.001984, "loss": 2.0415, "step": 3276 }, { "epoch": 253.0, "learning_rate": 0.001976, "loss": 2.101, "step": 3289 }, { "epoch": 254.0, "learning_rate": 0.001968, "loss": 2.1233, "step": 3302 }, { "epoch": 255.0, "learning_rate": 0.00196, "loss": 2.0782, "step": 3315 }, { "epoch": 256.0, "learning_rate": 0.001952, "loss": 2.0033, "step": 3328 }, { "epoch": 257.0, "learning_rate": 0.001944, "loss": 2.051, "step": 3341 }, { "epoch": 258.0, "learning_rate": 0.001936, "loss": 2.0587, "step": 3354 }, { "epoch": 259.0, "learning_rate": 0.001928, "loss": 1.9981, "step": 3367 }, { "epoch": 260.0, "learning_rate": 0.00192, "loss": 2.0506, "step": 3380 }, { "epoch": 261.0, "learning_rate": 0.0019119999999999999, "loss": 2.0815, "step": 3393 }, { "epoch": 262.0, "learning_rate": 0.0019039999999999999, "loss": 2.0054, "step": 3406 }, { "epoch": 263.0, "learning_rate": 0.001896, "loss": 1.9923, "step": 3419 }, { "epoch": 264.0, "learning_rate": 0.001888, "loss": 1.9892, "step": 3432 }, { "epoch": 265.0, "learning_rate": 0.00188, "loss": 1.9406, "step": 3445 }, { "epoch": 266.0, "learning_rate": 0.0018720000000000002, "loss": 1.9295, "step": 3458 }, { "epoch": 267.0, "learning_rate": 0.0018640000000000002, "loss": 1.9791, "step": 3471 }, { "epoch": 268.0, "learning_rate": 0.0018560000000000002, "loss": 1.9413, "step": 3484 }, { "epoch": 269.0, "learning_rate": 0.001848, "loss": 1.9363, "step": 3497 }, { "epoch": 270.0, "learning_rate": 0.00184, "loss": 2.0056, "step": 3510 }, { "epoch": 271.0, "learning_rate": 0.001832, "loss": 1.9298, "step": 3523 }, { "epoch": 272.0, "learning_rate": 0.001824, "loss": 1.9045, "step": 3536 }, { "epoch": 273.0, "learning_rate": 0.0018160000000000001, "loss": 1.9165, "step": 3549 }, { "epoch": 274.0, "learning_rate": 0.0018080000000000001, "loss": 1.9214, "step": 3562 }, { "epoch": 275.0, "learning_rate": 0.0018000000000000002, "loss": 1.9063, "step": 3575 }, { "epoch": 276.0, "learning_rate": 0.001792, "loss": 1.9016, "step": 3588 }, { "epoch": 277.0, "learning_rate": 0.001784, "loss": 1.8091, "step": 3601 }, { "epoch": 278.0, "learning_rate": 0.001776, "loss": 1.8626, "step": 3614 }, { "epoch": 279.0, "learning_rate": 0.001768, "loss": 1.8663, "step": 3627 }, { "epoch": 280.0, "learning_rate": 0.00176, "loss": 1.9432, "step": 3640 }, { "epoch": 281.0, "learning_rate": 0.001752, "loss": 1.8664, "step": 3653 }, { "epoch": 282.0, "learning_rate": 0.001744, "loss": 1.8603, "step": 3666 }, { "epoch": 283.0, "learning_rate": 0.0017360000000000001, "loss": 1.8335, "step": 3679 }, { "epoch": 284.0, "learning_rate": 0.001728, "loss": 1.8625, "step": 3692 }, { "epoch": 285.0, "learning_rate": 0.00172, "loss": 1.8043, "step": 3705 }, { "epoch": 286.0, "learning_rate": 0.001712, "loss": 1.8061, "step": 3718 }, { "epoch": 287.0, "learning_rate": 0.001704, "loss": 1.835, "step": 3731 }, { "epoch": 288.0, "learning_rate": 0.001696, "loss": 1.7944, "step": 3744 }, { "epoch": 289.0, "learning_rate": 0.001688, "loss": 1.8492, "step": 3757 }, { "epoch": 290.0, "learning_rate": 0.00168, "loss": 1.812, "step": 3770 }, { "epoch": 291.0, "learning_rate": 0.0016719999999999999, "loss": 1.8175, "step": 3783 }, { "epoch": 292.0, "learning_rate": 0.001664, "loss": 1.7943, "step": 3796 }, { "epoch": 293.0, "learning_rate": 0.001656, "loss": 1.8063, "step": 3809 }, { "epoch": 294.0, "learning_rate": 0.001648, "loss": 1.7992, "step": 3822 }, { "epoch": 295.0, "learning_rate": 0.00164, "loss": 1.7959, "step": 3835 }, { "epoch": 296.0, "learning_rate": 0.001632, "loss": 1.7256, "step": 3848 }, { "epoch": 297.0, "learning_rate": 0.0016240000000000002, "loss": 1.7673, "step": 3861 }, { "epoch": 298.0, "learning_rate": 0.001616, "loss": 1.8299, "step": 3874 }, { "epoch": 299.0, "learning_rate": 0.001608, "loss": 1.8147, "step": 3887 }, { "epoch": 300.0, "learning_rate": 0.0016, "loss": 1.7495, "step": 3900 }, { "epoch": 301.0, "learning_rate": 0.001592, "loss": 1.8001, "step": 3913 }, { "epoch": 302.0, "learning_rate": 0.0015840000000000001, "loss": 1.7707, "step": 3926 }, { "epoch": 303.0, "learning_rate": 0.0015760000000000001, "loss": 1.7283, "step": 3939 }, { "epoch": 304.0, "learning_rate": 0.0015680000000000002, "loss": 1.7133, "step": 3952 }, { "epoch": 305.0, "learning_rate": 0.0015600000000000002, "loss": 1.71, "step": 3965 }, { "epoch": 306.0, "learning_rate": 0.001552, "loss": 1.6685, "step": 3978 }, { "epoch": 307.0, "learning_rate": 0.001544, "loss": 1.6526, "step": 3991 }, { "epoch": 308.0, "learning_rate": 0.001536, "loss": 1.6433, "step": 4004 }, { "epoch": 309.0, "learning_rate": 0.001528, "loss": 1.6823, "step": 4017 }, { "epoch": 310.0, "learning_rate": 0.00152, "loss": 1.6843, "step": 4030 }, { "epoch": 311.0, "learning_rate": 0.001512, "loss": 1.7029, "step": 4043 }, { "epoch": 312.0, "learning_rate": 0.0015040000000000001, "loss": 1.6362, "step": 4056 }, { "epoch": 313.0, "learning_rate": 0.001496, "loss": 1.6648, "step": 4069 }, { "epoch": 314.0, "learning_rate": 0.001488, "loss": 1.7202, "step": 4082 }, { "epoch": 315.0, "learning_rate": 0.00148, "loss": 1.677, "step": 4095 }, { "epoch": 316.0, "learning_rate": 0.001472, "loss": 1.6187, "step": 4108 }, { "epoch": 317.0, "learning_rate": 0.001464, "loss": 1.6398, "step": 4121 }, { "epoch": 318.0, "learning_rate": 0.001456, "loss": 1.6371, "step": 4134 }, { "epoch": 319.0, "learning_rate": 0.001448, "loss": 1.6081, "step": 4147 }, { "epoch": 320.0, "learning_rate": 0.0014399999999999999, "loss": 1.5936, "step": 4160 }, { "epoch": 321.0, "learning_rate": 0.001432, "loss": 1.6336, "step": 4173 }, { "epoch": 322.0, "learning_rate": 0.001424, "loss": 1.6022, "step": 4186 }, { "epoch": 323.0, "learning_rate": 0.001416, "loss": 1.6336, "step": 4199 }, { "epoch": 324.0, "learning_rate": 0.001408, "loss": 1.5898, "step": 4212 }, { "epoch": 325.0, "learning_rate": 0.0014, "loss": 1.5528, "step": 4225 }, { "epoch": 326.0, "learning_rate": 0.001392, "loss": 1.5734, "step": 4238 }, { "epoch": 327.0, "learning_rate": 0.001384, "loss": 1.618, "step": 4251 }, { "epoch": 328.0, "learning_rate": 0.0013759999999999998, "loss": 1.6529, "step": 4264 }, { "epoch": 329.0, "learning_rate": 0.001368, "loss": 1.5824, "step": 4277 }, { "epoch": 330.0, "learning_rate": 0.00136, "loss": 1.609, "step": 4290 }, { "epoch": 331.0, "learning_rate": 0.0013520000000000001, "loss": 1.5796, "step": 4303 }, { "epoch": 332.0, "learning_rate": 0.0013440000000000001, "loss": 1.5924, "step": 4316 }, { "epoch": 333.0, "learning_rate": 0.0013360000000000002, "loss": 1.5841, "step": 4329 }, { "epoch": 334.0, "learning_rate": 0.0013280000000000002, "loss": 1.5487, "step": 4342 }, { "epoch": 335.0, "learning_rate": 0.00132, "loss": 1.4625, "step": 4355 }, { "epoch": 336.0, "learning_rate": 0.001312, "loss": 1.5241, "step": 4368 }, { "epoch": 337.0, "learning_rate": 0.001304, "loss": 1.4823, "step": 4381 }, { "epoch": 338.0, "learning_rate": 0.001296, "loss": 1.5027, "step": 4394 }, { "epoch": 339.0, "learning_rate": 0.001288, "loss": 1.5211, "step": 4407 }, { "epoch": 340.0, "learning_rate": 0.00128, "loss": 1.4912, "step": 4420 }, { "epoch": 341.0, "learning_rate": 0.0012720000000000001, "loss": 1.4792, "step": 4433 }, { "epoch": 342.0, "learning_rate": 0.001264, "loss": 1.4932, "step": 4446 }, { "epoch": 343.0, "learning_rate": 0.001256, "loss": 1.4861, "step": 4459 }, { "epoch": 344.0, "learning_rate": 0.001248, "loss": 1.5171, "step": 4472 }, { "epoch": 345.0, "learning_rate": 0.00124, "loss": 1.494, "step": 4485 }, { "epoch": 346.0, "learning_rate": 0.001232, "loss": 1.4992, "step": 4498 }, { "epoch": 347.0, "learning_rate": 0.001224, "loss": 1.5033, "step": 4511 }, { "epoch": 348.0, "learning_rate": 0.001216, "loss": 1.5039, "step": 4524 }, { "epoch": 349.0, "learning_rate": 0.001208, "loss": 1.5341, "step": 4537 }, { "epoch": 350.0, "learning_rate": 0.0012, "loss": 1.5049, "step": 4550 }, { "epoch": 351.0, "learning_rate": 0.001192, "loss": 1.5104, "step": 4563 }, { "epoch": 352.0, "learning_rate": 0.001184, "loss": 1.4569, "step": 4576 }, { "epoch": 353.0, "learning_rate": 0.001176, "loss": 1.3996, "step": 4589 }, { "epoch": 354.0, "learning_rate": 0.001168, "loss": 1.4337, "step": 4602 }, { "epoch": 355.0, "learning_rate": 0.00116, "loss": 1.4572, "step": 4615 }, { "epoch": 356.0, "learning_rate": 0.001152, "loss": 1.4668, "step": 4628 }, { "epoch": 357.0, "learning_rate": 0.0011439999999999998, "loss": 1.4298, "step": 4641 }, { "epoch": 358.0, "learning_rate": 0.0011359999999999999, "loss": 1.4187, "step": 4654 }, { "epoch": 359.0, "learning_rate": 0.0011279999999999999, "loss": 1.4026, "step": 4667 }, { "epoch": 360.0, "learning_rate": 0.0011200000000000001, "loss": 1.4461, "step": 4680 }, { "epoch": 361.0, "learning_rate": 0.0011120000000000001, "loss": 1.4497, "step": 4693 }, { "epoch": 362.0, "learning_rate": 0.0011040000000000002, "loss": 1.3667, "step": 4706 }, { "epoch": 363.0, "learning_rate": 0.0010960000000000002, "loss": 1.4237, "step": 4719 }, { "epoch": 364.0, "learning_rate": 0.0010880000000000002, "loss": 1.485, "step": 4732 }, { "epoch": 365.0, "learning_rate": 0.00108, "loss": 1.4271, "step": 4745 }, { "epoch": 366.0, "learning_rate": 0.001072, "loss": 1.4046, "step": 4758 }, { "epoch": 367.0, "learning_rate": 0.001064, "loss": 1.3771, "step": 4771 }, { "epoch": 368.0, "learning_rate": 0.001056, "loss": 1.4054, "step": 4784 }, { "epoch": 369.0, "learning_rate": 0.001048, "loss": 1.3886, "step": 4797 }, { "epoch": 370.0, "learning_rate": 0.0010400000000000001, "loss": 1.3583, "step": 4810 }, { "epoch": 371.0, "learning_rate": 0.0010320000000000001, "loss": 1.3606, "step": 4823 }, { "epoch": 372.0, "learning_rate": 0.001024, "loss": 1.3619, "step": 4836 }, { "epoch": 373.0, "learning_rate": 0.001016, "loss": 1.3723, "step": 4849 }, { "epoch": 374.0, "learning_rate": 0.001008, "loss": 1.3604, "step": 4862 }, { "epoch": 375.0, "learning_rate": 0.001, "loss": 1.3745, "step": 4875 }, { "epoch": 376.0, "learning_rate": 0.000992, "loss": 1.393, "step": 4888 }, { "epoch": 377.0, "learning_rate": 0.000984, "loss": 1.3846, "step": 4901 }, { "epoch": 378.0, "learning_rate": 0.000976, "loss": 1.4033, "step": 4914 }, { "epoch": 379.0, "learning_rate": 0.000968, "loss": 1.3204, "step": 4927 }, { "epoch": 380.0, "learning_rate": 0.00096, "loss": 1.3257, "step": 4940 }, { "epoch": 381.0, "learning_rate": 0.0009519999999999999, "loss": 1.3274, "step": 4953 }, { "epoch": 382.0, "learning_rate": 0.000944, "loss": 1.3177, "step": 4966 }, { "epoch": 383.0, "learning_rate": 0.0009360000000000001, "loss": 1.3204, "step": 4979 }, { "epoch": 384.0, "learning_rate": 0.0009280000000000001, "loss": 1.3349, "step": 4992 }, { "epoch": 385.0, "learning_rate": 0.00092, "loss": 1.3149, "step": 5005 }, { "epoch": 386.0, "learning_rate": 0.000912, "loss": 1.2994, "step": 5018 }, { "epoch": 387.0, "learning_rate": 0.0009040000000000001, "loss": 1.3295, "step": 5031 }, { "epoch": 388.0, "learning_rate": 0.000896, "loss": 1.2975, "step": 5044 }, { "epoch": 389.0, "learning_rate": 0.000888, "loss": 1.3118, "step": 5057 }, { "epoch": 390.0, "learning_rate": 0.00088, "loss": 1.2712, "step": 5070 }, { "epoch": 391.0, "learning_rate": 0.000872, "loss": 1.3184, "step": 5083 }, { "epoch": 392.0, "learning_rate": 0.000864, "loss": 1.2687, "step": 5096 }, { "epoch": 393.0, "learning_rate": 0.000856, "loss": 1.2826, "step": 5109 }, { "epoch": 394.0, "learning_rate": 0.000848, "loss": 1.2766, "step": 5122 }, { "epoch": 395.0, "learning_rate": 0.00084, "loss": 1.2935, "step": 5135 }, { "epoch": 396.0, "learning_rate": 0.000832, "loss": 1.288, "step": 5148 }, { "epoch": 397.0, "learning_rate": 0.000824, "loss": 1.2617, "step": 5161 }, { "epoch": 398.0, "learning_rate": 0.000816, "loss": 1.2675, "step": 5174 }, { "epoch": 399.0, "learning_rate": 0.000808, "loss": 1.2895, "step": 5187 }, { "epoch": 400.0, "learning_rate": 0.0008, "loss": 1.2721, "step": 5200 }, { "epoch": 401.0, "learning_rate": 0.0007920000000000001, "loss": 1.2897, "step": 5213 }, { "epoch": 402.0, "learning_rate": 0.0007840000000000001, "loss": 1.2608, "step": 5226 }, { "epoch": 403.0, "learning_rate": 0.000776, "loss": 1.271, "step": 5239 }, { "epoch": 404.0, "learning_rate": 0.000768, "loss": 1.2581, "step": 5252 }, { "epoch": 405.0, "learning_rate": 0.00076, "loss": 1.2497, "step": 5265 }, { "epoch": 406.0, "learning_rate": 0.0007520000000000001, "loss": 1.2846, "step": 5278 }, { "epoch": 407.0, "learning_rate": 0.000744, "loss": 1.2718, "step": 5291 }, { "epoch": 408.0, "learning_rate": 0.000736, "loss": 1.2733, "step": 5304 }, { "epoch": 409.0, "learning_rate": 0.000728, "loss": 1.2918, "step": 5317 }, { "epoch": 410.0, "learning_rate": 0.0007199999999999999, "loss": 1.2659, "step": 5330 }, { "epoch": 411.0, "learning_rate": 0.000712, "loss": 1.2946, "step": 5343 }, { "epoch": 412.0, "learning_rate": 0.000704, "loss": 1.2425, "step": 5356 }, { "epoch": 413.0, "learning_rate": 0.000696, "loss": 1.2293, "step": 5369 }, { "epoch": 414.0, "learning_rate": 0.0006879999999999999, "loss": 1.2847, "step": 5382 }, { "epoch": 415.0, "learning_rate": 0.00068, "loss": 1.2318, "step": 5395 }, { "epoch": 416.0, "learning_rate": 0.0006720000000000001, "loss": 1.237, "step": 5408 }, { "epoch": 417.0, "learning_rate": 0.0006640000000000001, "loss": 1.1875, "step": 5421 }, { "epoch": 418.0, "learning_rate": 0.000656, "loss": 1.2204, "step": 5434 }, { "epoch": 419.0, "learning_rate": 0.000648, "loss": 1.1848, "step": 5447 }, { "epoch": 420.0, "learning_rate": 0.00064, "loss": 1.2146, "step": 5460 }, { "epoch": 421.0, "learning_rate": 0.000632, "loss": 1.1621, "step": 5473 }, { "epoch": 422.0, "learning_rate": 0.000624, "loss": 1.1883, "step": 5486 }, { "epoch": 423.0, "learning_rate": 0.000616, "loss": 1.183, "step": 5499 }, { "epoch": 424.0, "learning_rate": 0.000608, "loss": 1.1649, "step": 5512 }, { "epoch": 425.0, "learning_rate": 0.0006, "loss": 1.1824, "step": 5525 }, { "epoch": 426.0, "learning_rate": 0.000592, "loss": 1.2073, "step": 5538 }, { "epoch": 427.0, "learning_rate": 0.000584, "loss": 1.147, "step": 5551 }, { "epoch": 428.0, "learning_rate": 0.000576, "loss": 1.1798, "step": 5564 }, { "epoch": 429.0, "learning_rate": 0.0005679999999999999, "loss": 1.14, "step": 5577 }, { "epoch": 430.0, "learning_rate": 0.0005600000000000001, "loss": 1.1585, "step": 5590 }, { "epoch": 431.0, "learning_rate": 0.0005520000000000001, "loss": 1.1687, "step": 5603 }, { "epoch": 432.0, "learning_rate": 0.0005440000000000001, "loss": 1.1285, "step": 5616 }, { "epoch": 433.0, "learning_rate": 0.000536, "loss": 1.1472, "step": 5629 }, { "epoch": 434.0, "learning_rate": 0.000528, "loss": 1.1894, "step": 5642 }, { "epoch": 435.0, "learning_rate": 0.0005200000000000001, "loss": 1.1606, "step": 5655 }, { "epoch": 436.0, "learning_rate": 0.000512, "loss": 1.1294, "step": 5668 }, { "epoch": 437.0, "learning_rate": 0.000504, "loss": 1.1597, "step": 5681 }, { "epoch": 438.0, "learning_rate": 0.000496, "loss": 1.1772, "step": 5694 }, { "epoch": 439.0, "learning_rate": 0.000488, "loss": 1.2044, "step": 5707 }, { "epoch": 440.0, "learning_rate": 0.00048, "loss": 1.1543, "step": 5720 }, { "epoch": 441.0, "learning_rate": 0.000472, "loss": 1.1868, "step": 5733 }, { "epoch": 442.0, "learning_rate": 0.00046400000000000006, "loss": 1.1821, "step": 5746 }, { "epoch": 443.0, "learning_rate": 0.000456, "loss": 1.0897, "step": 5759 }, { "epoch": 444.0, "learning_rate": 0.000448, "loss": 1.0977, "step": 5772 }, { "epoch": 445.0, "learning_rate": 0.00044, "loss": 1.1695, "step": 5785 }, { "epoch": 446.0, "learning_rate": 0.000432, "loss": 1.1332, "step": 5798 }, { "epoch": 447.0, "learning_rate": 0.000424, "loss": 1.1321, "step": 5811 }, { "epoch": 448.0, "learning_rate": 0.000416, "loss": 1.1315, "step": 5824 }, { "epoch": 449.0, "learning_rate": 0.000408, "loss": 1.1178, "step": 5837 }, { "epoch": 450.0, "learning_rate": 0.0004, "loss": 1.1163, "step": 5850 }, { "epoch": 451.0, "learning_rate": 0.00039200000000000004, "loss": 1.1414, "step": 5863 }, { "epoch": 452.0, "learning_rate": 0.000384, "loss": 1.1274, "step": 5876 }, { "epoch": 453.0, "learning_rate": 0.00037600000000000003, "loss": 1.1067, "step": 5889 }, { "epoch": 454.0, "learning_rate": 0.000368, "loss": 1.0889, "step": 5902 }, { "epoch": 455.0, "learning_rate": 0.00035999999999999997, "loss": 1.0844, "step": 5915 }, { "epoch": 456.0, "learning_rate": 0.000352, "loss": 1.1341, "step": 5928 }, { "epoch": 457.0, "learning_rate": 0.00034399999999999996, "loss": 1.0644, "step": 5941 }, { "epoch": 458.0, "learning_rate": 0.00033600000000000004, "loss": 1.0991, "step": 5954 }, { "epoch": 459.0, "learning_rate": 0.000328, "loss": 1.1176, "step": 5967 }, { "epoch": 460.0, "learning_rate": 0.00032, "loss": 1.0997, "step": 5980 }, { "epoch": 461.0, "learning_rate": 0.000312, "loss": 1.0997, "step": 5993 }, { "epoch": 462.0, "learning_rate": 0.000304, "loss": 1.0763, "step": 6006 }, { "epoch": 463.0, "learning_rate": 0.000296, "loss": 1.1102, "step": 6019 }, { "epoch": 464.0, "learning_rate": 0.000288, "loss": 1.1236, "step": 6032 }, { "epoch": 465.0, "learning_rate": 0.00028000000000000003, "loss": 1.0941, "step": 6045 }, { "epoch": 466.0, "learning_rate": 0.00027200000000000005, "loss": 1.0976, "step": 6058 }, { "epoch": 467.0, "learning_rate": 0.000264, "loss": 1.0688, "step": 6071 }, { "epoch": 468.0, "learning_rate": 0.000256, "loss": 1.0591, "step": 6084 }, { "epoch": 469.0, "learning_rate": 0.000248, "loss": 1.0695, "step": 6097 }, { "epoch": 470.0, "learning_rate": 0.00024, "loss": 1.071, "step": 6110 }, { "epoch": 471.0, "learning_rate": 0.00023200000000000003, "loss": 1.0709, "step": 6123 }, { "epoch": 472.0, "learning_rate": 0.000224, "loss": 1.0767, "step": 6136 }, { "epoch": 473.0, "learning_rate": 0.000216, "loss": 1.0741, "step": 6149 }, { "epoch": 474.0, "learning_rate": 0.000208, "loss": 1.0644, "step": 6162 }, { "epoch": 475.0, "learning_rate": 0.0002, "loss": 1.0699, "step": 6175 }, { "epoch": 476.0, "learning_rate": 0.000192, "loss": 1.0727, "step": 6188 }, { "epoch": 477.0, "learning_rate": 0.000184, "loss": 1.06, "step": 6201 }, { "epoch": 478.0, "learning_rate": 0.000176, "loss": 1.0568, "step": 6214 }, { "epoch": 479.0, "learning_rate": 0.00016800000000000002, "loss": 1.0616, "step": 6227 }, { "epoch": 480.0, "learning_rate": 0.00016, "loss": 1.0491, "step": 6240 }, { "epoch": 481.0, "learning_rate": 0.000152, "loss": 1.0502, "step": 6253 }, { "epoch": 482.0, "learning_rate": 0.000144, "loss": 1.0742, "step": 6266 }, { "epoch": 483.0, "learning_rate": 0.00013600000000000003, "loss": 1.0582, "step": 6279 }, { "epoch": 484.0, "learning_rate": 0.000128, "loss": 1.0803, "step": 6292 }, { "epoch": 485.0, "learning_rate": 0.00012, "loss": 1.067, "step": 6305 }, { "epoch": 486.0, "learning_rate": 0.000112, "loss": 1.0397, "step": 6318 }, { "epoch": 487.0, "learning_rate": 0.000104, "loss": 1.0489, "step": 6331 }, { "epoch": 488.0, "learning_rate": 9.6e-05, "loss": 1.0378, "step": 6344 }, { "epoch": 489.0, "learning_rate": 8.8e-05, "loss": 1.0418, "step": 6357 }, { "epoch": 490.0, "learning_rate": 8e-05, "loss": 1.0344, "step": 6370 }, { "epoch": 491.0, "learning_rate": 7.2e-05, "loss": 1.0768, "step": 6383 }, { "epoch": 492.0, "learning_rate": 6.4e-05, "loss": 1.0296, "step": 6396 }, { "epoch": 493.0, "learning_rate": 5.6e-05, "loss": 1.0336, "step": 6409 }, { "epoch": 494.0, "learning_rate": 4.8e-05, "loss": 1.0568, "step": 6422 }, { "epoch": 495.0, "learning_rate": 4e-05, "loss": 1.0647, "step": 6435 }, { "epoch": 496.0, "learning_rate": 3.2e-05, "loss": 1.0448, "step": 6448 }, { "epoch": 497.0, "learning_rate": 2.4e-05, "loss": 1.0602, "step": 6461 }, { "epoch": 498.0, "learning_rate": 1.6e-05, "loss": 1.0615, "step": 6474 }, { "epoch": 499.0, "learning_rate": 8e-06, "loss": 1.0389, "step": 6487 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 1.0629, "step": 6500 }, { "epoch": 500.0, "step": 6500, "total_flos": 284798065115136.0, "train_loss": 2.748476623535156, "train_runtime": 71445.8185, "train_samples_per_second": 0.7, "train_steps_per_second": 0.091 }, { "epoch": 500.0, "step": 6500, "total_flos": 284798065115136.0, "train_loss": 0.0, "train_runtime": 1.3574, "train_samples_per_second": 36834.222, "train_steps_per_second": 4788.449 } ], "logging_steps": 500, "max_steps": 6500, "num_train_epochs": 500, "save_steps": 500, "total_flos": 284798065115136.0, "trial_name": null, "trial_params": null }