|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 500.0, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.003992, |
|
"loss": 10.6341, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.003984, |
|
"loss": 8.0261, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.003976, |
|
"loss": 7.6356, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.003968, |
|
"loss": 7.489, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00396, |
|
"loss": 7.3955, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.003952, |
|
"loss": 7.3814, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0039440000000000005, |
|
"loss": 7.3919, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.003936, |
|
"loss": 7.2877, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.003928, |
|
"loss": 7.0588, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00392, |
|
"loss": 6.9853, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.003912, |
|
"loss": 6.9981, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.003904, |
|
"loss": 6.8759, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.003896, |
|
"loss": 6.8897, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.003888, |
|
"loss": 7.1851, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0038799999999999998, |
|
"loss": 7.3121, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.003872, |
|
"loss": 7.2602, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.003864, |
|
"loss": 7.2026, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.003856, |
|
"loss": 7.2713, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 0.003848, |
|
"loss": 7.1885, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00384, |
|
"loss": 7.2042, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 0.003832, |
|
"loss": 7.1744, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 0.0038239999999999997, |
|
"loss": 7.0481, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 0.003816, |
|
"loss": 6.8698, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0038079999999999998, |
|
"loss": 6.7722, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.0038, |
|
"loss": 6.7018, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 0.003792, |
|
"loss": 6.6881, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 0.003784, |
|
"loss": 6.7485, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.003776, |
|
"loss": 6.5876, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 0.003768, |
|
"loss": 6.5597, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.00376, |
|
"loss": 6.5379, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 0.0037519999999999997, |
|
"loss": 6.3772, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.0037440000000000004, |
|
"loss": 6.3651, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 0.003736, |
|
"loss": 6.305, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 0.0037280000000000004, |
|
"loss": 6.2724, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 0.00372, |
|
"loss": 6.183, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 0.0037120000000000005, |
|
"loss": 6.2141, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 0.0037040000000000003, |
|
"loss": 6.1447, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 0.003696, |
|
"loss": 6.3683, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 0.0036880000000000003, |
|
"loss": 6.2738, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00368, |
|
"loss": 6.0499, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 0.0036720000000000004, |
|
"loss": 5.9005, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 0.003664, |
|
"loss": 5.8533, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 0.0036560000000000004, |
|
"loss": 5.8199, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.003648, |
|
"loss": 6.051, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 0.00364, |
|
"loss": 5.8496, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 0.0036320000000000002, |
|
"loss": 5.7252, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 0.003624, |
|
"loss": 5.6958, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.0036160000000000003, |
|
"loss": 5.7218, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 0.003608, |
|
"loss": 5.6656, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0036000000000000003, |
|
"loss": 5.612, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 0.003592, |
|
"loss": 5.5532, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 0.003584, |
|
"loss": 5.4327, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 0.003576, |
|
"loss": 5.3979, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 0.003568, |
|
"loss": 5.2903, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 0.0035600000000000002, |
|
"loss": 5.4521, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.003552, |
|
"loss": 5.6021, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 0.0035440000000000003, |
|
"loss": 5.5058, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 0.003536, |
|
"loss": 5.2167, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 0.003528, |
|
"loss": 5.2102, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00352, |
|
"loss": 5.2617, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 0.003512, |
|
"loss": 5.3012, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 0.003504, |
|
"loss": 5.2158, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 0.003496, |
|
"loss": 5.1959, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.003488, |
|
"loss": 5.1716, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 0.00348, |
|
"loss": 5.0796, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 0.0034720000000000003, |
|
"loss": 4.9764, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 0.003464, |
|
"loss": 4.974, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 0.003456, |
|
"loss": 4.876, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 0.003448, |
|
"loss": 4.8596, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 0.00344, |
|
"loss": 4.7792, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 0.003432, |
|
"loss": 4.765, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 0.003424, |
|
"loss": 4.7933, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 0.003416, |
|
"loss": 4.7636, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 0.003408, |
|
"loss": 4.7114, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.0034, |
|
"loss": 4.7079, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 0.003392, |
|
"loss": 4.6745, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 0.003384, |
|
"loss": 4.6765, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 0.003376, |
|
"loss": 4.5913, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 0.003368, |
|
"loss": 4.7949, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.00336, |
|
"loss": 4.6311, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 0.003352, |
|
"loss": 4.4818, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 0.0033439999999999998, |
|
"loss": 4.4462, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 0.003336, |
|
"loss": 4.5129, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 0.003328, |
|
"loss": 4.4626, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 0.00332, |
|
"loss": 4.3505, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 0.003312, |
|
"loss": 4.3377, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 0.003304, |
|
"loss": 4.4076, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 0.003296, |
|
"loss": 4.3765, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 0.0032879999999999997, |
|
"loss": 4.2473, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 0.00328, |
|
"loss": 4.2142, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 0.0032719999999999997, |
|
"loss": 4.1567, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 0.003264, |
|
"loss": 4.1569, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 0.0032559999999999998, |
|
"loss": 4.1347, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 0.0032480000000000005, |
|
"loss": 4.0786, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 0.0032400000000000003, |
|
"loss": 4.0796, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 0.003232, |
|
"loss": 4.0432, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 0.0032240000000000003, |
|
"loss": 4.033, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 0.003216, |
|
"loss": 3.952, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 0.0032080000000000003, |
|
"loss": 4.0043, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.0032, |
|
"loss": 4.2161, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 0.0031920000000000004, |
|
"loss": 4.1006, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 0.003184, |
|
"loss": 4.1527, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 0.0031760000000000004, |
|
"loss": 3.9791, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 0.0031680000000000002, |
|
"loss": 3.9599, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 0.00316, |
|
"loss": 3.9998, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 0.0031520000000000003, |
|
"loss": 3.973, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 0.003144, |
|
"loss": 3.9976, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 0.0031360000000000003, |
|
"loss": 3.9862, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 0.003128, |
|
"loss": 3.8562, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 0.0031200000000000004, |
|
"loss": 3.8322, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 0.003112, |
|
"loss": 3.8451, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 0.003104, |
|
"loss": 3.8274, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 0.0030960000000000002, |
|
"loss": 3.8483, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 0.003088, |
|
"loss": 3.7911, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 0.0030800000000000003, |
|
"loss": 3.8203, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 0.003072, |
|
"loss": 3.7111, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 0.0030640000000000003, |
|
"loss": 3.7186, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 0.003056, |
|
"loss": 3.6357, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 0.003048, |
|
"loss": 3.6484, |
|
"step": 1547 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 0.00304, |
|
"loss": 3.7188, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 0.003032, |
|
"loss": 3.6217, |
|
"step": 1573 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 0.003024, |
|
"loss": 3.5853, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 0.003016, |
|
"loss": 3.6381, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 0.0030080000000000003, |
|
"loss": 3.6051, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 0.003, |
|
"loss": 3.6293, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 0.002992, |
|
"loss": 3.626, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 0.002984, |
|
"loss": 3.6121, |
|
"step": 1651 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 0.002976, |
|
"loss": 3.5777, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 0.002968, |
|
"loss": 3.551, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 0.00296, |
|
"loss": 3.534, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 0.002952, |
|
"loss": 3.5946, |
|
"step": 1703 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 0.002944, |
|
"loss": 3.6511, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 0.002936, |
|
"loss": 3.5556, |
|
"step": 1729 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 0.002928, |
|
"loss": 3.5453, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 0.00292, |
|
"loss": 3.5641, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 0.002912, |
|
"loss": 3.5357, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 0.002904, |
|
"loss": 3.5738, |
|
"step": 1781 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 0.002896, |
|
"loss": 3.4697, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 0.002888, |
|
"loss": 3.4405, |
|
"step": 1807 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 0.0028799999999999997, |
|
"loss": 3.3998, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 0.002872, |
|
"loss": 3.4035, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 0.002864, |
|
"loss": 3.4335, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 0.002856, |
|
"loss": 3.4105, |
|
"step": 1859 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 0.002848, |
|
"loss": 3.3161, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 0.00284, |
|
"loss": 3.2802, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 0.002832, |
|
"loss": 3.2573, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 0.0028239999999999997, |
|
"loss": 3.265, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 0.002816, |
|
"loss": 3.3362, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 0.0028079999999999997, |
|
"loss": 3.2085, |
|
"step": 1937 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 0.0028, |
|
"loss": 3.2445, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 0.0027919999999999998, |
|
"loss": 3.2212, |
|
"step": 1963 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 0.002784, |
|
"loss": 3.2135, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 0.002776, |
|
"loss": 3.173, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 0.002768, |
|
"loss": 3.1946, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 0.00276, |
|
"loss": 3.1739, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 0.0027519999999999997, |
|
"loss": 3.1975, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 0.0027440000000000003, |
|
"loss": 3.148, |
|
"step": 2041 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 0.002736, |
|
"loss": 3.1124, |
|
"step": 2054 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 0.0027280000000000004, |
|
"loss": 3.1101, |
|
"step": 2067 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 0.00272, |
|
"loss": 3.155, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 0.0027120000000000004, |
|
"loss": 3.091, |
|
"step": 2093 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 0.0027040000000000002, |
|
"loss": 3.0156, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 0.002696, |
|
"loss": 3.031, |
|
"step": 2119 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 0.0026880000000000003, |
|
"loss": 3.0426, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 0.00268, |
|
"loss": 2.9667, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 0.0026720000000000003, |
|
"loss": 2.9496, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 0.002664, |
|
"loss": 3.0151, |
|
"step": 2171 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 0.0026560000000000004, |
|
"loss": 3.0202, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"learning_rate": 0.002648, |
|
"loss": 3.1202, |
|
"step": 2197 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 0.00264, |
|
"loss": 3.0814, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"learning_rate": 0.0026320000000000002, |
|
"loss": 2.9501, |
|
"step": 2223 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 0.002624, |
|
"loss": 2.8994, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"learning_rate": 0.0026160000000000003, |
|
"loss": 2.8437, |
|
"step": 2249 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"learning_rate": 0.002608, |
|
"loss": 2.8867, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 0.0026000000000000003, |
|
"loss": 2.8977, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 0.002592, |
|
"loss": 2.8601, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 0.002584, |
|
"loss": 2.9511, |
|
"step": 2301 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"learning_rate": 0.002576, |
|
"loss": 2.8396, |
|
"step": 2314 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"learning_rate": 0.002568, |
|
"loss": 2.8238, |
|
"step": 2327 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 0.00256, |
|
"loss": 2.8048, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"learning_rate": 0.002552, |
|
"loss": 2.7583, |
|
"step": 2353 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"learning_rate": 0.0025440000000000003, |
|
"loss": 2.7443, |
|
"step": 2366 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"learning_rate": 0.002536, |
|
"loss": 2.7362, |
|
"step": 2379 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 0.002528, |
|
"loss": 2.7878, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 0.00252, |
|
"loss": 2.7811, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"learning_rate": 0.002512, |
|
"loss": 2.7213, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"learning_rate": 0.002504, |
|
"loss": 2.7716, |
|
"step": 2431 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"learning_rate": 0.002496, |
|
"loss": 2.7761, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"learning_rate": 0.002488, |
|
"loss": 2.7456, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 0.00248, |
|
"loss": 2.9211, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"learning_rate": 0.0024720000000000002, |
|
"loss": 2.9644, |
|
"step": 2483 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 0.002464, |
|
"loss": 2.7444, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"learning_rate": 0.002456, |
|
"loss": 2.7094, |
|
"step": 2509 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"learning_rate": 0.002448, |
|
"loss": 2.6593, |
|
"step": 2522 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 0.00244, |
|
"loss": 2.6424, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"learning_rate": 0.002432, |
|
"loss": 2.5913, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"learning_rate": 0.002424, |
|
"loss": 2.6003, |
|
"step": 2561 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"learning_rate": 0.002416, |
|
"loss": 2.6317, |
|
"step": 2574 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"learning_rate": 0.002408, |
|
"loss": 2.6468, |
|
"step": 2587 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 0.0024, |
|
"loss": 2.5951, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"learning_rate": 0.002392, |
|
"loss": 2.5915, |
|
"step": 2613 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"learning_rate": 0.002384, |
|
"loss": 2.568, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"learning_rate": 0.002376, |
|
"loss": 2.5466, |
|
"step": 2639 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"learning_rate": 0.002368, |
|
"loss": 2.6858, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"learning_rate": 0.00236, |
|
"loss": 2.5551, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"learning_rate": 0.002352, |
|
"loss": 2.5618, |
|
"step": 2678 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"learning_rate": 0.0023439999999999997, |
|
"loss": 2.5309, |
|
"step": 2691 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 0.002336, |
|
"loss": 2.5307, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"learning_rate": 0.0023279999999999998, |
|
"loss": 2.5008, |
|
"step": 2717 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"learning_rate": 0.00232, |
|
"loss": 2.5485, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"learning_rate": 0.002312, |
|
"loss": 2.547, |
|
"step": 2743 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"learning_rate": 0.002304, |
|
"loss": 2.461, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"learning_rate": 0.002296, |
|
"loss": 2.4375, |
|
"step": 2769 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"learning_rate": 0.0022879999999999997, |
|
"loss": 2.4417, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"learning_rate": 0.00228, |
|
"loss": 2.4427, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"learning_rate": 0.0022719999999999997, |
|
"loss": 2.4756, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"learning_rate": 0.002264, |
|
"loss": 2.4662, |
|
"step": 2821 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"learning_rate": 0.0022559999999999998, |
|
"loss": 2.4931, |
|
"step": 2834 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"learning_rate": 0.0022480000000000004, |
|
"loss": 2.4438, |
|
"step": 2847 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"learning_rate": 0.0022400000000000002, |
|
"loss": 2.3834, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"learning_rate": 0.002232, |
|
"loss": 2.4078, |
|
"step": 2873 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"learning_rate": 0.0022240000000000003, |
|
"loss": 2.3813, |
|
"step": 2886 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"learning_rate": 0.002216, |
|
"loss": 2.382, |
|
"step": 2899 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"learning_rate": 0.0022080000000000003, |
|
"loss": 2.361, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 0.0022, |
|
"loss": 2.3106, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"learning_rate": 0.0021920000000000004, |
|
"loss": 2.2991, |
|
"step": 2938 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"learning_rate": 0.002184, |
|
"loss": 2.231, |
|
"step": 2951 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"learning_rate": 0.0021760000000000004, |
|
"loss": 2.2748, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"learning_rate": 0.0021680000000000002, |
|
"loss": 2.2974, |
|
"step": 2977 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"learning_rate": 0.00216, |
|
"loss": 2.2974, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"learning_rate": 0.0021520000000000003, |
|
"loss": 2.2755, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"learning_rate": 0.002144, |
|
"loss": 2.287, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"learning_rate": 0.0021360000000000003, |
|
"loss": 2.2462, |
|
"step": 3029 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"learning_rate": 0.002128, |
|
"loss": 2.2528, |
|
"step": 3042 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"learning_rate": 0.0021200000000000004, |
|
"loss": 2.2052, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"learning_rate": 0.002112, |
|
"loss": 2.2461, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"learning_rate": 0.002104, |
|
"loss": 2.2099, |
|
"step": 3081 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"learning_rate": 0.002096, |
|
"loss": 2.1273, |
|
"step": 3094 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"learning_rate": 0.002088, |
|
"loss": 2.1668, |
|
"step": 3107 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"learning_rate": 0.0020800000000000003, |
|
"loss": 2.1719, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"learning_rate": 0.002072, |
|
"loss": 2.171, |
|
"step": 3133 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"learning_rate": 0.0020640000000000003, |
|
"loss": 2.1436, |
|
"step": 3146 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"learning_rate": 0.002056, |
|
"loss": 2.1698, |
|
"step": 3159 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"learning_rate": 0.002048, |
|
"loss": 2.1576, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"learning_rate": 0.00204, |
|
"loss": 2.1641, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"learning_rate": 0.002032, |
|
"loss": 2.1721, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"learning_rate": 0.002024, |
|
"loss": 2.1615, |
|
"step": 3211 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"learning_rate": 0.002016, |
|
"loss": 2.0983, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"learning_rate": 0.0020080000000000002, |
|
"loss": 2.108, |
|
"step": 3237 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 0.002, |
|
"loss": 2.1167, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"learning_rate": 0.001992, |
|
"loss": 2.0951, |
|
"step": 3263 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"learning_rate": 0.001984, |
|
"loss": 2.0415, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"learning_rate": 0.001976, |
|
"loss": 2.101, |
|
"step": 3289 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"learning_rate": 0.001968, |
|
"loss": 2.1233, |
|
"step": 3302 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"learning_rate": 0.00196, |
|
"loss": 2.0782, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"learning_rate": 0.001952, |
|
"loss": 2.0033, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"learning_rate": 0.001944, |
|
"loss": 2.051, |
|
"step": 3341 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"learning_rate": 0.001936, |
|
"loss": 2.0587, |
|
"step": 3354 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"learning_rate": 0.001928, |
|
"loss": 1.9981, |
|
"step": 3367 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"learning_rate": 0.00192, |
|
"loss": 2.0506, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"learning_rate": 0.0019119999999999999, |
|
"loss": 2.0815, |
|
"step": 3393 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"learning_rate": 0.0019039999999999999, |
|
"loss": 2.0054, |
|
"step": 3406 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"learning_rate": 0.001896, |
|
"loss": 1.9923, |
|
"step": 3419 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"learning_rate": 0.001888, |
|
"loss": 1.9892, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"learning_rate": 0.00188, |
|
"loss": 1.9406, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"learning_rate": 0.0018720000000000002, |
|
"loss": 1.9295, |
|
"step": 3458 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"learning_rate": 0.0018640000000000002, |
|
"loss": 1.9791, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"learning_rate": 0.0018560000000000002, |
|
"loss": 1.9413, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"learning_rate": 0.001848, |
|
"loss": 1.9363, |
|
"step": 3497 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"learning_rate": 0.00184, |
|
"loss": 2.0056, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"learning_rate": 0.001832, |
|
"loss": 1.9298, |
|
"step": 3523 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"learning_rate": 0.001824, |
|
"loss": 1.9045, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"learning_rate": 0.0018160000000000001, |
|
"loss": 1.9165, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"learning_rate": 0.0018080000000000001, |
|
"loss": 1.9214, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 0.0018000000000000002, |
|
"loss": 1.9063, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"learning_rate": 0.001792, |
|
"loss": 1.9016, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"learning_rate": 0.001784, |
|
"loss": 1.8091, |
|
"step": 3601 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"learning_rate": 0.001776, |
|
"loss": 1.8626, |
|
"step": 3614 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"learning_rate": 0.001768, |
|
"loss": 1.8663, |
|
"step": 3627 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"learning_rate": 0.00176, |
|
"loss": 1.9432, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"learning_rate": 0.001752, |
|
"loss": 1.8664, |
|
"step": 3653 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"learning_rate": 0.001744, |
|
"loss": 1.8603, |
|
"step": 3666 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"learning_rate": 0.0017360000000000001, |
|
"loss": 1.8335, |
|
"step": 3679 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"learning_rate": 0.001728, |
|
"loss": 1.8625, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"learning_rate": 0.00172, |
|
"loss": 1.8043, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"learning_rate": 0.001712, |
|
"loss": 1.8061, |
|
"step": 3718 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"learning_rate": 0.001704, |
|
"loss": 1.835, |
|
"step": 3731 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"learning_rate": 0.001696, |
|
"loss": 1.7944, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"learning_rate": 0.001688, |
|
"loss": 1.8492, |
|
"step": 3757 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"learning_rate": 0.00168, |
|
"loss": 1.812, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"learning_rate": 0.0016719999999999999, |
|
"loss": 1.8175, |
|
"step": 3783 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"learning_rate": 0.001664, |
|
"loss": 1.7943, |
|
"step": 3796 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"learning_rate": 0.001656, |
|
"loss": 1.8063, |
|
"step": 3809 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"learning_rate": 0.001648, |
|
"loss": 1.7992, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"learning_rate": 0.00164, |
|
"loss": 1.7959, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"learning_rate": 0.001632, |
|
"loss": 1.7256, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"learning_rate": 0.0016240000000000002, |
|
"loss": 1.7673, |
|
"step": 3861 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"learning_rate": 0.001616, |
|
"loss": 1.8299, |
|
"step": 3874 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"learning_rate": 0.001608, |
|
"loss": 1.8147, |
|
"step": 3887 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 0.0016, |
|
"loss": 1.7495, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"learning_rate": 0.001592, |
|
"loss": 1.8001, |
|
"step": 3913 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"learning_rate": 0.0015840000000000001, |
|
"loss": 1.7707, |
|
"step": 3926 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"learning_rate": 0.0015760000000000001, |
|
"loss": 1.7283, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"learning_rate": 0.0015680000000000002, |
|
"loss": 1.7133, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"learning_rate": 0.0015600000000000002, |
|
"loss": 1.71, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"learning_rate": 0.001552, |
|
"loss": 1.6685, |
|
"step": 3978 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"learning_rate": 0.001544, |
|
"loss": 1.6526, |
|
"step": 3991 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"learning_rate": 0.001536, |
|
"loss": 1.6433, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"learning_rate": 0.001528, |
|
"loss": 1.6823, |
|
"step": 4017 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"learning_rate": 0.00152, |
|
"loss": 1.6843, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"learning_rate": 0.001512, |
|
"loss": 1.7029, |
|
"step": 4043 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"learning_rate": 0.0015040000000000001, |
|
"loss": 1.6362, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"learning_rate": 0.001496, |
|
"loss": 1.6648, |
|
"step": 4069 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"learning_rate": 0.001488, |
|
"loss": 1.7202, |
|
"step": 4082 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"learning_rate": 0.00148, |
|
"loss": 1.677, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"learning_rate": 0.001472, |
|
"loss": 1.6187, |
|
"step": 4108 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"learning_rate": 0.001464, |
|
"loss": 1.6398, |
|
"step": 4121 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"learning_rate": 0.001456, |
|
"loss": 1.6371, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"learning_rate": 0.001448, |
|
"loss": 1.6081, |
|
"step": 4147 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"learning_rate": 0.0014399999999999999, |
|
"loss": 1.5936, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"learning_rate": 0.001432, |
|
"loss": 1.6336, |
|
"step": 4173 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"learning_rate": 0.001424, |
|
"loss": 1.6022, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"learning_rate": 0.001416, |
|
"loss": 1.6336, |
|
"step": 4199 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"learning_rate": 0.001408, |
|
"loss": 1.5898, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 0.0014, |
|
"loss": 1.5528, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"learning_rate": 0.001392, |
|
"loss": 1.5734, |
|
"step": 4238 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"learning_rate": 0.001384, |
|
"loss": 1.618, |
|
"step": 4251 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"learning_rate": 0.0013759999999999998, |
|
"loss": 1.6529, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"learning_rate": 0.001368, |
|
"loss": 1.5824, |
|
"step": 4277 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"learning_rate": 0.00136, |
|
"loss": 1.609, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"learning_rate": 0.0013520000000000001, |
|
"loss": 1.5796, |
|
"step": 4303 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"learning_rate": 0.0013440000000000001, |
|
"loss": 1.5924, |
|
"step": 4316 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"learning_rate": 0.0013360000000000002, |
|
"loss": 1.5841, |
|
"step": 4329 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"learning_rate": 0.0013280000000000002, |
|
"loss": 1.5487, |
|
"step": 4342 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"learning_rate": 0.00132, |
|
"loss": 1.4625, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"learning_rate": 0.001312, |
|
"loss": 1.5241, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"learning_rate": 0.001304, |
|
"loss": 1.4823, |
|
"step": 4381 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"learning_rate": 0.001296, |
|
"loss": 1.5027, |
|
"step": 4394 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"learning_rate": 0.001288, |
|
"loss": 1.5211, |
|
"step": 4407 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"learning_rate": 0.00128, |
|
"loss": 1.4912, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"learning_rate": 0.0012720000000000001, |
|
"loss": 1.4792, |
|
"step": 4433 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"learning_rate": 0.001264, |
|
"loss": 1.4932, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"learning_rate": 0.001256, |
|
"loss": 1.4861, |
|
"step": 4459 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"learning_rate": 0.001248, |
|
"loss": 1.5171, |
|
"step": 4472 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"learning_rate": 0.00124, |
|
"loss": 1.494, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"learning_rate": 0.001232, |
|
"loss": 1.4992, |
|
"step": 4498 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"learning_rate": 0.001224, |
|
"loss": 1.5033, |
|
"step": 4511 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"learning_rate": 0.001216, |
|
"loss": 1.5039, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"learning_rate": 0.001208, |
|
"loss": 1.5341, |
|
"step": 4537 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"learning_rate": 0.0012, |
|
"loss": 1.5049, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"learning_rate": 0.001192, |
|
"loss": 1.5104, |
|
"step": 4563 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"learning_rate": 0.001184, |
|
"loss": 1.4569, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"learning_rate": 0.001176, |
|
"loss": 1.3996, |
|
"step": 4589 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"learning_rate": 0.001168, |
|
"loss": 1.4337, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"learning_rate": 0.00116, |
|
"loss": 1.4572, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"learning_rate": 0.001152, |
|
"loss": 1.4668, |
|
"step": 4628 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"learning_rate": 0.0011439999999999998, |
|
"loss": 1.4298, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"learning_rate": 0.0011359999999999999, |
|
"loss": 1.4187, |
|
"step": 4654 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"learning_rate": 0.0011279999999999999, |
|
"loss": 1.4026, |
|
"step": 4667 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"learning_rate": 0.0011200000000000001, |
|
"loss": 1.4461, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"learning_rate": 0.0011120000000000001, |
|
"loss": 1.4497, |
|
"step": 4693 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"learning_rate": 0.0011040000000000002, |
|
"loss": 1.3667, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"learning_rate": 0.0010960000000000002, |
|
"loss": 1.4237, |
|
"step": 4719 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"learning_rate": 0.0010880000000000002, |
|
"loss": 1.485, |
|
"step": 4732 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"learning_rate": 0.00108, |
|
"loss": 1.4271, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"learning_rate": 0.001072, |
|
"loss": 1.4046, |
|
"step": 4758 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"learning_rate": 0.001064, |
|
"loss": 1.3771, |
|
"step": 4771 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"learning_rate": 0.001056, |
|
"loss": 1.4054, |
|
"step": 4784 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"learning_rate": 0.001048, |
|
"loss": 1.3886, |
|
"step": 4797 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"learning_rate": 0.0010400000000000001, |
|
"loss": 1.3583, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"learning_rate": 0.0010320000000000001, |
|
"loss": 1.3606, |
|
"step": 4823 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"learning_rate": 0.001024, |
|
"loss": 1.3619, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"learning_rate": 0.001016, |
|
"loss": 1.3723, |
|
"step": 4849 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"learning_rate": 0.001008, |
|
"loss": 1.3604, |
|
"step": 4862 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"learning_rate": 0.001, |
|
"loss": 1.3745, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"learning_rate": 0.000992, |
|
"loss": 1.393, |
|
"step": 4888 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"learning_rate": 0.000984, |
|
"loss": 1.3846, |
|
"step": 4901 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"learning_rate": 0.000976, |
|
"loss": 1.4033, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"learning_rate": 0.000968, |
|
"loss": 1.3204, |
|
"step": 4927 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"learning_rate": 0.00096, |
|
"loss": 1.3257, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"learning_rate": 0.0009519999999999999, |
|
"loss": 1.3274, |
|
"step": 4953 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"learning_rate": 0.000944, |
|
"loss": 1.3177, |
|
"step": 4966 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"learning_rate": 0.0009360000000000001, |
|
"loss": 1.3204, |
|
"step": 4979 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"learning_rate": 0.0009280000000000001, |
|
"loss": 1.3349, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"learning_rate": 0.00092, |
|
"loss": 1.3149, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"learning_rate": 0.000912, |
|
"loss": 1.2994, |
|
"step": 5018 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"learning_rate": 0.0009040000000000001, |
|
"loss": 1.3295, |
|
"step": 5031 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"learning_rate": 0.000896, |
|
"loss": 1.2975, |
|
"step": 5044 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"learning_rate": 0.000888, |
|
"loss": 1.3118, |
|
"step": 5057 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"learning_rate": 0.00088, |
|
"loss": 1.2712, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"learning_rate": 0.000872, |
|
"loss": 1.3184, |
|
"step": 5083 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"learning_rate": 0.000864, |
|
"loss": 1.2687, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"learning_rate": 0.000856, |
|
"loss": 1.2826, |
|
"step": 5109 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"learning_rate": 0.000848, |
|
"loss": 1.2766, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"learning_rate": 0.00084, |
|
"loss": 1.2935, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"learning_rate": 0.000832, |
|
"loss": 1.288, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"learning_rate": 0.000824, |
|
"loss": 1.2617, |
|
"step": 5161 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"learning_rate": 0.000816, |
|
"loss": 1.2675, |
|
"step": 5174 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"learning_rate": 0.000808, |
|
"loss": 1.2895, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"learning_rate": 0.0008, |
|
"loss": 1.2721, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"learning_rate": 0.0007920000000000001, |
|
"loss": 1.2897, |
|
"step": 5213 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"learning_rate": 0.0007840000000000001, |
|
"loss": 1.2608, |
|
"step": 5226 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"learning_rate": 0.000776, |
|
"loss": 1.271, |
|
"step": 5239 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"learning_rate": 0.000768, |
|
"loss": 1.2581, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"learning_rate": 0.00076, |
|
"loss": 1.2497, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"learning_rate": 0.0007520000000000001, |
|
"loss": 1.2846, |
|
"step": 5278 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"learning_rate": 0.000744, |
|
"loss": 1.2718, |
|
"step": 5291 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"learning_rate": 0.000736, |
|
"loss": 1.2733, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"learning_rate": 0.000728, |
|
"loss": 1.2918, |
|
"step": 5317 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"learning_rate": 0.0007199999999999999, |
|
"loss": 1.2659, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"learning_rate": 0.000712, |
|
"loss": 1.2946, |
|
"step": 5343 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"learning_rate": 0.000704, |
|
"loss": 1.2425, |
|
"step": 5356 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"learning_rate": 0.000696, |
|
"loss": 1.2293, |
|
"step": 5369 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"learning_rate": 0.0006879999999999999, |
|
"loss": 1.2847, |
|
"step": 5382 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"learning_rate": 0.00068, |
|
"loss": 1.2318, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"learning_rate": 0.0006720000000000001, |
|
"loss": 1.237, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"learning_rate": 0.0006640000000000001, |
|
"loss": 1.1875, |
|
"step": 5421 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"learning_rate": 0.000656, |
|
"loss": 1.2204, |
|
"step": 5434 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"learning_rate": 0.000648, |
|
"loss": 1.1848, |
|
"step": 5447 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"learning_rate": 0.00064, |
|
"loss": 1.2146, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"learning_rate": 0.000632, |
|
"loss": 1.1621, |
|
"step": 5473 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"learning_rate": 0.000624, |
|
"loss": 1.1883, |
|
"step": 5486 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"learning_rate": 0.000616, |
|
"loss": 1.183, |
|
"step": 5499 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"learning_rate": 0.000608, |
|
"loss": 1.1649, |
|
"step": 5512 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"learning_rate": 0.0006, |
|
"loss": 1.1824, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"learning_rate": 0.000592, |
|
"loss": 1.2073, |
|
"step": 5538 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"learning_rate": 0.000584, |
|
"loss": 1.147, |
|
"step": 5551 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"learning_rate": 0.000576, |
|
"loss": 1.1798, |
|
"step": 5564 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"learning_rate": 0.0005679999999999999, |
|
"loss": 1.14, |
|
"step": 5577 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"learning_rate": 0.0005600000000000001, |
|
"loss": 1.1585, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"learning_rate": 0.0005520000000000001, |
|
"loss": 1.1687, |
|
"step": 5603 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"learning_rate": 0.0005440000000000001, |
|
"loss": 1.1285, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"learning_rate": 0.000536, |
|
"loss": 1.1472, |
|
"step": 5629 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"learning_rate": 0.000528, |
|
"loss": 1.1894, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"learning_rate": 0.0005200000000000001, |
|
"loss": 1.1606, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"learning_rate": 0.000512, |
|
"loss": 1.1294, |
|
"step": 5668 |
|
}, |
|
{ |
|
"epoch": 437.0, |
|
"learning_rate": 0.000504, |
|
"loss": 1.1597, |
|
"step": 5681 |
|
}, |
|
{ |
|
"epoch": 438.0, |
|
"learning_rate": 0.000496, |
|
"loss": 1.1772, |
|
"step": 5694 |
|
}, |
|
{ |
|
"epoch": 439.0, |
|
"learning_rate": 0.000488, |
|
"loss": 1.2044, |
|
"step": 5707 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"learning_rate": 0.00048, |
|
"loss": 1.1543, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 441.0, |
|
"learning_rate": 0.000472, |
|
"loss": 1.1868, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 442.0, |
|
"learning_rate": 0.00046400000000000006, |
|
"loss": 1.1821, |
|
"step": 5746 |
|
}, |
|
{ |
|
"epoch": 443.0, |
|
"learning_rate": 0.000456, |
|
"loss": 1.0897, |
|
"step": 5759 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"learning_rate": 0.000448, |
|
"loss": 1.0977, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 445.0, |
|
"learning_rate": 0.00044, |
|
"loss": 1.1695, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 446.0, |
|
"learning_rate": 0.000432, |
|
"loss": 1.1332, |
|
"step": 5798 |
|
}, |
|
{ |
|
"epoch": 447.0, |
|
"learning_rate": 0.000424, |
|
"loss": 1.1321, |
|
"step": 5811 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"learning_rate": 0.000416, |
|
"loss": 1.1315, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 449.0, |
|
"learning_rate": 0.000408, |
|
"loss": 1.1178, |
|
"step": 5837 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"learning_rate": 0.0004, |
|
"loss": 1.1163, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 451.0, |
|
"learning_rate": 0.00039200000000000004, |
|
"loss": 1.1414, |
|
"step": 5863 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"learning_rate": 0.000384, |
|
"loss": 1.1274, |
|
"step": 5876 |
|
}, |
|
{ |
|
"epoch": 453.0, |
|
"learning_rate": 0.00037600000000000003, |
|
"loss": 1.1067, |
|
"step": 5889 |
|
}, |
|
{ |
|
"epoch": 454.0, |
|
"learning_rate": 0.000368, |
|
"loss": 1.0889, |
|
"step": 5902 |
|
}, |
|
{ |
|
"epoch": 455.0, |
|
"learning_rate": 0.00035999999999999997, |
|
"loss": 1.0844, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"learning_rate": 0.000352, |
|
"loss": 1.1341, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 457.0, |
|
"learning_rate": 0.00034399999999999996, |
|
"loss": 1.0644, |
|
"step": 5941 |
|
}, |
|
{ |
|
"epoch": 458.0, |
|
"learning_rate": 0.00033600000000000004, |
|
"loss": 1.0991, |
|
"step": 5954 |
|
}, |
|
{ |
|
"epoch": 459.0, |
|
"learning_rate": 0.000328, |
|
"loss": 1.1176, |
|
"step": 5967 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"learning_rate": 0.00032, |
|
"loss": 1.0997, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 461.0, |
|
"learning_rate": 0.000312, |
|
"loss": 1.0997, |
|
"step": 5993 |
|
}, |
|
{ |
|
"epoch": 462.0, |
|
"learning_rate": 0.000304, |
|
"loss": 1.0763, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 463.0, |
|
"learning_rate": 0.000296, |
|
"loss": 1.1102, |
|
"step": 6019 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"learning_rate": 0.000288, |
|
"loss": 1.1236, |
|
"step": 6032 |
|
}, |
|
{ |
|
"epoch": 465.0, |
|
"learning_rate": 0.00028000000000000003, |
|
"loss": 1.0941, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 466.0, |
|
"learning_rate": 0.00027200000000000005, |
|
"loss": 1.0976, |
|
"step": 6058 |
|
}, |
|
{ |
|
"epoch": 467.0, |
|
"learning_rate": 0.000264, |
|
"loss": 1.0688, |
|
"step": 6071 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"learning_rate": 0.000256, |
|
"loss": 1.0591, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 469.0, |
|
"learning_rate": 0.000248, |
|
"loss": 1.0695, |
|
"step": 6097 |
|
}, |
|
{ |
|
"epoch": 470.0, |
|
"learning_rate": 0.00024, |
|
"loss": 1.071, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 471.0, |
|
"learning_rate": 0.00023200000000000003, |
|
"loss": 1.0709, |
|
"step": 6123 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"learning_rate": 0.000224, |
|
"loss": 1.0767, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 473.0, |
|
"learning_rate": 0.000216, |
|
"loss": 1.0741, |
|
"step": 6149 |
|
}, |
|
{ |
|
"epoch": 474.0, |
|
"learning_rate": 0.000208, |
|
"loss": 1.0644, |
|
"step": 6162 |
|
}, |
|
{ |
|
"epoch": 475.0, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0699, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"learning_rate": 0.000192, |
|
"loss": 1.0727, |
|
"step": 6188 |
|
}, |
|
{ |
|
"epoch": 477.0, |
|
"learning_rate": 0.000184, |
|
"loss": 1.06, |
|
"step": 6201 |
|
}, |
|
{ |
|
"epoch": 478.0, |
|
"learning_rate": 0.000176, |
|
"loss": 1.0568, |
|
"step": 6214 |
|
}, |
|
{ |
|
"epoch": 479.0, |
|
"learning_rate": 0.00016800000000000002, |
|
"loss": 1.0616, |
|
"step": 6227 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"learning_rate": 0.00016, |
|
"loss": 1.0491, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 481.0, |
|
"learning_rate": 0.000152, |
|
"loss": 1.0502, |
|
"step": 6253 |
|
}, |
|
{ |
|
"epoch": 482.0, |
|
"learning_rate": 0.000144, |
|
"loss": 1.0742, |
|
"step": 6266 |
|
}, |
|
{ |
|
"epoch": 483.0, |
|
"learning_rate": 0.00013600000000000003, |
|
"loss": 1.0582, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 484.0, |
|
"learning_rate": 0.000128, |
|
"loss": 1.0803, |
|
"step": 6292 |
|
}, |
|
{ |
|
"epoch": 485.0, |
|
"learning_rate": 0.00012, |
|
"loss": 1.067, |
|
"step": 6305 |
|
}, |
|
{ |
|
"epoch": 486.0, |
|
"learning_rate": 0.000112, |
|
"loss": 1.0397, |
|
"step": 6318 |
|
}, |
|
{ |
|
"epoch": 487.0, |
|
"learning_rate": 0.000104, |
|
"loss": 1.0489, |
|
"step": 6331 |
|
}, |
|
{ |
|
"epoch": 488.0, |
|
"learning_rate": 9.6e-05, |
|
"loss": 1.0378, |
|
"step": 6344 |
|
}, |
|
{ |
|
"epoch": 489.0, |
|
"learning_rate": 8.8e-05, |
|
"loss": 1.0418, |
|
"step": 6357 |
|
}, |
|
{ |
|
"epoch": 490.0, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0344, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 491.0, |
|
"learning_rate": 7.2e-05, |
|
"loss": 1.0768, |
|
"step": 6383 |
|
}, |
|
{ |
|
"epoch": 492.0, |
|
"learning_rate": 6.4e-05, |
|
"loss": 1.0296, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 493.0, |
|
"learning_rate": 5.6e-05, |
|
"loss": 1.0336, |
|
"step": 6409 |
|
}, |
|
{ |
|
"epoch": 494.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.0568, |
|
"step": 6422 |
|
}, |
|
{ |
|
"epoch": 495.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0647, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 496.0, |
|
"learning_rate": 3.2e-05, |
|
"loss": 1.0448, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 497.0, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.0602, |
|
"step": 6461 |
|
}, |
|
{ |
|
"epoch": 498.0, |
|
"learning_rate": 1.6e-05, |
|
"loss": 1.0615, |
|
"step": 6474 |
|
}, |
|
{ |
|
"epoch": 499.0, |
|
"learning_rate": 8e-06, |
|
"loss": 1.0389, |
|
"step": 6487 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.0629, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 6500, |
|
"total_flos": 284798065115136.0, |
|
"train_loss": 2.748476623535156, |
|
"train_runtime": 71445.8185, |
|
"train_samples_per_second": 0.7, |
|
"train_steps_per_second": 0.091 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 6500, |
|
"total_flos": 284798065115136.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 1.3574, |
|
"train_samples_per_second": 36834.222, |
|
"train_steps_per_second": 4788.449 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6500, |
|
"num_train_epochs": 500, |
|
"save_steps": 500, |
|
"total_flos": 284798065115136.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|