farmery commited on
Commit
b8ffbea
·
verified ·
1 Parent(s): ec0e03d

Training in progress, step 262, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16e6e2abc0a0790ff0176bd582135a996eca10e395f3fd01f2ec6d779f7e5117
3
  size 2145944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d08fbfc476d7228ecff85a37cd582fec298d6aeff1313b2f32a7d7a247f389
3
  size 2145944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:499e4c787359b5dbbe372ff47bca335ffbea8a7c78df8ed3de13838964471589
3
  size 4310020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da485dce9219bd43fe1f51cc8c91686429b611e3340ce0ec26bb5387b5fad0cf
3
  size 4310020
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e5e7629c4425464c7213d511efd59b71b04aa4e86a4c735d8756aba61f15b74
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa224dd65dca4ab776f8dfe511f81735b9e4fd11f9a0317a52dfa2485e46e6f
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cdc4d15f65239cd790ca0d032954dccbdfcfa65ec895ad42543fe40e09b7225
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14d0228dab9df662f2d3b7b779488e60a65bd00470c0f5a3e4ba7ac4f6e5fcd
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34b19c01518c61096535226e6a580a80a62cb40227c80f175b68c8043607aba3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9780f94b8edac11f3b88dab785f36e6cb86d80dd6e97e8f4124136d3a344dc0
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a87aa4c88a8dbf2d012d8a18d14c0b3660a9e4052b3506f95eb8e6634db9a779
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b109b21bca311159de54b392a8335a2a601784a7afd3d6deeae1f386f8f637
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17250bb3cf0e46ac843a7054fd246b1d5fde6a57c6f8c080eadaa80b09c70107
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2c937ae5fa5ee67bf36c998bfc57575ecf9f7a1d29c1688d121888e5038981b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9096098953377736,
5
  "eval_steps": 500,
6
- "global_step": 239,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1680,6 +1680,167 @@
1680
  "learning_rate": 4.891628760948114e-06,
1681
  "loss": 9.8696,
1682
  "step": 239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1683
  }
1684
  ],
1685
  "logging_steps": 1,
@@ -1694,12 +1855,12 @@
1694
  "should_evaluate": false,
1695
  "should_log": false,
1696
  "should_save": true,
1697
- "should_training_stop": false
1698
  },
1699
  "attributes": {}
1700
  }
1701
  },
1702
- "total_flos": 198449773412352.0,
1703
  "train_batch_size": 4,
1704
  "trial_name": null,
1705
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9971455756422455,
5
  "eval_steps": 500,
6
+ "global_step": 262,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1680
  "learning_rate": 4.891628760948114e-06,
1681
  "loss": 9.8696,
1682
  "step": 239
1683
+ },
1684
+ {
1685
+ "epoch": 0.9134157944814463,
1686
+ "grad_norm": 0.7585572600364685,
1687
+ "learning_rate": 4.4818529516926726e-06,
1688
+ "loss": 9.8702,
1689
+ "step": 240
1690
+ },
1691
+ {
1692
+ "epoch": 0.917221693625119,
1693
+ "grad_norm": 0.7781784534454346,
1694
+ "learning_rate": 4.089194655986306e-06,
1695
+ "loss": 9.8693,
1696
+ "step": 241
1697
+ },
1698
+ {
1699
+ "epoch": 0.9210275927687916,
1700
+ "grad_norm": 0.7663347125053406,
1701
+ "learning_rate": 3.7138015365554833e-06,
1702
+ "loss": 9.8703,
1703
+ "step": 242
1704
+ },
1705
+ {
1706
+ "epoch": 0.9248334919124643,
1707
+ "grad_norm": 0.7624083757400513,
1708
+ "learning_rate": 3.3558147633999728e-06,
1709
+ "loss": 9.8692,
1710
+ "step": 243
1711
+ },
1712
+ {
1713
+ "epoch": 0.928639391056137,
1714
+ "grad_norm": 0.7586108446121216,
1715
+ "learning_rate": 3.0153689607045845e-06,
1716
+ "loss": 9.8695,
1717
+ "step": 244
1718
+ },
1719
+ {
1720
+ "epoch": 0.9324452901998097,
1721
+ "grad_norm": 0.7663297057151794,
1722
+ "learning_rate": 2.692592156212487e-06,
1723
+ "loss": 9.8714,
1724
+ "step": 245
1725
+ },
1726
+ {
1727
+ "epoch": 0.9362511893434824,
1728
+ "grad_norm": 0.7702814936637878,
1729
+ "learning_rate": 2.3876057330792346e-06,
1730
+ "loss": 9.87,
1731
+ "step": 246
1732
+ },
1733
+ {
1734
+ "epoch": 0.9400570884871551,
1735
+ "grad_norm": 0.7781908512115479,
1736
+ "learning_rate": 2.100524384225555e-06,
1737
+ "loss": 9.8706,
1738
+ "step": 247
1739
+ },
1740
+ {
1741
+ "epoch": 0.9438629876308278,
1742
+ "grad_norm": 0.7821645140647888,
1743
+ "learning_rate": 1.8314560692059835e-06,
1744
+ "loss": 9.8697,
1745
+ "step": 248
1746
+ },
1747
+ {
1748
+ "epoch": 0.9476688867745005,
1749
+ "grad_norm": 0.7859928011894226,
1750
+ "learning_rate": 1.5805019736097104e-06,
1751
+ "loss": 9.8684,
1752
+ "step": 249
1753
+ },
1754
+ {
1755
+ "epoch": 0.9514747859181731,
1756
+ "grad_norm": 0.810000479221344,
1757
+ "learning_rate": 1.3477564710088098e-06,
1758
+ "loss": 9.8693,
1759
+ "step": 250
1760
+ },
1761
+ {
1762
+ "epoch": 0.9552806850618458,
1763
+ "grad_norm": 0.7431778311729431,
1764
+ "learning_rate": 1.1333070874682216e-06,
1765
+ "loss": 9.8695,
1766
+ "step": 251
1767
+ },
1768
+ {
1769
+ "epoch": 0.9590865842055185,
1770
+ "grad_norm": 0.7624398469924927,
1771
+ "learning_rate": 9.372344686307655e-07,
1772
+ "loss": 9.8701,
1773
+ "step": 252
1774
+ },
1775
+ {
1776
+ "epoch": 0.9628924833491912,
1777
+ "grad_norm": 0.7584826946258545,
1778
+ "learning_rate": 7.596123493895991e-07,
1779
+ "loss": 9.8705,
1780
+ "step": 253
1781
+ },
1782
+ {
1783
+ "epoch": 0.966698382492864,
1784
+ "grad_norm": 0.7702676653862,
1785
+ "learning_rate": 6.005075261595494e-07,
1786
+ "loss": 9.8698,
1787
+ "step": 254
1788
+ },
1789
+ {
1790
+ "epoch": 0.9705042816365367,
1791
+ "grad_norm": 0.7663776278495789,
1792
+ "learning_rate": 4.5997983175773417e-07,
1793
+ "loss": 9.87,
1794
+ "step": 255
1795
+ },
1796
+ {
1797
+ "epoch": 0.9743101807802094,
1798
+ "grad_norm": 0.7741448283195496,
1799
+ "learning_rate": 3.380821129028489e-07,
1800
+ "loss": 9.8685,
1801
+ "step": 256
1802
+ },
1803
+ {
1804
+ "epoch": 0.978116079923882,
1805
+ "grad_norm": 0.7703086137771606,
1806
+ "learning_rate": 2.3486021034170857e-07,
1807
+ "loss": 9.8698,
1808
+ "step": 257
1809
+ },
1810
+ {
1811
+ "epoch": 0.9819219790675547,
1812
+ "grad_norm": 0.7586408853530884,
1813
+ "learning_rate": 1.503529416103988e-07,
1814
+ "loss": 9.8716,
1815
+ "step": 258
1816
+ },
1817
+ {
1818
+ "epoch": 0.9857278782112274,
1819
+ "grad_norm": 0.7703169584274292,
1820
+ "learning_rate": 8.459208643659122e-08,
1821
+ "loss": 9.87,
1822
+ "step": 259
1823
+ },
1824
+ {
1825
+ "epoch": 0.9895337773549001,
1826
+ "grad_norm": 0.7703465223312378,
1827
+ "learning_rate": 3.760237478849793e-08,
1828
+ "loss": 9.8706,
1829
+ "step": 260
1830
+ },
1831
+ {
1832
+ "epoch": 0.9933396764985728,
1833
+ "grad_norm": 0.7702966928482056,
1834
+ "learning_rate": 9.401477574932926e-09,
1835
+ "loss": 9.8716,
1836
+ "step": 261
1837
+ },
1838
+ {
1839
+ "epoch": 0.9971455756422455,
1840
+ "grad_norm": 0.778302788734436,
1841
+ "learning_rate": 0.0,
1842
+ "loss": 9.8713,
1843
+ "step": 262
1844
  }
1845
  ],
1846
  "logging_steps": 1,
 
1855
  "should_evaluate": false,
1856
  "should_log": false,
1857
  "should_save": true,
1858
+ "should_training_stop": true
1859
  },
1860
  "attributes": {}
1861
  }
1862
  },
1863
+ "total_flos": 217547450351616.0,
1864
  "train_batch_size": 4,
1865
  "trial_name": null,
1866
  "trial_params": null