prxy5604 committed
Commit 595e9ab · verified · 1 Parent(s): fc15994

Training in progress, step 269, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7fef308b8921bdaa4b3462e88f1c7c0ade05615f1ab91ad36b5610d3f5b3c17d
+ oid sha256:b2402ba3b7e185a4a682ad4a8dcd873d072db5bd9f26577594fe8c12692aa0ce
  size 201892112
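
Only the pointer's oid changes; the size is identical (201,892,112 bytes), as expected when the adapter tensors keep the same shapes between saves. A minimal sketch for listing what the file contains, using the safetensors library and assuming the checkpoint directory from this commit has been downloaded locally:

```python
from safetensors import safe_open

# Hypothetical local path mirroring the checkpoint layout in this commit.
with safe_open("last-checkpoint/adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        # Report each adapter tensor's name and shape without materializing the weights.
        print(name, tuple(f.get_slice(name).get_shape()))
```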
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a90254e302ec8b6d14e289c29594d11ebf0d484538ff3c10b69180beee9fbee6
+ oid sha256:ef1132e125fdfa928ac6191c556ecf798bf1025bacae5051f64ada271bd69b63
  size 403961210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0b3a0b8ae4b1ef3d273ffeb9e44259fa1cdfe737176b54fe581e2c5bb5beb35e
+ oid sha256:c5111362039b3f73ecb7178a57b3d65b8c684f11cc8bf6dfc7918c1f6f7759a4
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a0f2082f8ae2500626171b5f3c174135f50005132f4fb0d89617e1b5f611c23e
+ oid sha256:6f9f3df9bf8d413fd91bebc4c208272404e0533fd2d3a72c098ffa93deb1063e
  size 1064
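
All four binaries above are Git LFS pointers: each diff swaps the sha256 oid while the byte size stays fixed, which is what you would expect when the same tensors are re-serialized with new values. A minimal sketch for checking a downloaded file against its pointer, assuming the files sit under a local last-checkpoint/ directory:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(path: Path, expected_oid: str, expected_size: int) -> bool:
    """Stream the file through sha256 and compare against the LFS pointer fields."""
    digest = hashlib.sha256()
    size = 0
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# oid and size taken from the scheduler.pt pointer above; the local path is hypothetical.
print(verify_lfs_pointer(
    Path("last-checkpoint/scheduler.pt"),
    "6f9f3df9bf8d413fd91bebc4c208272404e0533fd2d3a72c098ffa93deb1063e",
    1064,
))
```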
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.2524751424789429,
  "best_model_checkpoint": "miner_id_24/checkpoint-250",
- "epoch": 2.793296089385475,
+ "epoch": 3.005586592178771,
  "eval_steps": 50,
- "global_step": 250,
+ "global_step": 269,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1805,6 +1805,139 @@
  "eval_samples_per_second": 42.603,
  "eval_steps_per_second": 21.442,
  "step": 250
+ },
+ {
+ "epoch": 2.804469273743017,
+ "grad_norm": 0.999933660030365,
+ "learning_rate": 1.3930347737136196e-06,
+ "loss": 1.1728,
+ "step": 251
+ },
+ {
+ "epoch": 2.815642458100559,
+ "grad_norm": 1.084299921989441,
+ "learning_rate": 1.2431795228615372e-06,
+ "loss": 1.1947,
+ "step": 252
+ },
+ {
+ "epoch": 2.826815642458101,
+ "grad_norm": 1.0853486061096191,
+ "learning_rate": 1.101748557319715e-06,
+ "loss": 1.1514,
+ "step": 253
+ },
+ {
+ "epoch": 2.8379888268156424,
+ "grad_norm": 1.0919440984725952,
+ "learning_rate": 9.687663137678604e-07,
+ "loss": 1.1536,
+ "step": 254
+ },
+ {
+ "epoch": 2.8491620111731844,
+ "grad_norm": 1.0306580066680908,
+ "learning_rate": 8.442557691013043e-07,
+ "loss": 1.1416,
+ "step": 255
+ },
+ {
+ "epoch": 2.8603351955307263,
+ "grad_norm": 1.1208820343017578,
+ "learning_rate": 7.282384364610206e-07,
+ "loss": 1.1873,
+ "step": 256
+ },
+ {
+ "epoch": 2.871508379888268,
+ "grad_norm": 1.0364559888839722,
+ "learning_rate": 6.207343615165561e-07,
+ "loss": 1.0184,
+ "step": 257
+ },
+ {
+ "epoch": 2.88268156424581,
+ "grad_norm": 0.9816321730613708,
+ "learning_rate": 5.217621190024779e-07,
+ "loss": 1.017,
+ "step": 258
+ },
+ {
+ "epoch": 2.893854748603352,
+ "grad_norm": 1.1331318616867065,
+ "learning_rate": 4.3133880950905205e-07,
+ "loss": 1.1131,
+ "step": 259
+ },
+ {
+ "epoch": 2.905027932960894,
+ "grad_norm": 1.209434151649475,
+ "learning_rate": 3.494800565275125e-07,
+ "loss": 1.18,
+ "step": 260
+ },
+ {
+ "epoch": 2.916201117318436,
+ "grad_norm": 1.2905470132827759,
+ "learning_rate": 2.762000037506485e-07,
+ "loss": 1.1171,
+ "step": 261
+ },
+ {
+ "epoch": 2.927374301675978,
+ "grad_norm": 1.216124176979065,
+ "learning_rate": 2.115113126290258e-07,
+ "loss": 1.0804,
+ "step": 262
+ },
+ {
+ "epoch": 2.9385474860335195,
+ "grad_norm": 1.300574541091919,
+ "learning_rate": 1.554251601833201e-07,
+ "loss": 1.0849,
+ "step": 263
+ },
+ {
+ "epoch": 2.9497206703910615,
+ "grad_norm": 1.376492977142334,
+ "learning_rate": 1.0795123707312283e-07,
+ "loss": 1.0931,
+ "step": 264
+ },
+ {
+ "epoch": 2.9608938547486034,
+ "grad_norm": 1.507287859916687,
+ "learning_rate": 6.909774592258056e-08,
+ "loss": 1.1502,
+ "step": 265
+ },
+ {
+ "epoch": 2.972067039106145,
+ "grad_norm": 1.6715766191482544,
+ "learning_rate": 3.8871399903134265e-08,
+ "loss": 1.0179,
+ "step": 266
+ },
+ {
+ "epoch": 2.983240223463687,
+ "grad_norm": 2.2043421268463135,
+ "learning_rate": 1.7277421573608232e-08,
+ "loss": 0.9667,
+ "step": 267
+ },
+ {
+ "epoch": 2.994413407821229,
+ "grad_norm": 1.0078221559524536,
+ "learning_rate": 4.319541977831909e-09,
+ "loss": 1.1912,
+ "step": 268
+ },
+ {
+ "epoch": 3.005586592178771,
+ "grad_norm": 2.012565851211548,
+ "learning_rate": 0.0,
+ "loss": 1.8972,
+ "step": 269
  }
  ],
  "logging_steps": 1,
@@ -1828,12 +1961,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 5.3328696311808e+16,
+ "total_flos": 5.738167723150541e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null