besimray commited on
Commit
2681f4e
·
verified ·
1 Parent(s): ae08f45

Training in progress, step 220, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e8b163ef36cc7e57a8ee15a6ebe44a66dd7b467f2cc54f8de9f11808fbf4a74
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1687d42b6c8fc20fba69955ada5c36a016df8b97c35e2197e9cefb81565a448
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e11899b82d3568219c123451efb8f9c3baa4ecaaf187d3d8094ff509232f7bf
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11756c6f7f6e54186c62470b3c672e88950e155939dddf52de8c5061bb695f5
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:578c17b2105d3c6bea4883770e5007551a3f2302d9968187a7d2ba2aece88a9c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87c59ed120a2ee66d7021d83c6d58b1559824c8ca94cb61ee05deb92e21229f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99f3078781d07b22b8aceece06e9b1a7abc294ae09750b1339d4cf73fa58b867
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce5266a3e298d4cec8a8c6eda5afc2b0bfc8277ce078fdf7b96388fe070633d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.02186393737793,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-210",
4
- "epoch": 0.009491310930826422,
5
  "eval_steps": 5,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1821,6 +1821,92 @@
1821
  "eval_samples_per_second": 52.802,
1822
  "eval_steps_per_second": 26.404,
1823
  "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1824
  }
1825
  ],
1826
  "logging_steps": 1,
@@ -1849,7 +1935,7 @@
1849
  "attributes": {}
1850
  }
1851
  },
1852
- "total_flos": 2206413619200.0,
1853
  "train_batch_size": 2,
1854
  "trial_name": null,
1855
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.021401405334473,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-220",
4
+ "epoch": 0.009943278118008632,
5
  "eval_steps": 5,
6
+ "global_step": 220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1821
  "eval_samples_per_second": 52.802,
1822
  "eval_steps_per_second": 26.404,
1823
  "step": 210
1824
+ },
1825
+ {
1826
+ "epoch": 0.009536507649544643,
1827
+ "grad_norm": 0.5239633321762085,
1828
+ "learning_rate": 0.00012783753647424635,
1829
+ "loss": 44.1326,
1830
+ "step": 211
1831
+ },
1832
+ {
1833
+ "epoch": 0.009581704368262865,
1834
+ "grad_norm": 0.4532044231891632,
1835
+ "learning_rate": 0.00012722116999329712,
1836
+ "loss": 44.1039,
1837
+ "step": 212
1838
+ },
1839
+ {
1840
+ "epoch": 0.009626901086981086,
1841
+ "grad_norm": 0.5784953832626343,
1842
+ "learning_rate": 0.00012660368455666752,
1843
+ "loss": 44.0902,
1844
+ "step": 213
1845
+ },
1846
+ {
1847
+ "epoch": 0.009672097805699307,
1848
+ "grad_norm": 0.46399155259132385,
1849
+ "learning_rate": 0.0001259851055467653,
1850
+ "loss": 44.0665,
1851
+ "step": 214
1852
+ },
1853
+ {
1854
+ "epoch": 0.009717294524417528,
1855
+ "grad_norm": 0.5353842973709106,
1856
+ "learning_rate": 0.00012536545839095074,
1857
+ "loss": 44.0339,
1858
+ "step": 215
1859
+ },
1860
+ {
1861
+ "epoch": 0.009717294524417528,
1862
+ "eval_loss": 11.021649360656738,
1863
+ "eval_runtime": 176.1431,
1864
+ "eval_samples_per_second": 52.895,
1865
+ "eval_steps_per_second": 26.45,
1866
+ "step": 215
1867
+ },
1868
+ {
1869
+ "epoch": 0.009762491243135748,
1870
+ "grad_norm": 0.4887973666191101,
1871
+ "learning_rate": 0.00012474476856049144,
1872
+ "loss": 44.074,
1873
+ "step": 216
1874
+ },
1875
+ {
1876
+ "epoch": 0.009807687961853969,
1877
+ "grad_norm": 0.44021403789520264,
1878
+ "learning_rate": 0.00012412306156951526,
1879
+ "loss": 44.0695,
1880
+ "step": 217
1881
+ },
1882
+ {
1883
+ "epoch": 0.00985288468057219,
1884
+ "grad_norm": 0.5092349052429199,
1885
+ "learning_rate": 0.00012350036297396154,
1886
+ "loss": 44.0596,
1887
+ "step": 218
1888
+ },
1889
+ {
1890
+ "epoch": 0.009898081399290411,
1891
+ "grad_norm": 0.47505757212638855,
1892
+ "learning_rate": 0.00012287669837053055,
1893
+ "loss": 44.0435,
1894
+ "step": 219
1895
+ },
1896
+ {
1897
+ "epoch": 0.009943278118008632,
1898
+ "grad_norm": 0.4098033308982849,
1899
+ "learning_rate": 0.00012225209339563145,
1900
+ "loss": 44.1334,
1901
+ "step": 220
1902
+ },
1903
+ {
1904
+ "epoch": 0.009943278118008632,
1905
+ "eval_loss": 11.021401405334473,
1906
+ "eval_runtime": 176.2917,
1907
+ "eval_samples_per_second": 52.85,
1908
+ "eval_steps_per_second": 26.428,
1909
+ "step": 220
1910
  }
1911
  ],
1912
  "logging_steps": 1,
 
1935
  "attributes": {}
1936
  }
1937
  },
1938
+ "total_flos": 2311480934400.0,
1939
  "train_batch_size": 2,
1940
  "trial_name": null,
1941
  "trial_params": null