neuralwonderland commited on
Commit
e12d7c8
·
verified ·
1 Parent(s): 7ac549e

Training in progress, step 2550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a8c4d05cebc8a6230506025ac32b92b43f97706b15c5ef0630b359fcedb61fd
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f287e10b3519c25fc83edd7acd8cc7cd1c222a41d94fdee3bad4cce6af776567
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0768f36181b7ac2e23e854e832362cdba94250b37612dba5c19cd4985727d9a
3
- size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2fb02b6a02ceadae09c2702b5172cbc44178e9b1056a25f944bb6996f32c4a8
3
+ size 1049049378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc077b33af19e58da72048d8d3eba838a65dd66577cf1fffba1ce82ab86f524d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ad7e6d1a2a4a73b6262b8f73328a902c07cad4e904b1d224f6efe38cd6b2de
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e289fadf25e00f6e814140ec41c4ae5fbbf1fcf1f7907f11f4e9d252c610f893
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a0eec867f87d3a5ffc128abe2e98cc84850220a61f050e675a6109f6d217117
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2038679122924805,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
- "epoch": 0.10750279955207166,
5
  "eval_steps": 150,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1815,6 +1815,119 @@
1815
  "eval_samples_per_second": 9.666,
1816
  "eval_steps_per_second": 9.666,
1817
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1818
  }
1819
  ],
1820
  "logging_steps": 10,
@@ -1834,7 +1947,7 @@
1834
  "attributes": {}
1835
  }
1836
  },
1837
- "total_flos": 3.109754858840064e+17,
1838
  "train_batch_size": 4,
1839
  "trial_name": null,
1840
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2038679122924805,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
+ "epoch": 0.11422172452407615,
5
  "eval_steps": 150,
6
+ "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1815
  "eval_samples_per_second": 9.666,
1816
  "eval_steps_per_second": 9.666,
1817
  "step": 2400
1818
+ },
1819
+ {
1820
+ "epoch": 0.10795072788353863,
1821
+ "grad_norm": 3.8270246982574463,
1822
+ "learning_rate": 4.0861474083878765e-06,
1823
+ "loss": 1.0918,
1824
+ "step": 2410
1825
+ },
1826
+ {
1827
+ "epoch": 0.10839865621500559,
1828
+ "grad_norm": 5.627485752105713,
1829
+ "learning_rate": 4.062194651918585e-06,
1830
+ "loss": 1.257,
1831
+ "step": 2420
1832
+ },
1833
+ {
1834
+ "epoch": 0.10884658454647256,
1835
+ "grad_norm": 4.910660743713379,
1836
+ "learning_rate": 4.0382290623499384e-06,
1837
+ "loss": 1.2748,
1838
+ "step": 2430
1839
+ },
1840
+ {
1841
+ "epoch": 0.10929451287793952,
1842
+ "grad_norm": 2.3609941005706787,
1843
+ "learning_rate": 4.014251624813453e-06,
1844
+ "loss": 0.9422,
1845
+ "step": 2440
1846
+ },
1847
+ {
1848
+ "epoch": 0.10974244120940649,
1849
+ "grad_norm": 3.063828706741333,
1850
+ "learning_rate": 3.990263324927675e-06,
1851
+ "loss": 1.1829,
1852
+ "step": 2450
1853
+ },
1854
+ {
1855
+ "epoch": 0.11019036954087345,
1856
+ "grad_norm": 2.658452033996582,
1857
+ "learning_rate": 3.966265148757655e-06,
1858
+ "loss": 1.0062,
1859
+ "step": 2460
1860
+ },
1861
+ {
1862
+ "epoch": 0.11063829787234042,
1863
+ "grad_norm": 6.130062103271484,
1864
+ "learning_rate": 3.9422580827744224e-06,
1865
+ "loss": 1.1504,
1866
+ "step": 2470
1867
+ },
1868
+ {
1869
+ "epoch": 0.11108622620380738,
1870
+ "grad_norm": 3.3496034145355225,
1871
+ "learning_rate": 3.9182431138144315e-06,
1872
+ "loss": 0.8731,
1873
+ "step": 2480
1874
+ },
1875
+ {
1876
+ "epoch": 0.11153415453527436,
1877
+ "grad_norm": 3.8455569744110107,
1878
+ "learning_rate": 3.894221229038995e-06,
1879
+ "loss": 1.0125,
1880
+ "step": 2490
1881
+ },
1882
+ {
1883
+ "epoch": 0.11198208286674133,
1884
+ "grad_norm": 4.499962329864502,
1885
+ "learning_rate": 3.870193415893709e-06,
1886
+ "loss": 1.0228,
1887
+ "step": 2500
1888
+ },
1889
+ {
1890
+ "epoch": 0.1124300111982083,
1891
+ "grad_norm": 6.230105876922607,
1892
+ "learning_rate": 3.846160662067859e-06,
1893
+ "loss": 1.1794,
1894
+ "step": 2510
1895
+ },
1896
+ {
1897
+ "epoch": 0.11287793952967526,
1898
+ "grad_norm": 7.316727638244629,
1899
+ "learning_rate": 3.8221239554538275e-06,
1900
+ "loss": 1.2728,
1901
+ "step": 2520
1902
+ },
1903
+ {
1904
+ "epoch": 0.11332586786114222,
1905
+ "grad_norm": 3.291714906692505,
1906
+ "learning_rate": 3.798084284106478e-06,
1907
+ "loss": 1.167,
1908
+ "step": 2530
1909
+ },
1910
+ {
1911
+ "epoch": 0.11377379619260919,
1912
+ "grad_norm": 5.075141429901123,
1913
+ "learning_rate": 3.7740426362025424e-06,
1914
+ "loss": 1.0547,
1915
+ "step": 2540
1916
+ },
1917
+ {
1918
+ "epoch": 0.11422172452407615,
1919
+ "grad_norm": 3.961540937423706,
1920
+ "learning_rate": 3.7500000000000005e-06,
1921
+ "loss": 1.0713,
1922
+ "step": 2550
1923
+ },
1924
+ {
1925
+ "epoch": 0.11422172452407615,
1926
+ "eval_loss": 1.2046430110931396,
1927
+ "eval_runtime": 51.7175,
1928
+ "eval_samples_per_second": 9.668,
1929
+ "eval_steps_per_second": 9.668,
1930
+ "step": 2550
1931
  }
1932
  ],
1933
  "logging_steps": 10,
 
1947
  "attributes": {}
1948
  }
1949
  },
1950
+ "total_flos": 3.2997299945472e+17,
1951
  "train_batch_size": 4,
1952
  "trial_name": null,
1953
  "trial_params": null