neuralwonderland commited on
Commit
c611f0a
·
verified ·
1 Parent(s): 8892878

Training in progress, step 2550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89a617556f243d6ec03e30f237b1e2ebff58dd7cb54252ae3f4c46c1a70ae592
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c9efcc4fca0de984b6adfde8ddbe71abae266a330d7c7082b6986689320dc5
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:618391452d93ea3d8d163016e74d38e813add986ec344fe6b594c740bf4e1da4
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eef0c312f254b5fe732cb8fd6be97b205f3e1334c1ecab36130b3371b9def4ac
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab415317bf6085550b52e3662fa27ce809ca87c7e59c2544438ec52f9a4b522
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dea3ce7e5867473893ebfaed8870e7b8f0b41b7bff140a091f47fbfd87102dc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e289fadf25e00f6e814140ec41c4ae5fbbf1fcf1f7907f11f4e9d252c610f893
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a0eec867f87d3a5ffc128abe2e98cc84850220a61f050e675a6109f6d217117
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.3416612446308136,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
- "epoch": 0.13570055411059595,
5
  "eval_steps": 150,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1815,6 +1815,119 @@
1815
  "eval_samples_per_second": 12.631,
1816
  "eval_steps_per_second": 12.631,
1817
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1818
  }
1819
  ],
1820
  "logging_steps": 10,
@@ -1834,7 +1947,7 @@
1834
  "attributes": {}
1835
  }
1836
  },
1837
- "total_flos": 1.3574956494156595e+17,
1838
  "train_batch_size": 4,
1839
  "trial_name": null,
1840
  "trial_params": null
 
1
  {
2
  "best_metric": 0.3416612446308136,
3
  "best_model_checkpoint": "./output/checkpoint-2400",
4
+ "epoch": 0.1441818387425082,
5
  "eval_steps": 150,
6
+ "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1815
  "eval_samples_per_second": 12.631,
1816
  "eval_steps_per_second": 12.631,
1817
  "step": 2400
1818
+ },
1819
+ {
1820
+ "epoch": 0.13626597308605676,
1821
+ "grad_norm": 2.4910035133361816,
1822
+ "learning_rate": 4.0861474083878765e-06,
1823
+ "loss": 0.0915,
1824
+ "step": 2410
1825
+ },
1826
+ {
1827
+ "epoch": 0.1368313920615176,
1828
+ "grad_norm": 3.023247718811035,
1829
+ "learning_rate": 4.062194651918585e-06,
1830
+ "loss": 0.1348,
1831
+ "step": 2420
1832
+ },
1833
+ {
1834
+ "epoch": 0.1373968110369784,
1835
+ "grad_norm": 2.995408773422241,
1836
+ "learning_rate": 4.0382290623499384e-06,
1837
+ "loss": 0.226,
1838
+ "step": 2430
1839
+ },
1840
+ {
1841
+ "epoch": 0.1379622300124392,
1842
+ "grad_norm": 0.3714699149131775,
1843
+ "learning_rate": 4.014251624813453e-06,
1844
+ "loss": 0.1973,
1845
+ "step": 2440
1846
+ },
1847
+ {
1848
+ "epoch": 0.13852764898790004,
1849
+ "grad_norm": 3.3884501457214355,
1850
+ "learning_rate": 3.990263324927675e-06,
1851
+ "loss": 0.3278,
1852
+ "step": 2450
1853
+ },
1854
+ {
1855
+ "epoch": 0.13909306796336085,
1856
+ "grad_norm": 5.731414318084717,
1857
+ "learning_rate": 3.966265148757655e-06,
1858
+ "loss": 0.2329,
1859
+ "step": 2460
1860
+ },
1861
+ {
1862
+ "epoch": 0.13965848693882166,
1863
+ "grad_norm": 4.77826452255249,
1864
+ "learning_rate": 3.9422580827744224e-06,
1865
+ "loss": 0.2764,
1866
+ "step": 2470
1867
+ },
1868
+ {
1869
+ "epoch": 0.1402239059142825,
1870
+ "grad_norm": 0.8867257833480835,
1871
+ "learning_rate": 3.9182431138144315e-06,
1872
+ "loss": 0.274,
1873
+ "step": 2480
1874
+ },
1875
+ {
1876
+ "epoch": 0.1407893248897433,
1877
+ "grad_norm": 0.6366099715232849,
1878
+ "learning_rate": 3.894221229038995e-06,
1879
+ "loss": 0.2898,
1880
+ "step": 2490
1881
+ },
1882
+ {
1883
+ "epoch": 0.1413547438652041,
1884
+ "grad_norm": 8.020559310913086,
1885
+ "learning_rate": 3.870193415893709e-06,
1886
+ "loss": 0.2701,
1887
+ "step": 2500
1888
+ },
1889
+ {
1890
+ "epoch": 0.14192016284066494,
1891
+ "grad_norm": 0.9654809236526489,
1892
+ "learning_rate": 3.846160662067859e-06,
1893
+ "loss": 0.1817,
1894
+ "step": 2510
1895
+ },
1896
+ {
1897
+ "epoch": 0.14248558181612575,
1898
+ "grad_norm": 3.0796737670898438,
1899
+ "learning_rate": 3.8221239554538275e-06,
1900
+ "loss": 0.1611,
1901
+ "step": 2520
1902
+ },
1903
+ {
1904
+ "epoch": 0.14305100079158656,
1905
+ "grad_norm": 3.5335640907287598,
1906
+ "learning_rate": 3.798084284106478e-06,
1907
+ "loss": 0.2191,
1908
+ "step": 2530
1909
+ },
1910
+ {
1911
+ "epoch": 0.1436164197670474,
1912
+ "grad_norm": 7.279317378997803,
1913
+ "learning_rate": 3.7740426362025424e-06,
1914
+ "loss": 0.2094,
1915
+ "step": 2540
1916
+ },
1917
+ {
1918
+ "epoch": 0.1441818387425082,
1919
+ "grad_norm": 0.32002538442611694,
1920
+ "learning_rate": 3.7500000000000005e-06,
1921
+ "loss": 0.1295,
1922
+ "step": 2550
1923
+ },
1924
+ {
1925
+ "epoch": 0.1441818387425082,
1926
+ "eval_loss": 0.35380080342292786,
1927
+ "eval_runtime": 39.6008,
1928
+ "eval_samples_per_second": 12.626,
1929
+ "eval_steps_per_second": 12.626,
1930
+ "step": 2550
1931
  }
1932
  ],
1933
  "logging_steps": 10,
 
1947
  "attributes": {}
1948
  }
1949
  },
1950
+ "total_flos": 1.4369993015279616e+17,
1951
  "train_batch_size": 4,
1952
  "trial_name": null,
1953
  "trial_params": null