alicegoesdown committed (verified)
Commit 6a363ba · Parent: 59a73b0

Training in progress, step 2400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b50df0d26d180087225139d1cbccb1e4f8988f3ba78da80175de8d2ccb715425
+ oid sha256:dbbae645955e4b08b9adb4332c2a3fa1333fd7e3b873cd110fb3133e27e1f642
  size 653434568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1349d7c7820ba989d22e9c3dafb981ed735ef01b3315b8e5cfd62c75bb5677b0
+ oid sha256:a895dc6347988ee7d21290f36a1a0888df619f8426f81d076b7e3887e811b749
  size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2aee148c55266a4fa01d336e6c825eb6826c0bacd0e25635305678a84af39fc7
+ oid sha256:cb42fb8f2d70da71e5001efb218126512515054d130cd6dcd017d036e0a538be
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5c8c9c4d7ddd30debc6fb341973ff9c39ea0dc55bc39bc535243ffe8a16ce90
+ oid sha256:b1fe9c3f91db580a8b6ede7b15e0466000f08c889875e4652a5c9ec9f77e1fab
  size 1256
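
The four binary files above are stored as Git LFS pointers: each pointer records only the spec version, a sha256 oid, and the byte size of the real file. As a hedged illustration (not part of this commit; the local path is an assumption), the Python sketch below checks a downloaded copy of the adapter weights against the new oid and size shown in the diff above.

import hashlib
import os

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so a large checkpoint never has to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local copy; oid and size are taken from the new
# adapter_model.safetensors pointer in this commit.
path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "dbbae645955e4b08b9adb4332c2a3fa1333fd7e3b873cd110fb3133e27e1f642"
expected_size = 653434568

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256_of(path) == expected_oid, "sha256 mismatch"
print("local file matches the LFS pointer")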
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.5997846126556396,
- "best_model_checkpoint": "./output/checkpoint-2250",
- "epoch": 0.19950345805993971,
+ "best_metric": 1.5888803005218506,
+ "best_model_checkpoint": "./output/checkpoint-2400",
+ "epoch": 0.21280368859726903,
  "eval_steps": 150,
- "global_step": 2250,
+ "global_step": 2400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1702,6 +1702,119 @@
  "eval_samples_per_second": 8.41,
  "eval_steps_per_second": 8.41,
  "step": 2250
+ },
+ {
+ "epoch": 0.20039014009576167,
+ "grad_norm": 13.114813804626465,
+ "learning_rate": 5.9243078060868454e-05,
+ "loss": 1.5787,
+ "step": 2260
+ },
+ {
+ "epoch": 0.20127682213158363,
+ "grad_norm": 6.7087321281433105,
+ "learning_rate": 5.8927844739931854e-05,
+ "loss": 1.3785,
+ "step": 2270
+ },
+ {
+ "epoch": 0.20216350416740556,
+ "grad_norm": 6.644030570983887,
+ "learning_rate": 5.8612244430265966e-05,
+ "loss": 1.5126,
+ "step": 2280
+ },
+ {
+ "epoch": 0.2030501862032275,
+ "grad_norm": 10.291509628295898,
+ "learning_rate": 5.829629010496342e-05,
+ "loss": 1.4863,
+ "step": 2290
+ },
+ {
+ "epoch": 0.20393686823904947,
+ "grad_norm": 6.426754951477051,
+ "learning_rate": 5.797999475166898e-05,
+ "loss": 1.5586,
+ "step": 2300
+ },
+ {
+ "epoch": 0.20482355027487142,
+ "grad_norm": 9.044095039367676,
+ "learning_rate": 5.766337137204581e-05,
+ "loss": 1.5063,
+ "step": 2310
+ },
+ {
+ "epoch": 0.20571023231069338,
+ "grad_norm": 8.852991104125977,
+ "learning_rate": 5.734643298124092e-05,
+ "loss": 1.7211,
+ "step": 2320
+ },
+ {
+ "epoch": 0.20659691434651534,
+ "grad_norm": 73.65837860107422,
+ "learning_rate": 5.702919260735016e-05,
+ "loss": 1.5191,
+ "step": 2330
+ },
+ {
+ "epoch": 0.2074835963823373,
+ "grad_norm": 8.413342475891113,
+ "learning_rate": 5.671166329088279e-05,
+ "loss": 1.5013,
+ "step": 2340
+ },
+ {
+ "epoch": 0.20837027841815925,
+ "grad_norm": 6.938820838928223,
+ "learning_rate": 5.639385808422532e-05,
+ "loss": 1.5099,
+ "step": 2350
+ },
+ {
+ "epoch": 0.2092569604539812,
+ "grad_norm": 7.757599353790283,
+ "learning_rate": 5.6075790051105044e-05,
+ "loss": 1.5848,
+ "step": 2360
+ },
+ {
+ "epoch": 0.21014364248980316,
+ "grad_norm": 7.502821445465088,
+ "learning_rate": 5.5757472266052994e-05,
+ "loss": 1.7166,
+ "step": 2370
+ },
+ {
+ "epoch": 0.21103032452562512,
+ "grad_norm": 11.332352638244629,
+ "learning_rate": 5.543891781386657e-05,
+ "loss": 1.671,
+ "step": 2380
+ },
+ {
+ "epoch": 0.21191700656144707,
+ "grad_norm": 7.515905380249023,
+ "learning_rate": 5.512013978907158e-05,
+ "loss": 1.6298,
+ "step": 2390
+ },
+ {
+ "epoch": 0.21280368859726903,
+ "grad_norm": 6.094747543334961,
+ "learning_rate": 5.4801151295384105e-05,
+ "loss": 1.5135,
+ "step": 2400
+ },
+ {
+ "epoch": 0.21280368859726903,
+ "eval_loss": 1.5888803005218506,
+ "eval_runtime": 59.4453,
+ "eval_samples_per_second": 8.411,
+ "eval_steps_per_second": 8.411,
+ "step": 2400
  }
  ],
  "logging_steps": 10,
@@ -1721,7 +1834,7 @@
  "attributes": {}
  }
  },
- "total_flos": 7.577803153093755e+17,
+ "total_flos": 8.081845027159081e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null