iamnguyen committed
Commit 0c0ee93 (verified)
1 Parent(s): 2f2f4f2

Training in progress, step 256, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55f63548d61d6c65a1654bc4bf0a1b62fc84fb53fb4d0f3eb1a55325d89267bb
+oid sha256:d40101cb3554c5d0ee9abbb345dec3e03351f3cc8ceca5279467894c1cd29246
 size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df0b473c11c1b6b133d909acb6c86cf9a832deb9c2a2162cac061c32e7b84d08
-size 240728084
+oid sha256:2552fd74ef5bd48e004d8ae63a0c71c30b9b81b8546aca6ef459461e383a2778
+size 240728404
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:703f8b43f2697cc055bff9862430a4543fd4d2968318f68935d73a18b734e1eb
+oid sha256:0b9365a698c9eb8b230edf4bfa8724f76b1a22ec8cb4f788c7cb4ab8bca424d5
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015516311320240098,
+  "epoch": 0.01655073207492277,
   "eval_steps": 500,
-  "global_step": 240,
+  "global_step": 256,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1687,6 +1687,118 @@
       "learning_rate": 9.999239668895627e-06,
       "loss": 1.4116,
       "step": 240
+    },
+    {
+      "epoch": 0.015580962617407766,
+      "grad_norm": 4.260445594787598,
+      "learning_rate": 9.999221673982747e-06,
+      "loss": 1.3179,
+      "step": 241
+    },
+    {
+      "epoch": 0.015645613914575432,
+      "grad_norm": 4.778342247009277,
+      "learning_rate": 9.999203468625017e-06,
+      "loss": 1.3185,
+      "step": 242
+    },
+    {
+      "epoch": 0.0157102652117431,
+      "grad_norm": 3.723858594894409,
+      "learning_rate": 9.999185052823207e-06,
+      "loss": 1.3,
+      "step": 243
+    },
+    {
+      "epoch": 0.015774916508910768,
+      "grad_norm": 3.748918294906616,
+      "learning_rate": 9.99916642657809e-06,
+      "loss": 1.455,
+      "step": 244
+    },
+    {
+      "epoch": 0.015839567806078435,
+      "grad_norm": 4.436662197113037,
+      "learning_rate": 9.999147589890452e-06,
+      "loss": 1.3895,
+      "step": 245
+    },
+    {
+      "epoch": 0.0159042191032461,
+      "grad_norm": 4.519418716430664,
+      "learning_rate": 9.999128542761085e-06,
+      "loss": 1.2948,
+      "step": 246
+    },
+    {
+      "epoch": 0.015968870400413768,
+      "grad_norm": 4.407564640045166,
+      "learning_rate": 9.99910928519079e-06,
+      "loss": 1.5275,
+      "step": 247
+    },
+    {
+      "epoch": 0.016033521697581436,
+      "grad_norm": 4.254813194274902,
+      "learning_rate": 9.999089817180378e-06,
+      "loss": 1.3428,
+      "step": 248
+    },
+    {
+      "epoch": 0.016098172994749103,
+      "grad_norm": 4.610138893127441,
+      "learning_rate": 9.999070138730668e-06,
+      "loss": 1.3733,
+      "step": 249
+    },
+    {
+      "epoch": 0.01616282429191677,
+      "grad_norm": 3.6939423084259033,
+      "learning_rate": 9.99905024984249e-06,
+      "loss": 1.3943,
+      "step": 250
+    },
+    {
+      "epoch": 0.016227475589084436,
+      "grad_norm": 3.755028247833252,
+      "learning_rate": 9.999030150516681e-06,
+      "loss": 1.4256,
+      "step": 251
+    },
+    {
+      "epoch": 0.016292126886252103,
+      "grad_norm": 4.2649149894714355,
+      "learning_rate": 9.999009840754085e-06,
+      "loss": 1.4257,
+      "step": 252
+    },
+    {
+      "epoch": 0.01635677818341977,
+      "grad_norm": 3.718479633331299,
+      "learning_rate": 9.998989320555562e-06,
+      "loss": 1.3312,
+      "step": 253
+    },
+    {
+      "epoch": 0.01642142948058744,
+      "grad_norm": 3.7253224849700928,
+      "learning_rate": 9.998968589921969e-06,
+      "loss": 1.37,
+      "step": 254
+    },
+    {
+      "epoch": 0.016486080777755104,
+      "grad_norm": 3.8125829696655273,
+      "learning_rate": 9.998947648854182e-06,
+      "loss": 1.3721,
+      "step": 255
+    },
+    {
+      "epoch": 0.01655073207492277,
+      "grad_norm": 4.105193138122559,
+      "learning_rate": 9.998926497353084e-06,
+      "loss": 1.3238,
+      "step": 256
     }
   ],
   "logging_steps": 1,
@@ -1706,7 +1818,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.536003023152988e+17,
+  "total_flos": 1.637374085936087e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null