neuralwonderland committed on
Commit
c69f877
·
verified ·
1 Parent(s): c46748f

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2693f72e813e713d406690d49d8156881475ef3cf8f6818b5e9056a7b4a22c0c
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8c4d05cebc8a6230506025ac32b92b43f97706b15c5ef0630b359fcedb61fd
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b288ca22be915aaaaf317d351bd953b3936b15f6015824c642dc247009b8ec4
3
  size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0768f36181b7ac2e23e854e832362cdba94250b37612dba5c19cd4985727d9a
3
  size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6ae471f42e4ea6fa3a444149858dc300e531b4ad7528d71e7842bbc7d6626eb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc077b33af19e58da72048d8d3eba838a65dd66577cf1fffba1ce82ab86f524d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd4de6d323250fa29100ace84fac2a12c56e0aeb7da01a14da471c2e00185282
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e289fadf25e00f6e814140ec41c4ae5fbbf1fcf1f7907f11f4e9d252c610f893
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2077045440673828,
3
- "best_model_checkpoint": "./output/checkpoint-1950",
4
- "epoch": 0.10078387458006718,
5
  "eval_steps": 150,
6
- "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1702,6 +1702,119 @@
1702
  "eval_samples_per_second": 9.684,
1703
  "eval_steps_per_second": 9.684,
1704
  "step": 2250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1705
  }
1706
  ],
1707
  "logging_steps": 10,
@@ -1721,7 +1834,7 @@
1721
  "attributes": {}
1722
  }
1723
  },
1724
- "total_flos": 2.911099213817856e+17,
1725
  "train_batch_size": 4,
1726
  "trial_name": null,
1727
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2038679122924805,
3
+ "best_model_checkpoint": "./output/checkpoint-2400",
4
+ "epoch": 0.10750279955207166,
5
  "eval_steps": 150,
6
+ "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1702
  "eval_samples_per_second": 9.684,
1703
  "eval_steps_per_second": 9.684,
1704
  "step": 2250
1705
+ },
1706
+ {
1707
+ "epoch": 0.10123180291153415,
1708
+ "grad_norm": 3.644266128540039,
1709
+ "learning_rate": 4.443230854565133e-06,
1710
+ "loss": 1.0985,
1711
+ "step": 2260
1712
+ },
1713
+ {
1714
+ "epoch": 0.10167973124300111,
1715
+ "grad_norm": 4.662050724029541,
1716
+ "learning_rate": 4.4195883554948885e-06,
1717
+ "loss": 1.3397,
1718
+ "step": 2270
1719
+ },
1720
+ {
1721
+ "epoch": 0.10212765957446808,
1722
+ "grad_norm": 5.3237385749816895,
1723
+ "learning_rate": 4.3959183322699466e-06,
1724
+ "loss": 1.1351,
1725
+ "step": 2280
1726
+ },
1727
+ {
1728
+ "epoch": 0.10257558790593505,
1729
+ "grad_norm": 4.3604207038879395,
1730
+ "learning_rate": 4.372221757872255e-06,
1731
+ "loss": 1.1208,
1732
+ "step": 2290
1733
+ },
1734
+ {
1735
+ "epoch": 0.10302351623740201,
1736
+ "grad_norm": 3.731410264968872,
1737
+ "learning_rate": 4.3484996063751725e-06,
1738
+ "loss": 1.1584,
1739
+ "step": 2300
1740
+ },
1741
+ {
1742
+ "epoch": 0.10347144456886898,
1743
+ "grad_norm": 4.031397342681885,
1744
+ "learning_rate": 4.324752852903435e-06,
1745
+ "loss": 0.9656,
1746
+ "step": 2310
1747
+ },
1748
+ {
1749
+ "epoch": 0.10391937290033594,
1750
+ "grad_norm": 3.564148187637329,
1751
+ "learning_rate": 4.300982473593068e-06,
1752
+ "loss": 1.0031,
1753
+ "step": 2320
1754
+ },
1755
+ {
1756
+ "epoch": 0.1043673012318029,
1757
+ "grad_norm": 5.459331035614014,
1758
+ "learning_rate": 4.277189445551261e-06,
1759
+ "loss": 1.0037,
1760
+ "step": 2330
1761
+ },
1762
+ {
1763
+ "epoch": 0.10481522956326987,
1764
+ "grad_norm": 4.870905876159668,
1765
+ "learning_rate": 4.253374746816209e-06,
1766
+ "loss": 0.9615,
1767
+ "step": 2340
1768
+ },
1769
+ {
1770
+ "epoch": 0.10526315789473684,
1771
+ "grad_norm": 5.284097671508789,
1772
+ "learning_rate": 4.229539356316898e-06,
1773
+ "loss": 1.3278,
1774
+ "step": 2350
1775
+ },
1776
+ {
1777
+ "epoch": 0.1057110862262038,
1778
+ "grad_norm": 5.323864459991455,
1779
+ "learning_rate": 4.205684253832877e-06,
1780
+ "loss": 1.1903,
1781
+ "step": 2360
1782
+ },
1783
+ {
1784
+ "epoch": 0.10615901455767077,
1785
+ "grad_norm": 7.844208717346191,
1786
+ "learning_rate": 4.1818104199539735e-06,
1787
+ "loss": 1.056,
1788
+ "step": 2370
1789
+ },
1790
+ {
1791
+ "epoch": 0.10660694288913773,
1792
+ "grad_norm": 4.325316905975342,
1793
+ "learning_rate": 4.1579188360399916e-06,
1794
+ "loss": 1.2431,
1795
+ "step": 2380
1796
+ },
1797
+ {
1798
+ "epoch": 0.1070548712206047,
1799
+ "grad_norm": 3.5362424850463867,
1800
+ "learning_rate": 4.134010484180368e-06,
1801
+ "loss": 1.1804,
1802
+ "step": 2390
1803
+ },
1804
+ {
1805
+ "epoch": 0.10750279955207166,
1806
+ "grad_norm": 3.2404041290283203,
1807
+ "learning_rate": 4.110086347153807e-06,
1808
+ "loss": 1.1556,
1809
+ "step": 2400
1810
+ },
1811
+ {
1812
+ "epoch": 0.10750279955207166,
1813
+ "eval_loss": 1.2038679122924805,
1814
+ "eval_runtime": 51.7303,
1815
+ "eval_samples_per_second": 9.666,
1816
+ "eval_steps_per_second": 9.666,
1817
+ "step": 2400
1818
  }
1819
  ],
1820
  "logging_steps": 10,
 
1834
  "attributes": {}
1835
  }
1836
  },
1837
+ "total_flos": 3.109754858840064e+17,
1838
  "train_batch_size": 4,
1839
  "trial_name": null,
1840
  "trial_params": null