ToastyPigeon committed
Commit bf79fa3 · verified · 1 Parent(s): 374e615

Training in progress, step 228, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:31fbb834535d30942ff8926c5ac856548f98ca3a71f1c2f7d371cb8f822d0e3c
+ oid sha256:0184bb6acc68a58cc0dacb82914e388bbec38f79cd33c854b57afd7c43853aee
  size 1101095848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d315cdc1640f0b8c81f2afdc924fde254c2c741910745b9daca663b83e61a59f
+ oid sha256:43853a514527a555e13374ba3f728123fb6f36e697df82e0208e60f943b06b9e
  size 841204242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:21bf96c648c2b81637c2a374c88eb7bd6aaef1de82d55c601d0b411131031f36
+ oid sha256:c6bc11267b6d9e2f43375eecd976c5fb34f3d1bffe4ba00a0fba984b05edabd9
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0398efb9f2d009f44e4675efc73a4fa2f0e6d741b98fe4c59c94a120cfb58052
+ oid sha256:f3ee55838ad56f50847c88f72590af5e2dcdcbe347ba3d25f77d1eccfe679d3a
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9078947368421053,
+ "epoch": 1.0,
  "eval_steps": 23,
- "global_step": 207,
+ "global_step": 228,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1536,6 +1536,153 @@
  "eval_samples_per_second": 0.365,
  "eval_steps_per_second": 0.365,
  "step": 207
+ },
+ {
+ "epoch": 0.9122807017543859,
+ "grad_norm": 0.04384492337703705,
+ "learning_rate": 5.928091439526226e-06,
+ "loss": 2.2382,
+ "step": 208
+ },
+ {
+ "epoch": 0.9166666666666666,
+ "grad_norm": 0.044699691236019135,
+ "learning_rate": 5.838168799362318e-06,
+ "loss": 2.1484,
+ "step": 209
+ },
+ {
+ "epoch": 0.9210526315789473,
+ "grad_norm": 0.043975383043289185,
+ "learning_rate": 5.752744728439006e-06,
+ "loss": 2.208,
+ "step": 210
+ },
+ {
+ "epoch": 0.9254385964912281,
+ "grad_norm": 0.0446508526802063,
+ "learning_rate": 5.671836966996916e-06,
+ "loss": 2.0749,
+ "step": 211
+ },
+ {
+ "epoch": 0.9298245614035088,
+ "grad_norm": 0.04373237490653992,
+ "learning_rate": 5.595462317362849e-06,
+ "loss": 2.372,
+ "step": 212
+ },
+ {
+ "epoch": 0.9342105263157895,
+ "grad_norm": 0.04464460536837578,
+ "learning_rate": 5.523636640460405e-06,
+ "loss": 2.2327,
+ "step": 213
+ },
+ {
+ "epoch": 0.9385964912280702,
+ "grad_norm": 0.05024990811944008,
+ "learning_rate": 5.456374852516083e-06,
+ "loss": 2.1838,
+ "step": 214
+ },
+ {
+ "epoch": 0.9429824561403509,
+ "grad_norm": 0.044989317655563354,
+ "learning_rate": 5.3936909219616205e-06,
+ "loss": 2.373,
+ "step": 215
+ },
+ {
+ "epoch": 0.9473684210526315,
+ "grad_norm": 0.04446178302168846,
+ "learning_rate": 5.335597866533116e-06,
+ "loss": 2.0206,
+ "step": 216
+ },
+ {
+ "epoch": 0.9517543859649122,
+ "grad_norm": 0.0473959781229496,
+ "learning_rate": 5.282107750567588e-06,
+ "loss": 2.0744,
+ "step": 217
+ },
+ {
+ "epoch": 0.956140350877193,
+ "grad_norm": 0.046764299273490906,
+ "learning_rate": 5.233231682497572e-06,
+ "loss": 2.0287,
+ "step": 218
+ },
+ {
+ "epoch": 0.9605263157894737,
+ "grad_norm": 0.042999010533094406,
+ "learning_rate": 5.1889798125441795e-06,
+ "loss": 2.3255,
+ "step": 219
+ },
+ {
+ "epoch": 0.9649122807017544,
+ "grad_norm": 0.04476455599069595,
+ "learning_rate": 5.149361330609188e-06,
+ "loss": 2.3038,
+ "step": 220
+ },
+ {
+ "epoch": 0.9692982456140351,
+ "grad_norm": 0.04403753951191902,
+ "learning_rate": 5.114384464366541e-06,
+ "loss": 2.2972,
+ "step": 221
+ },
+ {
+ "epoch": 0.9736842105263158,
+ "grad_norm": 0.04389164224267006,
+ "learning_rate": 5.084056477553695e-06,
+ "loss": 2.2914,
+ "step": 222
+ },
+ {
+ "epoch": 0.9780701754385965,
+ "grad_norm": 0.044895585626363754,
+ "learning_rate": 5.058383668463131e-06,
+ "loss": 2.2486,
+ "step": 223
+ },
+ {
+ "epoch": 0.9824561403508771,
+ "grad_norm": 0.04405970871448517,
+ "learning_rate": 5.0373713686343774e-06,
+ "loss": 2.1718,
+ "step": 224
+ },
+ {
+ "epoch": 0.9868421052631579,
+ "grad_norm": 0.04554829001426697,
+ "learning_rate": 5.021023941746794e-06,
+ "loss": 2.1137,
+ "step": 225
+ },
+ {
+ "epoch": 0.9912280701754386,
+ "grad_norm": 0.041737962514162064,
+ "learning_rate": 5.009344782713349e-06,
+ "loss": 2.2621,
+ "step": 226
+ },
+ {
+ "epoch": 0.9956140350877193,
+ "grad_norm": 0.04512747749686241,
+ "learning_rate": 5.0023363169756045e-06,
+ "loss": 2.3551,
+ "step": 227
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.04331080988049507,
+ "learning_rate": 5e-06,
+ "loss": 2.2325,
+ "step": 228
  }
  ],
  "logging_steps": 1,
@@ -1550,12 +1697,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.161249603094315e+18,
+ "total_flos": 1.2790575338430136e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null