alicegoesdown committed (verified)
Commit f59dd3a · Parent: 3296209

Training in progress, step 2250, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:344e59bc78799202e4281f32ff32ec9afbe2bffaf62d018b9713f07526c9ffd7
+oid sha256:b50df0d26d180087225139d1cbccb1e4f8988f3ba78da80175de8d2ccb715425
 size 653434568
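The checkpoint binaries in this commit are tracked with Git LFS, so each diff here only rewrites a three-line pointer file (spec version, sha256 oid, byte size); the ~653 MB adapter blob itself is stored out of band. Below is a minimal sketch of checking a pulled blob against its pointer, using only the Python standard library; the local path is an assumption, and the pointer text is copied from the hunk above.

```python
import hashlib


def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Compare a materialized Git LFS blob against its pointer's oid and size."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]   # strip the "sha256:" prefix
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    actual_size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
            actual_size += len(chunk)
    return digest.hexdigest() == expected_oid and actual_size == expected_size


# Pointer contents taken from this commit; the local path is an assumption.
pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:b50df0d26d180087225139d1cbccb1e4f8988f3ba78da80175de8d2ccb715425
size 653434568
"""
print(verify_lfs_pointer(pointer, "last-checkpoint/adapter_model.safetensors"))
```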
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a021549015dfb97be1e802ae8c40492f2707149eb971b9e7ac1a848051d11f9a
+oid sha256:1349d7c7820ba989d22e9c3dafb981ed735ef01b3315b8e5cfd62c75bb5677b0
 size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c3977d0af1ade9045b3a39ec32003586e6a5b397305b71605a9903ef5005b58
+oid sha256:2aee148c55266a4fa01d336e6c825eb6826c0bacd0e25635305678a84af39fc7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1fb4bc4166507f8f33f2952bbfed6bcfe1fc38cebd715a42a7763a997fdb86c8
+oid sha256:a5c8c9c4d7ddd30debc6fb341973ff9c39ea0dc55bc39bc535243ffe8a16ce90
 size 1256
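Together with the adapter weights, the optimizer, RNG and LR-scheduler states above are what lets transformers continue this run from step 2250 instead of restarting it. A minimal resume sketch follows; it assumes a model and datasets built the same way as in the original run (they are not part of this commit, so they appear as placeholders), and reuses only the values visible in trainer_state.json.

```python
from transformers import Trainer, TrainingArguments


def resume_from_last_checkpoint(model, train_dataset, eval_dataset):
    """Hypothetical wrapper: continue training from ./last-checkpoint.

    The real TrainingArguments of this run are not in the commit; only the
    values visible in trainer_state.json are reused here.
    """
    args = TrainingArguments(
        output_dir="./output",
        per_device_train_batch_size=8,   # "train_batch_size": 8
        logging_steps=10,                # "logging_steps": 10
        eval_steps=150,                  # "eval_steps": 150
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    # Trainer reloads optimizer.pt, scheduler.pt and rng_state.pth from the
    # checkpoint directory and picks up at global_step 2250.
    return trainer.train(resume_from_checkpoint="last-checkpoint")
```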
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6301392316818237,
-  "best_model_checkpoint": "./output/checkpoint-2100",
-  "epoch": 0.1862032275226104,
+  "best_metric": 1.5997846126556396,
+  "best_model_checkpoint": "./output/checkpoint-2250",
+  "epoch": 0.19950345805993971,
   "eval_steps": 150,
-  "global_step": 2100,
+  "global_step": 2250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1589,6 +1589,119 @@
       "eval_samples_per_second": 8.411,
       "eval_steps_per_second": 8.411,
       "step": 2100
+    },
+    {
+      "epoch": 0.18708990955843235,
+      "grad_norm": 8.962486267089844,
+      "learning_rate": 6.391876823712319e-05,
+      "loss": 1.4843,
+      "step": 2110
+    },
+    {
+      "epoch": 0.1879765915942543,
+      "grad_norm": 10.67493724822998,
+      "learning_rate": 6.361058499664857e-05,
+      "loss": 1.6638,
+      "step": 2120
+    },
+    {
+      "epoch": 0.18886327363007627,
+      "grad_norm": 8.06369686126709,
+      "learning_rate": 6.330184227833377e-05,
+      "loss": 1.6439,
+      "step": 2130
+    },
+    {
+      "epoch": 0.1897499556658982,
+      "grad_norm": 9.005534172058105,
+      "learning_rate": 6.299255277338267e-05,
+      "loss": 1.5289,
+      "step": 2140
+    },
+    {
+      "epoch": 0.19063663770172015,
+      "grad_norm": 9.255204200744629,
+      "learning_rate": 6.268272919547539e-05,
+      "loss": 1.46,
+      "step": 2150
+    },
+    {
+      "epoch": 0.1915233197375421,
+      "grad_norm": 7.344980239868164,
+      "learning_rate": 6.237238428024573e-05,
+      "loss": 1.4932,
+      "step": 2160
+    },
+    {
+      "epoch": 0.19241000177336406,
+      "grad_norm": 8.692234992980957,
+      "learning_rate": 6.206153078475765e-05,
+      "loss": 1.6582,
+      "step": 2170
+    },
+    {
+      "epoch": 0.19329668380918602,
+      "grad_norm": 7.381601333618164,
+      "learning_rate": 6.175018148698078e-05,
+      "loss": 1.5007,
+      "step": 2180
+    },
+    {
+      "epoch": 0.19418336584500798,
+      "grad_norm": 7.794239044189453,
+      "learning_rate": 6.143834918526529e-05,
+      "loss": 1.6501,
+      "step": 2190
+    },
+    {
+      "epoch": 0.19507004788082993,
+      "grad_norm": 8.13096809387207,
+      "learning_rate": 6.112604669781574e-05,
+      "loss": 1.6862,
+      "step": 2200
+    },
+    {
+      "epoch": 0.1959567299166519,
+      "grad_norm": 6.846219539642334,
+      "learning_rate": 6.081328686216419e-05,
+      "loss": 1.5702,
+      "step": 2210
+    },
+    {
+      "epoch": 0.19684341195247385,
+      "grad_norm": 8.771533966064453,
+      "learning_rate": 6.0500082534642485e-05,
+      "loss": 1.6259,
+      "step": 2220
+    },
+    {
+      "epoch": 0.1977300939882958,
+      "grad_norm": 6.50418758392334,
+      "learning_rate": 6.01864465898538e-05,
+      "loss": 1.6948,
+      "step": 2230
+    },
+    {
+      "epoch": 0.19861677602411776,
+      "grad_norm": 8.83719539642334,
+      "learning_rate": 5.987239192014337e-05,
+      "loss": 1.643,
+      "step": 2240
+    },
+    {
+      "epoch": 0.19950345805993971,
+      "grad_norm": 7.24541711807251,
+      "learning_rate": 5.955793143506864e-05,
+      "loss": 1.624,
+      "step": 2250
+    },
+    {
+      "epoch": 0.19950345805993971,
+      "eval_loss": 1.5997846126556396,
+      "eval_runtime": 59.4561,
+      "eval_samples_per_second": 8.41,
+      "eval_steps_per_second": 8.41,
+      "step": 2250
     }
   ],
   "logging_steps": 10,
@@ -1608,7 +1721,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.071503650259927e+17,
+  "total_flos": 7.577803153093755e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
  "trial_params": null