prxy5604 commited on
Commit
68bb731
·
verified ·
1 Parent(s): 2fb95b0

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18fe49e9417b99ce6c776bdfc9bc98e835ce876ec876396d18bc528860bd5afc
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fef308b8921bdaa4b3462e88f1c7c0ade05615f1ab91ad36b5610d3f5b3c17d
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9187cf23ffa688abda57e93e50617b6d17917ac42c67df1542ec8253ba4e2c77
3
  size 403961210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a90254e302ec8b6d14e289c29594d11ebf0d484538ff3c10b69180beee9fbee6
3
  size 403961210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab7ae01c632cf4a9eea703dbce890c0dc572c63819fc76789d81ce59e76600e8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3a0b8ae4b1ef3d273ffeb9e44259fa1cdfe737176b54fe581e2c5bb5beb35e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d91efca010f12a32bdc7d1c787424022430eaf3f7cc0cdd2c9645acb3fd16b80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f2082f8ae2500626171b5f3c174135f50005132f4fb0d89617e1b5f611c23e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2823649644851685,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 2.2346368715083798,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
1447
  "eval_samples_per_second": 42.613,
1448
  "eval_steps_per_second": 21.448,
1449
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  }
1451
  ],
1452
  "logging_steps": 1,
@@ -1475,7 +1833,7 @@
1475
  "attributes": {}
1476
  }
1477
  },
1478
- "total_flos": 4.26629570494464e+16,
1479
  "train_batch_size": 8,
1480
  "trial_name": null,
1481
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2524751424789429,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 2.793296089385475,
5
  "eval_steps": 50,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1447
  "eval_samples_per_second": 42.613,
1448
  "eval_steps_per_second": 21.448,
1449
  "step": 200
1450
+ },
1451
+ {
1452
+ "epoch": 2.2458100558659218,
1453
+ "grad_norm": 2.0551750659942627,
1454
+ "learning_rate": 1.8678916237581522e-05,
1455
+ "loss": 1.0606,
1456
+ "step": 201
1457
+ },
1458
+ {
1459
+ "epoch": 2.2569832402234637,
1460
+ "grad_norm": 1.3848985433578491,
1461
+ "learning_rate": 1.816933090929055e-05,
1462
+ "loss": 1.2715,
1463
+ "step": 202
1464
+ },
1465
+ {
1466
+ "epoch": 2.2681564245810057,
1467
+ "grad_norm": 1.1580418348312378,
1468
+ "learning_rate": 1.7665245337452368e-05,
1469
+ "loss": 1.1072,
1470
+ "step": 203
1471
+ },
1472
+ {
1473
+ "epoch": 2.2793296089385473,
1474
+ "grad_norm": 1.1287815570831299,
1475
+ "learning_rate": 1.716674661881848e-05,
1476
+ "loss": 1.1187,
1477
+ "step": 204
1478
+ },
1479
+ {
1480
+ "epoch": 2.2905027932960893,
1481
+ "grad_norm": 1.1857539415359497,
1482
+ "learning_rate": 1.667392088483456e-05,
1483
+ "loss": 1.3102,
1484
+ "step": 205
1485
+ },
1486
+ {
1487
+ "epoch": 2.3016759776536313,
1488
+ "grad_norm": 1.0973467826843262,
1489
+ "learning_rate": 1.6186853286758397e-05,
1490
+ "loss": 1.2696,
1491
+ "step": 206
1492
+ },
1493
+ {
1494
+ "epoch": 2.3128491620111733,
1495
+ "grad_norm": 1.0390197038650513,
1496
+ "learning_rate": 1.570562798094747e-05,
1497
+ "loss": 1.2218,
1498
+ "step": 207
1499
+ },
1500
+ {
1501
+ "epoch": 2.3240223463687153,
1502
+ "grad_norm": 1.2470310926437378,
1503
+ "learning_rate": 1.5230328114318127e-05,
1504
+ "loss": 1.3041,
1505
+ "step": 208
1506
+ },
1507
+ {
1508
+ "epoch": 2.335195530726257,
1509
+ "grad_norm": 1.1841509342193604,
1510
+ "learning_rate": 1.4761035809979395e-05,
1511
+ "loss": 1.2008,
1512
+ "step": 209
1513
+ },
1514
+ {
1515
+ "epoch": 2.346368715083799,
1516
+ "grad_norm": 1.027030110359192,
1517
+ "learning_rate": 1.4297832153043656e-05,
1518
+ "loss": 1.0652,
1519
+ "step": 210
1520
+ },
1521
+ {
1522
+ "epoch": 2.357541899441341,
1523
+ "grad_norm": 1.1887884140014648,
1524
+ "learning_rate": 1.3840797176616466e-05,
1525
+ "loss": 1.1927,
1526
+ "step": 211
1527
+ },
1528
+ {
1529
+ "epoch": 2.368715083798883,
1530
+ "grad_norm": 1.1091079711914062,
1531
+ "learning_rate": 1.3390009847968504e-05,
1532
+ "loss": 1.157,
1533
+ "step": 212
1534
+ },
1535
+ {
1536
+ "epoch": 2.3798882681564244,
1537
+ "grad_norm": 1.1894493103027344,
1538
+ "learning_rate": 1.2945548054891321e-05,
1539
+ "loss": 1.1287,
1540
+ "step": 213
1541
+ },
1542
+ {
1543
+ "epoch": 2.3910614525139664,
1544
+ "grad_norm": 1.2934393882751465,
1545
+ "learning_rate": 1.2507488592239847e-05,
1546
+ "loss": 1.127,
1547
+ "step": 214
1548
+ },
1549
+ {
1550
+ "epoch": 2.4022346368715084,
1551
+ "grad_norm": 1.1570240259170532,
1552
+ "learning_rate": 1.2075907148663579e-05,
1553
+ "loss": 1.0553,
1554
+ "step": 215
1555
+ },
1556
+ {
1557
+ "epoch": 2.4134078212290504,
1558
+ "grad_norm": 1.172692060470581,
1559
+ "learning_rate": 1.1650878293528994e-05,
1560
+ "loss": 1.0155,
1561
+ "step": 216
1562
+ },
1563
+ {
1564
+ "epoch": 2.4245810055865924,
1565
+ "grad_norm": 1.197474718093872,
1566
+ "learning_rate": 1.1232475464035385e-05,
1567
+ "loss": 1.135,
1568
+ "step": 217
1569
+ },
1570
+ {
1571
+ "epoch": 2.435754189944134,
1572
+ "grad_norm": 1.1875431537628174,
1573
+ "learning_rate": 1.0820770952526155e-05,
1574
+ "loss": 1.1129,
1575
+ "step": 218
1576
+ },
1577
+ {
1578
+ "epoch": 2.446927374301676,
1579
+ "grad_norm": 1.4438897371292114,
1580
+ "learning_rate": 1.0415835893998116e-05,
1581
+ "loss": 1.2491,
1582
+ "step": 219
1583
+ },
1584
+ {
1585
+ "epoch": 2.458100558659218,
1586
+ "grad_norm": 1.3826245069503784,
1587
+ "learning_rate": 1.0017740253810609e-05,
1588
+ "loss": 1.1198,
1589
+ "step": 220
1590
+ },
1591
+ {
1592
+ "epoch": 2.46927374301676,
1593
+ "grad_norm": 1.3730559349060059,
1594
+ "learning_rate": 9.62655281559679e-06,
1595
+ "loss": 1.0396,
1596
+ "step": 221
1597
+ },
1598
+ {
1599
+ "epoch": 2.4804469273743015,
1600
+ "grad_norm": 1.6272450685501099,
1601
+ "learning_rate": 9.242341169379076e-06,
1602
+ "loss": 1.1171,
1603
+ "step": 222
1604
+ },
1605
+ {
1606
+ "epoch": 2.4916201117318435,
1607
+ "grad_norm": 1.9734584093093872,
1608
+ "learning_rate": 8.865171699890834e-06,
1609
+ "loss": 1.1374,
1610
+ "step": 223
1611
+ },
1612
+ {
1613
+ "epoch": 2.5027932960893855,
1614
+ "grad_norm": 1.3264243602752686,
1615
+ "learning_rate": 8.49510957510633e-06,
1616
+ "loss": 1.3275,
1617
+ "step": 224
1618
+ },
1619
+ {
1620
+ "epoch": 2.5139664804469275,
1621
+ "grad_norm": 1.3713715076446533,
1622
+ "learning_rate": 8.132218734980852e-06,
1623
+ "loss": 1.2514,
1624
+ "step": 225
1625
+ },
1626
+ {
1627
+ "epoch": 2.5251396648044695,
1628
+ "grad_norm": 1.2266292572021484,
1629
+ "learning_rate": 7.776561880403072e-06,
1630
+ "loss": 1.1443,
1631
+ "step": 226
1632
+ },
1633
+ {
1634
+ "epoch": 2.536312849162011,
1635
+ "grad_norm": 1.2601666450500488,
1636
+ "learning_rate": 7.4282004623615396e-06,
1637
+ "loss": 1.2684,
1638
+ "step": 227
1639
+ },
1640
+ {
1641
+ "epoch": 2.547486033519553,
1642
+ "grad_norm": 1.222358226776123,
1643
+ "learning_rate": 7.0871946713269856e-06,
1644
+ "loss": 1.1953,
1645
+ "step": 228
1646
+ },
1647
+ {
1648
+ "epoch": 2.558659217877095,
1649
+ "grad_norm": 1.1266566514968872,
1650
+ "learning_rate": 6.753603426852589e-06,
1651
+ "loss": 1.1542,
1652
+ "step": 229
1653
+ },
1654
+ {
1655
+ "epoch": 2.5698324022346366,
1656
+ "grad_norm": 1.035483479499817,
1657
+ "learning_rate": 6.427484367393699e-06,
1658
+ "loss": 1.1903,
1659
+ "step": 230
1660
+ },
1661
+ {
1662
+ "epoch": 2.5810055865921786,
1663
+ "grad_norm": 1.0014451742172241,
1664
+ "learning_rate": 6.108893840348995e-06,
1665
+ "loss": 1.0823,
1666
+ "step": 231
1667
+ },
1668
+ {
1669
+ "epoch": 2.5921787709497206,
1670
+ "grad_norm": 1.1314361095428467,
1671
+ "learning_rate": 5.797886892324694e-06,
1672
+ "loss": 1.2311,
1673
+ "step": 232
1674
+ },
1675
+ {
1676
+ "epoch": 2.6033519553072626,
1677
+ "grad_norm": 1.0532621145248413,
1678
+ "learning_rate": 5.494517259623477e-06,
1679
+ "loss": 1.1181,
1680
+ "step": 233
1681
+ },
1682
+ {
1683
+ "epoch": 2.6145251396648046,
1684
+ "grad_norm": 1.0458093881607056,
1685
+ "learning_rate": 5.198837358959901e-06,
1686
+ "loss": 1.1474,
1687
+ "step": 234
1688
+ },
1689
+ {
1690
+ "epoch": 2.6256983240223466,
1691
+ "grad_norm": 1.1469138860702515,
1692
+ "learning_rate": 4.910898278403669e-06,
1693
+ "loss": 1.1677,
1694
+ "step": 235
1695
+ },
1696
+ {
1697
+ "epoch": 2.636871508379888,
1698
+ "grad_norm": 1.135291337966919,
1699
+ "learning_rate": 4.630749768552589e-06,
1700
+ "loss": 1.1351,
1701
+ "step": 236
1702
+ },
1703
+ {
1704
+ "epoch": 2.64804469273743,
1705
+ "grad_norm": 1.09344482421875,
1706
+ "learning_rate": 4.358440233936617e-06,
1707
+ "loss": 1.1221,
1708
+ "step": 237
1709
+ },
1710
+ {
1711
+ "epoch": 2.659217877094972,
1712
+ "grad_norm": 1.190507411956787,
1713
+ "learning_rate": 4.094016724654359e-06,
1714
+ "loss": 1.0893,
1715
+ "step": 238
1716
+ },
1717
+ {
1718
+ "epoch": 2.6703910614525137,
1719
+ "grad_norm": 1.1962109804153442,
1720
+ "learning_rate": 3.837524928243774e-06,
1721
+ "loss": 1.0827,
1722
+ "step": 239
1723
+ },
1724
+ {
1725
+ "epoch": 2.6815642458100557,
1726
+ "grad_norm": 1.3094158172607422,
1727
+ "learning_rate": 3.589009161788104e-06,
1728
+ "loss": 1.0457,
1729
+ "step": 240
1730
+ },
1731
+ {
1732
+ "epoch": 2.6927374301675977,
1733
+ "grad_norm": 1.3989758491516113,
1734
+ "learning_rate": 3.3485123642587658e-06,
1735
+ "loss": 1.0812,
1736
+ "step": 241
1737
+ },
1738
+ {
1739
+ "epoch": 2.7039106145251397,
1740
+ "grad_norm": 1.4350848197937012,
1741
+ "learning_rate": 3.116076089096265e-06,
1742
+ "loss": 1.0437,
1743
+ "step": 242
1744
+ },
1745
+ {
1746
+ "epoch": 2.7150837988826817,
1747
+ "grad_norm": 1.4926385879516602,
1748
+ "learning_rate": 2.8917404970305097e-06,
1749
+ "loss": 1.0832,
1750
+ "step": 243
1751
+ },
1752
+ {
1753
+ "epoch": 2.7262569832402237,
1754
+ "grad_norm": 1.7393081188201904,
1755
+ "learning_rate": 2.675544349141779e-06,
1756
+ "loss": 1.0668,
1757
+ "step": 244
1758
+ },
1759
+ {
1760
+ "epoch": 2.7374301675977653,
1761
+ "grad_norm": 1.9382210969924927,
1762
+ "learning_rate": 2.4675250001635232e-06,
1763
+ "loss": 1.0107,
1764
+ "step": 245
1765
+ },
1766
+ {
1767
+ "epoch": 2.7486033519553073,
1768
+ "grad_norm": 0.7568252086639404,
1769
+ "learning_rate": 2.2677183920281343e-06,
1770
+ "loss": 1.0928,
1771
+ "step": 246
1772
+ },
1773
+ {
1774
+ "epoch": 2.7597765363128492,
1775
+ "grad_norm": 0.9040629267692566,
1776
+ "learning_rate": 2.076159047656889e-06,
1777
+ "loss": 1.3023,
1778
+ "step": 247
1779
+ },
1780
+ {
1781
+ "epoch": 2.770949720670391,
1782
+ "grad_norm": 0.9287111163139343,
1783
+ "learning_rate": 1.892880064994934e-06,
1784
+ "loss": 1.1539,
1785
+ "step": 248
1786
+ },
1787
+ {
1788
+ "epoch": 2.782122905027933,
1789
+ "grad_norm": 0.9362537860870361,
1790
+ "learning_rate": 1.7179131112926627e-06,
1791
+ "loss": 1.1163,
1792
+ "step": 249
1793
+ },
1794
+ {
1795
+ "epoch": 2.793296089385475,
1796
+ "grad_norm": 0.9989909529685974,
1797
+ "learning_rate": 1.551288417634106e-06,
1798
+ "loss": 1.1343,
1799
+ "step": 250
1800
+ },
1801
+ {
1802
+ "epoch": 2.793296089385475,
1803
+ "eval_loss": 1.2524751424789429,
1804
+ "eval_runtime": 3.5444,
1805
+ "eval_samples_per_second": 42.603,
1806
+ "eval_steps_per_second": 21.442,
1807
+ "step": 250
1808
  }
1809
  ],
1810
  "logging_steps": 1,
 
1833
  "attributes": {}
1834
  }
1835
  },
1836
+ "total_flos": 5.3328696311808e+16,
1837
  "train_batch_size": 8,
1838
  "trial_name": null,
1839
  "trial_params": null