iamnguyen committed
Commit e2c08f1 · verified · 1 Parent(s): fda83e7

Training in progress, step 240, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8184a10ca48807e22cc06e50ec6c504f76f4078fa26a373c09b1e614591a70dd
+oid sha256:55f63548d61d6c65a1654bc4bf0a1b62fc84fb53fb4d0f3eb1a55325d89267bb
 size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27c59c970af91f75ad174c54b9036732a06ac597e37b07db4c76c33a0dbe1d9b
+oid sha256:df0b473c11c1b6b133d909acb6c86cf9a832deb9c2a2162cac061c32e7b84d08
 size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ef7d41b67bbb64d8f59b1b890b60e98c43be1a525871d9f2311fa3b7b48a618
+oid sha256:703f8b43f2697cc055bff9862430a4543fd4d2968318f68935d73a18b734e1eb
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.014481890565557426,
+  "epoch": 0.015516311320240098,
   "eval_steps": 500,
-  "global_step": 224,
+  "global_step": 240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1575,6 +1575,118 @@
       "learning_rate": 9.999498966411415e-06,
       "loss": 1.3715,
       "step": 224
+    },
+    {
+      "epoch": 0.014546541862725092,
+      "grad_norm": 4.114704608917236,
+      "learning_rate": 9.999484338713096e-06,
+      "loss": 1.376,
+      "step": 225
+    },
+    {
+      "epoch": 0.01461119315989276,
+      "grad_norm": 4.159117221832275,
+      "learning_rate": 9.999469500558872e-06,
+      "loss": 1.3388,
+      "step": 226
+    },
+    {
+      "epoch": 0.014675844457060426,
+      "grad_norm": 4.336222171783447,
+      "learning_rate": 9.999454451949364e-06,
+      "loss": 1.3121,
+      "step": 227
+    },
+    {
+      "epoch": 0.014740495754228094,
+      "grad_norm": 3.4951186180114746,
+      "learning_rate": 9.999439192885212e-06,
+      "loss": 1.3861,
+      "step": 228
+    },
+    {
+      "epoch": 0.01480514705139576,
+      "grad_norm": 4.519493579864502,
+      "learning_rate": 9.999423723367056e-06,
+      "loss": 1.3151,
+      "step": 229
+    },
+    {
+      "epoch": 0.014869798348563428,
+      "grad_norm": 3.895230770111084,
+      "learning_rate": 9.999408043395546e-06,
+      "loss": 1.3877,
+      "step": 230
+    },
+    {
+      "epoch": 0.014934449645731094,
+      "grad_norm": 3.6650257110595703,
+      "learning_rate": 9.999392152971344e-06,
+      "loss": 1.3744,
+      "step": 231
+    },
+    {
+      "epoch": 0.014999100942898762,
+      "grad_norm": 4.416625022888184,
+      "learning_rate": 9.999376052095117e-06,
+      "loss": 1.2943,
+      "step": 232
+    },
+    {
+      "epoch": 0.01506375224006643,
+      "grad_norm": 4.903157711029053,
+      "learning_rate": 9.999359740767545e-06,
+      "loss": 1.3302,
+      "step": 233
+    },
+    {
+      "epoch": 0.015128403537234096,
+      "grad_norm": 4.176599502563477,
+      "learning_rate": 9.999343218989313e-06,
+      "loss": 1.3421,
+      "step": 234
+    },
+    {
+      "epoch": 0.015193054834401764,
+      "grad_norm": 4.2415876388549805,
+      "learning_rate": 9.999326486761114e-06,
+      "loss": 1.3693,
+      "step": 235
+    },
+    {
+      "epoch": 0.01525770613156943,
+      "grad_norm": 4.100305080413818,
+      "learning_rate": 9.999309544083657e-06,
+      "loss": 1.3251,
+      "step": 236
+    },
+    {
+      "epoch": 0.015322357428737098,
+      "grad_norm": 3.9971888065338135,
+      "learning_rate": 9.999292390957653e-06,
+      "loss": 1.4118,
+      "step": 237
+    },
+    {
+      "epoch": 0.015387008725904764,
+      "grad_norm": 4.218728065490723,
+      "learning_rate": 9.999275027383826e-06,
+      "loss": 1.371,
+      "step": 238
+    },
+    {
+      "epoch": 0.015451660023072432,
+      "grad_norm": 5.075481414794922,
+      "learning_rate": 9.999257453362903e-06,
+      "loss": 1.3753,
+      "step": 239
+    },
+    {
+      "epoch": 0.015516311320240098,
+      "grad_norm": 4.296051025390625,
+      "learning_rate": 9.999239668895627e-06,
+      "loss": 1.4116,
+      "step": 240
     }
   ],
   "logging_steps": 1,
@@ -1594,7 +1706,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.437989735806894e+17,
+  "total_flos": 1.536003023152988e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null