besimray committed
Commit d4e65ac · verified · 1 Parent(s): 869aff1

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a639c0dd8d46132adc0b16337b5a1ff36e268cf252a3de28258698f829ef7ce6
+ oid sha256:7ce726842d250f20c86c371d9e87a8ce9846060e663f40a17aa5220f549de934
  size 125048
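The 125 kB adapter_model.safetensors updated above follows the peft naming convention for a LoRA-style adapter, so it presumably holds only the trained adapter weights rather than a full model. Under that assumption, a checked-out copy of the checkpoint directory could be attached to its base model roughly as sketched below; the base-model id and the causal-LM head are placeholders, since the commit does not identify which model is being fine-tuned.

```python
# Minimal sketch, assuming a peft (LoRA-style) adapter; "BASE_MODEL_ID" and the
# causal-LM head are placeholders, since this commit does not say which base
# model was fine-tuned.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID")
# PeftModel.from_pretrained reads adapter_config.json and
# adapter_model.safetensors from the checkpoint directory.
model = PeftModel.from_pretrained(base, "last-checkpoint")
```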
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:53ecb0ea5d82d22db059b7add3506c06a0cd8eeb38fa9e49da520bca058f53e6
+ oid sha256:89d5bd83a26023b846c84f45639c04652bc00657132d2cf830dd829784d28815
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:14f613e9aa4b1eff57e81d3c847842d7ccd502bc7cfeef73e08e7430e2140097
+ oid sha256:8ebd8401c143f16f72b022f70333a3cfcf5c180eb2884806e8d3b16f8611859c
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb7ebe91aa688ab052f1c015d887206a7b417ef70ab8e5d1552c4ac1b55fa0b6
+ oid sha256:53ada7ec389029bea855f553cfcb8ba6729038e26afcd278435a9c0b241e2783
  size 1064
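All four files above are stored through Git LFS, so the repository only tracks a three-line pointer per file (version, oid sha256, size); this commit swaps each pointer's oid while the sizes stay unchanged, i.e. the binary contents changed but not their length. A local copy can be checked against a pointer with a short script like the sketch below; the paths are illustrative assumptions.

```python
# Minimal sketch: check a local file against its Git LFS pointer
# (version / oid sha256 / size). The pointer text can typically be read with
# `git cat-file -p HEAD:last-checkpoint/scheduler.pt`; paths are assumptions.
import hashlib
from pathlib import Path


def parse_pointer(text: str) -> dict:
    """Split the 'key value' lines of an LFS pointer into a dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())


def matches_pointer(pointer_text: str, data_path: Path) -> bool:
    pointer = parse_pointer(pointer_text)
    blob = data_path.read_bytes()
    return (
        hashlib.sha256(blob).hexdigest() == pointer["oid"].removeprefix("sha256:")
        and len(blob) == int(pointer["size"])
    )
```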
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
-   "best_metric": 11.02279281616211,
-   "best_model_checkpoint": "miner_id_24/checkpoint-190",
-   "epoch": 0.008587376556462,
+   "best_metric": 11.022467613220215,
+   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+   "epoch": 0.009039343743644212,
    "eval_steps": 5,
-   "global_step": 190,
+   "global_step": 200,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -1649,6 +1649,92 @@
        "eval_samples_per_second": 52.881,
        "eval_steps_per_second": 26.443,
        "step": 190
+     },
+     {
+       "epoch": 0.008632573275180222,
+       "grad_norm": 0.5897504091262817,
+       "learning_rate": 0.0001398912386251042,
+       "loss": 44.0428,
+       "step": 191
+     },
+     {
+       "epoch": 0.008677769993898443,
+       "grad_norm": 0.4917847514152527,
+       "learning_rate": 0.00013930250316539238,
+       "loss": 44.0819,
+       "step": 192
+     },
+     {
+       "epoch": 0.008722966712616664,
+       "grad_norm": 0.4644782245159149,
+       "learning_rate": 0.00013871215213371284,
+       "loss": 44.0209,
+       "step": 193
+     },
+     {
+       "epoch": 0.008768163431334885,
+       "grad_norm": 0.6393492817878723,
+       "learning_rate": 0.00013812020979708418,
+       "loss": 44.101,
+       "step": 194
+     },
+     {
+       "epoch": 0.008813360150053106,
+       "grad_norm": 0.60307377576828,
+       "learning_rate": 0.00013752670048793744,
+       "loss": 44.1646,
+       "step": 195
+     },
+     {
+       "epoch": 0.008813360150053106,
+       "eval_loss": 11.022566795349121,
+       "eval_runtime": 176.0184,
+       "eval_samples_per_second": 52.932,
+       "eval_steps_per_second": 26.469,
+       "step": 195
+     },
+     {
+       "epoch": 0.008858556868771328,
+       "grad_norm": 0.4305557608604431,
+       "learning_rate": 0.00013693164860311565,
+       "loss": 44.0883,
+       "step": 196
+     },
+     {
+       "epoch": 0.008903753587489549,
+       "grad_norm": 0.4658234119415283,
+       "learning_rate": 0.00013633507860287116,
+       "loss": 44.1006,
+       "step": 197
+     },
+     {
+       "epoch": 0.00894895030620777,
+       "grad_norm": 0.5248441100120544,
+       "learning_rate": 0.0001357370150098601,
+       "loss": 44.0716,
+       "step": 198
+     },
+     {
+       "epoch": 0.008994147024925991,
+       "grad_norm": 0.5177784562110901,
+       "learning_rate": 0.0001351374824081343,
+       "loss": 44.1013,
+       "step": 199
+     },
+     {
+       "epoch": 0.009039343743644212,
+       "grad_norm": 0.5134817361831665,
+       "learning_rate": 0.00013453650544213076,
+       "loss": 44.0501,
+       "step": 200
+     },
+     {
+       "epoch": 0.009039343743644212,
+       "eval_loss": 11.022467613220215,
+       "eval_runtime": 176.1703,
+       "eval_samples_per_second": 52.886,
+       "eval_steps_per_second": 26.446,
+       "step": 200
      }
    ],
    "logging_steps": 1,
@@ -1677,7 +1763,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 1996278988800.0,
+   "total_flos": 2101346304000.0,
    "train_batch_size": 2,
    "trial_name": null,
    "trial_params": null