besimray committed · verified
Commit 49c5612 · 1 Parent(s): 2af695d

Training in progress, step 190, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30c8aadcc25f22b5bd1fff3362f07043a38073d83188469ddb5bce8d545b884f
+oid sha256:a639c0dd8d46132adc0b16337b5a1ff36e268cf252a3de28258698f829ef7ce6
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc3ccfb8c5981089a4b8c855ecc6afb5559dd1e01e57ce3254eff726ed1e7efb
+oid sha256:53ecb0ea5d82d22db059b7add3506c06a0cd8eeb38fa9e49da520bca058f53e6
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db28645e3a5ed38f3c725d595cee3c53b367101eca9d631abd8e1db85596d3f2
+oid sha256:14f613e9aa4b1eff57e81d3c847842d7ccd502bc7cfeef73e08e7430e2140097
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05495c329a0a59adfbdcbe310642bb4a2adcb593713c0b96973034e9930bed7e
+oid sha256:fb7ebe91aa688ab052f1c015d887206a7b417ef70ab8e5d1552c4ac1b55fa0b6
 size 1064
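Each of the files above is stored via Git LFS, so the commit only rewrites the pointer file: the `oid sha256:` line identifies the new object and `size` gives its byte count. As a minimal sketch of how a downloaded checkpoint file can be checked against its pointer (the file paths here are illustrative assumptions, not part of this repository's tooling):

```python
# Sketch: compare a local file's sha256 against the oid in its Git LFS pointer.
# Paths are hypothetical; in a checkout without LFS smudge, the pointer text
# would sit at the file's own path instead of a separate ".pointer" file.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the key/value lines of a Git LFS pointer (version, oid, size)."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash the real file contents in chunks to avoid loading it whole."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

pointer = parse_lfs_pointer(Path("adapter_model.safetensors.pointer").read_text())
expected = pointer["oid"].removeprefix("sha256:")
actual = sha256_of(Path("last-checkpoint/adapter_model.safetensors"))
print("match" if actual == expected else "mismatch", "-", pointer["size"], "bytes expected")
```

A mismatch usually means the LFS object was never fetched (the pointer itself got checked out) or the download was truncated.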
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.023147583007812,
-  "best_model_checkpoint": "miner_id_24/checkpoint-180",
-  "epoch": 0.00813540936927979,
+  "best_metric": 11.02279281616211,
+  "best_model_checkpoint": "miner_id_24/checkpoint-190",
+  "epoch": 0.008587376556462,
   "eval_steps": 5,
-  "global_step": 180,
+  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1563,6 +1563,92 @@
       "eval_samples_per_second": 52.833,
       "eval_steps_per_second": 26.419,
       "step": 180
+    },
+    {
+      "epoch": 0.008180606087998012,
+      "grad_norm": 0.5716975927352905,
+      "learning_rate": 0.00014568444677839516,
+      "loss": 44.1164,
+      "step": 181
+    },
+    {
+      "epoch": 0.008225802806716233,
+      "grad_norm": 0.6961561441421509,
+      "learning_rate": 0.00014511318662403347,
+      "loss": 44.1024,
+      "step": 182
+    },
+    {
+      "epoch": 0.008270999525434454,
+      "grad_norm": 0.5740232467651367,
+      "learning_rate": 0.0001445400720432659,
+      "loss": 44.1379,
+      "step": 183
+    },
+    {
+      "epoch": 0.008316196244152675,
+      "grad_norm": 0.5687277913093567,
+      "learning_rate": 0.00014396512659458824,
+      "loss": 44.1165,
+      "step": 184
+    },
+    {
+      "epoch": 0.008361392962870896,
+      "grad_norm": 0.6230690479278564,
+      "learning_rate": 0.00014338837391175582,
+      "loss": 44.118,
+      "step": 185
+    },
+    {
+      "epoch": 0.008361392962870896,
+      "eval_loss": 11.022916793823242,
+      "eval_runtime": 176.0405,
+      "eval_samples_per_second": 52.925,
+      "eval_steps_per_second": 26.465,
+      "step": 185
+    },
+    {
+      "epoch": 0.008406589681589116,
+      "grad_norm": 0.48787158727645874,
+      "learning_rate": 0.0001428098377028126,
+      "loss": 44.0875,
+      "step": 186
+    },
+    {
+      "epoch": 0.008451786400307337,
+      "grad_norm": 0.44323569536209106,
+      "learning_rate": 0.000142229541749116,
+      "loss": 44.143,
+      "step": 187
+    },
+    {
+      "epoch": 0.008496983119025558,
+      "grad_norm": 0.47104522585868835,
+      "learning_rate": 0.0001416475099043599,
+      "loss": 44.0804,
+      "step": 188
+    },
+    {
+      "epoch": 0.00854217983774378,
+      "grad_norm": 0.549055814743042,
+      "learning_rate": 0.0001410637660935938,
+      "loss": 44.0923,
+      "step": 189
+    },
+    {
+      "epoch": 0.008587376556462,
+      "grad_norm": 0.4136901795864105,
+      "learning_rate": 0.00014047833431223938,
+      "loss": 44.0967,
+      "step": 190
+    },
+    {
+      "epoch": 0.008587376556462,
+      "eval_loss": 11.02279281616211,
+      "eval_runtime": 176.1885,
+      "eval_samples_per_second": 52.881,
+      "eval_steps_per_second": 26.443,
+      "step": 190
     }
   ],
   "logging_steps": 1,
@@ -1591,7 +1677,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1891211673600.0,
+  "total_flos": 1996278988800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null