Training in progress, step 190, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a639c0dd8d46132adc0b16337b5a1ff36e268cf252a3de28258698f829ef7ce6
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53ecb0ea5d82d22db059b7add3506c06a0cd8eeb38fa9e49da520bca058f53e6
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14f613e9aa4b1eff57e81d3c847842d7ccd502bc7cfeef73e08e7430e2140097
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb7ebe91aa688ab052f1c015d887206a7b417ef70ab8e5d1552c4ac1b55fa0b6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1563,6 +1563,92 @@
|
|
1563 |
"eval_samples_per_second": 52.833,
|
1564 |
"eval_steps_per_second": 26.419,
|
1565 |
"step": 180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1566 |
}
|
1567 |
],
|
1568 |
"logging_steps": 1,
|
@@ -1591,7 +1677,7 @@
|
|
1591 |
"attributes": {}
|
1592 |
}
|
1593 |
},
|
1594 |
-
"total_flos":
|
1595 |
"train_batch_size": 2,
|
1596 |
"trial_name": null,
|
1597 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.02279281616211,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-190",
|
4 |
+
"epoch": 0.008587376556462,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 190,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1563 |
"eval_samples_per_second": 52.833,
|
1564 |
"eval_steps_per_second": 26.419,
|
1565 |
"step": 180
|
1566 |
+
},
|
1567 |
+
{
|
1568 |
+
"epoch": 0.008180606087998012,
|
1569 |
+
"grad_norm": 0.5716975927352905,
|
1570 |
+
"learning_rate": 0.00014568444677839516,
|
1571 |
+
"loss": 44.1164,
|
1572 |
+
"step": 181
|
1573 |
+
},
|
1574 |
+
{
|
1575 |
+
"epoch": 0.008225802806716233,
|
1576 |
+
"grad_norm": 0.6961561441421509,
|
1577 |
+
"learning_rate": 0.00014511318662403347,
|
1578 |
+
"loss": 44.1024,
|
1579 |
+
"step": 182
|
1580 |
+
},
|
1581 |
+
{
|
1582 |
+
"epoch": 0.008270999525434454,
|
1583 |
+
"grad_norm": 0.5740232467651367,
|
1584 |
+
"learning_rate": 0.0001445400720432659,
|
1585 |
+
"loss": 44.1379,
|
1586 |
+
"step": 183
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 0.008316196244152675,
|
1590 |
+
"grad_norm": 0.5687277913093567,
|
1591 |
+
"learning_rate": 0.00014396512659458824,
|
1592 |
+
"loss": 44.1165,
|
1593 |
+
"step": 184
|
1594 |
+
},
|
1595 |
+
{
|
1596 |
+
"epoch": 0.008361392962870896,
|
1597 |
+
"grad_norm": 0.6230690479278564,
|
1598 |
+
"learning_rate": 0.00014338837391175582,
|
1599 |
+
"loss": 44.118,
|
1600 |
+
"step": 185
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 0.008361392962870896,
|
1604 |
+
"eval_loss": 11.022916793823242,
|
1605 |
+
"eval_runtime": 176.0405,
|
1606 |
+
"eval_samples_per_second": 52.925,
|
1607 |
+
"eval_steps_per_second": 26.465,
|
1608 |
+
"step": 185
|
1609 |
+
},
|
1610 |
+
{
|
1611 |
+
"epoch": 0.008406589681589116,
|
1612 |
+
"grad_norm": 0.48787158727645874,
|
1613 |
+
"learning_rate": 0.0001428098377028126,
|
1614 |
+
"loss": 44.0875,
|
1615 |
+
"step": 186
|
1616 |
+
},
|
1617 |
+
{
|
1618 |
+
"epoch": 0.008451786400307337,
|
1619 |
+
"grad_norm": 0.44323569536209106,
|
1620 |
+
"learning_rate": 0.000142229541749116,
|
1621 |
+
"loss": 44.143,
|
1622 |
+
"step": 187
|
1623 |
+
},
|
1624 |
+
{
|
1625 |
+
"epoch": 0.008496983119025558,
|
1626 |
+
"grad_norm": 0.47104522585868835,
|
1627 |
+
"learning_rate": 0.0001416475099043599,
|
1628 |
+
"loss": 44.0804,
|
1629 |
+
"step": 188
|
1630 |
+
},
|
1631 |
+
{
|
1632 |
+
"epoch": 0.00854217983774378,
|
1633 |
+
"grad_norm": 0.549055814743042,
|
1634 |
+
"learning_rate": 0.0001410637660935938,
|
1635 |
+
"loss": 44.0923,
|
1636 |
+
"step": 189
|
1637 |
+
},
|
1638 |
+
{
|
1639 |
+
"epoch": 0.008587376556462,
|
1640 |
+
"grad_norm": 0.4136901795864105,
|
1641 |
+
"learning_rate": 0.00014047833431223938,
|
1642 |
+
"loss": 44.0967,
|
1643 |
+
"step": 190
|
1644 |
+
},
|
1645 |
+
{
|
1646 |
+
"epoch": 0.008587376556462,
|
1647 |
+
"eval_loss": 11.02279281616211,
|
1648 |
+
"eval_runtime": 176.1885,
|
1649 |
+
"eval_samples_per_second": 52.881,
|
1650 |
+
"eval_steps_per_second": 26.443,
|
1651 |
+
"step": 190
|
1652 |
}
|
1653 |
],
|
1654 |
"logging_steps": 1,
|
|
|
1677 |
"attributes": {}
|
1678 |
}
|
1679 |
},
|
1680 |
+
"total_flos": 1996278988800.0,
|
1681 |
"train_batch_size": 2,
|
1682 |
"trial_name": null,
|
1683 |
"trial_params": null
|