Training in progress, step 2250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 653434568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b50df0d26d180087225139d1cbccb1e4f8988f3ba78da80175de8d2ccb715425
|
3 |
size 653434568
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1288533754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1349d7c7820ba989d22e9c3dafb981ed735ef01b3315b8e5cfd62c75bb5677b0
|
3 |
size 1288533754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aee148c55266a4fa01d336e6c825eb6826c0bacd0e25635305678a84af39fc7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5c8c9c4d7ddd30debc6fb341973ff9c39ea0dc55bc39bc535243ffe8a16ce90
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1589,6 +1589,119 @@
|
|
1589 |
"eval_samples_per_second": 8.411,
|
1590 |
"eval_steps_per_second": 8.411,
|
1591 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1592 |
}
|
1593 |
],
|
1594 |
"logging_steps": 10,
|
@@ -1608,7 +1721,7 @@
|
|
1608 |
"attributes": {}
|
1609 |
}
|
1610 |
},
|
1611 |
-
"total_flos": 7.
|
1612 |
"train_batch_size": 8,
|
1613 |
"trial_name": null,
|
1614 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.5997846126556396,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-2250",
|
4 |
+
"epoch": 0.19950345805993971,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 2250,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1589 |
"eval_samples_per_second": 8.411,
|
1590 |
"eval_steps_per_second": 8.411,
|
1591 |
"step": 2100
|
1592 |
+
},
|
1593 |
+
{
|
1594 |
+
"epoch": 0.18708990955843235,
|
1595 |
+
"grad_norm": 8.962486267089844,
|
1596 |
+
"learning_rate": 6.391876823712319e-05,
|
1597 |
+
"loss": 1.4843,
|
1598 |
+
"step": 2110
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 0.1879765915942543,
|
1602 |
+
"grad_norm": 10.67493724822998,
|
1603 |
+
"learning_rate": 6.361058499664857e-05,
|
1604 |
+
"loss": 1.6638,
|
1605 |
+
"step": 2120
|
1606 |
+
},
|
1607 |
+
{
|
1608 |
+
"epoch": 0.18886327363007627,
|
1609 |
+
"grad_norm": 8.06369686126709,
|
1610 |
+
"learning_rate": 6.330184227833377e-05,
|
1611 |
+
"loss": 1.6439,
|
1612 |
+
"step": 2130
|
1613 |
+
},
|
1614 |
+
{
|
1615 |
+
"epoch": 0.1897499556658982,
|
1616 |
+
"grad_norm": 9.005534172058105,
|
1617 |
+
"learning_rate": 6.299255277338267e-05,
|
1618 |
+
"loss": 1.5289,
|
1619 |
+
"step": 2140
|
1620 |
+
},
|
1621 |
+
{
|
1622 |
+
"epoch": 0.19063663770172015,
|
1623 |
+
"grad_norm": 9.255204200744629,
|
1624 |
+
"learning_rate": 6.268272919547539e-05,
|
1625 |
+
"loss": 1.46,
|
1626 |
+
"step": 2150
|
1627 |
+
},
|
1628 |
+
{
|
1629 |
+
"epoch": 0.1915233197375421,
|
1630 |
+
"grad_norm": 7.344980239868164,
|
1631 |
+
"learning_rate": 6.237238428024573e-05,
|
1632 |
+
"loss": 1.4932,
|
1633 |
+
"step": 2160
|
1634 |
+
},
|
1635 |
+
{
|
1636 |
+
"epoch": 0.19241000177336406,
|
1637 |
+
"grad_norm": 8.692234992980957,
|
1638 |
+
"learning_rate": 6.206153078475765e-05,
|
1639 |
+
"loss": 1.6582,
|
1640 |
+
"step": 2170
|
1641 |
+
},
|
1642 |
+
{
|
1643 |
+
"epoch": 0.19329668380918602,
|
1644 |
+
"grad_norm": 7.381601333618164,
|
1645 |
+
"learning_rate": 6.175018148698078e-05,
|
1646 |
+
"loss": 1.5007,
|
1647 |
+
"step": 2180
|
1648 |
+
},
|
1649 |
+
{
|
1650 |
+
"epoch": 0.19418336584500798,
|
1651 |
+
"grad_norm": 7.794239044189453,
|
1652 |
+
"learning_rate": 6.143834918526529e-05,
|
1653 |
+
"loss": 1.6501,
|
1654 |
+
"step": 2190
|
1655 |
+
},
|
1656 |
+
{
|
1657 |
+
"epoch": 0.19507004788082993,
|
1658 |
+
"grad_norm": 8.13096809387207,
|
1659 |
+
"learning_rate": 6.112604669781574e-05,
|
1660 |
+
"loss": 1.6862,
|
1661 |
+
"step": 2200
|
1662 |
+
},
|
1663 |
+
{
|
1664 |
+
"epoch": 0.1959567299166519,
|
1665 |
+
"grad_norm": 6.846219539642334,
|
1666 |
+
"learning_rate": 6.081328686216419e-05,
|
1667 |
+
"loss": 1.5702,
|
1668 |
+
"step": 2210
|
1669 |
+
},
|
1670 |
+
{
|
1671 |
+
"epoch": 0.19684341195247385,
|
1672 |
+
"grad_norm": 8.771533966064453,
|
1673 |
+
"learning_rate": 6.0500082534642485e-05,
|
1674 |
+
"loss": 1.6259,
|
1675 |
+
"step": 2220
|
1676 |
+
},
|
1677 |
+
{
|
1678 |
+
"epoch": 0.1977300939882958,
|
1679 |
+
"grad_norm": 6.50418758392334,
|
1680 |
+
"learning_rate": 6.01864465898538e-05,
|
1681 |
+
"loss": 1.6948,
|
1682 |
+
"step": 2230
|
1683 |
+
},
|
1684 |
+
{
|
1685 |
+
"epoch": 0.19861677602411776,
|
1686 |
+
"grad_norm": 8.83719539642334,
|
1687 |
+
"learning_rate": 5.987239192014337e-05,
|
1688 |
+
"loss": 1.643,
|
1689 |
+
"step": 2240
|
1690 |
+
},
|
1691 |
+
{
|
1692 |
+
"epoch": 0.19950345805993971,
|
1693 |
+
"grad_norm": 7.24541711807251,
|
1694 |
+
"learning_rate": 5.955793143506864e-05,
|
1695 |
+
"loss": 1.624,
|
1696 |
+
"step": 2250
|
1697 |
+
},
|
1698 |
+
{
|
1699 |
+
"epoch": 0.19950345805993971,
|
1700 |
+
"eval_loss": 1.5997846126556396,
|
1701 |
+
"eval_runtime": 59.4561,
|
1702 |
+
"eval_samples_per_second": 8.41,
|
1703 |
+
"eval_steps_per_second": 8.41,
|
1704 |
+
"step": 2250
|
1705 |
}
|
1706 |
],
|
1707 |
"logging_steps": 10,
|
|
|
1721 |
"attributes": {}
|
1722 |
}
|
1723 |
},
|
1724 |
+
"total_flos": 7.577803153093755e+17,
|
1725 |
"train_batch_size": 8,
|
1726 |
"trial_name": null,
|
1727 |
"trial_params": null
|