Training in progress, step 240, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 479769104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55f63548d61d6c65a1654bc4bf0a1b62fc84fb53fb4d0f3eb1a55325d89267bb
|
3 |
size 479769104
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 240728084
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df0b473c11c1b6b133d909acb6c86cf9a832deb9c2a2162cac061c32e7b84d08
|
3 |
size 240728084
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:703f8b43f2697cc055bff9862430a4543fd4d2968318f68935d73a18b734e1eb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1575,6 +1575,118 @@
|
|
1575 |
"learning_rate": 9.999498966411415e-06,
|
1576 |
"loss": 1.3715,
|
1577 |
"step": 224
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1578 |
}
|
1579 |
],
|
1580 |
"logging_steps": 1,
|
@@ -1594,7 +1706,7 @@
|
|
1594 |
"attributes": {}
|
1595 |
}
|
1596 |
},
|
1597 |
-
"total_flos": 1.
|
1598 |
"train_batch_size": 2,
|
1599 |
"trial_name": null,
|
1600 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.015516311320240098,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 240,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1575 |
"learning_rate": 9.999498966411415e-06,
|
1576 |
"loss": 1.3715,
|
1577 |
"step": 224
|
1578 |
+
},
|
1579 |
+
{
|
1580 |
+
"epoch": 0.014546541862725092,
|
1581 |
+
"grad_norm": 4.114704608917236,
|
1582 |
+
"learning_rate": 9.999484338713096e-06,
|
1583 |
+
"loss": 1.376,
|
1584 |
+
"step": 225
|
1585 |
+
},
|
1586 |
+
{
|
1587 |
+
"epoch": 0.01461119315989276,
|
1588 |
+
"grad_norm": 4.159117221832275,
|
1589 |
+
"learning_rate": 9.999469500558872e-06,
|
1590 |
+
"loss": 1.3388,
|
1591 |
+
"step": 226
|
1592 |
+
},
|
1593 |
+
{
|
1594 |
+
"epoch": 0.014675844457060426,
|
1595 |
+
"grad_norm": 4.336222171783447,
|
1596 |
+
"learning_rate": 9.999454451949364e-06,
|
1597 |
+
"loss": 1.3121,
|
1598 |
+
"step": 227
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 0.014740495754228094,
|
1602 |
+
"grad_norm": 3.4951186180114746,
|
1603 |
+
"learning_rate": 9.999439192885212e-06,
|
1604 |
+
"loss": 1.3861,
|
1605 |
+
"step": 228
|
1606 |
+
},
|
1607 |
+
{
|
1608 |
+
"epoch": 0.01480514705139576,
|
1609 |
+
"grad_norm": 4.519493579864502,
|
1610 |
+
"learning_rate": 9.999423723367056e-06,
|
1611 |
+
"loss": 1.3151,
|
1612 |
+
"step": 229
|
1613 |
+
},
|
1614 |
+
{
|
1615 |
+
"epoch": 0.014869798348563428,
|
1616 |
+
"grad_norm": 3.895230770111084,
|
1617 |
+
"learning_rate": 9.999408043395546e-06,
|
1618 |
+
"loss": 1.3877,
|
1619 |
+
"step": 230
|
1620 |
+
},
|
1621 |
+
{
|
1622 |
+
"epoch": 0.014934449645731094,
|
1623 |
+
"grad_norm": 3.6650257110595703,
|
1624 |
+
"learning_rate": 9.999392152971344e-06,
|
1625 |
+
"loss": 1.3744,
|
1626 |
+
"step": 231
|
1627 |
+
},
|
1628 |
+
{
|
1629 |
+
"epoch": 0.014999100942898762,
|
1630 |
+
"grad_norm": 4.416625022888184,
|
1631 |
+
"learning_rate": 9.999376052095117e-06,
|
1632 |
+
"loss": 1.2943,
|
1633 |
+
"step": 232
|
1634 |
+
},
|
1635 |
+
{
|
1636 |
+
"epoch": 0.01506375224006643,
|
1637 |
+
"grad_norm": 4.903157711029053,
|
1638 |
+
"learning_rate": 9.999359740767545e-06,
|
1639 |
+
"loss": 1.3302,
|
1640 |
+
"step": 233
|
1641 |
+
},
|
1642 |
+
{
|
1643 |
+
"epoch": 0.015128403537234096,
|
1644 |
+
"grad_norm": 4.176599502563477,
|
1645 |
+
"learning_rate": 9.999343218989313e-06,
|
1646 |
+
"loss": 1.3421,
|
1647 |
+
"step": 234
|
1648 |
+
},
|
1649 |
+
{
|
1650 |
+
"epoch": 0.015193054834401764,
|
1651 |
+
"grad_norm": 4.2415876388549805,
|
1652 |
+
"learning_rate": 9.999326486761114e-06,
|
1653 |
+
"loss": 1.3693,
|
1654 |
+
"step": 235
|
1655 |
+
},
|
1656 |
+
{
|
1657 |
+
"epoch": 0.01525770613156943,
|
1658 |
+
"grad_norm": 4.100305080413818,
|
1659 |
+
"learning_rate": 9.999309544083657e-06,
|
1660 |
+
"loss": 1.3251,
|
1661 |
+
"step": 236
|
1662 |
+
},
|
1663 |
+
{
|
1664 |
+
"epoch": 0.015322357428737098,
|
1665 |
+
"grad_norm": 3.9971888065338135,
|
1666 |
+
"learning_rate": 9.999292390957653e-06,
|
1667 |
+
"loss": 1.4118,
|
1668 |
+
"step": 237
|
1669 |
+
},
|
1670 |
+
{
|
1671 |
+
"epoch": 0.015387008725904764,
|
1672 |
+
"grad_norm": 4.218728065490723,
|
1673 |
+
"learning_rate": 9.999275027383826e-06,
|
1674 |
+
"loss": 1.371,
|
1675 |
+
"step": 238
|
1676 |
+
},
|
1677 |
+
{
|
1678 |
+
"epoch": 0.015451660023072432,
|
1679 |
+
"grad_norm": 5.075481414794922,
|
1680 |
+
"learning_rate": 9.999257453362903e-06,
|
1681 |
+
"loss": 1.3753,
|
1682 |
+
"step": 239
|
1683 |
+
},
|
1684 |
+
{
|
1685 |
+
"epoch": 0.015516311320240098,
|
1686 |
+
"grad_norm": 4.296051025390625,
|
1687 |
+
"learning_rate": 9.999239668895627e-06,
|
1688 |
+
"loss": 1.4116,
|
1689 |
+
"step": 240
|
1690 |
}
|
1691 |
],
|
1692 |
"logging_steps": 1,
|
|
|
1706 |
"attributes": {}
|
1707 |
}
|
1708 |
},
|
1709 |
+
"total_flos": 1.536003023152988e+17,
|
1710 |
"train_batch_size": 2,
|
1711 |
"trial_name": null,
|
1712 |
"trial_params": null
|