Training in progress, step 2400, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbbae645955e4b08b9adb4332c2a3fa1333fd7e3b873cd110fb3133e27e1f642
 size 653434568
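(Aside, not part of the commit: adapter_model.safetensors holds the PEFT-style adapter weights in safetensors format. A minimal sketch of inspecting the updated file locally, assuming it has been pulled from LFS into last-checkpoint/ and that the safetensors package is available; nothing here is specified by the commit itself.)

from safetensors.torch import load_file

# Load the adapter tensors on CPU and report their names, shapes and dtypes.
# The path below is the one shown in this diff, assumed to exist locally.
tensors = load_file("last-checkpoint/adapter_model.safetensors")
for name, tensor in tensors.items():
    print(name, tuple(tensor.shape), tensor.dtype)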
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a895dc6347988ee7d21290f36a1a0888df619f8426f81d076b7e3887e811b749
 size 1288533754
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cb42fb8f2d70da71e5001efb218126512515054d130cd6dcd017d036e0a538be
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b1fe9c3f91db580a8b6ede7b15e0466000f08c889875e4652a5c9ec9f77e1fab
 size 1256
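(Aside, not part of the commit: the four binary files above are stored as Git LFS pointers, so each diff only touches the three pointer fields: the spec version, the sha256 oid of the new blob, and its size in bytes. A rough sketch of checking a downloaded file against the oid recorded in its pointer; the local path is an assumption.)

import hashlib

def file_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in chunks so large blobs (optimizer.pt is ~1.3 GB) never
    # have to fit in memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Compare against the oid from the adapter pointer in this commit.
expected = "dbbae645955e4b08b9adb4332c2a3fa1333fd7e3b873cd110fb3133e27e1f642"
print(file_sha256("last-checkpoint/adapter_model.safetensors") == expected)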
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "./output/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.5888803005218506,
+  "best_model_checkpoint": "./output/checkpoint-2400",
+  "epoch": 0.21280368859726903,
   "eval_steps": 150,
-  "global_step": 
+  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1702,6 +1702,119 @@
       "eval_samples_per_second": 8.41,
       "eval_steps_per_second": 8.41,
       "step": 2250
+    },
+    {
+      "epoch": 0.20039014009576167,
+      "grad_norm": 13.114813804626465,
+      "learning_rate": 5.9243078060868454e-05,
+      "loss": 1.5787,
+      "step": 2260
+    },
+    {
+      "epoch": 0.20127682213158363,
+      "grad_norm": 6.7087321281433105,
+      "learning_rate": 5.8927844739931854e-05,
+      "loss": 1.3785,
+      "step": 2270
+    },
+    {
+      "epoch": 0.20216350416740556,
+      "grad_norm": 6.644030570983887,
+      "learning_rate": 5.8612244430265966e-05,
+      "loss": 1.5126,
+      "step": 2280
+    },
+    {
+      "epoch": 0.2030501862032275,
+      "grad_norm": 10.291509628295898,
+      "learning_rate": 5.829629010496342e-05,
+      "loss": 1.4863,
+      "step": 2290
+    },
+    {
+      "epoch": 0.20393686823904947,
+      "grad_norm": 6.426754951477051,
+      "learning_rate": 5.797999475166898e-05,
+      "loss": 1.5586,
+      "step": 2300
+    },
+    {
+      "epoch": 0.20482355027487142,
+      "grad_norm": 9.044095039367676,
+      "learning_rate": 5.766337137204581e-05,
+      "loss": 1.5063,
+      "step": 2310
+    },
+    {
+      "epoch": 0.20571023231069338,
+      "grad_norm": 8.852991104125977,
+      "learning_rate": 5.734643298124092e-05,
+      "loss": 1.7211,
+      "step": 2320
+    },
+    {
+      "epoch": 0.20659691434651534,
+      "grad_norm": 73.65837860107422,
+      "learning_rate": 5.702919260735016e-05,
+      "loss": 1.5191,
+      "step": 2330
+    },
+    {
+      "epoch": 0.2074835963823373,
+      "grad_norm": 8.413342475891113,
+      "learning_rate": 5.671166329088279e-05,
+      "loss": 1.5013,
+      "step": 2340
+    },
+    {
+      "epoch": 0.20837027841815925,
+      "grad_norm": 6.938820838928223,
+      "learning_rate": 5.639385808422532e-05,
+      "loss": 1.5099,
+      "step": 2350
+    },
+    {
+      "epoch": 0.2092569604539812,
+      "grad_norm": 7.757599353790283,
+      "learning_rate": 5.6075790051105044e-05,
+      "loss": 1.5848,
+      "step": 2360
+    },
+    {
+      "epoch": 0.21014364248980316,
+      "grad_norm": 7.502821445465088,
+      "learning_rate": 5.5757472266052994e-05,
+      "loss": 1.7166,
+      "step": 2370
+    },
+    {
+      "epoch": 0.21103032452562512,
+      "grad_norm": 11.332352638244629,
+      "learning_rate": 5.543891781386657e-05,
+      "loss": 1.671,
+      "step": 2380
+    },
+    {
+      "epoch": 0.21191700656144707,
+      "grad_norm": 7.515905380249023,
+      "learning_rate": 5.512013978907158e-05,
+      "loss": 1.6298,
+      "step": 2390
+    },
+    {
+      "epoch": 0.21280368859726903,
+      "grad_norm": 6.094747543334961,
+      "learning_rate": 5.4801151295384105e-05,
+      "loss": 1.5135,
+      "step": 2400
+    },
+    {
+      "epoch": 0.21280368859726903,
+      "eval_loss": 1.5888803005218506,
+      "eval_runtime": 59.4453,
+      "eval_samples_per_second": 8.411,
+      "eval_steps_per_second": 8.411,
+      "step": 2400
     }
   ],
   "logging_steps": 10,
@@ -1721,7 +1834,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 
+  "total_flos": 8.081845027159081e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
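(Aside, not part of the commit: trainer_state.json is the file the transformers Trainer reads when a run is resumed; the fields updated here record that step 2400 is now both the latest and the best checkpoint, with eval_loss 1.5888803005218506. A minimal sketch of inspecting the state and resuming, assuming `trainer` is the Trainer instance built by the original training script.)

import json

# Inspect the resumption metadata written at step 2400.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["best_metric"], state["best_model_checkpoint"])

# Resuming from the checkpoint directory restores the adapter weights together
# with optimizer.pt, scheduler.pt, rng_state.pth and this state file.
# `trainer` is assumed to already exist; the path comes from the diff above.
trainer.train(resume_from_checkpoint="./output/checkpoint-2400")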