Iteration 2: fine-tuned using the same dataset, but with 512-token sequences and a 64-token sliding window
Browse files
- README.md +4 -1
- config.json +1 -1
- model-00001-of-00006.safetensors +1 -1
- model-00002-of-00006.safetensors +1 -1
- model-00003-of-00006.safetensors +1 -1
- model-00004-of-00006.safetensors +1 -1
- model-00005-of-00006.safetensors +1 -1
- model-00006-of-00006.safetensors +1 -1
README.md
CHANGED
@@ -6,4 +6,7 @@ base_model:
|
|
6 |
|
7 |
|
8 |
Fine-tuned/hyperfitted with methodology from https://arxiv.org/abs/2412.04318
|
9 |
-
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
Fine-tuned/hyperfitted with methodology from https://arxiv.org/abs/2412.04318
|
9 |
+
|
10 |
+
using OrthoGrad optimizer https://arxiv.org/abs/2501.04697
|
11 |
+
|
12 |
+
Updated 23.02.2025: same dataset, 512-token sequences with a 64-token sliding window (loss still decreased).
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "./Qwen2.5-14B-Instruct-1M-HF-GK",
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
model-00001-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4986211280
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70a4b4b0c665125d68722f8daaf8ba7f0815b62346da2b6eedcf703a4b003a40
|
3 |
size 4986211280
|
model-00002-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4954847344
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5e371a11e0c359a2692fd0025fb997df7a8a682a2105197d264c8b50bad2e65
|
3 |
size 4954847344
|
model-00003-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4954847392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5820c7dfdce9280e4851957a7d195c74878ee532eab2b18670f011255239302f
|
3 |
size 4954847392
|
model-00004-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4954847392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd957afade2903db8f8968aa1e9673722e9d147b0b7cf2503eeb53525dc44d19
|
3 |
size 4954847392
|
model-00005-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4954847392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f780f1c4830c2bd96a07a27a88b1e5fad6f77910eaa2c3019af1d3e85dcb5e6
|
3 |
size 4954847392
|
model-00006-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4734533160
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b192ca9011e001d439f77ae31c469f0522756e7f77431c93e49383ee61f7ccf8
|
3 |
size 4734533160
|