pk11 committed on
Commit
95784ae
·
1 Parent(s): 054cd5b

Iteration 2, fine-tuned using the same dataset but with 512-token sequences and a 64-token sliding window

Browse files
README.md CHANGED
@@ -6,4 +6,7 @@ base_model:
6
 
7
 
8
  Fine-tuned/hyperfitted with methodology from https://arxiv.org/abs/2412.04318
9
- With OrthoGrad optimizer https://arxiv.org/abs/2501.04697
 
 
 
 
6
 
7
 
8
  Fine-tuned/hyperfitted with methodology from https://arxiv.org/abs/2412.04318
9
+
10
+ using OrthoGrad optimizer https://arxiv.org/abs/2501.04697
11
+
12
+ Updated 23.02.2025: same dataset, 512 token sequences with 64 tokens sliding window (loss still decreased).
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-14B-Instruct-1M",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "./Qwen2.5-14B-Instruct-1M-HF-GK",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8ff0b8f34f3b53f7ee677a47346b79ec6ffa12d058137bc4228786a032bd69a
3
  size 4986211280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a4b4b0c665125d68722f8daaf8ba7f0815b62346da2b6eedcf703a4b003a40
3
  size 4986211280
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77e7a869f01f97a00afe8a3014e562d9fcd8cd5ea09eb9f473f92feee1c6380d
3
  size 4954847344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e371a11e0c359a2692fd0025fb997df7a8a682a2105197d264c8b50bad2e65
3
  size 4954847344
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb5ac25de6bd7d5fb8d93e8d141c866527a4e33cc9d3f5cc35f1a2e82dd4b43f
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5820c7dfdce9280e4851957a7d195c74878ee532eab2b18670f011255239302f
3
  size 4954847392
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da338a53fde524ab95bbe034e5868565520680cd9baa87f86985fd7bdb1d2ce2
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd957afade2903db8f8968aa1e9673722e9d147b0b7cf2503eeb53525dc44d19
3
  size 4954847392
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfaa53688803147aa8e9eb6252318e49967cde7268060411b8b0e68d575c7ccd
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f780f1c4830c2bd96a07a27a88b1e5fad6f77910eaa2c3019af1d3e85dcb5e6
3
  size 4954847392
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d50d8e60849e1d25264208d147a4d1d7a0531d59c4f686c5a99c1deae1ae353e
3
  size 4734533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b192ca9011e001d439f77ae31c469f0522756e7f77431c93e49383ee61f7ccf8
3
  size 4734533160