MrRobotoAI committed on
Commit
3d2638d
·
verified ·
1 Parent(s): d2209c3

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,7 +1,8 @@
1
  ---
2
  base_model:
3
- - MrRobotoAI/MrRoboto-BASE-v2.1-8b-64k
4
- - WeMake/Llama-3-8B-Instruct-V41-1048k
 
5
  library_name: transformers
6
  tags:
7
  - mergekit
@@ -15,49 +16,27 @@ This is a merge of pre-trained language models created using [mergekit](https://
15
  ## Merge Details
16
  ### Merge Method
17
 
18
- This model was merged using the [Linear DARE](https://arxiv.org/abs/2311.03099) merge method using [WeMake/Llama-3-8B-Instruct-V41-1048k](https://huggingface.co/WeMake/Llama-3-8B-Instruct-V41-1048k) as a base.
19
 
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
23
- * [MrRobotoAI/MrRoboto-BASE-v2.1-8b-64k](https://huggingface.co/MrRobotoAI/MrRoboto-BASE-v2.1-8b-64k)
 
24
 
25
  ### Configuration
26
 
27
  The following YAML configuration was used to produce this model:
28
 
29
  ```yaml
30
- merge_method: dare_linear
31
  models:
32
- - model: WeMake/Llama-3-8B-Instruct-V41-1048k
33
- parameters:
34
- weight:
35
- - filter: v_proj
36
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
37
- - filter: o_proj
38
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
39
- - filter: up_proj
40
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
41
- - filter: gate_proj
42
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
43
- - filter: down_proj
44
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
45
- - value: 1
46
- - model: MrRobotoAI/MrRoboto-BASE-v2.1-8b-64k
47
- parameters:
48
- weight:
49
- - filter: v_proj
50
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
51
- - filter: o_proj
52
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
53
- - filter: up_proj
54
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
55
- - filter: gate_proj
56
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
57
- - filter: down_proj
58
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
59
- - value: 0
60
- base_model: WeMake/Llama-3-8B-Instruct-V41-1048k
61
- tokenizer_source: base
62
- dtype: bfloat16
63
  ```
 
1
  ---
2
  base_model:
3
+ - MrRobotoAI/MrRoboto-BASE-v2-8b-64k
4
+ - MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k
5
+ - MrRobotoAI/Llama-3-8B-Uncensored-0.2
6
  library_name: transformers
7
  tags:
8
  - mergekit
 
16
  ## Merge Details
17
  ### Merge Method
18
 
19
+ This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method, with [MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k](https://huggingface.co/MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k) as the base model.
20
 
21
  ### Models Merged
22
 
23
  The following models were included in the merge:
24
+ * [MrRobotoAI/MrRoboto-BASE-v2-8b-64k](https://huggingface.co/MrRobotoAI/MrRoboto-BASE-v2-8b-64k)
25
+ * [MrRobotoAI/Llama-3-8B-Uncensored-0.2](https://huggingface.co/MrRobotoAI/Llama-3-8B-Uncensored-0.2)
26
 
27
  ### Configuration
28
 
29
  The following YAML configuration was used to produce this model:
30
 
31
  ```yaml
 
32
  models:
33
+
34
+ - model: MrRobotoAI/Llama-3-8B-Uncensored-0.2
35
+ - model: MrRobotoAI/MrRoboto-BASE-v2-8b-64k
36
+ - model: MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k
37
+
38
+ merge_method: model_stock
39
+ base_model: MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k
40
+ normalize: true
41
+ dtype: float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "WeMake/Llama-3-8B-Instruct-V41-1048k",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -23,7 +23,7 @@
23
  "rope_scaling": null,
24
  "rope_theta": 2804339835.0,
25
  "tie_word_embeddings": false,
26
- "torch_dtype": "bfloat16",
27
  "transformers_version": "4.48.2",
28
  "use_cache": true,
29
  "vocab_size": 128256
 
1
  {
2
+ "_name_or_path": "MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
23
  "rope_scaling": null,
24
  "rope_theta": 2804339835.0,
25
  "tie_word_embeddings": false,
26
+ "torch_dtype": "float16",
27
  "transformers_version": "4.48.2",
28
  "use_cache": true,
29
  "vocab_size": 128256
mergekit_config.yml CHANGED
@@ -1,33 +1,10 @@
1
- merge_method: dare_linear
2
  models:
3
- - model: WeMake/Llama-3-8B-Instruct-V41-1048k
4
- parameters:
5
- weight:
6
- - filter: v_proj
7
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
8
- - filter: o_proj
9
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
10
- - filter: up_proj
11
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
12
- - filter: gate_proj
13
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
14
- - filter: down_proj
15
- value: [0.8, 0.8, 0.55, 0.45, 0.35, 0.25, 0.35, 0.45, 0.55, 0.8, 0.8]
16
- - value: 1
17
- - model: MrRobotoAI/MrRoboto-BASE-v2.1-8b-64k
18
- parameters:
19
- weight:
20
- - filter: v_proj
21
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
22
- - filter: o_proj
23
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
24
- - filter: up_proj
25
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
26
- - filter: gate_proj
27
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
28
- - filter: down_proj
29
- value: [0.2, 0.2, 0.45, 0.55, 0.65, 0.75, 0.65, 0.55, 0.45, 0.2, 0.2]
30
- - value: 0
31
- base_model: WeMake/Llama-3-8B-Instruct-V41-1048k
32
- tokenizer_source: base
33
- dtype: bfloat16
 
 
1
  models:
2
+
3
+ - model: MrRobotoAI/Llama-3-8B-Uncensored-0.2
4
+ - model: MrRobotoAI/MrRoboto-BASE-v2-8b-64k
5
+ - model: MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k
6
+
7
+ merge_method: model_stock
8
+ base_model: MrRobotoAI/MrRoboto-BASE-v2-Unholy-8b-64k
9
+ normalize: true
10
+ dtype: float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35b3732002e2dab64cb02382dafcad89f558b29fbcafd7edd31c12e2ecb49d3e
3
- size 4953586384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39eea101144d0037488633a3b9ef931855e36aeaed6ed921182ebb9f01270390
3
+ size 4953586328
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e7c3d8a583855fdcb93195bca3b2da144794c3197e88c1e5daa838156e90e82
3
- size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c009b3ea7a3419c2a406770acf098ccc6c31a2809b63ca0bc0f0743bade7c29
3
+ size 4999819232
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d0f9b94244059dc5dbdcd003832935d2c6ba0f8856b186ab157f0e68eaf44d
3
- size 4915916144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e42f2be4c50451d98cb409c91a4700842aeae4af7ca655c30faf2386e0b0561
3
+ size 4915916048
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2305713ec18dd14c2182baec5f56e68cdea9eab160cd58ed8551837a98dbb15
3
- size 1191234472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e3378c375c8987bb51e22b1fdf6472f56a91f322ea54231fcaa6a7b9f4a883
3
+ size 1191234448