nintwentydo committed on
Commit f5839a0 · verified · 1 Parent(s): c136114

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,52 @@
+{
+  "_name_or_path": "nintwentydo/Razorback-12B-v0.2",
+  "architectures": [
+    "LlavaForConditionalGeneration"
+  ],
+  "ignore_index": -100,
+  "image_seq_length": 1,
+  "image_token_index": 10,
+  "model_type": "llava",
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "hidden_size": 5120,
+    "head_dim": 128,
+    "intermediate_size": 14336,
+    "is_composition": true,
+    "max_position_embeddings": 131072,
+    "model_type": "mistral",
+    "num_hidden_layers": 40,
+    "num_attention_heads": 32,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 1000000000.0,
+    "sliding_window": null,
+    "vocab_size": 131072
+  },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.1",
+  "vision_config": {
+    "head_dim": 64,
+    "num_attention_heads": 16,
+    "hidden_act": "silu",
+    "image_size": 1024,
+    "is_composition": true,
+    "model_type": "pixtral",
+    "patch_size": 16,
+    "rope_theta": 10000.0,
+    "tie_word_embeddings": false
+  },
+  "vision_feature_layer": -1,
+  "vision_feature_select_strategy": "full",
+  "quantization_config": {
+    "quant_method": "exl2",
+    "version": "0.2.7",
+    "bits": 8.0,
+    "head_bits": 8,
+    "calibration": {
+      "rows": 115,
+      "length": 2048,
+      "dataset": "(default)"
+    }
+  }
+}
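The config above wires a 40-layer Mistral-style text backbone and a Pixtral vision tower into a LLaVA-style wrapper, with EXL2 quantization metadata (8.0 bits per weight, 8-bit head) attached; the EXL2 tensors themselves are normally consumed by ExLlamaV2-based loaders rather than plain transformers. As a quick sanity check, the fields can be read straight from the JSON; the sketch below is illustrative only and assumes the file has been downloaded locally as `config.json`.

```python
# Minimal sketch: sanity-check the fields above straight from the JSON file.
# "config.json" is assumed to be a local copy of the file shown in this commit.
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])                         # "llava" wrapper
print(cfg["text_config"]["model_type"],          # "mistral" backbone
      cfg["text_config"]["num_hidden_layers"])   # 40 layers
print(cfg["vision_config"]["model_type"])        # "pixtral" vision tower
print(cfg["quantization_config"]["quant_method"],
      cfg["quantization_config"]["bits"])        # exl2 at 8.0 bits per weight
```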
mergekit_config.yml ADDED
@@ -0,0 +1,738 @@
+merge_method: task_arithmetic
+dtype: bfloat16
+base_model: /opt/dlami/nvme/models/unsloth_Pixtral-12B-2409
+models:
+- model: /opt/dlami/nvme/models/TheDrummer_UnslopNemo-12B-v3_llava
+  parameters:
+    normalize: false
+    weight:
+    - filter: language_model.lm_head.weight
+      value: 0.027243
+    - filter: language_model.model.embed_tokens.weight
+      value: 0.14717
+    - filter: language_model.model.layers.39.mlp.gate_proj.weight
+      value: 0.128203
+    - filter: language_model.model.layers.39.mlp.up_proj.weight
+      value: 0.123174
+    - filter: language_model.model.layers.39.mlp.down_proj.weight
+      value: 0.098388
+    - filter: language_model.model.layers.38.mlp.down_proj.weight
+      value: 0.125974
+    - filter: language_model.model.layers.38.mlp.up_proj.weight
+      value: 0.151487
+    - filter: language_model.model.layers.38.mlp.gate_proj.weight
+      value: 0.154605
+    - filter: language_model.model.layers.37.mlp.gate_proj.weight
+      value: 0.160597
+    - filter: language_model.model.layers.37.mlp.up_proj.weight
+      value: 0.160597
+    - filter: language_model.model.layers.37.mlp.down_proj.weight
+      value: 0.154017
+    - filter: language_model.model.layers.36.mlp.up_proj.weight
+      value: 0.166257
+    - filter: language_model.model.layers.36.mlp.gate_proj.weight
+      value: 0.165639
+    - filter: language_model.model.layers.36.mlp.down_proj.weight
+      value: 0.165014
+    - filter: language_model.model.layers.35.mlp.gate_proj.weight
+      value: 0.17496
+    - filter: language_model.model.layers.35.mlp.up_proj.weight
+      value: 0.170804
+    - filter: language_model.model.layers.35.mlp.down_proj.weight
+      value: 0.170191
+    - filter: language_model.model.layers.4.mlp.gate_proj.weight
+      value: 0.186816
+    - filter: language_model.model.layers.6.mlp.gate_proj.weight
+      value: 0.186816
+    - filter: language_model.model.norm.weight
+      value: 0.700371
+    - filter: language_model.model.layers.34.mlp.gate_proj.weight
+      value: 0.175869
+    - filter: language_model.model.layers.7.mlp.gate_proj.weight
+      value: 0.184212
+    - filter: language_model.model.layers.33.mlp.gate_proj.weight
+      value: 0.177382
+    - filter: language_model.model.layers.34.mlp.up_proj.weight
+      value: 0.171061
+    - filter: language_model.model.layers.5.mlp.gate_proj.weight
+      value: 0.187836
+    - filter: language_model.model.layers.8.mlp.gate_proj.weight
+      value: 0.177382
+    - filter: language_model.model.layers.11.mlp.gate_proj.weight
+      value: 0.180445
+    - filter: language_model.model.layers.33.mlp.up_proj.weight
+      value: 0.171948
+    - filter: language_model.model.layers.34.mlp.down_proj.weight
+      value: 0.174078
+    - filter: language_model.model.layers.9.mlp.gate_proj.weight
+      value: 0.176204
+    - filter: language_model.model.layers.10.mlp.gate_proj.weight
+      value: 0.175606
+    - filter: language_model.model.layers.3.mlp.gate_proj.weight
+      value: 0.186236
+    - filter: language_model.model.layers.32.mlp.gate_proj.weight
+      value: 0.184118
+    - filter: language_model.model.layers.12.mlp.gate_proj.weight
+      value: 0.183562
+    - filter: language_model.model.layers.13.mlp.gate_proj.weight
+      value: 0.180853
+    - filter: language_model.model.layers.31.mlp.gate_proj.weight
+      value: 0.189408
+    - filter: language_model.model.layers.32.mlp.up_proj.weight
+      value: 0.172229
+    - filter: language_model.model.layers.33.mlp.down_proj.weight
+      value: 0.176549
+    - filter: language_model.model.layers.14.mlp.gate_proj.weight
+      value: 0.175944
+    - filter: language_model.model.layers.30.mlp.gate_proj.weight
+      value: 0.194776
+    - filter: language_model.model.layers.29.mlp.gate_proj.weight
+      value: 0.194269
+    - filter: language_model.model.layers.31.mlp.up_proj.weight
+      value: 0.174714
+    - filter: language_model.model.layers.32.mlp.down_proj.weight
+      value: 0.179092
+    - filter: language_model.model.layers.15.mlp.gate_proj.weight
+      value: 0.174089
+    - filter: language_model.model.layers.21.mlp.gate_proj.weight
+      value: 0.187243
+    - filter: language_model.model.layers.22.mlp.gate_proj.weight
+      value: 0.189419
+    - filter: language_model.model.layers.27.mlp.gate_proj.weight
+      value: 0.195916
+    - filter: language_model.model.layers.28.mlp.gate_proj.weight
+      value: 0.195916
+    - filter: language_model.model.layers.16.mlp.gate_proj.weight
+      value: 0.173457
+    - filter: language_model.model.layers.2.mlp.gate_proj.weight
+      value: 0.191059
+    - filter: language_model.model.layers.20.mlp.gate_proj.weight
+      value: 0.184494
+    - filter: language_model.model.layers.26.mlp.gate_proj.weight
+      value: 0.19541
+    - filter: language_model.model.layers.30.mlp.up_proj.weight
+      value: 0.175673
+    - filter: language_model.model.layers.18.mlp.gate_proj.weight
+      value: 0.179492
+    - filter: language_model.model.layers.19.mlp.gate_proj.weight
+      value: 0.181707
+    - filter: language_model.model.layers.23.mlp.gate_proj.weight
+      value: 0.192713
+    - filter: language_model.model.layers.24.mlp.gate_proj.weight
+      value: 0.194899
+    - filter: language_model.model.layers.25.mlp.gate_proj.weight
+      value: 0.194899
+    - filter: language_model.model.layers.29.mlp.up_proj.weight
+      value: 0.177272
+    - filter: language_model.model.layers.31.mlp.down_proj.weight
+      value: 0.181707
+    - filter: language_model.model.layers.17.mlp.gate_proj.weight
+      value: 0.176651
+    - filter: language_model.model.layers.28.mlp.up_proj.weight
+      value: 0.176651
+    - filter: language_model.model.layers.26.mlp.up_proj.weight
+      value: 0.179905
+    - filter: language_model.model.layers.27.mlp.up_proj.weight
+      value: 0.179905
+    - filter: language_model.model.layers.30.mlp.down_proj.weight
+      value: 0.182155
+    - filter: language_model.model.layers.1.mlp.gate_proj.weight
+      value: 0.195027
+    - filter: language_model.model.layers.23.mlp.up_proj.weight
+      value: 0.186066
+    - filter: language_model.model.layers.24.mlp.up_proj.weight
+      value: 0.183812
+    - filter: language_model.model.layers.25.mlp.up_proj.weight
+      value: 0.183812
+    - filter: language_model.model.layers.15.mlp.up_proj.weight
+      value: 0.19674
+    - filter: language_model.model.layers.16.mlp.up_proj.weight
+      value: 0.198974
+    - filter: language_model.model.layers.17.mlp.up_proj.weight
+      value: 0.201202
+    - filter: language_model.model.layers.21.mlp.up_proj.weight
+      value: 0.187747
+    - filter: language_model.model.layers.22.mlp.up_proj.weight
+      value: 0.185485
+    - filter: language_model.model.layers.0.mlp.gate_proj.weight
+      value: 0.191707
+    - filter: language_model.model.layers.14.mlp.up_proj.weight
+      value: 0.193966
+    - filter: language_model.model.layers.18.mlp.up_proj.weight
+      value: 0.200708
+    - filter: language_model.model.layers.19.mlp.up_proj.weight
+      value: 0.19622
+    - filter: language_model.model.layers.20.mlp.up_proj.weight
+      value: 0.191707
+    - filter: language_model.model.layers.29.mlp.down_proj.weight
+      value: 0.187172
+    - filter: language_model.model.layers.10.mlp.up_proj.weight
+      value: 0.195161
+    - filter: language_model.model.layers.9.mlp.up_proj.weight
+      value: 0.192881
+    - filter: language_model.model.layers.12.mlp.up_proj.weight
+      value: 0.194623
+    - filter: language_model.model.layers.13.mlp.up_proj.weight
+      value: 0.194623
+    - filter: language_model.model.layers.28.mlp.down_proj.weight
+      value: 0.192328
+    - filter: language_model.model.layers.8.mlp.up_proj.weight
+      value: 0.187722
+    - filter: language_model.model.layers.5.mlp.up_proj.weight
+      value: 0.182478
+    - filter: language_model.model.layers.11.mlp.up_proj.weight
+      value: 0.195844
+    - filter: language_model.model.layers.15.mlp.down_proj.weight
+      value: 0.202757
+    - filter: language_model.model.layers.27.mlp.down_proj.weight
+      value: 0.193527
+    - filter: language_model.model.layers.3.mlp.up_proj.weight
+      value: 0.179504
+    - filter: language_model.model.layers.14.mlp.down_proj.weight
+      value: 0.199945
+    - filter: language_model.model.layers.16.mlp.down_proj.weight
+      value: 0.206862
+    - filter: language_model.model.layers.2.mlp.up_proj.weight
+      value: 0.188289
+    - filter: language_model.model.layers.4.mlp.up_proj.weight
+      value: 0.181225
+    - filter: language_model.model.layers.6.mlp.up_proj.weight
+      value: 0.18594
+    - filter: language_model.model.layers.7.mlp.up_proj.weight
+      value: 0.190633
+    - filter: language_model.model.layers.26.mlp.down_proj.weight
+      value: 0.197091
+    - filter: language_model.model.layers.0.mlp.up_proj.weight
+      value: 0.191835
+    - filter: language_model.model.layers.8.mlp.down_proj.weight
+      value: 0.19655
+    - filter: language_model.model.layers.9.mlp.down_proj.weight
+      value: 0.198898
+    - filter: language_model.model.layers.1.mlp.up_proj.weight
+      value: 0.193633
+    - filter: language_model.model.layers.17.mlp.down_proj.weight
+      value: 0.212397
+    - filter: language_model.model.layers.10.mlp.down_proj.weight
+      value: 0.204918
+    - filter: language_model.model.layers.12.mlp.down_proj.weight
+      value: 0.202561
+    - filter: language_model.model.layers.13.mlp.down_proj.weight
+      value: 0.204918
+    - filter: language_model.model.layers.2.mlp.down_proj.weight
+      value: 0.195448
+    - filter: language_model.model.layers.21.mlp.down_proj.weight
+      value: 0.202561
+    - filter: language_model.model.layers.22.mlp.down_proj.weight
+      value: 0.202561
+    - filter: language_model.model.layers.25.mlp.down_proj.weight
+      value: 0.202561
+    - filter: language_model.model.layers.3.mlp.down_proj.weight
+      value: 0.190673
+    - filter: language_model.model.layers.23.mlp.down_proj.weight
+      value: 0.206783
+    - filter: language_model.model.layers.5.mlp.down_proj.weight
+      value: 0.194886
+    - filter: language_model.model.layers.18.mlp.down_proj.weight
+      value: 0.213388
+    - filter: language_model.model.layers.20.mlp.down_proj.weight
+      value: 0.208664
+    - filter: language_model.model.layers.24.mlp.down_proj.weight
+      value: 0.206291
+    - filter: language_model.model.layers.11.mlp.down_proj.weight
+      value: 0.205793
+    - filter: language_model.model.layers.19.mlp.down_proj.weight
+      value: 0.215302
+    - filter: language_model.model.layers.4.mlp.down_proj.weight
+      value: 0.193743
+    - filter: language_model.model.layers.6.mlp.down_proj.weight
+      value: 0.196167
+    - filter: language_model.model.layers.7.mlp.down_proj.weight
+      value: 0.200995
+    - filter: language_model.model.layers.1.mlp.down_proj.weight
+      value: 0.202878
+    - filter: language_model.model.layers.0.mlp.down_proj.weight
+      value: 0.216568
+    - filter: language_model.model.layers.39.self_attn.o_proj.weight
+      value: 0.125786
+    - filter: language_model.model.layers.38.self_attn.o_proj.weight
+      value: 0.168001
+    - filter: language_model.model.layers.39.self_attn.q_proj.weight
+      value: 0.104322
+    - filter: language_model.model.layers.32.self_attn.q_proj.weight
+      value: 0.142943
+    - filter: language_model.model.layers.35.self_attn.q_proj.weight
+      value: 0.135335
+    - filter: language_model.model.layers.27.self_attn.q_proj.weight
+      value: 0.155682
+    - filter: language_model.model.layers.28.self_attn.q_proj.weight
+      value: 0.160908
+    - filter: language_model.model.layers.36.self_attn.o_proj.weight
+      value: 0.169581
+    - filter: language_model.model.layers.30.self_attn.q_proj.weight
+      value: 0.153355
+    - filter: language_model.model.layers.33.self_attn.q_proj.weight
+      value: 0.14809
+    - filter: language_model.model.layers.37.self_attn.o_proj.weight
+      value: 0.181167
+    - filter: language_model.model.layers.29.self_attn.q_proj.weight
+      value: 0.151579
+    - filter: language_model.model.layers.35.self_attn.o_proj.weight
+      value: 0.172772
+    - filter: language_model.model.layers.5.self_attn.q_proj.weight
+      value: 0.20046
+    - filter: language_model.model.layers.7.self_attn.q_proj.weight
+      value: 0.19533
+    - filter: language_model.model.layers.36.self_attn.q_proj.weight
+      value: 0.131309
+    - filter: language_model.model.layers.6.self_attn.q_proj.weight
+      value: 0.208596
+    - filter: language_model.model.layers.25.self_attn.q_proj.weight
+      value: 0.169983
+    - filter: language_model.model.layers.31.self_attn.q_proj.weight
+      value: 0.159312
+    - filter: language_model.model.layers.2.self_attn.q_proj.weight
+      value: 0.226469
+    - filter: language_model.model.layers.22.self_attn.q_proj.weight
+      value: 0.173013
+    - filter: language_model.model.layers.26.self_attn.q_proj.weight
+      value: 0.169458
+    - filter: language_model.model.layers.34.self_attn.q_proj.weight
+      value: 0.138944
+    - filter: language_model.model.layers.38.self_attn.q_proj.weight
+      value: 0.120914
+    - filter: language_model.model.layers.4.self_attn.q_proj.weight
+      value: 0.206842
+    - filter: language_model.model.layers.3.self_attn.q_proj.weight
+      value: 0.218577
+    - filter: language_model.model.layers.24.self_attn.q_proj.weight
+      value: 0.17765
+    - filter: language_model.model.layers.13.self_attn.q_proj.weight
+      value: 0.19292
+    - filter: language_model.model.layers.32.self_attn.o_proj.weight
+      value: 0.167494
+    - filter: language_model.model.layers.8.self_attn.q_proj.weight
+      value: 0.203614
+    - filter: language_model.model.layers.9.self_attn.q_proj.weight
+      value: 0.201451
+    - filter: language_model.model.layers.33.self_attn.o_proj.weight
+      value: 0.162669
+    - filter: language_model.model.layers.34.self_attn.o_proj.weight
+      value: 0.190237
+    - filter: language_model.model.layers.21.self_attn.q_proj.weight
+      value: 0.191609
+    - filter: language_model.model.layers.12.self_attn.q_proj.weight
+      value: 0.196632
+    - filter: language_model.model.layers.23.self_attn.q_proj.weight
+      value: 0.180131
+    - filter: language_model.model.layers.11.self_attn.q_proj.weight
+      value: 0.207093
+    - filter: language_model.model.layers.37.self_attn.q_proj.weight
+      value: 0.132486
+    - filter: language_model.model.layers.10.self_attn.q_proj.weight
+      value: 0.209974
+    - filter: language_model.model.layers.30.self_attn.o_proj.weight
+      value: 0.159707
+    - filter: language_model.model.layers.14.self_attn.q_proj.weight
+      value: 0.202298
+    - filter: language_model.model.layers.15.self_attn.q_proj.weight
+      value: 0.204133
+    - filter: language_model.model.layers.20.self_attn.q_proj.weight
+      value: 0.196769
+    - filter: language_model.model.layers.18.self_attn.q_proj.weight
+      value: 0.212189
+    - filter: language_model.model.layers.19.self_attn.q_proj.weight
+      value: 0.196912
+    - filter: language_model.model.layers.1.self_attn.q_proj.weight
+      value: 0.257896
+    - filter: language_model.model.layers.16.self_attn.q_proj.weight
+      value: 0.212969
+    - filter: language_model.model.layers.29.self_attn.o_proj.weight
+      value: 0.174884
+    - filter: language_model.model.layers.31.self_attn.o_proj.weight
+      value: 0.17181
+    - filter: language_model.model.layers.17.self_attn.q_proj.weight
+      value: 0.211513
+    - filter: language_model.model.layers.28.self_attn.o_proj.weight
+      value: 0.166682
+    - filter: language_model.model.layers.27.self_attn.o_proj.weight
+      value: 0.168077
+    - filter: language_model.model.layers.26.self_attn.o_proj.weight
+      value: 0.173479
+    - filter: language_model.model.layers.0.self_attn.q_proj.weight
+      value: 0.257296
+    - filter: language_model.model.layers.25.self_attn.o_proj.weight
+      value: 0.186181
+    - filter: language_model.model.layers.22.self_attn.o_proj.weight
+      value: 0.198762
+    - filter: language_model.model.layers.24.self_attn.o_proj.weight
+      value: 0.196575
+    - filter: language_model.model.layers.23.self_attn.o_proj.weight
+      value: 0.196065
+    - filter: language_model.model.layers.21.self_attn.o_proj.weight
+      value: 0.204071
+    - filter: language_model.model.layers.2.self_attn.o_proj.weight
+      value: 0.172557
+    - filter: language_model.model.layers.3.self_attn.o_proj.weight
+      value: 0.158307
+    - filter: language_model.model.layers.4.self_attn.o_proj.weight
+      value: 0.181612
+    - filter: language_model.model.layers.20.self_attn.o_proj.weight
+      value: 0.21766
+    - filter: language_model.model.layers.5.self_attn.o_proj.weight
+      value: 0.17041
+    - filter: language_model.model.layers.19.self_attn.o_proj.weight
+      value: 0.233288
+    - filter: language_model.model.layers.0.self_attn.o_proj.weight
+      value: 0.126586
+    - filter: language_model.model.layers.6.self_attn.o_proj.weight
+      value: 0.187595
+    - filter: language_model.model.layers.7.self_attn.o_proj.weight
+      value: 0.20661
+    - filter: language_model.model.layers.1.self_attn.o_proj.weight
+      value: 0.17396
+    - filter: language_model.model.layers.14.self_attn.o_proj.weight
+      value: 0.216113
+    - filter: language_model.model.layers.8.self_attn.o_proj.weight
+      value: 0.21012
+    - filter: language_model.model.layers.12.self_attn.o_proj.weight
+      value: 0.239032
+    - filter: language_model.model.layers.18.self_attn.o_proj.weight
+      value: 0.239032
+    - filter: language_model.model.layers.9.self_attn.o_proj.weight
+      value: 0.234053
+    - filter: language_model.model.layers.13.self_attn.o_proj.weight
+      value: 0.235535
+    - filter: language_model.model.layers.11.self_attn.o_proj.weight
+      value: 0.235188
+    - filter: language_model.model.layers.15.self_attn.o_proj.weight
+      value: 0.240286
+    - filter: language_model.model.layers.38.self_attn.v_proj.weight
+      value: 0.245061
+    - filter: language_model.model.layers.17.self_attn.o_proj.weight
+      value: 0.259885
+    - filter: language_model.model.layers.39.self_attn.v_proj.weight
+      value: 0.174974
+    - filter: language_model.model.layers.10.self_attn.o_proj.weight
+      value: 0.24449
+    - filter: language_model.model.layers.16.self_attn.o_proj.weight
+      value: 0.247054
+    - filter: language_model.model.layers.0.self_attn.k_proj.weight
+      value: 0.263433
+    - filter: language_model.model.layers.37.self_attn.v_proj.weight
+      value: 0.251838
+    - filter: language_model.model.layers.36.self_attn.v_proj.weight
+      value: 0.229054
+    - filter: language_model.model.layers.2.self_attn.k_proj.weight
+      value: 0.272851
+    - filter: language_model.model.layers.34.self_attn.v_proj.weight
+      value: 0.237781
+    - filter: language_model.model.layers.35.self_attn.v_proj.weight
+      value: 0.234624
+    - filter: language_model.model.layers.39.self_attn.k_proj.weight
+      value: 0.131171
+    - filter: language_model.model.layers.4.self_attn.k_proj.weight
+      value: 0.283584
+    - filter: language_model.model.layers.6.self_attn.k_proj.weight
+      value: 0.27769
+    - filter: language_model.model.layers.5.self_attn.k_proj.weight
+      value: 0.271602
+    - filter: language_model.model.layers.7.self_attn.k_proj.weight
+      value: 0.262444
+    - filter: language_model.model.layers.3.self_attn.k_proj.weight
+      value: 0.287982
+    - filter: language_model.model.layers.1.self_attn.k_proj.weight
+      value: 0.322508
+    - filter: language_model.model.layers.8.self_attn.k_proj.weight
+      value: 0.27421
+    - filter: language_model.model.layers.13.self_attn.k_proj.weight
+      value: 0.264598
+    - filter: language_model.model.layers.9.self_attn.k_proj.weight
+      value: 0.270955
+    - filter: language_model.model.layers.28.self_attn.k_proj.weight
+      value: 0.225454
+    - filter: language_model.model.layers.32.self_attn.v_proj.weight
+      value: 0.223712
+    - filter: language_model.model.layers.10.self_attn.k_proj.weight
+      value: 0.280152
+    - filter: language_model.model.layers.11.self_attn.k_proj.weight
+      value: 0.281748
+    - filter: language_model.model.layers.12.self_attn.k_proj.weight
+      value: 0.268835
+    - filter: language_model.model.layers.15.self_attn.k_proj.weight
+      value: 0.278551
+    - filter: language_model.model.layers.30.self_attn.k_proj.weight
+      value: 0.224603
+    - filter: language_model.model.layers.25.self_attn.k_proj.weight
+      value: 0.246963
+    - filter: language_model.model.layers.27.self_attn.k_proj.weight
+      value: 0.233124
+    - filter: language_model.model.layers.35.self_attn.k_proj.weight
+      value: 0.189342
+    - filter: language_model.model.layers.14.self_attn.k_proj.weight
+      value: 0.275182
+    - filter: language_model.model.layers.26.self_attn.k_proj.weight
+      value: 0.239651
+    - filter: language_model.model.layers.29.self_attn.k_proj.weight
+      value: 0.212543
+    - filter: language_model.model.layers.31.self_attn.k_proj.weight
+      value: 0.224937
+    - filter: language_model.model.layers.22.self_attn.k_proj.weight
+      value: 0.247783
+    - filter: language_model.model.layers.32.self_attn.k_proj.weight
+      value: 0.200668
+    - filter: language_model.model.layers.33.self_attn.k_proj.weight
+      value: 0.215173
+    - filter: language_model.model.layers.24.self_attn.k_proj.weight
+      value: 0.24565
+    - filter: language_model.model.layers.29.self_attn.v_proj.weight
+      value: 0.220374
+    - filter: language_model.model.layers.33.self_attn.v_proj.weight
+      value: 0.227699
+    - filter: language_model.model.layers.18.self_attn.k_proj.weight
+      value: 0.2765
+    - filter: language_model.model.layers.36.self_attn.k_proj.weight
+      value: 0.17181
+    - filter: language_model.model.layers.16.self_attn.k_proj.weight
+      value: 0.289809
+    - filter: language_model.model.layers.21.self_attn.k_proj.weight
+      value: 0.266098
+    - filter: language_model.model.layers.23.self_attn.k_proj.weight
+      value: 0.246661
+    - filter: language_model.model.layers.20.self_attn.k_proj.weight
+      value: 0.266
+    - filter: language_model.model.layers.31.self_attn.v_proj.weight
+      value: 0.219631
+    - filter: language_model.model.layers.30.self_attn.v_proj.weight
+      value: 0.217368
+    - filter: language_model.model.layers.34.self_attn.k_proj.weight
+      value: 0.171745
+    - filter: language_model.model.layers.17.self_attn.k_proj.weight
+      value: 0.281311
+    - filter: language_model.model.layers.19.self_attn.k_proj.weight
+      value: 0.258162
+    - filter: language_model.model.layers.38.self_attn.k_proj.weight
+      value: 0.147819
+    - filter: language_model.model.layers.28.self_attn.v_proj.weight
+      value: 0.212052
+    - filter: language_model.model.layers.27.self_attn.v_proj.weight
+      value: 0.220392
+    - filter: language_model.model.layers.37.self_attn.k_proj.weight
+      value: 0.177511
+    - filter: language_model.model.layers.26.self_attn.v_proj.weight
+      value: 0.223488
+    - filter: language_model.model.layers.22.self_attn.v_proj.weight
+      value: 0.242903
+    - filter: language_model.model.layers.24.self_attn.v_proj.weight
+      value: 0.235792
+    - filter: language_model.model.layers.25.self_attn.v_proj.weight
+      value: 0.231352
+    - filter: language_model.model.layers.23.self_attn.v_proj.weight
+      value: 0.235188
+    - filter: language_model.model.layers.2.self_attn.v_proj.weight
+      value: 0.168169
+    - filter: language_model.model.layers.5.self_attn.v_proj.weight
+      value: 0.165651
+    - filter: language_model.model.layers.3.self_attn.v_proj.weight
+      value: 0.151622
+    - filter: language_model.model.layers.4.self_attn.v_proj.weight
+      value: 0.187611
+    - filter: language_model.model.layers.20.self_attn.v_proj.weight
+      value: 0.224346
+    - filter: language_model.model.layers.19.self_attn.v_proj.weight
+      value: 0.245325
+    - filter: language_model.model.layers.21.self_attn.v_proj.weight
+      value: 0.22807
+    - filter: language_model.model.layers.6.self_attn.v_proj.weight
+      value: 0.18168
+    - filter: language_model.model.layers.7.self_attn.v_proj.weight
+      value: 0.204672
+    - filter: language_model.model.layers.17.self_attn.v_proj.weight
+      value: 0.243401
+    - filter: language_model.model.layers.8.self_attn.v_proj.weight
+      value: 0.19022
+    - filter: language_model.model.layers.12.self_attn.v_proj.weight
+      value: 0.232988
+    - filter: language_model.model.layers.9.self_attn.v_proj.weight
+      value: 0.230431
+    - filter: language_model.model.layers.14.self_attn.v_proj.weight
+      value: 0.205856
+    - filter: language_model.model.layers.1.self_attn.v_proj.weight
+      value: 0.168527
+    - filter: language_model.model.layers.10.self_attn.v_proj.weight
+      value: 0.228984
+    - filter: language_model.model.layers.13.self_attn.v_proj.weight
+      value: 0.219502
+    - filter: language_model.model.layers.15.self_attn.v_proj.weight
+      value: 0.226354
+    - filter: language_model.model.layers.16.self_attn.v_proj.weight
+      value: 0.23457
+    - filter: language_model.model.layers.18.self_attn.v_proj.weight
+      value: 0.250678
+    - filter: language_model.model.layers.11.self_attn.v_proj.weight
+      value: 0.22923
+    - filter: language_model.model.layers.0.self_attn.v_proj.weight
+      value: 0.134132
+    - filter: language_model.model.layers.34.post_attention_layernorm.weight
+      value: 0.883075
+    - filter: language_model.model.layers.33.post_attention_layernorm.weight
+      value: 0.880867
+    - filter: language_model.model.layers.35.post_attention_layernorm.weight
+      value: 0.888537
+    - filter: language_model.model.layers.38.post_attention_layernorm.weight
+      value: 0.895727
+    - filter: language_model.model.layers.31.post_attention_layernorm.weight
+      value: 0.880202
+    - filter: language_model.model.layers.32.post_attention_layernorm.weight
+      value: 0.882647
+    - filter: language_model.model.layers.36.post_attention_layernorm.weight
+      value: 0.895084
+    - filter: language_model.model.layers.37.post_attention_layernorm.weight
+      value: 0.896463
+    - filter: language_model.model.layers.30.post_attention_layernorm.weight
+      value: 0.884693
+    - filter: language_model.model.layers.29.post_attention_layernorm.weight
+      value: 0.886562
+    - filter: language_model.model.layers.28.post_attention_layernorm.weight
+      value: 0.883506
+    - filter: language_model.model.layers.27.post_attention_layernorm.weight
+      value: 0.885518
+    - filter: language_model.model.layers.30.input_layernorm.weight
+      value: 0.900142
+    - filter: language_model.model.layers.26.post_attention_layernorm.weight
+      value: 0.888298
+    - filter: language_model.model.layers.38.input_layernorm.weight
+      value: 0.918705
+    - filter: language_model.model.layers.25.post_attention_layernorm.weight
+      value: 0.887788
+    - filter: language_model.model.layers.11.post_attention_layernorm.weight
+      value: 0.819136
+    - filter: language_model.model.layers.8.input_layernorm.weight
+      value: 0.827776
+    - filter: language_model.model.layers.12.post_attention_layernorm.weight
+      value: 0.829306
+    - filter: language_model.model.layers.17.post_attention_layernorm.weight
+      value: 0.844334
+    - filter: language_model.model.layers.24.post_attention_layernorm.weight
+      value: 0.88749
+    - filter: language_model.model.layers.23.post_attention_layernorm.weight
+      value: 0.882598
+    - filter: language_model.model.layers.18.post_attention_layernorm.weight
+      value: 0.855345
+    - filter: language_model.model.layers.16.post_attention_layernorm.weight
+      value: 0.842616
+    - filter: language_model.model.layers.27.input_layernorm.weight
+      value: 0.906137
+    - filter: language_model.model.layers.13.post_attention_layernorm.weight
+      value: 0.838716
+    - filter: language_model.model.layers.26.input_layernorm.weight
+      value: 0.901367
+    - filter: language_model.model.layers.10.post_attention_layernorm.weight
+      value: 0.815316
+    - filter: language_model.model.layers.14.input_layernorm.weight
+      value: 0.879509
+    - filter: language_model.model.layers.15.post_attention_layernorm.weight
+      value: 0.843714
+    - filter: language_model.model.layers.19.post_attention_layernorm.weight
+      value: 0.865915
+    - filter: language_model.model.layers.22.post_attention_layernorm.weight
+      value: 0.885566
+    - filter: language_model.model.layers.28.input_layernorm.weight
+      value: 0.902543
+    - filter: language_model.model.layers.5.input_layernorm.weight
+      value: 0.810565
+    - filter: language_model.model.layers.6.post_attention_layernorm.weight
+      value: 0.796305
+    - filter: language_model.model.layers.7.post_attention_layernorm.weight
+      value: 0.806038
+    - filter: language_model.model.layers.9.post_attention_layernorm.weight
+      value: 0.815692
+    - filter: language_model.model.layers.21.input_layernorm.weight
+      value: 0.891671
+    - filter: language_model.model.layers.6.input_layernorm.weight
+      value: 0.811936
+    - filter: language_model.model.layers.7.input_layernorm.weight
+      value: 0.83771
+    - filter: language_model.model.layers.14.post_attention_layernorm.weight
+      value: 0.852636
+    - filter: language_model.model.layers.21.post_attention_layernorm.weight
+      value: 0.886217
+    - filter: language_model.model.layers.20.post_attention_layernorm.weight
+      value: 0.880705
+    - filter: language_model.model.layers.3.input_layernorm.weight
+      value: 0.816342
+    - filter: language_model.model.layers.10.input_layernorm.weight
+      value: 0.863017
+    - filter: language_model.model.layers.8.post_attention_layernorm.weight
+      value: 0.820931
+    - filter: language_model.model.layers.1.input_layernorm.weight
+      value: 0.771411
+    - filter: language_model.model.layers.31.input_layernorm.weight
+      value: 0.910857
+    - filter: language_model.model.layers.37.input_layernorm.weight
+      value: 0.922558
+    - filter: language_model.model.layers.5.post_attention_layernorm.weight
+      value: 0.794374
+    - filter: language_model.model.layers.25.input_layernorm.weight
+      value: 0.9114
+    - filter: language_model.model.layers.35.input_layernorm.weight
+      value: 0.923684
+    - filter: language_model.model.layers.33.input_layernorm.weight
+      value: 0.919435
+    - filter: language_model.model.layers.16.input_layernorm.weight
+      value: 0.892982
+    - filter: language_model.model.layers.32.input_layernorm.weight
+      value: 0.921119
+    - filter: language_model.model.layers.4.post_attention_layernorm.weight
+      value: 0.784228
+    - filter: language_model.model.layers.9.input_layernorm.weight
+      value: 0.868681
+    - filter: language_model.model.layers.11.input_layernorm.weight
+      value: 0.884768
+    - filter: language_model.model.layers.36.input_layernorm.weight
+      value: 0.927183
+    - filter: language_model.model.layers.20.input_layernorm.weight
+      value: 0.912409
+    - filter: language_model.model.layers.29.input_layernorm.weight
+      value: 0.92094
+    - filter: language_model.model.layers.34.input_layernorm.weight
+      value: 0.924849
+    - filter: language_model.model.layers.15.input_layernorm.weight
+      value: 0.896905
+    - filter: language_model.model.layers.12.input_layernorm.weight
+      value: 0.891217
+    - filter: language_model.model.layers.13.input_layernorm.weight
+      value: 0.8992
+    - filter: language_model.model.layers.23.input_layernorm.weight
+      value: 0.913613
+    - filter: language_model.model.layers.18.input_layernorm.weight
+      value: 0.906696
+    - filter: language_model.model.layers.24.input_layernorm.weight
+      value: 0.91877
+    - filter: language_model.model.layers.3.post_attention_layernorm.weight
+      value: 0.786238
+    - filter: language_model.model.layers.2.post_attention_layernorm.weight
+      value: 0.777384
+    - filter: language_model.model.layers.39.input_layernorm.weight
+      value: 0.942583
+    - filter: language_model.model.layers.4.input_layernorm.weight
+      value: 0.83397
+    - filter: language_model.model.layers.22.input_layernorm.weight
+      value: 0.916358
+    - filter: language_model.model.layers.17.input_layernorm.weight
+      value: 0.91935
+    - filter: language_model.model.layers.39.post_attention_layernorm.weight
+      value: 0.955214
+    - filter: language_model.model.layers.19.input_layernorm.weight
+      value: 0.932588
+    - filter: language_model.model.layers.0.post_attention_layernorm.weight
+      value: 0.709289
+    - filter: language_model.model.layers.1.post_attention_layernorm.weight
+      value: 0.750784
+    - filter: language_model.model.layers.2.input_layernorm.weight
+      value: 0.851994
+    - filter: language_model.model.layers.0.input_layernorm.weight
+      value: 0.736801
+    - value: 0.0
+tokenizer:
+  source: base
+chat_template: pixtral12b
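mergekit_config.yml records a task_arithmetic merge: for every tensor matched by a filter, the donor model's delta against the Pixtral base is scaled by the listed weight and added back onto the base (`normalize: false` leaves the weights as given, and the trailing `- value: 0.0` entry effectively keeps the base weights for any tensor no filter matches). The sketch below is a minimal, generic illustration of that per-tensor formula, not mergekit's own implementation; the example weight is copied from the first filter entry purely for illustration.

```python
# Minimal sketch of per-tensor task arithmetic, the merge_method named above.
# Illustrates the general formula merged = base + w * (donor - base); this is
# not mergekit's internal code.
import torch

def task_arithmetic(base: torch.Tensor, donor: torch.Tensor, weight: float) -> torch.Tensor:
    """Add a scaled 'task vector' (donor - base) onto the base tensor."""
    return base + weight * (donor - base)

# Toy stand-ins for one filtered tensor, e.g. language_model.lm_head.weight
base_t = torch.randn(8, 8)
donor_t = torch.randn(8, 8)
merged = task_arithmetic(base_t, donor_t, weight=0.027243)  # value from the first filter
print(merged.shape)
```

A config like this is normally executed with mergekit's `mergekit-yaml` command, pointing it at the YAML file and an output directory.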
output.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87d9e3a07c8a729696eb62746361eaa5327f6c6f1f095f6b771a94896616c73c
+size 13701532624
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "PixtralImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "patch_size": {
+    "height": 16,
+    "width": 16
+  },
+  "processor_class": "PixtralProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 1024
+  }
+}
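The preprocessor settings amount to: convert to RGB, resize so the longest edge is at most 1024 pixels, rescale pixel values by 1/255 (0.00392…), and normalize with the listed mean and std. A rough equivalent in plain PIL/NumPy is sketched below for orientation; it assumes `"resample": 3` means bicubic and skips the patch-alignment details the real `PixtralImageProcessor` handles.

```python
# Minimal sketch of the preprocessing described above, using PIL + NumPy.
# Assumes "resample": 3 corresponds to bicubic resampling.
import numpy as np
from PIL import Image

IMAGE_MEAN = np.array([0.48145466, 0.4578275, 0.40821073])
IMAGE_STD = np.array([0.26862954, 0.26130258, 0.27577711])

def preprocess(img: Image.Image, longest_edge: int = 1024) -> np.ndarray:
    img = img.convert("RGB")                           # do_convert_rgb
    scale = min(1.0, longest_edge / max(img.size))     # do_resize: cap the longest edge
    new_size = (round(img.width * scale), round(img.height * scale))
    img = img.resize(new_size, Image.Resampling.BICUBIC)
    arr = np.asarray(img).astype(np.float32) * (1 / 255)  # do_rescale, rescale_factor
    arr = (arr - IMAGE_MEAN) / IMAGE_STD                   # do_normalize
    return arr.transpose(2, 0, 1)                          # HWC -> CHW
```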
processor_config.json ADDED
@@ -0,0 +1,7 @@
+{
+  "image_break_token": "[IMG_BREAK]",
+  "image_end_token": "[IMG_END]",
+  "image_token": "[IMG]",
+  "patch_size": 16,
+  "processor_class": "PixtralProcessor"
+}
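The processor config names the three image placeholder tokens and the 16-pixel patch size. In Pixtral-style prompting each image becomes a grid of `[IMG]` tokens with row separators and a terminator; the sketch below illustrates one plausible layout. The exact placement of `[IMG_BREAK]` versus `[IMG_END]` on the final row is an assumption here; the shipped `PixtralProcessor` is authoritative.

```python
# Minimal sketch of how the image placeholder tokens above might be laid out.
# Whether the final row gets an [IMG_BREAK] before [IMG_END] is an assumption.
def image_token_layout(height: int, width: int, patch: int = 16) -> list[str]:
    rows, cols = height // patch, width // patch
    tokens: list[str] = []
    for r in range(rows):
        tokens.extend(["[IMG]"] * cols)                       # one token per patch
        tokens.append("[IMG_BREAK]" if r < rows - 1 else "[IMG_END]")
    return tokens

print(len(image_token_layout(1024, 1024)))  # 64*64 patches plus row separators
```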
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
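The special-token map declares `<s>`, `</s>`, `<pad>`, and `<unk>`. A quick way to confirm the tokenizer shipped in this folder picks them up is sketched below; the path is a placeholder for wherever this repo is checked out.

```python
# Minimal sketch: load the tokenizer from a local checkout of this repo and
# check the special tokens declared above. "path/to/this-repo" is a placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this-repo")
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # <s> </s> <pad> <unk>
```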
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84f33e6f52b2833e8cc17229af8eea363f640a898f19a48184a2c7f6f5a88337
+size 17077329
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff