Xenova HF staff committed on
Commit
0cd880c
1 Parent(s): 47aeb14

Upload folder using huggingface_hub (#4)

Browse files

- Upload folder using huggingface_hub (440a8c3e273a79d5a14af38831fbc0b9340d856c)
- Update quantize_config.json (c650d273068d4865c87b26638a12080e3cd0d89e)

onnx/build_delay_pattern_mask.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84edae5bef3bfba3aad4f1e3e786dc120857cb8db136e3b9f68e4eb1a4cfeac7
3
+ size 35360
onnx/build_delay_pattern_mask_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5c988ded727c85d0c396b97cc7e79ffe5bb1a294f9c7f6e19df75d83d576d0b
3
+ size 35379
onnx/build_delay_pattern_mask_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4acc38a235d6c9499915095905bfd4b02bb4c62039f487c44084c63846d2059c
3
+ size 52053
onnx/build_delay_pattern_mask_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4997af816452ed9c9e1c9a7f8c02cd613e235e02a179a2a0f6f3a8b0fb628e0e
3
+ size 52092
onnx/build_delay_pattern_mask_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4997af816452ed9c9e1c9a7f8c02cd613e235e02a179a2a0f6f3a8b0fb628e0e
3
+ size 52092
onnx/build_delay_pattern_mask_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4997af816452ed9c9e1c9a7f8c02cd613e235e02a179a2a0f6f3a8b0fb628e0e
3
+ size 52092
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8497084fd0d4f7733ae1fa481c6d39f5130e32eff27da717fa07c6e7deb1af71
3
+ size 1691136293
onnx/decoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f907283b2cc5dd902d8321f6f89013055efd471eb2d9dd145c6097f55e3390d6
3
+ size 275515156
onnx/decoder_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50aebf05bf038e95a2ea6948f5bacf7865e36fca7e179fa5cd586f3d0ba48bae
3
+ size 846702678
onnx/decoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52ff6681bc2b74ef3326584cd9e74ff6949742ca61efaf5b41ada412e1b8919
3
+ size 424993274
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2cf8b839b2e48b480e264972931e9f6b7f7d6d1483fb00a03ad6c4bda64121
3
+ size 1692230645
onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b258352f011dbd2717a6cb838ceccf488daca5e818e19230c2d9443d0362a1c
3
+ size 276638222
onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9098bd6b5195626455db10198681b84b1f002033d53294d5f4df5b6faf1da365
3
+ size 848232814
onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0784dfa87ac4cba563c01a081f22841e782bdbe91ce751b5cc955f6c499c4c6
3
+ size 426777507
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0784dfa87ac4cba563c01a081f22841e782bdbe91ce751b5cc955f6c499c4c6
3
+ size 426777507
onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b7ce852f6a9b5b4af113c6f3da41361dbdde62b5986b8da5f7f939646b498d9
3
+ size 426777627
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52ff6681bc2b74ef3326584cd9e74ff6949742ca61efaf5b41ada412e1b8919
3
+ size 424993274
onnx/decoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5315e5d595e184d470e36467df93dd86bcdf1bbe5b5f5c3530fa818bdd1a691c
3
+ size 424993394
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05501e600138f1d09220f994f924280f5b403aab503e6989acbf8ec3456e2637
3
+ size 1486479458
onnx/decoder_with_past_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7723ccfdb919a9c5a78a6f0db57d2769abf32d2c233ea6994c00ab1b52da5355
3
+ size 246569076
onnx/decoder_with_past_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c32661a2843568bc569f8fc66d05e5b6c5fcf4bd2ec88db898277c38e5ab30f
3
+ size 744224406
onnx/decoder_with_past_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f079fa848c6a21ba458fee27968c7a165e0d56d4f1e81658d7b370f466e8449
3
+ size 373572413
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f079fa848c6a21ba458fee27968c7a165e0d56d4f1e81658d7b370f466e8449
3
+ size 373572413
onnx/decoder_with_past_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34151320cd6e680c391b16ffb6a22932675ac534bbffd1484769cb89d89519fb
3
+ size 373572507
onnx/encodec_decode.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847918300a85c9a8f93333f482c5740e07c4e2599faed839bbec73fd0d588073
3
+ size 118056304
onnx/encodec_decode_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cad95eb6dfd867cb75e2735a497beb38361a9a2e4dae385a4a51b9396ac235
3
+ size 118056323
onnx/encodec_decode_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10aa50de680d50e13e3cb85e358bad31427c98ce9ee9cd2596af6576620a7b9
3
+ size 59125087
onnx/encodec_decode_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49bc9dd34b782aa667bbf2d83f25b31ec19db39aadc55a61e4161736c98c99bf
3
+ size 59796619
onnx/encodec_decode_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5d97ab43fba9152418343efd9e61fe1eb3d00175cf8e97b124f661ac77d54b
3
+ size 59796623
onnx/encodec_decode_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5d97ab43fba9152418343efd9e61fe1eb3d00175cf8e97b124f661ac77d54b
3
+ size 59796623
onnx/text_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7962c2408b03ebb37252e6d1a3179517e8c6127b9fda4e0beab84dfa09aab80
3
+ size 438689759
onnx/text_encoder_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef68265cc3aa0cd351f33de7a57785dfc8c85d1396d567ffb2426c937383ca0
3
+ size 146738081
onnx/text_encoder_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fdd8b8b3edc877e1ecefcabe1cad24d8d299a92ea208b1d9c732f9b63753ed
3
+ size 219508053
onnx/text_encoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77494d927d25ea09e6abc8d566281b5b9cd58f39c5f89a7da60e157b0a711607
3
+ size 110027935
onnx/text_encoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77494d927d25ea09e6abc8d566281b5b9cd58f39c5f89a7da60e157b0a711607
3
+ size 110027935
onnx/text_encoder_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4473292627f012bce6714118bcf22095eb4dcbf85ba140b5cb10b51de0d04c5e
3
+ size 110027971
quantize_config.json CHANGED
@@ -1,6 +1,70 @@
1
  {
 
2
  "q8": {
3
  "per_model_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "text_encoder": {
5
  "op_types": [
6
  "Abs",
@@ -38,8 +102,11 @@
38
  "Cast",
39
  "Concat",
40
  "Constant",
 
41
  "Div",
 
42
  "Erf",
 
43
  "Gather",
44
  "MatMul",
45
  "Mul",
@@ -52,8 +119,101 @@
52
  "Sqrt",
53
  "Sub",
54
  "Transpose",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "Unsqueeze"
56
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  "weight_type": "QInt8"
58
  },
59
  "decoder_model": {
@@ -87,6 +247,65 @@
87
  ],
88
  "weight_type": "QInt8"
89
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  "encodec_decode": {
91
  "op_types": [
92
  "Add",
@@ -111,44 +330,66 @@
111
  "Transpose",
112
  "Unsqueeze"
113
  ],
114
- "weight_type": "QUInt8"
115
  },
116
- "encodec_encoder": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "op_types": [
118
  "Add",
119
- "ArgMax",
120
  "Cast",
121
- "Ceil",
122
  "Concat",
123
- "ConcatFromSequence",
124
  "Constant",
125
  "ConstantOfShape",
126
- "Conv",
127
  "Div",
128
- "Elu",
129
  "Equal",
 
130
  "Expand",
131
  "Gather",
132
- "Identity",
133
  "If",
134
- "LSTM",
135
- "LessOrEqual",
136
- "Loop",
137
  "MatMul",
138
- "Max",
139
  "Mul",
140
- "Neg",
141
- "Pad",
142
  "Pow",
143
- "ReduceL2",
144
- "ReduceMin",
145
- "ReduceSum",
146
  "Reshape",
147
- "ScatterND",
148
- "SequenceEmpty",
149
- "SequenceInsert",
150
  "Shape",
151
  "Slice",
 
 
152
  "Squeeze",
153
  "Sub",
154
  "Transpose",
@@ -157,7 +398,7 @@
157
  ],
158
  "weight_type": "QUInt8"
159
  },
160
- "decoder_model_merged": {
161
  "op_types": [
162
  "Add",
163
  "Cast",
@@ -169,7 +410,6 @@
169
  "Erf",
170
  "Expand",
171
  "Gather",
172
- "If",
173
  "Less",
174
  "MatMul",
175
  "Mul",
@@ -187,10 +427,127 @@
187
  "Unsqueeze",
188
  "Where"
189
  ],
190
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  }
192
  },
193
  "per_channel": false,
194
  "reduce_range": false
 
 
 
 
195
  }
196
  }
 
1
  {
2
+ "fp16": {},
3
  "q8": {
4
  "per_model_config": {
5
+ "decoder_model_merged": {
6
+ "op_types": [
7
+ "Add",
8
+ "Cast",
9
+ "Concat",
10
+ "Constant",
11
+ "ConstantOfShape",
12
+ "Div",
13
+ "Equal",
14
+ "Erf",
15
+ "Expand",
16
+ "Gather",
17
+ "If",
18
+ "Less",
19
+ "MatMul",
20
+ "Mul",
21
+ "Pow",
22
+ "Range",
23
+ "ReduceMean",
24
+ "Reshape",
25
+ "Shape",
26
+ "Slice",
27
+ "Softmax",
28
+ "Sqrt",
29
+ "Squeeze",
30
+ "Sub",
31
+ "Transpose",
32
+ "Unsqueeze",
33
+ "Where"
34
+ ],
35
+ "weight_type": "QInt8"
36
+ },
37
+ "decoder_model": {
38
+ "op_types": [
39
+ "Add",
40
+ "Cast",
41
+ "Concat",
42
+ "Constant",
43
+ "ConstantOfShape",
44
+ "Div",
45
+ "Equal",
46
+ "Erf",
47
+ "Expand",
48
+ "Gather",
49
+ "Less",
50
+ "MatMul",
51
+ "Mul",
52
+ "Pow",
53
+ "Range",
54
+ "ReduceMean",
55
+ "Reshape",
56
+ "Shape",
57
+ "Slice",
58
+ "Softmax",
59
+ "Sqrt",
60
+ "Squeeze",
61
+ "Sub",
62
+ "Transpose",
63
+ "Unsqueeze",
64
+ "Where"
65
+ ],
66
+ "weight_type": "QInt8"
67
+ },
68
  "text_encoder": {
69
  "op_types": [
70
  "Abs",
 
102
  "Cast",
103
  "Concat",
104
  "Constant",
105
+ "ConstantOfShape",
106
  "Div",
107
+ "Equal",
108
  "Erf",
109
+ "Expand",
110
  "Gather",
111
  "MatMul",
112
  "Mul",
 
119
  "Sqrt",
120
  "Sub",
121
  "Transpose",
122
+ "Unsqueeze",
123
+ "Where"
124
+ ],
125
+ "weight_type": "QInt8"
126
+ },
127
+ "encodec_decode": {
128
+ "op_types": [
129
+ "Add",
130
+ "Cast",
131
+ "Ceil",
132
+ "Concat",
133
+ "Constant",
134
+ "ConstantOfShape",
135
+ "Conv",
136
+ "ConvTranspose",
137
+ "Div",
138
+ "Elu",
139
+ "Gather",
140
+ "LSTM",
141
+ "Pad",
142
+ "Reshape",
143
+ "Shape",
144
+ "Slice",
145
+ "Split",
146
+ "Squeeze",
147
+ "Sub",
148
+ "Transpose",
149
  "Unsqueeze"
150
  ],
151
+ "weight_type": "QUInt8"
152
+ },
153
+ "build_delay_pattern_mask": {
154
+ "op_types": [
155
+ "Add",
156
+ "Cast",
157
+ "Concat",
158
+ "Constant",
159
+ "ConstantOfShape",
160
+ "Equal",
161
+ "Expand",
162
+ "Gather",
163
+ "GreaterOrEqual",
164
+ "Mul",
165
+ "NonZero",
166
+ "Not",
167
+ "Range",
168
+ "ReduceMin",
169
+ "Reshape",
170
+ "ScatterND",
171
+ "Shape",
172
+ "Slice",
173
+ "Sub",
174
+ "Transpose",
175
+ "Trilu",
176
+ "Unsqueeze",
177
+ "Where"
178
+ ],
179
+ "weight_type": "QInt8"
180
+ }
181
+ },
182
+ "per_channel": false,
183
+ "reduce_range": false
184
+ },
185
+ "int8": {
186
+ "per_model_config": {
187
+ "decoder_model_merged": {
188
+ "op_types": [
189
+ "Add",
190
+ "Cast",
191
+ "Concat",
192
+ "Constant",
193
+ "ConstantOfShape",
194
+ "Div",
195
+ "Equal",
196
+ "Erf",
197
+ "Expand",
198
+ "Gather",
199
+ "If",
200
+ "Less",
201
+ "MatMul",
202
+ "Mul",
203
+ "Pow",
204
+ "Range",
205
+ "ReduceMean",
206
+ "Reshape",
207
+ "Shape",
208
+ "Slice",
209
+ "Softmax",
210
+ "Sqrt",
211
+ "Squeeze",
212
+ "Sub",
213
+ "Transpose",
214
+ "Unsqueeze",
215
+ "Where"
216
+ ],
217
  "weight_type": "QInt8"
218
  },
219
  "decoder_model": {
 
247
  ],
248
  "weight_type": "QInt8"
249
  },
250
+ "text_encoder": {
251
+ "op_types": [
252
+ "Abs",
253
+ "Add",
254
+ "Cast",
255
+ "Concat",
256
+ "Constant",
257
+ "ConstantOfShape",
258
+ "Div",
259
+ "Gather",
260
+ "Greater",
261
+ "Less",
262
+ "Log",
263
+ "MatMul",
264
+ "Min",
265
+ "Mul",
266
+ "Pow",
267
+ "Range",
268
+ "ReduceMean",
269
+ "Relu",
270
+ "Reshape",
271
+ "Shape",
272
+ "Softmax",
273
+ "Sqrt",
274
+ "Sub",
275
+ "Transpose",
276
+ "Unsqueeze",
277
+ "Where"
278
+ ],
279
+ "weight_type": "QInt8"
280
+ },
281
+ "decoder_with_past_model": {
282
+ "op_types": [
283
+ "Add",
284
+ "Cast",
285
+ "Concat",
286
+ "Constant",
287
+ "ConstantOfShape",
288
+ "Div",
289
+ "Equal",
290
+ "Erf",
291
+ "Expand",
292
+ "Gather",
293
+ "MatMul",
294
+ "Mul",
295
+ "Pow",
296
+ "Range",
297
+ "ReduceMean",
298
+ "Reshape",
299
+ "Shape",
300
+ "Softmax",
301
+ "Sqrt",
302
+ "Sub",
303
+ "Transpose",
304
+ "Unsqueeze",
305
+ "Where"
306
+ ],
307
+ "weight_type": "QInt8"
308
+ },
309
  "encodec_decode": {
310
  "op_types": [
311
  "Add",
 
330
  "Transpose",
331
  "Unsqueeze"
332
  ],
333
+ "weight_type": "QInt8"
334
  },
335
+ "build_delay_pattern_mask": {
336
+ "op_types": [
337
+ "Add",
338
+ "Cast",
339
+ "Concat",
340
+ "Constant",
341
+ "ConstantOfShape",
342
+ "Equal",
343
+ "Expand",
344
+ "Gather",
345
+ "GreaterOrEqual",
346
+ "Mul",
347
+ "NonZero",
348
+ "Not",
349
+ "Range",
350
+ "ReduceMin",
351
+ "Reshape",
352
+ "ScatterND",
353
+ "Shape",
354
+ "Slice",
355
+ "Sub",
356
+ "Transpose",
357
+ "Trilu",
358
+ "Unsqueeze",
359
+ "Where"
360
+ ],
361
+ "weight_type": "QInt8"
362
+ }
363
+ },
364
+ "per_channel": false,
365
+ "reduce_range": false
366
+ },
367
+ "uint8": {
368
+ "per_model_config": {
369
+ "decoder_model_merged": {
370
  "op_types": [
371
  "Add",
 
372
  "Cast",
 
373
  "Concat",
 
374
  "Constant",
375
  "ConstantOfShape",
 
376
  "Div",
 
377
  "Equal",
378
+ "Erf",
379
  "Expand",
380
  "Gather",
 
381
  "If",
382
+ "Less",
 
 
383
  "MatMul",
 
384
  "Mul",
 
 
385
  "Pow",
386
+ "Range",
387
+ "ReduceMean",
 
388
  "Reshape",
 
 
 
389
  "Shape",
390
  "Slice",
391
+ "Softmax",
392
+ "Sqrt",
393
  "Squeeze",
394
  "Sub",
395
  "Transpose",
 
398
  ],
399
  "weight_type": "QUInt8"
400
  },
401
+ "decoder_model": {
402
  "op_types": [
403
  "Add",
404
  "Cast",
 
410
  "Erf",
411
  "Expand",
412
  "Gather",
 
413
  "Less",
414
  "MatMul",
415
  "Mul",
 
427
  "Unsqueeze",
428
  "Where"
429
  ],
430
+ "weight_type": "QUInt8"
431
+ },
432
+ "text_encoder": {
433
+ "op_types": [
434
+ "Abs",
435
+ "Add",
436
+ "Cast",
437
+ "Concat",
438
+ "Constant",
439
+ "ConstantOfShape",
440
+ "Div",
441
+ "Gather",
442
+ "Greater",
443
+ "Less",
444
+ "Log",
445
+ "MatMul",
446
+ "Min",
447
+ "Mul",
448
+ "Pow",
449
+ "Range",
450
+ "ReduceMean",
451
+ "Relu",
452
+ "Reshape",
453
+ "Shape",
454
+ "Softmax",
455
+ "Sqrt",
456
+ "Sub",
457
+ "Transpose",
458
+ "Unsqueeze",
459
+ "Where"
460
+ ],
461
+ "weight_type": "QUInt8"
462
+ },
463
+ "decoder_with_past_model": {
464
+ "op_types": [
465
+ "Add",
466
+ "Cast",
467
+ "Concat",
468
+ "Constant",
469
+ "ConstantOfShape",
470
+ "Div",
471
+ "Equal",
472
+ "Erf",
473
+ "Expand",
474
+ "Gather",
475
+ "MatMul",
476
+ "Mul",
477
+ "Pow",
478
+ "Range",
479
+ "ReduceMean",
480
+ "Reshape",
481
+ "Shape",
482
+ "Softmax",
483
+ "Sqrt",
484
+ "Sub",
485
+ "Transpose",
486
+ "Unsqueeze",
487
+ "Where"
488
+ ],
489
+ "weight_type": "QUInt8"
490
+ },
491
+ "encodec_decode": {
492
+ "op_types": [
493
+ "Add",
494
+ "Cast",
495
+ "Ceil",
496
+ "Concat",
497
+ "Constant",
498
+ "ConstantOfShape",
499
+ "Conv",
500
+ "ConvTranspose",
501
+ "Div",
502
+ "Elu",
503
+ "Gather",
504
+ "LSTM",
505
+ "Pad",
506
+ "Reshape",
507
+ "Shape",
508
+ "Slice",
509
+ "Split",
510
+ "Squeeze",
511
+ "Sub",
512
+ "Transpose",
513
+ "Unsqueeze"
514
+ ],
515
+ "weight_type": "QUInt8"
516
+ },
517
+ "build_delay_pattern_mask": {
518
+ "op_types": [
519
+ "Add",
520
+ "Cast",
521
+ "Concat",
522
+ "Constant",
523
+ "ConstantOfShape",
524
+ "Equal",
525
+ "Expand",
526
+ "Gather",
527
+ "GreaterOrEqual",
528
+ "Mul",
529
+ "NonZero",
530
+ "Not",
531
+ "Range",
532
+ "ReduceMin",
533
+ "Reshape",
534
+ "ScatterND",
535
+ "Shape",
536
+ "Slice",
537
+ "Sub",
538
+ "Transpose",
539
+ "Trilu",
540
+ "Unsqueeze",
541
+ "Where"
542
+ ],
543
+ "weight_type": "QUInt8"
544
  }
545
  },
546
  "per_channel": false,
547
  "reduce_range": false
548
+ },
549
+ "bnb4": {
550
+ "block_size": 64,
551
+ "quant_type": 1
552
  }
553
  }