[WIP] Optimized q4f16 ONNX export

#3
Opened by Xenova (HF staff)
Files changed (2) hide show
  1. config.json +3 -0
  2. onnx/model_q4f16.onnx +2 -2
config.json CHANGED
@@ -25,6 +25,9 @@
25
  "tie_word_embeddings": true,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.42.3",
 
 
 
28
  "use_cache": true,
29
  "vocab_size": 49152
30
  }
 
25
  "tie_word_embeddings": true,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.42.3",
28
+ "transformers.js_config": {
29
+ "kv_cache_dtype": "float16"
30
+ },
31
  "use_cache": true,
32
  "vocab_size": 49152
33
  }
onnx/model_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1a788453e1393e8642f43ca729b7f2301ba61cc1f8ac1f1904c809869fc1ffb
3
- size 272513495
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc95f62ea740d675d75c0f263ecf467c950f4002d18428dce832cb2fd5705b9e
3
+ size 298430898