fxmarty
/

llama-tiny-w-int8-per-tensor

fxmarty committed on Oct 9, 2024

Commit

f38bba4

verified ·

1 Parent(s): 93fb75c

Upload folder using huggingface_hub

Files changed (1) hide show

config.json CHANGED Viewed

@@ -21,38 +21,42 @@
   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
-    "algo_config": null,
-    "exclude": [
-      "lm_head"
-    ],
-    "export": {
-      "kv_cache_group": [],
-      "pack_method": "reorder",
-      "weight_format": "real_quantized",
-      "weight_merge_groups": null
-    },
-    "global_quant_config": {
-      "bias": null,
-      "input_tensors": null,
-      "output_tensors": null,
-      "target_device": null,
-      "weight": {
-        "ch_axis": null,
-        "dtype": "int8",
-        "group_size": null,
-        "is_dynamic": false,
-        "observer_cls": "PerTensorMinMaxObserver",
-        "qscheme": "per_tensor",
-        "round_method": "half_even",
-        "scale_type": "float",
-        "symmetric": true
-      }
-    },
-    "layer_quant_config": {},
-    "layer_type_quant_config": {},
-    "pack_method": "reorder",
     "quant_method": "quark",
-    "quant_mode": 1
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
+    "library": "quark",
     "quant_method": "quark",
+    "quark_config": {
+      "algo_config": null,
+      "exclude": [
+        "lm_head"
+      ],
+      "export": {
+        "kv_cache_group": [],
+        "pack_method": "reorder",
+        "weight_format": "real_quantized",
+        "weight_merge_groups": null
+      },
+      "global_quant_config": {
+        "bias": null,
+        "input_tensors": null,
+        "output_tensors": null,
+        "target_device": null,
+        "weight": {
+          "ch_axis": null,
+          "dtype": "int8",
+          "group_size": null,
+          "is_dynamic": false,
+          "observer_cls": "PerTensorMinMaxObserver",
+          "qscheme": "per_tensor",
+          "round_method": "half_even",
+          "scale_type": "float",
+          "symmetric": true
+        }
+      },
+      "layer_quant_config": {},
+      "layer_type_quant_config": {},
+      "pack_method": "reorder",
+      "quant_method": "quark",
+      "quant_mode": 1
+    }
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,