GPTQ model commit
config.json CHANGED (+21 -1)
@@ -10,6 +10,26 @@
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 32768,
+  "quantization_config": {
+    "batch_size": 1,
+    "bits": 4,
+    "block_name_to_quantize": "model.layers",
+    "damp_percent": 0.1,
+    "desc_act": true,
+    "disable_exllama": false,
+    "group_size": 128,
+    "max_input_length": null,
+    "model_seqlen": 4096,
+    "module_name_preceding_first_block": [
+      "model.embed_tokens"
+    ],
+    "pad_token_id": null,
+    "quant_method": "gptq",
+    "sym": true,
+    "tokenizer": null,
+    "true_sequential": true,
+    "use_cuda_fp16": true
+  },
   "model_type": "mistral",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
@@ -24,4 +44,4 @@
   "vocab_size": 32000,
   "pretraining_tp": 1,
   "pad_token_id": 0
-}
+}
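For reference: a "quantization_config" block with these keys is what transformers serializes when a model is quantized with its GPTQConfig, and it is read back automatically at load time, so no extra quantization step is needed when loading this repo. Below is a minimal sketch of loading a model that carries such a config. The repo id is a placeholder (not part of this commit), and it assumes a GPTQ backend (auto-gptq plus optimum) and a CUDA device, which the exllama kernels enabled above ("disable_exllama": false) require.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder repo id -- substitute the actual model repository.
    model_id = "org/mistral-7b-instruct-gptq"

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # The GPTQ settings ("bits": 4, "group_size": 128, "desc_act": true, ...)
    # are picked up from config.json automatically; device_map="auto" places
    # the quantized weights on the GPU.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

    inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output[0], skip_special_tokens=True))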