Dogge
/

Tia-70B-RP

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card · Files and versions · Community

Dogge committed on Apr 20, 2024

Commit

7fb2ac5

·

verified ·

1 Parent(s): 92b3e4b

Upload 3 files

Files changed (3) hide show

config.json +13 -1
generation_config.json +1 -4
tokenizer_config.json +1 -1

config.json CHANGED Viewed

@@ -6,7 +6,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": 128001,
   "hidden_act": "silu",
   "hidden_size": 8192,
   "initializer_range": 0.02,
@@ -17,6 +17,18 @@
   "num_hidden_layers": 80,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,

   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
+  "eos_token_id": 128009,
   "hidden_act": "silu",
   "hidden_size": 8192,
   "initializer_range": 0.02,
   "num_hidden_layers": 80,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,

generation_config.json CHANGED Viewed

@@ -1,9 +1,6 @@
 {
   "_from_model_config": true,
   "bos_token_id": 128000,
-  "eos_token_id": [
-    128001,
-    128009
-  ],
   "transformers_version": "4.40.0"
 }

 {
   "_from_model_config": true,
   "bos_token_id": 128000,
+  "eos_token_id": 128009,
   "transformers_version": "4.40.0"
 }

tokenizer_config.json CHANGED Viewed

@@ -2052,7 +2052,7 @@
   "bos_token": "<|begin_of_text|>",
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|end_of_text|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"

   "bos_token": "<|begin_of_text|>",
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"