allenai
/

llama-3-tulu-2-8b

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Update tokenizer_config.json

#1

by hamishivi - opened Aug 5

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

Files changed (1) hide show

tokenizer_config.json +12 -3

tokenizer_config.json CHANGED Viewed

@@ -2047,9 +2047,18 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "model_input_names": [
@@ -2057,6 +2066,6 @@
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
-  "tokenizer_class": "PreTrainedTokenizerFast",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
-}

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "128256": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "bos_token": "<|begin_of_text|>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "model_input_names": [
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}