`tokenizer_config.json` Fixes (#2)
Browse files
- `tokenizer_config.json` Fixes (a56c230bff16ef7ce3a10012858b692ad99c67a9)
- more tokenizer_config fixes (be52112a7484741ab83a995e224c8a6209a25774)
- tokenizer_config.json +5 -2
tokenizer_config.json
CHANGED
@@ -2050,13 +2050,16 @@
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
|
|
2053 |
"clean_up_tokenization_spaces": true,
|
2054 |
-
"eos_token": "<|
|
2055 |
"extra_special_tokens": {},
|
2056 |
"model_input_names": [
|
2057 |
"input_ids",
|
2058 |
"attention_mask"
|
2059 |
],
|
2060 |
-
"model_max_length":
|
|
|
|
|
2061 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2062 |
}
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{%- for message in messages %}\n {%- if loop.index0 == 0 %}\n {{- bos_token }}\n {%- endif %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_header_id|>Metadata<|end_header_id|>\n\n' + message['content'] + eos_token }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {{- '<|start_header_id|>Japanese<|end_header_id|>\n\n' + message['content'] + eos_token }}\n {%- endif %}\n {%- if message['role'] == 'assistant' %}\n {{- '<|start_header_id|>English<|end_header_id|>\n\n' + message['content'] + eos_token }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>English<|end_header_id|>\n\n' }}\n{%- endif %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
+
"eos_token": "<|eot_id|>",
|
2056 |
"extra_special_tokens": {},
|
2057 |
"model_input_names": [
|
2058 |
"input_ids",
|
2059 |
"attention_mask"
|
2060 |
],
|
2061 |
+
"model_max_length": 8192,
|
2062 |
+
"pad_token": "<|end_of_text|>",
|
2063 |
+
"padding_side": "left",
|
2064 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2065 |
}
|