tmberooney
committed on
Commit
•
19eb1b8
1
Parent(s):
0293e78
Upload tokenizer
Browse files
- tokenizer.json +1 -0
- tokenizer.model +3 -0
- tokenizer_config.json +3 -0
tokenizer.json
CHANGED
@@ -146,6 +146,7 @@
|
|
146 |
"end_of_word_suffix": null,
|
147 |
"fuse_unk": true,
|
148 |
"byte_fallback": true,
|
|
|
149 |
"vocab": {
|
150 |
"<unk>": 0,
|
151 |
"<s>": 1,
|
|
|
146 |
"end_of_word_suffix": null,
|
147 |
"fuse_unk": true,
|
148 |
"byte_fallback": true,
|
149 |
+
"ignore_merges": false,
|
150 |
"vocab": {
|
151 |
"<unk>": 0,
|
152 |
"<s>": 1,
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
tokenizer_config.json
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
{
|
|
|
|
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"0": {
|
4 |
"content": "<unk>",
|
|
|
1 |
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|