Upload tokenizer
Browse files
- tokenizer.json +6 -15
- tokenizer_config.json +4 -12
- vocab.json +0 -0
tokenizer.json
CHANGED
@@ -167,21 +167,21 @@
|
|
167 |
},
|
168 |
{
|
169 |
"id": 100274,
|
170 |
-
"content": "<|
|
171 |
"single_word": false,
|
172 |
"lstrip": false,
|
173 |
"rstrip": false,
|
174 |
"normalized": false,
|
175 |
-
"special":
|
176 |
},
|
177 |
{
|
178 |
"id": 100275,
|
179 |
-
"content": "<|
|
180 |
"single_word": false,
|
181 |
"lstrip": false,
|
182 |
"rstrip": false,
|
183 |
"normalized": false,
|
184 |
-
"special":
|
185 |
},
|
186 |
{
|
187 |
"id": 100276,
|
@@ -200,15 +200,6 @@
|
|
200 |
"rstrip": false,
|
201 |
"normalized": false,
|
202 |
"special": true
|
203 |
-
},
|
204 |
-
{
|
205 |
-
"id": 100278,
|
206 |
-
"content": "<|repo_name|>",
|
207 |
-
"single_word": false,
|
208 |
-
"lstrip": false,
|
209 |
-
"rstrip": false,
|
210 |
-
"normalized": false,
|
211 |
-
"special": true
|
212 |
}
|
213 |
],
|
214 |
"normalizer": null,
|
@@ -100530,8 +100521,8 @@
|
|
100530 |
"<|extra_id_6|>": 100271,
|
100531 |
"<|extra_id_7|>": 100272,
|
100532 |
"<|extra_id_8|>": 100273,
|
100533 |
-
"<|
|
100534 |
-
"<|
|
100535 |
"<|endofprompt|>": 100276,
|
100536 |
"<|pad|>": 100277
|
100537 |
},
|
|
|
167 |
},
|
168 |
{
|
169 |
"id": 100274,
|
170 |
+
"content": "<|repo_name|>",
|
171 |
"single_word": false,
|
172 |
"lstrip": false,
|
173 |
"rstrip": false,
|
174 |
"normalized": false,
|
175 |
+
"special": true
|
176 |
},
|
177 |
{
|
178 |
"id": 100275,
|
179 |
+
"content": "<|file_sep|>",
|
180 |
"single_word": false,
|
181 |
"lstrip": false,
|
182 |
"rstrip": false,
|
183 |
"normalized": false,
|
184 |
+
"special": true
|
185 |
},
|
186 |
{
|
187 |
"id": 100276,
|
|
|
200 |
"rstrip": false,
|
201 |
"normalized": false,
|
202 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
}
|
204 |
],
|
205 |
"normalizer": null,
|
|
|
100521 |
"<|extra_id_6|>": 100271,
|
100522 |
"<|extra_id_7|>": 100272,
|
100523 |
"<|extra_id_8|>": 100273,
|
100524 |
+
"<|repo_name|>": 100274,
|
100525 |
+
"<|file_sep|>": 100275,
|
100526 |
"<|endofprompt|>": 100276,
|
100527 |
"<|pad|>": 100277
|
100528 |
},
|
tokenizer_config.json
CHANGED
@@ -146,20 +146,20 @@
|
|
146 |
"special": false
|
147 |
},
|
148 |
"100274": {
|
149 |
-
"content": "<|
|
150 |
"lstrip": false,
|
151 |
"normalized": false,
|
152 |
"rstrip": false,
|
153 |
"single_word": false,
|
154 |
-
"special":
|
155 |
},
|
156 |
"100275": {
|
157 |
-
"content": "<|
|
158 |
"lstrip": false,
|
159 |
"normalized": false,
|
160 |
"rstrip": false,
|
161 |
"single_word": false,
|
162 |
-
"special":
|
163 |
},
|
164 |
"100276": {
|
165 |
"content": "<|endofprompt|>",
|
@@ -176,14 +176,6 @@
|
|
176 |
"rstrip": false,
|
177 |
"single_word": false,
|
178 |
"special": true
|
179 |
-
},
|
180 |
-
"100278": {
|
181 |
-
"content": "<|repo_name|>",
|
182 |
-
"lstrip": false,
|
183 |
-
"normalized": false,
|
184 |
-
"rstrip": false,
|
185 |
-
"single_word": false,
|
186 |
-
"special": true
|
187 |
}
|
188 |
},
|
189 |
"bos_token": "<|endoftext|>",
|
|
|
146 |
"special": false
|
147 |
},
|
148 |
"100274": {
|
149 |
+
"content": "<|repo_name|>",
|
150 |
"lstrip": false,
|
151 |
"normalized": false,
|
152 |
"rstrip": false,
|
153 |
"single_word": false,
|
154 |
+
"special": true
|
155 |
},
|
156 |
"100275": {
|
157 |
+
"content": "<|file_sep|>",
|
158 |
"lstrip": false,
|
159 |
"normalized": false,
|
160 |
"rstrip": false,
|
161 |
"single_word": false,
|
162 |
+
"special": true
|
163 |
},
|
164 |
"100276": {
|
165 |
"content": "<|endofprompt|>",
|
|
|
176 |
"rstrip": false,
|
177 |
"single_word": false,
|
178 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
}
|
180 |
},
|
181 |
"bos_token": "<|endoftext|>",
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|