Create the tokenizer.json properly (with TemplateProcessing included).

#75
Opened by Narsil (HF staff)
Files changed (1)
  1. tokenizer.json +31 -2
tokenizer.json CHANGED
@@ -128,7 +128,7 @@
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
- }
132
  ],
133
  "normalizer": {
134
  "type": "Sequence",
@@ -150,6 +150,12 @@
150
  "post_processor": {
151
  "type": "TemplateProcessing",
152
  "single": [
 
 
 
 
 
 
153
  {
154
  "Sequence": {
155
  "id": "A",
@@ -158,12 +164,24 @@
158
  }
159
  ],
160
  "pair": [
 
 
 
 
 
 
161
  {
162
  "Sequence": {
163
  "id": "A",
164
  "type_id": 0
165
  }
166
  },
 
 
 
 
 
 
167
  {
168
  "Sequence": {
169
  "id": "B",
@@ -171,7 +189,17 @@
171
  }
172
  }
173
  ],
174
- "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
175
  },
176
  "decoder": {
177
  "type": "Sequence",
@@ -205,6 +233,7 @@
205
  "end_of_word_suffix": null,
206
  "fuse_unk": true,
207
  "byte_fallback": true,
 
208
  "vocab": {
209
  "<unk>": 0,
210
  "<s>": 1,
 
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
+ }
132
  ],
133
  "normalizer": {
134
  "type": "Sequence",
 
150
  "post_processor": {
151
  "type": "TemplateProcessing",
152
  "single": [
153
+ {
154
+ "SpecialToken": {
155
+ "id": "<s>",
156
+ "type_id": 0
157
+ }
158
+ },
159
  {
160
  "Sequence": {
161
  "id": "A",
 
164
  }
165
  ],
166
  "pair": [
167
+ {
168
+ "SpecialToken": {
169
+ "id": "<s>",
170
+ "type_id": 0
171
+ }
172
+ },
173
  {
174
  "Sequence": {
175
  "id": "A",
176
  "type_id": 0
177
  }
178
  },
179
+ {
180
+ "SpecialToken": {
181
+ "id": "<s>",
182
+ "type_id": 1
183
+ }
184
+ },
185
  {
186
  "Sequence": {
187
  "id": "B",
 
189
  }
190
  }
191
  ],
192
+ "special_tokens": {
193
+ "<s>": {
194
+ "id": "<s>",
195
+ "ids": [
196
+ 1
197
+ ],
198
+ "tokens": [
199
+ "<s>"
200
+ ]
201
+ }
202
+ }
203
  },
204
  "decoder": {
205
  "type": "Sequence",
 
233
  "end_of_word_suffix": null,
234
  "fuse_unk": true,
235
  "byte_fallback": true,
236
+ "ignore_merges": false,
237
  "vocab": {
238
  "<unk>": 0,
239
  "<s>": 1,