soldni committed on
Commit
e4c5bf9
·
verified ·
1 Parent(s): e51d11c

Upload tokenizer

Browse files
Files changed (3) hide show
  1. tokenizer.json +6 -15
  2. tokenizer_config.json +4 -12
  3. vocab.json +0 -0
tokenizer.json CHANGED
@@ -167,21 +167,21 @@
167
  },
168
  {
169
  "id": 100274,
170
- "content": "<|extra_id_9|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
  "normalized": false,
175
- "special": false
176
  },
177
  {
178
  "id": 100275,
179
- "content": "<|extra_id_10|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
  "normalized": false,
184
- "special": false
185
  },
186
  {
187
  "id": 100276,
@@ -200,15 +200,6 @@
200
  "rstrip": false,
201
  "normalized": false,
202
  "special": true
203
- },
204
- {
205
- "id": 100278,
206
- "content": "<|repo_name|>",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  }
213
  ],
214
  "normalizer": null,
@@ -100530,8 +100521,8 @@
100530
  "<|extra_id_6|>": 100271,
100531
  "<|extra_id_7|>": 100272,
100532
  "<|extra_id_8|>": 100273,
100533
- "<|extra_id_9|>": 100274,
100534
- "<|extra_id_10|>": 100275,
100535
  "<|endofprompt|>": 100276,
100536
  "<|pad|>": 100277
100537
  },
 
167
  },
168
  {
169
  "id": 100274,
170
+ "content": "<|repo_name|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
  "normalized": false,
175
+ "special": true
176
  },
177
  {
178
  "id": 100275,
179
+ "content": "<|file_sep|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
  "normalized": false,
184
+ "special": true
185
  },
186
  {
187
  "id": 100276,
 
200
  "rstrip": false,
201
  "normalized": false,
202
  "special": true
 
 
 
 
 
 
 
 
 
203
  }
204
  ],
205
  "normalizer": null,
 
100521
  "<|extra_id_6|>": 100271,
100522
  "<|extra_id_7|>": 100272,
100523
  "<|extra_id_8|>": 100273,
100524
+ "<|repo_name|>": 100274,
100525
+ "<|file_sep|>": 100275,
100526
  "<|endofprompt|>": 100276,
100527
  "<|pad|>": 100277
100528
  },
tokenizer_config.json CHANGED
@@ -146,20 +146,20 @@
146
  "special": false
147
  },
148
  "100274": {
149
- "content": "<|extra_id_9|>",
150
  "lstrip": false,
151
  "normalized": false,
152
  "rstrip": false,
153
  "single_word": false,
154
- "special": false
155
  },
156
  "100275": {
157
- "content": "<|extra_id_10|>",
158
  "lstrip": false,
159
  "normalized": false,
160
  "rstrip": false,
161
  "single_word": false,
162
- "special": false
163
  },
164
  "100276": {
165
  "content": "<|endofprompt|>",
@@ -176,14 +176,6 @@
176
  "rstrip": false,
177
  "single_word": false,
178
  "special": true
179
- },
180
- "100278": {
181
- "content": "<|repo_name|>",
182
- "lstrip": false,
183
- "normalized": false,
184
- "rstrip": false,
185
- "single_word": false,
186
- "special": true
187
  }
188
  },
189
  "bos_token": "<|endoftext|>",
 
146
  "special": false
147
  },
148
  "100274": {
149
+ "content": "<|repo_name|>",
150
  "lstrip": false,
151
  "normalized": false,
152
  "rstrip": false,
153
  "single_word": false,
154
+ "special": true
155
  },
156
  "100275": {
157
+ "content": "<|file_sep|>",
158
  "lstrip": false,
159
  "normalized": false,
160
  "rstrip": false,
161
  "single_word": false,
162
+ "special": true
163
  },
164
  "100276": {
165
  "content": "<|endofprompt|>",
 
176
  "rstrip": false,
177
  "single_word": false,
178
  "special": true
 
 
 
 
 
 
 
 
179
  }
180
  },
181
  "bos_token": "<|endoftext|>",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff