livinNector committed
Commit 05da8df · 1 Parent(s): 5d11267

Upload tokenizer

Files changed (3):
  1. added_tokens.json +1 -0
  2. tokenizer.json +9 -0
  3. tokenizer_config.json +2 -2
added_tokens.json CHANGED
@@ -18,6 +18,7 @@
   "6": 4019,
   "7": 4020,
   "8": 4021,
+  "9": 4022,
   ":": 4005,
   ";": 4006,
   "?": 4003
tokenizer.json CHANGED
@@ -245,6 +245,15 @@
   "rstrip": false,
   "normalized": true,
   "special": false
+  },
+  {
+    "id": 4022,
+    "content": "9",
+    "single_word": false,
+    "lstrip": false,
+    "rstrip": false,
+    "normalized": true,
+    "special": false
   }
 ],
 "normalizer": {
tokenizer_config.json CHANGED
@@ -1,13 +1,13 @@
 {
+  "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "special_tokens_map_file": null,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
+  "tokenizer_class": "DistilBertTokenizer",
   "unk_token": "[UNK]"
 }
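
Taken together, the three files stay consistent: the new "9" token appears in both added_tokens.json and tokenizer.json under id 4022, and tokenizer_config.json now names DistilBertTokenizer and enables clean_up_tokenization_spaces. A sketch of how one might load the updated tokenizer and verify both changes (the path "." is a placeholder for a local clone of this repo; AutoTokenizer typically returns the fast variant, DistilBertTokenizerFast):

```python
# Sketch, not part of the commit: load the updated tokenizer from a local
# clone ("." is a placeholder path) and verify the new token and class.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")

# "9" should now resolve to the id added in this commit.
print(tokenizer.convert_tokens_to_ids("9"))  # expected: 4022

# tokenizer_config.json now selects the DistilBERT tokenizer class;
# AutoTokenizer usually hands back the fast variant.
print(type(tokenizer).__name__)  # e.g. DistilBertTokenizerFast
```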