Omartificial-Intelligence-Space committed on
Commit
396cfb2
·
verified ·
1 Parent(s): e302082

update tokenizer_config

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +5 -16
tokenizer_config.json CHANGED
@@ -27,20 +27,12 @@
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
30
- "normalized": true,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "104": {
36
- "content": "[UNK]",
37
- "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  },
43
- "30526": {
44
  "content": "<mask>",
45
  "lstrip": true,
46
  "normalized": false,
@@ -52,21 +44,18 @@
52
  "bos_token": "<s>",
53
  "clean_up_tokenization_spaces": true,
54
  "cls_token": "<s>",
55
- "do_lower_case": true,
56
  "eos_token": "</s>",
57
  "mask_token": "<mask>",
58
- "max_length": 384,
59
- "model_max_length": 384,
60
  "pad_to_multiple_of": null,
61
  "pad_token": "<pad>",
62
  "pad_token_type_id": 0,
63
  "padding_side": "right",
64
  "sep_token": "</s>",
65
  "stride": 0,
66
- "strip_accents": null,
67
- "tokenize_chinese_chars": true,
68
- "tokenizer_class": "MPNetTokenizer",
69
  "truncation_side": "right",
70
  "truncation_strategy": "longest_first",
71
- "unk_token": "[UNK]"
72
  }
 
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
 
 
 
 
 
 
 
 
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "250001": {
36
  "content": "<mask>",
37
  "lstrip": true,
38
  "normalized": false,
 
44
  "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
 
47
  "eos_token": "</s>",
48
  "mask_token": "<mask>",
49
+ "max_length": 128,
50
+ "model_max_length": 128,
51
  "pad_to_multiple_of": null,
52
  "pad_token": "<pad>",
53
  "pad_token_type_id": 0,
54
  "padding_side": "right",
55
  "sep_token": "</s>",
56
  "stride": 0,
57
+ "tokenizer_class": "XLMRobertaTokenizer",
 
 
58
  "truncation_side": "right",
59
  "truncation_strategy": "longest_first",
60
+ "unk_token": "<unk>"
61
  }