raygx committed on
Commit
ea0b5b0
1 Parent(s): 047a25b

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -18,8 +18,8 @@
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50001,
 
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": true,
22
+ "special": false
23
  },
24
  {
25
  "id": 50001,
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "bos_token": "<|endoftext|>",
4
  "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",
6
- "model_max_length": 1024,
7
  "tokenizer_class": "GPT2Tokenizer",
8
  "unk_token": "<|endoftext|>"
9
  }
 
3
  "bos_token": "<|endoftext|>",
4
  "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",
6
+ "model_max_length": 512,
7
  "tokenizer_class": "GPT2Tokenizer",
8
  "unk_token": "<|endoftext|>"
9
  }