shahidul034 committed on
Commit
8004ade
·
1 Parent(s): e8a8de1

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +7 -1
  2. tokenizer_config.json +3 -0
special_tokens_map.json CHANGED
@@ -18,7 +18,13 @@
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
- "pad_token": "</s>",
 
 
 
 
 
 
22
  "unk_token": {
23
  "content": "<unk>",
24
  "lstrip": false,
 
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
+ "pad_token": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
  "unk_token": {
29
  "content": "<unk>",
30
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -37,12 +37,15 @@
37
  "clean_up_tokenization_spaces": false,
38
  "eos_token": "</s>",
39
  "legacy": true,
 
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "</s>",
42
  "sp_model_kwargs": {},
43
  "spaces_between_special_tokens": false,
 
44
  "tokenizer_class": "LlamaTokenizer",
45
  "truncation_side": "left",
 
46
  "unk_token": "<unk>",
47
  "use_default_system_prompt": true
48
  }
 
37
  "clean_up_tokenization_spaces": false,
38
  "eos_token": "</s>",
39
  "legacy": true,
40
+ "max_length": 1024,
41
  "model_max_length": 1000000000000000019884624838656,
42
  "pad_token": "</s>",
43
  "sp_model_kwargs": {},
44
  "spaces_between_special_tokens": false,
45
+ "stride": 0,
46
  "tokenizer_class": "LlamaTokenizer",
47
  "truncation_side": "left",
48
+ "truncation_strategy": "longest_first",
49
  "unk_token": "<unk>",
50
  "use_default_system_prompt": true
51
  }