ryo0634 committed
Commit a647f24
1 Parent(s): 451df20

use LlamaTokenizer

tokenizer.json DELETED
The diff for this file is too large to render.
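Removing tokenizer.json means the repo no longer ships a prebuilt fast-tokenizer file; a loader either rebuilds one from the SentencePiece model at load time or falls back to the slow class. A minimal sketch of both load paths, assuming a placeholder repo id and that sentencepiece/protobuf are installed:

from transformers import AutoTokenizer

# "user/this-repo" is a placeholder, not the actual model id.
# With no tokenizer.json present, the default (fast) path converts
# tokenizer.model on the fly; use_fast=False loads the slow class directly.
fast_tok = AutoTokenizer.from_pretrained("user/this-repo")
slow_tok = AutoTokenizer.from_pretrained("user/this-repo", use_fast=False)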
 
spiece.model → tokenizer.model RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e2fccdac2794faadad487a5bf76018f693c71361ba7eb088bcaf9fe9ee90c23
-size 1831863
+oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
+size 1831879
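The rename matches the default vocab filename each tokenizer class looks for: T5Tokenizer expects spiece.model, while LlamaTokenizer expects tokenizer.model. A minimal sketch of pointing the slow LlamaTokenizer at the renamed file, assuming a local copy of it:

from transformers import LlamaTokenizer

# The local path is an assumption for illustration; legacy=False mirrors
# the updated tokenizer_config.json below.
tok = LlamaTokenizer(vocab_file="tokenizer.model", legacy=False)
print(tok.tokenize("hello world"))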
tokenizer_config.json CHANGED
@@ -12,8 +12,32 @@
   "padding_side": "left",
   "sp_model_kwargs": {},
   "special_tokens_map_file": null,
-  "tokenizer_class": "T5Tokenizer",
+  "tokenizer_class": "LlamaTokenizer",
   "added_tokens_decoder": {
+    "4": {
+      "content": "<sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "<cls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
     "7": {
       "content": "<|system|>",
       "lstrip": false,
@@ -102,5 +126,10 @@
       "single_word": false,
       "special": false
     }
-  }
+  },
+  "add_prefix_space": false,
+  "add_dummy_prefix_space": false,
+  "legacy": false,
+  "add_bos_token": false,
+  "add_eos_token": true
 }
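A rough check of what the updated config implies at load time: the slow tokenizer should now resolve to LlamaTokenizer, the new <sep>/<mask>/<cls> entries map to ids 4-6, and with add_bos_token false and add_eos_token true only an EOS token should be appended. This is a sketch, not a verified run; the repo id below is a placeholder:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/this-repo", use_fast=False)

print(type(tok).__name__)                   # expected: LlamaTokenizer
print(tok.convert_tokens_to_ids("<sep>"))   # expected: 4
print(tok.convert_tokens_to_ids("<mask>"))  # expected: 5
print(tok.convert_tokens_to_ids("<cls>"))   # expected: 6

ids = tok("hello")["input_ids"]
print(ids[-1] == tok.eos_token_id)          # expected: True (EOS appended, no BOS prepended)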