ashishsr committed on
Commit
60c296e
1 Parent(s): 5c224a5

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +34 -4
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -25,16 +27,44 @@
25
  "special": true
26
  }
27
  },
28
- "bos_token": "<s>",
 
 
 
 
 
 
 
29
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
30
  "clean_up_tokenization_spaces": false,
31
- "eos_token": "</s>",
 
 
 
 
 
 
 
32
  "legacy": false,
33
  "model_max_length": 2048,
34
- "pad_token": "</s>",
 
 
 
 
 
 
 
35
  "padding_side": "right",
36
  "sp_model_kwargs": {},
37
  "tokenizer_class": "LlamaTokenizer",
38
- "unk_token": "<unk>",
 
 
 
 
 
 
 
39
  "use_default_system_prompt": false
40
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
27
  "special": true
28
  }
29
  },
30
+ "bos_token": {
31
+ "__type": "AddedToken",
32
+ "content": "<s>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
39
  "clean_up_tokenization_spaces": false,
40
+ "eos_token": {
41
+ "__type": "AddedToken",
42
+ "content": "</s>",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
  "legacy": false,
49
  "model_max_length": 2048,
50
+ "pad_token": {
51
+ "__type": "AddedToken",
52
+ "content": "</s>",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false
57
+ },
58
  "padding_side": "right",
59
  "sp_model_kwargs": {},
60
  "tokenizer_class": "LlamaTokenizer",
61
+ "unk_token": {
62
+ "__type": "AddedToken",
63
+ "content": "<unk>",
64
+ "lstrip": false,
65
+ "normalized": true,
66
+ "rstrip": false,
67
+ "single_word": false
68
+ },
69
  "use_default_system_prompt": false
70
  }