osanseviero pcuenq HF staff committed on
Commit
d1ff2ed
•
1 Parent(s): 5c0e18b

Add <step> token, chat template (#5)

Browse files

- Add <step> token, chat template (0a378c1610f3711b26a521ecf070c7acb2caeb08)


Co-authored-by: Pedro Cuenca <[email protected]>

Files changed (3) hide show
  1. added_tokens.json +3 -0
  2. tokenizer.json +11 -3
  3. tokenizer_config.json +9 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<step>": 32015
3
+ }
tokenizer.json CHANGED
@@ -29,6 +29,15 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -32149,8 +32158,7 @@
32149
  "▁<EOT><EOT>": 32011,
32150
  "▁<EOT><EOT><EOT>": 32012,
32151
  "▁<EOT><EOT><EOT><EOT>": 32013,
32152
- "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
32153
- "<step>": 32015
32154
  },
32155
  "merges": [
32156
  "▁ t",
@@ -93415,4 +93423,4 @@
93415
  "▁<EOT >"
93416
  ]
93417
  }
93418
- }
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 32015,
35
+ "content": "<step>",
36
+ "single_word": true,
37
+ "lstrip": true,
38
+ "rstrip": true,
39
+ "normalized": false,
40
+ "special": false
41
  }
42
  ],
43
  "normalizer": {
 
32158
  "▁<EOT><EOT>": 32011,
32159
  "▁<EOT><EOT><EOT>": 32012,
32160
  "▁<EOT><EOT><EOT><EOT>": 32013,
32161
+ "▁<EOT><EOT><EOT><EOT><EOT>": 32014
 
32162
  },
32163
  "merges": [
32164
  "▁ t",
 
93423
  "▁<EOT >"
93424
  ]
93425
  }
93426
+ }
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -25,6 +26,14 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "bos_token": "<s>",
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set user_index = 1 %}{% else %}{% set user_index = 0 %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != ((loop.index0 + user_index) % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{{ '<s>' }}{% endif %}{% set content = 'Source: ' + message['role'] + '\n\n ' + message['content'].strip() %}{{ content + ' <step> ' }}{% endfor %}{{'Source: assistant\nDestination: user\n\n '}}",
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
+ },
30
+ "32015": {
31
+ "content": "<step>",
32
+ "lstrip": true,
33
+ "normalized": false,
34
+ "rstrip": true,
35
+ "single_word": true,
36
+ "special": false
37
  }
38
  },
39
  "bos_token": "<s>",