yuxiang630 committed on
Commit
a5210a4
1 Parent(s): a245736

Upload tokenizer

Browse files
Files changed (2)
  1. README.md +2 -2
  2. tokenizer_config.json +1 -0
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
  license: other
3
- license_name: deepseek
4
  datasets:
5
  - ise-uiuc/Magicoder-OSS-Instruct-75K
6
- library_name: transformers
7
  pipeline_tag: text-generation
8
  ---
9
  # 🎩 Magicoder: Source Code Is All You Need
 
1
  ---
2
  license: other
3
+ library_name: transformers
4
  datasets:
5
  - ise-uiuc/Magicoder-OSS-Instruct-75K
6
+ license_name: deepseek
7
  pipeline_tag: text-generation
8
  ---
9
  # 🎩 Magicoder: Source Code Is All You Need
tokenizer_config.json CHANGED
@@ -180,6 +180,7 @@
180
  }
181
  },
182
  "bos_token": "<|begin▁of▁sentence|>",
 
183
  "clean_up_tokenization_spaces": false,
184
  "eos_token": "<|end▁of▁sentence|>",
185
  "legacy": true,
 
180
  }
181
  },
182
  "bos_token": "<|begin▁of▁sentence|>",
183
+ "chat_template": "{{bos_token}}{{'You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.\n\n'}}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{ raise_exception('System messages are not allowed in this template.') }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'@@ Instruction\n' + message['content'] + '\n\n'}}\n {%- else %}\n{{'@@ Response\n' + message['content'] + eos_token + '\n\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{'@@ Response\n'}}",
184
  "clean_up_tokenization_spaces": false,
185
  "eos_token": "<|end▁of▁sentence|>",
186
  "legacy": true,