tmberooney committed on
Commit
19eb1b8
1 Parent(s): 0293e78

Upload tokenizer

Browse files
Files changed (3) hide show
  1. tokenizer.json +1 -0
  2. tokenizer.model +3 -0
  3. tokenizer_config.json +3 -0
tokenizer.json CHANGED
@@ -146,6 +146,7 @@
146
  "end_of_word_suffix": null,
147
  "fuse_unk": true,
148
  "byte_fallback": true,
 
149
  "vocab": {
150
  "<unk>": 0,
151
  "<s>": 1,
 
146
  "end_of_word_suffix": null,
147
  "fuse_unk": true,
148
  "byte_fallback": true,
149
+ "ignore_merges": false,
150
  "vocab": {
151
  "<unk>": 0,
152
  "<s>": 1,
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",