abhinavkulkarni committed on
Commit
76f4923
1 Parent(s): ca0e6b8

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -5,5 +5,6 @@
5
  ],
6
  "bos_token": "<|endoftext|>",
7
  "eos_token": "<|endoftext|>",
 
8
  "unk_token": "<|endoftext|>"
9
  }
 
5
  ],
6
  "bos_token": "<|endoftext|>",
7
  "eos_token": "<|endoftext|>",
8
+ "pad_token": "<|endoftext|>",
9
  "unk_token": "<|endoftext|>"
10
  }
tokenizer.json CHANGED
@@ -1,7 +1,14 @@
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Left",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 0,
9
+ "pad_type_id": 0,
10
+ "pad_token": "<|endoftext|>"
11
+ },
12
  "added_tokens": [
13
  {
14
  "id": 0,
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "bos_token": "<|endoftext|>",
4
  "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",
6
- "model_max_length": 1000000000000000019884624838656,
7
  "tokenizer_class": "GPTNeoXTokenizer",
8
  "unk_token": "<|endoftext|>"
9
  }
 
3
  "bos_token": "<|endoftext|>",
4
  "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",
6
+ "model_max_length": 8192,
7
  "tokenizer_class": "GPTNeoXTokenizer",
8
  "unk_token": "<|endoftext|>"
9
  }