ashwinmuthuraman committed on
Commit
a5cc1ac
1 Parent(s): bae8e3c

Upload tokenizer

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. special_tokens_map.json +1 -0
  3. tokenizer_config.json +1 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  license: apache-2.0
3
- base_model: distilbert/distilgpt2
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
  - eli5_category
 
8
  model-index:
9
  - name: my_awesome_eli5_clm-model
10
  results: []
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
  datasets:
6
  - eli5_category
7
+ base_model: distilbert/distilgpt2
8
  model-index:
9
  - name: my_awesome_eli5_clm-model
10
  results: []
special_tokens_map.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
 
4
  "unk_token": "<|endoftext|>"
5
  }
 
1
  {
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
  "unk_token": "<|endoftext|>"
6
  }
tokenizer_config.json CHANGED
@@ -14,6 +14,7 @@
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
16
  "model_max_length": 1024,
 
17
  "tokenizer_class": "GPT2Tokenizer",
18
  "unk_token": "<|endoftext|>"
19
  }
 
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
16
  "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
  "tokenizer_class": "GPT2Tokenizer",
19
  "unk_token": "<|endoftext|>"
20
  }