huseinzol05 committed on
Commit
da625d6
·
verified ·
1 Parent(s): c6a235f

Upload tokenizer

Browse files
Files changed (4) hide show
  1. added_tokens.json +1 -0
  2. tokenizer.json +9 -0
  3. tokenizer_config.json +9 -0
  4. vocab.json +0 -0
added_tokens.json CHANGED
@@ -1595,6 +1595,7 @@
1595
  "<|th|>": 50289,
1596
  "<|tk|>": 50341,
1597
  "<|tl|>": 50348,
 
1598
  "<|transcribe|>": 50359,
1599
  "<|translate|>": 50358,
1600
  "<|tr|>": 50268,
 
1595
  "<|th|>": 50289,
1596
  "<|tk|>": 50341,
1597
  "<|tl|>": 50348,
1598
+ "<|transcribeprecise|>": 51865,
1599
  "<|transcribe|>": 50359,
1600
  "<|translate|>": 50358,
1601
  "<|tr|>": 50268,
tokenizer.json CHANGED
@@ -14474,6 +14474,15 @@
14474
  "rstrip": false,
14475
  "normalized": true,
14476
  "special": false
 
 
 
 
 
 
 
 
 
14477
  }
14478
  ],
14479
  "normalizer": null,
 
14474
  "rstrip": false,
14475
  "normalized": true,
14476
  "special": false
14477
+ },
14478
+ {
14479
+ "id": 51865,
14480
+ "content": "<|transcribeprecise|>",
14481
+ "single_word": false,
14482
+ "lstrip": false,
14483
+ "rstrip": false,
14484
+ "normalized": true,
14485
+ "special": false
14486
  }
14487
  ],
14488
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -12865,6 +12865,14 @@
12865
  "rstrip": false,
12866
  "single_word": false,
12867
  "special": false
 
 
 
 
 
 
 
 
12868
  }
12869
  },
12870
  "additional_special_tokens": [
@@ -12980,6 +12988,7 @@
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
 
12983
  "model_max_length": 1024,
12984
  "pad_token": "<|endoftext|>",
12985
  "processor_class": "WhisperProcessor",
 
12865
  "rstrip": false,
12866
  "single_word": false,
12867
  "special": false
12868
+ },
12869
+ "51865": {
12870
+ "content": "<|transcribeprecise|>",
12871
+ "lstrip": false,
12872
+ "normalized": true,
12873
+ "rstrip": false,
12874
+ "single_word": false,
12875
+ "special": false
12876
  }
12877
  },
12878
  "additional_special_tokens": [
 
12988
  "clean_up_tokenization_spaces": true,
12989
  "eos_token": "<|endoftext|>",
12990
  "errors": "replace",
12991
+ "extra_special_tokens": {},
12992
  "model_max_length": 1024,
12993
  "pad_token": "<|endoftext|>",
12994
  "processor_class": "WhisperProcessor",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff