huseinzol05
committed on
Upload tokenizer
Browse files
- added_tokens.json +1 -0
- tokenizer.json +9 -0
- tokenizer_config.json +9 -0
- vocab.json +0 -0
added_tokens.json
CHANGED
@@ -1595,6 +1595,7 @@
|
|
1595 |
"<|th|>": 50289,
|
1596 |
"<|tk|>": 50341,
|
1597 |
"<|tl|>": 50348,
|
|
|
1598 |
"<|transcribe|>": 50359,
|
1599 |
"<|translate|>": 50358,
|
1600 |
"<|tr|>": 50268,
|
|
|
1595 |
"<|th|>": 50289,
|
1596 |
"<|tk|>": 50341,
|
1597 |
"<|tl|>": 50348,
|
1598 |
+
"<|transcribeprecise|>": 51865,
|
1599 |
"<|transcribe|>": 50359,
|
1600 |
"<|translate|>": 50358,
|
1601 |
"<|tr|>": 50268,
|
tokenizer.json
CHANGED
@@ -14474,6 +14474,15 @@
|
|
14474 |
"rstrip": false,
|
14475 |
"normalized": true,
|
14476 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14477 |
}
|
14478 |
],
|
14479 |
"normalizer": null,
|
|
|
14474 |
"rstrip": false,
|
14475 |
"normalized": true,
|
14476 |
"special": false
|
14477 |
+
},
|
14478 |
+
{
|
14479 |
+
"id": 51865,
|
14480 |
+
"content": "<|transcribeprecise|>",
|
14481 |
+
"single_word": false,
|
14482 |
+
"lstrip": false,
|
14483 |
+
"rstrip": false,
|
14484 |
+
"normalized": true,
|
14485 |
+
"special": false
|
14486 |
}
|
14487 |
],
|
14488 |
"normalizer": null,
|
tokenizer_config.json
CHANGED
@@ -12865,6 +12865,14 @@
|
|
12865 |
"rstrip": false,
|
12866 |
"single_word": false,
|
12867 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12868 |
}
|
12869 |
},
|
12870 |
"additional_special_tokens": [
|
@@ -12980,6 +12988,7 @@
|
|
12980 |
"clean_up_tokenization_spaces": true,
|
12981 |
"eos_token": "<|endoftext|>",
|
12982 |
"errors": "replace",
|
|
|
12983 |
"model_max_length": 1024,
|
12984 |
"pad_token": "<|endoftext|>",
|
12985 |
"processor_class": "WhisperProcessor",
|
|
|
12865 |
"rstrip": false,
|
12866 |
"single_word": false,
|
12867 |
"special": false
|
12868 |
+
},
|
12869 |
+
"51865": {
|
12870 |
+
"content": "<|transcribeprecise|>",
|
12871 |
+
"lstrip": false,
|
12872 |
+
"normalized": true,
|
12873 |
+
"rstrip": false,
|
12874 |
+
"single_word": false,
|
12875 |
+
"special": false
|
12876 |
}
|
12877 |
},
|
12878 |
"additional_special_tokens": [
|
|
|
12988 |
"clean_up_tokenization_spaces": true,
|
12989 |
"eos_token": "<|endoftext|>",
|
12990 |
"errors": "replace",
|
12991 |
+
"extra_special_tokens": {},
|
12992 |
"model_max_length": 1024,
|
12993 |
"pad_token": "<|endoftext|>",
|
12994 |
"processor_class": "WhisperProcessor",
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|