add tokenizer
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 42, "</s>": 43}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ա": 1, "բ": 2, "գ": 3, "դ": 4, "ե": 5, "զ": 6, "է": 7, "ը": 8, "թ": 9, "ժ": 10, "ի": 11, "լ": 12, "խ": 13, "ծ": 14, "կ": 15, "հ": 16, "ձ": 17, "ղ": 18, "ճ": 19, "մ": 20, "յ": 21, "ն": 22, "շ": 23, "ո": 24, "չ": 25, "պ": 26, "ջ": 27, "ռ": 28, "ս": 29, "վ": 30, "տ": 31, "ր": 32, "ց": 33, "ւ": 34, "փ": 35, "ք": 36, "օ": 37, "ֆ": 38, "և": 39, "|": 0, "[UNK]": 40, "[PAD]": 41}
|