{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 1, "content": "a", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "E", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "e", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "Ɛ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "ɛ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "Ə", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "ə", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "I", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 9, "content": "i", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 10, "content": "O", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 11, "content": "o", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 12, "content": "Ɔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 13, "content": "ɔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 14, "content": "U", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 15, "content": "u", "single_word": false, "lstrip": 
false, "rstrip": false, "normalized": false, "special": true }, { "id": 16, "content": "ã", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 17, "content": "ẽ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 18, "content": "ĩ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 19, "content": "õ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 20, "content": "ũ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 21, "content": "B", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 22, "content": "b", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 23, "content": "D", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 24, "content": "d", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 25, "content": "Đ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 26, "content": "ɖ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 27, "content": "F", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 28, "content": "f", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 29, "content": "Ƒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 30, "content": "ƒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 31, "content": "G", "single_word": false, "lstrip": false, 
"rstrip": false, "normalized": false, "special": true }, { "id": 32, "content": "g", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 33, "content": "Ɣ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 34, "content": "ɣ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 35, "content": "H", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 36, "content": "h", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 37, "content": "K", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 38, "content": "k", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 39, "content": "L", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 40, "content": "l", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 41, "content": "M", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 42, "content": "m", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 43, "content": "N", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 44, "content": "n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 45, "content": "Ŋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 46, "content": "ŋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 47, "content": "P", "single_word": false, "lstrip": false, "rstrip": 
false, "normalized": false, "special": true }, { "id": 48, "content": "p", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 49, "content": "R", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 50, "content": "r", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 51, "content": "S", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 52, "content": "s", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 53, "content": "T", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 54, "content": "t", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 55, "content": "V", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 56, "content": "v", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 57, "content": "Ʋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 58, "content": "ʋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 59, "content": "W", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 60, "content": "w", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 61, "content": "X", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 62, "content": "x", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 63, "content": "Y", "single_word": false, "lstrip": false, "rstrip": false, 
"normalized": false, "special": true }, { "id": 64, "content": "y", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 65, "content": "Z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 66, "content": "z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 67, "content": "̃", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 68, "content": "ó", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 69, "content": "À", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 70, "content": "é", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 71, "content": "È", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 72, "content": "ò", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 73, "content": "à", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 74, "content": "í", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|endoftext|>": 0, "a": 1, "E": 2, "e": 3, "Ɛ": 
4, "ɛ": 5, "Ə": 6, "ə": 7, "I": 8, "i": 9, "O": 10, "o": 11, "Ɔ": 12, "ɔ": 13, "U": 14, "u": 15, "ã": 16, "ẽ": 17, "ĩ": 18, "õ": 19, "ũ": 20, "B": 21, "b": 22, "D": 23, "d": 24, "Đ": 25, "ɖ": 26, "F": 27, "f": 28, "Ƒ": 29, "ƒ": 30, "G": 31, "g": 32, "Ɣ": 33, "ɣ": 34, "H": 35, "h": 36, "K": 37, "k": 38, "L": 39, "l": 40, "M": 41, "m": 42, "N": 43, "n": 44, "Ŋ": 45, "ŋ": 46, "P": 47, "p": 48, "R": 49, "r": 50, "S": 51, "s": 52, "T": 53, "t": 54, "V": 55, "v": 56, "Ʋ": 57, "ʋ": 58, "W": 59, "w": 60, "X": 61, "x": 62, "Y": 63, "y": 64, "Z": 65, "z": 66, "̃": 67, "ó": 68, "À": 69, "é": 70, "È": 71, "ò": 72, "à": 73, "í": 74, "!": 75, "\"": 76, "#": 77, "$": 78, "%": 79, "&": 80, "'": 81, "(": 82, ")": 83, "*": 84, "+": 85, ",": 86, "-": 87, ".": 88, "/": 89, "0": 90, "1": 91, "2": 92, "3": 93, "4": 94, "5": 95, "6": 96, "7": 97, "8": 98, "9": 99, ":": 100, ";": 101, "<": 102, "=": 103, ">": 104, "?": 105, "@": 106, "A": 107, "C": 108, "J": 109, "Q": 110, "[": 111, "\\": 112, "]": 113, "^": 114, "_": 115, "`": 116, "c": 117, "j": 118, "q": 119, "{": 120, "|": 121, "}": 122, "~": 123, "¡": 124, "¢": 125, "£": 126, "¤": 127, "¥": 128, "¦": 129, "§": 130, "¨": 131, "©": 132, "ª": 133, "«": 134, "¬": 135, "®": 136, "¯": 137, "°": 138, "±": 139, "²": 140, "³": 141, "´": 142, "µ": 143, "¶": 144, "·": 145, "¸": 146, "¹": 147, "º": 148, "»": 149, "¼": 150, "½": 151, "¾": 152, "¿": 153, "Á": 154, "Â": 155, "Ã": 156, "Ä": 157, "Å": 158, "Æ": 159, "Ç": 160, "É": 161, "Ê": 162, "Ë": 163, "Ì": 164, "Í": 165, "Î": 166, "Ï": 167, "Ð": 168, "Ñ": 169, "Ò": 170, "Ó": 171, "Ô": 172, "Õ": 173, "Ö": 174, "×": 175, "Ø": 176, "Ù": 177, "Ú": 178, "Û": 179, "Ü": 180, "Ý": 181, "Þ": 182, "ß": 183, "á": 184, "â": 185, "ä": 186, "å": 187, "æ": 188, "ç": 189, "è": 190, "ê": 191, "ë": 192, "ì": 193, "î": 194, "ï": 195, "ð": 196, "ñ": 197, "ô": 198, "ö": 199, "÷": 200, "ø": 201, "ù": 202, "ú": 203, "û": 204, "ü": 205, "ý": 206, "þ": 207, "ÿ": 208, "Ā": 209, "ā": 210, "Ă": 211, "ă": 212, "Ą": 213, "ą": 
214, "Ć": 215, "ć": 216, "Ĉ": 217, "ĉ": 218, "Ċ": 219, "ċ": 220, "Č": 221, "č": 222, "Ď": 223, "ď": 224, "đ": 225, "Ē": 226, "ē": 227, "Ĕ": 228, "ĕ": 229, "Ė": 230, "ė": 231, "Ę": 232, "ę": 233, "Ě": 234, "ě": 235, "Ĝ": 236, "ĝ": 237, "Ğ": 238, "ğ": 239, "Ġ": 240, "ġ": 241, "Ģ": 242, "ģ": 243, "Ĥ": 244, "ĥ": 245, "Ħ": 246, "ħ": 247, "Ĩ": 248, "Ī": 249, "ī": 250, "Ĭ": 251, "ĭ": 252, "Į": 253, "į": 254, "İ": 255, "ı": 256, "Ĳ": 257, "ĳ": 258, "Ĵ": 259, "ĵ": 260, "Ķ": 261, "ķ": 262, "ĸ": 263, "Ĺ": 264, "ĺ": 265, "Ļ": 266, "ļ": 267, "Ľ": 268, "ľ": 269, "Ŀ": 270, "ŀ": 271, "Ł": 272, "ł": 273, "Ń": 274 }, "merges": [] } }