{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 64, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "&": 5, "'": 6, "*": 7, ",": 8, "-": 9, ".": 10, "/": 11, "?": 12, "0": 13, "1": 14, "2": 15, "3": 16, "4": 17, "5": 18, "6": 19, "7": 20, "8": 21, "9": 22, ":": 23, ";": 24, "\\": 25, "_": 26, "a": 27, "b": 28, "c": 29, "d": 30, "e": 31, "f": 32, "g": 33, "h": 34, "i": 35, "j": 36, "k": 37, "l": 38, "m": 39, "n": 40, "o": 41, "p": 42, "q": 43, "r": 44, "s": 45, "t": 46, "u": 47, "v": 48, "w": 49, "x": 50, "y": 51, "z": 52, "//": 53, "//:": 54, "http": 55, "https": 56, "ftp": 57, "sftp": 58, "mailto": 59, "tel": 60, "file": 61, "ws": 62, "wss": 63, "rtmp": 64, "ssh": 65, "ldap": 66, "ldaps": 67, "nntp": 68, "gopher": 69, "telnet": 70, "view": 71, "source": 72, "about": 73, "chrome": 74, "data": 75, "irc": 76, "ircs": 77, "magnet": 78, "mms": 79, "redis": 80, "rsync": 81, "rtsp": 82, "svn": 83, "vnc": 84, "webcal": 85, "xmpp": 86, "dns": 87, "ntp": 88, "ip": 89, "com": 90, "de": 91, "net": 92, "uk": 93, "cn": 94, "org": 95, "info": 96, "nl": 97, "eu": 98, "ru": 99, "su": 100, "href": 101, "br": 102, "htm": 103, "php": 104, "co": 105, "ly": 106, "bit": 107, "log": 108, "index": 109, "bank": 110, "za": 111, "direct": 112, "xml": 113, "mail": 114, "it": 115, "www": 116, "run": 117, "security": 118, "code": 119, "promo": 120, "jpg": 121, "img": 122, "pay": 123, "form": 124, "docs": 125, "host": 126, "ec": 127, "cx": 128, "free": 129, "true": 130, "false": 131, "amp": 132, "blog": 133, "key": 134, "pal": 135, "contact": 136, "online": 137, "abc": 138, "media": 139, "admin": 140, "etc": 141, "login": 142, "cmd": 143, "bin": 144, "web": 145, "verif": 146, "the": 147, "in": 148, "##s": 149, "of": 150, "la": 151, "en": 152, "and": 153, "##e": 154, "##a": 155, "to": 156, "##n": 157, "##i": 158, "der": 159, "un": 160, "di": 161, "que": 162, "##t": 163, "is": 164, "el": 165, "se": 166, "del": 167, "die": 168, "##r": 169, "und": 170, "et": 171, "na": 172, "##o": 173, "was": 174, "on": 175, "##en": 176, "##u": 177, "des": 178, "den": 179, "le": 180, "for": 181, "da": 182, "je": 183, "van": 184, "as": 185, "##m": 186, "sa": 187, "do": 188, "10": 189, "an": 190, "les": 191, "una": 192, "il": 193, "by": 194, "og": 195, "##y": 196, "at": 197, "##l": 198, "##d": 199, "er": 200, "al": 201, "##er": 202, "von": 203, "du": 204, "av": 205, "##es": 206, "med": 207, "con": 208, "##k": 209, "est": 210, "per": 211, "som": 212, "los": 213, "por": 214, "from": 215, "that": 216, "no": 217, "11": 218, "es": 219, "ja": 220, "km": 221, "##е": 222, "##an": 223, "om": 224, "im": 225, "##ta": 226, "dan": 227, "##te": 228, "##na": 229, "para": 230, "mit": 231, "El": 232, "his": 233, "##у": 234, "ha": 235, "##da": 236, "##ing": 237, "une": 238, "##h": 239, "##ne": 240, "##g": 241, "das": 242, "##in": 243, "##re": 244, "par": 245, "##us": 246, "##de": 247, "au": 248, "dans": 249, "he": 250, "che": 251, "em": 252, "dem": 253, "19": 254, "til": 255, "се": 256, "han": 257, "##ia": 258, "##le": 259, "las": 260, "della": 261, "new": 262, "##ra": 263, "##is": 264, "um": 265, "si": 266, "var": 267, "are": 268, "op": 269, "zu": 270, "##et": 271, "were": 272, "##os": 273, "od": 274, "son": 275, "##о": 276, "##do": 277, "which": 278, "##ja": 279, "va": 280, "pour": 281, "ve": 282, "##ti": 283, "sur": 284, "##la": 285, "##ed": 286, "war": 287, "##to": 288, "##se": 289, "##ni": 290, "##no": 291, "be": 292, "det": 293, "##і": 294, "##х": 295, "gov": 296, "##ar": 297, "qui": 298, "az": 299, "te": 300, "##va": 301, "##nt": 302, "##ma": 303, "##ka": 304, "had": 305, "##ng": 306, "also": 307, "so": 308, "##je": 309, "##li": 310, "am": 311, "has": 312, "dos": 313, "ur": 314, "##ie": 315, "##ri": 316, "entre": 317, "##as": 318, "lo": 319, "era": 320, "ni": 321, "##al": 322, "##j": 323, "##ce": 324, "first": 325, "##ca": 326, "##ment": 327, "os": 328, "met": 329, "ou": 330, "all": 331, "##ko": 332, "aus": 333, "non": 334, "##si": 335, "##em": 336, "##ly": 337, "##b": 338, "film": 339, "##um": 340, "##sa": 341, "##v": 342, "##ga": 343, "##it": 344, "##mi": 345, "##ki": 346, "po": 347, "##ge": 348, "##at": 349, "##ba": 350, "##ur": 351, "##ke": 352, "##st": 353, "##ro": 354, "##el": 355, "##f": 356, "##man": 357, "##ci": 358, "##ul": 359, "##ndo": 360, "##mente": 361, "##ve": 362, "##me": 363, "##den": 364, "##za": 365, "##io": 366, "##or": 367, "##nya": 368, "##ya": 369, "##ten": 370, "##x": 371, "##om": 372, "##di": 373, "##kan": 374, "##ek": 375, "into": 376, "##ak": 377, "till": 378, "##lo": 379, "ble": 380, "ka": 381, "##S": 382, "mai": 383, "up": 384, "ng": 385, "##то": 386, "aux": 387, "##ny": 388, "##ho": 389, "##1": 390, "##ju": 391, "##lar": 392, "##ji": 393, "##go": 394, "##ts": 395, "##co": 396, "##ler": 397, "##tion": 398, "##ir": 399, "ad": 400, "##ku": 401, "##ze": 402, "##w": 403, "ki": 404, "##ns": 405, "##ik": 406, "##ers": 407, "##ry": 408, "me": 409, "##sen": 410, "##des": 411, "##ha": 412, "##ban": 413, "during": 414, "where": 415, "ze": 416, "##rs": 417, "can": 418, "out": 419, "wie": 420, "со": 421, "##ben": 422, "##ren": 423, "##sta": 424, "##rt": 425, "##tu": 426, "fu": 427, "##am": 428, "##ou": 429, "##ria": 430, "##ov": 431, "##il": 432, "##mo": 433, "vom": 434, "##wa": 435, "##jo": 436, "##ica": 437, "##be": 438, "##ion": 439, "##ken": 440, "##ina": 441, "##land": 442, "##lla": 443, "nu": 444, "##ine": 445, "##un": 446, "club": 447, "##che": 448, "team": 449, "##ner": 450, "##ic": 451, "ca": 452, "##ok": 453, "##ig": 454, "##th": 455, "##nu": 456, "##ada": 457, "##ste": 458, "##ut": 459, "pe": 460, "ke": 461, "area": 462, "any": 463, "##dos": 464, "##ton": 465, "##que": 466, "vor": 467, "##ty": 468, "##tes": 469, "##ble": 470, "##das": 471 } } }