|
{ |
|
"version": "1.0", |
|
"truncation": { |
|
"direction": "Right", |
|
"max_length": 64, |
|
"strategy": "LongestFirst", |
|
"stride": 0 |
|
}, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "[PAD]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "[UNK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "[CLS]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "[SEP]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 4, |
|
"content": "[MASK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "BertNormalizer", |
|
"clean_text": true, |
|
"handle_chinese_chars": true, |
|
"strip_accents": null, |
|
"lowercase": true |
|
}, |
|
"pre_tokenizer": { |
|
"type": "BertPreTokenizer" |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"[CLS]": { |
|
"id": "[CLS]", |
|
"ids": [ |
|
2 |
|
], |
|
"tokens": [ |
|
"[CLS]" |
|
] |
|
}, |
|
"[SEP]": { |
|
"id": "[SEP]", |
|
"ids": [ |
|
3 |
|
], |
|
"tokens": [ |
|
"[SEP]" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": { |
|
"type": "WordPiece", |
|
"prefix": "##", |
|
"cleanup": true |
|
}, |
|
"model": { |
|
"type": "WordPiece", |
|
"unk_token": "[UNK]", |
|
"continuing_subword_prefix": "##", |
|
"max_input_chars_per_word": 100, |
|
"vocab": { |
|
"[PAD]": 0, |
|
"[UNK]": 1, |
|
"[CLS]": 2, |
|
"[SEP]": 3, |
|
"[MASK]": 4, |
|
"&": 5, |
|
"'": 6, |
|
"*": 7, |
|
",": 8, |
|
"-": 9, |
|
".": 10, |
|
"/": 11, |
|
"?": 12, |
|
"0": 13, |
|
"1": 14, |
|
"2": 15, |
|
"3": 16, |
|
"4": 17, |
|
"5": 18, |
|
"6": 19, |
|
"7": 20, |
|
"8": 21, |
|
"9": 22, |
|
":": 23, |
|
";": 24, |
|
"\\": 25, |
|
"_": 26, |
|
"a": 27, |
|
"b": 28, |
|
"c": 29, |
|
"d": 30, |
|
"e": 31, |
|
"f": 32, |
|
"g": 33, |
|
"h": 34, |
|
"i": 35, |
|
"j": 36, |
|
"k": 37, |
|
"l": 38, |
|
"m": 39, |
|
"n": 40, |
|
"o": 41, |
|
"p": 42, |
|
"q": 43, |
|
"r": 44, |
|
"s": 45, |
|
"t": 46, |
|
"u": 47, |
|
"v": 48, |
|
"w": 49, |
|
"x": 50, |
|
"y": 51, |
|
"z": 52, |
|
"//": 53, |
|
"//:": 54, |
|
"http": 55, |
|
"https": 56, |
|
"ftp": 57, |
|
"sftp": 58, |
|
"mailto": 59, |
|
"tel": 60, |
|
"file": 61, |
|
"ws": 62, |
|
"wss": 63, |
|
"rtmp": 64, |
|
"ssh": 65, |
|
"ldap": 66, |
|
"ldaps": 67, |
|
"nntp": 68, |
|
"gopher": 69, |
|
"telnet": 70, |
|
"view": 71, |
|
"source": 72, |
|
"about": 73, |
|
"chrome": 74, |
|
"data": 75, |
|
"irc": 76, |
|
"ircs": 77, |
|
"magnet": 78, |
|
"mms": 79, |
|
"redis": 80, |
|
"rsync": 81, |
|
"rtsp": 82, |
|
"svn": 83, |
|
"vnc": 84, |
|
"webcal": 85, |
|
"xmpp": 86, |
|
"dns": 87, |
|
"ntp": 88, |
|
"ip": 89, |
|
"com": 90, |
|
"de": 91, |
|
"net": 92, |
|
"uk": 93, |
|
"cn": 94, |
|
"org": 95, |
|
"info": 96, |
|
"nl": 97, |
|
"eu": 98, |
|
"ru": 99, |
|
"su": 100, |
|
"href": 101, |
|
"br": 102, |
|
"htm": 103, |
|
"php": 104, |
|
"co": 105, |
|
"ly": 106, |
|
"bit": 107, |
|
"log": 108, |
|
"index": 109, |
|
"bank": 110, |
|
"za": 111, |
|
"direct": 112, |
|
"xml": 113, |
|
"mail": 114, |
|
"it": 115, |
|
"www": 116, |
|
"run": 117, |
|
"security": 118, |
|
"code": 119, |
|
"promo": 120, |
|
"jpg": 121, |
|
"img": 122, |
|
"pay": 123, |
|
"form": 124, |
|
"docs": 125, |
|
"host": 126, |
|
"ec": 127, |
|
"cx": 128, |
|
"free": 129, |
|
"true": 130, |
|
"false": 131, |
|
"amp": 132, |
|
"blog": 133, |
|
"key": 134, |
|
"pal": 135, |
|
"contact": 136, |
|
"online": 137, |
|
"abc": 138, |
|
"media": 139, |
|
"admin": 140, |
|
"etc": 141, |
|
"login": 142, |
|
"cmd": 143, |
|
"bin": 144, |
|
"web": 145, |
|
"verif": 146, |
|
"the": 147, |
|
"in": 148, |
|
"##s": 149, |
|
"of": 150, |
|
"la": 151, |
|
"en": 152, |
|
"and": 153, |
|
"##e": 154, |
|
"##a": 155, |
|
"to": 156, |
|
"##n": 157, |
|
"##i": 158, |
|
"der": 159, |
|
"un": 160, |
|
"di": 161, |
|
"que": 162, |
|
"##t": 163, |
|
"is": 164, |
|
"el": 165, |
|
"se": 166, |
|
"del": 167, |
|
"die": 168, |
|
"##r": 169, |
|
"und": 170, |
|
"et": 171, |
|
"na": 172, |
|
"##o": 173, |
|
"was": 174, |
|
"on": 175, |
|
"##en": 176, |
|
"##u": 177, |
|
"des": 178, |
|
"den": 179, |
|
"le": 180, |
|
"for": 181, |
|
"da": 182, |
|
"je": 183, |
|
"van": 184, |
|
"as": 185, |
|
"##m": 186, |
|
"sa": 187, |
|
"do": 188, |
|
"10": 189, |
|
"an": 190, |
|
"les": 191, |
|
"una": 192, |
|
"il": 193, |
|
"by": 194, |
|
"og": 195, |
|
"##y": 196, |
|
"at": 197, |
|
"##l": 198, |
|
"##d": 199, |
|
"er": 200, |
|
"al": 201, |
|
"##er": 202, |
|
"von": 203, |
|
"du": 204, |
|
"av": 205, |
|
"##es": 206, |
|
"med": 207, |
|
"con": 208, |
|
"##k": 209, |
|
"est": 210, |
|
"per": 211, |
|
"som": 212, |
|
"los": 213, |
|
"por": 214, |
|
"from": 215, |
|
"that": 216, |
|
"no": 217, |
|
"11": 218, |
|
"es": 219, |
|
"ja": 220, |
|
"km": 221, |
|
"##е": 222, |
|
"##an": 223, |
|
"om": 224, |
|
"im": 225, |
|
"##ta": 226, |
|
"dan": 227, |
|
"##te": 228, |
|
"##na": 229, |
|
"para": 230, |
|
"mit": 231, |
|
"El": 232, |
|
"his": 233, |
|
"##у": 234, |
|
"ha": 235, |
|
"##da": 236, |
|
"##ing": 237, |
|
"une": 238, |
|
"##h": 239, |
|
"##ne": 240, |
|
"##g": 241, |
|
"das": 242, |
|
"##in": 243, |
|
"##re": 244, |
|
"par": 245, |
|
"##us": 246, |
|
"##de": 247, |
|
"au": 248, |
|
"dans": 249, |
|
"he": 250, |
|
"che": 251, |
|
"em": 252, |
|
"dem": 253, |
|
"19": 254, |
|
"til": 255, |
|
"се": 256, |
|
"han": 257, |
|
"##ia": 258, |
|
"##le": 259, |
|
"las": 260, |
|
"della": 261, |
|
"new": 262, |
|
"##ra": 263, |
|
"##is": 264, |
|
"um": 265, |
|
"si": 266, |
|
"var": 267, |
|
"are": 268, |
|
"op": 269, |
|
"zu": 270, |
|
"##et": 271, |
|
"were": 272, |
|
"##os": 273, |
|
"od": 274, |
|
"son": 275, |
|
"##о": 276, |
|
"##do": 277, |
|
"which": 278, |
|
"##ja": 279, |
|
"va": 280, |
|
"pour": 281, |
|
"ve": 282, |
|
"##ti": 283, |
|
"sur": 284, |
|
"##la": 285, |
|
"##ed": 286, |
|
"war": 287, |
|
"##to": 288, |
|
"##se": 289, |
|
"##ni": 290, |
|
"##no": 291, |
|
"be": 292, |
|
"det": 293, |
|
"##і": 294, |
|
"##х": 295, |
|
"gov": 296, |
|
"##ar": 297, |
|
"qui": 298, |
|
"az": 299, |
|
"te": 300, |
|
"##va": 301, |
|
"##nt": 302, |
|
"##ma": 303, |
|
"##ka": 304, |
|
"had": 305, |
|
"##ng": 306, |
|
"also": 307, |
|
"so": 308, |
|
"##je": 309, |
|
"##li": 310, |
|
"am": 311, |
|
"has": 312, |
|
"dos": 313, |
|
"ur": 314, |
|
"##ie": 315, |
|
"##ri": 316, |
|
"entre": 317, |
|
"##as": 318, |
|
"lo": 319, |
|
"era": 320, |
|
"ni": 321, |
|
"##al": 322, |
|
"##j": 323, |
|
"##ce": 324, |
|
"first": 325, |
|
"##ca": 326, |
|
"##ment": 327, |
|
"os": 328, |
|
"met": 329, |
|
"ou": 330, |
|
"all": 331, |
|
"##ko": 332, |
|
"aus": 333, |
|
"non": 334, |
|
"##si": 335, |
|
"##em": 336, |
|
"##ly": 337, |
|
"##b": 338, |
|
"film": 339, |
|
"##um": 340, |
|
"##sa": 341, |
|
"##v": 342, |
|
"##ga": 343, |
|
"##it": 344, |
|
"##mi": 345, |
|
"##ki": 346, |
|
"po": 347, |
|
"##ge": 348, |
|
"##at": 349, |
|
"##ba": 350, |
|
"##ur": 351, |
|
"##ke": 352, |
|
"##st": 353, |
|
"##ro": 354, |
|
"##el": 355, |
|
"##f": 356, |
|
"##man": 357, |
|
"##ci": 358, |
|
"##ul": 359, |
|
"##ndo": 360, |
|
"##mente": 361, |
|
"##ve": 362, |
|
"##me": 363, |
|
"##den": 364, |
|
"##za": 365, |
|
"##io": 366, |
|
"##or": 367, |
|
"##nya": 368, |
|
"##ya": 369, |
|
"##ten": 370, |
|
"##x": 371, |
|
"##om": 372, |
|
"##di": 373, |
|
"##kan": 374, |
|
"##ek": 375, |
|
"into": 376, |
|
"##ak": 377, |
|
"till": 378, |
|
"##lo": 379, |
|
"ble": 380, |
|
"ka": 381, |
|
"##S": 382, |
|
"mai": 383, |
|
"up": 384, |
|
"ng": 385, |
|
"##то": 386, |
|
"aux": 387, |
|
"##ny": 388, |
|
"##ho": 389, |
|
"##1": 390, |
|
"##ju": 391, |
|
"##lar": 392, |
|
"##ji": 393, |
|
"##go": 394, |
|
"##ts": 395, |
|
"##co": 396, |
|
"##ler": 397, |
|
"##tion": 398, |
|
"##ir": 399, |
|
"ad": 400, |
|
"##ku": 401, |
|
"##ze": 402, |
|
"##w": 403, |
|
"ki": 404, |
|
"##ns": 405, |
|
"##ik": 406, |
|
"##ers": 407, |
|
"##ry": 408, |
|
"me": 409, |
|
"##sen": 410, |
|
"##des": 411, |
|
"##ha": 412, |
|
"##ban": 413, |
|
"during": 414, |
|
"where": 415, |
|
"ze": 416, |
|
"##rs": 417, |
|
"can": 418, |
|
"out": 419, |
|
"wie": 420, |
|
"со": 421, |
|
"##ben": 422, |
|
"##ren": 423, |
|
"##sta": 424, |
|
"##rt": 425, |
|
"##tu": 426, |
|
"fu": 427, |
|
"##am": 428, |
|
"##ou": 429, |
|
"##ria": 430, |
|
"##ov": 431, |
|
"##il": 432, |
|
"##mo": 433, |
|
"vom": 434, |
|
"##wa": 435, |
|
"##jo": 436, |
|
"##ica": 437, |
|
"##be": 438, |
|
"##ion": 439, |
|
"##ken": 440, |
|
"##ina": 441, |
|
"##land": 442, |
|
"##lla": 443, |
|
"nu": 444, |
|
"##ine": 445, |
|
"##un": 446, |
|
"club": 447, |
|
"##che": 448, |
|
"team": 449, |
|
"##ner": 450, |
|
"##ic": 451, |
|
"ca": 452, |
|
"##ok": 453, |
|
"##ig": 454, |
|
"##th": 455, |
|
"##nu": 456, |
|
"##ada": 457, |
|
"##ste": 458, |
|
"##ut": 459, |
|
"pe": 460, |
|
"ke": 461, |
|
"area": 462, |
|
"any": 463, |
|
"##dos": 464, |
|
"##ton": 465, |
|
"##que": 466, |
|
"vor": 467, |
|
"##ty": 468, |
|
"##tes": 469, |
|
"##ble": 470, |
|
"##das": 471 |
|
} |
|
} |
|
} |