{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "~", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": ">", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "[ILLEGAL]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" } ] }, "pre_tokenizer": { "type": "Split", "pattern": { "String": "" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "RobertaProcessing", "sep": [ "[SEP]", 3 ], "cls": [ "[CLS]", 2 ], "trim_offsets": false, "add_prefix_space": false }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "~": 0, ">": 1, "[CLS]": 2, "[SEP]": 3, "[PAD]": 4, "[UNK]": 5, "[MASK]": 6, "[ILLEGAL]": 7, " ": 8, "-": 9, "/": 10, "0": 11, "1": 12, "2": 13, "3": 14, "4": 15, "5": 16, "6": 17, "7": 18, "8": 19, "9": 20, "B": 21, "K": 22, "N": 23, "P": 24, "Q": 25, "R": 26, "a": 27, "b": 28, "c": 29, "d": 30, "e": 31, "f": 32, "g": 33, "h": 34, "k": 35, "n": 36, "p": 37, "q": 38, "r": 39, "w": 40 } } }