flores101_mm100_175M / tokenizer_config.json
VityaVitalich's picture
Upload tokenizer
066931b verified
raw
history blame
24.3 kB
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256001": {
"content": "__af__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256002": {
"content": "__am__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256003": {
"content": "__ar__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256004": {
"content": "__as__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256005": {
"content": "__ast__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256006": {
"content": "__ay__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256007": {
"content": "__az__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256008": {
"content": "__ba__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256009": {
"content": "__be__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256010": {
"content": "__bg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256011": {
"content": "__bn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256012": {
"content": "__br__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256013": {
"content": "__bs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256014": {
"content": "__ca__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256015": {
"content": "__ceb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256016": {
"content": "__cjk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256017": {
"content": "__cs__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256018": {
"content": "__cy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256019": {
"content": "__da__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256020": {
"content": "__de__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256021": {
"content": "__dyu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256022": {
"content": "__el__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256023": {
"content": "__en__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256024": {
"content": "__es__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256025": {
"content": "__et__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256026": {
"content": "__fa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256027": {
"content": "__ff__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256028": {
"content": "__fi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256029": {
"content": "__fr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256030": {
"content": "__fy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256031": {
"content": "__ga__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256032": {
"content": "__gd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256033": {
"content": "__gl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256034": {
"content": "__gu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256035": {
"content": "__ha__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256036": {
"content": "__he__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256037": {
"content": "__hi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256038": {
"content": "__hr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256039": {
"content": "__ht__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256040": {
"content": "__hu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256041": {
"content": "__hy__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256042": {
"content": "__id__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256043": {
"content": "__ig__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256044": {
"content": "__ilo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256045": {
"content": "__is__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256046": {
"content": "__it__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256047": {
"content": "__ja__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256048": {
"content": "__jv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256049": {
"content": "__ka__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256050": {
"content": "__kac__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256051": {
"content": "__kam__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256052": {
"content": "__kea__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256053": {
"content": "__kg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256054": {
"content": "__kk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256055": {
"content": "__km__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256056": {
"content": "__kmb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256057": {
"content": "__kmr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256058": {
"content": "__kn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256059": {
"content": "__ko__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256060": {
"content": "__ku__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256061": {
"content": "__ky__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256062": {
"content": "__lb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256063": {
"content": "__lg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256064": {
"content": "__ln__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256065": {
"content": "__lo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256066": {
"content": "__lt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256067": {
"content": "__luo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256068": {
"content": "__lv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256069": {
"content": "__mg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256070": {
"content": "__mi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256071": {
"content": "__mk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256072": {
"content": "__ml__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256073": {
"content": "__mn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256074": {
"content": "__mr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256075": {
"content": "__ms__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256076": {
"content": "__mt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256077": {
"content": "__my__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256078": {
"content": "__ne__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256079": {
"content": "__nl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256080": {
"content": "__no__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256081": {
"content": "__ns__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256082": {
"content": "__ny__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256083": {
"content": "__oc__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256084": {
"content": "__om__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256085": {
"content": "__or__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256086": {
"content": "__pa__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256087": {
"content": "__pl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256088": {
"content": "__ps__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256089": {
"content": "__pt__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256090": {
"content": "__qu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256091": {
"content": "__ro__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256092": {
"content": "__ru__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256093": {
"content": "__sd__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256094": {
"content": "__shn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256095": {
"content": "__si__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256096": {
"content": "__sk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256097": {
"content": "__sl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256098": {
"content": "__sn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256099": {
"content": "__so__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256100": {
"content": "__sq__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256101": {
"content": "__sr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256102": {
"content": "__ss__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256103": {
"content": "__su__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256104": {
"content": "__sv__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256105": {
"content": "__sw__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256106": {
"content": "__ta__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256107": {
"content": "__te__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256108": {
"content": "__tg__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256109": {
"content": "__th__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256110": {
"content": "__ti__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256111": {
"content": "__tl__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256112": {
"content": "__tn__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256113": {
"content": "__tr__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256114": {
"content": "__uk__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256115": {
"content": "__umb__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256116": {
"content": "__ur__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256117": {
"content": "__uz__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256118": {
"content": "__vi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256119": {
"content": "__wo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256120": {
"content": "__xh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256121": {
"content": "__yi__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256122": {
"content": "__yo__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256123": {
"content": "__zh__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256124": {
"content": "__zu__",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"__af__",
"__am__",
"__ar__",
"__as__",
"__ast__",
"__ay__",
"__az__",
"__ba__",
"__be__",
"__bg__",
"__bn__",
"__br__",
"__bs__",
"__ca__",
"__ceb__",
"__cjk__",
"__cs__",
"__cy__",
"__da__",
"__de__",
"__dyu__",
"__el__",
"__en__",
"__es__",
"__et__",
"__fa__",
"__ff__",
"__fi__",
"__fr__",
"__fy__",
"__ga__",
"__gd__",
"__gl__",
"__gu__",
"__ha__",
"__he__",
"__hi__",
"__hr__",
"__ht__",
"__hu__",
"__hy__",
"__id__",
"__ig__",
"__ilo__",
"__is__",
"__it__",
"__ja__",
"__jv__",
"__ka__",
"__kac__",
"__kam__",
"__kea__",
"__kg__",
"__kk__",
"__km__",
"__kmb__",
"__kmr__",
"__kn__",
"__ko__",
"__ku__",
"__ky__",
"__lb__",
"__lg__",
"__ln__",
"__lo__",
"__lt__",
"__luo__",
"__lv__",
"__mg__",
"__mi__",
"__mk__",
"__ml__",
"__mn__",
"__mr__",
"__ms__",
"__mt__",
"__my__",
"__ne__",
"__nl__",
"__no__",
"__ns__",
"__ny__",
"__oc__",
"__om__",
"__or__",
"__pa__",
"__pl__",
"__ps__",
"__pt__",
"__qu__",
"__ro__",
"__ru__",
"__sd__",
"__shn__",
"__si__",
"__sk__",
"__sl__",
"__sn__",
"__so__",
"__sq__",
"__sr__",
"__ss__",
"__su__",
"__sv__",
"__sw__",
"__ta__",
"__te__",
"__tg__",
"__th__",
"__ti__",
"__tl__",
"__tn__",
"__tr__",
"__uk__",
"__umb__",
"__ur__",
"__uz__",
"__vi__",
"__wo__",
"__xh__",
"__yi__",
"__yo__",
"__zh__",
"__zu__"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"language_codes": "m2m100",
"model_max_length": 1024,
"num_madeup_words": 0,
"pad_token": "<pad>",
"sep_token": "</s>",
"sp_model_kwargs": {},
"src_lang": "en",
"tgt_lang": null,
"tokenizer_class": "M2M100Tokenizer",
"unk_token": "<unk>"
}