goldfish-models commited on
Commit
1e0916b
1 Parent(s): 1ca9415

Upload kha_latn_10mb tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX322]": 42718, "[XXXXX318]": 42714, "[XXXXX263]": 42659, "[XXXXX342]": 42738, "[XXXXX354]": 42750, "[XXXXX22]": 42418, "[XXXXX394]": 42790, "[XXXXX349]": 42745, "[XXXXX364]": 42760, "[XXXXX142]": 42538, "[XXXXX385]": 42781, "[XXXXX593]": 42989, "[XXXXX46]": 42442, "[XXXXX139]": 42535, "[XXXXX26]": 42422, "[XXXXX12]": 42408, "[XXXXX219]": 42615, "[XXXXX534]": 42930, "[XXXXX527]": 42923, "[XXXXX201]": 42597, "[XXXXX41]": 42437, "[XXXXX336]": 42732, "[XXXXX533]": 42929, "[XXXXX323]": 42719, "[XXXXX277]": 42673, "[XXXXX245]": 42641, "[XXXXX559]": 42955, "[XXXXX86]": 42482, "[XXXXX90]": 42486, "[XXXXX43]": 42439, "[XXXXX506]": 42902, "[XXXXX542]": 42938, "[XXXXX376]": 42772, "[XXXXX105]": 42501, "[XXXXX272]": 42668, "[XXXXX603]": 42999, "[XXXXX538]": 42934, "[XXXXX308]": 42704, "[XXXXX348]": 42744, "[XXXXX439]": 42835, "[XXXXX262]": 42658, "[XXXXX68]": 42464, "[XXXXX379]": 42775, "[XXXXX8]": 42404, "[XXXXX520]": 42916, "[XXXXX234]": 42630, "[XXXXX59]": 42455, "[XXXXX433]": 42829, "[XXXXX587]": 42983, "[XXXXX367]": 42763, "[XXXXX469]": 42865, "[XXXXX399]": 42795, "[XXXXX410]": 42806, "[XXXXX416]": 42812, "[XXXXX574]": 42970, "[XXXXX310]": 42706, "[XXXXX207]": 42603, "[XXXXX288]": 42684, "[XXXXX296]": 42692, "[XXXXX532]": 42928, "[XXXXX372]": 42768, "[XXXXX161]": 42557, "[XXXXX405]": 42801, "[XXXXX499]": 42895, "[XXXXX273]": 42669, "[XXXXX64]": 42460, "[XXXXX17]": 42413, "[XXXXX350]": 42746, "[XXXXX246]": 42642, "[XXXXX434]": 42830, "[XXXXX537]": 42933, "[XXXXX490]": 42886, "[XXXXX193]": 42589, "[XXXXX176]": 42572, "[XXXXX149]": 42545, "[XXXXX11]": 42407, "[XXXXX275]": 42671, "[XXXXX306]": 42702, "[XXXXX403]": 42799, "[XXXXX382]": 42778, "[XXXXX341]": 42737, "[XXXXX175]": 42571, "[XXXXX84]": 42480, "[XXXXX425]": 42821, "[XXXXX160]": 42556, "[XXXXX409]": 42805, "[XXXXX268]": 42664, "[XXXXX23]": 42419, "[XXXXX92]": 42488, "[XXXXX414]": 42810, "[XXXXX362]": 42758, "[XXXXX599]": 42995, "[XXXXX166]": 42562, "[XXXXX389]": 42785, "[XXXXX553]": 42949, "[XXXXX608]": 43004, "[XXXXX301]": 42697, "[XXXXX502]": 42898, "[XXXXX111]": 42507, "[XXXXX228]": 42624, "[XXXXX315]": 42711, "[XXXXX187]": 42583, "[XXXXX523]": 42919, "[XXXXX73]": 42469, "[XXXXX446]": 42842, "[XXXXX540]": 42936, "[XXXXX102]": 42498, "[XXXXX282]": 42678, "[XXXXX408]": 42804, "[XXXXX331]": 42727, "[XXXXX134]": 42530, "[XXXXX211]": 42607, "[XXXXX535]": 42931, "[XXXXX396]": 42792, "[XXXXX109]": 42505, "[XXXXX70]": 42466, "[XXXXX202]": 42598, "[XXXXX401]": 42797, "[XXXXX579]": 42975, "[XXXXX417]": 42813, "[XXXXX253]": 42649, "[XXXXX515]": 42911, "[XXXXX610]": 43006, "[XXXXX448]": 42844, "[XXXXX529]": 42925, "[XXXXX200]": 42596, "[XXXXX568]": 42964, "[XXXXX569]": 42965, "[XXXXX584]": 42980, "[XXXXX128]": 42524, "[XXXXX549]": 42945, "[XXXXX400]": 42796, "[XXXXX147]": 42543, "[XXXXX456]": 42852, "[XXXXX368]": 42764, "[XXXXX595]": 42991, "[XXXXX597]": 42993, "[XXXXX583]": 42979, "[XXXXX605]": 43001, "[XXXXX516]": 42912, "[XXXXX249]": 42645, "[XXXXX428]": 42824, "[XXXXX397]": 42793, "[XXXXX260]": 42656, "[XXXXX338]": 42734, "[XXXXX45]": 42441, "[XXXXX123]": 42519, "[XXXXX254]": 42650, "[XXXXX37]": 42433, "[XXXXX244]": 42640, "[XXXXX519]": 42915, "[XXXXX474]": 42870, "[XXXXX530]": 42926, "[XXXXX48]": 42444, "[XXXXX30]": 42426, "[XXXXX384]": 42780, "[XXXXX480]": 42876, "[XXXXX186]": 42582, "[XXXXX189]": 42585, "[CLS]": 42392, "[XXXXX287]": 42683, "[XXXXX332]": 42728, "[XXXXX575]": 42971, "[XXXXX181]": 42577, "[XXXXX250]": 42646, "[XXXXX390]": 42786, "[XXXXX9]": 42405, "[XXXXX174]": 42570, "<pad>": 42394, "[XXXXX150]": 42546, "[XXXXX311]": 42707, "[XXXXX183]": 42579, "[XXXXX276]": 42672, "[XXXXX421]": 42817, "[XXXXX351]": 42747, "[XXXXX28]": 42424, "[XXXXX387]": 42783, "[XXXXX216]": 42612, "[XXXXX77]": 42473, "[XXXXX292]": 42688, "[XXXXX407]": 42803, "[XXXXX514]": 42910, "[XXXXX261]": 42657, "[XXXXX355]": 42751, "[XXXXX119]": 42515, "[XXXXX326]": 42722, "[XXXXX377]": 42773, "[XXXXX444]": 42840, "[XXXXX472]": 42868, "[XXXXX137]": 42533, "[XXXXX305]": 42701, "[XXXXX468]": 42864, "[XXXXX316]": 42712, "[XXXXX122]": 42518, "[XXXXX406]": 42802, "[XXXXX248]": 42644, "[XXXXX103]": 42499, "[XXXXX431]": 42827, "[XXXXX440]": 42836, "[XXXXX213]": 42609, "[XXXXX482]": 42878, "[XXXXX143]": 42539, "[XXXXX169]": 42565, "[XXXXX571]": 42967, "[XXXXX581]": 42977, "[XXXXX291]": 42687, "[XXXXX21]": 42417, "[XXXXX131]": 42527, "[XXXXX133]": 42529, "[XXXXX233]": 42629, "[XXXXX585]": 42981, "[XXXXX61]": 42457, "[XXXXX203]": 42599, "[XXXXX10]": 42406, "[XXXXX552]": 42948, "[XXXXX565]": 42961, "[XXXXX546]": 42942, "[XXXXX56]": 42452, "[MASK]": 42395, "[XXXXX209]": 42605, "[XXXXX359]": 42755, "[XXXXX498]": 42894, "[XXXXX148]": 42544, "[XXXXX71]": 42467, "[XXXXX52]": 42448, "[XXXXX251]": 42647, "[XXXXX460]": 42856, "[XXXXX247]": 42643, "[XXXXX524]": 42920, "[XXXXX239]": 42635, "[XXXXX218]": 42614, "[XXXXX126]": 42522, "[XXXXX180]": 42576, "[XXXXX411]": 42807, "[XXXXX580]": 42976, "[XXXXX429]": 42825, "[XXXXX154]": 42550, "[XXXXX508]": 42904, "[XXXXX374]": 42770, "[XXXXX381]": 42777, "[XXXXX212]": 42608, "[XXXXX453]": 42849, "[XXXXX606]": 43002, "[XXXXX238]": 42634, "[XXXXX582]": 42978, "[XXXXX270]": 42666, "[XXXXX555]": 42951, "[XXXXX424]": 42820, "[XXXXX511]": 42907, "[XXXXX513]": 42909, "[XXXXX302]": 42698, "[XXXXX65]": 42461, "[XXXXX413]": 42809, "[XXXXX159]": 42555, "[XXXXX236]": 42632, "[XXXXX36]": 42432, "[XXXXX153]": 42549, "[XXXXX509]": 42905, "[XXXXX243]": 42639, "[XXXXX53]": 42449, "[XXXXX100]": 42496, "[XXXXX278]": 42674, "[XXXXX304]": 42700, "[XXXXX49]": 42445, "[XXXXX383]": 42779, "[XXXXX487]": 42883, "[XXXXX525]": 42921, "[XXXXX330]": 42726, "[XXXXX611]": 43007, "[XXXXX235]": 42631, "[XXXXX67]": 42463, "[XXXXX140]": 42536, "[XXXXX607]": 43003, "[XXXXX560]": 42956, "[XXXXX98]": 42494, "[XXXXX319]": 42715, "[XXXXX531]": 42927, "[XXXXX222]": 42618, "[XXXXX14]": 42410, "[XXXXX257]": 42653, "[XXXXX371]": 42767, "[XXXXX442]": 42838, "[XXXXX208]": 42604, "[XXXXX432]": 42828, "[XXXXX436]": 42832, "[XXXXX386]": 42782, "[XXXXX600]": 42996, "[XXXXX352]": 42748, "[XXXXX517]": 42913, "[XXXXX327]": 42723, "[XXXXX343]": 42739, "[XXXXX265]": 42661, "[XXXXX195]": 42591, "[XXXXX240]": 42636, "[XXXXX591]": 42987, "[XXXXX558]": 42954, "[XXXXX461]": 42857, "[XXXXX378]": 42774, "[XXXXX404]": 42800, "[XXXXX173]": 42569, "[XXXXX4]": 42400, "[XXXXX99]": 42495, "[XXXXX373]": 42769, "[XXXXX395]": 42791, "[XXXXX252]": 42648, "[XXXXX589]": 42985, "[XXXXX264]": 42660, "[XXXXX184]": 42580, "[XXXXX496]": 42892, "[XXXXX286]": 42682, "[XXXXX309]": 42705, "[XXXXX467]": 42863, "[XXXXX136]": 42532, "[XXXXX165]": 42561, "[XXXXX117]": 42513, "[XXXXX47]": 42443, "[XXXXX510]": 42906, "[XXXXX1]": 42397, "[XXXXX450]": 42846, "[XXXXX94]": 42490, "[XXXXX298]": 42694, "[XXXXX241]": 42637, "[XXXXX360]": 42756, "[XXXXX317]": 42713, "[XXXXX178]": 42574, "[XXXXX81]": 42477, "[XXXXX521]": 42917, "[XXXXX157]": 42553, "[XXXXX87]": 42483, "[XXXXX258]": 42654, "[XXXXX438]": 42834, "[XXXXX220]": 42616, "[XXXXX54]": 42450, "[XXXXX522]": 42918, "[XXXXX570]": 42966, "[XXXXX242]": 42638, "[XXXXX504]": 42900, "[XXXXX307]": 42703, "[XXXXX556]": 42952, "[XXXXX598]": 42994, "[XXXXX566]": 42962, "[XXXXX363]": 42759, "[XXXXX491]": 42887, "[XXXXX485]": 42881, "[XXXXX466]": 42862, "[XXXXX464]": 42860, "[XXXXX357]": 42753, "[XXXXX15]": 42411, "[XXXXX437]": 42833, "[XXXXX223]": 42619, "[XXXXX539]": 42935, "[XXXXX127]": 42523, "[XXXXX132]": 42528, "[XXXXX526]": 42922, "[XXXXX190]": 42586, "[XXXXX135]": 42531, "[XXXXX422]": 42818, "[XXXXX423]": 42819, "[XXXXX518]": 42914, "[XXXXX562]": 42958, "[XXXXX602]": 42998, "[XXXXX7]": 42403, "[XXXXX74]": 42470, "[XXXXX283]": 42679, "[XXXXX294]": 42690, "[XXXXX5]": 42401, "[XXXXX550]": 42946, "[XXXXX297]": 42693, "[XXXXX130]": 42526, "[XXXXX13]": 42409, "[XXXXX313]": 42709, "[XXXXX365]": 42761, "[XXXXX24]": 42420, "[XXXXX170]": 42566, "[XXXXX179]": 42575, "[XXXXX388]": 42784, "[XXXXX198]": 42594, "[XXXXX443]": 42839, "[XXXXX545]": 42941, "[XXXXX210]": 42606, "[XXXXX97]": 42493, "[XXXXX185]": 42581, "[XXXXX269]": 42665, "[XXXXX60]": 42456, "[XXXXX493]": 42889, "[XXXXX164]": 42560, "[XXXXX42]": 42438, "[XXXXX156]": 42552, "[XXXXX544]": 42940, "[XXXXX55]": 42451, "[XXXXX290]": 42686, "[XXXXX172]": 42568, "[XXXXX391]": 42787, "[XXXXX455]": 42851, "[XXXXX289]": 42685, "[XXXXX25]": 42421, "[XXXXX151]": 42547, "[XXXXX576]": 42972, "[XXXXX476]": 42872, "[XXXXX484]": 42880, "[XXXXX95]": 42491, "[XXXXX40]": 42436, "[XXXXX91]": 42487, "[XXXXX564]": 42960, "[XXXXX303]": 42699, "[XXXXX197]": 42593, "[XXXXX18]": 42414, "[XXXXX463]": 42859, "[XXXXX486]": 42882, "[XXXXX85]": 42481, "[SEP]": 42393, "[XXXXX83]": 42479, "[XXXXX507]": 42903, "[XXXXX16]": 42412, "[XXXXX328]": 42724, "[XXXXX340]": 42736, "[XXXXX255]": 42651, "[XXXXX141]": 42537, "[XXXXX104]": 42500, "[XXXXX20]": 42416, "[XXXXX588]": 42984, "[XXXXX418]": 42814, "[XXXXX578]": 42974, "[XXXXX231]": 42627, "[XXXXX321]": 42717, "[XXXXX110]": 42506, "[XXXXX192]": 42588, "[XXXXX225]": 42621, "[XXXXX79]": 42475, "[XXXXX191]": 42587, "[XXXXX528]": 42924, "[XXXXX572]": 42968, "[XXXXX561]": 42957, "[XXXXX152]": 42548, "[XXXXX494]": 42890, "[XXXXX230]": 42626, "[XXXXX590]": 42986, "[XXXXX118]": 42514, "[XXXXX82]": 42478, "[XXXXX285]": 42681, "[XXXXX551]": 42947, "[XXXXX76]": 42472, "[XXXXX415]": 42811, "[XXXXX380]": 42776, "[XXXXX295]": 42691, "[XXXXX392]": 42788, "[XXXXX346]": 42742, "[XXXXX592]": 42988, "[XXXXX112]": 42508, "[XXXXX412]": 42808, "[XXXXX280]": 42676, "[XXXXX347]": 42743, "[XXXXX214]": 42610, "[XXXXX329]": 42725, "[XXXXX217]": 42613, "[XXXXX430]": 42826, "[XXXXX505]": 42901, "[XXXXX6]": 42402, "[XXXXX435]": 42831, "[XXXXX51]": 42447, "[XXXXX34]": 42430, "[XXXXX345]": 42741, "[XXXXX478]": 42874, "[XXXXX337]": 42733, "[XXXXX358]": 42754, "[XXXXX293]": 42689, "[XXXXX39]": 42435, "[XXXXX3]": 42399, "[XXXXX465]": 42861, "[XXXXX447]": 42843, "[XXXXX547]": 42943, "[XXXXX93]": 42489, "[XXXXX473]": 42869, "[XXXXX33]": 42429, "[XXXXX115]": 42511, "[XXXXX471]": 42867, "[XXXXX69]": 42465, "[XXXXX356]": 42752, "[XXXXX320]": 42716, "[XXXXX284]": 42680, "[XXXXX325]": 42721, "[XXXXX334]": 42730, "[XXXXX171]": 42567, "[XXXXX366]": 42762, "[XXXXX503]": 42899, "[XXXXX188]": 42584, "[XXXXX146]": 42542, "[XXXXX121]": 42517, "[XXXXX194]": 42590, "[XXXXX369]": 42765, "[XXXXX145]": 42541, "[XXXXX449]": 42845, "[XXXXX271]": 42667, "[XXXXX488]": 42884, "[XXXXX138]": 42534, "[XXXXX107]": 42503, "[XXXXX452]": 42848, "[XXXXX577]": 42973, "[XXXXX224]": 42620, "[XXXXX361]": 42757, "[XXXXX483]": 42879, "[XXXXX497]": 42893, "[XXXXX226]": 42622, "[XXXXX144]": 42540, "[XXXXX89]": 42485, "[XXXXX501]": 42897, "[XXXXX477]": 42873, "[XXXXX557]": 42953, "[XXXXX573]": 42969, "[XXXXX182]": 42578, "[XXXXX229]": 42625, "[XXXXX445]": 42841, "[XXXXX38]": 42434, "[XXXXX314]": 42710, "[XXXXX237]": 42633, "[XXXXX101]": 42497, "[XXXXX267]": 42663, "[XXXXX256]": 42652, "[XXXXX489]": 42885, "[XXXXX457]": 42853, "[XXXXX279]": 42675, "[XXXXX441]": 42837, "[XXXXX206]": 42602, "[XXXXX300]": 42696, "[XXXXX402]": 42798, "[XXXXX492]": 42888, "[XXXXX299]": 42695, "[XXXXX420]": 42816, "[XXXXX75]": 42471, "[XXXXX470]": 42866, "[XXXXX609]": 43005, "[XXXXX475]": 42871, "[XXXXX227]": 42623, "[XXXXX554]": 42950, "[XXXXX586]": 42982, "[XXXXX88]": 42484, "[XXXXX601]": 42997, "[XXXXX29]": 42425, "[XXXXX196]": 42592, "[XXXXX114]": 42510, "[XXXXX0]": 42396, "[XXXXX215]": 42611, "[XXXXX129]": 42525, "[XXXXX567]": 42963, "[XXXXX158]": 42554, "[XXXXX459]": 42855, "[XXXXX563]": 42959, "[XXXXX541]": 42937, "[XXXXX80]": 42476, "[XXXXX163]": 42559, "[XXXXX63]": 42459, "[XXXXX335]": 42731, "[XXXXX536]": 42932, "[XXXXX333]": 42729, "[XXXXX27]": 42423, "[XXXXX495]": 42891, "[XXXXX451]": 42847, "[XXXXX124]": 42520, "[XXXXX500]": 42896, "[XXXXX419]": 42815, "[XXXXX116]": 42512, "[XXXXX543]": 42939, "[XXXXX120]": 42516, "[XXXXX205]": 42601, "[XXXXX312]": 42708, "[XXXXX370]": 42766, "[XXXXX31]": 42427, "[XXXXX204]": 42600, "[XXXXX339]": 42735, "[XXXXX106]": 42502, "[XXXXX96]": 42492, "[XXXXX512]": 42908, "[XXXXX66]": 42462, "[XXXXX221]": 42617, "[XXXXX167]": 42563, "[XXXXX58]": 42454, "[XXXXX427]": 42823, "[XXXXX353]": 42749, "[XXXXX479]": 42875, "[XXXXX125]": 42521, "[XXXXX57]": 42453, "[XXXXX78]": 42474, "[XXXXX108]": 42504, "[XXXXX2]": 42398, "[XXXXX274]": 42670, "[XXXXX155]": 42551, "[XXXXX35]": 42431, "[XXXXX426]": 42822, "[XXXXX462]": 42858, "[XXXXX232]": 42628, "[XXXXX393]": 42789, "[XXXXX548]": 42944, "[XXXXX19]": 42415, "[XXXXX594]": 42990, "[XXXXX375]": 42771, "[XXXXX32]": 42428, "[XXXXX44]": 42440, "[XXXXX168]": 42564, "[XXXXX62]": 42458, "[XXXXX454]": 42850, "[XXXXX604]": 43000, "[XXXXX199]": 42595, "[XXXXX177]": 42573, "[XXXXX344]": 42740, "[XXXXX324]": 42720, "[XXXXX50]": 42446, "[XXXXX481]": 42877, "[XXXXX281]": 42677, "[XXXXX72]": 42468, "[XXXXX259]": 42655, "[XXXXX266]": 42662, "[XXXXX398]": 42794, "[XXXXX596]": 42992, "[XXXXX458]": 42854, "[XXXXX113]": 42509, "[XXXXX162]": 42558}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", "[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", "[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]", "[XXXXX355]", "[XXXXX356]", "[XXXXX357]", "[XXXXX358]", "[XXXXX359]", "[XXXXX360]", "[XXXXX361]", "[XXXXX362]", "[XXXXX363]", "[XXXXX364]", "[XXXXX365]", "[XXXXX366]", "[XXXXX367]", "[XXXXX368]", "[XXXXX369]", "[XXXXX370]", "[XXXXX371]", "[XXXXX372]", "[XXXXX373]", "[XXXXX374]", "[XXXXX375]", "[XXXXX376]", "[XXXXX377]", "[XXXXX378]", "[XXXXX379]", "[XXXXX380]", "[XXXXX381]", "[XXXXX382]", "[XXXXX383]", "[XXXXX384]", "[XXXXX385]", "[XXXXX386]", "[XXXXX387]", "[XXXXX388]", "[XXXXX389]", "[XXXXX390]", "[XXXXX391]", "[XXXXX392]", "[XXXXX393]", "[XXXXX394]", "[XXXXX395]", "[XXXXX396]", "[XXXXX397]", "[XXXXX398]", "[XXXXX399]", "[XXXXX400]", "[XXXXX401]", "[XXXXX402]", "[XXXXX403]", "[XXXXX404]", "[XXXXX405]", "[XXXXX406]", "[XXXXX407]", "[XXXXX408]", "[XXXXX409]", "[XXXXX410]", "[XXXXX411]", "[XXXXX412]", "[XXXXX413]", "[XXXXX414]", "[XXXXX415]", "[XXXXX416]", "[XXXXX417]", "[XXXXX418]", "[XXXXX419]", "[XXXXX420]", "[XXXXX421]", "[XXXXX422]", "[XXXXX423]", "[XXXXX424]", "[XXXXX425]", "[XXXXX426]", "[XXXXX427]", "[XXXXX428]", "[XXXXX429]", "[XXXXX430]", "[XXXXX431]", "[XXXXX432]", "[XXXXX433]", "[XXXXX434]", "[XXXXX435]", "[XXXXX436]", "[XXXXX437]", "[XXXXX438]", "[XXXXX439]", "[XXXXX440]", "[XXXXX441]", "[XXXXX442]", "[XXXXX443]", "[XXXXX444]", "[XXXXX445]", "[XXXXX446]", "[XXXXX447]", "[XXXXX448]", "[XXXXX449]", "[XXXXX450]", "[XXXXX451]", "[XXXXX452]", "[XXXXX453]", "[XXXXX454]", "[XXXXX455]", "[XXXXX456]", "[XXXXX457]", "[XXXXX458]", "[XXXXX459]", "[XXXXX460]", "[XXXXX461]", "[XXXXX462]", "[XXXXX463]", "[XXXXX464]", "[XXXXX465]", "[XXXXX466]", "[XXXXX467]", "[XXXXX468]", "[XXXXX469]", "[XXXXX470]", "[XXXXX471]", "[XXXXX472]", "[XXXXX473]", "[XXXXX474]", "[XXXXX475]", "[XXXXX476]", "[XXXXX477]", "[XXXXX478]", "[XXXXX479]", "[XXXXX480]", "[XXXXX481]", "[XXXXX482]", "[XXXXX483]", "[XXXXX484]", "[XXXXX485]", "[XXXXX486]", "[XXXXX487]", "[XXXXX488]", "[XXXXX489]", "[XXXXX490]", "[XXXXX491]", "[XXXXX492]", "[XXXXX493]", "[XXXXX494]", "[XXXXX495]", "[XXXXX496]", "[XXXXX497]", "[XXXXX498]", "[XXXXX499]", "[XXXXX500]", "[XXXXX501]", "[XXXXX502]", "[XXXXX503]", "[XXXXX504]", "[XXXXX505]", "[XXXXX506]", "[XXXXX507]", "[XXXXX508]", "[XXXXX509]", "[XXXXX510]", "[XXXXX511]", "[XXXXX512]", "[XXXXX513]", "[XXXXX514]", "[XXXXX515]", "[XXXXX516]", "[XXXXX517]", "[XXXXX518]", "[XXXXX519]", "[XXXXX520]", "[XXXXX521]", "[XXXXX522]", "[XXXXX523]", "[XXXXX524]", "[XXXXX525]", "[XXXXX526]", "[XXXXX527]", "[XXXXX528]", "[XXXXX529]", "[XXXXX530]", "[XXXXX531]", "[XXXXX532]", "[XXXXX533]", "[XXXXX534]", "[XXXXX535]", "[XXXXX536]", "[XXXXX537]", "[XXXXX538]", "[XXXXX539]", "[XXXXX540]", "[XXXXX541]", "[XXXXX542]", "[XXXXX543]", "[XXXXX544]", "[XXXXX545]", "[XXXXX546]", "[XXXXX547]", "[XXXXX548]", "[XXXXX549]", "[XXXXX550]", "[XXXXX551]", "[XXXXX552]", "[XXXXX553]", "[XXXXX554]", "[XXXXX555]", "[XXXXX556]", "[XXXXX557]", "[XXXXX558]", "[XXXXX559]", "[XXXXX560]", "[XXXXX561]", "[XXXXX562]", "[XXXXX563]", "[XXXXX564]", "[XXXXX565]", "[XXXXX566]", "[XXXXX567]", "[XXXXX568]", "[XXXXX569]", "[XXXXX570]", "[XXXXX571]", "[XXXXX572]", "[XXXXX573]", "[XXXXX574]", "[XXXXX575]", "[XXXXX576]", "[XXXXX577]", "[XXXXX578]", "[XXXXX579]", "[XXXXX580]", "[XXXXX581]", "[XXXXX582]", "[XXXXX583]", "[XXXXX584]", "[XXXXX585]", "[XXXXX586]", "[XXXXX587]", "[XXXXX588]", "[XXXXX589]", "[XXXXX590]", "[XXXXX591]", "[XXXXX592]", "[XXXXX593]", "[XXXXX594]", "[XXXXX595]", "[XXXXX596]", "[XXXXX597]", "[XXXXX598]", "[XXXXX599]", "[XXXXX600]", "[XXXXX601]", "[XXXXX602]", "[XXXXX603]", "[XXXXX604]", "[XXXXX605]", "[XXXXX606]", "[XXXXX607]", "[XXXXX608]", "[XXXXX609]", "[XXXXX610]", "[XXXXX611]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da95b934e58677013177b64faa0eee88ee8873f2285de1f0687d7d366da3f656
3
+ size 916851
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/10mb/kha_latn_10mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/10mb/kha_latn_10mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}