selfies-bart / tokenizer.json
maykcaldas's picture
Upload tokenizer
24ddeaf
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[mask]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[bos]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[eos]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[unk]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[nop]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "[unk]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[mask]": 0,
"[bos]": 1,
"[eos]": 2,
"[unk]": 3,
"[nop]": 4,
".": 5,
"[N@]": 6,
"[Se]": 7,
"[Li+1]": 8,
"[=Ge]": 9,
"[\\N]": 10,
"[Mg+2]": 11,
"[ClH0]": 12,
"[Ba+2]": 13,
"[N]": 14,
"[=Mo]": 15,
"[SH1-2]": 16,
"[=O]": 17,
"[Zr+4]": 18,
"[\\N+1]": 19,
"[/Br]": 20,
"[Mo+2]": 21,
"[/-Ring1]": 22,
"[=SH0]": 23,
"[Sn]": 24,
"[=Si]": 25,
"[N-1]": 26,
"[Fe+2]": 27,
"[FH0]": 28,
"[B-1]": 29,
"[2H]": 30,
"[/N+1]": 31,
"[Cl-1]": 32,
"[=V+2]": 33,
"[\\NH1+1]": 34,
"[=N]": 35,
"[CH0]": 36,
"[P@@]": 37,
"[=Fe]": 38,
"[\\S]": 39,
"[Cl]": 40,
"[Al+3]": 41,
"[\\-Ring2]": 42,
"[As+3]": 43,
"[Ring1]": 44,
"[Sm+3]": 45,
"[=Ni]": 46,
"[Co+2]": 47,
"[NH1+2]": 48,
"[\\C@H1]": 49,
"[Mo]": 50,
"[S-2]": 51,
"[=PH0]": 52,
"[Zn]": 53,
"[Cr]": 54,
"[Br-1]": 55,
"[=P]": 56,
"[S]": 57,
"[#Branch2]": 58,
"[-/Ring1]": 59,
"[\\C@@H1]": 60,
"[NH2+1]": 61,
"[Sr]": 62,
"[=Cr]": 63,
"[=N-1]": 64,
"[=SnH2]": 65,
"[\\NH1]": 66,
"[PH0]": 67,
"[NH0]": 68,
"[Gd]": 69,
"[Na]": 70,
"[/C@@H1]": 71,
"[Au+1]": 72,
"[Ce]": 73,
"[Dy]": 74,
"[=Ti]": 75,
"[CH2]": 76,
"[Hg]": 77,
"[=Ring2]": 78,
"[=Zr]": 79,
"[Zr+2]": 80,
"[I]": 81,
"[Si]": 82,
"[=Branch1]": 83,
"[K]": 84,
"[BH0]": 85,
"[Pr+3]": 86,
"[Nd+3]": 87,
"[Ag+1]": 88,
"[In+3]": 89,
"[Cl+3]": 90,
"[Cu]": 91,
"[O-2]": 92,
"[W]": 93,
"[N+1]": 94,
"[=Ca]": 95,
"[=S]": 96,
"[=P+1]": 97,
"[/N]": 98,
"[#O+1]": 99,
"[H+1]": 100,
"[Nb+5]": 101,
"[NH3+1]": 102,
"[Branch3]": 103,
"[Gd+3]": 104,
"[Zn+2]": 105,
"[/C@]": 106,
"[Cd+2]": 107,
"[Ce+3]": 108,
"[Pb+2]": 109,
"[Na+1]": 110,
"[#Branch1]": 111,
"[Lu+3]": 112,
"[MgH2]": 113,
"[\\Br]": 114,
"[-/Ring2]": 115,
"[NH1+1]": 116,
"[S-1]": 117,
"[I-1]": 118,
"[O]": 119,
"[=V]": 120,
"[Y]": 121,
"[Rh]": 122,
"[#S]": 123,
"[Cs+1]": 124,
"[Ni]": 125,
"[H]": 126,
"[#N]": 127,
"[Cu+1]": 128,
"[=PH1]": 129,
"[SH1-1]": 130,
"[=Mn]": 131,
"[Mn+3]": 132,
"[#C]": 133,
"[Branch2]": 134,
"[Sb+3]": 135,
"[=B]": 136,
"[\\As]": 137,
"[=Sn]": 138,
"[H-1]": 139,
"[=Ce]": 140,
"[B+3]": 141,
"[=O+1]": 142,
"[=Branch2]": 143,
"[K+1]": 144,
"[=Pb]": 145,
"[P@]": 146,
"[Al]": 147,
"[=Ring1]": 148,
"[Co]": 149,
"[Be]": 150,
"[=Se]": 151,
"[/I]": 152,
"[C@@H1]": 153,
"[/S]": 154,
"[Ring2]": 155,
"[Ni+2]": 156,
"[/As]": 157,
"[C]": 158,
"[Ta]": 159,
"[F-1]": 160,
"[Sb]": 161,
"[Au]": 162,
"[/NH1+1]": 163,
"[Zr]": 164,
"[CH1-1]": 165,
"[Rh+3]": 166,
"[Si@]": 167,
"[\\O]": 168,
"[-\\Ring1]": 169,
"[SnH2]": 170,
"[Bi+3]": 171,
"[Fe]": 172,
"[Hf]": 173,
"[=NH1+1]": 174,
"[Pb]": 175,
"[\\Cl]": 176,
"[Ca+2]": 177,
"[Ir]": 178,
"[Pt+2]": 179,
"[Hg+2]": 180,
"[#B]": 181,
"[Ge]": 182,
"[=Te]": 183,
"[Nd]": 184,
"[OH1-1]": 185,
"[NH1]": 186,
"[C+1]": 187,
"[Co+3]": 188,
"[=Cd]": 189,
"[Y+3]": 190,
"[Ti+2]": 191,
"[Au+3]": 192,
"[OH0]": 193,
"[Pd+2]": 194,
"[O+1]": 195,
"[=As]": 196,
"[/C]": 197,
"[Hg+1]": 198,
"[=N+1]": 199,
"[Sn+2]": 200,
"[O-1]": 201,
"[F]": 202,
"[NH4+1]": 203,
"[Re]": 204,
"[/O]": 205,
"[Sr+2]": 206,
"[/-Ring2]": 207,
"[Dy+3]": 208,
"[S+1]": 209,
"[Sn+4]": 210,
"[\\I]": 211,
"[Ti+4]": 212,
"[Mn+2]": 213,
"[Mg]": 214,
"[B]": 215,
"[La]": 216,
"[=W]": 217,
"[Cs]": 218,
"[#C-1]": 219,
"[=Sr]": 220,
"[=S+1]": 221,
"[=Be]": 222,
"[C@H1]": 223,
"[\\C]": 224,
"[Ca]": 225,
"[Si+4]": 226,
"[PH1]": 227,
"[Nb]": 228,
"[Cr+3]": 229,
"[Branch1]": 230,
"[Hf+4]": 231,
"[=Nb]": 232,
"[P]": 233,
"[CH1]": 234,
"[Ag]": 235,
"[=C-1]": 236,
"[\\-Ring1]": 237,
"[C-1]": 238,
"[C@@]": 239,
"[/Cl]": 240,
"[Cu+2]": 241,
"[Cd]": 242,
"[CaH2]": 243,
"[=Co]": 244,
"[/C@@]": 245,
"[Li]": 246,
"[Ti]": 247,
"[=C]": 248,
"[P+1]": 249,
"[=CH0]": 250,
"[=Y]": 251,
"[V+2]": 252,
"[SiH1]": 253,
"[V]": 254,
"[La+3]": 255,
"[Ru+3]": 256,
"[=Cu]": 257,
"[C@]": 258,
"[Fe+3]": 259,
"[Te]": 260,
"[=Zn]": 261,
"[Bi]": 262,
"[Br]": 263,
"[/C@H1]": 264,
"[Mn]": 265,
"[SH0]": 266,
"[Pt+4]": 267,
"[As]": 268,
"[/NH1]": 269,
"[Nop]": 270,
"[Ring3]": 271,
"[#Branch3]": 272,
"[=Branch3]": 273,
"[\\C@]": 274,
"[#N+1]": 275,
"[=Ring3]": 276,
"[19F]": 277,
"[As-1]": 278,
"[#14C]": 279,
"[\\O-1]": 280,
"[KH1]": 281,
"[AsH3]": 282,
"[127Xe]": 283,
"[S@+1]": 284,
"[I+3]": 285,
"[82Rb]": 286,
"[\\S+1]": 287,
"[10B]": 288,
"[Cl+1]": 289,
"[=11C]": 290,
"[SiH1-1]": 291,
"[125I-1]": 292,
"[=Al]": 293,
"[=Se+1]": 294,
"[82Rb+1]": 295,
"[129Xe]": 296,
"[18F]": 297,
"[123Te]": 298,
"[OH1+1]": 299,
"[127I]": 300,
"[15NH1]": 301,
"[Zn-2]": 302,
"[\\Si]": 303,
"[NH1-1]": 304,
"[\\131I]": 305,
"[#Al]": 306,
"[81Kr]": 307,
"[Br+2]": 308,
"[/131I]": 309,
"[Mg+1]": 310,
"[P@@+1]": 311,
"[PH2]": 312,
"[\\C-1]": 313,
"[123IH1]": 314,
"[\\B]": 315,
"[=13CH1]": 316,
"[Cl+2]": 317,
"[SiH4]": 318,
"[Ba]": 319,
"[=PH2]": 320,
"[Ag-4]": 321,
"[73Se]": 322,
"[OH1]": 323,
"[SrH2]": 324,
"[223Ra]": 325,
"[15OH2]": 326,
"[13CH1]": 327,
"[/123I]": 328,
"[\\C@@]": 329,
"[18FH1]": 330,
"[B@-1]": 331,
"[=SH1]": 332,
"[14CH2]": 333,
"[Se-2]": 334,
"[=P@@]": 335,
"[SH2]": 336,
"[133Xe]": 337,
"[#Ring2]": 338,
"[AsH1]": 339,
"[47Ca+2]": 340,
"[=P@]": 341,
"[14C@H1]": 342,
"[15N]": 343,
"[Te+1]": 344,
"[Al-3]": 345,
"[14CH1]": 346,
"[B@@-1]": 347,
"[Te-1]": 348,
"[Si-1]": 349,
"[\\S-1]": 350,
"[Se-1]": 351,
"[18OH1]": 352,
"[=NH2+1]": 353,
"[11CH1]": 354,
"[=B-1]": 355,
"[11CH3]": 356,
"[S@@+1]": 357,
"[\\3H]": 358,
"[17F]": 359,
"[3H]": 360,
"[S@]": 361,
"[He]": 362,
"[/N-1]": 363,
"[42K+1]": 364,
"[11C]": 365,
"[\\NH1-1]": 366,
"[13CH3]": 367,
"[BH2-1]": 368,
"[/S-1]": 369,
"[11C@H1]": 370,
"[\\123I]": 371,
"[Be+2]": 372,
"[/13CH1]": 373,
"[135I]": 374,
"[14C@@H1]": 375,
"[/Te]": 376,
"[BH1-1]": 377,
"[Kr]": 378,
"[13NH3]": 379,
"[/13C]": 380,
"[13C]": 381,
"[=Mg]": 382,
"[/14CH1]": 383,
"[N@@+1]": 384,
"[SeH1]": 385,
"[-\\Ring2]": 386,
"[SiH3-1]": 387,
"[N@@]": 388,
"[123I-1]": 389,
"[I+1]": 390,
"[32PH1]": 391,
"[SeH2]": 392,
"[45Ca+2]": 393,
"[\\P]": 394,
"[22Na+1]": 395,
"[11CH2]": 396,
"[76BrH1]": 397,
"[/O-1]": 398,
"[\\P+1]": 399,
"[LiH1]": 400,
"[/P@]": 401,
"[=13C]": 402,
"[/B]": 403,
"[35S]": 404,
"[Xe]": 405,
"[=Te+1]": 406,
"[#Ring1]": 407,
"[Rb]": 408,
"[=S@@]": 409,
"[HH1]": 410,
"[124I]": 411,
"[/Si]": 412,
"[S@@]": 413,
"[Se+1]": 414,
"[/P]": 415,
"[85SrH2]": 416,
"[I+2]": 417,
"[32P]": 418,
"[/125I]": 419,
"[85Sr+2]": 420,
"[4H]": 421,
"[\\SeH1]": 422,
"[14CH3]": 423,
"[SH1]": 424,
"[124I-1]": 425,
"[=18O]": 426,
"[Zn+1]": 427,
"[N@+1]": 428,
"[125I]": 429,
"[/S+1]": 430,
"[SH1+1]": 431,
"[131I-1]": 432,
"[P@+1]": 433,
"[\\CH1-1]": 434,
"[/11CH3]": 435,
"[131Cs]": 436,
"[131I]": 437,
"[18F-1]": 438,
"[\\Se]": 439,
"[=CH1]": 440,
"[/F]": 441,
"[PH2+1]": 442,
"[TeH1]": 443,
"[Ra]": 444,
"[123I]": 445,
"[13CH2]": 446,
"[Rb+1]": 447,
"[/C-1]": 448,
"[=14C]": 449,
"[BH3-1]": 450,
"[125IH1]": 451,
"[/Se]": 452,
"[75Se]": 453,
"[/14C]": 454,
"[=S@]": 455,
"[\\PH1]": 456,
"[SiH2]": 457,
"[Ra+2]": 458,
"[NaH1]": 459,
"[14C]": 460,
"[76Br]": 461,
"[=14CH1]": 462,
"[223Ra+2]": 463,
"[/CH1-1]": 464,
"[As+1]": 465,
"[\\125I]": 466,
"[TeH2]": 467,
"[\\F]": 468,
"[14C@]": 469,
"[\\Te]": 470,
"[89Sr+2]": 471,
"[#11C-1]": 472
}
}
}