beomi committed on
Commit
c9da3f9
•
1 Parent(s): ebf5d67

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +5 -4
  2. tokenizer.json +13 -4
added_tokens.json CHANGED
@@ -1,5 +1,10 @@
1
  {
2
  "<pad>": 32000,
 
 
 
 
 
3
  "▁가": 32015,
4
  "β–κ°€κ²Œ": 37149,
5
  "▁가격": 32283,
@@ -10695,7 +10700,6 @@
10695
  "맞는": 44294,
10696
  "맞아": 44679,
10697
  "맞은": 43815,
10698
- "맟": 46334,
10699
  "맡": 45333,
10700
  "맣": 46283,
10701
  "매": 44869,
@@ -11127,7 +11131,6 @@
11127
  "봤다": 36235,
11128
  "봤어요": 41629,
11129
  "봤자": 39079,
11130
- "봬": 46332,
11131
  "뵈": 45923,
11132
  "뵙": 46134,
11133
  "부가": 33504,
@@ -12268,7 +12271,6 @@
12268
  "욘": 46111,
12269
  "욜": 46081,
12270
  "욤": 46003,
12271
- "욥": 46331,
12272
  "욧": 46126,
12273
  "용량": 41845,
12274
  "용으로": 35931,
@@ -12374,7 +12376,6 @@
12374
  "위험": 40543,
12375
  "윈": 45544,
12376
  "윌": 45815,
12377
- "윔": 46333,
12378
  "윗": 45648,
12379
  "윙": 45884,
12380
  "유가": 35835,
 
1
  {
2
  "<pad>": 32000,
3
+ "<|acc|>": 46333,
4
+ "<|endoftext|>": 46332,
5
+ "<|rrn|>": 46334,
6
+ "<|sep|>": 46331,
7
+ "<|tel|>": 46335,
8
  "▁가": 32015,
9
  "β–κ°€κ²Œ": 37149,
10
  "▁가격": 32283,
 
10700
  "맞는": 44294,
10701
  "맞아": 44679,
10702
  "맞은": 43815,
 
10703
  "맡": 45333,
10704
  "맣": 46283,
10705
  "매": 44869,
 
11131
  "봤다": 36235,
11132
  "봤어요": 41629,
11133
  "봤자": 39079,
 
11134
  "뵈": 45923,
11135
  "뵙": 46134,
11136
  "부가": 33504,
 
12271
  "욘": 46111,
12272
  "욜": 46081,
12273
  "욤": 46003,
 
12274
  "욧": 46126,
12275
  "용량": 41845,
12276
  "용으로": 35931,
 
12376
  "위험": 40543,
12377
  "윈": 45544,
12378
  "윌": 45815,
 
12379
  "윗": 45648,
12380
  "윙": 45884,
12381
  "유가": 35835,
tokenizer.json CHANGED
@@ -130010,7 +130010,7 @@
130010
  },
130011
  {
130012
  "id": 46331,
130013
- "content": "욥",
130014
  "single_word": false,
130015
  "lstrip": false,
130016
  "rstrip": false,
@@ -130019,7 +130019,7 @@
130019
  },
130020
  {
130021
  "id": 46332,
130022
- "content": "봬",
130023
  "single_word": false,
130024
  "lstrip": false,
130025
  "rstrip": false,
@@ -130028,7 +130028,7 @@
130028
  },
130029
  {
130030
  "id": 46333,
130031
- "content": "윔",
130032
  "single_word": false,
130033
  "lstrip": false,
130034
  "rstrip": false,
@@ -130037,7 +130037,16 @@
130037
  },
130038
  {
130039
  "id": 46334,
130040
- "content": "맟",
 
 
 
 
 
 
 
 
 
130041
  "single_word": false,
130042
  "lstrip": false,
130043
  "rstrip": false,
 
130010
  },
130011
  {
130012
  "id": 46331,
130013
+ "content": "<|sep|>",
130014
  "single_word": false,
130015
  "lstrip": false,
130016
  "rstrip": false,
 
130019
  },
130020
  {
130021
  "id": 46332,
130022
+ "content": "<|endoftext|>",
130023
  "single_word": false,
130024
  "lstrip": false,
130025
  "rstrip": false,
 
130028
  },
130029
  {
130030
  "id": 46333,
130031
+ "content": "<|acc|>",
130032
  "single_word": false,
130033
  "lstrip": false,
130034
  "rstrip": false,
 
130037
  },
130038
  {
130039
  "id": 46334,
130040
+ "content": "<|rrn|>",
130041
+ "single_word": false,
130042
+ "lstrip": false,
130043
+ "rstrip": false,
130044
+ "normalized": true,
130045
+ "special": false
130046
+ },
130047
+ {
130048
+ "id": 46335,
130049
+ "content": "<|tel|>",
130050
  "single_word": false,
130051
  "lstrip": false,
130052
  "rstrip": false,