Upload tokenizer
Browse files- added_tokens.json +5 -4
- tokenizer.json +13 -4
added_tokens.json
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
{
|
2 |
"<pad>": 32000,
|
|
|
|
|
|
|
|
|
|
|
3 |
"βκ°": 32015,
|
4 |
"βκ°κ²": 37149,
|
5 |
"βκ°κ²©": 32283,
|
@@ -10695,7 +10700,6 @@
|
|
10695 |
"λ§λ": 44294,
|
10696 |
"λ§μ": 44679,
|
10697 |
"λ§μ": 43815,
|
10698 |
-
"λ§": 46334,
|
10699 |
"맑": 45333,
|
10700 |
"맣": 46283,
|
10701 |
"맀": 44869,
|
@@ -11127,7 +11131,6 @@
|
|
11127 |
"λ΄€λ€": 36235,
|
11128 |
"λ΄€μ΄μ": 41629,
|
11129 |
"λ΄€μ": 39079,
|
11130 |
-
"봬": 46332,
|
11131 |
"λ΅": 45923,
|
11132 |
"λ΅": 46134,
|
11133 |
"λΆκ°": 33504,
|
@@ -12268,7 +12271,6 @@
|
|
12268 |
"μ": 46111,
|
12269 |
"μ": 46081,
|
12270 |
"μ€": 46003,
|
12271 |
-
"μ₯": 46331,
|
12272 |
"μ§": 46126,
|
12273 |
"μ©λ": 41845,
|
12274 |
"μ©μΌλ‘": 35931,
|
@@ -12374,7 +12376,6 @@
|
|
12374 |
"μν": 40543,
|
12375 |
"μ": 45544,
|
12376 |
"μ": 45815,
|
12377 |
-
"μ": 46333,
|
12378 |
"μ": 45648,
|
12379 |
"μ": 45884,
|
12380 |
"μ κ°": 35835,
|
|
|
1 |
{
|
2 |
"<pad>": 32000,
|
3 |
+
"<|acc|>": 46333,
|
4 |
+
"<|endoftext|>": 46332,
|
5 |
+
"<|rrn|>": 46334,
|
6 |
+
"<|sep|>": 46331,
|
7 |
+
"<|tel|>": 46335,
|
8 |
"βκ°": 32015,
|
9 |
"βκ°κ²": 37149,
|
10 |
"βκ°κ²©": 32283,
|
|
|
10700 |
"λ§λ": 44294,
|
10701 |
"λ§μ": 44679,
|
10702 |
"λ§μ": 43815,
|
|
|
10703 |
"맑": 45333,
|
10704 |
"맣": 46283,
|
10705 |
"맀": 44869,
|
|
|
11131 |
"λ΄€λ€": 36235,
|
11132 |
"λ΄€μ΄μ": 41629,
|
11133 |
"λ΄€μ": 39079,
|
|
|
11134 |
"λ΅": 45923,
|
11135 |
"λ΅": 46134,
|
11136 |
"λΆκ°": 33504,
|
|
|
12271 |
"μ": 46111,
|
12272 |
"μ": 46081,
|
12273 |
"μ€": 46003,
|
|
|
12274 |
"μ§": 46126,
|
12275 |
"μ©λ": 41845,
|
12276 |
"μ©μΌλ‘": 35931,
|
|
|
12376 |
"μν": 40543,
|
12377 |
"μ": 45544,
|
12378 |
"μ": 45815,
|
|
|
12379 |
"μ": 45648,
|
12380 |
"μ": 45884,
|
12381 |
"μ κ°": 35835,
|
tokenizer.json
CHANGED
@@ -130010,7 +130010,7 @@
|
|
130010 |
},
|
130011 |
{
|
130012 |
"id": 46331,
|
130013 |
-
"content": "
|
130014 |
"single_word": false,
|
130015 |
"lstrip": false,
|
130016 |
"rstrip": false,
|
@@ -130019,7 +130019,7 @@
|
|
130019 |
},
|
130020 |
{
|
130021 |
"id": 46332,
|
130022 |
-
"content": "
|
130023 |
"single_word": false,
|
130024 |
"lstrip": false,
|
130025 |
"rstrip": false,
|
@@ -130028,7 +130028,7 @@
|
|
130028 |
},
|
130029 |
{
|
130030 |
"id": 46333,
|
130031 |
-
"content": "
|
130032 |
"single_word": false,
|
130033 |
"lstrip": false,
|
130034 |
"rstrip": false,
|
@@ -130037,7 +130037,16 @@
|
|
130037 |
},
|
130038 |
{
|
130039 |
"id": 46334,
|
130040 |
-
"content": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130041 |
"single_word": false,
|
130042 |
"lstrip": false,
|
130043 |
"rstrip": false,
|
|
|
130010 |
},
|
130011 |
{
|
130012 |
"id": 46331,
|
130013 |
+
"content": "<|sep|>",
|
130014 |
"single_word": false,
|
130015 |
"lstrip": false,
|
130016 |
"rstrip": false,
|
|
|
130019 |
},
|
130020 |
{
|
130021 |
"id": 46332,
|
130022 |
+
"content": "<|endoftext|>",
|
130023 |
"single_word": false,
|
130024 |
"lstrip": false,
|
130025 |
"rstrip": false,
|
|
|
130028 |
},
|
130029 |
{
|
130030 |
"id": 46333,
|
130031 |
+
"content": "<|acc|>",
|
130032 |
"single_word": false,
|
130033 |
"lstrip": false,
|
130034 |
"rstrip": false,
|
|
|
130037 |
},
|
130038 |
{
|
130039 |
"id": 46334,
|
130040 |
+
"content": "<|rrn|>",
|
130041 |
+
"single_word": false,
|
130042 |
+
"lstrip": false,
|
130043 |
+
"rstrip": false,
|
130044 |
+
"normalized": true,
|
130045 |
+
"special": false
|
130046 |
+
},
|
130047 |
+
{
|
130048 |
+
"id": 46335,
|
130049 |
+
"content": "<|tel|>",
|
130050 |
"single_word": false,
|
130051 |
"lstrip": false,
|
130052 |
"rstrip": false,
|