Taizo Kaneko
committed on
Commit
•
3ba50ba
1
Parent(s):
912b503
commit files to HF hub
Browse files
- config.json +2 -1
- fasttext_jp_embedding.py +1 -1
- fasttext_jp_tokenizer.py +2 -0
config.json
CHANGED
@@ -7,7 +7,8 @@
|
|
7 |
"AutoModel": "fasttext_jp_embedding.FastTextJpModel"
|
8 |
},
|
9 |
"hidden_size": 300,
|
10 |
-
"model_type": "
|
|
|
11 |
"torch_dtype": "float32",
|
12 |
"transformers_version": "4.23.1",
|
13 |
"vocab_size": 2000000
|
|
|
7 |
"AutoModel": "fasttext_jp_embedding.FastTextJpModel"
|
8 |
},
|
9 |
"hidden_size": 300,
|
10 |
+
"model_type": "fasttext_jp",
|
11 |
+
"tokenizer_class": "FastTextJpTokenizer",
|
12 |
"torch_dtype": "float32",
|
13 |
"transformers_version": "4.23.1",
|
14 |
"vocab_size": 2000000
|
fasttext_jp_embedding.py
CHANGED
@@ -6,7 +6,7 @@ import torch
|
|
6 |
|
7 |
|
8 |
class FastTextJpConfig(PretrainedConfig):
|
9 |
-
model_type = "
|
10 |
|
11 |
def __init__(self, **kwargs):
|
12 |
super().__init__(**kwargs)
|
|
|
6 |
|
7 |
|
8 |
class FastTextJpConfig(PretrainedConfig):
|
9 |
+
model_type = "fasttext_jp"
|
10 |
|
11 |
def __init__(self, **kwargs):
|
12 |
super().__init__(**kwargs)
|
fasttext_jp_tokenizer.py
CHANGED
@@ -28,6 +28,8 @@ def load_stoi(vocab_file: str) -> dict[str, int]:
|
|
28 |
|
29 |
|
30 |
class FastTextJpTokenizer(MeCabTokenizer):
|
|
|
|
|
31 |
vocab_files_names = VOCAB_FILES_NAMES
|
32 |
|
33 |
def __init__(self,
|
|
|
28 |
|
29 |
|
30 |
class FastTextJpTokenizer(MeCabTokenizer):
|
31 |
+
model_type = "fasttext_jp"
|
32 |
+
|
33 |
vocab_files_names = VOCAB_FILES_NAMES
|
34 |
|
35 |
def __init__(self,
|