parkerjj committed on
Commit
4e6d2ce
·
1 Parent(s): 8bf0955

从 Hugging Face Hub 下载 Word2Vec 模型,移除本地路径搜索逻辑

Browse files
Files changed (1) hide show
  1. preprocess.py +27 -33
preprocess.py CHANGED
@@ -71,7 +71,7 @@ class LazyWord2Vec:
71
  @property
72
  def model(self):
73
  if self._model is None:
74
- print("Loading Word2Vec model...")
75
  self._model = KeyedVectors.load(self.model_path, mmap='r')
76
  return self._model
77
 
@@ -88,43 +88,37 @@ class LazyWord2Vec:
88
  return key in self.model
89
 
90
  # 加载预训练的 Google News Word2Vec 模型
91
- # 定义路径列表
92
- search_paths = ["/BuckLake/Model/",
93
- "/Users/parker/Development/Server/BuckLake/Model/",
94
- "/Users/liuyue/Work/BuckLake/Model/"]
95
-
96
- # 获取当前文件所在目录的路径
97
- current_directory = os.getcwd()
98
- print(f"Current directory: {current_directory}")
99
- current_directory = os.path.dirname(os.path.abspath(__file__))
100
-
101
- # 添加相对于当前项目的路径
102
- # search_paths.insert(0, os.path.join(current_directory, 'model'))
103
- search_paths.insert(1, os.path.join(current_directory, '..', 'Model'))
104
 
 
 
 
105
 
106
- # 定义相对路径
107
- filename = 'word2vec-google-news-300.model'
 
108
 
109
- # 初始化word2vec_path为None
110
- word2vec_path = None
111
 
112
- # 遍历路径列表
113
- for path in search_paths:
114
- potential_path = os.path.join(path, filename)
115
- if os.path.exists(potential_path):
116
- word2vec_path = potential_path
117
- break
118
- else:
119
- print(f"{potential_path} not found.")
120
-
121
- # 如果找到路径,加载模型
122
- if word2vec_path:
123
- print(f"Loading Word2Vec model from {word2vec_path}...")
124
- word2vec_model = LazyWord2Vec(word2vec_path)
125
- else:
126
- raise FileNotFoundError(f"{filename} not found in any of the search paths: {search_paths}")
127
 
 
 
 
128
 
129
 
130
  def pos_tagging(text):
 
71
  @property
72
  def model(self):
73
  if self._model is None:
74
+ print(f"Loading Word2Vec model from path: {self.model_path}...")
75
  self._model = KeyedVectors.load(self.model_path, mmap='r')
76
  return self._model
77
 
 
88
  return key in self.model
89
 
90
  # 加载预训练的 Google News Word2Vec 模型
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
+ # 定义模型名称
93
+ from huggingface_hub import hf_hub_download
94
+ import os
95
 
96
+ # 定义 Hugging Face 的 repository 信息
97
+ repo_id = "fse/word2vec-google-news-300" # 替换为实际的仓库ID
98
+ filename = "word2vec-google-news-300.model" # 文件名
99
 
100
+ # 确保本地保存目录存在
101
+ #os.makedirs(local_model_path, exist_ok=True)
102
 
103
+ # 尝试从 Hugging Face 下载模型文件
104
+ try:
105
+ print(f"Downloading {filename} from Hugging Face Hub...")
106
+ downloaded_path = hf_hub_download(
107
+ repo_id=repo_id,
108
+ filename=filename
109
+ )
110
+
111
+ downloaded_path_npy = hf_hub_download(
112
+ repo_id=repo_id,
113
+ filename="word2vec-google-news-300.model.vectors.npy"
114
+ )
115
+ print(f"Model downloaded to {downloaded_path}")
116
+ except Exception as e:
117
+ raise RuntimeError(f"Failed to download {filename} from Hugging Face Hub: {e}")
118
 
119
+ # 加载模型
120
+ print(f"Loading Word2Vec model from {downloaded_path}...")
121
+ word2vec_model = LazyWord2Vec(downloaded_path)
122
 
123
 
124
  def pos_tagging(text):