Upload folder using huggingface_hub
- Test_RAG.py +39 -38
Test_RAG.py CHANGED
@@ -282,50 +282,51 @@ print(f"Loading model from {model_dir}")
 
 ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
 
-
-# llm = HuggingFacePipeline.from_model_id(
-#     model_id="meta-llama/Meta-Llama-3-8B",
-#     # meta-llama/Meta-Llama-3-8B ------------ meta-llama/Llama-3.2-3B-Instruct
-#     task="text-generation",
-#     backend="openvino",
-#     model_kwargs={
-#         "device": llm_device,
-#         "ov_config": ov_config,
-#         "trust_remote_code": True,
-#     },
-#     pipeline_kwargs={"max_new_tokens": 2},
-# )
-from optimum.intel.openvino import OVModelForCausalLM
-from transformers import pipeline
 print("starting setting llm model")
+llm = HuggingFacePipeline.from_model_id(
+    model_id="meta-llama/Meta-Llama-3-8B",
+    task="text-generation",
+    backend="openvino",
+    model_kwargs={
+        "device": llm_device.value,
+        "ov_config": ov_config,
+        "trust_remote_code": True,
+    },
+    pipeline_kwargs={"max_new_tokens": 2},
+)
 
-
-
+print(llm.invoke("2 + 2 ="))
+# from optimum.intel.openvino import OVModelForCausalLM
+# from transformers import pipeline
 
-# Export the model with OpenVINO
-model = OVModelForCausalLM.from_pretrained(
-    model_id,
-    export=True,  # convert the model to OpenVINO format
-    use_cache=False,
-    ov_config=ov_config,
-    trust_remote_code=True  # allow custom code from the model repo
-)
 
-#
-
+# model_id = "meta-llama/Meta-Llama-3-8B"
+# ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # just an example; check your actual ov_config
 
-#
-
-
-
-
-
-
-)
+# # Export the model with OpenVINO
+# model = OVModelForCausalLM.from_pretrained(
+#     model_id,
+#     export=True,  # convert the model to OpenVINO format
+#     use_cache=False,
+#     ov_config=ov_config,
+#     trust_remote_code=True  # allow custom code from the model repo
+# )
+
+# # Save the OpenVINO model
+# model.save_pretrained("./openvino_llama_model")
+
+# # Step 2: load the saved OpenVINO model and set up the inference pipeline
+# llm_device = "CPU"  # make sure this matches the device available in your environment
+# llm = pipeline(
+#     task="text-generation",
+#     model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
+#     device=llm_device,
+#     max_new_tokens=2  # maximum number of new tokens to generate
+# )
 
-# Step 3: run inference
-output = llm("2 + 2 =")
-print(output)
+# # Step 3: run inference
+# output = llm("2 + 2 =")
+# print(output)
 
 # print("test:2+2:")
 # print(llm.invoke("2 + 2 ="))
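For reference, a minimal self-contained sketch of what the newly added block does: serving the model through the OpenVINO backend of HuggingFacePipeline and running a quick smoke test. The langchain_huggingface import and the literal "CPU" device string are assumptions for illustration; the hunk only shows llm_device.value and does not include the import, so adapt both to the rest of Test_RAG.py.

# Sketch only; assumes langchain-huggingface and optimum-intel[openvino] are installed.
from langchain_huggingface import HuggingFacePipeline  # assumed import; not shown in this hunk

ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Meta-Llama-3-8B",
    task="text-generation",
    backend="openvino",               # run inference through optimum-intel / OpenVINO
    model_kwargs={
        "device": "CPU",              # the diff uses llm_device.value; "CPU" is an assumed placeholder
        "ov_config": ov_config,
        "trust_remote_code": True,
    },
    pipeline_kwargs={"max_new_tokens": 2},  # tiny budget, enough for the "2 + 2 =" check
)

print(llm.invoke("2 + 2 ="))          # same smoke test as in the diff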