JiakaiDu committed
Commit 5df3113
1 Parent(s): b18fb8c

Upload folder using huggingface_hub

Files changed (1)
  1. Test_RAG.py +39 -38
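The commit message says the folder was pushed with huggingface_hub's folder-upload API. A minimal sketch of that flow, with a hypothetical repo_id and repo_type (neither appears in the commit itself):

```python
# Sketch of the upload flow named in the commit message.
# repo_id and repo_type are assumptions; substitute the real target repo.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_folder(
    folder_path=".",                      # local folder containing Test_RAG.py
    repo_id="JiakaiDu/Test_RAG",          # hypothetical target repo
    repo_type="space",                    # assumption; could also be "model"
    commit_message="Upload folder using huggingface_hub",
)
```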
Test_RAG.py CHANGED
@@ -282,50 +282,51 @@ print(f"Loading model from {model_dir}")
 
 ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
 
-
-# llm = HuggingFacePipeline.from_model_id(
-#     model_id="meta-llama/Meta-Llama-3-8B",
-#     # alternatives: meta-llama/Meta-Llama-3-8B / meta-llama/Llama-3.2-3B-Instruct
-#     task="text-generation",
-#     backend="openvino",
-#     model_kwargs={
-#         "device": llm_device,
-#         "ov_config": ov_config,
-#         "trust_remote_code": True,
-#     },
-#     pipeline_kwargs={"max_new_tokens": 2},
-# )
-from optimum.intel.openvino import OVModelForCausalLM
-from transformers import pipeline
 print("starting setting llm model")
+llm = HuggingFacePipeline.from_model_id(
+    model_id="meta-llama/Meta-Llama-3-8B",
+    task="text-generation",
+    backend="openvino",
+    model_kwargs={
+        "device": llm_device.value,
+        "ov_config": ov_config,
+        "trust_remote_code": True,
+    },
+    pipeline_kwargs={"max_new_tokens": 2},
+)
 
-model_id = "meta-llama/Meta-Llama-3-8B"
-ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # this is an example; check your actual ov_config
+print(llm.invoke("2 + 2 ="))
+# from optimum.intel.openvino import OVModelForCausalLM
+# from transformers import pipeline
 
-# Export the model with OpenVINO
-model = OVModelForCausalLM.from_pretrained(
-    model_id,
-    export=True,  # convert the model to OpenVINO format
-    use_cache=False,
-    ov_config=ov_config,
-    trust_remote_code=True  # trust custom code from the model repo
-)
+# model_id = "meta-llama/Meta-Llama-3-8B"
+# ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # this is an example; check your actual ov_config
 
-# Save the OpenVINO model
-model.save_pretrained("./openvino_llama_model")
+# # Export the model with OpenVINO
+# model = OVModelForCausalLM.from_pretrained(
+#     model_id,
+#     export=True,  # convert the model to OpenVINO format
+#     use_cache=False,
+#     ov_config=ov_config,
+#     trust_remote_code=True  # trust custom code from the model repo
+# )
+
+# # Save the OpenVINO model
+# model.save_pretrained("./openvino_llama_model")
+
+# # Step 2: load the saved OpenVINO model and set up the inference task
+# llm_device = "CPU"  # make sure to set the right device for your environment
+# llm = pipeline(
+#     task="text-generation",
+#     model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
+#     device=llm_device,
+#     max_new_tokens=2  # maximum number of new tokens to generate
+# )
 
-# Step 2: load the saved OpenVINO model and set up the inference task
-llm_device = "CPU"  # make sure to set the right device for your environment
-llm = pipeline(
-    task="text-generation",
-    model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
-    device=llm_device,
-    max_new_tokens=2  # maximum number of new tokens to generate
-)
-
-# Step 3: run inference
-output = llm("2 + 2 =")
-print(output)
+# # Step 3: run inference
+# output = llm("2 + 2 =")
+# print(output)
 
 # print("test:2+2:")
 # print(llm.invoke("2 + 2 ="))
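For context on the change itself: the removed lines exported the model by hand with optimum-intel and wrapped it in a raw transformers pipeline, while the added lines let LangChain's HuggingFacePipeline drive the OpenVINO backend in a single call. A self-contained sketch of the new path, assuming the langchain-huggingface and optimum[openvino] packages (the actual import in Test_RAG.py may differ) and that llm_device is a notebook device-picker widget whose .value is a device string such as "CPU":

```python
# Standalone sketch of the code path the added lines enable.
# Assumes: pip install langchain-huggingface "optimum[openvino]"
from langchain_huggingface import HuggingFacePipeline

ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Meta-Llama-3-8B",
    task="text-generation",
    backend="openvino",                    # optimum-intel converts and runs the model
    model_kwargs={
        "device": "CPU",                   # the diff passes llm_device.value here
        "ov_config": ov_config,
        "trust_remote_code": True,
    },
    pipeline_kwargs={"max_new_tokens": 2}, # tiny budget; "2 + 2 =" is just a smoke test
)

print(llm.invoke("2 + 2 ="))               # invoke() is LangChain's Runnable interface
```

One trade-off worth noting: unlike the commented-out path, from_model_id does not persist the converted model, so the OpenVINO export is likely redone on each cold start unless the removed save_pretrained("./openvino_llama_model") step is kept or a CACHE_DIR is set.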