Spaces:

Bread-F
/

Intelligent-Medical-Guidance-Large-Model

Running

FaYo

model

d8d694f about 1 month ago

806 Bytes

	from pathlib import Path

	import streamlit as st
	from lmdeploy import TurbomindEngineConfig, pipeline
	from modelscope import snapshot_download

	from utils.web_configs import WEB_CONFIGS


	@st.cache_resource
	def load_turbomind_model(model_dir):
	    """Download a model snapshot and build an LMDeploy TurboMind pipeline for it.

	    The weight format handed to TurboMind is chosen from the model identifier:
	    identifiers whose final path component ends with ``-4bit`` are loaded as
	    ``"awq"``, everything else as ``"hf"``.

	    Decorated with ``st.cache_resource`` so the pipeline object is cached by
	    Streamlit instead of being rebuilt on every call with the same argument.

	    Args:
	        model_dir: Model identifier passed to ``snapshot_download`` (fetched at
	            revision ``"master"`` into ``WEB_CONFIGS.LLM_MODEL_DIR``).

	    Returns:
	        The object returned by ``lmdeploy.pipeline`` for the downloaded model.
	    """
	    print("load model begin.")

	    # Inspect ``name`` rather than ``stem``: ``stem`` drops everything after the
	    # last dot, so an id such as "org/internlm2-chat-1.8b-4bit" would be reduced
	    # to "internlm2-chat-1" and wrongly treated as a plain "hf" model.
	    is_4bit = Path(model_dir).name.endswith("-4bit")
	    model_format = "awq" if is_4bit else "hf"

	    local_model_dir = snapshot_download(
	        model_dir,
	        revision="master",
	        cache_dir=WEB_CONFIGS.LLM_MODEL_DIR,
	    )

	    backend_config = TurbomindEngineConfig(
	        model_format=model_format,
	        session_len=32768,
	        cache_max_entry_count=WEB_CONFIGS.CACHE_MAX_ENTRY_COUNT,
	    )
	    pipe = pipeline(
	        local_model_dir,
	        backend_config=backend_config,
	        log_level="INFO",
	        model_name="internlm2",
	    )

	    print("load model end.")

	    return pipe