# MediMate / app.py
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Leftover from the Gradio ChatInterface template; not used by the Assistant below.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

class Assistant:
    def __init__(self):
        model_name = "ruslanmv/Medical-Llama3-8B"
        device_map = "auto"
        # Optional 4-bit quantized load, kept from the original but disabled:
        # bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
        # self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True, use_cache=False, device_map=device_map)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True, use_cache=False, device_map=device_map
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # The tokenizer ships without a pad token; reuse EOS for padding.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def respond(self, message, history=None):
        # gr.ChatInterface calls this with (message, history); history is not used here.
        sys_message = '''
        You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
        provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
        '''
        messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": message}]
        # Apply the model's chat template to build the prompt.
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Move inputs to the device the model was loaded on instead of hard-coding "cuda".
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=100, use_cache=True)
        # Extract and return the generated text, removing the prompt.
        response_text = self.tokenizer.batch_decode(outputs)[0].strip()
        answer = response_text.split('<|im_start|>assistant')[-1].strip()
        return answer
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
assistant = Assistant()
demo = gr.ChatInterface(assistant.respond)
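# A minimal sketch of the customization the docstring above points to: gr.ChatInterface also
# accepts keyword arguments such as title, description, and examples (see the Gradio docs
# linked above). The strings below are illustrative placeholders, not part of the original app.
# demo = gr.ChatInterface(
#     assistant.respond,
#     title="MediMate",
#     description="Ask a medical question; for anything urgent, seek professional help.",
#     examples=["What are common symptoms of dehydration?"],
# )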

if __name__ == "__main__":
    demo.launch()