Spaces:

Mat17892
/

iris

Runtime error

iris / app.py

desert

init inference

038ef00 about 2 months ago

2.39 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel, PeftConfig
	from huggingface_hub import hf_hub_download

	# Hugging Face repository IDs and the file fetched from each of them
	base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
	base_model_file = "Llama-3.2-3B-Instruct-Q8_0.gguf"
	adapter_repo = "Mat17892/llama_lora_gguf"
	adapter_file = "llama_lora_adapter.gguf"

	# Download model and adapter; each call yields the local path of the file
	print("Downloading base model...")
	base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)

	print("Downloading LoRA adapter...")
	lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)

	# Load the tokenizer and base model.
	# transformers reads a GGUF checkpoint through the `gguf_file` argument
	# (repo ID + file name inside that repo). Passing the path of the .gguf
	# file itself as the model name, as this script did before, is not a
	# documented form. The file was downloaded just above, so these calls
	# should find it in the local Hugging Face cache.
	# NOTE(review): GGUF loading in transformers needs the `gguf` package at
	# runtime -- confirm it is listed in the Space's requirements.
	print("Loading base model and tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(base_model_repo, gguf_file=base_model_file)
	base_model = AutoModelForCausalLM.from_pretrained(base_model_repo, gguf_file=base_model_file)

	# Load the LoRA adapter
	# NOTE(review): both PEFT calls below receive the path of a single .gguf
	# file. PEFT's from_pretrained() is documented for a repo ID or a directory
	# containing adapter_config.json plus the adapter weights -- confirm the
	# adapter is available in that format and point these calls at it.
	print("Loading LoRA adapter...")
	config = PeftConfig.from_pretrained(lora_adapter_path)
	model = PeftModel.from_pretrained(base_model, lora_adapter_path)

	print("Model is ready!")

	# Function for inference
	def chat_with_model(user_input, chat_history):
	    """
	    Generate a response from the model using the chat history and user input.

	    Args:
	        user_input: The latest message typed by the user.
	        chat_history: List of (user message, AI reply) pairs from earlier
	            turns. It is extended in place with the new pair.

	    Returns:
	        A 2-tuple holding the updated history twice, one value per output
	        of the click handler that calls this function.
	    """
	    if chat_history is None:
	        chat_history = []

	    # Nothing to answer: leave the conversation untouched instead of
	    # running generation on an empty message.
	    if not user_input or not user_input.strip():
	        return chat_history, chat_history

	    # Prepare the prompt: one "User/AI" pair per past turn, then the new
	    # message with an open "AI:" slot for the model to complete.
	    past_turns = "".join(f"User: {user}\nAI: {ai}\n" for user, ai in chat_history)
	    prompt = f"{past_turns}User: {user_input}\nAI:"

	    # Tokenize input
	    inputs = tokenizer(prompt, return_tensors="pt")
	    prompt_length = inputs["input_ids"].shape[-1]

	    # Generate response
	    outputs = model.generate(**inputs, max_new_tokens=200, pad_token_id=tokenizer.eos_token_id)

	    # For a causal LM the generated sequence starts with the prompt tokens.
	    # Decode only what comes after them; decoding the whole sequence would
	    # echo the full transcript back as the "reply" and feed it into every
	    # later prompt.
	    new_tokens = outputs[0][prompt_length:]
	    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

	    # The plain-text prompt format has no stop marker, so the model may go
	    # on and invent the next "User:" turn; keep only its own answer.
	    response = response.split("\nUser:", 1)[0].strip()

	    # Update chat history
	    chat_history.append((user_input, response))
	    return chat_history, chat_history

	# Gradio UI: a title, the chat window, and an input row (textbox + button)
	with gr.Blocks() as demo:
	    gr.Markdown("# 🦙 LLaMA Chatbot with Base Model and LoRA Adapter")
	    chatbot = gr.Chatbot(label="Chat with the Model")

	    # Conversation state handed to and returned by chat_with_model;
	    # it starts out as an empty list.
	    chat_history = gr.State([])

	    with gr.Row():
	        # The message box gets four parts of the row width, the button one.
	        with gr.Column(scale=4):
	            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
	        with gr.Column(scale=1):
	            submit_btn = gr.Button("Send")

	    # Link components: clicking "Send" runs chat_with_model on the typed
	    # message and the stored history, then writes its two return values
	    # to the chat window and back into the state.
	    submit_btn.click(
	        fn=chat_with_model,
	        inputs=[user_input, chat_history],
	        outputs=[chatbot, chat_history],
	        show_progress=True,
	    )

	# Launch the Gradio app
	demo.launch()