iris / app.py
desert
init inference
038ef00
raw
history blame
2.39 kB
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
from huggingface_hub import hf_hub_download
# Hugging Face repository IDs and the GGUF file to fetch from each of them
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_filename = "Llama-3.2-3B-Instruct-Q8_0.gguf"
adapter_repo = "Mat17892/llama_lora_gguf"
adapter_filename = "llama_lora_adapter.gguf"

# Download model and adapter into the local Hugging Face cache
print("Downloading base model...")
base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_filename)
print("Downloading LoRA adapter...")
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_filename)

# Load the tokenizer and base model.
# A GGUF checkpoint is a single file, so it cannot be handed to from_pretrained()
# as the model path: transformers expects a repo id (or directory) there and
# takes the GGUF file name through the `gguf_file` keyword. The file was already
# fetched above, so this resolves from the local cache.
# NOTE(review): GGUF loading in transformers needs the `gguf` package installed
# and de-quantizes the weights in memory -- confirm the host has enough RAM.
print("Loading base model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_repo, gguf_file=base_model_filename)
base_model = AutoModelForCausalLM.from_pretrained(base_model_repo, gguf_file=base_model_filename)

# Load the LoRA adapter
# NOTE(review): lora_adapter_path points at a single .gguf file, while PEFT's
# from_pretrained() is documented to take a repo id or a directory holding
# adapter_config.json plus the adapter weights. This step presumably fails
# until the adapter is published in PEFT format -- confirm against adapter_repo.
print("Loading LoRA adapter...")
config = PeftConfig.from_pretrained(lora_adapter_path)
model = PeftModel.from_pretrained(base_model, lora_adapter_path)
print("Model is ready!")
# Function for inference
def chat_with_model(user_input, chat_history):
    """
    Generate a response from the model using the chat history and user input.

    Args:
        user_input: The latest message typed by the user.
        chat_history: List of ``(user_message, ai_reply)`` pairs from earlier
            turns. The new turn is appended to this list in place.

    Returns:
        A 2-tuple ``(chat_history, chat_history)``: the same updated list
        twice, one entry per output component wired to this handler.
        Blank input returns the history unchanged.
    """
    # Ignore blank submissions instead of spending a generation on them.
    if not user_input or not user_input.strip():
        return chat_history, chat_history

    # Prepare the prompt: replay earlier turns, then add the latest user input
    # and end on an open "AI:" cue for the model to complete.
    turns = [f"User: {user}\nAI: {ai}\n" for user, ai in chat_history]
    turns.append(f"User: {user_input}\nAI:")
    prompt = "".join(turns)

    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    # Generate response
    outputs = model.generate(**inputs, max_new_tokens=200, pad_token_id=tokenizer.eos_token_id)

    # For a causal LM, generate() returns the prompt tokens followed by the new
    # ones. Decode only the new tokens; decoding the full sequence would echo
    # the whole conversation into every reply and compound on each turn.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Update chat history
    chat_history.append((user_input, response))
    return chat_history, chat_history
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA Chatbot with Base Model and LoRA Adapter")

    # Conversation display.
    chatbot = gr.Chatbot(label="Chat with the Model")

    # Input row: a wide message box (scale 4) next to a narrow send button (scale 1).
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(placeholder="Type a message...", label="Your Message")
        with gr.Column(scale=1):
            submit_btn = gr.Button("Send")

    # State holding the list of conversation turns; starts empty.
    chat_history = gr.State([])

    # Link components: clicking "Send" passes the message and the stored history
    # to chat_with_model, whose two return values update the chatbot display
    # and the stored history.
    submit_btn.click(
        fn=chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )

# Launch the Gradio app
demo.launch()