Dark-Hermes3.2 / app.py
mrcuddle's picture
Update app.py
0c92bcf verified
raw
history blame
2.43 kB
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_name = "mrcuddle/Dark-Hermes3-Llama3.2-3B"

# Pick the execution device and the matching weight precision together:
# bfloat16 when CUDA is available, float32 when falling back to CPU.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.bfloat16
else:
    device = "cpu"
    dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in the chosen precision, then move them to the device.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype)
model = model.to(device)

# Inference only: switch the model to evaluation mode.
model.eval()
# Instruction text placed at the very start of every prompt sent to the model.
system_prompt = "".join(
    [
        "You are Dark-Hermes, a helpful and intelligent chatbot. ",
        "You always provide concise, accurate, and polite responses to user questions. ",
        "If you don't know an answer, say 'I'm not sure about that, but I can try to help further!'",
    ]
)

# Number of most recent history entries kept when building a prompt, so the
# context does not grow without bound.
MAX_HISTORY_LENGTH = 5
# Token budgets: the prompt is truncated to MAX_PROMPT_TOKENS and the reply may
# add up to MAX_NEW_TOKENS on top of that.
MAX_PROMPT_TOKENS = 1024
MAX_NEW_TOKENS = 512


def _history_to_turns(history):
    """Normalize chat history into a list of ``(user, bot)`` pairs.

    Accepts either a sequence of two-item ``(user, bot)`` entries or a
    sequence of ``{"role": ..., "content": ...}`` dicts.
    NOTE(review): the dict form is assumed to be Gradio's "messages" history
    format -- confirm against the Gradio version the Space runs on.
    """
    turns = []
    pending_user = None
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role == "user":
                # A user message with no reply yet: keep it as a half turn.
                if pending_user is not None:
                    turns.append((pending_user, ""))
                pending_user = content
            elif role == "assistant":
                turns.append((pending_user if pending_user is not None else "", content))
                pending_user = None
        else:
            user_msg, bot_msg = entry
            turns.append((user_msg, bot_msg))
    if pending_user is not None:
        turns.append((pending_user, ""))
    return turns


@spaces.GPU  # For ZeroGPU compatibility
def chatbot(message, history):
    """Generate the bot's reply to ``message`` given the prior ``history``.

    Args:
        message: The latest user message.
        history: Previous conversation, as ``(user, bot)`` pairs or as
            role/content dicts; ``None`` or empty means no prior turns.

    Returns:
        The reply text only. ``gr.ChatInterface`` keeps the conversation
        history itself, so the callback must not return the history.
    """
    # Keep only the most recent turns so the prompt stays bounded.
    turns = _history_to_turns(history)[-MAX_HISTORY_LENGTH:]

    # Plain-text transcript: system prompt, past turns, then the new message
    # followed by an open "Bot:" cue for the model to complete.
    conversation = system_prompt + "\n"
    conversation += "".join(f"User: {msg}\nBot: {resp}\n" for msg, resp in turns)
    conversation += f"User: {message}\nBot:"

    # Move the encoded prompt to the model's device only. Token ids must stay
    # integer, so no dtype cast is applied to the tokenizer output.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    ).to(device)

    # max_new_tokens bounds the reply independently of the prompt length;
    # max_length would count the prompt too and can leave no room to answer.
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens produced after the prompt, rather than searching
    # the full text for "Bot:", which fails when a message contains that text.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # The model may keep writing the transcript; stop at the next user turn.
    response = response.split("\nUser:", 1)[0].strip()
    return response
# Sample prompts passed to the chat interface as its `examples`.
_EXAMPLE_PROMPTS = ["Hello!", "How are you?", "Tell me a joke.", "What is AI?"]

# Chat UI wired to the `chatbot` callback defined in this file.
iface = gr.ChatInterface(
    fn=chatbot,
    examples=_EXAMPLE_PROMPTS,
    description="A chatbot interface powered by the Dark-Hermes3-Llama3.2-3B model. Ask me anything!",
    title="Dark-Hermes3-Llama3.2-3B Chatbot",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()