import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Load the model and tokenizer
model_name = "mrcuddle/Dark-Hermes3-Llama3.2-3B"
device = "cuda" if torch.cuda.is_available() else "cpu" # Detect GPU or default to CPU
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device) # Move model to the appropriate device
model.eval() # Ensure the model is in evaluation mode
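# Optional (a sketch, not applied here): on a CUDA device the model can be
# loaded in half precision to roughly halve memory use, assuming the GPU
# supports fp16:
#   model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)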
# Define the system prompt
system_prompt = (
    "You are Dark-Hermes, a helpful and intelligent chatbot. "
    "You always provide concise, accurate, and polite responses to user questions. "
    "If you don't know an answer, say 'I'm not sure about that, but I can try to help further!'"
)
# Limit chat history length
MAX_HISTORY_LENGTH = 5 # Keep only the last 5 turns to prevent excessive context size
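# Note: with gr.ChatInterface's default (tuple-style) history, each entry is a
# (user_message, bot_response) pair, which is what the unpacking below assumes;
# newer Gradio versions can instead pass role/content dicts when type="messages".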
@spaces.GPU # For ZeroGPU compatibility
def chatbot(message, history):
    # Keep only the most recent turns to bound the context size
    history = history[-MAX_HISTORY_LENGTH:]

    # Combine the system prompt with the chat history and the new user message
    conversation = system_prompt + "\n"
    conversation += "".join(f"User: {msg}\nBot: {resp}\n" for msg, resp in history)
    conversation += f"User: {message}\nBot:"

    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer(
        conversation, return_tensors="pt", truncation=True, max_length=1024
    ).to(device)

    # Generate a response; max_new_tokens bounds the reply length independently
    # of the prompt (max_length would count the prompt tokens too, and could
    # leave no room to generate once the prompt is near the 1024-token limit)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the generated text and keep only what follows the final "Bot:"
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Bot:")[-1].strip()

    # gr.ChatInterface tracks the history itself, so return just the reply string
    return response
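# Alternative prompt construction (a sketch, not wired into the app above):
# Llama-3-family tokenizers usually ship a chat template, which handles role
# markers and special tokens more robustly than manual "User:/Bot:" strings.
# This assumes the tokenizer defines such a template; build_templated_inputs
# is a hypothetical helper name. It returns input_ids ready for model.generate.
def build_templated_inputs(message, history):
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)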
# Create the Gradio interface
iface = gr.ChatInterface(
    fn=chatbot,
    title="Dark-Hermes3-Llama3.2-3B Chatbot",
    description="A chatbot interface powered by the Dark-Hermes3-Llama3.2-3B model. Ask me anything!",
    examples=["Hello!", "How are you?", "Tell me a joke.", "What is AI?"],
)
# Launch the interface
if __name__ == "__main__":
    iface.launch()
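# When running outside of Spaces, launch() accepts standard Gradio options,
# e.g. iface.launch(server_name="0.0.0.0", server_port=7860) to bind a
# specific host and port (values here are illustrative).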