import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces

# Load the model and tokenizer
model_name = "mrcuddle/Dark-Hermes3-Llama3.2-3B"
device = "cuda" if torch.cuda.is_available() else "cpu"  # Detect GPU or default to CPU
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)  # Move model to the appropriate device
model.eval()  # Ensure the model is in evaluation mode
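# Optional sketch (an assumption, not tested here): on memory-constrained
# GPUs the model could instead be loaded in half precision, e.g.
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16).to(device)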

# Define the system prompt
system_prompt = (
    "You are Dark-Hermes, a helpful and intelligent chatbot. "
    "You always provide concise, accurate, and polite responses to user questions. "
    "If you don't know an answer, say 'I'm not sure about that, but I can try to help further!'"
)

# Limit chat history length
MAX_HISTORY_LENGTH = 5  # Keep only the last 5 turns to prevent excessive context size

@spaces.GPU  # For ZeroGPU compatibility
def chatbot(message, history):
    # Keep only the most recent entries in history
    history = history[-MAX_HISTORY_LENGTH:]

    # Combine the system prompt with chat history and user message
    conversation = system_prompt + "\n"
    conversation += "".join([f"User: {msg}\nBot: {resp}\n" for msg, resp in history])
    conversation += f"User: {message}\nBot:"
    
    # Tokenize the input and move it to the correct device
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024).to(device)
    
    # Generate a response; cap the number of *new* tokens rather than the
    # total length, so a prompt near the 1024-token limit cannot leave the
    # model with no room to answer
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id
    )
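    # Optional: generation above is greedy; sampling arguments such as
    # do_sample=True, temperature=0.7, top_p=0.9 (illustrative values)
    # can be passed to model.generate() for more varied replies.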
    
    # Decode only the newly generated tokens, and cut off any follow-on
    # turns the model may have hallucinated after its reply
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    response = response.split("User:")[0].strip()
    
    # gr.ChatInterface tracks the chat history itself, so return just the reply
    return response

# Create the Gradio interface
iface = gr.ChatInterface(
    fn=chatbot,
    title="Dark-Hermes3-Llama3.2-3B Chatbot",
    description="A chatbot interface powered by the Dark-Hermes3-Llama3.2-3B model. Ask me anything!",
    examples=["Hello!", "How are you?", "Tell me a joke.", "What is AI?"]
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()