import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the model and tokenizer from the Hugging Face Model Hub.
# Note: this is a gated 70B checkpoint; it requires accepting the license on
# the Hub, and loading it in full float32 on a single device is a likely cause
# of out-of-memory runtime errors. bfloat16 plus device_map="auto" (which
# shards the weights across available GPUs) keeps the footprint manageable.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Define the system instruction
system_instruction = "You are a helpful assistant. Provide detailed and accurate responses to the user's queries."
# Define the chat function
def chat_function(prompt):
    # Create the full input prompt, including the system instruction
    full_prompt = f"{system_instruction}\nUser: {prompt}\nAssistant:"

    # Tokenize the full prompt and move it to the model's device
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # Generate the model response; max_new_tokens bounds only the generated
    # text, whereas max_length also counts the prompt and can truncate output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode, then keep only the assistant's part of the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    response = response.split("Assistant:")[-1].strip()
    return response
# Create the Gradio interface
iface = gr.Interface(
    fn=chat_function,
    inputs="text",
    outputs="text",
    title="Meta-Llama Chatbot",
    description="A chatbot powered by the Meta-Llama-3.1-70B-Instruct model.",
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()
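
# --- Alternative prompt construction (a sketch, not part of the original app) ---
# Llama 3.1 Instruct checkpoints are trained against a specific chat template,
# so hand-rolled "User:"/"Assistant:" markers tend to yield weaker and less
# well-formatted replies. Below is a minimal sketch of the same chat function
# built on tokenizer.apply_chat_template; the name chat_function_templated is
# hypothetical, and the 150-token generation budget is carried over from the
# function above.
def chat_function_templated(prompt):
    # Build the conversation in the role/content format the template expects
    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": prompt},
    ]

    # apply_chat_template inserts the model's special tokens and the trailing
    # generation prompt, so no manual role markers are needed
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=150,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()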