import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Log in to the Hugging Face Hub (the Llama checkpoints are gated)
login(token=os.getenv("HF_TOKEN"))
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.2-11B-Vision-Instruct",
device_map="auto",
torch_dtype="auto",
)
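# Note: an 11B checkpoint is large; device_map="auto" places the weights across
# the available GPUs (offloading to CPU if needed), and torch_dtype="auto"
# picks the dtype stored in the model's config.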
def generate_response(message, history):
    # With multimodal=True, Gradio passes a dict; use its text field
    # (uploaded files in message["files"] are ignored by this text-only path)
    inputs = tokenizer(message["text"], return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)
    # Decode only the newly generated tokens, not the echoed prompt
    return tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
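# A possible extension (not wired up above): Llama-3.2-Vision checkpoints are
# Mllama models, and the model card drives them through AutoProcessor and
# MllamaForConditionalGeneration so uploaded images can be attached to the
# prompt. A sketch of such a handler, assuming `processor` was loaded via
# AutoProcessor.from_pretrained(...) and `model` via
# MllamaForConditionalGeneration.from_pretrained(...), might look like:
#
#     from PIL import Image
#
#     def generate_vision_response(message, history):
#         msgs = [{"role": "user", "content": [{"type": "image"},
#                 {"type": "text", "text": message["text"]}]}]
#         prompt = processor.apply_chat_template(msgs, add_generation_prompt=True)
#         image = Image.open(message["files"][0])
#         inputs = processor(image, prompt, return_tensors="pt").to(model.device)
#         output = model.generate(**inputs, max_new_tokens=100)
#         return processor.decode(output[0], skip_special_tokens=True)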
demo = gr.ChatInterface(
    fn=generate_response,
    examples=[{"text": "Hello", "files": []}],
    title="Llama 3.2 Chat",
    multimodal=True,
)

demo.launch(debug=True)