import os
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
import torch
import gradio as gr

# Hugging Face Hub identifier of the checkpoint served by this app.
model_name = "arjunanand13/Florence-enphase"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# trust_remote_code=True allows transformers to run the custom modelling code
# that ships inside the checkpoint repository.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model = model.to(device)

processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Hand any cached, currently unused CUDA memory back to the driver.
torch.cuda.empty_cache()


def predict(image, question):
    """Generate the model's text answer to ``question`` about ``image``.

    Args:
        image: The image to analyse, in a form accepted by the module-level
            ``processor`` (the Gradio callback passes an RGB ``PIL.Image``).
        question: The text prompt sent to the model alongside the image.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    # Pass by keyword: the positional order of text and images differs
    # between processor classes, so positional arguments can be silently
    # swapped.
    encoding = processor(text=question, images=image, return_tensors="pt")

    # The processor builds CPU tensors; move them next to the model so that
    # generate() does not fail with a device mismatch when running on CUDA.
    encoding = encoding.to(device)

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(**encoding, max_length=256)

    # batch_decode returns one string per sequence; a single prompt was sent.
    answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return answer

def gradio_interface(image, question):
    """Gradio callback: validate the upload and return the model's answer.

    Args:
        image: The uploaded picture as a ``PIL.Image``, or ``None`` when the
            user submitted the form without uploading one.
        question: The text typed into the question box.

    Returns:
        The answer produced by ``predict``, or a short prompt asking for an
        image when none was supplied.
    """
    # Gradio passes None for an empty image input; without this guard the
    # ``image.mode`` access below raises AttributeError.
    if image is None:
        return "Please upload an image."

    # Normalise palette / grayscale / RGBA uploads to three-channel RGB
    # before handing the image to the model.
    if image.mode != "RGB":
        image = image.convert("RGB")

    return predict(image, question)

# Web UI: an image upload and a free-text question go in, a text answer
# comes out. Inputs are listed in the positional order of
# gradio_interface(image, question).
iface = gr.Interface(
    title="Florence-enphase Leg lift classifier",
    description="Upload an image and ask a question about it.",
    fn=gradio_interface,
    inputs=[
        # type="pil" makes Gradio deliver the upload as a PIL image.
        gr.Image(label="Upload Image", type="pil"),
        gr.Textbox(label="Enter your question"),
    ],
    outputs=gr.Textbox(label="Answer"),
)

# Start the Gradio server with its default launch settings.
iface.launch()