"""Gradio demo that answers a question about an uploaded image.

Loads the ``arjunanand13/Florence-enphase`` checkpoint once at start-up and
serves a small web UI with an image input and a free-text question.
"""
import os

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# Load the model and processor
model_name = "arjunanand13/Florence-enphase"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# trust_remote_code=True executes the modelling code shipped with the
# checkpoint; only use it with repositories you trust.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
torch.cuda.empty_cache()


def predict(image, question):
    """Generate the model's answer to *question* about *image*.

    Args:
        image: The PIL image to query (the caller converts it to RGB).
        question: The text prompt passed to the processor alongside the image.

    Returns:
        The first decoded sequence, with special tokens stripped.
    """
    # Keyword arguments avoid relying on the processor's positional order
    # (Florence-2-style processors presumably take ``text`` first, so the
    # positional call would swap image and prompt -- verify against the
    # checkpoint's remote code).
    encoding = processor(text=question, images=image, return_tensors="pt")
    # The inputs must live on the same device as the model, otherwise
    # generate() fails with a device mismatch when running on CUDA.
    encoding = encoding.to(device)

    with torch.no_grad():
        outputs = model.generate(**encoding, max_length=256)

    return processor.batch_decode(outputs, skip_special_tokens=True)[0]


def gradio_interface(image, question):
    """Gradio callback: validate the upload, normalise it to RGB, and predict.

    Args:
        image: PIL image from the ``gr.Image`` component, or ``None`` when
            nothing was uploaded.
        question: Text from the question textbox.

    Returns:
        The answer string produced by :func:`predict`.

    Raises:
        gr.Error: If no image was uploaded.
    """
    # Gradio passes None when the user submits without an image; surface a
    # readable message instead of an AttributeError on ``image.mode``.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    if image.mode != "RGB":
        image = image.convert("RGB")

    return predict(image, question)


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Florence-enphase Leg lift classifier",
    description="Upload an image and ask a question about it.",
)

iface.launch()