Spaces:
Running
Running
import gradio as gr | |
from PIL import Image | |
from transformers import AutoConfig, AutoModelForCausalLM | |
import torch | |
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The configuration is read from the base checkpoint, while the weights
# below come from the VQA checkpoint.
config = AutoConfig.from_pretrained(
    "microsoft/Florence-2-base-ft",
    trust_remote_code=True,
)

# Build the model from the VQA checkpoint using that configuration, then
# move it onto the selected device.
model = AutoModelForCausalLM.from_pretrained(
    "fauzail/Florence-2-VQA",
    trust_remote_code=True,
    config=config,
)
model = model.to(device)
from transformers import AutoProcessor

# The processor comes from the same repository as the model weights; it is
# what turns a (question, image) pair into model inputs in `predict`.
processor = AutoProcessor.from_pretrained(
    "fauzail/Florence-2-VQA",
    trust_remote_code=True,
)
# Define the prediction function for Gradio | |
def predict(image, question):
    """Answer a free-text question about an uploaded image.

    Args:
        image: The image supplied by the Gradio "image" input, or ``None``
            when the user submitted the form without uploading anything.
        question: The text typed into the Gradio "text" input.

    Returns:
        The decoded model output as a string, or a short prompt asking the
        user to supply the missing input when the image or question is absent.
    """
    # Guard against an empty form: without these checks the processor call
    # below fails with an error that means nothing to the end user.
    if image is None:
        return "Please upload an image before asking a question."
    if question is None or not question.strip():
        return "Please enter a question about the image."

    inputs = processor(
        text=[question],
        images=[image],
        return_tensors="pt",
        padding=True,
    ).to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model.generate(
            # Pass just the two tensors the Florence-2 model card example
            # passes, rather than every key the processor returns.
            # NOTE(review): confirm the processor output exposes these keys.
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            # Explicit budget so the answer is not cut off by the library's
            # short default generation length; value follows the model card.
            max_new_tokens=1024,
        )

    # Only one (question, image) pair is submitted, so decode the first row.
    return processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
# Expose `predict` as a web form: an image plus a text box in, text out.
interface = gr.Interface(
    title="Florence 2 VQA - Engineering Drawings",
    description="Upload an engineering drawing and ask a related question.",
    fn=predict,
    inputs=["image", "text"],
    outputs="text",
)

# Start serving the form.
interface.launch()