"""Gradio demo: visual question answering on engineering drawings.

Loads the ``fauzail/Florence-2-VQA`` checkpoint using the configuration of
``microsoft/Florence-2-base-ft`` and serves a simple image + question
interface.
"""

import gradio as gr
import torch
from PIL import Image
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor

# Hub identifiers: the configuration comes from the base model, the weights
# and processor from the VQA checkpoint.
BASE_MODEL_ID = "microsoft/Florence-2-base-ft"
VQA_MODEL_ID = "fauzail/Florence-2-VQA"

# Upper bound on generated tokens. Without an explicit limit the length falls
# back to the checkpoint's generation defaults, which may cut answers short.
# NOTE(review): 1024 mirrors the Florence-2 model card example — tune if
# latency matters.
MAX_NEW_TOKENS = 1024

# Determine if a GPU is available and set the device accordingly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load configuration from the base model.
config = AutoConfig.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)

# Load the model using the base model's configuration.
model = AutoModelForCausalLM.from_pretrained(
    VQA_MODEL_ID,
    config=config,
    trust_remote_code=True,
).to(device)

# Load the processor for the model.
processor = AutoProcessor.from_pretrained(VQA_MODEL_ID, trust_remote_code=True)


def predict(image, question):
    """Answer ``question`` about ``image`` with the loaded model.

    Args:
        image: The uploaded picture, either a ``PIL.Image.Image`` or an
            array accepted by ``PIL.Image.fromarray`` (the ``"image"``
            input component is presumed to deliver a numpy array — confirm
            against the installed Gradio version).
        question: The free-text question typed by the user.

    Returns:
        The decoded model output with special tokens removed.

    Raises:
        gr.Error: If no image was provided or the question is blank, so the
            UI shows a readable message instead of a traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if not question or not question.strip():
        raise gr.Error("Please enter a question.")

    # Normalise to a 3-channel PIL image so RGBA / grayscale / palette
    # uploads reach the processor in a consistent format.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    inputs = processor(
        text=[question],
        images=[image],
        return_tensors="pt",
        padding=True,
    ).to(device)

    # Inference only: make sure no autograd state is recorded anywhere in the
    # generation path.
    with torch.no_grad():
        # Pass only the tensors used in the Florence-2 model card example.
        # NOTE(review): the tokenizer's attention mask covers the text tokens
        # only and is all ones for a single un-padded prompt, so omitting it
        # should not change the result — confirm against the model's
        # remote code.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=MAX_NEW_TOKENS,
        )

    return processor.tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create the Gradio interface.
interface = gr.Interface(
    fn=predict,
    inputs=["image", "text"],
    outputs="text",
    title="Florence 2 VQA - Engineering Drawings",
    description="Upload an engineering drawing and ask a related question.",
)

# Launch the Gradio interface (kept at module level, as in the original
# script, so running or importing the file behaves as before).
interface.launch()