# Hugging Face Space: Geolocal StreetCLIP image-geolocation demo.
import requests

import gradio as gr
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
# Load the model and processor | |
model = CLIPModel.from_pretrained("geolocal/StreetCLIP") | |
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP") | |
def classify_image(image): | |
# Preprocess the image | |
inputs = processor(images=image, return_tensors="pt") | |
# Perform the inference | |
outputs = model(**inputs) | |
# Postprocess the outputs | |
logits_per_image = outputs.logits_per_image # this is the image-text similarity score | |
probs = logits_per_image.softmax(dim=1) # we can use softmax to get probabilities | |
return probs | |
# Define Gradio interface | |
iface = gr.Interface( | |
fn=classify_image, | |
inputs=gr.inputs.Image(type="pil"), | |
outputs="text", | |
title="Geolocal StreetCLIP Classification", | |
description="Upload an image to classify using Geolocal StreetCLIP" | |
) | |
# Launch the interface | |
iface.launch() |