scr930's picture
Update app.py
72afe1e verified
raw
history blame
950 Bytes
import gradio as gr
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import requests
# StreetCLIP checkpoint on the Hugging Face Hub; the weights and the matching
# preprocessor are both fetched from this id once, at import time.
MODEL_ID = "geolocal/StreetCLIP"
model = CLIPModel.from_pretrained(MODEL_ID)
processor = CLIPProcessor.from_pretrained(MODEL_ID)
# Candidate locations scored when the caller supplies none. CLIP is a zero-shot
# model: it ranks an image against text prompts, so a label set is always
# required. This list is only an illustrative default; edit it to suit the app.
DEFAULT_LABELS = (
    "United States",
    "Canada",
    "Brazil",
    "United Kingdom",
    "France",
    "Germany",
    "Italy",
    "Spain",
    "Russia",
    "India",
    "China",
    "Japan",
    "Australia",
    "South Africa",
)


def classify_image(image, labels=None):
    """Rank candidate location labels for an image with StreetCLIP.

    Args:
        image: The picture to classify (the Gradio input supplies a PIL
            image). ``None`` is accepted and yields a short notice instead
            of an error.
        labels: Optional iterable of text labels to score the image
            against. Falls back to ``DEFAULT_LABELS`` when omitted or empty.

    Returns:
        A multi-line string with one ``"label: probability"`` entry per
        candidate, sorted from most to least likely.
    """
    # Local import keeps this block self-contained; torch is already a
    # dependency of the model loaded above.
    import torch

    if image is None:
        return "No image provided."

    candidates = list(labels) if labels else list(DEFAULT_LABELS)

    # The processor must receive text as well as the image: the model's
    # forward pass needs both modalities to produce image-text logits.
    inputs = processor(
        text=candidates,
        images=image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image has one row per image and one column per label;
    # softmax over the label axis turns the similarities into probabilities.
    probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()

    ranked = sorted(
        zip(candidates, probs), key=lambda pair: pair[1], reverse=True
    )
    return "\n".join(f"{label}: {prob:.2%}" for label, prob in ranked)
# Build the web UI: one image upload in, plain text out.
# gr.Image is used instead of the legacy gr.inputs.Image, because the
# gr.inputs namespace was deprecated in Gradio 3 and removed in Gradio 4.
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),  # hand the function a PIL image
    outputs="text",
    title="Geolocal StreetCLIP Classification",
    description="Upload an image to classify using Geolocal StreetCLIP",
)

# Start the Gradio server.
iface.launch()