import gradio as gr
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
# Load the model and processor
model = CLIPModel.from_pretrained("geolocal/StreetCLIP")
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")
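# Optional: move the model to a GPU if one is available (a sketch, assuming a
# CUDA-capable setup; the inputs built in classify_image would then also need
# to be moved with inputs.to(device)):
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)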
def classify_image(image):
    # Example labels for zero-shot classification (note: StreetCLIP is tuned
    # for street-level geolocation, so place-name prompts play to its strengths)
    labels = ["a photo of a cat", "a photo of a dog", "a photo of a car", "a photo of a tree"]
    # Preprocess the image and text prompts
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    # Run inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)
    # Postprocess the outputs
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)  # softmax turns scores into probabilities
    # Convert the probabilities to a plain list
    probs_list = probs.tolist()[0]
    # Map each label to its probability
    result = {label: prob for label, prob in zip(labels, probs_list)}
    return result
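# Quick sanity check without the UI (assumes a hypothetical local file
# "street.jpg"; Image is the PIL import above):
#   result = classify_image(Image.open("street.jpg"))
#   print(max(result, key=result.get))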
# Define the Gradio interface
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="label",
    title="Geolocal StreetCLIP Classification",
    description="Upload an image to classify using Geolocal StreetCLIP",
)

# Launch the interface
iface.launch()
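# On Hugging Face Spaces the app is served automatically; to get a temporary
# public URL when running elsewhere, Gradio also supports:
#   iface.launch(share=True)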