# Hugging Face Space: Geolocal StreetCLIP image-geolocation demo.
import requests

import gradio as gr
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
# Load the model and processor | |
model = CLIPModel.from_pretrained("geolocal/StreetCLIP") | |
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP") | |
def classify_image(image): | |
# Preprocess the image | |
inputs = processor(images=image, return_tensors="pt") | |
# Perform the inference | |
outputs = model(**inputs) | |
# Postprocess the outputs | |
logits_per_image = outputs.logits_per_image # this is the image-text similarity score | |
probs = logits_per_image.softmax(dim=1) # we can use softmax to get probabilities | |
return probs | |
# Define Gradio interface | |
iface = gr.Interface( | |
fn=classify_image, | |
inputs=gr.inputs.Image(type="pil"), | |
outputs="text", | |
title="Geolocal StreetCLIP Classification", | |
description="Upload an image to classify using Geolocal StreetCLIP" | |
) | |
# Launch the interface | |
iface.launch() |