# Clip-Model / app.py
# ktllc's picture
# Update app.py
# 7a385ac
import numpy as np
import clip
import torch
import gradio as gr
from PIL import Image
import os
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP ViT-B/32 model together with its image preprocessing callable,
# move the model to the selected device and switch it to evaluation mode.
model, preprocess = clip.load("ViT-B/32")
model = model.to(device)
model.eval()

# Business listing name; not referenced anywhere else in the visible code.
Business_Listing = "Air Guide"
def find_similarity(image, text_input):
    """Return the CLIP cosine similarity between an image and a text prompt.

    Args:
        image: The uploaded image (the interface requests PIL images), or
            ``None`` when no image has been provided yet.
        text_input: The text to compare against the image.

    Returns:
        The cosine similarity as a plain Python ``float``, or ``None`` when
        there is no image to score.
    """
    # The interface runs in live mode, so this callback may fire before an
    # image has been uploaded; there is nothing to score in that case.
    if image is None:
        return None

    # Preprocess the image into a batch of one and tokenize the single prompt.
    image_batch = preprocess(image).unsqueeze(0).to(device)
    text_tokens = clip.tokenize([text_input]).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image_batch)
        text_features = model.encode_text(text_tokens)

        # Scale both embeddings to unit length so that their dot product is
        # the cosine similarity.
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        similarity = text_features @ image_features.T

    # One prompt against one image leaves exactly one value in the product.
    # The previous code squeezed it to one dimension and then indexed it with
    # two indices, which raised IndexError; ``item()`` extracts the scalar
    # directly as a Python float.
    return similarity.item()
# Build the Gradio UI: a PIL image and a text box go in, a single number
# (the similarity returned by find_similarity) comes out.
_interface_inputs = [gr.Image(type="pil"), "text"]

iface = gr.Interface(
    title="CLIP Model Image-Text Cosine Similarity",
    description="Upload an image and enter text to find their cosine similarity.",
    fn=find_similarity,
    inputs=_interface_inputs,
    outputs="number",
    # Live mode re-runs the function whenever an input changes.
    live=True,
    interpretation="default",
)

# Start serving the interface.
iface.launch()