import gradio as gr
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
from PIL import Image
import requests
import torch
# Load the FashionCLIP processor and model
processor = AutoProcessor.from_pretrained("patrickjohncyh/fashion-clip")
model = AutoModelForZeroShotImageClassification.from_pretrained("patrickjohncyh/fashion-clip")
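# Optional: run inference on GPU when available (a sketch; if enabled, the
# processor outputs built inside generate_embeddings below must be moved to
# the same device, e.g. inputs = {k: v.to(device) for k, v in inputs.items()}):
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)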
# Define the function to process both text and image inputs
def generate_embeddings(input_text=None, input_image_url=None):
    try:
        if input_image_url:
            # Fetch and decode the image; convert to RGB since CLIP's
            # preprocessor expects three channels
            response = requests.get(input_image_url, stream=True)
            response.raise_for_status()
            image = Image.open(response.raw).convert("RGB")
            # Project the image into the shared embedding space.
            # get_image_features returns the actual embedding vector;
            # logits_per_image would only give an image-text similarity
            # score, which is not usable for vector search
            inputs = processor(images=image, return_tensors="pt")
            with torch.no_grad():
                image_features = model.get_image_features(**inputs)
            image_embedding = image_features.cpu().numpy().tolist()
            return {
                "type": "image_embedding",
                "input_image_url": input_image_url,
                "input_text": input_text,
                "embedding": image_embedding
            }
        elif input_text:
            # Process the text input only
            inputs = processor(
                text=[input_text],
                return_tensors="pt",
                padding=True
            )
            # Use get_text_features rather than the full forward pass, which
            # would fail without pixel_values
            with torch.no_grad():
                text_features = model.get_text_features(**inputs)
            text_embedding = text_features.cpu().numpy().tolist()
            return {
                "type": "text_embedding",
                "input_text": input_text,
                "embedding": text_embedding
            }
        else:
            return {"error": "Please provide either a text query or an image URL."}
    except Exception as e:
        return {"error": str(e)}
# Create the Gradio interface
interface = gr.Interface(
    fn=generate_embeddings,
    inputs=[
        gr.Textbox(label="Text Query (Optional)", placeholder="e.g., red dress (used for the text embedding when no image URL is given)"),
        gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg (takes precedence over the text query)")
    ],
    outputs="json",
    title="FashionCLIP Combined Embedding API",
    description="Provide a text query or an image URL to compute a FashionCLIP embedding for vector search."
)
# Launch the app
if __name__ == "__main__":
    interface.launch()
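# Example client call once the app is running (a sketch, assuming Gradio's
# default local port and the default "/predict" endpoint; install the client
# with `pip install gradio_client`):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("red dress", "", api_name="/predict")
#   print(result)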