# Previous single-input version (text and/or image URL), kept commented out for reference:
#
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
# from PIL import Image
# import requests
# import torch
#
# # Load the FashionCLIP processor and model
# processor = AutoProcessor.from_pretrained("patrickjohncyh/fashion-clip")
# model = AutoModelForZeroShotImageClassification.from_pretrained("patrickjohncyh/fashion-clip")
#
# # Define the function to process both text and image inputs
# def generate_embeddings(input_text=None, input_image_url=None):
#     try:
#         if input_image_url:
#             # Process image with accompanying text
#             response = requests.get(input_image_url, stream=True)
#             response.raise_for_status()
#             image = Image.open(response.raw)
#
#             # Use a default text if none is provided
#             if not input_text:
#                 input_text = "this is an image"
#
#             # Prepare inputs for the model
#             inputs = processor(
#                 text=[input_text],
#                 images=image,
#                 return_tensors="pt",
#                 padding=True
#             )
#             with torch.no_grad():
#                 outputs = model(**inputs)
#             image_embedding = outputs.logits_per_image.cpu().numpy().tolist()
#             return {
#                 "type": "image_embedding",
#                 "input_image_url": input_image_url,
#                 "input_text": input_text,
#                 "embedding": image_embedding
#             }
#         elif input_text:
#             # Process text input only
#             inputs = processor(
#                 text=[input_text],
#                 images=None,
#                 return_tensors="pt",
#                 padding=True
#             )
#             with torch.no_grad():
#                 outputs = model(**inputs)
#             text_embedding = outputs.logits_per_text.cpu().numpy().tolist()
#             return {
#                 "type": "text_embedding",
#                 "input_text": input_text,
#                 "embedding": text_embedding
#             }
#         else:
#             return {"error": "Please provide either a text query or an image URL."}
#     except Exception as e:
#         return {"error": str(e)}
#
# # Create the Gradio interface
# interface = gr.Interface(
#     fn=generate_embeddings,
#     inputs=[
#         gr.Textbox(label="Text Query (Optional)", placeholder="e.g., red dress (used with image or for text embedding)"),
#         gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg (used with or without text query)")
#     ],
#     outputs="json",
#     title="FashionCLIP Combined Embedding API",
#     description="Provide a text query and/or an image URL to compute embeddings for vector search."
# )
#
# # Launch the app
# if __name__ == "__main__":
#     interface.launch()
#     print(generate_embeddings("red dress"))
from typing import Optional

import requests
from PIL import Image
import numpy as np
import gradio as gr
from encoder import FashionCLIPEncoder  # local module expected alongside this file

# Constants
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Initialize encoder
encoder = FashionCLIPEncoder()
# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    try:
        response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
        if response.status_code == 200:
            return Image.open(response.raw).convert("RGB")  # Ensure consistent format
        return None
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None
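
# A quick commented-out sanity check for the downloader (the URL below is
# illustrative, not part of the app):
#
#   img = download_image_as_pil("https://example.com/shirt.jpg")
#   if img is not None:
#       print(img.size, img.mode)  # expected mode: "RGB"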
# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    # Gradio passes the textbox contents as a single string, so split the
    # comma-separated URLs into a list first
    urls = [url.strip() for url in image_urls.split(",") if url.strip()]
    if not urls:
        return [{"error": "No valid image URLs provided"}]
    results = []
    for url in urls:
        try:
            # Download image
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue
            # Generate embedding
            embedding = np.asarray(encoder.encode_images([image])[0])
            # Normalize embedding to unit length
            embedding_normalized = embedding / np.linalg.norm(embedding)
            # Append results
            results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
    return results
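
# Why normalize: unit-length vectors make cosine similarity a plain dot
# product, which is what most vector stores compute. A minimal commented-out
# sketch (img_a and img_b are hypothetical PIL images, not defined here):
#
#   a = np.asarray(encoder.encode_images([img_a])[0])
#   b = np.asarray(encoder.encode_images([img_b])[0])
#   a, b = a / np.linalg.norm(a), b / np.linalg.norm(b)
#   cosine_similarity = float(np.dot(a, b))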
# Gradio Interface
iface = gr.Interface(
    fn=batch_process_images,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter image URLs separated by commas",
        label="Batch Image URLs",
    ),
    outputs=gr.JSON(label="Embedding Results"),
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    examples=[
        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
    ],
)
# Launch Gradio App
if __name__ == "__main__":
    iface.launch()
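
# A minimal client-side sketch for calling this Space once deployed, using the
# gradio_client package (pip install gradio_client). The Space id below is a
# placeholder; this is commented out so it does not run with the app:
#
#   from gradio_client import Client
#   client = Client("your-username/batch-fashion-clip")  # hypothetical Space id
#   result = client.predict(
#       "https://example.com/a.jpg, https://example.com/b.jpg",
#       api_name="/predict",
#   )
#   print(result)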