# Spaces: Sleeping
import io
import uuid
from typing import Optional

import gradio as gr
import numpy as np
import requests
from PIL import Image

from encoder import FashionCLIPEncoder
# Constants
# Browser-like User-Agent sent with every image download request.
REQUESTS_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}
# Number of downloaded images collected before they are encoded together.
BATCH_SIZE = 30
# Initialize encoder: a single module-level instance, created at import time,
# whose encode_images() method process_batch calls for every batch.
encoder = FashionCLIPEncoder()
# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    """Download the image at ``url`` and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded image converted to ``"RGB"``, or ``None`` when the server
        answers with a status other than 200 or when fetching/decoding raises.
    """
    try:
        # Deliberately not streaming: reading ``response.content`` consumes the
        # whole body (so the connection is released on every path) and applies
        # the transfer decoding (gzip/deflate) that ``response.raw`` skips.
        response = requests.get(url, headers=REQUESTS_HEADERS, timeout=timeout)
        if response.status_code != 200:
            return None
        # BytesIO gives PIL a seekable file object; convert() forces the full
        # decode, so corrupt or truncated data fails here and not in the caller.
        return Image.open(io.BytesIO(response.content)).convert("RGB")  # Ensure consistent format
    except Exception as e:
        # Best-effort helper: callers treat ``None`` as "download failed".
        print(f"Error downloading image: {e}")
        return None
# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    """Download every listed image and collect one result entry per URL.

    Args:
        image_urls: Image URLs separated by commas and/or line breaks.
            Blank entries are ignored; ``None`` is treated as empty input.

    Returns:
        ``{"error": "No valid image URLs provided."}`` when no URL remains
        after parsing. Otherwise a list of dicts, each carrying ``image_url``
        plus either an ``error`` message or the fields added by
        ``process_batch``. Download failures are appended immediately while
        downloaded images are reported when their batch is processed, so the
        list does not necessarily follow the input order.
    """
    # The input textbox is multi-line, so accept one URL per line as well as
    # comma-separated URLs (a raw line break is never part of a valid URL).
    candidates = (image_urls or "").replace("\n", ",").split(",")
    urls = [candidate.strip() for candidate in candidates if candidate.strip()]
    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        try:
            image = download_image_as_pil(url)
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue
        if image is None:
            results.append({"image_url": url, "error": "Failed to download image"})
            continue

        batch_urls.append(url)
        batch_images.append(image)
        # Flush as soon as a full batch has been collected.
        if len(batch_images) >= BATCH_SIZE:
            process_batch(batch_urls, batch_images, results)
            batch_urls, batch_images = [], []

    # Process remaining images in the last, possibly partial, batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results)
    return results
# Helper function to process a batch
def process_batch(batch_urls, batch_images, results):
    """Encode one batch of images and append one entry per URL to ``results``.

    Args:
        batch_urls: URLs, paired positionally with ``batch_images``.
        batch_images: Images passed to ``encoder.encode_images``.
        results: List mutated in place. On success each URL gets
            ``image_url``, ``embedding_preview`` (first 5 values of the
            L2-normalized embedding) and ``success``; otherwise ``image_url``
            and ``error``.

    Returns:
        None. If anything raises while the batch is handled, every URL of the
        batch is reported once with that error and no success entries from
        the same batch are kept.
    """
    # Stage entries locally so a failure part-way through the loop cannot
    # leave success entries behind next to the error entries for the same URLs.
    staged = []
    try:
        # Generate embeddings
        embeddings = encoder.encode_images(batch_images)
        for url, embedding in zip(batch_urls, embeddings):
            norm = np.linalg.norm(embedding)
            # Dividing by a zero or non-finite norm would put NaN/inf values
            # into the preview; report the URL as failed instead.
            if norm == 0 or not np.isfinite(norm):
                staged.append({
                    "image_url": url,
                    "error": "Embedding norm is zero or non-finite; cannot normalize",
                })
                continue
            embedding_normalized = embedding / norm
            staged.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True,
            })
    except Exception as e:
        staged = [{"image_url": url, "error": str(e)} for url in batch_urls]
    results.extend(staged)
# Gradio Interface: one multi-line textbox in, JSON out, backed by
# batch_process_images.
iface = gr.Interface(
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    fn=batch_process_images,
    inputs=gr.Textbox(
        label="Batch Image URLs",
        placeholder="Enter image URLs separated by commas",
        lines=5,
    ),
    outputs=gr.JSON(label="Embedding Results"),
    # A single example row: both URLs are passed to the textbox as one
    # comma-separated string.
    examples=[
        [
            "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, "
            "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"
        ],
    ],
)

# Launch Gradio App only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
# import os | |
# import requests | |
# from PIL import Image | |
# import numpy as np | |
# from encoder import FashionCLIPEncoder | |
# from pinecone import Pinecone | |
# from dotenv import load_dotenv | |
# # Load environment variables | |
# load_dotenv() | |
# # Constants | |
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY") | |
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME") | |
# REQUESTS_HEADERS = { | |
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' | |
# } | |
# BATCH_SIZE = 30 # Define batch size for processing | |
# # Ensure API key and index name are set | |
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME: | |
# raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.") | |
# # Initialize Pinecone | |
# pc = Pinecone(api_key=PINECONE_API_KEY) | |
# # Connect to the existing index | |
# if PINECONE_INDEX_NAME not in pc.list_indexes().names(): | |
# raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.") | |
# index = pc.Index(PINECONE_INDEX_NAME) | |
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.") | |
# # Initialize encoder | |
# encoder = FashionCLIPEncoder() | |
# # Helper function to download images | |
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image: | |
# """ | |
# Downloads an image from a URL and converts it to a PIL Image in RGB format. | |
# """ | |
# try: | |
# response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout) | |
# if response.status_code == 200: | |
# return Image.open(response.raw).convert("RGB") # Ensure consistent format | |
# return None | |
# except Exception as e: | |
# print(f"Error downloading image from {url}: {e}") | |
# return None | |
# # Function to process a batch of images | |
# def batch_process_images(image_data: list, namespace: str = None): | |
# """ | |
# Processes a batch of images, generates embeddings, and uploads them to Pinecone. | |
# Args: | |
# image_data (list): A list of dictionaries with "id" and "url" keys. | |
# namespace (str): Namespace for the Pinecone index. | |
# Returns: | |
# list: A list of results containing the embedding preview or error information. | |
# """ | |
# results = [] | |
# batch_ids, batch_urls, batch_images = [], [], [] | |
# for data in image_data: | |
# try: | |
# image_id = data["id"] | |
# image_url = data["url"] | |
# # Download the image | |
# image = download_image_as_pil(image_url) | |
# if not image: | |
# results.append({"id": image_id, "url": image_url, "error": "Failed to download image"}) | |
# continue | |
# batch_ids.append(image_id) | |
# batch_urls.append(image_url) | |
# batch_images.append(image) | |
# # Process batch when reaching batch size | |
# if len(batch_images) == BATCH_SIZE: | |
# process_batch(batch_ids, batch_urls, batch_images, results, namespace) | |
# batch_ids, batch_urls, batch_images = [], [], [] | |
# except Exception as e: | |
# results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)}) | |
# # Process remaining images in the last batch | |
# if batch_images: | |
# process_batch(batch_ids, batch_urls, batch_images, results, namespace) | |
# return results | |
# # Function to process a batch and upload to Pinecone | |
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace): | |
# """ | |
# Processes a batch of images and generates embeddings, uploading them to Pinecone. | |
# Args: | |
# batch_ids (list): List of IDs for the images. | |
# batch_urls (list): List of image URLs. | |
# batch_images (list): List of PIL images. | |
# results (list): List to store results for each image. | |
# namespace (str): Namespace for the Pinecone index. | |
# """ | |
# try: | |
# # Generate embeddings | |
# embeddings = encoder.encode_images(batch_images) | |
# vectors = [] | |
# for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings): | |
# # Normalize embedding | |
# embedding_normalized = embedding / np.linalg.norm(embedding) | |
# # Append results | |
# result = { | |
# "id": image_id, | |
# "url": url, | |
# "embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview | |
# "success": True | |
# } | |
# results.append(result) | |
# # Prepare vector for upserting | |
# vectors.append({ | |
# "id": str(image_id), | |
# "values": embedding_normalized.tolist(), | |
# "metadata": {"url": url} | |
# }) | |
# # Upload vectors to Pinecone | |
# index.upsert(vectors=vectors, namespace=namespace) | |
# except Exception as e: | |
# for image_id, url in zip(batch_ids, batch_urls): | |
# results.append({"id": image_id, "url": url, "error": str(e)}) | |
# # Example usage | |
# if __name__ == "__main__": | |
# # Example input data | |
# image_data = [ | |
# {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"}, | |
# {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"} | |
# ] | |
# # Process images and upload to Pinecone under namespace "ns1" | |
# results = batch_process_images(image_data, namespace="ns1") | |
# # Print results | |
# for result in results: | |
# print(result) | |