# openFashionClip / app copy.py
import uuid
import requests
from PIL import Image
import numpy as np
import gradio as gr
from encoder import FashionCLIPEncoder
# Constants
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
BATCH_SIZE = 30  # Define batch size for processing

# Initialize encoder
encoder = FashionCLIPEncoder()
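# Note (assumption, not stated in the original file): FashionCLIPEncoder is expected to
# expose encode_images(images) taking a list of PIL images and returning one array-like
# embedding per image, which is how process_batch() below uses it.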
# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
    """Download an image from a URL and convert it to a PIL Image in RGB format."""
    try:
        response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
        if response.status_code == 200:
            return Image.open(response.raw).convert("RGB")  # Ensure consistent format
        return None
    except Exception as e:
        print(f"Error downloading image from {url}: {e}")
        return None
# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    """Generate embedding previews for a comma-separated string of image URLs."""
    # Split the input string by commas and strip whitespace
    urls = [url.strip() for url in image_urls.split(",") if url.strip()]
    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []

    for url in urls:
        try:
            # Download image
            image = download_image_as_pil(url)
            if not image:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue

            batch_urls.append(url)
            batch_images.append(image)

            # Process batch when reaching batch size
            if len(batch_images) == BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})

    # Process remaining images in the last batch
    if batch_images:
        process_batch(batch_urls, batch_images, results)

    return results
# Helper function to process a batch
def process_batch(batch_urls, batch_images, results):
    """Encode a batch of images and append normalized embedding previews to results."""
    try:
        # Generate embeddings
        embeddings = encoder.encode_images(batch_images)
        for url, embedding in zip(batch_urls, embeddings):
            # Normalize embedding
            embedding_normalized = embedding / np.linalg.norm(embedding)

            # Append results
            results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })
    except Exception as e:
        for url in batch_urls:
            results.append({"image_url": url, "error": str(e)})
# Gradio Interface
iface = gr.Interface(
    fn=batch_process_images,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter image URLs separated by commas",
        label="Batch Image URLs",
    ),
    outputs=gr.JSON(label="Embedding Results"),
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    examples=[
        ["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
    ],
)

# Launch Gradio App
if __name__ == "__main__":
    iface.launch()
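# Usage sketch (assumption, not part of the original app): batch_process_images can also
# be called directly, e.g. from a Python shell, with the same comma-separated input the
# Gradio textbox expects:
#
# results = batch_process_images(
#     "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, "
#     "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"
# )
# for result in results:
#     print(result)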
# import os
# import requests
# from PIL import Image
# import numpy as np
# from encoder import FashionCLIPEncoder
# from pinecone import Pinecone
# from dotenv import load_dotenv
#
# # Load environment variables
# load_dotenv()
#
# # Constants
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# REQUESTS_HEADERS = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# }
# BATCH_SIZE = 30  # Define batch size for processing
#
# # Ensure API key and index name are set
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
#     raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")
#
# # Initialize Pinecone
# pc = Pinecone(api_key=PINECONE_API_KEY)
#
# # Connect to the existing index
# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
#     raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")
#
# index = pc.Index(PINECONE_INDEX_NAME)
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")
#
# # Initialize encoder
# encoder = FashionCLIPEncoder()
#
# # Helper function to download images
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
#     """
#     Downloads an image from a URL and converts it to a PIL Image in RGB format.
#     """
#     try:
#         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
#         if response.status_code == 200:
#             return Image.open(response.raw).convert("RGB")  # Ensure consistent format
#         return None
#     except Exception as e:
#         print(f"Error downloading image from {url}: {e}")
#         return None
#
# # Function to process a batch of images
# def batch_process_images(image_data: list, namespace: str = None):
#     """
#     Processes a batch of images, generates embeddings, and uploads them to Pinecone.
#
#     Args:
#         image_data (list): A list of dictionaries with "id" and "url" keys.
#         namespace (str): Namespace for the Pinecone index.
#
#     Returns:
#         list: A list of results containing the embedding preview or error information.
#     """
#     results = []
#     batch_ids, batch_urls, batch_images = [], [], []
#
#     for data in image_data:
#         try:
#             image_id = data["id"]
#             image_url = data["url"]
#
#             # Download the image
#             image = download_image_as_pil(image_url)
#             if not image:
#                 results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
#                 continue
#
#             batch_ids.append(image_id)
#             batch_urls.append(image_url)
#             batch_images.append(image)
#
#             # Process batch when reaching batch size
#             if len(batch_images) == BATCH_SIZE:
#                 process_batch(batch_ids, batch_urls, batch_images, results, namespace)
#                 batch_ids, batch_urls, batch_images = [], [], []
#         except Exception as e:
#             results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})
#
#     # Process remaining images in the last batch
#     if batch_images:
#         process_batch(batch_ids, batch_urls, batch_images, results, namespace)
#
#     return results
#
# # Function to process a batch and upload to Pinecone
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
#     """
#     Processes a batch of images and generates embeddings, uploading them to Pinecone.
#
#     Args:
#         batch_ids (list): List of IDs for the images.
#         batch_urls (list): List of image URLs.
#         batch_images (list): List of PIL images.
#         results (list): List to store results for each image.
#         namespace (str): Namespace for the Pinecone index.
#     """
#     try:
#         # Generate embeddings
#         embeddings = encoder.encode_images(batch_images)
#         vectors = []
#         for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
#             # Normalize embedding
#             embedding_normalized = embedding / np.linalg.norm(embedding)
#
#             # Append results
#             result = {
#                 "id": image_id,
#                 "url": url,
#                 "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
#                 "success": True
#             }
#             results.append(result)
#
#             # Prepare vector for upserting
#             vectors.append({
#                 "id": str(image_id),
#                 "values": embedding_normalized.tolist(),
#                 "metadata": {"url": url}
#             })
#
#         # Upload vectors to Pinecone
#         index.upsert(vectors=vectors, namespace=namespace)
#     except Exception as e:
#         for image_id, url in zip(batch_ids, batch_urls):
#             results.append({"id": image_id, "url": url, "error": str(e)})
#
# # Example usage
# if __name__ == "__main__":
#     # Example input data
#     image_data = [
#         {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
#         {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
#     ]
#
#     # Process images and upload to Pinecone under namespace "ns1"
#     results = batch_process_images(image_data, namespace="ns1")
#
#     # Print results
#     for result in results:
#         print(result)
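# Query sketch (assumption, not part of the original script): once vectors are upserted,
# the same index can be searched with a normalized query embedding. The call below
# assumes the Pinecone v3+ Python client used above (pc.Index(...)); some_pil_image is
# a hypothetical placeholder for a query image.
#
# query_embedding = encoder.encode_images([some_pil_image])[0]
# query_embedding = query_embedding / np.linalg.norm(query_embedding)
# matches = index.query(
#     vector=query_embedding.tolist(),
#     top_k=3,
#     namespace="ns1",
#     include_metadata=True,
# )
# print(matches)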