# Spaces:

# Streetmarkets
# /

# openFashionClip

# Sleeping

# File size: 9,164 Bytes

# 4bd62d7

import uuid
import requests
from PIL import Image
import numpy as np
import gradio as gr
from encoder import FashionCLIPEncoder

# Configuration shared by the download and batching helpers
BATCH_SIZE = 30  # Maximum number of images handed to the encoder per call
REQUESTS_HEADERS = {
    # Browser-like User-Agent sent with every image request
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

# Single encoder instance, created at import time and reused by every request
encoder = FashionCLIPEncoder()

# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the server
        answers with a non-200 status or any error occurs while fetching
        or decoding the image.
    """
    try:
        # The context manager releases the streamed connection on every exit
        # path; without it a stream=True response is never closed.
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            if response.status_code != 200:
                return None
            # The raw stream skips Content-Encoding handling (gzip/deflate)
            # unless explicitly asked to decode.
            response.raw.decode_content = True
            # convert() forces the pixel data to be read before the
            # connection is closed on leaving the with-block.
            return Image.open(response.raw).convert("RGB")  # Ensure consistent format
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None

# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    """Embed every image referenced in a delimited string of URLs.

    Args:
        image_urls: URLs separated by commas and/or line breaks. ``None`` or
            a blank string is treated as "no URLs".

    Returns:
        ``{"error": ...}`` when no URL is supplied; otherwise a list with one
        dict per processed URL. Download failures are recorded immediately
        while embeddings are recorded when their batch is flushed, so entries
        are not guaranteed to follow input order.
    """
    # Accept one-URL-per-line input as well as commas: treat line breaks as
    # separators (a stray "\r" is removed by strip()).
    raw_parts = (image_urls or "").replace("\n", ",").split(",")
    urls = [part.strip() for part in raw_parts if part.strip()]

    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []

    for url in urls:
        try:
            # Download image; the helper signals failure by returning None
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue

            batch_urls.append(url)
            batch_images.append(image)

            # Process batch when reaching batch size
            if len(batch_images) == BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []

        except Exception as e:
            results.append({"image_url": url, "error": str(e)})

    # Process remaining images in the last (possibly partial) batch
    if batch_images:
        process_batch(batch_urls, batch_images, results)

    return results


# Helper function to process a batch
def process_batch(batch_urls, batch_images, results):
    """Encode one batch of images and append a result entry per URL.

    Args:
        batch_urls: URLs of the images, in the same order as ``batch_images``.
        batch_images: Images handed to ``encoder.encode_images``.
        results: List mutated in place. It gains one dict per URL: either a
            success entry carrying the first five values of the normalized
            embedding, or an error entry if anything in the batch fails.
    """
    try:
        # Generate embeddings
        embeddings = encoder.encode_images(batch_images)

        # Stage entries locally so a failure part-way through cannot leave
        # both a success entry and an error entry for the same URL.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            # Dividing by a zero norm would produce NaN values; leave an
            # all-zero vector untouched instead.
            embedding_normalized = vector / norm if norm > 0 else vector

            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True
            })

        # zip() stops at the shorter input; make a shortfall of embeddings
        # an explicit batch error rather than silently dropping URLs.
        if len(batch_results) != len(batch_urls):
            raise ValueError(
                f"Expected {len(batch_urls)} embeddings, got {len(batch_results)}"
            )
    except Exception as e:
        for url in batch_urls:
            results.append({"image_url": url, "error": str(e)})
    else:
        results.extend(batch_results)


# Gradio UI: a multi-line text box in, a JSON view of the results out
url_input = gr.Textbox(
    label="Batch Image URLs",
    placeholder="Enter image URLs separated by commas",
    lines=5,
)
json_output = gr.JSON(label="Embedding Results")

# One example row holding two comma-separated image URLs
example_urls = "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"

iface = gr.Interface(
    fn=batch_process_images,
    inputs=url_input,
    outputs=json_output,
    title="Batch Fashion CLIP Embedding API",
    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
    examples=[[example_urls]],
)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    iface.launch()


# import os
# import requests
# from PIL import Image
# import numpy as np
# from encoder import FashionCLIPEncoder
# from pinecone import Pinecone
# from dotenv import load_dotenv

# # Load environment variables
# load_dotenv()

# # Constants
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# REQUESTS_HEADERS = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# }
# BATCH_SIZE = 30  # Define batch size for processing

# # Ensure API key and index name are set
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
#     raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")

# # Initialize Pinecone
# pc = Pinecone(api_key=PINECONE_API_KEY)

# # Connect to the existing index
# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
#     raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")

# index = pc.Index(PINECONE_INDEX_NAME)
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")

# # Initialize encoder
# encoder = FashionCLIPEncoder()

# # Helper function to download images
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
#     """
#     Downloads an image from a URL and converts it to a PIL Image in RGB format.
#     """
#     try:
#         response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
#         if response.status_code == 200:
#             return Image.open(response.raw).convert("RGB")  # Ensure consistent format
#         return None
#     except Exception as e:
#         print(f"Error downloading image from {url}: {e}")
#         return None

# # Function to process a batch of images
# def batch_process_images(image_data: list, namespace: str = None):
#     """
#     Processes a batch of images, generates embeddings, and uploads them to Pinecone.

#     Args:
#         image_data (list): A list of dictionaries with "id" and "url" keys.
#         namespace (str): Namespace for the Pinecone index.

#     Returns:
#         list: A list of results containing the embedding preview or error information.
#     """
#     results = []
#     batch_ids, batch_urls, batch_images = [], [], []

#     for data in image_data:
#         try:
#             image_id = data["id"]
#             image_url = data["url"]

#             # Download the image
#             image = download_image_as_pil(image_url)
#             if not image:
#                 results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
#                 continue

#             batch_ids.append(image_id)
#             batch_urls.append(image_url)
#             batch_images.append(image)

#             # Process batch when reaching batch size
#             if len(batch_images) == BATCH_SIZE:
#                 process_batch(batch_ids, batch_urls, batch_images, results, namespace)
#                 batch_ids, batch_urls, batch_images = [], [], []

#         except Exception as e:
#             results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})

#     # Process remaining images in the last batch
#     if batch_images:
#         process_batch(batch_ids, batch_urls, batch_images, results, namespace)

#     return results

# # Function to process a batch and upload to Pinecone
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
#     """
#     Processes a batch of images and generates embeddings, uploading them to Pinecone.

#     Args:
#         batch_ids (list): List of IDs for the images.
#         batch_urls (list): List of image URLs.
#         batch_images (list): List of PIL images.
#         results (list): List to store results for each image.
#         namespace (str): Namespace for the Pinecone index.
#     """
#     try:
#         # Generate embeddings
#         embeddings = encoder.encode_images(batch_images)
        
#         vectors = []
#         for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
#             # Normalize embedding
#             embedding_normalized = embedding / np.linalg.norm(embedding)
            
#             # Append results
#             result = {
#                 "id": image_id,
#                 "url": url,
#                 "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
#                 "success": True
#             }
#             results.append(result)

#             # Prepare vector for upserting
#             vectors.append({
#                 "id": str(image_id),
#                 "values": embedding_normalized.tolist(),
#                 "metadata": {"url": url}
#             })

#         # Upload vectors to Pinecone
#         index.upsert(vectors=vectors, namespace=namespace)
#     except Exception as e:
#         for image_id, url in zip(batch_ids, batch_urls):
#             results.append({"id": image_id, "url": url, "error": str(e)})

# # Example usage
# if __name__ == "__main__":
#     # Example input data
#     image_data = [
#         {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
#         {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
#     ]

#     # Process images and upload to Pinecone under namespace "ns1"
#     results = batch_process_images(image_data, namespace="ns1")

#     # Print results
#     for result in results:
#         print(result)