# Web file-viewer residue, not part of the program
# (Spaces status: Sleeping; file size: 9,164 bytes; revision 4bd62d7).
import uuid
import requests
from PIL import Image
import numpy as np
import gradio as gr
from encoder import FashionCLIPEncoder
# Constants
# HTTP headers sent with every image download; the User-Agent is a desktop Chrome string.
REQUESTS_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
BATCH_SIZE = 30 # Number of downloaded images accumulated before they are encoded together
# Initialize encoder
# Single module-level encoder instance used by process_batch for every batch.
encoder = FashionCLIPEncoder()
# Helper function to download images
def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the response
        status is not 200 or any error occurs while fetching or decoding.
    """
    try:
        # Using the response as a context manager guarantees the streamed
        # connection is released even on early return or decode failure.
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            if response.status_code != 200:
                return None
            # The raw stream is not content-decoded by default; without this a
            # gzip/deflate-encoded body reaches PIL as compressed bytes.
            response.raw.decode_content = True
            # convert() loads the pixel data, so the returned image no longer
            # depends on the stream once the connection is closed.
            return Image.open(response.raw).convert("RGB")  # Ensure consistent format
    except Exception as e:
        # Deliberate best-effort: callers treat None as "download failed".
        print(f"Error downloading image: {e}")
        return None
# Embedding function for a batch of images
def batch_process_images(image_urls: str):
    """Download the given image URLs and embed them in batches.

    Args:
        image_urls: URLs separated by commas and/or newlines. Surrounding
            whitespace is ignored and empty entries are skipped. ``None`` is
            treated like an empty string.

    Returns:
        ``{"error": ...}`` when no URL is supplied; otherwise a list with one
        dict per URL, either a success entry appended by ``process_batch`` or
        ``{"image_url": ..., "error": ...}``. Download failures are appended
        immediately while successes are appended when their batch is
        processed, so the list is not guaranteed to follow input order.
    """
    # Guard against None so a missing value is reported as empty input
    # instead of raising AttributeError on .split().
    raw_text = image_urls or ""
    # The textbox is multi-line, so accept newlines as separators too; a URL
    # cannot contain a raw newline, so comma-only input behaves as before.
    candidates = (part.strip() for part in raw_text.replace("\n", ",").split(","))
    urls = [candidate for candidate in candidates if candidate]
    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        # Per-URL safety net: one bad URL must not abort the whole request.
        try:
            image = download_image_as_pil(url)
            if image is None:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue
            batch_urls.append(url)
            batch_images.append(image)
            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_urls, batch_images, results)
                batch_urls, batch_images = [], []
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})

    # Process remaining images in the last, possibly partial, batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results)
    return results
# Helper function to process a batch
def process_batch(batch_urls, batch_images, results):
    """Encode one batch of images and append one entry per URL to ``results``.

    Args:
        batch_urls: URLs of the images, in the same order as ``batch_images``.
        batch_images: Images handed to ``encoder.encode_images``.
        results: List that is extended in place with the outcome entries.

    Returns:
        None. On success each URL gets
        ``{"image_url", "embedding_preview", "success"}``; a URL whose
        embedding has zero norm gets an error entry. If encoding or
        post-processing raises, every URL in the batch gets exactly one
        ``{"image_url", "error"}`` entry.
    """
    try:
        # Generate embeddings
        embeddings = encoder.encode_images(batch_images)
        # Stage entries locally so a failure part-way through cannot leave
        # success entries in `results` next to error entries for the same URL.
        staged = []
        for url, embedding in zip(batch_urls, embeddings):
            # assumes each embedding is a 1-D NumPy-compatible vector - TODO confirm
            norm = np.linalg.norm(embedding)
            if norm == 0:
                # Dividing by zero would produce NaN values in the preview.
                staged.append({"image_url": url, "error": "Embedding has zero norm"})
                continue
            # Normalize embedding
            embedding_normalized = embedding / norm
            staged.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True,
            })
        # zip() stops at the shorter input; surface a short encoder output
        # instead of silently dropping URLs.
        if len(staged) != len(batch_urls):
            raise ValueError(
                f"Encoder returned {len(staged)} embeddings for {len(batch_urls)} images"
            )
    except Exception as e:
        results.extend({"image_url": url, "error": str(e)} for url in batch_urls)
    else:
        results.extend(staged)
# Gradio Interface
# Wraps batch_process_images in a UI: one multi-line textbox as input, a JSON view as output.
iface = gr.Interface(
fn=batch_process_images,
# Free-text input; batch_process_images splits the submitted string into URLs.
inputs=gr.Textbox(
lines=5,
placeholder="Enter image URLs separated by commas",
label="Batch Image URLs",
),
# Shows whatever batch_process_images returns (a list of per-URL dicts or an error dict).
outputs=gr.JSON(label="Embedding Results"),
title="Batch Fashion CLIP Embedding API",
description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
# One example row whose single value holds two comma-separated image URLs.
examples=[
["https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"]
],
)
# Launch Gradio App
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, so
    # importing the module does not launch the app.
    iface.launch()
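# NOTE: Everything below is commented-out code and is never executed. It is a
# variant of this script that additionally upserts the normalized embeddings
# into a Pinecone index.
# import os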
# import requests
# from PIL import Image
# import numpy as np
# from encoder import FashionCLIPEncoder
# from pinecone import Pinecone
# from dotenv import load_dotenv
# # Load environment variables
# load_dotenv()
# # Constants
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# REQUESTS_HEADERS = {
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# }
# BATCH_SIZE = 30 # Define batch size for processing
# # Ensure API key and index name are set
# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
# raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")
# # Initialize Pinecone
# pc = Pinecone(api_key=PINECONE_API_KEY)
# # Connect to the existing index
# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
# raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")
# index = pc.Index(PINECONE_INDEX_NAME)
# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")
# # Initialize encoder
# encoder = FashionCLIPEncoder()
# # Helper function to download images
# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
# """
# Downloads an image from a URL and converts it to a PIL Image in RGB format.
# """
# try:
# response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
# if response.status_code == 200:
# return Image.open(response.raw).convert("RGB") # Ensure consistent format
# return None
# except Exception as e:
# print(f"Error downloading image from {url}: {e}")
# return None
# # Function to process a batch of images
# def batch_process_images(image_data: list, namespace: str = None):
# """
# Processes a batch of images, generates embeddings, and uploads them to Pinecone.
# Args:
# image_data (list): A list of dictionaries with "id" and "url" keys.
# namespace (str): Namespace for the Pinecone index.
# Returns:
# list: A list of results containing the embedding preview or error information.
# """
# results = []
# batch_ids, batch_urls, batch_images = [], [], []
# for data in image_data:
# try:
# image_id = data["id"]
# image_url = data["url"]
# # Download the image
# image = download_image_as_pil(image_url)
# if not image:
# results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
# continue
# batch_ids.append(image_id)
# batch_urls.append(image_url)
# batch_images.append(image)
# # Process batch when reaching batch size
# if len(batch_images) == BATCH_SIZE:
# process_batch(batch_ids, batch_urls, batch_images, results, namespace)
# batch_ids, batch_urls, batch_images = [], [], []
# except Exception as e:
# results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})
# # Process remaining images in the last batch
# if batch_images:
# process_batch(batch_ids, batch_urls, batch_images, results, namespace)
# return results
# # Function to process a batch and upload to Pinecone
# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
# """
# Processes a batch of images and generates embeddings, uploading them to Pinecone.
# Args:
# batch_ids (list): List of IDs for the images.
# batch_urls (list): List of image URLs.
# batch_images (list): List of PIL images.
# results (list): List to store results for each image.
# namespace (str): Namespace for the Pinecone index.
# """
# try:
# # Generate embeddings
# embeddings = encoder.encode_images(batch_images)
# vectors = []
# for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
# # Normalize embedding
# embedding_normalized = embedding / np.linalg.norm(embedding)
# # Append results
# result = {
# "id": image_id,
# "url": url,
# "embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview
# "success": True
# }
# results.append(result)
# # Prepare vector for upserting
# vectors.append({
# "id": str(image_id),
# "values": embedding_normalized.tolist(),
# "metadata": {"url": url}
# })
# # Upload vectors to Pinecone
# index.upsert(vectors=vectors, namespace=namespace)
# except Exception as e:
# for image_id, url in zip(batch_ids, batch_urls):
# results.append({"id": image_id, "url": url, "error": str(e)})
# # Example usage
# if __name__ == "__main__":
# # Example input data
# image_data = [
# {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
# {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
# ]
# # Process images and upload to Pinecone under namespace "ns1"
# results = batch_process_images(image_data, namespace="ns1")
# # Print results
# for result in results:
# print(result)