Spaces:

Streetmarkets
/

openFashionClip

Sleeping

openFashionClip / app copy.py

change

4bd62d7 about 1 month ago

9.16 kB

	import uuid
	import requests
	from PIL import Image
	import numpy as np
	import gradio as gr
	from encoder import FashionCLIPEncoder

	# Module-level configuration and shared state

	# Upper bound on how many images are handed to the encoder in one call
	BATCH_SIZE = 30

	# Headers attached to every image request (browser-like User-Agent)
	REQUESTS_HEADERS = {
	    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
	}

	# Single encoder instance, built once when the module is imported
	encoder = FashionCLIPEncoder()

	# Helper function to download images
	def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
	    """Fetch an image over HTTP and return it as an RGB PIL image.

	    Args:
	        url: Address of the image to download.
	        timeout: Value passed to ``requests.get`` as its ``timeout``.

	    Returns:
	        The decoded image converted to RGB, or ``None`` when the response
	        status is not 200 or when any error occurs while downloading or
	        decoding. Errors are printed, never raised, so callers can treat
	        ``None`` as "skip this URL".
	    """
	    try:
	        # The context manager releases the streamed connection on every path
	        with requests.get(
	            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
	        ) as response:
	            if response.status_code != 200:
	                return None

	            # The raw stream skips gzip/deflate decoding unless asked for it
	            response.raw.decode_content = True

	            # convert() reads all pixel data, so the image no longer depends
	            # on the connection once the ``with`` block exits
	            return Image.open(response.raw).convert("RGB")
	    except Exception as e:
	        # Deliberate best-effort boundary: one bad URL must not abort a batch
	        print(f"Error downloading image: {e}")
	        return None

	# Embedding function for a batch of images
	def batch_process_images(image_urls: str):
	    """Download every listed image and collect an embedding preview for each.

	    Args:
	        image_urls: Image URLs separated by commas and/or line breaks.
	            Blank entries are ignored; ``None`` is treated as empty input.

	    Returns:
	        ``{"error": ...}`` when no URL is supplied. Otherwise a list with one
	        dict per URL, holding either an ``"error"`` message or the entry
	        written by ``process_batch``. Download failures are appended as soon
	        as they happen, while successes are appended when their batch is
	        encoded, so the list is not guaranteed to follow input order.
	    """
	    # The textbox is multi-line, so accept one URL per line as well as commas
	    raw_entries = (image_urls or "").replace("\n", ",").split(",")
	    urls = [entry.strip() for entry in raw_entries if entry.strip()]

	    if not urls:
	        return {"error": "No valid image URLs provided."}

	    results = []
	    batch_urls, batch_images = [], []

	    for url in urls:
	        try:
	            image = download_image_as_pil(url)
	            if image is None:
	                results.append({"image_url": url, "error": "Failed to download image"})
	                continue

	            batch_urls.append(url)
	            batch_images.append(image)

	            # Encode as soon as a full batch has accumulated
	            if len(batch_images) == BATCH_SIZE:
	                process_batch(batch_urls, batch_images, results)
	                batch_urls, batch_images = [], []
	        except Exception as e:
	            # Safety net so one unexpected failure does not drop later URLs
	            results.append({"image_url": url, "error": str(e)})

	    # Encode whatever is left over after the loop
	    if batch_images:
	        process_batch(batch_urls, batch_images, results)

	    return results


	# Helper function to process a batch
	def process_batch(batch_urls, batch_images, results):
	    """Encode one batch of images and append one result per URL to ``results``.

	    Args:
	        batch_urls: URLs matching ``batch_images`` position by position.
	        batch_images: Images passed to ``encoder.encode_images``.
	        results: List mutated in place; nothing is returned.

	    On success each appended dict carries ``image_url``, ``embedding_preview``
	    (the first five values of the normalized embedding) and ``success``. If
	    anything fails, every URL in the batch gets an ``error`` dict instead and
	    no success entries from this batch are kept.
	    """
	    try:
	        embeddings = encoder.encode_images(batch_images)

	        # Stage entries locally so a mid-batch failure cannot leave both a
	        # success entry and an error entry for the same URL in ``results``
	        batch_results = []
	        for url, embedding in zip(batch_urls, embeddings):
	            norm = np.linalg.norm(embedding)
	            # A zero vector cannot be scaled to unit length; keep it as is
	            # rather than dividing by zero and emitting NaN values
	            embedding_normalized = embedding / norm if norm > 0 else embedding

	            batch_results.append({
	                "image_url": url,
	                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
	                "success": True,
	            })
	    except Exception as e:
	        results.extend({"image_url": url, "error": str(e)} for url in batch_urls)
	    else:
	        results.extend(batch_results)


	# Gradio UI: a multi-line textbox of URLs in, a JSON view of the results out
	iface = gr.Interface(
	    title="Batch Fashion CLIP Embedding API",
	    description="Enter multiple image URLs (separated by commas) to generate embeddings for the batch. Each embedding preview includes the first 5 values.",
	    fn=batch_process_images,
	    inputs=gr.Textbox(
	        label="Batch Image URLs",
	        placeholder="Enter image URLs separated by commas",
	        lines=5,
	    ),
	    outputs=gr.JSON(label="Embedding Results"),
	    examples=[
	        [
	            "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp, https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"
	        ],
	    ],
	)

	# Start the app only when this file is executed as a script
	if __name__ == "__main__":
	    iface.launch()


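	# ---------------------------------------------------------------------------
	# Disabled earlier variant, kept for reference only: the same download and
	# encode pipeline, but it also upserts the normalized embeddings into a
	# Pinecone index. Nothing below this line is executed.
	# ---------------------------------------------------------------------------
	# import os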
	# import requests
	# from PIL import Image
	# import numpy as np
	# from encoder import FashionCLIPEncoder
	# from pinecone import Pinecone
	# from dotenv import load_dotenv

	# # Load environment variables
	# load_dotenv()

	# # Constants
	# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
	# PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
	# REQUESTS_HEADERS = {
	# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	# }
	# BATCH_SIZE = 30 # Define batch size for processing

	# # Ensure API key and index name are set
	# if not PINECONE_API_KEY or not PINECONE_INDEX_NAME:
	# raise ValueError("PINECONE_API_KEY and PINECONE_INDEX_NAME must be set in environment variables.")

	# # Initialize Pinecone
	# pc = Pinecone(api_key=PINECONE_API_KEY)

	# # Connect to the existing index
	# if PINECONE_INDEX_NAME not in pc.list_indexes().names():
	# raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it in your Pinecone account.")

	# index = pc.Index(PINECONE_INDEX_NAME)
	# print(f"Connected to Pinecone index '{PINECONE_INDEX_NAME}'.")

	# # Initialize encoder
	# encoder = FashionCLIPEncoder()

	# # Helper function to download images
	# def download_image_as_pil(url: str, timeout: int = 10) -> Image.Image:
	# """
	# Downloads an image from a URL and converts it to a PIL Image in RGB format.
	# """
	# try:
	# response = requests.get(url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout)
	# if response.status_code == 200:
	# return Image.open(response.raw).convert("RGB") # Ensure consistent format
	# return None
	# except Exception as e:
	# print(f"Error downloading image from {url}: {e}")
	# return None

	# # Function to process a batch of images
	# def batch_process_images(image_data: list, namespace: str = None):
	# """
	# Processes a batch of images, generates embeddings, and uploads them to Pinecone.

	# Args:
	# image_data (list): A list of dictionaries with "id" and "url" keys.
	# namespace (str): Namespace for the Pinecone index.

	# Returns:
	# list: A list of results containing the embedding preview or error information.
	# """
	# results = []
	# batch_ids, batch_urls, batch_images = [], [], []

	# for data in image_data:
	# try:
	# image_id = data["id"]
	# image_url = data["url"]

	# # Download the image
	# image = download_image_as_pil(image_url)
	# if not image:
	# results.append({"id": image_id, "url": image_url, "error": "Failed to download image"})
	# continue

	# batch_ids.append(image_id)
	# batch_urls.append(image_url)
	# batch_images.append(image)

	# # Process batch when reaching batch size
	# if len(batch_images) == BATCH_SIZE:
	# process_batch(batch_ids, batch_urls, batch_images, results, namespace)
	# batch_ids, batch_urls, batch_images = [], [], []

	# except Exception as e:
	# results.append({"id": data.get("id"), "url": data.get("url"), "error": str(e)})

	# # Process remaining images in the last batch
	# if batch_images:
	# process_batch(batch_ids, batch_urls, batch_images, results, namespace)

	# return results

	# # Function to process a batch and upload to Pinecone
	# def process_batch(batch_ids, batch_urls, batch_images, results, namespace):
	# """
	# Processes a batch of images and generates embeddings, uploading them to Pinecone.

	# Args:
	# batch_ids (list): List of IDs for the images.
	# batch_urls (list): List of image URLs.
	# batch_images (list): List of PIL images.
	# results (list): List to store results for each image.
	# namespace (str): Namespace for the Pinecone index.
	# """
	# try:
	# # Generate embeddings
	# embeddings = encoder.encode_images(batch_images)

	# vectors = []
	# for image_id, url, embedding in zip(batch_ids, batch_urls, embeddings):
	# # Normalize embedding
	# embedding_normalized = embedding / np.linalg.norm(embedding)

	# # Append results
	# result = {
	# "id": image_id,
	# "url": url,
	# "embedding_preview": embedding_normalized[:5].tolist(), # First 5 values for preview
	# "success": True
	# }
	# results.append(result)

	# # Prepare vector for upserting
	# vectors.append({
	# "id": str(image_id),
	# "values": embedding_normalized.tolist(),
	# "metadata": {"url": url}
	# })

	# # Upload vectors to Pinecone
	# index.upsert(vectors=vectors, namespace=namespace)
	# except Exception as e:
	# for image_id, url in zip(batch_ids, batch_urls):
	# results.append({"id": image_id, "url": url, "error": str(e)})

	# # Example usage
	# if __name__ == "__main__":
	# # Example input data
	# image_data = [
	# {"id": "1", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"},
	# {"id": "2", "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/00907857-C6B0-4D2A-8AEA-688BDE1E67D7_1024x1024.jpg"}
	# ]

	# # Process images and upload to Pinecone under namespace "ns1"
	# results = batch_process_images(image_data, namespace="ns1")

	# # Print results
	# for result in results:
	# print(result)