"""Streamlit app: text-to-image and image-to-image search over an Unsplash
dataset indexed in Pinecone, using CLIP embeddings for both query types."""

import os

import requests
import streamlit as st
import torch
from dotenv import load_dotenv
from PIL import Image
from pinecone import Pinecone
from transformers import AutoProcessor, CLIPModel
import numpy as np  # noqa: F401  NOTE(review): unused here; kept in case it is relied on elsewhere

# Load environment variables (PINECONE_API_KEY) from a local .env file.
load_dotenv()

# Initialize the Pinecone client and connect to the pre-built image index.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index_name = "image-index-50000"
unsplash_index = pc.Index(index_name)


@st.cache_resource
def load_clip_model():
    """Load the CLIP model and processor once per server process.

    ``st.cache_resource`` memoizes the pair across Streamlit reruns so the
    weights are not re-downloaded/re-instantiated on every interaction.

    Returns:
        tuple: (CLIPModel, AutoProcessor) for "openai/clip-vit-base-patch32".
    """
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
    return model, processor


model, processor = load_clip_model()


def get_text_embedding(text):
    """Return a flat list[float] CLIP embedding for a text query.

    Args:
        text: The search query string.

    Returns:
        list[float]: 512-dim (for this checkpoint) text feature vector.
    """
    inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
    # Pure inference: disable autograd to avoid building a gradient graph.
    with torch.no_grad():
        text_features = model.get_text_features(**inputs)
    return text_features.detach().cpu().numpy().flatten().tolist()


def get_image_embedding(image):
    """Return a flat list[float] CLIP embedding for a PIL image.

    Args:
        image: PIL.Image.Image (already converted to RGB by the caller).

    Returns:
        list[float]: image feature vector from the same CLIP checkpoint,
        so it lives in the same space as the text embeddings.
    """
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
    return image_features.detach().cpu().numpy().flatten().tolist()


def search_similar_images(embedding, top_k=10):
    """Query Pinecone for the nearest neighbors of ``embedding``.

    Args:
        embedding: Query vector (list[float]) in CLIP space.
        top_k: Number of matches to return.

    Returns:
        list: Pinecone match dicts with ``id``, ``score`` and ``metadata``
        (metadata includes the image ``url``).
    """
    results = unsplash_index.query(
        vector=embedding,
        top_k=top_k,
        include_metadata=True,
        namespace="image-search-dataset",
    )
    return results["matches"]


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("🔍 Image Search App")

# Sidebar for search controls.
with st.sidebar:
    st.header("Search Options")

    # Search type selection.
    search_type = st.radio(
        "Select search type:",
        ("Text to Image", "Image to Image"),
    )

    # Input widget depends on the selected search type.
    if search_type == "Text to Image":
        search_query = st.text_input("Enter your search query (e.g. Flower)")
        uploaded_file = None
    else:  # Image to Image
        uploaded_file = st.file_uploader(
            "Upload an image to search", type=["jpg", "jpeg", "png"]
        )
        search_query = None

    search_button = st.button("Search")

# Main content area for results.
if search_button:
    if (search_type == "Text to Image" and search_query) or (
        search_type == "Image to Image" and uploaded_file
    ):
        # Generate the query embedding based on search type.
        with st.spinner("Generating embedding..."):
            if search_type == "Text to Image":
                embedding = get_text_embedding(search_query)
            else:  # Image to Image
                image = Image.open(uploaded_file).convert("RGB")
                embedding = get_image_embedding(image)
                # Echo the uploaded image back to the user.
                st.image(image, caption="Uploaded Image", use_container_width=True)

        # Search for similar images.
        with st.spinner("Searching for similar images..."):
            matches = search_similar_images(embedding, top_k=10)

        # Display results.
        st.subheader("Top Similar Images")
        for match in matches:
            score = match["score"]
            photo_id = match["id"]
            url = match["metadata"]["url"]
            st.write(f"**Photo ID**: {photo_id} | **Similarity Score**: {score:.4f}")
            try:
                # Fetch and display the image from the URL.
                # timeout prevents a hung remote host from stalling the app;
                # raise_for_status surfaces HTTP errors in the except below.
                response = requests.get(url, stream=True, timeout=10)
                response.raise_for_status()
                response.raw.decode_content = True
                img = Image.open(response.raw)
                st.image(img, caption=f"Photo ID: {photo_id}", use_container_width=True)
            except Exception as e:
                st.error(f"Could not load image from {url}: {e}")
    else:
        st.warning("Please provide a search query or upload an image!")

# Instructions
st.write("---")
st.write("Note: This app searches an Unsplash dataset indexed in Pinecone using CLIP embeddings based on your input.")