Spaces:

traversaal-internal
/

pro-search-api

Sleeping

App Files Files Community

vhr1007 committed on Aug 19, 2024

Commit

567e7ba

1 Parent(s): 5897f5d

adding embed-query

Browse files

Files changed (3) hide show

app.py +9 -6
requirements.txt +1 -0
services/qdrant_searcher.py +16 -5

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from services.openai_service import generate_rag_response
 from utils.auth import token_required
 from dotenv import load_dotenv
 import os
 # Load environment variables from .env file
 load_dotenv()
@@ -57,7 +58,7 @@ try:
     # Initialize the Qdrant searcher after the model is successfully loaded
     global searcher  # Ensure searcher is accessible globally if needed
-    searcher = QdrantSearcher(encoder=model, qdrant_url=qdrant_url, access_token=access_token)
 except Exception as e:
     logging.error(f"Failed to load the model or initialize searcher: {e}")
@@ -68,7 +69,7 @@ def embed_text(text):
     inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
     outputs = model(**inputs)
     embeddings = outputs.last_hidden_state.mean(dim=1)  # Example: mean pooling
-    return embeddings
 # Define the request body models
 class SearchDocumentsRequest(BaseModel):
@@ -97,8 +98,10 @@ async def search_documents(
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.query)
-        # Assuming searcher.search_documents uses these embeddings for search
-        hits, error = searcher.search_documents("documents", query_embedding, user_id, body.limit)
         if error:
             logging.error(f"Search documents error: {error}")
@@ -128,7 +131,7 @@ async def generate_rag_response_api(
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.search_query)
-        # Perform search using the encoded query
         hits, error = searcher.search_documents("documents", query_embedding, user_id)
         if error:
@@ -137,7 +140,7 @@ async def generate_rag_response_api(
         logging.info("Generating RAG response")
-        # Assuming generate_rag_response uses the retrieved documents to generate a response
         response, error = generate_rag_response(hits, body.search_query)
         if error:

 from utils.auth import token_required
 from dotenv import load_dotenv
 import os
+import torch
 # Load environment variables from .env file
 load_dotenv()
     # Initialize the Qdrant searcher after the model is successfully loaded
     global searcher  # Ensure searcher is accessible globally if needed
+    searcher = QdrantSearcher(qdrant_url=qdrant_url, access_token=access_token)
 except Exception as e:
     logging.error(f"Failed to load the model or initialize searcher: {e}")
     inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
     outputs = model(**inputs)
     embeddings = outputs.last_hidden_state.mean(dim=1)  # Example: mean pooling
+    return embeddings.detach().numpy()
 # Define the request body models
 class SearchDocumentsRequest(BaseModel):
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.query)
+        collection_name = "my_embeddings"  # Use the collection name where the embeddings are stored
+        # Perform search using the precomputed embeddings
+        hits, error = searcher.search_documents(collection_name, query_embedding, user_id, body.limit)
         if error:
             logging.error(f"Search documents error: {error}")
         # Encode the query using the custom embedding function
         query_embedding = embed_text(body.search_query)
+        # Perform search using the precomputed embeddings
         hits, error = searcher.search_documents("documents", query_embedding, user_id)
         if error:
         logging.info("Generating RAG response")
+        # Generate the RAG response using the retrieved documents
         response, error = generate_rag_response(hits, body.search_query)
         if error:

requirements.txt CHANGED Viewed

@@ -5,6 +5,7 @@ cryptography>=3.4.7
 openai==1.37.1
 PyJWT==2.6.0
 nltk==3.6.7
 pydantic==2.8.2
 pydantic_core==2.20.1
 Pygments==2.18.0

 openai==1.37.1
 PyJWT==2.6.0
 nltk==3.6.7
+numpy==1.22.0
 pydantic==2.8.2
 pydantic_core==2.20.1
 Pygments==2.18.0

services/qdrant_searcher.py CHANGED Viewed

@@ -1,21 +1,32 @@
 import logging
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Filter, FieldCondition
 class QdrantSearcher:
-    def __init__(self, encoder, qdrant_url, access_token):
-        self.encoder = encoder
         self.client = QdrantClient(url=qdrant_url, api_key=access_token)
-    def search_documents(self, collection_name, query, user_id, limit=3):
         logging.info("Starting document search")
-        query_vector = self.encoder.encode(query).tolist()
         query_filter = Filter(must=[FieldCondition(key="user_id", match={"value": user_id})])
         try:
             hits = self.client.search(
                 collection_name=collection_name,
-                query_vector=query_vector,
                 limit=limit,
                 query_filter=query_filter
             )

 import logging
+import torch
+import numpy as np
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Filter, FieldCondition
 class QdrantSearcher:
+    def __init__(self, qdrant_url, access_token):
+        # Removed the encoder since embeddings are precomputed externally
         self.client = QdrantClient(url=qdrant_url, api_key=access_token)
+    def search_documents(self, collection_name, query_embedding, user_id, limit=3):
         logging.info("Starting document search")
+        # Ensure the query_embedding is in the correct format (list)
+        if isinstance(query_embedding, torch.Tensor):
+            query_embedding = query_embedding.detach().numpy().tolist()
+            logging.info("Converted query embedding to list")
+        elif isinstance(query_embedding, np.ndarray):
+            query_embedding = query_embedding.tolist()
+            logging.info("Converted query embedding to list")
+        # Filter by user_id
         query_filter = Filter(must=[FieldCondition(key="user_id", match={"value": user_id})])
         try:
             hits = self.client.search(
                 collection_name=collection_name,
+                query_vector=query_embedding,
                 limit=limit,
                 query_filter=query_filter
             )