Spaces:
Sleeping
Sleeping
vhr1007
committed on
Commit
·
2733f93
1
Parent(s):
c6afeca
increasing-similarity-score
Browse files
services/qdrant_searcher.py
CHANGED
@@ -8,7 +8,7 @@ class QdrantSearcher:
|
|
8 |
def __init__(self, qdrant_url, access_token):
|
9 |
self.client = QdrantClient(url=qdrant_url, api_key=access_token)
|
10 |
|
11 |
-
def search_documents(self, collection_name, query_embedding, user_id, limit=3):
|
12 |
logging.info("Starting document search")
|
13 |
|
14 |
# Ensure the query_embedding is in the correct format (flat list of floats)
|
@@ -25,7 +25,7 @@ class QdrantSearcher:
|
|
25 |
|
26 |
# Filter by user_id
|
27 |
query_filter = Filter(must=[FieldCondition(key="user_id", match={"value": user_id})])
|
28 |
-
|
29 |
try:
|
30 |
hits = self.client.search(
|
31 |
collection_name=collection_name,
|
@@ -36,13 +36,15 @@ class QdrantSearcher:
|
|
36 |
except Exception as e:
|
37 |
logging.error(f"Error during Qdrant search: {e}")
|
38 |
return None, str(e)
|
|
|
|
|
39 |
|
40 |
-
if not hits:
|
41 |
logging.info("No documents found for the given query")
|
42 |
return None, "No documents found for the given query."
|
43 |
|
44 |
hits_list = []
|
45 |
-
for hit in hits:
|
46 |
hit_info = {
|
47 |
"id": hit.id,
|
48 |
"score": hit.score,
|
|
|
8 |
def __init__(self, qdrant_url, access_token):
|
9 |
self.client = QdrantClient(url=qdrant_url, api_key=access_token)
|
10 |
|
11 |
+
def search_documents(self, collection_name, query_embedding, user_id, limit=3,similarity_threshold=0.6):
|
12 |
logging.info("Starting document search")
|
13 |
|
14 |
# Ensure the query_embedding is in the correct format (flat list of floats)
|
|
|
25 |
|
26 |
# Filter by user_id
|
27 |
query_filter = Filter(must=[FieldCondition(key="user_id", match={"value": user_id})])
|
28 |
+
logging.info(f"Performing search using the precomputed embeddings for user_id: {user_id}")
|
29 |
try:
|
30 |
hits = self.client.search(
|
31 |
collection_name=collection_name,
|
|
|
36 |
except Exception as e:
|
37 |
logging.error(f"Error during Qdrant search: {e}")
|
38 |
return None, str(e)
|
39 |
+
|
40 |
+
filtered_hits = [hit for hit in hits if hit.score >= similarity_threshold]
|
41 |
|
42 |
+
if not filtered_hits:
|
43 |
logging.info("No documents found for the given query")
|
44 |
return None, "No documents found for the given query."
|
45 |
|
46 |
hits_list = []
|
47 |
+
for hit in filtered_hits:
|
48 |
hit_info = {
|
49 |
"id": hit.id,
|
50 |
"score": hit.score,
|