Spaces:
Runtime error
Runtime error
kevin-pek
committed on
Commit
•
3e243df
1
Parent(s):
e6dc9f0
edit number of results to retrieve
Browse files
main.py
CHANGED
@@ -7,7 +7,7 @@ from langchain.vectorstores import FAISS
|
|
7 |
import gradio as gr
|
8 |
import re
|
9 |
|
10 |
-
model = "msmarco-distilbert-base-
|
11 |
embeddings = SentenceTransformerEmbeddings(model_name=model)
|
12 |
prev_files = None
|
13 |
retriever = None
|
@@ -20,7 +20,7 @@ def handle_files_and_query(query, files):
|
|
20 |
prev_files = files
|
21 |
for file in files:
|
22 |
documents.extend(PyMuPDFLoader(file).load_and_split(SentenceTransformersTokenTextSplitter(model_name=model)))
|
23 |
-
retriever = BM25Retriever.from_documents(documents, k=
|
24 |
results += "Index created successfully!\n"
|
25 |
print("Index created successfully!")
|
26 |
elif files is None:
|
@@ -30,17 +30,13 @@ def handle_files_and_query(query, files):
|
|
30 |
|
31 |
print(f"Query: {query}")
|
32 |
if query:
|
33 |
-
search_results = retriever.get_relevant_documents(query
|
34 |
pattern = r'[^\\/]+$' # pattern to get filename from filepath
|
35 |
-
reranked_results = FAISS.from_documents(search_results, embeddings, distance_strategy=DistanceStrategy.COSINE).similarity_search(query, k=
|
36 |
-
|
37 |
-
f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}"
|
38 |
for result in reranked_results
|
39 |
-
]
|
40 |
-
results = [
|
41 |
-
f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}"
|
42 |
-
for result in reranked_results
|
43 |
-
][0]
|
44 |
return results
|
45 |
|
46 |
interface = gr.Interface(
|
|
|
7 |
import gradio as gr
|
8 |
import re
|
9 |
|
10 |
+
model = "msmarco-distilbert-base-tas-b"
|
11 |
embeddings = SentenceTransformerEmbeddings(model_name=model)
|
12 |
prev_files = None
|
13 |
retriever = None
|
|
|
20 |
prev_files = files
|
21 |
for file in files:
|
22 |
documents.extend(PyMuPDFLoader(file).load_and_split(SentenceTransformersTokenTextSplitter(model_name=model)))
|
23 |
+
retriever = BM25Retriever.from_documents(documents, k=100)
|
24 |
results += "Index created successfully!\n"
|
25 |
print("Index created successfully!")
|
26 |
elif files is None:
|
|
|
30 |
|
31 |
print(f"Query: {query}")
|
32 |
if query:
|
33 |
+
search_results = retriever.get_relevant_documents(query)
|
34 |
pattern = r'[^\\/]+$' # pattern to get filename from filepath
|
35 |
+
reranked_results = FAISS.from_documents(search_results, embeddings, distance_strategy=DistanceStrategy.COSINE).similarity_search(query, k=25)
|
36 |
+
results = "\n".join([
|
37 |
+
f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
|
38 |
for result in reranked_results
|
39 |
+
])
|
|
|
|
|
|
|
|
|
40 |
return results
|
41 |
|
42 |
interface = gr.Interface(
|