Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from PyPDF2 import PdfReader
|
|
5 |
import gradio as gr
|
6 |
from datasets import Dataset, load_from_disk
|
7 |
from sentence_transformers import SentenceTransformer
|
|
|
8 |
|
9 |
# Extract text from PDF
|
10 |
def extract_text_from_pdf(pdf_path):
|
@@ -45,7 +46,19 @@ os.makedirs(index_path, exist_ok=True)
|
|
45 |
# Save the dataset to disk and create an index
|
46 |
dataset.save_to_disk(dataset_path)
|
47 |
dataset = load_from_disk(dataset_path)
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
# Custom retriever
|
51 |
def retrieve(query):
|
|
|
5 |
import gradio as gr
|
6 |
from datasets import Dataset, load_from_disk
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
+
import numpy as np
|
9 |
|
10 |
# Extract text from PDF
|
11 |
def extract_text_from_pdf(pdf_path):
|
|
|
46 |
# Save the dataset to disk and create an index
|
47 |
dataset.save_to_disk(dataset_path)
|
48 |
dataset = load_from_disk(dataset_path)
|
49 |
+
|
50 |
+
# Add FAISS index while addressing numpy object deprecation
|
51 |
+
def add_faiss_index(dataset, column):
    """Attach a FAISS index over ``column`` to ``dataset`` and return it.

    Args:
        dataset: Object exposing ``add_faiss_index(column=...)``; presumably
            a ``datasets.Dataset`` (the file imports it) -- confirm at the
            call site.
        column: Name of the column that holds the embedding vectors.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    # The dataset builds and owns its index, so no separate hand-built
    # ``faiss.IndexFlatL2`` is needed: such an index would never be attached
    # to the dataset and would only copy every embedding into memory.
    dataset.add_faiss_index(column=column)
    return dataset
|
59 |
+
|
60 |
+
# Build the FAISS index over the "embeddings" column, then persist it.
dataset = add_faiss_index(dataset, column="embeddings")
# `Dataset` has no `save` method. `save_faiss_index` writes one named index
# to a file, so the index goes into a file inside the `index_path` directory.
# NOTE(review): the file name is an assumption -- keep it in sync with
# whatever code loads the index.
dataset.save_faiss_index(
    "embeddings", os.path.join(index_path, "embeddings.faiss")
)
|
62 |
|
63 |
# Custom retriever
|
64 |
def retrieve(query):
|