Rathapoom committed on
Commit
4b10535
1 Parent(s): 9dd2a6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -5,6 +5,7 @@ from PyPDF2 import PdfReader
5
  import gradio as gr
6
  from datasets import Dataset, load_from_disk
7
  from sentence_transformers import SentenceTransformer
 
8
 
9
  # Extract text from PDF
10
  def extract_text_from_pdf(pdf_path):
@@ -45,7 +46,19 @@ os.makedirs(index_path, exist_ok=True)
45
  # Save the dataset to disk and create an index
46
  dataset.save_to_disk(dataset_path)
47
  dataset = load_from_disk(dataset_path)
48
- dataset.add_faiss_index(column="embeddings").save(index_path)
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  # Custom retriever
51
  def retrieve(query):
 
5
  import gradio as gr
6
  from datasets import Dataset, load_from_disk
7
  from sentence_transformers import SentenceTransformer
8
+ import numpy as np
9
 
10
  # Extract text from PDF
11
  def extract_text_from_pdf(pdf_path):
 
46
  # Save the dataset to disk and create an index
47
  dataset.save_to_disk(dataset_path)
48
  dataset = load_from_disk(dataset_path)
49
+
50
def add_faiss_index(dataset, column):
    """Attach a FAISS index built from ``column`` to ``dataset`` and return it.

    An earlier version of this helper also converted the column to a NumPy
    array and filled a separate ``faiss.IndexFlatL2`` by hand. That index was
    never attached to the dataset or returned, so the work was discarded, and
    it could fail on its own (for example ``embeddings.shape[1]`` raises on an
    empty column). ``dataset.add_faiss_index`` builds the index from the
    column itself, so it is the only call that is needed.

    Args:
        dataset: Object exposing ``add_faiss_index(column=...)``; in this
            script it is the object returned by ``load_from_disk``.
        column: Name of the column that stores the embedding vectors.

    Returns:
        The same ``dataset`` object that was passed in, after the index for
        ``column`` has been added to it.
    """
    # The index is attached in place; the dataset is returned so the helper
    # can still be used on the right-hand side of an assignment.
    dataset.add_faiss_index(column=column)
    return dataset
59
+
60
# Build the index in memory, then persist only the index. ``Dataset`` has no
# ``save`` method; ``save_faiss_index`` writes a named index to a file. The
# index name defaults to the column name, and ``index_path`` is a directory
# (created with ``os.makedirs`` earlier in the script), so the index is
# written to a file inside it.
dataset = add_faiss_index(dataset, column="embeddings")
dataset.save_faiss_index(
    "embeddings", os.path.join(index_path, "embeddings.faiss")
)
62
 
63
  # Custom retriever
64
  def retrieve(query):