Spaces:
Runtime error
Runtime error
qorgh346
committed on
Commit
·
15d201d
1
Parent(s):
dd9ce97
update app.py
Browse files
app.py
CHANGED
@@ -26,17 +26,17 @@ def get_pdf_text(pdf_docs):
|
|
26 |
temp_file.write(pdf_docs.getvalue())
|
27 |
temp_file.seek(0)
|
28 |
pdf_loader = PyPDFLoader(temp_file.name)
|
29 |
-
print('pdf_loader = ', pdf_loader)
|
30 |
pdf_doc = pdf_loader.load()
|
31 |
-
print('pdf_doc = ',pdf_doc)
|
32 |
return pdf_doc
|
33 |
|
34 |
|
35 |
-
def get_text_chunks(
|
36 |
-
|
37 |
text_splitter = RecursiveCharacterTextSplitter(
|
38 |
-
chunk_size =
|
39 |
-
chunk_overlap =
|
40 |
length_function= len
|
41 |
)
|
42 |
# text_splitter = CharacterTextSplitter(
|
@@ -45,9 +45,9 @@ def get_text_chunks(text):
|
|
45 |
# chunk_overlap=200,
|
46 |
# length_function=len
|
47 |
# )
|
48 |
-
|
49 |
-
print('
|
50 |
-
return
|
51 |
|
52 |
|
53 |
def get_vectorstore(text_chunks):
|
@@ -58,7 +58,7 @@ def get_vectorstore(text_chunks):
|
|
58 |
# embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
|
59 |
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
|
60 |
# model_kwargs={'device':'cpu'})
|
61 |
-
vectorstore = FAISS.
|
62 |
# vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
|
63 |
|
64 |
return vectorstore
|
@@ -186,7 +186,7 @@ def main():
|
|
186 |
|
187 |
|
188 |
# get the text chunks
|
189 |
-
text_chunks = get_text_chunks(
|
190 |
|
191 |
# create vector store
|
192 |
vectorstore = get_vectorstore(text_chunks)
|
|
|
26 |
temp_file.write(pdf_docs.getvalue())
|
27 |
temp_file.seek(0)
|
28 |
pdf_loader = PyPDFLoader(temp_file.name)
|
29 |
+
# print('pdf_loader = ', pdf_loader)
|
30 |
pdf_doc = pdf_loader.load()
|
31 |
+
# print('pdf_doc = ',pdf_doc)
|
32 |
return pdf_doc
|
33 |
|
34 |
|
35 |
+
def get_text_chunks(documents):
|
36 |
+
|
37 |
text_splitter = RecursiveCharacterTextSplitter(
|
38 |
+
chunk_size = 1000,
|
39 |
+
chunk_overlap = 200,
|
40 |
length_function= len
|
41 |
)
|
42 |
# text_splitter = CharacterTextSplitter(
|
|
|
45 |
# chunk_overlap=200,
|
46 |
# length_function=len
|
47 |
# )
|
48 |
+
documents = text_splitter.split_documents(documents)
|
49 |
+
print('documents = ', documents)
|
50 |
+
return documents
|
51 |
|
52 |
|
53 |
def get_vectorstore(text_chunks):
|
|
|
58 |
# embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
|
59 |
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
|
60 |
# model_kwargs={'device':'cpu'})
|
61 |
+
vectorstore = FAISS.from_documents(texts=text_chunks, embedding=embeddings)
|
62 |
# vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
|
63 |
|
64 |
return vectorstore
|
|
|
186 |
|
187 |
|
188 |
# get the text chunks
|
189 |
+
text_chunks = get_text_chunks(doc_list)
|
190 |
|
191 |
# create vector store
|
192 |
vectorstore = get_vectorstore(text_chunks)
|