Update app.py
Browse files
app.py
CHANGED
@@ -70,14 +70,16 @@ if "vector" not in st.session_state:
|
|
70 |
# pdf_file_path = "*.pdf" # JB
|
71 |
# st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
|
72 |
# st.session_state.loader = PyPDFLoader(*.pdf).load() # JB syntax error *.pdf !
|
73 |
-
st.session_state.loader = PyPDFDirectoryLoader("*.pdf")
|
74 |
# chunks = self.text_splitter.split_documents(docs)
|
75 |
# chunks = filter_complex_metadata(chunks)
|
76 |
|
77 |
-
|
|
|
|
|
78 |
|
79 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
80 |
-
st.session_state.documents = st.session_state.text_splitter.split_documents(
|
81 |
# st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
|
82 |
st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
|
83 |
# ZIE:
|
|
|
70 |
# pdf_file_path = "*.pdf" # JB
|
71 |
# st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
|
72 |
# st.session_state.loader = PyPDFLoader(*.pdf).load() # JB syntax error *.pdf !
|
73 |
+
# PyPDFDirectoryLoader takes a directory path; the filename pattern goes in `glob`.
# NOTE(review): assumes the PDFs sit in the app's working directory - confirm (e.g. "example_data/").
st.session_state.loader = PyPDFDirectoryLoader(".", glob="*.pdf")
|
74 |
# chunks = self.text_splitter.split_documents(docs)
|
75 |
# chunks = filter_complex_metadata(chunks)
|
76 |
|
77 |
+
# JB:
|
78 |
+
# https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#pypdf-directory
|
79 |
+
# Load through the loader kept in session state; no bare `loader` name is assigned in this hunk.
st.session_state.docs = st.session_state.loader.load()
|
80 |
|
81 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
82 |
+
st.session_state.documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
|
83 |
# st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
|
84 |
st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
|
85 |
# ZIE:
|