JBHF committed on
Commit
145f602
1 Parent(s): 245c2de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -70,14 +70,16 @@ if "vector" not in st.session_state:
70
  # pdf_file_path = "*.pdf" # JB
71
  # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
72
  # st.session_state.loader = PyPDFLoader(*.pdf).load() # JB syntax error *.pdf !
73
- st.session_state.loader = PyPDFDirectoryLoader("*.pdf").load() # JB PyPDFDirectoryLoader("example_data/")
74
  # chunks = self.text_splitter.split_documents(docs)
75
  # chunks = filter_complex_metadata(chunks)
76
 
77
-
 
 
78
 
79
  st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
80
- st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
81
  # st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
82
  st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
83
  # ZIE:
 
70
  # pdf_file_path = "*.pdf" # JB
71
  # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
72
  # st.session_state.loader = PyPDFLoader(*.pdf).load() # JB syntax error *.pdf !
73
+ st.session_state.loader = PyPDFDirectoryLoader("*.pdf") # JB PyPDFDirectoryLoader("example_data/")
74
  # chunks = self.text_splitter.split_documents(docs)
75
  # chunks = filter_complex_metadata(chunks)
76
 
77
+ # JB:
78
+ # https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#pypdf-directory
79
+ st.session_state.docs = loader.load()
80
 
81
  st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
82
+ st.session_state.documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
83
  # st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
84
  st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
85
  # ZIE: