Moghazy committed on
Commit
b7aebfc
·
1 Parent(s): 1e86b23

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ import langchain
5
+ from htmlTemplates import css,bot_template,user_template,url,aiLogoUrl
6
+
7
+
8
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (in this
            app, the files returned by the Streamlit uploader).

    Returns:
        One string with the page texts joined in document order, then page
        order. Returns ``""`` when ``pdf_docs`` is empty.
    """
    # ``or ""`` keeps a page with no extractable text (e.g. a scanned image)
    # from breaking the concatenation if ``extract_text`` yields ``None``.
    # ``str.join`` avoids the quadratic cost of repeated ``+=`` on long PDFs.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
15
+
16
def get_text_chunks(text):
    """Split raw text into overlapping chunks suitable for embedding.

    The text is split on newlines and regrouped into chunks of at most
    1000 characters (measured with ``len``), with 200 characters of overlap
    between consecutive chunks.

    Args:
        text: The full text to split.

    Returns:
        The list of chunks produced by ``CharacterTextSplitter.split_text``.
    """
    # Import the submodule explicitly: a bare ``import langchain`` only exposes
    # ``langchain.text_splitter`` if something else happened to import it.
    from langchain.text_splitter import CharacterTextSplitter

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
25
+
26
def get_vectorstore(text_chunks):
    """Embed the text chunks with Cohere and index them in a FAISS store.

    Args:
        text_chunks: Non-empty list of strings to embed and index.

    Returns:
        The FAISS vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty, since there is nothing to
            embed or index.
    """
    # Fail early with a clear message instead of an opaque error from deep
    # inside the embedding / index construction code.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the documents contained no extractable text."
        )

    # Import the submodules explicitly: a bare ``import langchain`` does not
    # guarantee that these attributes are loaded.
    from langchain.embeddings import CohereEmbeddings
    from langchain.vectorstores import FAISS

    return FAISS.from_texts(texts=text_chunks, embedding=CohereEmbeddings())
30
+
31
+
32
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain backed by the given vector store.

    The chain uses a Cohere LLM, retrieves context through
    ``vectorstore.as_retriever()``, and stores the dialogue in a
    ``ConversationBufferMemory`` under the ``'chat_history'`` key, returned
    as message objects.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    # Import the submodules explicitly: a bare ``import langchain`` does not
    # guarantee that these attributes are loaded.
    from langchain.chains import ConversationalRetrievalChain
    from langchain.llms import Cohere
    from langchain.memory import ConversationBufferMemory

    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=Cohere(),
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
41
+
42
+
43
def handle_userinput(user_question):
    """Send the question to the conversation chain and render the chat history.

    The chain's response replaces ``st.session_state.chat_history``. Messages
    at even indices are rendered with the user template and odd indices with
    the bot template.

    Args:
        user_question: The question text entered by the user.
    """
    import html

    conversation = st.session_state.conversation
    # The chain only exists after documents have been processed; calling
    # ``None`` would raise a TypeError, so tell the user what to do instead.
    if conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered as raw HTML, so escape the message text
        # to keep user- or document-derived markup from being injected.
        st.write(
            template.replace("{{MSG}}", html.escape(message.content)),
            unsafe_allow_html=True,
        )
52
+
53
def main():
    """Run the Streamlit app: page setup, question box, and PDF processing sidebar.

    Loads environment variables, initialises the ``conversation`` and
    ``chat_history`` session-state slots, answers a typed question through
    ``handle_userinput``, and, when "Process" is clicked, builds a new
    conversation chain from the uploaded files.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple pdfs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    st.markdown(
        '<div class="logo-container"><img class="logo" src="' + url + '" /><img class="logo" src="' + aiLogoUrl + '" /></div>',
        unsafe_allow_html=True)

    # Session state survives Streamlit reruns; create the slots only once.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple pdfs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader("Upload your files here and click process", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                # Nothing uploaded: processing would only build an empty index.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get pdf text
                    raw_text = get_pdf_text(pdf_docs)

                    # get the text chunks
                    text_chunks = get_text_chunks(raw_text)

                    if not text_chunks:
                        # e.g. scanned PDFs with no text layer: nothing to embed.
                        st.warning("No extractable text was found in the uploaded files.")
                    else:
                        # create vector store
                        vectorstore = get_vectorstore(text_chunks)

                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)


if __name__ == '__main__':
    main()