ishans24 committed on
Commit
88bd9e4
1 Parent(s): 1bef0a4

Create app.py

Files changed (1)
app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
+ import os
+ from PyPDF2 import PdfReader
+ import streamlit as st
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores.faiss import FAISS
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.prompts import PromptTemplate
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import google.generativeai as genai
+ from dotenv import load_dotenv
+
+ # Read GOOGLE_API_KEY from the environment (or a local .env file) and configure Gemini
+ load_dotenv()
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
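+ # Pipeline: extract_pdf_text -> split_text_into_chunks -> create_vector_store -> handle_user_input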
+
+ # Function to extract text from PDFs
+ def extract_pdf_text(pdfs):
+     all_text = ""
+     for pdf in pdfs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             # extract_text() can return None (e.g. image-only pages), so guard the concatenation
+             all_text += page.extract_text() or ""
+     return all_text
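+
+ # Note: RecursiveCharacterTextSplitter counts chunk_size/chunk_overlap in characters by default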
+
+ # Function to split text into chunks
+ def split_text_into_chunks(text):
+     splitter = RecursiveCharacterTextSplitter(chunk_size=12000, chunk_overlap=1200)
+     text_chunks = splitter.split_text(text)
+     return text_chunks
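+
+ # The FAISS index is persisted to the local "faiss_index" folder and reloaded for every query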
+
+ # Function to create vector store
+ def create_vector_store(chunks):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     vector_store = FAISS.from_texts(chunks, embedding=embeddings)
+     vector_store.save_local("faiss_index")
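+
+ # chain_type="stuff" concatenates all retrieved documents into a single prompt for the model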
+
+ # Function to set up the conversation chain for QA
+ def setup_conversation_chain(template):
+     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+     prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+     return chain
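+
+ # Retrieval flow: embed the question, fetch the most similar chunks from FAISS, then answer from those chunks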
+
+ # Function to handle user input based on the selected mode
+ def handle_user_input(mode, user_question=None):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     # allow_dangerous_deserialization is needed to load the pickled index; safe here because this app wrote it itself
+     indexed_data = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+     docs = indexed_data.similarity_search(user_question)
+
+     chain = setup_conversation_chain(prompt_template[mode])
+     response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
+     return response["output_text"]
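+
+ # Each template receives the retrieved chunks as {context} and the user's request as {question}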
+
+ # Prompt templates for each mode
+ prompt_template = {
+     "chat": """
+     Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+     "quiz": """
+     Your alias is Neural-PDF. Your task is to generate multiple-choice quiz questions based on the given context and the requested number of questions, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+     "long": """
+     Your alias is Neural-PDF. Your task is to generate long answer-type questions based on the given context and the requested number of questions, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+ }
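+
+ # Session state keys: "conversation" (chat history), "mode" (active prompt template), "file_upload" (index built)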
+
+ # Streamlit app
+ def main():
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = []
+     if "mode" not in st.session_state:
+         st.session_state.mode = ""
+     if "file_upload" not in st.session_state:
+         st.session_state.file_upload = False
+
+     st.set_page_config(page_title="NeuralPDF", page_icon=":page_with_curl:", initial_sidebar_state="expanded", layout="wide")
+     st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")
+
+     # Sidebar: upload PDFs and build the FAISS index
+     files = st.sidebar.file_uploader("Upload one or more PDF files", type="pdf", accept_multiple_files=True)
+     if st.sidebar.button("Submit"):
+         if files:
+             with st.spinner("Processing..."):
+                 raw_text = extract_pdf_text(files)
+                 text_chunks = split_text_into_chunks(raw_text)
+                 create_vector_store(text_chunks)
+             st.sidebar.success("Processing done!")
+             st.session_state.file_upload = True
+
+     # Mode of chat
+     with st.sidebar:
+         if st.session_state.file_upload:
+             # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;justify-content: center;} </style>', unsafe_allow_html=True)
+             # st.write('<style>div.st-bf{flex-direction:column;} div.st-ag{font-weight:bold;padding-left:2px;}</style>', unsafe_allow_html=True)
+             modes = {"Chat Conversation": "chat", "Quiz & MCQs": "quiz", "Long-Answer Questions": "long"}
+             # An empty radio label triggers a Streamlit warning, so collapse a named label instead
+             choose_mode = st.radio("Mode", list(modes.keys()), index=0, label_visibility="collapsed")
+             st.session_state.mode = modes[choose_mode]
+
+     if st.session_state.file_upload:
+         # Replay the chat history
+         for dialogue in st.session_state.conversation:
+             with st.chat_message(dialogue["role"]):
+                 if st.session_state.mode != "chat" and dialogue["role"] == "assistant":
+                     st.markdown(dialogue["content"])
+                     with st.expander("Answer"):
+                         st.markdown(dialogue["answer"])
+                 else:
+                     st.markdown(dialogue["content"])
+
+         # Handle the current turn
+         if prompt := st.chat_input("Type your question here"):
+             # User side
+             with st.chat_message("user"):
+                 st.markdown(prompt)
+             st.session_state.conversation.append({"role": "user", "content": prompt, "answer": ""})
+             # Assistant side: quiz/long modes generate questions first, then answer them with the "chat" template
+             with st.chat_message("assistant"):
+                 response = handle_user_input(st.session_state.mode, prompt)
+                 answer = ""
+                 if st.session_state.mode != "chat":
+                     answer = handle_user_input("chat", response)
+                     st.markdown(response)
+                     with st.expander("Answer"):
+                         st.markdown(answer)
+                 else:
+                     st.markdown(response)
+             st.session_state.conversation.append({"role": "assistant", "content": response, "answer": answer})
+
+
+ # Launch the app
+ if __name__ == "__main__":
+     main()
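
A quick way to smoke-test the pipeline outside Streamlit (a sketch, not part of this commit: "sample.pdf" and the script name are assumed, and a valid GOOGLE_API_KEY must be available via .env or the environment):

    # smoke_test.py (hypothetical helper; imports the functions this commit adds)
    from app import extract_pdf_text, split_text_into_chunks, create_vector_store, handle_user_input

    with open("sample.pdf", "rb") as f:  # assumed local test PDF
        chunks = split_text_into_chunks(extract_pdf_text([f]))
    create_vector_store(chunks)  # writes ./faiss_index
    print(handle_user_input("chat", "What is this document about?"))

Run with "python smoke_test.py" after installing the dependencies implied by the imports at the top of app.py (exact pip names are assumptions: streamlit, PyPDF2, python-dotenv, langchain, langchain-community, langchain-google-genai, faiss-cpu).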