Browse files
@@ -0,0 +1,148 @@
1 |
import os
2 |
from PyPDF2 import PdfReader
3 |
import streamlit as st
4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
5 |
from langchain_community.vectorstores.faiss import FAISS
6 |
from langchain.chains.question_answering import load_qa_chain
7 |
from langchain.prompts import PromptTemplate
8 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings
9 |
from langchain_google_genai import ChatGoogleGenerativeAI
10 |
import google.generativeai as genai
11 |
from dotenv import load_dotenv
12 |
13 |
14 |
15 |
16 |
# Function to extract text from PDFs
17 |
def extract_pdf_text(pdfs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdfs: Iterable of PDF sources that ``PdfReader`` can open (the app
            passes the files returned by the Streamlit uploader).

    Returns:
        One string holding the text of all pages, in input order. An empty
        iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdfs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Fall back to "" so a page that yields no text (None or empty)
            # cannot break the concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
24 |
25 |
# Function to split text into chunks
26 |
def split_text_into_chunks(text, chunk_size=12000, chunk_overlap=1200):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter`` (default 12000).
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter`` (default 1200).

    Returns:
        A list of text chunks; an empty list when ``text`` is empty.
    """
    # Nothing to split: skip building a splitter and report "no chunks".
    if not text:
        return []
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
30 |
31 |
# Function to create vector store
32 |
def create_vector_store(chunks):
    """Embed the text chunks and persist them as a local FAISS index.

    The index is written to the ``"faiss_index"`` directory, which is the
    location ``handle_user_input`` loads it from when answering a question.

    Args:
        chunks: List of text chunks to embed and index.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    # Persist the index; without this the later load_local("faiss_index")
    # has nothing to read.
    vector_store.save_local("faiss_index")
35 |
36 |
37 |
# Function to setup conversation chain for QA
38 |
def setup_conversation_chain(template):
    """Build a "stuff" question-answering chain around the given prompt text.

    Args:
        template: Prompt text containing the ``{context}`` and ``{question}``
            placeholders.

    Returns:
        The chain produced by ``load_qa_chain`` for the ``gemini-pro`` chat
        model (temperature 0.3) and the supplied prompt.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
43 |
44 |
# Function to handle user input based on selected mode
45 |
def handle_user_input(mode, user_question=None):
    """Answer a question from the saved FAISS index using the mode's prompt.

    Args:
        mode: Key into ``prompt_template`` selecting which prompt to use.
        user_question: The text to search the index with and to pass to the
            chain as the question.

    Returns:
        The ``"output_text"`` entry of the chain's response.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): allow_dangerous_deserialization=True lets load_local
    # unpickle the on-disk index; only safe while "faiss_index" is written
    # by this app itself — confirm it can never come from an untrusted source.
    store = FAISS.load_local(
        "faiss_index",
        embedder,
        allow_dangerous_deserialization=True,
    )
    matching_docs = store.similarity_search(user_question)

    qa_chain = setup_conversation_chain(prompt_template[mode])
    chain_inputs = {"input_documents": matching_docs, "question": user_question}
    return qa_chain(chain_inputs, return_only_outputs=True)["output_text"]
53 |
54 |
# Prompt templates for each mode
55 |
# Maps each chat mode ("chat", "quiz", "long") to the prompt text handed to
# the QA chain. Every template must contain the {context} and {question}
# placeholders that PromptTemplate fills in.
prompt_template = {
    "chat": """
Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included.
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
Don't provide incorrect information.\n\n
Context: \n {context}?\n
Question: \n {question}\n
""",
    "quiz": """
Your alias is Neural-PDF. Your task is to generate multiple choice questions for quiz based on the given context and requested number of questions, ensuring all relevant details are included.
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
Don't provide incorrect information.\n\n
Context: \n {context}?\n
Question: \n {question}\n
""",
    "long": """
Your alias is Neural-PDF. Your task is to generate long answer-type questions based on the given context and requested number of questions, ensuring all relevant details are included.
If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting with the context.
Don't provide incorrect information.\n\n
Context: \n {context}?\n
Question: \n {question}\n
""",
}
80 |
81 |
82 |
83 |
84 |
85 |
86 |
# Streamlit app
87 |
def main():
    """Run the NeuralPDF Streamlit app.

    Flow of one script run:
      1. Seed the session state (conversation history, chat mode, upload flag).
      2. Let the user upload PDFs in the sidebar; on "Submit", extract the
         text, chunk it and build the vector index.
      3. Once an index exists, let the user pick a mode, replay the stored
         conversation, and answer a new question if one was typed.

    In the non-chat modes the assistant's output (generated questions) is
    itself fed back through the "chat" prompt to produce an answer, which is
    shown inside a collapsible "Answer" expander.
    """
    # Seed per-session defaults on the first run.
    if "conversation" not in st.session_state:
        st.session_state.conversation = []
    if "mode" not in st.session_state:
        st.session_state.mode = "chat"
    if "file_upload" not in st.session_state:
        st.session_state.file_upload = False

    st.set_page_config(page_title="NeuralPDF", page_icon=":page_with_curl:", initial_sidebar_state="expanded", layout="wide")
    st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")

    # sidebar
    files = st.sidebar.file_uploader("Upload one or more PDF files", type="pdf", accept_multiple_files=True)
    if st.sidebar.button("Submit"):
        if files:
            with st.spinner("Processing..."):
                raw_text = extract_pdf_text(files)
                text_chunks = split_text_into_chunks(raw_text)
                if text_chunks:
                    create_vector_store(text_chunks)
                    # Unlocks the mode selector and the chat UI below.
                    st.session_state.file_upload = True
            if text_chunks:
                st.sidebar.success("Processing done!")
            else:
                # An index cannot be built from zero chunks.
                st.sidebar.error("No extractable text was found in the uploaded PDF files.")

    # mode of chat
    with st.sidebar:
        if st.session_state.file_upload:
            modes = {"Chat Conversation": "chat", "Quiz & MCQs": "quiz", "Long-Answer Questions": "long"}
            choose_mode = st.radio("", list(modes.keys()), index=0)
            st.session_state.mode = modes[choose_mode]

    if st.session_state.file_upload:
        # keep history of chat
        for dialogue in st.session_state.conversation:
            with st.chat_message(dialogue["role"]):
                st.markdown(dialogue["content"])
                if st.session_state.mode != "chat" and dialogue["role"] == "assistant":
                    with st.expander("Answer"):
                        st.markdown(dialogue["answer"])

        # handle conversation
        if prompt := st.chat_input("Type your question here"):
            # handle user side
            with st.chat_message("user"):
                st.markdown(prompt)
            st.session_state.conversation.append({"role": "user", "content": prompt, "answer": ""})

            # handle assistant side
            with st.chat_message("assistant"):
                response = handle_user_input(st.session_state.mode, prompt)
                st.markdown(response)
                # Default keeps `answer` bound in chat mode, where no second
                # pass is made.
                answer = ""
                if st.session_state.mode != "chat":
                    answer = handle_user_input("chat", response)
                    with st.expander("Answer"):
                        st.markdown(answer)
            st.session_state.conversation.append({"role": "assistant", "content": response, "answer": answer})
144 |
145 |
146 |
# Launch the app
147 |
# Start the app only when this file is run as the main script.
if __name__ == "__main__":
    main()