ishans24 committed on
Commit
88bd9e4
1 Parent(s): 1bef0a4

Create app.py

Files changed (1)
app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
+ import os
+ from PyPDF2 import PdfReader
+ import streamlit as st
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores.faiss import FAISS
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.prompts import PromptTemplate
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import google.generativeai as genai
+ from dotenv import load_dotenv
+
+ # Read GOOGLE_API_KEY from the environment (or a local .env file) and configure Gemini
+ load_dotenv()
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
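+ # Pipeline: extract_pdf_text -> split_text_into_chunks -> create_vector_store -> handle_user_input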
+
+ # Function to extract text from PDFs
+ def extract_pdf_text(pdfs):
+     all_text = ""
+     for pdf in pdfs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             # extract_text() can return None (e.g. image-only pages), so guard the concatenation
+             all_text += page.extract_text() or ""
+     return all_text
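+
+ # Note: RecursiveCharacterTextSplitter counts chunk_size/chunk_overlap in characters by default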
+
+ # Function to split text into chunks
+ def split_text_into_chunks(text):
+     splitter = RecursiveCharacterTextSplitter(chunk_size=12000, chunk_overlap=1200)
+     text_chunks = splitter.split_text(text)
+     return text_chunks
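+
+ # The FAISS index is persisted to the local "faiss_index" folder and reloaded for every query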
+
+ # Function to create vector store
+ def create_vector_store(chunks):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     vector_store = FAISS.from_texts(chunks, embedding=embeddings)
+     vector_store.save_local("faiss_index")
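+
+ # chain_type="stuff" concatenates all retrieved documents into a single prompt for the model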
+
+ # Function to set up the conversation chain for QA
+ def setup_conversation_chain(template):
+     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+     prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+     return chain
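+
+ # Retrieval flow: embed the question, fetch the most similar chunks from FAISS, then answer from those chunks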
+
+ # Function to handle user input based on the selected mode
+ def handle_user_input(mode, user_question=None):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     # allow_dangerous_deserialization is needed to load the pickled index; safe here because this app wrote it itself
+     indexed_data = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+     docs = indexed_data.similarity_search(user_question)
+
+     chain = setup_conversation_chain(prompt_template[mode])
+     response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
+     return response["output_text"]
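+
+ # Each template receives the retrieved chunks as {context} and the user's request as {question}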
+
+ # Prompt templates for each mode
+ prompt_template = {
+     "chat": """
+     Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+     "quiz": """
+     Your alias is Neural-PDF. Your task is to generate multiple-choice quiz questions based on the given context and the requested number of questions, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+     "long": """
+     Your alias is Neural-PDF. Your task is to generate long answer-type questions based on the given context and the requested number of questions, ensuring all relevant details are included.
+     If the requested information isn't available, simply state, "answer not available in context," then answer based on your understanding, connecting it with the context.
+     Don't provide incorrect information.\n\n
+     Context: \n {context}\n
+     Question: \n {question}\n
+
+     Answer:
+     """,
+ }
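+
+ # Session state keys: "conversation" (chat history), "mode" (active prompt template), "file_upload" (index built)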
+
+ # Streamlit app
+ def main():
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = []
+     if "mode" not in st.session_state:
+         st.session_state.mode = ""
+     if "file_upload" not in st.session_state:
+         st.session_state.file_upload = False
+
+     st.set_page_config(page_title="NeuralPDF", page_icon=":page_with_curl:", initial_sidebar_state="expanded", layout="wide")
+     st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")
+
+     # Sidebar: upload PDFs and build the FAISS index
+     files = st.sidebar.file_uploader("Upload one or more PDF files", type="pdf", accept_multiple_files=True)
+     if st.sidebar.button("Submit"):
+         if files:
+             with st.spinner("Processing..."):
+                 raw_text = extract_pdf_text(files)
+                 text_chunks = split_text_into_chunks(raw_text)
+                 create_vector_store(text_chunks)
+             st.sidebar.success("Processing done!")
+             st.session_state.file_upload = True
+
+     # Mode of chat
+     with st.sidebar:
+         if st.session_state.file_upload:
+             # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;justify-content: center;} </style>', unsafe_allow_html=True)
+             # st.write('<style>div.st-bf{flex-direction:column;} div.st-ag{font-weight:bold;padding-left:2px;}</style>', unsafe_allow_html=True)
+             modes = {"Chat Conversation": "chat", "Quiz & MCQs": "quiz", "Long-Answer Questions": "long"}
+             # An empty radio label triggers a Streamlit warning, so collapse a named label instead
+             choose_mode = st.radio("Mode", list(modes.keys()), index=0, label_visibility="collapsed")
+             st.session_state.mode = modes[choose_mode]
+
+     if st.session_state.file_upload:
+         # Replay the chat history
+         for dialogue in st.session_state.conversation:
+             with st.chat_message(dialogue["role"]):
+                 if st.session_state.mode != "chat" and dialogue["role"] == "assistant":
+                     st.markdown(dialogue["content"])
+                     with st.expander("Answer"):
+                         st.markdown(dialogue["answer"])
+                 else:
+                     st.markdown(dialogue["content"])
+
+         # Handle the current turn
+         if prompt := st.chat_input("Type your question here"):
+             # User side
+             with st.chat_message("user"):
+                 st.markdown(prompt)
+             st.session_state.conversation.append({"role": "user", "content": prompt, "answer": ""})
+             # Assistant side: quiz/long modes generate questions first, then answer them with the "chat" template
+             with st.chat_message("assistant"):
+                 response = handle_user_input(st.session_state.mode, prompt)
+                 answer = ""
+                 if st.session_state.mode != "chat":
+                     answer = handle_user_input("chat", response)
+                     st.markdown(response)
+                     with st.expander("Answer"):
+                         st.markdown(answer)
+                 else:
+                     st.markdown(response)
+             st.session_state.conversation.append({"role": "assistant", "content": response, "answer": answer})
+
+
+ # Launch the app
+ if __name__ == "__main__":
+     main()
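
A quick way to smoke-test the pipeline outside Streamlit (a sketch, not part of this commit: "sample.pdf" and the script name are assumed, and a valid GOOGLE_API_KEY must be available via .env or the environment):

    # smoke_test.py (hypothetical helper; imports the functions this commit adds)
    from app import extract_pdf_text, split_text_into_chunks, create_vector_store, handle_user_input

    with open("sample.pdf", "rb") as f:  # assumed local test PDF
        chunks = split_text_into_chunks(extract_pdf_text([f]))
    create_vector_store(chunks)  # writes ./faiss_index
    print(handle_user_input("chat", "What is this document about?"))

Run with "python smoke_test.py" after installing the dependencies implied by the imports at the top of app.py (exact pip names are assumptions: streamlit, PyPDF2, python-dotenv, langchain, langchain-community, langchain-google-genai, faiss-cpu).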