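"""NeuralPDF: an interactive Streamlit app for chatting with uploaded PDFs using Google Gemini.

PDF text is extracted, split into chunks, embedded with Google Generative AI embeddings,
and stored in a local FAISS index; user questions are answered by a Gemini-backed
question-answering chain in chat, quiz, or long-answer mode.

Run with:  streamlit run <this_file>.py  (requires GOOGLE_API_KEY in a .env file or the environment)
"""
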
import os
from PyPDF2 import PdfReader
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from dotenv import load_dotenv

# Load the Google API key (GOOGLE_API_KEY) from a .env file / environment and configure Gemini
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Function to extract text from PDFs
def extract_pdf_text(pdfs):
    all_text = ""
    for pdf in pdfs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None (e.g. for scanned pages), so guard against it
            all_text += page.extract_text() or ""
    return all_text

# Function to split text into chunks
def split_text_into_chunks(text):
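    # NOTE: 12,000-character chunks are quite large; if the embedding model truncates long
    # inputs, smaller chunks (e.g. 1,000-2,000 characters) may improve retrieval quality.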
    splitter = RecursiveCharacterTextSplitter(chunk_size=12000, chunk_overlap=1200)
    text_chunks = splitter.split_text(text)
    return text_chunks

# Function to create vector store
def create_vector_store(chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
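    # Persist the index to disk so it can be reloaded when answering questions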
    vector_store.save_local("faiss_index")

# Function to setup conversation chain for QA
def setup_conversation_chain(template):
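    # A low temperature keeps the model's answers relatively focused and deterministic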
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
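    # The "stuff" chain type concatenates all retrieved documents into a single prompt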
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain

# Function to handle user input based on selected mode
def handle_user_input(mode, user_question):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # allow_dangerous_deserialization is required because the index is a pickle we created ourselves
    indexed_data = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = indexed_data.similarity_search(user_question)

    chain = setup_conversation_chain(prompt_template[mode])
    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
    return response["output_text"]

# Prompt templates for each mode
prompt_template = {
    "chat": """
Your alias is Neural-PDF. Your task is to provide a thorough response based on the given context, ensuring all relevant details are included.
If the requested information isn't available, simply state "answer not available in context", then answer based on your own understanding, connecting it with the context.
Don't provide incorrect information.\n\n
Context: \n {context}\n
Question: \n {question}\n

Answer:
""",
    "quiz": """
Your alias is Neural-PDF. Your task is to generate multiple-choice quiz questions based on the given context and the requested number of questions, ensuring all relevant details are covered.
If the requested information isn't available, simply state "answer not available in context", then answer based on your own understanding, connecting it with the context.
Don't provide incorrect information.\n\n
Context: \n {context}\n
Question: \n {question}\n

Answer:
""",
    "long": """
Your alias is Neural-PDF. Your task is to generate long-answer questions based on the given context and the requested number of questions, ensuring all relevant details are covered.
If the requested information isn't available, simply state "answer not available in context", then answer based on your own understanding, connecting it with the context.
Don't provide incorrect information.\n\n
Context: \n {context}\n
Question: \n {question}\n

Answer:
""",
}


# Streamlit app
def main():
    # set_page_config must be the first Streamlit call in the script
    st.set_page_config(page_title="NeuralPDF", page_icon=":page_with_curl:", initial_sidebar_state="expanded", layout="wide")
    st.title("NeuralPDF: Interactive PDF Chat using AI 🤖")

    if "conversation" not in st.session_state:
        st.session_state.conversation = []
    if "mode" not in st.session_state:
        st.session_state.mode = ""
    if "file_upload" not in st.session_state:
        st.session_state.file_upload = False
    
    # sidebar
    files = st.sidebar.file_uploader("Upload one or more PDF files", type="pdf", accept_multiple_files=True)
    if st.sidebar.button("Submit"):
        if files:
            with st.spinner("Processing..."):
                raw_text = extract_pdf_text(files)
                text_chunks = split_text_into_chunks(raw_text)
                create_vector_store(text_chunks)
            st.sidebar.success("Processing done!")
            st.session_state.file_upload = True
        else:
            st.sidebar.warning("Please upload at least one PDF before submitting.")

    # mode of chat
    with st.sidebar:
        if st.session_state.file_upload:
            # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;justify-content: center;} </style>', unsafe_allow_html=True)
            # st.write('<style>div.st-bf{flex-direction:column;} div.st-ag{font-weight:bold;padding-left:2px;}</style>', unsafe_allow_html=True)
            modes = {"Chat Conversation": "chat", "Quiz & MCQs": "quiz", "Long-Answer Questions": "long"}
            choose_mode = st.radio("Mode", list(modes.keys()), index=0, label_visibility="collapsed")
            st.session_state.mode = modes[choose_mode]

    if st.session_state.file_upload:
        # Re-render the conversation history on every rerun
        for dialogue in st.session_state.conversation:
            with st.chat_message(dialogue["role"]):
                if st.session_state.mode != "chat" and dialogue["role"] == "assistant":
                    # In quiz/long modes the assistant message holds the generated questions,
                    # with the answers tucked into an expander
                    st.markdown(dialogue["content"])
                    with st.expander("Answer"):
                        st.markdown(dialogue["answer"])
                else:
                    st.markdown(dialogue["content"])

        # handle conversation
        if prompt := st.chat_input("Type your question here"):
            # handle user side
            with st.chat_message("user"):
                st.markdown(prompt)
            st.session_state.conversation.append({"role": "user", "content": prompt, "answer": ""})
            # handle assistant side
            with st.chat_message("assistant"):
                response = handle_user_input(st.session_state.mode, prompt)
                answer = ""
                if st.session_state.mode != "chat":
                    # Quiz/long modes: the first call generates questions, a second "chat"
                    # call answers them, and the answers are shown inside an expander
                    answer = handle_user_input("chat", response)
                    st.markdown(response)
                    with st.expander("Answer"):
                        st.markdown(answer)
                else:
                    st.markdown(response)
            st.session_state.conversation.append({"role": "assistant", "content": response, "answer": answer})


# Launch the app
if __name__ == "__main__":
    main()