Dark900 committed on
Commit
bf97e3b
·
verified ·
1 Parent(s): be46739

Upload 2 files

Browse files
Files changed (2) hide show
  1. pdf_Q-A_app.py +114 -0
  2. requirements.txt +9 -0
pdf_Q-A_app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_nvidia import ChatNVIDIA
2
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import FAISS
5
+ from langchain_community.document_loaders import PyPDFDirectoryLoader,PyPDFLoader
6
+ from langchain_core.prompts import ChatMessagePromptTemplate,ChatPromptTemplate
7
+ from langchain.prompts import PromptTemplate
8
+ from dotenv import load_dotenv
9
+ from langchain_huggingface import HuggingFaceEmbeddings
10
+ from langchain.chains.history_aware_retriever import create_history_aware_retriever
11
+ from langchain.chains.combine_documents import create_stuff_documents_chain
12
+ from langchain.chains import create_retrieval_chain
13
+ from langchain.chains import LLMChain
14
+ import streamlit as st
15
+ import os
16
+
17
+ load_dotenv()
18
# Re-export the API credentials into the process environment, but only when
# they are actually set: os.getenv() returns None for a missing variable and
# assigning None into os.environ raises TypeError, which would abort the app
# before any UI is rendered.
for _env_key in ("NVIDIA_API_KEY", "HF_TOKEN"):
    _env_value = os.getenv(_env_key)
    if _env_value is not None:
        os.environ[_env_key] = _env_value
20
+
21
+
22
def file_process(file_uploader):
    """Save uploaded PDFs under ``./temp`` and load them into documents.

    Args:
        file_uploader: The uploaded files to ingest. Expected to be an
            iterable of objects exposing ``name`` and ``getbuffer()``;
            ``None`` or an empty iterable means nothing was uploaded.

    Returns:
        A flat list holding whatever ``PyPDFLoader(...).load()`` returns for
        each upload, concatenated in upload order. Empty when there is
        nothing to process.
    """
    all_docs = []
    if not file_uploader:
        return all_docs

    # Create the scratch directory here so the function does not depend on
    # the caller having done so beforehand.
    os.makedirs("./temp", exist_ok=True)

    for uploaded_file in file_uploader:
        # The upload name is user-controlled: keep only its final path
        # component so a name such as "../../x.pdf" cannot escape ./temp.
        safe_name = os.path.basename(uploaded_file.name)
        pdf_path = f"./temp/{safe_name}"

        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Load only after the handle is closed so the loader reads the
        # fully flushed file.
        all_docs.extend(PyPDFLoader(pdf_path).load())

    return all_docs
35
+
36
def vectore_function(docs, max_docs=30, chunk_size=700, chunk_overlap=70):
    """Split *docs* into chunks, embed them and store a FAISS index in session state.

    The intermediate objects are kept on ``st.session_state`` under the
    keys ``embedding``, ``text_spliter``, ``spliter`` and ``vectore``.

    Args:
        docs: Loaded documents to index.
        max_docs: Only the first ``max_docs`` documents are indexed
            (``None`` indexes all of them). Defaults to 30.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.

    Raises:
        ValueError: If there are no documents to index.
    """
    selected_docs = docs[:max_docs]
    if not selected_docs:
        # Fail early with a readable message instead of an opaque error from
        # inside the vector store when it is handed an empty list.
        raise ValueError(
            "No documents to embed; upload at least one readable PDF first."
        )

    # Cheap fingerprint of the request so an unchanged re-run is skipped, while
    # a different upload rebuilds the index instead of silently reusing a
    # stale one. NOTE(review): assumes each document exposes a dict-like
    # `metadata` and a string `page_content` — confirm against the loader.
    docs_key = (
        chunk_size,
        chunk_overlap,
        tuple(
            (
                doc.metadata.get("source"),
                doc.metadata.get("page"),
                len(doc.page_content),
            )
            for doc in selected_docs
        ),
    )
    already_indexed = (
        "vectore" in st.session_state
        and st.session_state.get("vectore_docs_key") == docs_key
    )
    if already_indexed:
        return

    st.session_state.embedding = NVIDIAEmbeddings()
    st.session_state.text_spliter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    st.session_state.spliter = st.session_state.text_spliter.split_documents(
        selected_docs
    )
    st.session_state.vectore = FAISS.from_documents(
        st.session_state.spliter,
        st.session_state.embedding,
    )
    # Record the fingerprint only after the index was built successfully.
    st.session_state.vectore_docs_key = docs_key
45
+
46
+
47
# Prompt for the answer-generation step. It must keep the two placeholders
# used by the chains built later in this script: {context} and {input}.
# The fallback instruction now fires when the context does NOT contain the
# answer (the previous wording asked for the apology when the answer *was*
# similar), and the context block is closed with a proper </context> tag.
template_prompt = ChatPromptTemplate.from_template(
    """
    Your job is to answer the user's question using only the context below.
    Feel free to use attractive emoji, and give the most accurate and relevant
    answer you can find in the context. If the context does not contain the
    answer, write: Sorry we don't have a answer yet!

    <context>
    {context}
    </context>

    Question: {input}
    """
)


# Chat model used to generate the final answer.
llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct")
66
+
67
# Page chrome: browser-tab metadata first, then the visible headings.
st.set_page_config(
    page_title="Langchain: Question/Answering Chat bot",
    page_icon="🦜",
)
st.title('🦜LangChain')
st.title("Title: Question/Answering Chat bot")
st.subheader("How to use:")

# Usage instructions, written to the page one step at a time, in order.
_USAGE_STEPS = (
    "Step1: Upload your document. Please ensure that the document is in PDF format.",
    "Step2: Click on the 'Embed Document' button and wait. During this step,the entire document will be embedded and stored in the database.\n"
    "Wait until the database setup is complete.",
    "Step3: Once the database is ready, a text box will appear where you can\n"
    "ask question based on the stored document.",
    "Step4: Enter your question in the text box and press Enter to receive answers.",
)
for _step_text in _USAGE_STEPS:
    st.write(_step_text)
81
+
82
st.write("⚠️Step 1: Upload the document then go to another steps")
file_uploader = st.file_uploader("Upload the only pdf", type='pdf', accept_multiple_files=True)

# Always bind upload_docs so the embed step below never hits an undefined
# name. NOTE(review): with accept_multiple_files=True the widget is expected
# to return a (possibly empty) list rather than None, hence the truthiness
# check instead of `is not None`.
upload_docs = []
if file_uploader:
    os.makedirs("./temp", exist_ok=True)
    upload_docs = file_process(file_uploader)
    st.write(f"length of the uploaded document: {len(upload_docs)}")

st.write("⚠️Step2: Click on the 'Embed document' button and wait for the database is successfully ready")
if st.button("Embed the document"):
    if upload_docs:
        vectore_function(upload_docs)
        st.write("Sucessfully database is ready")
    else:
        # Nothing to index: tell the user instead of crashing inside the
        # embedding step.
        st.warning("Please upload at least one PDF before embedding.")
95
+
96
st.write("⚠️After completing all the requirements then you ask your question")
user_input = st.text_area("Ask you any question from the documentation")

if st.button("👉Generate the answer"):
    if not user_input or not user_input.strip():
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter a question first.")
    elif "vectore" not in st.session_state:
        # The index only exists after the embed step has run; reading it
        # earlier would raise an AttributeError on session state.
        st.warning("Please embed a document before asking a question.")
    else:
        # Retrieval chain: fetch matching chunks from the stored index, then
        # pass them to the LLM through the prompt's {context} placeholder.
        chain_stuff = create_stuff_documents_chain(llm=llm, prompt=template_prompt)
        retriever = st.session_state.vectore.as_retriever()
        chain_retriever = create_retrieval_chain(retriever, chain_stuff)
        response = chain_retriever.invoke({"input": user_input})
        st.write(response['answer'])

        if "context" in response:
            # Show the retrieved chunks that the answer was based on.
            with st.expander("Similar document:"):
                for doc in response['context']:
                    st.write(doc.page_content)
                    st.write("---------------------")
        else:
            st.write("context is not provide in the response")
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain_groq
2
+ langchain
3
+ langchain_community
4
+ python-dotenv
5
+ ipykernel
6
+ lamini
7
+ python-dotenv
8
+ langchain-nvidia-ai-endpoints
9
+ langchain_core