ghazalnazari1990
commited on
Commit
•
f992926
1
Parent(s):
fd90f94
Upload chatwithyourpdf_bot.py
Browse files- chatwithyourpdf_bot.py +196 -0
chatwithyourpdf_bot.py
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""ChatWithYourPDF_Bot.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1rWK0dbIv4_9J74u21VdV7dUdZihi3k4n
|
8 |
+
|
9 |
+
# **Chat With Your PDF**
|
10 |
+
|
11 |
+
# **Import Libraries**
|
12 |
+
"""
|
13 |
+
|
14 |
+
# --- Environment setup ------------------------------------------------------
# The notebook's shell commands are kept as comments: the "!" prefix is
# IPython syntax and is a SyntaxError in a plain .py module. Install these
# packages in the environment before running this file.
# ! pip install langchain
# ! pip install pypdf
# !pip install openai
# !pip install tiktoken
# !pip install -U docarray

# Mount Google Drive; the default PDF and the icon used further down are read
# from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import openai
import os

# Use a key that is already exported in the environment when there is one, so
# a real secret never has to be written into this file. 'Your API' remains
# the placeholder fallback to be replaced by hand.
my_key = os.environ.get('OPENAI_API_KEY', 'Your API')
os.environ['OPENAI_API_KEY'] = my_key

# Chat model name passed to ChatOpenAI in load_db.
llm_name = "gpt-3.5-turbo"

# !pip install -U langchain langchain-community

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
|
46 |
+
|
47 |
+
"""# **Define Functions**"""
|
48 |
+
|
49 |
+
def load_db(file, chain_type, k):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    embedded with ``OpenAIEmbeddings`` and indexed in a
    ``DocArrayInMemorySearch`` store that backs the chain's retriever.

    Args:
        file: Path of the PDF handed to ``PyPDFLoader``.
        chain_type: Forwarded unchanged as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: Value stored under ``"k"`` in the retriever's ``search_kwargs``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``. No
        memory object is attached here; the caller supplies the chat history
        with every query.
    """
    # Read the PDF into documents.
    pages = PyPDFLoader(file).load()

    # Split into chunks (chunk_size=1000, chunk_overlap=150).
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(pages)

    # Embed the chunks and index them in an in-memory vector store.
    embedder = OpenAIEmbeddings()
    vector_store = DocArrayInMemorySearch.from_documents(chunks, embedder)

    # Similarity-search retriever over the store.
    doc_retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # Chat model named by the module-level ``llm_name``, temperature 0.
    chat_model = ChatOpenAI(model_name=llm_name, temperature=0)

    # Ask the chain to hand back the source documents and the generated
    # question alongside the answer.
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        chain_type=chain_type,
        retriever=doc_retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
|
71 |
+
|
72 |
+
"""# **Define the ChatBotApp Class**"""
|
73 |
+
|
74 |
+
import panel as pn
|
75 |
+
import param
|
76 |
+
|
77 |
+
class cbfs(param.Parameterized):
    """State holder and Panel view factory for the PDF chatbot.

    Keeps the current retrieval chain (``self.qa``), the chat history and the
    details of the last database lookup, and exposes methods that return
    Panel objects for each dashboard tab.

    Several methods read the module-level widgets ``file_input``,
    ``button_load`` and ``inp``; those must exist before the methods run.
    """

    # List of (question, answer) tuples passed back to the chain on each query.
    chat_history = param.List([])
    # Most recent answer text returned by the chain.
    answer = param.String("")
    # Most recent "generated_question" returned by the chain.
    db_query = param.String("")
    # Most recent "source_documents" returned by the chain.
    db_response = param.List([])

    def __init__(self, **params):
        """Initialise the parameters and build a chain over the default PDF."""
        super().__init__(**params)
        # Rows rendered in the conversation tab, in display order.
        self.panels = []
        self.loaded_file = "/content/drive/MyDrive/DataRoadMap/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Load the uploaded PDF, if any, and report which file is active.

        Args:
            count: Click count of the load button; ``0`` means the button has
                not been pressed yet.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        # Initial render, or no file chosen: only report the current file.
        if count == 0 or file_input.value is None:
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        button_load.button_style = "outline"
        try:
            self.qa = load_db("temp.pdf", "stuff", 4)
        finally:
            # Restore the button even when building the chain fails, so it
            # does not stay in the "outline" (busy) style.
            button_load.button_style = "solid"
        # Only advertise the new file once its chain was built successfully.
        self.loaded_file = file_input.filename
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Send ``query`` to the chain and return the updated conversation view.

        Args:
            query: Text entered by the user; a falsy value skips the chain.

        Returns:
            A scrollable ``pn.WidgetBox`` holding every exchange so far, or a
            box with a single empty "User:" row when ``query`` is falsy.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)

        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'})),
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # The dependency name must match the ``db_query`` parameter exactly; the
    # original spec carried a stray trailing space.
    @param.depends('db_query')
    def get_lquest(self):
        """Return a column showing the last question sent to the database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far")),
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query),
        )

    @param.depends('db_response')
    def get_sources(self):
        """Return a box listing the last lookup's documents, or ``None`` if empty."""
        if not self.db_response:
            return None
        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a box showing each (question, answer) entry of the chat history."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset the chat history to an empty list.

        Args:
            count: Unused; accepts the argument passed by the button's
                ``on_click`` callback.
        """
        self.chat_history = []
|
149 |
+
|
150 |
+
"""# **Instantiate and Display the App**"""
|
151 |
+
|
152 |
+
# Notebook shell command kept as a comment: the "!" prefix is IPython syntax
# and is a SyntaxError in a plain .py module. Install jupyter_bokeh in the
# environment before running this file.
# !pip install jupyter_bokeh

import panel as pn
pn.extension()  # Activate the panel extension

# Application state; building it also creates the chain for the default PDF.
cb = cbfs()

# Widgets. The cbfs methods look these up by their module-level names
# (file_input, button_load, inp), so the names must not change.
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load pdf", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput(placeholder='Enter text here…')

# Bind the callbacks to the load button's click count and to the text input.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

jpg_pane = pn.pane.Image('/content/drive/MyDrive/DataRoadMap/free-pdf-upload-icon-3389-thumb.png')

# Tab 1: question input and the running conversation.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)
# Tab 2: last generated question and the documents it retrieved.
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)
# Tab 3: the raw chat history.
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
# Tab 4: PDF upload and history reset.
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic")),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400)),
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# Chat With Your PDF_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3), ('Configure', tab4)),
)
# Left as a bare final expression, as in the notebook export.
dashboard
|
196 |
+
|