ghazalnazari1990 committed on
Commit
f992926
1 Parent(s): fd90f94

Upload chatwithyourpdf_bot.py

Browse files
Files changed (1) hide show
  1. chatwithyourpdf_bot.py +196 -0
chatwithyourpdf_bot.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """ChatWithYourPDF_Bot.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1rWK0dbIv4_9J74u21VdV7dUdZihi3k4n
8
+
9
+ # **Chat With Your PDF**
10
+
11
+ # **Import Libraries**
12
+ """
13
+
14
# --- Environment setup -------------------------------------------------------
# The notebook used IPython "!pip install ..." shell escapes, which are not
# valid Python in a .py module. Invoking pip through the running interpreter
# works both in a notebook cell and as a plain script.
import subprocess
import sys


def _pip_install(*args):
    """Run ``pip install *args`` for the interpreter executing this script.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *args])


# Same packages, flags and order as the original notebook cells.
_pip_install("langchain")
_pip_install("pypdf")
_pip_install("openai")
_pip_install("tiktoken")
_pip_install("-U", "docarray")

# Mount Google Drive; the PDF and image paths used later live under
# /content/drive. This import is only available inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

# Placeholder: replace with a real OpenAI API key before running.
# NOTE(review): avoid committing a real key to source control.
my_key = 'Your API'

import openai
import os
os.environ['OPENAI_API_KEY'] = my_key

# Chat model name passed to ChatOpenAI in load_db().
llm_name = "gpt-3.5-turbo"

_pip_install("-U", "langchain", "langchain-community")
36
+
37
+ from langchain.embeddings.openai import OpenAIEmbeddings
38
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
39
+ from langchain.vectorstores import DocArrayInMemorySearch
40
+ from langchain.document_loaders import TextLoader
41
+ from langchain.chains import RetrievalQA, ConversationalRetrievalChain
42
+ from langchain.memory import ConversationBufferMemory
43
+ from langchain.chat_models import ChatOpenAI
44
+ from langchain.document_loaders import TextLoader
45
+ from langchain.document_loaders import PyPDFLoader
46
+
47
+ """# **Define Functions**"""
48
+
49
def load_db(file, chain_type, k):
    """Build a conversational retrieval chain over the PDF at *file*.

    Args:
        file: Path of the PDF handed to ``PyPDFLoader``.
        chain_type: Forwarded unchanged as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: Passed to the retriever as ``search_kwargs={"k": k}``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``,
        configured to return source documents and the generated question.
    """
    # Load the PDF and split it into 1000-character chunks with 150 overlap.
    loaded = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(loaded)

    # Embed the chunks into an in-memory store and expose it as a
    # similarity-search retriever.
    store = DocArrayInMemorySearch.from_documents(chunks, OpenAIEmbeddings())
    retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # No memory object is attached here: chat history is managed externally
    # and supplied by the caller with every query.
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=llm_name, temperature=0),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
71
+
72
+ """# **Define the ChatBotApp Class**"""
73
+
74
+ import panel as pn
75
+ import param
76
+
77
class cbfs(param.Parameterized):
    """State and Panel callbacks for the PDF chatbot UI.

    Holds the current retrieval chain (``self.qa``), the running chat
    history, and the most recent query/answer/source documents, and builds
    the Panel objects shown in the dashboard tabs.

    The methods rely on module-level widgets (``file_input``, ``button_load``,
    ``inp``) that must exist by the time the callbacks are invoked.
    """

    # List of (question, answer) tuples passed back to the chain on each query.
    chat_history = param.List([])
    # Answer text of the most recent query.
    answer = param.String("")
    # "generated_question" returned by the chain for the most recent query.
    db_query = param.String("")
    # "source_documents" returned by the chain for the most recent query.
    db_response = param.List([])

    def __init__(self, **params):
        """Create the app state and build a chain for the default PDF."""
        super().__init__(**params)
        self.panels = []
        self.loaded_file = "/content/drive/MyDrive/DataRoadMap/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Rebuild the chain from the uploaded PDF and report the loaded file.

        Args:
            count: Click count of the load button; ``0`` means the initial
                render, in which case nothing is reloaded.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        if count != 0 and file_input.value is not None:
            file_input.save("temp.pdf")  # local copy
            button_load.button_style = "outline"
            try:
                self.qa = load_db("temp.pdf", "stuff", 4)
            finally:
                # Restore the button even when building the chain fails.
                button_load.button_style = "solid"
            # Only advertise the new file once its chain is actually in use.
            self.loaded_file = file_input.filename
            self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Send *query* to the chain and return the updated conversation view.

        Args:
            query: Text entered by the user; an empty value yields an empty
                placeholder box and does not call the chain.

        Returns:
            A scrollable ``pn.WidgetBox`` with all exchanges so far.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'})),
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # The dependency name must match the declared parameter exactly; the
    # original spelled it with a trailing space ('db_query ').
    @param.depends('db_query')
    def get_lquest(self):
        """Return a view of the last question sent to the vector database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far")),
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query),
        )

    @param.depends('db_response')
    def get_sources(self):
        """Return a view of the latest source documents, or None if none yet."""
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a view of the raw ``chat_history`` list."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        rlist.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history``; *count* is accepted for button callbacks and ignored."""
        self.chat_history = []
149
+
150
+ """# **Instantiate and Display the App**"""
151
+
152
# The notebook used the IPython shell escape "!pip install jupyter_bokeh",
# which is a syntax error in a .py module; call pip through the interpreter.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "jupyter_bokeh"])

import panel as pn
pn.extension()  # Activate the panel extension

# Application state; building it also loads the default PDF.
cb = cbfs()

# Widgets referenced by name from the cbfs callbacks.
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load pdf", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput(placeholder='Enter text here…')

# Re-run the callbacks whenever the load button is clicked or the text changes.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

jpg_pane = pn.pane.Image('/content/drive/MyDrive/DataRoadMap/free-pdf-upload-icon-3389-thumb.png')

# Tab 1: question input and the running conversation.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation, loading_indicator=True, height=300),
    pn.layout.Divider(),
)
# Tab 2: last generated DB query and the retrieved source documents.
tab2 = pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources),
)
# Tab 3: raw chat history.
tab3 = pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
# Tab 4: PDF upload/load controls and the clear-history button.
tab4 = pn.Column(
    pn.Row(file_input, button_load, bound_button_load),
    pn.Row(button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic")),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400)),
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# Chat With Your PDF_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3), ('Configure', tab4)),
)
# Bare expression: displays the dashboard when this is the last line of a notebook cell.
dashboard
196
+