|
|
|
"""ChatWithYourPDF_Bot.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1rWK0dbIv4_9J74u21VdV7dUdZihi3k4n |
|
|
|
# **Chat With Your PDF** |
|
|
|
# **Import Libraries** |
|
""" |
|
|
|
! pip install langchain |
|
|
|
! pip install pypdf |
|
|
|
!pip install openai |
|
|
|
!pip install tiktoken |
|
|
|
!pip install -U docarray |
|
|
|
from google.colab import drive |
|
drive.mount() |
|
|
|
my_key = 'Your API' |
|
|
|
import openai |
|
import os |
|
os.environ['OPENAI_API_KEY'] = my_key |
|
|
|
llm_name = "gpt-3.5-turbo" |
|
|
|
!pip install -U langchain langchain-community |
|
|
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter |
|
from langchain.vectorstores import DocArrayInMemorySearch |
|
from langchain.document_loaders import TextLoader |
|
from langchain.chains import RetrievalQA, ConversationalRetrievalChain |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.document_loaders import TextLoader |
|
from langchain.document_loaders import PyPDFLoader |
|
|
|
"""# **Define Functions**""" |
|
|
|
def load_db(file, chain_type, k):
    """Build a conversational retrieval chain over the PDF at ``file``.

    The PDF is loaded with ``PyPDFLoader``, split into chunks of up to 1000
    characters with a 150-character overlap, embedded with
    ``OpenAIEmbeddings`` and stored in a ``DocArrayInMemorySearch`` index.

    Args:
        file: Path of the PDF handed to ``PyPDFLoader``.
        chain_type: Forwarded as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: Forwarded to the retriever as ``search_kwargs={"k": k}``.

    Returns:
        The ``ConversationalRetrievalChain`` built with
        ``return_source_documents=True`` and ``return_generated_question=True``.
    """
    # Load the PDF and split it into overlapping chunks.
    loaded_docs = PyPDFLoader(file).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(loaded_docs)

    # Embed the chunks into an in-memory vector index.
    vector_index = DocArrayInMemorySearch.from_documents(chunks, OpenAIEmbeddings())

    # Similarity retriever configured with the caller-supplied k.
    chunk_retriever = vector_index.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # llm_name is the module-level model name; temperature is fixed at 0.
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=llm_name, temperature=0),
        chain_type=chain_type,
        retriever=chunk_retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
|
|
|
"""# **Define the ChatBotApp Class**""" |
|
|
|
import panel as pn |
|
import param |
|
|
|
class cbfs(param.Parameterized):
    """Chat state and Panel view builders for question answering over a PDF.

    Holds the retrieval chain returned by ``load_db`` together with the chat
    history and the results of the most recent query, and exposes methods
    that build the Panel objects shown in the dashboard tabs.

    Several methods read the module-level widgets ``file_input``,
    ``button_load`` and ``inp`` by name, so those must exist before the
    corresponding callbacks run.
    """

    # (question, answer) tuples passed back to the chain on each query.
    chat_history = param.List([])
    # Answer text of the most recent query.
    answer = param.String("")
    # "generated_question" returned by the chain for the most recent query.
    db_query = param.String("")
    # "source_documents" returned by the chain for the most recent query.
    db_response = param.List([])

    def __init__(self, **params):
        """Initialise state and build the chain for the default PDF."""
        super().__init__(**params)
        # Rows accumulated by convchain(); one User row and one ChatBot row per query.
        self.panels = []
        self.loaded_file = "/content/drive/MyDrive/DataRoadMap/MachineLearning-Lecture01.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    @staticmethod
    def _heading(text):
        """Return a row holding ``text`` as Markdown on the grey heading background."""
        return pn.Row(pn.pane.Markdown(text, styles={'background-color': '#F6F6F6'}))

    def call_load_db(self, count):
        """Load the PDF selected in ``file_input`` and rebuild the chain.

        Args:
            count: Click count of the load button; ``0`` means it has not
                been clicked yet.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        # Nothing to do before the first click or when no file was chosen.
        if count == 0 or file_input.value is None:
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy that load_db reads from
        button_load.button_style = "outline"
        try:
            self.qa = load_db("temp.pdf", "stuff", 4)
        finally:
            # Always restore the button, even when loading fails.
            button_load.button_style = "solid"
        # Record the new file name only once the chain was rebuilt, so a
        # failed load does not report a file that is not actually in use.
        self.loaded_file = file_input.filename
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Run ``query`` through the chain and return the conversation view.

        Args:
            query: The user's question; a falsy value produces an empty
                placeholder view without calling the chain.

        Returns:
            A scrollable ``pn.WidgetBox`` with every exchange so far.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)

        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.append((query, result["answer"]))
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'})),
        ])
        inp.value = ''  # reset the text input for the next question
        return pn.WidgetBox(*self.panels, scroll=True)

    @param.depends('db_query')
    def get_lquest(self):
        """Return a column showing the last question sent to the database."""
        if not self.db_query:
            return pn.Column(
                self._heading("Last question to DB:"),
                pn.Row(pn.pane.Str("no DB accesses so far")),
            )
        return pn.Column(
            self._heading("DB query:"),
            pn.pane.Str(self.db_query),
        )

    @param.depends('db_response')
    def get_sources(self):
        """Return a box listing the source documents of the last query.

        Returns ``None`` when there is no database response yet.
        """
        if not self.db_response:
            return None
        rlist = [self._heading("Result of DB lookup:")]
        rlist.extend(pn.Row(pn.pane.Str(doc)) for doc in self.db_response)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a box listing every (question, answer) pair in ``chat_history``."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [self._heading("Current Chat History variable")]
        rlist.extend(pn.Row(pn.pane.Str(exchange)) for exchange in self.chat_history)
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history`` to an empty list.

        Args:
            count: Unused; accepted so the method can be registered as a
                button click callback, which passes one argument.
        """
        self.chat_history = []
        return
|
|
|
"""# **Instantiate and Display the App**""" |
|
|
|
!pip install jupyter_bokeh |
|
|
|
import panel as pn |
|
pn.extension() |
|
|
|
cb = cbfs() |
|
|
|
file_input = pn.widgets.FileInput(accept='.pdf') |
|
button_load = pn.widgets.Button(name="Load pdf", button_type='primary') |
|
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning') |
|
button_clearhistory.on_click(cb.clr_history) |
|
inp = pn.widgets.TextInput( placeholder='Enter text here…') |
|
|
|
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks) |
|
conversation = pn.bind(cb.convchain, inp) |
|
|
|
jpg_pane = pn.pane.Image( '/content/drive/MyDrive/DataRoadMap/free-pdf-upload-icon-3389-thumb.png') |
|
|
|
tab1 = pn.Column( |
|
pn.Row(inp), |
|
pn.layout.Divider(), |
|
pn.panel(conversation, loading_indicator=True, height=300), |
|
pn.layout.Divider(), |
|
) |
|
tab2= pn.Column( |
|
pn.panel(cb.get_lquest), |
|
pn.layout.Divider(), |
|
pn.panel(cb.get_sources ), |
|
) |
|
tab3= pn.Column( |
|
pn.panel(cb.get_chats), |
|
pn.layout.Divider(), |
|
) |
|
tab4=pn.Column( |
|
pn.Row( file_input, button_load, bound_button_load), |
|
pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )), |
|
pn.layout.Divider(), |
|
pn.Row(jpg_pane.clone(width=400)) |
|
) |
|
dashboard = pn.Column( |
|
pn.Row(pn.pane.Markdown('# Chat With Your PDF_Bot')), |
|
pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4)) |
|
) |
|
dashboard |
|
|
|
|