ChatWithYourPDF

Runtime error

App Files Files Community

ChatWithYourPDF / app-12-04-2024-19u45m-CET.py

JBHF

Update app-12-04-2024-19u45m-CET.py

2cecfe5 verified 7 months ago

raw

history blame contribute delete

3.95 kB

	# app.py-12-04-2024-19u45m-CET.py
	#
	# POE ChatGPT:
	# To convert the code from a Chainlit app to a Streamlit app, you'll need to make several modifications.
	# Here's the modified code for a Streamlit app:

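	# python  (code-fence label pasted from the chat UI; not executable code)
	# Copy    (copy-button label pasted from the chat UI; not executable code)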
	import os
	from typing import List
	import streamlit as st
	from langchain_community.embeddings import FastEmbedEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import Chroma
	from langchain.chains import ConversationalRetrievalChain
	from langchain.document_loaders import PyPDFLoader
	from langchain_groq import ChatGroq
	from langchain.prompts.chat import (
	ChatPromptTemplate,
	SystemMessagePromptTemplate,
	HumanMessagePromptTemplate,
	)
	from langchain.docstore.document import Document
	from langchain.memory import ChatMessageHistory, ConversationBufferMemory

	# Page header and upload hint, rendered as soon as the script runs.
	st.title("Chat App")
	st.write("Upload a PDF file to begin!")

	# Splitter configured with chunk_size=1000 and chunk_overlap=100.
	# NOTE(review): nothing in the visible part of this file references
	# `text_splitter` -- confirm whether process_file() was meant to use it.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

	# System prompt text: asks the model to answer from the supplied context,
	# to admit when it does not know, to always include a "SOURCES" part, and to
	# respond to greetings. `{summaries}` is the only template placeholder.
	system_template = """Use the following pieces of context to answer the user's question.
	If you don't know the answer, just say that you don't know, don't try to make up an answer.
	ALWAYS return a "SOURCES" part in your answer.
	The "SOURCES" part should be a reference to the source of the document from which you got your answer.
	And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
	Example of your response should be:
	The answer is foo
	SOURCES: xyz
	Begin!
	----------------
	{summaries}"""

	# Chat prompt = the system template above followed by the user's question.
	messages = [
	SystemMessagePromptTemplate.from_template(system_template),
	HumanMessagePromptTemplate.from_template("{question}"),
	]

	prompt = ChatPromptTemplate.from_messages(messages)
	# NOTE(review): `chain_type_kwargs` is not passed to any chain in the code
	# visible here -- confirm whether the custom prompt is meant to be wired in.
	chain_type_kwargs = {"prompt": prompt}


	def process_file(file):
	    """Extract the text of an uploaded PDF as a list of strings.

	    The upload is spooled to a temporary file because ``PyPDFLoader`` is
	    given a filesystem path. The temporary file is always removed afterwards.

	    Args:
	        file: Uploaded file object exposing ``.name`` and a ``.read()`` that
	            returns the raw PDF bytes.

	    Returns:
	        A list with the ``page_content`` of every document produced by
	        ``PyPDFLoader.load_and_split()``.
	    """
	    import tempfile

	    # Only the extension of the client-supplied name is reused; writing to
	    # `file.name` directly would let the uploader choose a path inside the
	    # working directory and overwrite existing files.
	    suffix = os.path.splitext(os.path.basename(file.name))[1] or ".pdf"

	    # delete=False so the loader can reopen the path on every platform;
	    # cleanup happens explicitly in the `finally` below.
	    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
	        tmp.write(file.read())
	        tmp_path = tmp.name

	    try:
	        pages = PyPDFLoader(tmp_path).load_and_split()
	    finally:
	        os.remove(tmp_path)

	    return [page.page_content for page in pages]


	def main():
	    """Render the PDF chat page: upload, index, then answer one question.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the retrieval chain (together with its conversation memory) is stored in
	    ``st.session_state`` and rebuilt only when a different file is uploaded.
	    No explicit loop is needed: each click on "Send" triggers a rerun.
	    """
	    # Without accept_multiple_files=True the uploader yields a single file
	    # object (or None), not a list.
	    file = st.file_uploader("Upload PDF File", type="pdf", key="pdf_upload")
	    if file is None:
	        return

	    groq_api_key = os.environ.get("GROQ_API_KEY")
	    if not groq_api_key:
	        st.error("GROQ_API_KEY environment variable is not set.")
	        return

	    # Identify the upload so the expensive indexing runs once per file.
	    file_key = (file.name, getattr(file, "size", None))
	    if st.session_state.get("pdf_chain_key") != file_key:
	        st.write(f"Processing `{file.name}`...")
	        chain = _build_chain(file, groq_api_key)
	        if chain is None:
	            st.error("No text could be extracted from the uploaded PDF.")
	            return
	        st.session_state["pdf_chain"] = chain
	        st.session_state["pdf_chain_key"] = file_key
	        st.write(f"Processing `{file.name}` done. You can now ask questions!")

	    chain = st.session_state["pdf_chain"]

	    user_input = st.text_input("User Input")
	    if not st.button("Send") or not user_input:
	        return

	    # The chain's memory supplies `chat_history`; only the question is passed.
	    res = chain.invoke({"question": user_input})
	    answer = res["answer"]
	    source_documents = res.get("source_documents") or []

	    source_names = [f"source_{idx}" for idx, _ in enumerate(source_documents)]
	    if source_names:
	        answer += f"\nSources: {', '.join(source_names)}"
	    else:
	        answer += "\nNo sources found"

	    st.write(answer)
	    for source_doc in source_documents:
	        st.write(source_doc.page_content)


	def _build_chain(file, groq_api_key):
	    """Index *file* and return a conversational retrieval chain over it.

	    Returns ``None`` when no text could be extracted from the PDF.
	    """
	    texts = process_file(file)
	    if not texts:
	        return None

	    # One metadata entry per chunk, used as its source label.
	    metadatas = [{"source": f"{i}-pl"} for i, _ in enumerate(texts)]

	    embeddings = FastEmbedEmbeddings()
	    docsearch = Chroma.from_texts(texts, embeddings, metadatas=metadatas)

	    memory = ConversationBufferMemory(
	        memory_key="chat_history",
	        output_key="answer",
	        chat_memory=ChatMessageHistory(),
	        return_messages=True,
	    )

	    return ConversationalRetrievalChain.from_llm(
	        ChatGroq(
	            temperature=0.2,
	            groq_api_key=groq_api_key,
	            model_name='mixtral-8x7b-32768',
	            streaming=True,
	        ),
	        chain_type="stuff",
	        retriever=docsearch.as_retriever(),
	        memory=memory,
	        return_source_documents=True,
	    )