ChatWithYourPDF

Runtime error

App Files Files Community

JBHF committed on Apr 12

Commit

2cecfe5

•

1 Parent(s): 1f5fbd7

Update app-12-04-2024-19u45m-CET.py

Browse files

Files changed (1) hide show

app-12-04-2024-19u45m-CET.py +47 -94

app-12-04-2024-19u45m-CET.py CHANGED Viewed

@@ -1,20 +1,20 @@
 # app.py-12-04-2024-19u45m-CET.py
 import os
 from typing import List
-# from langchain.embeddings.openai import OpenAIEmbeddings # ORIGINAL
-from langchain_community.embeddings import FastEmbedEmbeddings # JB
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.chains import (
-    ConversationalRetrievalChain,
-)
 from langchain.document_loaders import PyPDFLoader
-# from langchain.chat_models import ChatOpenAI # ORIGINAL
-from langchain_groq import ChatGroq # JB
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
@@ -22,91 +22,60 @@ from langchain.prompts.chat import (
 )
 from langchain.docstore.document import Document
 from langchain.memory import ChatMessageHistory, ConversationBufferMemory
-from chainlit.types import AskFileResponse
-import chainlit as cl
-# JB
-from dotenv import load_dotenv
-import glob
-load_dotenv()  #
-groq_api_key = os.environ['GROQ_API_KEY']
-# groq_api_key = "<REDACTED: hard-coded API key removed; rotate this credential>" # os.environ['GROQ_API_KEY']
-print"groq_api_key: ", groq_api_key)
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-system_template = """Use the following pieces of context to answer the users question.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ALWAYS return a "SOURCES" part in your answer.
 The "SOURCES" part should be a reference to the source of the document from which you got your answer.
 And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
 Example of your response should be:
 The answer is foo
 SOURCES: xyz
 Begin!
 ----------------
 {summaries}"""
 messages = [
     SystemMessagePromptTemplate.from_template(system_template),
     HumanMessagePromptTemplate.from_template("{question}"),
 ]
 prompt = ChatPromptTemplate.from_messages(messages)
 chain_type_kwargs = {"prompt": prompt}
-def process_file(file: AskFileResponse):
-    import tempfile
-    with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
-        with open(tempfile.name, "wb") as f:
-            f.write(file.content)
-    pypdf_loader = PyPDFLoader(tempfile.name)
     texts = pypdf_loader.load_and_split()
     texts = [text.page_content for text in texts]
     return texts
-@cl.on_chat_start
-async def on_chat_start():
-    files = None
-    # Wait for the user to upload a file
-    while files == None:
-        files = await cl.AskFileMessage(
-            content="Please upload a PDF file to begin!",
-            accept=["application/pdf"],
-            max_size_mb=20,
-            timeout=180,
-        ).send()
     file = files[0]
-    msg = cl.Message(
-        content=f"Processing `{file.name}`...", disable_human_feedback=True
-    )
-    await msg.send()
-    # load the file
     texts = process_file(file)
-    print(texts[0])
     # Create a metadata for each chunk
     metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
-    # Create a Chroma vector store
-    # embeddings = OpenAIEmbeddings()  # ORIGINAL
-    embeddings = FastEmbedEmbeddings # JB
-    docsearch = await cl.make_async(Chroma.from_texts)(
-        texts, embeddings, metadatas=metadatas
-    )
     message_history = ChatMessageHistory()
@@ -117,52 +86,36 @@ async def on_chat_start():
         return_messages=True,
     )
-    # JB
-    # llm = ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768')
-    # Create a chain that uses the Chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
-        # ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True), # ORIGINAL
-        ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768', streaming=True), # JB
         chain_type="stuff",
         retriever=docsearch.as_retriever(),
         memory=memory,
         return_source_documents=True,
     )
-    # Let the user know that the system is ready
-    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
-    await msg.update()
-    cl.user_session.set("chain", chain)
-@cl.on_message
-async def main(message):
-    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
-    cb = cl.AsyncLangchainCallbackHandler()
-    res = await chain.acall(message.content, callbacks=[cb])
-    answer = res["answer"]
-    source_documents = res["source_documents"]  # type: List[Document]
-    text_elements = []  # type: List[cl.Text]
-    if source_documents:
-        for source_idx, source_doc in enumerate(source_documents):
-            source_name = f"source_{source_idx}"
-            # Create the text element referenced in the message
-            text_elements.append(
-                cl.Text(content=source_doc.page_content, name=source_name)
-            )
-        source_names = [text_el.name for text_el in text_elements]
-        if source_names:
-            answer += f"\nSources: {', '.join(source_names)}"
-        else:
-            answer += "\nNo sources found"
-    await cl.Message(content=answer, elements=text_elements).send()

 # app.py-12-04-2024-19u45m-CET.py
+#
+# POE ChatGPT:
+# To convert the code from a Chainlit app to a Streamlit app, you'll need to make several modifications.
+# Here's the modified code for a Streamlit app:
+# NOTE: the words "python" and "Copy" followed here as bare statements
+# (apparently the chat UI's code-block label and copy button, pasted along
+# with the code). Each is an undefined name and would raise NameError when
+# the script runs, so they are kept only inside this comment.
 import os
 from typing import List
+import streamlit as st
+from langchain_community.embeddings import FastEmbedEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
+from langchain.chains import ConversationalRetrievalChain
 from langchain.document_loaders import PyPDFLoader
+from langchain_groq import ChatGroq
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
 )
 from langchain.docstore.document import Document
 from langchain.memory import ChatMessageHistory, ConversationBufferMemory
+st.title("Chat App")  # page heading
+st.write("Upload a PDF file to begin!")  # prompt shown under the heading
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)  # NOTE(review): not referenced by any code visible in this diff
+system_template = """Use the following pieces of context to answer the user's question.
 If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ALWAYS return a "SOURCES" part in your answer.
 The "SOURCES" part should be a reference to the source of the document from which you got your answer.
 And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
 Example of your response should be:
 The answer is foo
 SOURCES: xyz
 Begin!
 ----------------
 {summaries}"""  # system prompt; presumably {summaries} receives the retrieved context - confirm
 messages = [  # system prompt followed by the user's {question}
     SystemMessagePromptTemplate.from_template(system_template),
     HumanMessagePromptTemplate.from_template("{question}"),
 ]
 prompt = ChatPromptTemplate.from_messages(messages)
 chain_type_kwargs = {"prompt": prompt}  # NOTE(review): not passed to the from_llm() call visible in this diff
+def process_file(file) -> List[str]:
+    """Extract the text chunks of an uploaded PDF.
+
+    Args:
+        file: A binary file-like object exposing ``read()`` (for example the
+            object returned by ``st.file_uploader``).
+
+    Returns:
+        The ``page_content`` string of every document produced by
+        ``PyPDFLoader.load_and_split()``.
+    """
+    import tempfile
+
+    # The upload is spooled to a private temporary file instead of
+    # ``file.name``: that name is chosen by the client, so writing to it could
+    # overwrite files in the working directory, and the copy was never removed.
+    # delete=False lets PyPDFLoader reopen the path after the handle is closed.
+    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
+        tmp.write(file.read())
+        tmp_path = tmp.name
+    try:
+        pages = PyPDFLoader(tmp_path).load_and_split()
+    finally:
+        # Always remove the temporary copy, even if PDF parsing fails.
+        os.remove(tmp_path)
+    return [page.page_content for page in pages]
+def main():
+    files = st.file_uploader("Upload PDF File", type="pdf", key="pdf_upload")
+    if not files:
+        return
     file = files[0]
+    st.write(f"Processing `{file.name}`...")
     texts = process_file(file)
     # Create a metadata for each chunk
     metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
+    embeddings = FastEmbedEmbeddings()
+    docsearch = Chroma.from_texts(texts, embeddings, metadatas=metadatas)
     message_history = ChatMessageHistory()
         return_messages=True,
     )
     chain = ConversationalRetrievalChain.from_llm(
+        ChatGroq(temperature=0.2, groq_api_key=groq_api_key, model_name='mixtral-8x7b-32768', streaming=True),
         chain_type="stuff",
         retriever=docsearch.as_retriever(),
         memory=memory,
         return_source_documents=True,
     )
+    st.write(f"Processing `{file.name}` done. You can now ask questions!")
+    while True:
+        user_input = st.text_input("User Input")
+        if st.button("Send"):
+            res = chain.call(user_input)
+            answer = res["answer"]
+            source_documents = res["source_documents"]
+            text_elements = []
+            if source_documents:
+                for source_idx, source_doc in enumerate(source_documents):
+                    source_name = f"source_{source_idx}"
+                    text_elements.append(Document(content=source_doc.page_content, name=source_name))
+                source_names = [text_el.name for text_el in text_elements]
+                if source_names:
+                    answer += f"\nSources: {', '.join(source_names)}"
+                else:
+                    answer += "\nNo sources found"
+            st.write(answer)
+            for source_doc in source_documents:
+                st.write(source_doc.page_content)