Spaces:

Kameshr
/

RAG

Sleeping

App Files Files Community

Kameshr committed on Dec 29, 2024

Commit

7afda13

verified ·

1 Parent(s): 8e39574

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -133

app.py CHANGED Viewed

@@ -1,170 +1,152 @@
 import gradio as gr
 import os
-# Retrieve API token from environment variable
-api_token = os.getenv("HF_TOKEN")
-# Import required libraries from LangChain
-from langchain.llms import HuggingFaceHub
-from langchain.vectorstores import FAISS
-from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationSummaryBufferMemory
-# Define the default LLM model to use
-llm_model = "meta-llama/Meta-Llama-3-8B-Instruct"
-# Initialize the LLM-based retrieval chain
-# The retriever is set directly within this function
-def initialize_llmchain(llm_model, temperature, max_tokens, top_k, retriever):
-    # Configure the selected LLM with parameters using HuggingFaceHub
-    llm = HuggingFaceHub(
-        repo_id=llm_model,
         huggingfacehub_api_token=api_token,
-        model_kwargs={
-            "temperature": temperature,
-            "max_new_tokens": max_tokens,
-            "top_k": top_k
-        }
     )
-    # Use summary-based conversation memory for better performance
-    memory = ConversationSummaryBufferMemory(
-        llm=llm,
         memory_key="chat_history",
-        output_key="answer"
     )
-    # Combine LLM, retriever, and memory into a Conversational Retrieval Chain
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
-        retriever=retriever,
         memory=memory,
         return_source_documents=True,
         verbose=False,
     )
     return qa_chain
-# Load and split PDF documents into manageable chunks
-def load_doc(list_file_path):
-    # Load each file using PyPDFLoader
-    loaders = [PyPDFLoader(x) for x in list_file_path]
-    pages = []
-    for loader in loaders:
-        pages.extend(loader.load())
-    # Split loaded pages into smaller chunks with overlap for better context
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1024,
-        chunk_overlap=64
-    )
-    doc_splits = text_splitter.split_documents(pages)
-    return doc_splits
-# Create a vector database from document splits
-def create_db(splits):
-    embeddings = HuggingFaceEmbeddings()
-    vectordb = FAISS.from_documents(splits, embeddings)
-    return vectordb
-# Format chat history for display in chatbot UI
-def format_chat_history(message, chat_history):
-    formatted_chat_history = []
-    for user_message, bot_message in chat_history:
-        formatted_chat_history.append(f"User: {user_message}")
-        formatted_chat_history.append(f"Assistant: {bot_message}")
-    return formatted_chat_history
-# Handle user queries and generate responses using the chatbot
 def conversation(qa_chain, message, history):
-    # Format chat history to include previous interactions
-    formatted_chat_history = format_chat_history(message, history)
-    # Invoke the QA chain with the user message and chat history
-    response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
-    # Extract the response answer, removing unnecessary labels if present
-    response_answer = response["answer"].split("Helpful Answer:")[-1] if "Helpful Answer:" in response["answer"] else response["answer"]
-    # Extract top 3 source documents for relevance display
-    source_documents = response["source_documents"][:3]
-    sources = [
-        {
-            "content": doc.page_content.strip(),
-            "page": doc.metadata.get("page", 0) + 1
-        } for doc in source_documents
-    ]
-    # Update chat history with the latest interaction
-    new_history = history + [(message, response_answer)]
-    return qa_chain, gr.update(value=""), new_history, sources
-# Set up the Gradio interface for the chatbot application
-def demo():
-    # Load and initialize a retriever with a placeholder database
-    placeholder_docs = load_doc(["placeholder.pdf"])
-    vector_db = create_db(placeholder_docs)
-    retriever = vector_db.as_retriever()
-    # Initialize the QA chain with default LLM parameters
-    qa_chain = initialize_llmchain(
-        llm_model=llm_model,
-        temperature=0.5,
-        max_tokens=1024,
-        top_k=3,
-        retriever=retriever
     )
-    with gr.Blocks() as demo:
-        # Persistent states for the vector database and QA chain
-        gr.State()
-        # Display app header and description
         gr.HTML("<center><h1>RAG PDF Chatbot</h1></center>")
         gr.Markdown("""
-            <b>Query your PDF documents!</b> Upload files to create a vector database and chat with the content. <b>Do not upload confidential documents.</b>
         """)
         with gr.Row():
-            # Column for uploading files and configuring the pipeline
-            with gr.Column(scale=85):
-                document = gr.Files(height=200, file_count="multiple", file_types=[".pdf"], label="Upload PDF documents")
-                upload_btn = gr.Button("Upload and Process Documents")
-                pipeline_status = gr.Textbox(value="Initialized", interactive=False, label="Status")
-            # Column for chatbot interaction
-            with gr.Column(scale=200):
-                chatbot = gr.Chatbot(height=400, label="Chatbot")
-                message = gr.Textbox(placeholder="Type your question here")
-                submit_btn = gr.Button("Submit")
-                clear_btn = gr.ClearButton([message, chatbot], value="Clear")
-                relevant_context = gr.Textbox(label="Relevant Context", lines=3, interactive=False)
-        # Define action to process documents and update the retriever
-        upload_btn.click(
-            lambda file_obj: initialize_llmchain(
-                llm_model=llm_model,
-                temperature=0.5,
-                max_tokens=1024,
-                top_k=3,
-                retriever=create_db(load_doc([file.name for file in file_obj if file is not None])).as_retriever()
-            ),
             inputs=[document],
-            outputs=[qa_chain]
         )
-        # Define action to handle user queries
         submit_btn.click(
             conversation,
-            inputs=[qa_chain, message, chatbot],
-            outputs=[qa_chain, message, chatbot, relevant_context]
         )
-    demo.launch()
-# Launch the application
 if __name__ == "__main__":
     demo()

 import gradio as gr
 import os
+from langchain_community.vectorstores import FAISS
+from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFaceEndpoint
 from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+api_token = os.getenv("HF_TOKEN")
+DEFAULT_LLM = "meta-llama/Meta-Llama-3-8B-Instruct"
+def load_and_create_db(list_file_obj):
+    # Create a list of documents
+    list_file_path = [x.name for x in list_file_obj if x is not None]
+    # Load documents
+    loaders = [PyPDFLoader(x) for x in list_file_path]
+    pages = []
+    for loader in loaders:
+        pages.extend(loader.load())
+    # Split documents
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1024,
+        chunk_overlap=64
+    )
+    doc_splits = text_splitter.split_documents(pages)
+    # Create vector database
+    embeddings = HuggingFaceEmbeddings()
+    vectordb = FAISS.from_documents(doc_splits, embeddings)
+    return vectordb
+def initialize_llmchain(vector_db, temperature=0.5, max_tokens=4096, top_k=3):
+    llm = HuggingFaceEndpoint(
+        repo_id=DEFAULT_LLM,
         huggingfacehub_api_token=api_token,
+        temperature=temperature,
+        max_new_tokens=max_tokens,
+        top_k=top_k,
     )
+    memory = ConversationBufferMemory(
         memory_key="chat_history",
+        output_key='answer',
+        return_messages=True
     )
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
+        retriever=vector_db.as_retriever(),
+        chain_type="stuff",
         memory=memory,
         return_source_documents=True,
         verbose=False,
     )
     return qa_chain
+def format_citation(source_doc):
+    content = source_doc.page_content.strip()
+    page = source_doc.metadata["page"] + 1
+    return f"[Page {page}] {content}"
+def format_response_with_citations(answer, sources):
+    citations = [format_citation(source) for source in sources[:3]]
+    formatted_response = f"{answer}\n\nReferences:\n"
+    for idx, citation in enumerate(citations, 1):
+        formatted_response += f"^{idx}^ {citation}\n"
+    return formatted_response
 def conversation(qa_chain, message, history):
+    if not qa_chain:
+        return None, gr.update(value=""), history, "Please upload a document first.", None
+    formatted_history = []
+    for user_msg, bot_msg in history:
+        formatted_history.append(f"User: {user_msg}")
+        formatted_history.append(f"Assistant: {bot_msg}")
+    response = qa_chain.invoke({
+        "question": message,
+        "chat_history": formatted_history
+    })
+    answer = response["answer"]
+    if "Helpful Answer:" in answer:
+        answer = answer.split("Helpful Answer:")[-1]
+    formatted_response = format_response_with_citations(
+        answer,
+        response["source_documents"]
     )
+    new_history = history + [(message, formatted_response)]
+    return qa_chain, gr.update(value=""), new_history
+def demo():
+    with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
+        vector_db = gr.State()
+        qa_chain = gr.State()
         gr.HTML("<center><h1>RAG PDF Chatbot</h1></center>")
         gr.Markdown("""
+        <b>Query your PDF documents!</b> This AI agent performs retrieval augmented generation (RAG)
+        on PDF documents. <b>Please do not upload confidential documents.</b>
         """)
         with gr.Row():
+            with gr.Column(scale=1):
+                document = gr.Files(
+                    height=300,
+                    file_count="multiple",
+                    file_types=["pdf"],
+                    label="Upload PDF documents"
+                )
+            with gr.Column(scale=2):
+                chatbot = gr.Chatbot(height=600)
+                with gr.Row():
+                    msg = gr.Textbox(
+                        placeholder="Ask a question about your documents...",
+                        container=True
+                    )
+                with gr.Row():
+                    submit_btn = gr.Button("Submit")
+                    clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
+        # Automatically create vector DB and initialize chain on file upload
+        document.upload(
+            fn=lambda files: [load_and_create_db(files), initialize_llmchain(load_and_create_db(files))],
             inputs=[document],
+            outputs=[vector_db, qa_chain]
         )
+        # Chatbot events
         submit_btn.click(
             conversation,
+            inputs=[qa_chain, msg, chatbot],
+            outputs=[qa_chain, msg, chatbot]
         )
+        msg.submit(
+            conversation,
+            inputs=[qa_chain, msg, chatbot],
+            outputs=[qa_chain, msg, chatbot]
+        )
+    demo.queue().launch(debug=True)
 if __name__ == "__main__":
     demo()