amraly1983 committed on
Commit
b10004d
β€’
1 Parent(s): 9a367c7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import fitz
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
10
+
11
def process_pdf(file):
    """Extract text from an uploaded PDF and build a FAISS vector store.

    Args:
        file: A Streamlit ``UploadedFile`` holding the PDF bytes
            (anything with a ``getbuffer()`` method works).

    Returns:
        A FAISS vector store over ~300-character overlapping chunks of the
        PDF text, or ``None`` on failure (the error is shown in the UI via
        ``st.error``).
    """
    tmp_path = None
    try:
        # Write the upload to a uniquely-named temp file instead of a fixed
        # "temp_pdf.pdf" in the CWD, so concurrent users/sessions cannot
        # clobber each other's files.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file.getbuffer())
            tmp_path = tmp.name

        with fitz.open(tmp_path) as doc:
            # join() avoids quadratic string concatenation across pages.
            text = "".join(page.get_text() for page in doc)

        # Wrap the raw text in a Document so the splitter can process it.
        texts = [Document(page_content=text)]

        # Split into small overlapping chunks suitable for retrieval.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        # Best-effort by design: report in the UI and signal failure with
        # None, which the caller checks before building the QA chain.
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Always remove the temp file — the original only deleted it on the
        # success path, leaking it whenever extraction/embedding raised.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
43
+
44
# --- Streamlit UI ---
st.title("PDF Chatbot")
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    vectorstore = process_pdf(uploaded_file)

    if vectorstore:
        # --- Chat Functionality ---
        # Read the API token from the environment (Hugging Face Spaces
        # exposes repository secrets as env vars). The original referenced
        # an undefined name `HF_TOKEN`, which raised NameError at runtime.
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get(
            "HUGGINGFACEHUB_API_TOKEN"
        )
        if not hf_token:
            st.error(
                "Hugging Face API token not found. "
                "Set the HF_TOKEN environment variable (or Space secret)."
            )
            st.stop()

        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=hf_token,
        )
        # "stuff" chain: concatenates the top-k retrieved chunks into one
        # prompt; k=2 keeps the prompt within flan-t5's context budget.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")
        if user_question:
            with st.spinner("Generating answer..."):
                response = qa_chain({"query": user_question})
            answer = response["result"]
            st.write(answer)

            # --- Feedback Mechanism ---
            st.write("Was this answer helpful?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("πŸ‘"):
                    st.write("Thanks for the feedback!")
            with col2:
                if st.button("πŸ‘Ž"):
                    st.write("We appreciate your feedback. We'll work on improving!")