Spaces:

amraly1983
/

chat-with-pdf

Sleeping

File size: 2,682 Bytes

b10004d

import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
import fitz
import os
from langchain.schema import Document

def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and index it in FAISS.

    Args:
        file: Uploaded file object exposing ``getbuffer()`` (for example the
            value returned by ``st.file_uploader``).

    Returns:
        A FAISS vector store built from the PDF's text chunks, or ``None``
        if processing failed. Failures are reported to the user through
        ``st.error`` rather than raised.
    """
    import tempfile

    temp_path = None
    try:
        # A unique temp file per call: a fixed name in the working directory
        # would be shared (and clobbered) by concurrent sessions.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(file.getbuffer())

        with fitz.open(temp_path) as doc:
            text = "".join(page.get_text() for page in doc)

        # Nothing to index (e.g. a scanned, image-only PDF); bail out early
        # with a clear message instead of an opaque indexing failure.
        if not text.strip():
            st.error("Error processing PDF: no extractable text found")
            return None

        # Wrap the text in a Document so the splitter can consume it.
        texts = [Document(page_content=text)]

        # Split text into smaller chunks
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Always clean up the temp file, including on the error paths.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

# --- Streamlit UI ---
# Flow: upload a PDF -> build a vector index -> answer questions with a
# retrieval-augmented QA chain -> collect thumbs-up/down feedback.
st.title("PDF Chatbot")
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    vectorstore = process_pdf(uploaded_file)

    if vectorstore:
        # The API token is read from the environment; no HF_TOKEN name is
        # defined in this script, and a secret should not be hard-coded.
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get(
            "HUGGINGFACEHUB_API_TOKEN"
        )
        if not hf_token:
            st.error(
                "Hugging Face API token not found. Set the HF_TOKEN "
                "environment variable (or Space secret) and reload."
            )
            st.stop()

        # --- Chat Functionality ---
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=hf_token,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            # Retrieve the 2 most similar chunks as context for each answer.
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")
        if user_question:
            with st.spinner("Generating answer..."):
                response = qa_chain({"query": user_question})
                answer = response['result']
                st.write(answer)

                # --- Feedback Mechanism ---
                st.write("Was this answer helpful?")
                col1, col2 = st.columns(2)
                with col1:
                    if st.button("👍"):
                        st.write("Thanks for the feedback!")
                with col2:
                    if st.button("👎"):
                        st.write("We appreciate your feedback. We'll work on improving!")