"""Streamlit app: upload a PDF and ask questions about its content."""

import os

import fitz
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# The Hugging Face Hub API token is read from the environment so that the
# secret never has to be hard-coded in this file.
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")


def process_pdf(file):
    """Extract text from a PDF, split it into chunks, and index the chunks.

    Args:
        file: Uploaded file object exposing ``getbuffer()`` (for example the
            object returned by ``st.file_uploader``).

    Returns:
        A FAISS vector store built from the PDF's text chunks, or ``None``
        when the PDF cannot be processed. In the failure case an error
        message is shown in the Streamlit UI.
    """
    try:
        # Open the PDF straight from memory: no temporary file is created,
        # so nothing can be leaked on failure and concurrent sessions cannot
        # overwrite each other's upload.
        pdf_bytes = bytes(file.getbuffer())
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            text = "".join(page.get_text() for page in doc)

        # A PDF without a text layer yields nothing to index; report that
        # explicitly instead of failing later with an opaque error.
        if not text.strip():
            st.error("Error processing PDF: no extractable text found.")
            return None

        # Wrap the text in a Document so the splitter can consume it.
        texts = [Document(page_content=text)]

        # Split text into smaller chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        # UI boundary: surface any failure to the user rather than crashing
        # the app.
        st.error(f"Error processing PDF: {e}")
        return None


# --- Streamlit UI ---
st.title("PDF Chatbot")

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    vectorstore = process_pdf(uploaded_file)

    if vectorstore:
        if not HF_TOKEN:
            st.error(
                "Hugging Face API token not found. Set the "
                "HUGGINGFACEHUB_API_TOKEN environment variable."
            )
            st.stop()

        # --- Chat Functionality ---
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=HF_TOKEN,
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")
        if user_question:
            with st.spinner("Generating answer..."):
                response = qa_chain({"query": user_question})
                answer = response['result']
            st.write(answer)

            # --- Feedback Mechanism ---
            st.write("Was this answer helpful?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("👍"):
                    st.write("Thanks for the feedback!")
            with col2:
                if st.button("👎"):
                    st.write("We appreciate your feedback. We'll work on improving!")