# chat-with-pdf / app.py
# amraly1983's picture
# Create app.py
# b10004d verified
# raw
# history blame
# 2.68 kB
import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
import fitz
import os
from langchain.schema import Document
def process_pdf(file):
    """Extract text from an uploaded PDF, split it into chunks, and index it.

    The upload is written to a uniquely named temporary file, read page by
    page with PyMuPDF, split into overlapping chunks, embedded, and stored in
    a FAISS vector store. The temporary file is removed whether or not
    processing succeeds.

    Args:
        file: Uploaded file object exposing ``getbuffer()`` (as passed by the
            caller from ``st.file_uploader``).

    Returns:
        The FAISS vector store built from the PDF's text chunks, or ``None``
        if anything went wrong (the problem is reported via ``st.error``).
    """
    # Imported locally so the fix does not depend on the file's import section.
    import tempfile

    temp_path = None
    try:
        # A unique path per call: a fixed filename in the working directory
        # would be overwritten when two uploads are processed at once.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(file.getbuffer())

        with fitz.open(temp_path) as doc:
            text = "".join(page.get_text() for page in doc)

        # With no text there is nothing to split or embed; report it clearly
        # instead of letting index construction fail with an opaque error.
        if not text.strip():
            st.error("Error processing PDF: no extractable text found in the file.")
            return None

        # Wrap the raw text in a Document so the splitter can consume it.
        texts = [Document(page_content=text)]

        # Split text into smaller, slightly overlapping chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Clean up on every path, including failures.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
# --- Streamlit UI ---
# NOTE(review): Streamlit re-runs this script on every widget interaction, so
# the PDF is re-processed and the question re-asked when a feedback button is
# clicked. Consider caching the vector store / answer in st.session_state.
st.title("PDF Chatbot")

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    vectorstore = process_pdf(uploaded_file)

    if vectorstore:
        # The original referenced an undefined HF_TOKEN name (NameError).
        # Read the token from the environment instead of hard-coding it.
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get(
            "HUGGINGFACEHUB_API_TOKEN"
        )
        if not hf_token:
            st.error(
                "Hugging Face API token not found. Set the HF_TOKEN "
                "(or HUGGINGFACEHUB_API_TOKEN) environment variable."
            )
            st.stop()

        # --- Chat Functionality ---
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=hf_token,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")

        if user_question:
            answer = None
            with st.spinner("Generating answer..."):
                try:
                    response = qa_chain({"query": user_question})
                    answer = response["result"]
                except Exception as e:
                    # Same style as process_pdf: report the failure in the UI
                    # instead of showing a raw traceback.
                    st.error(f"Error generating answer: {e}")

            if answer is not None:
                st.write(answer)

                # --- Feedback Mechanism ---
                st.write("Was this answer helpful?")
                col1, col2 = st.columns(2)
                with col1:
                    # Thumbs-up emoji; the original label was mojibake.
                    if st.button("\U0001F44D"):
                        st.write("Thanks for the feedback!")
                with col2:
                    # Thumbs-down emoji; the original label was mojibake.
                    if st.button("\U0001F44E"):
                        st.write("We appreciate your feedback. We'll work on improving!")