import os
import tempfile

import fitz
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS


def process_pdf(file):
    """Build a FAISS vector store from the text of an uploaded PDF.

    The upload is written to a uniquely named temporary file, its text is
    extracted page by page with PyMuPDF (``fitz``), split into overlapping
    chunks, and embedded with the default ``HuggingFaceEmbeddings`` model.

    Args:
        file: Uploaded file object exposing ``getbuffer()`` (for example the
            object returned by ``st.file_uploader``).

    Returns:
        The FAISS vector store built from the PDF text, or ``None`` if any
        step fails. Failures are reported to the user through ``st.error``.
    """
    temp_path = None
    try:
        # Use a unique temporary file rather than a fixed "temp_pdf.pdf" so
        # that concurrent sessions cannot overwrite each other's upload.
        # delete=False lets the file be reopened by path after it is closed.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            temp_path = tmp.name
            tmp.write(file.getbuffer())

        with fitz.open(temp_path) as doc:
            text = "".join(page.get_text() for page in doc)

        # Scanned/image-only PDFs yield no text; report that clearly instead
        # of letting the vector store fail on an empty document list.
        if not text.strip():
            st.error("Error processing PDF: no extractable text found")
            return None

        # Wrap the raw text in a Document so the splitter can process it.
        texts = [Document(page_content=text)]

        # Split text into smaller chunks
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Always remove the temporary file, including on failure paths.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass
# --- Streamlit UI ---
# Streamlit re-executes this script from the top on every user interaction,
# so expensive results (the vector store and the last answer) are kept in
# st.session_state and reused while the inputs are unchanged.
st.title("PDF Chatbot")

uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Identify the upload by name and size so a new file invalidates the cache.
    file_key = (uploaded_file.name, uploaded_file.size)
    cached_store = st.session_state.get("pdf_cache")
    if cached_store is not None and cached_store[0] == file_key:
        vectorstore = cached_store[1]
    else:
        vectorstore = process_pdf(uploaded_file)
        # Cache only successful results so a failed upload is retried.
        if vectorstore is not None:
            st.session_state["pdf_cache"] = (file_key, vectorstore)

    if vectorstore is not None:
        # --- Chat Functionality ---
        # Read the API token from the environment instead of relying on an
        # undefined global; never hard-code the token in the source.
        hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        if not hf_token:
            st.error(
                "Set the HUGGINGFACEHUB_API_TOKEN environment variable "
                "to query the model."
            )
            st.stop()

        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=hf_token,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")
        if user_question:
            # Reuse the previous answer when only a feedback button was
            # clicked; otherwise the rerun would query the LLM again.
            answer_key = (file_key, user_question)
            cached_answer = st.session_state.get("answer_cache")
            if cached_answer is not None and cached_answer[0] == answer_key:
                answer = cached_answer[1]
            else:
                answer = None
                try:
                    with st.spinner("Generating answer..."):
                        response = qa_chain({"query": user_question})
                    answer = response["result"]
                    st.session_state["answer_cache"] = (answer_key, answer)
                except Exception as e:
                    # UI boundary: report remote/model failures to the user.
                    st.error(f"Error generating answer: {e}")

            if answer is not None:
                st.write(answer)

                # --- Feedback Mechanism ---
                st.write("Was this answer helpful?")
                col1, col2 = st.columns(2)
                # Distinct labels and explicit keys keep the two buttons from
                # colliding as duplicate widgets.
                with col1:
                    if st.button("👍", key="feedback_up"):
                        st.write("Thanks for the feedback!")
                with col2:
                    if st.button("👎", key="feedback_down"):
                        st.write(
                            "We appreciate your feedback. "
                            "We'll work on improving!"
                        )