Spaces:

amraly1983
/

chat-with-pdf

Sleeping

App Files Files Community

chat-with-pdf / app.py

amraly1983

Create app.py

b10004d verified 6 months ago

raw

history blame

2.68 kB

	import streamlit as st
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.llms import HuggingFaceHub
	from langchain.chains import RetrievalQA
	from langchain.text_splitter import CharacterTextSplitter
	import fitz
	import os
	from langchain.schema import Document

	def process_pdf(file):
	    """Build a FAISS vector store from the text of an uploaded PDF.

	    The PDF is opened directly from the upload's in-memory bytes, so no
	    temporary file is written to the working directory. This avoids
	    concurrent sessions clobbering a shared fixed-name file and avoids
	    leaving a stale file behind when processing fails.

	    Args:
	        file: Uploaded file object exposing ``getbuffer()`` (as the object
	            returned by ``st.file_uploader`` is used here).

	    Returns:
	        The FAISS vector store built from the PDF's text chunks, or ``None``
	        if the PDF has no extractable text or any step fails. In both
	        failure cases a message is shown to the user via ``st.error``.
	    """
	    try:
	        # Copy the upload into an immutable bytes object for PyMuPDF.
	        pdf_bytes = bytes(file.getbuffer())

	        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	            # join() instead of repeated += keeps concatenation linear.
	            text = "".join(page.get_text() for page in doc)

	        # A PDF without a text layer (e.g. scanned images) would otherwise
	        # fail later while building the index; report it plainly instead.
	        if not text.strip():
	            st.error("Error processing PDF: no extractable text found.")
	            return None

	        # Wrap the raw text in a Document so the splitter can consume it.
	        texts = [Document(page_content=text)]

	        # Split text into smaller chunks
	        text_splitter = CharacterTextSplitter(
	            separator="\n",
	            chunk_size=300,
	            chunk_overlap=30,
	        )
	        documents = text_splitter.split_documents(texts)

	        embeddings = HuggingFaceEmbeddings()
	        return FAISS.from_documents(documents, embeddings)
	    except Exception as e:
	        # UI boundary: surface any failure to the user rather than crashing
	        # the Streamlit script.
	        st.error(f"Error processing PDF: {e}")
	        return None

	# --- Streamlit UI ---
	# Streamlit re-executes this script from the top on every widget interaction,
	# so expensive results (the vector index and the generated answer) are kept
	# in st.session_state and reused instead of being recomputed on each rerun.
	st.title("PDF Chatbot")
	uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

	if uploaded_file:
	    # Identify the upload by name and content so a new file rebuilds the index.
	    upload_key = (uploaded_file.name, hash(uploaded_file.getvalue()))

	    vectorstore = None
	    if st.session_state.get("pdf_key") == upload_key:
	        vectorstore = st.session_state.get("pdf_vectorstore")
	    if vectorstore is None:
	        vectorstore = process_pdf(uploaded_file)
	        # Only cache successful results so a failure is retried and its
	        # error message is shown again on the next rerun.
	        if vectorstore is not None:
	            st.session_state["pdf_key"] = upload_key
	            st.session_state["pdf_vectorstore"] = vectorstore

	    # The API token is read from the environment (e.g. a Space secret)
	    # rather than being hard-coded in the source.
	    hf_token = os.environ.get("HF_TOKEN") or os.environ.get(
	        "HUGGINGFACEHUB_API_TOKEN"
	    )

	    if vectorstore is not None and not hf_token:
	        st.error(
	            "Hugging Face API token not found. Set the HF_TOKEN "
	            "environment variable."
	        )
	    elif vectorstore is not None:
	        # --- Chat Functionality ---
	        llm = HuggingFaceHub(
	            repo_id="google/flan-t5-xxl",
	            model_kwargs={"temperature": 0.7, "max_length": 512},
	            huggingfacehub_api_token=hf_token,
	        )
	        qa_chain = RetrievalQA.from_chain_type(
	            llm=llm,
	            chain_type="stuff",
	            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
	        )

	        user_question = st.text_input("Ask a question about the PDF:")
	        if user_question:
	            # Clicking a feedback button triggers a rerun; without this
	            # cache the LLM would be queried again and, with a non-zero
	            # temperature, the displayed answer could change.
	            answer_key = (upload_key, user_question)
	            if st.session_state.get("answer_key") != answer_key:
	                with st.spinner("Generating answer..."):
	                    response = qa_chain({"query": user_question})
	                st.session_state["answer_key"] = answer_key
	                st.session_state["answer"] = response["result"]
	            st.write(st.session_state["answer"])

	            # --- Feedback Mechanism ---
	            st.write("Was this answer helpful?")
	            col1, col2 = st.columns(2)
	            with col1:
	                if st.button("👍"):
	                    st.write("Thanks for the feedback!")
	            with col2:
	                if st.button("👎"):
	                    st.write("We appreciate your feedback. We'll work on improving!")