# app.py — Streamlit chatbot: index an uploaded PDF with FAISS and answer
# questions about it via a Groq-hosted LLM (simple RAG pipeline).
import streamlit as st
import fitz # PyMuPDF
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
from phi.agent import Agent
from phi.model.groq import Groq
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
def agent_response(question, retrieved_text):
    """Answer *question* using only *retrieved_text*, via a Groq-backed phi Agent.

    A fresh Agent is built per call; the question and retrieved document are
    embedded both in the instructions and in the run prompt.
    Returns the agent's answer as a (markdown) string.
    """
    qa_agent = Agent(
        model=Groq(id="llama-3.3-70b-versatile"),
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=[
            f"First read the question carefully. The question is: **{question}**",
            f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
            "Finally answer the question based on the provided document only. Don't try to give random responses.",
        ],
    )
    prompt = question + '\n' + retrieved_text
    return qa_agent.run(prompt).content
class PDFChatbot:
    """In-memory RAG helper: index a PDF page-by-page with FAISS, then answer
    queries by retrieving the closest pages and delegating to agent_response."""

    # Number of top-matching pages to retrieve per query.
    TOP_K = 5

    def __init__(self):
        self.text_chunks = []  # one extracted-text string per PDF page
        self.index = None      # FAISS index; None until process_pdf succeeds

    def process_pdf(self, pdf_file):
        """Extract text from an uploaded PDF and build a FAISS L2 index.

        pdf_file: file-like object supporting .read() (e.g. Streamlit upload).
        Returns a human-readable status message.
        """
        self.text_chunks = []
        # Open from in-memory bytes; "pdf" tells PyMuPDF the stream format.
        with fitz.open("pdf", pdf_file.read()) as doc:
            for page in doc:
                self.text_chunks.append(page.get_text("text"))
        if not self.text_chunks:
            # Empty/unreadable document: leave index unset so chat() reports it.
            self.index = None
            return "No text could be extracted from this PDF."
        embeddings = embedding_model.encode(self.text_chunks, convert_to_numpy=True)
        # FAISS requires contiguous float32 input.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)
        return "PDF processed successfully!"

    def chat(self, query):
        """Retrieve the most relevant page(s) for *query* and ask the agent."""
        if self.index is None:
            return "Please upload a PDF first."
        query_embedding = embedding_model.encode([query], convert_to_numpy=True)
        query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)
        # Never ask for more neighbours than the index holds: FAISS pads the
        # result with -1, and text_chunks[-1] would silently duplicate the
        # last page in the context.
        k = min(self.TOP_K, self.index.ntotal)
        _, indices = self.index.search(query_embedding, k)
        retrieved_texts = [self.text_chunks[idx] for idx in indices[0] if idx != -1]
        retrieved_text_combined = "\n\n".join(retrieved_texts)
        print('Retrieved_texts:', retrieved_text_combined)  # debug trace
        return agent_response(query, retrieved_text_combined)
# --- Streamlit UI (top-level script; reruns on every user interaction) ---
chatbot = PDFChatbot()

st.title("Chat with your PDF")

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
if uploaded_file:
    # Re-index the uploaded file and surface the status message.
    st.success(chatbot.process_pdf(uploaded_file))

query = st.text_input("Ask a question")
# st.button is evaluated unconditionally so the widget always renders.
if st.button("Ask") and query:
    st.markdown(f"**Answer:**\n\n{chatbot.chat(query)}")