import streamlit as st
import fitz  # PyMuPDF
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
from phi.agent import Agent
from phi.model.groq import Groq


@st.cache_resource
def _load_embedding_model():
    """Load the sentence-embedding model once per server process (cached across reruns)."""
    return SentenceTransformer("all-MiniLM-L6-v2")


# Module-level name kept for backward compatibility with any external importers.
embedding_model = _load_embedding_model()


def agent_response(question, retrieved_text):
    """Ask the Groq-backed agent to answer *question* grounded in *retrieved_text*.

    Args:
        question: The user's natural-language question.
        retrieved_text: Concatenated document chunks retrieved for the question.

    Returns:
        The agent's answer as a (markdown) string.
    """
    agent = Agent(
        model=Groq(id="llama-3.3-70b-versatile"),
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=[
            f"First read the question carefully. The question is: **{question}**",
            f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
            "Finally answer the question based on the provided document only. Don't try to give random responses.",
        ],
    )
    return agent.run(question + '\n' + retrieved_text).content


class PDFChatbot:
    """Retrieval-augmented chat over a single uploaded PDF.

    Pages are embedded with a sentence-transformer and indexed in a flat
    L2 FAISS index; queries retrieve the closest pages and hand them to
    the LLM agent as context.
    """

    TOP_K = 5  # maximum number of page chunks retrieved per query

    def __init__(self):
        self.text_chunks = []  # one extracted-text string per PDF page
        self.index = None      # FAISS index over page embeddings; None until a PDF is processed

    def process_pdf(self, pdf_file):
        """Extract text from PDF and create FAISS index.

        Args:
            pdf_file: A file-like object (e.g. Streamlit UploadedFile) holding PDF bytes.

        Returns:
            A human-readable status message.
        """
        # Explicit keywords: the positional form fitz.open("pdf", data) works only
        # because a filename is reinterpreted as filetype when a stream is given.
        with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
            self.text_chunks = [page.get_text("text") for page in doc]

        if not self.text_chunks:
            # Zero pages: embeddings would be empty and embeddings.shape[1] would raise.
            self.index = None
            return "PDF contains no pages to index."

        embeddings = embedding_model.encode(self.text_chunks, convert_to_numpy=True)
        embeddings = np.asarray(embeddings, dtype=np.float32)  # FAISS requires float32

        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)
        return "PDF processed successfully!"

    def chat(self, query):
        """Retrieve the most relevant chunks for *query* and ask the agent.

        Args:
            query: The user's question.

        Returns:
            The agent's answer, or a prompt to upload a PDF first.
        """
        if self.index is None:
            return "Please upload a PDF first."

        query_embedding = embedding_model.encode([query], convert_to_numpy=True)
        query_embedding = np.asarray(query_embedding, dtype=np.float32)

        # Never ask FAISS for more neighbours than indexed vectors; missing
        # slots would come back as -1 and text_chunks[-1] would silently
        # duplicate the last page.
        k = min(self.TOP_K, len(self.text_chunks))
        _, indices = self.index.search(query_embedding, k)
        retrieved_texts = [self.text_chunks[idx] for idx in indices[0] if idx >= 0]
        return agent_response(query, "\n\n".join(retrieved_texts))


# ---- Streamlit UI ----
st.title("Chat with your PDF")

# Streamlit reruns this whole script on every interaction. Keep the chatbot
# (and its FAISS index) in session_state so the processed PDF survives reruns
# instead of being re-embedded on every click.
if "chatbot" not in st.session_state:
    st.session_state.chatbot = PDFChatbot()
chatbot = st.session_state.chatbot

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
if uploaded_file is not None:
    # Only (re)process when a different file is uploaded.
    if st.session_state.get("processed_file_name") != uploaded_file.name:
        message = chatbot.process_pdf(uploaded_file)
        st.session_state.processed_file_name = uploaded_file.name
        st.success(message)

query = st.text_input("Ask a question")
if st.button("Ask"):
    if query:
        response = chatbot.chat(query)
        st.markdown(f"**Answer:**\n\n{response}")