|
import streamlit as st |
|
import fitz |
|
import faiss |
|
from sentence_transformers import SentenceTransformer |
|
import numpy as np |
|
from phi.agent import Agent |
|
from phi.model.groq import Groq |
|
|
|
|
|
# Sentence-embedding model shared by PDF-page indexing and query encoding.
# Loaded once at import time so Streamlit reruns reuse the same instance.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
|
def agent_response(question, retrieved_text):
    """Answer *question* using only *retrieved_text*, via a Groq-backed agent.

    A fresh Agent is constructed per call. The question and the retrieved
    document are injected both into the agent's instructions and into the
    run prompt, mirroring the original prompt design.
    """
    instructions = [
        f"First read the question carefully. The question is: **{question}**",
        f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
        "Finally answer the question based on the provided document only. Don't try to give random responses."
    ]

    assistant = Agent(
        model=Groq(id="llama-3.3-70b-versatile"),
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=instructions,
    )

    prompt = question + '\n' + retrieved_text
    return assistant.run(prompt).content
|
|
|
class PDFChatbot:
    """Retrieval-augmented chatbot over a single uploaded PDF.

    Each page's text is embedded with the module-level SentenceTransformer
    and stored in a FAISS L2 index; `chat` retrieves the closest pages and
    delegates answering to `agent_response`.
    """

    def __init__(self):
        # One text chunk per PDF page, row-aligned with the FAISS index.
        self.text_chunks = []
        # FAISS index; stays None until a PDF has been processed.
        self.index = None

    def process_pdf(self, pdf_file):
        """Extract text from PDF and create FAISS index.

        `pdf_file` is a file-like object (e.g. Streamlit's UploadedFile);
        it is read fully into memory and opened as a PDF stream.
        Returns a status message string.
        """
        self.text_chunks = []

        # Open from an in-memory byte stream, not a filesystem path.
        with fitz.open("pdf", pdf_file.read()) as doc:
            for page in doc:
                self.text_chunks.append(page.get_text("text"))

        # Guard: a PDF with no pages would make encode() produce an empty
        # array and leave behind an unusable index.
        if not self.text_chunks:
            return "No text could be extracted from this PDF."

        embeddings = embedding_model.encode(self.text_chunks, convert_to_numpy=True)

        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)
        return "PDF processed successfully!"

    def chat(self, query):
        """Retrieve the most relevant chunks for a query and answer it."""
        if self.index is None:
            return "Please upload a PDF first."

        query_embedding = embedding_model.encode([query], convert_to_numpy=True)

        # Never ask FAISS for more neighbours than there are indexed
        # vectors: with k > ntotal the result is padded with -1, which
        # would silently index text_chunks[-1] and duplicate the last page.
        k = min(5, len(self.text_chunks))
        _, indices = self.index.search(query_embedding, k)
        retrieved_texts = [
            self.text_chunks[idx] for idx in indices[0] if idx != -1
        ]
        retrieved_text_combined = "\n\n".join(retrieved_texts)

        return agent_response(query, retrieved_text_combined)
|
|
|
|
|
# --- Streamlit UI ---------------------------------------------------------
# Streamlit re-executes this entire script on every widget interaction, so
# the chatbot (and its FAISS index) must live in st.session_state; otherwise
# a fresh PDFChatbot would be built — and the PDF fully re-embedded — on
# every button click or text-input change.
if "chatbot" not in st.session_state:
    st.session_state.chatbot = PDFChatbot()

st.title("Chat with your PDF")

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
if uploaded_file:
    # Re-process only when a different file is uploaded, not on every rerun.
    if st.session_state.get("processed_name") != uploaded_file.name:
        st.session_state.processed_message = st.session_state.chatbot.process_pdf(uploaded_file)
        st.session_state.processed_name = uploaded_file.name
    st.success(st.session_state.processed_message)

query = st.text_input("Ask a question")
if st.button("Ask"):
    if query:
        response = st.session_state.chatbot.chat(query)
        st.markdown(f"**Answer:**\n\n{response}")
|
|