|
|
|
import PyPDF2 |
|
from sentence_transformers import SentenceTransformer |
|
import faiss |
|
import numpy as np |
|
|
|
|
|
# Sentence-embedding model shared by indexing (process_pdf) and querying
# (get_relevant_context). Loaded once at import time.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
|
def process_pdf(pdf_file):
    """Extract text from a PDF, embed its sentences, and build a FAISS index.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        A ``(sentences, embeddings, faiss_index)`` tuple: the sentence
        strings, their embedding matrix (float32, shape ``(n, d)``), and an
        ``IndexFlatL2`` populated with those embeddings. If the PDF yields
        no extractable text, returns ``([], empty array, None)`` so callers
        (see ``answer_question``) can prompt the user instead of crashing.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() returns None for image-only pages; substitute "" so the
    # join never sees None. join also avoids quadratic string concatenation.
    document_text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Naive sentence split; drop empty/whitespace fragments so we never
    # embed an empty string.
    sentences = [s for s in document_text.split('. ') if s.strip()]
    if not sentences:
        # Empty PDF: previously embeddings.shape[1] raised IndexError here.
        return [], np.empty((0, 0), dtype=np.float32), None

    # FAISS requires float32 input; sentence-transformers usually returns
    # float32 already, but make it explicit.
    embeddings = np.asarray(embedding_model.encode(sentences), dtype=np.float32)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
    return sentences, embeddings, faiss_index
|
|
|
|
|
def get_relevant_context(query, faiss_index, sentences, k=3):
    """Return the ``k`` sentences most similar to *query*, joined with '. '.

    Args:
        query: The user's question.
        faiss_index: Index built by ``process_pdf`` over *sentences*.
        sentences: The sentence list the index was built from.
        k: Maximum number of neighbors to retrieve (default 3).

    Returns:
        The retrieved sentences joined with ". ", or "" if there is
        nothing to search.
    """
    # Never ask FAISS for more neighbors than the index holds: with
    # k > ntotal it pads the result with -1, which would silently index
    # sentences[-1] and inject an unrelated sentence into the context.
    k = min(k, len(sentences))
    if k <= 0:
        return ""
    query_vector = np.asarray(embedding_model.encode([query]), dtype=np.float32)
    _, neighbor_ids = faiss_index.search(query_vector, k)
    # Defensive: skip any -1 padding entries FAISS may still return.
    relevant_sentences = [sentences[i] for i in neighbor_ids[0] if i >= 0]
    return ". ".join(relevant_sentences)
|
|
|
# NOTE(review): mid-file import; conventionally this belongs at the top of
# the file with the other imports.
from transformers import pipeline

# Extractive QA model: selects an answer span from the supplied context.
# Loaded once at import time.
qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
|
|
|
def answer_question(query, faiss_index, sentences):
    """Answer *query* using context retrieved from the indexed document.

    Args:
        query: The user's question.
        faiss_index: FAISS index produced by ``process_pdf``.
        sentences: Sentence list the index was built from.

    Returns:
        A ``(answer, context)`` pair of strings. When no document has been
        indexed yet, a prompt to upload one is returned instead of an answer.
    """
    # Guard clause: nothing indexed yet.
    if not sentences:
        return "Please upload a document first.", ""

    context = get_relevant_context(query, faiss_index, sentences)
    result = qa_model(question=query, context=context)
    return result['answer'], context
|
|
|
import gradio as gr |
|
|
|
def process_and_answer(pdf_file, query):
    """Gradio click handler: index the uploaded PDF and answer the question.

    Args:
        pdf_file: The uploaded file from ``gr.File`` (``None`` if the user
            clicked Submit without uploading anything).
        query: The question text from the query textbox.

    Returns:
        A ``(answer, context)`` pair of strings for the two output textboxes.
    """
    # Previously a click with no upload crashed inside PyPDF2; return the
    # same friendly prompt answer_question uses for an empty document.
    if pdf_file is None:
        return "Please upload a document first.", ""
    # NOTE(review): the PDF is re-parsed and re-embedded on every question;
    # caching the index per uploaded file would avoid the redundant work.
    sentences, embeddings, faiss_index = process_pdf(pdf_file)
    answer, context = answer_question(query, faiss_index, sentences)
    return answer, context
|
|
|
# --- Gradio UI -------------------------------------------------------------
# Components are declared in display order inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# Interactive QA Bot")
    # Inputs: the source document and the user's question.
    pdf_input = gr.File(label="Upload PDF")
    query_input = gr.Textbox(label="Ask a question about the document")
    # Outputs: the extracted answer and the retrieved supporting sentences.
    answer_output = gr.Textbox(label="Answer")
    context_output = gr.Textbox(label="Relevant Context")
    submit_button = gr.Button("Submit")

    # Each click runs the full pipeline: parse PDF -> embed -> retrieve -> QA.
    submit_button.click(process_and_answer, inputs=[pdf_input, query_input], outputs=[answer_output, context_output])

# Starts the local web server (blocks until the app is stopped).
demo.launch()
|
|
|
|