"""Interactive PDF question-answering bot.

Pipeline: extract text from an uploaded PDF, embed its sentences with a
SentenceTransformer, index them in FAISS, retrieve the sentences most
relevant to a query, and run an extractive QA model over that context.
Served through a Gradio Blocks UI.
"""

import faiss
import gradio as gr
import numpy as np
import PyPDF2
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load models once at import time so every query reuses them.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
qa_model = pipeline("question-answering",
                    model="distilbert-base-cased-distilled-squad")


def process_pdf(pdf_file):
    """Extract sentences from *pdf_file* and build a FAISS index over them.

    Returns a (sentences, embeddings, faiss_index) triple.
    Raises ValueError when the PDF yields no extractable text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() may return None for image-only/scanned pages; treat
    # those as empty rather than crashing on str + None.
    document_text = "".join(page.extract_text() or ""
                            for page in pdf_reader.pages)
    # Naive sentence split; drop empty/whitespace fragments so we never
    # embed an empty string.
    sentences = [s for s in document_text.split('. ') if s.strip()]
    if not sentences:
        raise ValueError("No extractable text found in the PDF.")
    # FAISS requires contiguous float32 input.
    embeddings = np.asarray(embedding_model.encode(sentences),
                            dtype=np.float32)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
    return sentences, embeddings, faiss_index


def get_relevant_context(query, faiss_index, sentences, k=3):
    """Return up to *k* sentences nearest to *query*, joined as one string."""
    query_vector = np.asarray(embedding_model.encode([query]),
                              dtype=np.float32)
    # Never ask FAISS for more neighbours than there are sentences:
    # it pads missing results with index -1.
    k = min(k, len(sentences))
    _, indices = faiss_index.search(query_vector, k)
    # Filter any -1 padding so sentences[-1] is never returned by mistake.
    relevant_sentences = [sentences[i] for i in indices[0] if i >= 0]
    return ". ".join(relevant_sentences)


def answer_question(query, faiss_index, sentences):
    """Answer *query* from the indexed document; returns (answer, context)."""
    if not sentences:
        return "Please upload a document first.", ""
    relevant_context = get_relevant_context(query, faiss_index, sentences)
    answer = qa_model(question=query, context=relevant_context)
    return answer['answer'], relevant_context


def process_and_answer(pdf_file, query):
    """Gradio callback: process the uploaded PDF and answer the question.

    Guards against a missing upload or empty question so the UI shows a
    friendly message instead of a server-side stack trace.
    """
    if pdf_file is None:
        return "Please upload a document first.", ""
    if not query or not query.strip():
        return "Please enter a question.", ""
    try:
        sentences, _embeddings, faiss_index = process_pdf(pdf_file)
    except ValueError as exc:
        # e.g. a scanned PDF with no extractable text.
        return str(exc), ""
    return answer_question(query, faiss_index, sentences)


with gr.Blocks() as demo:
    gr.Markdown("# Interactive QA Bot")
    pdf_input = gr.File(label="Upload PDF")
    query_input = gr.Textbox(label="Ask a question about the document")
    answer_output = gr.Textbox(label="Answer")
    context_output = gr.Textbox(label="Relevant Context")
    submit_button = gr.Button("Submit")
    submit_button.click(process_and_answer,
                        inputs=[pdf_input, query_input],
                        outputs=[answer_output, context_output])

demo.launch()