import os import PyPDF2 import faiss import numpy as np from sentence_transformers import SentenceTransformer import gradio as gr import requests # Load your Anthropic API key from environment variable or set it here ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") # Make sure to set your key # Alternatively, set it directly in the code for testing purposes: # ANTHROPIC_API_KEY = "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" # Step 1: Extract text from PDFs def extract_text_from_pdf(pdf_file): reader = PyPDF2.PdfReader(pdf_file) text = "" for page in reader.pages: text += page.extract_text() return text # Step 2: Generate embeddings def create_embeddings(text): model = SentenceTransformer('all-MiniLM-L6-v2') return model.encode(text.split('\n'), convert_to_tensor=True) # Step 3: Create FAISS index def create_faiss_index(embeddings): dim = embeddings.shape[1] index = faiss.IndexFlatL2(dim) index.add(np.array(embeddings)) return index # Step 4: Query with Anthropic def query_anthropic(prompt): headers = { "Authorization": f"Bearer {ANTHROPIC_API_KEY}", "Content-Type": "application/json" } data = { "prompt": prompt, "max_tokens": 150, "stop": None } response = requests.post('https://api.anthropic.com/v1/complete', json=data, headers=headers) return response.json().get('completion', 'No response from model') # Step 5: Complete workflow def process_pdf_and_query(pdf_file, user_query): # Step 1: Extract text pdf_text = extract_text_from_pdf(pdf_file) # Step 2: Generate embeddings embeddings = create_embeddings(pdf_text) # Step 3: Create FAISS index faiss_index = create_faiss_index(embeddings) # Step 4: Query with Anthropic index_query_embedding = create_embeddings(user_query) D, I = faiss_index.search(np.array([index_query_embedding]), k=1) # Searching for the closest match closest_text = pdf_text.split('\n')[I[0][0]] # Get the closest text based on the index response = query_anthropic(f"Answer the question based on this context: {closest_text}. Question: {user_query}") return response # Gradio interface def run_gradio(): iface = gr.Interface( fn=process_pdf_and_query, inputs=[gr.File(label="Upload PDF File"), gr.Textbox(label="Ask a question")], outputs="text", title="PDF Query with Anthropic", description="Upload a PDF file and ask questions related to its content." ) iface.launch() if __name__ == "__main__": run_gradio()