Spaces:
Sleeping
Sleeping
import os | |
import PyPDF2 | |
import faiss | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
import gradio as gr | |
import requests | |
# Load your Anthropic API key from environment variable or set it here | |
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") # Make sure to set your key | |
# Alternatively, set it directly in the code for testing purposes: | |
# ANTHROPIC_API_KEY = "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" | |
# Step 1: Extract text from PDFs | |
def extract_text_from_pdf(pdf_file): | |
reader = PyPDF2.PdfReader(pdf_file) | |
text = "" | |
for page in reader.pages: | |
text += page.extract_text() | |
return text | |
# Step 2: Generate embeddings | |
def create_embeddings(text): | |
model = SentenceTransformer('all-MiniLM-L6-v2') | |
return model.encode(text.split('\n'), convert_to_tensor=True) | |
# Step 3: Create FAISS index | |
def create_faiss_index(embeddings): | |
dim = embeddings.shape[1] | |
index = faiss.IndexFlatL2(dim) | |
index.add(np.array(embeddings)) | |
return index | |
# Step 4: Query with Anthropic | |
def query_anthropic(prompt): | |
headers = { | |
"Authorization": f"Bearer {ANTHROPIC_API_KEY}", | |
"Content-Type": "application/json" | |
} | |
data = { | |
"prompt": prompt, | |
"max_tokens": 150, | |
"stop": None | |
} | |
response = requests.post('https://api.anthropic.com/v1/complete', json=data, headers=headers) | |
return response.json().get('completion', 'No response from model') | |
# Step 5: Complete workflow | |
def process_pdf_and_query(pdf_file, user_query): | |
# Step 1: Extract text | |
pdf_text = extract_text_from_pdf(pdf_file) | |
# Step 2: Generate embeddings | |
embeddings = create_embeddings(pdf_text) | |
# Step 3: Create FAISS index | |
faiss_index = create_faiss_index(embeddings) | |
# Step 4: Query with Anthropic | |
index_query_embedding = create_embeddings(user_query) | |
D, I = faiss_index.search(np.array([index_query_embedding]), k=1) # Searching for the closest match | |
closest_text = pdf_text.split('\n')[I[0][0]] # Get the closest text based on the index | |
response = query_anthropic(f"Answer the question based on this context: {closest_text}. Question: {user_query}") | |
return response | |
# Gradio interface | |
def run_gradio(): | |
iface = gr.Interface( | |
fn=process_pdf_and_query, | |
inputs=[gr.File(label="Upload PDF File"), gr.Textbox(label="Ask a question")], | |
outputs="text", | |
title="PDF Query with Anthropic", | |
description="Upload a PDF file and ask questions related to its content." | |
) | |
iface.launch() | |
if __name__ == "__main__": | |
run_gradio() | |