Spaces:

nileshhanotia
/

c2pdf

Sleeping

App Files Files Community

c2pdf / app.py

nileshhanotia

Create app.py

9735353 verified 18 days ago

raw

history blame contribute delete

2.67 kB

	import os
	import PyPDF2
	import faiss
	import numpy as np
	from sentence_transformers import SentenceTransformer
	import gradio as gr
	import requests

	# Load your Anthropic API key from environment variable or set it here
	ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") # Make sure to set your key
	# Alternatively, set it directly in the code for testing purposes:
	# ANTHROPIC_API_KEY = "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"

	# Step 1: Extract text from PDFs
	def extract_text_from_pdf(pdf_file):
	reader = PyPDF2.PdfReader(pdf_file)
	text = ""
	for page in reader.pages:
	text += page.extract_text()
	return text

	# Step 2: Generate embeddings
	def create_embeddings(text):
	model = SentenceTransformer('all-MiniLM-L6-v2')
	return model.encode(text.split('\n'), convert_to_tensor=True)

	# Step 3: Create FAISS index
	def create_faiss_index(embeddings):
	dim = embeddings.shape[1]
	index = faiss.IndexFlatL2(dim)
	index.add(np.array(embeddings))
	return index

	# Step 4: Query with Anthropic
	def query_anthropic(prompt):
	headers = {
	"Authorization": f"Bearer {ANTHROPIC_API_KEY}",
	"Content-Type": "application/json"
	}
	data = {
	"prompt": prompt,
	"max_tokens": 150,
	"stop": None
	}
	response = requests.post('https://api.anthropic.com/v1/complete', json=data, headers=headers)
	return response.json().get('completion', 'No response from model')

	# Step 5: Complete workflow
	def process_pdf_and_query(pdf_file, user_query):
	# Step 1: Extract text
	pdf_text = extract_text_from_pdf(pdf_file)

	# Step 2: Generate embeddings
	embeddings = create_embeddings(pdf_text)

	# Step 3: Create FAISS index
	faiss_index = create_faiss_index(embeddings)

	# Step 4: Query with Anthropic
	index_query_embedding = create_embeddings(user_query)
	D, I = faiss_index.search(np.array([index_query_embedding]), k=1) # Searching for the closest match
	closest_text = pdf_text.split('\n')[I[0][0]] # Get the closest text based on the index
	response = query_anthropic(f"Answer the question based on this context: {closest_text}. Question: {user_query}")

	return response

	# Gradio interface
	def run_gradio():
	iface = gr.Interface(
	fn=process_pdf_and_query,
	inputs=[gr.File(label="Upload PDF File"), gr.Textbox(label="Ask a question")],
	outputs="text",
	title="PDF Query with Anthropic",
	description="Upload a PDF file and ask questions related to its content."
	)
	iface.launch()

	if __name__ == "__main__":
	run_gradio()