Spaces:

nileshhanotia
/

c2pdf

Sleeping

App Files Files Community

nileshhanotia commited on 18 days ago

Commit

9735353

•

1 Parent(s): 73afd28

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+import PyPDF2
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import gradio as gr
+import requests
+# Load your Anthropic API key from environment variable or set it here
+ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")  # Make sure to set your key
+# Alternatively, set it directly in the code for testing purposes:
+# ANTHROPIC_API_KEY = "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"
+# Step 1: Extract text from PDFs
+def extract_text_from_pdf(pdf_file):
+    reader = PyPDF2.PdfReader(pdf_file)
+    text = ""
+    for page in reader.pages:
+        text += page.extract_text()
+    return text
+# Step 2: Generate embeddings
+def create_embeddings(text):
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+    return model.encode(text.split('\n'), convert_to_tensor=True)
+# Step 3: Create FAISS index
+def create_faiss_index(embeddings):
+    dim = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dim)
+    index.add(np.array(embeddings))
+    return index
+# Step 4: Query with Anthropic
+def query_anthropic(prompt):
+    headers = {
+        "Authorization": f"Bearer {ANTHROPIC_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "prompt": prompt,
+        "max_tokens": 150,
+        "stop": None
+    }
+    response = requests.post('https://api.anthropic.com/v1/complete', json=data, headers=headers)
+    return response.json().get('completion', 'No response from model')
+# Step 5: Complete workflow
+def process_pdf_and_query(pdf_file, user_query):
+    # Step 1: Extract text
+    pdf_text = extract_text_from_pdf(pdf_file)
+    # Step 2: Generate embeddings
+    embeddings = create_embeddings(pdf_text)
+    # Step 3: Create FAISS index
+    faiss_index = create_faiss_index(embeddings)
+    # Step 4: Query with Anthropic
+    index_query_embedding = create_embeddings(user_query)
+    D, I = faiss_index.search(np.array([index_query_embedding]), k=1)  # Searching for the closest match
+    closest_text = pdf_text.split('\n')[I[0][0]]  # Get the closest text based on the index
+    response = query_anthropic(f"Answer the question based on this context: {closest_text}. Question: {user_query}")
+    return response
+# Gradio interface
+def run_gradio():
+    iface = gr.Interface(
+        fn=process_pdf_and_query,
+        inputs=[gr.File(label="Upload PDF File"), gr.Textbox(label="Ask a question")],
+        outputs="text",
+        title="PDF Query with Anthropic",
+        description="Upload a PDF file and ask questions related to its content."
+    )
+    iface.launch()
+if __name__ == "__main__":
+    run_gradio()