nileshhanotia commited on
Commit
9735353
1 Parent(s): 73afd28

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import faiss
4
+ import numpy as np
5
+ from sentence_transformers import SentenceTransformer
6
+ import gradio as gr
7
+ import requests
8
+
9
+ # Load your Anthropic API key from environment variable or set it here
10
+ ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") # Make sure to set your key
11
+ # Alternatively, set it directly in the code for testing purposes:
12
+ # ANTHROPIC_API_KEY = "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"
13
+
14
+ # Step 1: Extract text from PDFs
15
+ def extract_text_from_pdf(pdf_file):
16
+ reader = PyPDF2.PdfReader(pdf_file)
17
+ text = ""
18
+ for page in reader.pages:
19
+ text += page.extract_text()
20
+ return text
21
+
22
+ # Step 2: Generate embeddings
23
+ def create_embeddings(text):
24
+ model = SentenceTransformer('all-MiniLM-L6-v2')
25
+ return model.encode(text.split('\n'), convert_to_tensor=True)
26
+
27
+ # Step 3: Create FAISS index
28
+ def create_faiss_index(embeddings):
29
+ dim = embeddings.shape[1]
30
+ index = faiss.IndexFlatL2(dim)
31
+ index.add(np.array(embeddings))
32
+ return index
33
+
34
+ # Step 4: Query with Anthropic
35
+ def query_anthropic(prompt):
36
+ headers = {
37
+ "Authorization": f"Bearer {ANTHROPIC_API_KEY}",
38
+ "Content-Type": "application/json"
39
+ }
40
+ data = {
41
+ "prompt": prompt,
42
+ "max_tokens": 150,
43
+ "stop": None
44
+ }
45
+ response = requests.post('https://api.anthropic.com/v1/complete', json=data, headers=headers)
46
+ return response.json().get('completion', 'No response from model')
47
+
48
+ # Step 5: Complete workflow
49
+ def process_pdf_and_query(pdf_file, user_query):
50
+ # Step 1: Extract text
51
+ pdf_text = extract_text_from_pdf(pdf_file)
52
+
53
+ # Step 2: Generate embeddings
54
+ embeddings = create_embeddings(pdf_text)
55
+
56
+ # Step 3: Create FAISS index
57
+ faiss_index = create_faiss_index(embeddings)
58
+
59
+ # Step 4: Query with Anthropic
60
+ index_query_embedding = create_embeddings(user_query)
61
+ D, I = faiss_index.search(np.array([index_query_embedding]), k=1) # Searching for the closest match
62
+ closest_text = pdf_text.split('\n')[I[0][0]] # Get the closest text based on the index
63
+ response = query_anthropic(f"Answer the question based on this context: {closest_text}. Question: {user_query}")
64
+
65
+ return response
66
+
67
+ # Gradio interface
68
+ def run_gradio():
69
+ iface = gr.Interface(
70
+ fn=process_pdf_and_query,
71
+ inputs=[gr.File(label="Upload PDF File"), gr.Textbox(label="Ask a question")],
72
+ outputs="text",
73
+ title="PDF Query with Anthropic",
74
+ description="Upload a PDF file and ask questions related to its content."
75
+ )
76
+ iface.launch()
77
+
78
+ if __name__ == "__main__":
79
+ run_gradio()