sunbal7 committed on
Commit aae1639 · verified · 1 Parent(s): a7e018f

Update app.py

Files changed (1)
  1. app.py +47 -17
app.py CHANGED
@@ -5,7 +5,6 @@ import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 import PyPDF2
-import os
 
 # Model Setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -22,6 +21,8 @@ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 dimension = 384 # Embedding size for MiniLM
 index = faiss.IndexFlatL2(dimension)
 docs = [] # Store document texts
+summary = "" # Store book summary
+
 
 # Function to extract text from PDF
 def extract_text_from_pdf(uploaded_file):
@@ -29,9 +30,10 @@ def extract_text_from_pdf(uploaded_file):
     text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
     return text
 
-# Function to process uploaded documents
+
+# Function to process uploaded documents and generate summary
 def process_documents(files):
-    global docs, index
+    global docs, index, summary
     docs = []
 
     for file in files:
@@ -44,15 +46,38 @@ def process_documents(files):
 
     embeddings = embedding_model.encode(docs)
     index.add(np.array(embeddings))
+
+    # Generate summary after processing documents
+    summary = generate_summary("\n".join(docs))
+
+
+# Function to generate a book summary
+def generate_summary(text):
+    chat = [
+        {"role": "system", "content": "You are a helpful AI that summarizes books."},
+        {"role": "user", "content": f"Summarize this book in a short paragraph:\n{text[:4000]}"} # Limiting input size
+    ]
+    chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+
+    input_tokens = tokenizer(chat, return_tensors="pt").to(device)
+    output = model.generate(**input_tokens, max_new_tokens=300)
+
+    return tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+
 
 # Function to retrieve relevant context
 def retrieve_context(query):
+    if index.ntotal == 0:
+        return "No documents available. Please upload files first."
+
     query_embedding = embedding_model.encode([query])
     distances, indices = index.search(np.array(query_embedding), k=1)
 
-    if len(indices) > 0 and indices[0][0] < len(docs):
-        return docs[indices[0][0]]
-    return "No relevant context found."
+    if len(indices) == 0 or indices[0][0] >= len(docs):
+        return "No relevant context found."
+
+    return docs[indices[0][0]]
+
 
 # Function to generate response using IBM Granite
 def generate_response(query, context):
@@ -64,27 +89,32 @@ def generate_response(query, context):
 
     input_tokens = tokenizer(chat, return_tensors="pt").to(device)
     output = model.generate(**input_tokens, max_new_tokens=200)
+
     return tokenizer.batch_decode(output, skip_special_tokens=True)[0]
 
+
 # Streamlit UI
-st.set_page_config(page_title="📖 Smart Study", page_icon="🤖")
-st.title("📖 Q&A using IBM Granite")
-st.subheader("Upload documents and ask questions!")
+st.set_page_config(page_title="📖 AI Book Assistant", page_icon="📚")
+st.title("📖 AI-Powered Book Assistant")
+st.subheader("Upload a book and get its summary or ask questions!")
 
-uploaded_files = st.file_uploader("Upload PDFs or TXT files", accept_multiple_files=True)
+uploaded_files = st.file_uploader("Upload a book (PDF or TXT)", accept_multiple_files=False)
 
 if uploaded_files:
-    with st.spinner("Processing documents..."):
-        process_documents(uploaded_files)
-    st.success("Documents uploaded and indexed!")
+    with st.spinner("Processing book and generating summary..."):
+        process_documents([uploaded_files])
+    st.success("Book uploaded and processed!")
 
-    if query:
-        if index.ntotal == 0: # Ensure documents are indexed before querying
-            st.warning("Please upload and process documents first!")
+    st.markdown("### 📚 Book Summary:")
+    st.write(summary)
+
+    query = st.text_input("Ask a question about the book:")
+    if st.button("Get Answer"):
+        if index.ntotal == 0:
+            st.warning("Please upload a book first!")
         else:
            with st.spinner("Retrieving and generating response..."):
                context = retrieve_context(query)
                response = generate_response(query, context)
                st.markdown("### 🤖 Answer:")
                st.write(response)
-
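Review note: the new `index.ntotal == 0` guard in `retrieve_context` is worth having because FAISS returns an id of -1 when a search cannot find a neighbour, which the old code would have turned into `docs[-1]`. A minimal standalone sketch of the retrieval round-trip this patch hardens, using the same models as app.py (the two-document corpus is invented for illustration):

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
index = faiss.IndexFlatL2(384)  # all-MiniLM-L6-v2 produces 384-dim embeddings

# Illustrative corpus; app.py fills `docs` from uploaded files instead.
docs = ["FAISS stores vectors and answers nearest-neighbour queries.",
        "Sentence-transformers turn text into fixed-size embeddings."]
index.add(np.array(embedding_model.encode(docs)))

# Searching an *empty* index would return an id of -1 here, which is why
# retrieve_context now checks index.ntotal before searching.
query_embedding = embedding_model.encode(["what does FAISS do?"])
distances, indices = index.search(np.array(query_embedding), k=1)
print(docs[indices[0][0]])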
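Review note: `generate_summary` truncates by characters (`text[:4000]`), so the effective token budget varies with the text. A hypothetical token-aware alternative, assuming the same Hugging Face tokenizer object (`truncate_to_tokens` is an invented helper, not part of this patch):

# Invented helper: cap the summary input by token count rather than by
# characters, so the budget tracks the model's actual context window.
def truncate_to_tokens(text, tokenizer, max_tokens=2048):
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    return tokenizer.decode(token_ids[:max_tokens])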
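Review note: for decoder-only models such as IBM Granite, `model.generate` returns the prompt tokens followed by the completion, so `batch_decode(output, ...)` in both `generate_summary` and `generate_response` will echo the chat prompt ahead of the answer. A possible follow-up, sketched against the `input_tokens`/`output` names already in the file:

# Decode only the newly generated tokens so the prompt is not echoed back.
prompt_len = input_tokens["input_ids"].shape[1]
reply = tokenizer.batch_decode(output[:, prompt_len:], skip_special_tokens=True)[0]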
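Review note: Streamlit re-executes app.py from the top on every widget interaction, so the module-level `index`, `docs`, and `summary` are rebuilt (and the summary regenerated) each time "Get Answer" is clicked. A hypothetical refactor that caches them across reruns with `st.session_state` (sketch only; the rest of the file would need to read from `st.session_state` accordingly):

import faiss
import streamlit as st

# Persist expensive state across Streamlit reruns instead of recomputing it.
if "index" not in st.session_state:
    st.session_state.index = faiss.IndexFlatL2(384)
    st.session_state.docs = []
    st.session_state.summary = ""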