Talha812 committed on
Commit 2e4aae4 · verified · 1 Parent(s): 938704f

Update app.py

Files changed (1)
  1. app.py +297 -71
app.py CHANGED
@@ -1,25 +1,189 @@
1
  # import streamlit as st
2
  # import torch
3
  # from transformers import GPTNeoXForCausalLM, AutoTokenizer
4
  # from sentence_transformers import SentenceTransformer
5
  # import faiss
6
- # import fitz # PyMuPDF
7
  # from langchain_text_splitters import RecursiveCharacterTextSplitter
8
 
9
- # # 1. Set page config FIRST
10
 # st.set_page_config(page_title="📚 Smart Book Analyst", layout="wide")
11
 
12
  # # Configuration
13
  # MODEL_NAME = "ibm-granite/granite-3.1-1b-a400m-instruct"
14
  # EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
15
  # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
- # CHUNK_SIZE = 512
17
- # CHUNK_OVERLAP = 50
 
18
 
19
  # @st.cache_resource
20
  # def load_models():
21
  # try:
22
- # # Load Granite model
23
  # tokenizer = AutoTokenizer.from_pretrained(
24
  # MODEL_NAME,
25
  # trust_remote_code=True
@@ -27,13 +191,15 @@
27
 
28
  # model = GPTNeoXForCausalLM.from_pretrained(
29
  # MODEL_NAME,
30
- # device_map="auto" if DEVICE == "cuda" else None,
31
  # torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
32
- # trust_remote_code=True
 
33
  # ).eval()
34
 
35
- # # Load sentence transformer for embeddings
36
  # embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
 
37
 
38
  # return tokenizer, model, embedder
39
 
@@ -43,7 +209,6 @@
43
 
44
  # tokenizer, model, embedder = load_models()
45
 
46
- # # Text processing
47
  # def process_text(text):
48
  # splitter = RecursiveCharacterTextSplitter(
49
  # chunk_size=CHUNK_SIZE,
@@ -52,70 +217,79 @@
52
  # )
53
  # return splitter.split_text(text)
54
 
55
- # # PDF extraction
56
  # def extract_pdf_text(uploaded_file):
57
  # try:
58
  # doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
59
- # return "\n".join([page.get_text() for page in doc])
60
  # except Exception as e:
61
  # st.error(f"PDF extraction error: {str(e)}")
62
  # return ""
63
 
64
- # # Summarization function
65
  # def generate_summary(text):
66
- # chunks = process_text(text)[:10]
 
 
 
 
67
  # summaries = []
68
 
69
- # for chunk in chunks:
 
70
  # prompt = f"""<|user|>
71
- # Summarize this text section focusing on key themes, characters, and plot points:
72
- # {chunk[:2000]}
73
  # <|assistant|>
74
  # """
75
 
76
  # inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
77
- # outputs = model.generate(**inputs, max_new_tokens=300, temperature=0.3)
 
 
 
 
 
78
  # summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
79
 
80
  # combined = "\n".join(summaries)
81
  # final_prompt = f"""<|user|>
82
- # Combine these section summaries into a coherent book summary:
83
  # {combined}
84
  # <|assistant|>
85
- # The comprehensive summary is:"""
86
 
87
  # inputs = tokenizer(final_prompt, return_tensors="pt").to(DEVICE)
88
- # outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.5)
89
- # return tokenizer.decode(outputs[0], skip_special_tokens=True).split(":")[-1].strip()
 
 
 
 
 
90
 
91
- # # FAISS index creation
92
  # def build_faiss_index(texts):
93
- # embeddings = embedder.encode(texts, show_progress_bar=False)
94
  # dimension = embeddings.shape[1]
95
  # index = faiss.IndexFlatIP(dimension)
96
  # faiss.normalize_L2(embeddings)
97
  # index.add(embeddings)
98
  # return index
99
 
100
- # # Answer generation
101
  # def generate_answer(query, context):
102
  # prompt = f"""<|user|>
103
- # Using this context: {context}
104
- # Answer the question precisely and truthfully. If unsure, say "I don't know".
105
- # Question: {query}
106
- # <|assistant|>
107
- # """
108
 
109
  # inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
110
  # outputs = model.generate(
111
  # **inputs,
112
- # max_new_tokens=300,
113
- # temperature=0.4,
114
- # top_p=0.9,
115
- # repetition_penalty=1.2,
116
  # do_sample=True
117
  # )
118
- # return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
119
 
120
  # # Streamlit UI
121
 # st.title("📚 AI-Powered Book Analysis System")
@@ -130,6 +304,10 @@
130
  # else:
131
  # text = uploaded_file.read().decode()
132
 
 
 
 
 
133
  # chunks = process_text(text)
134
  # st.session_state.docs = chunks
135
  # st.session_state.index = build_faiss_index(chunks)
@@ -148,14 +326,14 @@
148
  # try:
149
  # query_embed = embedder.encode([query])
150
  # faiss.normalize_L2(query_embed)
151
- # distances, indices = st.session_state.index.search(query_embed, k=3)
152
 
153
  # context = "\n".join([st.session_state.docs[i] for i in indices[0]])
154
  # answer = generate_answer(query, context)
155
 
156
  # st.subheader("Answer")
157
  # st.markdown(f"```\n{answer}\n```")
158
- # st.caption("Retrieved context confidence: {:.2f}".format(distances[0][0]))
159
 
160
  # except Exception as e:
161
  # st.error(f"Query failed: {str(e)}")
@@ -169,25 +347,27 @@ import faiss
169
  import fitz
170
  from langchain_text_splitters import RecursiveCharacterTextSplitter
171
 
172
- # Set page config FIRST
173
 st.set_page_config(page_title="📚 Smart Book Analyst", layout="wide")
174
 
175
  # Configuration
176
  MODEL_NAME = "ibm-granite/granite-3.1-1b-a400m-instruct"
177
  EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
178
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
179
- CHUNK_SIZE = 1024 # Increased chunk size for better performance
180
  CHUNK_OVERLAP = 100
181
- MAX_SUMMARY_CHUNKS = 5 # Reduced from 10 to 5 for faster processing
182
 
183
  @st.cache_resource
184
  def load_models():
185
  try:
186
- # Load model with optimized settings
187
  tokenizer = AutoTokenizer.from_pretrained(
188
  MODEL_NAME,
189
- trust_remote_code=True
 
190
  )
 
191
 
192
  model = GPTNeoXForCausalLM.from_pretrained(
193
  MODEL_NAME,
@@ -197,9 +377,9 @@ def load_models():
197
  low_cpu_mem_usage=True
198
  ).eval()
199
 
200
- # Load embedder with faster model
201
  embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
202
- embedder.max_seq_length = 256 # Reduce embedding dimension
203
 
204
  return tokenizer, model, embedder
205
 
@@ -234,40 +414,71 @@ def generate_summary(text):
234
  summaries = []
235
 
236
  for i, chunk in enumerate(chunks):
237
- progress_bar.progress((i+1)/len(chunks), text=f"Processing chunk {i+1}/{len(chunks)}...")
 
 
 
238
  prompt = f"""<|user|>
239
- Summarize key points in 2 sentences:
240
  {chunk[:1500]}
241
  <|assistant|>
242
  """
243
 
244
- inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
 
 
 
 
 
 
245
  outputs = model.generate(
246
  **inputs,
247
- max_new_tokens=150,
248
- temperature=0.2,
249
- do_sample=False # Disable sampling for faster generation
 
 
 
250
  )
251
- summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
 
 
 
 
 
252
 
253
- combined = "\n".join(summaries)
254
  final_prompt = f"""<|user|>
255
- Combine these into a concise summary (3-5 paragraphs):
256
  {combined}
257
  <|assistant|>
258
- Summary:"""
259
 
260
  inputs = tokenizer(final_prompt, return_tensors="pt").to(DEVICE)
261
  outputs = model.generate(
262
  **inputs,
263
- max_new_tokens=300,
264
- temperature=0.3,
265
- do_sample=False
 
 
 
266
  )
267
- return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Summary:")[-1].strip()
 
 
 
 
268
 
269
  def build_faiss_index(texts):
270
- embeddings = embedder.encode(texts, show_progress_bar=False, batch_size=32)
 
 
 
 
 
 
271
  dimension = embeddings.shape[1]
272
  index = faiss.IndexFlatIP(dimension)
273
  faiss.normalize_L2(embeddings)
@@ -276,20 +487,35 @@ def build_faiss_index(texts):
276
 
277
  def generate_answer(query, context):
278
  prompt = f"""<|user|>
279
- Context: {context[:2000]}
280
- Q: {query}
281
- A:"""
 
 
 
 
 
 
 
 
 
 
282
 
283
- inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
284
  outputs = model.generate(
285
  **inputs,
286
- max_new_tokens=200,
287
- temperature=0.3,
288
- top_p=0.85,
289
- repetition_penalty=1.1,
290
- do_sample=True
 
 
291
  )
292
- return tokenizer.decode(outputs[0], skip_special_tokens=True).split("A:")[-1].strip()
 
 
 
 
293
 
294
  # Streamlit UI
295
 st.title("📚 AI-Powered Book Analysis System")
@@ -305,7 +531,7 @@ if uploaded_file:
305
  text = uploaded_file.read().decode()
306
 
307
  if not text.strip():
308
- st.error("Uploaded file appears to be empty")
309
  st.stop()
310
 
311
  chunks = process_text(text)
@@ -326,14 +552,14 @@ if 'index' in st.session_state and st.session_state.index:
326
  try:
327
  query_embed = embedder.encode([query])
328
  faiss.normalize_L2(query_embed)
329
- distances, indices = st.session_state.index.search(query_embed, k=2)
330
 
331
  context = "\n".join([st.session_state.docs[i] for i in indices[0]])
332
  answer = generate_answer(query, context)
333
 
334
  st.subheader("Answer")
335
  st.markdown(f"```\n{answer}\n```")
336
- st.caption(f"Confidence: {distances[0][0]:.2f}")
337
 
338
  except Exception as e:
339
  st.error(f"Query failed: {str(e)}")
 
1
+ # # import streamlit as st
2
+ # # import torch
3
+ # # from transformers import GPTNeoXForCausalLM, AutoTokenizer
4
+ # # from sentence_transformers import SentenceTransformer
5
+ # # import faiss
6
+ # # import fitz # PyMuPDF
7
+ # # from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+
9
+ # # # 1. Set page config FIRST
10
+ # # st.set_page_config(page_title="📚 Smart Book Analyst", layout="wide")
11
+
12
+ # # # Configuration
13
+ # # MODEL_NAME = "ibm-granite/granite-3.1-1b-a400m-instruct"
14
+ # # EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
15
+ # # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
+ # # CHUNK_SIZE = 512
17
+ # # CHUNK_OVERLAP = 50
18
+
19
+ # # @st.cache_resource
20
+ # # def load_models():
21
+ # # try:
22
+ # # # Load Granite model
23
+ # # tokenizer = AutoTokenizer.from_pretrained(
24
+ # # MODEL_NAME,
25
+ # # trust_remote_code=True
26
+ # # )
27
+
28
+ # # model = GPTNeoXForCausalLM.from_pretrained(
29
+ # # MODEL_NAME,
30
+ # # device_map="auto" if DEVICE == "cuda" else None,
31
+ # # torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
32
+ # # trust_remote_code=True
33
+ # # ).eval()
34
+
35
+ # # # Load sentence transformer for embeddings
36
+ # # embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
37
+
38
+ # # return tokenizer, model, embedder
39
+
40
+ # # except Exception as e:
41
+ # # st.error(f"Model loading failed: {str(e)}")
42
+ # # st.stop()
43
+
44
+ # # tokenizer, model, embedder = load_models()
45
+
46
+ # # # Text processing
47
+ # # def process_text(text):
48
+ # # splitter = RecursiveCharacterTextSplitter(
49
+ # # chunk_size=CHUNK_SIZE,
50
+ # # chunk_overlap=CHUNK_OVERLAP,
51
+ # # length_function=len
52
+ # # )
53
+ # # return splitter.split_text(text)
54
+
55
+ # # # PDF extraction
56
+ # # def extract_pdf_text(uploaded_file):
57
+ # # try:
58
+ # # doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
59
+ # # return "\n".join([page.get_text() for page in doc])
60
+ # # except Exception as e:
61
+ # # st.error(f"PDF extraction error: {str(e)}")
62
+ # # return ""
63
+
64
+ # # # Summarization function
65
+ # # def generate_summary(text):
66
+ # # chunks = process_text(text)[:10]
67
+ # # summaries = []
68
+
69
+ # # for chunk in chunks:
70
+ # # prompt = f"""<|user|>
71
+ # # Summarize this text section focusing on key themes, characters, and plot points:
72
+ # # {chunk[:2000]}
73
+ # # <|assistant|>
74
+ # # """
75
+
76
+ # # inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
77
+ # # outputs = model.generate(**inputs, max_new_tokens=300, temperature=0.3)
78
+ # # summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
79
+
80
+ # # combined = "\n".join(summaries)
81
+ # # final_prompt = f"""<|user|>
82
+ # # Combine these section summaries into a coherent book summary:
83
+ # # {combined}
84
+ # # <|assistant|>
85
+ # # The comprehensive summary is:"""
86
+
87
+ # # inputs = tokenizer(final_prompt, return_tensors="pt").to(DEVICE)
88
+ # # outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.5)
89
+ # # return tokenizer.decode(outputs[0], skip_special_tokens=True).split(":")[-1].strip()
90
+
91
+ # # # FAISS index creation
92
+ # # def build_faiss_index(texts):
93
+ # # embeddings = embedder.encode(texts, show_progress_bar=False)
94
+ # # dimension = embeddings.shape[1]
95
+ # # index = faiss.IndexFlatIP(dimension)
96
+ # # faiss.normalize_L2(embeddings)
97
+ # # index.add(embeddings)
98
+ # # return index
99
+
100
+ # # # Answer generation
101
+ # # def generate_answer(query, context):
102
+ # # prompt = f"""<|user|>
103
+ # # Using this context: {context}
104
+ # # Answer the question precisely and truthfully. If unsure, say "I don't know".
105
+ # # Question: {query}
106
+ # # <|assistant|>
107
+ # # """
108
+
109
+ # # inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
110
+ # # outputs = model.generate(
111
+ # # **inputs,
112
+ # # max_new_tokens=300,
113
+ # # temperature=0.4,
114
+ # # top_p=0.9,
115
+ # # repetition_penalty=1.2,
116
+ # # do_sample=True
117
+ # # )
118
+ # # return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
119
+
120
+ # # # Streamlit UI
121
+ # # st.title("📚 AI-Powered Book Analysis System")
122
+
123
+ # # uploaded_file = st.file_uploader("Upload book (PDF or TXT)", type=["pdf", "txt"])
124
+
125
+ # # if uploaded_file:
126
+ # # with st.spinner("📖 Analyzing book content..."):
127
+ # # try:
128
+ # # if uploaded_file.type == "application/pdf":
129
+ # # text = extract_pdf_text(uploaded_file)
130
+ # # else:
131
+ # # text = uploaded_file.read().decode()
132
+
133
+ # # chunks = process_text(text)
134
+ # # st.session_state.docs = chunks
135
+ # # st.session_state.index = build_faiss_index(chunks)
136
+
137
+ # # with st.expander("📝 Book Summary", expanded=True):
138
+ # # summary = generate_summary(text)
139
+ # # st.write(summary)
140
+
141
+ # # except Exception as e:
142
+ # # st.error(f"Processing failed: {str(e)}")
143
+
144
+ # # if 'index' in st.session_state and st.session_state.index:
145
+ # # query = st.text_input("Ask about the book:")
146
+ # # if query:
147
+ # # with st.spinner("🔍 Searching for answers..."):
148
+ # # try:
149
+ # # query_embed = embedder.encode([query])
150
+ # # faiss.normalize_L2(query_embed)
151
+ # # distances, indices = st.session_state.index.search(query_embed, k=3)
152
+
153
+ # # context = "\n".join([st.session_state.docs[i] for i in indices[0]])
154
+ # # answer = generate_answer(query, context)
155
+
156
+ # # st.subheader("Answer")
157
+ # # st.markdown(f"```\n{answer}\n```")
158
+ # # st.caption("Retrieved context confidence: {:.2f}".format(distances[0][0]))
159
+
160
+ # # except Exception as e:
161
+ # # st.error(f"Query failed: {str(e)}")
162
+
163
+
164
  # import streamlit as st
165
  # import torch
166
  # from transformers import GPTNeoXForCausalLM, AutoTokenizer
167
  # from sentence_transformers import SentenceTransformer
168
  # import faiss
169
+ # import fitz
170
  # from langchain_text_splitters import RecursiveCharacterTextSplitter
171
 
172
+ # # Set page config FIRST
173
 # st.set_page_config(page_title="📚 Smart Book Analyst", layout="wide")
174
 
175
  # # Configuration
176
  # MODEL_NAME = "ibm-granite/granite-3.1-1b-a400m-instruct"
177
  # EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
178
  # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
179
+ # CHUNK_SIZE = 1024 # Increased chunk size for better performance
180
+ # CHUNK_OVERLAP = 100
181
+ # MAX_SUMMARY_CHUNKS = 5 # Reduced from 10 to 5 for faster processing
182
 
183
  # @st.cache_resource
184
  # def load_models():
185
  # try:
186
+ # # Load model with optimized settings
187
  # tokenizer = AutoTokenizer.from_pretrained(
188
  # MODEL_NAME,
189
  # trust_remote_code=True
 
191
 
192
  # model = GPTNeoXForCausalLM.from_pretrained(
193
  # MODEL_NAME,
194
+ # device_map="auto",
195
  # torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
196
+ # trust_remote_code=True,
197
+ # low_cpu_mem_usage=True
198
  # ).eval()
199
 
200
+ # # Load embedder with faster model
201
  # embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
202
+ # embedder.max_seq_length = 256 # Reduce embedding dimension
203
 
204
  # return tokenizer, model, embedder
205
 
 
209
 
210
  # tokenizer, model, embedder = load_models()
211
 
 
212
  # def process_text(text):
213
  # splitter = RecursiveCharacterTextSplitter(
214
  # chunk_size=CHUNK_SIZE,
 
217
  # )
218
  # return splitter.split_text(text)
219
 
 
220
  # def extract_pdf_text(uploaded_file):
221
  # try:
222
  # doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
223
+ # return "\n".join(page.get_text() for page in doc)
224
  # except Exception as e:
225
  # st.error(f"PDF extraction error: {str(e)}")
226
  # return ""
227
 
 
228
  # def generate_summary(text):
229
+ # chunks = process_text(text)[:MAX_SUMMARY_CHUNKS]
230
+ # if not chunks:
231
+ # return "No meaningful content found."
232
+
233
+ # progress_bar = st.progress(0)
234
  # summaries = []
235
 
236
+ # for i, chunk in enumerate(chunks):
237
+ # progress_bar.progress((i+1)/len(chunks), text=f"Processing chunk {i+1}/{len(chunks)}...")
238
  # prompt = f"""<|user|>
239
+ # Summarize key points in 2 sentences:
240
+ # {chunk[:1500]}
241
  # <|assistant|>
242
  # """
243
 
244
  # inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
245
+ # outputs = model.generate(
246
+ # **inputs,
247
+ # max_new_tokens=150,
248
+ # temperature=0.2,
249
+ # do_sample=False # Disable sampling for faster generation
250
+ # )
251
  # summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
252
 
253
  # combined = "\n".join(summaries)
254
  # final_prompt = f"""<|user|>
255
+ # Combine these into a concise summary (3-5 paragraphs):
256
  # {combined}
257
  # <|assistant|>
258
+ # Summary:"""
259
 
260
  # inputs = tokenizer(final_prompt, return_tensors="pt").to(DEVICE)
261
+ # outputs = model.generate(
262
+ # **inputs,
263
+ # max_new_tokens=300,
264
+ # temperature=0.3,
265
+ # do_sample=False
266
+ # )
267
+ # return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Summary:")[-1].strip()
268
 
 
269
  # def build_faiss_index(texts):
270
+ # embeddings = embedder.encode(texts, show_progress_bar=False, batch_size=32)
271
  # dimension = embeddings.shape[1]
272
  # index = faiss.IndexFlatIP(dimension)
273
  # faiss.normalize_L2(embeddings)
274
  # index.add(embeddings)
275
  # return index
276
 
 
277
  # def generate_answer(query, context):
278
  # prompt = f"""<|user|>
279
+ # Context: {context[:2000]}
280
+ # Q: {query}
281
+ # A:"""
 
 
282
 
283
  # inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
284
  # outputs = model.generate(
285
  # **inputs,
286
+ # max_new_tokens=200,
287
+ # temperature=0.3,
288
+ # top_p=0.85,
289
+ # repetition_penalty=1.1,
290
  # do_sample=True
291
  # )
292
+ # return tokenizer.decode(outputs[0], skip_special_tokens=True).split("A:")[-1].strip()
293
 
294
  # # Streamlit UI
295
 # st.title("📚 AI-Powered Book Analysis System")
 
304
  # else:
305
  # text = uploaded_file.read().decode()
306
 
307
+ # if not text.strip():
308
+ # st.error("Uploaded file appears to be empty")
309
+ # st.stop()
310
+
311
  # chunks = process_text(text)
312
  # st.session_state.docs = chunks
313
  # st.session_state.index = build_faiss_index(chunks)
 
326
  # try:
327
  # query_embed = embedder.encode([query])
328
  # faiss.normalize_L2(query_embed)
329
+ # distances, indices = st.session_state.index.search(query_embed, k=2)
330
 
331
  # context = "\n".join([st.session_state.docs[i] for i in indices[0]])
332
  # answer = generate_answer(query, context)
333
 
334
  # st.subheader("Answer")
335
  # st.markdown(f"```\n{answer}\n```")
336
+ # st.caption(f"Confidence: {distances[0][0]:.2f}")
337
 
338
  # except Exception as e:
339
  # st.error(f"Query failed: {str(e)}")
 
347
  import fitz
348
  from langchain_text_splitters import RecursiveCharacterTextSplitter
349
 
350
+ # Set page config first
351
 st.set_page_config(page_title="📚 Smart Book Analyst", layout="wide")
352
 
353
  # Configuration
354
  MODEL_NAME = "ibm-granite/granite-3.1-1b-a400m-instruct"
355
  EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
356
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
357
+ CHUNK_SIZE = 1024
358
  CHUNK_OVERLAP = 100
359
+ MAX_SUMMARY_CHUNKS = 5
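+ # only the first MAX_SUMMARY_CHUNKS chunks feed the summary pass, keeping latency bounded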
360
 
361
  @st.cache_resource
362
  def load_models():
363
  try:
364
+ # Load the tokenizer configured for generation
365
  tokenizer = AutoTokenizer.from_pretrained(
366
  MODEL_NAME,
367
+ trust_remote_code=True,
368
+ padding_side="left"  # decoder-only models should be left-padded when generating
369
  )
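+ # reuse EOS as the pad token in case the checkpoint does not define one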
370
+ tokenizer.pad_token = tokenizer.eos_token
371
 
372
  model = GPTNeoXForCausalLM.from_pretrained(
373
  MODEL_NAME,
 
377
  low_cpu_mem_usage=True
378
  ).eval()
379
 
380
+ # Configure the sentence-transformer embedder
381
  embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
382
+ embedder.max_seq_length = 512  # allow longer chunks before the embedder truncates input
383
 
384
  return tokenizer, model, embedder
385
 
 
414
  summaries = []
415
 
416
  for i, chunk in enumerate(chunks):
417
+ # update the progress bar as each section is summarised
418
+ progress_bar.progress((i+1)/len(chunks),
419
+ text=f"Processing section {i+1}/{len(chunks)}...")
420
+
421
  prompt = f"""<|user|>
422
+ Summarize the key points from this text section in 3 bullet points:
423
  {chunk[:1500]}
424
  <|assistant|>
425
  """
426
 
427
+ inputs = tokenizer(
428
+ prompt,
429
+ return_tensors="pt",
430
+ max_length=1024,
431
+ truncation=True
432
+ ).to(DEVICE)
433
+
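+ # short, low-temperature generation keeps each per-chunk summary focused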
434
  outputs = model.generate(
435
  **inputs,
436
+ max_new_tokens=200,
437
+ temperature=0.3,
438
+ top_p=0.9,
439
+ repetition_penalty=1.1,
440
+ do_sample=True,
441
+ pad_token_id=tokenizer.eos_token_id  # pass an explicit pad id so generate() does not warn
442
  )
443
+
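+ # the decoded output echoes the prompt, so keep only the text after the assistant tag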
444
+ decoded = tokenizer.decode(
445
+ outputs[0],
446
+ skip_special_tokens=True
447
+ ).split("<|assistant|>")[-1].strip()
448
+
449
+ summaries.append(decoded)
450
 
451
+ combined = "\n\n".join(summaries)
452
  final_prompt = f"""<|user|>
453
+ Combine these bullet points into a coherent 3-paragraph summary:
454
  {combined}
455
  <|assistant|>
456
+ Here is the comprehensive summary:"""
457
 
458
  inputs = tokenizer(final_prompt, return_tensors="pt").to(DEVICE)
459
  outputs = model.generate(
460
  **inputs,
461
+ max_new_tokens=400,
462
+ temperature=0.5,
463
+ top_p=0.9,
464
+ repetition_penalty=1.1,
465
+ do_sample=True,
466
+ pad_token_id=tokenizer.eos_token_id
467
  )
468
+
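+ # return only what the model produced after the seeded "Here is the comprehensive summary:" marker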
469
+ return tokenizer.decode(
470
+ outputs[0],
471
+ skip_special_tokens=True
472
+ ).split("Here is the comprehensive summary:")[-1].strip()
473
 
474
  def build_faiss_index(texts):
475
+ embeddings = embedder.encode(
476
+ texts,
477
+ show_progress_bar=False,
478
+ batch_size=16,
479
+ convert_to_tensor=True
480
+ ).cpu().numpy()
481
+
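+ # FAISS works on CPU numpy arrays; L2-normalised vectors in an inner-product index give cosine similarity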
482
  dimension = embeddings.shape[1]
483
  index = faiss.IndexFlatIP(dimension)
484
  faiss.normalize_L2(embeddings)
 
487
 
488
  def generate_answer(query, context):
489
  prompt = f"""<|user|>
490
+ Based on this context:
491
+ {context[:2000]}
492
+
493
+ Answer this question concisely: {query}
494
+ <|assistant|>
495
+ """
496
+
497
+ inputs = tokenizer(
498
+ prompt,
499
+ return_tensors="pt",
500
+ max_length=1024,
501
+ truncation=True
502
+ ).to(DEVICE)
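+ # truncate at 1024 tokens so the context plus question fits the prompt budget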
503
 
 
504
  outputs = model.generate(
505
  **inputs,
506
+ max_new_tokens=300,
507
+ temperature=0.4,
508
+ top_p=0.95,
509
+ repetition_penalty=1.15,
510
+ do_sample=True,
511
+ pad_token_id=tokenizer.eos_token_id,
512
+ no_repeat_ngram_size=3 # Prevent repetition
513
  )
514
+
515
+ return tokenizer.decode(
516
+ outputs[0],
517
+ skip_special_tokens=True
518
+ ).split("<|assistant|>")[-1].strip()
519
 
520
  # Streamlit UI
521
 st.title("📚 AI-Powered Book Analysis System")
 
531
  text = uploaded_file.read().decode()
532
 
533
  if not text.strip():
534
+ st.error("Uploaded file is empty")
535
  st.stop()
536
 
537
  chunks = process_text(text)
 
552
  try:
553
  query_embed = embedder.encode([query])
554
  faiss.normalize_L2(query_embed)
555
+ distances, indices = st.session_state.index.search(query_embed, k=3)
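+ # retrieve the three most similar chunks; FAISS returns their scores and positions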
556
 
557
  context = "\n".join([st.session_state.docs[i] for i in indices[0]])
558
  answer = generate_answer(query, context)
559
 
560
  st.subheader("Answer")
561
  st.markdown(f"```\n{answer}\n```")
562
+ st.caption(f"Confidence score: {distances[0][0]:.2f}")
563
 
564
  except Exception as e:
565
  st.error(f"Query failed: {str(e)}")