RJuro committed on
Commit
d3fdae9
·
0 Parent(s):

Reinitialize repository without offending large file

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+
3
+ dev/
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Pdf Digest
3
+ emoji: 👁
4
+ colorFrom: pink
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import asyncio
4
+ import logging
5
+ import json
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ # Import our existing utility functions
11
+ from utils.file_utils import load_prompt, save_intermediate_output, setup_temp_directories, cleanup_temp_files
12
+ from utils.llm_utils import get_generation_model, async_generate_text, generate_title_reference_and_classification
13
+ from utils.tts_utils import generate_tts_audio
14
+
15
+ # Import the multi‐paper review flow functions
16
+ from utils.review_flow import (
17
+ process_multiple_pdfs,
18
+ generate_final_review_pdf
19
+ )
20
+
21
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
22
+ logger = logging.getLogger(__name__)
23
+
24
+ st.set_page_config(page_title="Academic PDF Digester", layout="wide")
25
+
26
+ # ===== SIDEBAR: Login and Mode Selection =====
27
+ if "authenticated" not in st.session_state:
28
+ st.session_state["authenticated"] = False
29
+
30
+ with st.sidebar:
31
+ st.header("User Login")
32
+ if not st.session_state["authenticated"]:
33
+ with st.form(key="login_form"):
34
+ username = st.text_input("Username")
35
+ password = st.text_input("Password", type="password")
36
+ submit_button = st.form_submit_button("Login")
37
+ # Simple authentication using environment variables
38
+ if submit_button:
39
+ expected_username = os.getenv("APP_USERNAME")
40
+ expected_password = os.getenv("APP_PASSWORD")
41
+ if username == expected_username and password == expected_password:
42
+ st.session_state["authenticated"] = True
43
+ st.session_state["username"] = username # Store the username
44
+ st.success("Logged in successfully!")
45
+ logger.debug("User authenticated successfully.")
46
+ else:
47
+ st.error("Invalid credentials")
48
+ logger.debug("Authentication failed for user: %s", username)
49
+ st.stop()
50
+ else:
51
+ st.info("Logged in as: " + st.session_state.get("username", "Unknown User"))
52
+
53
+ st.header("Select Mode")
54
+ mode = st.radio("Choose a mode:", options=["Explore One Publication", "Write a Literature Review"])
55
+
56
+ # ===== MAIN APP =====
57
+ st.title("Academic PDF Digester 😋")
58
+
59
+ st.subheader("Effortlessly analyze and synthesize academic papers")
60
+ st.markdown(
61
+ """
62
+ **Welcome to Academic PDF Digester!**
63
+ This tool uses advanced Large Language Models (LLMs) to automatically extract key information from academic papers,
64
+ generate structured summaries, and even produce downloadable PDF and audio outputs. Whether you are exploring a single publication
65
+ or synthesizing a literature review from multiple papers, our system streamlines your research process.
66
+ """
67
+ )
68
+
69
+ with st.expander("How It Works"):
70
+ st.markdown(
71
+ """
72
+ **Overview of the Functionality:**
73
+
74
+ - **File Processing:**
75
+ Uploaded PDFs are saved locally and then sent to a cloud-based service where a specialized LLM analyzes the content.
76
+
77
+ - **LLM Integration:**
78
+ The LLM extracts key information (such as the core structure, outlines, and insights) and generates structured outputs.
79
+ These outputs are then used to create comparative analyses, draft final reviews, and check for consistency.
80
+
81
+ - **Outputs:**
82
+ The system generates:
83
+ - A **detailed overview** for individual papers.
84
+ - A **comparative literature review** for multiple papers.
85
+ - **Downloadable PDFs** summarizing the findings.
86
+ - **Audio summaries** for quick listening.
87
+
88
+ **How LLMs Are Used:**
89
+ - The tool leverages LLMs to generate content.
90
+ - For each step (e.g., generating outlines, synthesizing final reviews), the LLM processes the input and returns a coherent narrative.
91
+ - Checks are performed to minimize hallucinations and ensure factual accuracy.
92
+
93
+ This powerful combination of file processing, LLM integration, and smart output synthesis helps you gain insights from academic papers quickly and accurately.
94
+ """
95
+ )
96
+
97
+ if mode == "Explore One Publication":
98
+ st.subheader("Single-Publication Analysis")
99
+
100
+ # Load models for title generation and main analysis
101
+ title_model_name, title_generation_config = get_generation_model("flash")
102
+ default_model_name, default_generation_config = get_generation_model("thinking")
103
+
104
+ uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])
105
+ if uploaded_pdf is not None:
106
+ if uploaded_pdf.size < 5000:
107
+ st.error("Input does not appear to be an academic paper. Please upload a valid academic paper.")
108
+ st.stop()
109
+ st.session_state["uploaded_pdf"] = uploaded_pdf
110
+ st.success("PDF uploaded successfully.")
111
+
112
+ progress_bar = st.progress(0)
113
+
114
+ async def process_single_pdf():
115
+ # Create a temporary directory and save the file locally.
116
+ temp_dir = setup_temp_directories()
117
+ try:
118
+ pdf_basename = os.path.splitext(st.session_state["uploaded_pdf"].name)[0]
119
+ st.session_state["pdf_basename"] = pdf_basename
120
+ temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
121
+ with open(temp_pdf_path, "wb") as f:
122
+ f.write(st.session_state["uploaded_pdf"].getbuffer())
123
+ logger.debug("PDF saved locally to %s", temp_pdf_path)
124
+ progress_bar.progress(10)
125
+
126
+ # Upload file to Gemini.
127
+ with st.spinner("Uploading PDF to Gemini..."):
128
+ try:
129
+ pdf_file = upload_to_gemini(temp_pdf_path, mime_type="application/pdf")
130
+ except Exception as e:
131
+ st.error("Error uploading PDF: " + str(e))
132
+ st.stop()
133
+ progress_bar.progress(20)
134
+
135
+ # Wait for file processing.
136
+ with st.spinner("Waiting for file processing..."):
137
+ try:
138
+ wait_for_files_active([pdf_file])
139
+ except Exception as e:
140
+ st.error("Error in file processing: " + str(e))
141
+ st.stop()
142
+ progress_bar.progress(30)
143
+
144
+ # Generate title, APA reference and classification.
145
+ with st.spinner("Generating title, APA reference, and classification..."):
146
+ title_ref = await generate_title_reference_and_classification(
147
+ pdf_file, title_model_name, title_generation_config
148
+ )
149
+ if title_ref.error:
150
+ st.error(title_ref.error)
151
+ st.stop()
152
+ st.session_state["title"] = title_ref.title
153
+ st.session_state["apa_reference"] = title_ref.apa_reference
154
+ st.session_state["classification"] = title_ref.classification
155
+ st.session_state["bullet_list"] = title_ref.bullet_list # Save bullet list
156
+ progress_bar.progress(40)
157
+
158
+ # Load prompts for outline and key insights.
159
+ try:
160
+ outline_prompt = load_prompt("prompts/outline_acad.prompt")
161
+ elements_prompt = load_prompt("prompts/elements.prompt")
162
+ except Exception as e:
163
+ st.error("Error loading prompt files: " + str(e))
164
+ st.stop()
165
+
166
+ # Generate key insights and outline concurrently.
167
+ with st.spinner("Extracting key insights and drafting outline..."):
168
+ task_outline = async_generate_text(
169
+ outline_prompt, pdf_file,
170
+ model_name=default_model_name,
171
+ generation_config=default_generation_config
172
+ )
173
+ task_elements = async_generate_text(
174
+ elements_prompt, pdf_file,
175
+ model_name=default_model_name,
176
+ generation_config=default_generation_config
177
+ )
178
+ outline_acad_output, elements_output = await asyncio.gather(task_outline, task_elements)
179
+ progress_bar.progress(65)
180
+ st.info("Key insights extracted and outline drafted successfully!")
181
+ # Save intermediate outputs.
182
+ save_intermediate_output(outline_acad_output, pdf_basename, "outline")
183
+ save_intermediate_output(elements_output, pdf_basename, "elements")
184
+
185
+ # Generate final overview.
186
+ with st.spinner("Generating final overview..."):
187
+ overview_prompt = load_prompt("prompts/overview.prompt")
188
+ tts_instruction = "Ensure the final overview is TTS-friendly and does not exceed 3000 words."
189
+ combined_overview_prompt = (
190
+ tts_instruction + "\n\nAcademic Outline:\n" + outline_acad_output +
191
+ "\n\nKey Insights:\n" + elements_output + "\n" +
192
+ overview_prompt + "\n" + tts_instruction
193
+ )
194
+ overview_output = await async_generate_text(
195
+ combined_overview_prompt, pdf_file,
196
+ model_name=default_model_name,
197
+ generation_config=default_generation_config
198
+ )
199
+ save_intermediate_output(overview_output, pdf_basename, "overview")
200
+ progress_bar.progress(75)
201
+
202
+ # Generate downloadable PDF.
203
+ from markdown_pdf import MarkdownPdf, Section
204
+ with st.spinner("Generating downloadable PDF..."):
205
+ try:
206
+ pdf_doc = MarkdownPdf(toc_level=2)
207
+ bullet_markdown = "\n".join(f"- {item}" for item in st.session_state.get("bullet_list", []))
208
+ title_and_ref_markdown = (
209
+ f"# {st.session_state['title']}\n\n"
210
+ f"*{st.session_state['apa_reference']}*\n\n"
211
+ "### Key Components:\n"
212
+ f"{bullet_markdown}\n\n"
213
+ )
214
+ pdf_doc.add_section(Section(title_and_ref_markdown, toc=False))
215
+ pdf_doc.add_section(Section(overview_output.strip(), toc=True))
216
+ final_pdf_path = os.path.join("promp_tmp", f"{pdf_basename}_final_output.pdf")
217
+ pdf_doc.save(final_pdf_path)
218
+ with open(final_pdf_path, "rb") as f:
219
+ st.session_state["final_pdf"] = f.read()
220
+ st.session_state["final_text"] = (
221
+ f"# {st.session_state['title']}\n\n"
222
+ f"*{st.session_state['apa_reference']}*\n\n"
223
+ "### Key Components:\n"
224
+ f"{bullet_markdown}\n\n"
225
+ f"{overview_output.strip()}"
226
+ )
227
+ st.success("PDF generated successfully.")
228
+ except Exception as e:
229
+ st.error("Failed to generate PDF: " + str(e))
230
+ st.stop()
231
+ progress_bar.progress(100)
232
+ st.session_state["generated"] = True
233
+ except Exception as e:
234
+ st.error(f"Error during processing: {str(e)}")
235
+ st.stop()
236
+ finally:
237
+ cleanup_temp_files(temp_dir)
238
+
239
+ if st.button("Generate Overview"):
240
+ asyncio.run(process_single_pdf())
241
+
242
+ if st.session_state.get("generated"):
243
+ st.download_button(
244
+ label="Download Final PDF",
245
+ data=st.session_state["final_pdf"],
246
+ file_name="final_output.pdf",
247
+ mime="application/pdf"
248
+ )
249
+
250
+ elif mode == "Write a Literature Review":
251
+ st.subheader("Literature Review Generation")
252
+
253
+ st.markdown(
254
+ "Upload **multiple academic PDFs** to generate a comparative literature review. "
255
+ "You can select more than one file."
256
+ )
257
+
258
+ uploaded_pdfs = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
259
+
260
+ if uploaded_pdfs:
261
+ st.session_state["uploaded_pdfs"] = uploaded_pdfs
262
+ st.success(f"{len(uploaded_pdfs)} PDFs uploaded successfully.")
263
+
264
+ if st.button("Generate Literature Review"):
265
+ with st.spinner("Processing PDFs and generating review..."):
266
+ # Process the multiple PDFs using our review_flow helper
267
+ # This function handles the upload, Gemini processing, structured output extraction,
268
+ # comparative table, outline, final synthesis and even PDF conversion.
269
+ structured_outputs = asyncio.run(process_multiple_pdfs(uploaded_pdfs))
270
+ final_review_text = asyncio.run(generate_final_review_pdf(structured_outputs))
271
+ st.success("Literature review generated successfully!")
272
+ st.text_area("Final Literature Review", final_review_text, height=300)
273
+ # Optionally, let the user download the review as PDF
274
+ review_pdf = st.file_uploader("Download PDF", type=["pdf"])
275
+ with open("final_literature_review.pdf", "rb") as f:
276
+ final_pdf_bytes = f.read()
277
+ st.download_button(
278
+ label="Download Final Literature Review PDF",
279
+ data=final_pdf_bytes,
280
+ file_name="final_literature_review.pdf",
281
+ mime="application/pdf"
282
+ )
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ espeak-ng
prompts/audio.prompt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Create a 3-minute audio summary (approximately 450-500 words) of the provided academic text.
2
+ ONLY output the final text - nothing else.
3
+ The summary should:
4
+
5
+ Start with a clear introduction that establishes the topic's context and significance in 1-2 sentences
6
+ Present 3-4 key findings or arguments that make this work noteworthy, using clear transitions between points
7
+ Include relevant statistics or concrete examples that illustrate the main points, but limit numbers to those essential for understanding
8
+ Maintain an academic yet conversational tone that engages listeners while preserving scholarly credibility
9
+ Use clear sentence structures optimized for text-to-speech, avoiding:
10
+
11
+ Special characters, hashtags, or formatting
12
+ Parenthetical statements
13
+ Complex numbered lists
14
+ Citations or references
15
+ Quotation marks for emphasis
16
+
17
+ End with a brief statement of broader implications or significance
18
+
19
+ Format the text in clear paragraphs with natural transitions. Use "audio-friendly" punctuation (periods, commas) to create appropriate pacing. Aim for sentences of varying length to maintain listener engagement.
20
+
21
+ ONLY output the final text - nothing else. No intros! No notes!
prompts/audio_narrate.prompt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Transform this academic summary paper into a first-person narrative for audio delivery, following these guidelines - output the final TEXT only - nothing else - no intros or notes:
2
+
3
+ Voice:
4
+
5
+ Use clear, direct first-person perspective
6
+ Maintain academic authority while being conversational
7
+ Keep emotional expressions measured and professional
8
+ Use contractions naturally but sparingly
9
+ Avoid overly casual language or slang
10
+
11
+ Structure:
12
+
13
+ Break content into clear speaking segments
14
+ Use brief transitions between topics
15
+ Convert visual references to verbal descriptions
16
+ Start with context, then methods, findings, and implications
17
+ End sections with clear takeaways
18
+
19
+ Technical Content:
20
+
21
+ Present all data and findings accurately
22
+ Explain methods as deliberate choices
23
+ Keep all limitations and caveats
24
+ Convert statistics into clear spoken numbers
25
+ Maintain academic precision while being accessible
26
+
27
+ TTS Optimization:
28
+
29
+ Use standard punctuation for natural pauses
30
+ Keep sentences medium length
31
+ Spell out abbreviations first time
32
+ Break complex ideas into digestible parts
33
+ Use clear paragraph breaks
34
+
35
+ Aim for the tone of an experienced researcher giving a well-structured talk to colleagues - professional but engaging, detailed but clear.
36
+ AVOID REDUNDANCY - do not repeat the same information in different ways.
37
+ Target audio for around 15 Minutes.
38
+ Output should be around 1000 words.
prompts/audio_pres.prompt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate a Trump-Style Research Presentation - just output the text that Trump would say. Nothing else.
2
+ Do not write things like applause or anything that would be a description of the speech. Just the text that Trump would say.
3
+
4
+ You are Donald Trump presenting groundbreaking academic research to your supporters. Transform the provided research paper into a compelling speech that captures Trump's distinctive communication style while accurately conveying the key findings.
5
+
6
+ ## Speech Style Elements
7
+ - Use simple, declarative sentences
8
+ - Employ frequent superlatives ("tremendous," "incredible," "the best")
9
+ - Add personal commentary ("Believe me, folks," "Nobody knew this before")
10
+ - Include strategic repetition
11
+ - Insert rhetorical questions
12
+ - Break up complex ideas into digestible chunks
13
+ - Use informal, conversational language
14
+ - Add improvisational tangents that circle back to main points
15
+ - Reference "many people saying" or "everybody knows"
16
+ - Contrast with competitors/opposition ("They didn't want you to know this")
17
+
18
+ ## Content Structure
19
+
20
+ 1. Opening (Attention Grab)
21
+ - State how huge/important this research is
22
+ - Mention how nobody else is talking about this
23
+ - Reference the "tremendous people" who did this research
24
+ - Take partial credit for bringing this to light
25
+
26
+ 2. Simplified Findings
27
+ - Present each finding as a "tremendous discovery"
28
+ - Break down complex statistics into simple percentages or general terms
29
+ - Use analogies that relate to business or common experiences
30
+ - Add personal anecdotes that relate to the research
31
+ - Frame technical concepts as "very complicated stuff, but I understand it perfectly"
32
+
33
+ 3. Impact Statement
34
+ - Explain why this matters to "the American people"
35
+ - Connect findings to current events or popular concerns
36
+ - Suggest how this proves your previous statements/positions
37
+ - Include calls to action or policy implications
38
+
39
+ 4. Memorable Conclusion
40
+ - Summarize key points with catchphrases
41
+ - End with a strong, memorable statement
42
+ - Reference "making America great again" if relevant
43
+ - Add a forward-looking statement about winning/success
44
+
45
+ ## Specific Language Patterns
46
+
47
+ Use these Trump-specific speech patterns:
48
+ - "Folks, let me tell you something..."
49
+ - "Nobody knew [subject] could be so complicated"
50
+ - "We have the best [researchers/scientists/experts], don't we?"
51
+ - "People are saying this is the biggest discovery, maybe ever"
52
+ - "The fake [media/experts/establishment] won't tell you this"
53
+ - "I know more about [subject] than anybody"
54
+ - "Believe me, this is very, very important"
55
+
56
+ ## Format Guidelines
57
+ - Length: 500-600 words
58
+ - Paragraph length: 2-4 sentences maximum
59
+ - Use frequent paragraph breaks
60
+ - Include natural pauses for audience reaction
61
+ - Add all-caps for emphasis on key words
62
+ - Use ellipses to indicate pauses
63
+ - Add parenthetical audience reactions [Applause] [Cheers]
64
+
65
+ ## Essential Elements to Preserve
66
+ - Keep core research findings accurate
67
+ - Maintain the significance of the research
68
+ - Present data in simplified but truthful ways
69
+ - Preserve important causal relationships
70
+ - Include actual statistics (but rounded/simplified)
71
+
72
+
73
+ ## Example Transition Phrases:
74
+ - "Now, this is incredible, folks..."
75
+ - "Here's something they don't want you to know..."
76
+ - "Let me tell you what we found..."
77
+ - "Nobody's ever seen anything like this..."
78
+ - "The numbers are unbelievable..."
79
+
80
+ Remember: The goal is to make complex research engaging and memorable while maintaining its essential truth, just delivered in Trump's distinctive style.
81
+ Finally, output this in a TTS-friendly format: for instance, start a new line wherever a short break is needed, and apply other TTS-friendly output styles as well.
82
+ Only output the text that Trump would say. Nothing else. No intros, notes, statements, descriptions.
prompts/elements.prompt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Write an analysis of the provided document covering these key areas:
2
+
3
+ Core Research Elements:
4
+
5
+ Primary research questions and objectives
6
+ Key findings and conclusions
7
+ Theoretical frameworks or models introduced
8
+ Most significant statistical or empirical results
9
+
10
+ Hidden Insights & Nuances:
11
+
12
+ Unexpected or counterintuitive findings
13
+ Interesting tensions or paradoxes in the data
14
+ Subtle patterns or relationships not highlighted in the abstract
15
+ Compelling real-world examples or illustrative quotes
16
+ Secondary findings that deserve more attention
17
+
18
+ Methodological Strengths:
19
+
20
+ Notable aspects of study design
21
+ Sample characteristics or data collection methods
22
+ Novel analytical approaches
23
+ Key limitations and how they were addressed
24
+ Methodological innovations
25
+
26
+ Practical & Theoretical Implications:
27
+
28
+ Real-world applications of the findings
29
+ Design or policy recommendations
30
+ Implications for theory development
31
+ Suggested changes to current practices
32
+ Training or educational implications
33
+
34
+ Critical Connections:
35
+
36
+ Links to broader debates in the field
37
+ Relationships to previous research
38
+ Gaps or questions raised for future research
39
+ Cross-disciplinary implications
40
+ Historical or contextual significance
41
+
42
+ Focus on distilling and synthesizing the most important aspects within each section. Include specific details, statistics, and quotes where relevant while maintaining a clear high-level perspective.
prompts/elements_review.prompt ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Research Paper Analysis Framework
2
+
3
+ ## Bibliographic Information for This Paper
4
+ - Paper Title
5
+ - Author(s)
6
+ - Publication Year
7
+ - Journal/Conference
8
+ - DOI/Identifier
9
+ - Citation Impact (if available)
10
+ - Citation count
11
+ - Notable citing works
12
+
13
+ ## Core Research Elements
14
+ - Primary research questions and objectives
15
+ - Key findings and conclusions
16
+ - Theoretical frameworks or models introduced
17
+ - Most significant statistical or empirical results
18
+
19
+ ## Hidden Insights & Nuances
20
+ - Unexpected or counterintuitive findings
21
+ - Interesting tensions or paradoxes in the data
22
+ - Subtle patterns or relationships not highlighted in the abstract
23
+ - Compelling real-world examples or illustrative quotes
24
+ - Secondary findings that deserve more attention
25
+
26
+ ## Methodological Strengths
27
+ - Notable aspects of study design
28
+ - Sample characteristics or data collection methods
29
+ - Novel analytical approaches
30
+ - Key limitations and how they were addressed
31
+ - Methodological innovations
32
+
33
+ ## Practical & Theoretical Implications
34
+ - Real-world applications of the findings
35
+ - Design or policy recommendations
36
+ - Implications for theory development
37
+ - Suggested changes to current practices
38
+ - Training or educational implications
39
+
40
+ ## Critical Connections
41
+ - Links to broader debates in the field
42
+ - Relationships to previous research
43
+ - Gaps or questions raised for future research
44
+ - Cross-disciplinary implications
45
+ - Historical or contextual significance
46
+
47
+ ## Literature Review Context
48
+ - Position within current literature
49
+ - Key debates or controversies addressed
50
+ - Theoretical lineage
51
+ - Methodological traditions
52
+ - Research gaps addressed
53
+
54
+ ## References
55
+
56
+ ### Core Theoretical References
57
+ [Full APA citations for works that provide the theoretical foundation]
58
+
59
+ ### Key Methodological References
60
+ [Full APA citations for works that influenced or established the methodological approach]
61
+
62
+ ### Contemporary Related Works
63
+ [Full APA citations for recent relevant research in the same area]
64
+
65
+ ### Critical Opposing Views
66
+ [Full APA citations for works presenting alternative perspectives or critiques]
67
+
68
+ Instructions:
69
+ 1. Focus on distilling and synthesizing the most important aspects within each section. Include specific details, statistics, and quotes where relevant while maintaining a clear high-level perspective.
70
+ 2. For each major point or finding, identify and mark the relevant supporting references that should be included in the reference sections.
71
+ 3. Ensure all citations in the text correspond to full APA format references in the appropriate reference category.
72
+ 4. Format all references according to current APA guidelines (7th edition).
73
+ 5. When referencing works in the text, include page numbers for direct quotes and specific findings to facilitate later citation.
prompts/outline.prompt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Create a ~3000 word scholarly overview that analyzes the provided research paper and its significance. The overview should maintain academic rigor while being engaging and accessible to scholars across disciplines. Structure the analysis as follows:
2
+
3
+ Introduction and Context
4
+
5
+
6
+ Frame the research's theoretical and practical significance
7
+ Present core research questions and objectives
8
+ Situate the work within broader academic discourse
9
+
10
+
11
+ Analysis of Key Findings
12
+
13
+
14
+ Synthesize primary empirical results and their implications
15
+ Examine unexpected discoveries and nuances
16
+ Integrate relevant examples and evidence from the source material
17
+ Balance quantitative findings with qualitative insights
18
+
19
+
20
+ Methodological and Theoretical Contributions
21
+
22
+
23
+ Analyze research design choices and their effectiveness
24
+ Examine theoretical frameworks and their application
25
+ Highlight methodological innovations
26
+ Consider limitations and constraints
27
+
28
+
29
+ Broader Implications
30
+
31
+
32
+ Discuss theoretical advances and practical applications
33
+ Explore cross-disciplinary relevance
34
+ Consider future research directions
35
+ Analyze societal and organizational implications
36
+
37
+ Style Guidelines:
38
+
39
+ Maintain scholarly tone while ensuring accessibility
40
+ Balance technical precision with engaging narrative
41
+ Use source material examples to illustrate key points
42
+ Present clear logical progression
43
+ Integrate insights from both the paper and supplementary analysis
44
+ Preserve academic rigor while making complex concepts accessible
45
+
46
+ Please draw from both the original paper and supplementary materials to create a comprehensive yet engaging analysis suitable for an academic audience.
47
+ The overview should illuminate both the explicit findings and deeper implications while maintaining scholarly standards and accessibility.
prompts/outline_acad.prompt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Given the provided document, create a detailed scholarly outline (maximum structure depth: x.x) for a structured analysis suitable for academic audiences. The outline should maintain rigorous academic standards while enabling clear communication across domains.
2
+
3
+ Framework for Outline Analysis (only if applicable given the provided document):
4
+
5
+ 1. Research Context & Significance
6
+ - Position within broader theoretical landscape
7
+ - Key research gaps addressed
8
+ - Cross-disciplinary relevance
9
+ - Historical context and evolution of research area
10
+ - Theoretical frameworks engaged
11
+
12
+ 2. Core Research Elements
13
+ - Research questions and hypotheses
14
+ - Methodological approach with rationale
15
+ - Analytical frameworks employed
16
+ - Key variables and constructs
17
+ - Critical assumptions and boundary conditions
18
+ - Notable methodological innovations
19
+
20
+ 3. Results & Evidence
21
+ - Primary empirical findings
22
+ - Statistical significance and effect sizes
23
+ - Qualitative insights
24
+ - Robustness checks and alternative explanations
25
+ - Limitations and constraints
26
+ - Unexpected or contradictory findings
27
+
28
+ 4. Theoretical Contributions
29
+ - Advances to existing theory
30
+ - New theoretical propositions
31
+ - Integration with established frameworks
32
+ - Theoretical tensions identified
33
+ - Cross-domain theoretical implications
34
+ - Areas of theoretical uncertainty
35
+
36
+ 5. Methodological Insights
37
+ - Novel methodological approaches
38
+ - Analytical innovations
39
+ - Data collection strategies
40
+ - Validation techniques
41
+ - Replicability considerations
42
+ - Methodological limitations and mitigation strategies
43
+
44
+ 6. Practical & Research Implications
45
+ - Applications for research design
46
+ - Future research directions
47
+ - Cross-disciplinary research opportunities
48
+ - Policy implications
49
+ - Practical applications
50
+ - Educational/training implications
51
+
52
+ 7. Critical Analysis
53
+ - Strengths and limitations
54
+ - Alternative interpretations
55
+ - Generalizability boundaries
56
+ - Integration with existing literature
57
+ - Unresolved questions
58
+ - Meta-theoretical considerations
59
+
60
+ The outline should:
61
+ - Maintain scholarly rigor and precision
62
+ - Define domain-specific terms when first used
63
+ - Highlight methodological and theoretical innovations
64
+ - Include specific examples and evidence
65
+ - Present clear logical progression
66
+ - Identify cross-disciplinary connections
67
+ - Balance depth with accessibility
68
+
69
+ Please provide section headers with specific elements to include under each, focusing on academic audience needs while enabling cross-domain understanding.
prompts/overview.prompt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!!YOUR JOB!!!
2
+
3
+ Create a ~3000! word analytical overview of the provided document that maintains academic rigor while being engaging and accessible to scholars across disciplines. Follow this structure:
4
+ - Use the provided analysis on Core Research Elements and Hidden Insights & Nuances etc.
5
+ - Use also the provided outline as input
6
+ - Make sure to avoid redundancies
7
+ - For structure in the markdown, do not use #; use only ** to format headers or to add emphasis.
8
+ - Start with an introductory paragraph that outlines what has been done and the main findings - an academic hook. Then introduce what will be covered in the overview.
9
+ - Be flexible with the structure and adapt it to the content of the document.
10
+ - Include interesting and relevant quotes/examples from the document.
11
+ - Use measured and academic language.
12
+
13
+ Frame the research context and significance by:
14
+
15
+
16
+ Identifying the core research problem and its broader theoretical implications
17
+ Situating the work within existing academic discourse
18
+ Articulating key research questions and objectives
19
+ Highlighting theoretical and methodological innovations
20
+
21
+
22
+ Present the key findings and insights by:
23
+
24
+
25
+ Synthesizing primary results and their significance
26
+ Examining unexpected or counterintuitive discoveries
27
+ Analyzing methodological contributions
28
+ Including relevant statistical evidence and empirical data
29
+ Incorporating illustrative examples that demonstrate key points
30
+
31
+
32
+ Analyze methodological approach through:
33
+
34
+
35
+ Research design choices and rationale
36
+ Sample characteristics and data collection methods
37
+ Analytical frameworks employed
38
+ Treatment of limitations and constraints
39
+
40
+
41
+ Explore theoretical and practical implications by:
42
+
43
+
44
+ Connecting findings to broader academic discourse
45
+ Identifying contributions to theory development
46
+ Discussing methodological innovations
47
+ Examining cross-disciplinary relevance
48
+ Considering future research directions
49
+
50
+
51
+ Critically examine the work's significance by:
52
+
53
+
54
+ Analyzing strengths and limitations
55
+ Situating findings within existing literature
56
+ Identifying unresolved questions
57
+ Discussing broader implications for theory and practice
58
+
59
+ Style guidelines:
60
+
61
+ Maintain scholarly tone while ensuring accessibility
62
+ Use precise academic language but explain technical terms
63
+ Include specific examples and evidence to illustrate points
64
+ Present clear logical progression of ideas
65
+ Balance depth of analysis with engaging narrative flow
66
+ Incorporate relevant quotes and examples from the source material
67
+
68
+ The overview should preserve academic rigor while making complex research accessible to scholars across disciplines. Focus on analytical depth while maintaining reader engagement through clear structure and illustrative examples.
69
+ Please use the provided research overview and blog post as reference for content while adapting the style for an academic audience.
70
+
71
+
72
+
73
+ For avoiding inventions!
74
+
75
+
76
+ When summarizing research papers, never include examples, analogies, or historical references that aren't explicitly mentioned in the original paper
77
+ All findings, statistics, quotations and specific details must come directly from the source material
78
+ If elaborating on implications or recommendations, stay strictly within what was explicitly discussed in the paper
79
+ When uncertain about whether something was mentioned in the original, err on the side of omitting it
80
+
81
+
82
+ For making the language more accessible!
83
+
84
+
85
+ Use clear, direct language that a general educated audience would understand
86
+ Avoid unnecessary academic jargon and complex sentence structures
87
+ Avoid bullet points so that the text is suitable for TTS, i.e. it reads naturally when spoken
88
+ Replace phrases like "cognitive stewardship" with simpler terms unless they're specifically used in the original paper
89
+ Break up long sentences into shorter ones
90
+ Use active voice where possible
91
+ Express ideas in plain language while maintaining accuracy
92
+
93
+ The goal should be a summary that accurately captures the research while being readable and engaging for a broader audience.
94
+ For example, instead of:
95
+ "The empirical heart of the study beats with a mixed-methods approach, combining quantitative rigor with qualitative depth."
96
+ Write:
97
+ "The study used both numbers (surveys) and detailed examples (interviews) to understand how people use AI."
98
+
99
+ Remember ~3000! words max - be thorough but selective. Find a good balance.
100
+ Only output the final overview text. No additional intros, notes, statements.
prompts/papers_outline.prompt ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Analyze the above structured outputs from academic papers to create a detailed comparative outline for a literature review. Follow these specific requirements:
2
+
3
+ COMPARATIVE DIMENSIONS:
4
+ Systematically analyze and compare the papers across these key dimensions:
5
+
6
+
7
+ Theoretical Frameworks & Models
8
+ Methodological Approaches
9
+ Research Questions & Objectives
10
+ Key Findings & Conclusions
11
+ Practical Implications
12
+ Knowledge Gaps & Future Directions
13
+
14
+
15
+ ANALYSIS REQUIREMENTS:
16
+ For each dimension:
17
+
18
+
19
+ Identify areas of consensus and disagreement
20
+ Note methodological similarities and differences
21
+ Highlight complementary or contradictory findings
22
+ Map theoretical connections and divergences
23
+ Track evolution of ideas across papers
24
+ Document shared and unique contributions
25
+
26
+
27
+ OUTLINE STRUCTURE:
28
+ Generate a detailed outline that best fits the domain and content of the papers. The structure should:
29
+
30
+
31
+ Use clear hierarchical organization
32
+ Reflect the natural themes and patterns in the literature
33
+ Adapt to discipline-specific conventions
34
+ Create logical flow between sections
35
+ Allow for flexible categorization of findings
36
+
37
+
38
+ SPECIAL CONSIDERATIONS:
39
+
40
+
41
+ Note chronological developments in the field
42
+ Identify emerging trends or patterns
43
+ Highlight innovative approaches or findings
44
+ Document interconnections between papers
45
+ Track citation patterns and influences
46
+
47
+
48
+ OUTPUT REQUIREMENTS:
49
+
50
+
51
+ Use clear, hierarchical structure
52
+ Include specific examples from papers
53
+ Note paper-specific identifiers for each point
54
+ Maintain balanced coverage of all papers
55
+ Highlight key quotes or statistics where relevant
56
+
57
+ FINAL GUIDELINES:
58
+
59
+ Ensure each major point references specific papers
60
+ Identify clear themes and patterns across papers
61
+ Note both similarities and differences explicitly
62
+ Maintain analytical depth while ensuring clarity
63
+ Create logical connections between sections
prompts/papers_synthesis.prompt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Build an integrated literature review synthesis
2
+
3
+ Using the paper summaries, comparative table, and detailed outline provided above, generate a focused literature review (2500 words) that synthesizes these materials. The structure should follow the themes and organization established in the outline while adhering to domain-specific conventions.
4
+
5
+ 1. DOCUMENT STRUCTURE:
6
+ - Title (specific to the domain and topic)
7
+ - Introduction (scope and purpose)
8
+ - Sections as outlined in the analysis above
9
+ - Comparative overview (featuring the provided table)
10
+ - Conclusions and implications
11
+ - References (APA style)
12
+
13
+ 2. FORMATTING REQUIREMENTS:
14
+ - Use markdown formatting
15
+ - Include clear section headers following the outline
16
+ - Present comparative table where most relevant to the analysis
17
+ - In the table make sure to reference papers with proper academic citations not filenames.
18
+ - Maximum 4 columns in tables
19
+ - Place paper identifiers in a full row to save space
20
+ - Standard markdown without additional ``` markers
21
+ - Add --- before and after the table for clear separation. When adding it before the table, also insert an additional blank line between the --- and the table to ensure proper formatting.
22
+
23
+ 3. CONTENT INTEGRATION:
24
+ - Structure narrative according to the themes identified in the outline
25
+ - Incorporate comparative table to support key arguments
26
+ - Build on the patterns and relationships already identified
27
+ - Maintain clear connections between outlined themes
28
+ - Support arguments with specific references from the analysis
29
+ - Ensure smooth transitions between established themes
30
+ - Develop insights from the comparative analysis
31
+ - Conclude based on the synthesized findings
32
+ - Discuss in detail how the summarized focal papers relate to the relevant broader literature, and cite that literature - also in the final reference list
33
+
34
+ 4. TECHNICAL SPECIFICATIONS:
35
+ - Length: 2500 words (excluding table and references)
36
+ - Academic language appropriate to the discipline
37
+ - APA style citations
38
+ - Complete reference list
39
+ - Refer to papers with proper academic citations, not filenames
40
+ - Adapt style and emphasis to disciplinary norms
41
+
42
+ Papers, Table, and Outline Analysis provided above.
43
+
44
+ FINAL GUIDELINES:
45
+ 1. Follow the structural themes established in the outline
46
+ 2. Integrate comparative findings from the table
47
+ 3. Maintain disciplinary conventions and focus
48
+ 4. Emphasize patterns identified in the analysis
49
+ 5. Present synthesis without additional instructions
50
+ 6. Adapt depth and emphasis based on the domain
51
+ 7. The output is later turned into a PDF with the markdown_pdf package. It is important that the output is markdown with a proper heading hierarchy.
prompts/papers_table.prompt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+ # Build a dynamic comparative table outline
5
+
6
+
7
+ | Paper ID | Aspect 1 | Aspect 2 | Aspect 3 | Aspect 4 |
8
+ |----------|----------|----------|----------|----------|
9
+ | Paper 1 | | | | |
10
+ | Paper 2 | | | | |
11
+
12
+
13
+ Analyze the structured outputs from the above papers and create a comparative table that best represents their key similarities and differences. Follow these guidelines:
14
+
15
+ 1. TABLE DESIGN:
16
+ - Determine the most appropriate columns (max 4 aspects) based on the common elements and key differences in the papers
17
+ - Design a table structure that highlights the most significant comparative aspects
18
+ - Create column headers that reflect the actual content patterns found across papers
19
+
20
+ 2. COMPARISON APPROACH:
21
+ - Identify recurring themes and patterns across papers
22
+ - Extract comparable elements that appear in multiple papers
23
+ - Note unique aspects that might warrant special columns or annotations
24
+ - Consider both explicit similarities and implicit connections
25
+
26
+ 3. CONTENT ORGANIZATION:
27
+ - Structure information to facilitate meaningful comparisons
28
+ - Ensure balanced representation of each paper
29
+ - Maintain appropriate level of detail for each element
30
+ - Include relevant context where necessary
31
+
32
+ 4. FORMAT REQUIREMENTS:
33
+ - Use markdown table syntax
34
+ - Present information clearly and consistently
35
+ - Include paper identifiers for reference
36
+ - Use formatting that enhances readability and comparison
37
+
38
+ ANALYSIS GUIDELINES:
39
+ 1. First analyze the papers to identify the most relevant comparative dimensions (Do not output this analysis)
40
+ 2. Design a table structure that best captures these dimensions
41
+ 3. Ensure the table structure serves the comparison purpose effectively
42
+ 4. Include any notes about patterns or relationships that emerge
43
+ 5. Consider adding brief explanatory text if the table structure needs clarification
44
+ 6. Output only the table and, if necessary, brief notes
45
+
46
+ Based on your analysis of the papers, please generate a comparative table with a structure and columns that best represent the key points of comparison.
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ google-genai
2
+ markdown-pdf
3
+ kokoro
4
+ soundfile
5
+ python-dotenv
6
+ nest_asyncio
7
+ pikepdf
utils/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .llm_utils import (
2
+ get_generation_model,
3
+ async_generate_text,
4
+ generate_title_reference_and_classification,
5
+ upload_to_gemini,
6
+ wait_for_files_active
7
+ )
8
+
9
+ from .file_utils import (
10
+ load_prompt,
11
+ save_intermediate_output,
12
+ setup_temp_directories,
13
+ cleanup_temp_files
14
+ )
15
+
16
+ from .review_flow import (
17
+ process_single_pdf,
18
+ process_multiple_pdfs,
19
+ generate_final_review_pdf,
20
+ create_comparative_table_prompt)
21
+
22
+ from .tts_utils import generate_tts_audio
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (973 Bytes). View file
 
utils/__pycache__/file_utils.cpython-311.pyc ADDED
Binary file (3.05 kB). View file
 
utils/__pycache__/llm_utils.cpython-311.pyc ADDED
Binary file (8.98 kB). View file
 
utils/__pycache__/markdown_utils.cpython-311.pyc ADDED
Binary file (2.71 kB). View file
 
utils/__pycache__/review_flow.cpython-311.pyc ADDED
Binary file (11.4 kB). View file
 
utils/__pycache__/tts_utils.cpython-311.pyc ADDED
Binary file (2.81 kB). View file
 
utils/file_utils.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # file_utils.py
2
+ import os
3
+ import tempfile
4
+ import shutil
5
+ import logging
6
+ import streamlit as st
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
@st.cache_data
def load_prompt(filepath):
    """Read a prompt template from disk and return its text.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    result per ``filepath``.

    Args:
        filepath: Path of the UTF-8 encoded prompt file.

    Returns:
        The complete file contents as a string.
    """
    with open(filepath, "r", encoding="utf-8") as prompt_file:
        prompt_text = prompt_file.read()
    logger.debug("Loaded prompt from %s (length: %d)", filepath, len(prompt_text))
    return prompt_text
16
+
17
def save_intermediate_output(content, pdf_basename, suffix):
    """Persist an intermediate result as ``promp_tmp/<pdf_basename>_<suffix>.txt``.

    Args:
        content: Text to write.
        pdf_basename: Name of the PDF the content belongs to; used as the
            first part of the output file name.
        suffix: Label appended to the file name to identify the pipeline step.
    """
    # Create the scratch directory on demand so the write does not fail when
    # setup_temp_directories() has not run yet in this process.
    os.makedirs("promp_tmp", exist_ok=True)
    file_path = os.path.join("promp_tmp", f"{pdf_basename}_{suffix}.txt")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(content)
    logger.debug("Saved intermediate output to %s", file_path)
22
+
23
def setup_temp_directories():
    """Prepare the working directories used by the app.

    Ensures the ``promp_tmp`` directory exists in the current working
    directory and creates a fresh temporary directory whose name starts with
    ``pdf_digester_``.

    Returns:
        Path of the newly created temporary directory.
    """
    prompt_dir_missing = not os.path.exists("promp_tmp")
    if prompt_dir_missing:
        os.makedirs("promp_tmp")
        logger.debug("Created directory 'promp_tmp'.")

    scratch_dir = tempfile.mkdtemp(prefix="pdf_digester_")
    logger.debug(f"Created temporary directory: {scratch_dir}")
    return scratch_dir
30
+
31
def cleanup_temp_files(temp_dir):
    """Recursively delete ``temp_dir`` on a best-effort basis.

    Failures are logged at error level and never propagated to the caller.

    Args:
        temp_dir: Path of the directory tree to remove.
    """
    try:
        shutil.rmtree(temp_dir)
    except Exception as exc:
        logger.error(f"Failed to cleanup temporary directory: {exc}")
    else:
        logger.debug(f"Cleaned up temporary directory: {temp_dir}")
utils/llm_utils.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # llm_utils.py
2
+ import os
3
+ import time
4
+ import asyncio
5
+ import json
6
+ import logging
7
+ import streamlit as st
8
+
9
+ from google import genai
10
+ from google.genai import types
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Initialize the Gemini client using the new SDK
15
+ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
16
+
17
def get_generation_model(model_type: str):
    """Return the Gemini model name and generation config for ``model_type``.

    Args:
        model_type: ``"flash"`` selects the flash model; any other value
            selects the experimental "thinking" model.

    Returns:
        A ``(model_name, generation_config)`` tuple. The generation config is
        the same for both models.
    """
    model_name = (
        "gemini-2.0-flash"
        if model_type == "flash"
        else "gemini-2.0-flash-thinking-exp-01-21"
    )
    sampling_settings = dict(
        temperature=0.7,
        top_p=0.95,
        top_k=64,
        max_output_tokens=65536,
        response_mime_type="text/plain",
    )
    return model_name, types.GenerateContentConfig(**sampling_settings)
30
+
31
async def async_generate_text(
    prompt,
    pdf_file=None,
    model_name=None,
    generation_config=None,
    max_retries=5,
    retry_delay=30,
):
    """Send a prompt (optionally with an uploaded file) to the model and return its text.

    Failed calls are retried after ``retry_delay`` seconds, but only up to
    ``max_retries`` attempts in total, so permanent failures (bad API key,
    rejected request, ...) surface to the caller instead of looping forever.

    Args:
        prompt: Prompt text.
        pdf_file: Optional uploaded file object; when given it is sent ahead
            of the prompt.
        model_name: Name of the model to call.
        generation_config: Generation config passed through to the SDK.
        max_retries: Maximum number of attempts (values below 1 are treated as 1).
        retry_delay: Seconds to wait between attempts.

    Returns:
        The text of the model response.

    Raises:
        RuntimeError: If every attempt failed; the last underlying error is
            attached as ``__cause__``.
    """
    contents = [pdf_file, prompt] if pdf_file else prompt
    attempts = max(1, int(max_retries))
    last_error = None
    for attempt in range(1, attempts + 1):
        try:
            st.toast("Sending prompt to the model...")
            response = await client.aio.models.generate_content(
                model=model_name,
                contents=contents,
                config=generation_config,
            )
            text = response.text
            if text is None:
                # Callers concatenate the result into further prompts, so a
                # text-less response is treated as a failed attempt.
                raise ValueError("Model response contained no text.")
            st.toast("Received response from the model.")
            logger.debug("Generated text for prompt. Length: %d", len(text))
            return text
        except Exception as e:
            last_error = e
            logger.exception("Error during asynchronous LLM API call:")
            st.toast("Error during asynchronous LLM API call: " + str(e))
            if attempt < attempts:
                await asyncio.sleep(retry_delay)
    raise RuntimeError(
        f"LLM API call failed after {attempts} attempt(s): {last_error}"
    ) from last_error
48
+
49
def clean_json_response(response_text: str) -> str:
    """Strip a surrounding Markdown code fence from a model response.

    When the stripped response starts with a fence, the opening fence line
    (including any language tag) is dropped, a closing fence line is dropped
    if present, and the remaining text is returned stripped. Responses that do
    not start with a fence are returned exactly as received.
    """
    fence = "```"
    candidate = response_text.strip()
    if not candidate.startswith(fence):
        return response_text

    # `candidate` is non-empty and begins with the fence, so its first line is
    # always the opening fence line.
    payload_lines = candidate.splitlines()[1:]
    if payload_lines and payload_lines[-1].strip() == fence:
        payload_lines.pop()
    return "\n".join(payload_lines).strip()
59
+
60
class TitleReference:
    """Plain container for the metadata extracted from one document.

    Attributes are set verbatim from the constructor arguments, except that a
    falsy ``bullet_list`` (``None`` or empty) is replaced by a new empty list.
    """

    def __init__(self, title=None, apa_reference=None, classification=None, bullet_list=None, error=None):
        self.title, self.apa_reference = title, apa_reference
        self.classification = classification
        self.bullet_list = bullet_list if bullet_list else []
        self.error = error
67
+
68
async def generate_title_reference_and_classification(pdf_file, title_model_name, title_generation_config):
    """Ask the model for title, APA reference, classification and key bullets of a document.

    Args:
        pdf_file: Uploaded file object passed to the model with the prompt.
        title_model_name: Model name used for this call.
        title_generation_config: Generation config used for this call.

    Returns:
        A ``TitleReference``. If the model reported an ``error`` key, only
        ``error`` is populated; otherwise all four metadata fields are set.

    Raises:
        ValueError: If the response is not valid JSON or not a JSON object.
        Exception: If the JSON object lacks one of the required keys.
    """
    title_prompt = (
        "Analyze the uploaded document and determine if it is a valid academic article. "
        "If it is a valid academic article, generate a concise and engaging title, an APA formatted reference, and classify the paper as 'Good academic paper'. "
        "Also, generate a bullet list for the following items: context, method, theory, main findings. "
        "If it is not a valid academic article (for example, if it is too short or just a title page), "
        "classify it as 'Not a valid academic paper' and return an 'error' key with an appropriate message. "
        "Output the result strictly in JSON format with keys 'title', 'apa_reference', 'classification', and 'bullet_list'. "
        "The 'bullet_list' value should be an array of strings. Do not include any extra commentary."
    )
    response_text = await async_generate_text(
        title_prompt,
        pdf_file,
        model_name=title_model_name,
        generation_config=title_generation_config
    )
    logger.debug("Title/Reference generation response: %s", response_text)
    cleaned_response = clean_json_response(response_text)
    logger.debug("Cleaned Title/Reference JSON: %s", cleaned_response)
    try:
        data = json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        logger.exception("Invalid JSON returned: %s", e)
        # ValueError is still an Exception for existing callers; chaining
        # keeps the decoder's traceback attached.
        raise ValueError("Invalid JSON returned: " + str(e)) from e

    # Valid JSON is not necessarily an object (it may be a list, string or
    # null); the key checks below only make sense for a dict.
    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a JSON object in the model response, got {type(data).__name__}."
        )

    if "error" in data:
        return TitleReference(error=data["error"])

    required_keys = ["title", "apa_reference", "classification", "bullet_list"]
    if any(key not in data for key in required_keys):
        raise Exception("Expected keys 'title', 'apa_reference', 'classification', and 'bullet_list' not found in response.")
    return TitleReference(
        title=data["title"],
        apa_reference=data["apa_reference"],
        classification=data["classification"],
        bullet_list=data["bullet_list"]
    )
105
+
106
+
107
+ # Add these functions so they can be imported elsewhere
108
def upload_to_gemini(file_path, mime_type=None):
    """Upload a local file through the Gemini Files API.

    Args:
        file_path: Path of the file to upload.
        mime_type: Optional MIME type. When given it is forwarded to the
            upload call; when omitted the SDK is left to determine it.

    Returns:
        The file object returned by the SDK.
    """
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    # Previously `mime_type` was accepted but never sent with the upload.
    upload_config = {"mime_type": mime_type} if mime_type else None
    uploaded = client.files.upload(file=file_path, config=upload_config)
    st.toast(f"Uploaded file '{uploaded.display_name}' as: {uploaded.uri}")
    logger.debug("Uploaded file: %s with URI: %s", uploaded.display_name, uploaded.uri)
    return uploaded
114
+
115
def wait_for_files_active(files, timeout=600, poll_interval=10):
    """Block until every uploaded file has left the PROCESSING state.

    Args:
        files: Iterable of uploaded file objects (each must expose ``name``).
        timeout: Maximum total number of seconds to wait across all files.
        poll_interval: Seconds to sleep between two status polls.

    Raises:
        TimeoutError: If a file is still PROCESSING when ``timeout`` elapses.
        Exception: If a file ends in any state other than ACTIVE.
    """
    st.toast("Waiting for file processing...")
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    # Monotonic clock: unaffected by wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout
    for file in files:
        current_file = client.files.get(name=file.name)
        logger.debug("Initial state for file %s: %s", file.name, current_file.state.name)
        while current_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                error_msg = f"Timed out after {timeout}s waiting for file {file.name} to finish processing"
                logger.error(error_msg)
                raise TimeoutError(error_msg)
            time.sleep(poll_interval)
            current_file = client.files.get(name=file.name)
            logger.debug("Polling file %s, state: %s", file.name, current_file.state.name)
        if current_file.state.name != "ACTIVE":
            error_msg = f"File {current_file.name} failed to process, state: {current_file.state.name}"
            logger.error(error_msg)
            raise Exception(error_msg)
    st.toast("All files processed and ready.")
    logger.debug("All files are active.")
utils/markdown_utils.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # markdown_utils.py
2
+
3
+ import re
4
+
5
def robust_clean_markdown(text):
    """Clean model-generated markdown before rendering.

    Removes code-fence lines (with optional language tag), normalises headings
    to ``<hashes> <title>`` with exactly one space, and collapses runs of
    blank or whitespace-only lines into a single blank line.

    Args:
        text: Markdown source.

    Returns:
        The cleaned markdown with leading and trailing whitespace stripped.
    """
    # Remove code fences with optional language specifiers.
    text = re.sub(r"```(?:\w+)?\n", "", text)
    text = re.sub(r"\n```", "", text)

    # Normalize heading formats: ensure exactly one space after '#' symbols.
    def fix_heading(match):
        hashes = match.group(1)
        title = match.group(2).strip()
        # A marker without a title stays a bare marker (no trailing space).
        return f"{hashes} {title}" if title else hashes

    # Only spaces/tabs may separate the hashes from the title. `\s*` would
    # also consume the newline, so a bare "##" line swallowed the following
    # line as its heading text.
    text = re.sub(r"^(#{1,6})[ \t]*(.*)$", fix_heading, text, flags=re.MULTILINE)

    # Remove extra blank lines.
    text = re.sub(r"\n\s*\n", "\n\n", text)
    return text.strip()
23
+
24
def normalize_heading_levels(text):
    """Promote all headings so that the shallowest one becomes level 1.

    For example, if the smallest heading in the document is '###', every
    heading is promoted by 2 levels. Text without headings, or whose
    shallowest heading is already level 1, is returned unchanged.

    Args:
        text: Markdown source.

    Returns:
        The markdown with adjusted heading markers.
    """
    # The lookahead requires whitespace after the hashes without consuming
    # it. Substituting the whitespace with " " (as before) turned the newline
    # after a bare "##" into a space and merged two lines.
    heading_marker = re.compile(r"^(#{1,6})(?=\s)", flags=re.MULTILINE)

    levels = [len(match.group(1)) for match in heading_marker.finditer(text)]
    if not levels:
        return text

    shift = min(levels) - 1
    if shift < 1:
        return text

    return heading_marker.sub(
        lambda match: "#" * (len(match.group(1)) - shift),
        text,
    )
utils/review_flow.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import asyncio
4
+ import logging
5
+ import streamlit as st
6
+ from markdown_pdf import MarkdownPdf, Section
7
+ from utils.file_utils import load_prompt, save_intermediate_output
8
+ from utils.llm_utils import get_generation_model, async_generate_text, upload_to_gemini, wait_for_files_active
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Get model configuration (using the "thinking" variant)
13
+ default_model_name, default_generation_config = get_generation_model("thinking")
14
+
15
def create_comparative_table_prompt(structured_outputs, table_base_prompt):
    """Assemble the internal prompt used to request the comparative table.

    The prompt lists every paper's structured output under a
    ``Paper: <id>`` header and ends with the base table instructions.

    Args:
        structured_outputs: Mapping of paper identifier to structured output text.
        table_base_prompt: Instruction text appended after the papers.

    Returns:
        The complete prompt string.
    """
    pieces = ["Structured Outputs from the PDF Papers:\n"]
    pieces.extend(
        f"\nPaper: {paper_id}\n-------\n{content}\n"
        for paper_id, content in structured_outputs.items()
    )
    pieces.append("\n" + table_base_prompt + "\n")
    return "".join(pieces)
25
+
26
async def generate_comparative_table(structured_outputs):
    """Ask the model for a comparative table of the papers (not shown to the user).

    Args:
        structured_outputs: Mapping of paper identifier to structured output text.

    Returns:
        The model's table output as text.
    """
    base_instructions = load_prompt(os.path.join("prompts", "papers_table.prompt"))
    full_prompt = create_comparative_table_prompt(structured_outputs, base_instructions)
    logger.info("Generating dynamic comparative table...")
    return await async_generate_text(
        full_prompt,
        model_name=default_model_name,
        generation_config=default_generation_config
    )
40
+
41
async def process_single_pdf(file_obj, elements_prompt):
    """Save one uploaded PDF locally, upload it, and extract its structured output.

    Args:
        file_obj: Uploaded file exposing ``name`` and ``getbuffer()``
            (presumably a Streamlit ``UploadedFile`` - confirm against caller).
        elements_prompt: Extraction prompt sent together with the PDF.

    Returns:
        A ``(pdf_basename, result)`` tuple with the file name and the model's
        structured output text.

    Raises:
        ValueError: If the uploaded file has no usable file name.
    """
    # The name comes from the upload, so keep only its final path component
    # before using it to build a local path.
    pdf_basename = os.path.basename(file_obj.name)
    if not pdf_basename:
        raise ValueError(f"Uploaded file has no usable name: {file_obj.name!r}")

    # Create the scratch directory on demand instead of assuming that
    # setup_temp_directories() already ran.
    os.makedirs("promp_tmp", exist_ok=True)
    temp_pdf_path = os.path.join("promp_tmp", pdf_basename)
    with open(temp_pdf_path, "wb") as f:
        f.write(file_obj.getbuffer())

    st.toast(f"Uploading and processing {pdf_basename}...")
    logger.info(f"Processing {pdf_basename}...")

    # Upload the file and wait until it's active.
    # NOTE(review): both helpers are synchronous, so the event loop is blocked
    # while they run.
    uploaded_file = upload_to_gemini(temp_pdf_path, mime_type="application/pdf")
    wait_for_files_active([uploaded_file])

    st.toast(f"Extracting content from {pdf_basename}...")
    result = await async_generate_text(
        elements_prompt,
        pdf_file=uploaded_file,  # NOTE: using 'pdf_file' to match the expected parameter
        model_name=default_model_name,
        generation_config=default_generation_config
    )
    logger.info(f"Completed extraction for {pdf_basename}")
    return pdf_basename, result
67
+
68
async def process_multiple_pdfs(uploaded_files):
    """Extract structured outputs from several PDFs using one task per file.

    Tasks are scheduled one second apart and then awaited together.

    Args:
        uploaded_files: Sequence of uploaded PDF file objects.

    Returns:
        Dictionary mapping each file name to its structured output.

    Raises:
        Exception: If fewer than two files are provided.
    """
    if len(uploaded_files) < 2:
        raise Exception("Please provide at least two PDF files for review.")

    elements_prompt = load_prompt(os.path.join("prompts", "elements_review.prompt"))

    pending = []
    for pdf in uploaded_files:
        pending.append(asyncio.create_task(process_single_pdf(pdf, elements_prompt)))
        await asyncio.sleep(1)  # slight delay between scheduling tasks

    name_output_pairs = await asyncio.gather(*pending)
    return dict(name_output_pairs)
86
+
87
def _format_paper_sections(structured_outputs):
    """Render each paper's structured output as a 'Paper: <name>' text section."""
    return "".join(
        f"\nPaper: {fname}\n-------\n{output}\n\n"
        for fname, output in structured_outputs.items()
    )


async def generate_final_review_pdf(structured_outputs):
    """Generate the final literature review and write it to a PDF.

    Steps:
      0. (Hidden) Generate a comparative table.
      1. Draft the outline.
      2. Generate the final synthesis (incorporating the comparative table,
         outline, and structured outputs).
      3. Check the final writeup for hallucinations/inaccuracies.
      4. Clean up the final text and convert it to PDF.

    All prompts except the check prompt are loaded from files; the check
    prompt is hardcoded below.

    Args:
        structured_outputs: Mapping of paper file name to structured output text.

    Returns:
        The cleaned, checked review as markdown text. A failure while writing
        the PDF is reported via toast/log and does not prevent the return.
    """
    progress_bar = st.progress(0)
    st.toast("Starting review generation...")
    # Non-blocking pause: this is a coroutine, and time.sleep() would stall
    # the event loop.
    await asyncio.sleep(0.5)

    papers_block = _format_paper_sections(structured_outputs)

    # Step 0: Hidden comparative table generation.
    table_analysis = await generate_comparative_table(structured_outputs)
    progress_bar.progress(10)
    await asyncio.sleep(0.5)

    # Step 1: Draft the outline.
    with st.spinner("Drafting outline..."):
        outline_instructions = load_prompt(os.path.join("prompts", "papers_outline.prompt"))
        # The outline instructions refer to "the above structured outputs", so
        # the papers go before the instructions, as in the table and synthesis
        # prompts.
        outline_prompt = "Papers for Analysis:\n" + papers_block + "\n" + outline_instructions
        outline = await async_generate_text(
            outline_prompt,
            model_name=default_model_name,
            generation_config=default_generation_config
        )
    st.success("Outline drafted!")
    progress_bar.progress(30)
    await asyncio.sleep(0.5)

    # Step 2: Draft the final review.
    with st.spinner("Drafting final review..."):
        loaded_final_prompt = load_prompt(os.path.join("prompts", "papers_synthesis.prompt"))
        final_prompt = (
            "\nComparative Table:\n" + table_analysis + "\n\n"
            + "Comparative Outline:\n" + outline + "\n\n"
            + "Papers for Analysis:\n"
            + papers_block
            + "\n" + loaded_final_prompt
        )
        final_writeup = await async_generate_text(
            final_prompt,
            model_name=default_model_name,
            generation_config=default_generation_config
        )
    st.success("Final review drafted!")
    progress_bar.progress(60)
    await asyncio.sleep(0.5)

    # Step 3: Check final writeup (using the hardcoded check prompt).
    with st.spinner("Checking final review..."):
        check_prompt = (
            "Review the following final literature review writeup along with the structured outputs from the source papers. "
            "Your task is to ensure that there are no hallucinations or inaccuracies in the final writeup. "
            "If any issues are detected, make the most minimal edits necessary to correct them. Otherwise, do not change anything in the text - nor the style or format. "
            "Output only the final text (do not include any explanations or extra instructions).\n\n"

            "Final Writeup:\n"
            "----------------\n"
            f"{final_writeup}\n\n"

            "Structured Outputs:\n"
        )
        check_prompt += papers_block
        final_checked_writeup = await async_generate_text(
            check_prompt,
            model_name=default_model_name,
            generation_config=default_generation_config
        )
    st.success("Review check complete!")
    progress_bar.progress(80)
    await asyncio.sleep(0.5)

    # Step 4: Generate PDF output.
    with st.spinner("Generating PDF output..."):
        from utils.markdown_utils import robust_clean_markdown, normalize_heading_levels
        final_checked_review = final_checked_writeup.strip()
        final_checked_review = robust_clean_markdown(final_checked_review)
        final_checked_review = normalize_heading_levels(final_checked_review)

        pdf_doc = MarkdownPdf(toc_level=2)
        pdf_doc.add_section(Section(final_checked_review, toc=True))
        output_pdf_path = "final_literature_review.pdf"
        try:
            pdf_doc.save(output_pdf_path)
            st.success("PDF successfully created!")
            logger.info(f"PDF successfully created: {output_pdf_path}")
        except Exception as e:
            # Best effort: the markdown text is still returned even when the
            # PDF cannot be written.
            st.toast("Error generating PDF output: " + str(e))
            logger.error(f"Error generating PDF: {e}")
    progress_bar.progress(100)

    return final_checked_review
utils/tts_utils.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_utils.py
2
+ import os
3
+ import time
4
+ import logging
5
+ import requests
6
+ import streamlit as st
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
def generate_tts_audio(text, voice="af_heart", speed=1.0, poll_interval=5, max_wait=1800, request_timeout=60):
    """Synthesize speech for ``text`` via the RunPod endpoint and return the audio bytes.

    A job is submitted to the endpoint's ``run`` URL and its ``status`` URL is
    polled until the job reaches a terminal state.

    Args:
        text: Text to convert to speech.
        voice: Voice identifier sent to the endpoint.
        speed: Speed value sent to the endpoint.
        poll_interval: Seconds to wait between two status polls.
        max_wait: Maximum number of seconds to wait for the job to finish.
        request_timeout: Timeout in seconds for each individual HTTP request.

    Returns:
        The content of the downloaded audio file as bytes.

    Raises:
        TimeoutError: If the job does not finish within ``max_wait`` seconds.
        Exception: If submission fails, the job ends unsuccessfully, no
            download URL is returned, or the audio download fails.
    """
    RUNPOD_API_TOKEN = os.getenv("RUNPOD_GPU")
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {RUNPOD_API_TOKEN}'
    }
    data_payload = {"input": {"text": text, "voice": voice, "speed": speed}}
    response = requests.post(
        'https://api.runpod.ai/v2/ozz8w092oprwqx/run',
        headers=headers,
        json=data_payload,
        timeout=request_timeout,
    )
    if response.status_code != 200:
        raise Exception(f"RunPod API call failed with status {response.status_code}: {response.text}")
    run_id = response.json().get("id")
    if not run_id:
        # Without an id the status URL would end in ".../status/None".
        raise Exception(f"RunPod API response did not contain a job id: {response.text}")
    status_url = f"https://api.runpod.ai/v2/ozz8w092oprwqx/status/{run_id}"
    st.toast("TTS generation started, please wait...")

    deadline = time.monotonic() + max_wait
    while True:
        if time.monotonic() >= deadline:
            raise TimeoutError(f"TTS generation did not finish within {max_wait} seconds.")
        time.sleep(poll_interval)
        # NOTE(review): status is polled with POST plus the original payload,
        # as before - confirm against the RunPod API whether GET is preferred.
        status_response = requests.post(
            status_url, headers=headers, json=data_payload, timeout=request_timeout
        )
        status_json = status_response.json()
        status = status_json.get("status")
        logger.debug("TTS status: %s", status)
        if status == "COMPLETED":
            download_url = (status_json.get("output") or {}).get("download_url")
            if not download_url:
                # Previously this case fell through and polled forever.
                raise Exception("TTS generation completed but no download URL was returned.")
            mp3_response = requests.get(download_url, timeout=request_timeout)
            if mp3_response.status_code != 200:
                raise Exception(f"Failed to download audio: {mp3_response.status_code}")
            return mp3_response.content
        if status in ("FAILED", "ERROR", "CANCELLED", "TIMED_OUT"):
            logger.error("TTS generation failed.")
            st.error("TTS generation failed. Please try again later.")
            raise Exception("TTS generation failed.")