"""Streamlit front end for the Academic PDF Digester.

The script gates access behind Google or basic (username/password)
authentication and then offers three modes: a single-publication overview,
a multi-paper literature review, and a plain TTS readout of one paper.

Streamlit re-executes this file top to bottom on every user interaction, so
anything that has to survive an interaction is kept in ``st.session_state``.
"""
import os
import time
import asyncio
import logging
import secrets
import json

import streamlit as st
from dotenv import load_dotenv

load_dotenv()
st.set_page_config(page_title="Academic PDF Digester", layout="wide")

# Uploads smaller than this many bytes are rejected as "not an academic paper".
MIN_PDF_SIZE_BYTES = 5000
# Directory the final single-publication PDF is written to.
# NOTE(review): "promp_tmp" looks like a typo for "prompt_tmp", but it may be
# shared with the helpers in utils.file_utils, so the name is left unchanged.
FINAL_PDF_DIR = "promp_tmp"

# Let user choose the authentication method via a sidebar radio button
auth_method = st.sidebar.radio("Choose Authentication Method:", options=["Google", "Basic"])

if auth_method == "Google":
    from streamlit_google_auth import Authenticate

    google_auth_str = os.getenv("GOOGLE_AUTH")
    if not google_auth_str:
        raise ValueError("GOOGLE_AUTH not set in .env")
    try:
        google_auth_data = json.loads(google_auth_str)
    except json.JSONDecodeError as e:
        raise ValueError("GOOGLE_AUTH is not valid JSON") from e

    # The authenticator takes a file path, so the JSON from the environment
    # is materialised on disk.
    with open("google_credentials.json", "w") as f:
        json.dump(google_auth_data, f, indent=4)

    # Cookie settings come from the environment when configured. Otherwise a
    # random value is generated ONCE per server process: setdefault() stores
    # it in os.environ so later reruns (and other sessions) reuse it instead
    # of drawing a fresh name/key that would invalidate every stored cookie.
    cookie_name = os.environ.setdefault("COOKIE_NAME", f"llm_pdf_digest_{secrets.token_hex(4)}")
    cookie_key = os.environ.setdefault("COOKIE_KEY", secrets.token_hex(32))

    authenticator = Authenticate(
        secret_credentials_path='google_credentials.json',
        cookie_name=cookie_name,
        cookie_key=cookie_key,
        redirect_uri='https://rjuro-pdf-digest.hf.space',
    )
    authenticator.check_authentification()
    authenticator.login()

    if not st.session_state.get('connected'):
        st.stop()

    # Profile fields are rendered only when present so that a profile without
    # a picture, name or email does not crash the page.
    user_info = st.session_state.get('user_info') or {}
    with st.sidebar:
        picture = user_info.get('picture')
        if picture:
            st.image(picture)
        name = user_info.get('name')
        if name:
            st.write('Hello, ' + name)
        email = user_info.get('email')
        if email:
            st.write('Your email is ' + email)
else:
    # Basic Authentication
    EXPECTED_USERNAME = os.getenv("APP_USERNAME")
    EXPECTED_PASSWORD = os.getenv("APP_PASSWORD")

    def authenticate(username, password):
        """Return True when the supplied credentials match the configured ones.

        Args:
            username: Username typed into the login form.
            password: Password typed into the login form.

        Returns:
            True only if both values equal APP_USERNAME / APP_PASSWORD.
            Always False when either environment variable is unset.
        """
        if EXPECTED_USERNAME is None or EXPECTED_PASSWORD is None:
            return False
        # Constant-time comparison; both checks always run so the result does
        # not reveal which field was wrong. Bytes are used because
        # compare_digest() rejects non-ASCII str arguments.
        user_ok = secrets.compare_digest(
            username.encode("utf-8"), EXPECTED_USERNAME.encode("utf-8")
        )
        password_ok = secrets.compare_digest(
            password.encode("utf-8"), EXPECTED_PASSWORD.encode("utf-8")
        )
        return user_ok and password_ok

    if "authenticated" not in st.session_state:
        st.session_state["authenticated"] = False
    if "username" not in st.session_state:
        st.session_state["username"] = ""

    if not st.session_state["authenticated"]:
        st.info("Login Required: Please enter your username and password to access the app. 🔒")
        with st.form(key="login_form"):
            user_input = st.text_input("Username")
            password_input = st.text_input("Password", type="password")
            submit = st.form_submit_button("Login")
        if submit:
            if authenticate(user_input, password_input):
                st.session_state["authenticated"] = True
                st.session_state["username"] = user_input  # Save the username in session state
                st.success("Authentication successful!")
            else:
                st.error("Invalid username or password.")
        # Halt only while still unauthenticated, so a successful login shows
        # the app in the same run instead of a dead page.
        if not st.session_state["authenticated"]:
            st.stop()

    with st.sidebar:
        st.write("Hello, " + st.session_state["username"])

# --- Continue with Academic PDF Digester App ---
# Project imports stay below the authentication gate, as in the original
# script, so they are only loaded for authenticated runs.
from utils.file_utils import load_prompt, save_intermediate_output, setup_temp_directories, cleanup_temp_files
from utils.llm_utils import (
    get_generation_model,
    async_generate_text,
    generate_title_reference_and_classification,
    upload_to_gemini,
    wait_for_files_active
)
from utils.tts_utils import generate_tts_audio
from utils.review_flow import process_multiple_pdfs, generate_final_review_pdf, generate_multi_speaker_podcast

logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

st.title("Academic PDF Digester 😋")
st.subheader("Effortlessly analyze and synthesize academic papers 🚀")

WELCOME_MARKDOWN = """
**Welcome to Academic PDF Digester!**

This tool uses advanced Large Language Models (LLMs) to automatically extract key information from academic papers, generate structured summaries, and even produce downloadable PDF and audio outputs. Whether you are exploring a single publication or synthesizing a literature review from multiple papers, our system streamlines your research process.
"""
st.markdown(WELCOME_MARKDOWN)

HOW_IT_WORKS_MARKDOWN = """
**Overview of the Functionality:**

- **File Processing:** Uploaded PDFs are saved locally and then sent to a cloud-based service where a specialized LLM analyzes the content.
- **LLM Integration:** The LLM extracts key information (such as the core structure, outlines, and insights) and generates structured outputs. These outputs are then used to create comparative analyses, draft final reviews, and check for consistency.
- **Outputs:** The system generates:
  - A **detailed overview** for individual papers.
  - A **comparative literature review** for multiple papers.
  - **Downloadable PDFs** summarizing the findings.
  - **Audio summaries** for quick listening.

**How LLMs Are Used:**

- The tool leverages LLMs to generate content.
- For each step (e.g., generating outlines, synthesizing final reviews), the LLM processes the input and returns a coherent narrative.
- Checks are performed to minimize hallucinations and ensure factual accuracy.

This powerful combination of file processing, LLM integration, and smart output synthesis helps you gain insights from academic papers quickly and accurately.
"""
with st.expander("How It Works"):
    st.markdown(HOW_IT_WORKS_MARKDOWN)

# --- Mode Selection ---
mode = st.sidebar.radio(
    "Choose a mode:",
    options=["Explore One Publication", "Write a Literature Review", "Generate TTS Readout"],
)

if mode == "Explore One Publication":
    st.subheader("Single-Publication Analysis 📄")

    # Load models for title generation and main analysis
    title_model_name, title_generation_config = get_generation_model("flash")
    default_model_name, default_generation_config = get_generation_model("thinking")

    AUDIO_SHORT = "Short Audio Summary 📢"
    AUDIO_LONG = "Long Narrative Summary 🎙️"

    def _render_tts_audio(text, download_label, file_name):
        """Synthesize ``text`` to MP3 and show a player plus a download button.

        Stops the script run with a user-facing error if synthesis fails.
        """
        with st.spinner("Generating audio via TTS API..."):
            try:
                audio_mp3_data = generate_tts_audio(text, voice="af_heart", speed=1.0)
            except Exception:
                logger.exception("TTS generation failed:")
                st.error("Audio generation failed. Please try again later.")
                st.stop()
        st.audio(audio_mp3_data, format="audio/mp3")
        st.download_button(download_label, audio_mp3_data, file_name=file_name, mime="audio/mp3")

    uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_pdf is not None:
        if uploaded_pdf.size < MIN_PDF_SIZE_BYTES:
            st.error("Input does not appear to be an academic paper. Please upload a valid academic paper.")
            st.stop()
        st.session_state["uploaded_pdf"] = uploaded_pdf
        st.success("PDF uploaded successfully. 👍")
        progress_bar = st.progress(0)

        async def process_single_pdf():
            """Run the single-paper pipeline and store its results in session state.

            Steps: save the upload locally, upload it to Gemini, derive
            title/reference/classification, generate outline and key insights
            concurrently, synthesize the final overview, and render it to PDF.
            On success ``final_pdf``, ``final_text`` and ``generated`` are set
            in ``st.session_state``. Any failure is reported with ``st.error``
            and halts the run; the temp directory is cleaned up either way.
            """
            # Create a temporary directory and save the file locally.
            temp_dir = setup_temp_directories()
            try:
                pdf_basename = os.path.splitext(st.session_state["uploaded_pdf"].name)[0]
                st.session_state["pdf_basename"] = pdf_basename
                temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
                with open(temp_pdf_path, "wb") as f:
                    f.write(st.session_state["uploaded_pdf"].getbuffer())
                logger.debug("PDF saved locally to %s", temp_pdf_path)
                progress_bar.progress(10)

                # Upload file to Gemini.
                with st.spinner("Uploading PDF to Gemini... ⏳"):
                    try:
                        pdf_file = upload_to_gemini(temp_pdf_path, mime_type="application/pdf")
                    except Exception as e:
                        st.error("Error uploading PDF: " + str(e))
                        st.stop()
                progress_bar.progress(20)

                # Wait for file processing.
                with st.spinner("Waiting for file processing... ⏱️"):
                    try:
                        wait_for_files_active([pdf_file])
                    except Exception as e:
                        st.error("Error in file processing: " + str(e))
                        st.stop()
                progress_bar.progress(30)

                # Generate title, APA reference and classification.
                with st.spinner("Generating title, APA reference, and classification... 📝"):
                    title_ref = await generate_title_reference_and_classification(
                        pdf_file, title_model_name, title_generation_config
                    )
                if title_ref.error:
                    st.error(title_ref.error)
                    st.stop()
                st.session_state["title"] = title_ref.title
                st.session_state["apa_reference"] = title_ref.apa_reference
                st.session_state["classification"] = title_ref.classification
                st.session_state["bullet_list"] = title_ref.bullet_list
                progress_bar.progress(40)

                # Load prompts for outline and key insights.
                try:
                    outline_prompt = load_prompt("prompts/outline_acad.prompt")
                    elements_prompt = load_prompt("prompts/elements.prompt")
                except Exception as e:
                    st.error("Error loading prompt files: " + str(e))
                    st.stop()

                # Generate key insights and outline concurrently.
                with st.spinner("Extracting key insights and drafting outline... 🔍"):
                    task_outline = async_generate_text(
                        outline_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                    task_elements = async_generate_text(
                        elements_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                    outline_acad_output, elements_output = await asyncio.gather(task_outline, task_elements)
                progress_bar.progress(65)
                st.success("Key insights extracted and outline drafted successfully! 📝")
                save_intermediate_output(outline_acad_output, pdf_basename, "outline")
                save_intermediate_output(elements_output, pdf_basename, "elements")

                # Generate final overview.
                with st.spinner("Generating final overview... 📚"):
                    overview_prompt = load_prompt("prompts/overview.prompt")
                    tts_instruction = "Ensure the final overview is TTS-friendly and does not exceed 3000 words."
                    # The instruction is deliberately placed both first and last.
                    combined_overview_prompt = (
                        tts_instruction
                        + "\n\nAcademic Outline:\n" + outline_acad_output
                        + "\n\nKey Insights:\n" + elements_output
                        + "\n" + overview_prompt
                        + "\n" + tts_instruction
                    )
                    overview_output = await async_generate_text(
                        combined_overview_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                save_intermediate_output(overview_output, pdf_basename, "overview")
                progress_bar.progress(75)

                # Generate downloadable PDF.
                from markdown_pdf import MarkdownPdf, Section

                # Format bullet items: Only the bullet name (before colon) is bold.
                def format_bullet(item):
                    if ':' in item:
                        key, value = item.split(':', 1)
                        return f"- **{key.strip()}**: {value.strip()}"
                    return f"- **{item.strip()}**"

                # "or []" also covers a stored bullet list of None.
                bullet_items = st.session_state.get("bullet_list") or []
                bullet_markdown = "\n".join(format_bullet(item) for item in bullet_items)

                with st.spinner("Generating downloadable PDF... 📄"):
                    try:
                        pdf_doc = MarkdownPdf(toc_level=2)
                        title_and_ref_markdown = (
                            f"# {st.session_state['title']}\n\n"
                            f"*{st.session_state['apa_reference']}*\n\n"
                            "### Key Components:\n"
                            f"{bullet_markdown}\n\n"
                        )
                        pdf_doc.add_section(Section(title_and_ref_markdown, toc=False))
                        pdf_doc.add_section(Section(overview_output.strip(), toc=True))
                        # Make sure the output directory exists before saving.
                        os.makedirs(FINAL_PDF_DIR, exist_ok=True)
                        final_pdf_path = os.path.join(
                            FINAL_PDF_DIR, f"{st.session_state['pdf_basename']}_final_output.pdf"
                        )
                        pdf_doc.save(final_pdf_path)
                        with open(final_pdf_path, "rb") as f:
                            st.session_state["final_pdf"] = f.read()
                        # Same header as the PDF's first section, followed by the overview.
                        st.session_state["final_text"] = title_and_ref_markdown + overview_output.strip()
                        st.success("PDF generated successfully. You can download it below. 📥")
                    except Exception as e:
                        st.error("Failed to generate PDF: " + str(e))
                        st.stop()
                progress_bar.progress(100)
                st.session_state["generated"] = True
            except Exception as e:
                st.error(f"Error during processing: {str(e)}")
                st.stop()
            finally:
                cleanup_temp_files(temp_dir)

        if st.button("Generate Overview 🚀"):
            asyncio.run(process_single_pdf())

        if st.session_state.get("generated"):
            st.download_button(
                label="Download Final PDF 📄",
                data=st.session_state["final_pdf"],
                file_name="final_output.pdf",
                mime="application/pdf"
            )
            st.info("PDF is ready for download!")

    # --- Audio Generation Section ---
    # Driven purely by session state so it survives the rerun triggered by
    # the buttons above.
    if st.session_state.get("final_text"):
        st.subheader("Audio Generation Options 🎧")
        st.markdown(
            "Choose **Short Audio Summary 📢** for a concise overview or **Long Narrative Summary 🎙️** - simulating a presentation by the authors - for a more detailed narration."
        )
        audio_option = st.radio("Select Audio Type", options=[AUDIO_SHORT, AUDIO_LONG])

        if st.button("Generate Audio 🔊"):
            pdf_basename = st.session_state["pdf_basename"]
            if audio_option == AUDIO_SHORT:
                try:
                    audio_prompt = load_prompt("prompts/audio.prompt")
                except Exception as e:
                    logger.exception("Failed to load audio prompt:")
                    st.error("Error loading audio prompt: " + str(e))
                    st.stop()
                # Short summary: instructions first, then the overview text.
                combined_audio_prompt = audio_prompt + "\n\n" + st.session_state["final_text"]
                with st.spinner("Generating concise audio summary text..."):
                    concise_audio_text = asyncio.run(async_generate_text(
                        combined_audio_prompt,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    ))
                _render_tts_audio(
                    concise_audio_text,
                    "Download Audio Summary 📥",
                    f"{pdf_basename}_audio.mp3",
                )
            elif audio_option == AUDIO_LONG:
                try:
                    audio_narrate_prompt = load_prompt("prompts/audio_narrate.prompt")
                except Exception as e:
                    logger.exception("Failed to load long narrative prompt:")
                    st.error("Error loading long narrative prompt: " + str(e))
                    st.stop()
                # Long narrative: overview text first, then the instructions.
                combined_narrate_prompt = st.session_state["final_text"] + "\n\n" + audio_narrate_prompt
                with st.spinner("Generating long narrative summary text..."):
                    narrative_text = asyncio.run(async_generate_text(
                        combined_narrate_prompt,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    ))
                _render_tts_audio(
                    narrative_text,
                    "Download Long Narrative Audio 📥",
                    f"{pdf_basename}_narrative_audio.mp3",
                )

elif mode == "Write a Literature Review":
    st.subheader("Literature Review Generation 📚")
    st.markdown(
        "Upload **multiple academic PDFs** to generate a comparative literature review. "
        "You can select more than one file."
    )
    uploaded_pdfs = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    if uploaded_pdfs:
        st.session_state["uploaded_pdfs"] = uploaded_pdfs
        st.success(f"{len(uploaded_pdfs)} PDFs uploaded successfully.")
        if st.button("Generate Literature Review 🚀"):
            with st.spinner("Processing PDFs and generating review..."):
                structured_outputs = asyncio.run(process_multiple_pdfs(uploaded_pdfs))
                final_review_text = asyncio.run(generate_final_review_pdf(structured_outputs))
            st.success("Literature review generated successfully! 🎉")
            st.text_area("Final Literature Review", final_review_text, height=300)
            # NOTE(review): presumably generate_final_review_pdf() writes this
            # file into the working directory - confirm in utils.review_flow.
            with open("final_literature_review.pdf", "rb") as f:
                final_pdf_bytes = f.read()
            st.download_button(
                label="Download Final Literature Review PDF 📄",
                data=final_pdf_bytes,
                file_name="final_literature_review.pdf",
                mime="application/pdf"
            )
            # Save final text and a base filename for podcast generation.
            st.session_state["final_text"] = final_review_text
            st.session_state["pdf_basename"] = "final_literature_review"

    # Read from session state so the podcast button is still available on the
    # rerun that follows the review generation.
    if st.session_state.get("final_text"):
        if st.button("Generate Multi-Speaker Podcast 🎀"):
            progress_bar = st.progress(0)
            with st.spinner("Generating multi-speaker podcast..."):
                try:
                    podcast_audio = asyncio.run(
                        generate_multi_speaker_podcast(
                            st.session_state["final_text"], progress_bar=progress_bar
                        )
                    )
                    st.audio(podcast_audio, format="audio/mp3")
                    st.download_button(
                        "Download Podcast Audio 📥",
                        podcast_audio,
                        file_name=f"{st.session_state.get('pdf_basename', 'literature_review')}_podcast.mp3",
                        mime="audio/mp3"
                    )
                except Exception as e:
                    st.error("Podcast generation failed: " + str(e))

elif mode == "Generate TTS Readout":
    st.subheader("Generate Simple TTS Readout")
    uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])

    # Mapping with nicer descriptors: Name, gender, and country flag emoji
    voice_options = {
        "Heart (Female) 🇺🇸": "af_heart",
        "Bella (Female) 🇺🇸": "af_bella",
        "Michael (Male) 🇺🇸": "am_michael",
        "Puck (Male) 🇺🇸": "am_puck",
        "Emma (Female) 🇬🇧": "bf_emma",
        "George (Male) 🇬🇧": "bm_george",
    }
    selected_voice = st.selectbox("Select Voice", options=list(voice_options.keys()))
    voice_choice = voice_options[selected_voice]

    # Flag to store intermediate outputs to disk (disabled by default)
    store_intermediates = False

    if uploaded_pdf is not None:
        if uploaded_pdf.size < MIN_PDF_SIZE_BYTES:
            st.error("Input does not appear to be a valid academic paper.")
            st.stop()
        st.session_state["uploaded_pdf_tts"] = uploaded_pdf
        st.success("PDF uploaded successfully. 👍")

        # Load generation models
        title_model_name, title_generation_config = get_generation_model("flash")
        default_model_name, default_generation_config = get_generation_model("thinking")
        progress_bar = st.progress(0)

        async def process_tts_readout():
            """Turn the uploaded PDF into a spoken readout and render the audio.

            Steps: save and upload the PDF, validate it via the title/reference
            check, then chain three LLM calls (outline, draft, QA pass) and
            synthesize the final text with the selected voice. Errors are
            reported with ``st.error``; the temp directory is always removed.
            """
            temp_dir = setup_temp_directories()
            try:
                pdf_basename = os.path.splitext(uploaded_pdf.name)[0]
                st.session_state["pdf_basename_tts"] = pdf_basename
                temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
                with open(temp_pdf_path, "wb") as f:
                    f.write(uploaded_pdf.getbuffer())
                progress_bar.progress(10)

                # Upload PDF to Gemini and wait for processing
                pdf_file = upload_to_gemini(temp_pdf_path, mime_type="application/pdf")
                wait_for_files_active([pdf_file])
                progress_bar.progress(20)

                # Validate the academic paper via title/reference check
                with st.spinner("Validating academic paper..."):
                    title_ref = await generate_title_reference_and_classification(
                        pdf_file, title_model_name, title_generation_config
                    )
                if title_ref.error:
                    st.error("Uploaded PDF is not a valid academic paper: " + title_ref.error)
                    st.stop()
                progress_bar.progress(30)

                # Step 1: Generate TTS Outline
                with st.spinner("Generating TTS Outline..."):
                    plain_tts_outline_prompt = load_prompt("prompts/plain_TTS_outline.prompt")
                    outline_output = await async_generate_text(
                        plain_tts_outline_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                progress_bar.progress(50)
                if store_intermediates:
                    save_intermediate_output(outline_output, pdf_basename, "tts_outline")

                # Step 2: Generate TTS Draft using the outline
                with st.spinner("Generating TTS Draft..."):
                    plain_tts_draft_prompt = load_prompt("prompts/plain_TTS_draft.prompt")
                    combined_draft_prompt = outline_output + "\n\n" + plain_tts_draft_prompt
                    draft_output = await async_generate_text(
                        combined_draft_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                progress_bar.progress(70)
                if store_intermediates:
                    save_intermediate_output(draft_output, pdf_basename, "tts_draft")

                # Step 3: Finalize the readout via Q&A
                with st.spinner("Finalizing TTS Readout..."):
                    plain_tts_qa_prompt = load_prompt("prompts/plain_TTS_QA.prompt")
                    combined_qa_prompt = draft_output + "\n\n" + plain_tts_qa_prompt
                    final_output = await async_generate_text(
                        combined_qa_prompt, pdf_file,
                        model_name=default_model_name,
                        generation_config=default_generation_config
                    )
                progress_bar.progress(90)
                if store_intermediates:
                    save_intermediate_output(final_output, pdf_basename, "tts_final")

                # Generate audio using the selected voice
                with st.spinner("Generating audio..."):
                    audio_mp3_data = generate_tts_audio(final_output, voice=voice_choice, speed=1.0)
                progress_bar.progress(100)
                st.audio(audio_mp3_data, format="audio/mp3")
                st.download_button(
                    label="Download TTS Audio",
                    data=audio_mp3_data,
                    file_name=f"{pdf_basename}_tts_audio.mp3",
                    mime="audio/mp3"
                )
            except Exception as e:
                st.error("Error during TTS readout generation: " + str(e))
            finally:
                cleanup_temp_files(temp_dir)

        if st.button("Generate TTS Readout Audio"):
            asyncio.run(process_tts_readout())