Spaces:
Running
Running
Plain TTS Flow
Browse files- .gitignore +2 -1
- app.py +117 -2
- prompts/plain_TTS_QA.prompt +25 -36
- prompts/plain_TTS_draft.prompt +33 -17
- prompts/plain_TTS_outline.prompt +45 -48
- utils/__pycache__/review_flow.cpython-311.pyc +0 -0
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
.env
|
2 |
dev/
|
3 |
promp_tmp/
|
4 |
-
.aiderignore
|
|
|
|
1 |
.env
|
2 |
dev/
|
3 |
promp_tmp/
|
4 |
+
.aiderignore
|
5 |
+
google_credentials.json
|
app.py
CHANGED
@@ -137,7 +137,7 @@ with st.expander("How It Works"):
|
|
137 |
)
|
138 |
|
139 |
# --- Mode Selection ---
|
140 |
-
mode = st.sidebar.radio("Choose a mode:", options=["Explore One Publication", "Write a Literature Review"])
|
141 |
|
142 |
if mode == "Explore One Publication":
|
143 |
st.subheader("Single-Publication Analysis π")
|
@@ -419,4 +419,119 @@ elif mode == "Write a Literature Review":
|
|
419 |
mime="audio/mp3"
|
420 |
)
|
421 |
except Exception as e:
|
422 |
-
st.error("Podcast generation failed: " + str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
)
|
138 |
|
139 |
# --- Mode Selection ---
|
140 |
+
mode = st.sidebar.radio("Choose a mode:", options=["Explore One Publication", "Write a Literature Review", "Generate TTS Readout"])
|
141 |
|
142 |
if mode == "Explore One Publication":
|
143 |
st.subheader("Single-Publication Analysis π")
|
|
|
419 |
mime="audio/mp3"
|
420 |
)
|
421 |
except Exception as e:
|
422 |
+
st.error("Podcast generation failed: " + str(e))
|
423 |
+
|
424 |
+
elif mode == "Generate TTS Readout":
|
425 |
+
st.subheader("Generate Simple TTS Readout")
|
426 |
+
uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])
|
427 |
+
|
428 |
+
# Mapping with nicer descriptors: Name, gender, and country flag emoji
|
429 |
+
voice_options = {
|
430 |
+
"Heart (Female) πΊπΈ": "af_heart",
|
431 |
+
"Bella (Female) πΊπΈ": "af_bella",
|
432 |
+
"Michael (Male) πΊπΈ": "am_michael",
|
433 |
+
"Puck (Male) πΊπΈ": "am_puck",
|
434 |
+
"Emma (Female) π¬π§": "bf_emma",
|
435 |
+
"George (Male) π¬π§": "bm_george"
|
436 |
+
}
|
437 |
+
selected_voice = st.selectbox("Select Voice", options=list(voice_options.keys()))
|
438 |
+
voice_choice = voice_options[selected_voice]
|
439 |
+
|
440 |
+
# Flag to store intermediate outputs to disk (disabled by default; set to True to keep them)
|
441 |
+
store_intermediates = False
|
442 |
+
|
443 |
+
if uploaded_pdf is not None:
|
444 |
+
if uploaded_pdf.size < 5000:
|
445 |
+
st.error("Input does not appear to be a valid academic paper.")
|
446 |
+
st.stop()
|
447 |
+
st.session_state["uploaded_pdf_tts"] = uploaded_pdf
|
448 |
+
st.success("PDF uploaded successfully. π")
|
449 |
+
|
450 |
+
# Load generation models
|
451 |
+
title_model_name, title_generation_config = get_generation_model("flash")
|
452 |
+
default_model_name, default_generation_config = get_generation_model("thinking")
|
453 |
+
|
454 |
+
progress_bar = st.progress(0)
|
455 |
+
|
456 |
+
async def process_tts_readout():
|
457 |
+
temp_dir = setup_temp_directories()
|
458 |
+
try:
|
459 |
+
pdf_basename = os.path.splitext(uploaded_pdf.name)[0]
|
460 |
+
st.session_state["pdf_basename_tts"] = pdf_basename
|
461 |
+
temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
|
462 |
+
with open(temp_pdf_path, "wb") as f:
|
463 |
+
f.write(uploaded_pdf.getbuffer())
|
464 |
+
progress_bar.progress(10)
|
465 |
+
|
466 |
+
# Upload PDF to Gemini and wait for processing
|
467 |
+
pdf_file = upload_to_gemini(temp_pdf_path, mime_type="application/pdf")
|
468 |
+
wait_for_files_active([pdf_file])
|
469 |
+
progress_bar.progress(20)
|
470 |
+
|
471 |
+
# Validate the academic paper via title/reference check
|
472 |
+
with st.spinner("Validating academic paper..."):
|
473 |
+
title_ref = await generate_title_reference_and_classification(
|
474 |
+
pdf_file, title_model_name, title_generation_config
|
475 |
+
)
|
476 |
+
if title_ref.error:
|
477 |
+
st.error("Uploaded PDF is not a valid academic paper: " + title_ref.error)
|
478 |
+
st.stop()
|
479 |
+
progress_bar.progress(30)
|
480 |
+
|
481 |
+
# Step 1: Generate TTS Outline
|
482 |
+
with st.spinner("Generating TTS Outline..."):
|
483 |
+
plain_tts_outline_prompt = load_prompt("prompts/plain_TTS_outline.prompt")
|
484 |
+
outline_output = await async_generate_text(
|
485 |
+
plain_tts_outline_prompt, pdf_file,
|
486 |
+
model_name=default_model_name,
|
487 |
+
generation_config=default_generation_config
|
488 |
+
)
|
489 |
+
progress_bar.progress(50)
|
490 |
+
if store_intermediates:
|
491 |
+
save_intermediate_output(outline_output, pdf_basename, "tts_outline")
|
492 |
+
|
493 |
+
# Step 2: Generate TTS Draft using the outline
|
494 |
+
with st.spinner("Generating TTS Draft..."):
|
495 |
+
plain_tts_draft_prompt = load_prompt("prompts/plain_TTS_draft.prompt")
|
496 |
+
combined_draft_prompt = outline_output + "\n\n" + plain_tts_draft_prompt
|
497 |
+
draft_output = await async_generate_text(
|
498 |
+
combined_draft_prompt, pdf_file,
|
499 |
+
model_name=default_model_name,
|
500 |
+
generation_config=default_generation_config
|
501 |
+
)
|
502 |
+
progress_bar.progress(70)
|
503 |
+
if store_intermediates:
|
504 |
+
save_intermediate_output(draft_output, pdf_basename, "tts_draft")
|
505 |
+
|
506 |
+
# Step 3: Finalize the readout via Q&A
|
507 |
+
with st.spinner("Finalizing TTS Readout..."):
|
508 |
+
plain_tts_qa_prompt = load_prompt("prompts/plain_TTS_QA.prompt")
|
509 |
+
combined_qa_prompt = draft_output + "\n\n" + plain_tts_qa_prompt
|
510 |
+
final_output = await async_generate_text(
|
511 |
+
combined_qa_prompt, pdf_file,
|
512 |
+
model_name=default_model_name,
|
513 |
+
generation_config=default_generation_config
|
514 |
+
)
|
515 |
+
progress_bar.progress(90)
|
516 |
+
if store_intermediates:
|
517 |
+
save_intermediate_output(final_output, pdf_basename, "tts_final")
|
518 |
+
|
519 |
+
# Generate audio using the selected voice
|
520 |
+
with st.spinner("Generating audio..."):
|
521 |
+
audio_mp3_data = generate_tts_audio(final_output, voice=voice_choice, speed=1.0)
|
522 |
+
progress_bar.progress(100)
|
523 |
+
|
524 |
+
st.audio(audio_mp3_data, format="audio/mp3")
|
525 |
+
st.download_button(
|
526 |
+
label="Download TTS Audio",
|
527 |
+
data=audio_mp3_data,
|
528 |
+
file_name=f"{pdf_basename}_tts_audio.mp3",
|
529 |
+
mime="audio/mp3"
|
530 |
+
)
|
531 |
+
except Exception as e:
|
532 |
+
st.error("Error during TTS readout generation: " + str(e))
|
533 |
+
finally:
|
534 |
+
cleanup_temp_files(temp_dir)
|
535 |
+
|
536 |
+
if st.button("Generate TTS Readout Audio"):
|
537 |
+
asyncio.run(process_tts_readout())
|
prompts/plain_TTS_QA.prompt
CHANGED
@@ -1,46 +1,35 @@
|
|
1 |
-
You are a quality assurance specialist for academic Text-to-Speech (TTS) content. Your task is to
|
2 |
|
3 |
Input:
|
4 |
1. Original academic paper (complete text)
|
5 |
-
2. TTS-adapted version of the paper
|
6 |
|
7 |
Quality Assurance Process:
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
- Remove
|
11 |
-
- Remove
|
12 |
-
-
|
13 |
-
-
|
14 |
-
-
|
|
|
15 |
|
16 |
-
|
17 |
-
- Replace
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
- Convert all parenthetical citations to spoken format:
|
23 |
-
• Example: "(Smith et al., 2020)" → "as Smith and colleagues demonstrated in 2020"
|
24 |
-
• Use varied phrasing for natural flow
|
25 |
-
- Ensure acronyms are properly expanded at first use
|
26 |
-
- Adapt table/figure references for audio context
|
27 |
-
|
28 |
-
3. Output Format Requirements
|
29 |
-
- Provide a single, continuous document without section splits
|
30 |
-
- Use clear section headings as ## [Section Title]
|
31 |
-
- DO NOT include:
|
32 |
-
• References section
|
33 |
-
• Tables section at the end
|
34 |
-
• Figures section at the end
|
35 |
-
• Acknowledgements section
|
36 |
-
• Author information
|
37 |
-
- Ensure transitions between merged sections flow naturally
|
38 |
|
39 |
4. Final Deliverable
|
40 |
-
-
|
41 |
-
-
|
42 |
-
-
|
43 |
-
- The entire paper should be presented
|
44 |
-
- The document should be fully ready for direct input into a TTS system
|
45 |
|
46 |
-
Important: Your output should be ONLY the final, production-ready TTS version with all corrections applied. Do not include
|
|
|
1 |
+
You are a quality assurance specialist for academic Text-to-Speech (TTS) content. Your task is to review and finalize a draft TTS adaptation, focusing especially on handling tables correctly and removing unnecessary elements.
|
2 |
|
3 |
Input:
|
4 |
1. Original academic paper (complete text)
|
5 |
+
2. TTS-adapted draft version of the paper
|
6 |
|
7 |
Quality Assurance Process:
|
8 |
+
1. Table Content Handling
|
9 |
+
- Convert all tabular information into narrative paragraphs
|
10 |
+
- For comparative tables, create flowing text that highlights key comparisons
|
11 |
+
- Use phrases like "Comparing the studies..." or "The research shows several patterns across..."
|
12 |
+
- Ensure all valuable data from tables is preserved in spoken form
|
13 |
|
14 |
+
2. Content Verification
|
15 |
+
- Remove the references section completely
|
16 |
+
- Remove any partial or complete table listings
|
17 |
+
- Ensure no section headings contain special characters or formatting
|
18 |
+
- Verify all temperatures are properly written out (e.g., "negative 80 degrees Celsius")
|
19 |
+
- Check that all acronyms are properly expanded at first use
|
20 |
+
- Confirm all parenthetical citations are converted to natural spoken format
|
21 |
|
22 |
+
3. Audio-Friendly Formatting
|
23 |
+
- Replace ALL special characters with spoken equivalents
|
24 |
+
- Ensure section headings are in plain text format with no special characters
|
25 |
+
- Verify transitions between sections flow naturally
|
26 |
+
- Check that all content is presented in complete sentences
|
27 |
+
- Confirm no markup, formatting codes, or non-verbal elements remain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
4. Final Deliverable
|
30 |
+
- Provide the complete, corrected text as your output
|
31 |
+
- The document must contain ZERO special characters, hashtags, asterisks, or formatting codes
|
32 |
+
- Every element must be in a form that can be read aloud naturally
|
33 |
+
- The entire paper should be presented as continuous, flowing text
|
|
|
34 |
|
35 |
+
Important: Your output should be ONLY the final, production-ready TTS version with all corrections applied. Do not include explanations of your changes or QA notes. The text should flow as a continuous, complete document that a TTS system can read without encountering any non-verbal elements or requiring human interpretation.
|
prompts/plain_TTS_draft.prompt
CHANGED
@@ -1,47 +1,63 @@
|
|
1 |
You are an expert in creating TTS-friendly versions of academic papers. You're now in PHASE 2 - EXECUTION, where you'll transform an academic paper into audio-optimized content following the conversion plan created in Phase 1.
|
2 |
|
3 |
PHASE 2 - EXECUTION (CURRENT TASK):
|
4 |
-
Using
|
5 |
|
6 |
Instructions for Phase 2:
|
7 |
-
|
8 |
1. Input requirements:
|
9 |
- The original academic paper text
|
10 |
-
- The
|
11 |
|
12 |
-
2. For each section in the
|
13 |
-
-
|
14 |
-
- Apply only the necessary modifications specified in tts_conversion_instructions
|
15 |
- Preserve original wording wherever it doesn't hinder audio comprehension
|
|
|
16 |
|
17 |
-
3. Make
|
18 |
- Converting parenthetical citations into spoken form
|
19 |
• Example: "(Smith et al., 2020)" → "as Smith and colleagues demonstrated in 2020"
|
20 |
-
|
21 |
-
• Only modify how tables/figures are referenced, not the surrounding analysis
|
22 |
- Spelling out symbols, equations and non-standard characters
|
23 |
• Example: "p<0.05" → "p less than 0.05"
|
24 |
- Adding minimal transition words between sections only when necessary for audio flow
|
25 |
|
26 |
-
4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
- Technical terminology or field-specific vocabulary
|
28 |
-
- Sentence structure unless absolutely necessary for audio comprehension
|
29 |
- The author's original arguments, assertions, or conclusions
|
30 |
- Any content that already works well in spoken form
|
31 |
|
32 |
-
|
33 |
- Create a single comprehensive document with clearly marked sections
|
34 |
- Each section should follow this structure:
|
35 |
```
|
36 |
-
|
37 |
|
38 |
-
[
|
39 |
```
|
|
|
40 |
|
41 |
-
|
42 |
-
- Remain as close to verbatim as possible
|
43 |
- Only modify elements that specifically hinder TTS delivery
|
44 |
- Maintain the exact same information, tone, and academic level
|
45 |
- Be indistinguishable from the original in terms of content and meaning
|
|
|
46 |
|
47 |
-
Note: The principle of minimal intervention should guide all transformations.
|
|
|
1 |
You are an expert in creating TTS-friendly versions of academic papers. You're now in PHASE 2 - EXECUTION, where you'll transform an academic paper into audio-optimized content following the conversion plan created in Phase 1.
|
2 |
|
3 |
PHASE 2 - EXECUTION (CURRENT TASK):
|
4 |
+
Using the original academic paper, create a TTS-optimized version while preserving the original language wherever possible.
|
5 |
|
6 |
Instructions for Phase 2:
|
|
|
7 |
1. Input requirements:
|
8 |
- The original academic paper text
|
9 |
+
- The outline with a plan for the transformation.
|
10 |
|
11 |
+
2. For each section in the paper:
|
12 |
+
- Apply only necessary modifications for audio clarity
|
|
|
13 |
- Preserve original wording wherever it doesn't hinder audio comprehension
|
14 |
+
- Maintain the logical flow and academic integrity of the content
|
15 |
|
16 |
+
3. Make targeted transformations limited to:
|
17 |
- Converting parenthetical citations into spoken form
|
18 |
• Example: "(Smith et al., 2020)" → "as Smith and colleagues demonstrated in 2020"
|
19 |
+
• Only include the first citation when multiple citations appear for the same point
|
|
|
20 |
- Spelling out symbols, equations and non-standard characters
|
21 |
• Example: "p<0.05" → "p less than 0.05"
|
22 |
- Adding minimal transition words between sections only when necessary for audio flow
|
23 |
|
24 |
+
4. Table transformation requirements (CRITICAL):
|
25 |
+
- ALL tables must be completely transformed into narrative paragraphs
|
26 |
+
- Do not preserve ANY tabular structure, column headings, or row formats
|
27 |
+
- For comparative tables showing multiple studies/methods:
|
28 |
+
• Begin with a transition phrase like "Comparing the key studies in this review..."
|
29 |
+
• Organize information by meaningful patterns (chronological, methodological similarities, or finding categories)
|
30 |
+
• Highlight comparative elements: "While Study A found X, Study B demonstrated Y"
|
31 |
+
• Ensure all critical data points are preserved in the narrative
|
32 |
+
- For data tables:
|
33 |
+
• Convert into descriptive paragraphs that present the patterns and relationships
|
34 |
+
• Use natural language to describe trends, comparisons, and outliers
|
35 |
+
• Maintain the analytical insights from the original table
|
36 |
+
|
37 |
+
5. Special handling for other complex elements:
|
38 |
+
- For references section: Omit the final reference list entirely as it's not suitable for audio
|
39 |
+
- For figures/visuals: Briefly describe what they would show, then focus on the insights they provide
|
40 |
+
|
41 |
+
6. DO NOT modify:
|
42 |
- Technical terminology or field-specific vocabulary
|
|
|
43 |
- The author's original arguments, assertions, or conclusions
|
44 |
- Any content that already works well in spoken form
|
45 |
|
46 |
+
7. Output format:
|
47 |
- Create a single comprehensive document with clearly marked sections
|
48 |
- Each section should follow this structure:
|
49 |
```
|
50 |
+
Section Title
|
51 |
|
52 |
+
[Audio-optimized content]
|
53 |
```
|
54 |
+
- Do not include any formatting characters such as #, *, _, or other markdown symbols
|
55 |
|
56 |
+
8. Final output should:
|
57 |
+
- Remain as close to verbatim as possible in non-tabular content
|
58 |
- Only modify elements that specifically hinder TTS delivery
|
59 |
- Maintain the exact same information, tone, and academic level
|
60 |
- Be indistinguishable from the original in terms of content and meaning
|
61 |
+
- Contain absolutely no special characters, formatting codes, or tabular structures
|
62 |
|
63 |
+
Note: The principle of minimal intervention should guide all transformations. Your goal is to create an audio-friendly version that maintains the scholarly integrity of the original while enabling smooth TTS delivery. The entire paper should be presented in its final, TTS-ready form with no special characters that cannot be read aloud.
|
prompts/plain_TTS_outline.prompt
CHANGED
@@ -4,63 +4,60 @@ PHASE 1 - ANALYSIS AND PLANNING (CURRENT TASK):
|
|
4 |
Analyze the provided academic paper and create a structured JSON plan for its conversion to TTS format. This plan will serve as instructions for the actual conversion in Phase 2.
|
5 |
|
6 |
Instructions for Phase 1:
|
7 |
-
|
8 |
Identify and map the paper's structure:
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
Note where figures, tables, equations, and citations appear
|
13 |
-
|
14 |
-
Identify sections to exclude (references, acknowledgments, etc.)
|
15 |
|
16 |
Output a JSON planning document with this structure:
|
|
|
17 |
{
|
18 |
-
"paper_metadata": {
|
19 |
-
"title": "Title of the paper",
|
20 |
-
"authors": ["Author 1 Name", "Author 2 Name", ...],
|
21 |
-
"publication_details": "e.g., Journal Name, Year (if available)"
|
22 |
-
},
|
23 |
-
"conversion_plan": [
|
24 |
-
{
|
25 |
-
"section_id": "unique_identifier",
|
26 |
-
"section_title": "Section Title",
|
27 |
-
"section_type": "abstract|introduction|methodology|results|discussion|conclusion|etc",
|
28 |
-
"content_markers": {
|
29 |
-
"start": "First 5-7 words of section...",
|
30 |
-
"end": "...last 5-7 words of section"
|
31 |
-
},
|
32 |
-
"tts_conversion_instructions": [
|
33 |
-
"Specific instruction for handling this section",
|
34 |
-
"Handle X citations in paragraph 2",
|
35 |
-
"Narrate Table Y findings",
|
36 |
-
"Simplify equation discussion in paragraph Z"
|
37 |
-
],
|
38 |
-
"special_elements": [
|
39 |
-
{
|
40 |
-
"element_type": "citation|table|figure|equation",
|
41 |
-
"location": "Paragraph number or descriptive location",
|
42 |
-
"handling_strategy": "How this element should be converted"
|
43 |
-
}
|
44 |
-
]
|
45 |
-
}
|
46 |
-
],
|
47 |
-
"global_conversion_guidelines": [
|
48 |
-
"General principle 1 for the entire document",
|
49 |
-
"General principle 2 for the entire document"
|
50 |
-
]
|
51 |
}
|
|
|
52 |
|
53 |
For each section, provide clear content_markers using the first and last few words to help locate the section boundaries.
|
54 |
|
55 |
For tts_conversion_instructions, be specific about:
|
56 |
-
|
57 |
-
How to handle
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
How to improve flow between paragraphs or concepts
|
64 |
|
65 |
PHASE 2 - EXECUTION (FUTURE TASK):
|
66 |
In the next step, these instructions will be used to transform the actual content into TTS-friendly text, following all the specific guidelines provided in Phase 1.
|
|
|
4 |
Analyze the provided academic paper and create a structured JSON plan for its conversion to TTS format. This plan will serve as instructions for the actual conversion in Phase 2.
|
5 |
|
6 |
Instructions for Phase 1:
|
|
|
7 |
Identify and map the paper's structure:
|
8 |
+
- Create a logical outline of all major sections and subsections
|
9 |
+
- Note where figures, tables, equations, and citations appear
|
10 |
+
- Identify sections to exclude (references, acknowledgments, etc.)
|
|
|
|
|
|
|
11 |
|
12 |
Output a JSON planning document with this structure:
|
13 |
+
```
|
14 |
{
|
15 |
+
"paper_metadata": {
|
16 |
+
"title": "Title of the paper",
|
17 |
+
"authors": ["Author 1 Name", "Author 2 Name", ...],
|
18 |
+
"publication_details": "e.g., Journal Name, Year (if available)"
|
19 |
+
},
|
20 |
+
"conversion_plan": [
|
21 |
+
{
|
22 |
+
"section_id": "unique_identifier",
|
23 |
+
"section_title": "Section Title",
|
24 |
+
"section_type": "abstract|introduction|methodology|results|discussion|conclusion|etc",
|
25 |
+
"content_markers": {
|
26 |
+
"start": "First 5-7 words of section...",
|
27 |
+
"end": "...last 5-7 words of section"
|
28 |
+
},
|
29 |
+
"tts_conversion_instructions": [
|
30 |
+
"Specific instruction for handling this section",
|
31 |
+
"Handle X citations in paragraph 2",
|
32 |
+
"Narrate Table Y findings",
|
33 |
+
"Simplify equation discussion in paragraph Z"
|
34 |
+
],
|
35 |
+
"special_elements": [
|
36 |
+
{
|
37 |
+
"element_type": "citation|table|figure|equation",
|
38 |
+
"location": "Paragraph number or descriptive location",
|
39 |
+
"handling_strategy": "How this element should be converted"
|
40 |
+
}
|
41 |
+
]
|
42 |
+
}
|
43 |
+
],
|
44 |
+
"global_conversion_guidelines": [
|
45 |
+
"General principle 1 for the entire document",
|
46 |
+
"General principle 2 for the entire document"
|
47 |
+
]
|
48 |
}
|
49 |
+
```
|
50 |
|
51 |
For each section, provide clear content_markers using the first and last few words to help locate the section boundaries.
|
52 |
|
53 |
For tts_conversion_instructions, be specific about:
|
54 |
+
- How to handle citations (e.g., "(Smith et al., 2019)" → "as Smith and colleagues found in 2019")
|
55 |
+
- How to handle figures and tables:
|
56 |
+
* For figures: Include a brief 1-2 sentence description using pattern "The paper presents Figure X, which shows [key visual element]." Then focus on the insights: "This figure illustrates that..."
|
57 |
+
* For simple tables: Summarize in a short paragraph using pattern "The paper now presents Table Y - a comparison of [key elements]. The main findings show that..."
|
58 |
+
* For complex tables (e.g., regression results): Use pattern "Table Z presents regression results that demonstrate [1-2 key conclusions]" without reading individual values
|
59 |
+
- For mathematical formulas: Note their presence without reading them verbatim: "The paper includes a mathematical expression for [concept]" then explain the conclusion or implication
|
60 |
+
- How to improve flow between paragraphs or concepts
|
|
|
61 |
|
62 |
PHASE 2 - EXECUTION (FUTURE TASK):
|
63 |
In the next step, these instructions will be used to transform the actual content into TTS-friendly text, following all the specific guidelines provided in Phase 1.
|
utils/__pycache__/review_flow.cpython-311.pyc
CHANGED
Binary files a/utils/__pycache__/review_flow.cpython-311.pyc and b/utils/__pycache__/review_flow.cpython-311.pyc differ
|
|