hivecorp committed on
Commit
a66f077
·
verified ·
1 Parent(s): 366652b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -11
app.py CHANGED
@@ -4,11 +4,13 @@ import asyncio
4
  from fastapi import FastAPI
5
  import edge_tts
6
  from fastapi.responses import FileResponse
 
7
 
8
  app = FastAPI()
9
 
 
10
  def split_text(text, max_chunk_size=500):
11
- """Split text into smaller chunks."""
12
  sentences = text.replace('।', '.').replace('؟', '?').split('.')
13
  chunks = []
14
  current_chunk = []
@@ -31,41 +33,64 @@ def split_text(text, max_chunk_size=500):
31
 
32
  return chunks
33
 
 
34
  async def process_chunk(text, voice, temp_dir, chunk_index):
35
- """Process a single chunk of text."""
36
  tmp_path = os.path.join(temp_dir, f"chunk_{chunk_index}.mp3")
37
- print(f"🎤 Processing chunk {chunk_index}: {text[:50]}...") # Logging
38
  communicate = edge_tts.Communicate(text, voice)
39
  await communicate.save(tmp_path)
40
  return tmp_path
41
 
 
42
  async def combine_audio_files(chunk_files, output_path):
43
- """Combine multiple MP3 files into one."""
44
- from pydub import AudioSegment
45
-
46
  combined = AudioSegment.empty()
 
47
  for file in chunk_files:
48
- print(f"🔹 Adding {file} to final output") # Logging
49
  combined += AudioSegment.from_mp3(file)
50
 
51
  combined.export(output_path, format="mp3")
52
 
 
53
  for file in chunk_files:
54
  os.remove(file)
55
 
 
 
 
 
 
56
  @app.get("/tts")
57
  async def tts(text: str, voice: str = "en-US-AriaNeural"):
58
- """Main API function to process TTS."""
59
  temp_dir = "temp_audio"
60
  os.makedirs(temp_dir, exist_ok=True)
 
61
  chunks = split_text(text)
62
-
 
63
  if len(chunks) == 1:
64
- return await FileResponse(await process_chunk(text, voice, temp_dir, 0), media_type="audio/mpeg", filename="speech.mp3")
 
 
 
 
65
 
66
- chunk_files = await asyncio.gather(*[process_chunk(ch, voice, temp_dir, i) for i, ch in enumerate(chunks)])
 
 
 
67
 
 
68
  output_file = "final_output.mp3"
69
  await combine_audio_files(chunk_files, output_file)
70
 
 
71
  return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
 
 
 
 
 
 
4
  from fastapi import FastAPI
5
  import edge_tts
6
  from fastapi.responses import FileResponse
7
+ from pydub import AudioSegment
8
 
9
  app = FastAPI()
10
 
11
+ # 🔹 Function to split text into manageable chunks
12
  def split_text(text, max_chunk_size=500):
13
+ """Split text into smaller chunks at sentence boundaries."""
14
  sentences = text.replace('।', '.').replace('؟', '?').split('.')
15
  chunks = []
16
  current_chunk = []
 
33
 
34
  return chunks
35
 
36
+ # 🔹 Function to process a single chunk asynchronously
37
  async def process_chunk(text, voice, temp_dir, chunk_index):
38
+ """Generate speech for a single chunk and save as MP3."""
39
  tmp_path = os.path.join(temp_dir, f"chunk_{chunk_index}.mp3")
40
+ print(f"🎤 Processing chunk {chunk_index}: {text[:50]}...") # Logging for debugging
41
  communicate = edge_tts.Communicate(text, voice)
42
  await communicate.save(tmp_path)
43
  return tmp_path
44
 
45
+ # 🔹 Function to merge all chunked MP3 files into a single audio file
46
  async def combine_audio_files(chunk_files, output_path):
47
+ """Combine multiple MP3 files into one final MP3."""
 
 
48
  combined = AudioSegment.empty()
49
+
50
  for file in chunk_files:
51
+ print(f"🔹 Adding {file} to final output") # Logging for debugging
52
  combined += AudioSegment.from_mp3(file)
53
 
54
  combined.export(output_path, format="mp3")
55
 
56
+ # Remove temporary files
57
  for file in chunk_files:
58
  os.remove(file)
59
 
60
+ @app.get("/")
61
+ def home():
62
+ return {"message": "✅ EdgeTTS FastAPI is running!"}
63
+
64
+ # 🔹 Main TTS API
65
  @app.get("/tts")
66
  async def tts(text: str, voice: str = "en-US-AriaNeural"):
67
+ """Main API function to handle text-to-speech conversion."""
68
  temp_dir = "temp_audio"
69
  os.makedirs(temp_dir, exist_ok=True)
70
+
71
  chunks = split_text(text)
72
+
73
+ # If text is short, process directly
74
  if len(chunks) == 1:
75
+ print("📢 Processing without chunking...")
76
+ output_file = await process_chunk(text, voice, temp_dir, 0)
77
+ return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
78
+
79
+ print(f"🚀 Splitting into {len(chunks)} chunks and processing concurrently...")
80
 
81
+ # 🔹 Concurrently process all chunks
82
+ chunk_files = await asyncio.gather(*[
83
+ process_chunk(ch, voice, temp_dir, i) for i, ch in enumerate(chunks)
84
+ ])
85
 
86
+ # 🔹 Merge all MP3 files
87
  output_file = "final_output.mp3"
88
  await combine_audio_files(chunk_files, output_file)
89
 
90
+ print("✅ TTS Generation Complete. Sending response...")
91
  return FileResponse(output_file, media_type="audio/mpeg", filename="speech.mp3")
92
+
93
+ # 🔹 Ensure app starts in Hugging Face Spaces
94
+ if __name__ == "__main__":
95
+ import uvicorn
96
+ uvicorn.run(app, host="0.0.0.0", port=7860)