import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment

# Length of the pause inserted between consecutive language segments (ms).
PAUSE_MS = 500


# Step 1: Define a function to generate and merge TTS audio for multiple languages
def multilingual_tts(korean_text, british_english_text, american_english_text):
    """Synthesize the given texts with gTTS and merge them into one MP3 file.

    Segments are produced in a fixed order (Korean, British English,
    American English). Texts that are ``None``, empty, or whitespace-only
    are skipped. A ``PAUSE_MS`` silence is placed between consecutive
    segments.

    Args:
        korean_text: Text to speak in Korean.
        british_english_text: Text to speak in English via the ``co.uk``
            gTTS endpoint.
        american_english_text: Text to speak in English via the ``com``
            gTTS endpoint.

    Returns:
        Path to a newly created ``.mp3`` file holding the merged audio, or
        ``None`` when no text was provided. The file is not deleted by this
        function.
    """
    # (gTTS language code, Google Translate top-level domain, text to speak).
    # The top-level domain is what selects the regional English accent.
    voices = [
        ("ko", "com", korean_text),
        ("en", "co.uk", british_english_text),  # British English
        ("en", "com", american_english_text),  # American English
    ]

    segments = []
    for lang, tld, text in voices:
        # Process only if text is provided (also tolerates None).
        if not text or not text.strip():
            continue
        tts = gTTS(text, lang=lang, tld=tld)
        # Keep the synthesized MP3 in memory; no intermediate files needed.
        mp3_buffer = BytesIO()
        tts.write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        segments.append(AudioSegment.from_file(mp3_buffer, format="mp3"))

    if not segments:
        # Nothing to speak: avoid exporting a zero-length MP3.
        return None

    # Insert silence only *between* languages, not after the last one.
    pause = AudioSegment.silent(duration=PAUSE_MS)
    combined_audio = segments[0]
    for segment in segments[1:]:
        combined_audio += pause + segment

    # Write to a unique temp file so that simultaneous requests do not
    # overwrite each other's output. The handle is closed before exporting
    # so the path can be reopened for writing on every platform.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        output_file = tmp.name
    combined_audio.export(output_file, format="mp3")
    return output_file


# Step 2: Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File")

    with gr.Row():
        korean_input = gr.Textbox(
            label="Enter Korean Text:",
            placeholder="안녕하세요",
        )
        british_english_input = gr.Textbox(
            label="Enter British English Text:",
            placeholder="Hello (British)",
        )
        american_english_input = gr.Textbox(
            label="Enter American English Text:",
            placeholder="Hello (American)",
        )

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # Input order must match the parameter order of multilingual_tts.
    generate_button.click(
        multilingual_tts,
        inputs=[korean_input, british_english_input, american_english_input],
        outputs=output_audio,
    )

# Run the app
if __name__ == "__main__":
    demo.launch()