from TTS.api import TTS
from bs4 import BeautifulSoup
import requests
import streamlit as st
import tempfile
import os
import json
import datetime

# Application settings are read from a JSON file in the working directory.
# JSON is UTF-8 by specification, so do not depend on the platform default.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

APP_NAME = config['APP_NAME']
APP_LOGO = config['APP_LOGO']
APP_DESCRIPTION = config['APP_DESCRIPTION']
LANGUAGES_URL = config['LANGUAGES_URL']


def contains_only_ascii(input_string):
    """Return True if every character of *input_string* has a code point < 128."""
    return all(ord(char) < 128 for char in input_string)


def get_iso_languages():
    """Download LANGUAGES_URL and build a ``{language_name: iso_code}`` mapping.

    Every ``<p>`` element after the first is inspected; those whose text
    splits into exactly two whitespace-separated tokens are read as
    ``(iso_code, language_name)``. Entries whose language name contains
    non-ASCII characters are skipped.

    Returns:
        dict mapping language name to ISO code.

    Raises:
        requests.RequestException: if the request times out, fails, or
            returns an HTTP error status.
    """
    # Without a timeout an unresponsive server would hang the app forever.
    response = requests.get(LANGUAGES_URL, timeout=30)
    # Fail loudly rather than silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    p_tags = soup.find_all('p')

    iso_language_dict = {}
    for p_tag in p_tags[1:]:  # Skip the first <p>, which contains the header
        parts = p_tag.get_text().split()
        if len(parts) == 2:
            iso_code, language_name = parts
            if contains_only_ascii(language_name):
                iso_language_dict[language_name] = iso_code
    return iso_language_dict


def create_temp_file(input_wav):
    """Copy the contents of *input_wav* into a new named temporary file.

    Args:
        input_wav: a readable binary file-like object (its ``read()`` result
            is written out unchanged).

    Returns:
        The still-open temporary file object. It is created with
        ``delete=False``, so the caller must dispose of it with
        ``remove_temp_file``.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file.write(input_wav.read())
    # The file is reopened by name by other code while this handle is still
    # open; flush so the data is on disk and not sitting in the write buffer.
    temp_file.flush()
    return temp_file


def remove_temp_file(temp_file):
    """Close *temp_file* and delete it from disk."""
    temp_file.close()
    os.remove(temp_file.name)


def update_progress(percent, text):
    """Set the progress bar to *percent* and show *text* as the status line.

    Uses the module-level ``progress_bar`` and ``status_text`` widgets, which
    must have been created before this function is called.
    """
    progress_bar.progress(percent)
    status_text.text(text)


iso_languages = get_iso_languages()
languages = list(iso_languages.keys())

st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

language = st.selectbox('Select a language', languages)
prompt = st.text_input('Enter your prompt')
input_wav = st.file_uploader("Upload a WAV file", type=["wav"])

# Nothing happens until a file has been uploaded; once it has, a prompt is
# also required before audio is generated.
if input_wav:
    if not prompt:
        st.error('Please write prompt')
    else:
        progress_bar = st.progress(0)
        status_text = st.empty()

        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
        output_filename = f"recording_{formatted_datetime}.wav"

        iso_code = iso_languages[language]
        print(f'Language: {language}, prompt: {prompt}')

        temp_file = create_temp_file(input_wav)
        try:
            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")

            update_progress(50, 'Generating audio...')
            api.tts_with_vc_to_file(
                prompt,
                speaker_wav=temp_file.name,
                file_path=output_filename
            )
        finally:
            # Remove the uploaded copy even if model loading or synthesis fails.
            remove_temp_file(temp_file)

        # Read the result through a context manager so the handle is closed.
        with open(output_filename, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        update_progress(100, 'Audio generated successfully!')
        st.audio(audio_bytes, format='audio/wav')
        st.download_button('Download WAV', data=audio_bytes, file_name='output.wav')