Spaces:
Build error
Build error
File size: 3,073 Bytes
ea9036c 7d0bff2 201db01 7d0bff2 201db01 7d0bff2 201db01 7d0bff2 201db01 7d0bff2 ea9036c ebe9cbe ea9036c 7d0bff2 201db01 7d0bff2 ebe9cbe 201db01 7d0bff2 ebe9cbe 7d0bff2 ebe9cbe 7d0bff2 ea9036c ebe9cbe 201db01 ebe9cbe 7d0bff2 201db01 ea9036c 7d0bff2 ea9036c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from subprocess import call
import gradio as gr
import os
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer
# Snippet kept for reference: list the available 🐸TTS models and pick the
# first one.
# all_models = TTS.list_models()
# for model in all_models:
#     print(model)
# print("Using model: ", all_models[0])
# model = all_models[0]

# Longest input, in characters, that inference() synthesizes; anything beyond
# this is cut off there.
MAX_TXT_LEN = 500

# The model object is created once at module load and reused by every
# inference() call.
print("Downloading model...", '')
_VOICE_CLONE_MODEL_NAME = 'tts_models/multilingual/multi-dataset/your_tts'
voiceCloneModel = TTS(_VOICE_CLONE_MODEL_NAME)
# Alternative model left here for reference:
# tts = TTS("tts_models/de/thorsten/tacotron2-DDC")
def run_cmd(command):
    """Echo ``command`` to stdout and run it with ``subprocess.call``.

    Args:
        command: Passed unchanged to ``subprocess.call`` (imported at file
            level as ``call``).

    Raises:
        SystemExit: With status 1 when a ``KeyboardInterrupt`` is raised
            while the command is being echoed or executed.
    """
    # Function-scope import: ``sys`` is not among the file-level imports, so
    # the interrupt handler below used to fail with NameError instead of
    # exiting with status 1.
    import sys

    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)
# Reference recording used to clone the voice for each dropdown choice.
_SPEAKER_WAVS = {
    'Speaker-1': 'input/amitabh.mp3',
    'Speaker-2': 'input/amrish.mp3',
    'Speaker-3': 'input/obama.mp3',
    'Speaker-4': 'input/trump.wav',
    'Rock': 'input/sample/Dwayne-Johnson-sample.wav',
    'Elon': 'input/sample/Elon-Sample.wav',
    'Obama': 'input/sample/Obama-sample.wav',
    'Tony': 'input/sample/tony-stark-2.wav',
    'Madara Uchiha': 'input/sample/Madara.mp3',
}
# Recording used when the selection matches none of the names above
# (including ``None``, i.e. nothing selected).
_DEFAULT_SPEAKER_WAV = 'input/z-default.wav'
# Every request writes its result to this one file.
_OUTPUT_WAV = 'output/output.wav'


def inference(text, speaker):
    """Synthesize ``text`` in the voice selected by ``speaker``.

    Args:
        text: Text to speak. Only the first ``MAX_TXT_LEN`` characters are
            synthesized; longer input is truncated and a notice is printed.
        speaker: Name of one of the predefined voices. Any other value falls
            back to the default reference recording.

    Returns:
        The path of the generated audio file, ``'output/output.wav'``.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(
            f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    speaker_wav = _SPEAKER_WAVS.get(speaker, _DEFAULT_SPEAKER_WAV)

    # The output directory is not guaranteed to exist (for example on a fresh
    # checkout), so create it before the model writes into it.
    os.makedirs(os.path.dirname(_OUTPUT_WAV), exist_ok=True)

    voiceCloneModel.tts_to_file(text, speaker_wav=speaker_wav,
                                language="en", file_path=_OUTPUT_WAV)
    return _OUTPUT_WAV
# Voice names offered in the dropdown; inference() maps each one to a
# reference recording.
sampleInput = ['Speaker-1', 'Speaker-2', 'Speaker-3',
               'Speaker-4', 'Rock', 'Elon', 'Obama', 'Tony', 'Madara Uchiha']

inputs = [
    gr.Textbox(lines=5, label="Input Text"),
    # ``value`` sets the initially selected entry. The previous ``default=``
    # keyword is not a parameter of the top-level ``gr.Dropdown`` component,
    # so the intended preselection was not applied.
    gr.Dropdown(sampleInput, label="Sample Input", value="Speaker-1"),
]
outputs = gr.Audio(type="filepath", label="Output Audio")

title = "Text To Speech"
description = "An example of using TTS to generate speech from text."
article = ""
# Each example row supplies a value for the text box only.
examples = [
    ["This is an open-source library that generates synthetic speech"]
]

# NOTE(review): ``verbose`` and ``enable_queue`` appear to be deprecated
# ``Interface`` keywords in newer Gradio releases; confirm against the Gradio
# version this app is deployed with before changing them.
gr.Interface(
    inference,
    inputs,
    outputs,
    verbose=True,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
    allow_flagging="never",
).launch(debug=True)
|