# Spaces:
# Runtime error
# Runtime error
import os
import time

import azure.cognitiveservices.speech as speechsdk
import gradio as gr
import requests
from pyht import Client
from pyht.client import TTSOptions
# Sample sentence for exercising the TTS latency checks (the same sentence is
# offered as the UI example input).
text = 'Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly'
def azure_tts(text):
    """Measure Azure TTS time-to-first-chunk for ``text``.

    Reads the subscription key and region from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables, starts an MP3 synthesis with the
    ``en-US-JennyNeural`` voice into a pull stream, and times how long it
    takes until the first 512-byte read from that stream returns.

    Args:
        text: Plain text to synthesize.

    Returns:
        Seconds elapsed between starting the synthesis and the first read
        returning, or ``0`` when the read yields no audio bytes.

    Raises:
        RuntimeError: If ``SPEECH_KEY`` or ``SPEECH_REGION`` is not set.
    """
    speech_key = os.getenv('SPEECH_KEY')
    speech_region = os.getenv('SPEECH_REGION')
    if speech_key is None or speech_region is None:
        # Raise rather than exit(): a missing setting must not terminate the
        # process that is serving the UI.
        raise RuntimeError(
            'Please set the environment variables SPEECH_KEY and SPEECH_REGION')

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
    speech_config.speech_synthesis_language = "en-US"
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)

    pull_stream = speechsdk.audio.PullAudioOutputStream()
    stream_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=stream_config)

    audio_buffer = bytes(512)
    # Start the clock before the request is issued so the measurement covers
    # the full request-to-first-audio interval.
    start = time.perf_counter()
    # Keep the returned future referenced while reading from the stream
    # (presumably keeps the in-flight synthesis alive; confirm against SDK).
    pending_result = speech_synthesizer.speak_text_async(text)
    filled_size = pull_stream.read(audio_buffer)
    end = time.perf_counter()
    del pending_result

    if filled_size == 0:
        # A zero-byte read means no audio was produced; do not report it as a
        # latency figure.
        print('Azure TTS returned no audio data')
        return 0
    return end - start
def coqui_tts(text):
    """Measure Coqui XTTS streaming time-to-first-chunk for ``text``.

    Posts ``text`` to the Coqui streaming endpoint using the bearer token in
    the ``COQUI_TOKEN`` environment variable and times how long it takes for
    the first 512-byte chunk of the response body to arrive.

    Args:
        text: Plain text to synthesize.

    Returns:
        Seconds from sending the request to receiving the first chunk, or
        ``0`` when the endpoint does not answer with status 201 or the body
        is empty.

    Raises:
        RuntimeError: If ``COQUI_TOKEN`` is not set.
    """
    voice_id = 'c791b5b5-0558-42b8-bb0b-602ac5efc0b9'
    # os.getenv is a function: it must be called, not subscripted.
    coqui_api_token = os.getenv("COQUI_TOKEN")
    if coqui_api_token is None:
        raise RuntimeError('Please set the environment variable COQUI_TOKEN')

    start = time.perf_counter()
    # The context manager releases the connection even though the body is
    # only partially consumed.
    with requests.post(
        "https://app.coqui.ai/api/v2/samples/xtts/stream",
        json={
            "text": text,
            "language": 'en',
            "voice_id": voice_id},
        headers={"Authorization": f"Bearer {coqui_api_token}"},
        stream=True,
    ) as res:
        if res.status_code != 201:
            print(f"Endpoint failed with status code {res.status_code}:",
                  res.content.decode("utf-8"))
            return 0
        # Only the first chunk matters; stop the clock as soon as it arrives
        # instead of re-timing on every later chunk.
        first_chunk = next(res.iter_content(chunk_size=512), None)
        end = time.perf_counter()

    if first_chunk is None:
        print('Coqui TTS returned no audio data')
        return 0
    return end - start
def elevenlab_tts(text):
    """Measure ElevenLabs streaming time-to-first-chunk for ``text``.

    Posts ``text`` to the ElevenLabs streaming text-to-speech endpoint using
    the API key in the ``ELEVENLAB_KEY`` environment variable and times how
    long it takes for the first 512-byte chunk of the response to arrive.

    Args:
        text: Plain text to synthesize.

    Returns:
        Seconds from sending the request to receiving the first chunk, or
        ``0`` when the endpoint reports an error status or the body is empty.

    Raises:
        RuntimeError: If ``ELEVENLAB_KEY`` is not set.
    """
    voice_id = '21m00Tcm4TlvDq8ikWAM'
    chunk_size = 512
    url = f'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream'
    # os.getenv is a function: it must be called, not subscripted.
    xi_api_key = os.getenv('ELEVENLAB_KEY')
    if xi_api_key is None:
        # Raise rather than exit(): a missing setting must not terminate the
        # process that is serving the UI.
        raise RuntimeError('Please set the environment variable ELEVENLAB_KEY')

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": xi_api_key
    }
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    start = time.perf_counter()
    # The context manager releases the connection even though the body is
    # only partially consumed.
    with requests.post(url, json=data, headers=headers, stream=True) as response:
        if not response.ok:
            # Without this check an error payload would be timed as if it
            # were the first audio chunk.
            print(f"Endpoint failed with status code {response.status_code}:",
                  response.content.decode("utf-8"))
            return 0
        first_chunk = next(response.iter_content(chunk_size=chunk_size), None)
        end = time.perf_counter()

    if first_chunk is None:
        print('ElevenLabs TTS returned no audio data')
        return 0
    return end - start
def playht_tts(text):
    """Measure PlayHT streaming time-to-first-chunk for ``text``.

    Creates a PlayHT client from the ``PLAY_HT_USER_ID`` and
    ``PLAY_HT_API_KEY`` environment variables, requests synthesis of ``text``
    and times how long it takes for the first audio chunk to be yielded.

    Args:
        text: Plain text to synthesize.

    Returns:
        Seconds from issuing the request to receiving the first chunk, or
        ``0`` when the stream yields no chunks.

    Raises:
        RuntimeError: If ``PLAY_HT_USER_ID`` or ``PLAY_HT_API_KEY`` is not set.
    """
    userid = os.getenv("PLAY_HT_USER_ID")
    api_key = os.getenv("PLAY_HT_API_KEY")
    if userid is None or api_key is None:
        # Raise rather than exit(): a missing setting must not terminate the
        # process that is serving the UI.
        raise RuntimeError(
            'Please set the environment variables PLAY_HT_USER_ID and PLAY_HT_API_KEY')

    client = Client(
        user_id=userid,
        api_key=api_key)
    options = TTSOptions(
        voice="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
        speed=5.0)

    # A unique sentinel distinguishes "no chunk at all" from any chunk value.
    no_chunk = object()
    start = time.perf_counter()
    # Only the first chunk is needed for the measurement; the rest of the
    # stream is intentionally not consumed.
    first_chunk = next(iter(client.tts(text, options)), no_chunk)
    end = time.perf_counter()

    if first_chunk is no_chunk:
        print('PlayHT TTS returned no audio data')
        return 0
    return end - start
# HTML heading rendered at the top of the page.
# NOTE(review): the heading text reads roughly "TRTC document bot", which does
# not appear to describe this TTS latency comparison; confirm the wording.
title = """<h1 align="center">🔥TRTC 文档机器人🚀</h1>"""
def greet(input):
    """Run every TTS latency probe on ``input`` and report the results.

    The probes run one after another in the order Azure, Coqui, ElevenLabs,
    PlayHT. The combined report is printed and also returned so the UI can
    display it.

    Args:
        input: Text to send to each TTS service.

    Returns:
        A multi-line string listing each service's time to first chunk.
    """
    azure_latency = azure_tts(input)
    coqui_latency = coqui_tts(input)
    elevenlab_latency = elevenlab_tts(input)
    playht_latency = playht_tts(input)

    # Format once; the same text goes to stdout and back to the caller.
    report = f'Elevenlab TTS Delay, Time to first chunk {elevenlab_latency}s \n Azure TTS Delay, Time to first chunk {azure_latency}s \n Coqui TTS Delay, Time to first chunk {coqui_latency}s \n Pyht TTS Delay, Time to first chunk {playht_latency}s'
    print(report)
    return report
# Page layout: heading, input/output text boxes, a submit button and one
# example sentence. Both pressing Enter in the input box and clicking the
# button run greet() and write its report into the output box.
with gr.Blocks(theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm, radius_size=gr.themes.sizes.radius_sm, text_size=gr.themes.sizes.text_sm)) as demo:
    gr.HTML(title)
    # NOTE(review): which components belong inside this row is assumed;
    # confirm the intended side-by-side layout.
    with gr.Row():
        txt = gr.Textbox(show_label=False, lines=1,
                         placeholder='input the text to run ')
        outtxt = gr.Textbox(show_label=False, lines=4,
                            placeholder='the output text')
    txt.submit(greet, [txt], [outtxt])
    # No `.style(full_width=False)` call: that method is unavailable on
    # current Gradio components and would abort startup.
    submit = gr.Button(value="Submit", variant="secondary")
    submit.click(greet, [txt], [outtxt])
    gr.Examples(
        label="for example",
        examples=[
            "Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly",
        ],
        inputs=txt,
    )

demo.launch()