# leolxliu
# add more code
# 4958e9a
import gradio as gr
import os
import time
import azure.cognitiveservices.speech as speechsdk
from pyht import Client
from pyht.client import TTSOptions
import requests
# Sample sentence; the same text is offered as the example input in the UI.
text = (
    'Today is Sunday, the weather is sunny. '
    'I am here to test the delay of various TTS services thoroughly'
)
def azure_tts(text):
    """Measure Azure Speech time-to-first-audio for ``text``.

    Reads the ``SPEECH_KEY`` and ``SPEECH_REGION`` environment variables,
    starts an asynchronous synthesis of ``text`` into a pull stream and times
    how long it takes for the first ``read`` on that stream to return.

    Args:
        text: The sentence to synthesize.

    Returns:
        Seconds (``time.perf_counter`` difference) from just before the
        synthesis request is issued until the first read on the output
        stream returns.

    Raises:
        SystemExit: With code 1 when either environment variable is unset.
    """
    speech_key = os.getenv('SPEECH_KEY')
    speech_region = os.getenv('SPEECH_REGION')
    if speech_key is None or speech_region is None:
        print('Please set the environment variables SPEECH_KEY and SPEECH_REGION')
        # Same effect as ``exit(1)`` without relying on the helper that the
        # ``site`` module injects into builtins.
        raise SystemExit(1)

    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key, region=speech_region)
    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
    speech_config.speech_synthesis_language = "en-US"
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)

    pull_stream = speechsdk.audio.PullAudioOutputStream()
    stream_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=stream_config)

    audio_buffer = bytes(512)

    # Start the clock before the request goes out so that dispatching the
    # request is part of the measurement, as it is for the HTTP-based
    # providers in this file.
    start = time.perf_counter()
    speech_synthesizer.speak_text_async(text)
    pull_stream.read(audio_buffer)
    end = time.perf_counter()

    return end - start
def coqui_tts(text):
    """Measure Coqui XTTS streaming time-to-first-chunk for ``text``.

    Posts ``text`` to the Coqui streaming endpoint using the bearer token
    from the ``COQUI_TOKEN`` environment variable and times the arrival of
    the first chunk of the response body.

    Args:
        text: The sentence to synthesize.

    Returns:
        Seconds from just before the request is sent until the first body
        chunk arrives, or ``0`` when the endpoint does not answer with
        status 201 or the body is empty.

    Raises:
        SystemExit: With code 1 when ``COQUI_TOKEN`` is unset.
    """
    voice_id = 'c791b5b5-0558-42b8-bb0b-602ac5efc0b9'

    # ``os.getenv`` has to be called; it cannot be subscripted like
    # ``os.environ``.
    coqui_api_token = os.getenv("COQUI_TOKEN")
    if coqui_api_token is None:
        print('Please set the environment variable COQUI_TOKEN')
        raise SystemExit(1)

    start = time.perf_counter()
    # The context manager releases the streamed connection on every exit
    # path, including the early returns below.
    with requests.post(
        "https://app.coqui.ai/api/v2/samples/xtts/stream",
        json={
            "text": text,
            "language": 'en',
            "voice_id": voice_id,
        },
        headers={"Authorization": f"Bearer {coqui_api_token}"},
        stream=True,
    ) as res:
        if res.status_code != 201:
            print(f"Endpoint failed with status code {res.status_code}:",
                  res.content.decode("utf-8"))
            return 0

        for _chunk in res.iter_content(chunk_size=512):
            # Only the arrival time of the first chunk is of interest.
            return time.perf_counter() - start

    # The stream ended without yielding any data.
    print('Coqui stream returned no audio data')
    return 0
def elevenlab_tts(text):
    """Measure ElevenLabs streaming time-to-first-chunk for ``text``.

    Posts ``text`` to the ElevenLabs streaming endpoint using the API key
    from the ``ELEVENLAB_KEY`` environment variable and times the arrival of
    the first chunk of the response body.

    Args:
        text: The sentence to synthesize.

    Returns:
        Seconds from just before the request is sent until the first body
        chunk arrives, or ``0`` when the endpoint answers with an error
        status or the body is empty.

    Raises:
        SystemExit: With code 1 when ``ELEVENLAB_KEY`` is unset.
    """
    voice_id = '21m00Tcm4TlvDq8ikWAM'
    chunk_size = 512
    url = f'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream'

    # ``os.getenv`` has to be called; it cannot be subscripted like
    # ``os.environ``.
    xi_api_key = os.getenv('ELEVENLAB_KEY')
    if xi_api_key is None:
        print('Please set the environment variable ELEVENLAB_KEY')
        raise SystemExit(1)

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": xi_api_key,
    }
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    start = time.perf_counter()
    # The context manager releases the streamed connection on every exit
    # path, including the early returns below.
    with requests.post(url, json=data, headers=headers, stream=True) as response:
        if not response.ok:
            # An error body must not be timed as if it were audio.
            print(f"Endpoint failed with status code {response.status_code}:",
                  response.content.decode("utf-8"))
            return 0

        for _chunk in response.iter_content(chunk_size=chunk_size):
            # Only the arrival time of the first chunk is of interest.
            return time.perf_counter() - start

    # The stream ended without yielding any data.
    print('ElevenLabs stream returned no audio data')
    return 0
def playht_tts(text):
    """Measure PlayHT streaming time-to-first-chunk for ``text``.

    Builds a ``pyht`` client from the ``PLAY_HT_USER_ID`` and
    ``PLAY_HT_API_KEY`` environment variables, requests synthesis of
    ``text`` and times the arrival of the first chunk from the result.

    Args:
        text: The sentence to synthesize.

    Returns:
        Seconds from just before ``client.tts`` is called until the first
        chunk is produced, or ``0`` when no chunk is produced at all.

    Raises:
        SystemExit: With code 1 when either environment variable is unset.
    """
    userid = os.getenv("PLAY_HT_USER_ID")
    api_key = os.getenv("PLAY_HT_API_KEY")
    if userid is None or api_key is None:
        print('Please set the environment variables PLAY_HT_USER_ID and PLAY_HT_API_KEY')
        # Same effect as ``exit(1)`` without relying on the helper that the
        # ``site`` module injects into builtins.
        raise SystemExit(1)

    client = Client(user_id=userid, api_key=api_key)
    options = TTSOptions(
        voice="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
        speed=5.0,
    )

    start = time.perf_counter()
    for _chunk in client.tts(text, options):
        # Only the arrival time of the first chunk is of interest, so the
        # rest of the audio is not consumed.
        return time.perf_counter() - start

    # The stream ended without yielding any data.
    print('PlayHT stream returned no audio data')
    return 0
# HTML heading passed to gr.HTML at the top of the page.
# NOTE(review): the heading text does not mention TTS — confirm it is intended.
title = '<h1 align="center">🔥TRTC 文档机器人🚀</h1>'
def greet(input):
    """Run every TTS provider on ``input`` and report the measured delays.

    The providers are called one after another in the order Azure, Coqui,
    ElevenLabs, PlayHT. The resulting report is printed and also returned.

    Args:
        input: The sentence handed to each provider function.

    Returns:
        A four-line string listing each provider's time to first chunk.
    """
    azure_s = azure_tts(input)
    coqui_s = coqui_tts(input)
    eleven_s = elevenlab_tts(input)
    playht_s = playht_tts(input)

    # Build the report once and use it for both the console and the UI.
    report = (
        f'Elevenlab TTS Delay, Time to first chunk {eleven_s}s \n'
        f' Azure TTS Delay, Time to first chunk {azure_s}s \n'
        f' Coqui TTS Delay, Time to first chunk {coqui_s}s \n'
        f' Pyht TTS Delay, Time to first chunk {playht_s}s'
    )
    print(report)
    return report
# Page layout and event wiring; components created inside the ``with`` block
# are attached to ``demo``.
# NOTE(review): the nesting under gr.Row() was reconstructed — confirm which
# components are meant to share the row.
with gr.Blocks(
    theme=gr.themes.Default(
        spacing_size=gr.themes.sizes.spacing_sm,
        radius_size=gr.themes.sizes.radius_sm,
        text_size=gr.themes.sizes.text_sm,
    )
) as demo:
    gr.HTML(title)
    with gr.Row():
        txt = gr.Textbox(show_label=False, lines=1,
                         placeholder='input the text to run ')
        outtxt = gr.Textbox(show_label=False, lines=4,
                            placeholder='the output text')

    # Submitting the textbox and clicking the button run the same handler.
    txt.submit(greet, [txt], [outtxt])
    # NOTE(review): ``.style()`` is deprecated in later Gradio releases —
    # confirm the pinned version still supports it.
    submit = gr.Button(value="Submit", variant="secondary").style(
        full_width=False)
    submit.click(greet, [txt], [outtxt])

    gr.Examples(
        label="for example",
        examples=[
            "Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly",
        ],
        inputs=txt,
    )

demo.launch()