Spaces:

leeoxiang
/

tts-streaming-latency

Runtime error

leolxliu

add more code

4958e9a over 1 year ago

5.65 kB

	import gradio as gr


	import os
	import time
	import azure.cognitiveservices.speech as speechsdk
	from pyht import Client
	from pyht.client import TTSOptions
	import requests

	# Sample sentence for exercising the TTS services.
	# NOTE(review): no other code visible in this file reads this module-level
	# name (each function takes its own `text` parameter) — confirm it is needed.
	text = 'Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly'


	def azure_tts(text):
	    """Measure Azure Speech time-to-first-audio for ``text``.

	    Credentials are read from the SPEECH_KEY and SPEECH_REGION environment
	    variables. Synthesis is directed into a pull stream, and the clock runs
	    from just before the synthesis request is issued until the first read
	    from that stream returns.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        Elapsed time in seconds (float).

	    Raises:
	        RuntimeError: If SPEECH_KEY or SPEECH_REGION is not set.
	    """
	    speech_key = os.getenv('SPEECH_KEY')
	    speech_region = os.getenv('SPEECH_REGION')
	    if speech_key is None or speech_region is None:
	        # Raise instead of exiting: this runs inside a web request handler,
	        # where terminating the interpreter would take the whole app down.
	        raise RuntimeError(
	            'Please set the environment variables SPEECH_KEY and SPEECH_REGION')

	    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
	    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
	    speech_config.speech_synthesis_language = "en-US"
	    speech_config.set_speech_synthesis_output_format(
	        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)

	    pull_stream = speechsdk.audio.PullAudioOutputStream()
	    stream_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
	    speech_synthesizer = speechsdk.SpeechSynthesizer(
	        speech_config=speech_config, audio_config=stream_config)

	    # Start the clock BEFORE issuing the request so the measurement covers
	    # the full request-to-first-audio interval, like the other providers.
	    start = time.perf_counter()
	    speech_synthesizer.speak_text_async(text)

	    # Timing ends as soon as the first read from the pull stream returns.
	    audio_buffer = bytes(512)
	    pull_stream.read(audio_buffer)

	    return time.perf_counter() - start





	def coqui_tts(text):
	    """Measure Coqui XTTS streaming time-to-first-chunk for ``text``.

	    Posts ``text`` to the Coqui streaming endpoint, authenticated with the
	    COQUI_TOKEN environment variable, and times the interval from sending
	    the request to receiving the first chunk of the response body.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        Latency in seconds (float), or 0 when the endpoint answers with a
	        status other than 201 or the response body is empty.

	    Raises:
	        RuntimeError: If COQUI_TOKEN is not set.
	    """
	    voice_id = 'c791b5b5-0558-42b8-bb0b-602ac5efc0b9'

	    # os.getenv must be called; subscripting it raises TypeError.
	    coqui_api_token = os.getenv("COQUI_TOKEN")
	    if coqui_api_token is None:
	        raise RuntimeError('Please set the environment variable COQUI_TOKEN')

	    start = time.perf_counter()
	    # The context manager closes the streamed response even though only the
	    # first chunk is consumed.
	    with requests.post(
	        "https://app.coqui.ai/api/v2/samples/xtts/stream",
	        json={
	            "text": text,
	            "language": 'en',
	            "voice_id": voice_id,
	        },
	        headers={"Authorization": f"Bearer {coqui_api_token}"},
	        stream=True,
	    ) as res:
	        if res.status_code != 201:
	            print(f"Endpoint failed with status code {res.status_code}:",
	                  res.content.decode("utf-8"))
	            return 0

	        for _chunk in res.iter_content(chunk_size=512):
	            # The first chunk has arrived: that is the latency being measured.
	            return time.perf_counter() - start

	    # The loop body never ran, so the service sent no audio at all.
	    print('Coqui returned an empty audio stream')
	    return 0




	def elevenlab_tts(text):
	    """Measure ElevenLabs streaming time-to-first-chunk for ``text``.

	    Posts ``text`` to the ElevenLabs streaming text-to-speech endpoint,
	    authenticated with the ELEVENLAB_KEY environment variable, and times the
	    interval from sending the request to receiving the first body chunk.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        Latency in seconds (float), or 0 when the endpoint reports an HTTP
	        error or the response body is empty.

	    Raises:
	        RuntimeError: If ELEVENLAB_KEY is not set.
	    """
	    voice_id = '21m00Tcm4TlvDq8ikWAM'
	    CHUNK_SIZE = 512
	    url = f'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream'

	    # os.getenv must be called; subscripting it raises TypeError.
	    xi_api_key = os.getenv('ELEVENLAB_KEY')
	    if xi_api_key is None:
	        # Raise instead of exiting: this runs inside a web request handler.
	        raise RuntimeError('Please set the environment variable ELEVENLAB_KEY')

	    headers = {
	        "Accept": "audio/mpeg",
	        "Content-Type": "application/json",
	        "xi-api-key": xi_api_key,
	    }

	    data = {
	        "text": text,
	        "model_id": "eleven_multilingual_v2",
	        "voice_settings": {
	            "stability": 0.5,
	            "similarity_boost": 0.5,
	        },
	    }

	    start = time.perf_counter()
	    # The context manager closes the streamed response even though only the
	    # first chunk is consumed.
	    with requests.post(url, json=data, headers=headers, stream=True) as response:
	        if not response.ok:
	            # Without this check an error body would be timed as if it
	            # were audio, producing a misleading latency.
	            print(f"Endpoint failed with status code {response.status_code}:",
	                  response.text)
	            return 0

	        for _chunk in response.iter_content(chunk_size=CHUNK_SIZE):
	            # The first chunk has arrived: that is the latency being measured.
	            return time.perf_counter() - start

	    # The loop body never ran, so the service sent no audio at all.
	    print('ElevenLabs returned an empty audio stream')
	    return 0



	def playht_tts(text):
	    """Measure PlayHT streaming time-to-first-chunk for ``text``.

	    Builds a pyht client from the PLAY_HT_USER_ID and PLAY_HT_API_KEY
	    environment variables and times the interval from requesting synthesis
	    to receiving the first audio chunk.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        Latency in seconds (float), or 0 when the stream yields no chunks.

	    Raises:
	        RuntimeError: If PLAY_HT_USER_ID or PLAY_HT_API_KEY is not set.
	    """
	    userid = os.getenv("PLAY_HT_USER_ID")
	    api_key = os.getenv("PLAY_HT_API_KEY")

	    if userid is None or api_key is None:
	        # Raise instead of exiting: this runs inside a web request handler.
	        raise RuntimeError(
	            'Please set the environment variables PLAY_HT_USER_ID and PLAY_HT_API_KEY')

	    client = Client(
	        user_id=userid,
	        api_key=api_key)

	    options = TTSOptions(
	        voice="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
	        speed=5.0)

	    start = time.perf_counter()
	    for _chunk in client.tts(text, options):
	        # Only the first chunk matters for the measurement, so stop here
	        # rather than downloading the rest of the audio.
	        return time.perf_counter() - start

	    # The loop body never ran, so the service sent no audio at all.
	    print('PlayHT returned an empty audio stream')
	    return 0



	# HTML heading rendered at the top of the page through gr.HTML(title).
	# NOTE(review): the heading text reads "TRTC document bot", which does not
	# appear to describe this TTS latency demo — confirm whether it is a leftover.
	title = """<h1 align="center">🔥TRTC 文档机器人🚀</h1>"""

	def greet(input):
	    """Run every TTS latency probe on ``input`` and report the results.

	    The probes run in the order Azure, Coqui, ElevenLabs, PlayHT. The
	    resulting report is printed to stdout and also returned so the UI can
	    display it.

	    Args:
	        input: Text handed to each TTS service.

	    Returns:
	        A multi-line string with one time-to-first-chunk entry per service.
	    """
	    azure_latency = azure_tts(input)
	    coqui_latency = coqui_tts(input)
	    elevenlab_latency = elevenlab_tts(input)
	    playht_latency = playht_tts(input)

	    # Reporting order differs from probing order on purpose: it mirrors the
	    # layout of the message shown to the user.
	    measurements = (
	        ('Elevenlab', elevenlab_latency),
	        ('Azure', azure_latency),
	        ('Coqui', coqui_latency),
	        ('Pyht', playht_latency),
	    )
	    report = ' \n '.join(
	        f'{service} TTS Delay, Time to first chunk {seconds}s'
	        for service, seconds in measurements
	    )

	    print(report)
	    return report


	# Build the single-page UI: a text input, an output box, a submit button and
	# one example sentence. Both pressing Enter in the input and clicking the
	# button run greet() and write its report into the output box.
	with gr.Blocks(theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm, radius_size=gr.themes.sizes.radius_sm, text_size=gr.themes.sizes.text_sm)) as demo:

	    gr.HTML(title)

	    with gr.Row():
	        txt = gr.Textbox(show_label=False, lines=1,
	                         placeholder='input the text to run ')
	        outtxt = gr.Textbox(show_label=False, lines=4,
	                            placeholder='the output text')

	    txt.submit(greet, [txt], [outtxt])
	    # The component .style() method was removed in Gradio 4 and raises
	    # AttributeError there, so the button is created without it.
	    submit = gr.Button(value="Submit", variant="secondary")
	    submit.click(greet, [txt], [outtxt])

	    gr.Examples(
	        label="for example",
	        examples=[
	            "Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly",
	        ],
	        inputs=txt,
	    )

	demo.launch()