Spaces:
Sleeping
Sleeping
import os | |
import requests | |
from pathlib import Path | |
from typing import List | |
import nls | |
class CosyVoiceSynthesizer:
    """Synthesize speech through the Aliyun NLS streaming TTS SDK.

    The application key is read from the ``ALIYUN_APP_KEY`` environment
    variable when the instance is created; it is ``None`` if the variable
    is unset.
    """

    # Plain-text endpoint the access token is downloaded from.
    _TOKEN_URL = "https://alice-open.oss-cn-zhangjiakou.aliyuncs.com/nls_token.txt"
    # WebSocket gateway handed to the NLS SDK.
    _GATEWAY_URL = 'wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1'
    # Seconds to wait for the token endpoint before giving up.
    _TOKEN_TIMEOUT = 10

    def __init__(self) -> None:
        self.app_key = os.environ.get('ALIYUN_APP_KEY')

    def call(self, save_file, transcript, voice="longyuan", sample_rate=16000):
        """Synthesize ``transcript`` and write the audio to ``save_file``.

        Args:
            save_file: Path of the output file; it is opened in binary
                write mode, so an existing file is overwritten.
            transcript: Text sent to the synthesizer.
            voice: Voice name forwarded to ``startStreamInputTts``.
            sample_rate: Sample rate forwarded to ``startStreamInputTts``.

        Raises:
            requests.RequestException: If the token cannot be downloaded
                (connection failure, timeout, or HTTP error status).
            RuntimeError: Raised by the ``on_error`` callback when the SDK
                reports a failure.
                NOTE(review): whether this reaches the caller depends on
                which thread the SDK invokes callbacks from - confirm.
        """
        # Fetch the token before touching the filesystem so that a failed
        # request neither truncates an existing file nor leaks a handle.
        response = requests.get(self._TOKEN_URL, timeout=self._TOKEN_TIMEOUT)
        # Without this an HTTP error page would be used as the token.
        response.raise_for_status()
        token = response.text.strip()

        writer = open(save_file, "wb")

        def write_data(data, *args):
            # Each chunk delivered by the SDK is appended to the file.
            writer.write(data)

        def raise_error(error, *args):
            raise RuntimeError(
                f'Synthesizing speech failed with error: {error}')

        def close_file(*args):
            # Normal-path cleanup: the SDK signals that the session closed.
            writer.close()

        try:
            sdk = nls.NlsStreamInputTtsSynthesizer(
                url=self._GATEWAY_URL,
                token=token,
                appkey=self.app_key,
                on_data=write_data,
                on_error=raise_error,
                on_close=close_file,
            )
            sdk.startStreamInputTts(
                voice=voice, sample_rate=sample_rate, aformat='wav')
            sdk.sendStreamInputTts(transcript)
            sdk.stopStreamInputTts()
        except BaseException:
            # on_close may never fire if the session fails part-way, so
            # release the handle here; closing twice is harmless.
            writer.close()
            raise
class CosyVoiceAgent:
    """Turn a list of page transcripts into one speech file per page."""

    def __init__(self, config) -> None:
        # ``config["call_cfg"]`` is later unpacked as keyword arguments for
        # ``CosyVoiceSynthesizer.call``.
        self.config = config

    def call(self, pages: List, device: str, save_path: str):
        """Synthesize every page and store the results under ``save_path``.

        Args:
            pages: Transcripts, one per page, processed in order.
            device: Not used by this method.
            save_path: Directory that receives ``p1.wav``, ``p2.wav``, ...

        Returns:
            A dict with the single entry ``"modality": "speech"``.
        """
        output_dir = Path(save_path)
        synthesizer = CosyVoiceSynthesizer()

        # Output files are numbered from 1, following the page order.
        for page_number, text in enumerate(pages, start=1):
            target = output_dir / f"p{page_number}.wav"
            synthesizer.call(
                save_file=target,
                transcript=text,
                **self.config["call_cfg"]
            )

        result = {"modality": "speech"}
        return result