from kokoro import KPipeline
import soundfile as sf
import numpy as np
import logging

def generate_audio(
    text,
    lang_code="a",
    voice="af_heart",
    speed=1,
    save_segments=False,
    progress=None,
):
""" | |
Generate audio from text using Kokoro TTS pipeline | |
Args: | |
text (str): Text to convert to speech | |
lang_code (str): Language code for the TTS model | |
voice (str): Voice ID to use | |
speed (float): Speech speed multiplier | |
save_segments (bool): Whether to save individual audio segments | |
Returns: | |
numpy.ndarray: Combined audio data at 24kHz sample rate | |
""" | |
    pipeline = KPipeline(lang_code=lang_code)
    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\.")

    all_audio = []
    segments = list(generator)  # Materialize the generator so the total segment count is known
    # Fall back to plain iteration when no progress tracker is supplied
    if progress is not None:
        iterator = progress.tqdm(segments, desc="Generating audio")
    else:
        iterator = segments

    for i, (gs, ps, audio) in enumerate(iterator):
        logging.info(f"Processing segment {i}")
        logging.info(f"Graphemes: {gs}")
        logging.info(f"Phonemes: {ps}")
        all_audio.append(audio)
        if save_segments:
            # Each segment is written at Kokoro's native 24 kHz sample rate
            sf.write(f"segment_{i}.wav", audio, 24000)

    # Concatenate all audio segments into one waveform
    combined_audio = np.concatenate(all_audio)
    return combined_audio


if __name__ == "__main__":
    # Example usage: synthesize a short phrase and write it to disk
    sample_text = "Hello world"
    audio_data = generate_audio(sample_text)
    sf.write("out.wav", audio_data, 24000)