from kokoro import KPipeline
import soundfile as sf
import numpy as np
import logging

pipeline = KPipeline(lang_code="a")
try:
    pipeline = pipeline.to("cuda")
except Exception:
    logging.warning("CUDA not available, using CPU")


def generate_audio(
    text,
    voice="af_heart",
    speed=1.0,
    save_segments=False,
    progress=None,
):
    """
    Generate audio from text using the Kokoro TTS pipeline.

    Args:
        text (str): Text to convert to speech
        voice (str): Voice ID to use
        speed (float): Speech speed multiplier
        save_segments (bool): Whether to save individual audio segments
        progress: Optional progress tracker exposing a tqdm-compatible
            interface (e.g. gradio.Progress); if None, no progress is shown

    Returns:
        numpy.ndarray: Combined audio data at 24kHz sample rate
    """
    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\.")
    all_audio = []
    segments = list(generator)  # Materialize so the total segment count is known

    # Wrap in a progress bar only when a tracker was provided
    iterator = (
        progress.tqdm(segments, desc="Generating audio")
        if progress is not None
        else segments
    )

    for i, (gs, ps, audio) in enumerate(iterator):
        logging.info(f"Processing segment {i}")
        logging.info(f"Graphemes: {gs}")
        logging.info(f"Phonemes: {ps}")
        all_audio.append(audio)

        if save_segments:
            sf.write(f"segment_{i}.wav", audio, 24000)

    # Concatenate all audio segments into a single waveform
    combined_audio = np.concatenate(all_audio)
    return combined_audio
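

# Minimal usage sketch: the sample text and "output.wav" path are illustrative
# assumptions, not part of the original module. The 24 kHz rate matches the
# sample rate documented for the returned audio above.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    audio = generate_audio(
        "Hello from Kokoro. Each sentence is synthesized as its own segment.",
        voice="af_heart",
        speed=1.0,
    )
    sf.write("output.wav", audio, 24000)  # write the combined audio to disk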