Spaces:
Runtime error
Runtime error
import os | |
import subprocess | |
import librosa | |
import numpy as np | |
from data_gen.tts.wav_processors.base_processor import BaseWavProcessor, register_wav_processors | |
from data_gen.tts.data_gen_utils import trim_long_silences | |
from utils.audio import save_wav | |
from utils.rnnoise import rnnoise | |
from utils.hparams import hparams | |
class ConvertToWavProcessor(BaseWavProcessor):
    """Wav processor that converts non-WAV audio files to WAV using ``sox``."""

    def name(self):
        """Return the short identifier of this processor."""
        return 'ToWav'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Return a WAV path for ``input_fn``, converting with sox if required.

        Args:
            input_fn: Path (str) of the audio file to process.
            sr: Sample rate; returned unchanged.
            tmp_dir: Unused here; part of the shared processor signature.
            processed_dir: Unused here.
            item_name: Unused here.
            preprocess_args: Unused here.

        Returns:
            ``(path, sr)``. ``path`` is ``input_fn`` itself when it already
            ends in ``.wav``; otherwise it is ``self.output_fn(input_fn)``.

        Raises:
            subprocess.CalledProcessError: If sox exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        if input_fn.endswith('.wav'):
            return input_fn, sr

        output_fn = self.output_fn(input_fn)
        # Pass an argv list instead of a shell string so that file names
        # containing quotes, `$`, backticks or spaces reach sox verbatim and
        # cannot be interpreted by a shell.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, '-t', 'wav', output_fn])
        return output_fn, sr
class ResampleProcessor(BaseWavProcessor):
    """Wav processor that resamples audio whose sample rate differs from ``sr``."""

    def name(self):
        """Return the short identifier of this processor."""
        return 'Resample'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Resample ``input_fn`` to ``sr`` when the file's rate differs.

        Args:
            input_fn: Path of the audio file to process.
            sr: Target sample rate.
            tmp_dir: Unused here; part of the shared processor signature.
            processed_dir: Unused here.
            item_name: Unused here.
            preprocess_args: Unused here.

        Returns:
            ``(input_fn, sr)`` when the file is already at ``sr``; otherwise
            ``(output_fn, sr)`` where ``output_fn`` holds the resampled audio
            with leading and trailing silence trimmed.

        Raises:
            subprocess.CalledProcessError: If sox exits with a non-zero status.
            FileNotFoundError: If the ``sox`` executable cannot be found.
        """
        output_fn = self.output_fn(input_fn)
        sr_file = librosa.core.get_samplerate(input_fn)
        if sr == sr_file:
            return input_fn, sr

        # Argv list (no shell) so unusual characters in file names are passed
        # to sox literally instead of being interpreted by a shell.
        subprocess.check_call(['sox', '-v', '0.95', input_fn, f'-r{sr}', output_fn])
        # NOTE(review): the audio is re-read from `input_fn` (not the sox
        # output) and the result is saved over `output_fn`, so the file sox
        # just wrote is replaced. Kept as in the original; confirm whether
        # the sox step or the librosa step is the intended resampler.
        y, _ = librosa.core.load(input_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        return output_fn, sr
class TrimSILProcessor(BaseWavProcessor):
    """Wav processor that trims leading and trailing silence."""

    def name(self):
        """Return the short identifier of this processor."""
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Load ``input_fn`` at ``sr``, trim edge silence, and save the result.

        Args:
            input_fn: Path of the audio file to process.
            sr: Sample rate used both for loading and for saving.
            tmp_dir: Unused here; part of the shared processor signature.
            processed_dir: Unused here.
            item_name: Unused here.
            preprocess_args: Unused here.

        Returns:
            ``(output_fn, sr)``, matching the other processors in this file.
        """
        output_fn = self.output_fn(input_fn)
        y, _ = librosa.core.load(input_fn, sr=sr)
        y, _ = librosa.effects.trim(y)
        save_wav(y, output_fn, sr)
        # The original returned only `output_fn`; every sibling processor
        # returns a (path, sample_rate) pair, so return the pair here too.
        return output_fn, sr
class TrimAllSILProcessor(BaseWavProcessor):
    """Wav processor that removes long silences via ``trim_long_silences``."""

    def name(self):
        """Return the short identifier of this processor."""
        # NOTE(review): same identifier as TrimSILProcessor. Kept unchanged
        # because it is a runtime string; confirm whether the two processors
        # are meant to share it.
        return 'TrimSIL'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Trim long silences from ``input_fn`` and optionally save the mask.

        Args:
            input_fn: Path of the audio file to process.
            sr: Sample rate used when saving the trimmed audio.
            tmp_dir: Unused here; part of the shared processor signature.
            processed_dir: Directory under which ``sil_mask/`` is created
                when the mask is saved.
            item_name: Base name for the saved mask file.
            preprocess_args: Mapping of options. Reads
                ``vad_max_silence_length`` (default 12) and
                ``save_sil_mask`` (treated as false when absent).

        Returns:
            ``(output_fn, sr)``.
        """
        output_fn = self.output_fn(input_fn)
        y, audio_mask, _ = trim_long_silences(
            input_fn, vad_max_silence_length=preprocess_args.get('vad_max_silence_length', 12))
        save_wav(y, output_fn, sr)
        # Use .get so a missing key skips the optional mask dump instead of
        # raising KeyError after the wav has already been written.
        if preprocess_args.get('save_sil_mask', False):
            os.makedirs(f'{processed_dir}/sil_mask', exist_ok=True)
            np.save(f'{processed_dir}/sil_mask/{item_name}.npy', audio_mask)
        return output_fn, sr
class DenoiseProcessor(BaseWavProcessor):
    """Wav processor that runs the input file through ``rnnoise``."""

    def name(self):
        """Return the short identifier of this processor."""
        return 'Denoise'

    def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, preprocess_args):
        """Write a denoised copy of ``input_fn`` and return its path with ``sr``.

        ``tmp_dir``, ``processed_dir``, ``item_name`` and ``preprocess_args``
        are accepted for signature compatibility and are not used here.
        """
        denoised_fn = self.output_fn(input_fn)
        # `sr` is forwarded to rnnoise as the requested output sample rate.
        rnnoise(input_fn, denoised_fn, out_sample_rate=sr)
        return denoised_fn, sr