Text-to-Meow / predict.py
Scrya's picture
add counter
8f3689f
raw
history blame
1.72 kB
import os
import json
import random
import librosa
import numpy as np
import gradio as gr
from typing import Any, List, Dict, Tuple
from utils import meow_stretch, get_word_lengths
from config import config, BaseConfig
# Running count of predict() invocations for this process; predict() bumps
# and prints it on every call.
COUNTER = 0
''' Gradio Input/Output Configurations '''
# NOTE(review): presumably consumed by the gr.Interface(...) call in the app
# entry point, which is not visible in this file - confirm against the caller.
# 'text' is a plain string here; it looks like Gradio's component shortcut
# for a text input - TODO confirm.
inputs: str = 'text'
# Audio output component instance created with default arguments.
outputs: gr.Audio = gr.Audio()
def load_meows(cfg: BaseConfig) -> List[Dict[str, Any]]:
    """Load every meow described in the manifest and decode its audio.

    The file at ``cfg.manifest_path`` is read as JSON Lines (one JSON object
    per line). Each object's ``audio_filepath`` is resolved relative to the
    directory containing the manifest, and the decoded clip is attached to
    the object itself.

    Args:
        cfg: Configuration object; only ``cfg.manifest_path`` is read here.

    Returns:
        The parsed manifest entries, in file order, each extended with two
        keys: ``'audio'`` (the samples returned by ``librosa.load``) and
        ``'rate'`` (the sampling rate returned alongside them).

    Raises:
        OSError: If the manifest cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an entry has no ``'audio_filepath'`` key.
    """
    meow_dir = os.path.dirname(cfg.manifest_path)
    items = []
    # JSON text is UTF-8 by convention; do not depend on the platform default.
    with open(cfg.manifest_path, mode='r', encoding='utf-8') as fr:
        for line in fr:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            item = json.loads(line)
            audio_path = os.path.join(meow_dir, item['audio_filepath'])
            # sr=None asks librosa to keep the file's native sampling rate
            # instead of resampling to its default.
            item['audio'], item['rate'] = librosa.load(audio_path, sr=None)
            items.append(item)
    return items
def extract_meows_weights(items: List[Dict[str, Any]]) -> Tuple[List[np.ndarray], List[float]]:
    """Split manifest entries into parallel lists of audio and weights.

    Args:
        items: Entries that each carry an ``'audio'`` and a ``'weight'`` key.

    Returns:
        A ``(meows, weights)`` pair where position ``i`` of each list comes
        from ``items[i]``.

    Raises:
        KeyError: If any entry lacks ``'audio'`` or ``'weight'``.
    """
    # Two separate passes: every 'audio' lookup happens before any 'weight'
    # lookup.
    audio_clips = []
    for entry in items:
        audio_clips.append(entry['audio'])

    sampling_weights = []
    for entry in items:
        sampling_weights.append(entry['weight'])

    return audio_clips, sampling_weights
''' Load meows '''
# Executed once at import time: reads the manifest named by `config` and
# decodes every referenced audio file, so importing this module does file I/O.
meow_items = load_meows(config)
# Parallel lists used by predict(): `meows` is the pool to sample from and
# `weights` the per-clip relative weights given to random.choices.
meows, weights = extract_meows_weights(meow_items)
def predict(text: str) -> Tuple[int, np.ndarray]:
    """Turn ``text`` into a sequence of meows, one per word length.

    For each value produced by ``get_word_lengths(text)`` a clip is drawn at
    random (with replacement, weighted by the module-level ``weights``) from
    the module-level ``meows`` pool, passed through ``meow_stretch`` together
    with that value, and the results are concatenated along axis 0.

    Args:
        text: The input text to convert.

    Returns:
        A ``(sample_rate, audio)`` tuple where ``sample_rate`` is
        ``config.sample_rate`` and ``audio`` is the concatenated array.

    Raises:
        ValueError: From ``np.concatenate`` when there is nothing to join.
            NOTE(review): this happens if ``get_word_lengths`` yields an
            empty sequence (e.g. for empty text) - confirm the desired
            behaviour for that case.
    """
    # Without this declaration the augmented assignment below makes COUNTER
    # a local name and every call fails with UnboundLocalError.
    global COUNTER
    COUNTER += 1
    print('Number of calls:', COUNTER)

    word_lengths = get_word_lengths(text)
    selected_meows = random.choices(meows, weights=weights, k=len(word_lengths))
    transformed_meows = [
        meow_stretch(
            meow, wl,
            init_factor=config.init_factor,
            add_factor=config.add_factor,
            power_factor=config.power_factor,
        )
        for meow, wl in zip(selected_meows, word_lengths)
    ]
    result_meows = np.concatenate(transformed_meows, axis=0)
    return (config.sample_rate, result_meows)