File size: 1,719 Bytes
17c5099
 
 
 
 
 
 
 
 
 
 
8f3689f
 
17c5099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f3689f
 
 
17c5099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import json
import random
import librosa
import numpy as np
import gradio as gr
from typing import Any, List, Dict, Tuple

from utils import meow_stretch, get_word_lengths
from config import config, BaseConfig

# Module-level call counter; predict() increments it and prints the new value.
COUNTER = 0

''' Gradio Input/Output Configurations '''
# Input spec handed to Gradio: the plain string 'text'.
inputs: str = 'text'
# Output component: a gr.Audio instance built with default arguments.
outputs: gr.Audio = gr.Audio()

def load_meows(cfg: BaseConfig) -> List[Dict[str, Any]]:
    """Load every manifest entry together with its audio samples.

    The manifest at ``cfg.manifest_path`` is read as JSON Lines: one JSON
    object per line. For each entry, ``audio_filepath`` is resolved relative
    to the directory containing the manifest and passed to
    ``librosa.load(..., sr=None)``; the two values it returns are stored on
    the entry under ``'audio'`` and ``'rate'``.

    Blank or whitespace-only lines are skipped, so stray empty lines in the
    manifest do not abort loading with a ``json.JSONDecodeError``.

    Args:
        cfg: Configuration object; only ``manifest_path`` is read here.

    Returns:
        The parsed entries in manifest order, each extended with the
        ``'audio'`` and ``'rate'`` keys.

    Raises:
        OSError: If the manifest cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an entry has no ``'audio_filepath'`` key.
    """
    meow_dir = os.path.dirname(cfg.manifest_path)

    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding, and drop empty lines before parsing.
    with open(cfg.manifest_path, mode='r', encoding='utf-8') as fr:
        lines = [line for line in fr if line.strip()]

    items = []
    for line in lines:
        item = json.loads(line)
        audio_path = os.path.join(meow_dir, item['audio_filepath'])
        item['audio'], item['rate'] = librosa.load(audio_path, sr=None)
        items.append(item)

    return items

def extract_meows_weights(items: List[Dict[str, Any]]) -> Tuple[List[np.ndarray], List[float]]:
    """Split manifest entries into parallel lists of audio and weights.

    Args:
        items: Entries that each provide an ``'audio'`` and a ``'weight'`` key.

    Returns:
        A ``(meows, weights)`` pair; position ``i`` in both lists comes from
        ``items[i]``.
    """
    def column(key: str) -> list:
        # Collect one field across all entries, preserving entry order.
        return [entry[key] for entry in items]

    return column('audio'), column('weight')

''' Load meows '''
# Executed at import time: load_meows reads the manifest named by `config`
# and attaches audio to each entry; the entries are then split into the
# parallel `meows` / `weights` lists that predict() samples from.
meow_items = load_meows(config)
meows, weights = extract_meows_weights(meow_items)

def predict(text: str) -> Tuple[int, np.ndarray]:
    """Turn input text into a sequence of stretched meows.

    One meow is drawn (with replacement, weighted by ``weights``) for every
    value returned by ``get_word_lengths(text)``. Each drawn meow is passed
    through ``meow_stretch`` with its paired word length and the factors from
    ``config``, and the results are concatenated along axis 0.

    Args:
        text: The text submitted by the user.

    Returns:
        A ``(config.sample_rate, samples)`` tuple, where ``samples`` is the
        concatenated array of transformed meows.
    """
    # COUNTER is rebound below, so it has to be declared global; without the
    # declaration the augmented assignment raises UnboundLocalError.
    global COUNTER
    COUNTER += 1
    print('Number of calls:', COUNTER)

    word_lengths = get_word_lengths(text)
    selected_meows = random.choices(meows, weights=weights, k=len(word_lengths))
    transformed_meows = [
        meow_stretch(
            meow, wl,
            init_factor=config.init_factor,
            add_factor=config.add_factor,
            power_factor=config.power_factor
        ) for meow, wl in zip(selected_meows, word_lengths)
    ]

    # NOTE(review): np.concatenate raises ValueError on an empty list, which
    # would happen if get_word_lengths returns no lengths (e.g. for empty
    # text) -- confirm how that case should be presented to the user.
    result_meows = np.concatenate(transformed_meows, axis=0)

    return (config.sample_rate, result_meows)