Spaces:
Runtime error
Runtime error
first commit
Browse files- .gitignore +3 -0
- app.py +24 -0
- config.py +17 -0
- meows/data/meow1.wav +0 -0
- meows/data/meow10.wav +0 -0
- meows/data/meow2.wav +0 -0
- meows/data/meow3.wav +0 -0
- meows/data/meow4.wav +0 -0
- meows/data/meow5.wav +0 -0
- meows/data/meow6.wav +0 -0
- meows/data/meow7.wav +0 -0
- meows/data/meow8.wav +0 -0
- meows/data/meow9.wav +0 -0
- meows/manifest.json +10 -0
- predict.py +57 -0
- requirements.txt +3 -0
- utils.py +57 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints/
|
2 |
+
flagged/
|
3 |
+
__pycache__/
|
app.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
from config import BaseConfig
|
5 |
+
from predict import inputs, outputs, predict
|
6 |
+
|
7 |
+
if __name__ == "__main__":
    # Timestamped log lines; the log level is left at logging's default.
    logging.basicConfig(format="[%(asctime)s] %(levelname)s: %(message)s")
    config = BaseConfig()

    # Single text input -> audio output interface around predict().
    app = gr.Interface(
        predict,
        inputs=inputs,
        outputs=outputs,
        title="Text-to-Meow",
        description="Ever thought of whether your cat understands your words? It no longer matters! Now you get to speak in their language!",
    )

    # `launch(enable_queue=...)` is deprecated in Gradio 3.x; request
    # queueing is enabled through `queue()` instead.
    app.queue()

    # Bind on all interfaces so the container's port mapping can reach the app.
    app.launch(
        server_name="0.0.0.0",
        server_port=config.port,
        share=True,
    )
|
config.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseSettings, Field
|
2 |
+
|
3 |
+
class BaseConfig(BaseSettings):
    """Settings for the Text-to-Meow app.

    Define any config here. See here for documentation:
    https://pydantic-docs.helpmanual.io/usage/settings/

    NOTE(review): `BaseSettings` and `Field(env=...)` are imported from
    `pydantic` itself, which is the pydantic v1 API - confirm the installed
    pydantic version is < 2.
    """

    # KNative assigns a $PORT environment variable to the container
    port: int = Field(
        default=8080,
        env="PORT",
        description="Gradio App Server Port",
    )

    # Manifest file read by predict.load_meows (one JSON object per line).
    manifest_path: str = "meows/manifest.json"
    # Sample rate that predict.predict reports alongside the generated audio.
    sample_rate: int = 16000
    # Stretch parameters forwarded by predict.predict to utils.meow_stretch.
    init_factor: float = 0.3
    add_factor: float = 0.2
    power_factor: float = 0.8


# Module-level settings instance imported by predict.py.
config = BaseConfig()
|
meows/data/meow1.wav
ADDED
Binary file (43.6 kB). View file
|
|
meows/data/meow10.wav
ADDED
Binary file (21.4 kB). View file
|
|
meows/data/meow2.wav
ADDED
Binary file (54.5 kB). View file
|
|
meows/data/meow3.wav
ADDED
Binary file (36.4 kB). View file
|
|
meows/data/meow4.wav
ADDED
Binary file (46.4 kB). View file
|
|
meows/data/meow5.wav
ADDED
Binary file (32.3 kB). View file
|
|
meows/data/meow6.wav
ADDED
Binary file (47.5 kB). View file
|
|
meows/data/meow7.wav
ADDED
Binary file (27.7 kB). View file
|
|
meows/data/meow8.wav
ADDED
Binary file (51.6 kB). View file
|
|
meows/data/meow9.wav
ADDED
Binary file (18.1 kB). View file
|
|
meows/manifest.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"audio_filepath": "data/meow1.wav", "text": "meow", "weight": 1.0}
|
2 |
+
{"audio_filepath": "data/meow2.wav", "text": "meow", "weight": 1.0}
|
3 |
+
{"audio_filepath": "data/meow3.wav", "text": "meow", "weight": 1.0}
|
4 |
+
{"audio_filepath": "data/meow4.wav", "text": "meow", "weight": 1.0}
|
5 |
+
{"audio_filepath": "data/meow5.wav", "text": "meow", "weight": 1.0}
|
6 |
+
{"audio_filepath": "data/meow6.wav", "text": "meow", "weight": 1.0}
|
7 |
+
{"audio_filepath": "data/meow7.wav", "text": "meow", "weight": 1.0}
|
8 |
+
{"audio_filepath": "data/meow8.wav", "text": "meow", "weight": 1.0}
|
9 |
+
{"audio_filepath": "data/meow9.wav", "text": "meow", "weight": 1.0}
|
10 |
+
{"audio_filepath": "data/meow10.wav", "text": "meow with bold", "weight": 0.01}
|
predict.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
import librosa
|
5 |
+
import numpy as np
|
6 |
+
import gradio as gr
|
7 |
+
from typing import Any, List, Dict, Optional, Tuple
|
8 |
+
|
9 |
+
from utils import meow_stretch, get_word_lengths
|
10 |
+
from config import config, BaseConfig
|
11 |
+
|
12 |
+
# Gradio input/output configurations: a plain text box in, an audio player out.
inputs: str = "text"
outputs: gr.Audio = gr.Audio()
|
15 |
+
|
16 |
+
def load_meows(cfg: "BaseConfig") -> List[Dict[str, Any]]:
    '''
    Load every meow clip listed in the manifest at cfg.manifest_path.

    The manifest holds one JSON object per line; each object's
    'audio_filepath' is resolved relative to the manifest's directory.
    Blank lines are skipped.

    :param cfg: settings object providing manifest_path and sample_rate
    :return: the manifest entries, each extended with 'audio' (the decoded
        waveform) and 'rate' (its sample rate)
    '''
    meow_dir = os.path.dirname(cfg.manifest_path)

    items = []
    with open(cfg.manifest_path, mode='r', encoding='utf-8') as fr:
        for line in fr:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            item = json.loads(line)
            # Resample to cfg.sample_rate: predict() labels its output with
            # that rate, so clips kept at a different native rate would play
            # back at the wrong speed and pitch.
            item['audio'], item['rate'] = librosa.load(
                os.path.join(meow_dir, item['audio_filepath']),
                sr=cfg.sample_rate,
            )
            items.append(item)

    return items
|
30 |
+
|
31 |
+
def extract_meows_weights(items: List[Dict[str, Any]]) -> Tuple[List[np.ndarray], List[float]]:
    '''
    Split loaded manifest entries into two parallel lists.

    :param items: entries carrying an 'audio' and a 'weight' key
    :return: (audio arrays, weights), both in the order of items
    '''
    clips: List[np.ndarray] = []
    clip_weights: List[float] = []
    for entry in items:
        clips.append(entry['audio'])
        clip_weights.append(entry['weight'])
    return clips, clip_weights
|
35 |
+
|
36 |
+
# Load the meow clips and their sampling weights once, at import time.
meow_items = load_meows(config)
meows, weights = extract_meows_weights(meow_items)
|
39 |
+
|
40 |
+
def predict(text: str) -> Optional[Tuple[int, np.ndarray]]:
    '''
    Turn text into audio made of one meow per word.

    For every word a meow clip is drawn at random (weighted by the manifest
    weights) and time-stretched according to the word's length; the
    stretched clips are then joined end to end.

    :param text: the text to "translate"
    :return: (sample rate, waveform) for the Gradio audio output, or None
        when text contains no words
    '''
    word_lengths = get_word_lengths(text)
    if not word_lengths:
        # Empty or punctuation-only input: np.concatenate raises ValueError
        # on an empty list, so report "no audio" instead.
        return None

    selected_meows = random.choices(meows, weights=weights, k=len(word_lengths))
    transformed_meows = [
        meow_stretch(
            meow, wl,
            init_factor=config.init_factor,
            add_factor=config.add_factor,
            power_factor=config.power_factor,
        )
        for meow, wl in zip(selected_meows, word_lengths)
    ]

    result_meows = np.concatenate(transformed_meows, axis=0)

    return (config.sample_rate, result_meows)
|
56 |
+
|
57 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.24.1
|
2 |
+
librosa==0.10.0.post2
|
3 |
+
numpy==1.23.5
|
utils.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
def stretch(x: np.ndarray, factor: float, nfft: int = 2048) -> np.ndarray:
    '''
    @author: Gagandeep Singh, 29 Oct, 2018
    https://github.com/gaganbahga/time_stretch

    Change the duration of an audio sequence in the frequency domain: the
    STFT is resampled along time at steps of `factor` frames, interpolating
    magnitudes and accumulating phase, and then inverted.

    :param x: np.ndarray, audio array in PCM float32 format
    :param factor: float, step (in STFT frames) between successive output
        frames; values > 1 shorten the audio, values < 1 lengthen it
    :param nfft: int, FFT size
    :return: np.ndarray, time stretched audio
    :raises ValueError: if factor is not a positive number
    '''
    if not factor > 0:
        raise ValueError(f"factor must be positive, got {factor!r}")

    hop = nfft // 4  # frame shift, passed explicitly to both stft and istft
    # time-major layout: one row per frame, one column per frequency bin
    stft = librosa.stft(x, n_fft=nfft, hop_length=hop).transpose()
    stft_cols = stft.shape[1]

    times = np.arange(0, stft.shape[0], factor)  # times at which new FFT to be calculated
    stft_new = np.zeros((len(times), stft_cols), dtype=np.complex128)
    # expected phase advance of each bin over one hop
    phase_adv = (2 * np.pi * hop * np.arange(0, stft_cols)) / nfft
    phase = np.angle(stft[0])

    # pad one zero frame so the last time step still has a right neighbour
    stft = np.concatenate((stft, np.zeros((1, stft_cols))), axis=0)

    for i, time in enumerate(times):
        left_frame = int(np.floor(time))
        local_frames = stft[[left_frame, left_frame + 1], :]
        right_wt = time - np.floor(time)  # weight on right frame out of 2
        local_mag = (1 - right_wt) * np.absolute(local_frames[0, :]) + right_wt * np.absolute(local_frames[1, :])
        # phase deviation from the expected advance, wrapped into [0, 2*pi)
        local_dphi = np.angle(local_frames[1, :]) - np.angle(local_frames[0, :]) - phase_adv
        local_dphi = local_dphi - 2 * np.pi * np.floor(local_dphi / (2 * np.pi))
        stft_new[i, :] = local_mag * np.exp(phase * 1j)
        # accumulate phase for the next output frame
        phase += local_dphi + phase_adv

    return librosa.istft(stft_new.transpose(), hop_length=hop, n_fft=nfft)
|
40 |
+
|
41 |
+
def meow_stretch(
    x: np.ndarray, character_len: int,
    init_factor: float = 0.3, add_factor: float = 0.2,
    power_factor: float = 0.8, nfft: int = 2048
) -> np.ndarray:
    '''
    Lengthen a meow according to the length of the word it stands for.

    The duration factor is init_factor + (add_factor * character_len) ** power_factor;
    the sub-linear power keeps long words from producing incredibly long meows.

    :param x: np.ndarray, the meow waveform
    :param character_len: int, number of characters in the word
    :param init_factor: float, constant part of the duration factor
    :param add_factor: float, per-character contribution before the power is applied
    :param power_factor: float, exponent applied to the per-character term
    :param nfft: int, FFT size handed to stretch
    :return: np.ndarray, the stretched meow
    '''
    length_term = (add_factor * character_len) ** power_factor
    duration_factor = init_factor + length_term
    # stretch() takes the step between frames, i.e. the inverse of the duration factor
    return stretch(x, 1 / duration_factor, nfft=nfft)
|
52 |
+
|
53 |
+
def get_word_lengths(text_input: str) -> List[int]:
    '''
    Return the length of every whitespace-separated word in text_input,
    measured after the characters in string.punctuation have been removed.
    '''
    strip_punctuation = str.maketrans('', '', string.punctuation)
    words = text_input.translate(strip_punctuation).split()
    return list(map(len, words))
|