# Utility helpers for the speech-emotion-recognition app:
# audio feature extraction, label/config helpers, and estimator loading.
import soundfile | |
import librosa | |
import numpy as np | |
import pickle | |
import os | |
from convert_wavs import convert_audio | |
# Emotion labels recognized by this project; `get_audio_config` / model code
# elsewhere is expected to use only labels from this set.
AVAILABLE_EMOTIONS = {
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fear",
    "disgust",
    "ps",  # pleasant surprised
    "boredom"
}
def get_label(audio_config):
    """Return a label string describing which features are enabled.

    Args:
        audio_config (dict): maps feature names ("mfcc", "chroma", "mel",
            "contrast", "tonnetz") to booleans. Missing keys are treated
            as False rather than raising KeyError.

    Returns:
        str: enabled feature names joined by "-", in the fixed order
        mfcc, chroma, mel, contrast, tonnetz.

    e.g:
        audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
        get_label(audio_config): 'mfcc-chroma'
    """
    features = ["mfcc", "chroma", "mel", "contrast", "tonnetz"]
    # .get() tolerates partially-specified configs (absent key == disabled)
    return "-".join(feature for feature in features if audio_config.get(feature))
def get_dropout_str(dropout, n_layers=3):
    """Encode a dropout spec as an underscore-joined string.

    Args:
        dropout: either a list of per-layer dropout rates, or a single
            number (int or float) applied to every layer.
        n_layers (int): number of layers when `dropout` is a scalar.

    Returns:
        str: e.g. [0.1, 0.2] -> "0.1_0.2"; 0.3 with n_layers=3 ->
        "0.3_0.3_0.3". Returns None for unsupported types (preserves
        the original fallthrough behavior).
    """
    if isinstance(dropout, list):
        return "_".join(str(d) for d in dropout)
    # accept ints too (e.g. dropout=0), which the original float-only
    # check silently turned into None
    elif isinstance(dropout, (int, float)):
        return "_".join(str(dropout) for _ in range(n_layers))
def get_first_letters(emotions):
    """Return the uppercased first letter of each emotion, sorted and concatenated."""
    initials = sorted(emotion[0].upper() for emotion in emotions)
    return "".join(initials)
def extract_feature(file_name, **kwargs):
    """
    Extract features from audio passed as `file_name`.

    NOTE(review): despite its name, `file_name` is indexed as a
    (sample_rate, audio_data) tuple (e.g. the value produced by a Gradio
    microphone/audio component) — it is NOT a path. The original
    path-based loading/conversion code was commented out upstream.

    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)

    Returns:
        np.ndarray: 1-D vector of the requested features, concatenated in
        the order mfcc, chroma, mel, contrast, tonnetz.

    e.g:
    `features = extract_feature((sr, data), mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    sample_rate = file_name[0]
    X = file_name[1].astype("float32")
    # STFT is only needed by chroma and contrast; compute it once for both
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma_feat = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma_feat))
    if mel:
        # BUG FIX: librosa >= 0.10 makes the audio argument keyword-only,
        # so the old positional call `melspectrogram(X, sr=...)` raises
        # TypeError; pass it explicitly as y=X.
        mel_feat = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_feat))
    if contrast:
        contrast_feat = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast_feat))
    if tonnetz:
        tonnetz_feat = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz_feat))
    return result
def get_best_estimators(classification):
    """
    Load the estimators pickled in the `grid` folder.

    Args:
        classification (bool): True loads `grid/best_classifiers.pickle`,
            False loads `grid/best_regressors.pickle`.

    Returns:
        the unpickled estimators object.

    Note that if you want to use different or more estimators,
    you can fine tune the parameters in `grid_search.py` script
    and run it again ( may take hours ).

    SECURITY NOTE(review): pickle.load executes arbitrary code from the
    file — only load pickles produced by this project, never untrusted ones.
    """
    filename = "grid/best_classifiers.pickle" if classification else "grid/best_regressors.pickle"
    # context manager fixes the original's leaked file handle
    with open(filename, "rb") as f:
        return pickle.load(f)
def get_audio_config(features_list):
    """
    Converts a list of features into a dictionary understandable by
    `data_extractor.AudioExtractor` class

    Raises:
        TypeError: if a feature name is not one of the supported five.
    """
    audio_config = dict.fromkeys(("mfcc", "chroma", "mel", "contrast", "tonnetz"), False)
    for feature in features_list:
        if feature in audio_config:
            audio_config[feature] = True
        else:
            raise TypeError(f"Feature passed: {feature} is not recognized.")
    return audio_config