Spaces:
Runtime error
Runtime error
File size: 4,428 Bytes
9af4f2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import soundfile
import librosa
import numpy as np
import pickle
import os
from convert_wavs import convert_audio
# Set of emotion labels the rest of the pipeline knows how to handle.
# NOTE(review): these look like the union of RAVDESS/TESS/EMO-DB style labels
# ("ps" = pleasant surprise, "boredom" from EMO-DB) — confirm against the
# dataset loaders that consume this set.
AVAILABLE_EMOTIONS = {
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fear",
    "disgust",
    "ps", # pleasant surprised
    "boredom"
}
def get_label(audio_config):
    """Returns label corresponding to which features are to be extracted
    e.g:
    audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
    get_label(audio_config): 'mfcc-chroma'
    """
    # Fixed ordering so the same config always yields the same label.
    feature_order = ("mfcc", "chroma", "mel", "contrast", "tonnetz")
    return "-".join(name for name in feature_order if audio_config[name])
def get_dropout_str(dropout, n_layers=3):
    """Return an underscore-joined string of per-layer dropout rates.

    Args:
        dropout: either a list of per-layer rates, or a single number
            applied to every layer.
        n_layers: number of layers when `dropout` is a single number.

    Returns:
        str such as "0.3_0.3_0.3".

    Raises:
        TypeError: if `dropout` is neither a list nor a number.
    """
    if isinstance(dropout, list):
        return "_".join(str(d) for d in dropout)
    # Fix: accept ints too (e.g. dropout=0) — the original only matched
    # float and silently returned None for any other type.
    if isinstance(dropout, (int, float)):
        return "_".join(str(dropout) for _ in range(n_layers))
    raise TypeError(f"dropout must be a list or a number, got {type(dropout).__name__}")
def get_first_letters(emotions):
    """Return the uppercased first letter of each emotion, sorted and joined.

    e.g. ["sad", "happy", "angry"] -> "AHS"
    """
    initials = [emotion[0].upper() for emotion in emotions]
    initials.sort()
    return "".join(initials)
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio passed in `file_name`.

    NOTE(review): despite its name, `file_name` is NOT a path here — the
    current code indexes it as a (sample_rate, audio_samples) tuple, i.e.
    the return shape of `scipy.io.wavfile.read`. Confirm with callers.

    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`

    Returns:
        1-D numpy array: the mean-over-time of each requested feature,
        horizontally stacked in the order mfcc, chroma, mel, contrast,
        tonnetz. Empty array if no feature flag is passed.
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    # file_name is (sample_rate, samples); librosa expects float input.
    X = file_name[1].astype("float32")
    sample_rate = file_name[0]
    # STFT is only needed by the chroma and contrast features, so compute
    # it once, and only when one of them is requested.
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # Fix: librosa >= 0.10 made the audio argument keyword-only; the
        # old positional call `melspectrogram(X, sr=...)` raises TypeError.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
def get_best_estimators(classification):
    """
    Loads the estimators that are pickled in `grid` folder

    Note that if you want to use different or more estimators,
    you can fine tune the parameters in `grid_search.py` script
    and run it again ( may take hours )

    Args:
        classification: truthy to load the classifiers pickle,
            falsy to load the regressors pickle.
    """
    path = "grid/best_classifiers.pickle" if classification else "grid/best_regressors.pickle"
    # Fix: the original left the file handle open; use a context manager.
    # NOTE: pickle.load executes arbitrary code on load — only use with
    # pickles produced by this project's own grid_search.py.
    with open(path, "rb") as f:
        return pickle.load(f)
def get_audio_config(features_list):
    """
    Converts a list of features into a dictionary understandable by
    `data_extractor.AudioExtractor` class

    Raises TypeError for any feature name outside the supported set.
    """
    supported = ("mfcc", "chroma", "mel", "contrast", "tonnetz")
    audio_config = dict.fromkeys(supported, False)
    for feature in features_list:
        if feature not in audio_config:
            raise TypeError(f"Feature passed: {feature} is not recognized.")
        audio_config[feature] = True
    return audio_config
|