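# NOTE(review): the text "Spaces: Runtime error / Runtime error" preceded the
# source here; it appears to be page residue rather than part of the module.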
import numpy as np | |
import pandas as pd | |
import pickle | |
import tqdm | |
import os | |
from utils import get_label, extract_feature, get_first_letters | |
from collections import defaultdict | |
class AudioExtractor:
    """A class that is used to featurize audio clips, and provide
    them to the machine learning algorithms for training and testing.

    Loaded samples are stored per partition in the attributes
    `train_audio_paths` / `train_emotions` / `train_features` and
    `test_audio_paths` / `test_emotions` / `test_features`. These attributes
    only exist once the corresponding partition has been loaded.
    """

    # valid partitions, mapped to the word used in the verbose messages
    _PARTITION_NAMES = {"train": "training", "test": "testing"}

    def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
                 emotions=['sad', 'neutral', 'happy'], balance=True):
        """
        Params:
            audio_config (dict): the dictionary that indicates what features to extract from the audio file,
                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
                (i.e mfcc, chroma and mel)
            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
            features_folder_name (str): the folder to store output features extracted, default is "features".
            classification (bool): whether it is a classification or regression, default is True (i.e classification)
            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
            balance (bool): whether to balance dataset (both training and testing), default is True
        """
        self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
        self.verbose = verbose
        self.features_folder_name = features_folder_name
        self.classification = classification
        # copy, so the instance never aliases the shared default list
        # (or the caller's list) and cannot mutate it by accident
        self.emotions = list(emotions)
        self.balance = balance
        # input dimension, filled in once features have been loaded
        self.input_dimension = None

    def _check_partition(self, partition):
        """Raise TypeError unless `partition` is "train" or "test"."""
        if partition not in self._PARTITION_NAMES:
            raise TypeError("Invalid partition, must be either train/test")

    def _get_partition(self, partition):
        """Return (audio_paths, emotions, features) stored for `partition`."""
        return (getattr(self, f"{partition}_audio_paths"),
                getattr(self, f"{partition}_emotions"),
                getattr(self, f"{partition}_features"))

    def _set_partition(self, partition, audio_paths, emotions, features):
        """Store (audio_paths, emotions, features) for `partition`."""
        setattr(self, f"{partition}_audio_paths", audio_paths)
        setattr(self, f"{partition}_emotions", emotions)
        setattr(self, f"{partition}_features", features)

    def _load_data(self, desc_files, partition, shuffle):
        """Load `partition` from `desc_files`, then balance and/or shuffle it.

        Balancing only happens while `self.balance` is true; an invalid
        partition is rejected by `load_metadata_from_desc_file`.
        """
        self.load_metadata_from_desc_file(desc_files, partition)
        # balancing the datasets ( both training or testing )
        if self.balance:
            if partition == "train":
                self.balance_training_data()
            else:
                self.balance_testing_data()
        if shuffle:
            self.shuffle_data_by_partition(partition)

    def load_train_data(self, desc_files=["train_speech.csv"], shuffle=False):
        """Loads training data from the metadata files `desc_files`"""
        self._load_data(desc_files, "train", shuffle)

    def load_test_data(self, desc_files=["test_speech.csv"], shuffle=False):
        """Loads testing data from the metadata files `desc_files`"""
        self._load_data(desc_files, "test", shuffle)

    def shuffle_data_by_partition(self, partition):
        """Shuffle the stored samples of `partition` with `shuffle_data`.

        Raises:
            TypeError: if `partition` is neither "train" nor "test".
        """
        self._check_partition(partition)
        audio_paths, emotions, features = shuffle_data(*self._get_partition(partition))
        self._set_partition(partition, audio_paths, emotions, features)

    def _emotions_to_numbers(self, emotions):
        """Map emotion labels to numbers for regression and set `self.categories`.

        The mapping is picked only from the number of configured emotions
        (3 or 5), so it is naive and should be implemented in a better way.
        """
        if len(self.emotions) == 3:
            self.categories = {'sad': 1, 'neutral': 2, 'happy': 3}
        elif len(self.emotions) == 5:
            self.categories = {'angry': 1, 'sad': 2, 'neutral': 3, 'ps': 4, 'happy': 5}
        else:
            raise TypeError("Regression is only for either ['sad', 'neutral', 'happy'] or ['angry', 'sad', 'neutral', 'ps', 'happy']")
        return [self.categories[e] for e in emotions]

    def _load_or_extract_features(self, audio_paths, partition):
        """Return the features of `audio_paths`, using the `.npy` cache file when present.

        The cache file name is built from the partition, the feature label,
        the first letters of the emotions and the number of samples.
        """
        # get label for features
        label = get_label(self.audio_config)
        # construct features file name
        n_samples = len(audio_paths)
        first_letters = get_first_letters(self.emotions)
        name = os.path.join(self.features_folder_name, f"{partition}_{label}_{first_letters}_{n_samples}.npy")
        if os.path.isfile(name):
            # if file already exists, just load then
            if self.verbose:
                print("[+] Feature file already exists, loading...")
            features = np.load(name)
        else:
            # file does not exist, extract those features and dump them into the file
            features = np.array([
                extract_feature(audio_file, **self.audio_config)
                for audio_file in tqdm.tqdm(audio_paths, f"Extracting features for {partition}")
            ])
            np.save(name, features)
        # record the per-sample dimension on both paths, so it is also
        # available when the features came from the cache file
        if self.input_dimension is None and features.ndim >= 2:
            self.input_dimension = features.shape[1]
        return features

    def load_metadata_from_desc_file(self, desc_files, partition):
        """Read metadata from a CSV file & Extract and loads features of audio files
        Params:
            desc_files (list): list of description files (csv files) to read from
            partition (str): whether is "train" or "test"
        Raises:
            TypeError: if `partition` is invalid, or if regression is requested
                with a number of emotions other than 3 or 5.
        """
        # fail fast: reject a bad partition before any file is read,
        # any feature is extracted or any cache file is written
        self._check_partition(partition)
        # empty dataframe
        df = pd.DataFrame({'path': [], 'emotion': []})
        for desc_file in desc_files:
            # concat dataframes
            df = pd.concat((df, pd.read_csv(desc_file)), sort=False)
        if self.verbose:
            print("[*] Loading audio file paths and its corresponding labels...")
        # get columns
        audio_paths, emotions = list(df['path']), list(df['emotion'])
        # if not classification, convert emotions to numbers
        if not self.classification:
            emotions = self._emotions_to_numbers(emotions)
        # make features folder if does not exist (nested folders included)
        os.makedirs(self.features_folder_name, exist_ok=True)
        features = self._load_or_extract_features(audio_paths, partition)
        if not hasattr(self, f"{partition}_audio_paths"):
            # first load of this partition
            self._set_partition(partition, audio_paths, emotions, features)
        else:
            if self.verbose:
                print(f"[*] Adding additional {self._PARTITION_NAMES[partition]} samples")
            old_paths, old_emotions, old_features = self._get_partition(partition)
            self._set_partition(partition,
                                old_paths + audio_paths,
                                old_emotions + emotions,
                                np.vstack((old_features, features)))

    def _balance_data(self, partition):
        """Cut every class of `partition` down to the size of the smallest class.

        Samples are kept in their stored order, the first `minimum` of each
        class, and are then regrouped class by class. If one class has no
        sample at all nothing is changed and `self.balance` is set to False.

        Raises:
            TypeError: if `partition` is neither "train" nor "test".
            KeyError: if a stored label is not one of the balanced classes.
        """
        self._check_partition(partition)
        audio_paths, emotions, features = self._get_partition(partition)
        if self.classification:
            classes = self.emotions
        else:
            # regression, take actual numbers, not label emotion
            classes = list(self.categories.values())
        count = [sum(1 for e in emotions if e == cls) for cls in classes]
        # get the minimum data samples to balance to
        minimum = min(count)
        if minimum == 0:
            # won't balance, otherwise 0 samples will be loaded
            print("[!] One class has 0 samples, setting balance to False")
            self.balance = False
            return
        if self.verbose:
            print("[*] Balancing the dataset to the minimum value:", minimum)
        kept = defaultdict(list)
        counter = {cls: 0 for cls in classes}
        for emotion, feature, audio_path in zip(emotions, features, audio_paths):
            if counter[emotion] >= minimum:
                # minimum value exceeded
                continue
            counter[emotion] += 1
            kept[emotion].append((feature, audio_path))
        balanced_emotions, balanced_features, balanced_paths = [], [], []
        for emotion, samples in kept.items():
            for feature, audio_path in samples:
                balanced_emotions.append(emotion)
                balanced_features.append(feature)
                balanced_paths.append(audio_path)
        self._set_partition(partition, balanced_paths, balanced_emotions, balanced_features)

    def balance_training_data(self):
        """Balance the loaded training samples (see `_balance_data`)."""
        self._balance_data("train")

    def balance_testing_data(self):
        """Balance the loaded testing samples (see `_balance_data`)."""
        self._balance_data("test")
def shuffle_data(audio_paths, emotions, features):
    """Reorder the three parallel sequences with one shared random permutation.

    Intended to be called after a complete pass through the training or
    validation data during the training process.

    Params:
        audio_paths (list): Paths to audio clips
        emotions (list): Emotions in each audio clip
        features (list): features audio clips
    Returns:
        tuple: (audio_paths, emotions, features) as new lists; entries that
            shared an index before the call still share one afterwards.
    """
    # a single draw, sized by `audio_paths`, drives all three reorderings
    order = np.random.permutation(len(audio_paths))

    def reorder(sequence):
        return [sequence[position] for position in order]

    return reorder(audio_paths), reorder(emotions), reorder(features)
def load_data(train_desc_files, test_desc_files, audio_config=None, classification=True, shuffle=True,
              balance=True, emotions=['sad', 'neutral', 'happy']):
    """Load the training and testing sets through a silent `AudioExtractor`.

    Params:
        train_desc_files (list): description files (csv files) of the training set
        test_desc_files (list): description files (csv files) of the testing set
        audio_config (dict): forwarded to `AudioExtractor`
        classification (bool): forwarded to `AudioExtractor`
        shuffle (bool): whether each set is shuffled after loading, default is True
        balance (bool): forwarded to `AudioExtractor`
        emotions (list): forwarded to `AudioExtractor`
    Returns:
        dict: "X_train", "X_test", "y_train" and "y_test" as numpy arrays,
            "train_audio_paths" and "test_audio_paths" as stored by the
            extractor, and "balance", the extractor's balance flag after loading.
    """
    # the extractor is always created with verbose=0
    extractor = AudioExtractor(
        audio_config=audio_config,
        classification=classification,
        emotions=emotions,
        balance=balance,
        verbose=0,
    )
    # training set first, then testing set
    extractor.load_train_data(train_desc_files, shuffle=shuffle)
    extractor.load_test_data(test_desc_files, shuffle=shuffle)

    data = {}
    data["X_train"] = np.array(extractor.train_features)
    data["X_test"] = np.array(extractor.test_features)
    data["y_train"] = np.array(extractor.train_emotions)
    data["y_test"] = np.array(extractor.test_emotions)
    data["train_audio_paths"] = extractor.train_audio_paths
    data["test_audio_paths"] = extractor.test_audio_paths
    data["balance"] = extractor.balance
    return data