Spaces:

chaitanya9
/

emotion_recognizer

Runtime error

App Files Files Community

emotion_recognizer / data_extractor.py

chaitanya9

Upload data_extractor.py

27bf1d6 about 3 years ago

raw

history blame contribute delete

11.3 kB


	import numpy as np
	import pandas as pd
	import pickle
	import tqdm
	import os

	from utils import get_label, extract_feature, get_first_letters
	from collections import defaultdict


	class AudioExtractor:
	    """Featurize audio clips and hold them for machine learning algorithms.

	    Audio paths, emotion labels and extracted features are stored per
	    partition in `train_*` / `test_*` attributes. These attributes do not
	    exist until the corresponding partition has been loaded.
	    """

	    def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
	                 emotions=['sad', 'neutral', 'happy'], balance=True):
	        """
	        Params:
	            audio_config (dict): the dictionary that indicates what features to extract from the audio file,
	                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
	                (i.e mfcc, chroma and mel)
	            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
	            features_folder_name (str): the folder to store output features extracted, default is "features".
	            classification (bool): whether it is a classification or regression, default is True (i.e classification)
	            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
	            balance (bool): whether to balance dataset (both training and testing), default is True
	        """
	        # a falsy config (None or an empty dict) falls back to the default feature set
	        self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
	        self.verbose = verbose
	        self.features_folder_name = features_folder_name
	        self.classification = classification
	        # NOTE: the default list is shared between instances; it is only read here, never mutated
	        self.emotions = emotions
	        self.balance = balance
	        # input dimension, filled in once features have been extracted or loaded
	        self.input_dimension = None

	    def _get_partition(self, partition):
	        """Return `(audio_paths, emotions, features)` stored for `partition`.

	        Raises:
	            TypeError: if `partition` is neither "train" nor "test"
	            AttributeError: if that partition has not been loaded yet
	        """
	        if partition == "train":
	            return self.train_audio_paths, self.train_emotions, self.train_features
	        if partition == "test":
	            return self.test_audio_paths, self.test_emotions, self.test_features
	        raise TypeError("Invalid partition, must be either train/test")

	    def _set_partition(self, partition, audio_paths, emotions, features):
	        """Store `audio_paths`, `emotions` and `features` as the data of `partition`.

	        Raises:
	            TypeError: if `partition` is neither "train" nor "test"
	        """
	        if partition == "train":
	            self.train_audio_paths = audio_paths
	            self.train_emotions = emotions
	            self.train_features = features
	        elif partition == "test":
	            self.test_audio_paths = audio_paths
	            self.test_emotions = emotions
	            self.test_features = features
	        else:
	            raise TypeError("Invalid partition, must be either train/test")

	    def _load_data(self, desc_files, partition, shuffle):
	        """Load `partition` from `desc_files`, then optionally balance and shuffle it.

	        Balancing runs only while `self.balance` is truthy; shuffling runs only
	        when `shuffle` is truthy.
	        """
	        self.load_metadata_from_desc_file(desc_files, partition)
	        # balancing the datasets ( both training or testing )
	        if self.balance:
	            if partition == "train":
	                self.balance_training_data()
	            elif partition == "test":
	                self.balance_testing_data()
	            else:
	                raise TypeError("Invalid partition, must be either train/test")
	        if shuffle:
	            self.shuffle_data_by_partition(partition)

	    def load_train_data(self, desc_files=["train_speech.csv"], shuffle=False):
	        """Loads training data from the metadata files `desc_files`"""
	        self._load_data(desc_files, "train", shuffle)

	    def load_test_data(self, desc_files=["test_speech.csv"], shuffle=False):
	        """Loads testing data from the metadata files `desc_files`"""
	        self._load_data(desc_files, "test", shuffle)

	    def shuffle_data_by_partition(self, partition):
	        """Shuffle the stored data of `partition` with the module-level `shuffle_data`.

	        Raises:
	            TypeError: if `partition` is neither "train" nor "test"
	        """
	        audio_paths, emotions, features = self._get_partition(partition)
	        audio_paths, emotions, features = shuffle_data(audio_paths, emotions, features)
	        self._set_partition(partition, audio_paths, emotions, features)

	    def load_metadata_from_desc_file(self, desc_files, partition):
	        """Read metadata from a CSV file & Extract and loads features of audio files
	        Params:
	            desc_files (list): list of description files (csv files) to read from
	            partition (str): whether is "train" or "test"
	        Raises:
	            TypeError: if `partition` is neither "train" nor "test", or if regression
	                is requested with a number of emotions other than 3 or 5
	        """
	        # fail fast: reject a bad partition before any extraction or cache file is written
	        if partition not in ("train", "test"):
	            raise TypeError("Invalid partition, must be either train/test")
	        # the empty seed frame keeps the 'path' / 'emotion' columns present even when
	        # `desc_files` is empty; all frames are concatenated in a single pass
	        frames = [pd.DataFrame({'path': [], 'emotion': []})]
	        frames.extend(pd.read_csv(desc_file) for desc_file in desc_files)
	        df = pd.concat(frames, sort=False)
	        if self.verbose:
	            print("[*] Loading audio file paths and its corresponding labels...")
	        # get columns
	        audio_paths, emotions = list(df['path']), list(df['emotion'])
	        # if not classification, convert emotions to numbers
	        if not self.classification:
	            # so naive and need to be implemented
	            # in a better way
	            if len(self.emotions) == 3:
	                self.categories = {'sad': 1, 'neutral': 2, 'happy': 3}
	            elif len(self.emotions) == 5:
	                self.categories = {'angry': 1, 'sad': 2, 'neutral': 3, 'ps': 4, 'happy': 5}
	            else:
	                raise TypeError("Regression is only for either ['sad', 'neutral', 'happy'] or ['angry', 'sad', 'neutral', 'ps', 'happy']")
	            emotions = [self.categories[e] for e in emotions]
	        # make features folder if does not exist (also handles nested folder names)
	        os.makedirs(self.features_folder_name, exist_ok=True)
	        # get label for features
	        label = get_label(self.audio_config)
	        # construct features file name
	        n_samples = len(audio_paths)
	        first_letters = get_first_letters(self.emotions)
	        name = os.path.join(self.features_folder_name, f"{partition}_{label}_{first_letters}_{n_samples}.npy")
	        if os.path.isfile(name):
	            # if file already exists, just load then
	            if self.verbose:
	                print("[+] Feature file already exists, loading...")
	            features = np.load(name)
	            # the extraction branch records the size of one feature; mirror that for
	            # cached features so `input_dimension` is not left as None
	            if self.input_dimension is None and features.ndim >= 2:
	                self.input_dimension = features.shape[1]
	        else:
	            # file does not exist, extract those features and dump them into the file
	            features = []
	            for audio_file in tqdm.tqdm(audio_paths, f"Extracting features for {partition}"):
	                feature = extract_feature(audio_file, **self.audio_config)
	                if self.input_dimension is None:
	                    self.input_dimension = feature.shape[0]
	                features.append(feature)
	            # convert to numpy array
	            features = np.array(features)
	            # save it
	            np.save(name, features)
	        if not hasattr(self, f"{partition}_audio_paths"):
	            # first load of this partition
	            self._set_partition(partition, audio_paths, emotions, features)
	            return
	        # the partition was already loaded: append the new samples to it
	        if self.verbose:
	            if partition == "train":
	                print("[*] Adding additional training samples")
	            else:
	                print("[*] Adding additional testing samples")
	        known_paths, known_emotions, known_features = self._get_partition(partition)
	        known_paths += audio_paths
	        known_emotions += emotions
	        self._set_partition(partition, known_paths, known_emotions,
	                            np.vstack((known_features, features)))

	    def _balance_data(self, partition):
	        """Trim `partition` so that every class keeps the same number of samples.

	        The target size is the sample count of the least represented class.
	        For each class the first samples, in stored order, are kept, and the
	        result is grouped by class in order of first appearance. Afterwards the
	        partition's emotions, features and audio paths are plain lists.

	        If some class has no sample at all, nothing is trimmed and
	        `self.balance` is set to False instead.

	        Raises:
	            TypeError: if `partition` is neither "train" nor "test"
	        """
	        audio_paths, emotions, features = self._get_partition(partition)

	        if self.classification:
	            classes = list(self.emotions)
	        else:
	            # regression, take actual numbers, not label emotion
	            classes = list(self.categories.values())

	        # count the samples of every requested class in a single pass
	        tally = dict.fromkeys(classes, 0)
	        for emotion in emotions:
	            if emotion in tally:
	                tally[emotion] += 1
	        # get the minimum data samples to balance to
	        minimum = min(tally.values())
	        if minimum == 0:
	            # won't balance, otherwise 0 samples will be loaded
	            print("[!] One class has 0 samples, setting balance to False")
	            self.balance = False
	            return
	        if self.verbose:
	            print("[*] Balancing the dataset to the minimum value:", minimum)

	        kept = defaultdict(list)
	        taken = dict.fromkeys(classes, 0)
	        for emotion, feature, audio_path in zip(emotions, features, audio_paths):
	            # a label outside the requested classes raises KeyError here
	            if taken[emotion] >= minimum:
	                # minimum value exceeded
	                continue
	            taken[emotion] += 1
	            kept[emotion].append((feature, audio_path))

	        balanced_emotions, balanced_features, balanced_paths = [], [], []
	        for emotion, samples in kept.items():
	            for feature, audio_path in samples:
	                balanced_emotions.append(emotion)
	                balanced_features.append(feature)
	                balanced_paths.append(audio_path)

	        self._set_partition(partition, balanced_paths, balanced_emotions, balanced_features)

	    def balance_training_data(self):
	        """Balance the training partition, see `_balance_data`."""
	        self._balance_data("train")

	    def balance_testing_data(self):
	        """Balance the testing partition, see `_balance_data`."""
	        self._balance_data("test")


	def shuffle_data(audio_paths, emotions, features):
	    """Reorder three parallel sequences with one shared random permutation.

	    The permutation is drawn from `np.random.permutation` over the length of
	    `audio_paths`, so entries at the same position stay together.
	    Params:
	        audio_paths (list): Paths to audio clips
	        emotions (list): Emotions in each audio clip
	        features (list): features audio clips
	    Returns:
	        tuple: new lists `(audio_paths, emotions, features)` in shuffled order;
	        the inputs themselves are not modified
	    """
	    order = np.random.permutation(len(audio_paths))
	    shuffled_paths, shuffled_emotions, shuffled_features = [], [], []
	    for position in order:
	        shuffled_paths.append(audio_paths[position])
	        shuffled_emotions.append(emotions[position])
	        shuffled_features.append(features[position])
	    return shuffled_paths, shuffled_emotions, shuffled_features


	def load_data(train_desc_files, test_desc_files, audio_config=None, classification=True, shuffle=True,
	              balance=True, emotions=['sad', 'neutral', 'happy']):
	    """Load the training and testing sets through a silent `AudioExtractor`.

	    Params:
	        train_desc_files (list): description files (csv files) of the training set
	        test_desc_files (list): description files (csv files) of the testing set
	        audio_config (dict): forwarded to `AudioExtractor`
	        classification (bool): forwarded to `AudioExtractor`
	        shuffle (bool): whether each partition is shuffled after loading
	        balance (bool): forwarded to `AudioExtractor`
	        emotions (list): forwarded to `AudioExtractor`
	    Returns:
	        dict: "X_train", "X_test", "y_train" and "y_test" as numpy arrays,
	        "train_audio_paths" and "test_audio_paths" as stored by the extractor,
	        and "balance", the extractor's `balance` attribute after loading
	    """
	    # the extractor is always created with verbose=0
	    extractor = AudioExtractor(
	        audio_config=audio_config,
	        classification=classification,
	        emotions=emotions,
	        balance=balance,
	        verbose=0,
	    )
	    # training set first, then testing set
	    extractor.load_train_data(train_desc_files, shuffle=shuffle)
	    extractor.load_test_data(test_desc_files, shuffle=shuffle)

	    result = {}
	    # X_train, X_test, y_train, y_test
	    result["X_train"] = np.array(extractor.train_features)
	    result["X_test"] = np.array(extractor.test_features)
	    result["y_train"] = np.array(extractor.train_emotions)
	    result["y_test"] = np.array(extractor.test_emotions)
	    result["train_audio_paths"] = extractor.train_audio_paths
	    result["test_audio_paths"] = extractor.test_audio_paths
	    result["balance"] = extractor.balance
	    return result