chaitanya9 committed
Commit 27bf1d6 · 1 parent: 9a38ba2

Upload data_extractor.py

Files changed (1)
  1. data_extractor.py +249 -0
data_extractor.py ADDED
@@ -0,0 +1,249 @@
import numpy as np
import pandas as pd
import tqdm
import os

from utils import get_label, extract_feature, get_first_letters
from collections import defaultdict
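
# NOTE: get_label, extract_feature and get_first_letters are provided by a
# separate utils module, which is not included in this commit.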


class AudioExtractor:
    """A class used to featurize audio clips and provide them to the
    machine learning algorithms for training and testing."""

    def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
                 emotions=['sad', 'neutral', 'happy'], balance=True):
        """
        Params:
            audio_config (dict): dictionary that indicates which features to extract from the audio files,
                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
                (i.e. MFCC, chroma and mel spectrogram)
            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
            features_folder_name (str): folder in which to store the extracted features, default is "features"
            classification (bool): whether the task is classification or regression, default is True (i.e. classification)
            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
            balance (bool): whether to balance the dataset (both training and testing), default is True
        """
        self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
        self.verbose = verbose
        self.features_folder_name = features_folder_name
        self.classification = classification
        self.emotions = emotions
        self.balance = balance
        # input dimension, set once the first feature vector is extracted
        self.input_dimension = None

    def _load_data(self, desc_files, partition, shuffle):
        self.load_metadata_from_desc_file(desc_files, partition)
        # balance the dataset (training or testing) if requested
        if self.balance:
            if partition == "train":
                self.balance_training_data()
            elif partition == "test":
                self.balance_testing_data()
            else:
                raise ValueError("Invalid partition, must be either 'train' or 'test'")
        if shuffle:
            self.shuffle_data_by_partition(partition)

    def load_train_data(self, desc_files=["train_speech.csv"], shuffle=False):
        """Loads training data from the metadata files `desc_files`."""
        self._load_data(desc_files, "train", shuffle)

    def load_test_data(self, desc_files=["test_speech.csv"], shuffle=False):
        """Loads testing data from the metadata files `desc_files`."""
        self._load_data(desc_files, "test", shuffle)

    def shuffle_data_by_partition(self, partition):
        if partition == "train":
            self.train_audio_paths, self.train_emotions, self.train_features = shuffle_data(
                self.train_audio_paths, self.train_emotions, self.train_features)
        elif partition == "test":
            self.test_audio_paths, self.test_emotions, self.test_features = shuffle_data(
                self.test_audio_paths, self.test_emotions, self.test_features)
        else:
            raise ValueError("Invalid partition, must be either 'train' or 'test'")

    def load_metadata_from_desc_file(self, desc_files, partition):
        """Reads metadata from CSV files, then extracts and loads the features of the audio files.
        Params:
            desc_files (list): list of description files (CSV files) to read from
            partition (str): either "train" or "test"
        """
        # empty dataframe
        df = pd.DataFrame({'path': [], 'emotion': []})
        for desc_file in desc_files:
            # concatenate dataframes
            df = pd.concat((df, pd.read_csv(desc_file)), sort=False)
        if self.verbose:
            print("[*] Loading audio file paths and their corresponding labels...")
        # get columns
        audio_paths, emotions = list(df['path']), list(df['emotion'])
        # if regression, convert emotion labels to numbers
        if not self.classification:
            # naive mapping; should be implemented in a better way
            if len(self.emotions) == 3:
                self.categories = {'sad': 1, 'neutral': 2, 'happy': 3}
            elif len(self.emotions) == 5:
                self.categories = {'angry': 1, 'sad': 2, 'neutral': 3, 'ps': 4, 'happy': 5}
            else:
                raise ValueError("Regression is only supported for either ['sad', 'neutral', 'happy'] or ['angry', 'sad', 'neutral', 'ps', 'happy']")
            emotions = [self.categories[e] for e in emotions]
        # make the features folder if it does not exist
        if not os.path.isdir(self.features_folder_name):
            os.mkdir(self.features_folder_name)
        # get the label describing which features are enabled
        label = get_label(self.audio_config)
        # construct the features file name
        n_samples = len(audio_paths)
        first_letters = get_first_letters(self.emotions)
        name = os.path.join(self.features_folder_name, f"{partition}_{label}_{first_letters}_{n_samples}.npy")
        if os.path.isfile(name):
            # if the file already exists, just load it
            if self.verbose:
                print("[+] Feature file already exists, loading...")
            features = np.load(name)
        else:
            # file does not exist, extract the features and dump them into it
            features = []
            append = features.append
            for audio_file in tqdm.tqdm(audio_paths, f"Extracting features for {partition}"):
                feature = extract_feature(audio_file, **self.audio_config)
                if self.input_dimension is None:
                    self.input_dimension = feature.shape[0]
                append(feature)
            # convert to numpy array
            features = np.array(features)
            # save it
            np.save(name, features)
        if partition == "train":
            try:
                self.train_audio_paths
            except AttributeError:
                # first time training data is loaded
                self.train_audio_paths = audio_paths
                self.train_emotions = emotions
                self.train_features = features
            else:
                if self.verbose:
                    print("[*] Adding additional training samples")
                self.train_audio_paths += audio_paths
                self.train_emotions += emotions
                self.train_features = np.vstack((self.train_features, features))
        elif partition == "test":
            try:
                self.test_audio_paths
            except AttributeError:
                # first time testing data is loaded
                self.test_audio_paths = audio_paths
                self.test_emotions = emotions
                self.test_features = features
            else:
                if self.verbose:
                    print("[*] Adding additional testing samples")
                self.test_audio_paths += audio_paths
                self.test_emotions += emotions
                self.test_features = np.vstack((self.test_features, features))
        else:
            raise ValueError("Invalid partition, must be either 'train' or 'test'")
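
    # Note: each description CSV is expected to contain two columns, "path" and
    # "emotion", one row per audio clip, for example (file names illustrative only):
    #
    #   path,emotion
    #   data/training/Actor_01/03-01-03-01-01-01-01.wav,happy
    #   data/training/Actor_01/03-01-04-01-01-01-01.wav,sad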

    def _balance_data(self, partition):
        if partition == "train":
            emotions = self.train_emotions
            features = self.train_features
            audio_paths = self.train_audio_paths
        elif partition == "test":
            emotions = self.test_emotions
            features = self.test_features
            audio_paths = self.test_audio_paths
        else:
            raise ValueError("Invalid partition, must be either 'train' or 'test'")

        count = []
        if self.classification:
            for emotion in self.emotions:
                count.append(len([e for e in emotions if e == emotion]))
        else:
            # regression: count the numeric categories, not the emotion labels
            for emotion in self.categories.values():
                count.append(len([e for e in emotions if e == emotion]))
        # the minimum number of samples per class to balance to
        minimum = min(count)
        if minimum == 0:
            # don't balance, otherwise 0 samples would be loaded
            print("[!] One class has 0 samples, setting balance to False")
            self.balance = False
            return
        if self.verbose:
            print("[*] Balancing the dataset to the minimum value:", minimum)
        d = defaultdict(list)
        if self.classification:
            counter = {e: 0 for e in self.emotions}
        else:
            counter = {e: 0 for e in self.categories.values()}
        for emotion, feature, audio_path in zip(emotions, features, audio_paths):
            if counter[emotion] >= minimum:
                # minimum reached for this class, skip the sample
                continue
            counter[emotion] += 1
            d[emotion].append((feature, audio_path))

        emotions, features, audio_paths = [], [], []
        for emotion, features_audio_paths in d.items():
            for feature, audio_path in features_audio_paths:
                emotions.append(emotion)
                features.append(feature)
                audio_paths.append(audio_path)

        if partition == "train":
            self.train_emotions = emotions
            self.train_features = features
            self.train_audio_paths = audio_paths
        elif partition == "test":
            self.test_emotions = emotions
            self.test_features = features
            self.test_audio_paths = audio_paths
        else:
            raise ValueError("Invalid partition, must be either 'train' or 'test'")

    def balance_training_data(self):
        self._balance_data("train")

    def balance_testing_data(self):
        self._balance_data("test")


def shuffle_data(audio_paths, emotions, features):
    """Shuffles the data (called after making a complete pass through
    training or validation data during the training process).
    Params:
        audio_paths (list): paths to audio clips
        emotions (list): emotion in each audio clip
        features (list): features of each audio clip
    """
    p = np.random.permutation(len(audio_paths))
    audio_paths = [audio_paths[i] for i in p]
    emotions = [emotions[i] for i in p]
    features = [features[i] for i in p]
    return audio_paths, emotions, features


def load_data(train_desc_files, test_desc_files, audio_config=None, classification=True, shuffle=True,
              balance=True, emotions=['sad', 'neutral', 'happy']):
    """Loads both training and testing data and returns them in a single dictionary."""
    # instantiate the extractor
    audiogen = AudioExtractor(audio_config=audio_config, classification=classification, emotions=emotions,
                              balance=balance, verbose=0)
    # load training data
    audiogen.load_train_data(train_desc_files, shuffle=shuffle)
    # load testing data
    audiogen.load_test_data(test_desc_files, shuffle=shuffle)
    return {
        "X_train": np.array(audiogen.train_features),
        "X_test": np.array(audiogen.test_features),
        "y_train": np.array(audiogen.train_emotions),
        "y_test": np.array(audiogen.test_emotions),
        "train_audio_paths": audiogen.train_audio_paths,
        "test_audio_paths": audiogen.test_audio_paths,
        "balance": audiogen.balance,
    }
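
A minimal usage sketch, assuming the description CSVs exist with the default names used above ("train_speech.csv" / "test_speech.csv") and the "path"/"emotion" columns this module reads; extracted features are cached as .npy files under the "features" folder:

    from data_extractor import load_data

    data = load_data(["train_speech.csv"], ["test_speech.csv"],
                     audio_config={'mfcc': True, 'chroma': True, 'mel': True,
                                   'contrast': False, 'tonnetz': False},
                     classification=True, emotions=['sad', 'neutral', 'happy'])
    # feature matrices and label vectors, ready for an sklearn/keras model
    print(data["X_train"].shape, data["X_test"].shape)
    print(data["y_train"].shape, data["y_test"].shape)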
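
Since utils.py is not part of this commit, the two filename helpers can only be inferred from their call sites (`get_label(self.audio_config)` and `get_first_letters(self.emotions)`). Below is a hypothetical sketch consistent with cache names like `train_mfcc-chroma-mel_HNS_<n_samples>.npy`; the real implementations may differ:

    def get_label(audio_config):
        # hypothetical: join the names of the enabled features,
        # e.g. {'mfcc': True, 'chroma': True, 'mel': True} -> "mfcc-chroma-mel"
        return "-".join(f for f, enabled in audio_config.items() if enabled)

    def get_first_letters(emotions):
        # hypothetical: abbreviate the emotion list by first letters,
        # e.g. ['sad', 'neutral', 'happy'] -> "HNS"
        return "".join(sorted(e[0].upper() for e in emotions))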