Spaces:
Build error
Build error
feifeifeiliu
committed on
Commit
·
9b6fdf7
1
Parent(s):
865fd8a
Upload 25 files
Browse files
- data_utils/utils.py +16 -37
- visualise/rendering.py +1 -1
data_utils/utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import numpy as np
|
2 |
# import librosa #has to do this cause librosa is not supported on my server
|
3 |
-
import python_speech_features
|
4 |
from scipy.io import wavfile
|
5 |
from scipy import signal
|
6 |
import librosa
|
@@ -79,9 +78,9 @@ def get_melspec(audio_fn, eps=1e-6, fps = 25, sr=16000, n_mels=64):
|
|
79 |
'''
|
80 |
|
81 |
def extract_mfcc(audio,sample_rate=16000):
|
82 |
-
mfcc = zip(*python_speech_features.mfcc(audio,sample_rate, numcep=64, nfilt=64, nfft=2048, winstep=0.04))
|
83 |
-
mfcc = np.stack([np.array(i) for i in mfcc])
|
84 |
-
return
|
85 |
|
86 |
def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
|
87 |
y, sr = load_wav_old(audio_fn, sr=sr)
|
@@ -97,14 +96,14 @@ def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, w
|
|
97 |
n_fft=2048
|
98 |
|
99 |
#hard coded for 25 fps
|
100 |
-
if not smlpx:
|
101 |
-
|
102 |
-
else:
|
103 |
-
|
104 |
# if C.shape[0] == n_mfcc:
|
105 |
# C = C.transpose(1, 0)
|
106 |
|
107 |
-
return
|
108 |
|
109 |
|
110 |
def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
|
@@ -119,13 +118,13 @@ def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=6
|
|
119 |
|
120 |
C = []
|
121 |
|
122 |
-
for i in range(slice):
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
|
130 |
return C
|
131 |
|
@@ -310,24 +309,4 @@ def smooth_geom(geom, mask: torch.Tensor = None, filter_size: int = 9, sigma: fl
|
|
310 |
if mask is None:
|
311 |
return smoothed
|
312 |
else:
|
313 |
-
return smoothed * mask[None, :, None] + geom * (-mask[None, :, None] + 1)
|
314 |
-
|
315 |
-
if __name__ == '__main__':
|
316 |
-
audio_fn = '../sample_audio/clip000028_tCAkv4ggPgI.wav'
|
317 |
-
|
318 |
-
C = get_mfcc_psf(audio_fn)
|
319 |
-
print(C.shape)
|
320 |
-
|
321 |
-
C_2 = get_mfcc_librosa(audio_fn)
|
322 |
-
print(C.shape)
|
323 |
-
|
324 |
-
print(C)
|
325 |
-
print(C_2)
|
326 |
-
print((C == C_2).all())
|
327 |
-
# print(y.shape, sr)
|
328 |
-
# mel_spec = get_melspec(audio_fn)
|
329 |
-
# print(mel_spec.shape)
|
330 |
-
# mfcc = get_mfcc(audio_fn, sr = 16000)
|
331 |
-
# print(mfcc.shape)
|
332 |
-
# print(mel_spec.max(), mel_spec.min())
|
333 |
-
# print(mfcc.max(), mfcc.min())
|
|
|
1 |
import numpy as np
|
2 |
# import librosa #has to do this cause librosa is not supported on my server
|
|
|
3 |
from scipy.io import wavfile
|
4 |
from scipy import signal
|
5 |
import librosa
|
|
|
78 |
'''
|
79 |
|
80 |
def extract_mfcc(audio,sample_rate=16000):
|
81 |
+
# mfcc = zip(*python_speech_features.mfcc(audio,sample_rate, numcep=64, nfilt=64, nfft=2048, winstep=0.04))
|
82 |
+
# mfcc = np.stack([np.array(i) for i in mfcc])
|
83 |
+
return None
|
84 |
|
85 |
def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
|
86 |
y, sr = load_wav_old(audio_fn, sr=sr)
|
|
|
96 |
n_fft=2048
|
97 |
|
98 |
#hard coded for 25 fps
|
99 |
+
# if not smlpx:
|
100 |
+
# C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=0.04)
|
101 |
+
# else:
|
102 |
+
# C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01/15)
|
103 |
# if C.shape[0] == n_mfcc:
|
104 |
# C = C.transpose(1, 0)
|
105 |
|
106 |
+
return None
|
107 |
|
108 |
|
109 |
def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
|
|
|
118 |
|
119 |
C = []
|
120 |
|
121 |
+
# for i in range(slice):
|
122 |
+
# if i != (slice - 1):
|
123 |
+
# feat = python_speech_features.mfcc(y[i*slice_len:(i+1)*slice_len], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
|
124 |
+
# else:
|
125 |
+
# feat = python_speech_features.mfcc(y[i * slice_len:], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
|
126 |
+
#
|
127 |
+
# C.append(feat)
|
128 |
|
129 |
return C
|
130 |
|
|
|
309 |
if mask is None:
|
310 |
return smoothed
|
311 |
else:
|
312 |
+
return smoothed * mask[None, :, None] + geom * (-mask[None, :, None] + 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
visualise/rendering.py
CHANGED
@@ -226,7 +226,7 @@ class RenderTool():
|
|
226 |
writer.write(final_img)
|
227 |
writer.release()
|
228 |
|
229 |
-
cmd = ('ffmpeg' + ' -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p result.mp4'.format(
|
230 |
tmp_audio_file.name, tmp_video_file.name)).split()
|
231 |
# cmd = ('ffmpeg' + '-i {0} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {1}'.format(
|
232 |
# tmp_video_file.name, video_fname)).split()
|
|
|
226 |
writer.write(final_img)
|
227 |
writer.release()
|
228 |
|
229 |
+
cmd = ('ffmpeg' + ' -y -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p result.mp4'.format(
|
230 |
tmp_audio_file.name, tmp_video_file.name)).split()
|
231 |
# cmd = ('ffmpeg' + '-i {0} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {1}'.format(
|
232 |
# tmp_video_file.name, video_fname)).split()
|