feifeifeiliu committed on
Commit 9b6fdf7 · 1 Parent(s): 865fd8a

Upload 25 files

Files changed (2)
  1. data_utils/utils.py +16 -37
  2. visualise/rendering.py +1 -1
data_utils/utils.py CHANGED
@@ -1,6 +1,5 @@
  import numpy as np
  # import librosa #has to do this cause librosa is not supported on my server
- import python_speech_features
  from scipy.io import wavfile
  from scipy import signal
  import librosa
@@ -79,9 +78,9 @@ def get_melspec(audio_fn, eps=1e-6, fps = 25, sr=16000, n_mels=64):
  '''

  def extract_mfcc(audio,sample_rate=16000):
- mfcc = zip(*python_speech_features.mfcc(audio,sample_rate, numcep=64, nfilt=64, nfft=2048, winstep=0.04))
- mfcc = np.stack([np.array(i) for i in mfcc])
- return mfcc
+ # mfcc = zip(*python_speech_features.mfcc(audio,sample_rate, numcep=64, nfilt=64, nfft=2048, winstep=0.04))
+ # mfcc = np.stack([np.array(i) for i in mfcc])
+ return None

  def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
  y, sr = load_wav_old(audio_fn, sr=sr)
@@ -97,14 +96,14 @@ def get_mfcc_psf(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, w
  n_fft=2048

  #hard coded for 25 fps
- if not smlpx:
- C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=0.04)
- else:
- C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01/15)
+ # if not smlpx:
+ # C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=0.04)
+ # else:
+ # C = python_speech_features.mfcc(y, sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01/15)
  # if C.shape[0] == n_mfcc:
  # C = C.transpose(1, 0)

- return C
+ return None


  def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=64, win_size=None):
@@ -119,13 +118,13 @@ def get_mfcc_psf_min(audio_fn, eps=1e-6, fps=25, smlpx=False, sr=16000, n_mfcc=6

  C = []

- for i in range(slice):
- if i != (slice - 1):
- feat = python_speech_features.mfcc(y[i*slice_len:(i+1)*slice_len], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
- else:
- feat = python_speech_features.mfcc(y[i * slice_len:], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
-
- C.append(feat)
+ # for i in range(slice):
+ # if i != (slice - 1):
+ # feat = python_speech_features.mfcc(y[i*slice_len:(i+1)*slice_len], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
+ # else:
+ # feat = python_speech_features.mfcc(y[i * slice_len:], sr, numcep=n_mfcc, nfilt=n_mfcc, nfft=n_fft, winstep=1.01 / 15)
+ #
+ # C.append(feat)

  return C

@@ -310,24 +309,4 @@ def smooth_geom(geom, mask: torch.Tensor = None, filter_size: int = 9, sigma: fl
  if mask is None:
  return smoothed
  else:
- return smoothed * mask[None, :, None] + geom * (-mask[None, :, None] + 1)
-
- if __name__ == '__main__':
- audio_fn = '../sample_audio/clip000028_tCAkv4ggPgI.wav'
-
- C = get_mfcc_psf(audio_fn)
- print(C.shape)
-
- C_2 = get_mfcc_librosa(audio_fn)
- print(C.shape)
-
- print(C)
- print(C_2)
- print((C == C_2).all())
- # print(y.shape, sr)
- # mel_spec = get_melspec(audio_fn)
- # print(mel_spec.shape)
- # mfcc = get_mfcc(audio_fn, sr = 16000)
- # print(mfcc.shape)
- # print(mel_spec.max(), mel_spec.min())
- # print(mfcc.max(), mfcc.min())
+ return smoothed * mask[None, :, None] + geom * (-mask[None, :, None] + 1)
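
This commit removes the python_speech_features dependency but leaves extract_mfcc and get_mfcc_psf stubbed out to return None rather than porting them. A minimal sketch of computing an equivalent 64-coefficient MFCC with librosa (which the file already imports) is shown below; the helper name extract_mfcc_librosa is illustrative and not part of the repository, and librosa's filterbank and scaling are not numerically identical to python_speech_features.

import numpy as np
import librosa

def extract_mfcc_librosa(audio, sample_rate=16000, n_mfcc=64, n_fft=2048, winstep=0.04):
    # winstep=0.04 s at 16 kHz is a hop of 640 samples, i.e. 25 feature frames
    # per second, matching the "hard coded for 25 fps" comment in get_mfcc_psf.
    hop_length = int(round(winstep * sample_rate))
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc,
                                n_mels=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    # Shape (n_mfcc, n_frames), the same layout the removed zip/np.stack code produced.
    return mfcc.astype(np.float32)

For the smlpx branch (winstep=1.01/15, roughly 0.0673 s), the corresponding hop_length would round to 1077 samples.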
visualise/rendering.py CHANGED
@@ -226,7 +226,7 @@ class RenderTool():
  writer.write(final_img)
  writer.release()

- cmd = ('ffmpeg' + ' -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p result.mp4'.format(
+ cmd = ('ffmpeg' + ' -y -i {0} -i {1} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p result.mp4'.format(
  tmp_audio_file.name, tmp_video_file.name)).split()
  # cmd = ('ffmpeg' + '-i {0} -vcodec h264 -ac 2 -channel_layout stereo -pix_fmt yuv420p {1}'.format(
  # tmp_video_file.name, video_fname)).split()
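
The only functional change in rendering.py is the added -y flag, which makes ffmpeg overwrite an existing result.mp4 instead of stopping at its interactive overwrite prompt. A minimal sketch of executing the same mux step is given below, assuming the command is launched with subprocess; the wrapper name mux_audio_video and its arguments are illustrative rather than taken from rendering.py.

import subprocess

def mux_audio_video(audio_path, video_path, out_path='result.mp4'):
    # -y auto-confirms ffmpeg's overwrite prompt, so repeated renders of the
    # same clip do not hang waiting for console input.
    cmd = ['ffmpeg', '-y',
           '-i', audio_path,   # temporary wav (tmp_audio_file in the diff)
           '-i', video_path,   # temporary video written by the frame writer
           '-vcodec', 'h264',
           '-ac', '2', '-channel_layout', 'stereo',
           '-pix_fmt', 'yuv420p',
           out_path]
    subprocess.run(cmd, check=True)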