whisper-youtube-2-hf_dataset

Runtime error

juancopi81 committed on Feb 16, 2023

Commit

d357f9b

1 Parent(s): 5c20ea7

Update transforming/whispertransform.py

Files changed (1) hide show

transforming/whispertransform.py CHANGED Viewed

@@ -40,22 +40,27 @@ class WhisperTransform(Transform):
         print(f"Video title and url: {video.title} {video.url}")
         audio_file = self._get_audio_from_video(yt)
-        result = self.model.transcribe(audio_file,
-                                       without_timestamps=self.without_timestamps)
-        transcription = result["text"]
-        data = []
-        for seg in result['segments']:
-            data.append(OrderedDict({'start': seg['start'], 'end': seg['end'],'text': seg['text']}))
-        os.remove(audio_file)
-        return YoutubeVideo(channel_name = video.channel_name,
-                            url = video.url,
-                            title = video.title,
-                            description = video.description,
-                            transcription = transcription,
-                            segments = data)
     def _get_audio_from_video(self, yt: Any) -> Path:
         # TODO: Add credits

         print(f"Video title and url: {video.title} {video.url}")
         audio_file = self._get_audio_from_video(yt)
+        try:
+            result = self.model.transcribe(audio_file,
+                                           without_timestamps=self.without_timestamps)
+        except Exception as e:
+            print(f"Audio exception print: {e}")
+            pass
+        else:
+            transcription = result["text"]
+            data = []
+            for seg in result['segments']:
+                data.append(OrderedDict({'start': seg['start'], 'end': seg['end'],'text': seg['text']}))
+            os.remove(audio_file)
+            return YoutubeVideo(channel_name = video.channel_name,
+                                url = video.url,
+                                title = video.title,
+                                description = video.description,
+                                transcription = transcription,
+                                segments = data)
     def _get_audio_from_video(self, yt: Any) -> Path:
         # TODO: Add credits