Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 commited on Nov 2, 2024

Commit

50380bc

unverified ·

2 Parent(s): f12a40c c7bfcf2

Merge pull request #375 from jhj0517/fix/improve-test

Browse files

Files changed (4) hide show

.github/workflows/ci.yml +1 -1
modules/whisper/base_transcription_pipeline.py +6 -6
tests/test_config.py +8 -4
tests/test_transcription.py +13 -9

.github/workflows/ci.yml CHANGED Viewed

@@ -37,7 +37,7 @@ jobs:
         run: sudo apt-get update && sudo apt-get install -y git ffmpeg
       - name: Install dependencies
-        run: pip install -r requirements.txt pytest
       - name: Run test
         run: python -m pytest -rs tests

         run: sudo apt-get update && sudo apt-get install -y git ffmpeg
       - name: Install dependencies
+        run: pip install -r requirements.txt pytest jiwer
       - name: Run test
         run: python -m pytest -rs tests

modules/whisper/base_transcription_pipeline.py CHANGED Viewed

@@ -179,7 +179,7 @@ class BaseTranscriptionPipeline(ABC):
                         add_timestamp: bool = True,
                         progress=gr.Progress(),
                         *pipeline_params,
-                        ) -> list:
         """
         Write subtitle file from Files
@@ -250,7 +250,7 @@ class BaseTranscriptionPipeline(ABC):
             result_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
             result_file_path = [info['path'] for info in files_info.values()]
-            return [result_str, result_file_path]
         except Exception as e:
             print(f"Error transcribing file: {e}")
@@ -264,7 +264,7 @@ class BaseTranscriptionPipeline(ABC):
                        add_timestamp: bool = True,
                        progress=gr.Progress(),
                        *pipeline_params,
-                       ) -> list:
         """
         Write subtitle file from microphone
@@ -314,7 +314,7 @@ class BaseTranscriptionPipeline(ABC):
             )
             result_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
-            return [result_str, file_path]
         except Exception as e:
             print(f"Error transcribing mic: {e}")
             raise
@@ -327,7 +327,7 @@ class BaseTranscriptionPipeline(ABC):
                            add_timestamp: bool = True,
                            progress=gr.Progress(),
                            *pipeline_params,
-                           ) -> list:
         """
         Write subtitle file from Youtube
@@ -385,7 +385,7 @@ class BaseTranscriptionPipeline(ABC):
             if os.path.exists(audio):
                 os.remove(audio)
-            return [result_str, file_path]
         except Exception as e:
             print(f"Error transcribing youtube: {e}")

                         add_timestamp: bool = True,
                         progress=gr.Progress(),
                         *pipeline_params,
+                        ) -> Tuple[str, List]:
         """
         Write subtitle file from Files
             result_str = f"Done in {self.format_time(total_time)}! Subtitle is in the outputs folder.\n\n{total_result}"
             result_file_path = [info['path'] for info in files_info.values()]
+            return result_str, result_file_path
         except Exception as e:
             print(f"Error transcribing file: {e}")
                        add_timestamp: bool = True,
                        progress=gr.Progress(),
                        *pipeline_params,
+                       ) -> Tuple[str, str]:
         """
         Write subtitle file from microphone
             )
             result_str = f"Done in {self.format_time(time_for_task)}! Subtitle file is in the outputs folder.\n\n{subtitle}"
+            return result_str, file_path
         except Exception as e:
             print(f"Error transcribing mic: {e}")
             raise
                            add_timestamp: bool = True,
                            progress=gr.Progress(),
                            *pipeline_params,
+                           ) -> Tuple[str, str]:
         """
         Write subtitle file from Youtube
             if os.path.exists(audio):
                 os.remove(audio)
+            return result_str, file_path
         except Exception as e:
             print(f"Error transcribing youtube: {e}")

tests/test_config.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import functools
 from modules.utils.paths import *
 from modules.utils.youtube_manager import *
-import os
-import torch
 TEST_FILE_DOWNLOAD_URL = "https://github.com/jhj0517/whisper_flutter_new/raw/main/example/assets/jfk.wav"
 TEST_FILE_PATH = os.path.join(WEBUI_DIR, "tests", "jfk.wav")
 TEST_YOUTUBE_URL = "https://www.youtube.com/watch?v=4WEQtgnBu0I&ab_channel=AndriaFitzer"
-TEST_WHISPER_MODEL = "tiny.en"
 TEST_UVR_MODEL = "UVR-MDX-NET-Inst_HQ_4"
 TEST_NLLB_MODEL = "facebook/nllb-200-distilled-600M"
 TEST_SUBTITLE_SRT_PATH = os.path.join(WEBUI_DIR, "tests", "test_srt.srt")
@@ -34,3 +35,6 @@ def is_pytube_detected_bot(url: str = TEST_YOUTUBE_URL):
         print(f"Pytube has detected as a bot: {e}")
         return True

 import functools
+import jiwer
+import os
+import torch
 from modules.utils.paths import *
 from modules.utils.youtube_manager import *
 TEST_FILE_DOWNLOAD_URL = "https://github.com/jhj0517/whisper_flutter_new/raw/main/example/assets/jfk.wav"
 TEST_FILE_PATH = os.path.join(WEBUI_DIR, "tests", "jfk.wav")
+TEST_ANSWER = "And so my fellow Americans ask not what your country can do for you ask what you can do for your country"
 TEST_YOUTUBE_URL = "https://www.youtube.com/watch?v=4WEQtgnBu0I&ab_channel=AndriaFitzer"
+TEST_WHISPER_MODEL = "tiny"
 TEST_UVR_MODEL = "UVR-MDX-NET-Inst_HQ_4"
 TEST_NLLB_MODEL = "facebook/nllb-200-distilled-600M"
 TEST_SUBTITLE_SRT_PATH = os.path.join(WEBUI_DIR, "tests", "test_srt.srt")
         print(f"Pytube has detected as a bot: {e}")
         return True
+def calculate_wer(answer, prediction):
+    return jiwer.wer(answer, prediction)

tests/test_transcription.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from modules.whisper.whisper_factory import WhisperFactory
 from modules.whisper.data_classes import *
 from modules.utils.paths import WEBUI_DIR
 from test_config import *
@@ -28,6 +29,10 @@ def test_transcribe(
     if not os.path.exists(audio_path):
         download_file(TEST_FILE_DOWNLOAD_URL, audio_path_dir)
     whisper_inferencer = WhisperFactory.create_whisper_inference(
         whisper_type=whisper_type,
     )
@@ -54,7 +59,7 @@ def test_transcribe(
         ),
     ).to_list()
-    subtitle_str, file_path = whisper_inferencer.transcribe_file(
         [audio_path],
         None,
         "SRT",
@@ -62,12 +67,11 @@ def test_transcribe(
         gr.Progress(),
         *hparams,
     )
-    assert isinstance(subtitle_str, str) and subtitle_str
-    assert isinstance(file_path[0], str) and file_path
     if not is_pytube_detected_bot():
-        whisper_inferencer.transcribe_youtube(
             TEST_YOUTUBE_URL,
             "SRT",
             False,
@@ -75,17 +79,17 @@ def test_transcribe(
             *hparams,
         )
         assert isinstance(subtitle_str, str) and subtitle_str
-        assert isinstance(file_path[0], str) and file_path
-    whisper_inferencer.transcribe_mic(
         audio_path,
         "SRT",
         False,
         gr.Progress(),
         *hparams,
     )
-    assert isinstance(subtitle_str, str) and subtitle_str
-    assert isinstance(file_path[0], str) and file_path
 def download_file(url, save_dir):

 from modules.whisper.whisper_factory import WhisperFactory
 from modules.whisper.data_classes import *
+from modules.utils.subtitle_manager import read_file
 from modules.utils.paths import WEBUI_DIR
 from test_config import *
     if not os.path.exists(audio_path):
         download_file(TEST_FILE_DOWNLOAD_URL, audio_path_dir)
+    answer = TEST_ANSWER
+    if diarization:
+        answer = "SPEAKER_00|"+TEST_ANSWER
     whisper_inferencer = WhisperFactory.create_whisper_inference(
         whisper_type=whisper_type,
     )
         ),
     ).to_list()
+    subtitle_str, file_paths = whisper_inferencer.transcribe_file(
         [audio_path],
         None,
         "SRT",
         gr.Progress(),
         *hparams,
     )
+    subtitle = read_file(file_paths[0]).split("\n")
+    assert calculate_wer(answer, subtitle[2].strip().replace(",", "").replace(".", "")) < 0.1
     if not is_pytube_detected_bot():
+        subtitle_str, file_path = whisper_inferencer.transcribe_youtube(
             TEST_YOUTUBE_URL,
             "SRT",
             False,
             *hparams,
         )
         assert isinstance(subtitle_str, str) and subtitle_str
+        assert os.path.exists(file_path)
+    subtitle_str, file_path = whisper_inferencer.transcribe_mic(
         audio_path,
         "SRT",
         False,
         gr.Progress(),
         *hparams,
     )
+    subtitle = read_file(file_path).split("\n")
+    assert calculate_wer(answer, subtitle[2].strip().replace(",", "").replace(".", "")) < 0.1
 def download_file(url, save_dir):