whisper-sami-demo

Running on Zero

App Files Files Community

versae committed on Apr 12, 2024

Commit

c1541fb

verified ·

1 Parent(s): f5e579f

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -26

app.py CHANGED Viewed

@@ -17,31 +17,19 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 @spaces.GPU(duration=120)
-def transcribe(file):  #microphone, file_upload):
-    pipe = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
         chunk_length_s=30,
         device=device,
         token=auth_token,
     )
-    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
-    #pipe.to(device)
-    warn_output = ""
-    # if (microphone is not None) and (file_upload is not None):
-    #     warn_output = (
-    #         "WARNING: You've uploaded an audio file and used the microphone. "
-    #         "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
-    #     )
-    # elif (microphone is None) and (file_upload is None):
-    #     return "ERROR: You have to either use the microphone or upload an audio file"
-    # file = microphone if microphone is not None else file_upload
     text = pipe(file)["text"]
-    return warn_output + text
 def _return_yt_html_embed(yt_url):
@@ -54,15 +42,6 @@ def _return_yt_html_embed(yt_url):
 def yt_transcribe(yt_url):
-    pipe = pipeline(
-        task="automatic-speech-recognition",
-        model=MODEL_NAME,
-        chunk_length_s=30,
-        device=device,
-        token=auth_token,
-    )
-    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
     stream = yt.streams.filter(only_audio=True)[0]

 print(f"Using device: {device}")
 @spaces.GPU(duration=120)
+def pipe(file):
+    asr = pipeline(
         task="automatic-speech-recognition",
         model=MODEL_NAME,
         chunk_length_s=30,
         device=device,
         token=auth_token,
     )
+    asr.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
+def transcribe(file):
     text = pipe(file)["text"]
+    return text
 def _return_yt_html_embed(yt_url):
 def yt_transcribe(yt_url):
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
     stream = yt.streams.filter(only_audio=True)[0]