sanchit-gandhi HF staff committed on
Commit
38d85c1
1 Parent(s): 9f8c873

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -27
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
 
4
  import gradio as gr
5
  import pytube as pt
6
- from asr_diarizer import ASRDiarizationPipeline
7
 
8
  MODEL_NAME = "openai/whisper-tiny"
9
 
@@ -16,22 +16,21 @@ pipe = ASRDiarizationPipeline.from_pretrained(
16
  use_auth_token=HF_TOKEN,
17
  )
18
 
19
- def transcribe(microphone, file_upload):
20
- warn_output = ""
21
- if (microphone is not None) and (file_upload is not None):
22
- warn_output = (
23
- "WARNING: You've uploaded an audio file and used the microphone. "
24
- "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
25
- )
26
 
27
- elif (microphone is None) and (file_upload is None):
28
- return "ERROR: You have to either use the microphone or upload an audio file"
29
 
30
- file = microphone if microphone is not None else file_upload
 
 
 
 
31
 
32
- text = pipe(file)
33
 
34
- return warn_output + "\n\n".join([chunk["speaker"] + chunk["text"] for chunk in text])
 
 
 
35
 
36
 
37
  def _return_yt_html_embed(yt_url):
@@ -43,7 +42,7 @@ def _return_yt_html_embed(yt_url):
43
  return HTML_str
44
 
45
 
46
- def yt_transcribe(yt_url):
47
  yt = pt.YouTube(yt_url)
48
  html_embed_str = _return_yt_html_embed(yt_url)
49
  stream = yt.streams.filter(only_audio=True)[0]
@@ -51,7 +50,7 @@ def yt_transcribe(yt_url):
51
 
52
  text = pipe("audio.mp3")
53
 
54
- return html_embed_str, "\n\n".join([chunk["speaker"] + chunk["text"] for chunk in text])
55
 
56
 
57
  demo = gr.Blocks()
@@ -59,37 +58,43 @@ demo = gr.Blocks()
59
  mf_transcribe = gr.Interface(
60
  fn=transcribe,
61
  inputs=[
62
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
63
  gr.inputs.Audio(source="upload", type="filepath", optional=True),
 
64
  ],
65
  outputs="text",
66
  layout="horizontal",
67
  theme="huggingface",
68
  title="Whisper Demo: Transcribe Audio",
69
  description=(
70
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
71
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
72
- " of arbitrary length."
73
- ),
74
  allow_flagging="never",
75
  )
76
 
77
  yt_transcribe = gr.Interface(
78
  fn=yt_transcribe,
79
- inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
 
 
 
80
  outputs=["html", "text"],
81
  layout="horizontal",
82
  theme="huggingface",
83
- title="Whisper Demo: Transcribe YouTube",
84
  description=(
85
- "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
86
- f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
87
- " arbitrary length."
88
- ),
 
 
 
89
  allow_flagging="never",
90
  )
91
 
92
  with demo:
93
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
94
 
95
- demo.launch(enable_queue=True)
 
3
 
4
  import gradio as gr
5
  import pytube as pt
6
+ from asr_diarize import ASRDiarizationPipeline # TODO: speechbox import
7
 
8
  MODEL_NAME = "openai/whisper-tiny"
9
 
 
16
  use_auth_token=HF_TOKEN,
17
  )
18
 
19
def tuple_to_string(start_end_tuple, ndigits=1):
    """Render a (start, end) timestamp pair as a rounded tuple string.

    Args:
        start_end_tuple: Indexable pair whose first two items are the start
            and end times. Either item may be ``None``.
        ndigits: Number of decimal places each bound is rounded to.

    Returns:
        The ``str()`` of the rounded 2-tuple, e.g. ``"(1.2, 5.7)"``.
    """
    # NOTE(review): ASR pipelines may leave the end of a cut-off final segment
    # as None; round(None) raises TypeError, so pass None through unchanged.
    start, end = start_end_tuple[0], start_end_tuple[1]
    rounded = tuple(
        bound if bound is None else round(bound, ndigits) for bound in (start, end)
    )
    return str(rounded)
 
 
 
 
 
21
 
 
 
22
 
23
def format_as_transcription(raw_segments, with_timestamps=False):
    """Join diarized segments into a single transcript string.

    Each segment is rendered on its own paragraph (segments are separated by
    a blank line) as the speaker label followed by the segment text. When
    ``with_timestamps`` is true, the rounded ``(start, end)`` tuple is
    inserted between the speaker label and the text.

    Args:
        raw_segments: Iterable of dicts with ``"speaker"`` and ``"text"``
            keys, plus ``"timestamp"`` when ``with_timestamps`` is true.
        with_timestamps: Whether to include each segment's timestamp.

    Returns:
        The formatted transcript; an empty string for no segments.
    """

    def render(chunk):
        # Single place that decides the per-segment layout, so the join
        # below is not duplicated across both branches.
        if with_timestamps:
            return chunk["speaker"] + " " + tuple_to_string(chunk["timestamp"]) + chunk["text"]
        return chunk["speaker"] + chunk["text"]

    return "\n\n".join(render(chunk) for chunk in raw_segments)
28
 
 
29
 
30
def transcribe(file_upload, with_timestamps):
    """Transcribe and diarize an uploaded audio file.

    Args:
        file_upload: Path of the uploaded audio file, or ``None`` when the
            user submitted the form without a file (the audio input is
            declared optional in the interface).
        with_timestamps: Whether to include segment timestamps in the output.

    Returns:
        The formatted transcript, or an error message when no file was given.
    """
    # Guard before calling the pipeline: without a file there is nothing to
    # run, and the pipeline would fail with an opaque exception.
    if file_upload is None:
        return "ERROR: You have to upload an audio file"

    raw_segments = pipe(file_upload)
    return format_as_transcription(raw_segments, with_timestamps=with_timestamps)
34
 
35
 
36
  def _return_yt_html_embed(yt_url):
 
42
  return HTML_str
43
 
44
 
45
+ def yt_transcribe(yt_url, with_timestamps):
46
  yt = pt.YouTube(yt_url)
47
  html_embed_str = _return_yt_html_embed(yt_url)
48
  stream = yt.streams.filter(only_audio=True)[0]
 
50
 
51
  text = pipe("audio.mp3")
52
 
53
+ return html_embed_str, format_as_transcription(text, with_timestamps=with_timestamps)
54
 
55
 
56
  demo = gr.Blocks()
 
58
# Interface for the "Transcribe Audio" tab: takes an uploaded audio file and a
# timestamps toggle and shows the diarized transcript as plain text.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
        gr.Checkbox(label="With timestamps?", value=True),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe audio files with speaker diarization using 🤗 Speechbox. Demo uses the pre-trained checkpoint"
        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for ASR transcriptions and [PyAnnote Audio](https://huggingface.co/pyannote/speaker-diarization)"
        " to label the speakers."
    ),  # trailing comma is required: its absence made the whole call a SyntaxError
    allow_flagging="never",
)
75
 
76
# Interface for the "Transcribe YouTube" tab: takes a video URL and a
# timestamps toggle and shows the embedded video plus the diarized transcript.
# NOTE: this assignment rebinds the name `yt_transcribe` from the handler
# function to the Interface; `fn=yt_transcribe` is evaluated before the
# rebinding, so the function is still what gets registered.
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        gr.Checkbox(label="With timestamps?", value=True),
    ],
    outputs=["html", "text"],
    layout="horizontal",
    theme="huggingface",
    title="Whisper Speaker Diarization Demo: Transcribe YouTube",
    description=(
        "Transcribe YouTube videos with speaker diarization using 🤗 Speechbox. Demo uses the pre-trained checkpoint"
        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for ASR transcriptions and [PyAnnote Audio](https://huggingface.co/pyannote/speaker-diarization)"
        " to label the speakers."
    ),  # trailing comma is required: its absence made the whole call a SyntaxError
    examples=[
        ["https://www.youtube.com/watch?v=9dAWIPixYxc", True],
    ],
    allow_flagging="never",
)
96
 
97
  with demo:
98
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
99
 
100
+ demo.launch(enable_queue=True, share=True)