versae committed on
Commit
6a3ae5e
·
verified ·
1 Parent(s): 1515c07

Add timestamps

Browse files
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -28,8 +28,18 @@ def pipe(file):
28
  asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
29
  return asr(file)
30
 
31
- def transcribe(file):
32
- text = pipe(file)["text"]
 
 
 
 
 
 
 
 
 
 
33
  return text
34
 
35
 
@@ -42,13 +52,13 @@ def _return_yt_html_embed(yt_url):
42
  return HTML_str
43
 
44
 
45
- def yt_transcribe(yt_url):
46
  yt = pt.YouTube(yt_url)
47
  html_embed_str = _return_yt_html_embed(yt_url)
48
  stream = yt.streams.filter(only_audio=True)[0]
49
  stream.download(filename="audio.mp3")
50
 
51
- text = pipe("audio.mp3")["text"]
52
 
53
  return html_embed_str, text
54
 
@@ -59,6 +69,7 @@ mf_transcribe = gr.Interface(
59
  fn=transcribe,
60
  inputs=[
61
  gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
 
62
  ],
63
  outputs="text",
64
  theme="huggingface",
@@ -73,7 +84,10 @@ mf_transcribe = gr.Interface(
73
 
74
  yt_transcribe = gr.Interface(
75
  fn=yt_transcribe,
76
- inputs=[gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
 
 
 
77
  examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
78
  outputs=["html", "text"],
79
  theme="huggingface",
 
28
  asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
29
  return asr(file)
30
 
31
def _format_timestamp(seconds):
    """Render an offset in seconds as ``HH:MM:SS``; ``None`` becomes ``??:??:??``."""
    if seconds is None:
        # A chunk boundary can be missing; keep the column width stable.
        return "??:??:??"
    # Plain arithmetic instead of time.gmtime/strftime: hours do not wrap
    # back to 00 after 24h, and no `time` import is required.
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def transcribe(file, return_timestamps=False):
    """Transcribe an audio file, optionally prefixing each chunk with its time span.

    Args:
        file: Audio input forwarded unchanged to ``pipe``.
        return_timestamps: When false, return the ``"text"`` entry of the
            ``pipe`` result. When true, read the ``"chunks"`` entry and emit
            one ``[HH:MM:SS -> HH:MM:SS] text`` line per chunk.

    Returns:
        The transcription as a single string; with timestamps, the chunk
        lines are joined with newlines.
    """
    if not return_timestamps:
        return pipe(file)["text"]

    # NOTE(review): the surrounding diff context shows `def pipe(file):`;
    # confirm that `pipe` accepts and forwards `return_timestamps`, otherwise
    # this call raises TypeError.
    chunks = pipe(file, return_timestamps=True)["chunks"]

    lines = []
    for chunk in chunks:
        start_time = _format_timestamp(chunk["timestamp"][0])
        end_time = _format_timestamp(chunk["timestamp"][1])
        lines.append(f"[{start_time} -> {end_time}] {chunk['text']}")
    return "\n".join(lines)
44
 
45
 
 
52
  return HTML_str
53
 
54
 
55
def yt_transcribe(yt_url, return_timestamps=False):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.
        return_timestamps: Forwarded to ``transcribe``; selects the
            timestamped output format.

    Returns:
        A ``(html_embed_str, text)`` tuple: the HTML produced by
        ``_return_yt_html_embed`` for the URL, and the transcription.
    """
    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)

    # First stream of the audio-only listing.
    stream = yt.streams.filter(only_audio=True)[0]
    # NOTE(review): fixed filename in the working directory; concurrent
    # requests would overwrite each other's download — confirm acceptable.
    stream.download(filename="audio.mp3")

    # Pass the flag through; previously it was accepted but ignored, so the
    # "Return timestamps" checkbox had no effect for YouTube input.
    text = transcribe("audio.mp3", return_timestamps=return_timestamps)

    return html_embed_str, text
64
 
 
69
  fn=transcribe,
70
  inputs=[
71
  gr.components.Audio(sources=['upload', 'microphone'], type="filepath"),
72
+ gr.components.Checkbox(label="Return timestamps"),
73
  ],
74
  outputs="text",
75
  theme="huggingface",
 
84
 
85
  yt_transcribe = gr.Interface(
86
  fn=yt_transcribe,
87
+ inputs=[
88
+ gr.components.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
89
+ gr.components.Checkbox(label="Return timestamps"),
90
+ ],
91
  examples=[["https://www.youtube.com/watch?v=mukeSSa5GKo"]],
92
  outputs=["html", "text"],
93
  theme="huggingface",