ritwikraha committed on
Commit
0c75fa8
·
1 Parent(s): eed9b5d

add: first app

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import required libraries
2
+ import os
3
+ import re
4
+ import logging
5
+ import whisper
6
+ from pytube import YouTube
7
+ import gradio as gr
8
+
9
# Emit INFO-level (and higher) messages through the root logger
logging.basicConfig(level=logging.INFO)

# Name of the Whisper checkpoint to load
WHISPER_MODEL_NAME = "base"

# Load the model once at import time so every request reuses the same instance
model = whisper.load_model(WHISPER_MODEL_NAME)
14
+
15
def download_audio_from_youtube(url):
    """
    Download the audio from a YouTube video and return the path to the audio file.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        The path of the downloaded file, as returned by the stream's
        ``download`` call. The file is written into the current directory.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()

    # ``first()`` yields None when the filter matches nothing; fail with a
    # clear message instead of an AttributeError on ``None.download``.
    if audio_stream is None:
        raise ValueError(f'No audio-only stream available for: {url}')

    return audio_stream.download(output_path=".")
23
+
24
def get_text(url):
    """
    Transcribe the audio from a YouTube video and return the transcript.

    Args:
        url: Address of the YouTube video. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        The transcript with surrounding whitespace stripped, or ``''`` when no
        URL was supplied or the downloaded audio exceeds the size limit.
    """
    # Whitespace-only input is as unusable as an empty string; never pass it
    # on to the downloader.
    if not url or not url.strip():
        return ''

    # Largest audio file (in bytes) that will be transcribed.
    max_audio_bytes = 30000000

    audio_path = download_audio_from_youtube(url.strip())
    try:
        size_in_bytes = os.stat(audio_path).st_size
        logging.info(f'Size of audio file in Bytes: {size_in_bytes}')

        if size_in_bytes > max_audio_bytes:
            logging.error('Videos for transcription on this space are limited to about 1.5 hours...')
            return ''

        base, _ = os.path.splitext(audio_path)
        mp3_path = base + '.mp3'
        # os.replace overwrites an existing target on every platform, so
        # transcribing the same video twice does not fail on the rename.
        os.replace(audio_path, mp3_path)
        audio_path = mp3_path

        result = model.transcribe(audio_path)
        return result['text'].strip()
    finally:
        # Always delete the downloaded audio (success, oversize, or error) so
        # repeated requests do not fill the disk.
        try:
            os.remove(audio_path)
        except OSError as err:
            logging.warning(f'Could not remove temporary audio file {audio_path}: {err}')
46
+
47
def create_gradio_interface():
    """
    Create and launch a Gradio interface for transcribing YouTube videos.

    Builds a Blocks layout with a URL textbox, a "Transcribe" button and a
    transcript textbox, wires the button to ``get_text``, enables request
    queueing, and launches the app with ``debug=True``.
    """
    with gr.Blocks() as demo:
        # Static header and usage notes shown above the controls
        gr.Markdown("<h1><center>Free Fast YouTube URL Video-to-Text using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a> Model</center></h1>")
        gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video.</center>")
        gr.Markdown("<center><b>'Whisper is a neural net that approaches human level robustness and accuracy on English speech recognition.'</b></center>")
        gr.Markdown("<center>Transcription takes 5-10 seconds per minute of the video. #patience<br />If you have time while waiting, check out my <a href=https://www.artificial-intelligence.blog target=_blank>AI blog</a> (opens in new tab).</center>")

        input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL')
        result_button_transcribe = gr.Button('Transcribe')
        output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')

        # Clicking the button sends the URL to get_text and shows its return value
        result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe)

    # The ``default_enabled`` keyword is omitted on purpose: it has no effect in
    # later Gradio 3.x releases and is not accepted by Gradio 4.x, while a plain
    # ``queue()`` call enables queueing on both.
    demo.queue().launch(debug=True)
64
+
65
# Start the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    create_gradio_interface()