Athspi committed on
Commit
31dfffa
·
verified ·
1 Parent(s): 7739135

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import os
4
+ from faster_whisper import WhisperModel
5
+ from moviepy.editor import VideoFileClip
6
+
7
# Checkpoint to load and the runtime settings derived from the available
# hardware: float32 on a CUDA device, int8 when falling back to the CPU.
MODEL_NAME = "Systran/faster-whisper-large-v3"
if torch.cuda.is_available():
    device, compute_type = "cuda", "float32"
else:
    device, compute_type = "cpu", "int8"

# One shared model instance, created at import time and reused by every request.
model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
14
+
15
# Language names offered in the UI, in display order.
_WHISPER_LANGUAGE_NAMES = (
    "English", "Chinese", "German", "Spanish", "Russian",
    "Korean", "French", "Japanese", "Portuguese", "Turkish",
    "Polish", "Catalan", "Dutch", "Arabic", "Swedish",
    "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech",
    "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
    "Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian",
    "Latin", "Maori", "Malayalam", "Welsh", "Slovak",
    "Telugu", "Persian", "Latvian", "Bengali", "Serbian",
    "Azerbaijani", "Slovenian", "Kannada", "Estonian", "Macedonian",
    "Breton", "Basque", "Icelandic", "Armenian", "Nepali",
    "Mongolian", "Bosnian", "Kazakh", "Albanian", "Swahili",
    "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer",
    "Shona", "Yoruba", "Somali", "Afrikaans", "Occitan",
    "Georgian", "Belarusian", "Tajik", "Sindhi", "Gujarati",
    "Amharic", "Yiddish", "Lao", "Uzbek", "Faroese",
    "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog",
    "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala",
    "Hausa", "Bashkir", "Javanese", "Sundanese",
)

# Dropdown choices: the auto-detection sentinel first, then every language name.
SUPPORTED_LANGUAGES = ["Auto Detect", *_WHISPER_LANGUAGE_NAMES]
34
+
35
def extract_audio_from_video(video_file):
    """Extract the audio track of a video into a temporary WAV file.

    The audio is written with ``fps=16000``.

    Args:
        video_file: Path of the video file to read.

    Returns:
        Path of the newly written WAV file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If the video contains no audio track.
    """
    import tempfile  # local import: only this helper needs it

    # A unique file per call keeps concurrent requests from overwriting each
    # other's audio, which a fixed "extracted_audio.wav" would allow.
    fd, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
    os.close(fd)  # only the path is needed; the writer opens it itself

    try:
        # The context manager closes the clip once the audio has been written,
        # so the resources held by the clip are released on every path.
        with VideoFileClip(video_file) as video:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track.")
            video.audio.write_audiofile(audio_file, fps=16000)
    except Exception:
        # Do not leave an empty or partial WAV file behind on failure.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        raise

    return audio_file
41
+
42
# Lower-cased UI language names mapped to the language codes that
# ``model.transcribe(language=...)`` expects (e.g. "en", not "english").
_LANGUAGE_CODES = {
    "english": "en", "chinese": "zh", "german": "de", "spanish": "es",
    "russian": "ru", "korean": "ko", "french": "fr", "japanese": "ja",
    "portuguese": "pt", "turkish": "tr", "polish": "pl", "catalan": "ca",
    "dutch": "nl", "arabic": "ar", "swedish": "sv", "italian": "it",
    "indonesian": "id", "hindi": "hi", "finnish": "fi", "vietnamese": "vi",
    "hebrew": "he", "ukrainian": "uk", "greek": "el", "malay": "ms",
    "czech": "cs", "romanian": "ro", "danish": "da", "hungarian": "hu",
    "tamil": "ta", "norwegian": "no", "thai": "th", "urdu": "ur",
    "croatian": "hr", "bulgarian": "bg", "lithuanian": "lt", "latin": "la",
    "maori": "mi", "malayalam": "ml", "welsh": "cy", "slovak": "sk",
    "telugu": "te", "persian": "fa", "latvian": "lv", "bengali": "bn",
    "serbian": "sr", "azerbaijani": "az", "slovenian": "sl", "kannada": "kn",
    "estonian": "et", "macedonian": "mk", "breton": "br", "basque": "eu",
    "icelandic": "is", "armenian": "hy", "nepali": "ne", "mongolian": "mn",
    "bosnian": "bs", "kazakh": "kk", "albanian": "sq", "swahili": "sw",
    "galician": "gl", "marathi": "mr", "punjabi": "pa", "sinhala": "si",
    "khmer": "km", "shona": "sn", "yoruba": "yo", "somali": "so",
    "afrikaans": "af", "occitan": "oc", "georgian": "ka", "belarusian": "be",
    "tajik": "tg", "sindhi": "sd", "gujarati": "gu", "amharic": "am",
    "yiddish": "yi", "lao": "lo", "uzbek": "uz", "faroese": "fo",
    "haitian creole": "ht", "pashto": "ps", "turkmen": "tk", "nynorsk": "nn",
    "maltese": "mt", "sanskrit": "sa", "luxembourgish": "lb", "burmese": "my",
    "tibetan": "bo", "tagalog": "tl", "malagasy": "mg", "assamese": "as",
    "tatar": "tt", "hawaiian": "haw", "lingala": "ln", "hausa": "ha",
    "bashkir": "ba", "javanese": "jw", "sundanese": "su",
}


def _resolve_language_code(language):
    """Translate a UI language name into a language code, or None for auto-detect."""
    if not language or language == "Auto Detect":
        return None
    key = language.strip().lower()
    # Unknown values pass through unchanged, so a caller may still supply a
    # code such as "en" directly.
    return _LANGUAGE_CODES.get(key, key)


def generate_subtitles(audio_file, language="Auto Detect"):
    """Transcribe an audio file and return the result as SRT-formatted text.

    Args:
        audio_file: Path of the audio file to transcribe.
        language: A language name as shown in the UI (e.g. "English"), a
            language code (e.g. "en"), or "Auto Detect" / None / "" to let
            the model detect the language.

    Returns:
        A string of SRT entries, numbered from 1, each followed by a blank
        line. Empty when the model yields no segments.
    """
    segments, _info = model.transcribe(
        audio_file,
        task="transcribe",
        language=_resolve_language_code(language),
        word_timestamps=True,
    )

    # Build every entry first and join once instead of growing a string
    # with "+=" inside the loop.
    entries = [
        f"{index}\n"
        f"{format_timestamp(segment.start)} --> {format_timestamp(segment.end)}\n"
        f"{segment.text.strip()}\n\n"
        for index, segment in enumerate(segments, start=1)
    ]
    return "".join(entries)
67
+
68
def format_timestamp(seconds):
    """Convert seconds to SRT timestamp format (HH:MM:SS,mmm).

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The offset rounded to the nearest millisecond, formatted as
        ``HH:MM:SS,mmm``.
    """
    # Convert to whole milliseconds once, with rounding. Truncating the
    # fractional part instead loses a millisecond to float error
    # (1.001 s would render as ",000"), and rounding before splitting lets
    # a value such as 59.9996 s carry correctly into the next minute.
    total_ms = max(0, int(round(float(seconds) * 1000)))

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
75
+
76
def process_video(video_file, language="Auto Detect"):
    """Generate an SRT subtitle file for a video.

    Args:
        video_file: Path of the uploaded video file.
        language: Language selection forwarded to ``generate_subtitles``.

    Returns:
        Path of the written ``subtitles.srt`` file.

    Raises:
        ValueError: If no video file was provided.
    """
    import tempfile  # local import: only this function needs it

    if not video_file:
        raise ValueError("Please upload a video file before generating subtitles.")

    audio_file = extract_audio_from_video(video_file)
    try:
        subtitles = generate_subtitles(audio_file, language)
    finally:
        # Remove the intermediate audio even when transcription fails.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    # Write into a fresh directory per request: a fixed "subtitles.srt" in the
    # working directory would be shared, and overwritten, by concurrent
    # requests. The file keeps the name "subtitles.srt".
    output_dir = tempfile.mkdtemp(prefix="autosubgen_")
    srt_file = os.path.join(output_dir, "subtitles.srt")
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(subtitles)

    return srt_file
93
+
94
# Stylesheet handed to gr.Blocks(css=...): page background and font, the
# gradient banner (".header" and its h1/p children), and the white card
# used for the tab (".tab").
custom_css = """
.gradio-container {
background: linear-gradient(135deg, #f5f7fa, #c3cfe2);
font-family: 'Arial', sans-serif;
}
.header {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #6a11cb, #2575fc);
color: white;
border-radius: 10px;
margin-bottom: 20px;
}
.header h1 {
font-size: 2.5rem;
margin: 0;
}
.header p {
font-size: 1.2rem;
margin: 10px 0 0;
}
.tab {
background: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
"""
123
+
124
# Build the Gradio page: a styled banner, then a single tab holding the
# upload widget, the language selector, the trigger button and the
# downloadable result.
with gr.Blocks(css=custom_css, title="AutoSubGen - AI Video Subtitle Generator") as demo:
    # Banner, styled by the ".header" rules in custom_css.
    with gr.Column(elem_classes="header"):
        for banner_text in (
            "# AutoSubGen",
            "### AI-Powered Video Subtitle Generator",
            "Automatically generate subtitles for your videos in SRT format. Supports 100+ languages and auto-detection.",
        ):
            gr.Markdown(banner_text)

    # Working area, styled by the ".tab" rules in custom_css.
    with gr.Tab("Generate Subtitles", elem_classes="tab"):
        gr.Markdown("### Upload a video file to generate subtitles.")

        # Video upload and language selector side by side (2:1 width ratio).
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            language_dropdown = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                label="Select Language",
                value="Auto Detect",
                scale=1,
            )

        generate_button = gr.Button("Generate Subtitles", variant="primary")
        subtitle_output = gr.File(label="Download Subtitles (SRT)")

    # Clicking the button runs process_video on the current inputs and
    # publishes the returned file path to the download component.
    generate_button.click(
        fn=process_video,
        inputs=[video_input, language_dropdown],
        outputs=subtitle_output,
    )

# Start the web server for the interface.
demo.launch()