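"""AutoSubGen - AI Video Subtitle Generator.

A Gradio Space that extracts the audio track from an uploaded video with MoviePy,
transcribes it with faster-whisper (Systran/faster-whisper-large-v3), and returns the
transcript as a downloadable SRT subtitle file.
"""
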
import gradio as gr
import torch
import os
from faster_whisper import WhisperModel
from moviepy.editor import VideoFileClip  # moviepy < 2.0; moviepy >= 2.0 exposes VideoFileClip directly from `moviepy`

# Define the model and device
MODEL_NAME = "Systran/faster-whisper-large-v3"
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float32" if device == "cuda" else "int8"

# Load the Whisper model
model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
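# Note: the large-v3 weights (roughly 3 GB) are downloaded from the Hugging Face Hub on
# first use. On CUDA, compute_type="float16" is a common alternative to "float32" that
# roughly halves memory use with little accuracy loss.
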
# List of all supported languages in Whisper
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
    "Sundanese"
]
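
# faster-whisper's transcribe() expects a language code such as "en" or "fr" rather than a
# display name such as "English". Map the dropdown choices above to Whisper's language
# codes; the code list below assumes the same order as SUPPORTED_LANGUAGES (minus
# "Auto Detect") and must be kept in sync with it.
_WHISPER_LANGUAGE_CODES = (
    "en zh de es ru ko fr ja pt tr pl ca nl ar sv it id hi fi vi he uk el ms cs ro da hu ta no "
    "th ur hr bg lt la mi ml cy sk te fa lv bn sr az sl kn et mk br eu is hy ne mn bs kk sq sw "
    "gl mr pa si km sn yo so af oc ka be tg sd gu am yi lo uz fo ht ps tk nn mt sa lb my bo tl "
    "mg as tt haw ln ha ba jw su"
).split()
LANGUAGE_NAME_TO_CODE = dict(zip(SUPPORTED_LANGUAGES[1:], _WHISPER_LANGUAGE_CODES))
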
def extract_audio_from_video(video_file):
    """Extract audio from a video file and save it as a 16 kHz WAV file."""
    video = VideoFileClip(video_file)
    if video.audio is None:
        video.close()
        raise gr.Error("The uploaded video has no audio track.")
    audio_file = "extracted_audio.wav"
    video.audio.write_audiofile(audio_file, fps=16000)
    video.close()
    return audio_file

def generate_subtitles(audio_file, language="Auto Detect"):
    """Generate SRT-formatted subtitles from an audio file using Whisper."""
    # Transcribe the audio. faster-whisper expects a language code such as "en" or "fr",
    # so map the dropdown's display name to its code; None triggers auto-detection.
    segments, info = model.transcribe(
        audio_file,
        task="transcribe",
        language=None if language == "Auto Detect" else LANGUAGE_NAME_TO_CODE.get(language, language.lower()),
        word_timestamps=True
    )
    # info.language / info.language_probability report the detected language when auto-detecting.
    # Generate SRT-format subtitles
    srt_subtitles = ""
    for i, segment in enumerate(segments, start=1):
        start_time = segment.start
        end_time = segment.end
        text = segment.text.strip()
        # Format timestamps for SRT
        start_time_srt = format_timestamp(start_time)
        end_time_srt = format_timestamp(end_time)
        # Add to SRT
        srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{text}\n\n"
    return srt_subtitles
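
# The returned string follows standard SRT layout, e.g.:
#   1
#   00:00:00,000 --> 00:00:02,500
#   Hello world.
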
def format_timestamp(seconds):
    """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = seconds % 60
    milliseconds = int((seconds - int(seconds)) * 1000)
    return f"{hours:02}:{minutes:02}:{int(seconds):02},{milliseconds:03}"
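
# Worked example: format_timestamp(3661.5) returns "01:01:01,500"
# (1 hour, 1 minute, 1.5 seconds).
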
def process_video(video_file, language="Auto Detect"):
    """Process a video file to generate subtitles."""
    if video_file is None:
        raise gr.Error("Please upload a video file first.")
    # Extract audio from the video
    audio_file = extract_audio_from_video(video_file)
    # Generate subtitles
    subtitles = generate_subtitles(audio_file, language)
    # Save subtitles to an SRT file
    srt_file = "subtitles.srt"
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(subtitles)
    # Clean up the extracted audio file
    os.remove(audio_file)
    return srt_file
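
# Note: "extracted_audio.wav" and "subtitles.srt" are written to the working directory,
# so two concurrent requests on a shared Space would overwrite each other's files.
# A minimal fix (sketch, not part of the original app) is a per-request temp directory:
#
#     import tempfile
#     srt_file = os.path.join(tempfile.mkdtemp(), "subtitles.srt")
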
# Custom CSS for styling
custom_css = """
.gradio-container {
    background: linear-gradient(135deg, #f5f7fa, #c3cfe2);
    font-family: 'Arial', sans-serif;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #6a11cb, #2575fc);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
.header h1 {
    font-size: 2.5rem;
    margin: 0;
}
.header p {
    font-size: 1.2rem;
    margin: 10px 0 0;
}
.tab {
    background: white;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
"""

# Define the Gradio interface
with gr.Blocks(css=custom_css, title="AutoSubGen - AI Video Subtitle Generator") as demo:
    # Header
    with gr.Column(elem_classes="header"):
        gr.Markdown("# AutoSubGen")
        gr.Markdown("### AI-Powered Video Subtitle Generator")
        gr.Markdown("Automatically generate subtitles for your videos in SRT format. Supports nearly 100 languages plus automatic language detection.")
    # Main content
    with gr.Tab("Generate Subtitles", elem_classes="tab"):
        gr.Markdown("### Upload a video file to generate subtitles.")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            language_dropdown = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                label="Select Language",
                value="Auto Detect",
                scale=1
            )
        generate_button = gr.Button("Generate Subtitles", variant="primary")
        subtitle_output = gr.File(label="Download Subtitles (SRT)")

        # Link button to function
        generate_button.click(
            process_video,
            inputs=[video_input, language_dropdown],
            outputs=subtitle_output
        )
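
# Optional: a single transcription of a long video can run for several minutes, so
# enabling Gradio's request queue (demo.queue()) before launching is a common way to
# handle such long-running requests.
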
# Launch the Gradio interface
demo.launch()