Spaces:
Sleeping
Sleeping
File size: 6,857 Bytes
3c71f27 9e156fa 4f0841f effdcb4 9e156fa effdcb4 dbc58d4 f537405 9e156fa effdcb4 4f0841f 9e156fa 8c56203 219d138 4f0841f effdcb4 3c71f27 effdcb4 3c71f27 effdcb4 4f0841f effdcb4 3c71f27 effdcb4 3c71f27 effdcb4 f537405 bfe6e49 effdcb4 3c71f27 effdcb4 eb5718c effdcb4 eb5718c effdcb4 eb5718c effdcb4 eb5718c effdcb4 4f0841f 9e156fa b7a9c79 0121f2d b7a9c79 f678d41 b7a9c79 219d138 b7a9c79 fc75498 b7a9c79 219d138 b7a9c79 fc75498 b7a9c79 219d138 eb5718c b7a9c79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import functools
import logging
import os

# Third-party imports keep their original relative order (`spaces` first).
import spaces
import gradio as gr
from faster_whisper import WhisperModel
import pysrt
import pandas as pd
from transformers import MarianMTModel, MarianTokenizer
import ffmpeg
import torch
# Initial configuration and data loading.
# The ISO language table is fetched over HTTP when this module is imported.
# It is parsed as "|"-delimited text with the first two rows skipped; columns
# that are entirely empty are dropped (presumably the blanks produced by the
# leading/trailing "|" of a Markdown table -- verify against the remote file).
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices as (label, value) pairs; both are the two-letter code.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

# Model sizes offered in the UI and passed through to WhisperModel.
model_size_options = ["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]

# Root logger at DEBUG level.
logging.basicConfig(level=logging.DEBUG)
# Format a timestamped transcript as an SRT file
def text_to_srt(text, output_path='/tmp/output.srt'):
    """Convert ``[start -> end] caption`` transcript lines into an SRT file.

    Each non-blank line is expected to look like
    ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] caption``.  Timestamps containing a
    single colon (``MM:SS.mmm``) are padded with a leading ``00:`` hour
    field.  Lines that do not match this shape are skipped.

    Cues are numbered consecutively from 1 in the order they are written, so
    blank or malformed lines never leave gaps in the SRT numbering.

    Args:
        text: Transcript with one timestamped segment per line.
        output_path: Destination of the SRT file.  Defaults to the fixed
            location ``/tmp/output.srt``, which is overwritten on every call.

    Returns:
        The path of the written SRT file (``output_path``).
    """
    cues = []
    for line in text.split('\n'):
        if not line.strip():
            continue
        try:
            times, content = line.split(']', 1)
            # times[1:] drops the opening "[" of the timestamp pair.
            start, end = times[1:].split(' -> ')
        except ValueError:
            # Not a "[start -> end] caption" line; ignore it.
            continue
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        # SRT timestamps use a comma as the decimal separator.
        cues.append(
            f"{len(cues) + 1}\n"
            f"{start.replace('.', ',')} --> {end.replace('.', ',')}\n"
            f"{content.strip()}\n\n"
        )
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return output_path
# Format a number of seconds as a timestamp
def format_timestamp(seconds):
    """Format a duration in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    hour/minute/second fields.  Formatting the seconds remainder on its own
    could round up to ``60.000`` (e.g. 59.9996 -> ``00:00:60.000``), which is
    not a valid timestamp; rounding first carries into the minutes instead.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The zero-padded timestamp string.
    """
    total_ms = int(round(float(seconds) * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    secs, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the Marian tokenizer and model for ``model_name``, memoized.

    Loading is repeated for every subtitle line otherwise; the bounded cache
    keeps the most recently used pairs so repeated calls for the same
    language pair reuse them.  Exceptions are not cached, so a failed load is
    retried on the next call.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


# Text translation function
@spaces.GPU
def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with the matching ``Helsinki-NLP/opus-mt`` model.

    Args:
        text: Text to translate; input is truncated to 512 tokens.
        source_language_code: Language code of ``text``.
        target_language_code: Language code to translate into.

    Returns:
        The translated text.  When both codes are equal, or the model for
        the pair cannot be loaded, a human-readable message is returned
        instead of raising (callers receive it as if it were the
        translation).
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        # Keep the string-return contract, but leave a trace in the logs.
        logging.exception("Failed to load translation model %s", model_name)
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Translate an SRT file
@spaces.GPU
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate every cue of an SRT file and save the result as a new file.

    Cue timings are copied unchanged; cue text is replaced by the result of
    ``translate_text`` and cues are renumbered from 1.

    Args:
        input_file_path: Path of the SRT file to translate.
        source_language_code: Language code of the existing subtitles.
        target_language_code: Language code to translate into.
        progress: Optional callable invoked after each cue with the fraction
            completed (a float in ``(0, 1]``).

    Returns:
        Path of the translated file: the input path with
        ``_<target_language_code>`` inserted before a ``.srt`` extension.
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for idx, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=idx, start=sub.start, end=sub.end, text=translated_text)
        )
        if progress:
            progress(idx / total)
    # Only the final extension is rewritten.  A plain str.replace(".srt", ...)
    # would also touch ".srt" inside directory names and, when the input has
    # no ".srt" at all, would return the input path and overwrite the source.
    root, _ = os.path.splitext(input_file_path)
    translated_srt_path = f"{root}_{target_language_code}.srt"
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path
# Transcribe the audio of a video into timestamped text
@spaces.GPU
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper.

    A ``WhisperModel`` of the requested size is created on every call, using
    CUDA with ``float16`` when available and CPU with ``int8`` otherwise.

    Args:
        audio_file_path: Path of the file handed to the model.
        model_size: Model size name passed to ``WhisperModel``.

    Returns:
        One line per segment, formatted as ``[start -> end] text`` with both
        timestamps produced by ``format_timestamp``, joined by newlines.
    """
    if torch.cuda.is_available():
        device, compute_type = "cuda", "float16"
    else:
        device, compute_type = "cpu", "int8"
    whisper = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _info = whisper.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(seg.start)} -> {format_timestamp(seg.end)}] {seg.text}"
        for seg in segments
    )
# Add subtitles to a video
@spaces.GPU
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle=False):
    """Produce ``/tmp/<video name>_subtitled.mp4`` with subtitles attached.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to attach.
        subtitle_language: Accepted for interface compatibility; not used by
            the current implementation.
        soft_subtitle: When true, the subtitle file is added as a second
            input and the output uses ``c=copy`` with ``c:s=mov_text``.
            Otherwise the video is output with a ``subtitles=`` video filter.

    Returns:
        Path of the generated video; an existing file there is overwritten.
    """
    base_name, _ext = os.path.splitext(os.path.basename(input_video))
    output_video = f"/tmp/{base_name}_subtitled.mp4"
    source = ffmpeg.input(input_video)
    if not soft_subtitle:
        pipeline = ffmpeg.output(source, output_video, vf=f"subtitles={subtitle_file}")
    else:
        subtitle_source = ffmpeg.input(subtitle_file)
        codec_options = {"c": "copy", "c:s": "mov_text"}
        pipeline = ffmpeg.output(source, subtitle_source, output_video, **codec_options)
    ffmpeg.run(pipeline, overwrite_output=True)
    return output_video
# Gradio Blocks initialization: builds the UI, wires the two buttons to their
# handlers, and launches the app.
# NOTE(review): the indentation of this file was lost; which components sit
# inside `gr.Row()` below is a reconstruction -- confirm against the original.
with gr.Blocks() as blocks_app:
    gr.Markdown(
        """
# Video Subtitle Creation API
For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/VideoSubtitleCreation)
""")
    with gr.Row():
        video_file = gr.Video(label="Upload Video")
        source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language", value="en")
        target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language", value="en")
        model_size_dropdown = gr.Dropdown(choices=model_size_options, label="Model Size", value="large") # Model size dropdown
        transcribe_button = gr.Button("Transcribe Video")
        translate_button = gr.Button("Translate Subtitles")
    output_video = gr.Video(label="Processed Video")
    output_srt = gr.File(label="Subtitles File (.srt)")

    def transcribe_and_add_subtitles(video_file, model_size):
        """Transcribe the video, write the SRT, and render it onto the video.

        Returns the path of the subtitled video and the path of the SRT file.
        """
        transcription = transcribe(video_file, model_size)
        srt_path = text_to_srt(transcription)
        output_video_path = add_subtitle_to_video(video_file, srt_path, subtitle_language="eng", soft_subtitle=False)
        return output_video_path, srt_path

    def translate_subtitles_and_add_to_video(video_file, source_language_code, target_language_code, model_size):
        """Transcribe the video, translate the SRT, and render it onto the video.

        Returns the path of the subtitled video and the path of the
        translated SRT file.
        """
        transcription = transcribe(video_file, model_size)
        srt_path = text_to_srt(transcription)
        translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
        output_video_path = add_subtitle_to_video(video_file, translated_srt_path, target_language_code, soft_subtitle=False)
        return output_video_path, translated_srt_path

    # Both buttons fill the same two outputs: the processed video and the SRT file.
    transcribe_button.click(transcribe_and_add_subtitles, inputs=[video_file, model_size_dropdown], outputs=[output_video, output_srt])
    translate_button.click(translate_subtitles_and_add_to_video, inputs=[video_file, source_language_dropdown, target_language_dropdown, model_size_dropdown], outputs=[output_video, output_srt])

# Launch the application
blocks_app.launch()