Spaces:
Build error
Build error
File size: 6,034 Bytes
6fd7ef3 f1f3adb 6fd7ef3 cb23f19 6fd7ef3 352eb01 6fd7ef3 352eb01 42c4598 352eb01 f1f3adb cb23f19 f1f3adb cb23f19 6fd7ef3 5556030 074b5e7 3b1eecc 6fd7ef3 7de403b 6fd7ef3 cb23f19 6fd7ef3 cb23f19 6fd7ef3 cb23f19 6fd7ef3 cb23f19 6fd7ef3 cb23f19 6fd7ef3 96d2396 6fd7ef3 6e73d37 6fd7ef3 b21ecef 6fd7ef3 cb23f19 352a44b f1f3adb cb23f19 7de403b cb23f19 6fd7ef3 7de403b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import sys
import time
from pathlib import Path
import anvil.server
import anvil.media
from whisper.utils import write_srt
from youtube_dl import YoutubeDL
from youtube_dl.utils import DownloadError
import os
import tempfile
import json
import argparse
import whisper
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
import ffmpeg
from utils.subs import bake_subs
from utils.utils import get_args
# Working directory at import time; the error handlers below chdir back to it.
original_dir = os.getcwd()

# Root folder under which one sub-folder per video id is kept.
output_dir = Path("output")

# Runtime configuration as returned by the project's get_args() helper.
args = get_args()

# The "model" entry in args wins; otherwise fall back to the WHISPER_MODEL
# environment variable and finally to "large".
_default_model_size = os.environ.get("WHISPER_MODEL", "large")
model_size: str = args.get("model", _default_model_size)

# When set, the whisper model is loaded once here instead of inside
# transcribe().
preload_model: bool = args.get("preload")

if preload_model:
    print("Preloading model")
    model = whisper.load_model(model_size)
def download_generator(url, translate_action=True, source_language='Autodetect', corrected_subtitles=None):
    """Download a video, subtitle it, and report progress step by step.

    This is a generator. Every step yields a dict holding a human-readable
    ``"message"`` and, depending on the step, some of ``"meta"``,
    ``"video"``, ``"audio"``, ``"whisper_result"``, ``"srt_path"`` and
    ``"sub_video"``.

    Args:
        url: Address handed to ``check_download`` and ``download``.
        translate_action: Forwarded unchanged to ``transcribe`` and
            ``bake_subs``.
        source_language: Forwarded unchanged to ``transcribe``.
        corrected_subtitles: Optional SRT text. When truthy it is written to
            the ``.srt`` file as-is and the whisper step is skipped.

    Yields:
        dict: One progress update per step, as described above.

    Raises:
        Exception: A failure in steps 2-4 is first yielded as a message and
            then re-raised. A failure in step 1 only yields the message and
            ends the generator.
    """
    # Step 1 : check if video is available
    yield {"message": f"Checking {url} for videos"}
    try:
        meta = check_download(url)
        # NOTE: a maximum-duration check (159 seconds) used to sit here and
        # is currently disabled.
        yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}
        tempdir = output_dir / f"{meta['id']}"
    except Exception as e:
        yield {"message": f"{e}"}
        return

    # Step 2 : Download video and extract audio
    try:
        video_file = tempdir / f"{meta['id']}.{meta['ext']}"
        audio_file = tempdir / f"{meta['id']}.mp3"
        # Reuse the folder and the main files if an earlier run left them.
        if tempdir.is_dir() and video_file.is_file() and audio_file.is_file():
            yield {"message": f"Using cached files"}
            video = str(video_file.resolve())
            audio = str(audio_file.resolve())
        else:
            yield {"message": f"Starting download with URL {url}, this may take a while"}
            meta, video, audio = download(url, tempdir)
            yield {"message": f"Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        raise e

    srt_path = tempdir / f"{meta['id']}.srt"

    if not corrected_subtitles:
        ### Step 3 : Transcribe with whisper
        yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
        try:
            whisper_result = transcribe(audio, translate_action, source_language)
            with open(srt_path, "w", encoding="utf-8") as srt:
                write_srt(whisper_result["segments"], file=srt)
            # Read back with the encoding used for writing; the platform
            # default may not be UTF-8.
            whisper_result["srt"] = Path(srt_path).read_text(encoding="utf-8")
            yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta, "srt_path": srt_path}
        except Exception as e:
            os.chdir(original_dir)
            yield {"message": f"{e}"}
            raise e
    else:
        ### step 3.5 : use corrected subtitles
        yield {"message": f"Using corrected subtitles"}
        with open(srt_path, "w", encoding="utf-8") as srt:
            srt.write(corrected_subtitles)
        yield {"message": f"Transcribe successful", "srt_path": srt_path, "meta": meta}

    ### Step 4 : Bake subtitles into video with ffmpeg
    yield {"message": f"[PLEASE WAIT] baking subtitles into video"}
    try:
        print('Stating to bake subtitles')
        subbed_video_path = tempdir / f"{meta['id']}_translated.mp4"
        fontsdir = Path('fonts')
        bake_subs(video, subbed_video_path.absolute(), srt_path.absolute(), fontsdir, translate_action)
        yield {"message": f"Subtitled video ready!", "sub_video": str(subbed_video_path.absolute()), "meta": meta}
    except ffmpeg.Error as e:
        # Only ffmpeg errors carry the process output; either stream may be
        # None when it was not captured.
        print('stdout:', (e.stdout or b'').decode('utf8', errors='replace'))
        print('stderr:', (e.stderr or b'').decode('utf8', errors='replace'))
        os.chdir(original_dir)
        # Report the failure before re-raising, as steps 2 and 3 do.
        yield {"message": f"{e}"}
        raise e
    except Exception as e:
        os.chdir(original_dir)
        print('error', file=sys.stderr)
        yield {"message": f"{e}"}
        raise e
def progress_hook(d):
    """Print download progress; intended for youtube_dl's ``progress_hooks``.

    The hook must be a plain function. The downloader calls it and ignores
    the return value, so a generator version would never run its body and
    would report nothing.

    Args:
        d: Status dict supplied by the downloader. Reads ``status``,
            ``downloaded_bytes``, ``total_bytes``, ``total_bytes_estimate``,
            ``_percent_str`` and ``filename``; all but ``status`` may be
            missing or ``None``.

    Returns:
        The status line (``"<percent> downloaded"`` or
        ``"Downloaded <filename>"``), or ``None`` for any other status.
    """
    status = d.get('status')

    if status == 'downloading':
        done = d.get('downloaded_bytes')
        # The exact size is not always known; fall back to the estimate.
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        if total and done is not None:
            percent = f"{round(float(done) / float(total) * 100, 1)}%"
        else:
            # Last resort: the downloader's own pre-formatted percentage.
            percent = str(d.get('_percent_str') or '').strip() or "?%"
        print("downloading " + percent)
        return f"{percent} downloaded"

    if status == 'finished':
        filename = d.get('filename')
        print(filename)
        return f"Downloaded {filename}"

    return None
def download(url, tempdir):
    """Download ``url`` into ``tempdir`` and extract an MP3 next to the video.

    The video file is kept (``keepvideo``) and the audio is produced by the
    ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Address passed to ``YoutubeDL.extract_info``.
        tempdir: ``Path`` of the target folder; files are named after the
            video id.

    Returns:
        Tuple ``(meta, video, audio)``: the dict returned by
        ``extract_info`` plus the resolved video and audio paths as strings.
        The paths are built from ``meta['id']`` and ``meta['ext']``; they are
        not checked for existence here.

    Raises:
        DownloadError: Re-raised unchanged from youtube_dl.
    """
    options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "keepvideo": True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        "skip_download": False,
        "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
        "noplaylist": True,
        "verbose": False,
        "quiet": True,
        "progress_hooks": [progress_hook],
    }

    try:
        downloader = YoutubeDL(options)
        meta = downloader.extract_info(url, download=True)
    except DownloadError as err:
        raise err

    video_id = meta['id']
    video_path = str((tempdir / f"{video_id}.{meta['ext']}").resolve())
    audio_path = str((tempdir / f"{video_id}.mp3").resolve())
    print(video_path)
    return meta, video_path, audio_path
def check_download(url):
    """Look up the metadata for ``url`` without downloading any media.

    Args:
        url: Address passed to ``YoutubeDL.extract_info``.

    Returns:
        Whatever ``extract_info(url, download=False)`` returns.

    Raises:
        DownloadError: Re-raised unchanged from youtube_dl.
    """
    probe_options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "skip_download": True,
        "verbose": False,
    }
    downloader = YoutubeDL(probe_options)

    try:
        return downloader.extract_info(url, download=False)
    except DownloadError as err:
        raise err
def transcribe(audio, translate_action=True, language='Autodetect'):
    """Run whisper on ``audio`` and return its result dict.

    Args:
        audio: Passed straight to ``model.transcribe``.
        translate_action: ``True`` selects the whisper task ``"translate"``,
            ``False`` selects ``"transcribe"``.
        language: ``'Autodetect'`` (or an empty value) leaves the language
            unset. Otherwise a key of ``TO_LANGUAGE_CODE`` or of
            ``LANGUAGES``, matched case-insensitively.

    Returns:
        The dict returned by ``model.transcribe`` with two changes:
        ``"language"`` is looked up in ``LANGUAGES``, and a leading
        ``" [AI transcription]"`` segment covering 0-3 s is prepended to
        ``"segments"``.

    Raises:
        KeyError: ``language`` is neither in ``TO_LANGUAGE_CODE`` nor in
            ``LANGUAGES``.
    """
    task = "translate" if translate_action else "transcribe"
    print(f'Starting {task} with whisper size {model_size}')

    # Load the model on first use only; later calls reuse the same instance
    # instead of reloading the weights each time.
    global model
    if globals().get("model") is None:
        model = whisper.load_model(model_size)

    props = {
        "task": task,
    }
    if language and language != 'Autodetect':
        key = language.lower()
        # Accept a key of LANGUAGES directly; anything else must be a key of
        # TO_LANGUAGE_CODE, otherwise this raises KeyError.
        props["language"] = key if key in LANGUAGES else TO_LANGUAGE_CODE[key]

    # Pass the collected options through so the chosen source language is
    # actually used, not only the task.
    output = model.transcribe(audio, **props)

    output["language"] = LANGUAGES.get(output["language"], output["language"])
    output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI transcription]"}] + output['segments']
    print(f'Finished transcribe from {output["language"]}', output["text"])
    return output
|