Spaces:
Build error
Build error
Alex Volkov
committed on
Commit
·
6fd7ef3
1
Parent(s):
8a19f9b
Initial commit, let's see if this works on HF
Browse files- README.md +1 -1
- app.py +117 -4
- download.py +146 -0
- fonts/arial.ttf +0 -0
- utils/__init__.py +0 -0
- utils/subs.py +84 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Vidtranslator
|
3 |
-
emoji:
|
4 |
colorFrom: red
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
title: Vidtranslator
|
3 |
+
emoji: 🎥
|
4 |
colorFrom: red
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
app.py
CHANGED
@@ -1,7 +1,120 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
|
4 |
-
return "Hello there " + name + "!!"
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
+
from download import check_download, download, download_generator
|
|
|
4 |
|
5 |
+
import anvil.media
|
6 |
+
import os
|
7 |
+
import pathlib
|
8 |
+
from pathlib import Path
|
9 |
+
from shutil import rmtree
|
10 |
+
|
11 |
+
anvil.server.connect(os.environ['ANVIL_UPLINK_KEY'])
|
12 |
+
queue_placeholder = None
|
13 |
+
|
14 |
+
url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
|
15 |
+
download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
|
16 |
+
init_video = gr.Video(label="Downloaded video", visible=False)
|
17 |
+
init_audio = gr.Audio(label="Downloaded audio", visible=False)
|
18 |
+
output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10)
|
19 |
+
sub_video = gr.Video(label="Subbed video", visible=False)
|
20 |
+
|
21 |
+
|
22 |
+
@anvil.server.callable
def cleanup_output_dir():
    """Delete everything inside the ``output`` directory next to this file.

    The ``output`` directory itself is kept; only its contents are removed.
    Does nothing when the directory does not exist yet.

    Side effect: changes the process working directory to the directory
    containing this file.
    """
    # Make sure we're in the main directory so the relative "output" path
    # resolves next to this file regardless of the current working directory.
    os.chdir(pathlib.Path(__file__).parent.absolute())

    output_root = Path("output")
    if not output_root.is_dir():
        # Nothing has been produced yet, so there is nothing to clean.
        return

    # Walk only the direct children: removing a top-level entry already
    # removes everything beneath it, and it avoids deleting directories out
    # from under a recursive glob that is still traversing them.
    for entry in output_root.iterdir():
        if entry.is_dir() and not entry.is_symlink():
            rmtree(entry)
        else:
            # Files and symlinks (rmtree refuses to operate on symlinks).
            entry.unlink()
|
32 |
+
|
33 |
+
@anvil.server.callable
def download_api(url):
    """Run the full download pipeline for *url* and return its last message.

    Drains ``download_generator(url)``, logging the ``'message'`` of every
    status dict it produces, and returns the final one (``''`` when the
    generator produces nothing).
    """
    print(f'Request from Anvil with URL {url}')
    #TODO: figure out how to push an incoming event to the queue
    #THIS DOESN'T WORK queue_placeholder.push_event('download', url)
    #TODO: handle errors
    last_message = ''
    # NOTE(review): the reviewed copy of this file had lost its indentation;
    # the print is assumed to belong inside the loop - confirm.
    for status in download_generator(url):
        last_message = status['message']
        print(last_message)
    return last_message
|
44 |
+
|
45 |
+
def predownload(url):
    """Stream Gradio component updates while *url* is downloaded and processed.

    For every status dict produced by ``download_generator(url)`` this yields
    a ``{component: update}`` mapping: the status textbox is refreshed on each
    step, and the video, audio, subtitle text and subtitled-video components
    are revealed as soon as the corresponding key shows up in the status.
    """
    for status in download_generator(url):
        meta = status.get('meta')
        ui_updates = {download_status: status.get('message', '')}

        if 'video' in status:
            # The download step reports the video and its extracted audio together.
            ui_updates[init_video] = gr.update(
                visible=True,
                value=status["video"],
                label=f"Init Video: {meta['id']}.{meta['ext']}",
            )
            ui_updates[init_audio] = gr.update(
                visible=True,
                value=status["audio"],
                label=f"Extracted audio : {meta['id']}.mp3",
            )

        if 'whisper_result' in status:
            transcript = status['whisper_result']
            ui_updates[output_text] = gr.update(
                value=transcript.get('srt'),
                visible=True,
                label=f"Subtitles translated from {transcript.get('language')} (detected language)",
            )

        if 'sub_video' in status:
            ui_updates[sub_video] = gr.update(
                visible=True,
                value=status["sub_video"],
                label=f"Subbed video: {meta['id']}_translated.mp4",
            )

        yield ui_updates
|
63 |
+
|
64 |
+
|
65 |
+
subtitled_video = False
|
66 |
+
css = """
|
67 |
+
#submit{
|
68 |
+
position: absolute;
|
69 |
+
flex:0 !important;
|
70 |
+
width: 120px;
|
71 |
+
right: 13px;
|
72 |
+
top: 40px;
|
73 |
+
}
|
74 |
+
#url_input{
|
75 |
+
font-size: 40px !important;
|
76 |
+
}
|
77 |
+
#download_status{
|
78 |
+
font-size: 40px !important;
|
79 |
+
}
|
80 |
+
.gradio-container {background-color: red}
|
81 |
+
#input_row{
|
82 |
+
position: relative;
|
83 |
+
}
|
84 |
+
.gradio-interface #submit{
|
85 |
+
|
86 |
+
}
|
87 |
+
"""
|
88 |
+
with gr.Blocks(css=css+"") as demo:
|
89 |
+
gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
|
90 |
+
gr.Markdown('### Link to a tweet, youtube or other video and get a translated video with @openAi #whisper, built by [@altryne](https://twitter.com/altryne/)')
|
91 |
+
gr.Markdown('### This is used as the backend for [@vidtranslator](https://twitter.com/vidtranslator/)')
|
92 |
+
with gr.Row(elem_id="input_row"):
|
93 |
+
with gr.Group() as group:
|
94 |
+
url_input.render()
|
95 |
+
greet_btn = gr.Button("Download", elem_id='submit', variant='primary')
|
96 |
+
pause_for_editing = gr.Checkbox(label="Pause for editing")
|
97 |
+
with gr.Row():
|
98 |
+
with gr.Column():
|
99 |
+
download_status.render()
|
100 |
+
init_video.render()
|
101 |
+
init_audio.render()
|
102 |
+
with gr.Column():
|
103 |
+
with gr.Group() :
|
104 |
+
output_text.render()
|
105 |
+
gr.Button("Download srt file")
|
106 |
+
gr.Button("Bake subtitles into video")
|
107 |
+
sub_video.render()
|
108 |
+
if(subtitled_video):
|
109 |
+
download_video = gr.Button("Download Video", variant='primary')
|
110 |
+
download_srt = gr.Button("Download Srt", variant='primary')
|
111 |
+
|
112 |
+
greet_btn.click(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video], api_name='predownload')
|
113 |
+
url_input.submit(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video])
|
114 |
+
|
115 |
+
api_button = gr.Button("API", variant='primary', visible=False).click(fn=cleanup_output_dir, inputs=[], outputs=[], api_name='cleanup_output_dir')
|
116 |
+
|
117 |
+
queue_placeholder = demo.queue()
|
118 |
+
|
119 |
+
if __name__ == "__main__":
|
120 |
+
demo.launch(show_error=True, debug=True)
|
download.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import time
|
3 |
+
from pathlib import Path
|
4 |
+
import anvil.server
|
5 |
+
import anvil.media
|
6 |
+
from whisper.utils import write_srt
|
7 |
+
from youtube_dl import YoutubeDL
|
8 |
+
from youtube_dl.utils import DownloadError
|
9 |
+
import os
|
10 |
+
import tempfile
|
11 |
+
import json
|
12 |
+
import whisper
|
13 |
+
from whisper.tokenizer import LANGUAGES
|
14 |
+
|
15 |
+
import ffmpeg
|
16 |
+
from utils.subs import bake_subs
|
17 |
+
|
18 |
+
original_dir = os.getcwd()
|
19 |
+
output_dir = Path('output')
|
20 |
+
|
21 |
+
def download_generator(url):
    """Run the check, download, transcribe and subtitle pipeline for *url*.

    This is a generator.  Every yielded item is a dict with a ``"message"``
    string describing the current step; some steps add more keys:

    * ``"meta"`` - the metadata dict returned by ``check_download``/``download``
    * ``"video"`` / ``"audio"`` - paths of the downloaded video and its audio
    * ``"whisper_result"`` - the transcription result, with the SRT text
      stored under its ``"srt"`` key
    * ``"sub_video"`` - absolute path of the video with baked-in subtitles

    Failure handling: a failed availability check or transcription yields the
    error text and ends the generator.  A failed download or subtitle bake
    yields the error text and then re-raises; an ``ffmpeg.Error`` has its
    captured output printed and is re-raised.
    """
    ### Step 1 : check if video is available
    yield {"message": f"Checking {url} for videos"}
    try:
        meta = check_download(url)
        yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}
        # Per-video working directory, named after the video id.
        tempdir = output_dir / f"{meta['id']}"
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        return

    ### Step 2 : Download video and extract audio
    yield {"message": f"Starting download with URL {url}, this may take a while"}

    try:
        meta, video, audio = download(url, tempdir)
        yield {"message": "Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        raise e

    ### Step 3 : Transcribe with whisper
    yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
    try:
        whisper_result = transcribe(audio)
        srt_path = tempdir / f"{meta['id']}.srt"
        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(whisper_result["segments"], file=srt)

        # Read back with the same encoding the file was written with; the
        # platform default is not guaranteed to be UTF-8.
        whisper_result["srt"] = srt_path.read_text(encoding="utf-8")
        yield {"message": "Transcribe successful", "whisper_result": whisper_result, "meta": meta}
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        # Without subtitles there is nothing to bake; stop here instead of
        # running step 4 with an undefined srt_path.
        return

    ### Step 4 : Bake subtitles into video with ffmpeg
    yield {"message": "[PLEASE WAIT] baking subtitles into video"}
    try:
        subbed_video_path = tempdir / f"{meta['id']}_translated.mp4"

        fontsdir = Path('fonts')
        bake_subs(video, subbed_video_path.absolute(), srt_path.absolute(), fontsdir)
        yield {"message": "Subtitled video ready!", "sub_video": str(subbed_video_path.absolute()), "meta": meta}
    except ffmpeg.Error as e:
        # stdout/stderr may be None when ffmpeg output was not captured.
        print('stdout:', (e.stdout or b'').decode('utf8', errors='replace'))
        print('stderr:', (e.stderr or b'').decode('utf8', errors='replace'))
        raise e
    except Exception as e:
        os.chdir(original_dir)
        print('error', file=sys.stderr)
        # Report the failure before re-raising, as step 2 does.
        yield {"message": f"{e}"}
        raise e
|
79 |
+
|
80 |
+
|
81 |
+
def progress_hook(d):
    """Log a download progress event and return its status messages.

    This must be a plain function rather than a generator: calling a
    generator function only creates a generator object, so when the hook is
    invoked as a callback and nobody iterates the result, none of the
    logging would ever run.

    Args:
        d: progress dict.  ``'status'`` selects the branch;
            ``'downloaded_bytes'``, ``'total_bytes'`` (or
            ``'total_bytes_estimate'``) and ``'_percent_str'`` are read while
            downloading, ``'filename'`` when finished.

    Returns:
        list[str]: the message(s) for this event; empty for other statuses.
        Returning a list keeps ``for msg in progress_hook(d)`` working.
    """
    messages = []
    status = d.get('status')

    if status == 'downloading':
        downloaded = d.get('downloaded_bytes')
        # The exact total is not always known; fall back to the estimate.
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        percent = None
        if downloaded is not None and total:
            percent = f"{round(float(downloaded) / float(total) * 100, 1)}%"
            print("downloading " + percent)
        messages.append(f"{d.get('_percent_str', percent or '?%')} downloaded")
    elif status == 'finished':
        filename = d['filename']
        print(filename)
        messages.append(f"Downloaded {filename}")

    return messages
|
89 |
+
|
90 |
+
def download(url, tempdir):
    """Download the video at *url* into *tempdir* and extract an mp3 track.

    Args:
        url: address of the page/video to download.
        tempdir: ``pathlib.Path`` of the directory that receives the files;
            output files are named after the video id.

    Returns:
        tuple: ``(meta, video_path, audio_path)`` - the metadata dict returned
        by ``extract_info`` and the resolved video and mp3 paths as strings.

    Raises:
        DownloadError: propagated unchanged from youtube-dl.
    """
    extract_mp3 = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "keepvideo": True,
        'postprocessors': [extract_mp3],
        "skip_download": False,
        "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
        "noplaylist": True,
        "verbose": False,
        "quiet": True,
        "progress_hooks": [progress_hook],
    }
    downloader = YoutubeDL(options)
    meta = downloader.extract_info(url, download=True)

    video_path = str((tempdir / f"{meta['id']}.{meta['ext']}").resolve())
    audio_path = str((tempdir / f"{meta['id']}.mp3").resolve())
    print(video_path)
    return meta, video_path, audio_path
|
120 |
+
|
121 |
+
def check_download(url):
    """Look up the metadata for *url* without downloading anything.

    Returns:
        The metadata dict returned by ``YoutubeDL.extract_info``.

    Raises:
        DownloadError: propagated unchanged from youtube-dl.
    """
    options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "skip_download": True,
        "verbose": False,
    }
    return YoutubeDL(options).extract_info(url, download=False)
|
138 |
+
|
139 |
+
def transcribe(audio):
    """Translate the speech in *audio* to English with Whisper.

    Replaces the hard-coded sample transcript that was returned for every
    input, regardless of *audio*, with a real transcription run.

    Args:
        audio: path of the audio file to transcribe.

    Returns:
        dict: Whisper's result (``"text"``, ``"segments"``, ``"language"``),
        with ``"language"`` mapped through ``LANGUAGES`` when the value is a
        known key.
    """
    print('Starting transcribe...')

    # Loading the model is expensive, so keep it on the function object and
    # reuse it across calls.
    model = getattr(transcribe, "_model", None)
    if model is None:
        model = transcribe._model = whisper.load_model('medium')

    output = model.transcribe(audio, task="translate")
    # Fall back to the raw value if it is not a key of LANGUAGES.
    output["language"] = LANGUAGES.get(output["language"], output["language"])

    print(f'Finished transcribe from {output["language"]}', output["text"])
    return output
|
fonts/arial.ttf
ADDED
Binary file (367 kB). View file
|
|
utils/__init__.py
ADDED
File without changes
|
utils/subs.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ffmpeg
|
2 |
+
import os
|
3 |
+
from pathlib import Path, PureWindowsPath
|
4 |
+
import anvil.media
|
5 |
+
import os
|
6 |
+
from typing import Iterator, TextIO
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
def bake_subs(input_file, output_file, subs_file, fontsdir):
    """Burn the subtitles in *subs_file* into *input_file*.

    The video stream is run through ffmpeg's ``subtitles`` filter and
    re-joined with the untouched audio stream; the result is written to
    *output_file*, overwriting any existing file.

    Args:
        input_file: path of the source video.
        output_file: path of the subtitled video to write.
        subs_file: path of the subtitle file to render.
        fontsdir: directory containing the fonts the filter may use.

    Errors raised by the ffmpeg run propagate to the caller.
    """
    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

    fontstyle = 'Fontsize=18,OutlineColour=&H40000000,BorderStyle=3,FontName=Arial'
    video = ffmpeg.input(input_file)
    audio = video.audio

    # The filter's ``fontsdir`` option takes a *directory*, not a font file;
    # the font itself is selected by FontName in the style string.
    subtitled = video.filter(
        'subtitles',
        str(subs_file),
        fontsdir=str(fontsdir),
        force_style=fontstyle,
    )
    (
        ffmpeg
        .concat(subtitled, audio, v=1, a=1)
        .output(filename=str(output_file))
        .run(quiet=True, overwrite_output=True)
    )
|
27 |
+
|
28 |
+
|
29 |
+
def str2bool(string):
    """Map the exact strings ``"True"`` / ``"False"`` to their boolean value.

    Raises:
        ValueError: if *string* is anything else.
    """
    str2val = {"True": True, "False": False}
    # Guard clause: reject everything that is not one of the two literals.
    if string not in str2val:
        raise ValueError(
            f"Expected one of {set(str2val.keys())}, got {string}")
    return str2val[string]
|
36 |
+
|
37 |
+
|
38 |
+
def format_timestamp(seconds: float, always_include_hours: bool = False,
                     decimal_marker: str = "."):
    """Format a non-negative number of seconds as ``[H:]MM:SS<marker>mmm``.

    Args:
        seconds: time offset in seconds; must be >= 0.
        always_include_hours: emit the hours field even when it is zero.
        decimal_marker: separator placed before the milliseconds.  Defaults
            to ``"."`` so existing callers get the same output as before.

    Returns:
        The formatted timestamp; the value is rounded to whole milliseconds.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1_000)

    hours_marker = f"{hours}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{secs:02d}{decimal_marker}{millis:03d}"


def _srt_timestamp(seconds: float) -> str:
    """Format *seconds* the way SubRip expects: ``HH:MM:SS,mmm``."""
    stamp = format_timestamp(seconds, always_include_hours=True, decimal_marker=",")
    hours, _, rest = stamp.partition(":")
    # SRT wants at least two hour digits.
    return f"{hours.zfill(2)}:{rest}"


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write *transcript* to *file* as SubRip (SRT) cues.

    Args:
        transcript: iterable of segment dicts with ``'start'`` and ``'end'``
            (seconds) and ``'text'``.
        file: writable text stream that receives the cues.

    Cues are numbered from 1 and separated by a blank line.  Any ``-->`` in
    the text is replaced with ``->`` so it cannot be mistaken for the cue
    timing separator.
    """
    for i, segment in enumerate(transcript, start=1):
        print(
            f"{i}\n"
            f"{_srt_timestamp(segment['start'])} --> "
            f"{_srt_timestamp(segment['end'])}\n"
            f"{segment['text'].strip().replace('-->', '->')}\n",
            file=file,
        )
    # One flush at the end instead of one per cue.
    file.flush()
|
65 |
+
|
66 |
+
|
67 |
+
def filename(path):
    """Return the final component of *path* with its last extension removed."""
    base = os.path.basename(path)
    stem, _extension = os.path.splitext(base)
    return stem
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
# if __name__ == '__main__':
|
73 |
+
# meta = {
|
74 |
+
# "id": 1576155093245693954,
|
75 |
+
# "ext": 'mp4'
|
76 |
+
# }
|
77 |
+
# tempdirname = Path(f"encoding/temp/{meta['id']}")
|
78 |
+
# video_file_path = f"{meta['id']}.{meta['ext']}"
|
79 |
+
# srt_path = f"{meta['id']}.srt"
|
80 |
+
# out_path = f"{meta['id']}_translated.mp4"
|
81 |
+
# os.chdir(tempdirname)
|
82 |
+
# bake_subs(video_file_path, out_path, srt_path)
|
83 |
+
# anvil_media = anvil.media.from_file(out_path, 'video/mp4')
|
84 |
+
# print(anvil_media)
|