File size: 21,090 Bytes
6fd7ef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7de403b
 
6fd7ef3
7de403b
6fd7ef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7de403b
 
 
 
 
6fd7ef3
7de403b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import sys
import time
from pathlib import Path
import anvil.server
import anvil.media
from whisper.utils import write_srt
from youtube_dl import YoutubeDL
from youtube_dl.utils import DownloadError
import os
import tempfile
import json
import whisper
from whisper.tokenizer import LANGUAGES

import ffmpeg
from utils.subs import bake_subs

# Working directory at import time; the error handlers in download_generator
# chdir back to it before reporting a failure.
original_dir = os.getcwd()
# Parent directory (relative to the working directory) under which
# download_generator creates one sub-directory per video id.
output_dir = Path('output')

def download_generator(url):
  """Run the check -> download -> transcribe -> bake-subtitles pipeline for *url*.

  This is a generator: every stage yields a dict with a human readable
  "message" plus, depending on the stage, "meta", "video", "audio",
  "whisper_result" or "sub_video".

  Failure handling per stage:
    * Step 1 (availability / length check): the error text is yielded and
      the generator finishes normally.
    * Step 2 (download): the error text is yielded, then the exception is
      re-raised.
    * Step 3 (transcription): the error text is yielded and the generator
      finishes normally, because step 4 cannot run without the subtitle file.
    * Step 4 (baking): ffmpeg errors are printed and re-raised; any other
      error is yielded as a message and then re-raised.
  """
  ### Step 1 : check if video is available
  yield {"message": f"Checking {url} for videos"}
  try:
    meta = check_download(url)
    if(meta['duration'] > 5 * 60):
      raise Exception("Video is too long, please use videos less than 5 minutes")
    yield {"message": f"Found video with {meta['duration']} seconds duration from {meta['extractor']}", "meta": meta}

    # All artefacts for this video live in output/<video id>/
    tempdir = output_dir/f"{meta['id']}"
  except Exception as e:
    yield {"message": f"{e}"}
    return

  ### Step 2 : Download video and extract audio
  yield {"message": f"Starting download with URL {url}, this may take a while"}

  try:
    meta, video, audio = download(url, tempdir)
    yield {"message": f"Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}
  except Exception as e:
    # NOTE(review): presumably guards against a helper having changed the
    # working directory - confirm whether this is still needed.
    os.chdir(original_dir)
    yield {"message": f"{e}"}
    raise e

  ### Step 3 : Transcribe with whisper
  yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
  try:
    whisper_result = transcribe(audio)
    srt_path = tempdir / f"{meta['id']}.srt"
    with open(srt_path, "w", encoding="utf-8") as srt:
      write_srt(whisper_result["segments"], file=srt)

    # Read back with the same encoding the file was written with; the
    # platform default is not guaranteed to be UTF-8.
    whisper_result["srt"] = srt_path.read_text(encoding="utf-8")
    yield {"message": f"Transcribe successful", "whisper_result": whisper_result, "meta": meta}
  except Exception as e:
    os.chdir(original_dir)
    yield {"message": f"{e}"}
    # Without a subtitle file there is nothing to bake; stop here instead of
    # failing later on an unbound srt_path.
    return

  ### Step 4 : Bake subtitles into video with ffmpeg
  yield {"message": f"[PLEASE WAIT] baking subtitles into video"}
  try:

    subbed_video_path = tempdir / f"{meta['id']}_translated.mp4"

    fontsdir = Path('fonts')
    bake_subs(video, subbed_video_path.absolute() , srt_path.absolute(), fontsdir)
    yield {"message": f"Subtitled video ready!", "sub_video": str(subbed_video_path.absolute()), "meta": meta}
  except ffmpeg.Error as e:
    # stdout/stderr may be None when ffmpeg output was not captured; do not
    # let the diagnostics mask the real error.
    print('stdout:', (e.stdout or b'').decode('utf8'))
    print('stderr:', (e.stderr or b'').decode('utf8'))
    raise e
  except Exception as e:
    os.chdir(original_dir)
    print('error', file=sys.stderr)
    # Report the failure to the consumer before propagating it (same order
    # as the download step).
    yield {"message": f"{e}"}
    raise e


def progress_hook(d):
  """Print download progress for a youtube_dl progress-hook status dict.

  The function runs eagerly so that the progress lines are printed when the
  downloader invokes the hook and ignores its return value. (A generator
  function here would never execute its body in that situation.)

  Args:
    d: status dict. Keys read: 'status', and depending on it
      'downloaded_bytes', 'total_bytes' / 'total_bytes_estimate',
      '_percent_str' and 'filename'.

  Returns:
    An iterator over the progress message(s) produced for this call, so
    callers that iterate the result keep working.
  """
  messages = []
  status = d.get('status')
  if status == 'downloading':
    downloaded = d.get('downloaded_bytes')
    # The exact size is not always known; fall back to the estimate.
    total = d.get('total_bytes') or d.get('total_bytes_estimate')
    percent = None
    if downloaded is not None and total:
      percent = round(float(downloaded) / float(total) * 100, 1)
      print("downloading " + str(percent) + "%")
    percent_str = d.get('_percent_str')
    if percent_str is None:
      percent_str = f"{percent}%" if percent is not None else "?%"
    messages.append(f"{percent_str} downloaded")
  if status == 'finished':
    filename = d['filename']
    print(filename)
    messages.append(f"Downloaded {filename}")
  return iter(messages)

def download(url, tempdir):
  """Download the video at *url* into *tempdir* and extract an mp3 from it.

  Args:
    url: address handed to youtube_dl.
    tempdir: directory for the output files; must support the ``/``
      operator (a pathlib.Path in the visible caller).

  Returns:
    A tuple ``(meta, video_path, audio_path)`` where *meta* is the info
    dict returned by youtube_dl and both paths are resolved strings.

  Raises:
    Whatever youtube_dl raises (e.g. DownloadError) propagates unchanged.
  """
  extract_mp3 = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'mp3',
    'preferredquality': '192',
  }
  options = {
    "format": "bestvideo[ext=mp4]+bestaudio/best",
    # keep the video file after the audio has been extracted from it
    "keepvideo": True,
    'postprocessors': [extract_mp3],
    "skip_download": False,
    "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
    "noplaylist": True,
    "verbose": False,
    "quiet": True,
    "progress_hooks": [progress_hook],
  }
  meta = YoutubeDL(options).extract_info(url, download=True)

  # File names follow the "outtmpl" pattern above.
  video_id = meta['id']
  video_path = str((tempdir / f"{video_id}.{meta['ext']}").resolve())
  audio_path = str((tempdir / f"{video_id}.mp3").resolve())
  print(video_path)
  return meta, video_path, audio_path

def check_download(url):
  """Fetch youtube_dl metadata for *url* without downloading any media.

  Returns:
    The info dict produced by ``YoutubeDL.extract_info``.

  Raises:
    Whatever youtube_dl raises (e.g. DownloadError) propagates unchanged.
  """
  probe_options = {
    "format": "bestvideo[ext=mp4]+bestaudio/best",
    "skip_download": True,
    "verbose": False,
  }
  prober = YoutubeDL(probe_options)
  return prober.extract_info(url, download=False)

def transcribe(audio):
  """Translate the speech in *audio* with Whisper's 'medium' model.

  Args:
    audio: audio input passed straight to ``model.transcribe``
      (the visible caller passes a path string to an mp3 file).

  Returns:
    The dict returned by ``model.transcribe`` with two modifications:
    "language" is replaced by its entry in ``LANGUAGES`` (e.g. "korean"),
    and a credit segment covering seconds 0-3 is prepended to "segments".
    A previously recorded sample result had the keys "text", "segments"
    and "language"; each segment carried "id", "seek", "start", "end",
    "text", "tokens", "temperature", "avg_logprob", "compression_ratio"
    and "no_speech_prob".
  """
  print('Starting transcribe...')
  model = whisper.load_model('medium')
  output = model.transcribe(audio, task="translate")
  # Map the detected language to its LANGUAGES entry; keep the raw value if
  # it is not in the table rather than failing the whole transcription.
  detected = output["language"]
  output["language"] = LANGUAGES.get(detected, detected)
  # Credit line shown for the first three seconds. It only has the keys this
  # file's sample data shows as timing/text fields (no "tokens" etc.).
  output['segments'] = [{"id": 0, "seek": 0, "start": 0.0, "end": 3, "text": " [AI translation by @vidtranslator]"}] + output['segments']
  print(f'Finished transcribe from {output["language"]}', output["text"])
  return output