Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,696 Bytes
1d7163f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import logging
import os
import tempfile
from typing import Iterator, Optional

from pysrt import SubRipFile, SubRipItem, SubRipTime
from pytubefix import YouTube

from transcriber import TranscribeResult
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def download_youtube_audio(video_id: str) -> Optional[str]:
    """
    Download the audio track of a YouTube video into the system temp directory.

    Args:
        video_id (str): YouTube video ID.

    Returns:
        Optional[str]: Path to the downloaded audio file, or None when the
        video has no title, no audio-only stream is available, or any error
        is raised while fetching or downloading (the error is logged).
    """
    url = "https://www.youtube.com/watch?v={}".format(video_id)
    output_dir = tempfile.gettempdir()
    try:
        # The "MWEB" client is passed per the workaround discussed in:
        # https://github.com/JuanBindez/pytubefix/issues/242#issuecomment-2369067929
        vid = YouTube(url, "MWEB")
        if vid.title is None:
            return None
        audio_stream = vid.streams.get_audio_only()
        if audio_stream is None:
            logger.error("No audio-only stream found for YouTube video %s", video_id)
            return None
        audio_stream.download(
            mp3=True,
            filename=video_id,
            output_path=output_dir,
            skip_existing=True,
        )
    except Exception:
        # Best-effort boundary: callers receive None on failure, but the full
        # traceback is kept in the log instead of being printed to stdout.
        logger.exception("Failed to download audio for YouTube video %s", video_id)
        return None
    # Assumes the download above writes "<video_id>.mp3" into output_dir.
    return os.path.join(output_dir, video_id + ".mp3")
def to_srt(results: Iterator["TranscribeResult"]) -> str:
    """
    Convert TranscribeResult objects into SRT-formatted text.

    Args:
        results: Iterable of transcription results. Each item must expose
            ``start_time`` and ``end_time`` (passed to SubRipTime as seconds)
            and ``text``.

    Returns:
        str: The contents of the generated SRT file.
    """
    srt = SubRipFile()
    # SRT cue numbers conventionally start at 1, not 0.
    for index, result in enumerate(results, start=1):
        start = SubRipTime(seconds=result.start_time)
        end = SubRipTime(seconds=result.end_time)
        srt.append(SubRipItem(index=index, start=start, end=end, text=result.text))

    # A private temporary directory gives every call its own file, so
    # concurrent calls cannot overwrite each other's output, and the context
    # manager removes it even if saving or reading raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = os.path.join(tmp_dir, "output.srt")
        srt.save(temp_file, encoding="utf-8")
        with open(temp_file, "r", encoding="utf-8") as f:
            return f.read()
|