# Spaces:
# Runtime error
# Runtime error
# Import the necessary modules.
import os
import re
import time
import urllib.error
import urllib.request

import gradio as gr
# Callback for the Gradio app, plus the private helpers it relies on.

# Base URL that the relative links scraped from the pages are appended to.
_YOUTUBE_BASE_URL = 'http://www.youtube.com'
# Prefix of every scraped video link; the video id is whatever follows it.
_WATCH_PREFIX = '/watch?v='
# Directory (relative to the working directory) the transcripts are saved in.
_TRANSCRIPT_DIR = 'Transcripts'
# Message returned when the submitted text is not an http(s) URL.
_BAD_URL_MESSAGE = "Please enter a channel URL starting with http:// or https://"


def _fetch_page(url):
    """Return the body of *url* decoded as UTF-8, closing the response."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")


def _find_transcript_download_url(link):
    """Return the transcript download URL for a video link, or None.

    Follows the same chain of pages as the original script: the video page,
    then the first ``/timedtext_editor?...`` link found on it, then the first
    ``/api/timedtext?...`` link found on that page.
    """
    # NOTE(review): the character class in the two patterns below excludes
    # '=' and '&', so each match stops at the first such character of the
    # query string. Verify against the current page markup that these links
    # still exist and that the truncated form is what is wanted.
    video_html = _fetch_page(_YOUTUBE_BASE_URL + link)
    editor_links = re.findall(
        r'(\/timedtext_editor\?[A-Za-z0-9_.-]*)', video_html)
    if not editor_links:
        return None

    editor_html = _fetch_page(_YOUTUBE_BASE_URL + editor_links[0])
    download_links = re.findall(
        r'(\/api\/timedtext\?[A-Za-z0-9_.-]*)', editor_html)
    if not download_links:
        return None

    return _YOUTUBE_BASE_URL + download_links[0]


def transcript_extract(channel_url=""):
    """Download the transcripts of the videos linked from a channel page.

    The page at *channel_url* is fetched and scanned for ``/watch?v=<id>``
    links. For each distinct link the transcript download URL is looked up
    and, when found, saved as ``Transcripts/<id>.xml``.

    Args:
        channel_url: Text submitted through the app's textbox. Only
            ``http://`` and ``https://`` URLs are accepted.

    Returns:
        A newline-separated log with one status line per video, or a single
        explanatory message when the URL is rejected, the channel page cannot
        be opened, or no video links are found. Each line is also printed.
    """
    channel_url = (channel_url or "").strip()
    # urlopen also understands schemes such as file:// and ftp://; the URL
    # comes straight from the user, so only web URLs are let through.
    if not channel_url.lower().startswith(("http://", "https://")):
        return _BAD_URL_MESSAGE

    try:
        channel_html = _fetch_page(channel_url)
    except (urllib.error.URLError, ValueError) as err:
        return "Could not open " + channel_url + ": " + str(err)

    # '+' instead of '*' so an empty video id is never matched;
    # dict.fromkeys drops repeated links while keeping first-seen order.
    watch_links = list(dict.fromkeys(
        re.findall(r'(\/watch\?v=[A-Za-z0-9_.-]+)', channel_html)))
    if not watch_links:
        return "No video links found on " + channel_url

    os.makedirs(_TRANSCRIPT_DIR, exist_ok=True)

    messages = []

    def report(message):
        # Keep the console output of the original script and collect the
        # same text for the app's output textbox.
        print(message)
        messages.append(message)

    for link in watch_links:
        video_id = link[len(_WATCH_PREFIX):]
        try:
            download_url = _find_transcript_download_url(link)
            if download_url is None:
                report("Transcript not available for video " + video_id)
                continue
            report("Downloading transcript for video " + video_id + "...")
            file_name = _TRANSCRIPT_DIR + "/" + video_id + ".xml"
            urllib.request.urlretrieve(download_url, file_name)
        except (urllib.error.URLError, UnicodeDecodeError) as err:
            # One unreachable or undecodable page must not abort the run.
            report("Failed to download transcript for video "
                   + video_id + ": " + str(err))
            continue
        # Pause between downloads, as the original script did.
        time.sleep(3)

    return "\n".join(messages)
# Launch the Gradio app. `share` is an option of `launch()`, not of the
# `Interface` constructor, so it is passed there.
demo = gr.Interface(fn=transcript_extract, inputs="textbox", outputs="textbox")
demo.launch(share=True)