Spaces:

it4xperts
/

Youtube_video_transcribe

Runtime error

App Files Files Community

Youtube_video_transcribe / app.py

it4xperts

Update app.py

d6a32e1 11 months ago

raw

history blame contribute delete

4.68 kB

	import os
	import streamlit as st
	from pytube import YouTube, exceptions as pytube_exceptions
	from urllib.error import URLError
	from tempfile import NamedTemporaryFile
	import speech_recognition as sr
	import subprocess
	import pydub
	from langdetect import detect
	from deep_translator import GoogleTranslator

	def split_audio(file_path, chunk_length_ms=60000):
	    """Split a WAV file into chunk files no longer than ``chunk_length_ms``.

	    The audio is first cut at pauses: silences of at least 500 ms that are
	    14 dB below the clip's average loudness, keeping 500 ms of padding
	    around each piece. Any piece that is still longer than
	    ``chunk_length_ms`` is then sliced into consecutive windows of at most
	    that length, so the parameter is actually honoured.

	    Args:
	        file_path: Path of the WAV file to split.
	        chunk_length_ms: Maximum duration of one chunk in milliseconds.
	            ``None`` or a non-positive value disables the length bound.

	    Returns:
	        Paths of the exported chunk files, named
	        ``<file_path>_chunk<i>.wav``, in playback order. The files are
	        written next to ``file_path``; removing them is up to the caller.
	    """
	    audio = pydub.AudioSegment.from_wav(file_path)
	    pieces = pydub.silence.split_on_silence(
	        audio,
	        min_silence_len=500,
	        silence_thresh=audio.dBFS - 14,
	        keep_silence=500,
	    )
	    if chunk_length_ms and chunk_length_ms > 0:
	        window = int(chunk_length_ms)
	        # len() of an AudioSegment and its slice indices are both expressed
	        # in milliseconds, so plain slicing yields fixed-length windows.
	        pieces = [
	            piece[start:start + window]
	            for piece in pieces
	            for start in range(0, len(piece), window)
	        ]
	    chunk_files = []
	    for i, piece in enumerate(pieces):
	        chunk_name = f"{file_path}_chunk{i}.wav"
	        piece.export(chunk_name, format="wav")
	        chunk_files.append(chunk_name)
	    return chunk_files

	def download_audio(url):
	    """Download the first audio-only stream of a YouTube video to a temp file.

	    Args:
	        url: URL of the YouTube video.

	    Returns:
	        Path of the downloaded file, or ``None`` when the download failed.
	        Failures are reported to the page through ``st.error``. On success
	        the file is not deleted automatically; the caller must remove it.
	    """
	    temp_path = None
	    try:
	        yt = YouTube(url)
	        audio_stream = yt.streams.filter(only_audio=True).first()
	        if audio_stream is None:
	            raise ValueError("No audio stream found")
	        # Reserve a unique path and close the handle before the download
	        # writes to it, so the file is never open twice at the same time.
	        with NamedTemporaryFile(delete=False) as temp_file:
	            temp_path = temp_file.name
	        audio_stream.download(filename=temp_path)
	        return temp_path
	    except pytube_exceptions.PytubeError as e:
	        st.error(f"Error downloading video: {e}")
	    except Exception as e:
	        st.error(f"An unexpected error occurred: {e}")
	    # Only reached on failure: do not leave the reserved temp file behind.
	    if temp_path is not None:
	        try:
	            os.remove(temp_path)
	        except OSError:
	            pass  # best-effort cleanup; the error was already reported
	    return None

	def convert_to_wav(original_file):
	    """Convert an audio file to a 16 kHz mono WAV file using ffmpeg.

	    Args:
	        original_file: Path of the audio file to convert.

	    Returns:
	        Path of the WAV file written next to the input, or ``None`` when
	        ffmpeg exits with a non-zero status (the failure is reported
	        through ``st.error``).
	    """
	    # splitext strips only the final extension of the file name, so dots in
	    # directory names or a leading "./" no longer truncate the path.
	    root, _ = os.path.splitext(original_file)
	    output_file = root + ".wav"
	    if output_file == original_file:
	        # The input is already named *.wav; write to a distinct path
	        # instead of asking ffmpeg to read and write the same file.
	        output_file = root + "_converted.wav"
	    command = ['ffmpeg', '-i', original_file, '-ar', '16000', '-ac', '1', output_file]
	    try:
	        subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
	    except subprocess.CalledProcessError as e:
	        st.error(f"Error converting file to WAV: {e}")
	        return None
	    return output_file

	def transcribe_and_translate(file_path):
	    """Transcribe a WAV file, detect its language and translate it to English.

	    Files larger than 10 MB are split with ``split_audio`` and transcribed
	    chunk by chunk; smaller files are sent to the recognizer in one piece.
	    Chunk files created here are deleted after use. ``file_path`` itself is
	    left in place so the caller stays in charge of removing it.

	    Args:
	        file_path: Path of the WAV file to transcribe.

	    Returns:
	        A ``(transcription, language, translation)`` tuple of strings.
	        ``language`` is the detected code, ``"Detection failed"`` when
	        detection raised, or ``"unknown"`` when the file does not exist.
	        ``translation`` stays empty for English text and holds a
	        ``"Translation failed: ..."`` message when translating raised.
	    """
	    language = "unknown"
	    translation = ""

	    if not os.path.exists(file_path):
	        return "[Error: File not found]", language, translation

	    recognizer = sr.Recognizer()
	    if os.path.getsize(file_path) / (1024 * 1024) > 10:
	        chunk_files = split_audio(file_path)
	    else:
	        chunk_files = [file_path]

	    parts = []
	    for chunk_file in chunk_files:
	        if not os.path.exists(chunk_file):
	            parts.append(f"[Error: File {chunk_file} not found] ")
	            continue
	        with sr.AudioFile(chunk_file) as source:
	            audio_data = recognizer.record(source)
	        try:
	            parts.append(recognizer.recognize_google(audio_data) + " ")
	        except sr.UnknownValueError:
	            parts.append("[Unintelligible] ")
	        except sr.RequestError as e:
	            parts.append(f"[Error: {e}] ")
	        # Delete only the temporary chunks made by split_audio. Removing the
	        # caller's own file would make its later cleanup raise and discard
	        # the transcription.
	        if chunk_file != file_path:
	            os.remove(chunk_file)
	    transcription = "".join(parts)

	    try:
	        language = detect(transcription)
	    except Exception:
	        language = "Detection failed"

	    if language != "en":
	        # Let the translator detect the language itself when detection
	        # failed, rather than passing it an invalid language code.
	        source_language = "auto" if language == "Detection failed" else language
	        try:
	            # Building the translator can raise for unsupported codes, so
	            # it belongs inside the try together with the translate call.
	            translator = GoogleTranslator(source=source_language, target='en')
	            translation = translator.translate(transcription)
	        except Exception as e:
	            translation = f"Translation failed: {e}"

	    return transcription, language, translation

	def transcribe_youtube_video(url):
	    """Download a YouTube video's audio, transcribe it and translate it.

	    Args:
	        url: URL of the YouTube video.

	    Returns:
	        A ``(transcription, language, translation)`` tuple of strings. On
	        failure the first element is a human-readable error message, the
	        language is ``"unknown"`` and the translation is empty.
	    """
	    def _remove_quietly(path):
	        """Delete *path* if one was created, tolerating a missing file."""
	        if path:
	            try:
	                os.remove(path)
	            except OSError:
	                pass  # best-effort cleanup must not replace the result

	    audio_path = None
	    wav_audio_path = None
	    try:
	        audio_path = download_audio(url)
	        if not audio_path:
	            return "Failed to download audio from YouTube.", "unknown", ""
	        wav_audio_path = convert_to_wav(audio_path)
	        if not wav_audio_path:
	            return "Failed to convert audio to WAV.", "unknown", ""
	        return transcribe_and_translate(wav_audio_path)
	    except URLError:
	        return "Error in network connection. Please check your connection and try again.", "unknown", ""
	    except Exception as e:
	        return f"An unexpected error occurred: {e}", "unknown", ""
	    finally:
	        # Runs on every path, so temp files are also removed when the
	        # conversion fails or an exception is raised midway.
	        _remove_quietly(audio_path)
	        _remove_quietly(wav_audio_path)

	def main():
	    """Render the Streamlit page and run a transcription on request.

	    Shows a URL input and a "Transcribe" button. When the button is pressed
	    with a non-blank URL, the video is transcribed and the transcription,
	    detected language and English translation are displayed; otherwise an
	    error asks for a URL.
	    """
	    st.title("YouTube Video Transcriber")
	    url = st.text_input("Enter the URL of the YouTube video:")

	    if not st.button("Transcribe"):
	        return

	    # Treat whitespace-only input like an empty field instead of starting a
	    # download that cannot succeed.
	    url = url.strip() if url else ""
	    if not url:
	        st.error("Please enter a valid YouTube URL.")
	        return

	    with st.spinner('Transcribing...'):
	        transcription, language, translation = transcribe_youtube_video(url)
	    st.text_area("Transcription:", transcription, height=300)
	    st.write(f"Detected Language: {language.title()}")
	    st.text_area("Translation (English):", translation, height=300)

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()