Spaces:
Runtime error
Runtime error
import contextlib
import os
import subprocess
from tempfile import NamedTemporaryFile
from urllib.error import URLError

import pydub
import speech_recognition as sr
import streamlit as st
from deep_translator import GoogleTranslator
from langdetect import detect
from pytube import YouTube, exceptions as pytube_exceptions
def split_audio(file_path, chunk_length_ms=60000):
    """Split a WAV file into shorter WAV files and return their paths.

    The audio is first cut at pauses: stretches of at least 500 ms that are
    quieter than 14 dB below the clip's overall dBFS level, keeping 500 ms of
    silence around each piece. Any piece that is still longer than
    ``chunk_length_ms`` is then sliced into consecutive windows of at most
    that length, so no exported file exceeds the limit.

    Args:
        file_path: Path of the WAV file to split.
        chunk_length_ms: Maximum length of one exported chunk, in
            milliseconds. ``None`` or a non-positive value disables the cap
            and exports the silence-delimited pieces unchanged.

    Returns:
        Paths of the exported chunk files in playback order, named
        ``<file_path>_chunk<i>.wav``. The list is empty when no non-silent
        audio is found. The caller is responsible for deleting the files.
    """
    audio = pydub.AudioSegment.from_wav(file_path)
    segments = pydub.silence.split_on_silence(
        audio,
        min_silence_len=500,
        silence_thresh=audio.dBFS - 14,
        keep_silence=500,
    )

    # range() needs a positive integer step; anything else means "no cap".
    max_len = int(chunk_length_ms) if chunk_length_ms else 0

    chunk_files = []
    for segment in segments:
        if max_len > 0:
            # len() of an AudioSegment and its slice indices are both in ms.
            pieces = [
                segment[start:start + max_len]
                for start in range(0, len(segment), max_len)
            ]
        else:
            pieces = [segment]
        for piece in pieces:
            # Number files by export order so the names stay consecutive even
            # when one silence-delimited segment yields several pieces.
            chunk_name = f"{file_path}_chunk{len(chunk_files)}.wav"
            piece.export(chunk_name, format="wav")
            chunk_files.append(chunk_name)
    return chunk_files
def download_audio(url):
    """Download the first audio-only stream of a YouTube video to a temp file.

    Errors are reported to the user through ``st.error`` rather than raised.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, or ``None`` when the download failed.
        On success the caller owns the file and must delete it.
    """
    temp_path = None
    try:
        yt = YouTube(url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise ValueError("No audio stream found")
        # Only reserve a unique name here; close the handle before pytube
        # writes to the path so the file is not open twice.
        with NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
        audio_stream.download(filename=temp_path)
        return temp_path
    except pytube_exceptions.PytubeError as e:
        st.error(f"Error downloading video: {e}")
    except Exception as e:
        st.error(f"An unexpected error occurred: {e}")

    # Failure path: do not leave the (possibly partial) temp file behind.
    if temp_path is not None:
        with contextlib.suppress(OSError):
            os.remove(temp_path)
    return None
def convert_to_wav(original_file):
    """Convert an audio file to 16 kHz mono WAV by running ffmpeg.

    The output is written next to the input, with the input's extension
    replaced by ``.wav``. Failures are reported through ``st.error``.

    Args:
        original_file: Path of the audio file to convert.

    Returns:
        Path of the WAV file, or ``None`` when ffmpeg failed or could not be
        started.
    """
    target_format = "wav"
    # splitext only strips the final extension of the last path component, so
    # dots in directory names (or a leading "./") do not truncate the path.
    base, _ = os.path.splitext(original_file)
    output_file = f"{base}.{target_format}"
    if output_file == original_file:
        # ffmpeg cannot read from and write to the same file.
        output_file = f"{base}_converted.{target_format}"

    # -y: overwrite a stale output instead of waiting on an interactive prompt.
    command = ['ffmpeg', '-y', '-i', original_file, '-ar', '16000', '-ac', '1', output_file]
    try:
        subprocess.run(
            command,
            stdin=subprocess.DEVNULL,  # never block waiting for keyboard input
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ffmpeg binary.
        st.error(f"Error converting file to WAV: {e}")
        return None
    return output_file
def transcribe_and_translate(file_path):
    """Transcribe a WAV file, detect its language and translate it to English.

    Files larger than 10 MB are split with ``split_audio`` and transcribed
    chunk by chunk; smaller files are transcribed in one request. Each chunk
    is recognised with ``recognizer.recognize_google``. Chunks that cannot be
    understood or whose request fails contribute a bracketed marker instead
    of aborting the whole transcription.

    Args:
        file_path: Path of the WAV file. The file itself is never deleted
            here; only chunk files created by this function are removed.

    Returns:
        A ``(transcription, language, translation)`` tuple of strings.
        ``language`` is the code returned by ``detect``, ``"Detection
        failed"`` when detection raised, or ``"unknown"`` when the file does
        not exist. ``translation`` is empty when the text is detected as
        English or nothing was transcribed, and holds a ``"Translation
        failed: ..."`` message when the translator raised.
    """
    language = "unknown"
    translation = ""
    if not os.path.exists(file_path):
        return "[Error: File not found]", language, translation

    recognizer = sr.Recognizer()
    size_mb = os.path.getsize(file_path) / (1024 * 1024)
    chunk_files = split_audio(file_path) if size_mb > 10 else [file_path]

    pieces = []
    try:
        for chunk_file in chunk_files:
            if not os.path.exists(chunk_file):
                pieces.append(f"[Error: File {chunk_file} not found]")
                continue
            try:
                with sr.AudioFile(chunk_file) as source:
                    audio_data = recognizer.record(source)
                pieces.append(recognizer.recognize_google(audio_data))
            except sr.UnknownValueError:
                pieces.append("[Unintelligible]")
            except sr.RequestError as e:
                pieces.append(f"[Error: {e}]")
    finally:
        # Remove only the chunk files created above. The input file belongs
        # to the caller, which deletes it itself.
        for chunk_file in chunk_files:
            if chunk_file != file_path:
                with contextlib.suppress(OSError):
                    os.remove(chunk_file)
    transcription = " ".join(pieces)

    try:
        language = detect(transcription)
    except Exception:
        # Detection raises on empty or feature-less text; report it instead
        # of failing the whole transcription.
        language = "Detection failed"

    if transcription.strip() and language != "en":
        try:
            # Let the translator detect the source language itself: the
            # detected value may be "Detection failed" or a code spelled
            # differently from what the translator accepts.
            translator = GoogleTranslator(source="auto", target="en")
            translation = translator.translate(transcription)
        except Exception as e:
            translation = f"Translation failed: {e}"
    return transcription, language, translation
def transcribe_youtube_video(url):
    """Download a YouTube video's audio, transcribe it and translate it.

    Runs ``download_audio``, ``convert_to_wav`` and
    ``transcribe_and_translate`` in sequence. Every failure is turned into a
    message in the first element of the result instead of being raised, and
    the temporary audio files are removed on every exit path.

    Args:
        url: URL of the YouTube video.

    Returns:
        A ``(transcription, language, translation)`` tuple of strings. On
        failure the first element is an error message, ``language`` is
        ``"unknown"`` and ``translation`` is empty.
    """
    audio_path = None
    wav_audio_path = None
    try:
        audio_path = download_audio(url)
        if not audio_path:
            return "Failed to download audio from YouTube.", "unknown", ""

        wav_audio_path = convert_to_wav(audio_path)
        if not wav_audio_path:
            return "Failed to convert audio to WAV.", "unknown", ""

        transcription, detected_language, translation = transcribe_and_translate(wav_audio_path)
        return transcription, detected_language, translation
    except URLError:
        return "Error in network connection. Please check your connection and try again.", "unknown", ""
    except Exception as e:
        return f"An unexpected error occurred: {e}", "unknown", ""
    finally:
        # Best-effort cleanup. A file that is already gone must not turn a
        # successful transcription into an error.
        for temp_path in (audio_path, wav_audio_path):
            if temp_path:
                with contextlib.suppress(OSError):
                    os.remove(temp_path)
def main():
    """Render the Streamlit page: URL input, button and result widgets.

    When the "Transcribe" button is pressed with a non-blank URL, the video
    is transcribed and the transcription, detected language and English
    translation are displayed. A blank URL shows an error instead.
    """
    st.title("YouTube Video Transcriber")
    # Strip whitespace so a blank-looking input is treated as empty.
    url = (st.text_input("Enter the URL of the YouTube video:") or "").strip()

    if not st.button("Transcribe"):
        return
    if not url:
        st.error("Please enter a valid YouTube URL.")
        return

    with st.spinner('Transcribing...'):
        transcription, language, translation = transcribe_youtube_video(url)
    st.text_area("Transcription:", transcription, height=300)
    st.write(f"Detected Language: {language.title()}")
    st.text_area("Translation (English):", translation, height=300)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()