"""Streamlit app that transcribes the audio track of an uploaded video."""

import os
import tempfile

import speech_recognition as sr
import streamlit as st
from moviepy.editor import VideoFileClip
from pydub import AudioSegment


def extract_audio_from_video(video_path: str, audio_path: str) -> None:
    """Write the audio track of a video to a 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the WAV file to create.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track to transcribe.")
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        # Release the underlying reader even when extraction fails, so the
        # source file is no longer held open when the caller deletes it.
        video.close()


def transcribe_audio(audio_path: str, chunk_length_ms: int = 60000) -> str:
    """Transcribe a WAV file with the Google Web Speech recognizer.

    The audio is split into consecutive chunks of ``chunk_length_ms``
    milliseconds and each chunk is recognized separately.

    Args:
        audio_path: Path of the WAV file to transcribe.
        chunk_length_ms: Length of each chunk in milliseconds (60 seconds
            by default).

    Returns:
        The text of every chunk, each followed by a single space. A chunk
        that could not be understood contributes ``[Unclear speech]`` and a
        chunk whose request failed contributes ``[API error: ...]``.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive integer")

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_path)
    pieces = []

    # A private temporary directory keeps chunk files from colliding between
    # concurrent sessions and guarantees they are removed even on errors.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
            chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")
            exported = audio[start:start + chunk_length_ms].export(
                chunk_path, format="wav"
            )
            # export() hands back the open output file; close it so the
            # chunk can be re-opened for reading and later deleted.
            exported.close()

            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)

            try:
                pieces.append(recognizer.recognize_google(audio_data))
            except sr.UnknownValueError:
                pieces.append("[Unclear speech]")
            except sr.RequestError as e:
                pieces.append(f"[API error: {e}]")

    return "".join(f"{piece} " for piece in pieces)


st.title("Video to Text Transcription")
st.write("Upload a video file to transcribe its audio to text.")

uploaded_file = st.file_uploader(
    "Choose a video file", type=["mp4", "mov", "avi", "mkv"]
)

if uploaded_file is not None:
    # Keep the original extension (including the dot) for the temporary copy.
    extension = os.path.splitext(uploaded_file.name)[1]
    transcribed_text = None

    # Per-run scratch directory: intermediate files never clash between
    # sessions and are always removed, whether or not processing succeeds.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, f"temp_video{extension}")
        audio_path = os.path.join(work_dir, "temp_audio.wav")

        with open(video_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Announce the work before it starts rather than after extraction.
        st.write("Extracting and transcribing audio...")
        try:
            extract_audio_from_video(video_path, audio_path)
            transcribed_text = transcribe_audio(audio_path)
        except ValueError as exc:
            st.error(str(exc))

    if transcribed_text is not None:
        st.write("Transcription completed. Here's the text:")
        st.text_area("Transcribed Text", transcribed_text, height=300)

        # Server-side copy of the transcript, kept from the original flow.
        with open("transcribed_text.txt", "w", encoding="utf-8") as text_file:
            text_file.write(transcribed_text)

        st.download_button(
            "Download Transcribed Text",
            data=transcribed_text,
            file_name="transcribed_text.txt",
        )