# Source: commit c14e652 by kritsadaK ("adding app.py")
import os
import tempfile

import speech_recognition as sr
import streamlit as st
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of a video file to a 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the WAV file to create.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        # ``audio`` is None for clips without sound; fail with a clear message
        # instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        # Release the reader and file handle held by the clip, even on
        # failure, so the caller can delete the source file afterwards.
        video.close()
def transcribe_audio(audio_path):
    """Transcribe a WAV file to text using ``Recognizer.recognize_google``.

    The audio is processed in 60-second chunks. Each chunk is exported to a
    temporary WAV file, loaded through ``sr.AudioFile`` and sent to the
    recognizer.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The chunk results in order, each followed by a single space. A chunk
        that raises ``sr.UnknownValueError`` contributes ``"[Unclear speech]"``
        and one that raises ``sr.RequestError`` contributes
        ``"[API error: ...]"``. Audio of zero length yields an empty string.
    """
    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_path)
    chunk_length_ms = 60000  # 60 seconds per chunk

    pieces = []
    # A private temporary directory gives every call its own chunk files, so
    # concurrent sessions cannot overwrite each other, and guarantees the
    # files are removed even when an unexpected exception escapes.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for i, start in enumerate(range(0, len(audio), chunk_length_ms)):
            chunk_path = os.path.join(chunk_dir, f"chunk_{i}.wav")
            # export() returns the open output handle; close it explicitly so
            # the file is flushed and can be deleted on every platform.
            audio[start:start + chunk_length_ms].export(chunk_path, format="wav").close()

            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)

            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                pieces.append("[Unclear speech]")
            except sr.RequestError as e:
                pieces.append(f"[API error: {e}]")
            else:
                pieces.append(text)

    # Every piece keeps its trailing space, matching the original output format.
    return "".join(f"{piece} " for piece in pieces)
# Streamlit page: upload a video, extract its audio, transcribe it and offer
# the resulting text for display and download.
st.title("Video to Text Transcription")
st.write("Upload a video file to transcribe its audio to text.")

uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "mov", "avi", "mkv"])

if uploaded_file is not None:
    # Work inside a per-run temporary directory: the intermediate video and
    # audio files get unique locations (no clashes between sessions) and are
    # deleted automatically, including when extraction or transcription fails.
    with tempfile.TemporaryDirectory() as work_dir:
        # Keep the uploaded file's extension on the temporary copy.
        extension = os.path.splitext(uploaded_file.name)[1]
        video_path = os.path.join(work_dir, "temp_video" + extension)
        audio_path = os.path.join(work_dir, "temp_audio.wav")

        with open(video_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Show the status message before the slow steps start.
        st.write("Extracting and transcribing audio...")
        extract_audio_from_video(video_path, audio_path)
        transcribed_text = transcribe_audio(audio_path)

    st.write("Transcription completed. Here's the text:")
    st.text_area("Transcribed Text", transcribed_text, height=300)

    # Also keep a copy of the transcript on disk, as the app did before.
    with open("transcribed_text.txt", "w", encoding="utf-8") as text_file:
        text_file.write(transcribed_text)

    st.download_button("Download Transcribed Text", data=transcribed_text, file_name="transcribed_text.txt")