File size: 2,188 Bytes
c14e652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import tempfile

import speech_recognition as sr
import streamlit as st
from moviepy.editor import VideoFileClip
from pydub import AudioSegment

def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video file and write it to a WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path the audio is written to, encoded with the
            ``pcm_s16le`` codec.

    Raises:
        ValueError: If the video contains no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        # Always release the clip's resources; otherwise the source file may
        # stay open and callers may be unable to delete it afterwards.
        video.close()

def transcribe_audio(audio_path):
    """Transcribe a WAV file to text, one 60-second chunk at a time.

    The audio is sliced into 60-second chunks. Each chunk is exported to a
    WAV file inside a private temporary directory and passed to
    ``recognize_google``. The temporary directory (and every chunk file in
    it) is removed when the function exits, even if an error occurs.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The recognized text of all chunks in order, each followed by a single
        space. A chunk that cannot be understood contributes
        ``"[Unclear speech] "`` and a failed request contributes
        ``"[API error: <details>] "``.
    """
    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_path)

    chunk_length_ms = 60000  # 60 seconds per chunk
    pieces = []

    # A unique temporary directory keeps chunk files of concurrent runs from
    # overwriting each other and guarantees cleanup on any exit path.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
            chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")
            audio[start:start + chunk_length_ms].export(chunk_path, format="wav")

            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)

            try:
                pieces.append(recognizer.recognize_google(audio_data))
            except sr.UnknownValueError:
                pieces.append("[Unclear speech]")
            except sr.RequestError as e:
                pieces.append(f"[API error: {e}]")

    # Every piece is followed by one space, matching the original output format.
    return "".join(f"{piece} " for piece in pieces)

# --- Streamlit page: upload a video, extract its audio, show the transcript ---
st.title("Video to Text Transcription")
st.write("Upload a video file to transcribe its audio to text.")

uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "mov", "avi", "mkv"])

if uploaded_file is not None:
    # Reuse the extension of the uploaded file name for the temporary copy.
    extension = uploaded_file.name.split('.')[-1]

    # Work inside a private temporary directory so that concurrent sessions
    # do not overwrite each other's files and everything is removed even if
    # extraction or transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, "temp_video." + extension)
        audio_path = os.path.join(work_dir, "temp_audio.wav")

        with open(video_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Show the progress message before the slow work starts.
        st.write("Extracting and transcribing audio...")
        extract_audio_from_video(video_path, audio_path)
        transcribed_text = transcribe_audio(audio_path)

    st.write("Transcription completed. Here's the text:")
    st.text_area("Transcribed Text", transcribed_text, height=300)

    # A copy of the transcript is also kept on disk next to the app; the
    # download button itself serves the in-memory text.
    with open("transcribed_text.txt", "w", encoding="utf-8") as text_file:
        text_file.write(transcribed_text)
    st.download_button("Download Transcribed Text", data=transcribed_text, file_name="transcribed_text.txt")