Spaces:

Yuki20
/

video_subtitle

Sleeping

video_subtitle / app.py

yuki-2025

commit2

d41e813 5 months ago

5.59 kB

	import streamlit as st
	import tempfile
	import os
	from io import StringIO
	import torch
	import numpy as np
	import logging
	import datetime
	import time
	import psutil
	from audio_utils import format_timestamp, generate_srt, transcribe_audio, set_logger

	# Configure logging
	class StreamlitHandler(logging.Handler):
	def __init__(self, placeholder):
	super().__init__()
	self.placeholder = placeholder
	self.log_output = StringIO()

	def emit(self, record):
	log_entry = f"{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - {self.format(record)}"
	self.log_output.write(log_entry + '\n')
	self.placeholder.code(self.log_output.getvalue())

	logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')
	logger = logging.getLogger(__name__)


	def get_gpu_info():
	if torch.cuda.is_available():
	gpu = torch.cuda.get_device_properties(0)
	return f"GPU: {gpu.name}, Total Memory: {gpu.total_memory / 1e9:.2f} GB"
	return "GPU: Not available"

	def get_cpu_info():
	cpu_info = psutil.cpu_freq()
	cpu_count = psutil.cpu_count(logical=False)
	cpu_logical_count = psutil.cpu_count(logical=True)
	return f"CPU: {cpu_count} physical cores, {cpu_logical_count} logical cores, Max Frequency: {cpu_info.max:.2f} MHz"

	def main():
	st.set_page_config(page_title="Video Subtitle Generator", page_icon="🎬")
	st.markdown("""
	<style>

	.stButton > button {
	background-color: #4CAF50;
	color: white;
	font-size: 16px;
	padding: 10px 20px;
	border-radius: 5px;
	border: none;
	}
	</style>
	""", unsafe_allow_html=True)

	st.title("Video Subtitle Generator")
	st.markdown("Generate subtitles from an audio/video file, using OpenAI's Whisper model.")

	# Input section
	st.header("Input")

	with st.form(key='subtitle_form'):
	# Create two columns with 2:1 ratio
	col1, col2 = st.columns([2, 1])

	# Elements in the wider column (2/3 width)
	with col1:
	uploaded_file = st.file_uploader("Upload video/audio file", type=["mp3", "wav", "mp4"])

	# Elements in the narrower column (1/3 width)
	with col2:
	model_name = st.selectbox("Choose Whisper model", [
	"openai/whisper-base",
	"openai/whisper-tiny",
	"openai/whisper-small",
	"openai/whisper-medium",
	"openai/whisper-large"
	])
	language = st.selectbox("Choose language", ["en", "fr", "de", "es", "it", "ja", "ko", "pt", "ru", "zh"])

	# Add a submit button to the form
	submit_button = st.form_submit_button(label='Generate Subtitles')

	st.subheader("Logs")
	# Create a placeholder for logs
	logs_placeholder = st.empty()

	# Add StreamlitHandler to logger
	streamlit_handler = StreamlitHandler(logs_placeholder)
	logger.addHandler(streamlit_handler)
	set_logger(logger)

	# Handle form submission
	if submit_button:
	if uploaded_file is not None:
	start_time = time.time()
	with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
	tmp_file.write(uploaded_file.getvalue())
	tmp_file_path = tmp_file.name

	with st.spinner("Processing..."):
	logger.info("Starting transcription process...")
	logger.info(get_gpu_info())
	logger.info(get_cpu_info())
	full_text, srt_content = transcribe_audio(model_name, tmp_file_path, language=language)

	if full_text and srt_content:
	# Output section

	with st.form(key='output_form'):
	st.header("Output")
	# col1, col2 = st.columns(1)

	# with col1:
	st.subheader("Full Transcription")
	st.text_area("", value=full_text, height=200)

	st.subheader("Detected Language")
	st.write(language)

	# with col2:
	st.subheader("Subtitles (SRT format)")
	st.text_area("", value=srt_content, height=200)

	logger.info("Processing completed successfully")

	submitted = st.form_submit_button("Download Subtitles")
	if submitted:
	st.download(
	data=srt_content,
	file_name="subtitles.srt",
	mime="text/plain"
	)
	# # Add download button for subtitles
	# st.download_button(
	# label="Download Subtitles",
	# data=srt_content,
	# file_name="subtitles.srt",
	# mime="text/plain"
	# )

	end_time = time.time()
	total_time = end_time - start_time
	logger.info(f"Total processing time: {total_time:.2f} seconds")

	else:
	logger.error("Transcription failed. No text or subtitle was generated.")
	st.error("Transcription failed. No text or subtitle was generated.")

	os.unlink(tmp_file_path)
	else:
	logger.warning("No file uploaded")
	st.error("Please upload an audio/video file")

	if __name__ == "__main__":
	main()