import logging
import time

import gradio as gr
import spaces
import torch

from video_processor.processor import VideoAnalyzer, get_video_duration_seconds

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

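# Log the runtime environment at startup (PyTorch version, CUDA availability, GPU device).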
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")

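
# on_process is a generator: every yield sends [status_text, segment_descriptions,
# accordion_visibility_update] to the three outputs wired up in the Blocks UI below.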
@spaces.GPU
def on_process(video):
    start_time = time.time()
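    # Reset the UI: clear the status and description, hide the results accordion.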
    yield [
        "",
        "",
        gr.update(visible=False)
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False)
        ]
        return
    try:
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False)
        ]

        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True)
        ]
        logger.info(f"Processing video: {video}")
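        # Ceiling division: the analyzer appears to work in ~10-second segments,
        # so a trailing partial segment still counts as one.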
        duration = get_video_duration_seconds(video)
        total_segments = (int(duration) + 9) // 10

        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True)
        ]
        segments = []
        formatted_desc = ""  # Keep defined even if the analyzer yields no segments.
        total_ffmpeg_time = 0
        total_inference_time = 0
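        # Stream one UI update per analyzed segment, with running timing averages.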
        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']
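            # Progress percentage and a remaining-time estimate based on the
            # average per-segment FFmpeg and inference cost so far.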
            progress = int((i + 1) / total_segments * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            remaining_segments = total_segments - (i + 1)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)
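            # Rebuild the full Markdown description so every segment analyzed so far is shown.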
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n" +
                f"Segment {i+1}/{total_segments}\n" +
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n" +
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n" +
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True)
            ]
        total_time = time.time() - start_time
        yield [
            f"Processing complete!\n" +
            f"Total processing time: {total_time:.2f}s\n" +
            f"Average per segment:\n" +
            f" - FFmpeg: {total_ffmpeg_time/total_segments:.2f}s\n" +
            f" - Inference: {total_inference_time/total_segments:.2f}s\n" +
            f" - Total: {(total_ffmpeg_time + total_inference_time)/total_segments:.2f}s",
            formatted_desc,
            gr.update(visible=True)
        ]
    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False)
        ]
    finally:
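        # Release cached GPU memory so the next request starts from a clean slate.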
        torch.cuda.empty_cache()

with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")
    with gr.Row():
        with gr.Column(scale=1):
            input_video = gr.Video(
                label="Upload your video",
                interactive=True
            )
            process_btn = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=1):
            status = gr.Markdown()
            analysis_accordion = gr.Accordion(
                "Analysis Details",
                open=True,
                visible=False
            )
            with analysis_accordion:
                video_description = gr.Markdown("")
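
    # Wire the button to the generator; the queue lets intermediate yields stream to the client.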
    process_btn.click(
        on_process,
        inputs=[input_video],
        outputs=[
            status,
            video_description,
            analysis_accordion
        ],
        queue=True,
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
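
# When run directly, the app serves on 0.0.0.0:7860 with public sharing disabled.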