# smollvm/src/app.py
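"""SmolVLM Video Analyzer: a Gradio app that splits an uploaded video into
10-second segments and streams a timestamped description of each one."""
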
import logging
import time

import gradio as gr
import spaces
import torch

from video_processor.processor import VideoAnalyzer, get_video_duration_seconds

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Print version information
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")
@spaces.GPU
def on_process(video):
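    """Stream (status_markdown, description_markdown, accordion_update)
    triples to the three outputs wired up in `process_btn.click` below."""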
    start_time = time.time()

    # Clear all components when starting new processing
    yield [
        "",                       # Clear status
        "",                       # Clear description
        gr.update(visible=False)  # Hide accordion
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False)
        ]
        return

    try:
        # Initialize analyzer
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False)
        ]
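
        # VideoAnalyzer() loads the model, so this is typically the slowest
        # step on a cold start (its duration is reported to the UI below)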
        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        # Process video
        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True)
        ]
        logger.info(f"Processing video: {video}")

        # Get duration and calculate total segments
        duration = get_video_duration_seconds(video)
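        # e.g. a 25 s clip -> (25 + 9) // 10 = 3 ten-second segments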
        total_segments = max(1, (int(duration) + 9) // 10)  # ceiling division for 10-second segments; at least 1 so divisions below are safe

        # Process video segments
        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True)
        ]

        # Process segments and show progress
        segments = []
        formatted_desc = ""  # defined up front so the final yield is safe even if no segments arrive
        total_ffmpeg_time = 0
        total_inference_time = 0
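
        # analyzer.process_video yields one dict per segment with 'timestamp',
        # 'description', and per-stage 'processing_times'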
        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            # Update timing totals
            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']
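
            # Progress %, running averages, and a linear ETA assuming the
            # remaining segments take the average time seen so far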
            progress = int((i + 1) / total_segments * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            remaining_segments = total_segments - (i + 1)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)

            # Format current segments
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n"
                f"Segment {i+1}/{total_segments}\n"
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n"
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n"
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True)
            ]

        total_time = time.time() - start_time
        yield [
            f"Processing complete!\n"
            f"Total processing time: {total_time:.2f}s\n"
            f"Average per segment:\n"
            f"  - FFmpeg: {total_ffmpeg_time/total_segments:.2f}s\n"
            f"  - Inference: {total_inference_time/total_segments:.2f}s\n"
            f"  - Total: {(total_ffmpeg_time + total_inference_time)/total_segments:.2f}s",
            formatted_desc,
            gr.update(visible=True)
        ]
    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False)
        ]
    finally:
        # Clean up: release GPU memory cached by PyTorch's allocator
        torch.cuda.empty_cache()

# Create Gradio interface
with gr.Blocks() as demo:
gr.Markdown("# SmolVLM Video Analyzer")
gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")
with gr.Row():
with gr.Column(scale=1):
input_video = gr.Video(
label="Upload your video",
interactive=True
)
process_btn = gr.Button("Process Video", variant="primary")
with gr.Column(scale=1):
status = gr.Markdown()
analysis_accordion = gr.Accordion(
"Analysis Details",
open=True,
visible=False
)
with analysis_accordion:
video_description = gr.Markdown("")
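
    # queue=True lets this generator handler stream each intermediate yield to the UI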
    process_btn.click(
        on_process,
        inputs=[input_video],
        outputs=[
            status,
            video_description,
            analysis_accordion
        ],
        queue=True,
    )

if __name__ == "__main__":
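    # Bind 0.0.0.0 so the app is reachable from outside the container (e.g. on Spaces)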
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )