import logging
import time

import gradio as gr
import spaces
import torch

from video_processor.processor import VideoAnalyzer, get_video_duration_seconds

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

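# Log the runtime environment at startup (PyTorch version, CUDA availability, GPU device).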
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")

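
# on_process is a generator: every yield sends [status_text, segment_descriptions,
# accordion_visibility_update] to the three outputs wired up in the Blocks UI below.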
@spaces.GPU
def on_process(video):
    start_time = time.time()
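    # Reset the UI: clear the status and description, hide the results accordion.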
    yield [
        "",
        "",
        gr.update(visible=False)
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False)
        ]
        return
    try:
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False)
        ]

        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True)
        ]
        logger.info(f"Processing video: {video}")
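        # Ceiling division: the analyzer appears to work in ~10-second segments,
        # so a trailing partial segment still counts as one.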
        duration = get_video_duration_seconds(video)
        total_segments = (int(duration) + 9) // 10

        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True)
        ]
        segments = []
        formatted_desc = ""  # Keep defined even if the analyzer yields no segments.
        total_ffmpeg_time = 0
        total_inference_time = 0
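        # Stream one UI update per analyzed segment, with running timing averages.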
        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']
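            # Progress percentage and a remaining-time estimate based on the
            # average per-segment FFmpeg and inference cost so far.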
            progress = int((i + 1) / total_segments * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            remaining_segments = total_segments - (i + 1)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)
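            # Rebuild the full Markdown description so every segment analyzed so far is shown.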
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n" +
                f"Segment {i+1}/{total_segments}\n" +
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n" +
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n" +
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True)
            ]
        total_time = time.time() - start_time
        yield [
            f"Processing complete!\n" +
            f"Total processing time: {total_time:.2f}s\n" +
            f"Average per segment:\n" +
            f" - FFmpeg: {total_ffmpeg_time/total_segments:.2f}s\n" +
            f" - Inference: {total_inference_time/total_segments:.2f}s\n" +
            f" - Total: {(total_ffmpeg_time + total_inference_time)/total_segments:.2f}s",
            formatted_desc,
            gr.update(visible=True)
        ]
    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False)
        ]
    finally:
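        # Release cached GPU memory so the next request starts from a clean slate.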
        torch.cuda.empty_cache()

with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")
    with gr.Row():
        with gr.Column(scale=1):
            input_video = gr.Video(
                label="Upload your video",
                interactive=True
            )
            process_btn = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=1):
            status = gr.Markdown()
            analysis_accordion = gr.Accordion(
                "Analysis Details",
                open=True,
                visible=False
            )
            with analysis_accordion:
                video_description = gr.Markdown("")
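
    # Wire the button to the generator; the queue lets intermediate yields stream to the client.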
    process_btn.click(
        on_process,
        inputs=[input_video],
        outputs=[
            status,
            video_description,
            analysis_accordion
        ],
        queue=True,
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
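
# When run directly, the app serves on 0.0.0.0:7860 with public sharing disabled.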