Spaces:

moondream
/

video-redaction

Running on Zero

App Files Files Community

video-redaction / app.py

vikhyatk

Update app.py

4790cf8 verified 10 days ago

raw

history blame contribute delete

7.81 kB

	#!/usr/bin/env python3
	import gradio as gr
	import os
	from main import load_moondream, process_video
	import tempfile
	import shutil
	import torch
	import spaces

	# Get absolute path to workspace root
	WORKSPACE_ROOT = os.path.dirname(os.path.abspath(__file__))


	# Initialize model globally for reuse
	print("Loading Moondream model...")
	model, tokenizer = load_moondream()

	# Uncomment for Hugging Face Spaces
	@spaces.GPU(duration=120)
	def process_video_file(
	video_file, detect_keyword, box_style, ffmpeg_preset, rows, cols, test_mode
	):
	"""Process a video file through the Gradio interface."""
	try:
	if not video_file:
	raise gr.Error("Please upload a video file")

	# Ensure input/output directories exist using absolute paths
	inputs_dir = os.path.join(WORKSPACE_ROOT, "inputs")
	outputs_dir = os.path.join(WORKSPACE_ROOT, "outputs")
	os.makedirs(inputs_dir, exist_ok=True)
	os.makedirs(outputs_dir, exist_ok=True)

	# Copy uploaded video to inputs directory
	video_filename = f"input_{os.path.basename(video_file)}"
	input_video_path = os.path.join(inputs_dir, video_filename)
	shutil.copy2(video_file, input_video_path)

	try:
	# Process the video
	output_path = process_video(
	input_video_path,
	detect_keyword,
	test_mode=test_mode,
	ffmpeg_preset=ffmpeg_preset,
	rows=rows,
	cols=cols,
	box_style=box_style,
	)

	# Verify output exists and is readable
	if not output_path or not os.path.exists(output_path):
	print(f"Warning: Output path {output_path} does not exist")
	# Try to find the output based on expected naming convention
	expected_output = os.path.join(
	outputs_dir, f"{box_style}_{detect_keyword}_{video_filename}"
	)
	if os.path.exists(expected_output):
	output_path = expected_output
	else:
	# Try searching in outputs directory for any matching file
	matching_files = [
	f
	for f in os.listdir(outputs_dir)
	if f.startswith(f"{box_style}_{detect_keyword}_")
	]
	if matching_files:
	output_path = os.path.join(outputs_dir, matching_files[0])
	else:
	raise gr.Error("Failed to locate output video")

	# Convert output path to absolute path if it isn't already
	if not os.path.isabs(output_path):
	output_path = os.path.join(WORKSPACE_ROOT, output_path)

	print(f"Returning output path: {output_path}")
	return output_path

	finally:
	# Clean up input file
	try:
	if os.path.exists(input_video_path):
	os.remove(input_video_path)
	except:
	pass

	except Exception as e:
	print(f"Error in process_video_file: {str(e)}")
	raise gr.Error(f"Error processing video: {str(e)}")


	# Create the Gradio interface
	with gr.Blocks(title="Promptable Video Redaction") as app:
	gr.Markdown("# Promptable Video Redaction with Moondream")
	gr.Markdown(
	"""
	[Moondream 2B](https://github.com/vikhyat/moondream) is a lightweight vision model that detects and visualizes objects in videos. It can identify objects, people, text and more.

	Upload a video and specify what to detect. The app will process each frame and apply your chosen visualization style. For help, join the [Moondream Discord](https://discord.com/invite/tRUdpjDQfH).
	"""
	)

	with gr.Row():
	with gr.Column():
	# Input components
	video_input = gr.Video(label="Upload Video")

	detect_input = gr.Textbox(
	label="What to Detect",
	placeholder="e.g. face, logo, text, person, car, dog, etc.",
	value="face",
	info="Moondream can detect anything that you can describe in natural language",
	)

	gr.Examples(
	examples=[
	["examples/homealone.mp4", "face"],
	["examples/soccer.mp4", "ball"],
	["examples/rally.mp4", "license plate"],
	],
	inputs=[video_input, detect_input],
	label="Try these examples",
	)

	process_btn = gr.Button("Process Video", variant="primary")

	with gr.Accordion("Advanced Settings", open=False):
	box_style_input = gr.Radio(
	choices=["censor", "bounding-box", "hitmarker"],
	value="censor",
	label="Visualization Style",
	info="Choose how to display detections",
	)
	preset_input = gr.Dropdown(
	choices=[
	"ultrafast",
	"superfast",
	"veryfast",
	"faster",
	"fast",
	"medium",
	"slow",
	"slower",
	"veryslow",
	],
	value="medium",
	label="Processing Speed (faster = lower quality)",
	)
	with gr.Row():
	rows_input = gr.Slider(
	minimum=1, maximum=4, value=1, step=1, label="Grid Rows"
	)
	cols_input = gr.Slider(
	minimum=1, maximum=4, value=1, step=1, label="Grid Columns"
	)

	test_mode_input = gr.Checkbox(
	label="Test Mode (Process first 3 seconds only)",
	value=True,
	info="Enable to quickly test settings on a short clip before processing the full video (recommended)",
	)

	gr.Markdown(
	"""
	Note: Processing in test mode will only process the first 3 seconds of the video and is recommended for testing settings.
	"""
	)

	gr.Markdown(
	"""
	We can get a rough estimate of how long the video will take to process by multiplying the videos framerate * seconds * the number of rows and columns and assuming 0.12 seconds processing time per detection.
	For example, a 3 second video at 30fps with 2x2 grid, the estimated time is 3 * 30 * 2 * 2 * 0.12 = 43.2 seconds (tested on a 4090 GPU).
	"""
	)

	with gr.Column():
	# Output components
	video_output = gr.Video(label="Processed Video")

	# About section under the video output
	gr.Markdown(
	"""
	### Links:
	- [GitHub Repository](https://github.com/vikhyat/moondream)
	- [Hugging Face](https://huggingface.co/vikhyatk/moondream2)
	- [Python Package](https://pypi.org/project/moondream/)
	- [Moondream Recipes](https://docs.moondream.ai/recipes)
	"""
	)

	# Event handlers
	process_btn.click(
	fn=process_video_file,
	inputs=[
	video_input,
	detect_input,
	box_style_input,
	preset_input,
	rows_input,
	cols_input,
	test_mode_input,
	],
	outputs=video_output,
	)

	if __name__ == "__main__":
	app.launch(share=True)