video-redaction / app.py
vikhyatk's picture
Update app.py
4790cf8 verified
#!/usr/bin/env python3
import gradio as gr
import os
from main import load_moondream, process_video
import tempfile
import shutil
import torch
import spaces
# Get absolute path to workspace root
# (the directory that contains this file; the inputs/ and outputs/ directories
# used by process_video_file are created underneath it).
WORKSPACE_ROOT = os.path.dirname(os.path.abspath(__file__))
# Initialize model globally for reuse
# This runs once, at import time, before the UI is built.
# NOTE(review): `model` and `tokenizer` are not referenced again in this file's
# visible code (process_video is called without them) -- confirm whether `main`
# depends on this call having happened.
print("Loading Moondream model...")
model, tokenizer = load_moondream()
# On Hugging Face Spaces (ZeroGPU), request a GPU for up to 120 seconds per call
@spaces.GPU(duration=120)
def process_video_file(
    video_file, detect_keyword, box_style, ffmpeg_preset, rows, cols, test_mode
):
    """Run an uploaded video through ``process_video`` and return the result path.

    The upload is copied into ``<WORKSPACE_ROOT>/inputs`` under an ``input_``
    prefix, handed to ``process_video``, and the copy is removed again whether
    or not processing succeeded.

    Args:
        video_file: Path of the uploaded video as supplied by the Gradio video
            component; a falsy value means nothing was uploaded.
        detect_keyword: Text describing what to detect.
        box_style: Visualization style, forwarded to ``process_video``.
        ffmpeg_preset: Preset name, forwarded to ``process_video``.
        rows: Grid row count, forwarded to ``process_video``.
        cols: Grid column count, forwarded to ``process_video``.
        test_mode: Flag forwarded to ``process_video``.

    Returns:
        Absolute path of the processed video.

    Raises:
        gr.Error: If no video was uploaded, if the output video cannot be
            located, or if processing fails for any other reason.
    """
    # Validate before entering the try block so the user sees this message
    # as-is instead of wrapped in "Error processing video: ...".
    if not video_file:
        raise gr.Error("Please upload a video file")

    try:
        # Ensure input/output directories exist using absolute paths
        inputs_dir = os.path.join(WORKSPACE_ROOT, "inputs")
        outputs_dir = os.path.join(WORKSPACE_ROOT, "outputs")
        os.makedirs(inputs_dir, exist_ok=True)
        os.makedirs(outputs_dir, exist_ok=True)

        video_filename = f"input_{os.path.basename(video_file)}"
        input_video_path = os.path.join(inputs_dir, video_filename)

        try:
            # Copy inside the try so a partially written copy is cleaned up too.
            shutil.copy2(video_file, input_video_path)

            output_path = process_video(
                input_video_path,
                detect_keyword,
                test_mode=test_mode,
                ffmpeg_preset=ffmpeg_preset,
                rows=rows,
                cols=cols,
                box_style=box_style,
            )

            # Fall back to the naming convention when the returned path is unusable.
            if not output_path or not os.path.exists(output_path):
                print(f"Warning: Output path {output_path} does not exist")
                output_path = _find_output_video(
                    outputs_dir, box_style, detect_keyword, video_filename
                )

            # A relative path was just verified against the current working
            # directory, so resolve it against that same directory.
            if not os.path.isabs(output_path):
                output_path = os.path.abspath(output_path)

            print(f"Returning output path: {output_path}")
            return output_path
        finally:
            _remove_input_copy(input_video_path)
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        print(f"Error in process_video_file: {str(e)}")
        raise gr.Error(f"Error processing video: {str(e)}") from e


def _find_output_video(outputs_dir, box_style, detect_keyword, video_filename):
    """Locate the processed video in ``outputs_dir`` by its expected file name."""
    prefix = f"{box_style}_{detect_keyword}_"
    expected_output = os.path.join(outputs_dir, f"{prefix}{video_filename}")
    if os.path.exists(expected_output):
        return expected_output

    candidates = [
        os.path.join(outputs_dir, name)
        for name in os.listdir(outputs_dir)
        if name.startswith(prefix)
    ]
    if not candidates:
        raise gr.Error("Failed to locate output video")
    # Several files can share the prefix (e.g. leftovers from earlier runs);
    # take the most recently modified one rather than an arbitrary listing entry.
    return max(candidates, key=os.path.getmtime)


def _remove_input_copy(path):
    """Best-effort removal of the temporary input copy; never raises on OS errors."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing to clean up (the copy never happened or is already gone).
        pass
    except OSError as cleanup_error:
        print(f"Warning: could not remove {path}: {cleanup_error}")
# Create the Gradio interface
# Layout: a heading and intro text, then one row with an input column (upload,
# prompt, examples, button, advanced settings) and an output column (result
# video and links). Components appear in the order they are created, so the
# statement order below is significant.
with gr.Blocks(title="Promptable Video Redaction") as app:
    gr.Markdown("# Promptable Video Redaction with Moondream")
    gr.Markdown(
        """
[Moondream 2B](https://github.com/vikhyat/moondream) is a lightweight vision model that detects and visualizes objects in videos. It can identify objects, people, text and more.
Upload a video and specify what to detect. The app will process each frame and apply your chosen visualization style. For help, join the [Moondream Discord](https://discord.com/invite/tRUdpjDQfH).
"""
    )
    with gr.Row():
        with gr.Column():
            # Input components
            video_input = gr.Video(label="Upload Video")
            # Free-text detection prompt; defaults to "face".
            detect_input = gr.Textbox(
                label="What to Detect",
                placeholder="e.g. face, logo, text, person, car, dog, etc.",
                value="face",
                info="Moondream can detect anything that you can describe in natural language",
            )
            # Each example row supplies a value for video_input and detect_input.
            # The video paths are relative (examples/...).
            gr.Examples(
                examples=[
                    ["examples/homealone.mp4", "face"],
                    ["examples/soccer.mp4", "ball"],
                    ["examples/rally.mp4", "license plate"],
                ],
                inputs=[video_input, detect_input],
                label="Try these examples",
            )
            process_btn = gr.Button("Process Video", variant="primary")
            # Optional settings, collapsed by default (open=False).
            with gr.Accordion("Advanced Settings", open=False):
                # Passed to process_video_file as box_style.
                box_style_input = gr.Radio(
                    choices=["censor", "bounding-box", "hitmarker"],
                    value="censor",
                    label="Visualization Style",
                    info="Choose how to display detections",
                )
                # Passed to process_video_file as ffmpeg_preset.
                preset_input = gr.Dropdown(
                    choices=[
                        "ultrafast",
                        "superfast",
                        "veryfast",
                        "faster",
                        "fast",
                        "medium",
                        "slow",
                        "slower",
                        "veryslow",
                    ],
                    value="medium",
                    label="Processing Speed (faster = lower quality)",
                )
                # Grid dimensions, each selectable from 1 to 4 and defaulting to 1.
                with gr.Row():
                    rows_input = gr.Slider(
                        minimum=1, maximum=4, value=1, step=1, label="Grid Rows"
                    )
                    cols_input = gr.Slider(
                        minimum=1, maximum=4, value=1, step=1, label="Grid Columns"
                    )
                # Test mode is enabled by default.
                test_mode_input = gr.Checkbox(
                    label="Test Mode (Process first 3 seconds only)",
                    value=True,
                    info="Enable to quickly test settings on a short clip before processing the full video (recommended)",
                )
                gr.Markdown(
                    """
Note: Processing in test mode will only process the first 3 seconds of the video and is recommended for testing settings.
"""
                )
                gr.Markdown(
                    """
We can get a rough estimate of how long the video will take to process by multiplying the videos framerate * seconds * the number of rows and columns and assuming 0.12 seconds processing time per detection.
For example, a 3 second video at 30fps with 2x2 grid, the estimated time is 3 * 30 * 2 * 2 * 0.12 = 43.2 seconds (tested on a 4090 GPU).
"""
                )
        with gr.Column():
            # Output components
            video_output = gr.Video(label="Processed Video")
            # About section under the video output
            gr.Markdown(
                """
### Links:
- [GitHub Repository](https://github.com/vikhyat/moondream)
- [Hugging Face](https://huggingface.co/vikhyatk/moondream2)
- [Python Package](https://pypi.org/project/moondream/)
- [Moondream Recipes](https://docs.moondream.ai/recipes)
"""
            )
    # Event handlers
    # The inputs list is positional: its order must match the parameter order of
    # process_video_file (video_file, detect_keyword, box_style, ffmpeg_preset,
    # rows, cols, test_mode). The returned path is shown in video_output.
    process_btn.click(
        fn=process_video_file,
        inputs=[
            video_input,
            detect_input,
            box_style_input,
            preset_input,
            rows_input,
            cols_input,
            test_mode_input,
        ],
        outputs=video_output,
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link for the app.
    app.launch(share=True)