import os
import gc
import torch
import cv2  # needed for image processing (e.g. hconcat, GaussianBlur)
import gradio as gr
import spaces  # provides the spaces.GPU decorator for ZeroGPU Spaces
import numpy as np
import matplotlib.cm as cm
from video_depth_anything.video_depth import VideoDepthAnything
from utils.dc_utils import read_video_frames, save_video
from huggingface_hub import hf_hub_download
# Examples for the Gradio Demo
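# Each entry: [video_path, max_process_length, target_fps, max_side_resolution]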
examples = [
['assets/example_videos/davis_rollercoaster.mp4', -1, -1, 1280],
['assets/example_videos/Tokyo-Walk_rgb.mp4', -1, -1, 1280],
['assets/example_videos/4158877-uhd_3840_2160_30fps_rgb.mp4', -1, -1, 1280],
['assets/example_videos/4511004-uhd_3840_2160_24fps_rgb.mp4', -1, -1, 1280],
['assets/example_videos/1753029-hd_1920_1080_30fps.mp4', -1, -1, 1280],
['assets/example_videos/davis_burnout.mp4', -1, -1, 1280],
['assets/example_videos/example_5473765-l.mp4', -1, -1, 1280],
['assets/example_videos/Istanbul-26920.mp4', -1, -1, 1280],
['assets/example_videos/obj_1.mp4', -1, -1, 1280],
['assets/example_videos/sheep_cut1.mp4', -1, -1, 1280],
]
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Model configuration
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
}
encoder2name = {
'vits': 'Small',
'vitl': 'Large',
}
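# 'vitl' (Large) is used below; 'vits' (Small) is a lighter, faster alternative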
encoder = 'vitl'
model_name = encoder2name[encoder]
# Initialize the model
video_depth_anything = VideoDepthAnything(**model_configs[encoder])
filepath = hf_hub_download(repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
filename=f"video_depth_anything_{encoder}.pth",
repo_type="model")
video_depth_anything.load_state_dict(torch.load(filepath, map_location='cpu'))
video_depth_anything = video_depth_anything.to(DEVICE).eval()
title = "# Video Depth Anything"
description = """Official demo for **Video Depth Anything**.
Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
@spaces.GPU  # allocate a GPU on ZeroGPU Spaces; the decorator is a no-op outside Hugging Face Spaces
def infer_video_depth(
    input_video: str,
    max_len: int = -1,
    target_fps: int = -1,
    max_res: int = 1280,
    stitch: bool = False,
    grayscale: bool = False,
    blur: float = 0.0,
    output_dir: str = './outputs',
    input_size: int = 518,
):
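    """Run Video Depth Anything on a video file.

    Args:
        input_video: Path to the input video.
        max_len: Maximum number of frames to process (-1 = no limit).
        target_fps: FPS to sample the input at (-1 = keep the original FPS).
        max_res: Maximum length of the longer video side in pixels.
        stitch: If True, also write a side-by-side RGB | depth video.
        grayscale: Render depth as grayscale instead of the inferno colormap.
        blur: Gaussian blur strength for the depth view (0 = off, 1 = max).
        output_dir: Directory for the output videos.
        input_size: Input resolution fed to the depth model.

    Returns:
        [processed_rgb_path, depth_visualization_path, stitched_path_or_empty_string]
    """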
# Read input video frames
frames, target_fps = read_video_frames(input_video, max_len, target_fps, max_res)
# Infer depths using the model
depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=input_size, device=DEVICE)
video_name = os.path.basename(input_video)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Save the processed (RGB) video and the depth visualization
processed_video_path = os.path.join(output_dir, os.path.splitext(video_name)[0] + '_src.mp4')
depth_vis_path = os.path.join(output_dir, os.path.splitext(video_name)[0] + '_vis.mp4')
save_video(frames, processed_video_path, fps=fps)
save_video(depths, depth_vis_path, fps=fps, is_depths=True)
stitched_video_path = ""
if stitch:
# Create a stitched video (side-by-side): left: processed RGB, right: depth
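        # Normalize with the global min/max over all frames so the color scale is temporally consistent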
d_min, d_max = depths.min(), depths.max()
stitched_frames = []
for i in range(min(len(frames), len(depths))):
rgb_frame = frames[i]
depth_frame = depths[i]
# Normalize the depth frame to [0, 255]
depth_norm = ((depth_frame - d_min) / (d_max - d_min) * 255).astype(np.uint8)
# Choose grayscale or colored mapping
if grayscale:
depth_vis = np.stack([depth_norm] * 3, axis=-1)
else:
cmap = cm.get_cmap("inferno")
                # cmap returns RGBA; keep only the first three (RGB) channels
depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
# Apply Gaussian blur if requested
if blur > 0:
kernel_size = int(blur * 20) * 2 + 1 # ensures odd kernel size
depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
# Concatenate side-by-side
stitched = cv2.hconcat([rgb_frame, depth_vis])
stitched_frames.append(stitched)
stitched_frames = np.array(stitched_frames)
stitched_video_path = os.path.join(output_dir, os.path.splitext(video_name)[0] + '_stitched.mp4')
save_video(stitched_frames, stitched_video_path, fps=fps)
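    # Release CPU and GPU memory held during this request before returning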
gc.collect()
torch.cuda.empty_cache()
    # Return three outputs: the processed RGB video, the depth visualization, and (optionally) the stitched video.
    # If stitching is not enabled, an empty string is returned for the third output.
return [processed_video_path, depth_vis_path, stitched_video_path]
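
# Minimal usage sketch outside the Gradio UI (assumes the bundled example clip is present):
#   rgb_path, depth_path, stitched_path = infer_video_depth(
#       'assets/example_videos/davis_rollercoaster.mp4', max_len=100, stitch=True,
#   )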
def construct_demo():
with gr.Blocks(analytics_enabled=False) as demo:
gr.Markdown(title)
gr.Markdown(description)
gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")
with gr.Row(equal_height=True):
with gr.Column(scale=1):
                input_video = gr.Video(label="Input Video")
with gr.Column(scale=2):
with gr.Row(equal_height=True):
processed_video = gr.Video(label="Preprocessed Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
depth_vis_video = gr.Video(label="Generated Depth Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
stitched_video = gr.Video(label="Stitched RGBD Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
with gr.Row(equal_height=True):
with gr.Column(scale=1):
with gr.Accordion("Advanced Settings", open=False):
max_len = gr.Slider(label="Max process length", minimum=-1, maximum=1000, value=500, step=1)
target_fps = gr.Slider(label="Target FPS", minimum=-1, maximum=30, value=15, step=1)
max_res = gr.Slider(label="Max side resolution", minimum=480, maximum=1920, value=1280, step=1)
stitch_option = gr.Checkbox(label="Stitch RGB & Depth Videos", value=False)
grayscale_option = gr.Checkbox(label="Output Depth as Grayscale", value=False)
blur_slider = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur Factor", value=0)
generate_btn = gr.Button("Generate")
with gr.Column(scale=2):
pass
gr.Examples(
examples=examples,
            inputs=[input_video, max_len, target_fps, max_res],
outputs=[processed_video, depth_vis_video, stitched_video],
fn=infer_video_depth,
cache_examples="lazy",
)
generate_btn.click(
fn=infer_video_depth,
inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, blur_slider],
outputs=[processed_video, depth_vis_video, stitched_video],
)
return demo
if __name__ == "__main__":
demo = construct_demo()
demo.queue()
demo.launch(share=True)