Update app.py

app.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import gc
 import torch
-import cv2
+import cv2
 import gradio as gr
 import numpy as np
 import matplotlib.cm as cm
@@ -42,9 +42,11 @@ model_name = encoder2name[encoder]
 
 # Initialize the model
 video_depth_anything = VideoDepthAnything(**model_configs[encoder])
-filepath = hf_hub_download(
-
-
+filepath = hf_hub_download(
+    repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
+    filename=f"video_depth_anything_{encoder}.pth",
+    repo_type="model"
+)
 video_depth_anything.load_state_dict(torch.load(filepath, map_location='cpu'))
 video_depth_anything = video_depth_anything.to(DEVICE).eval()
 
@@ -52,8 +54,6 @@ title = "# Video Depth Anything"
 description = """Official demo for **Video Depth Anything**.
 Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
 
-
-@gr.processing_utils.threaded  # alternatively, spaces.GPU can be used if available
 def infer_video_depth(
     input_video: str,
     max_len: int = -1,
@@ -67,14 +67,14 @@ def infer_video_depth(
 ):
     # Read input video frames
     frames, target_fps = read_video_frames(input_video, max_len, target_fps, max_res)
-    # Infer
+    # Infer depth maps using the model
     depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=input_size, device=DEVICE)
 
     video_name = os.path.basename(input_video)
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
-    # Save the processed (RGB) video and the depth visualization
+    # Save the processed (RGB) video and the depth visualization (using the default color mapping)
     processed_video_path = os.path.join(output_dir, os.path.splitext(video_name)[0] + '_src.mp4')
     depth_vis_path = os.path.join(output_dir, os.path.splitext(video_name)[0] + '_vis.mp4')
     save_video(frames, processed_video_path, fps=fps)
@@ -82,7 +82,7 @@ def infer_video_depth(
 
     stitched_video_path = ""
     if stitch:
-        # Create a stitched video
+        # Create a stitched video: left side is the RGB video, right side is the depth video
         d_min, d_max = depths.min(), depths.max()
         stitched_frames = []
         for i in range(min(len(frames), len(depths))):
@@ -90,18 +90,18 @@ def infer_video_depth(
             depth_frame = depths[i]
             # Normalize the depth frame to [0, 255]
             depth_norm = ((depth_frame - d_min) / (d_max - d_min) * 255).astype(np.uint8)
-            #
+            # Use grayscale or colored mapping for the depth channel
            if grayscale:
                depth_vis = np.stack([depth_norm] * 3, axis=-1)
            else:
                cmap = cm.get_cmap("inferno")
-                # cmap returns RGBA
+                # cmap returns RGBA – we use the first 3 channels and scale to 255
                depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
-            # Apply Gaussian blur if requested
+            # Apply Gaussian blur if requested (blur factor > 0)
            if blur > 0:
                kernel_size = int(blur * 20) * 2 + 1  # ensures odd kernel size
                depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
-            # Concatenate side-by-side
+            # Concatenate side-by-side: RGB frame on the left, processed depth on the right
            stitched = cv2.hconcat([rgb_frame, depth_vis])
            stitched_frames.append(stitched)
        stitched_frames = np.array(stitched_frames)
@@ -111,17 +111,15 @@ def infer_video_depth(
     gc.collect()
     torch.cuda.empty_cache()
 
-    # Return
-    # If stitch is not enabled, an empty string is returned.
+    # Return processed RGB video, depth visualization, and (if created) stitched video.
     return [processed_video_path, depth_vis_path, stitched_video_path]
 
-
 def construct_demo():
     with gr.Blocks(analytics_enabled=False) as demo:
         gr.Markdown(title)
         gr.Markdown(description)
         gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")
-
+
         with gr.Row(equal_height=True):
             with gr.Column(scale=1):
                 input_video = gr.Video(label="Input Video", source="upload", type="filepath")
@@ -130,6 +128,7 @@ def construct_demo():
                 processed_video = gr.Video(label="Preprocessed Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
                 depth_vis_video = gr.Video(label="Generated Depth Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
                 stitched_video = gr.Video(label="Stitched RGBD Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
+
         with gr.Row(equal_height=True):
             with gr.Column(scale=1):
                 with gr.Accordion("Advanced Settings", open=False):
@@ -142,7 +141,7 @@ def construct_demo():
                 generate_btn = gr.Button("Generate")
             with gr.Column(scale=2):
                 pass
-
+
         gr.Examples(
             examples=examples,
             inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, blur_slider],
@@ -150,16 +149,16 @@ def construct_demo():
             fn=infer_video_depth,
             cache_examples="lazy",
         )
-
+
         generate_btn.click(
             fn=infer_video_depth,
             inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, blur_slider],
             outputs=[processed_video, depth_vis_video, stitched_video],
         )
-
+
    return demo

if __name__ == "__main__":
    demo = construct_demo()
-    demo.queue()
+    demo.queue()  # enable asynchronous processing
    demo.launch(share=True)
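For reference, a minimal standalone sketch of the checkpoint download that this commit makes explicit. The repo id pattern, filename pattern, and the `encoder2name` mapping are taken from the diff above; fixing the encoder to "vits" with model name "Small" and running this outside the Space are assumptions for illustration only.

import torch
from huggingface_hub import hf_hub_download

# Hypothetical fixed choice; app.py derives model_name via encoder2name[encoder]
encoder, model_name = "vits", "Small"

# Download the checkpoint from the Hub model repo named in the diff
filepath = hf_hub_download(
    repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
    filename=f"video_depth_anything_{encoder}.pth",
    repo_type="model",
)

# Load the weights on CPU first, as app.py does; model construction is omitted here
state_dict = torch.load(filepath, map_location="cpu")
# video_depth_anything.load_state_dict(state_dict)  # see app.py for the full setup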