|
import os |
|
import torch |
|
import cv2 |
|
import numpy as np |
|
import torch.nn.functional as F |
|
from torchvision.transforms import Compose |
|
|
|
from depth_anything.dpt import DPT_DINOv2 |
|
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet |
|
from .util import load_model |
|
from .annotator_path import models_path |
|
|
|
|
|
# Preprocessing pipeline applied to the {"image": ...} sample dict before it is
# fed to the network (see DepthAnythingDetector.__call__).

# Resize step. Going by the argument names: targets a 518x518 working size,
# keeps the aspect ratio, and snaps each side to a multiple of 14.
# NOTE(review): exact "lower_bound" semantics live in
# depth_anything.util.transform.Resize - confirm there.
_resize_step = Resize(
    width=518,
    height=518,
    resize_target=False,
    keep_aspect_ratio=True,
    ensure_multiple_of=14,
    resize_method="lower_bound",
    image_interpolation_method=cv2.INTER_CUBIC,
)

# Per-channel normalisation with fixed mean / std values.
_normalize_step = NormalizeImage(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# The steps run in the order listed.
transform = Compose([_resize_step, _normalize_step, PrepareForNet()])
|
|
|
|
|
class DepthAnythingDetector:
    """Depth-map annotator wrapping a Depth Anything (DPT + DINOv2) network.

    Upstream project: https://github.com/LiheYoung/Depth-Anything
    """

    # Directory handed to ``load_model`` as the checkpoint location.
    model_dir = os.path.join(models_path, "depth_anything")

    def __init__(self, device: torch.device):
        """Build the network on ``device`` and load its checkpoint weights.

        The checkpoint file is ``depth_anything_vitl14.pth``. Its download URL
        can be overridden through the ``CONTROLNET_DEPTH_ANYTHING_MODEL_URL``
        environment variable.

        Args:
            device: Device the model is moved to, and on which inference runs.
        """
        self.device = device
        self.model = (
            DPT_DINOv2(
                encoder="vitl",
                features=256,
                out_channels=[256, 512, 1024, 1024],
                localhub=False,
            )
            .to(device)
            .eval()
        )
        remote_url = os.environ.get(
            "CONTROLNET_DEPTH_ANYTHING_MODEL_URL",
            "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth",
        )
        # NOTE(review): load_model presumably downloads the file into
        # model_dir when it is missing and returns the local path - confirm
        # against .util.load_model.
        model_path = load_model(
            "depth_anything_vitl14.pth", remote_url=remote_url, model_dir=self.model_dir
        )
        # Deserialize onto the CPU so a checkpoint saved from CUDA tensors can
        # still be read on a machine without CUDA; load_state_dict then copies
        # the values into the parameters on whatever device the model is on.
        # NOTE: torch.load unpickles the file, so the checkpoint (and any URL
        # supplied through the environment variable) must be trusted.
        state_dict = torch.load(model_path, map_location="cpu")
        self.model.load_state_dict(state_dict)

    def __call__(self, image: np.ndarray, colored: bool = True) -> np.ndarray:
        """Estimate a depth map for ``image`` at the image's own resolution.

        Args:
            image: Input picture as an array whose first two axes are height
                and width. It is passed through ``cv2.COLOR_BGR2RGB``, i.e. it
                is treated as BGR - NOTE(review): confirm callers pass BGR.
            colored: When true, return the depth map rendered with OpenCV's
                INFERNO colormap (channel order reversed from OpenCV's
                output); otherwise return the single-channel map.

        Returns:
            A ``uint8`` array: ``(h, w, 3)`` when ``colored`` is true, else
            ``(h, w)``, with values min-max scaled to the 0-255 range.
        """
        # The model may have been parked on the CPU by unload_model().
        self.model.to(self.device)
        h, w = image.shape[:2]

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        image = transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(self.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            depth = self.model(image)

        # Resample the prediction back to the input resolution, then drop the
        # two leading singleton axes to get an (h, w) map.
        depth = F.interpolate(
            depth[None], (h, w), mode="bilinear", align_corners=False
        )[0, 0]

        # Min-max scale to [0, 255]. A constant prediction has zero range and
        # would divide 0 by 0 (NaN, then an undefined uint8 cast), so emit an
        # all-zero map in that case instead.
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range * 255.0
        else:
            depth = torch.zeros_like(depth)

        depth = depth.cpu().numpy().astype(np.uint8)
        if colored:
            # applyColorMap output is reversed along the channel axis.
            return cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]
        return depth

    def unload_model(self):
        """Move the model to the CPU; ``__call__`` moves it back on next use."""
        self.model.to("cpu")
|
|