Spaces:
Sleeping
Sleeping
# Copyright (c) 2023-2024, Zexin He | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# https://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import math | |
import torch | |
""" | |
R: (N, 3, 3) | |
T: (N, 3) | |
E: (N, 4, 4) | |
vector: (N, 3) | |
""" | |
def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
    """
    Build batched extrinsic matrices from separate R and T tensors.

    R: (N, 3, 3)
    T: (N, 3)
    return: whatever compose_extrinsic_RT returns for the (N, 3, 4) block [R | T]
    """
    # Turn T into an (N, 3, 1) column and glue it to the right of R.
    translation_column = T[..., None]
    rt_block = torch.cat([R, translation_column], dim=-1)
    return compose_extrinsic_RT(rt_block)
def compose_extrinsic_RT(RT: torch.Tensor):
    """
    Append the homogeneous row [0, 0, 0, 1] below every (3, 4) block of RT.

    RT: (N, 3, 4)
    return: (N, 4, 4), the appended row uses RT's dtype and device
    """
    batch_size = RT.shape[0]
    # One (1, 1, 4) row, copied once per batch element.
    homogeneous_row = torch.tensor([[[0, 0, 0, 1]]], dtype=RT.dtype, device=RT.device)
    bottom_rows = homogeneous_row.repeat(batch_size, 1, 1)
    return torch.cat((RT, bottom_rows), dim=1)
def decompose_extrinsic_R_T(E: torch.Tensor):
    """
    Split batched extrinsic matrices into their R and T parts.

    E: (N, 4, 4)
    return: (R, T) where R is the first three columns and T the fourth column
            of the block returned by decompose_extrinsic_RT
    """
    rt_block = decompose_extrinsic_RT(E)
    rotation = rt_block[:, :, :3]
    translation = rt_block[:, :, 3]
    return rotation, translation
def decompose_extrinsic_RT(E: torch.Tensor):
    """
    Drop the last row of each extrinsic matrix, keeping the top three rows.

    E: (N, 4, 4)
    return: (N, 3, 4)
    """
    top_three_rows = E[:, 0:3, :]
    return top_three_rows
def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
    """
    Divide focal lengths and principal point by the image size.

    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: batched fx / width, fy / height, cx / width, cy / height
    """
    focal_px = intrinsics[:, 0]
    principal_px = intrinsics[:, 1]
    image_size = intrinsics[:, 2]
    # Column 0 is the x / width component, column 1 the y / height component.
    focal = focal_px / image_size
    principal = principal_px / image_size
    return focal[:, 0], focal[:, 1], principal[:, 0], principal[:, 1]
def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten each pose and append the four normalized intrinsic values.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 16), 12 flattened RT entries followed by fx, fy, cx, cy
    """
    flat_pose = RT.reshape(-1, 12)
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    # Pack the four per-sample scalars into one (N, 4) tensor.
    flat_intrinsics = torch.stack((fx, fy, cx, cy), dim=-1)
    return torch.cat((flat_pose, flat_intrinsics), dim=-1)
def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten the 4x4 extrinsic and the 3x3 normalized intrinsic matrix per sample.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 25), 16 flattened extrinsic entries followed by 9 flattened
            entries of [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
    """
    E = compose_extrinsic_RT(RT)
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    # Build the constant entries from fx so every row of the intrinsic matrix
    # shares the dtype and device of the normalized intrinsics, instead of a
    # hard-coded float32 row placed on RT's device.
    zeros = torch.zeros_like(fx)
    ones = torch.ones_like(fx)
    intrinsic_matrix = torch.stack([
        torch.stack([fx, zeros, cx], dim=-1),
        torch.stack([zeros, fy, cy], dim=-1),
        torch.stack([zeros, zeros, ones], dim=-1),
    ], dim=1)
    return torch.cat([
        E.reshape(-1, 16),
        intrinsic_matrix.reshape(-1, 9),
    ], dim=-1)
def center_looking_at_camera_pose(
    camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None,
    device: torch.device = torch.device('cpu'),
):
    """
    Build look-at camera poses for a batch of camera positions.

    camera_position: (M, 3)
    look_at: (3), defaults to the origin
    up_world: (3), defaults to +z
    device: device on which the default look_at / up_world are created;
            it should match camera_position's device
    return: (M, 3, 4), columns are x_axis, y_axis, z_axis, camera_position

    The axes are normalized by plain division, so a camera located exactly at
    look_at, or a viewing direction parallel to up_world, yields NaN entries.
    """
    n_cameras = camera_position.shape[0]
    # by default, looking at the origin and world up is pos-z
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32, device=device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)
    look_at = look_at.unsqueeze(0).repeat(n_cameras, 1)
    up_world = up_world.unsqueeze(0).repeat(n_cameras, 1)

    # z points from the target towards the camera.
    z_axis = camera_position - look_at
    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
    # dim=-1 is required: without it torch.cross uses the first dimension of
    # size 3, which is the batch dimension whenever M == 3.
    x_axis = torch.cross(up_world, z_axis, dim=-1)
    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
    y_axis = torch.cross(z_axis, x_axis, dim=-1)
    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)

    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    return extrinsics
def surrounding_views_linspace(n_views: int, radius: float = 2.0, height: float = 0.8, device: torch.device = torch.device('cpu')):
    """
    Place cameras on a horizontal circle around the z axis and build their poses.

    n_views: number of surrounding views
    radius: camera dist to center
    height: height of the camera (its z coordinate); |height| must not exceed radius
    return: (M, 3, 4), poses from center_looking_at_camera_pose with its
            default look_at / up_world

    The angles are linspace(-pi/2, 3*pi/2, n_views) with both endpoints
    included, i.e. the first and last angle are a full turn apart.

    Raises ValueError when |height| > radius.
    """
    assert n_views > 0
    assert radius > 0
    squared_projection = radius ** 2 - height ** 2
    if squared_projection < 0:
        # math.sqrt would raise ValueError("math domain error") here; keep the
        # exception type but say what is actually wrong.
        raise ValueError(
            f"height magnitude ({height}) must not exceed radius ({radius})"
        )
    projected_radius = math.sqrt(squared_projection)

    theta = torch.linspace(-torch.pi / 2, 3 * torch.pi / 2, n_views, device=device)
    x = torch.cos(theta) * projected_radius
    y = torch.sin(theta) * projected_radius
    # Pin the dtype so an integer height does not produce an integer z column.
    z = torch.full((n_views,), height, dtype=theta.dtype, device=device)

    camera_positions = torch.stack([x, y, z], dim=1)
    extrinsics = center_looking_at_camera_pose(camera_positions, device=device)
    return extrinsics
def create_intrinsics(
    f: float,
    c: float = None, cx: float = None, cy: float = None,
    w: float = 1., h: float = 1.,
    dtype: torch.dtype = torch.float32,
    device: torch.device = torch.device('cpu'),
):
    """
    Build a normalized intrinsics tensor [[fx, fy], [cx, cy], [1, 1]].

    f: focal length, used for both fx and fy
    c: principal point used for both cx and cy; exclusive with cx / cy
    cx, cy: principal point per axis; both required when c is not given
    w, h: image size the inputs are divided by (x values by w, y values by h)
    return: (3, 2)
    """
    if c is None:
        assert cx is not None and cy is not None, "cx/cy must be provided when c is not provided"
    else:
        assert cx is None and cy is None, "c and cx/cy cannot be used together"
        cx = cy = c
    # After normalization the stored image size is always 1 x 1.
    rows = [
        [f / w, f / h],
        [cx / w, cy / h],
        [1., 1.],
    ]
    return torch.tensor(rows, dtype=dtype, device=device)