Spaces:
Running
on
Zero
Running
on
Zero
# | |
# Copyright (C) 2023, Inria | |
# GRAPHDECO research group, https://team.inria.fr/graphdeco | |
# All rights reserved. | |
# | |
# This software is free for non-commercial, research and evaluation use | |
# under the terms of the LICENSE.md file. | |
# | |
# For inquiries contact [email protected] | |
# | |
from scene.cameras import Camera | |
import numpy as np | |
from utils.general_utils import PILtoTorch | |
from utils.graphics_utils import fov2focal | |
import scipy | |
import matplotlib.pyplot as plt | |
WARNED = False | |
def loadCam(args, id, cam_info, resolution_scale): | |
orig_w, orig_h = cam_info.image.size | |
if args.resolution in [1, 2, 4, 8]: | |
resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) | |
else: # should be a type that converts to float | |
if args.resolution == -1: | |
if orig_w > 1600: | |
global WARNED | |
if not WARNED: | |
print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " | |
"If this is not desired, please explicitly specify '--resolution/-r' as 1") | |
WARNED = True | |
global_down = orig_w / 1600 | |
else: | |
global_down = 1 | |
else: | |
global_down = orig_w / args.resolution | |
scale = float(global_down) * float(resolution_scale) | |
resolution = (int(orig_w / scale), int(orig_h / scale)) | |
resized_image_rgb = PILtoTorch(cam_info.image, resolution) | |
gt_image = resized_image_rgb[:3, ...] | |
loaded_mask = None | |
if resized_image_rgb.shape[1] == 4: | |
loaded_mask = resized_image_rgb[3:4, ...] | |
return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, | |
FoVx=cam_info.FovX, FoVy=cam_info.FovY, | |
image=gt_image, gt_alpha_mask=loaded_mask, | |
image_name=cam_info.image_name, uid=id, data_device=args.data_device) | |
def cameraList_from_camInfos(cam_infos, resolution_scale, args): | |
camera_list = [] | |
for id, c in enumerate(cam_infos): | |
camera_list.append(loadCam(args, id, c, resolution_scale)) | |
return camera_list | |
def camera_to_JSON(id, camera : Camera): | |
Rt = np.zeros((4, 4)) | |
Rt[:3, :3] = camera.R.transpose() | |
Rt[:3, 3] = camera.T | |
Rt[3, 3] = 1.0 | |
W2C = np.linalg.inv(Rt) | |
pos = W2C[:3, 3] | |
rot = W2C[:3, :3] | |
serializable_array_2d = [x.tolist() for x in rot] | |
camera_entry = { | |
'id' : id, | |
'img_name' : camera.image_name, | |
'width' : camera.width, | |
'height' : camera.height, | |
'position': pos.tolist(), | |
'rotation': serializable_array_2d, | |
'fy' : fov2focal(camera.FovY, camera.height), | |
'fx' : fov2focal(camera.FovX, camera.width) | |
} | |
return camera_entry | |
def transform_poses_pca(poses): | |
"""Transforms poses so principal components lie on XYZ axes. | |
Args: | |
poses: a (N, 3, 4) array containing the cameras' camera to world transforms. | |
Returns: | |
A tuple (poses, transform), with the transformed poses and the applied | |
camera_to_world transforms. | |
""" | |
t = poses[:, :3, 3] | |
t_mean = t.mean(axis=0) | |
t = t - t_mean | |
eigval, eigvec = np.linalg.eig(t.T @ t) | |
# Sort eigenvectors in order of largest to smallest eigenvalue. | |
inds = np.argsort(eigval)[::-1] | |
eigvec = eigvec[:, inds] | |
rot = eigvec.T | |
if np.linalg.det(rot) < 0: | |
rot = np.diag(np.array([1, 1, -1])) @ rot | |
transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1) | |
poses_recentered = unpad_poses(transform @ pad_poses(poses)) | |
transform = np.concatenate([transform, np.eye(4)[3:]], axis=0) | |
# Flip coordinate system if z component of y-axis is negative | |
if poses_recentered.mean(axis=0)[2, 1] < 0: | |
poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered | |
transform = np.diag(np.array([1, -1, -1, 1])) @ transform | |
# Just make sure it's it in the [-1, 1]^3 cube | |
scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3])) | |
poses_recentered[:, :3, 3] *= scale_factor | |
transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform | |
return poses_recentered, transform | |
def generate_interpolated_path(poses, n_interp, spline_degree=5, | |
smoothness=.03, rot_weight=.1): | |
"""Creates a smooth spline path between input keyframe camera poses. | |
Spline is calculated with poses in format (position, lookat-point, up-point). | |
Args: | |
poses: (n, 3, 4) array of input pose keyframes. | |
n_interp: returned path will have n_interp * (n - 1) total poses. | |
spline_degree: polynomial degree of B-spline. | |
smoothness: parameter for spline smoothing, 0 forces exact interpolation. | |
rot_weight: relative weighting of rotation/translation in spline solve. | |
Returns: | |
Array of new camera poses with shape (n_interp * (n - 1), 3, 4). | |
""" | |
def poses_to_points(poses, dist): | |
"""Converts from pose matrices to (position, lookat, up) format.""" | |
pos = poses[:, :3, -1] | |
lookat = poses[:, :3, -1] - dist * poses[:, :3, 2] | |
up = poses[:, :3, -1] + dist * poses[:, :3, 1] | |
return np.stack([pos, lookat, up], 1) | |
def points_to_poses(points): | |
"""Converts from (position, lookat, up) format to pose matrices.""" | |
return np.array([viewmatrix(p - l, u - p, p) for p, l, u in points]) | |
def interp(points, n, k, s): | |
"""Runs multidimensional B-spline interpolation on the input points.""" | |
sh = points.shape | |
pts = np.reshape(points, (sh[0], -1)) | |
k = min(k, sh[0] - 1) | |
tck, _ = scipy.interpolate.splprep(pts.T, k=k, s=s) | |
u = np.linspace(0, 1, n, endpoint=False) | |
new_points = np.array(scipy.interpolate.splev(u, tck)) | |
new_points = np.reshape(new_points.T, (n, sh[1], sh[2])) | |
return new_points | |
### Additional operation | |
# inter_poses = [] | |
# for pose in poses: | |
# tmp_pose = np.eye(4) | |
# tmp_pose[:3] = np.concatenate([pose.R.T, pose.T[:, None]], 1) | |
# tmp_pose = np.linalg.inv(tmp_pose) | |
# tmp_pose[:, 1:3] *= -1 | |
# inter_poses.append(tmp_pose) | |
# inter_poses = np.stack(inter_poses, 0) | |
# poses, transform = transform_poses_pca(inter_poses) | |
points = poses_to_points(poses, dist=rot_weight) | |
new_points = interp(points, | |
n_interp * (points.shape[0] - 1), | |
k=spline_degree, | |
s=smoothness) | |
return points_to_poses(new_points) | |
def viewmatrix(lookdir, up, position): | |
"""Construct lookat view matrix.""" | |
vec2 = normalize(lookdir) | |
vec0 = normalize(np.cross(up, vec2)) | |
vec1 = normalize(np.cross(vec2, vec0)) | |
m = np.stack([vec0, vec1, vec2, position], axis=1) | |
return m | |
def normalize(x): | |
"""Normalization helper function.""" | |
return x / np.linalg.norm(x) | |
def pad_poses(p): | |
"""Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1].""" | |
bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape) | |
return np.concatenate([p[..., :3, :4], bottom], axis=-2) | |
def unpad_poses(p): | |
"""Remove the homogeneous bottom row from [..., 4, 4] pose matrices.""" | |
return p[..., :3, :4] | |
def visualizer(camera_poses, colors, save_path="/mnt/data/1.png"): | |
fig = plt.figure() | |
ax = fig.add_subplot(111, projection="3d") | |
for pose, color in zip(camera_poses, colors): | |
rotation = pose[:3, :3] | |
translation = pose[:3, 3] # Corrected to use 3D translation component | |
camera_positions = np.einsum( | |
"...ij,...j->...i", np.linalg.inv(rotation), -translation | |
) | |
ax.scatter( | |
camera_positions[0], | |
camera_positions[1], | |
camera_positions[2], | |
c=color, | |
marker="o", | |
) | |
ax.set_xlabel("X") | |
ax.set_ylabel("Y") | |
ax.set_zlabel("Z") | |
ax.set_title("Camera Poses") | |
plt.savefig(save_path) | |
plt.close() | |
return save_path |