edit_anything / my3d.py
memef4rmer's picture
Duplicate from jyseo/3DFuse
efe5745
# some tools developed for the vision class
import numpy as np
from numpy import cross, tan
from numpy.linalg import norm, inv
def normalize(v):
return v / norm(v)
def camera_pose(eye, front, up):
z = normalize(-1 * front)
x = normalize(cross(up, z))
y = normalize(cross(z, x))
# convert to col vector
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)
z = z.reshape(-1, 1)
eye = eye.reshape(-1, 1)
pose = np.block([
[x, y, z, eye],
[0, 0, 0, 1]
])
return pose
def compute_extrinsics(eye, front, up):
pose = camera_pose(eye, front, up)
world_2_cam = inv(pose)
return world_2_cam
def compute_intrinsics(aspect_ratio, fov, img_height_in_pix):
# aspect ratio is w / h
ndc = compute_proj_to_normalized(aspect_ratio, fov)
# anything beyond [-1, 1] should be discarded
# this did not mention how to do z-clipping;
ndc_to_img = compute_normalized_to_img_trans(aspect_ratio, img_height_in_pix)
intrinsic = ndc_to_img @ ndc
return intrinsic
def compute_proj_to_normalized(aspect, fov):
# compared to standard OpenGL NDC intrinsic,
# this skips the 3rd row treatment on z. hence the name partial_ndc
fov_in_rad = fov / 180 * np.pi
t = tan(fov_in_rad / 2) # tan half fov
partial_ndc_intrinsic = np.array([
[1 / (t * aspect), 0, 0, 0],
[0, 1 / t, 0, 0],
[0, 0, -1, 0] # copy the negative distance for division
])
return partial_ndc_intrinsic
def compute_normalized_to_img_trans(aspect, img_height_in_pix):
img_h = img_height_in_pix
img_w = img_height_in_pix * aspect
# note the OpenGL convention that (0, 0) sits at the center of the pixel;
# hence the extra -0.5 translation
# this is useful when you shoot rays through a pixel to the scene
ndc_to_img = np.array([
[img_w / 2, 0, img_w / 2 - 0.5],
[0, img_h / 2, img_h / 2 - 0.5],
[0, 0, 1]
])
img_y_coord_flip = np.array([
[1, 0, 0],
[0, -1, img_h - 1], # note the -1
[0, 0, 1]
])
# the product of the above 2 matrices is equivalent to adding
# - sign to the (1, 1) entry
# you could have simply written
# ndc_to_img = np.array([
# [img_w / 2, 0, img_w / 2 - 0.5],
# [0, -img_h / 2, img_h / 2 - 0.5],
# [0, 0, 1]
# ])
ndc_to_img = img_y_coord_flip @ ndc_to_img
return ndc_to_img
def unproject(K, pixel_coords, depth=1.0):
"""sometimes also referred to as backproject
pixel_coords: [n, 2] pixel locations
depth: [n,] or [,] depth value. of a shape that is broadcastable with pix coords
"""
K = K[0:3, 0:3]
pixel_coords = as_homogeneous(pixel_coords)
pixel_coords = pixel_coords.T # [2+1, n], so that mat mult is on the left
# this will give points with z = -1, which is exactly what you want since
# your camera is facing the -ve z axis
pts = inv(K) @ pixel_coords
pts = pts * depth # [3, n] * [n,] broadcast
pts = pts.T
pts = as_homogeneous(pts)
return pts
"""
these two functions are changed so that they can handle arbitrary number of
dimensions >=1
"""
def homogenize(pts):
# pts: [..., d], where last dim of the d is the diviser
*front, d = pts.shape
pts = pts / pts[..., -1].reshape(*front, 1)
return pts
def as_homogeneous(pts, lib=np):
# pts: [..., d]
*front, d = pts.shape
points = lib.ones((*front, d + 1))
points[..., :d] = pts
return points
def simple_point_render(pts, img_w, img_h, fov, eye, front, up):
"""
pts: [N, 3]
"""
canvas = np.ones((img_h, img_w, 3))
pts = as_homogeneous(pts)
E = compute_extrinsics(eye, front, up)
world_2_ndc = compute_proj_to_normalized(img_w / img_h, fov)
ndc_to_img = compute_normalized_to_img_trans(img_w / img_h, img_h)
pts = pts @ E.T
pts = pts @ world_2_ndc.T
pts = homogenize(pts)
# now filter out outliers beyond [-1, 1]
outlier_mask = (np.abs(pts) > 1.0).any(axis=1)
pts = pts[~outlier_mask]
pts = pts @ ndc_to_img.T
# now draw each point
pts = np.rint(pts).astype(np.int32)
xs, ys, _ = pts.T
canvas[ys, xs] = (1, 0, 0)
return canvas