|
|
|
|
|
|
|
|
|
import torch |
|
import numpy as np |
|
import pyrender |
|
import trimesh |
|
import math |
|
from scipy.spatial.transform import Rotation |
|
from PIL import ImageFont, ImageDraw, Image |
|
|
|
# Homogeneous 4x4 matrix that negates the y and z axes and leaves x untouched.
# Used throughout this module to switch between the OpenCV and OpenGL camera
# conventions.
OPENCV_TO_OPENGL_CAMERA_CONVENTION = np.diag([1, -1, -1, 1])
|
|
|
def geotrf(Trf, pts, ncol=None, norm=False):
    """ Apply a geometric transformation to a set of points.

    Trf: (d,d) matrix or (B,d,d) batch of matrices, numpy array or torch tensor
         (typically a 3x3 homography or a 4x4 rigid/projective transform).
    pts: numpy/torch/tuple of coordinates, shape (...,d) or (...,d-1).
         It is converted to the same array library as Trf. When its last
         dimension is one less than Trf's, the last row/column of Trf is
         applied as a translation (implicit homogeneous coordinate of 1).

    ncol: int. number of columns kept in the result (defaults to pts.shape[-1]).
    norm: float. if != 0, the result is divided by its last coordinate and
          then scaled by norm (i.e. projected on the z=norm plane).

    Returns the transformed points, with the leading dimensions of pts
    preserved and ncol values per point.
    """
    assert Trf.ndim in (2, 3)
    if isinstance(Trf, np.ndarray):
        pts = np.asarray(pts)
    elif isinstance(Trf, torch.Tensor):
        pts = torch.as_tensor(pts, dtype=Trf.dtype)

    # Remember the caller's layout before any flattening done below.
    leading_shape = pts.shape[:-1]
    ncol = ncol or pts.shape[-1]

    if Trf.ndim == 3:
        assert len(Trf) == len(pts), 'batch size does not match'
        if pts.ndim > 3:
            # Trf == (B,d,d) & pts == (B,H,W,d) --> (B, H*W, d)
            pts = pts.reshape(pts.shape[0], -1, pts.shape[-1])
        elif pts.ndim == 2:
            # Trf == (B,d,d) & pts == (B,d) --> (B, 1, d)
            pts = pts[:, None, :]

    pts_dim, trf_dim = pts.shape[-1], Trf.shape[-1]
    if pts_dim + 1 == trf_dim:
        # Points are row vectors, so multiply by the transposed matrix and
        # add its last row as the translation term.
        trf_t = Trf.swapaxes(-1, -2)
        pts = pts @ trf_t[..., :-1, :] + trf_t[..., -1:, :]
    elif pts_dim == trf_dim:
        pts = pts @ Trf.swapaxes(-1, -2)
    else:
        # Fallback for any other size combination: treat points as columns.
        pts = Trf @ pts.T
        if pts.ndim >= 2:
            pts = pts.swapaxes(-1, -2)

    if norm:
        # Out-of-place division on purpose (in-place was avoided upstream).
        pts = pts / pts[..., -1:]
        if norm != 1:
            pts *= norm

    return pts[..., :ncol].reshape(*leading_shape, ncol)
|
|
|
def create_scene(img_pil, l_mesh, l_face, color=None, metallicFactor=0., roughnessFactor=0.5, focal=600):
    """
    Build a trimesh scene holding the meshes, a textured image plane and a camera frustum.
    Args:
        - img_pil: PIL image used as the texture of the image plane; its size
          also fixes the aspect ratio and depth of the frustum
        - l_mesh: list of vertex arrays, one per mesh (presumably [v,3])
        - l_face: list of face arrays, l_face[i] goes with l_mesh[i] (presumably [f,3])
        - color: None (one random color per mesh), a list indexed like l_mesh,
          or a single tuple shared by every mesh
        - metallicFactor, roughnessFactor: PBR material parameters of the meshes
        - focal: focal length in pixels, used to set the depth of the frustum
    Return:
        - scene: trimesh.Scene
    Raises:
        - NotImplementedError when color is neither None, a list nor a tuple
    """
    scene = trimesh.Scene(
        lights=trimesh.scene.lighting.Light(intensity=3.0)
    )

    # Meshes, each one with a uniform PBR color.
    for i, verts in enumerate(l_mesh):
        if color is None:
            _color = (np.random.choice(range(1,225))/255, np.random.choice(range(1,225))/255, np.random.choice(range(1,225))/255)
        elif isinstance(color, list):
            _color = color[i]
        elif isinstance(color, tuple):
            _color = color
        else:
            raise NotImplementedError
        mesh = trimesh.Trimesh(verts, l_face[i])
        mesh.visual = trimesh.visual.TextureVisuals(
            uv=None,
            material=trimesh.visual.material.PBRMaterial(
                metallicFactor=metallicFactor,
                roughnessFactor=roughnessFactor,
                alphaMode='OPAQUE',
                baseColorFactor=(_color[0], _color[1], _color[2], 1.0)
            ),
            image=None,
            face_materials=None
        )
        scene.add_geometry(mesh)

    # Image plane / frustum dimensions.
    # PIL reports size as (width, height); the previous code unpacked it as
    # (H, W), which inverted the aspect ratio for non-square images.
    W, H = img_pil.size
    screen_width = 0.3
    height = focal * screen_width / H
    width = screen_width * 0.5**0.5
    rot45 = np.eye(4)
    rot45[:3,:3] = Rotation.from_euler('z',np.deg2rad(45)).as_matrix()
    rot45[2,3] = -height  # presumably puts the tip of the cone at the optical center
    aspect_ratio = np.eye(4)
    aspect_ratio[0,0] = W/H
    transform = OPENCV_TO_OPENGL_CAMERA_CONVENTION @ aspect_ratio @ rot45
    # A 4-section cone is a pyramid, used here as the camera frustum.
    cam = trimesh.creation.cone(width, height, sections=4, transform=transform)

    # Textured quad showing the image.
    # NOTE(review): indices [4,5,1,3] rely on the vertex ordering produced by
    # trimesh.creation.cone for sections=4 - confirm if trimesh is upgraded.
    vertices = cam.vertices[[4,5,1,3]]
    # Each triangle is listed with both windings so the quad is visible from both sides.
    faces = np.array([[0, 1, 2], [0, 2, 3], [2, 1, 0], [3, 2, 0]])
    img = trimesh.Trimesh(vertices=vertices, faces=faces)
    uv_coords = np.float32([[0, 0], [1, 0], [1, 1], [0, 1]])
    img.visual = trimesh.visual.TextureVisuals(uv=uv_coords, image=img_pil)
    scene.add_geometry(img)

    # Camera mesh: the frustum edges are drawn as thin triangles spanning the
    # original cone, a copy scaled by 0.95 and a copy rotated by 2 degrees.
    rot2 = np.eye(4)
    rot2[:3,:3] = Rotation.from_euler('z',np.deg2rad(2)).as_matrix()
    vertices = np.r_[cam.vertices, 0.95*cam.vertices, geotrf(rot2, cam.vertices)]

    n_cam_verts = len(cam.vertices)
    faces = []
    for face in cam.faces:
        # Faces touching vertex 0 are skipped.
        if 0 in face:
            continue
        a, b, c = face
        a2, b2, c2 = face + n_cam_verts
        a3, b3, c3 = face + 2*n_cam_verts

        # 3 pseudo-edges towards the scaled copy
        faces.append((a,b,b2))
        faces.append((a,a2,c))
        faces.append((c2,b,c))

        # 3 pseudo-edges towards the rotated copy
        faces.append((a,b,b3))
        faces.append((a,a3,c))
        faces.append((c3,b,c))

    # Add the reversed windings as well so that nothing gets culled.
    faces += [(c,b,a) for a,b,c in faces]

    cam = trimesh.Trimesh(vertices=vertices, faces=faces)
    cam.visual.face_colors[:,:3] = (255, 0, 0)
    scene.add_geometry(cam)

    # Final orientation of the whole scene: inverse of (identity camera pose,
    # OpenCV->OpenGL flip, 180 degree rotation around y).
    rot = np.eye(4)
    cams2world = np.eye(4)
    rot[:3,:3] = Rotation.from_euler('y',np.deg2rad(180)).as_matrix()
    scene.apply_transform(np.linalg.inv(cams2world @ OPENCV_TO_OPENGL_CAMERA_CONVENTION @ rot))

    return scene
|
|
|
def render_meshes(img, l_mesh, l_face, cam_param, color=None, alpha=1.0,
                  show_camera=False,
                  intensity=3.0,
                  metallicFactor=0., roughnessFactor=0.5, smooth=True,
                  ):
    """
    Render multiple meshes and composite them on top of the initial image.
    Args:
        - img: np.array [h,w,3] - the viewport is img.shape[1] wide and img.shape[0] high
        - l_mesh: np.array list of [v,3]
        - l_face: np.array list of [f,3]
        - cam_param: dict with the camera intrinsics ('focal', 'princpt') and,
          optionally, the extrinsics ('R', 't') given in OpenCV convention
        - color: None (one random color per mesh), a list indexed like l_mesh,
          or a single tuple shared by every mesh
        - alpha: opacity of the rendering over the image (1.0 = fully opaque)
        - show_camera: also draw a camera (cone + box) at the origin and three
          axis arrows; requires pyvista
        - intensity: intensity of the directional light attached to the camera
        - metallicFactor, roughnessFactor, smooth: pyrender material/mesh options
    Return:
        - img: np.array [h,w,3], uint8
    Raises:
        - NotImplementedError when color is neither None, a list nor a tuple
    """
    scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))

    # Meshes, each one with a uniform color.
    for i, verts in enumerate(l_mesh):
        if color is None:
            _color = (np.random.choice(range(1,225))/255, np.random.choice(range(1,225))/255, np.random.choice(range(1,225))/255)
        elif isinstance(color, list):
            _color = color[i]
        elif isinstance(color, tuple):
            _color = color
        else:
            raise NotImplementedError
        material = pyrender.MetallicRoughnessMaterial(
            metallicFactor=metallicFactor,
            roughnessFactor=roughnessFactor,
            alphaMode='OPAQUE',
            baseColorFactor=(_color[0], _color[1], _color[2], 1.0))
        mesh = pyrender.Mesh.from_trimesh(trimesh.Trimesh(verts, l_face[i]), material=material, smooth=smooth)
        scene.add(mesh, f"mesh_{i}")

    if show_camera:
        import pyvista

        def get_faces(x):
            # pyvista stores triangulated faces flat as [3, a, b, c, 3, ...];
            # reshaping with -1 avoids relying on PolyData.n_faces.
            return x.faces.astype(np.uint32).reshape((-1, 4))[:, 1:]

        # Camera drawn at the origin: a cone plus a box behind its base.
        material_cam = pyrender.MetallicRoughnessMaterial(metallicFactor=metallicFactor, roughnessFactor=roughnessFactor, alphaMode='OPAQUE', baseColorFactor=(0.5,0.5,0.5))
        height = 0.2
        radius = 0.1
        cone = pyvista.Cone(center=(0.0, 0.0, -height/2), direction=(0.0, 0.0, -1.0), height=height, radius=radius).extract_surface().triangulate()
        verts = cone.points
        mesh = pyrender.Mesh.from_trimesh(trimesh.Trimesh(verts, get_faces(cone)), material=material_cam, smooth=smooth)
        scene.add(mesh, "cone")

        size = 0.1
        box = pyvista.Box(bounds=(-size, size,
                                  -size, size,
                                  verts[:,-1].min() - 3*size, verts[:,-1].min())).extract_surface().triangulate()
        verts = box.points
        mesh = pyrender.Mesh.from_trimesh(trimesh.Trimesh(verts, get_faces(box)), material=material_cam, smooth=smooth)
        scene.add(mesh, "box")

        # Coordinate system: red/green/blue arrows along x/y/z.
        l_color = [(1,0,0,1.0), (0,1,0,1.0), (0,0,1,1.0)]
        l_direction = [(1,0,0), (0,1,0), (0,0,1)]
        scale = 0.2
        pose3d = [2*scale, 0.0, -scale]
        for i, (arrow_color, direction) in enumerate(zip(l_color, l_direction)):
            arrow = pyvista.Arrow(direction=direction, scale=scale)
            arrow = arrow.extract_surface().triangulate()
            verts = arrow.points + np.asarray([pose3d])
            material = pyrender.MetallicRoughnessMaterial(metallicFactor=metallicFactor, roughnessFactor=roughnessFactor, alphaMode='OPAQUE', baseColorFactor=arrow_color)
            mesh = pyrender.Mesh.from_trimesh(trimesh.Trimesh(verts, get_faces(arrow)), material=material, smooth=smooth)
            scene.add(mesh, f"arrow_{i}")

    # Camera: intrinsics, then optional extrinsics (identity when absent).
    focal, princpt = cam_param['focal'], cam_param['princpt']
    camera_pose = np.eye(4)
    if 'R' in cam_param:
        camera_pose[:3, :3] = cam_param['R']
    if 't' in cam_param:
        camera_pose[:3, 3] = cam_param['t']
    camera = pyrender.IntrinsicsCamera(fx=focal[0], fy=focal[1], cx=princpt[0], cy=princpt[1])

    # Flip to the OpenGL convention, then invert to get the pose of the
    # camera node in the scene.
    camera_pose = OPENCV_TO_OPENGL_CAMERA_CONVENTION @ camera_pose
    camera_pose = np.linalg.inv(camera_pose)
    scene.add(camera, pose=camera_pose)

    # Light placed at the camera.
    light = pyrender.DirectionalLight(intensity=intensity)
    scene.add(light, pose=camera_pose)

    # Render; the offscreen renderer is released even if rendering fails.
    renderer = pyrender.OffscreenRenderer(viewport_width=img.shape[1], viewport_height=img.shape[0], point_size=1.0)
    try:
        rgb, depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
    finally:
        renderer.delete()
    rgb = rgb[:,:,:3].astype(np.float32)
    fg = (depth > 0).astype(np.float32)

    # Soften the silhouette: a 3x3 box filter scaled by 2 with a bias of -1
    # gives 1 where the whole neighbourhood is foreground and a value that
    # falls off (clamped at 0) near the border; background pixels stay at 0.
    bg_blending_radius = 1
    bg_blending_kernel = 2.0 * torch.ones((1, 1, 2 * bg_blending_radius + 1, 2 * bg_blending_radius + 1)) / (2 * bg_blending_radius + 1) ** 2
    bg_blending_bias = -torch.ones(1)
    fg = torch.from_numpy(fg).unsqueeze(0)
    fg = torch.clamp_min(torch.nn.functional.conv2d(fg, weight=bg_blending_kernel, bias=bg_blending_bias, padding=bg_blending_radius) * fg, 0.0)
    fg = fg.permute(1,2,0).numpy()

    # Alpha-blend the rendering with the input image inside the mask.
    img = (fg * (alpha * rgb + (1.0-alpha) * img) + (1-fg) * img).astype(np.uint8)

    return img
|
|
|
def length(v):
    """Return the Euclidean norm of the 3D vector v (only v[0], v[1], v[2] are read)."""
    x, y, z = v[0], v[1], v[2]
    return math.sqrt(x*x + y*y + z*z)
|
|
|
def cross(v0, v1):
    """Return the cross product v0 x v1 of two 3D vectors as a list of 3 values."""
    ax, ay, az = v0[0], v0[1], v0[2]
    bx, by, bz = v1[0], v1[1], v1[2]
    return [
        ay*bz - by*az,
        az*bx - bz*ax,
        ax*by - bx*ay,
    ]
|
|
|
def dot(v0, v1):
    """Return the dot product of two 3D vectors (only the first three components are read)."""
    ax, ay, az = v0[0], v0[1], v0[2]
    bx, by, bz = v1[0], v1[1], v1[2]
    return ax*bx + ay*by + az*bz
|
|
|
def normalize(v, eps=1e-13):
    """Return v scaled to (almost) unit length, as a list of 3 values.

    eps is added to the norm before dividing, so a zero vector yields zeros
    instead of raising ZeroDivisionError.
    """
    denom = length(v) + eps
    return [component / denom for component in (v[0], v[1], v[2])]
|
|
|
def lookAt(eye, target, *args, **kwargs):
    """
    Build the 4x4 view matrix of a camera placed at eye and looking at target.

    eye is the point of view and target is the point which is looked at.
    The upwards direction is fixed to [0,-1,0]; extra positional and keyword
    arguments are accepted but ignored.

    Input should be in OpenCV format. The matrix is computed in the OpenGL
    fashion (camera looking down -z) and then transformed back to OpenCV.

    Note: when the viewing direction is parallel to the up vector the
    cross product below vanishes and the resulting x axis is (close to) zero.
    """
    up = [0, -1, 0]

    # Orthogonal camera frame: viewing direction, right vector, recomputed up.
    forward = normalize((target[0]-eye[0], target[1]-eye[1], target[2]-eye[2]))
    right = normalize(cross(forward, up))
    true_up = cross(right, forward)

    # In the OpenGL convention the camera z axis points away from the target.
    backward = [-forward[0], -forward[1], -forward[2]]

    # One row per camera axis; the last column moves eye to the origin.
    rows = [
        [axis[0], axis[1], axis[2], -dot(axis, eye)]
        for axis in (right, true_up, backward)
    ]
    rows.append([0, 0, 0, 1])
    viewMatrix = np.asarray(rows).reshape(4, 4)

    # Back to the OpenCV convention.
    return OPENCV_TO_OPENGL_CAMERA_CONVENTION @ viewMatrix
|
|
|
def print_distance_on_image(pred_rend_array, humans, _color):
    """
    Write the distance of each human to the camera on the rendered image.
    Args:
        - pred_rend_array: np.array image, converted with PIL.Image.fromarray
        - humans: list of dicts with tensors 'transl_pelvis' (3 values) and
          'j2d_smplx' (2D keypoints, [k,2])
        - _color: sequence indexed like humans; each entry is multiplied by
          255 to get the text color (so presumably values in [0,1])
    Return:
        - np.array of the annotated image
    """
    font = ImageFont.load_default()
    canvas = Image.fromarray(pred_rend_array)
    pen = ImageDraw.Draw(canvas)

    for idx, person in enumerate(humans):
        # Distance in the x-z plane only: the y component of the pelvis is ignored.
        pelvis = person['transl_pelvis'].cpu().numpy().reshape(3)
        dist_cam = np.sqrt(((pelvis[[0,2]])**2).sum())

        # The label is centered horizontally on the top edge of the enlarged 2D box.
        x1, y1, x2, _ = get_bbox(person['j2d_smplx'].cpu().numpy(), factor=1.35, output_format='x1y1x2y2')
        txt = f"{dist_cam:.2f}m"
        text_width = font.getlength(txt)
        text_x = (x1 + x2) / 2. - text_width // 2

        fill = tuple((np.asarray(_color[idx]) * 255).astype(np.int32).tolist())
        pen.text((text_x, y1), txt, fill=fill, font=font)

    return np.asarray(canvas)
|
|
|
def get_bbox(points, factor=1., output_format='xywh'):
    """
    Compute the axis-aligned bounding box of 2D points, enlarged around its center.
    Args:
        - points: [k,2]
        - factor: scale applied to the width and height of the tight box
        - output_format: 'xywh' (top-left corner + size) or 'x1y1x2y2' (two corners)
    Return:
        - bbox: [4] list of ints in the requested format
    Raises:
        - NotImplementedError for any other output_format
    """
    assert len(points.shape) == 2, f"Wrong shape, expected two-dimensional array. Got shape {points.shape}"
    assert points.shape[1] == 2

    xs, ys = points[:,0], points[:,1]
    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()

    # Center of the tight box.
    center_x = (x_max + x_min) / 2.
    center_y = (y_max + y_min) / 2.

    # Scaled size, truncated to integers before the corners are derived.
    box_w = int(factor * np.abs(x_max - x_min))
    box_h = int(factor * np.abs(y_max - y_min))

    left, top = int(center_x - box_w / 2.), int(center_y - box_h / 2.)
    right, bottom = int(center_x + box_w / 2.), int(center_y + box_h / 2.)

    if output_format == 'xywh':
        return [left, top, box_w, box_h]
    if output_format == 'x1y1x2y2':
        return [left, top, right, bottom]
    raise NotImplementedError
|
|
|
def render_side_views(img_array, _color, humans, model, K):
    """
    Render the predicted humans from three extra viewpoints on a white background.
    Args:
        - img_array: np.array image, only used to get the size of the outputs
        - _color: color(s) forwarded to render_meshes
        - humans: list of dicts with tensors 'verts_smplx' and 'transl_pelvis'
        - model: object exposing model.smpl_layer['neutral'].bm_x.faces
        - K: tensor of camera intrinsics; only K[0] is read (focal on the
          diagonal, principal point in the last column)
    Return:
        - (pred_rend_array_bis, pred_rend_array_sideview, pred_rend_array_bev):
          an oblique view, a side view and a view from above.
          When humans is empty these are plain float arrays filled with 255.
    """
    _bg = 255.

    # Camera intrinsics of the first element of K.
    intrinsics = K[0]
    focal = np.asarray([intrinsics[0,0].cpu().numpy(), intrinsics[1,1].cpu().numpy()])
    princpt = np.asarray([intrinsics[0,-1].cpu().numpy(), intrinsics[1,-1].cpu().numpy()])

    # One vertex array per human; the same neutral faces are used for all of them.
    l_verts = [hum['verts_smplx'].cpu().numpy() for hum in humans]
    l_faces = [model.smpl_layer['neutral'].bm_x.faces for _ in humans]

    bg_array = 1 + 0.*img_array.copy()

    def white_canvas():
        # Fresh white background with the shape of the input image.
        return _bg * bg_array.copy()

    if len(humans) == 0:
        return white_canvas(), white_canvas(), white_canvas()

    def render_from(eye, target):
        # Render every human as seen from eye, looking at target.
        view = lookAt(eye=eye, target=target)
        cam_param = {'focal': focal, 'princpt': princpt, 'R': view[:3,:3], 't': view[:3,3]}
        return render_meshes(white_canvas(), l_verts, l_faces, cam_param,
                             alpha=1.0, color=_color, show_camera=True)

    # Oblique view from a fixed viewpoint.
    pred_rend_array_bis = render_from([2.,-1,-2], [0,0,3])

    # Median depth of the pelvises: the point the two other views look at.
    target_z = np.median(np.asarray(
        [hum['transl_pelvis'].cpu().numpy().reshape(-1)[-1] for hum in humans]
    ))

    # Side view.
    pred_rend_array_sideview = render_from([2.2*target_z,0,target_z], [0,0,target_z])

    # View from above; the small z offset keeps the viewing direction from
    # being exactly parallel to the up vector used by lookAt.
    pred_rend_array_bev = render_from([0.,-2*target_z,target_z-0.001], [0,0,target_z])

    return pred_rend_array_bis, pred_rend_array_sideview, pred_rend_array_bev