gene-hoi-denoising / utils /model_util.py
meow
readme
1b00369
# from model.mdm import MDM
# from model.mdm_ours import MDM as MDM_Ours
# from model.mdm_ours import MDMV3 as MDM_Ours_V3
# from model.mdm_ours import MDMV4 as MDM_Ours_V4
# from model.mdm_ours import MDMV5 as MDM_Ours_V5
# from model.mdm_ours import MDMV6 as MDM_Ours_V6
# from model.mdm_ours import MDMV7 as MDM_Ours_V7
# from model.mdm_ours import MDMV8 as MDM_Ours_V8
# from model.mdm_ours import MDMV9 as MDM_Ours_V9
from model.mdm_ours import MDMV10 as MDM_Ours_V10
# from model.mdm_ours import MDMV11 as MDM_Ours_V11
# MDM_Ours_V12
from model.mdm_ours import MDMV12 as MDM_Ours_V12
# MDM_Ours_V13
# from model.mdm_ours import MDMV13 as MDM_Ours_V13
# # MDM_Ours_V14
# from model.mdm_ours import MDMV14 as MDM_Ours_V14
from diffusion import gaussian_diffusion as gd
from diffusion.respace import SpacedDiffusion, space_timesteps
from utils.parser_util import get_cond_mode
import torch
from torch import optim, nn
import torch.nn.functional as F
from manopth.manolayer import ManoLayer
import numpy as np
import trimesh
import os
from diffusion.respace_ours import SpacedDiffusion as SpacedDiffusion_Ours
# SpacedDiffusionV2
from diffusion.respace_ours import SpacedDiffusionV2 as SpacedDiffusion_OursV2
from diffusion.respace_ours import SpacedDiffusionV3 as SpacedDiffusion_OursV3
# SpacedDiffusionV4
from diffusion.respace_ours import SpacedDiffusionV4 as SpacedDiffusion_OursV4
# SpacedDiffusion_OursV5
from diffusion.respace_ours import SpacedDiffusionV5 as SpacedDiffusion_OursV5
# SpacedDiffusion_OursV6
from diffusion.respace_ours import SpacedDiffusionV6 as SpacedDiffusion_OursV6
# SpacedDiffusion_OursV7
from diffusion.respace_ours import SpacedDiffusionV7 as SpacedDiffusion_OursV7
from diffusion.respace_ours import SpacedDiffusionV9 as SpacedDiffusion_OursV9
def batched_index_select_ours(values, indices, dim = 1):
value_dims = values.shape[(dim + 1):]
values_shape, indices_shape = map(lambda t: list(t.shape), (values, indices))
indices = indices[(..., *((None,) * len(value_dims)))]
indices = indices.expand(*((-1,) * len(indices_shape)), *value_dims)
value_expand_len = len(indices_shape) - (dim + 1)
values = values[(*((slice(None),) * dim), *((None,) * value_expand_len), ...)]
value_expand_shape = [-1] * len(values.shape)
expand_slice = slice(dim, (dim + value_expand_len))
value_expand_shape[expand_slice] = indices.shape[expand_slice]
values = values.expand(*value_expand_shape)
dim += value_expand_len
return values.gather(dim, indices)
def gaussian_entropy(logvar): # gaussian entropy ##
const = 0.5 * float(logvar.size(1)) * (1. + np.log(np.pi * 2))
ent = 0.5 * logvar.sum(dim=1, keepdim=False) + const
return ent
def standard_normal_logprob(z): # feature dim
dim = z.size(-1) # dim size -1
log_z = -0.5 * dim * np.log(2 * np.pi)
return log_z - z.pow(2) / 2
def load_multiple_models_fr_path(model_path, model):
model_paths = model_path.split(";")
print(f"Loading multiple models with split model_path: {model_paths}")
setting_to_model_path = {}
for cur_path in model_paths:
cur_setting_nm, cur_model_path = cur_path.split(':')
setting_to_model_path[cur_setting_nm] = cur_model_path
loaded_dict = {}
for cur_setting in setting_to_model_path:
cur_model_path = setting_to_model_path[cur_setting]
cur_model_state_dict = torch.load(cur_model_path, map_location='cpu')
if cur_setting == 'diff_realbasejtsrel':
interested_keys = [
'real_basejtsrel_input_process', 'real_basejtsrel_sequence_pos_encoder', 'real_basejtsrel_seqTransEncoder', 'real_basejtsrel_embed_timestep', 'real_basejtsrel_sequence_pos_denoising_encoder', 'real_basejtsrel_denoising_seqTransEncoder', 'real_basejtsrel_output_process'
]
elif cur_setting == 'diff_basejtsrel':
interested_keys = [
'avg_joints_sequence_input_process', 'joints_offset_input_process', 'sequence_pos_encoder', 'seqTransEncoder', 'logvar_seqTransEncoder', 'embed_timestep', 'basejtsrel_denoising_embed_timestep', 'sequence_pos_denoising_encoder', 'basejtsrel_denoising_seqTransEncoder', 'basejtsrel_glb_denoising_latents_trans_layer', 'avg_joint_sequence_output_process', 'joint_offset_output_process', 'output_process'
]
elif cur_setting == 'diff_realbasejtsrel_to_joints':
interested_keys = [
'real_basejtsrel_to_joints_input_process', 'real_basejtsrel_to_joints_sequence_pos_encoder', 'real_basejtsrel_to_joints_seqTransEncoder', 'real_basejtsrel_to_joints_embed_timestep', 'real_basejtsrel_to_joints_sequence_pos_denoising_encoder', 'real_basejtsrel_to_joints_denoising_seqTransEncoder', 'real_basejtsrel_to_joints_output_process',
]
else:
raise ValueError(f"cur_setting:{cur_setting} Not implemented yet")
for k in cur_model_state_dict:
for cur_inter_key in interested_keys:
if cur_inter_key in k:
loaded_dict[k] = cur_model_state_dict[k]
model_dict = model.state_dict()
model_dict.update(loaded_dict)
model.load_state_dict(model_dict)
def load_model_wo_clip(model, state_dict): # missing_keys: in the current model but not found in the state_dict? # unexpected_keys: not in the current model but found inthe state_dict?
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
# print(unexpected_keys)
assert len(unexpected_keys) == 0
assert all([k.startswith('clip_model.') for k in missing_keys])
### create model and diffusion ## #
def create_model_and_diffusion(args, data):
if args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist", "ambient_obj_base_rel_dist"]:
model = MDM_Ours(**get_model_args(args, data))
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we"]:
model = MDM_Ours_V3(**get_model_args(args, data))
# MDM_Ours_V4
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we_wj"]:
model = MDM_Ours_V4(**get_model_args(args, data))
# obj_base_rel_dist_we_wj_latents
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we_wj_latents"]:
if args.diff_spatial:
if args.pred_joints_offset:
if args.diff_joint_quants:
model = MDM_Ours_V13(**get_model_args(args, data))
elif args.diff_hand_params:
model = MDM_Ours_V14(**get_model_args(args, data))
else:
if args.finetune_with_cond:
print(f"Using MDM ours V12!!!!")
model = MDM_Ours_V12(**get_model_args(args, data))
else:
print(f"Using MDM ours V10!!!!")
model = MDM_Ours_V10(**get_model_args(args, data))
else:
print(f"Using MDM ours V9!!!!")
model = MDM_Ours_V9(**get_model_args(args, data))
elif args.diff_latents:
print(f"Using MDM ours V11!!!!")
model = MDM_Ours_V11(**get_model_args(args, data))
elif args.use_sep_models:
if args.use_vae:
if args.pred_basejtsrel_avgjts:
print(f"Using MDM ours V8!!!!")
model = MDM_Ours_V8(**get_model_args(args, data))
else:
model = MDM_Ours_V7(**get_model_args(args, data))
else:
model = MDM_Ours_V6(**get_model_args(args, data))
else:
model = MDM_Ours_V5(**get_model_args(args, data))
else:
model = MDM(**get_model_args(args, data))
diffusion = create_gaussian_diffusion(args)
return model, diffusion
# give utils to models #
def get_model_args(args, data):
# default_args
clip_version = 'ViT-B/32'
action_emb = 'tensor' ## get model arguments ##
cond_mode = get_cond_mode(args)
if hasattr(data.dataset, 'num_actions'):
num_actions = data.dataset.num_actions
else:
num_actions = 1
# SMPL defaults
data_rep = 'rot6d'
njoints = 25
nfeats = 6
if args.dataset in ['humanml']: ## from
data_rep = 'hml_vec'
njoints = 263 # joints
nfeats = 1
elif args.dataset in ['motion_ours']:
data_rep = 'xyz'
njoints = 21
nfeats = 3
elif args.dataset == 'kit':
data_rep = 'hml_vec'
njoints = 251
nfeats = 1
## modeltype;
return {'modeltype': '', 'njoints': njoints, 'nfeats': nfeats, 'num_actions': num_actions,
'translation': True, 'pose_rep': 'rot6d', 'glob': True, 'glob_rot': True,
'latent_dim': args.latent_dim, 'ff_size': 1024, 'num_layers': args.layers, 'num_heads': 4,
'dropout': 0.1, 'activation': "gelu", 'data_rep': data_rep, 'cond_mode': cond_mode,
'cond_mask_prob': args.cond_mask_prob, 'action_emb': action_emb, 'arch': args.arch,
'emb_trans_dec': args.emb_trans_dec, 'clip_version': clip_version, 'dataset': args.dataset, 'args': args}
def optimize_sampled_hand_joints(sampled_joints, rel_base_pts_to_joints, dists_base_pts_to_joints, base_pts, base_normals):
# sampled_joints: bsz x ws x nnj x 3
# signed distances
# smoothness
bsz, ws, nnj = sampled_joints.shape[:3]
device = sampled_joints.device
coarse_lr = 0.1
num_iters = 100 # if i_iter > 0 else 1 ## nn-coarse-iters for global transformations #
mano_path = "/data1/sim/mano_models/mano/models"
base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
base_normals_exp = base_normals.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
signed_dist_e_coeff = 1.0
signed_dist_e_coeff = 0.0
### start optimization ###
# setup MANO layer
mano_layer = ManoLayer(
flat_hand_mean=True,
side='right',
mano_root=mano_path, # mano_path for the mano model #
ncomps=24,
use_pca=True,
root_rot_mode='axisang',
joint_rot_mode='axisang'
).to(device)
## random init variables ##
beta_var = torch.randn([bsz, 10]).to(device)
rot_var = torch.randn([bsz * ws, 3]).to(device)
theta_var = torch.randn([bsz * ws, 24]).to(device)
transl_var = torch.randn([bsz * ws, 3]).to(device)
beta_var.requires_grad_()
rot_var.requires_grad_()
theta_var.requires_grad_()
transl_var.requires_grad_()
opt = optim.Adam([rot_var, transl_var], lr=coarse_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
if dists_base_pts_to_joints is not None:
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).to(device).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
''' strategy 2: use all base pts, rel, dists for resolving '''
# rel_base_pts_to_hand_joints: bsz x ws x nnj x nnb x 3 #
signed_dist_mask = signed_dist_base_pts_to_hand_joints < 0.
l2_dist_rel_joints_to_base_pts_mask = torch.sqrt(
torch.sum(rel_base_pts_to_hand_joints ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_base_pts_mask.float()) > 1.5
dot_rel_with_normals = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# dot_rel_with_normals: bsz x ws x nnj x nnb
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
signed_dist_e = dot_rel_with_normals * signed_dist_base_pts_to_hand_joints
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 2: use all base pts, rel, dists for resolving '''
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# dist_rhand_joints_to_base_pts = torch.sum(
# (hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
# )
# # minn_dists_idxes: bsz x ws x nnj -->
# minn_dists_to_base_pts, minn_dists_idxes = torch.min(
# dist_rhand_joints_to_base_pts, dim=-1
# )
# # base_pts: bsz x nn_base_pts x 3 #
# # base_pts: bsz x ws x nn_base_pts x 3 #
# # bsz x ws x nnj
# # object verts and object faces #
# ## other than the sampling process; not
# # bsz x ws x nnj x 3 ##
# nearest_base_pts = batched_index_select_ours(
# base_pts_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # base normalse #
# nearest_base_normals = batched_index_select_ours(
# base_normals_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # the nearest distance points may be of some ambiguous
# rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# # bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.05
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
# ### ==== mean of signed distances ==== ###
# signed_dist_e = torch.sum( # penetration
# -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# ) / torch.clamp(
# torch.sum(signed_dist_mask.float()), min=1e-5
# ).item()
''' strategy 1: use nearest base pts, rel, dists for resolving '''
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
### signed distance coeff -> the distance coeff #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
fine_lr = 0.1
num_iters = 1000
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
dist_rhand_joints_to_base_pts = torch.sum(
(hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
)
# minn_dists_idxes: bsz x ws x nnj -->
minn_dists_to_base_pts, minn_dists_idxes = torch.min(
dist_rhand_joints_to_base_pts, dim=-1
)
# base_pts: bsz x nn_base_pts x 3 #
# base_pts: bsz x ws x nn_base_pts x 3 #
# bsz x ws x nnj
# base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# bsz x ws x nnj x 3 ##
nearest_base_pts = batched_index_select_ours(
base_pts_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
nearest_base_normals = batched_index_select_ours(
base_normals_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# bsz x ws x nnj #
signed_dist_joints_to_base_pts = torch.sum(
rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
)
# should not be negative
signed_dist_mask = signed_dist_joints_to_base_pts < 0.
l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
### ==== mean of signed distances ==== ###
signed_dist_e = torch.sum(
-1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
) / torch.clamp(
torch.sum(signed_dist_mask.float()), min=1e-5
).item()
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
### refine the optimization with signed energy ##
signed_dist_e_coeff = 1.0
fine_lr = 0.1
num_iters = 1000
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
''' strategy 2: use all base pts, rel, dists for resolving '''
# rel_base_pts_to_hand_joints: bsz x ws x nnj x nnb x 3 #
signed_dist_mask = signed_dist_base_pts_to_hand_joints < 0.
l2_dist_rel_joints_to_base_pts_mask = torch.sqrt(
torch.sum(rel_base_pts_to_hand_joints ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_base_pts_mask.float()) > 1.5
## === dot rel with normals === ##
# dot_rel_with_normals = torch.sum(
# rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
# )
## === dot rel with normals === ##
## === dot rel with rel, strategy 3 === ##
dot_rel_with_normals = torch.sum(
-1.0 * rel_base_pts_to_hand_joints * rel_base_pts_to_hand_joints, dim=-1
)
## === dot rel with rel, strategy 3 === ##
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# dot_rel_with_normals: bsz x ws x nnj x nnb
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
signed_dist_e = dot_rel_with_normals * signed_dist_base_pts_to_hand_joints
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 2: use all base pts, rel, dists for resolving '''
# hard projections for
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# ### === e2 the signed distances to nearest points should not be negative to the neareste === ###
# ## base_pts: bsz x nn_base_pts x 3
# ## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
# ## bsz x ws x nnj x nnb ##
# dist_rhand_joints_to_base_pts = torch.sum(
# (hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
# )
# # minn_dists_idxes: bsz x ws x nnj -->
# minn_dists_to_base_pts, minn_dists_idxes = torch.min(
# dist_rhand_joints_to_base_pts, dim=-1
# )
#
# # base_pts: bsz x nn_base_pts x 3 #
# # base_pts: bsz x ws x nn_base_pts x 3 #
# # bsz x ws x nnj
# # base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# # bsz x ws x nnj x 3 ##
# nearest_base_pts = batched_index_select_ours(
# base_pts_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 #
# nearest_base_normals = batched_index_select_ours(
# base_normals_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 #
# rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# # bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
# ## === luojisiwei and others === ##
# # l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# # torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# # ) < 0.05
# ## === luojisiwei and others === ##
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.1
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
# ### ==== mean of signed distances ==== ###
# # signed_dist_e = torch.sum(
# # -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# # ) / torch.clamp(
# # torch.sum(signed_dist_mask.float()), min=1e-5
# # ).item()
# # signed_dist_joints_to_base_pts: bsz x ws x nnj # -> disstances
# signed_dist_joints_to_base_pts = signed_dist_joints_to_base_pts.detach()
# #
## penetraition resolving --- strategy
# dot_rel_with_normals = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# # bsz x ws x nnj --> the loss term
# ## signed distances 3 #### isgned distance 3 ###
# ## dotrelwithnormals, ##
# # # signed_dist_mask -> the distances
# # dot_rel_with_normals: bsz x ws x nnj x nnb
# avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
# signed_dist_e = dot_rel_with_normals * signed_dist_joints_to_base_pts
# signed_dist_e = torch.sum(
# signed_dist_e[signed_dist_mask]
# ) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
# ###### ====== get loss for signed distances ==== ###
''' strategy 1: use nearest base pts, rel, dists for resolving '''
## judeg whether inside the object and only project those one inside of the object
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
# shoudl take a
# how to proejct the jvertex
# hwo to project the veretex
# weighted sum of the projectiondirection
# weights of each base point
# atraction field -> should be able to learn the penetration resolving strategy
# stochestic penetration resolving strategy #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
# avg_masks
print('\tAvg masks: {}'.format(avg_masks.item()))
''' returning sampled_joints '''
sampled_joints = hand_joints
np.save("optimized_verts.npy", hand_verts.detach().cpu().numpy())
print(f"Optimized verts saved to optimized_verts.npy")
return sampled_joints.detach()
def get_obj_trimesh_list(obj_verts, obj_faces):
tot_trimeshes = []
tot_n = len(obj_verts)
for i_obj in range(tot_n):
cur_obj_verts, cur_obj_faces = obj_verts[i_obj], obj_faces[i_obj]
if isinstance(cur_obj_verts, torch.Tensor):
cur_obj_verts = cur_obj_verts.detach().cpu().numpy()
if isinstance(cur_obj_faces, torch.Tensor):
cur_obj_faces = cur_obj_faces.detach().cpu().numpy()
cur_obj_mesh = trimesh.Trimesh(vertices=cur_obj_verts, faces=cur_obj_faces,
process=False, use_embree=True)
tot_trimeshes.append(cur_obj_mesh)
return tot_trimeshes
def judge_penetrated_points(obj_mesh, subj_pts):
# bsz
tot_pts_inside_objmesh_labels = []
nn_bsz = len(obj_mesh)
for i_bsz in range(nn_bsz):
cur_obj_mesh = obj_mesh[i_bsz]
cur_subj_pts = subj_pts[i_bsz].detach().cpu().numpy()
ori_subj_pts_shape = cur_subj_pts.shape
if len(cur_subj_pts.shape) > 2:
cur_subj_pts = cur_subj_pts.reshape(cur_subj_pts.shape[0] * cur_subj_pts.shape[1], 3)
#
pts_inside_objmesh = cur_obj_mesh.contains(cur_subj_pts)
pts_inside_objmesh = pts_inside_objmesh.astype(np.float32)
### reshape inside_objmesh labels ###
pts_inside_objmesh = pts_inside_objmesh.reshape(*ori_subj_pts_shape[:-1])
tot_pts_inside_objmesh_labels.append(pts_inside_objmesh)
tot_pts_inside_objmesh_labels = np.stack(tot_pts_inside_objmesh_labels, axis=0) # nn_bsz x nn_subj_pts
tot_pts_inside_objmesh_labels = torch.from_numpy(tot_pts_inside_objmesh_labels).float()
return tot_pts_inside_objmesh_labels.to(subj_pts.device) # gt inside objmesh labels and to the pts device #
# TODO: other optimization strategies? e.g. sequential optimziation> #
def optimize_sampled_hand_joints_wobj(sampled_joints, rel_base_pts_to_joints, dists_base_pts_to_joints, base_pts, base_normals, obj_verts, obj_normals, obj_faces):
# sampled_joints: bsz x ws x nnj x 3
# signed distances
# smoothness
# tot_n_objs #
tot_obj_trimeshes = get_obj_trimesh_list(obj_verts, obj_faces)
## TODO: write the collect function for object verts, normals, faces ##
### A simple penetration resolving strategy is as follows:
#### 1) get vertices in the object; 2) get nearest base points (for simplicity); 3) project the vertex to the base point ####
## 1) for joints only;
## 2) for vertices;
## 3) for vertices ##
## TODO: optimzie the resolvign strategy stated above ##
bsz, ws, nnj = sampled_joints.shape[:3]
device = sampled_joints.device
coarse_lr = 0.1
num_iters = 100 # if i_iter > 0 else 1 ## nn-coarse-iters for global transformations #
mano_path = "/data1/sim/mano_models/mano/models"
# obj_verts: bsz x nnobjverts x
base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
base_normals_exp = base_normals.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
signed_dist_e_coeff = 1.0
signed_dist_e_coeff = 0.0
### start optimization ###
# setup MANO layer
mano_layer = ManoLayer(
flat_hand_mean=True,
side='right',
mano_root=mano_path, # mano_path for the mano model #
ncomps=24,
use_pca=True,
root_rot_mode='axisang',
joint_rot_mode='axisang'
).to(device)
## random init variables ##
beta_var = torch.randn([bsz, 10]).to(device)
rot_var = torch.randn([bsz * ws, 3]).to(device)
theta_var = torch.randn([bsz * ws, 24]).to(device)
transl_var = torch.randn([bsz * ws, 3]).to(device)
beta_var.requires_grad_()
rot_var.requires_grad_()
theta_var.requires_grad_()
transl_var.requires_grad_()
opt = optim.Adam([rot_var, transl_var], lr=coarse_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
if dists_base_pts_to_joints is not None:
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).to(device).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
''' strategy 2: use all base pts, rel, dists for resolving '''
# rel_base_pts_to_hand_joints: bsz x ws x nnj x nnb x 3 #
signed_dist_mask = signed_dist_base_pts_to_hand_joints < 0.
l2_dist_rel_joints_to_base_pts_mask = torch.sqrt(
torch.sum(rel_base_pts_to_hand_joints ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_base_pts_mask.float()) > 1.5
dot_rel_with_normals = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# dot_rel_with_normals: bsz x ws x nnj x nnb
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
signed_dist_e = dot_rel_with_normals * signed_dist_base_pts_to_hand_joints
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 2: use all base pts, rel, dists for resolving '''
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# dist_rhand_joints_to_base_pts = torch.sum(
# (hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
# )
# # minn_dists_idxes: bsz x ws x nnj -->
# minn_dists_to_base_pts, minn_dists_idxes = torch.min(
# dist_rhand_joints_to_base_pts, dim=-1
# )
# # base_pts: bsz x nn_base_pts x 3 #
# # base_pts: bsz x ws x nn_base_pts x 3 #
# # bsz x ws x nnj
# # object verts and object faces #
# ## other than the sampling process; not
# # bsz x ws x nnj x 3 ##
# nearest_base_pts = batched_index_select_ours(
# base_pts_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # base normalse #
# nearest_base_normals = batched_index_select_ours(
# base_normals_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # the nearest distance points may be of some ambiguous
# rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# # bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.05
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
# ### ==== mean of signed distances ==== ###
# signed_dist_e = torch.sum( # penetration
# -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# ) / torch.clamp(
# torch.sum(signed_dist_mask.float()), min=1e-5
# ).item()
''' strategy 1: use nearest base pts, rel, dists for resolving '''
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
### signed distance coeff -> the distance coeff #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
fine_lr = 0.1
num_iters = 1000
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
dist_rhand_joints_to_base_pts = torch.sum(
(hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
)
# minn_dists_idxes: bsz x ws x nnj -->
minn_dists_to_base_pts, minn_dists_idxes = torch.min(
dist_rhand_joints_to_base_pts, dim=-1
)
# base_pts: bsz x nn_base_pts x 3 #
# base_pts: bsz x ws x nn_base_pts x 3 #
# bsz x ws x nnj
# base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# bsz x ws x nnj x 3 ##
nearest_base_pts = batched_index_select_ours(
base_pts_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
nearest_base_normals = batched_index_select_ours(
base_normals_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# bsz x ws x nnj #
signed_dist_joints_to_base_pts = torch.sum(
rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
)
# should not be negative
signed_dist_mask = signed_dist_joints_to_base_pts < 0.
l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
### ==== mean of signed distances ==== ###
signed_dist_e = torch.sum(
-1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
) / torch.clamp(
torch.sum(signed_dist_mask.float()), min=1e-5
).item()
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
# tot_obj_trimeshes
### refine the optimization with signed energy ##
signed_dist_e_coeff = 1.0 #
fine_lr = 0.1
# num_iters = 1000 #
num_iters = 100 # reinement #
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters): #
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
''' strategy 2: use all base pts, rel, dists for resolving '''
# # rel_base_pts_to_hand_joints: bsz x ws x nnj x nnb x 3 #
# signed_dist_mask = signed_dist_base_pts_to_hand_joints < 0.
# l2_dist_rel_joints_to_base_pts_mask = torch.sqrt(
# torch.sum(rel_base_pts_to_hand_joints ** 2, dim=-1)
# ) < 0.05
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_base_pts_mask.float()) > 1.5
# ## === dot rel with normals === ##
# # dot_rel_with_normals = torch.sum(
# # rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
# # )
# ## === dot rel with normals === ##
# ## === dot rel with rel, strategy 3 === ##
# dot_rel_with_normals = torch.sum(
# -1.0 * rel_base_pts_to_hand_joints * rel_base_pts_to_hand_joints, dim=-1
# )
# ## === dot rel with rel, strategy 3 === ##
# signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# # dot_rel_with_normals: bsz x ws x nnj x nnb
# avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
# signed_dist_e = dot_rel_with_normals * signed_dist_base_pts_to_hand_joints
# signed_dist_e = torch.sum(
# signed_dist_e[signed_dist_mask]
# ) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
# ###### ====== get loss for signed distances ==== ###
''' strategy 2: use all base pts, rel, dists for resolving '''
## use all base pts ##
{
# hard projections for
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# ### === e2 the signed distances to nearest points should not be negative to the neareste === ###
# ## base_pts: bsz x nn_base_pts x 3
# ## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
# ## bsz x ws x nnj x nnb ##
# dist_rhand_joints_to_base_pts = torch.sum(
# (hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
# )
# # minn_dists_idxes: bsz x ws x nnj -->
# minn_dists_to_base_pts, minn_dists_idxes = torch.min(
# dist_rhand_joints_to_base_pts, dim=-1
# )
#
# # base_pts: bsz x nn_base_pts x 3 #
# # base_pts: bsz x ws x nn_base_pts x 3 #
# # bsz x ws x nnj
# # base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# # bsz x ws x nnj x 3 ##
# nearest_base_pts = batched_index_select_ours(
# base_pts_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 #
# nearest_base_normals = batched_index_select_ours(
# base_normals_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 #
# rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# # bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
# ## === luojisiwei and others === ##
# # l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# # torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# # ) < 0.05
# ## === luojisiwei and others === ##
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.1
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
# ### ==== mean of signed distances ==== ###
# # signed_dist_e = torch.sum(
# # -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# # ) / torch.clamp(
# # torch.sum(signed_dist_mask.float()), min=1e-5
# # ).item()
# # signed_dist_joints_to_base_pts: bsz x ws x nnj # -> disstances
# signed_dist_joints_to_base_pts = signed_dist_joints_to_base_pts.detach()
# #
## penetraition resolving --- strategy
# dot_rel_with_normals = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# # bsz x ws x nnj --> the loss term
# ## signed distances 3 #### isgned distance 3 ###
# ## dotrelwithnormals, ##
# # # signed_dist_mask -> the distances
# # dot_rel_with_normals: bsz x ws x nnj x nnb
# avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
# signed_dist_e = dot_rel_with_normals * signed_dist_joints_to_base_pts
# signed_dist_e = torch.sum(
# signed_dist_e[signed_dist_mask]
# ) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
# ###### ====== get loss for signed distances ==== ###
''' strategy 1: use nearest base pts, rel, dists for resolving '''
}
# bsz x ws x nnj # --> objmesh insides pts labels
pts_inside_objmesh_labels = judge_penetrated_points(tot_obj_trimeshes, hand_joints)
pts_inside_objmesh_labels_mask = pts_inside_objmesh_labels.bool()
# {
# hard projections for
''' strategy 1: use nearest base pts, rel, dists for resolving '''
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
dist_rhand_joints_to_base_pts = torch.sum(
(hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
)
# minn_dists_idxes: bsz x ws x nnj -->
# base_pts
minn_dists_to_base_pts, minn_dists_idxes = torch.min(
dist_rhand_joints_to_base_pts, dim=-1
)
# base_pts: bsz x nn_base_pts x 3 #
# base_pts: bsz x ws x nn_base_pts x 3 #
# bsz x ws x nnj
# base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# bsz x ws x nnj x 3 ##
# simple penetration ##
nearest_base_pts = batched_index_select_ours(
base_pts_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
nearest_base_normals = batched_index_select_ours(
base_normals_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
## === luojisiwei and others === ##
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.05
## === luojisiwei and others === ##
##### ===== GET l2_distance mask ===== #####
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.1
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
##### ===== GET l2_distance mask ===== #####
### ==== mean of signed distances ==== ###
# signed_dist_e = torch.sum(
# -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# ) / torch.clamp(
# torch.sum(signed_dist_mask.float()), min=1e-5
# ).item()
# signed_dist_joints_to_base_pts: bsz x ws x nnj # -> disstances
signed_dist_joints_to_base_pts = signed_dist_joints_to_base_pts.detach()
#
# dot rel
# penetraition resolving --- strategy
# dot_rel_with_normals = torch.sum( # dot rhand joints with normals #
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
#
dot_rel_with_normals = torch.sum( # dot rhand joints with normals #
-rel_joints_to_nearest_base_pts * rel_joints_to_nearest_base_pts, dim=-1
)
#### Get masks for penetrated joint points ####
# signed_dist_mask = (signed_dist_mask.float() + pts_inside_objmesh_labels_mask.float()) > 1.5
signed_dist_mask = pts_inside_objmesh_labels_mask
# bsz x ws x nnj
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# bsz x ws x nnj --> the loss term
## signed distances 3 #### isgned distance 3 ###
## dotrelwithnormals, ##
# # signed_dist_mask -> the distances
# dot_rel_with_normals: bsz x ws x nnj x nnb # avg over windows and batches #
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
## get singed distance energies ### ## projection ##
# signed_dist_e = dot_rel_with_normals * signed_dist_joints_to_base_pts
signed_dist_e = -1.0 * dot_rel_with_normals
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# cannot mask in some caes
# change of isgned distances #
# intersection spline
## judeg whether inside the object and only project those one inside of the object
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
#### ==== sv_dict ==== ####
sv_dict = {
'pts_inside_objmesh_labels_mask': pts_inside_objmesh_labels_mask.detach().cpu().numpy(),
'hand_joints': hand_joints.detach().cpu().numpy(),
'obj_verts': [cur_verts.detach().cpu().numpy() for cur_verts in obj_verts],
'obj_faces': [cur_faces.detach().cpu().numpy() for cur_faces in obj_faces],
'base_pts': base_pts.detach().cpu().numpy(),
'base_normals': base_normals.detach().cpu().numpy(), # bsz x nnb x 3 -> bsz x nnb x 3 -> base normals #
'nearest_base_pts': nearest_base_pts.detach().cpu().numpy(), # bsz x ws x nnj x 3 #
'nearest_base_normals': nearest_base_normals.detach().cpu().numpy(), # bsz x ws x nnj x 3 --> base normals and pts
}
#
sv_dict_folder = "/data1/sim/mdm/tmp_saving"
os.makedirs(sv_dict_folder, exist_ok=True)
sv_dict_fn = os.path.join(sv_dict_folder, f"optim_iter_{i_iter}.npy")
np.save(sv_dict_fn, sv_dict)
print(f"Obj and subj saved to {sv_dict_fn}")
#### ==== sv_dict ==== ####
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
# shoudl take a
# how to proejct the jvertex
# hwo to project the veretex
# weighted sum of the projectiondirection
# weights of each base point
# atraction field -> should be able to learn the penetration resolving strategy
# stochestic penetration resolving strategy #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
# avg_masks
print('\tAvg masks: {}'.format(avg_masks.item()))
''' returning sampled_joints '''
sampled_joints = hand_joints
np.save("optimized_verts.npy", hand_verts.detach().cpu().numpy())
print(f"Optimized verts saved to optimized_verts.npy")
return sampled_joints.detach()
# TODO: other optimization strategies? e.g. sequential optimziation> #
def optimize_sampled_hand_joints_wobj_v2(sampled_joints, rel_base_pts_to_joints, dists_base_pts_to_joints, base_pts, base_normals, obj_verts, obj_normals, obj_faces):
# sampled_joints: bsz x ws x nnj x 3 #
# sampled_joints: bsz x ws x nnj x 3 # obj trimeshes #
tot_obj_trimeshes = get_obj_trimesh_list(obj_verts, obj_faces)
## TODO: write the collect function for object verts, normals, faces ##
### A simple penetration resolving strategy is as follows:
#### 1) get vertices in the object; 2) get nearest base points (for simplicity); 3) project the vertex to the base point ####
## 1) for joints only;
## 2) for vertices;
## 3) for vertices;
## TODO: optimzie the resolvign strategy stated above ##
bsz, ws, nnj = sampled_joints.shape[:3]
device = sampled_joints.device
coarse_lr = 0.1
num_iters = 100 # if i_iter > 0 else 1 ## nn-coarse-iters for global transformations #
mano_path = "/data1/sim/mano_models/mano/models"
# obj_verts: bsz x nnobjverts x
base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
base_normals_exp = base_normals.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
signed_dist_e_coeff = 1.0
signed_dist_e_coeff = 0.0
### start optimization ###
# setup MANO layer
mano_layer = ManoLayer(
flat_hand_mean=True,
side='right',
mano_root=mano_path, # mano_path for the mano model #
ncomps=24,
use_pca=True,
root_rot_mode='axisang',
joint_rot_mode='axisang'
).to(device)
## random init variables ##
beta_var = torch.randn([bsz, 10]).to(device)
rot_var = torch.randn([bsz * ws, 3]).to(device)
theta_var = torch.randn([bsz * ws, 24]).to(device)
transl_var = torch.randn([bsz * ws, 3]).to(device)
beta_var.requires_grad_()
rot_var.requires_grad_()
theta_var.requires_grad_()
transl_var.requires_grad_()
opt = optim.Adam([rot_var, transl_var], lr=coarse_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
if dists_base_pts_to_joints is not None:
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).to(device).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
''' strategy 2: use all base pts, rel, dists for resolving '''
# rel_base_pts_to_hand_joints: bsz x ws x nnj x nnb x 3 #
signed_dist_mask = signed_dist_base_pts_to_hand_joints < 0.
l2_dist_rel_joints_to_base_pts_mask = torch.sqrt(
torch.sum(rel_base_pts_to_hand_joints ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_base_pts_mask.float()) > 1.5
dot_rel_with_normals = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# dot_rel_with_normals: bsz x ws x nnj x nnb
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
signed_dist_e = dot_rel_with_normals * signed_dist_base_pts_to_hand_joints
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 2: use all base pts, rel, dists for resolving '''
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# dist_rhand_joints_to_base_pts = torch.sum(
# (hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
# )
# # minn_dists_idxes: bsz x ws x nnj -->
# minn_dists_to_base_pts, minn_dists_idxes = torch.min(
# dist_rhand_joints_to_base_pts, dim=-1
# )
# # base_pts: bsz x nn_base_pts x 3 #
# # base_pts: bsz x ws x nn_base_pts x 3 #
# # bsz x ws x nnj
# # object verts and object faces #
# ## other than the sampling process; not
# # bsz x ws x nnj x 3 ##
# nearest_base_pts = batched_index_select_ours(
# base_pts_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # base normalse #
# nearest_base_normals = batched_index_select_ours(
# base_normals_exp, indices=minn_dists_idxes, dim=2
# )
# # bsz x ws x nnj x 3 # # the nearest distance points may be of some ambiguous
# rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# # bsz x ws x nnj #
# signed_dist_joints_to_base_pts = torch.sum(
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
# # should not be negative
# signed_dist_mask = signed_dist_joints_to_base_pts < 0.
# l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
# torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
# ) < 0.05
# signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
# ### ==== mean of signed distances ==== ###
# signed_dist_e = torch.sum( # penetration
# -1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
# ) / torch.clamp(
# torch.sum(signed_dist_mask.float()), min=1e-5
# ).item()
''' strategy 1: use nearest base pts, rel, dists for resolving '''
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
### signed distance coeff -> the distance coeff #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
fine_lr = 0.1
num_iters = 1000
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters):
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
dist_rhand_joints_to_base_pts = torch.sum(
(hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
)
# minn_dists_idxes: bsz x ws x nnj -->
minn_dists_to_base_pts, minn_dists_idxes = torch.min(
dist_rhand_joints_to_base_pts, dim=-1
)
# base_pts: bsz x nn_base_pts x 3 #
# base_pts: bsz x ws x nn_base_pts x 3 #
# bsz x ws x nnj
# base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# bsz x ws x nnj x 3 ##
nearest_base_pts = batched_index_select_ours(
base_pts_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
nearest_base_normals = batched_index_select_ours(
base_normals_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 #
rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# bsz x ws x nnj #
signed_dist_joints_to_base_pts = torch.sum(
rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
)
# should not be negative
signed_dist_mask = signed_dist_joints_to_base_pts < 0.
l2_dist_rel_joints_to_nearest_base_pts_mask = torch.sqrt(
torch.sum(rel_joints_to_nearest_base_pts ** 2, dim=-1)
) < 0.05
signed_dist_mask = (signed_dist_mask.float() + l2_dist_rel_joints_to_nearest_base_pts_mask.float()) > 1.5
### ==== mean of signed distances ==== ###
signed_dist_e = torch.sum(
-1.0 * signed_dist_joints_to_base_pts[signed_dist_mask]
) / torch.clamp(
torch.sum(signed_dist_mask.float()), min=1e-5
).item()
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
# tot_obj_trimeshes
### refine the optimization with signed energy ##
#
# signed_dist_jts_to_nearest_base_pts = []
# tot_nearest_base_pts = []
# tot_nearest_base_normals = []
signed_dist_e_coeff = 1.0 #
fine_lr = 0.1
# num_iters = 1000 #
num_iters = 100 # reinement #
opt = optim.Adam([rot_var, transl_var, beta_var, theta_var], lr=fine_lr)
for i_iter in range(num_iters): #
opt.zero_grad()
# mano_layer #
hand_verts, hand_joints = mano_layer(torch.cat([rot_var, theta_var], dim=-1),
beta_var.unsqueeze(1).repeat(1, ws, 1).view(-1, 10), transl_var)
hand_verts = hand_verts.view(bsz, ws, 778, 3) * 0.001 ## bsz x ws x nn
hand_joints = hand_joints.view(bsz, ws, -1, 3) * 0.001
### === e1 should be close to predicted values === ###
# bsz x ws x nnj x nnb x 3 #
rel_base_pts_to_hand_joints = hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
# bs zx ws x nnj x nnb #
signed_dist_base_pts_to_hand_joints = torch.sum(
rel_base_pts_to_hand_joints * base_normals.unsqueeze(1).unsqueeze(1), dim=-1
)
rel_e = torch.sum(
(rel_base_pts_to_hand_joints - rel_base_pts_to_joints) ** 2, dim=-1
).mean()
# dists_base_pts_to_joints ## dists_base_pts_to_joints ##
if dists_base_pts_to_joints is not None: ## dists_base_pts_to_joints ##
dist_e = torch.sum(
(signed_dist_base_pts_to_hand_joints - dists_base_pts_to_joints) ** 2, dim=-1
).mean()
else:
dist_e = torch.zeros((1,), dtype=torch.float32).mean()
### ==== inside the objemesh labels ==== ###
# bsz x ws x nnj # --> objmesh insides pts labels #
pts_inside_objmesh_labels = judge_penetrated_points(tot_obj_trimeshes, hand_joints)
pts_inside_objmesh_labels_mask = pts_inside_objmesh_labels.bool()
# {
# hard projections for
''' strategy 1: use nearest base pts, rel, dists for resolving '''
### === e2 the signed distances to nearest points should not be negative to the neareste === ###
## base_pts: bsz x nn_base_pts x 3
## bsz x ws x nnj x 1 x 3 -- bsz x 1 x 1 x nnb x 3 ##
## bsz x ws x nnj x nnb ##
dist_rhand_joints_to_base_pts = torch.sum(
(hand_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)) ** 2, dim=-1
)
# minn_dists_idxes: bsz x ws x nnj #
# base_pts
minn_dists_to_base_pts, minn_dists_idxes = torch.min(
dist_rhand_joints_to_base_pts, dim=-1
)
# base_pts: bsz x nn_base_pts x 3 #
# base_pts: bsz x ws x nn_base_pts x 3 #
# bsz x ws x nnj
# base_pts_exp = base_pts.unsqueeze(1).repeat(1, ws, 1, 1).contiguous()
# bsz x ws x nnj x 3 ##
# simple penetration ##
nearest_base_pts = batched_index_select_ours(
base_pts_exp, indices=minn_dists_idxes, dim=2
)
# bsz x ws x nnj x 3 # #
nearest_base_normals = batched_index_select_ours(
base_normals_exp, indices=minn_dists_idxes, dim=2
)
tot_masks = []
tot_base_pts = []
tot_base_normals = []
tot_base_signed_dists = []
## === nearest base pts === ##
for i_bsz in range(nearest_base_pts.size(0)):
# masks, base_pts, base_normals for each frame here
# cur_bsz_
cur_bsz_masks = [pts_inside_objmesh_labels_mask[i_bsz][0]]
cur_bsz_base_pts = [nearest_base_pts[i_bsz][0]]
cur_bsz_base_normals = [nearest_base_normals[i_bsz][0]]
# nnjts #
## st frame signed dist ##
cur_bsz_st_frame_signed_dist = torch.sum(
(hand_joints[i_bsz][0] - cur_bsz_base_pts[0]) * cur_bsz_base_normals[0], dim=-1
)
cur_bsz_signed_dist = [cur_bsz_st_frame_signed_dist]
for i_fr in range(1, nearest_base_pts.size(1)):
cur_bsz_cur_fr_jts = hand_joints[i_bsz][i_fr]
# cur_bsz_cur_fr_base_pts = nearest_base_pts
# cur_fr_jts -
cur_bsz_cur_fr_prev_fr_signed_dist = torch.sum(
(cur_bsz_cur_fr_jts - cur_bsz_base_pts[-1]) * cur_bsz_base_normals[-1], dim=-1
)
# nnjts # cur
cur_bsz_cur_fr_mask = ((cur_bsz_signed_dist[-1] >= 0.).float() + (cur_bsz_cur_fr_prev_fr_signed_dist < 0.).float()) > 1.5
cur_bsz_cur_fr_base_pts = nearest_base_pts[i_bsz][i_fr].clone()
cur_bsz_cur_fr_base_pts[cur_bsz_cur_fr_mask] = cur_bsz_base_pts[-1][cur_bsz_cur_fr_mask]
cur_bsz_cur_fr_base_normals = nearest_base_normals[i_bsz][i_fr].clone()
# ### curbsz curfr base normals; ### #
cur_bsz_cur_fr_base_normals[cur_bsz_cur_fr_mask] = cur_bsz_base_normals[-1][cur_bsz_cur_fr_mask]
cur_bsz_cur_fr_signed_dist = torch.sum(
(cur_bsz_cur_fr_jts - cur_bsz_cur_fr_base_pts) * cur_bsz_cur_fr_base_normals, dim=-1
)
cur_bsz_cur_fr_signed_dist[cur_bsz_cur_fr_mask] = 0. # ot the bes points
### for masks ###
cur_bsz_masks.append(cur_bsz_cur_fr_mask)
cur_bsz_base_pts.append(cur_bsz_cur_fr_base_pts)
cur_bsz_base_normals.append(cur_bsz_cur_fr_base_normals)
#
cur_bsz_masks = torch.stack(cur_bsz_masks, dim=0)
cur_bsz_base_pts = torch.stack(cur_bsz_base_pts, dim=0)
cur_bsz_base_normals = torch.stack(cur_bsz_base_normals, dim=0)
cur_bsz_signed_dist = torch.stack(cur_bsz_signed_dist, dim=0)
tot_masks.append(cur_bsz_masks)
tot_base_pts.append(cur_bsz_base_pts)
tot_base_normals.append(cur_bsz_base_normals)
tot_base_signed_dists.append(cur_bsz_signed_dist)
# masks;
tot_masks = torch.stack(tot_masks, dim=0)
tot_base_pts = torch.stack(tot_base_pts, dim=0)
tot_base_normals = torch.stack(tot_base_normals, dim=0)
tot_base_signed_dists = torch.stack(tot_base_signed_dists, dim=0)
#
nearest_base_pts = tot_base_pts.clone() # tot base pts
nearest_base_normals = tot_base_normals.clone()
pts_inside_objmesh_labels_mask = tot_masks.clone()
# if len()
# bsz x ws x nnj x 3 #
rel_joints_to_nearest_base_pts = hand_joints - nearest_base_pts
# signed_dist_joints_to_base_pts: bsz x ws x nnj # -> disstances
# signed_dist_joints_to_base_pts = signed_dist_joints_to_base_pts.detach()
#
# dot rel
# penetraition resolving --- strategy
# dot_rel_with_normals = torch.sum( # dot rhand joints with normals #
# rel_joints_to_nearest_base_pts * nearest_base_normals, dim=-1
# )
#
dot_rel_with_normals = torch.sum( # dot rhand joints with normals #
-rel_joints_to_nearest_base_pts * rel_joints_to_nearest_base_pts, dim=-1
)
#### Get masks for penetrated joint points ####
# signed_dist_mask = (signed_dist_mask.float() + pts_inside_objmesh_labels_mask.float()) > 1.5
signed_dist_mask = pts_inside_objmesh_labels_mask
# bsz x ws x nnj
signed_dist_mask = signed_dist_mask.detach() # detach the mask #
# bsz x ws x nnj --> the loss term
## signed distances 3 #### isgned distance 3 ###
## dotrelwithnormals, ##
# # signed_dist_mask -> the distances
# dot_rel_with_normals: bsz x ws x nnj x nnb # avg over windows and batches #
avg_masks = (signed_dist_mask.float()).sum(dim=-1).mean()
## get singed distance energies ### ## projection ##
# signed_dist_e = dot_rel_with_normals * signed_dist_joints_to_base_pts
### dot_rel_with_normals -->
signed_dist_e = -1.0 * dot_rel_with_normals
signed_dist_e = torch.sum(
signed_dist_e[signed_dist_mask]
) / torch.clamp(torch.sum(signed_dist_mask.float()), min=1e-5).item()
###### ====== get loss for signed distances ==== ###
''' strategy 1: use nearest base pts, rel, dists for resolving '''
# cannot mask in some caes
# change of isgned distances #
# intersection spline
## judeg whether inside the object and only project those one inside of the object
## === e3 smoothness and prior losses === ##
pose_smoothness_loss = F.mse_loss(theta_var.view(bsz, ws, -1)[:, 1:], theta_var.view(bsz, ws, -1)[:, :-1])
shape_prior_loss = torch.mean(beta_var**2)
pose_prior_loss = torch.mean(theta_var**2)
## === e3 smoothness and prior losses === ##
# points to object vertices
#### ==== sv_dict ==== ####
sv_dict = {
'pts_inside_objmesh_labels_mask': pts_inside_objmesh_labels_mask.detach().cpu().numpy(),
'hand_joints': hand_joints.detach().cpu().numpy(),
'obj_verts': [cur_verts.detach().cpu().numpy() for cur_verts in obj_verts],
'obj_faces': [cur_faces.detach().cpu().numpy() for cur_faces in obj_faces],
'base_pts': base_pts.detach().cpu().numpy(),
'base_normals': base_normals.detach().cpu().numpy(), # bsz x nnb x 3 -> bsz x nnb x 3 -> base normals #
'nearest_base_pts': nearest_base_pts.detach().cpu().numpy(), # bsz x ws x nnj x 3 #
'nearest_base_normals': nearest_base_normals.detach().cpu().numpy(), # bsz x ws x nnj x 3 --> base normals and pts
}
#
sv_dict_folder = "/data1/sim/mdm/tmp_saving"
os.makedirs(sv_dict_folder, exist_ok=True)
sv_dict_fn = os.path.join(sv_dict_folder, f"optim_iter_{i_iter}.npy")
np.save(sv_dict_fn, sv_dict)
print(f"Obj and subj saved to {sv_dict_fn}")
#### ==== sv_dict ==== ####
## === e4 hand joints should be close to sampled hand joints === ##
dist_dec_jts_to_sampled_pts = torch.sum(
(hand_joints - sampled_joints) ** 2, dim=-1
).mean()
# shoudl take a
# how to proejct the jvertex
# hwo to project the veretex
# weighted sum of the projectiondirection
# weights of each base point
# atraction field -> should be able to learn the penetration resolving strategy
# stochestic penetration resolving strategy #
loss = pose_smoothness_loss * 0.05 + shape_prior_loss*0.001 + pose_prior_loss * 0.0001 + signed_dist_e * signed_dist_e_coeff + rel_e + dist_e + dist_dec_jts_to_sampled_pts
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tShape Prior Loss: {}'.format(shape_prior_loss.item()))
print('\tPose Prior Loss: {}'.format(pose_prior_loss.item()))
print('\tPose Smoothness Loss: {}'.format(pose_smoothness_loss.item()))
print('\tsigned_dist_e Loss: {}'.format(signed_dist_e.item()))
print('\trel_e Loss: {}'.format(rel_e.item()))
print('\tdist_e Loss: {}'.format(dist_e.item()))
print('\tdist_dec_jts_to_sampled_pts Loss: {}'.format(dist_dec_jts_to_sampled_pts.item()))
# avg_masks
print('\tAvg masks: {}'.format(avg_masks.item()))
''' returning sampled_joints '''
sampled_joints = hand_joints
np.save("optimized_verts.npy", hand_verts.detach().cpu().numpy())
print(f"Optimized verts saved to optimized_verts.npy")
return sampled_joints.detach()
##
def create_gaussian_diffusion(args): ## create guassian diffusion ##
# default params
predict_xstart = True # we always predict x_start (a.k.a. x0), that's our deal!
steps = 1000 #
scale_beta = 1. # no scaling
timestep_respacing = '' # can be used for ddim sampling, we don't use it.
learn_sigma = False # learn sigma #
rescale_timesteps = False
## noose schedule; steps; scale_beta ## ## MSE ##
betas = gd.get_named_beta_schedule(args.noise_schedule, steps, scale_beta)
loss_type = gd.LossType.MSE
if not timestep_respacing:
timestep_respacing = [steps]
print(f"dataset: {args.dataset}, rep_type: {args.rep_type}")
if args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist", "ambient_obj_base_rel_dist"]:
print(f"here! dataset: {args.dataset}, rep_type: {args.rep_type}")
cur_spaced_diffusion_model = SpacedDiffusion_Ours
# SpacedDiffusion_OursV2
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we"]:
cur_spaced_diffusion_model = SpacedDiffusion_OursV2
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we_wj"]:
cur_spaced_diffusion_model = SpacedDiffusion_OursV3
# SpacedDiffusion_OursV4
elif args.dataset in ['motion_ours'] and args.rep_type in ["obj_base_rel_dist_we_wj_latents"]:
if args.diff_joint_quants:
cur_spaced_diffusion_model = SpacedDiffusion_OursV7
elif args.diff_hand_params:
cur_spaced_diffusion_model = SpacedDiffusion_OursV9
else:
if args.diff_spatial:
cur_spaced_diffusion_model = SpacedDiffusion_OursV5
elif args.diff_latents:
cur_spaced_diffusion_model = SpacedDiffusion_OursV6
else:
cur_spaced_diffusion_model = SpacedDiffusion_OursV4
else:
cur_spaced_diffusion_model = SpacedDiffusion
### ==== predict xstart other than the noise in the model === ###
return cur_spaced_diffusion_model(
use_timesteps=space_timesteps(steps, timestep_respacing),
betas=betas,
model_mean_type=(
gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X
),
model_var_type=( ## use fixed sigmas / variances ##
(
gd.ModelVarType.FIXED_LARGE
if not args.sigma_small
else gd.ModelVarType.FIXED_SMALL # fixed small #
)
if not learn_sigma ## use learned sigmas ##
else gd.ModelVarType.LEARNED_RANGE
), ## modelvartype ##
loss_type=loss_type, ## loss_type ##
rescale_timesteps=rescale_timesteps,
lambda_vel=args.lambda_vel,
lambda_rcxyz=args.lambda_rcxyz, ## lambda
lambda_fc=args.lambda_fc,
# motion_to_rep
denoising_stra=args.denoising_stra,
inter_optim=args.inter_optim,
args=args,
)
### from decoded energies to optimized joints ###
## latent variables ##
## encoded energies ## from energies calculated from perturbed energies ##
## decoded energies should also match the clean energy term ##
## and those values should be all denormed ##
def optimize_joints_according_to_e(dec_joints, base_pts, base_normals, dec_e):
# dec_e_along_normals: bsz x (ws - 1) x nnj x nnb
dec_e_along_normals = dec_e['dec_e_along_normals']
# dec_e_vt_normals: bsz x (ws - 1) x nnj x nnb
dec_e_vt_normals = dec_e['dec_e_vt_normals']
nn_iters = 10
coarse_lr = 0.001
dec_joints.requires_grad_()
opt = optim.Adam([dec_joints], lr=coarse_lr)
for i_iter in range(nn_iters):
# dec_joints: bsz x ws x nnj x 3
# base_pts: bsz x nnb x 3
k_f = 1.
# bsz x ws x nnj x nnb x 3 #
denormed_rel_base_pts_to_rhand_joints = dec_joints.unsqueeze(-2) - base_pts.unsqueeze(1).unsqueeze(1)
k_f = 1. ## l2 rel base pts to pert rhand joints ##
# l2_rel_base_pts_to_pert_rhand_joints: bsz x nf x nnj x nnb #
l2_rel_base_pts_to_pert_rhand_joints = torch.norm(denormed_rel_base_pts_to_rhand_joints, dim=-1)
### att_forces ##
att_forces = torch.exp(-k_f * l2_rel_base_pts_to_pert_rhand_joints) # bsz x nf x nnj x nnb #
# bsz x (ws - 1) x nnj x nnb #
att_forces = att_forces[:, :-1, :, :] # attraction forces -1 #
# rhand_joints: ws x nnj x 3 # -> (ws - 1) x nnj x 3 ## rhand_joints ##
# bsz x (ws - 1) x nnj x 3 --> displacements s#
denormed_rhand_joints_disp = dec_joints[:, 1:, :, :] - dec_joints[:, :-1, :, :]
# distance -- base_normalss,; (ws - 1) x nnj x nnb x 3 --> bsz x (ws - 1) x nnj x nnb #
# signed_dist_base_pts_to_pert_rhand_joints_along_normal # bsz x (ws - 1) x nnj x nnb #
signed_dist_base_pts_to_rhand_joints_along_normal = torch.sum(
base_normals.unsqueeze(1).unsqueeze(1) * denormed_rhand_joints_disp.unsqueeze(-2), dim=-1
)
# rel_base_pts_to_pert_rhand_joints_vt_normal: bsz x (ws -1) x nnj x nnb x 3 -> the relative positions vertical to base normals #
rel_base_pts_to_rhand_joints_vt_normal = denormed_rhand_joints_disp.unsqueeze(-2) - signed_dist_base_pts_to_rhand_joints_along_normal.unsqueeze(-1) * base_normals.unsqueeze(1).unsqueeze(1)
dist_base_pts_to_rhand_joints_vt_normal = torch.sqrt(torch.sum(
rel_base_pts_to_rhand_joints_vt_normal ** 2, dim=-1
))
k_a = 1.
k_b = 1.
### bsz x (ws - 1) x nnj x nnb ###
e_disp_rel_to_base_along_normals = k_a * att_forces * torch.abs(signed_dist_base_pts_to_rhand_joints_along_normal)
# (ws - 1) x nnj x nnb # -> dist vt normals # ##
e_disp_rel_to_baes_vt_normals = k_b * att_forces * dist_base_pts_to_rhand_joints_vt_normal
# nf x nnj x nnb ---> dist_vt_normals -> nf x nnj x nnb # # torch.sqrt() ##
#
loss_cur_e_pred_e_along_normals = ((e_disp_rel_to_base_along_normals - dec_e_along_normals) ** 2).mean()
loss_cur_e_pred_e_vt_normals = ((e_disp_rel_to_baes_vt_normals - dec_e_vt_normals) ** 2).mean()
loss = loss_cur_e_pred_e_along_normals + loss_cur_e_pred_e_vt_normals
opt.zero_grad()
loss.backward()
opt.step()
print('Iter {}: {}'.format(i_iter, loss.item()), flush=True)
print('\tloss_cur_e_pred_e_along_normals: {}'.format(loss_cur_e_pred_e_along_normals.item()))
print('\tloss_cur_e_pred_e_vt_normals: {}'.format(loss_cur_e_pred_e_vt_normals.item()))
return dec_joints.detach()