Weiyu Liu committed
Commit 405900e
1 Parent(s): b7dd541

remove dep on pytorch3d

app.py CHANGED
@@ -16,10 +16,50 @@ from StructDiffusion.models.pl_models import ConditionalPoseDiffusionModel
 from StructDiffusion.diffusion.sampler import Sampler
 from StructDiffusion.diffusion.pose_conversion import get_struct_objs_poses
 from StructDiffusion.utils.files import get_checkpoint_path_from_dir
-from StructDiffusion.utils.batch_inference import move_pc_and_create_scene_simple, visualize_batch_pcs
 from StructDiffusion.utils.rearrangement import show_pcs_with_trimesh
 
 
+def move_pc_and_create_scene_simple(obj_xyzs, struct_pose, pc_poses_in_struct):
+
+    device = obj_xyzs.device
+
+    # obj_xyzs: B, N, P, 3 or 6
+    # struct_pose: B, 1, 4, 4
+    # pc_poses_in_struct: B, N, 4, 4
+
+    B, N, _, _ = pc_poses_in_struct.shape
+    _, _, P, _ = obj_xyzs.shape
+
+    current_pc_poses = torch.eye(4).repeat(B, N, 1, 1).to(device)  # B, N, 4, 4
+    # print(torch.mean(obj_xyzs, dim=2).shape)
+    current_pc_poses[:, :, :3, 3] = torch.mean(obj_xyzs[:, :, :, :3], dim=2)  # B, N, 3
+    current_pc_poses = current_pc_poses.reshape(B * N, 4, 4)  # B x N, 4, 4
+
+    struct_pose = struct_pose.repeat(1, N, 1, 1)  # B, N, 4, 4
+    struct_pose = struct_pose.reshape(B * N, 4, 4)  # B x N, 4, 4
+    pc_poses_in_struct = pc_poses_in_struct.reshape(B * N, 4, 4)  # B x N, 4, 4
+
+    goal_pc_pose = struct_pose @ pc_poses_in_struct  # B x N, 4, 4
+    # print("goal pc poses")
+    # print(goal_pc_pose)
+    goal_pc_transform = goal_pc_pose @ torch.inverse(current_pc_poses)  # B x N, 4, 4
+
+    # # important: pytorch3d uses row-major ordering, need to transpose each transformation matrix
+    # transpose = tra3d.Transform3d(matrix=goal_pc_transform.transpose(1, 2))
+    # new_obj_xyzs = obj_xyzs.reshape(B * N, P, -1)  # B x N, P, 3
+    # new_obj_xyzs[:, :, :3] = transpose.transform_points(new_obj_xyzs[:, :, :3])
+
+    # a version that does not rely on pytorch3d: apply the transforms in homogeneous coordinates
+    new_obj_xyzs = obj_xyzs.reshape(B * N, P, -1)[:, :, :3]  # B x N, P, 3
+    new_obj_xyzs = torch.concat([new_obj_xyzs, torch.ones(B * N, P, 1).to(device)], dim=-1)  # B x N, P, 4
+    new_obj_xyzs = torch.einsum('bij,bkj->bki', goal_pc_transform, new_obj_xyzs)[:, :, :3]  # B x N, P, 3
+
+    # put it back to B, N, P, 3
+    obj_xyzs[:, :, :, :3] = new_obj_xyzs.reshape(B, N, P, -1)
+
+    return obj_xyzs
+
+
 class Infer_Wrapper:
 
     def __init__(self, args, cfg):
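
The inlined function above replaces pytorch3d's row-major `Transform3d.transform_points` with a plain homogeneous-coordinate matrix multiply via `torch.einsum`. A minimal, self-contained sketch of that equivalence (the toy shapes and names below are illustrative, not from the repo):

import math
import torch

# Illustrative check: apply a batch of 4x4 poses to point clouds in homogeneous
# coordinates with torch.einsum, the same way the inlined
# move_pc_and_create_scene_simple does, and compare against an explicit
# "rotate then translate" reference.
B, P = 2, 5
c, s = math.cos(0.3), math.sin(0.3)
rot_z = torch.tensor([[c, -s, 0.0],
                      [s,  c, 0.0],
                      [0.0, 0.0, 1.0]])

transforms = torch.eye(4).repeat(B, 1, 1)   # B, 4, 4
transforms[:, :3, :3] = rot_z               # shared rotation
transforms[:, :3, 3] = torch.randn(B, 3)    # per-batch translation
points = torch.randn(B, P, 3)

# einsum route: pad points to homogeneous coordinates, then batched matrix-vector products
homo = torch.cat([points, torch.ones(B, P, 1)], dim=-1)            # B, P, 4
moved = torch.einsum('bij,bkj->bki', transforms, homo)[:, :, :3]   # B, P, 3

# reference route: R @ p + t written out explicitly
reference = torch.einsum('bij,bkj->bki', transforms[:, :3, :3], points) + transforms[:, :3, 3].unsqueeze(1)

assert torch.allclose(moved, reference, atol=1e-5)
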
requirements.txt CHANGED
@@ -7,8 +7,4 @@ pyglet==1.5.0
 openpyxl
 pytorch_lightning==1.6.1
 wandb===0.13.10
-omegaconf==2.2.2
-torch==1.12.0+cpu
-torchvision==0.13.0+cpu
-torchaudio==0.12.0
-git+https://github.com/facebookresearch/pytorch3d.git@stable
+omegaconf==2.2.2
 
src/StructDiffusion/diffusion/__pycache__/pose_conversion.cpython-38.pyc CHANGED
Binary files a/src/StructDiffusion/diffusion/__pycache__/pose_conversion.cpython-38.pyc and b/src/StructDiffusion/diffusion/__pycache__/pose_conversion.cpython-38.pyc differ
 
src/StructDiffusion/diffusion/__pycache__/sampler.cpython-38.pyc CHANGED
Binary files a/src/StructDiffusion/diffusion/__pycache__/sampler.cpython-38.pyc and b/src/StructDiffusion/diffusion/__pycache__/sampler.cpython-38.pyc differ
 
src/StructDiffusion/diffusion/pose_conversion.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import torch
-import pytorch3d.transforms as tra3d
 
 from StructDiffusion.utils.rotation_continuity import compute_rotation_matrix_from_ortho6d
 
src/StructDiffusion/diffusion/sampler.py CHANGED
@@ -1,6 +1,6 @@
 import torch
 from tqdm import tqdm
-import pytorch3d.transforms as tra3d
+# import pytorch3d.transforms as tra3d
 
 from StructDiffusion.diffusion.noise_schedule import extract
 from StructDiffusion.diffusion.pose_conversion import get_struct_objs_poses
@@ -61,236 +61,236 @@ class Sampler:
         xs = list(reversed(xs))
         return xs
 
-class SamplerV2:
-
-    def __init__(self, diffusion_model_class, diffusion_checkpoint_path,
-                 collision_model_class, collision_checkpoint_path,
-                 device, debug=False):
-
-        self.debug = debug
-        self.device = device
-
-        self.diffusion_model = diffusion_model_class.load_from_checkpoint(diffusion_checkpoint_path)
-        self.diffusion_backbone = self.diffusion_model.model
-        self.diffusion_backbone.to(device)
-        self.diffusion_backbone.eval()
-
-        self.collision_model = collision_model_class.load_from_checkpoint(collision_checkpoint_path)
-        self.collision_backbone = self.collision_model.model
-        self.collision_backbone.to(device)
-        self.collision_backbone.eval()
-
-    def sample(self, batch, num_poses):
-
-        noise_schedule = self.diffusion_model.noise_schedule
-
-        B = batch["pcs"].shape[0]
-
-        x_noisy = torch.randn((B, num_poses, 9), device=self.device)
-
-        xs = []
-        for t_index in tqdm(reversed(range(0, noise_schedule.timesteps)),
-                            desc='sampling loop time step', total=noise_schedule.timesteps):
-
-            t = torch.full((B,), t_index, device=self.device, dtype=torch.long)
-
-            # noise schedule
-            betas_t = extract(noise_schedule.betas, t, x_noisy.shape)
-            sqrt_one_minus_alphas_cumprod_t = extract(noise_schedule.sqrt_one_minus_alphas_cumprod, t, x_noisy.shape)
-            sqrt_recip_alphas_t = extract(noise_schedule.sqrt_recip_alphas, t, x_noisy.shape)
-
-            # predict noise
-            pcs = batch["pcs"]
-            sentence = batch["sentence"]
-            type_index = batch["type_index"]
-            position_index = batch["position_index"]
-            pad_mask = batch["pad_mask"]
-            # calling the backbone instead of the pytorch-lightning model
-            with torch.no_grad():
-                predicted_noise = self.diffusion_backbone.forward(t, pcs, sentence, x_noisy, type_index, position_index, pad_mask)
-
-            # compute noisy x at t
-            model_mean = sqrt_recip_alphas_t * (x_noisy - betas_t * predicted_noise / sqrt_one_minus_alphas_cumprod_t)
-            if t_index == 0:
-                x_noisy = model_mean
-            else:
-                posterior_variance_t = extract(noise_schedule.posterior_variance, t, x_noisy.shape)
-                noise = torch.randn_like(x_noisy)
-                x_noisy = model_mean + torch.sqrt(posterior_variance_t) * noise
-
-            xs.append(x_noisy)
-
-        xs = list(reversed(xs))
-
-        visualize = True
-
-        struct_pose, pc_poses_in_struct = get_struct_objs_poses(xs[0])
-        # struct_pose: B, 1, 4, 4
-        # pc_poses_in_struct: B, N, 4, 4
-
-        S = B
-        num_elite = 10
-        ####################################################
-        # only keep one copy
-
-        # N, P, 3
-        obj_xyzs = batch["pcs"][0][:, :, :3]
-        print("obj_xyzs shape", obj_xyzs.shape)
-
-        # 1, N
-        # object_pad_mask: padding location has 1
-        num_target_objs = num_poses
-        if self.diffusion_backbone.use_virtual_structure_frame:
-            num_target_objs -= 1
-        object_pad_mask = batch["pad_mask"][0][-num_target_objs:].unsqueeze(0)
-        target_object_inds = 1 - object_pad_mask
-        print("target_object_inds shape", target_object_inds.shape)
-        print("target_object_inds", target_object_inds)
-
-        N, P, _ = obj_xyzs.shape
-        print("S, N, P: {}, {}, {}".format(S, N, P))
-
-        ####################################################
-        # S, N, ...
-
-        struct_pose = struct_pose.repeat(1, N, 1, 1)  # S, N, 4, 4
-        struct_pose = struct_pose.reshape(S * N, 4, 4)  # S x N, 4, 4
-
-        new_obj_xyzs = obj_xyzs.repeat(S, 1, 1, 1)  # S, N, P, 3
-        current_pc_pose = torch.eye(4).repeat(S, N, 1, 1).to(self.device)  # S, N, 4, 4
-        current_pc_pose[:, :, :3, 3] = torch.mean(new_obj_xyzs, dim=2)  # S, N, 4, 4
-        current_pc_pose = current_pc_pose.reshape(S * N, 4, 4)  # S x N, 4, 4
-
-        # optimize xyzrpy
-        obj_params = torch.zeros((S, N, 6)).to(self.device)
-        obj_params[:, :, :3] = pc_poses_in_struct[:, :, :3, 3]
-        obj_params[:, :, 3:] = tra3d.matrix_to_euler_angles(pc_poses_in_struct[:, :, :3, :3], "XYZ")  # S, N, 6
-        #
-        # new_obj_xyzs_before_cem, goal_pc_pose_before_cem = move_pc(obj_xyzs, obj_params, struct_pose, current_pc_pose, device)
-        #
-        # if visualize:
-        #     print("visualizing rearrangements predicted by the generator")
-        #     visualize_batch_pcs(new_obj_xyzs_before_cem, S, N, P, limit_B=5)
-
-        ####################################################
-        # rank
-
-        # evaluate in batches
-        scores = torch.zeros(S).to(self.device)
-        no_intersection_scores = torch.zeros(S).to(self.device)  # the higher the better
-        num_batches = int(S / B)
-        if S % B != 0:
-            num_batches += 1
-        for b in range(num_batches):
-            if b + 1 == num_batches:
-                cur_batch_idxs_start = b * B
-                cur_batch_idxs_end = S
-            else:
-                cur_batch_idxs_start = b * B
-                cur_batch_idxs_end = (b + 1) * B
-            cur_batch_size = cur_batch_idxs_end - cur_batch_idxs_start
-
-            # print("current batch idxs start", cur_batch_idxs_start)
-            # print("current batch idxs end", cur_batch_idxs_end)
-            # print("size of the current batch", cur_batch_size)
-
-            batch_obj_params = obj_params[cur_batch_idxs_start: cur_batch_idxs_end]
-            batch_struct_pose = struct_pose[cur_batch_idxs_start * N: cur_batch_idxs_end * N]
-            batch_current_pc_pose = current_pc_pose[cur_batch_idxs_start * N:cur_batch_idxs_end * N]
-
-            new_obj_xyzs, _, subsampled_scene_xyz, _, obj_pair_xyzs = \
-                move_pc_and_create_scene_new(obj_xyzs, batch_obj_params, batch_struct_pose, batch_current_pc_pose,
-                                             target_object_inds, self.device,
-                                             return_scene_pts=False,
-                                             return_scene_pts_and_pc_idxs=False,
-                                             num_scene_pts=False,
-                                             normalize_pc=False,
-                                             return_pair_pc=True,
-                                             num_pair_pc_pts=self.collision_model.data_cfg.num_scene_pts,
-                                             normalize_pair_pc=self.collision_model.data_cfg.normalize_pc)
-
-            #######################################
-            # predict whether there are pairwise collisions
-            # if collision_score_weight > 0:
-            with torch.no_grad():
-                _, num_comb, num_pair_pc_pts, _ = obj_pair_xyzs.shape
-                # obj_pair_xyzs = obj_pair_xyzs.reshape(cur_batch_size * num_comb, num_pair_pc_pts, -1)
-                collision_logits = self.collision_backbone.forward(obj_pair_xyzs.reshape(cur_batch_size * num_comb, num_pair_pc_pts, -1))
-                collision_scores = self.collision_backbone.convert_logits(collision_logits).reshape(cur_batch_size, num_comb)  # cur_batch_size, num_comb
-
-                # debug
-                # for bi, this_obj_pair_xyzs in enumerate(obj_pair_xyzs):
-                #     print("batch id", bi)
-                #     for pi, obj_pair_xyz in enumerate(this_obj_pair_xyzs):
-                #         print("pair", pi)
-                #         # obj_pair_xyzs: 2 * P, 5
-                #         print("collision score", collision_scores[bi, pi])
-                #         trimesh.PointCloud(obj_pair_xyz[:, :3].cpu()).show()
-
-                # 1 - mean() since the collision model predicts 1 if there is a collision
-                no_intersection_scores[cur_batch_idxs_start:cur_batch_idxs_end] = 1 - torch.mean(collision_scores, dim=1)
-            if visualize:
-                print("no intersection scores", no_intersection_scores)
-            # #######################################
-            # if discriminator_score_weight > 0:
-            #     # # debug:
-            #     # print(subsampled_scene_xyz.shape)
-            #     # print(subsampled_scene_xyz[0])
-            #     # trimesh.PointCloud(subsampled_scene_xyz[0, :, :3].cpu().numpy()).show()
-            #     #
-            #     with torch.no_grad():
-            #
-            #         # Important: since this discriminator only uses local structure param, takes sentence from the first and last position
-            #         # local_sentence = sentence[:, [0, 4]]
-            #         # local_sentence_pad_mask = sentence_pad_mask[:, [0, 4]]
-            #         # sentence_disc, sentence_pad_mask_disc, position_index_dic = discriminator_inference.dataset.tensorfy_sentence(raw_sentence_discriminator, raw_sentence_pad_mask_discriminator, raw_position_index_discriminator)
-            #
-            #         sentence_disc = torch.LongTensor(
-            #             [discriminator_tokenizer.tokenize(*i) for i in raw_sentence_discriminator])
-            #         sentence_pad_mask_disc = torch.LongTensor(raw_sentence_pad_mask_discriminator)
-            #         position_index_dic = torch.LongTensor(raw_position_index_discriminator)
-            #
-            #         preds = discriminator_model.forward(subsampled_scene_xyz,
-            #                                             sentence_disc.unsqueeze(0).repeat(cur_batch_size, 1).to(device),
-            #                                             sentence_pad_mask_disc.unsqueeze(0).repeat(cur_batch_size, 1).to(device),
-            #                                             position_index_dic.unsqueeze(0).repeat(cur_batch_size, 1).to(device))
-            #         # preds = discriminator_model.forward(subsampled_scene_xyz)
-            #         preds = discriminator_model.convert_logits(preds)
-            #         preds = preds["is_circle"]  # cur_batch_size,
-            #         scores[cur_batch_idxs_start:cur_batch_idxs_end] = preds
-            #         if visualize:
-            #             print("discriminator scores", scores)
-
-        # scores = scores * discriminator_score_weight + no_intersection_scores * collision_score_weight
-        scores = no_intersection_scores
-        sort_idx = torch.argsort(scores).flip(dims=[0])[:num_elite]
-        elite_obj_params = obj_params[sort_idx]  # num_elite, N, 6
-        elite_struct_poses = struct_pose.reshape(S, N, 4, 4)[sort_idx]  # num_elite, N, 4, 4
-        elite_struct_poses = elite_struct_poses.reshape(num_elite * N, 4, 4)  # num_elite x N, 4, 4
-        elite_scores = scores[sort_idx]
-        print("elite scores:", elite_scores)
-
-        ####################################################
-        # # visualize best samples
-        # num_scene_pts = 4096  # if discriminator_num_scene_pts is None else discriminator_num_scene_pts
-        # batch_current_pc_pose = current_pc_pose[0: num_elite * N]
-        # best_new_obj_xyzs, best_goal_pc_pose, best_subsampled_scene_xyz, _, _ = \
-        #     move_pc_and_create_scene_new(obj_xyzs, elite_obj_params, elite_struct_poses, batch_current_pc_pose,
-        #                                  target_object_inds, self.device,
-        #                                  return_scene_pts=True, num_scene_pts=num_scene_pts, normalize_pc=True)
-        # if visualize:
-        #     print("visualizing elite rearrangements ranked by collision model/discriminator")
-        #     visualize_batch_pcs(best_new_obj_xyzs, num_elite, limit_B=num_elite)
-
-        # num_elite, N, 6
-        elite_obj_params = elite_obj_params.reshape(num_elite * N, -1)
-        pc_poses_in_struct = torch.eye(4).repeat(num_elite * N, 1, 1).to(self.device)
-        pc_poses_in_struct[:, :3, :3] = tra3d.euler_angles_to_matrix(elite_obj_params[:, 3:], "XYZ")
-        pc_poses_in_struct[:, :3, 3] = elite_obj_params[:, :3]
-        pc_poses_in_struct = pc_poses_in_struct.reshape(num_elite, N, 4, 4)  # num_elite, N, 4, 4
-
-        struct_pose = elite_struct_poses.reshape(num_elite, N, 4, 4)[:, 0].unsqueeze(1)  # num_elite, 1, 4, 4
-
-        return struct_pose, pc_poses_in_struct
+# class SamplerV2:
+#
+#     def __init__(self, diffusion_model_class, diffusion_checkpoint_path,
+#                  collision_model_class, collision_checkpoint_path,
+#                  device, debug=False):
+#
+#         self.debug = debug
+#         self.device = device
+#
+#         self.diffusion_model = diffusion_model_class.load_from_checkpoint(diffusion_checkpoint_path)
+#         self.diffusion_backbone = self.diffusion_model.model
+#         self.diffusion_backbone.to(device)
+#         self.diffusion_backbone.eval()
+#
+#         self.collision_model = collision_model_class.load_from_checkpoint(collision_checkpoint_path)
+#         self.collision_backbone = self.collision_model.model
+#         self.collision_backbone.to(device)
+#         self.collision_backbone.eval()
+#
+#     def sample(self, batch, num_poses):
+#
+#         noise_schedule = self.diffusion_model.noise_schedule
+#
+#         B = batch["pcs"].shape[0]
+#
+#         x_noisy = torch.randn((B, num_poses, 9), device=self.device)
+#
+#         xs = []
+#         for t_index in tqdm(reversed(range(0, noise_schedule.timesteps)),
+#                             desc='sampling loop time step', total=noise_schedule.timesteps):
+#
+#             t = torch.full((B,), t_index, device=self.device, dtype=torch.long)
+#
+#             # noise schedule
+#             betas_t = extract(noise_schedule.betas, t, x_noisy.shape)
+#             sqrt_one_minus_alphas_cumprod_t = extract(noise_schedule.sqrt_one_minus_alphas_cumprod, t, x_noisy.shape)
+#             sqrt_recip_alphas_t = extract(noise_schedule.sqrt_recip_alphas, t, x_noisy.shape)
+#
+#             # predict noise
+#             pcs = batch["pcs"]
+#             sentence = batch["sentence"]
+#             type_index = batch["type_index"]
+#             position_index = batch["position_index"]
+#             pad_mask = batch["pad_mask"]
+#             # calling the backbone instead of the pytorch-lightning model
+#             with torch.no_grad():
+#                 predicted_noise = self.diffusion_backbone.forward(t, pcs, sentence, x_noisy, type_index, position_index, pad_mask)
+#
+#             # compute noisy x at t
+#             model_mean = sqrt_recip_alphas_t * (x_noisy - betas_t * predicted_noise / sqrt_one_minus_alphas_cumprod_t)
+#             if t_index == 0:
+#                 x_noisy = model_mean
+#             else:
+#                 posterior_variance_t = extract(noise_schedule.posterior_variance, t, x_noisy.shape)
+#                 noise = torch.randn_like(x_noisy)
+#                 x_noisy = model_mean + torch.sqrt(posterior_variance_t) * noise
+#
+#             xs.append(x_noisy)
+#
+#         xs = list(reversed(xs))
+#
+#         visualize = True
+#
+#         struct_pose, pc_poses_in_struct = get_struct_objs_poses(xs[0])
+#         # struct_pose: B, 1, 4, 4
+#         # pc_poses_in_struct: B, N, 4, 4
+#
+#         S = B
+#         num_elite = 10
+#         ####################################################
+#         # only keep one copy
+#
+#         # N, P, 3
+#         obj_xyzs = batch["pcs"][0][:, :, :3]
+#         print("obj_xyzs shape", obj_xyzs.shape)
+#
+#         # 1, N
+#         # object_pad_mask: padding location has 1
+#         num_target_objs = num_poses
+#         if self.diffusion_backbone.use_virtual_structure_frame:
+#             num_target_objs -= 1
+#         object_pad_mask = batch["pad_mask"][0][-num_target_objs:].unsqueeze(0)
+#         target_object_inds = 1 - object_pad_mask
+#         print("target_object_inds shape", target_object_inds.shape)
+#         print("target_object_inds", target_object_inds)
+#
+#         N, P, _ = obj_xyzs.shape
+#         print("S, N, P: {}, {}, {}".format(S, N, P))
+#
+#         ####################################################
+#         # S, N, ...
+#
+#         struct_pose = struct_pose.repeat(1, N, 1, 1)  # S, N, 4, 4
+#         struct_pose = struct_pose.reshape(S * N, 4, 4)  # S x N, 4, 4
+#
+#         new_obj_xyzs = obj_xyzs.repeat(S, 1, 1, 1)  # S, N, P, 3
+#         current_pc_pose = torch.eye(4).repeat(S, N, 1, 1).to(self.device)  # S, N, 4, 4
+#         current_pc_pose[:, :, :3, 3] = torch.mean(new_obj_xyzs, dim=2)  # S, N, 4, 4
+#         current_pc_pose = current_pc_pose.reshape(S * N, 4, 4)  # S x N, 4, 4
+#
+#         # optimize xyzrpy
+#         obj_params = torch.zeros((S, N, 6)).to(self.device)
+#         obj_params[:, :, :3] = pc_poses_in_struct[:, :, :3, 3]
+#         obj_params[:, :, 3:] = tra3d.matrix_to_euler_angles(pc_poses_in_struct[:, :, :3, :3], "XYZ")  # S, N, 6
+#         #
+#         # new_obj_xyzs_before_cem, goal_pc_pose_before_cem = move_pc(obj_xyzs, obj_params, struct_pose, current_pc_pose, device)
+#         #
+#         # if visualize:
+#         #     print("visualizing rearrangements predicted by the generator")
+#         #     visualize_batch_pcs(new_obj_xyzs_before_cem, S, N, P, limit_B=5)
+#
+#         ####################################################
+#         # rank
+#
+#         # evaluate in batches
+#         scores = torch.zeros(S).to(self.device)
+#         no_intersection_scores = torch.zeros(S).to(self.device)  # the higher the better
+#         num_batches = int(S / B)
+#         if S % B != 0:
+#             num_batches += 1
+#         for b in range(num_batches):
+#             if b + 1 == num_batches:
+#                 cur_batch_idxs_start = b * B
+#                 cur_batch_idxs_end = S
+#             else:
+#                 cur_batch_idxs_start = b * B
+#                 cur_batch_idxs_end = (b + 1) * B
+#             cur_batch_size = cur_batch_idxs_end - cur_batch_idxs_start
+#
+#             # print("current batch idxs start", cur_batch_idxs_start)
+#             # print("current batch idxs end", cur_batch_idxs_end)
+#             # print("size of the current batch", cur_batch_size)
+#
+#             batch_obj_params = obj_params[cur_batch_idxs_start: cur_batch_idxs_end]
+#             batch_struct_pose = struct_pose[cur_batch_idxs_start * N: cur_batch_idxs_end * N]
+#             batch_current_pc_pose = current_pc_pose[cur_batch_idxs_start * N:cur_batch_idxs_end * N]
+#
+#             new_obj_xyzs, _, subsampled_scene_xyz, _, obj_pair_xyzs = \
+#                 move_pc_and_create_scene_new(obj_xyzs, batch_obj_params, batch_struct_pose, batch_current_pc_pose,
+#                                              target_object_inds, self.device,
+#                                              return_scene_pts=False,
+#                                              return_scene_pts_and_pc_idxs=False,
+#                                              num_scene_pts=False,
+#                                              normalize_pc=False,
+#                                              return_pair_pc=True,
+#                                              num_pair_pc_pts=self.collision_model.data_cfg.num_scene_pts,
+#                                              normalize_pair_pc=self.collision_model.data_cfg.normalize_pc)
+#
+#             #######################################
+#             # predict whether there are pairwise collisions
+#             # if collision_score_weight > 0:
+#             with torch.no_grad():
+#                 _, num_comb, num_pair_pc_pts, _ = obj_pair_xyzs.shape
+#                 # obj_pair_xyzs = obj_pair_xyzs.reshape(cur_batch_size * num_comb, num_pair_pc_pts, -1)
+#                 collision_logits = self.collision_backbone.forward(obj_pair_xyzs.reshape(cur_batch_size * num_comb, num_pair_pc_pts, -1))
+#                 collision_scores = self.collision_backbone.convert_logits(collision_logits).reshape(cur_batch_size, num_comb)  # cur_batch_size, num_comb
+#
+#                 # debug
+#                 # for bi, this_obj_pair_xyzs in enumerate(obj_pair_xyzs):
+#                 #     print("batch id", bi)
+#                 #     for pi, obj_pair_xyz in enumerate(this_obj_pair_xyzs):
+#                 #         print("pair", pi)
+#                 #         # obj_pair_xyzs: 2 * P, 5
+#                 #         print("collision score", collision_scores[bi, pi])
+#                 #         trimesh.PointCloud(obj_pair_xyz[:, :3].cpu()).show()
+#
+#                 # 1 - mean() since the collision model predicts 1 if there is a collision
+#                 no_intersection_scores[cur_batch_idxs_start:cur_batch_idxs_end] = 1 - torch.mean(collision_scores, dim=1)
+#             if visualize:
+#                 print("no intersection scores", no_intersection_scores)
+#             # #######################################
+#             # if discriminator_score_weight > 0:
+#             #     # # debug:
+#             #     # print(subsampled_scene_xyz.shape)
+#             #     # print(subsampled_scene_xyz[0])
+#             #     # trimesh.PointCloud(subsampled_scene_xyz[0, :, :3].cpu().numpy()).show()
+#             #     #
+#             #     with torch.no_grad():
+#             #
+#             #         # Important: since this discriminator only uses local structure param, takes sentence from the first and last position
+#             #         # local_sentence = sentence[:, [0, 4]]
+#             #         # local_sentence_pad_mask = sentence_pad_mask[:, [0, 4]]
+#             #         # sentence_disc, sentence_pad_mask_disc, position_index_dic = discriminator_inference.dataset.tensorfy_sentence(raw_sentence_discriminator, raw_sentence_pad_mask_discriminator, raw_position_index_discriminator)
+#             #
+#             #         sentence_disc = torch.LongTensor(
+#             #             [discriminator_tokenizer.tokenize(*i) for i in raw_sentence_discriminator])
+#             #         sentence_pad_mask_disc = torch.LongTensor(raw_sentence_pad_mask_discriminator)
+#             #         position_index_dic = torch.LongTensor(raw_position_index_discriminator)
+#             #
+#             #         preds = discriminator_model.forward(subsampled_scene_xyz,
+#             #                                             sentence_disc.unsqueeze(0).repeat(cur_batch_size, 1).to(device),
+#             #                                             sentence_pad_mask_disc.unsqueeze(0).repeat(cur_batch_size, 1).to(device),
+#             #                                             position_index_dic.unsqueeze(0).repeat(cur_batch_size, 1).to(device))
+#             #         # preds = discriminator_model.forward(subsampled_scene_xyz)
+#             #         preds = discriminator_model.convert_logits(preds)
+#             #         preds = preds["is_circle"]  # cur_batch_size,
+#             #         scores[cur_batch_idxs_start:cur_batch_idxs_end] = preds
+#             #         if visualize:
+#             #             print("discriminator scores", scores)
+#
+#         # scores = scores * discriminator_score_weight + no_intersection_scores * collision_score_weight
+#         scores = no_intersection_scores
+#         sort_idx = torch.argsort(scores).flip(dims=[0])[:num_elite]
+#         elite_obj_params = obj_params[sort_idx]  # num_elite, N, 6
+#         elite_struct_poses = struct_pose.reshape(S, N, 4, 4)[sort_idx]  # num_elite, N, 4, 4
+#         elite_struct_poses = elite_struct_poses.reshape(num_elite * N, 4, 4)  # num_elite x N, 4, 4
+#         elite_scores = scores[sort_idx]
+#         print("elite scores:", elite_scores)
+#
+#         ####################################################
+#         # # visualize best samples
+#         # num_scene_pts = 4096  # if discriminator_num_scene_pts is None else discriminator_num_scene_pts
+#         # batch_current_pc_pose = current_pc_pose[0: num_elite * N]
+#         # best_new_obj_xyzs, best_goal_pc_pose, best_subsampled_scene_xyz, _, _ = \
+#         #     move_pc_and_create_scene_new(obj_xyzs, elite_obj_params, elite_struct_poses, batch_current_pc_pose,
+#         #                                  target_object_inds, self.device,
+#         #                                  return_scene_pts=True, num_scene_pts=num_scene_pts, normalize_pc=True)
+#         # if visualize:
+#         #     print("visualizing elite rearrangements ranked by collision model/discriminator")
+#         #     visualize_batch_pcs(best_new_obj_xyzs, num_elite, limit_B=num_elite)
+#
+#         # num_elite, N, 6
+#         elite_obj_params = elite_obj_params.reshape(num_elite * N, -1)
+#         pc_poses_in_struct = torch.eye(4).repeat(num_elite * N, 1, 1).to(self.device)
+#         pc_poses_in_struct[:, :3, :3] = tra3d.euler_angles_to_matrix(elite_obj_params[:, 3:], "XYZ")
+#         pc_poses_in_struct[:, :3, 3] = elite_obj_params[:, :3]
+#         pc_poses_in_struct = pc_poses_in_struct.reshape(num_elite, N, 4, 4)  # num_elite, N, 4, 4
+#
+#         struct_pose = elite_struct_poses.reshape(num_elite, N, 4, 4)[:, 0].unsqueeze(1)  # num_elite, 1, 4, 4
+#
+#         return struct_pose, pc_poses_in_struct
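
SamplerV2 is commented out rather than ported because it still depends on `tra3d.matrix_to_euler_angles` and `tra3d.euler_angles_to_matrix`. If it were re-enabled without pytorch3d, those two calls would need torch-only stand-ins. A rough sketch under the assumption that the "XYZ" convention composes rotations as Rx @ Ry @ Rz (the function names below are hypothetical, the formulas are not verified against pytorch3d, and gimbal lock is ignored):

import torch

# Hypothetical torch-only stand-ins for the two pytorch3d calls used by SamplerV2.
# Assumes "XYZ" means R = Rx(a) @ Ry(b) @ Rz(c); verify against pytorch3d before
# relying on this, and note that the gimbal-lock case (cos(b) == 0) is not handled.

def euler_angles_to_matrix_xyz(angles):
    # angles: (..., 3) -> rotation matrices (..., 3, 3)
    a, b, c = angles.unbind(-1)
    ca, sa = torch.cos(a), torch.sin(a)
    cb, sb = torch.cos(b), torch.sin(b)
    cc, sc = torch.cos(c), torch.sin(c)
    row0 = torch.stack([cb * cc, -cb * sc, sb], dim=-1)
    row1 = torch.stack([ca * sc + sa * sb * cc, ca * cc - sa * sb * sc, -sa * cb], dim=-1)
    row2 = torch.stack([sa * sc - ca * sb * cc, sa * cc + ca * sb * sc, ca * cb], dim=-1)
    return torch.stack([row0, row1, row2], dim=-2)

def matrix_to_euler_angles_xyz(R):
    # R: (..., 3, 3) -> angles (..., 3); valid away from cos(b) == 0
    b = torch.asin(R[..., 0, 2].clamp(-1.0, 1.0))
    a = torch.atan2(-R[..., 1, 2], R[..., 2, 2])
    c = torch.atan2(-R[..., 0, 1], R[..., 0, 0])
    return torch.stack([a, b, c], dim=-1)

# quick round-trip check on angles well inside (-pi/2, pi/2)
angles = torch.empty(4, 3).uniform_(-1.0, 1.0)
R = euler_angles_to_matrix_xyz(angles)
assert torch.allclose(matrix_to_euler_angles_xyz(R), angles, atol=1e-5)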