# opdmulti-demo / visualization.py
# Commit 6d737eb (atwang): "local app demo is working"
import os
from copy import deepcopy
import imageio
import open3d as o3d
import numpy as np
from PIL import Image, ImageChops
# RGB color written onto the points of the moving part by rotate_part / translate_part.
POINT_COLOR = [1, 0, 0] # red for demonstration
# RGB color intended for the axis arrow mesh (not referenced by the functions visible in this file).
ARROW_COLOR = [0, 1, 0] # green
# File suffixes (case-sensitive, matched with str.endswith) that batch_trim treats as images.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
def generate_rotation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    axis_vector: np.ndarray,
    origin: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a rotation motion of a part.

    For each of ``num_samples`` evenly spaced angles in [range_min, range_max], the masked part of a copy of
    ``pcd`` is rotated with ``rotate_part``, the scene is rendered off-screen and the frame is written to
    ``{output_dir}/{idx}.png`` (idx = 0 .. num_samples - 1). ``pcd`` and ``axis_arrow`` are never modified.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of rotation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param range_min: float representing the minimum range of motion in radians
    :param range_max: float representing the maximum range of motion in radians
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    # Fixed extra rotation about the x-axis applied to every frame before capture; it does not depend on the
    # sampled angle, so it is computed once.
    angle_x = np.pi * 5.5 / 5  # 198 degrees
    view_rotation = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * angle_x)

    # The range is already in radians, so the sampled angles are passed to rotate_part directly.
    for idx, angle_rad in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so every frame starts from the unmodified inputs
        rotated_pcd = rotate_part(deepcopy(pcd), mask, axis_vector, origin, angle_rad)
        rotated_arrow = deepcopy(axis_arrow)

        # A fresh off-screen Visualizer is used for each frame
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(rotated_pcd)
            vis.add_geometry(rotated_arrow)

            # NOTE(review): the view rotation is deliberately applied AFTER add_geometry, as before; the
            # camera is presumably set up from the geometry at add time, so reordering could change the
            # rendered viewpoint - confirm before moving these lines.
            view_center = rotated_pcd.get_center()
            rotated_pcd.rotate(view_rotation, center=view_center)
            rotated_arrow.rotate(view_rotation, center=view_center)

            # Capture and save the image
            vis.capture_screen_image(f"{output_dir}/{idx}.png", do_render=True)
        finally:
            # Always release the window, even if rendering or saving failed
            vis.destroy_window()
def generate_translation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    end: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a translation motion of a part.

    For each of ``num_samples`` evenly spaced distances in [range_min, range_max], the masked part of a copy
    of ``pcd`` is moved with ``translate_part``, the scene is rendered off-screen and the frame is written to
    ``{output_dir}/{idx}.png`` (idx = 0 .. num_samples - 1). ``pcd`` and ``axis_arrow`` are never modified.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of translation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param end: np.array of dimensions (3, ) handed to translate_part as the vector of the axis of translation
        (translate_part normalizes it, so only its direction matters)
    :param range_min: float representing the minimum range of motion
    :param range_max: float representing the maximum range of motion
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output (created if missing)
    """
    os.makedirs(output_dir, exist_ok=True)

    # Fixed extra rotation about the x-axis applied to every frame before capture; computed once because it
    # does not depend on the sampled distance.
    # TODO: not sure why we need this rotation for the translation, and when it would be desired
    angle_x = np.pi * 5.5 / 5  # 198 degrees
    view_rotation = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * angle_x)

    for idx, translate_distance in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so every frame starts from the unmodified inputs
        translated_pcd = translate_part(deepcopy(pcd), mask, end, translate_distance.item())
        translated_arrow = deepcopy(axis_arrow)

        # A fresh off-screen Visualizer is used for each frame
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(translated_pcd)
            vis.add_geometry(translated_arrow)

            # NOTE(review): the view rotation is deliberately applied AFTER add_geometry, as before; the
            # camera is presumably set up from the geometry at add time, so reordering could change the
            # rendered viewpoint - confirm before moving these lines.
            view_center = translated_pcd.get_center()
            translated_pcd.rotate(view_rotation, center=view_center)
            translated_arrow.rotate(view_rotation, center=view_center)

            # Capture and save the image
            vis.capture_screen_image(f"{output_dir}/{idx}.png", do_render=True)
        finally:
            # Always release the window, even if rendering or saving failed
            vis.destroy_window()
def get_rotation_matrix_from_vectors(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
    """
    Find the rotation matrix that aligns vec1 to vec2

    Both inputs are normalized internally, so only their directions matter. Parallel inputs yield the
    identity and anti-parallel inputs yield a 180 degree rotation about an axis perpendicular to vec1
    (the naive formula divides by zero in both of those cases).

    :param vec1: A 3d "source" vector (non-zero)
    :param vec2: A 3d "destination" vector (non-zero)
    :return: A transform matrix (3x3) which when applied to vec1, aligns it with vec2.
    """
    a = (vec1 / np.linalg.norm(vec1)).reshape(3)
    b = (vec2 / np.linalg.norm(vec2)).reshape(3)
    v = np.cross(a, b)  # rotation axis scaled by sin(theta)
    c = np.dot(a, b)  # cos(theta)

    if c < -1.0 + 1e-10:
        # Anti-parallel: the cross product vanishes, so the rotation axis is ambiguous. Pick the coordinate
        # axis least aligned with `a` to build a well-conditioned perpendicular axis, then rotate by pi
        # around it (R = 2 * n n^T - I).
        helper = np.zeros(3)
        helper[np.argmin(np.abs(a))] = 1.0
        axis = np.cross(a, helper)
        axis /= np.linalg.norm(axis)
        return 2.0 * np.outer(axis, axis) - np.eye(3)

    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    # (1 - c) / s**2 == 1 / (1 + c) because s**2 == 1 - c**2; this form stays finite when a == b (s == 0).
    return np.eye(3) + kmat + kmat.dot(kmat) / (1.0 + c)
def draw_line(start_point: np.ndarray, end_point: np.ndarray) -> o3d.geometry.TriangleMesh:
    """
    Generate 3D arrow mesh (cylinder shaft plus cone head) oriented from start_point towards end_point.

    The arrow has a fixed size: end_point only determines its direction, not its length. The shaft is
    placed with the cylinder's own origin at start_point, and the cone's origin is placed 0.4 units along
    the direction from start_point. (Per the Open3D primitives documentation a cylinder is created centered
    on the origin along Z and a cone with its base at the origin - so the shaft is presumably centered on
    start_point; confirm visually if exact placement matters.)

    start_point and end_point must differ; coincident points give a zero-length direction and NaN geometry.

    :param start_point: np.ndarray of dimensions (3, ) representing the start point of the axis
    :param end_point: np.ndarray of dimensions (3, ) representing the end point of the axis
    :return: mesh object representing axis from start to end
    """
    # Unit direction of the axis
    direction_vector = end_point - start_point
    normalized_vector = direction_vector / np.linalg.norm(direction_vector)

    # Rotation taking the Z-axis (the primitives' native axis) onto the desired direction
    rot_mat = get_rotation_matrix_from_vectors(np.array([0, 0, 1]), normalized_vector)

    # Shaft of the arrow: rotate about the world origin, then move to start_point
    cylinder_length = 0.9  # 90% of the total arrow length, you can adjust as needed
    cylinder_radius = 0.01  # Adjust the thickness of the arrow shaft
    cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=cylinder_radius, height=cylinder_length)
    cylinder.rotate(rot_mat, center=[0, 0, 0])
    cylinder.translate(start_point)

    # Head of the arrow: rotate about the world origin, then move 0.4 units along the axis from start_point
    cone_height = 0.1  # 10% of the total arrow length, adjust as needed
    cone_radius = 0.03  # Adjust the size of the arrowhead
    cone = o3d.geometry.TriangleMesh.create_cone(radius=cone_radius, height=cone_height)
    cone.rotate(rot_mat, center=[0, 0, 0])
    cone.translate(start_point + normalized_vector * 0.4)

    return cylinder + cone
def rotate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, origin: np.ndarray, angle_rad: float
) -> o3d.geometry.PointCloud:
    """
    Generate rotated point cloud of mask based on provided angle around axis.

    Points are addressed in row-major order: pixel (i, j) of the mask corresponds to point index
    ``i * width + j``. Every masked point is rotated about the axis through ``origin`` and recolored with
    POINT_COLOR. The point cloud is modified in place and also returned.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation; it is
        normalized here, so it does not need to be unit length
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param angle_rad: angle in radians to rotate mask part
    :return: point cloud object after rotation of masked part
    """
    # Get the coordinates and colors of the point cloud as numpy arrays
    points_np = np.asarray(pcd.points)
    colors_np = np.asarray(pcd.colors)

    # Rodrigues' formula is only a rotation for a unit axis, so normalize first. A zero axis is left as is,
    # which yields the identity matrix (no rotation), matching the previous behavior for that input.
    axis = np.asarray(axis_vector, dtype=float).reshape(3)
    axis_norm = np.linalg.norm(axis)
    if axis_norm > 0:
        axis = axis / axis_norm

    # Skew-symmetric cross-product matrix of the rotation axis
    K = np.array(
        [
            [0, -axis[2], axis[1]],
            [axis[2], 0, -axis[0]],
            [-axis[1], axis[0], 0],
        ]
    )
    # Compute rotation matrix using Rodrigues' formula
    R = np.eye(3) + np.sin(angle_rad) * K + (1 - np.cos(angle_rad)) * np.dot(K, K)

    # Flat (row-major) indices of the pixels that belong to the part
    part_indices = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)

    # Shift so the axis passes through the world origin, rotate every selected point at once
    # (row vectors, hence the transpose), then shift back.
    pivot = np.asarray(origin, dtype=float).reshape(3)
    points_np[part_indices] = (points_np[part_indices] - pivot) @ R.T + pivot
    colors_np[part_indices] = POINT_COLOR

    # Update the point cloud's coordinates and colors
    pcd.points = o3d.utility.Vector3dVector(points_np)
    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    return pcd
def translate_part(
    pcd: "o3d.geometry.PointCloud", mask: np.ndarray, axis_vector: np.ndarray, distance: float
) -> "o3d.geometry.PointCloud":
    """
    Generate translated point cloud of mask based on provided distance along axis.

    Points are addressed in row-major order: pixel (i, j) of the mask corresponds to point index
    ``i * width + j``. Every masked point is moved by ``distance`` along the normalized ``axis_vector`` and
    recolored with POINT_COLOR. The point cloud is modified in place and also returned.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of translation
        (must be non-zero; it is normalized here)
    :param distance: distance within coordinate system to translate mask part
    :return: point cloud object after translation of masked part
    """
    normalized_vector = axis_vector / np.linalg.norm(axis_vector)
    translation_vector = normalized_vector * distance

    # Get the colors and coordinates of the point cloud as numpy arrays
    colors_np = np.asarray(pcd.colors)
    points_np = np.asarray(pcd.points)

    # Flat (row-major) indices of the pixels that belong to the part; recolor and shift them in one step
    part_indices = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)
    colors_np[part_indices] = POINT_COLOR
    points_np[part_indices] += translation_vector

    # Update point cloud colors and coordinates
    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    pcd.points = o3d.utility.Vector3dVector(points_np)
    return pcd
def batch_trim(images_path: str, save_path: str, identical: bool = False) -> None:
    """
    Trim white spaces from all images in the given path and save new images to folder.

    The "background" of an image is taken to be the color of its top-left pixel.

    :param images_path: local path to folder containing all images. Images must have the extension ".png", ".jpg", or
        ".jpeg".
    :param save_path: local path to folder in which to save trimmed images (created if missing)
    :param identical: if True, will apply same crop (the union of all individual bounding boxes) to all images and
        save them as "{idx}.png" in frame order; else each image will have its whitespace trimmed independently
        and is saved under its original filename. Note that in the latter case, each image may have a slightly
        different size.
    """

    def get_trim(im):
        """Return the bounding box (left, upper, right, lower) of the non-background content, or None."""
        bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
        diff = ImageChops.difference(im, bg)
        # Amplify the difference and subtract an offset so near-background noise is ignored
        diff = ImageChops.add(diff, diff, 2.0, -100)
        return diff.getbbox()

    def frame_order(filename):
        """Sort key placing purely numeric stems in numeric order (2.png before 10.png), others after."""
        stem = os.path.splitext(filename)[0]
        if stem.isdecimal():
            return (0, int(stem), filename)
        return (1, 0, filename)

    image_files = sorted(
        (name for name in os.listdir(images_path) if name.endswith(IMAGE_EXTENSIONS)),
        key=frame_order,
    )
    os.makedirs(save_path, exist_ok=True)

    if identical:
        # First pass: union of every image's bounding box, so one crop fits all frames
        optimal_box = None
        for image_file in image_files:
            with Image.open(os.path.join(images_path, image_file)) as im:
                bbox = get_trim(im)
                if bbox is None:
                    bbox = (0, 0, im.size[0], im.size[1])  # bound entire image
            if optimal_box is None:
                optimal_box = bbox
            else:
                optimal_box = (
                    min(optimal_box[0], bbox[0]),
                    min(optimal_box[1], bbox[1]),
                    max(optimal_box[2], bbox[2]),
                    max(optimal_box[3], bbox[3]),
                )

        # Second pass: crop returns a NEW image, which is the one that has to be saved
        for idx, image_file in enumerate(image_files):
            with Image.open(os.path.join(images_path, image_file)) as im:
                cropped = im.crop(optimal_box)
            # Saved after the source file is closed so writing in place is safe
            cropped.save(os.path.join(save_path, f"{idx}.png"))
    else:  # trim each image separately
        for image_file in image_files:
            with Image.open(os.path.join(images_path, image_file)) as im:
                bbox = get_trim(im)
                trimmed = im.crop(bbox) if bbox else im
                trimmed.save(os.path.join(save_path, image_file))
def create_gif(image_folder_path: str, num_samples: int, gif_filename: str = "output.gif") -> None:
    """
    Compile a numbered sequence of images into a gif stored alongside them.

    :param image_folder_path: path to folder containing images (non-recursive). The frames are expected to be
        named {i}.png for each i in 0, 1, ..., num_samples - 1.
    :param num_samples: number of sampled images to compile into gif.
    :param gif_filename: filename for gif (written inside image_folder_path), defaults to "output.gif"
    """
    # Load the frames in index order (0.png, 1.png, ...)
    frames = []
    for frame_idx in range(num_samples):
        frames.append(imageio.imread(f"{image_folder_path}/{frame_idx}.png"))

    # Every frame must have the same shape, i.e. there is at most one distinct shape
    assert (
        len({frame.shape for frame in frames}) <= 1
    ), f"Found some images with a different shape: {[im.shape for im in frames]}"

    # Write the gif next to the frames; duration=0.1 is forwarded to imageio.mimsave unchanged
    imageio.mimsave(f"{image_folder_path}/{gif_filename}", frames, duration=0.1)