# gene-hoi-denoising / common/data_utils.py
"""
This file contains functions that are used to perform data augmentation.
"""
import cv2
import numpy as np
import torch
from loguru import logger
def get_transform(center, scale, res, rot=0):
    """Build the 3x3 homogeneous matrix mapping image coords to a (res[0], res[1]) crop.

    `scale` is normalized (1 unit = 200 px box side); `rot` is in degrees.
    """
    side = 200 * scale  # crop box side length in pixels
    mat = np.zeros((3, 3))
    mat[0, 0] = float(res[1]) / side
    mat[1, 1] = float(res[0]) / side
    mat[0, 2] = res[1] * (0.5 - float(center[0]) / side)
    mat[1, 2] = res[0] * (0.5 - float(center[1]) / side)
    mat[2, 2] = 1
    if rot != 0:
        # negate to match the rotation direction used by cropping
        angle = -rot * np.pi / 180
        sn, cs = np.sin(angle), np.cos(angle)
        rot_mat = np.zeros((3, 3))
        rot_mat[0, :2] = [cs, -sn]
        rot_mat[1, :2] = [sn, cs]
        rot_mat[2, 2] = 1
        # rotate about the crop center: shift to origin, rotate, shift back
        shift_to = np.eye(3)
        shift_to[0, 2] = -res[1] / 2
        shift_to[1, 2] = -res[0] / 2
        shift_back = shift_to.copy()
        shift_back[:2, 2] *= -1
        mat = np.dot(shift_back, np.dot(rot_mat, np.dot(shift_to, mat)))
    return mat
def transform(pt, center, scale, res, invert=0, rot=0):
    """Map a 1-based pixel location between the original image and crop frames."""
    mat = get_transform(center, scale, res, rot=rot)
    if invert:
        mat = np.linalg.inv(mat)
    # shift to 0-based coords, apply the homogeneous transform, shift back
    homog = np.array([pt[0] - 1, pt[1] - 1, 1.0])
    mapped = np.dot(mat, homog)
    return mapped[:2].astype(int) + 1
def rotate_2d(pt_2d, rot_rad):
    """Rotate a 2D point counter-clockwise by rot_rad radians."""
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    rotated_x = pt_2d[0] * cs - pt_2d[1] * sn
    rotated_y = pt_2d[0] * sn + pt_2d[1] * cs
    return np.array([rotated_x, rotated_y], dtype=np.float32)
def gen_trans_from_patch_cv(
    c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False
):
    """Affine transform from a scaled/rotated source box to a dst patch.

    The transform is solved from 3 corresponding points (center, down, right).
    Set inv=True to obtain the patch-to-source transform instead.
    """
    # scale-augmented source box
    src_w = src_width * scale
    src_h = src_height * scale
    rot_rad = np.pi * rot / 180
    # source triangle: center plus rotated half-extents
    src_center = np.array([c_x, c_y], dtype=np.float32)
    src_down = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
    src_right = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)
    # destination triangle: patch center plus axis-aligned half-extents
    dst_center = np.array([dst_width * 0.5, dst_height * 0.5], dtype=np.float32)
    dst_down = np.array([0, dst_height * 0.5], dtype=np.float32)
    dst_right = np.array([dst_width * 0.5, 0], dtype=np.float32)
    src_pts = np.stack(
        [src_center, src_center + src_down, src_center + src_right]
    ).astype(np.float32)
    dst_pts = np.stack(
        [dst_center, dst_center + dst_down, dst_center + dst_right]
    ).astype(np.float32)
    if inv:
        src_pts, dst_pts = dst_pts, src_pts
    trans = cv2.getAffineTransform(np.float32(src_pts), np.float32(dst_pts))
    return trans.astype(np.float32)
def generate_patch_image(
    cvimg,
    bbox,
    scale,
    rot,
    out_shape,
    interpl_strategy,
    gauss_kernel=5,
    gauss_sigma=8.0,
):
    """Warp a patch out of `cvimg` given a (cx, cy, w, h) bbox.

    Returns (patch, trans, inv_trans) where inv_trans maps patch coords
    back to the original image space.
    """
    cx = float(bbox[0])
    cy = float(bbox[1])
    bw = float(bbox[2])
    bh = float(bbox[3])
    trans = gen_trans_from_patch_cv(
        cx, cy, bw, bh, out_shape[1], out_shape[0], scale, rot
    )
    # Gaussian blur before warping acts as anti-aliasing
    blurred = cv2.GaussianBlur(cvimg.copy(), (gauss_kernel, gauss_kernel), gauss_sigma)
    patch = cv2.warpAffine(
        blurred, trans, (int(out_shape[1]), int(out_shape[0])), flags=interpl_strategy
    ).astype(np.float32)
    inv_trans = gen_trans_from_patch_cv(
        cx, cy, bw, bh, out_shape[1], out_shape[0], scale, rot, inv=True
    )
    return patch, trans, inv_trans
def augm_params(is_train, flip_prob, noise_factor, rot_factor, scale_factor):
    """Sample augmentation parameters (flip, pixel noise, rotation, scale).

    At eval time (is_train=False) the identity augmentation is returned.
    """
    flip = 0           # horizontal flip flag
    pn = np.ones(3)    # per-channel multiplicative pixel noise
    rot = 0            # rotation in degrees
    sc = 1             # scale multiplier
    if is_train:
        # flip with probability flip_prob (currently unsupported -> hard failure)
        if np.random.uniform() <= flip_prob:
            flip = 1
            assert False, "Flipping not supported"
        # per-channel noise sampled uniformly in [1 - nf, 1 + nf]
        pn = np.random.uniform(1 - noise_factor, 1 + noise_factor, 3)
        # rotation ~ N(0, rot_factor), clamped to [-2*rf, 2*rf]
        rot = min(2 * rot_factor, max(-2 * rot_factor, np.random.randn() * rot_factor))
        # scale ~ N(1, scale_factor), clamped to [1 - sf, 1 + sf]
        sc = min(
            1 + scale_factor,
            max(1 - scale_factor, np.random.randn() * scale_factor + 1),
        )
        # drop the rotation 60% of the time
        if np.random.uniform() <= 0.6:
            rot = 0
    return {"flip": flip, "pn": pn, "rot": rot, "sc": sc}
def rgb_processing(is_train, rgb_img, center, bbox_dim, augm_dict, img_res):
    """Crop, rotate, and color-jitter an RGB image.

    Returns a CHW float32 array scaled to [0, 1].
    """
    sc = augm_dict["sc"]
    pn = augm_dict["pn"]
    # crop side length in pixels (scale is normalized: 1 unit = 200 px)
    crop_dim = int(sc * bbox_dim * 200)
    # fast patch extraction via a single affine warp
    patch = generate_patch_image(
        rgb_img,
        [center[0], center[1], crop_dim, crop_dim],
        1.0,
        augm_dict["rot"],
        [img_res, img_res],
        cv2.INTER_CUBIC,
    )[0]
    # channel-wise multiplicative pixel noise, clamped to the valid [0, 255] range
    for ch in range(3):
        patch[:, :, ch] = np.minimum(255.0, np.maximum(0.0, patch[:, :, ch] * pn[ch]))
    # HWC byte-range image -> CHW float in [0, 1]
    return np.transpose(patch.astype("float32"), (2, 0, 1)) / 255.0
def transform_kp2d(kp2d, bbox):
    """Map 2D keypoints from full-image space into a 1000 px crop around bbox.

    bbox is (cx, cy, scale) with normalized scale (1 unit = 200 px).
    The crop covers 1.5x the bbox side and is resized to 1000 px.
    """
    assert isinstance(kp2d, np.ndarray)
    assert kp2d.ndim == 2
    cx, cy, scale = bbox
    side = 200 * scale            # bbox side in pixels
    cap_dim = 1000                # output crop size in pixels
    factor = cap_dim / (1.5 * side)
    out = np.copy(kp2d)
    # translate so the crop's top-left corner is the origin, then rescale
    out[:, 0] = (out[:, 0] - (cx - 1.5 / 2 * side)) * factor
    out[:, 1] = (out[:, 1] - (cy - 1.5 / 2 * side)) * factor
    return out
def j2d_processing(kp, center, bbox_dim, augm_dict, img_res):
    """Crop/rotate GT 2D keypoints, then normalize them to [-1, 1].

    NOTE: the transformed pixel coordinates are written back into `kp`
    in place before normalization (matching the original behavior).
    """
    scale = augm_dict["sc"] * bbox_dim
    rot = augm_dict["rot"]
    # transform each keypoint (1-based convention expected by `transform`)
    for idx in range(kp.shape[0]):
        kp[idx, 0:2] = transform(
            kp[idx, 0:2] + 1,
            center,
            scale,
            [img_res, img_res],
            rot=rot,
        )
    # pixel coords -> normalized [-1, 1] coords
    return normalize_kp2d_np(kp, img_res).astype("float32")
def pose_processing(pose, augm_dict):
    """Apply the rotation augmentation to SMPL theta parameters (72,).

    Only the global orientation (first 3 axis-angle values) is rotated,
    and it is written back into `pose` in place.
    """
    pose[:3] = rot_aa(pose[:3], augm_dict["rot"])
    # flipping is intentionally not applied here
    return pose.astype("float32")
def rot_aa(aa, rot):
    """Rotate an axis-angle vector `aa` by `rot` degrees about the camera z-axis."""
    theta = np.deg2rad(-rot)  # negated to match image-space rotation direction
    R = np.array(
        [
            [np.cos(theta), -np.sin(theta), 0],
            [np.sin(theta), np.cos(theta), 0],
            [0, 0, 1],
        ]
    )
    # axis-angle -> rotation matrix for the global body orientation
    global_rotmat, _ = cv2.Rodrigues(aa)
    # compose with the in-plane rotation and convert back to axis-angle
    new_aa, _ = cv2.Rodrigues(np.dot(R, global_rotmat))
    return (new_aa.T)[0]
def denormalize_images(images):
    """Invert ImageNet normalization on a (N, 3, H, W) image batch."""
    std = torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
    mean = torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)
    return images * std + mean
def read_img(img_fn, dummy_shape):
    """Read an image as float32 RGB; fall back to a zero image on failure.

    Args:
        img_fn: path to the image file.
        dummy_shape: shape of the zero array returned when loading fails.

    Returns:
        (image, success_flag) — flag is False when the dummy image is returned.
    """
    try:
        cv_img = _read_img(img_fn)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being swallowed as a "missing image".
        logger.warning(f"Unable to load {img_fn}")
        cv_img = np.zeros(dummy_shape, dtype=np.float32)
        return cv_img, False
    return cv_img, True
def _read_img(img_fn):
    """Load an image from disk and return it as a float32 RGB array."""
    bgr = cv2.imread(img_fn)  # OpenCV loads in BGR channel order
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(np.float32)
def normalize_kp2d_np(kp2d: np.ndarray, img_res):
    """Map (N, 3) pixel keypoints to [-1, 1]; the confidence column is untouched."""
    assert kp2d.shape[1] == 3
    out = np.copy(kp2d)
    out[:, :2] = 2.0 * kp2d[:, :2] / img_res - 1.0
    return out
def unnormalize_2d_kp(kp_2d_np: np.ndarray, res):
    """Map (N, 3) normalized keypoints in [-1, 1] back to pixel coordinates."""
    assert kp_2d_np.shape[1] == 3
    out = np.copy(kp_2d_np)
    out[:, :2] = 0.5 * res * (out[:, :2] + 1)
    return out
def normalize_kp2d(kp2d: torch.Tensor, img_res):
    """Map a (B, N, >=2) batch of pixel keypoints to [-1, 1] (extra cols untouched)."""
    assert kp2d.dim() == 3
    out = kp2d.clone()
    out[..., :2] = 2.0 * kp2d[..., :2] / img_res - 1.0
    return out
def unormalize_kp2d(kp2d_normalized: torch.Tensor, img_res):
    """Map a (B, N, 2) batch of normalized keypoints back to pixel coordinates."""
    assert kp2d_normalized.dim() == 3
    assert kp2d_normalized.shape[2] == 2
    return 0.5 * img_res * (kp2d_normalized.clone() + 1)
def get_wp_intrix(fixed_focal: float, img_res):
    """Weak-perspective intrinsics: fixed focal length, principal point at patch center."""
    center = img_res // 2
    intrx = torch.zeros([3, 3])
    intrx[0, 0] = fixed_focal   # fx
    intrx[1, 1] = fixed_focal   # fy
    intrx[0, 2] = center        # cx
    intrx[1, 2] = center        # cy
    intrx[2, 2] = 1.0
    return intrx
def get_aug_intrix(
    intrx, fixed_focal: float, img_res, use_gt_k, bbox_cx, bbox_cy, scale
):
    """Return camera intrinsics adjusted for the cropped/scaled patch.

    If use_gt_k, the GT intrinsics `intrx` are rescaled (in place) to match
    the patch. Otherwise a weak-perspective camera with a fixed focal length
    and centered principal point is constructed.
    """
    if use_gt_k:
        # bbox side in full-image pixels, and the bbox-to-patch resize factor
        dim = scale * 200.0
        k_scale = float(img_res) / dim
        # With (x1, y1) the bbox top-left corner, the augmented intrinsics are:
        #   fx' = k*fx, fy' = k*fy, cx' = k*(cx - x1), cy' = k*(cy - y1)
        intrx[0, 0] *= k_scale
        intrx[1, 1] *= k_scale
        intrx[0, 2] -= bbox_cx - dim / 2.0
        intrx[1, 2] -= bbox_cy - dim / 2.0
        intrx[0, 2] *= k_scale
        intrx[1, 2] *= k_scale
        return intrx
    # weak perspective with a fixed focal length on the patch
    return get_wp_intrix(fixed_focal, img_res)