CM2000112 / internals /pipelines /pose_detector.py

Upload folder using huggingface_hub

a3d6c18 over 1 year ago

7.1 kB

	from pathlib import Path
	from typing import Optional, Union

	from mmdet.apis import inference_detector, init_detector
	from mmpose.apis import (
	inference_top_down_pose_model,
	init_pose_model,
	process_mmdet_results,
	vis_pose_result,
	)
	from mmpose.datasets import DatasetInfo
	from PIL import Image, ImageDraw
	from torch import ge

	from internals.util.commons import download_file, download_image
	from internals.util.config import get_root_dir


	class PoseDetector:
	    """Estimate a human pose from an image and render it as a colored skeleton.

	    The pipeline is two-stage: an mmdet detector proposes person boxes, then an
	    mmpose top-down model predicts keypoints for each box.  The keypoints are
	    re-ordered into an 18-point layout (see ``__kim``) and drawn with PIL.

	    ``load()`` must be called before ``infer()`` / ``transform()``; it creates
	    the ``det_model``, ``pose_model``, ``dataset`` and ``dataset_info``
	    attributes those methods read.
	    """

	    __det_model = "https://comic-assets.s3.ap-south-1.amazonaws.com/models/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
	    __pose_model = "https://comic-assets.s3.ap-south-1.amazonaws.com/models/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth"

	    # Class-level default; load() shadows it with a per-instance True.
	    __loaded = False

	    # Output slot -> index into the keypoint list built in infer() (the model
	    # keypoints followed by the appended midpoint at index 17).
	    __kim = [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]

	    # Limbs to draw: [start point, end point, color].  Point numbers are
	    # 1-based indices into the 18-point layout.
	    __pose_logical_map = [
	        [1, 2, "#000099"],
	        [1, 16, "#330099"],
	        [1, 15, "#660099"],
	        [16, 18, "#990099"],
	        [15, 17, "#990066"],
	        [2, 3, "#990001"],
	        [2, 6, "#993301"],
	        [3, 4, "#996502"],
	        [4, 5, "#999900"],
	        [6, 7, "#669900"],
	        [7, 8, "#349900"],
	        [2, 9, "#009900"],
	        [2, 12, "#009999"],
	        [9, 10, "#009966"],
	        [10, 11, "#009966"],
	        [12, 13, "#006699"],
	        [13, 14, "#013399"],
	    ]

	    # Dot color for each of the 18 points, by 0-based point index.
	    __points_color = [
	        "#ff0000",
	        "#ff5600",
	        "#ffaa01",
	        "#ffff00",
	        "#aaff03",
	        "#53ff00",
	        "#03ff00",
	        "#03ff55",
	        "#03ffaa",
	        "#03ffff",
	        "#05aaff",
	        "#0055ff",
	        "#0000ff",
	        "#5500ff",
	        "#aa00ff",
	        "#ff00aa",
	        "#ff00ff",
	        "#ff0055",
	    ]

	    @staticmethod
	    def __cache_dir() -> Path:
	        """Return ``~/.cache``, creating it first so later writes cannot fail on a missing directory."""
	        cache_dir = Path.home() / ".cache"
	        cache_dir.mkdir(parents=True, exist_ok=True)
	        return cache_dir

	    def load(self):
	        """Download both checkpoints and build the detector and pose models on CPU.

	        Calling it again on the same instance is a no-op.
	        """
	        if self.__loaded:
	            return

	        cache_dir = self.__cache_dir()
	        det_path = cache_dir / self.__det_model.split("/")[-1]
	        pose_path = cache_dir / self.__pose_model.split("/")[-1]

	        download_file(self.__det_model, det_path)
	        download_file(self.__pose_model, pose_path)

	        self.det_model = init_detector(
	            f"{get_root_dir()}/external/faster_rcnn_r50_fpn_coco.py",
	            str(det_path),
	            device="cpu",
	        )
	        self.pose_model = init_pose_model(
	            f"{get_root_dir()}/external/hrnet_w48_coco_256x192.py",
	            str(pose_path),
	            device="cpu",
	        )

	        test_cfg = self.pose_model.cfg.data["test"]
	        self.dataset = test_cfg["type"]
	        # Only wrap the metadata when the config actually provides it;
	        # DatasetInfo is built from a config dict, not from None.
	        dataset_info = test_cfg.get("dataset_info", None)
	        self.dataset_info = (
	            DatasetInfo(dataset_info) if dataset_info is not None else None
	        )

	        self.__loaded = True

	    def transform(
	        self,
	        image: "Union[str, Image.Image]",
	        width: int,
	        height: int,
	        client_coordinates: Optional[dict],
	    ) -> "Image.Image":
	        """Infer the pose in ``image`` and draw it on a ``width`` x ``height`` canvas.

	        Args:
	            image: A PIL image, or a string that is handed to ``download_image``.
	            width: Output width; the input is resized to it before inference.
	            height: Output height; the input is resized to it before inference.
	            client_coordinates: Optional ``{"candidate": [...], "subset": [...]}``
	                dict.  When it carries points they are rescaled from a
	                384x384 canvas and merged into the inferred pose via
	                ``map_head_to_body``.

	        Returns:
	            The rendered skeleton image.
	        """
	        if isinstance(image, str):
	            image = download_image(image)

	        infer_coordinates = self.infer(image, width, height)
	        # .get() so a payload without a "candidate" key is treated as "no points".
	        if client_coordinates and client_coordinates.get("candidate"):
	            client_coordinates = self.resize_coordinates(
	                client_coordinates, 384, 384, width, height
	            )
	            infer_coordinates = self.map_head_to_body(
	                client_coordinates, infer_coordinates
	            )

	        print(infer_coordinates)

	        return self.create_pose(infer_coordinates, width, height)

	    def resize_coordinates(
	        self, data: dict, ori_width, ori_height, new_width, new_height
	    ):
	        """Scale every point in ``data["candidate"]`` by one uniform factor.

	        The factor is ``new / ori`` measured on the shorter sides when the new
	        canvas is wider than tall, and on the longer sides otherwise.

	        Returns:
	            A new dict with the scaled ``candidate`` list and the original
	            ``subset`` (an empty list when ``data`` has none).
	        """
	        if new_width > new_height:
	            ori_ref = min(ori_width, ori_height)
	            new_ref = min(new_width, new_height)
	        else:
	            ori_ref = max(ori_width, ori_height)
	            new_ref = max(new_width, new_height)

	        new_points = [
	            [pair[0] * new_ref / ori_ref, pair[1] * new_ref / ori_ref]
	            for pair in data["candidate"]
	        ]

	        return {"candidate": new_points, "subset": data.get("subset", [])}

	    def create_pose(self, data: dict, width: int, height: int) -> "Image.Image":
	        """Draw the skeleton described by ``data["candidate"]`` on a black canvas.

	        Limbs whose end points are missing are skipped, so an empty or partial
	        pose (e.g. nobody detected) yields a blank or partial drawing instead
	        of raising ``IndexError``.
	        """
	        image = Image.new("RGB", (width, height), "black")
	        draw = ImageDraw.Draw(image)

	        points = data["candidate"]
	        count = len(points)

	        for start, end, color in self.__pose_logical_map:
	            # Limb end points are 1-based; skip limbs that reference absent points.
	            if start > count or end > count:
	                continue
	            xy = points[start - 1]
	            x1y1 = points[end - 1]
	            draw.line(
	                (xy[0], xy[1], x1y1[0], x1y1[1]),
	                fill=color,
	                width=4,
	            )

	        # zip() stops at the shorter sequence, so surplus points without a
	        # configured color are simply not drawn.
	        for point, color in zip(points, self.__points_color):
	            x = point[0]
	            y = point[1]
	            draw.ellipse((x - 3, y - 3, x + 3, y + 3), fill=color)

	        return image

	    def infer(self, imageUrl: "Union[str, Image.Image]", width, height) -> dict:
	        """Run detection and pose estimation on an image resized to ``width`` x ``height``.

	        Args:
	            imageUrl: A PIL image (any ``Image.Image`` subclass) or a string
	                that is handed to ``download_image``.
	            width: Width the image is resized to before inference.
	            height: Height the image is resized to before inference.

	        Returns:
	            ``{"candidate": [...], "subset": [...]}`` where ``candidate`` holds
	            at most 18 ``[x, y]`` points and ``subset`` lists, per accepted
	            person, the candidate indices produced for that person.

	        Raises:
	            TypeError: If ``imageUrl`` is neither a string nor a PIL image.
	        """
	        candidate = []
	        subset = []

	        cache_dir = self.__cache_dir()
	        # isinstance (not an exact type comparison) so images returned by
	        # Image.open(), which are Image.Image subclasses, are accepted too.
	        if isinstance(imageUrl, Image.Image):
	            image_path = cache_dir / "input.png"
	            imageUrl.resize((width, height)).save(image_path)
	        elif isinstance(imageUrl, str):
	            image_path = cache_dir / imageUrl.split("/")[-1]
	            download_image(imageUrl).resize((width, height)).save(image_path)
	        else:
	            raise TypeError("Invalid image type")

	        mmdet_results = inference_detector(self.det_model, str(image_path))
	        person_results = process_mmdet_results(mmdet_results, 1)

	        pose_results, _ = inference_top_down_pose_model(
	            self.pose_model,
	            str(image_path),
	            person_results,
	            bbox_thr=0.3,
	            format="xyxy",
	            dataset=self.dataset,
	            dataset_info=self.dataset_info,
	            return_heatmap=False,
	            outputs=None,
	        )

	        for result in pose_results:
	            # Fifth bbox entry is compared against 0.9; weaker boxes are dropped.
	            if result["bbox"][4] < 0.9:
	                continue

	            keypoints = result["keypoints"][:, :2].tolist()
	            # Midpoint of keypoints 5 and 6 (presumably the shoulders in COCO
	            # order - confirm) is appended at index 17; __kim places it in
	            # slot 1, the slot map_head_to_body treats as the neck.
	            midpoint = [
	                (keypoints[5][0] + keypoints[6][0]) / 2,
	                (keypoints[5][1] + keypoints[6][1]) / 2,
	            ]
	            keypoints.append(midpoint)

	            first = len(candidate)
	            candidate.extend(self.__convert_keypoints(keypoints))
	            subset.append(list(range(first, len(candidate))))

	        # NOTE(review): candidate is cut to 18 points (one person) while subset
	        # may still list indices for further people - confirm callers expect this.
	        return {"candidate": candidate[:18], "subset": subset[:18]}

	    def map_head_to_body(
	        self, client_coordinates: dict, infer_coordinates: dict
	    ) -> dict:
	        """Replace inferred points 1..13 with the client's, aligned at point 1 (the neck).

	        The client points are translated so that the client neck lands on the
	        inferred neck.  Inferred points 0 and 14+ are kept as they are.  The
	        input lists are not modified.  If either pose has fewer than 14
	        points the inferred pose is returned unchanged.

	        Returns:
	            A new ``{"candidate": [...], "subset": [...]}`` dict.
	        """
	        client_points = client_coordinates["candidate"]
	        # Work on a copy so the caller's list is not mutated.
	        merged_points = list(infer_coordinates["candidate"])
	        subset = infer_coordinates["subset"]

	        if len(client_points) < 14 or len(merged_points) < 14:
	            return {"candidate": merged_points, "subset": subset}

	        c_neck = client_points[1]
	        i_neck = merged_points[1]
	        dx = i_neck[0] - c_neck[0]
	        dy = i_neck[1] - c_neck[1]

	        for index in range(1, 14):
	            point = client_points[index]
	            merged_points[index] = [point[0] + dx, point[1] + dy]

	        return {"candidate": merged_points, "subset": subset}

	    def __convert_keypoints(self, keypoints):
	        """Re-order ``keypoints`` into the 18-point layout defined by ``__kim``."""
	        return [keypoints[i] for i in self.__kim]