Spaces:
Sleeping
Sleeping
import numpy as np | |
import pycocotools.mask as mask_util | |
from detectron2.structures import BoxMode | |
# MotionNet: based on instances_to_coco_json and the related code in DensePose
def prediction_to_json(instances, img_id: str):
    """
    Convert the model output for one image into JSON-serializable result dicts.

    Every result carries the COCO-style keys ``image_id``, ``category_id``,
    ``bbox``, ``score`` and ``segmentation`` followed by the MotionNet fields.
    Which MotionNet fields are emitted depends on what ``instances`` holds:

    * ``pdim`` present: ``pdim``, ``ptrans``, ``prot``, ``mtype``, ``morigin``,
      ``maxis``, ``mstate``, ``mstatemax``
    * otherwise, ``mextrinsic`` present: ``mtype``, ``morigin``, ``maxis``,
      ``mextrinsic``, ``mstate``, ``mstatemax``
    * otherwise: ``mtype``, ``morigin``, ``maxis``, ``mstate``, ``mstatemax``

    Args:
        instances (Instances): the output of the model. Must provide
            ``pred_boxes``, ``scores``, ``pred_classes``, ``pred_masks`` and
            the fields of the applicable layout listed above.
        img_id (str): the image id in COCO

    Returns:
        list[dict]: one dict per instance, in the order of ``instances``

    Raises:
        Whatever the attribute access on ``instances`` raises when a field of
        the selected layout is missing (e.g. ``pdim`` without ``ptrans``). The
        failure now happens before any result is built instead of surfacing
        as a ``NameError`` in the middle of the loop.
    """
    # NOTE(review): .numpy() presumably requires the predictions to already
    # live on the CPU -- confirm against the caller.
    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS).tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    # Pick the MotionNet field layout once; it is the same for every instance.
    # The tuple order is the key order of the emitted dicts, and "pdim" takes
    # precedence over "mextrinsic" when both are present.
    # NOTE(review): the original code tags the pose fields with "2.0.3",
    # presumably a version marker -- confirm.
    if instances.has("pdim"):
        field_names = (
            "pdim", "ptrans", "prot",
            "mtype", "morigin", "maxis", "mstate", "mstatemax",
        )
    elif instances.has("mextrinsic"):
        field_names = (
            "mtype", "morigin", "maxis", "mextrinsic", "mstate", "mstatemax",
        )
    else:
        field_names = ("mtype", "morigin", "maxis", "mstate", "mstatemax")
    columns = [getattr(instances, name).tolist() for name in field_names]

    # MotionNet has masks in the annotation. They are RLE-encoded because raw
    # masks are large and the evaluator keeps the outputs of the whole dataset.
    rles = []
    for mask in instances.pred_masks:
        rle = mask_util.encode(
            np.array(mask[:, :, None], order="F", dtype="uint8")
        )[0]
        # "counts" comes back from mask_util as bytes, which the json writer
        # cannot serialize; decode it to str (utf-8 is also what
        # pycocotools/_mask.pyx uses).
        rle["counts"] = rle["counts"].decode("utf-8")
        rles.append(rle)

    results = []
    for category, box, score, rle, *values in zip(
        classes, boxes, scores, rles, *columns
    ):
        result = {
            "image_id": img_id,
            "category_id": category,
            "bbox": box,
            "score": score,
            "segmentation": rle,
        }
        # Append the MotionNet fields in layout order.
        result.update(zip(field_names, values))
        results.append(result)
    return results