import matplotlib.pyplot as plt |
import requests, validators |
import torch |
import pathlib |
import numpy as np |
from PIL import Image |
import cv2 as cv |
from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation |
from transformers.image_transforms import rgb_to_id |
# Input image, available checkpoints and compute device for this exploratory script.
TEST_IMAGE = Image.open(r"images/9999999_00783_d_0000358.jpg")
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

image = TEST_IMAGE

# The MaskFormer processor/model classes are used below, so they must be
# loaded from the MaskFormer checkpoint (not the DETR one).
processor = MaskFormerImageProcessor.from_pretrained(MODEL_NAME_MASKFORMER)
model = MaskFormerForInstanceSegmentation.from_pretrained(MODEL_NAME_MASKFORMER)
model.to(DEVICE)

# Bind the result of `.to()` explicitly so the tensors fed to the model are
# guaranteed to be the ones living on DEVICE.
inputs = processor(images=image, return_tensors="pt").to(DEVICE)

# Pure inference: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)

# PIL reports size as (width, height); the post-processor expects (height, width).
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
def show_mask_for_number(map_to_use, label_id):
    """
    Display the binary mask of every pixel in the map that equals `label_id`.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to look for in the map.
        NOTE(review): the REPL notes in this file suggest the map stores
        segment ids (`segments_info[i]["id"]`), not class label ids - confirm.
    """
    # When CUDA is in use the map may live on the GPU; NumPy needs a host copy.
    segmentation = map_to_use.cpu() if torch.cuda.is_available() else map_to_use
    selected = segmentation.numpy() == label_id

    # Scale the boolean mask to a 0/255 uint8 image before handing it to PIL.
    picture = Image.fromarray((selected * 255).astype(np.uint8))
    plt.imshow(picture)
    plt.show()
def show_mask_for_number_over_image(map_to_use, label_id, image_object):
    """
    Display `image_object` with the mask for `label_id` drawn on top of it.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to look for in the map.
        NOTE(review): the REPL notes in this file suggest the map stores
        segment ids (`segments_info[i]["id"]`), not class label ids - confirm.
    image_object: image drawn underneath the mask; passed straight to `plt.imshow`.
    """
    # When CUDA is in use the map may live on the GPU; NumPy needs a host copy.
    segmentation = map_to_use.cpu() if torch.cuda.is_available() else map_to_use
    selected = segmentation.numpy() == label_id

    # Scale the boolean mask to a 0/255 uint8 image before handing it to PIL.
    overlay = Image.fromarray((selected * 255).astype(np.uint8))

    # Base image first, then the mask at 25% opacity on the same axes.
    plt.imshow(image_object)
    plt.imshow(overlay, alpha=0.25)
    plt.show()
def get_coordinates_for_bb_simple(map_to_use, label_id):
    """
    Return the inclusive bounding box of all pixels equal to `label_id`.

    map_to_use: You have to pass in `results["segmentation"]` (a 2-D torch tensor).
    label_id: value to look for in the map.
        NOTE(review): the REPL notes in this file suggest the map stores
        segment ids (`segments_info[i]["id"]`), not class label ids - confirm.

    Returns:
        ((row_min, col_min), (row_max, col_max)) in array-index order, i.e.
        axis 0 first. This is the transpose of OpenCV's (x, y) convention.
        Both corners are inclusive.

    Raises:
        ValueError: if no pixel in the map equals `label_id`.
    """
    # `.cpu()` is a no-op for tensors already on the host, so no device check is needed.
    mask = map_to_use.cpu().numpy() == label_id

    rows, cols = np.nonzero(mask)
    if rows.size == 0:
        raise ValueError(f"label_id {label_id!r} does not occur in the segmentation map")

    # Vectorised reductions; the builtin min()/max() would iterate element by element.
    return (rows.min(), cols.min()), (rows.max(), cols.max())
def make_simple_box(left_top, right_bottom, map_size):
    """
    Draw the outline of a bounding box on an empty mask and display it.

    left_top: first corner as (axis-0 index, axis-1 index), e.g. the first
        tuple returned by `get_coordinates_for_bb_simple`.
    right_bottom: opposite corner in the same order; treated as inclusive.
    map_size: shape of the mask to create (array shape, axis 0 first).
    """
    full_mask = np.full(map_size, False)
    row_min, col_min = left_top
    row_max, col_max = right_bottom

    # Both corners are inclusive, so every slice runs to max + 1; with
    # exclusive ends the (row_max, col_max) corner pixel would never be drawn.
    full_mask[row_min:row_max + 1, col_min] = True
    full_mask[row_min:row_max + 1, col_max] = True
    full_mask[row_min, col_min:col_max + 1] = True
    full_mask[row_max, col_min:col_max + 1] = True

    # Scale the boolean outline to a 0/255 uint8 image before handing it to PIL.
    visual_mask = Image.fromarray((full_mask * 255).astype(np.uint8))
    plt.imshow(visual_mask)
    plt.show()
def test(map_to_use, label_id):
    """
    Display the mask for `label_id` with its bounding box drawn at half intensity.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to look for in the map.
        NOTE(review): the REPL notes in this file suggest the map stores
        segment ids (`segments_info[i]["id"]`), not class label ids - confirm.
    """
    # `.cpu()` is a no-op for tensors already on the host, so no device check is needed.
    mask = map_to_use.cpu().numpy() == label_id

    (row_min, col_min), (row_max, col_max) = get_coordinates_for_bb_simple(map_to_use, label_id)

    # Draw on a float copy: assigning 0.5 into a boolean array would be cast
    # to True and the box would be indistinguishable from the mask itself.
    canvas = mask.astype(np.float32)

    # Corners are inclusive, so slices run to max + 1 to close the rectangle.
    canvas[row_min:row_max + 1, col_min] = 0.5
    canvas[row_min:row_max + 1, col_max] = 0.5
    canvas[row_min, col_min:col_max + 1] = 0.5
    canvas[row_max, col_min:col_max + 1] = 0.5

    # Mask pixels become 255, box pixels 127, background 0.
    visual_mask = Image.fromarray((canvas * 255).astype(np.uint8))
    plt.imshow(visual_mask)
    plt.show()
""" |
>>> model.config.id2label |
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', |
13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', |
27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', |
39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', |
54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', |
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush', 80: 'banner', 81: 'blanket', |
82: 'bridge', 83: 'cardboard', 84: 'counter', 85: 'curtain', 86: 'door-stuff', 87: 'floor-wood', 88: 'flower', 89: 'fruit', 90: 'gravel', 91: 'house', 92: 'light', 93: 'mirror-stuff', 94: 'net', 95: 'pillow', |
96: 'platform', 97: 'playingfield', 98: 'railroad', 99: 'river', 100: 'road', 101: 'roof', 102: 'sand', 103: 'sea', 104: 'shelf', 105: 'snow', 106: 'stairs', 107: 'tent', 108: 'towel', 109: 'wall-brick', |
110: 'wall-stone', 111: 'wall-tile', 112: 'wall-wood', 113: 'water-other', 114: 'window-blind', 115: 'window-other', 116: 'tree-merged', 117: 'fence-merged', 118: 'ceiling-merged', 119: 'sky-other-merged', |
120: 'cabinet-merged', 121: 'table-merged', 122: 'floor-other-merged', 123: 'pavement-merged', 124: 'mountain-merged', 125: 'grass-merged', 126: 'dirt-merged', 127: 'paper-merged', 128: 'food-other-merged', |
129: 'building-other-merged', 130: 'rock-merged', 131: 'wall-other-merged', 132: 'rug-merged'} |
>>> model.config.id2label[123] |
'pavement-merged' |
>>> results["segments_info"][1] |
{'id': 2, 'label_id': 123, 'was_fused': False, 'score': 0.995813} |
""" |
""" |
>>> Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8)) |
<PIL.Image.Image image mode=L size=2000x1500 at 0x7F07773691C0> |
>>> temp = Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8)) |
""" |
""" |
>>> mask = (results["segmentation"].cpu().numpy == 4) |
>>> mask = (results["segmentation"].cpu().numpy() == 4) |
>>> mask |
array([[False, False, False, ..., False, False, False], |
[False, False, False, ..., False, False, False], |
[False, False, False, ..., False, False, False], |
..., |
[False, False, False, ..., False, False, False], |
[False, False, False, ..., False, False, False], |
[False, False, False, ..., False, False, False]]) |
>>> visual_mask = (mask * 255).astype(np.uint8) |
>>> visual_mask = Image.fromarray(visual_mask) |
>>> plt.imshow(visual_mask) |
<matplotlib.image.AxesImage object at 0x7f0761e78040> |
>>> plt.show() |
""" |
""" |
>>> mask = (results["segmentation"].cpu().numpy() == 1) |
>>> visual_mask = (mask*255).astype(np.uint8) |
>>> visual_mask = Image.fromarray(visual_mask) |
>>> plt.imshow(visual_mask) |
<matplotlib.image.AxesImage object at 0x7f0760298550> |
>>> plt.show() |
>>> results["segments_info"][0] |
{'id': 1, 'label_id': 25, 'was_fused': False, 'score': 0.998022} |
>>> |
""" |
""" |
>>> np.where(mask==True) |
(array([300, 300, 300, ..., 392, 392, 392]), array([452, 453, 454, ..., 473, 474, 475])) |
>>> max(np.where(mask==True)[0]) |
392 |
>>> min(np.where(mask==True)[0]) |
300 |
>>> max(np.where(mask==True)[1]) |
538 |
>>> min(np.where(mask==True)[1]) |
399 |
""" |
def contour_map(map_to_use, label_id):
    """
    Run OpenCV contour detection on the mask of pixels equal to `label_id`.

    map_to_use: You have to pass in `results["segmentation"]`
    label_id: value to look for in the map.
        NOTE(review): the REPL notes in this file suggest the map stores
        segment ids (`segments_info[i]["id"]`), not class label ids - confirm.

    Returns the `(contours, hierarchy)` pair produced by `cv.findContours`
    with `cv.RETR_TREE` and `cv.CHAIN_APPROX_SIMPLE`.
    """
    # When CUDA is in use the map may live on the GPU; NumPy needs a host copy.
    segmentation = map_to_use.cpu() if torch.cuda.is_available() else map_to_use

    # findContours is given a 0/255 uint8 image rather than the boolean mask.
    binary_image = ((segmentation.numpy() == label_id) * 255).astype(np.uint8)

    contours, hierarchy = cv.findContours(binary_image, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    return contours, hierarchy
""" |
>>> mask = (results["segmentation"].cpu().numpy() == 1) |
>>> visual_mask = (mask* 255).astype(np.uint8) |
>>> import cv2 as cv |
>>> contours, hierarchy = cv.findContours(visual_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE) |
>>> contours.shape |
Traceback (most recent call last): |
File "<stdin>", line 1, in <module> |
AttributeError: 'tuple' object has no attribute 'shape' |
>>> contours[0].shape |
(7, 1, 2) |
>>> shrunk = contours[0][:, 0, :] |
>>> shrunk |
array([[400, 340], |
[399, 341], |
[400, 342], |
[401, 342], |
[402, 341], |
[403, 341], |
[402, 340]], dtype=int32) |
>>> get_coordinates_for_bb_simple(results["segmentation"], 1) |
((300, 399), (392, 538)) |
>>> shrunk = contours[1][:, 0, :] |
>>> max(shrunk[:, 0]) |
538 |
>>> min(shrunk[:, 0]) |
409 |
>>> min(shrunk[:, 1]) |
300 |
>>> max(shrunk[:, 1]) |
392 |
>>> |
""" |
""" |
import cv2 as cv |
contours, hierarchy = cv.findContours(visual_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE) |
shrunk = contours[0][:, 0, :] |
>>> shrunk[0, :] |
array([1907, 887], dtype=int32) |
>>> shrunk[:, 0] |
array([1907, 1907, 1908, 1908, 1908], dtype=int32) |
>>> shrunk[:, 1] |
array([887, 888, 889, 890, 888], dtype=int32) |
>>> shrunk |
array([[1907, 887], |
[1907, 888], |
[1908, 889], |
[1908, 890], |
[1908, 888]], dtype=int32) |
""" |