Witold Wydmański committed · Commit a06c206 · Parent(s): 9de3a79

init
- .gitattributes +2 -0
- __pycache__/transforms.cpython-38.pyc +0 -0
- app.py +121 -0
- decoder-quant.onnx +3 -0
- encoder-quant.onnx +3 -0
- requirements.txt +4 -0
- transforms.py +97 -0
.gitattributes
CHANGED
@@ -1,3 +1,5 @@
+encoder-quant.onnx filter=lfs diff=lfs merge=lfs -text
+decoder-quant.onnx filter=lfs diff=lfs merge=lfs -text
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
__pycache__/transforms.cpython-38.pyc
ADDED
Binary file (3.98 kB)
app.py
ADDED
@@ -0,0 +1,121 @@
+
+import gradio as gr
+import onnxruntime as rt
+import numpy as np
+from transforms import ResizeLongestSide
+from torch.nn import functional as F
+import torch
+import onnxruntime
+
+IMAGE_SIZE = 1024
+
+def preprocess_image(image):
+    transform = ResizeLongestSide(IMAGE_SIZE)
+    input_image = transform.apply_image(image)
+    input_image_torch = torch.as_tensor(input_image, device="cpu")
+    input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+    pixel_mean = torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1)
+    pixel_std = torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1)
+    x = (input_image_torch - pixel_mean) / pixel_std
+    h, w = x.shape[-2:]
+    padh = IMAGE_SIZE - h
+    padw = IMAGE_SIZE - w
+    x = F.pad(x, (0, padw, 0, padh))
+    x = x.numpy()
+    return x
+
+def prepare_inputs(image_embedding, input_point, image_shape):
+    transform = ResizeLongestSide(IMAGE_SIZE)
+
+    input_label = np.array([1])
+    onnx_coord = np.concatenate([input_point, np.array([[0.0, 0.0]])], axis=0)[None, :, :]
+    onnx_label = np.concatenate([input_label, np.array([-1])], axis=0)[None, :].astype(np.float32)
+
+    onnx_coord = transform.apply_coords(onnx_coord, image_shape).astype(np.float32)
+
+    onnx_mask_input = np.zeros((1, 1, 256, 256), dtype=np.float32)
+    onnx_has_mask_input = np.zeros(1, dtype=np.float32)
+
+    decoder_inputs = {
+        "image_embeddings": image_embedding,
+        "point_coords": onnx_coord,
+        "point_labels": onnx_label,
+        "mask_input": onnx_mask_input,
+        "has_mask_input": onnx_has_mask_input,
+        "orig_im_size": np.array(image_shape, dtype=np.float32)
+    }
+    return decoder_inputs
+
+enc_session = onnxruntime.InferenceSession("encoder-quant.onnx")
+dec_session = onnxruntime.InferenceSession("decoder-quant.onnx")
+
+def predict_image(img):
+    x = preprocess_image(img)
+
+    encoder_inputs = {
+        "x": x,
+    }
+
+    output = enc_session.run(None, encoder_inputs)
+    image_embedding = output[0]
+
+    middle_of_photo = np.array([[img.shape[1] / 2, img.shape[0] / 2]])
+
+    decoder_inputs = prepare_inputs(image_embedding, middle_of_photo, img.shape[:2])
+    masks, _, low_res_logits = dec_session.run(None, decoder_inputs)
+
+    # normalize the mask to the [0, 1] range
+    masks = masks[0][0]
+    masks[masks < 0] = 0
+    masks = masks / np.max(masks)
+    return masks, image_embedding, img.shape[:2]
+
+def segment_image(image_embedding, shape, evt: gr.SelectData):
+    image_embedding = np.array(image_embedding)
+    middle_of_photo = np.array([evt.index])
+    decoder_inputs = prepare_inputs(image_embedding, middle_of_photo, shape)
+    masks, _, low_res_logits = dec_session.run(None, decoder_inputs)
+
+    # normalize the mask to the [0, 1] range
+    masks = masks[0][0]
+    masks[masks < 0] = 0
+    masks = masks / np.max(masks)
+    return masks
+
+with gr.Blocks() as demo:
+    gr.Markdown("# SAM quantized (Segment Anything Model)")
+    markdown = """
+    This is a demo of the SAM model, which can segment anything in an image.
+    It returns the segmentation mask of the object that overlaps the clicked point.
+
+    The model is quantized using ONNX Runtime.
+    """
+
+    gr.Markdown(markdown)
+
+    embedding = gr.State()
+    shape = gr.State()
+    with gr.Row():
+        with gr.Column():
+            inputs = gr.Image()
+            start_segmentation = gr.Button("Segment")
+
+        with gr.Column():
+            outputs = gr.Image(label="Segmentation Mask")
+
+    start_segmentation.click(
+        predict_image,
+        inputs,
+        [outputs, embedding, shape],
+    )
+
+    outputs.select(
+        segment_image,
+        [embedding, shape],
+        outputs,
+    )
+
+
+
+
+demo.launch()
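
Note (not part of the commit): app.py splits inference into a one-time encoder pass and a cheap per-click decoder pass. The sketch below is a minimal, hypothetical smoke test of that two-stage ONNX pipeline; it assumes transforms.py and the two quantized .onnx files from this commit sit in the working directory, and it reuses the input/output names ("x", "image_embeddings", and so on) shown in app.py above.

# Hypothetical smoke test for the encoder/decoder split used in app.py (not part of this commit).
import numpy as np
import onnxruntime
import torch
from torch.nn import functional as F
from transforms import ResizeLongestSide

IMAGE_SIZE = 1024
transform = ResizeLongestSide(IMAGE_SIZE)

def preprocess(image):
    # Same steps as preprocess_image() in app.py: resize, normalize, pad to 1024x1024.
    x = torch.as_tensor(transform.apply_image(image)).permute(2, 0, 1)[None].float()
    mean = torch.tensor([123.675, 116.28, 103.53]).view(-1, 1, 1)
    std = torch.tensor([58.395, 57.12, 57.375]).view(-1, 1, 1)
    x = (x - mean) / std
    h, w = x.shape[-2:]
    return F.pad(x, (0, IMAGE_SIZE - w, 0, IMAGE_SIZE - h)).numpy()

enc = onnxruntime.InferenceSession("encoder-quant.onnx")
dec = onnxruntime.InferenceSession("decoder-quant.onnx")

# A synthetic 480x640 RGB image stands in for a user upload.
img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

# Stage 1: image -> embedding (expensive, run once per image).
embedding = enc.run(None, {"x": preprocess(img)})[0]

# Stage 2: embedding + click point -> mask (cheap, re-run for every click).
point = np.array([[[img.shape[1] / 2, img.shape[0] / 2], [0.0, 0.0]]])  # padding point appended
labels = np.array([[1, -1]], dtype=np.float32)                          # -1 marks the padding point
masks, scores, low_res_logits = dec.run(None, {
    "image_embeddings": embedding,
    "point_coords": transform.apply_coords(point, img.shape[:2]).astype(np.float32),
    "point_labels": labels,
    "mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
    "has_mask_input": np.zeros(1, dtype=np.float32),
    "orig_im_size": np.array(img.shape[:2], dtype=np.float32),
})
print("embedding:", embedding.shape, "mask:", masks.shape)
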
decoder-quant.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64dedbe577d41b18ccb8d5496d26916929f65c7ecd8f06d5b23c5197434bfcb0
+size 8738974
encoder-quant.onnx
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1b905f70f4a3e769473b222f277c45c8e2aa0085b522e33f8b457f2b11faa5
+size 322569075
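
The two .onnx entries above are Git LFS pointer files rather than the weights themselves. As an optional, hypothetical check (not part of the commit), the sha256 of a file fetched with git lfs pull can be compared against the oid recorded in its pointer:

# Hypothetical integrity check of an LFS-managed file against its pointer oid (not part of the commit).
import hashlib

def sha256_of(path):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Should print the oid sha256 values shown in the pointer files above.
print(sha256_of("decoder-quant.onnx"))
print(sha256_of("encoder-quant.onnx"))
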
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+torch==1.8.1
+torchvision==0.9.1
+onnxruntime==1.16.1
+gradio==3.44.0
transforms.py
ADDED
@@ -0,0 +1,97 @@
+
+import numpy as np
+import torch
+from torch.nn import functional as F
+from torchvision.transforms.functional import resize, to_pil_image  # type: ignore
+
+from copy import deepcopy
+from typing import Tuple
+
+
+class ResizeLongestSide:
+    """
+    Resizes images to the longest side 'target_length', as well as provides
+    methods for resizing coordinates and boxes. Provides methods for
+    transforming both numpy array and batched torch tensors.
+    """
+
+    def __init__(self, target_length: int) -> None:
+        self.target_length = target_length
+
+    def apply_image(self, image: np.ndarray) -> np.ndarray:
+        """
+        Expects a numpy array with shape HxWxC in uint8 format.
+        """
+        target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
+        return np.array(resize(to_pil_image(image), target_size))
+
+    def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
+        """
+        Expects a numpy array of length 2 in the final dimension. Requires the
+        original image size in (H, W) format.
+        """
+        old_h, old_w = original_size
+        new_h, new_w = self.get_preprocess_shape(
+            original_size[0], original_size[1], self.target_length
+        )
+        coords = deepcopy(coords).astype(float)
+        coords[..., 0] = coords[..., 0] * (new_w / old_w)
+        coords[..., 1] = coords[..., 1] * (new_h / old_h)
+        return coords
+
+    def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
+        """
+        Expects a numpy array shape Bx4. Requires the original image size
+        in (H, W) format.
+        """
+        boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
+        return boxes.reshape(-1, 4)
+
+    def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
+        """
+        Expects batched images with shape BxCxHxW and float format. This
+        transformation may not exactly match apply_image. apply_image is
+        the transformation expected by the model.
+        """
+        # Expects an image in BCHW format. May not exactly match apply_image.
+        target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length)
+        return F.interpolate(
+            image, target_size, mode="bilinear", align_corners=False, antialias=True
+        )
+
+    def apply_coords_torch(
+        self, coords: torch.Tensor, original_size: Tuple[int, ...]
+    ) -> torch.Tensor:
+        """
+        Expects a torch tensor with length 2 in the last dimension. Requires the
+        original image size in (H, W) format.
+        """
+        old_h, old_w = original_size
+        new_h, new_w = self.get_preprocess_shape(
+            original_size[0], original_size[1], self.target_length
+        )
+        coords = deepcopy(coords).to(torch.float)
+        coords[..., 0] = coords[..., 0] * (new_w / old_w)
+        coords[..., 1] = coords[..., 1] * (new_h / old_h)
+        return coords
+
+    def apply_boxes_torch(
+        self, boxes: torch.Tensor, original_size: Tuple[int, ...]
+    ) -> torch.Tensor:
+        """
+        Expects a torch tensor with shape Bx4. Requires the original image
+        size in (H, W) format.
+        """
+        boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
+        return boxes.reshape(-1, 4)
+
+    @staticmethod
+    def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
+        """
+        Compute the output size given input size and target long side length.
+        """
+        scale = long_side_length * 1.0 / max(oldh, oldw)
+        newh, neww = oldh * scale, oldw * scale
+        neww = int(neww + 0.5)
+        newh = int(newh + 0.5)
+        return (newh, neww)
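
For reference (not part of the commit), a minimal sketch of how ResizeLongestSide is used by app.py to rescale image sizes and click coordinates, assuming a 480x640 input image:

# Hypothetical usage example for ResizeLongestSide (not part of the commit).
import numpy as np
from transforms import ResizeLongestSide

transform = ResizeLongestSide(1024)

# For a 480x640 (H, W) image the longest side (640) is scaled to 1024
# and the short side is scaled proportionally and rounded.
print(transform.get_preprocess_shape(480, 640, 1024))  # (768, 1024)

# A click at the image centre, given as (x, y), is mapped into the resized frame.
coords = np.array([[[320.0, 240.0]]])              # shape (1, 1, 2)
print(transform.apply_coords(coords, (480, 640)))  # [[[512. 384.]]]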