jens committed on
Commit 9f0c4b3 · 1 Parent(s): 0c863e7

autolinting with black

Files changed (3)
  1. app.py +186 -79
  2. inference.py +99 -66
  3. utils.py +88 -55
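black is Python's "uncompromising" autoformatter, so every hunk in this commit is a mechanical rewrite: strings normalized to double quotes, lines wrapped to the default 88-character limit, commas respaced, and exploded argument lists given a trailing "magic" comma. A minimal sketch of the same rewrite through black's Python API — the two `src` lines are copied from the app.py hunk below, while `black.Mode()` with stock defaults is an assumption, since the commit shows no pyproject.toml:

import black

# Two pre-commit lines from app.py, in the old single-quoted, unspaced style.
src = "sam_cpu = SegmentPredictor(device='cpu')\nred = (255,0,0)\n"

# black.Mode() defaults to an 88-column line length and double-quote
# normalization, matching the formatting visible in the hunks below.
print(black.format_str(src, mode=black.Mode()))
# sam_cpu = SegmentPredictor(device="cpu")
# red = (255, 0, 0)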
app.py CHANGED
@@ -1,19 +1,18 @@
 import os
 import gradio as gr
 import numpy as np
-import cv2
+import cv2
 from PIL import Image, ImageOps
 import torch
 from inference import SegmentPredictor, DepthPredictor
 from utils import generate_PCL, PCL3, point_cloud


-
 sam = SegmentPredictor()
-sam_cpu = SegmentPredictor(device='cpu')
+sam_cpu = SegmentPredictor(device="cpu")
 dpt = DepthPredictor()
-red = (255,0,0)
-blue = (0,0,255)
+red = (255, 0, 0)
+blue = (0, 0, 255)
 annos = []


@@ -22,8 +21,10 @@ with block:
     # States
     def point_coords_empty():
         return []
+
     def point_labels_empty():
         return []
+
     image_edit_trigger = gr.State(True)
     point_coords = gr.State(point_coords_empty)
     point_labels = gr.State(point_labels_empty)
@@ -36,61 +37,123 @@ with block:
     # UI
     with gr.Column():
         gr.Markdown(
-            '''# Segment Anything Model (SAM)
+            """# Segment Anything Model (SAM)
 ## a new AI model from Meta AI that can "cut out" any object, in any image, with a single click 🚀
 SAM is a promptable segmentation system with zero-shot generalization to unfamiliar objects and images, without the need for additional training. [**Official Project**](https://segment-anything.com/) [**Code**](https://github.com/facebookresearch/segment-anything).
-            '''
+            """
         )
     with gr.Row():
         with gr.Column():
             with gr.Tab("Upload Image"):
-                upload_image = gr.Image(label='Input', type='pil', tool=None) # mirror_webcam = False
+                # mirror_webcam = False
+                upload_image = gr.Image(label="Input", type="pil", tool=None)
             with gr.Tab("Webcam"):
-                input_image = gr.Image(label='Input', type='pil', tool=None, source="webcam") # mirror_webcam = False
+                # mirror_webcam = False
+                input_image = gr.Image(
+                    label="Input", type="pil", tool=None, source="webcam"
+                )
             with gr.Row():
-                sam_encode_btn = gr.Button('Encode', variant='primary')
-                sam_sgmt_everything_btn = gr.Button('Segment Everything!', variant = 'primary')
-                #sam_encode_status = gr.Label('Not encoded yet')
+                sam_encode_btn = gr.Button("Encode", variant="primary")
+                sam_sgmt_everything_btn = gr.Button(
+                    "Segment Everything!", variant="primary"
+                )
+                # sam_encode_status = gr.Label('Not encoded yet')
             with gr.Row():
-                prompt_image = gr.Image(label='Segments')
-                #prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
-                lbl_image = gr.AnnotatedImage(label='Everything')
+                prompt_image = gr.Image(label="Segments")
+                # prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
+                lbl_image = gr.AnnotatedImage(label="Everything")
         with gr.Row():
-            point_label_radio = gr.Radio(label='Point Label', choices=[1,0], value=1)
-            text = gr.Textbox(label='Mask Name')
-            reset_btn = gr.Button('New Mask')
-            selected_masks_image = gr.AnnotatedImage(label='Selected Masks')
+            point_label_radio = gr.Radio(label="Point Label", choices=[1, 0], value=1)
+            text = gr.Textbox(label="Mask Name")
+            reset_btn = gr.Button("New Mask")
+            selected_masks_image = gr.AnnotatedImage(label="Selected Masks")
        with gr.Row():
            with gr.Column():
-                pcl_figure = gr.Model3D(label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0])
+                pcl_figure = gr.Model3D(
+                    label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]
+                )
                with gr.Row():
-                    max_depth = gr.Slider(minimum=0, maximum=10, step=0.01, default=1, label='Max Depth')
-                    min_depth = gr.Slider(minimum=0, maximum=10, step=0.01, default=0.1, label='Min Depth')
-                    n_samples = gr.Slider(minimum=1e3, maximum=1e6, step=1e3, default=1e3, label='Number of Samples')
-                    cube_size = gr.Slider(minimum=0.00001, maximum=0.001, step=0.000001, default=0.00001, label='Cube size')
-                    depth_reconstruction_btn = gr.Button('Depth Reconstruction', variant = 'primary')
-
+                    max_depth = gr.Slider(
+                        minimum=0, maximum=10, step=0.01, default=1, label="Max Depth"
+                    )
+                    min_depth = gr.Slider(
+                        minimum=0, maximum=10, step=0.01, default=0.1, label="Min Depth"
+                    )
+                    n_samples = gr.Slider(
+                        minimum=1e3,
+                        maximum=1e6,
+                        step=1e3,
+                        default=1e3,
+                        label="Number of Samples",
+                    )
+                    cube_size = gr.Slider(
+                        minimum=0.00001,
+                        maximum=0.001,
+                        step=0.000001,
+                        default=0.00001,
+                        label="Cube size",
+                    )
+                    depth_reconstruction_btn = gr.Button(
+                        "Depth Reconstruction", variant="primary"
+                    )

-
-        sam_decode_btn = gr.Button('Predict using points!', variant = 'primary')
-
+        sam_decode_btn = gr.Button("Predict using points!", variant="primary")
+
     # components
-    components = {point_coords, point_labels, image_edit_trigger, masks, cutout_idx, input_image, embedding,
-                  point_label_radio, text, reset_btn, sam_sgmt_everything_btn,
-                  sam_decode_btn, depth_reconstruction_btn, prompt_image, lbl_image, n_samples, max_depth, min_depth, cube_size, selected_masks_image}
-
+    components = {
+        point_coords,
+        point_labels,
+        image_edit_trigger,
+        masks,
+        cutout_idx,
+        input_image,
+        embedding,
+        point_label_radio,
+        text,
+        reset_btn,
+        sam_sgmt_everything_btn,
+        sam_decode_btn,
+        depth_reconstruction_btn,
+        prompt_image,
+        lbl_image,
+        n_samples,
+        max_depth,
+        min_depth,
+        cube_size,
+        selected_masks_image,
+    }
+
     def on_upload_image(input_image, upload_image):
-        ## Mirror because gradio.image webcam has mirror = True
-        upload_image_mirror = ImageOps.mirror(upload_image)
+        # Mirror because gradio.image webcam has mirror = True
+        upload_image_mirror = ImageOps.mirror(upload_image)
         return [upload_image_mirror, upload_image]
-    upload_image.upload(on_upload_image, [input_image, upload_image], [input_image, upload_image])
+
+    upload_image.upload(
+        on_upload_image, [input_image, upload_image], [input_image, upload_image]
+    )

     # event - init coords
     def on_reset_btn_click(input_image):
         return input_image, point_coords_empty(), point_labels_empty(), None, []
-    reset_btn.click(on_reset_btn_click, [input_image], [input_image, point_coords, point_labels], queue=False)

-    def on_prompt_image_select(input_image, prompt_image, point_coords, point_labels, point_label_radio, text, pred_masks, embedding, evt: gr.SelectData):
+    reset_btn.click(
+        on_reset_btn_click,
+        [input_image],
+        [input_image, point_coords, point_labels],
+        queue=False,
+    )
+
+    def on_prompt_image_select(
+        input_image,
+        prompt_image,
+        point_coords,
+        point_labels,
+        point_label_radio,
+        text,
+        pred_masks,
+        embedding,
+        evt: gr.SelectData,
+    ):
         sam_cpu.dummy_encode(input_image)
         x, y = evt.index
         color = red if point_label_radio == 0 else blue
@@ -98,47 +161,69 @@ with block:
         prompt_image = np.array(input_image.copy())

         cv2.circle(prompt_image, (x, y), 5, color, -1)
-        point_coords.append([x,y])
+        point_coords.append([x, y])
         point_labels.append(point_label_radio)
-        sam_masks = sam_cpu.cond_pred(pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding)
-        return [ prompt_image,
-                 (input_image, sam_masks),
-                 point_coords,
-                 point_labels,
-                 sam_masks ]
-
-    prompt_image.select(on_prompt_image_select,
-                        [input_image, prompt_image, point_coords, point_labels, point_label_radio, text, pred_masks, embedding],
-                        [prompt_image, lbl_image, point_coords, point_labels, pred_masks], queue=True)
-
-
-    def on_everything_image_select(input_image, pred_masks, masks, text, evt: gr.SelectData):
+        sam_masks = sam_cpu.cond_pred(
+            pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding
+        )
+        return [
+            prompt_image,
+            (input_image, sam_masks),
+            point_coords,
+            point_labels,
+            sam_masks,
+        ]
+
+    prompt_image.select(
+        on_prompt_image_select,
+        [
+            input_image,
+            prompt_image,
+            point_coords,
+            point_labels,
+            point_label_radio,
+            text,
+            pred_masks,
+            embedding,
+        ],
+        [prompt_image, lbl_image, point_coords, point_labels, pred_masks],
+        queue=True,
+    )
+
+    def on_everything_image_select(
+        input_image, pred_masks, masks, text, evt: gr.SelectData
+    ):
         i = evt.index
         mask = pred_masks[i][0]
         print(mask)
         print(type(mask))
         masks.append((mask, text))
-        anno = (input_image, masks)
-        return [masks, anno]
-
-    lbl_image.select(on_everything_image_select,
-                     [input_image, pred_masks, masks, text],
-                     [masks, selected_masks_image], queue=False)
-
+        anno = (input_image, masks)
+        return [masks, anno]
+
+    lbl_image.select(
+        on_everything_image_select,
+        [input_image, pred_masks, masks, text],
+        [masks, selected_masks_image],
+        queue=False,
+    )
+
     def on_selected_masks_image_select(input_image, masks, evt: gr.SelectData):
         i = evt.index
         del masks[i]
-        anno = (input_image, masks)
-        return [masks, anno]
-
-    selected_masks_image.select(on_selected_masks_image_select,
-                                [input_image, masks],
-                                [masks, selected_masks_image], queue=False)
-    #prompt_lbl_image.select(on_everything_image_select,
+        anno = (input_image, masks)
+        return [masks, anno]
+
+    selected_masks_image.select(
+        on_selected_masks_image_select,
+        [input_image, masks],
+        [masks, selected_masks_image],
+        queue=False,
+    )
+    # prompt_lbl_image.select(on_everything_image_select,
     #                   [input_image, prompt_masks, masks, text],
     #                   [masks, selected_masks_image], queue=False)

-
     def on_click_sam_encode_btn(inputs):
         print("encoding")
         # encode image on click
@@ -146,27 +231,43 @@ with block:
         sam_cpu.dummy_encode(inputs[input_image])
         print("encoding done")
         return [inputs[input_image], embedding]
-    sam_encode_btn.click(on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False)
+
+    sam_encode_btn.click(
+        on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False
+    )

     def on_click_sam_dencode_btn(inputs):
         print("inferencing")
         image = inputs[input_image]
-        generated_mask, _, _ = sam.cond_pred(pts=np.array(inputs[point_coords]), lbls=np.array(inputs[point_labels]))
+        generated_mask, _, _ = sam.cond_pred(
+            pts=np.array(inputs[point_coords]), lbls=np.array(inputs[point_labels])
+        )
         inputs[masks].append((generated_mask, inputs[text]))
         print(inputs[masks][0])
         return {prompt_image: (image, inputs[masks])}
-    sam_decode_btn.click(on_click_sam_dencode_btn, components, [prompt_image, masks, cutout_idx], queue=True)
+
+    sam_decode_btn.click(
+        on_click_sam_dencode_btn,
+        components,
+        [prompt_image, masks, cutout_idx],
+        queue=True,
+    )

     def on_depth_reconstruction_btn_click(inputs):
         print("depth reconstruction")
-        path = dpt.generate_obj_rgb(image=inputs[input_image],
-                                    cube_size=inputs[cube_size],
-                                    n_samples=inputs[n_samples],
-                                    #masks=inputs[masks],
-                                    min_depth=inputs[min_depth],
-                                    max_depth=inputs[max_depth]) #
+        path = dpt.generate_obj_rgb(
+            image=inputs[input_image],
+            cube_size=inputs[cube_size],
+            n_samples=inputs[n_samples],
+            # masks=inputs[masks],
+            min_depth=inputs[min_depth],
+            max_depth=inputs[max_depth],
+        )
         return {pcl_figure: path}
-    depth_reconstruction_btn.click(on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False)
+
+    depth_reconstruction_btn.click(
+        on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False
+    )

     def on_sam_sgmt_everything_btn_click(inputs):
         print("segmenting everything")
@@ -175,9 +276,15 @@ with block:
         print(image)
         print(sam_masks)
         return [(image, sam_masks), sam_masks]
-    sam_sgmt_everything_btn.click(on_sam_sgmt_everything_btn_click, components, [lbl_image, pred_masks], queue=True)
+
+    sam_sgmt_everything_btn.click(
+        on_sam_sgmt_everything_btn_click,
+        components,
+        [lbl_image, pred_masks],
+        queue=True,
+    )


-if __name__ == '__main__':
+if __name__ == "__main__":
     block.queue()
-    block.launch()
+    block.launch()
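Since black only changes layout, the app.py rewrite above is behavior-preserving; black verifies this itself by re-parsing both versions and comparing their ASTs (its built-in safety check, skipped only with --fast). A small sketch of that equivalence argument on one line from the hunk above:

import ast

# One assignment from app.py before and after this commit. Both spellings
# parse to the identical AST, so the reformat cannot change behavior.
before = "blue = (0,0,255)"
after = "blue = (0, 0, 255)"
assert ast.dump(ast.parse(before)) == ast.dump(ast.parse(after))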
inference.py CHANGED
@@ -12,9 +12,6 @@ import plotly.express as px
 import matplotlib.pyplot as plt


-
-
-
 def map_image_range(depth, min_value, max_value):
     """
     Maps the values of a numpy image array to a specified range.
@@ -43,6 +40,7 @@ def map_image_range(depth, min_value, max_value):
     print(np.max(mapped_image))
     return mapped_image

+
 def PCL(mask, depth):
     assert mask.shape == depth.shape
     assert type(mask) == np.ndarray
@@ -52,46 +50,62 @@ def PCL(mask, depth):
     print(np.unique(rgb_mask))
     depth_o3d = o3d.geometry.Image(depth)
     image_o3d = o3d.geometry.Image(rgb_mask)
-    #print(len(depth_o3d))
-    #print(len(image_o3d))
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    # print(len(depth_o3d))
+    # print(len(image_o3d))
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
-    #print(len(pcd))
+    # print(len(pcd))
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     print(np.unique(colors, axis=0))
     print(np.unique(colors, axis=1))
     print(np.unique(colors))
-    mask = (colors[:, 0] == 1.)
+    mask = colors[:, 0] == 1.0
     print(mask.sum())
     print(colors.shape)
     points = points[mask]
     colors = colors[mask]
     return points, colors

+
 def PCL_rgb(rgb, depth):
-    #assert rgb.shape == depth.shape
+    # assert rgb.shape == depth.shape
     assert type(rgb) == np.ndarray
     assert type(depth) == np.ndarray
     depth_o3d = o3d.geometry.Image(depth)
     image_o3d = o3d.geometry.Image(rgb)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     return points, colors

+
 class DepthPredictor:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
         self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         self.model.eval()
-
+
     def predict(self, image):
         # prepare image for the model
         encoding = self.feature_extractor(image, return_tensors="pt")
@@ -101,16 +115,16 @@ class DepthPredictor:
         predicted_depth = outputs.predicted_depth
         # interpolate to original size
         prediction = torch.nn.functional.interpolate(
-            predicted_depth.unsqueeze(1),
-            size=image.size[::-1],
-            mode="bicubic",
-            align_corners=False,
-        ).squeeze()
-
+            predicted_depth.unsqueeze(1),
+            size=image.size[::-1],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+
         output = prediction.cpu().numpy()
-        #output = 1 - (output/np.max(output))
+        # output = 1 - (output/np.max(output))
         return output
-
+
     def generate_pcl(self, image):
         print(np.array(image).shape)
         depth = self.predict(image)
@@ -118,34 +132,47 @@ class DepthPredictor:
         # Step 2: Create an RGBD image from the RGB and depth image
         depth_o3d = o3d.geometry.Image(depth)
         image_o3d = o3d.geometry.Image(np.array(image))
-        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+            image_o3d, depth_o3d, convert_rgb_to_intensity=False
+        )
         # Step 3: Create a PointCloud from the RGBD image
-        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+            rgbd_image,
+            o3d.camera.PinholeCameraIntrinsic(
+                o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+            ),
+        )
         # Step 4: Convert PointCloud data to a NumPy array
         points = np.asarray(pcd.points)
         colors = np.asarray(pcd.colors)
         print(points.shape, colors.shape)
         return points, colors
-
+
     def generate_fig(self, image):
         points, colors = self.generate_pcl(image)
-        data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2],
-                'red': colors[:, 0], 'green': colors[:, 1], 'blue': colors[:, 2]}
+        data = {
+            "x": points[:, 0],
+            "y": points[:, 1],
+            "z": points[:, 2],
+            "red": colors[:, 0],
+            "green": colors[:, 1],
+            "blue": colors[:, 2],
+        }
         df = pd.DataFrame(data)
         size = np.zeros(len(df))
         size[:] = 0.01
         # Step 6: Create a 3D scatter plot using Plotly Express
-        fig = px.scatter_3d(df, x='x', y='y', z='z', color='red', size=size)
+        fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
         return fig
-
+
     def generate_fig2(self, image):
         points, colors = self.generate_pcl(image)
         # Step 6: Create a 3D scatter plot using Plotly Express
         fig = plt.figure()
-        ax = fig.add_subplot(111, projection='3d')
-        ax.scatter(points,size=0.01, c=colors, marker='o')
+        ax = fig.add_subplot(111, projection="3d")
+        ax.scatter(points, size=0.01, c=colors, marker="o")
         return fig
-
+
     def generate_obj_rgb(self, image, n_samples, cube_size, max_depth, min_depth):
         # Step 1: Create a point cloud
         depth = self.predict(image)
@@ -159,7 +186,9 @@ class DepthPredictor:
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
         for point, color in zip(point_cloud, color_array):
-            cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+            cube = o3d.geometry.TriangleMesh.create_box(
+                width=cube_size, height=cube_size, depth=cube_size
+            )
             cube.translate(-point)
             cube.paint_uniform_color(color)
             mesh += cube
@@ -174,14 +203,19 @@ class DepthPredictor:
         print(point_cloud.shape)
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
-        cs = [(255,0,0),(0,255,0),(0,0,255)]
-        for c,(mask, _) in zip(cs, masks):
+        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
+        for c, (mask, _) in zip(cs, masks):
             mask = mask.ravel()
-            point_cloud_subset, color_array_subset = point_cloud[mask], color_array[mask]
+            point_cloud_subset, color_array_subset = (
+                point_cloud[mask],
+                color_array[mask],
+            )
             idxs = np.random.choice(len(point_cloud_subset), int(n_samples))
             point_cloud_subset = point_cloud_subset[idxs]
             for point in point_cloud_subset:
-                cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+                cube = o3d.geometry.TriangleMesh.create_box(
+                    width=cube_size, height=cube_size, depth=cube_size
+                )
                 cube.translate(-point)
                 cube.paint_uniform_color(c)
                 mesh += cube
@@ -189,22 +223,26 @@ class DepthPredictor:
         output_file = "./cloud.obj"
         o3d.io.write_triangle_mesh(output_file, mesh)
         return output_file
-
-    def generate_obj_masks2(self, image, masks, cube_size, n_samples, min_depth, max_depth):
+
+    def generate_obj_masks2(
+        self, image, masks, cube_size, n_samples, min_depth, max_depth
+    ):
         # Generate a point cloud
         depth = self.predict(image)
-        #depth = map_image_range(depth, min_depth, max_depth)
+        # depth = map_image_range(depth, min_depth, max_depth)
         image = np.array(image)
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
         print(len(masks))
-        cs = [(255,0,0),(0,255,0),(0,0,255)]
-        for c,(mask, _) in zip(cs, masks):
+        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
+        for c, (mask, _) in zip(cs, masks):
             points, _ = PCL(mask, depth)
-            #idxs = np.random.choice(len(points), int(n_samples))
-            #points = points[idxs]
+            # idxs = np.random.choice(len(points), int(n_samples))
+            # points = points[idxs]
             for point in points:
-                cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+                cube = o3d.geometry.TriangleMesh.create_box(
+                    width=cube_size, height=cube_size, depth=cube_size
+                )
                 cube.translate(-point)
                 cube.paint_uniform_color(c)
                 mesh += cube
@@ -212,12 +250,12 @@ class DepthPredictor:
         output_file = "./cloud.obj"
         o3d.io.write_triangle_mesh(output_file, mesh)
         return output_file
-


 import numpy as np
 from typing import Optional, Tuple

+
 class CustomSamPredictor(SamPredictor):
     def __init__(
         self,
@@ -249,7 +287,9 @@ class CustomSamPredictor(SamPredictor):
         # Transform the image to the form expected by the model
         input_image = self.transform.apply_image(image)
         input_image_torch = torch.as_tensor(input_image, device=self.device)
-        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
+            None, :, :, :
+        ]
         self.set_torch_image(input_image_torch, image.shape[:2])
         return self.get_image_embedding()

@@ -313,7 +353,7 @@ class CustomSamPredictor(SamPredictor):
         self.input_size = tuple(transformed_image.shape[-2:])
         input_image = self.model.preprocess(transformed_image)
         # The following line is commented out to avoid encoding on cpu
-        #self.features = self.model.image_encoder(input_image)
+        # self.features = self.model.image_encoder(input_image)
         self.is_image_set = True

     def dummy_set_image(
@@ -340,10 +380,13 @@ class CustomSamPredictor(SamPredictor):
         # Transform the image to the form expected by the model
         input_image = self.transform.apply_image(image)
         input_image_torch = torch.as_tensor(input_image, device=self.device)
-        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
+            None, :, :, :
+        ]

         self.dummy_set_torch_image(input_image_torch, image.shape[:2])

+
 class SegmentPredictor:
     def __init__(self, device=None):
         MODEL_TYPE = "vit_h"
@@ -351,13 +394,13 @@ class SegmentPredictor:
         sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
         # Select device
         if device is None:
-            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
         else:
             self.device = device
         sam.to(device=self.device)
         self.mask_generator = SamAutomaticMaskGenerator(sam)
         self.conditioned_pred = CustomSamPredictor(sam)
-
+
     def encode(self, image):
         image = np.array(image)
         return self.conditioned_pred.encode_image(image)
@@ -365,33 +408,23 @@ class SegmentPredictor:
     def dummy_encode(self, image):
         image = np.array(image)
         self.conditioned_pred.dummy_set_image(image)
-
+
     def cond_pred(self, embedding, pts, lbls):
         lbls = np.array(lbls)
         pts = np.array(pts)
         masks, _, _ = self.conditioned_pred.decode_and_predict(
-            embedding,
-            point_coords=pts,
-            point_labels=lbls,
-            multimask_output=True
-        )
-        idxs = np.argsort(-masks.sum(axis=(1,2)))
+            embedding, point_coords=pts, point_labels=lbls, multimask_output=True
+        )
+        idxs = np.argsort(-masks.sum(axis=(1, 2)))
         sam_masks = []
-        for n,i in enumerate(idxs):
+        for n, i in enumerate(idxs):
             sam_masks.append((masks[i], str(n)))
         return sam_masks

-
     def segment_everything(self, image):
         image = np.array(image)
         sam_result = self.mask_generator.generate(image)
         sam_masks = []
-        for i,mask in enumerate(sam_result):
+        for i, mask in enumerate(sam_result):
             sam_masks.append((mask["segmentation"], str(i)))
         return sam_masks
-
-
-
-
-
-
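The inference.py hunks also show black's "magic trailing comma" working in both directions: the old exploded decode_and_predict(...) call had no trailing comma after multimask_output=True, so black collapsed it onto one line, while the calls it was forced to split (the gr.Slider and .click calls in app.py) gained a trailing comma that keeps them exploded. A hedged sketch of both cases with a toy call f(a, b):

import black

# No trailing comma and the joined call fits in 88 columns: black collapses
# it, as it did for decode_and_predict(...) above.
print(black.format_str("f(\n    a,\n    b\n)\n", mode=black.Mode()), end="")
# f(a, b)

# A trailing ("magic") comma forces one argument per line, as in the
# reformatted .click(...) calls in app.py.
print(black.format_str("f(a, b,)\n", mode=black.Mode()), end="")
# f(
#     a,
#     b,
# )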
utils.py CHANGED
@@ -8,54 +8,53 @@ from inference import DepthPredictor
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D

-def create_3d_obj(rgb_image, depth_image, depth=10, path='./image.gltf'):
+
+def create_3d_obj(rgb_image, depth_image, depth=10, path="./image.gltf"):
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
     rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
-        image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     w = int(depth_image.shape[1])
     h = int(depth_image.shape[0])

     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
-    camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)
+    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
-        rgbd_image, camera_intrinsic)
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

-    print('normals')
+    print("normals")
     pcd.normals = o3d.utility.Vector3dVector(
-        np.zeros((1, 3)))  # invalidate existing normals
+        np.zeros((1, 3))
+    )  # invalidate existing normals
     pcd.estimate_normals(
-        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
+        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
+    )
     pcd.orient_normals_towards_camera_location(
-        camera_location=np.array([0., 0., 1000.]))
-    pcd.transform([[1, 0, 0, 0],
-                   [0, -1, 0, 0],
-                   [0, 0, -1, 0],
-                   [0, 0, 0, 1]])
-    pcd.transform([[-1, 0, 0, 0],
-                   [0, 1, 0, 0],
-                   [0, 0, 1, 0],
-                   [0, 0, 0, 1]])
-
-    print('run Poisson surface reconstruction')
+        camera_location=np.array([0.0, 0.0, 1000.0])
+    )
+    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+    pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
+
+    print("run Poisson surface reconstruction")
     with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
         mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
+            pcd, depth=depth, width=0, scale=1.1, linear_fit=True
+        )

     voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
-    print(f'voxel_size = {voxel_size:e}')
+    print(f"voxel_size = {voxel_size:e}")
     mesh = mesh_raw.simplify_vertex_clustering(
         voxel_size=voxel_size,
-        contraction=o3d.geometry.SimplificationContraction.Average)
+        contraction=o3d.geometry.SimplificationContraction.Average,
+    )

     # vertices_to_remove = densities < np.quantile(densities, 0.001)
     # mesh.remove_vertices_by_mask(vertices_to_remove)
     bbox = pcd.get_axis_aligned_bounding_box()
     mesh_crop = mesh.crop(bbox)
     gltf_path = path
-    o3d.io.write_triangle_mesh(
-        gltf_path, mesh_crop, write_triangle_uvs=True)
+    o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
     return gltf_path


@@ -64,7 +63,8 @@ def create_3d_pc(rgb_image, depth_image, depth=10):
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
     rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
-        image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )

     w = int(depth_image.shape[1])
     h = int(depth_image.shape[0])
@@ -77,20 +77,21 @@ def create_3d_pc(rgb_image, depth_image, depth=10):

     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)

-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
-        rgbd_image, camera_intrinsic)
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

-    print('Estimating normals...')
+    print("Estimating normals...")
     pcd.estimate_normals(
-        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
+        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
+    )
     pcd.orient_normals_towards_camera_location(
-        camera_location=np.array([0., 0., 1000.]))
+        camera_location=np.array([0.0, 0.0, 1000.0])
+    )

     # Save the point cloud as a PLY file
     filename = "pc.pcd"
     o3d.io.write_point_cloud(filename, pcd)

-    return filename  # Return the file path where the PLY file is saved
+    return filename  # Return the file path where the PLY file is saved


 def point_cloud(rgb_image):
@@ -99,29 +100,42 @@ def point_cloud(rgb_image):
     # Step 2: Create an RGBD image from the RGB and depth image
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     # Step 5: Create a DataFrame from the NumPy arrays
-    data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2],
-            'red': colors[:, 0], 'green': colors[:, 1], 'blue': colors[:, 2]}
+    data = {
+        "x": points[:, 0],
+        "y": points[:, 1],
+        "z": points[:, 2],
+        "red": colors[:, 0],
+        "green": colors[:, 1],
+        "blue": colors[:, 2],
+    }
     df = pd.DataFrame(data)
     size = np.zeros(len(df))
     size[:] = 0.01
     # Step 6: Create a 3D scatter plot using Plotly Express
-    fig = px.scatter_3d(df, x='x', y='y', z='z', color='red', size=size)
-
-
+    fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
+
     return fig

+
 def array_PCL(rgb_image, depth_image):
-    FX_RGB = 5.1885790117450188e+02
-    FY_RGB = 5.1946961112127485e+02
-    CX_RGB = 3.2558244941119034e+0
-    CY_RGB = 2.5373616633400465e+02
+    FX_RGB = 5.1885790117450188e02
+    FY_RGB = 5.1946961112127485e02
+    CX_RGB = 3.2558244941119034e0
+    CY_RGB = 2.5373616633400465e02
     FX_DEPTH = FX_RGB
     FY_DEPTH = FY_RGB
     CX_DEPTH = CX_RGB
@@ -142,11 +156,20 @@ def array_PCL(rgb_image, depth_image):

     # compute point cloud
     pcd = np.dstack((xx * z, yy * z, z)).reshape((length, 3))
-    #cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
-    xx_rgb = ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2).astype(int).clip(0, width - 1)
-    yy_rgb = ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB).astype(int).clip(0, height - 1)
-    #colors = rgb_image[yy_rgb, xx_rgb]/255
-    return pcd#, colors
+    # cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
+    xx_rgb = (
+        ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2)
+        .astype(int)
+        .clip(0, width - 1)
+    )
+    yy_rgb = (
+        ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB)
+        .astype(int)
+        .clip(0, height - 1)
+    )
+    # colors = rgb_image[yy_rgb, xx_rgb]/255
+    return pcd  # , colors
+

 def generate_PCL(image):
     depth_predictor = DepthPredictor()
@@ -159,7 +182,9 @@ def generate_PCL(image):

 def plot_PCL(rgb_image, depth_image):
     pcd, colors = array_PCL(rgb_image, depth_image)
-    fig = px.scatter_3d(x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1)
+    fig = px.scatter_3d(
+        x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1
+    )
     return fig


@@ -170,9 +195,16 @@ def PCL3(image):
     # Step 2: Create an RGBD image from the RGB and depth image
     depth_o3d = o3d.geometry.Image(depth_result)
     image_o3d = o3d.geometry.Image(image)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     vis = o3d.visualization.Visualizer()
     vis.add_geometry(pcd)
@@ -183,16 +215,17 @@ def PCL3(image):
     sizes[:] = 0.01
     colors = [tuple(c) for c in colors]
     fig = plt.figure()
-    #ax = fig.add_subplot(111, projection='3d')
+    # ax = fig.add_subplot(111, projection='3d')
     ax = Axes3D(fig)
     print("plotting...")
     ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, s=0.01)
     print("Plot Succesful")
-    #data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
-    #df = pd.DataFrame(data)
+    # data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
+    # df = pd.DataFrame(data)
     # Step 6: Create a 3D scatter plot using Plotly Express
-    #fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")
-
+    # fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")
+
     return fig

+
 import numpy as np