jens committed on
Commit 9f0c4b3 · 1 Parent(s): 0c863e7

autolinting with black

Files changed (3)
  1. app.py +186 -79
  2. inference.py +99 -66
  3. utils.py +88 -55
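black is Python's "uncompromising" autoformatter, so every hunk in this commit is a mechanical rewrite: strings normalized to double quotes, lines wrapped to the default 88-character limit, commas respaced, and exploded argument lists given a trailing "magic" comma. A minimal sketch of the same rewrite through black's Python API — the two `src` lines are copied from the app.py hunk below, while `black.Mode()` with stock defaults is an assumption, since the commit shows no pyproject.toml:

import black

# Two pre-commit lines from app.py, in the old single-quoted, unspaced style.
src = "sam_cpu = SegmentPredictor(device='cpu')\nred = (255,0,0)\n"

# black.Mode() defaults to an 88-column line length and double-quote
# normalization, matching the formatting visible in the hunks below.
print(black.format_str(src, mode=black.Mode()))
# sam_cpu = SegmentPredictor(device="cpu")
# red = (255, 0, 0)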
app.py CHANGED
@@ -1,19 +1,18 @@
 import os
 import gradio as gr
 import numpy as np
-import cv2
+import cv2
 from PIL import Image, ImageOps
 import torch
 from inference import SegmentPredictor, DepthPredictor
 from utils import generate_PCL, PCL3, point_cloud


-
 sam = SegmentPredictor()
-sam_cpu = SegmentPredictor(device='cpu')
+sam_cpu = SegmentPredictor(device="cpu")
 dpt = DepthPredictor()
-red = (255,0,0)
-blue = (0,0,255)
+red = (255, 0, 0)
+blue = (0, 0, 255)
 annos = []


@@ -22,8 +21,10 @@ with block:
     # States
     def point_coords_empty():
         return []
+
     def point_labels_empty():
         return []
+
     image_edit_trigger = gr.State(True)
     point_coords = gr.State(point_coords_empty)
     point_labels = gr.State(point_labels_empty)
@@ -36,61 +37,123 @@ with block:
     # UI
     with gr.Column():
         gr.Markdown(
-            '''# Segment Anything Model (SAM)
+            """# Segment Anything Model (SAM)
 ## a new AI model from Meta AI that can "cut out" any object, in any image, with a single click 🚀
 SAM is a promptable segmentation system with zero-shot generalization to unfamiliar objects and images, without the need for additional training. [**Official Project**](https://segment-anything.com/) [**Code**](https://github.com/facebookresearch/segment-anything).
-            '''
+            """
         )
     with gr.Row():
         with gr.Column():
             with gr.Tab("Upload Image"):
-                upload_image = gr.Image(label='Input', type='pil', tool=None) # mirror_webcam = False
+                # mirror_webcam = False
+                upload_image = gr.Image(label="Input", type="pil", tool=None)
             with gr.Tab("Webcam"):
-                input_image = gr.Image(label='Input', type='pil', tool=None, source="webcam") # mirror_webcam = False
+                # mirror_webcam = False
+                input_image = gr.Image(
+                    label="Input", type="pil", tool=None, source="webcam"
+                )
             with gr.Row():
-                sam_encode_btn = gr.Button('Encode', variant='primary')
-                sam_sgmt_everything_btn = gr.Button('Segment Everything!', variant = 'primary')
-                #sam_encode_status = gr.Label('Not encoded yet')
+                sam_encode_btn = gr.Button("Encode", variant="primary")
+                sam_sgmt_everything_btn = gr.Button(
+                    "Segment Everything!", variant="primary"
+                )
+                # sam_encode_status = gr.Label('Not encoded yet')
             with gr.Row():
-                prompt_image = gr.Image(label='Segments')
-                #prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
-                lbl_image = gr.AnnotatedImage(label='Everything')
+                prompt_image = gr.Image(label="Segments")
+                # prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
+                lbl_image = gr.AnnotatedImage(label="Everything")
         with gr.Row():
-            point_label_radio = gr.Radio(label='Point Label', choices=[1,0], value=1)
-            text = gr.Textbox(label='Mask Name')
-            reset_btn = gr.Button('New Mask')
-            selected_masks_image = gr.AnnotatedImage(label='Selected Masks')
+            point_label_radio = gr.Radio(label="Point Label", choices=[1, 0], value=1)
+            text = gr.Textbox(label="Mask Name")
+            reset_btn = gr.Button("New Mask")
+            selected_masks_image = gr.AnnotatedImage(label="Selected Masks")
        with gr.Row():
            with gr.Column():
-                pcl_figure = gr.Model3D(label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0])
+                pcl_figure = gr.Model3D(
+                    label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]
+                )
                with gr.Row():
-                    max_depth = gr.Slider(minimum=0, maximum=10, step=0.01, default=1, label='Max Depth')
-                    min_depth = gr.Slider(minimum=0, maximum=10, step=0.01, default=0.1, label='Min Depth')
-                    n_samples = gr.Slider(minimum=1e3, maximum=1e6, step=1e3, default=1e3, label='Number of Samples')
-                    cube_size = gr.Slider(minimum=0.00001, maximum=0.001, step=0.000001, default=0.00001, label='Cube size')
-                    depth_reconstruction_btn = gr.Button('Depth Reconstruction', variant = 'primary')
-
+                    max_depth = gr.Slider(
+                        minimum=0, maximum=10, step=0.01, default=1, label="Max Depth"
+                    )
+                    min_depth = gr.Slider(
+                        minimum=0, maximum=10, step=0.01, default=0.1, label="Min Depth"
+                    )
+                    n_samples = gr.Slider(
+                        minimum=1e3,
+                        maximum=1e6,
+                        step=1e3,
+                        default=1e3,
+                        label="Number of Samples",
+                    )
+                    cube_size = gr.Slider(
+                        minimum=0.00001,
+                        maximum=0.001,
+                        step=0.000001,
+                        default=0.00001,
+                        label="Cube size",
+                    )
+                    depth_reconstruction_btn = gr.Button(
+                        "Depth Reconstruction", variant="primary"
+                    )

-
-        sam_decode_btn = gr.Button('Predict using points!', variant = 'primary')
-
+        sam_decode_btn = gr.Button("Predict using points!", variant="primary")
+
     # components
-    components = {point_coords, point_labels, image_edit_trigger, masks, cutout_idx, input_image, embedding,
-                  point_label_radio, text, reset_btn, sam_sgmt_everything_btn,
-                  sam_decode_btn, depth_reconstruction_btn, prompt_image, lbl_image, n_samples, max_depth, min_depth, cube_size, selected_masks_image}
-
+    components = {
+        point_coords,
+        point_labels,
+        image_edit_trigger,
+        masks,
+        cutout_idx,
+        input_image,
+        embedding,
+        point_label_radio,
+        text,
+        reset_btn,
+        sam_sgmt_everything_btn,
+        sam_decode_btn,
+        depth_reconstruction_btn,
+        prompt_image,
+        lbl_image,
+        n_samples,
+        max_depth,
+        min_depth,
+        cube_size,
+        selected_masks_image,
+    }
+
     def on_upload_image(input_image, upload_image):
-        ## Mirror because gradio.image webcam has mirror = True
-        upload_image_mirror = ImageOps.mirror(upload_image)
+        # Mirror because gradio.image webcam has mirror = True
+        upload_image_mirror = ImageOps.mirror(upload_image)
         return [upload_image_mirror, upload_image]
-    upload_image.upload(on_upload_image, [input_image, upload_image], [input_image, upload_image])
+
+    upload_image.upload(
+        on_upload_image, [input_image, upload_image], [input_image, upload_image]
+    )

     # event - init coords
     def on_reset_btn_click(input_image):
         return input_image, point_coords_empty(), point_labels_empty(), None, []
-    reset_btn.click(on_reset_btn_click, [input_image], [input_image, point_coords, point_labels], queue=False)

-    def on_prompt_image_select(input_image, prompt_image, point_coords, point_labels, point_label_radio, text, pred_masks, embedding, evt: gr.SelectData):
+    reset_btn.click(
+        on_reset_btn_click,
+        [input_image],
+        [input_image, point_coords, point_labels],
+        queue=False,
+    )
+
+    def on_prompt_image_select(
+        input_image,
+        prompt_image,
+        point_coords,
+        point_labels,
+        point_label_radio,
+        text,
+        pred_masks,
+        embedding,
+        evt: gr.SelectData,
+    ):
         sam_cpu.dummy_encode(input_image)
         x, y = evt.index
         color = red if point_label_radio == 0 else blue
@@ -98,47 +161,69 @@ with block:
         prompt_image = np.array(input_image.copy())

         cv2.circle(prompt_image, (x, y), 5, color, -1)
-        point_coords.append([x,y])
+        point_coords.append([x, y])
         point_labels.append(point_label_radio)
-        sam_masks = sam_cpu.cond_pred(pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding)
-        return [ prompt_image,
-                 (input_image, sam_masks),
-                 point_coords,
-                 point_labels,
-                 sam_masks ]
-
-    prompt_image.select(on_prompt_image_select,
-                        [input_image, prompt_image, point_coords, point_labels, point_label_radio, text, pred_masks, embedding],
-                        [prompt_image, lbl_image, point_coords, point_labels, pred_masks], queue=True)
-
-
-    def on_everything_image_select(input_image, pred_masks, masks, text, evt: gr.SelectData):
+        sam_masks = sam_cpu.cond_pred(
+            pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding
+        )
+        return [
+            prompt_image,
+            (input_image, sam_masks),
+            point_coords,
+            point_labels,
+            sam_masks,
+        ]
+
+    prompt_image.select(
+        on_prompt_image_select,
+        [
+            input_image,
+            prompt_image,
+            point_coords,
+            point_labels,
+            point_label_radio,
+            text,
+            pred_masks,
+            embedding,
+        ],
+        [prompt_image, lbl_image, point_coords, point_labels, pred_masks],
+        queue=True,
+    )
+
+    def on_everything_image_select(
+        input_image, pred_masks, masks, text, evt: gr.SelectData
+    ):
         i = evt.index
         mask = pred_masks[i][0]
         print(mask)
         print(type(mask))
         masks.append((mask, text))
-        anno = (input_image, masks)
-        return [masks, anno]
-
-    lbl_image.select(on_everything_image_select,
-                     [input_image, pred_masks, masks, text],
-                     [masks, selected_masks_image], queue=False)
-
+        anno = (input_image, masks)
+        return [masks, anno]
+
+    lbl_image.select(
+        on_everything_image_select,
+        [input_image, pred_masks, masks, text],
+        [masks, selected_masks_image],
+        queue=False,
+    )
+
     def on_selected_masks_image_select(input_image, masks, evt: gr.SelectData):
         i = evt.index
         del masks[i]
-        anno = (input_image, masks)
-        return [masks, anno]
-
-    selected_masks_image.select(on_selected_masks_image_select,
-                                [input_image, masks],
-                                [masks, selected_masks_image], queue=False)
-    #prompt_lbl_image.select(on_everything_image_select,
+        anno = (input_image, masks)
+        return [masks, anno]
+
+    selected_masks_image.select(
+        on_selected_masks_image_select,
+        [input_image, masks],
+        [masks, selected_masks_image],
+        queue=False,
+    )
+    # prompt_lbl_image.select(on_everything_image_select,
     #                   [input_image, prompt_masks, masks, text],
     #                   [masks, selected_masks_image], queue=False)

-
     def on_click_sam_encode_btn(inputs):
         print("encoding")
         # encode image on click
@@ -146,27 +231,43 @@ with block:
         sam_cpu.dummy_encode(inputs[input_image])
         print("encoding done")
         return [inputs[input_image], embedding]
-    sam_encode_btn.click(on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False)
+
+    sam_encode_btn.click(
+        on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False
+    )

     def on_click_sam_dencode_btn(inputs):
         print("inferencing")
         image = inputs[input_image]
-        generated_mask, _, _ = sam.cond_pred(pts=np.array(inputs[point_coords]), lbls=np.array(inputs[point_labels]))
+        generated_mask, _, _ = sam.cond_pred(
+            pts=np.array(inputs[point_coords]), lbls=np.array(inputs[point_labels])
+        )
         inputs[masks].append((generated_mask, inputs[text]))
         print(inputs[masks][0])
         return {prompt_image: (image, inputs[masks])}
-    sam_decode_btn.click(on_click_sam_dencode_btn, components, [prompt_image, masks, cutout_idx], queue=True)
+
+    sam_decode_btn.click(
+        on_click_sam_dencode_btn,
+        components,
+        [prompt_image, masks, cutout_idx],
+        queue=True,
+    )

     def on_depth_reconstruction_btn_click(inputs):
         print("depth reconstruction")
-        path = dpt.generate_obj_rgb(image=inputs[input_image],
-                                    cube_size=inputs[cube_size],
-                                    n_samples=inputs[n_samples],
-                                    #masks=inputs[masks],
-                                    min_depth=inputs[min_depth],
-                                    max_depth=inputs[max_depth]) #
+        path = dpt.generate_obj_rgb(
+            image=inputs[input_image],
+            cube_size=inputs[cube_size],
+            n_samples=inputs[n_samples],
+            # masks=inputs[masks],
+            min_depth=inputs[min_depth],
+            max_depth=inputs[max_depth],
+        )
         return {pcl_figure: path}
-    depth_reconstruction_btn.click(on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False)
+
+    depth_reconstruction_btn.click(
+        on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False
+    )

     def on_sam_sgmt_everything_btn_click(inputs):
         print("segmenting everything")
@@ -175,9 +276,15 @@ with block:
         print(image)
         print(sam_masks)
         return [(image, sam_masks), sam_masks]
-    sam_sgmt_everything_btn.click(on_sam_sgmt_everything_btn_click, components, [lbl_image, pred_masks], queue=True)
+
+    sam_sgmt_everything_btn.click(
+        on_sam_sgmt_everything_btn_click,
+        components,
+        [lbl_image, pred_masks],
+        queue=True,
+    )


-if __name__ == '__main__':
+if __name__ == "__main__":
     block.queue()
-    block.launch()
+    block.launch()
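Since black only changes layout, the app.py rewrite above is behavior-preserving; black verifies this itself by re-parsing both versions and comparing their ASTs (its built-in safety check, skipped only with --fast). A small sketch of that equivalence argument on one line from the hunk above:

import ast

# One assignment from app.py before and after this commit. Both spellings
# parse to the identical AST, so the reformat cannot change behavior.
before = "blue = (0,0,255)"
after = "blue = (0, 0, 255)"
assert ast.dump(ast.parse(before)) == ast.dump(ast.parse(after))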
inference.py CHANGED
@@ -12,9 +12,6 @@ import plotly.express as px
 import matplotlib.pyplot as plt


-
-
-
 def map_image_range(depth, min_value, max_value):
     """
     Maps the values of a numpy image array to a specified range.
@@ -43,6 +40,7 @@ def map_image_range(depth, min_value, max_value):
     print(np.max(mapped_image))
     return mapped_image

+
 def PCL(mask, depth):
     assert mask.shape == depth.shape
     assert type(mask) == np.ndarray
@@ -52,46 +50,62 @@ def PCL(mask, depth):
     print(np.unique(rgb_mask))
     depth_o3d = o3d.geometry.Image(depth)
     image_o3d = o3d.geometry.Image(rgb_mask)
-    #print(len(depth_o3d))
-    #print(len(image_o3d))
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    # print(len(depth_o3d))
+    # print(len(image_o3d))
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
-    #print(len(pcd))
+    # print(len(pcd))
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     print(np.unique(colors, axis=0))
     print(np.unique(colors, axis=1))
     print(np.unique(colors))
-    mask = (colors[:, 0] == 1.)
+    mask = colors[:, 0] == 1.0
     print(mask.sum())
     print(colors.shape)
     points = points[mask]
     colors = colors[mask]
     return points, colors

+
 def PCL_rgb(rgb, depth):
-    #assert rgb.shape == depth.shape
+    # assert rgb.shape == depth.shape
     assert type(rgb) == np.ndarray
     assert type(depth) == np.ndarray
     depth_o3d = o3d.geometry.Image(depth)
     image_o3d = o3d.geometry.Image(rgb)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     return points, colors

+
 class DepthPredictor:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
         self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         self.model.eval()
-
+
     def predict(self, image):
         # prepare image for the model
         encoding = self.feature_extractor(image, return_tensors="pt")
@@ -101,16 +115,16 @@ class DepthPredictor:
         predicted_depth = outputs.predicted_depth
         # interpolate to original size
         prediction = torch.nn.functional.interpolate(
-            predicted_depth.unsqueeze(1),
-            size=image.size[::-1],
-            mode="bicubic",
-            align_corners=False,
-        ).squeeze()
-
+            predicted_depth.unsqueeze(1),
+            size=image.size[::-1],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+
         output = prediction.cpu().numpy()
-        #output = 1 - (output/np.max(output))
+        # output = 1 - (output/np.max(output))
         return output
-
+
     def generate_pcl(self, image):
         print(np.array(image).shape)
         depth = self.predict(image)
@@ -118,34 +132,47 @@ class DepthPredictor:
         # Step 2: Create an RGBD image from the RGB and depth image
         depth_o3d = o3d.geometry.Image(depth)
         image_o3d = o3d.geometry.Image(np.array(image))
-        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+            image_o3d, depth_o3d, convert_rgb_to_intensity=False
+        )
         # Step 3: Create a PointCloud from the RGBD image
-        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+            rgbd_image,
+            o3d.camera.PinholeCameraIntrinsic(
+                o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+            ),
+        )
         # Step 4: Convert PointCloud data to a NumPy array
         points = np.asarray(pcd.points)
         colors = np.asarray(pcd.colors)
         print(points.shape, colors.shape)
         return points, colors
-
+
     def generate_fig(self, image):
         points, colors = self.generate_pcl(image)
-        data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2],
-                'red': colors[:, 0], 'green': colors[:, 1], 'blue': colors[:, 2]}
+        data = {
+            "x": points[:, 0],
+            "y": points[:, 1],
+            "z": points[:, 2],
+            "red": colors[:, 0],
+            "green": colors[:, 1],
+            "blue": colors[:, 2],
+        }
         df = pd.DataFrame(data)
         size = np.zeros(len(df))
         size[:] = 0.01
         # Step 6: Create a 3D scatter plot using Plotly Express
-        fig = px.scatter_3d(df, x='x', y='y', z='z', color='red', size=size)
+        fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
         return fig
-
+
     def generate_fig2(self, image):
         points, colors = self.generate_pcl(image)
         # Step 6: Create a 3D scatter plot using Plotly Express
         fig = plt.figure()
-        ax = fig.add_subplot(111, projection='3d')
-        ax.scatter(points,size=0.01, c=colors, marker='o')
+        ax = fig.add_subplot(111, projection="3d")
+        ax.scatter(points, size=0.01, c=colors, marker="o")
         return fig
-
+
     def generate_obj_rgb(self, image, n_samples, cube_size, max_depth, min_depth):
         # Step 1: Create a point cloud
         depth = self.predict(image)
@@ -159,7 +186,9 @@ class DepthPredictor:
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
         for point, color in zip(point_cloud, color_array):
-            cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+            cube = o3d.geometry.TriangleMesh.create_box(
+                width=cube_size, height=cube_size, depth=cube_size
+            )
             cube.translate(-point)
             cube.paint_uniform_color(color)
             mesh += cube
@@ -174,14 +203,19 @@ class DepthPredictor:
         print(point_cloud.shape)
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
-        cs = [(255,0,0),(0,255,0),(0,0,255)]
-        for c,(mask, _) in zip(cs, masks):
+        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
+        for c, (mask, _) in zip(cs, masks):
             mask = mask.ravel()
-            point_cloud_subset, color_array_subset = point_cloud[mask], color_array[mask]
+            point_cloud_subset, color_array_subset = (
+                point_cloud[mask],
+                color_array[mask],
+            )
             idxs = np.random.choice(len(point_cloud_subset), int(n_samples))
             point_cloud_subset = point_cloud_subset[idxs]
             for point in point_cloud_subset:
-                cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+                cube = o3d.geometry.TriangleMesh.create_box(
+                    width=cube_size, height=cube_size, depth=cube_size
+                )
                 cube.translate(-point)
                 cube.paint_uniform_color(c)
                 mesh += cube
@@ -189,22 +223,26 @@ class DepthPredictor:
         output_file = "./cloud.obj"
         o3d.io.write_triangle_mesh(output_file, mesh)
         return output_file
-
-    def generate_obj_masks2(self, image, masks, cube_size, n_samples, min_depth, max_depth):
+
+    def generate_obj_masks2(
+        self, image, masks, cube_size, n_samples, min_depth, max_depth
+    ):
         # Generate a point cloud
         depth = self.predict(image)
-        #depth = map_image_range(depth, min_depth, max_depth)
+        # depth = map_image_range(depth, min_depth, max_depth)
         image = np.array(image)
         mesh = o3d.geometry.TriangleMesh()
         # Create cubes and add them to the mesh
         print(len(masks))
-        cs = [(255,0,0),(0,255,0),(0,0,255)]
-        for c,(mask, _) in zip(cs, masks):
+        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
+        for c, (mask, _) in zip(cs, masks):
             points, _ = PCL(mask, depth)
-            #idxs = np.random.choice(len(points), int(n_samples))
-            #points = points[idxs]
+            # idxs = np.random.choice(len(points), int(n_samples))
+            # points = points[idxs]
             for point in points:
-                cube = o3d.geometry.TriangleMesh.create_box(width=cube_size, height=cube_size, depth=cube_size)
+                cube = o3d.geometry.TriangleMesh.create_box(
+                    width=cube_size, height=cube_size, depth=cube_size
+                )
                 cube.translate(-point)
                 cube.paint_uniform_color(c)
                 mesh += cube
@@ -212,12 +250,12 @@ class DepthPredictor:
         output_file = "./cloud.obj"
         o3d.io.write_triangle_mesh(output_file, mesh)
         return output_file
-


 import numpy as np
 from typing import Optional, Tuple

+
 class CustomSamPredictor(SamPredictor):
     def __init__(
         self,
@@ -249,7 +287,9 @@ class CustomSamPredictor(SamPredictor):
         # Transform the image to the form expected by the model
         input_image = self.transform.apply_image(image)
         input_image_torch = torch.as_tensor(input_image, device=self.device)
-        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
+            None, :, :, :
+        ]
         self.set_torch_image(input_image_torch, image.shape[:2])
         return self.get_image_embedding()

@@ -313,7 +353,7 @@ class CustomSamPredictor(SamPredictor):
         self.input_size = tuple(transformed_image.shape[-2:])
         input_image = self.model.preprocess(transformed_image)
         # The following line is commented out to avoid encoding on cpu
-        #self.features = self.model.image_encoder(input_image)
+        # self.features = self.model.image_encoder(input_image)
         self.is_image_set = True

     def dummy_set_image(
@@ -340,10 +380,13 @@ class CustomSamPredictor(SamPredictor):
         # Transform the image to the form expected by the model
         input_image = self.transform.apply_image(image)
         input_image_torch = torch.as_tensor(input_image, device=self.device)
-        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
+            None, :, :, :
+        ]

         self.dummy_set_torch_image(input_image_torch, image.shape[:2])

+
 class SegmentPredictor:
     def __init__(self, device=None):
         MODEL_TYPE = "vit_h"
@@ -351,13 +394,13 @@ class SegmentPredictor:
         sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
         # Select device
         if device is None:
-            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
         else:
             self.device = device
         sam.to(device=self.device)
         self.mask_generator = SamAutomaticMaskGenerator(sam)
         self.conditioned_pred = CustomSamPredictor(sam)
-
+
     def encode(self, image):
         image = np.array(image)
         return self.conditioned_pred.encode_image(image)
@@ -365,33 +408,23 @@ class SegmentPredictor:
     def dummy_encode(self, image):
         image = np.array(image)
         self.conditioned_pred.dummy_set_image(image)
-
+
     def cond_pred(self, embedding, pts, lbls):
         lbls = np.array(lbls)
         pts = np.array(pts)
         masks, _, _ = self.conditioned_pred.decode_and_predict(
-            embedding,
-            point_coords=pts,
-            point_labels=lbls,
-            multimask_output=True
-        )
-        idxs = np.argsort(-masks.sum(axis=(1,2)))
+            embedding, point_coords=pts, point_labels=lbls, multimask_output=True
+        )
+        idxs = np.argsort(-masks.sum(axis=(1, 2)))
         sam_masks = []
-        for n,i in enumerate(idxs):
+        for n, i in enumerate(idxs):
             sam_masks.append((masks[i], str(n)))
         return sam_masks

-
     def segment_everything(self, image):
         image = np.array(image)
         sam_result = self.mask_generator.generate(image)
         sam_masks = []
-        for i,mask in enumerate(sam_result):
+        for i, mask in enumerate(sam_result):
             sam_masks.append((mask["segmentation"], str(i)))
         return sam_masks
-
-
-
-
-
-
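The inference.py hunks also show black's "magic trailing comma" working in both directions: the old exploded decode_and_predict(...) call had no trailing comma after multimask_output=True, so black collapsed it onto one line, while the calls it was forced to split (the gr.Slider and .click calls in app.py) gained a trailing comma that keeps them exploded. A hedged sketch of both cases with a toy call f(a, b):

import black

# No trailing comma and the joined call fits in 88 columns: black collapses
# it, as it did for decode_and_predict(...) above.
print(black.format_str("f(\n    a,\n    b\n)\n", mode=black.Mode()), end="")
# f(a, b)

# A trailing ("magic") comma forces one argument per line, as in the
# reformatted .click(...) calls in app.py.
print(black.format_str("f(a, b,)\n", mode=black.Mode()), end="")
# f(
#     a,
#     b,
# )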
utils.py CHANGED
@@ -8,54 +8,53 @@ from inference import DepthPredictor
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D

-def create_3d_obj(rgb_image, depth_image, depth=10, path='./image.gltf'):
+
+def create_3d_obj(rgb_image, depth_image, depth=10, path="./image.gltf"):
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
     rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
-        image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     w = int(depth_image.shape[1])
     h = int(depth_image.shape[0])

     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
-    camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)
+    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
-        rgbd_image, camera_intrinsic)
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

-    print('normals')
+    print("normals")
     pcd.normals = o3d.utility.Vector3dVector(
-        np.zeros((1, 3)))  # invalidate existing normals
+        np.zeros((1, 3))
+    )  # invalidate existing normals
     pcd.estimate_normals(
-        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
+        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
+    )
     pcd.orient_normals_towards_camera_location(
-        camera_location=np.array([0., 0., 1000.]))
-    pcd.transform([[1, 0, 0, 0],
-                   [0, -1, 0, 0],
-                   [0, 0, -1, 0],
-                   [0, 0, 0, 1]])
-    pcd.transform([[-1, 0, 0, 0],
-                   [0, 1, 0, 0],
-                   [0, 0, 1, 0],
-                   [0, 0, 0, 1]])
-
-    print('run Poisson surface reconstruction')
+        camera_location=np.array([0.0, 0.0, 1000.0])
+    )
+    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+    pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
+
+    print("run Poisson surface reconstruction")
     with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
         mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            pcd, depth=depth, width=0, scale=1.1, linear_fit=True)
+            pcd, depth=depth, width=0, scale=1.1, linear_fit=True
+        )

     voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
-    print(f'voxel_size = {voxel_size:e}')
+    print(f"voxel_size = {voxel_size:e}")
     mesh = mesh_raw.simplify_vertex_clustering(
         voxel_size=voxel_size,
-        contraction=o3d.geometry.SimplificationContraction.Average)
+        contraction=o3d.geometry.SimplificationContraction.Average,
+    )

     # vertices_to_remove = densities < np.quantile(densities, 0.001)
     # mesh.remove_vertices_by_mask(vertices_to_remove)
     bbox = pcd.get_axis_aligned_bounding_box()
     mesh_crop = mesh.crop(bbox)
     gltf_path = path
-    o3d.io.write_triangle_mesh(
-        gltf_path, mesh_crop, write_triangle_uvs=True)
+    o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
     return gltf_path


@@ -64,7 +63,8 @@ def create_3d_pc(rgb_image, depth_image, depth=10):
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
     rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
-        image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )

     w = int(depth_image.shape[1])
     h = int(depth_image.shape[0])
@@ -77,20 +77,21 @@ def create_3d_pc(rgb_image, depth_image, depth=10):

     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)

-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
-        rgbd_image, camera_intrinsic)
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

-    print('Estimating normals...')
+    print("Estimating normals...")
     pcd.estimate_normals(
-        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
+        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
+    )
     pcd.orient_normals_towards_camera_location(
-        camera_location=np.array([0., 0., 1000.]))
+        camera_location=np.array([0.0, 0.0, 1000.0])
+    )

     # Save the point cloud as a PLY file
     filename = "pc.pcd"
     o3d.io.write_point_cloud(filename, pcd)

-    return filename  # Return the file path where the PLY file is saved
+    return filename  # Return the file path where the PLY file is saved


 def point_cloud(rgb_image):
@@ -99,29 +100,42 @@ def point_cloud(rgb_image):
     # Step 2: Create an RGBD image from the RGB and depth image
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     points = np.asarray(pcd.points)
     colors = np.asarray(pcd.colors)
     # Step 5: Create a DataFrame from the NumPy arrays
-    data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2],
-            'red': colors[:, 0], 'green': colors[:, 1], 'blue': colors[:, 2]}
+    data = {
+        "x": points[:, 0],
+        "y": points[:, 1],
+        "z": points[:, 2],
+        "red": colors[:, 0],
+        "green": colors[:, 1],
+        "blue": colors[:, 2],
+    }
     df = pd.DataFrame(data)
     size = np.zeros(len(df))
     size[:] = 0.01
     # Step 6: Create a 3D scatter plot using Plotly Express
-    fig = px.scatter_3d(df, x='x', y='y', z='z', color='red', size=size)
-
-
+    fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
+
     return fig

+
 def array_PCL(rgb_image, depth_image):
-    FX_RGB = 5.1885790117450188e+02
-    FY_RGB = 5.1946961112127485e+02
-    CX_RGB = 3.2558244941119034e+0
-    CY_RGB = 2.5373616633400465e+02
+    FX_RGB = 5.1885790117450188e02
+    FY_RGB = 5.1946961112127485e02
+    CX_RGB = 3.2558244941119034e0
+    CY_RGB = 2.5373616633400465e02
     FX_DEPTH = FX_RGB
     FY_DEPTH = FY_RGB
     CX_DEPTH = CX_RGB
@@ -142,11 +156,20 @@ def array_PCL(rgb_image, depth_image):

     # compute point cloud
     pcd = np.dstack((xx * z, yy * z, z)).reshape((length, 3))
-    #cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
-    xx_rgb = ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2).astype(int).clip(0, width - 1)
-    yy_rgb = ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB).astype(int).clip(0, height - 1)
-    #colors = rgb_image[yy_rgb, xx_rgb]/255
-    return pcd#, colors
+    # cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
+    xx_rgb = (
+        ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2)
+        .astype(int)
+        .clip(0, width - 1)
+    )
+    yy_rgb = (
+        ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB)
+        .astype(int)
+        .clip(0, height - 1)
+    )
+    # colors = rgb_image[yy_rgb, xx_rgb]/255
+    return pcd  # , colors
+

 def generate_PCL(image):
     depth_predictor = DepthPredictor()
@@ -159,7 +182,9 @@ def generate_PCL(image):

 def plot_PCL(rgb_image, depth_image):
     pcd, colors = array_PCL(rgb_image, depth_image)
-    fig = px.scatter_3d(x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1)
+    fig = px.scatter_3d(
+        x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1
+    )
     return fig


@@ -170,9 +195,16 @@ def PCL3(image):
     # Step 2: Create an RGBD image from the RGB and depth image
     depth_o3d = o3d.geometry.Image(depth_result)
     image_o3d = o3d.geometry.Image(image)
-    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(image_o3d, depth_o3d, convert_rgb_to_intensity=False)
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        image_o3d, depth_o3d, convert_rgb_to_intensity=False
+    )
     # Step 3: Create a PointCloud from the RGBD image
-    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        o3d.camera.PinholeCameraIntrinsic(
+            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
+        ),
+    )
     # Step 4: Convert PointCloud data to a NumPy array
     vis = o3d.visualization.Visualizer()
     vis.add_geometry(pcd)
@@ -183,16 +215,17 @@ def PCL3(image):
     sizes[:] = 0.01
     colors = [tuple(c) for c in colors]
     fig = plt.figure()
-    #ax = fig.add_subplot(111, projection='3d')
+    # ax = fig.add_subplot(111, projection='3d')
     ax = Axes3D(fig)
     print("plotting...")
     ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, s=0.01)
     print("Plot Succesful")
-    #data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
-    #df = pd.DataFrame(data)
+    # data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
+    # df = pd.DataFrame(data)
     # Step 6: Create a 3D scatter plot using Plotly Express
-    #fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")
-
+    # fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")
+
     return fig

+
 import numpy as np