Spaces: Runtime error

jens committed · Commit 9f0c4b3 · 1 Parent: 0c863e7
autolinting with black

Browse files:
- app.py +186 -79
- inference.py +99 -66
- utils.py +88 -55
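The commit is a pure formatting pass. As a minimal sketch of how such a pass can be reproduced with black's Python API (an assumption for illustration; it requires `pip install black`, and the CLI equivalent is `black app.py inference.py utils.py`):

# Sketch only: re-run a black autolint pass over the three files in this commit.
import black

for path in ["app.py", "inference.py", "utils.py"]:
    with open(path, encoding="utf-8") as f:
        src = f.read()
    # format_str applies black's default style (88-char lines, double quotes)
    formatted = black.format_str(src, mode=black.Mode())
    with open(path, "w", encoding="utf-8") as f:
        f.write(formatted)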
app.py
CHANGED
@@ -1,19 +1,18 @@
import os
import gradio as gr
import numpy as np
import cv2
from PIL import Image, ImageOps
import torch
from inference import SegmentPredictor, DepthPredictor
from utils import generate_PCL, PCL3, point_cloud


sam = SegmentPredictor()
sam_cpu = SegmentPredictor(device="cpu")
dpt = DepthPredictor()
red = (255, 0, 0)
blue = (0, 0, 255)
annos = []

@@ -22,8 +21,10 @@ with block:
    # States
    def point_coords_empty():
        return []

    def point_labels_empty():
        return []

    image_edit_trigger = gr.State(True)
    point_coords = gr.State(point_coords_empty)
    point_labels = gr.State(point_labels_empty)
@@ -36,61 +37,123 @@ with block:
    # UI
    with gr.Column():
        gr.Markdown(
            """# Segment Anything Model (SAM)
## a new AI model from Meta AI that can "cut out" any object, in any image, with a single click 🚀
SAM is a promptable segmentation system with zero-shot generalization to unfamiliar objects and images, without the need for additional training. [**Official Project**](https://segment-anything.com/) [**Code**](https://github.com/facebookresearch/segment-anything).
"""
        )
    with gr.Row():
        with gr.Column():
            with gr.Tab("Upload Image"):
                # mirror_webcam = False
                upload_image = gr.Image(label="Input", type="pil", tool=None)
            with gr.Tab("Webcam"):
                # mirror_webcam = False
                input_image = gr.Image(
                    label="Input", type="pil", tool=None, source="webcam"
                )
            with gr.Row():
                sam_encode_btn = gr.Button("Encode", variant="primary")
                sam_sgmt_everything_btn = gr.Button(
                    "Segment Everything!", variant="primary"
                )
                # sam_encode_status = gr.Label('Not encoded yet')
            with gr.Row():
                prompt_image = gr.Image(label="Segments")
                # prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
                lbl_image = gr.AnnotatedImage(label="Everything")
            with gr.Row():
                point_label_radio = gr.Radio(label="Point Label", choices=[1, 0], value=1)
                text = gr.Textbox(label="Mask Name")
                reset_btn = gr.Button("New Mask")
                selected_masks_image = gr.AnnotatedImage(label="Selected Masks")
        with gr.Row():
            with gr.Column():
                pcl_figure = gr.Model3D(
                    label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]
                )
                with gr.Row():
                    max_depth = gr.Slider(
                        minimum=0, maximum=10, step=0.01, default=1, label="Max Depth"
                    )
                    min_depth = gr.Slider(
                        minimum=0, maximum=10, step=0.01, default=0.1, label="Min Depth"
                    )
                    n_samples = gr.Slider(
                        minimum=1e3,
                        maximum=1e6,
                        step=1e3,
                        default=1e3,
                        label="Number of Samples",
                    )
                    cube_size = gr.Slider(
                        minimum=0.00001,
                        maximum=0.001,
                        step=0.000001,
                        default=0.00001,
                        label="Cube size",
                    )
                depth_reconstruction_btn = gr.Button(
                    "Depth Reconstruction", variant="primary"
                )

    sam_decode_btn = gr.Button("Predict using points!", variant="primary")

    # components
    components = {
        point_coords,
        point_labels,
        image_edit_trigger,
        masks,
        cutout_idx,
        input_image,
        embedding,
        point_label_radio,
        text,
        reset_btn,
        sam_sgmt_everything_btn,
        sam_decode_btn,
        depth_reconstruction_btn,
        prompt_image,
        lbl_image,
        n_samples,
        max_depth,
        min_depth,
        cube_size,
        selected_masks_image,
    }

    def on_upload_image(input_image, upload_image):
        # Mirror because gradio.image webcam has mirror = True
        upload_image_mirror = ImageOps.mirror(upload_image)
        return [upload_image_mirror, upload_image]

    upload_image.upload(
        on_upload_image, [input_image, upload_image], [input_image, upload_image]
    )

    # event - init coords
    def on_reset_btn_click(input_image):
        return input_image, point_coords_empty(), point_labels_empty(), None, []

    reset_btn.click(
        on_reset_btn_click,
        [input_image],
        [input_image, point_coords, point_labels],
        queue=False,
    )

    def on_prompt_image_select(
        input_image,
        prompt_image,
        point_coords,
        point_labels,
        point_label_radio,
        text,
        pred_masks,
        embedding,
        evt: gr.SelectData,
    ):
        sam_cpu.dummy_encode(input_image)
        x, y = evt.index
        color = red if point_label_radio == 0 else blue
@@ -98,47 +161,69 @@ with block:
        prompt_image = np.array(input_image.copy())

        cv2.circle(prompt_image, (x, y), 5, color, -1)
        point_coords.append([x, y])
        point_labels.append(point_label_radio)
        sam_masks = sam_cpu.cond_pred(
            pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding
        )
        return [
            prompt_image,
            (input_image, sam_masks),
            point_coords,
            point_labels,
            sam_masks,
        ]

    prompt_image.select(
        on_prompt_image_select,
        [
            input_image,
            prompt_image,
            point_coords,
            point_labels,
            point_label_radio,
            text,
            pred_masks,
            embedding,
        ],
        [prompt_image, lbl_image, point_coords, point_labels, pred_masks],
        queue=True,
    )

    def on_everything_image_select(
        input_image, pred_masks, masks, text, evt: gr.SelectData
    ):
        i = evt.index
        mask = pred_masks[i][0]
        print(mask)
        print(type(mask))
        masks.append((mask, text))
        anno = (input_image, masks)
        return [masks, anno]

    lbl_image.select(
        on_everything_image_select,
        [input_image, pred_masks, masks, text],
        [masks, selected_masks_image],
        queue=False,
    )

    def on_selected_masks_image_select(input_image, masks, evt: gr.SelectData):
        i = evt.index
        del masks[i]
        anno = (input_image, masks)
        return [masks, anno]

    selected_masks_image.select(
        on_selected_masks_image_select,
        [input_image, masks],
        [masks, selected_masks_image],
        queue=False,
    )
    # prompt_lbl_image.select(on_everything_image_select,
    # [input_image, prompt_masks, masks, text],
    # [masks, selected_masks_image], queue=False)

    def on_click_sam_encode_btn(inputs):
        print("encoding")
        # encode image on click
@@ -146,27 +231,43 @@ with block:
        sam_cpu.dummy_encode(inputs[input_image])
        print("encoding done")
        return [inputs[input_image], embedding]

    sam_encode_btn.click(
        on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False
    )

    def on_click_sam_dencode_btn(inputs):
        print("inferencing")
        image = inputs[input_image]
        generated_mask, _, _ = sam.cond_pred(
            pts=np.array(inputs[point_coords]), lbls=np.array(inputs[point_labels])
        )
        inputs[masks].append((generated_mask, inputs[text]))
        print(inputs[masks][0])
        return {prompt_image: (image, inputs[masks])}

    sam_decode_btn.click(
        on_click_sam_dencode_btn,
        components,
        [prompt_image, masks, cutout_idx],
        queue=True,
    )

    def on_depth_reconstruction_btn_click(inputs):
        print("depth reconstruction")
        path = dpt.generate_obj_rgb(
            image=inputs[input_image],
            cube_size=inputs[cube_size],
            n_samples=inputs[n_samples],
            # masks=inputs[masks],
            min_depth=inputs[min_depth],
            max_depth=inputs[max_depth],
        )
        return {pcl_figure: path}

    depth_reconstruction_btn.click(
        on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False
    )

    def on_sam_sgmt_everything_btn_click(inputs):
        print("segmenting everything")
@@ -175,9 +276,15 @@ with block:
        print(image)
        print(sam_masks)
        return [(image, sam_masks), sam_masks]

    sam_sgmt_everything_btn.click(
        on_sam_sgmt_everything_btn_click,
        components,
        [lbl_image, pred_masks],
        queue=True,
    )


if __name__ == "__main__":
    block.queue()
    block.launch()
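For orientation while reading the hunks above: app.py is built around Gradio's Blocks event wiring, which is most of what black reflowed. A minimal, self-contained sketch of that pattern (hypothetical component names, assuming the gradio 3.x API the file itself uses):

# Sketch of the Blocks event-wiring pattern in app.py; the real app routes
# clicks to SAM/DPT predictors instead of this placeholder handler.
import gradio as gr

with gr.Blocks() as block:
    inp = gr.Image(label="Input", type="pil")
    btn = gr.Button("Run", variant="primary")
    out = gr.Image(label="Output")

    def on_click(image):
        # placeholder: return the input unchanged
        return image

    btn.click(on_click, [inp], [out], queue=False)

if __name__ == "__main__":
    block.queue()
    block.launch()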
inference.py
CHANGED
@@ -12,9 +12,6 @@ import plotly.express as px
import matplotlib.pyplot as plt


def map_image_range(depth, min_value, max_value):
    """
    Maps the values of a numpy image array to a specified range.
@@ -43,6 +40,7 @@ def map_image_range(depth, min_value, max_value):
    print(np.max(mapped_image))
    return mapped_image


def PCL(mask, depth):
    assert mask.shape == depth.shape
    assert type(mask) == np.ndarray
@@ -52,46 +50,62 @@ def PCL(mask, depth):
    print(np.unique(rgb_mask))
    depth_o3d = o3d.geometry.Image(depth)
    image_o3d = o3d.geometry.Image(rgb_mask)
    # print(len(depth_o3d))
    # print(len(image_o3d))
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    # print(len(pcd))
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    print(np.unique(colors, axis=0))
    print(np.unique(colors, axis=1))
    print(np.unique(colors))
    mask = colors[:, 0] == 1.0
    print(mask.sum())
    print(colors.shape)
    points = points[mask]
    colors = colors[mask]
    return points, colors


def PCL_rgb(rgb, depth):
    # assert rgb.shape == depth.shape
    assert type(rgb) == np.ndarray
    assert type(depth) == np.ndarray
    depth_o3d = o3d.geometry.Image(depth)
    image_o3d = o3d.geometry.Image(rgb)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    return points, colors


class DepthPredictor:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
        self.model.eval()

    def predict(self, image):
        # prepare image for the model
        encoding = self.feature_extractor(image, return_tensors="pt")
@@ -101,16 +115,16 @@ class DepthPredictor:
        predicted_depth = outputs.predicted_depth
        # interpolate to original size
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

        output = prediction.cpu().numpy()
        # output = 1 - (output/np.max(output))
        return output

    def generate_pcl(self, image):
        print(np.array(image).shape)
        depth = self.predict(image)
@@ -118,34 +132,47 @@ class DepthPredictor:
        # Step 2: Create an RGBD image from the RGB and depth image
        depth_o3d = o3d.geometry.Image(depth)
        image_o3d = o3d.geometry.Image(np.array(image))
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False
        )
        # Step 3: Create a PointCloud from the RGBD image
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
            rgbd_image,
            o3d.camera.PinholeCameraIntrinsic(
                o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
            ),
        )
        # Step 4: Convert PointCloud data to a NumPy array
        points = np.asarray(pcd.points)
        colors = np.asarray(pcd.colors)
        print(points.shape, colors.shape)
        return points, colors

    def generate_fig(self, image):
        points, colors = self.generate_pcl(image)
        data = {
            "x": points[:, 0],
            "y": points[:, 1],
            "z": points[:, 2],
            "red": colors[:, 0],
            "green": colors[:, 1],
            "blue": colors[:, 2],
        }
        df = pd.DataFrame(data)
        size = np.zeros(len(df))
        size[:] = 0.01
        # Step 6: Create a 3D scatter plot using Plotly Express
        fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
        return fig

    def generate_fig2(self, image):
        points, colors = self.generate_pcl(image)
        # Step 6: Create a 3D scatter plot using Plotly Express
        fig = plt.figure()
        ax = fig.add_subplot(111, projection="3d")
        ax.scatter(points, size=0.01, c=colors, marker="o")
        return fig

    def generate_obj_rgb(self, image, n_samples, cube_size, max_depth, min_depth):
        # Step 1: Create a point cloud
        depth = self.predict(image)
@@ -159,7 +186,9 @@ class DepthPredictor:
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        for point, color in zip(point_cloud, color_array):
            cube = o3d.geometry.TriangleMesh.create_box(
                width=cube_size, height=cube_size, depth=cube_size
            )
            cube.translate(-point)
            cube.paint_uniform_color(color)
            mesh += cube
@@ -174,14 +203,19 @@ class DepthPredictor:
        print(point_cloud.shape)
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
        for c, (mask, _) in zip(cs, masks):
            mask = mask.ravel()
            point_cloud_subset, color_array_subset = (
                point_cloud[mask],
                color_array[mask],
            )
            idxs = np.random.choice(len(point_cloud_subset), int(n_samples))
            point_cloud_subset = point_cloud_subset[idxs]
            for point in point_cloud_subset:
                cube = o3d.geometry.TriangleMesh.create_box(
                    width=cube_size, height=cube_size, depth=cube_size
                )
                cube.translate(-point)
                cube.paint_uniform_color(c)
                mesh += cube
@@ -189,22 +223,26 @@ class DepthPredictor:
        output_file = "./cloud.obj"
        o3d.io.write_triangle_mesh(output_file, mesh)
        return output_file

    def generate_obj_masks2(
        self, image, masks, cube_size, n_samples, min_depth, max_depth
    ):
        # Generate a point cloud
        depth = self.predict(image)
        # depth = map_image_range(depth, min_depth, max_depth)
        image = np.array(image)
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        print(len(masks))
        cs = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
        for c, (mask, _) in zip(cs, masks):
            points, _ = PCL(mask, depth)
            # idxs = np.random.choice(len(points), int(n_samples))
            # points = points[idxs]
            for point in points:
                cube = o3d.geometry.TriangleMesh.create_box(
                    width=cube_size, height=cube_size, depth=cube_size
                )
                cube.translate(-point)
                cube.paint_uniform_color(c)
                mesh += cube
@@ -212,12 +250,12 @@ class DepthPredictor:
        output_file = "./cloud.obj"
        o3d.io.write_triangle_mesh(output_file, mesh)
        return output_file


import numpy as np
from typing import Optional, Tuple


class CustomSamPredictor(SamPredictor):
    def __init__(
        self,
@@ -249,7 +287,9 @@ class CustomSamPredictor(SamPredictor):
        # Transform the image to the form expected by the model
        input_image = self.transform.apply_image(image)
        input_image_torch = torch.as_tensor(input_image, device=self.device)
        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
            None, :, :, :
        ]
        self.set_torch_image(input_image_torch, image.shape[:2])
        return self.get_image_embedding()

@@ -313,7 +353,7 @@ class CustomSamPredictor(SamPredictor):
        self.input_size = tuple(transformed_image.shape[-2:])
        input_image = self.model.preprocess(transformed_image)
        # The following line is commented out to avoid encoding on cpu
        # self.features = self.model.image_encoder(input_image)
        self.is_image_set = True

    def dummy_set_image(
@@ -340,10 +380,13 @@ class CustomSamPredictor(SamPredictor):
        # Transform the image to the form expected by the model
        input_image = self.transform.apply_image(image)
        input_image_torch = torch.as_tensor(input_image, device=self.device)
        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
            None, :, :, :
        ]

        self.dummy_set_torch_image(input_image_torch, image.shape[:2])


class SegmentPredictor:
    def __init__(self, device=None):
        MODEL_TYPE = "vit_h"
@@ -351,13 +394,13 @@ class SegmentPredictor:
        sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
        # Select device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        sam.to(device=self.device)
        self.mask_generator = SamAutomaticMaskGenerator(sam)
        self.conditioned_pred = CustomSamPredictor(sam)

    def encode(self, image):
        image = np.array(image)
        return self.conditioned_pred.encode_image(image)
@@ -365,33 +408,23 @@ class SegmentPredictor:
    def dummy_encode(self, image):
        image = np.array(image)
        self.conditioned_pred.dummy_set_image(image)

    def cond_pred(self, embedding, pts, lbls):
        lbls = np.array(lbls)
        pts = np.array(pts)
        masks, _, _ = self.conditioned_pred.decode_and_predict(
            embedding, point_coords=pts, point_labels=lbls, multimask_output=True
        )
        idxs = np.argsort(-masks.sum(axis=(1, 2)))
        sam_masks = []
        for n, i in enumerate(idxs):
            sam_masks.append((masks[i], str(n)))
        return sam_masks

    def segment_everything(self, image):
        image = np.array(image)
        sam_result = self.mask_generator.generate(image)
        sam_masks = []
        for i, mask in enumerate(sam_result):
            sam_masks.append((mask["segmentation"], str(i)))
        return sam_masks
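inference.py repeats one Open3D idiom throughout: wrap RGB and depth arrays as o3d Images, fuse them into an RGBDImage, and back-project to a point cloud. A minimal runnable sketch of that idiom on synthetic data (assumes open3d and numpy are installed; the PrimeSense intrinsics match the calls in the file):

# Sketch of the RGBD -> point-cloud pipeline used by PCL / PCL_rgb /
# generate_pcl above, exercised on dummy arrays.
import numpy as np
import open3d as o3d

rgb = np.zeros((480, 640, 3), dtype=np.uint8)    # dummy color image
depth = np.ones((480, 640), dtype=np.float32)    # dummy depth map

rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
    o3d.geometry.Image(rgb),
    o3d.geometry.Image(depth),
    convert_rgb_to_intensity=False,
)
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
    rgbd,
    o3d.camera.PinholeCameraIntrinsic(
        o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
    ),
)
points = np.asarray(pcd.points)  # (N, 3) xyz coordinates
colors = np.asarray(pcd.colors)  # (N, 3) rgb values in [0, 1]
print(points.shape, colors.shape)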
utils.py
CHANGED
@@ -8,54 +8,53 @@ from inference import DepthPredictor
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


def create_3d_obj(rgb_image, depth_image, depth=10, path="./image.gltf"):
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    w = int(depth_image.shape[1])
    h = int(depth_image.shape[0])

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    print("normals")
    pcd.normals = o3d.utility.Vector3dVector(
        np.zeros((1, 3))
    )  # invalidate existing normals
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
    )
    pcd.orient_normals_towards_camera_location(
        camera_location=np.array([0.0, 0.0, 1000.0])
    )
    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])

    print("run Poisson surface reconstruction")
    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
        mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=depth, width=0, scale=1.1, linear_fit=True
        )

    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
    print(f"voxel_size = {voxel_size:e}")
    mesh = mesh_raw.simplify_vertex_clustering(
        voxel_size=voxel_size,
        contraction=o3d.geometry.SimplificationContraction.Average,
    )

    # vertices_to_remove = densities < np.quantile(densities, 0.001)
    # mesh.remove_vertices_by_mask(vertices_to_remove)
    bbox = pcd.get_axis_aligned_bounding_box()
    mesh_crop = mesh.crop(bbox)
    gltf_path = path
    o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
    return gltf_path

@@ -64,7 +63,8 @@ def create_3d_pc(rgb_image, depth_image, depth=10):
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )

    w = int(depth_image.shape[1])
    h = int(depth_image.shape[0])
@@ -77,20 +77,21 @@ def create_3d_pc(rgb_image, depth_image, depth=10):

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    print("Estimating normals...")
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
    )
    pcd.orient_normals_towards_camera_location(
        camera_location=np.array([0.0, 0.0, 1000.0])
    )

    # Save the point cloud as a PLY file
    filename = "pc.pcd"
    o3d.io.write_point_cloud(filename, pcd)

    return filename  # Return the file path where the PLY file is saved


def point_cloud(rgb_image):
@@ -99,29 +100,42 @@ def point_cloud(rgb_image):
    # Step 2: Create an RGBD image from the RGB and depth image
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    # Step 5: Create a DataFrame from the NumPy arrays
    data = {
        "x": points[:, 0],
        "y": points[:, 1],
        "z": points[:, 2],
        "red": colors[:, 0],
        "green": colors[:, 1],
        "blue": colors[:, 2],
    }
    df = pd.DataFrame(data)
    size = np.zeros(len(df))
    size[:] = 0.01
    # Step 6: Create a 3D scatter plot using Plotly Express
    fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)

    return fig


def array_PCL(rgb_image, depth_image):
    FX_RGB = 5.1885790117450188e02
    FY_RGB = 5.1946961112127485e02
    CX_RGB = 3.2558244941119034e0
    CY_RGB = 2.5373616633400465e02
    FX_DEPTH = FX_RGB
    FY_DEPTH = FY_RGB
    CX_DEPTH = CX_RGB
@@ -142,11 +156,20 @@ def array_PCL(rgb_image, depth_image):

    # compute point cloud
    pcd = np.dstack((xx * z, yy * z, z)).reshape((length, 3))
    # cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
    xx_rgb = (
        ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2)
        .astype(int)
        .clip(0, width - 1)
    )
    yy_rgb = (
        ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB)
        .astype(int)
        .clip(0, height - 1)
    )
    # colors = rgb_image[yy_rgb, xx_rgb]/255
    return pcd  # , colors


def generate_PCL(image):
    depth_predictor = DepthPredictor()
@@ -159,7 +182,9 @@ def generate_PCL(image):

def plot_PCL(rgb_image, depth_image):
    pcd, colors = array_PCL(rgb_image, depth_image)
    fig = px.scatter_3d(
        x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1
    )
    return fig

@@ -170,9 +195,16 @@ def PCL3(image):
    # Step 2: Create an RGBD image from the RGB and depth image
    depth_o3d = o3d.geometry.Image(depth_result)
    image_o3d = o3d.geometry.Image(image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    vis = o3d.visualization.Visualizer()
    vis.add_geometry(pcd)
@@ -183,16 +215,17 @@ def PCL3(image):
    sizes[:] = 0.01
    colors = [tuple(c) for c in colors]
    fig = plt.figure()
    # ax = fig.add_subplot(111, projection='3d')
    ax = Axes3D(fig)
    print("plotting...")
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, s=0.01)
    print("Plot Succesful")
    # data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
    # df = pd.DataFrame(data)
    # Step 6: Create a 3D scatter plot using Plotly Express
    # fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")

    return fig


import numpy as np
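A hypothetical driver showing how utils.create_3d_obj pairs with the DPT depth predictor from inference.py (example.jpg is a placeholder filename; this assumes the repo's own modules are importable and is a sketch, not part of the commit):

# Sketch only: run depth estimation, then Poisson-reconstruct a glTF mesh.
import numpy as np
from PIL import Image
from inference import DepthPredictor
from utils import create_3d_obj

img = Image.open("example.jpg")         # placeholder RGB test image
depth = DepthPredictor().predict(img)   # (H, W) float32 depth map
gltf = create_3d_obj(np.array(img), depth, depth=10, path="./image.gltf")
print("mesh written to", gltf)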