Commit 6b8e3c4 · jens committed
depth estimation
1 parent: 0ef8343

Files changed:
- app.py +7 -10
- inference.py +55 -0
- requirements.txt +2 -1
- tests.py +0 -0
app.py CHANGED
@@ -1,20 +1,17 @@
 import gradio as gr
 from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
 import supervision as sv
+from inference import DepthPredictor, SegmentPredictor



 def snap(image, video):
-
-
-
-
-
-
-    mask_annotator = sv.MaskAnnotator()
-    detections = sv.Detections.from_sam(sam_result=sam_result)
-    annotated_image = mask_annotator.annotate(scene=image.copy(), detections=detections)
-    return [annotated_image, video]
+    depth_predictor = DepthPredictor()
+    #segment_predictor = SegmentPredictor()
+    #sam_result = segment_predictor.predict(image)
+    depth_result = depth_predictor.predict(image)
+
+    return [depth_result, video]


 demo = gr.Interface(
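The diff view cuts off at the opening of demo = gr.Interface(. For orientation only, here is a minimal sketch of how snap could plausibly be wired up; the component choices below are assumptions, not part of the commit, though gr.Image(type="pil") is at least consistent with DepthPredictor.predict, which reads image.size and therefore expects a PIL image:

# hypothetical continuation of app.py; the real component list is not shown in the diff
demo = gr.Interface(
    fn=snap,
    inputs=[gr.Image(type="pil"), gr.Video()],
    outputs=[gr.Image(), gr.Video()],
)

if __name__ == "__main__":
    demo.launch()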
inference.py ADDED
@@ -0,0 +1,55 @@
+from transformers import DPTImageProcessor, DPTForDepthEstimation
+from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
+import gradio as gr
+import supervision as sv
+import torch
+import numpy as np
+from PIL import Image
+import requests
+
+
+class DepthPredictor:
+    def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # the image processor is not a torch module, so it has no .to() and stays on CPU
+        self.processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(self.device)
+        self.model.eval()
+
+    def predict(self, image):
+        # prepare the (PIL) image for the model
+        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            predicted_depth = outputs.predicted_depth
+
+        # interpolate back to the original image size
+        prediction = torch.nn.functional.interpolate(
+            predicted_depth.unsqueeze(1),
+            size=image.size[::-1],
+            mode="bicubic",
+            align_corners=False,
+        )
+
+        # visualize the prediction as an 8-bit grayscale image
+        output = prediction.squeeze().cpu().numpy()
+        formatted = (output * 255 / np.max(output)).astype("uint8")
+        depth = Image.fromarray(formatted)
+        return depth
+
+
+class SegmentPredictor:
+    def __init__(self):
+        MODEL_TYPE = "vit_b"
+        checkpoint = "sam_vit_b_01ec64.pth"
+        sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
+        self.mask_generator = SamAutomaticMaskGenerator(sam)
+
+    def predict(self, image):
+        sam_result = self.mask_generator.generate(image)
+        mask_annotator = sv.MaskAnnotator()
+        detections = sv.Detections.from_sam(sam_result=sam_result)
+        annotated_image = mask_annotator.annotate(scene=image.copy(), detections=detections)
+        return [annotated_image]
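A minimal usage sketch for the two new classes, assuming a local example.jpg and the SAM checkpoint sam_vit_b_01ec64.pth in the working directory. Note the input-type asymmetry: DepthPredictor.predict reads image.size, so it wants a PIL image, while SamAutomaticMaskGenerator.generate wants an (H, W, 3) uint8 numpy array:

from PIL import Image
import numpy as np

from inference import DepthPredictor, SegmentPredictor

image = Image.open("example.jpg").convert("RGB")  # hypothetical input file

depth = DepthPredictor().predict(image)   # PIL image in, grayscale PIL depth map out
depth.save("depth.png")

annotated = SegmentPredictor().predict(np.array(image))[0]  # numpy array in, [annotated image] out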
requirements.txt CHANGED
@@ -4,4 +4,5 @@ segment-anything
 supervision
 torch
 torchvision
-opencv-python
+opencv-python
+transformers
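One gap requirements.txt cannot cover: SegmentPredictor hard-codes the checkpoint sam_vit_b_01ec64.pth, which pip does not install. A sketch of fetching it with requests (which inference.py already imports but never uses), from the official segment-anything release URL for the ViT-B weights:

import requests

# official download location for the SAM ViT-B checkpoint
url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
with open("sam_vit_b_01ec64.pth", "wb") as f:
    f.write(requests.get(url).content)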
tests.py ADDED
File without changes (empty file)