jens committed on
Commit
9780d7b
·
1 Parent(s): 1d564b4
Files changed (2) hide show
  1. app.py +2 -3
  2. inference.py +19 -17
app.py CHANGED
@@ -7,16 +7,15 @@ from inference import DepthPredictor, SegmentPredictor
7
 
8
  def snap(image, video):
9
  depth_predictor = DepthPredictor()
 
10
  #segment_predictor = SegmentPredictor()
11
  #sam_result = segment_predictor.predict(image)
12
- depth_result = depth_predictor.predict(image)
13
-
14
  return [depth_result, video]
15
 
16
 
17
  demo = gr.Interface(
18
  snap,
19
- [gr.Image(source="webcam", tool=None), gr.Video(source="webcam")],
20
  ["image", "video"],
21
  )
22
 
 
7
 
8
  def snap(image, video):
9
  depth_predictor = DepthPredictor()
10
+ depth_result = depth_predictor.predict(image)
11
  #segment_predictor = SegmentPredictor()
12
  #sam_result = segment_predictor.predict(image)
 
 
13
  return [depth_result, video]
14
 
15
 
16
  demo = gr.Interface(
17
  snap,
18
+ [gr.Image(source="webcam", tool=None, type="pil"), gr.Video(source="webcam")],
19
  ["image", "video"],
20
  )
21
 
inference.py CHANGED
@@ -10,30 +10,32 @@ import requests
10
  class DepthPredictor:
11
  def __init__(self):
12
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
- self.processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
14
  self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
15
  self.model.eval()
16
 
17
  def predict(self, image):
18
  # prepare image for the model
19
- inputs = self.processor(images=image, return_tensors="pt")
 
 
20
  with torch.no_grad():
21
- outputs = self.model(**inputs)
22
  predicted_depth = outputs.predicted_depth
23
-
24
- # interpolate to original size
25
- prediction = torch.nn.functional.interpolate(
26
- predicted_depth.unsqueeze(1),
27
- size=image.size[::-1],
28
- mode="bicubic",
29
- align_corners=False,
30
- )
31
-
32
- # visualize the prediction
33
- output = prediction.squeeze().cpu().numpy()
34
- formatted = (output * 255 / np.max(output)).astype("uint8")
35
- depth = Image.fromarray(formatted)
36
- return depth
37
 
38
 
39
 
 
10
  class DepthPredictor:
11
  def __init__(self):
12
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+ self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
14
  self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
15
  self.model.eval()
16
 
17
  def predict(self, image):
18
  # prepare image for the model
19
+ encoding = self.feature_extractor(image, return_tensors="pt")
20
+
21
+ # forward pass
22
  with torch.no_grad():
23
+ outputs = self.model(**encoding)
24
  predicted_depth = outputs.predicted_depth
25
+
26
+ # interpolate to original size
27
+ prediction = torch.nn.functional.interpolate(
28
+ predicted_depth.unsqueeze(1),
29
+ size=image.size[::-1],
30
+ mode="bicubic",
31
+ align_corners=False,
32
+ ).squeeze()
33
+
34
+ output = prediction.cpu().numpy()
35
+ formatted = (output * 255 / np.max(output)).astype('uint8')
36
+ img = Image.fromarray(formatted)
37
+ return img
38
+
39
 
40
 
41