Spaces:

NN-BRD
/

hackathon_depth_segment

Runtime error

jens committed on Aug 1, 2023

Commit

9780d7b

1 Parent(s): 1d564b4

fix

Files changed (2) hide show

app.py CHANGED Viewed

@@ -7,16 +7,15 @@ from inference import DepthPredictor, SegmentPredictor
 def snap(image, video):
     depth_predictor = DepthPredictor()
     #segment_predictor = SegmentPredictor()
     #sam_result = segment_predictor.predict(image)
-    depth_result = depth_predictor.predict(image)
     return [depth_result, video]
 demo = gr.Interface(
     snap,
-    [gr.Image(source="webcam", tool=None), gr.Video(source="webcam")],
     ["image", "video"],
 )

 def snap(image, video):
     depth_predictor = DepthPredictor()
+    depth_result = depth_predictor.predict(image)
     #segment_predictor = SegmentPredictor()
     #sam_result = segment_predictor.predict(image)
     return [depth_result, video]
 demo = gr.Interface(
     snap,
+    [gr.Image(source="webcam", tool=None, type="pil"), gr.Video(source="webcam")],
     ["image", "video"],
 )

inference.py CHANGED Viewed

@@ -10,30 +10,32 @@ import requests
 class DepthPredictor:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
         self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         self.model.eval()
     def predict(self, image):
         # prepare image for the model
-        inputs = self.processor(images=image, return_tensors="pt")
         with torch.no_grad():
-            outputs = self.model(**inputs)
             predicted_depth = outputs.predicted_depth
-        # interpolate to original size
-        prediction = torch.nn.functional.interpolate(
-            predicted_depth.unsqueeze(1),
-            size=image.size[::-1],
-            mode="bicubic",
-            align_corners=False,
-        )
-        # visualize the prediction
-        output = prediction.squeeze().cpu().numpy()
-        formatted = (output * 255 / np.max(output)).astype("uint8")
-        depth = Image.fromarray(formatted)
-        return depth

 class DepthPredictor:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
         self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
         self.model.eval()
     def predict(self, image):
         # prepare image for the model
+        encoding = self.feature_extractor(image, return_tensors="pt")
+        # forward pass
         with torch.no_grad():
+            outputs = self.model(**encoding)
             predicted_depth = outputs.predicted_depth
+            # interpolate to original size
+            prediction = torch.nn.functional.interpolate(
+                                predicted_depth.unsqueeze(1),
+                                size=image.size[::-1],
+                                mode="bicubic",
+                                align_corners=False,
+                        ).squeeze()
+        output = prediction.cpu().numpy()
+        formatted = (output * 255 / np.max(output)).astype('uint8')
+        img = Image.fromarray(formatted)
+        return img