Shane922 committed
Commit 891bd26 · 1 Parent(s): 46a6c3d

enable fp16 inference

app.py CHANGED
@@ -128,7 +128,7 @@ def construct_demo():
                     label="max process length",
                     minimum=-1,
                     maximum=1000,
-                    value=-1,
+                    value=500,
                     step=1,
                 )
                 target_fps = gr.Slider(
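For context, this slider caps how many frames the demo will process; -1 meant "no cap", and the new default of 500 bounds worst-case runtime. A minimal sketch of such a control, assuming the standard gradio API (only the parameters visible in the hunk come from the source):

import gradio as gr

with gr.Blocks() as demo:
    # Hypothetical reconstruction of the slider changed above; -1 still
    # means "process the whole video", the default now caps at 500 frames.
    max_len = gr.Slider(
        label="max process length",
        minimum=-1,
        maximum=1000,
        value=500,
        step=1,
    )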
utils/dc_utils.py CHANGED
@@ -22,10 +22,12 @@ def read_video_frames(video_path, process_length, target_fps=-1, max_res=-1):
     original_height, original_width = vid.get_batch([0]).shape[1:3]
     height = original_height
     width = original_width
+    print(f'==> original video size: {original_height} x {original_width}')
     if max_res > 0 and max(height, width) > max_res:
         scale = max_res / max(original_height, original_width)
         height = ensure_even(round(original_height * scale))
         width = ensure_even(round(original_width * scale))
+        print(f'==> downsample video size: {height} x {width}')
 
     vid = VideoReader(video_path, ctx=cpu(0), width=width, height=height)
 
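The two added prints surface the resize decision read_video_frames makes before re-opening the video at the target size. A self-contained sketch of that sizing logic (the helper name compute_target_size is ours), with ensure_even assumed to round an odd dimension down to the nearest even value, since even sizes are a common codec/model constraint:

def ensure_even(value):
    # Assumed behavior: drop an odd remainder so height/width stay even.
    return value if value % 2 == 0 else value - 1

def compute_target_size(original_height, original_width, max_res=-1):
    # Cap the longer side at max_res while preserving aspect ratio;
    # max_res <= 0 keeps the original resolution.
    height, width = original_height, original_width
    print(f'==> original video size: {original_height} x {original_width}')
    if max_res > 0 and max(height, width) > max_res:
        scale = max_res / max(original_height, original_width)
        height = ensure_even(round(original_height * scale))
        width = ensure_even(round(original_width * scale))
        print(f'==> downsample video size: {height} x {width}')
    return height, width

print(compute_target_size(2160, 3840, max_res=1280))  # -> (720, 1280)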
video_depth_anything/dpt_temporal.py CHANGED
@@ -91,6 +91,8 @@ class DPTHeadTemporal(DPTHead):
         out = F.interpolate(
             out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True
         )
-        out = self.scratch.output_conv2(out)
+        ori_type = out.dtype
+        with torch.autocast(device_type="cuda", enabled=False):
+            out = self.scratch.output_conv2(out.float())
 
-        return out
+        return out.to(ori_type)
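This hunk is the numerical-stability half of the fp16 change: while the rest of the head runs under autocast, the final output convolution is executed in full fp32 (autocast locally disabled, input upcast), then the result is cast back to the caller's dtype. The same pattern in isolation, with stand-in nn.Conv2d layers instead of the real scratch.output_conv2 (requires a CUDA device):

import torch
import torch.nn as nn

body_conv = nn.Conv2d(3, 32, 3, padding=1).cuda()     # stand-in for earlier layers
output_conv2 = nn.Conv2d(32, 1, 3, padding=1).cuda()  # stand-in for the output head

x = torch.randn(1, 3, 64, 64, device="cuda")
with torch.autocast(device_type="cuda", enabled=True):
    out = body_conv(x)                         # autocast runs this in fp16
    ori_type = out.dtype                       # remember the autocast dtype
    with torch.autocast(device_type="cuda", enabled=False):
        out = output_conv2(out.float())        # sensitive layer stays in fp32
    out = out.to(ori_type)                     # restore the dtype callers expect
print(out.dtype)  # torch.float16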
video_depth_anything/video_depth.py CHANGED
@@ -104,7 +104,8 @@ class VideoDepthAnything(nn.Module):
             cur_input[:, :OVERLAP, ...] = pre_input[:, KEYFRAMES, ...]
 
             with torch.no_grad():
-                depth = self.forward(cur_input) # depth shape: [1, T, H, W]
+                with torch.autocast(device_type=device, enabled=True):
+                    depth = self.forward(cur_input) # depth shape: [1, T, H, W]
 
             depth = F.interpolate(depth.flatten(0,1).unsqueeze(1), size=(frame_height, frame_width), mode='bilinear', align_corners=True)
             depth_list += [depth[i][0].cpu().numpy() for i in range(depth.shape[0])]
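And this hunk is the half that actually turns fp16 on: the sliding-window forward pass now runs under autocast. Note that device is assumed to hold the device-type string "cuda", which is what torch.autocast expects. A hedged sketch of the enclosing inference step (infer_window is our wrapper name, shapes are placeholders):

import torch
import torch.nn.functional as F

def infer_window(model, cur_input, frame_height, frame_width, device="cuda"):
    # Mixed-precision forward: matmuls/convs run in fp16, while autocast keeps
    # fp32-sensitive ops in fp32; gradients are off for pure inference.
    with torch.no_grad():
        with torch.autocast(device_type=device, enabled=True):
            depth = model(cur_input)  # depth shape: [1, T, H, W]
    # Resize each frame's depth map back to the source resolution.
    return F.interpolate(depth.flatten(0, 1).unsqueeze(1),
                         size=(frame_height, frame_width),
                         mode='bilinear', align_corners=True)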