jayparmr committed
Commit cd51d32 · 1 Parent(s): a3d6c18

Upload folder using huggingface_hub

internals/pipelines/pose_detector.py CHANGED
@@ -1,24 +1,19 @@
  from pathlib import Path
  from typing import Optional, Union
 
- from mmdet.apis import inference_detector, init_detector
- from mmpose.apis import (
-     inference_top_down_pose_model,
-     init_pose_model,
-     process_mmdet_results,
-     vis_pose_result,
- )
- from mmpose.datasets import DatasetInfo
  from PIL import Image, ImageDraw
  from torch import ge
 
  from internals.util.commons import download_file, download_image
  from internals.util.config import get_root_dir
+ from models.pose.body import Body
 
 
  class PoseDetector:
-     __det_model = "https://comic-assets.s3.ap-south-1.amazonaws.com/models/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
-     __pose_model = "https://comic-assets.s3.ap-south-1.amazonaws.com/models/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth"
+     # __det_model = "https://comic-assets.s3.ap-south-1.amazonaws.com/models/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
+     __pose_model = (
+         "https://comic-assets.s3.ap-south-1.amazonaws.com/models/body_pose_model.pth"
+     )
 
      __loaded = False
 
@@ -26,25 +21,11 @@ class PoseDetector:
          if self.__loaded:
              return
 
-         det_path = Path.home() / ".cache" / self.__det_model.split("/")[-1]
          pose_path = Path.home() / ".cache" / self.__pose_model.split("/")[-1]
 
-         download_file(self.__det_model, det_path)
          download_file(self.__pose_model, pose_path)
 
-         self.det_model = init_detector(
-             f"{get_root_dir()}/external/faster_rcnn_r50_fpn_coco.py",
-             str(det_path),
-             device="cpu",
-         )
-         self.pose_model = init_pose_model(
-             f"{get_root_dir()}/external/hrnet_w48_coco_256x192.py",
-             str(pose_path),
-             device="cpu",
-         )
-         self.dataset = self.pose_model.cfg.data["test"]["type"]
-         self.dataset_info = self.pose_model.cfg.data["test"].get("dataset_info", None)
-         self.dataset_info = DatasetInfo(self.dataset_info)
+         self.body_estimation = Body(str(pose_path))
 
          self.__loaded = True
 
@@ -113,47 +94,20 @@ class PoseDetector:
 
          return image
 
-     def infer(self, imageUrl: Union[str, Image.Image], width, height) -> dict:
+     def infer(self, image: Union[str, Image.Image], width, height) -> dict:
          candidate = []
          subset = []
 
-         if type(imageUrl) == Image.Image:
-             image_path = Path.home() / ".cache" / "input.png"
-             imageUrl.resize((width, height)).save(image_path)
-         elif type(imageUrl) == str:
-             image_path = Path.home() / ".cache" / imageUrl.split("/")[-1]
-             image = download_image(imageUrl).resize((width, height))
-             image.save(image_path)
-         else:
-             raise Exception("Invalid image type")
-         mmdet_results = inference_detector(self.det_model, str(image_path))
-         person_results = process_mmdet_results(mmdet_results, 1)
-
-         pose_results, _ = inference_top_down_pose_model(
-             self.pose_model,
-             str(image_path),
-             person_results,
-             bbox_thr=0.3,
-             format="xyxy",
-             dataset=self.dataset,
-             dataset_info=self.dataset_info,
-             return_heatmap=False,
-             outputs=None,
-         )
-
-         for d in pose_results:
-             n = len(candidate)
-             if d["bbox"][4] < 0.9:
-                 continue
-             keypoints = d["keypoints"][:, :2].tolist()
-             midpoint = [
-                 (keypoints[5][0] + keypoints[6][0]) / 2,
-                 (keypoints[5][1] + keypoints[6][1]) / 2,
-             ]
-             keypoints.append(midpoint)
-             candidate.extend(self.__convert_keypoints(keypoints))
-             m = len(candidate)
-             subset.append([j for j in range(n, m)])
+         if type(image) == str:
+             image = download_image(image)
+
+         image = image.resize((width, height))
+
+         candidate, subset = self.body_estimation(image)
+         candidate = candidate.tolist()
+         subset = subset.tolist()
+
+         candidate = [item[:2] for item in candidate]
 
          return {"candidate": candidate[:18], "subset": subset[:18]}
 
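With this change, pose extraction no longer round-trips through mmdet person detection plus mmpose top-down inference; the bundled OpenPose Body estimator produces the candidate/subset pairs directly from the resized PIL image. A minimal usage sketch of the rewired class — the no-argument constructor, the loader method name, and the input path are assumptions, since the hunks only show the loader's body:

from PIL import Image

from internals.pipelines.pose_detector import PoseDetector

detector = PoseDetector()
detector.load()  # assumed name of the method whose body the second hunk edits

image = Image.open("person.png")  # hypothetical local image
result = detector.infer(image, width=512, height=512)

# candidate holds [x, y] keypoints; subset groups them per detected person
print(len(result["candidate"]), len(result["subset"]))
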
internals/pipelines/remove_background.py CHANGED
@@ -1,4 +1,5 @@
  import io
+ from pathlib import Path
  from typing import Union
 
  import torch
@@ -35,10 +36,11 @@ class RemoveBackgroundV2:
      )
 
      def remove(self, image: Union[str, Image.Image]) -> Image.Image:
+         img_path = Path.home() / ".cache" / "rm_bg.png"
          if type(image) is str:
              image = Image.open(io.BytesIO(read_url(image)))
 
-         image.save("rm_bg.png")
-         images_without_background = self.interface(["./rm_bg.png"])
+         image.save(img_path)
+         images_without_background = self.interface([img_path])
          out = images_without_background[0]
          return out
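The fix routes the temporary PNG through ~/.cache instead of the process working directory, so remove() no longer depends on the CWD being writable. A sketch of the call path, assuming RemoveBackgroundV2 takes no constructor arguments (the diff does not show __init__) and a hypothetical input file:

from PIL import Image

from internals.pipelines.remove_background import RemoveBackgroundV2

remover = RemoveBackgroundV2()  # assumption: default constructor
foreground = remover.remove(Image.open("photo.png"))  # hypothetical input
foreground.save("photo_nobg.png")
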
models/pose/body.py ADDED
@@ -0,0 +1,231 @@
+ import cv2
+ import numpy as np
+ import math
+ import time
+ from scipy.ndimage import gaussian_filter
+ import torch
+ from torchvision import transforms
+ from PIL import Image
+ 
+ from models.pose import util
+ from models.pose.model import bodypose_model
+ 
+ 
+ class Body(object):
+     def __init__(self, model_path):
+         self.model = bodypose_model()
+         if torch.cuda.is_available():
+             self.model = self.model.cuda()
+         model_dict = util.transfer(self.model, torch.load(model_path))
+         self.model.load_state_dict(model_dict)
+         self.model.eval()
+ 
+     def __call__(self, oriImg: Image.Image):
+         # scale_search = [0.5, 1.0, 1.5, 2.0]
+         oriImg = self.__pil2cv(oriImg)
+         scale_search = [0.5]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre1 = 0.1
+         thre2 = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
+         paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
+ 
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+ 
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             # data = data.permute([2, 0, 1]).unsqueeze(0).float()
+             with torch.no_grad():
+                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
+             Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+             Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+ 
+             # extract outputs, resize, and remove padding
+             heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+ 
+             paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+             paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+ 
+             heatmap_avg += heatmap / len(multiplier)
+             paf_avg += paf / len(multiplier)
+ 
+         all_peaks = []
+         peak_counter = 0
+ 
+         for part in range(18):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+ 
+             map_left = np.zeros(one_heatmap.shape)
+             map_left[1:, :] = one_heatmap[:-1, :]
+             map_right = np.zeros(one_heatmap.shape)
+             map_right[:-1, :] = one_heatmap[1:, :]
+             map_up = np.zeros(one_heatmap.shape)
+             map_up[:, 1:] = one_heatmap[:, :-1]
+             map_down = np.zeros(one_heatmap.shape)
+             map_down[:, :-1] = one_heatmap[:, 1:]
+ 
+             peaks_binary = np.logical_and.reduce(
+                 (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
+             peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+             peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+             peak_id = range(peak_counter, peak_counter + len(peaks))
+             peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+ 
+             all_peaks.append(peaks_with_score_and_id)
+             peak_counter += len(peaks)
+ 
+         # find connection in the specified sequence, center 29 is in the position 15
+         limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
+                    [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
+                    [1, 16], [16, 18], [3, 17], [6, 18]]
+         # the middle joints heatmap correspondence
+         mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
+                   [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
+                   [55, 56], [37, 38], [45, 46]]
+ 
+         connection_all = []
+         special_k = []
+         mid_num = 10
+ 
+         for k in range(len(mapIdx)):
+             score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
+             candA = all_peaks[limbSeq[k][0] - 1]
+             candB = all_peaks[limbSeq[k][1] - 1]
+             nA = len(candA)
+             nB = len(candB)
+             indexA, indexB = limbSeq[k]
+             if nA != 0 and nB != 0:
+                 connection_candidate = []
+                 for i in range(nA):
+                     for j in range(nB):
+                         vec = np.subtract(candB[j][:2], candA[i][:2])
+                         norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                         norm = max(0.001, norm)
+                         vec = np.divide(vec, norm)
+ 
+                         startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                                             np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+ 
+                         vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                                           for I in range(len(startend))])
+                         vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                                           for I in range(len(startend))])
+ 
+                         score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                         score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                             0.5 * oriImg.shape[0] / norm - 1, 0)
+                         criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                         criterion2 = score_with_dist_prior > 0
+                         if criterion1 and criterion2:
+                             connection_candidate.append(
+                                 [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+ 
+                 connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                 connection = np.zeros((0, 5))
+                 for c in range(len(connection_candidate)):
+                     i, j, s = connection_candidate[c][0:3]
+                     if i not in connection[:, 3] and j not in connection[:, 4]:
+                         connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                         if len(connection) >= min(nA, nB):
+                             break
+ 
+                 connection_all.append(connection)
+             else:
+                 special_k.append(k)
+                 connection_all.append([])
+ 
+         # last number in each row is the total parts number of that person
+         # the second last number in each row is the score of the overall configuration
+         subset = -1 * np.ones((0, 20))
+         candidate = np.array([item for sublist in all_peaks for item in sublist])
+ 
+         for k in range(len(mapIdx)):
+             if k not in special_k:
+                 partAs = connection_all[k][:, 0]
+                 partBs = connection_all[k][:, 1]
+                 indexA, indexB = np.array(limbSeq[k]) - 1
+ 
+                 for i in range(len(connection_all[k])):
+                     found = 0
+                     subset_idx = [-1, -1]
+                     for j in range(len(subset)):
+                         if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                             subset_idx[found] = j
+                             found += 1
+ 
+                     if found == 1:
+                         j = subset_idx[0]
+                         if subset[j][indexB] != partBs[i]:
+                             subset[j][indexB] = partBs[i]
+                             subset[j][-1] += 1
+                             subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                     elif found == 2:  # if found 2 and disjoint, merge them
+                         j1, j2 = subset_idx
+                         membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                         if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                             subset[j1][:-2] += (subset[j2][:-2] + 1)
+                             subset[j1][-2:] += subset[j2][-2:]
+                             subset[j1][-2] += connection_all[k][i][2]
+                             subset = np.delete(subset, j2, 0)
+                         else:  # as like found == 1
+                             subset[j1][indexB] = partBs[i]
+                             subset[j1][-1] += 1
+                             subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+ 
+                     # if find no partA in the subset, create a new subset
+                     elif not found and k < 17:
+                         row = -1 * np.ones(20)
+                         row[indexA] = partAs[i]
+                         row[indexB] = partBs[i]
+                         row[-1] = 2
+                         row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                         subset = np.vstack([subset, row])
+ 
+         # delete some rows of subset which has few parts occur
+         deleteIdx = []
+         for i in range(len(subset)):
+             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                 deleteIdx.append(i)
+         subset = np.delete(subset, deleteIdx, axis=0)
+ 
+         # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
+         # candidate: x, y, score, id
+         return candidate, subset
+ 
+     def __pil2cv(self, image):
+         '''Convert a PIL image to an OpenCV (BGR) array.'''
+         new_image = np.array(image, dtype=np.uint8)
+         if new_image.ndim == 2:  # grayscale
+             pass
+         elif new_image.shape[2] == 3:  # color
+             new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
+         elif new_image.shape[2] == 4:  # with alpha channel
+             new_image = cv2.cvtColor(new_image, cv2.COLOR_RGBA2BGRA)
+         return new_image
+ 
+ 
+ # if __name__ == "__main__":
+ #     body_estimation = Body('../model/body_pose_model.pth')
+ 
+ #     test_image = '../images/ski.jpg'
+ #     oriImg = cv2.imread(test_image)  # B,G,R order
+ #     candidate, subset = body_estimation(oriImg)
+ #     canvas = util.draw_bodypose(oriImg, candidate, subset)
+ #     plt.imshow(canvas[:, :, [2, 1, 0]])
+ #     plt.show()
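For readers new to the OpenPose output format: candidate is an n×4 array of (x, y, score, id) rows, and each subset row describes one person as 18 indices into candidate (-1 for a missing part) followed by the total score and part count. A hedged sketch of consuming the result (checkpoint and image paths are hypothetical):

from PIL import Image

from models.pose.body import Body

body = Body("body_pose_model.pth")  # hypothetical local checkpoint path
candidate, subset = body(Image.open("person.png"))  # hypothetical image

for person in subset:
    for part in range(18):
        idx = int(person[part])
        if idx == -1:
            continue  # this body part was not detected
        x, y, score = candidate[idx][:3]
        print(part, x, y, score)
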
models/pose/model.py ADDED
@@ -0,0 +1,219 @@
+ from collections import OrderedDict
+ 
+ import torch
+ import torch.nn as nn
+ 
+ 
+ def make_layers(block, no_relu_layers):
+     layers = []
+     for layer_name, v in block.items():
+         if 'pool' in layer_name:
+             layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
+                                  padding=v[2])
+             layers.append((layer_name, layer))
+         else:
+             conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
+                                kernel_size=v[2], stride=v[3],
+                                padding=v[4])
+             layers.append((layer_name, conv2d))
+             if layer_name not in no_relu_layers:
+                 layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
+ 
+     return nn.Sequential(OrderedDict(layers))
+ 
+ 
+ class bodypose_model(nn.Module):
+     def __init__(self):
+         super(bodypose_model, self).__init__()
+ 
+         # these layers have no relu layer
+         no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
+                           'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
+                           'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
+                           'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
+         blocks = {}
+         block0 = OrderedDict([
+             ('conv1_1', [3, 64, 3, 1, 1]),
+             ('conv1_2', [64, 64, 3, 1, 1]),
+             ('pool1_stage1', [2, 2, 0]),
+             ('conv2_1', [64, 128, 3, 1, 1]),
+             ('conv2_2', [128, 128, 3, 1, 1]),
+             ('pool2_stage1', [2, 2, 0]),
+             ('conv3_1', [128, 256, 3, 1, 1]),
+             ('conv3_2', [256, 256, 3, 1, 1]),
+             ('conv3_3', [256, 256, 3, 1, 1]),
+             ('conv3_4', [256, 256, 3, 1, 1]),
+             ('pool3_stage1', [2, 2, 0]),
+             ('conv4_1', [256, 512, 3, 1, 1]),
+             ('conv4_2', [512, 512, 3, 1, 1]),
+             ('conv4_3_CPM', [512, 256, 3, 1, 1]),
+             ('conv4_4_CPM', [256, 128, 3, 1, 1])
+         ])
+ 
+         # Stage 1
+         block1_1 = OrderedDict([
+             ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
+             ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
+             ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
+             ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
+             ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
+         ])
+ 
+         block1_2 = OrderedDict([
+             ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
+             ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
+             ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
+             ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
+             ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
+         ])
+         blocks['block1_1'] = block1_1
+         blocks['block1_2'] = block1_2
+ 
+         self.model0 = make_layers(block0, no_relu_layers)
+ 
+         # Stages 2 - 6
+         for i in range(2, 7):
+             blocks['block%d_1' % i] = OrderedDict([
+                 ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
+                 ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
+                 ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
+             ])
+ 
+             blocks['block%d_2' % i] = OrderedDict([
+                 ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
+                 ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
+                 ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
+             ])
+ 
+         for k in blocks.keys():
+             blocks[k] = make_layers(blocks[k], no_relu_layers)
+ 
+         self.model1_1 = blocks['block1_1']
+         self.model2_1 = blocks['block2_1']
+         self.model3_1 = blocks['block3_1']
+         self.model4_1 = blocks['block4_1']
+         self.model5_1 = blocks['block5_1']
+         self.model6_1 = blocks['block6_1']
+ 
+         self.model1_2 = blocks['block1_2']
+         self.model2_2 = blocks['block2_2']
+         self.model3_2 = blocks['block3_2']
+         self.model4_2 = blocks['block4_2']
+         self.model5_2 = blocks['block5_2']
+         self.model6_2 = blocks['block6_2']
+ 
+     def forward(self, x):
+         out1 = self.model0(x)
+ 
+         out1_1 = self.model1_1(out1)
+         out1_2 = self.model1_2(out1)
+         out2 = torch.cat([out1_1, out1_2, out1], 1)
+ 
+         out2_1 = self.model2_1(out2)
+         out2_2 = self.model2_2(out2)
+         out3 = torch.cat([out2_1, out2_2, out1], 1)
+ 
+         out3_1 = self.model3_1(out3)
+         out3_2 = self.model3_2(out3)
+         out4 = torch.cat([out3_1, out3_2, out1], 1)
+ 
+         out4_1 = self.model4_1(out4)
+         out4_2 = self.model4_2(out4)
+         out5 = torch.cat([out4_1, out4_2, out1], 1)
+ 
+         out5_1 = self.model5_1(out5)
+         out5_2 = self.model5_2(out5)
+         out6 = torch.cat([out5_1, out5_2, out1], 1)
+ 
+         out6_1 = self.model6_1(out6)
+         out6_2 = self.model6_2(out6)
+ 
+         return out6_1, out6_2
+ 
+ 
+ class handpose_model(nn.Module):
+     def __init__(self):
+         super(handpose_model, self).__init__()
+ 
+         # these layers have no relu layer
+         no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
+                           'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
+         # stage 1
+         block1_0 = OrderedDict([
+             ('conv1_1', [3, 64, 3, 1, 1]),
+             ('conv1_2', [64, 64, 3, 1, 1]),
+             ('pool1_stage1', [2, 2, 0]),
+             ('conv2_1', [64, 128, 3, 1, 1]),
+             ('conv2_2', [128, 128, 3, 1, 1]),
+             ('pool2_stage1', [2, 2, 0]),
+             ('conv3_1', [128, 256, 3, 1, 1]),
+             ('conv3_2', [256, 256, 3, 1, 1]),
+             ('conv3_3', [256, 256, 3, 1, 1]),
+             ('conv3_4', [256, 256, 3, 1, 1]),
+             ('pool3_stage1', [2, 2, 0]),
+             ('conv4_1', [256, 512, 3, 1, 1]),
+             ('conv4_2', [512, 512, 3, 1, 1]),
+             ('conv4_3', [512, 512, 3, 1, 1]),
+             ('conv4_4', [512, 512, 3, 1, 1]),
+             ('conv5_1', [512, 512, 3, 1, 1]),
+             ('conv5_2', [512, 512, 3, 1, 1]),
+             ('conv5_3_CPM', [512, 128, 3, 1, 1])
+         ])
+ 
+         block1_1 = OrderedDict([
+             ('conv6_1_CPM', [128, 512, 1, 1, 0]),
+             ('conv6_2_CPM', [512, 22, 1, 1, 0])
+         ])
+ 
+         blocks = {}
+         blocks['block1_0'] = block1_0
+         blocks['block1_1'] = block1_1
+ 
+         # stage 2-6
+         for i in range(2, 7):
+             blocks['block%d' % i] = OrderedDict([
+                 ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
+                 ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
+                 ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
+                 ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
+             ])
+ 
+         for k in blocks.keys():
+             blocks[k] = make_layers(blocks[k], no_relu_layers)
+ 
+         self.model1_0 = blocks['block1_0']
+         self.model1_1 = blocks['block1_1']
+         self.model2 = blocks['block2']
+         self.model3 = blocks['block3']
+         self.model4 = blocks['block4']
+         self.model5 = blocks['block5']
+         self.model6 = blocks['block6']
+ 
+     def forward(self, x):
+         out1_0 = self.model1_0(x)
+         out1_1 = self.model1_1(out1_0)
+         concat_stage2 = torch.cat([out1_1, out1_0], 1)
+         out_stage2 = self.model2(concat_stage2)
+         concat_stage3 = torch.cat([out_stage2, out1_0], 1)
+         out_stage3 = self.model3(concat_stage3)
+         concat_stage4 = torch.cat([out_stage3, out1_0], 1)
+         out_stage4 = self.model4(concat_stage4)
+         concat_stage5 = torch.cat([out_stage4, out1_0], 1)
+         out_stage5 = self.model5(concat_stage5)
+         concat_stage6 = torch.cat([out_stage5, out1_0], 1)
+         out_stage6 = self.model6(concat_stage6)
+         return out_stage6
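bodypose_model is the standard two-branch, six-stage OpenPose CPM: at each stage the 38-channel PAF branch (L1) and 19-channel heatmap branch (L2) are concatenated with the shared VGG-style features and refined again, at an overall stride of 8. A quick shape sanity check, as a sketch:

import torch

from models.pose.model import bodypose_model

model = bodypose_model().eval()
x = torch.randn(1, 3, 368, 368)  # boxsize used by Body; three pools give stride 8
with torch.no_grad():
    pafs, heatmaps = model(x)
print(pafs.shape)      # torch.Size([1, 38, 46, 46])
print(heatmaps.shape)  # torch.Size([1, 19, 46, 46])
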
models/pose/util.py ADDED
@@ -0,0 +1,46 @@
+ import math
+ 
+ import cv2
+ import numpy as np
+ 
+ 
+ def padRightDownCorner(img, stride, padValue):
+     h = img.shape[0]
+     w = img.shape[1]
+ 
+     pad = 4 * [None]
+     pad[0] = 0  # up
+     pad[1] = 0  # left
+     pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
+     pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right
+ 
+     img_padded = img
+     pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
+     img_padded = np.concatenate((pad_up, img_padded), axis=0)
+     pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
+     img_padded = np.concatenate((pad_left, img_padded), axis=1)
+     pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
+     img_padded = np.concatenate((img_padded, pad_down), axis=0)
+     pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
+     img_padded = np.concatenate((img_padded, pad_right), axis=1)
+ 
+     return img_padded, pad
+ 
+ 
+ # transfer caffe model to pytorch which will match the layer name
+ def transfer(model, model_weights):
+     transfered_model_weights = {}
+     for weights_name in model.state_dict().keys():
+         transfered_model_weights[weights_name] = model_weights[
+             ".".join(weights_name.split(".")[1:])
+         ]
+     return transfered_model_weights
+ 
+ 
+ # get max index of 2d array
+ def npmax(array):
+     arrayindex = array.argmax(1)
+     arrayvalue = array.max(1)
+     i = arrayvalue.argmax()
+     j = arrayindex[i]
+     return i, j
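padRightDownCorner pads only the bottom and right edges so both spatial dimensions become multiples of stride (what the stride-8 network expects), and returns pad as [up, left, down, right] so Body can crop the padding back off the upsampled maps. A small sketch of its shape behavior:

import numpy as np

from models.pose.util import padRightDownCorner

img = np.zeros((45, 70, 3), dtype=np.uint8)  # arbitrary size for illustration
padded, pad = padRightDownCorner(img, stride=8, padValue=128)
print(padded.shape)  # (48, 72, 3): each side rounded up to a multiple of 8
print(pad)           # [0, 0, 3, 2]
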
requirements.txt CHANGED
@@ -1,6 +1,6 @@
  boto3==1.24.61
  triton==2.0.0
- diffusers==0.17.1
+ diffusers==0.19.0
  fastapi==0.87.0
  Pillow==9.3.0
  redis==4.3.4
@@ -32,7 +32,7 @@ scikit-image
  omegaconf
  webdataset
  git+https://github.com/cloneofsimo/lora.git
- https://comic-assets.s3.ap-south-1.amazonaws.com/packages/mmcv_full-1.7.0-cp38-cp38-linux_x86_64.whl
+ https://comic-assets.s3.ap-south-1.amazonaws.com/packages/mmcv_full-1.7.0-cp39-cp39-linux_x86_64.whl
  python-dateutil==2.8.2
  PyYAML
  torchvision