Spaces:

Gradio-Blocks
/

ViTPose

Running

App Files Files Community

hysts committed on Jun 8, 2022

Commit

42046e4

1 Parent(s): 37c98fe

Refactor

Browse files

Files changed (3) hide show

app.py +16 -11
model.py +114 -85
style.css +5 -1

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import tarfile
 import gradio as gr
-from model import DetModel, PoseModel
 DESCRIPTION = '''# ViTPose
@@ -44,8 +44,8 @@ def main():
     extract_tar()
-    det_model = DetModel(device=args.device)
-    pose_model = PoseModel(device=args.device)
     with gr.Blocks(theme=args.theme, css='style.css') as demo:
         gr.Markdown(DESCRIPTION)
@@ -59,7 +59,7 @@ def main():
                                                type='numpy')
                     with gr.Row():
                         detector_name = gr.Dropdown(list(
-                            det_model.models.keys()),
                                                     value=det_model.model_name,
                                                     label='Detector')
                     with gr.Row():
@@ -68,7 +68,9 @@ def main():
                 with gr.Column():
                     with gr.Row():
                         detection_visualization = gr.Image(
-                            label='Detection Result', type='numpy')
                     with gr.Row():
                         vis_det_score_threshold = gr.Slider(
                             0,
@@ -91,7 +93,7 @@ def main():
                 with gr.Column():
                     with gr.Row():
                         pose_model_name = gr.Dropdown(
-                            list(pose_model.models.keys()),
                             value=pose_model.model_name,
                             label='Pose Model')
                     det_score_threshold = gr.Slider(
@@ -106,7 +108,8 @@ def main():
                 with gr.Column():
                     with gr.Row():
                         pose_visualization = gr.Image(label='Result',
-                                                      type='numpy')
                     with gr.Row():
                         vis_kpt_score_threshold = gr.Slider(
                             0,
@@ -131,11 +134,12 @@ def main():
         gr.Markdown(FOOTER)
-        detector_name.change(fn=det_model.set_model_name,
                              inputs=detector_name,
                              outputs=None)
-        detect_button.click(fn=det_model.detect_and_visualize,
                             inputs=[
                                 input_image,
                                 vis_det_score_threshold,
                             ],
@@ -151,11 +155,12 @@ def main():
                                 ],
                                 outputs=detection_visualization)
-        pose_model_name.change(fn=pose_model.set_model_name,
                                inputs=pose_model_name,
                                outputs=None)
-        predict_button.click(fn=pose_model.predict_pose_and_visualize,
                              inputs=[
                                  input_image,
                                  det_preds,
                                  det_score_threshold,

 import gradio as gr
+from model import AppDetModel, AppPoseModel
 DESCRIPTION = '''# ViTPose
     extract_tar()
+    det_model = AppDetModel(device=args.device)
+    pose_model = AppPoseModel(device=args.device)
     with gr.Blocks(theme=args.theme, css='style.css') as demo:
         gr.Markdown(DESCRIPTION)
                                                type='numpy')
                     with gr.Row():
                         detector_name = gr.Dropdown(list(
+                            det_model.MODEL_DICT.keys()),
                                                     value=det_model.model_name,
                                                     label='Detector')
                     with gr.Row():
                 with gr.Column():
                     with gr.Row():
                         detection_visualization = gr.Image(
+                            label='Detection Result',
+                            type='numpy',
+                            elem_id='det-result')
                     with gr.Row():
                         vis_det_score_threshold = gr.Slider(
                             0,
                 with gr.Column():
                     with gr.Row():
                         pose_model_name = gr.Dropdown(
+                            list(pose_model.MODEL_DICT.keys()),
                             value=pose_model.model_name,
                             label='Pose Model')
                     det_score_threshold = gr.Slider(
                 with gr.Column():
                     with gr.Row():
                         pose_visualization = gr.Image(label='Result',
+                                                      type='numpy',
+                                                      elem_id='pose-result')
                     with gr.Row():
                         vis_kpt_score_threshold = gr.Slider(
                             0,
         gr.Markdown(FOOTER)
+        detector_name.change(fn=det_model.set_model,
                              inputs=detector_name,
                              outputs=None)
+        detect_button.click(fn=det_model.run,
                             inputs=[
+                                detector_name,
                                 input_image,
                                 vis_det_score_threshold,
                             ],
                                 ],
                                 outputs=detection_visualization)
+        pose_model_name.change(fn=pose_model.set_model,
                                inputs=pose_model_name,
                                outputs=None)
+        predict_button.click(fn=pose_model.run,
                              inputs=[
+                                 pose_model_name,
                                  input_image,
                                  det_preds,
                                  det_score_threshold,

model.py CHANGED Viewed

@@ -29,46 +29,52 @@ HF_TOKEN = os.environ['HF_TOKEN']
 class DetModel:
     def __init__(self, device: str | torch.device):
         self.device = torch.device(device)
-        self.models = self._load_models()
         self.model_name = 'YOLOX-l'
-    def _load_models(self) -> dict[str, nn.Module]:
-        model_dict = {
-            'YOLOX-tiny': {
-                'config':
-                'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
-                'model':
-                'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
-            },
-            'YOLOX-s': {
-                'config':
-                'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
-                'model':
-                'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
-            },
-            'YOLOX-l': {
-                'config':
-                'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
-                'model':
-                'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
-            },
-            'YOLOX-x': {
-                'config':
-                'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
-                'model':
-                'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
-            },
-        }
-        models = {
-            key: init_detector(dic['config'], dic['model'], device=self.device)
-            for key, dic in model_dict.items()
-        }
-        return models
-    def set_model_name(self, name: str) -> None:
         self.model_name = name
     def detect_and_visualize(
             self, image: np.ndarray,
@@ -79,8 +85,7 @@ class DetModel:
     def detect(self, image: np.ndarray) -> list[np.ndarray]:
         image = image[:, :, ::-1]  # RGB -> BGR
-        model = self.models[self.model_name]
-        out = inference_detector(model, image)
         return out
     def visualize_detection_results(
@@ -88,60 +93,71 @@ class DetModel:
             image: np.ndarray,
             detection_results: list[np.ndarray],
             score_threshold: float = 0.3) -> np.ndarray:
-        person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)]
         image = image[:, :, ::-1]  # RGB -> BGR
-        model = self.models[self.model_name]
-        vis = model.show_result(image,
-                                person_det,
-                                score_thr=score_threshold,
-                                bbox_color=None,
-                                text_color=(200, 200, 200),
-                                mask_color=None)
         return vis[:, :, ::-1]  # BGR -> RGB
 class PoseModel:
     def __init__(self, device: str | torch.device):
         self.device = torch.device(device)
-        self.models = self._load_models()
         self.model_name = 'ViTPose-B (multi-task train, COCO)'
-    def _load_models(self) -> dict[str, nn.Module]:
-        model_dict = {
-            'ViTPose-B (single-task train)': {
-                'config':
-                'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
-                'model': 'models/vitpose-b.pth',
-            },
-            'ViTPose-L (single-task train)': {
-                'config':
-                'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
-                'model': 'models/vitpose-l.pth',
-            },
-            'ViTPose-B (multi-task train, COCO)': {
-                'config':
-                'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
-                'model': 'models/vitpose-b-multi-coco.pth',
-            },
-            'ViTPose-L (multi-task train, COCO)': {
-                'config':
-                'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
-                'model': 'models/vitpose-l-multi-coco.pth',
-            },
-        }
-        models = dict()
-        for key, dic in model_dict.items():
-            ckpt_path = huggingface_hub.hf_hub_download(
-                'hysts/ViTPose', dic['model'], use_auth_token=HF_TOKEN)
-            model = init_pose_model(dic['config'],
-                                    ckpt_path,
-                                    device=self.device)
-            models[key] = model
-        return models
-    def set_model_name(self, name: str) -> None:
         self.model_name = name
     def predict_pose_and_visualize(
         self,
@@ -163,9 +179,8 @@ class PoseModel:
             det_results: list[np.ndarray],
             box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
         image = image[:, :, ::-1]  # RGB -> BGR
-        model = self.models[self.model_name]
         person_results = process_mmdet_results(det_results, 1)
-        out, _ = inference_top_down_pose_model(model,
                                                image,
                                                person_results=person_results,
                                                bbox_thr=box_score_threshold,
@@ -179,11 +194,25 @@ class PoseModel:
                                vis_dot_radius: int = 4,
                                vis_line_thickness: int = 1) -> np.ndarray:
         image = image[:, :, ::-1]  # RGB -> BGR
-        model = self.models[self.model_name]
-        vis = vis_pose_result(model,
                               image,
                               pose_results,
                               kpt_score_thr=kpt_score_threshold,
                               radius=vis_dot_radius,
                               thickness=vis_line_thickness)
         return vis[:, :, ::-1]  # BGR -> RGB

 class DetModel:
+    MODEL_DICT = {
+        'YOLOX-tiny': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
+        },
+        'YOLOX-s': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
+        },
+        'YOLOX-l': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
+        },
+        'YOLOX-x': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
+        },
+    }
     def __init__(self, device: str | torch.device):
         self.device = torch.device(device)
+        self._load_all_models_once()
         self.model_name = 'YOLOX-l'
+        self.model = self._load_model(self.model_name)
+    def _load_all_models_once(self) -> None:
+        for name in self.MODEL_DICT:
+            self._load_model(name)
+    def _load_model(self, name: str) -> nn.Module:
+        dic = self.MODEL_DICT[name]
+        return init_detector(dic['config'], dic['model'], device=self.device)
+    def set_model(self, name: str) -> None:
+        if name == self.model_name:
+            return
         self.model_name = name
+        self.model = self._load_model(name)
     def detect_and_visualize(
             self, image: np.ndarray,
     def detect(self, image: np.ndarray) -> list[np.ndarray]:
         image = image[:, :, ::-1]  # RGB -> BGR
+        out = inference_detector(self.model, image)
         return out
     def visualize_detection_results(
             image: np.ndarray,
             detection_results: list[np.ndarray],
             score_threshold: float = 0.3) -> np.ndarray:
+        person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79
         image = image[:, :, ::-1]  # RGB -> BGR
+        vis = self.model.show_result(image,
+                                     person_det,
+                                     score_thr=score_threshold,
+                                     bbox_color=None,
+                                     text_color=(200, 200, 200),
+                                     mask_color=None)
         return vis[:, :, ::-1]  # BGR -> RGB
+class AppDetModel(DetModel):
+    def run(self, model_name: str, image: np.ndarray,
+            score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
+        self.set_model(model_name)
+        return self.detect_and_visualize(image, score_threshold)
 class PoseModel:
+    MODEL_DICT = {
+        'ViTPose-B (single-task train)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+            'model': 'models/vitpose-b.pth',
+        },
+        'ViTPose-L (single-task train)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+            'model': 'models/vitpose-l.pth',
+        },
+        'ViTPose-B (multi-task train, COCO)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+            'model': 'models/vitpose-b-multi-coco.pth',
+        },
+        'ViTPose-L (multi-task train, COCO)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+            'model': 'models/vitpose-l-multi-coco.pth',
+        },
+    }
     def __init__(self, device: str | torch.device):
         self.device = torch.device(device)
         self.model_name = 'ViTPose-B (multi-task train, COCO)'
+        self.model = self._load_model(self.model_name)
+    def _load_all_models_once(self) -> None:
+        for name in self.MODEL_DICT:
+            self._load_model(name)
+    def _load_model(self, name: str) -> nn.Module:
+        dic = self.MODEL_DICT[name]
+        ckpt_path = huggingface_hub.hf_hub_download('hysts/ViTPose',
+                                                    dic['model'],
+                                                    use_auth_token=HF_TOKEN)
+        model = init_pose_model(dic['config'], ckpt_path, device=self.device)
+        return model
+    def set_model(self, name: str) -> None:
+        if name == self.model_name:
+            return
         self.model_name = name
+        self.model = self._load_model(name)
     def predict_pose_and_visualize(
         self,
             det_results: list[np.ndarray],
             box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
         image = image[:, :, ::-1]  # RGB -> BGR
         person_results = process_mmdet_results(det_results, 1)
+        out, _ = inference_top_down_pose_model(self.model,
                                                image,
                                                person_results=person_results,
                                                bbox_thr=box_score_threshold,
                                vis_dot_radius: int = 4,
                                vis_line_thickness: int = 1) -> np.ndarray:
         image = image[:, :, ::-1]  # RGB -> BGR
+        vis = vis_pose_result(self.model,
                               image,
                               pose_results,
                               kpt_score_thr=kpt_score_threshold,
                               radius=vis_dot_radius,
                               thickness=vis_line_thickness)
         return vis[:, :, ::-1]  # BGR -> RGB
+class AppPoseModel(PoseModel):
+    def run(
+        self, model_name: str, image: np.ndarray,
+        det_results: list[np.ndarray], box_score_threshold: float,
+        kpt_score_threshold: float, vis_dot_radius: int,
+        vis_line_thickness: int
+    ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
+        self.set_model(model_name)
+        return self.predict_pose_and_visualize(image, det_results,
+                                               box_score_threshold,
+                                               kpt_score_threshold,
+                                               vis_dot_radius,
+                                               vis_line_thickness)

style.css CHANGED Viewed

@@ -1,7 +1,11 @@
 h1 {
   text-align: center;
 }
-div#result {
   max-width: 600px;
   max-height: 600px;
 }

 h1 {
   text-align: center;
 }
+div#det-result {
+  max-width: 600px;
+  max-height: 600px;
+}
+div#pose-result {
   max-width: 600px;
   max-height: 600px;
 }