Files changed (7)
  1. .pre-commit-config.yaml +45 -59
  2. .style.yapf +5 -0
  3. README.md +1 -4
  4. app.py +179 -134
  5. model.py +119 -82
  6. requirements.txt +2 -2
  7. style.css +4 -1
.pre-commit-config.yaml CHANGED
@@ -1,60 +1,46 @@
  repos:
-   - repo: https://github.com/pre-commit/pre-commit-hooks
-     rev: v4.6.0
-     hooks:
-       - id: check-executables-have-shebangs
-       - id: check-json
-       - id: check-merge-conflict
-       - id: check-shebang-scripts-are-executable
-       - id: check-toml
-       - id: check-yaml
-       - id: end-of-file-fixer
-       - id: mixed-line-ending
-         args: ["--fix=lf"]
-       - id: requirements-txt-fixer
-       - id: trailing-whitespace
-   - repo: https://github.com/myint/docformatter
-     rev: v1.7.5
-     hooks:
-       - id: docformatter
-         args: ["--in-place"]
-   - repo: https://github.com/pycqa/isort
-     rev: 5.13.2
-     hooks:
-       - id: isort
-         args: ["--profile", "black"]
-   - repo: https://github.com/pre-commit/mirrors-mypy
-     rev: v1.10.0
-     hooks:
-       - id: mypy
-         args: ["--ignore-missing-imports"]
-         additional_dependencies:
-           [
-             "types-python-slugify",
-             "types-requests",
-             "types-PyYAML",
-             "types-pytz",
-           ]
-   - repo: https://github.com/psf/black
-     rev: 24.4.2
-     hooks:
-       - id: black
-         language_version: python3.10
-         args: ["--line-length", "119"]
-   - repo: https://github.com/kynan/nbstripout
-     rev: 0.7.1
-     hooks:
-       - id: nbstripout
-         args:
-           [
-             "--extra-keys",
-             "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
-           ]
-   - repo: https://github.com/nbQA-dev/nbQA
-     rev: 1.8.5
-     hooks:
-       - id: nbqa-black
-       - id: nbqa-pyupgrade
-         args: ["--py37-plus"]
-       - id: nbqa-isort
-         args: ["--float-to-top"]

+ exclude: ^(ViTPose/|mmdet_configs/configs/)
  repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+   rev: v4.2.0
+   hooks:
+   - id: check-executables-have-shebangs
+   - id: check-json
+   - id: check-merge-conflict
+   - id: check-shebang-scripts-are-executable
+   - id: check-toml
+   - id: check-yaml
+   - id: double-quote-string-fixer
+   - id: end-of-file-fixer
+   - id: mixed-line-ending
+     args: ['--fix=lf']
+   - id: requirements-txt-fixer
+   - id: trailing-whitespace
+ - repo: https://github.com/myint/docformatter
+   rev: v1.4
+   hooks:
+   - id: docformatter
+     args: ['--in-place']
+ - repo: https://github.com/pycqa/isort
+   rev: 5.10.1
+   hooks:
+   - id: isort
+ - repo: https://github.com/pre-commit/mirrors-mypy
+   rev: v0.812
+   hooks:
+   - id: mypy
+     args: ['--ignore-missing-imports']
+ - repo: https://github.com/google/yapf
+   rev: v0.32.0
+   hooks:
+   - id: yapf
+     args: ['--parallel', '--in-place']
+ - repo: https://github.com/kynan/nbstripout
+   rev: 0.5.0
+   hooks:
+   - id: nbstripout
+     args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+ - repo: https://github.com/nbQA-dev/nbQA
+   rev: 1.3.1
+   hooks:
+   - id: nbqa-isort
+   - id: nbqa-yapf
.style.yapf ADDED
@@ -0,0 +1,5 @@
+ [style]
+ based_on_style = pep8
+ blank_line_before_nested_class_or_def = false
+ spaces_before_comment = 2
+ split_before_logical_operator = true
README.md CHANGED
@@ -4,12 +4,9 @@ emoji: 📊
  colorFrom: yellow
  colorTo: indigo
  sdk: gradio
- sdk_version: 4.36.1
  app_file: app.py
  pinned: false
- suggested_hardware: t4-small
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
-
- https://arxiv.org/abs/2204.12484

  colorFrom: yellow
  colorTo: indigo
  sdk: gradio
+ sdk_version: 3.1.1
  app_file: app.py
  pinned: false
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py CHANGED
@@ -2,153 +2,198 @@

  from __future__ import annotations

- import os
  import pathlib
- import shlex
- import subprocess
  import tarfile

- if os.getenv("SYSTEM") == "spaces":
-     subprocess.run(shlex.split("pip install click==7.1.2"))
-     subprocess.run(shlex.split("pip install typer==0.9.4"))

-     import mim

-     mim.uninstall("mmcv-full", confirm_yes=True)
-     mim.install("mmcv-full==1.5.0", is_yes=True)

-     subprocess.run(shlex.split("pip uninstall -y opencv-python"))
-     subprocess.run(shlex.split("pip uninstall -y opencv-python-headless"))
-     subprocess.run(shlex.split("pip install opencv-python-headless==4.8.0.74"))

- import gradio as gr

- from model import AppDetModel, AppPoseModel

- DESCRIPTION = "# [ViTPose](https://github.com/ViTAE-Transformer/ViTPose)"


  def extract_tar() -> None:
-     if pathlib.Path("mmdet_configs/configs").exists():
          return
-     with tarfile.open("mmdet_configs/configs.tar") as f:
-         f.extractall("mmdet_configs")
-
-
- extract_tar()
-
- det_model = AppDetModel()
- pose_model = AppPoseModel()
-
- with gr.Blocks(css="style.css") as demo:
-     gr.Markdown(DESCRIPTION)
-
-     with gr.Group():
-         gr.Markdown("## Step 1")
-         with gr.Row():
-             with gr.Column():
-                 with gr.Row():
-                     input_image = gr.Image(label="Input Image", type="numpy")
-                 with gr.Row():
-                     detector_name = gr.Dropdown(
-                         label="Detector", choices=list(det_model.MODEL_DICT.keys()), value=det_model.model_name
-                     )
-                 with gr.Row():
-                     detect_button = gr.Button("Detect")
-                     det_preds = gr.State()
-             with gr.Column():
-                 with gr.Row():
-                     detection_visualization = gr.Image(label="Detection Result", type="numpy", elem_id="det-result")
-                 with gr.Row():
-                     vis_det_score_threshold = gr.Slider(
-                         label="Visualization Score Threshold", minimum=0, maximum=1, step=0.05, value=0.5
-                     )
-                 with gr.Row():
-                     redraw_det_button = gr.Button(value="Redraw")
-
-     with gr.Row():
-         paths = sorted(pathlib.Path("images").rglob("*.jpg"))
-         example_images = gr.Examples(examples=[[path.as_posix()] for path in paths], inputs=input_image)
-
-     with gr.Group():
-         gr.Markdown("## Step 2")
-         with gr.Row():
-             with gr.Column():
-                 with gr.Row():
-                     pose_model_name = gr.Dropdown(
-                         label="Pose Model", choices=list(pose_model.MODEL_DICT.keys()), value=pose_model.model_name
-                     )
-                     det_score_threshold = gr.Slider(
-                         label="Box Score Threshold", minimum=0, maximum=1, step=0.05, value=0.5
-                     )
-                 with gr.Row():
-                     predict_button = gr.Button("Predict")
-                     pose_preds = gr.State()
-             with gr.Column():
-                 with gr.Row():
-                     pose_visualization = gr.Image(label="Result", type="numpy", elem_id="pose-result")
-                 with gr.Row():
-                     vis_kpt_score_threshold = gr.Slider(
-                         label="Visualization Score Threshold", minimum=0, maximum=1, step=0.05, value=0.3
-                     )
-                 with gr.Row():
-                     vis_dot_radius = gr.Slider(label="Dot Radius", minimum=1, maximum=10, step=1, value=4)
-                 with gr.Row():
-                     vis_line_thickness = gr.Slider(label="Line Thickness", minimum=1, maximum=10, step=1, value=2)
-                 with gr.Row():
-                     redraw_pose_button = gr.Button("Redraw")
-
-     detector_name.change(fn=det_model.set_model, inputs=detector_name)
-     detect_button.click(
-         fn=det_model.run,
-         inputs=[
-             detector_name,
-             input_image,
-             vis_det_score_threshold,
-         ],
-         outputs=[
-             det_preds,
-             detection_visualization,
-         ],
-     )
-     redraw_det_button.click(
-         fn=det_model.visualize_detection_results,
-         inputs=[
-             input_image,
-             det_preds,
-             vis_det_score_threshold,
-         ],
-         outputs=detection_visualization,
      )

-     pose_model_name.change(fn=pose_model.set_model, inputs=pose_model_name)
-     predict_button.click(
-         fn=pose_model.run,
-         inputs=[
-             pose_model_name,
-             input_image,
-             det_preds,
-             det_score_threshold,
-             vis_kpt_score_threshold,
-             vis_dot_radius,
-             vis_line_thickness,
-         ],
-         outputs=[
-             pose_preds,
-             pose_visualization,
-         ],
-     )
-     redraw_pose_button.click(
-         fn=pose_model.visualize_pose_results,
-         inputs=[
-             input_image,
-             pose_preds,
-             vis_kpt_score_threshold,
-             vis_dot_radius,
-             vis_line_thickness,
-         ],
-         outputs=pose_visualization,
-     )

- if __name__ == "__main__":
-     demo.queue(max_size=10).launch()


  from __future__ import annotations

+ import argparse
  import pathlib
  import tarfile

+ import gradio as gr

+ from model import AppDetModel, AppPoseModel

+ DESCRIPTION = '''# ViTPose

+ This is an unofficial demo for [https://github.com/ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).'''
+ FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=hysts.vitpose" />'


+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument('--theme', type=str)
+     parser.add_argument('--share', action='store_true')
+     parser.add_argument('--port', type=int)
+     parser.add_argument('--disable-queue',
+                         dest='enable_queue',
+                         action='store_false')
+     return parser.parse_args()

+
+ def set_example_image(example: list) -> dict:
+     return gr.Image.update(value=example[0])


  def extract_tar() -> None:
+     if pathlib.Path('mmdet_configs/configs').exists():
          return
+     with tarfile.open('mmdet_configs/configs.tar') as f:
+         f.extractall('mmdet_configs')
+
+
+ def main():
+     args = parse_args()
+
+     extract_tar()
+
+     det_model = AppDetModel(device=args.device)
+     pose_model = AppPoseModel(device=args.device)
+
+     with gr.Blocks(theme=args.theme, css='style.css') as demo:
+         gr.Markdown(DESCRIPTION)
+
+         with gr.Box():
+             gr.Markdown('## Step 1')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         input_image = gr.Image(label='Input Image',
+                                                type='numpy')
+                     with gr.Row():
+                         detector_name = gr.Dropdown(list(
+                             det_model.MODEL_DICT.keys()),
+                                                     value=det_model.model_name,
+                                                     label='Detector')
+                     with gr.Row():
+                         detect_button = gr.Button(value='Detect')
+                         det_preds = gr.Variable()
+                 with gr.Column():
+                     with gr.Row():
+                         detection_visualization = gr.Image(
+                             label='Detection Result',
+                             type='numpy',
+                             elem_id='det-result')
+                     with gr.Row():
+                         vis_det_score_threshold = gr.Slider(
+                             0,
+                             1,
+                             step=0.05,
+                             value=0.5,
+                             label='Visualization Score Threshold')
+                     with gr.Row():
+                         redraw_det_button = gr.Button(value='Redraw')
+
+         with gr.Row():
+             paths = sorted(pathlib.Path('images').rglob('*.jpg'))
+             example_images = gr.Dataset(components=[input_image],
+                                         samples=[[path.as_posix()]
+                                                  for path in paths])
+
+         with gr.Box():
+             gr.Markdown('## Step 2')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         pose_model_name = gr.Dropdown(
+                             list(pose_model.MODEL_DICT.keys()),
+                             value=pose_model.model_name,
+                             label='Pose Model')
+                         det_score_threshold = gr.Slider(
+                             0,
+                             1,
+                             step=0.05,
+                             value=0.5,
+                             label='Box Score Threshold')
+                     with gr.Row():
+                         predict_button = gr.Button(value='Predict')
+                         pose_preds = gr.Variable()
+                 with gr.Column():
+                     with gr.Row():
+                         pose_visualization = gr.Image(label='Result',
+                                                       type='numpy',
+                                                       elem_id='pose-result')
+                     with gr.Row():
+                         vis_kpt_score_threshold = gr.Slider(
+                             0,
+                             1,
+                             step=0.05,
+                             value=0.3,
+                             label='Visualization Score Threshold')
+                     with gr.Row():
+                         vis_dot_radius = gr.Slider(1,
+                                                    10,
+                                                    step=1,
+                                                    value=4,
+                                                    label='Dot Radius')
+                     with gr.Row():
+                         vis_line_thickness = gr.Slider(1,
+                                                        10,
+                                                        step=1,
+                                                        value=2,
+                                                        label='Line Thickness')
+                     with gr.Row():
+                         redraw_pose_button = gr.Button(value='Redraw')
+
+         gr.Markdown(FOOTER)
+
+         detector_name.change(fn=det_model.set_model,
+                              inputs=detector_name,
+                              outputs=None)
+         detect_button.click(fn=det_model.run,
+                             inputs=[
+                                 detector_name,
+                                 input_image,
+                                 vis_det_score_threshold,
+                             ],
+                             outputs=[
+                                 det_preds,
+                                 detection_visualization,
+                             ])
+         redraw_det_button.click(fn=det_model.visualize_detection_results,
+                                 inputs=[
+                                     input_image,
+                                     det_preds,
+                                     vis_det_score_threshold,
+                                 ],
+                                 outputs=detection_visualization)
+
+         pose_model_name.change(fn=pose_model.set_model,
+                                inputs=pose_model_name,
+                                outputs=None)
+         predict_button.click(fn=pose_model.run,
+                              inputs=[
+                                  pose_model_name,
+                                  input_image,
+                                  det_preds,
+                                  det_score_threshold,
+                                  vis_kpt_score_threshold,
+                                  vis_dot_radius,
+                                  vis_line_thickness,
+                              ],
+                              outputs=[
+                                  pose_preds,
+                                  pose_visualization,
+                              ])
+         redraw_pose_button.click(fn=pose_model.visualize_pose_results,
+                                  inputs=[
+                                      input_image,
+                                      pose_preds,
+                                      vis_kpt_score_threshold,
+                                      vis_dot_radius,
+                                      vis_line_thickness,
+                                  ],
+                                  outputs=pose_visualization)
+
+         example_images.click(
+             fn=set_example_image,
+             inputs=example_images,
+             outputs=input_image,
+         )
+
+     demo.launch(
+         enable_queue=args.enable_queue,
+         server_port=args.port,
+         share=args.share,
      )


+ if __name__ == '__main__':
+     main()
model.py CHANGED
@@ -1,50 +1,74 @@
  from __future__ import annotations

  import pathlib
  import sys

  import huggingface_hub
  import numpy as np
  import torch
  import torch.nn as nn

  app_dir = pathlib.Path(__file__).parent
- submodule_dir = app_dir / "ViTPose"
  sys.path.insert(0, submodule_dir.as_posix())

  from mmdet.apis import inference_detector, init_detector
- from mmpose.apis import (
-     inference_top_down_pose_model,
-     init_pose_model,
-     process_mmdet_results,
-     vis_pose_result,
- )


  class DetModel:
      MODEL_DICT = {
-         "YOLOX-tiny": {
-             "config": "mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py",
-             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth",
          },
-         "YOLOX-s": {
-             "config": "mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py",
-             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth",
          },
-         "YOLOX-l": {
-             "config": "mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py",
-             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth",
          },
-         "YOLOX-x": {
-             "config": "mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py",
-             "model": "https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth",
          },
      }

-     def __init__(self):
-         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
          self._load_all_models_once()
-         self.model_name = "YOLOX-l"
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
@@ -52,8 +76,8 @@ class DetModel:
              self._load_model(name)

      def _load_model(self, name: str) -> nn.Module:
-         d = self.MODEL_DICT[name]
-         return init_detector(d["config"], d["model"], device=self.device)

      def set_model(self, name: str) -> None:
          if name == self.model_name:
@@ -61,7 +85,9 @@ class DetModel:
          self.model_name = name
          self.model = self._load_model(name)

-     def detect_and_visualize(self, image: np.ndarray, score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          out = self.detect(image)
          vis = self.visualize_detection_results(image, out, score_threshold)
          return out, vis
@@ -72,46 +98,56 @@ class DetModel:
          return out

      def visualize_detection_results(
-         self, image: np.ndarray, detection_results: list[np.ndarray], score_threshold: float = 0.3
-     ) -> np.ndarray:
          person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79

          image = image[:, :, ::-1]  # RGB -> BGR
-         vis = self.model.show_result(
-             image, person_det, score_thr=score_threshold, bbox_color=None, text_color=(200, 200, 200), mask_color=None
-         )
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppDetModel(DetModel):
-     def run(self, model_name: str, image: np.ndarray, score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          self.set_model(model_name)
          return self.detect_and_visualize(image, score_threshold)


  class PoseModel:
      MODEL_DICT = {
-         "ViTPose-B (single-task train)": {
-             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py",
-             "model": "models/vitpose-b.pth",
          },
-         "ViTPose-L (single-task train)": {
-             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py",
-             "model": "models/vitpose-l.pth",
          },
-         "ViTPose-B (multi-task train, COCO)": {
-             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py",
-             "model": "models/vitpose-b-multi-coco.pth",
          },
-         "ViTPose-L (multi-task train, COCO)": {
-             "config": "ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py",
-             "model": "models/vitpose-l-multi-coco.pth",
          },
      }

-     def __init__(self):
-         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-         self.model_name = "ViTPose-B (multi-task train, COCO)"
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
@@ -119,9 +155,11 @@ class PoseModel:
              self._load_model(name)

      def _load_model(self, name: str) -> nn.Module:
-         d = self.MODEL_DICT[name]
-         ckpt_path = huggingface_hub.hf_hub_download("public-data/ViTPose", d["model"])
-         model = init_pose_model(d["config"], ckpt_path, device=self.device)
          return model

      def set_model(self, name: str) -> None:
@@ -140,51 +178,50 @@ class PoseModel:
          vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          out = self.predict_pose(image, det_results, box_score_threshold)
-         vis = self.visualize_pose_results(image, out, kpt_score_threshold, vis_dot_radius, vis_line_thickness)
          return out, vis

      def predict_pose(
-         self, image: np.ndarray, det_results: list[np.ndarray], box_score_threshold: float = 0.5
-     ) -> list[dict[str, np.ndarray]]:
          image = image[:, :, ::-1]  # RGB -> BGR
          person_results = process_mmdet_results(det_results, 1)
-         out, _ = inference_top_down_pose_model(
-             self.model, image, person_results=person_results, bbox_thr=box_score_threshold, format="xyxy"
-         )
          return out

-     def visualize_pose_results(
-         self,
-         image: np.ndarray,
-         pose_results: list[np.ndarray],
-         kpt_score_threshold: float = 0.3,
-         vis_dot_radius: int = 4,
-         vis_line_thickness: int = 1,
-     ) -> np.ndarray:
          image = image[:, :, ::-1]  # RGB -> BGR
-         vis = vis_pose_result(
-             self.model,
-             image,
-             pose_results,
-             kpt_score_thr=kpt_score_threshold,
-             radius=vis_dot_radius,
-             thickness=vis_line_thickness,
-         )
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppPoseModel(PoseModel):
      def run(
-         self,
-         model_name: str,
-         image: np.ndarray,
-         det_results: list[np.ndarray],
-         box_score_threshold: float,
-         kpt_score_threshold: float,
-         vis_dot_radius: int,
-         vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          self.set_model(model_name)
-         return self.predict_pose_and_visualize(
-             image, det_results, box_score_threshold, kpt_score_threshold, vis_dot_radius, vis_line_thickness
-         )

  from __future__ import annotations

+ import os
  import pathlib
+ import subprocess
  import sys

+ try:
+     from mmcv.ops import get_compiling_cuda_version, get_compiler_version
+ except:
+     import mim
+     mim.install('mmcv-full==1.5.0')
+
+ if os.getenv('SYSTEM') == 'spaces':
+     import mim
+
+     mim.uninstall('mmcv-full', confirm_yes=True)
+     mim.install('mmcv-full==1.5.0', is_yes=True)
+
+     subprocess.run('pip uninstall -y opencv-python'.split())
+     subprocess.run('pip uninstall -y opencv-python-headless'.split())
+     subprocess.run('pip install opencv-python-headless==4.5.5.64'.split())
+
  import huggingface_hub
  import numpy as np
  import torch
  import torch.nn as nn

  app_dir = pathlib.Path(__file__).parent
+ submodule_dir = app_dir / 'ViTPose/'
  sys.path.insert(0, submodule_dir.as_posix())

  from mmdet.apis import inference_detector, init_detector
+ from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                          process_mmdet_results, vis_pose_result)
+
+ HF_TOKEN = os.environ['HF_TOKEN']


  class DetModel:
      MODEL_DICT = {
+         'YOLOX-tiny': {
+             'config':
+             'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
+             'model':
+             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
          },
+         'YOLOX-s': {
+             'config':
+             'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
+             'model':
+             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
          },
+         'YOLOX-l': {
+             'config':
+             'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
+             'model':
+             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
          },
+         'YOLOX-x': {
+             'config':
+             'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
+             'model':
+             'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
          },
      }

+     def __init__(self, device: str | torch.device):
+         self.device = torch.device(device)
          self._load_all_models_once()
+         self.model_name = 'YOLOX-l'
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
              self._load_model(name)

      def _load_model(self, name: str) -> nn.Module:
+         dic = self.MODEL_DICT[name]
+         return init_detector(dic['config'], dic['model'], device=self.device)

      def set_model(self, name: str) -> None:
          if name == self.model_name:
          self.model_name = name
          self.model = self._load_model(name)

+     def detect_and_visualize(
+             self, image: np.ndarray,
+             score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          out = self.detect(image)
          vis = self.visualize_detection_results(image, out, score_threshold)
          return out, vis
          return out

      def visualize_detection_results(
+             self,
+             image: np.ndarray,
+             detection_results: list[np.ndarray],
+             score_threshold: float = 0.3) -> np.ndarray:
          person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79

          image = image[:, :, ::-1]  # RGB -> BGR
+         vis = self.model.show_result(image,
+                                      person_det,
+                                      score_thr=score_threshold,
+                                      bbox_color=None,
+                                      text_color=(200, 200, 200),
+                                      mask_color=None)
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppDetModel(DetModel):
+     def run(self, model_name: str, image: np.ndarray,
+             score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
          self.set_model(model_name)
          return self.detect_and_visualize(image, score_threshold)


  class PoseModel:
      MODEL_DICT = {
+         'ViTPose-B (single-task train)': {
+             'config':
+             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+             'model': 'models/vitpose-b.pth',
          },
+         'ViTPose-L (single-task train)': {
+             'config':
+             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+             'model': 'models/vitpose-l.pth',
          },
+         'ViTPose-B (multi-task train, COCO)': {
+             'config':
+             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+             'model': 'models/vitpose-b-multi-coco.pth',
          },
+         'ViTPose-L (multi-task train, COCO)': {
+             'config':
+             'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+             'model': 'models/vitpose-l-multi-coco.pth',
          },
      }

+     def __init__(self, device: str | torch.device):
+         self.device = torch.device(device)
+         self.model_name = 'ViTPose-B (multi-task train, COCO)'
          self.model = self._load_model(self.model_name)

      def _load_all_models_once(self) -> None:
              self._load_model(name)

      def _load_model(self, name: str) -> nn.Module:
+         dic = self.MODEL_DICT[name]
+         ckpt_path = huggingface_hub.hf_hub_download('hysts/ViTPose',
+                                                     dic['model'],
+                                                     use_auth_token=HF_TOKEN)
+         model = init_pose_model(dic['config'], ckpt_path, device=self.device)
          return model

      def set_model(self, name: str) -> None:
          vis_line_thickness: int,
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          out = self.predict_pose(image, det_results, box_score_threshold)
+         vis = self.visualize_pose_results(image, out, kpt_score_threshold,
+                                           vis_dot_radius, vis_line_thickness)
          return out, vis

      def predict_pose(
+             self,
+             image: np.ndarray,
+             det_results: list[np.ndarray],
+             box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
          image = image[:, :, ::-1]  # RGB -> BGR
          person_results = process_mmdet_results(det_results, 1)
+         out, _ = inference_top_down_pose_model(self.model,
+                                                image,
+                                                person_results=person_results,
+                                                bbox_thr=box_score_threshold,
+                                                format='xyxy')
          return out

+     def visualize_pose_results(self,
+                                image: np.ndarray,
+                                pose_results: list[np.ndarray],
+                                kpt_score_threshold: float = 0.3,
+                                vis_dot_radius: int = 4,
+                                vis_line_thickness: int = 1) -> np.ndarray:
          image = image[:, :, ::-1]  # RGB -> BGR
+         vis = vis_pose_result(self.model,
+                               image,
+                               pose_results,
+                               kpt_score_thr=kpt_score_threshold,
+                               radius=vis_dot_radius,
+                               thickness=vis_line_thickness)
          return vis[:, :, ::-1]  # BGR -> RGB


  class AppPoseModel(PoseModel):
      def run(
+             self, model_name: str, image: np.ndarray,
+             det_results: list[np.ndarray], box_score_threshold: float,
+             kpt_score_threshold: float, vis_dot_radius: int,
+             vis_line_thickness: int
      ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
          self.set_model(model_name)
+         return self.predict_pose_and_visualize(image, det_results,
+                                                box_score_threshold,
+                                                kpt_score_threshold,
+                                                vis_dot_radius,
+                                                vis_line_thickness)
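
For reference, the two classes in the new model.py can also be driven outside the Gradio UI. The sketch below is not part of the Space; it assumes the pinned requirements.txt environment, an HF_TOKEN environment variable (the new model.py reads it at import time to download the ViTPose checkpoints), and a hypothetical local image at images/example.jpg.

```python
# Minimal usage sketch (assumption: not part of the Space itself).
import cv2

from model import AppDetModel, AppPoseModel  # the new model.py

det_model = AppDetModel(device='cpu')
pose_model = AppPoseModel(device='cpu')

# The classes expect RGB arrays (as gr.Image provides); OpenCV loads BGR.
bgr = cv2.imread('images/example.jpg')  # hypothetical example image
image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

# Step 1: person detection, as wired to the "Detect" button in app.py.
det_preds, det_vis = det_model.run('YOLOX-l', image, 0.5)

# Step 2: top-down pose estimation on the detected boxes, mirroring the
# "Predict" button (box thr, keypoint thr, dot radius, line thickness).
pose_preds, pose_vis = pose_model.run('ViTPose-B (multi-task train, COCO)',
                                      image, det_preds, 0.5, 0.3, 4, 2)

cv2.imwrite('pose_result.png', cv2.cvtColor(pose_vis, cv2.COLOR_RGB2BGR))
```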
requirements.txt CHANGED
@@ -1,8 +1,8 @@
  mmcv-full==1.5.0
  mmdet==2.24.1
  mmpose==0.25.1
- numpy==1.23.5
- opencv-python-headless==4.8.0.74
  openmim==0.1.5
  timm==0.5.4
  torch==1.11.0

  mmcv-full==1.5.0
  mmdet==2.24.1
  mmpose==0.25.1
+ numpy==1.22.4
+ opencv-python-headless==4.5.5.64
  openmim==0.1.5
  timm==0.5.4
  torch==1.11.0
style.css CHANGED
@@ -1,6 +1,5 @@
  h1 {
    text-align: center;
-   display: block;
  }
  div#det-result {
    max-width: 600px;
@@ -10,3 +9,7 @@ div#pose-result {
    max-width: 600px;
    max-height: 600px;
  }

  h1 {
    text-align: center;
  }
  div#det-result {
    max-width: 600px;
    max-width: 600px;
    max-height: 600px;
  }
+ img#visitor-badge {
+   display: block;
+   margin: auto;
+ }