Keypoint Detection
Transformers
Safetensors
vitpose
Inference Endpoints
vitpose-plus-huge / config.json
nielsr's picture
nielsr HF staff
Upload VitPoseForPoseEstimation
8104525 verified
raw
history blame
2.48 kB
{
"architectures": [
"VitPoseForPoseEstimation"
],
"backbone": null,
"backbone_config": {
"hidden_size": 1280,
"model_type": "vitpose_backbone",
"num_attention_heads": 16,
"num_experts": 6,
"num_hidden_layers": 32,
"out_features": [
"stage32"
],
"out_indices": [
32
],
"part_features": 320,
"stage_names": [
"stem",
"stage1",
"stage2",
"stage3",
"stage4",
"stage5",
"stage6",
"stage7",
"stage8",
"stage9",
"stage10",
"stage11",
"stage12",
"stage13",
"stage14",
"stage15",
"stage16",
"stage17",
"stage18",
"stage19",
"stage20",
"stage21",
"stage22",
"stage23",
"stage24",
"stage25",
"stage26",
"stage27",
"stage28",
"stage29",
"stage30",
"stage31",
"stage32"
]
},
"backbone_kwargs": null,
"edges": [
[
15,
13
],
[
13,
11
],
[
16,
14
],
[
14,
12
],
[
11,
12
],
[
5,
11
],
[
6,
12
],
[
5,
6
],
[
5,
7
],
[
6,
8
],
[
7,
9
],
[
8,
10
],
[
1,
2
],
[
0,
1
],
[
0,
2
],
[
1,
3
],
[
2,
4
],
[
3,
5
],
[
4,
6
]
],
"id2label": {
"0": "Nose",
"1": "L_Eye",
"2": "R_Eye",
"3": "L_Ear",
"4": "R_Ear",
"5": "L_Shoulder",
"6": "R_Shoulder",
"7": "L_Elbow",
"8": "R_Elbow",
"9": "L_Wrist",
"10": "R_Wrist",
"11": "L_Hip",
"12": "R_Hip",
"13": "L_Knee",
"14": "R_Knee",
"15": "L_Ankle",
"16": "R_Ankle"
},
"initializer_range": 0.02,
"label2id": {
"L_Ankle": 15,
"L_Ear": 3,
"L_Elbow": 7,
"L_Eye": 1,
"L_Hip": 11,
"L_Knee": 13,
"L_Shoulder": 5,
"L_Wrist": 9,
"Nose": 0,
"R_Ankle": 16,
"R_Ear": 4,
"R_Elbow": 8,
"R_Eye": 2,
"R_Hip": 12,
"R_Knee": 14,
"R_Shoulder": 6,
"R_Wrist": 10
},
"model_type": "vitpose",
"scale_factor": 4,
"torch_dtype": "float32",
"transformers_version": "4.49.0.dev0",
"use_pretrained_backbone": false,
"use_simple_decoder": false,
"use_timm_backbone": false
}