ziq committed on
Commit
5bc02c3
·
1 Parent(s): fae37e4

Upload 7 files

Browse files
Files changed (7) hide show
  1. gd-ogc.py +43 -0
  2. rtmdet-l.py +178 -0
  3. rtmdet-m.py +7 -0
  4. rtmdet-s.py +62 -0
  5. rtmpose-l.py +232 -0
  6. rtmpose-m.py +232 -0
  7. rtmpose-s.py +232 -0
gd-ogc.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size = 1
2
+ modelname = "groundingdino"
3
+ backbone = "swin_T_224_1k"
4
+ position_embedding = "sine"
5
+ pe_temperatureH = 20
6
+ pe_temperatureW = 20
7
+ return_interm_indices = [1, 2, 3]
8
+ backbone_freeze_keywords = None
9
+ enc_layers = 6
10
+ dec_layers = 6
11
+ pre_norm = False
12
+ dim_feedforward = 2048
13
+ hidden_dim = 256
14
+ dropout = 0.0
15
+ nheads = 8
16
+ num_queries = 900
17
+ query_dim = 4
18
+ num_patterns = 0
19
+ num_feature_levels = 4
20
+ enc_n_points = 4
21
+ dec_n_points = 4
22
+ two_stage_type = "standard"
23
+ two_stage_bbox_embed_share = False
24
+ two_stage_class_embed_share = False
25
+ transformer_activation = "relu"
26
+ dec_pred_bbox_embed_share = True
27
+ dn_box_noise_scale = 1.0
28
+ dn_label_noise_ratio = 0.5
29
+ dn_label_coef = 1.0
30
+ dn_bbox_coef = 1.0
31
+ embed_init_tgt = True
32
+ dn_labelbook_size = 2000
33
+ max_text_len = 256
34
+ text_encoder_type = "bert-base-uncased"
35
+ use_text_enhancer = True
36
+ use_fusion_layer = True
37
+ use_checkpoint = True
38
+ use_transformer_ckpt = True
39
+ use_text_cross_attention = True
40
+ text_dropout = 0.0
41
+ fusion_dropout = 0.0
42
+ fusion_droppath = 0.1
43
+ sub_sentence_present = True
rtmdet-l.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ 'mmdet::_base_/default_runtime.py', 'mmdet::_base_/schedules/schedule_1x.py',
3
+ 'mmdet::_base_/datasets/coco_detection.py', 'mmdet::rtmdet/rtmdet_tta.py'
4
+ ]
5
+ model = dict(
6
+ type='RTMDet',
7
+ data_preprocessor=dict(
8
+ type='DetDataPreprocessor',
9
+ mean=[103.53, 116.28, 123.675],
10
+ std=[57.375, 57.12, 58.395],
11
+ bgr_to_rgb=False,
12
+ batch_augments=None),
13
+ backbone=dict(
14
+ type='CSPNeXt',
15
+ arch='P5',
16
+ expand_ratio=0.5,
17
+ deepen_factor=1,
18
+ widen_factor=1,
19
+ channel_attention=True,
20
+ norm_cfg=dict(type='SyncBN'),
21
+ act_cfg=dict(type='SiLU', inplace=True)),
22
+ neck=dict(
23
+ type='CSPNeXtPAFPN',
24
+ in_channels=[256, 512, 1024],
25
+ out_channels=256,
26
+ num_csp_blocks=3,
27
+ expand_ratio=0.5,
28
+ norm_cfg=dict(type='SyncBN'),
29
+ act_cfg=dict(type='SiLU', inplace=True)),
30
+ bbox_head=dict(
31
+ type='RTMDetSepBNHead',
32
+ num_classes=80,
33
+ in_channels=256,
34
+ stacked_convs=2,
35
+ feat_channels=256,
36
+ anchor_generator=dict(
37
+ type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
38
+ bbox_coder=dict(type='DistancePointBBoxCoder'),
39
+ loss_cls=dict(
40
+ type='QualityFocalLoss',
41
+ use_sigmoid=True,
42
+ beta=2.0,
43
+ loss_weight=1.0),
44
+ loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
45
+ with_objectness=False,
46
+ exp_on_reg=True,
47
+ share_conv=True,
48
+ pred_kernel_size=1,
49
+ norm_cfg=dict(type='SyncBN'),
50
+ act_cfg=dict(type='SiLU', inplace=True)),
51
+ train_cfg=dict(
52
+ assigner=dict(type='DynamicSoftLabelAssigner', topk=13),
53
+ allowed_border=-1,
54
+ pos_weight=-1,
55
+ debug=False),
56
+ test_cfg=dict(
57
+ nms_pre=30000,
58
+ min_bbox_size=0,
59
+ score_thr=0.001,
60
+ nms=dict(type='nms', iou_threshold=0.65),
61
+ max_per_img=300),
62
+ )
63
+
64
+ train_pipeline = [
65
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
66
+ dict(type='LoadAnnotations', with_bbox=True),
67
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
68
+ dict(
69
+ type='RandomResize',
70
+ scale=(1280, 1280),
71
+ ratio_range=(0.1, 2.0),
72
+ keep_ratio=True),
73
+ dict(type='RandomCrop', crop_size=(640, 640)),
74
+ dict(type='YOLOXHSVRandomAug'),
75
+ dict(type='RandomFlip', prob=0.5),
76
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
77
+ dict(
78
+ type='CachedMixUp',
79
+ img_scale=(640, 640),
80
+ ratio_range=(1.0, 1.0),
81
+ max_cached_images=20,
82
+ pad_val=(114, 114, 114)),
83
+ dict(type='mmdet.PackDetInputs')
84
+ ]
85
+
86
+ train_pipeline_stage2 = [
87
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
88
+ dict(type='LoadAnnotations', with_bbox=True),
89
+ dict(
90
+ type='RandomResize',
91
+ scale=(640, 640),
92
+ ratio_range=(0.1, 2.0),
93
+ keep_ratio=True),
94
+ dict(type='RandomCrop', crop_size=(640, 640)),
95
+ dict(type='YOLOXHSVRandomAug'),
96
+ dict(type='RandomFlip', prob=0.5),
97
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
98
+ dict(type='mmdet.PackDetInputs')
99
+ ]
100
+
101
+ test_pipeline = [
102
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
103
+ dict(type='Resize', scale=(640, 640), keep_ratio=True),
104
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
105
+ dict(
106
+ type='mmdet.PackDetInputs',
107
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
108
+ 'scale_factor'))
109
+ ]
110
+
111
+ train_dataloader = dict(
112
+ batch_size=32,
113
+ num_workers=10,
114
+ batch_sampler=None,
115
+ pin_memory=True,
116
+ dataset=dict(pipeline=train_pipeline))
117
+ val_dataloader = dict(
118
+ batch_size=5, num_workers=10, dataset=dict(pipeline=test_pipeline))
119
+ test_dataloader = val_dataloader
120
+
121
+ max_epochs = 300
122
+ stage2_num_epochs = 20
123
+ base_lr = 0.004
124
+ interval = 10
125
+
126
+ train_cfg = dict(
127
+ max_epochs=max_epochs,
128
+ val_interval=interval,
129
+ dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
130
+
131
+ val_evaluator = dict(proposal_nums=(100, 1, 10))
132
+ test_evaluator = val_evaluator
133
+
134
+ # optimizer
135
+ optim_wrapper = dict(
136
+ _delete_=True,
137
+ type='OptimWrapper',
138
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
139
+ paramwise_cfg=dict(
140
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
141
+
142
+ # learning rate
143
+ param_scheduler = [
144
+ dict(
145
+ type='LinearLR',
146
+ start_factor=1.0e-5,
147
+ by_epoch=False,
148
+ begin=0,
149
+ end=1000),
150
+ dict(
151
+ # use cosine lr annealing from epoch 150 to epoch 300
152
+ type='CosineAnnealingLR',
153
+ eta_min=base_lr * 0.05,
154
+ begin=max_epochs // 2,
155
+ end=max_epochs,
156
+ T_max=max_epochs // 2,
157
+ by_epoch=True,
158
+ convert_to_iter_based=True),
159
+ ]
160
+
161
+ # hooks
162
+ default_hooks = dict(
163
+ checkpoint=dict(
164
+ interval=interval,
165
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
166
+ ))
167
+ custom_hooks = [
168
+ dict(
169
+ type='EMAHook',
170
+ ema_type='ExpMomentumEMA',
171
+ momentum=0.0002,
172
+ update_buffers=True,
173
+ priority=49),
174
+ dict(
175
+ type='PipelineSwitchHook',
176
+ switch_epoch=max_epochs - stage2_num_epochs,
177
+ switch_pipeline=train_pipeline_stage2)
178
+ ]
rtmdet-m.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ _base_ = "./rtmdet-l.py"
2
+
3
+ model = dict(
4
+ backbone=dict(deepen_factor=0.67, widen_factor=0.75),
5
+ neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),
6
+ bbox_head=dict(in_channels=192, feat_channels=192),
7
+ )
rtmdet-s.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = 'mmdet::rtmdet/rtmdet_l_8xb32-300e_coco.py'
2
+ checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa
3
+ model = dict(
4
+ backbone=dict(
5
+ deepen_factor=0.33,
6
+ widen_factor=0.5,
7
+ init_cfg=dict(
8
+ type='Pretrained', prefix='backbone.', checkpoint=checkpoint)),
9
+ neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1),
10
+ bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False))
11
+
12
+ train_pipeline = [
13
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
14
+ dict(type='LoadAnnotations', with_bbox=True),
15
+ dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
16
+ dict(
17
+ type='RandomResize',
18
+ scale=(1280, 1280),
19
+ ratio_range=(0.5, 2.0),
20
+ keep_ratio=True),
21
+ dict(type='RandomCrop', crop_size=(640, 640)),
22
+ dict(type='YOLOXHSVRandomAug'),
23
+ dict(type='RandomFlip', prob=0.5),
24
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
25
+ dict(
26
+ type='CachedMixUp',
27
+ img_scale=(640, 640),
28
+ ratio_range=(1.0, 1.0),
29
+ max_cached_images=20,
30
+ pad_val=(114, 114, 114)),
31
+ dict(type='PackDetInputs')
32
+ ]
33
+
34
+ train_pipeline_stage2 = [
35
+ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
36
+ dict(type='LoadAnnotations', with_bbox=True),
37
+ dict(
38
+ type='RandomResize',
39
+ scale=(640, 640),
40
+ ratio_range=(0.5, 2.0),
41
+ keep_ratio=True),
42
+ dict(type='RandomCrop', crop_size=(640, 640)),
43
+ dict(type='YOLOXHSVRandomAug'),
44
+ dict(type='RandomFlip', prob=0.5),
45
+ dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
46
+ dict(type='PackDetInputs')
47
+ ]
48
+
49
+ train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
50
+
51
+ custom_hooks = [
52
+ dict(
53
+ type='EMAHook',
54
+ ema_type='ExpMomentumEMA',
55
+ momentum=0.0002,
56
+ update_buffers=True,
57
+ priority=49),
58
+ dict(
59
+ type='PipelineSwitchHook',
60
+ switch_epoch=280,
61
+ switch_pipeline=train_pipeline_stage2)
62
+ ]
rtmpose-l.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['mmpose::_base_/default_runtime.py']
2
+
3
+ # runtime
4
+ max_epochs = 420
5
+ stage2_num_epochs = 30
6
+ base_lr = 4e-3
7
+
8
+ train_cfg = dict(max_epochs=max_epochs, val_interval=10)
9
+ randomness = dict(seed=21)
10
+
11
+ # optimizer
12
+ optim_wrapper = dict(
13
+ type='OptimWrapper',
14
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
15
+ paramwise_cfg=dict(
16
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
17
+
18
+ # learning rate
19
+ param_scheduler = [
20
+ dict(
21
+ type='LinearLR',
22
+ start_factor=1.0e-5,
23
+ by_epoch=False,
24
+ begin=0,
25
+ end=1000),
26
+ dict(
27
+ # use cosine lr annealing from epoch 210 to epoch 420
28
+ type='CosineAnnealingLR',
29
+ eta_min=base_lr * 0.05,
30
+ begin=max_epochs // 2,
31
+ end=max_epochs,
32
+ T_max=max_epochs // 2,
33
+ by_epoch=True,
34
+ convert_to_iter_based=True),
35
+ ]
36
+
37
+ # automatically scale the LR based on the actual training batch size
38
+ auto_scale_lr = dict(base_batch_size=1024)
39
+
40
+ # codec settings
41
+ codec = dict(
42
+ type='SimCCLabel',
43
+ input_size=(192, 256),
44
+ sigma=(4.9, 5.66),
45
+ simcc_split_ratio=2.0,
46
+ normalize=False,
47
+ use_dark=False)
48
+
49
+ # model settings
50
+ model = dict(
51
+ type='TopdownPoseEstimator',
52
+ data_preprocessor=dict(
53
+ type='PoseDataPreprocessor',
54
+ mean=[123.675, 116.28, 103.53],
55
+ std=[58.395, 57.12, 57.375],
56
+ bgr_to_rgb=True),
57
+ backbone=dict(
58
+ _scope_='mmdet',
59
+ type='CSPNeXt',
60
+ arch='P5',
61
+ expand_ratio=0.5,
62
+ deepen_factor=1.,
63
+ widen_factor=1.,
64
+ out_indices=(4, ),
65
+ channel_attention=True,
66
+ norm_cfg=dict(type='SyncBN'),
67
+ act_cfg=dict(type='SiLU'),
68
+ init_cfg=dict(
69
+ type='Pretrained',
70
+ prefix='backbone.',
71
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
72
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
73
+ )),
74
+ head=dict(
75
+ type='RTMCCHead',
76
+ in_channels=1024,
77
+ out_channels=17,
78
+ input_size=codec['input_size'],
79
+ in_featuremap_size=(6, 8),
80
+ simcc_split_ratio=codec['simcc_split_ratio'],
81
+ final_layer_kernel_size=7,
82
+ gau_cfg=dict(
83
+ hidden_dims=256,
84
+ s=128,
85
+ expansion_factor=2,
86
+ dropout_rate=0.,
87
+ drop_path=0.,
88
+ act_fn='SiLU',
89
+ use_rel_bias=False,
90
+ pos_enc=False),
91
+ loss=dict(
92
+ type='KLDiscretLoss',
93
+ use_target_weight=True,
94
+ beta=10.,
95
+ label_softmax=True),
96
+ decoder=codec),
97
+ test_cfg=dict(flip_test=True))
98
+
99
+ # base dataset settings
100
+ dataset_type = 'CocoDataset'
101
+ data_mode = 'topdown'
102
+ data_root = 'data/coco/'
103
+
104
+ backend_args = dict(backend='local')
105
+ # backend_args = dict(
106
+ # backend='petrel',
107
+ # path_mapping=dict({
108
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
109
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
110
+ # }))
111
+
112
+ # pipelines
113
+ train_pipeline = [
114
+ dict(type='LoadImage', backend_args=backend_args),
115
+ dict(type='GetBBoxCenterScale'),
116
+ dict(type='RandomFlip', direction='horizontal'),
117
+ dict(type='RandomHalfBody'),
118
+ dict(
119
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
120
+ dict(type='TopdownAffine', input_size=codec['input_size']),
121
+ dict(type='mmdet.YOLOXHSVRandomAug'),
122
+ dict(
123
+ type='Albumentation',
124
+ transforms=[
125
+ dict(type='Blur', p=0.1),
126
+ dict(type='MedianBlur', p=0.1),
127
+ dict(
128
+ type='CoarseDropout',
129
+ max_holes=1,
130
+ max_height=0.4,
131
+ max_width=0.4,
132
+ min_holes=1,
133
+ min_height=0.2,
134
+ min_width=0.2,
135
+ p=1.),
136
+ ]),
137
+ dict(type='GenerateTarget', encoder=codec),
138
+ dict(type='PackPoseInputs')
139
+ ]
140
+ val_pipeline = [
141
+ dict(type='LoadImage', backend_args=backend_args),
142
+ dict(type='GetBBoxCenterScale'),
143
+ dict(type='TopdownAffine', input_size=codec['input_size']),
144
+ dict(type='PackPoseInputs')
145
+ ]
146
+
147
+ train_pipeline_stage2 = [
148
+ dict(type='LoadImage', backend_args=backend_args),
149
+ dict(type='GetBBoxCenterScale'),
150
+ dict(type='RandomFlip', direction='horizontal'),
151
+ dict(type='RandomHalfBody'),
152
+ dict(
153
+ type='RandomBBoxTransform',
154
+ shift_factor=0.,
155
+ scale_factor=[0.75, 1.25],
156
+ rotate_factor=60),
157
+ dict(type='TopdownAffine', input_size=codec['input_size']),
158
+ dict(type='mmdet.YOLOXHSVRandomAug'),
159
+ dict(
160
+ type='Albumentation',
161
+ transforms=[
162
+ dict(type='Blur', p=0.1),
163
+ dict(type='MedianBlur', p=0.1),
164
+ dict(
165
+ type='CoarseDropout',
166
+ max_holes=1,
167
+ max_height=0.4,
168
+ max_width=0.4,
169
+ min_holes=1,
170
+ min_height=0.2,
171
+ min_width=0.2,
172
+ p=0.5),
173
+ ]),
174
+ dict(type='GenerateTarget', encoder=codec),
175
+ dict(type='PackPoseInputs')
176
+ ]
177
+
178
+ # data loaders
179
+ train_dataloader = dict(
180
+ batch_size=256,
181
+ num_workers=10,
182
+ persistent_workers=True,
183
+ sampler=dict(type='DefaultSampler', shuffle=True),
184
+ dataset=dict(
185
+ type=dataset_type,
186
+ data_root=data_root,
187
+ data_mode=data_mode,
188
+ ann_file='annotations/person_keypoints_train2017.json',
189
+ data_prefix=dict(img='train2017/'),
190
+ pipeline=train_pipeline,
191
+ ))
192
+ val_dataloader = dict(
193
+ batch_size=64,
194
+ num_workers=10,
195
+ persistent_workers=True,
196
+ drop_last=False,
197
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
198
+ dataset=dict(
199
+ type=dataset_type,
200
+ data_root=data_root,
201
+ data_mode=data_mode,
202
+ ann_file='annotations/person_keypoints_val2017.json',
203
+ # bbox_file=f'{data_root}person_detection_results/'
204
+ # 'COCO_val2017_detections_AP_H_56_person.json',
205
+ data_prefix=dict(img='val2017/'),
206
+ test_mode=True,
207
+ pipeline=val_pipeline,
208
+ ))
209
+ test_dataloader = val_dataloader
210
+
211
+ # hooks
212
+ default_hooks = dict(
213
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
214
+
215
+ custom_hooks = [
216
+ dict(
217
+ type='EMAHook',
218
+ ema_type='ExpMomentumEMA',
219
+ momentum=0.0002,
220
+ update_buffers=True,
221
+ priority=49),
222
+ dict(
223
+ type='mmdet.PipelineSwitchHook',
224
+ switch_epoch=max_epochs - stage2_num_epochs,
225
+ switch_pipeline=train_pipeline_stage2)
226
+ ]
227
+
228
+ # evaluators
229
+ val_evaluator = dict(
230
+ type='CocoMetric',
231
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
232
+ test_evaluator = val_evaluator
rtmpose-m.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['mmpose::_base_/default_runtime.py']
2
+
3
+ # runtime
4
+ max_epochs = 420
5
+ stage2_num_epochs = 30
6
+ base_lr = 4e-3
7
+
8
+ train_cfg = dict(max_epochs=max_epochs, val_interval=10)
9
+ randomness = dict(seed=21)
10
+
11
+ # optimizer
12
+ optim_wrapper = dict(
13
+ type='OptimWrapper',
14
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
15
+ paramwise_cfg=dict(
16
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
17
+
18
+ # learning rate
19
+ param_scheduler = [
20
+ dict(
21
+ type='LinearLR',
22
+ start_factor=1.0e-5,
23
+ by_epoch=False,
24
+ begin=0,
25
+ end=1000),
26
+ dict(
27
+ # use cosine lr annealing from epoch 210 to epoch 420
28
+ type='CosineAnnealingLR',
29
+ eta_min=base_lr * 0.05,
30
+ begin=max_epochs // 2,
31
+ end=max_epochs,
32
+ T_max=max_epochs // 2,
33
+ by_epoch=True,
34
+ convert_to_iter_based=True),
35
+ ]
36
+
37
+ # automatically scale the LR based on the actual training batch size
38
+ auto_scale_lr = dict(base_batch_size=1024)
39
+
40
+ # codec settings
41
+ codec = dict(
42
+ type='SimCCLabel',
43
+ input_size=(192, 256),
44
+ sigma=(4.9, 5.66),
45
+ simcc_split_ratio=2.0,
46
+ normalize=False,
47
+ use_dark=False)
48
+
49
+ # model settings
50
+ model = dict(
51
+ type='TopdownPoseEstimator',
52
+ data_preprocessor=dict(
53
+ type='PoseDataPreprocessor',
54
+ mean=[123.675, 116.28, 103.53],
55
+ std=[58.395, 57.12, 57.375],
56
+ bgr_to_rgb=True),
57
+ backbone=dict(
58
+ _scope_='mmdet',
59
+ type='CSPNeXt',
60
+ arch='P5',
61
+ expand_ratio=0.5,
62
+ deepen_factor=0.67,
63
+ widen_factor=0.75,
64
+ out_indices=(4, ),
65
+ channel_attention=True,
66
+ norm_cfg=dict(type='SyncBN'),
67
+ act_cfg=dict(type='SiLU'),
68
+ init_cfg=dict(
69
+ type='Pretrained',
70
+ prefix='backbone.',
71
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
72
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
73
+ )),
74
+ head=dict(
75
+ type='RTMCCHead',
76
+ in_channels=768,
77
+ out_channels=17,
78
+ input_size=codec['input_size'],
79
+ in_featuremap_size=(6, 8),
80
+ simcc_split_ratio=codec['simcc_split_ratio'],
81
+ final_layer_kernel_size=7,
82
+ gau_cfg=dict(
83
+ hidden_dims=256,
84
+ s=128,
85
+ expansion_factor=2,
86
+ dropout_rate=0.,
87
+ drop_path=0.,
88
+ act_fn='SiLU',
89
+ use_rel_bias=False,
90
+ pos_enc=False),
91
+ loss=dict(
92
+ type='KLDiscretLoss',
93
+ use_target_weight=True,
94
+ beta=10.,
95
+ label_softmax=True),
96
+ decoder=codec),
97
+ test_cfg=dict(flip_test=True))
98
+
99
+ # base dataset settings
100
+ dataset_type = 'CocoDataset'
101
+ data_mode = 'topdown'
102
+ data_root = 'data/coco/'
103
+
104
+ backend_args = dict(backend='local')
105
+ # backend_args = dict(
106
+ # backend='petrel',
107
+ # path_mapping=dict({
108
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
109
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
110
+ # }))
111
+
112
+ # pipelines
113
+ train_pipeline = [
114
+ dict(type='LoadImage', backend_args=backend_args),
115
+ dict(type='GetBBoxCenterScale'),
116
+ dict(type='RandomFlip', direction='horizontal'),
117
+ dict(type='RandomHalfBody'),
118
+ dict(
119
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
120
+ dict(type='TopdownAffine', input_size=codec['input_size']),
121
+ dict(type='mmdet.YOLOXHSVRandomAug'),
122
+ dict(
123
+ type='Albumentation',
124
+ transforms=[
125
+ dict(type='Blur', p=0.1),
126
+ dict(type='MedianBlur', p=0.1),
127
+ dict(
128
+ type='CoarseDropout',
129
+ max_holes=1,
130
+ max_height=0.4,
131
+ max_width=0.4,
132
+ min_holes=1,
133
+ min_height=0.2,
134
+ min_width=0.2,
135
+ p=1.),
136
+ ]),
137
+ dict(type='GenerateTarget', encoder=codec),
138
+ dict(type='PackPoseInputs')
139
+ ]
140
+ val_pipeline = [
141
+ dict(type='LoadImage', backend_args=backend_args),
142
+ dict(type='GetBBoxCenterScale'),
143
+ dict(type='TopdownAffine', input_size=codec['input_size']),
144
+ dict(type='PackPoseInputs')
145
+ ]
146
+
147
+ train_pipeline_stage2 = [
148
+ dict(type='LoadImage', backend_args=backend_args),
149
+ dict(type='GetBBoxCenterScale'),
150
+ dict(type='RandomFlip', direction='horizontal'),
151
+ dict(type='RandomHalfBody'),
152
+ dict(
153
+ type='RandomBBoxTransform',
154
+ shift_factor=0.,
155
+ scale_factor=[0.75, 1.25],
156
+ rotate_factor=60),
157
+ dict(type='TopdownAffine', input_size=codec['input_size']),
158
+ dict(type='mmdet.YOLOXHSVRandomAug'),
159
+ dict(
160
+ type='Albumentation',
161
+ transforms=[
162
+ dict(type='Blur', p=0.1),
163
+ dict(type='MedianBlur', p=0.1),
164
+ dict(
165
+ type='CoarseDropout',
166
+ max_holes=1,
167
+ max_height=0.4,
168
+ max_width=0.4,
169
+ min_holes=1,
170
+ min_height=0.2,
171
+ min_width=0.2,
172
+ p=0.5),
173
+ ]),
174
+ dict(type='GenerateTarget', encoder=codec),
175
+ dict(type='PackPoseInputs')
176
+ ]
177
+
178
+ # data loaders
179
+ train_dataloader = dict(
180
+ batch_size=256,
181
+ num_workers=10,
182
+ persistent_workers=True,
183
+ sampler=dict(type='DefaultSampler', shuffle=True),
184
+ dataset=dict(
185
+ type=dataset_type,
186
+ data_root=data_root,
187
+ data_mode=data_mode,
188
+ ann_file='annotations/person_keypoints_train2017.json',
189
+ data_prefix=dict(img='train2017/'),
190
+ pipeline=train_pipeline,
191
+ ))
192
+ val_dataloader = dict(
193
+ batch_size=64,
194
+ num_workers=10,
195
+ persistent_workers=True,
196
+ drop_last=False,
197
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
198
+ dataset=dict(
199
+ type=dataset_type,
200
+ data_root=data_root,
201
+ data_mode=data_mode,
202
+ ann_file='annotations/person_keypoints_val2017.json',
203
+ # bbox_file=f'{data_root}person_detection_results/'
204
+ # 'COCO_val2017_detections_AP_H_56_person.json',
205
+ data_prefix=dict(img='val2017/'),
206
+ test_mode=True,
207
+ pipeline=val_pipeline,
208
+ ))
209
+ test_dataloader = val_dataloader
210
+
211
+ # hooks
212
+ default_hooks = dict(
213
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
214
+
215
+ custom_hooks = [
216
+ dict(
217
+ type='EMAHook',
218
+ ema_type='ExpMomentumEMA',
219
+ momentum=0.0002,
220
+ update_buffers=True,
221
+ priority=49),
222
+ dict(
223
+ type='mmdet.PipelineSwitchHook',
224
+ switch_epoch=max_epochs - stage2_num_epochs,
225
+ switch_pipeline=train_pipeline_stage2)
226
+ ]
227
+
228
+ # evaluators
229
+ val_evaluator = dict(
230
+ type='CocoMetric',
231
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
232
+ test_evaluator = val_evaluator
rtmpose-s.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['mmpose::_base_/default_runtime.py']
2
+
3
+ # runtime
4
+ max_epochs = 420
5
+ stage2_num_epochs = 30
6
+ base_lr = 4e-3
7
+
8
+ train_cfg = dict(max_epochs=max_epochs, val_interval=10)
9
+ randomness = dict(seed=21)
10
+
11
+ # optimizer
12
+ optim_wrapper = dict(
13
+ type='OptimWrapper',
14
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
15
+ paramwise_cfg=dict(
16
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
17
+
18
+ # learning rate
19
+ param_scheduler = [
20
+ dict(
21
+ type='LinearLR',
22
+ start_factor=1.0e-5,
23
+ by_epoch=False,
24
+ begin=0,
25
+ end=1000),
26
+ dict(
27
+ # use cosine lr annealing from epoch 210 to epoch 420
28
+ type='CosineAnnealingLR',
29
+ eta_min=base_lr * 0.05,
30
+ begin=max_epochs // 2,
31
+ end=max_epochs,
32
+ T_max=max_epochs // 2,
33
+ by_epoch=True,
34
+ convert_to_iter_based=True),
35
+ ]
36
+
37
+ # automatically scale the LR based on the actual training batch size
38
+ auto_scale_lr = dict(base_batch_size=1024)
39
+
40
+ # codec settings
41
+ codec = dict(
42
+ type='SimCCLabel',
43
+ input_size=(192, 256),
44
+ sigma=(4.9, 5.66),
45
+ simcc_split_ratio=2.0,
46
+ normalize=False,
47
+ use_dark=False)
48
+
49
+ # model settings
50
+ model = dict(
51
+ type='TopdownPoseEstimator',
52
+ data_preprocessor=dict(
53
+ type='PoseDataPreprocessor',
54
+ mean=[123.675, 116.28, 103.53],
55
+ std=[58.395, 57.12, 57.375],
56
+ bgr_to_rgb=True),
57
+ backbone=dict(
58
+ _scope_='mmdet',
59
+ type='CSPNeXt',
60
+ arch='P5',
61
+ expand_ratio=0.5,
62
+ deepen_factor=0.33,
63
+ widen_factor=0.5,
64
+ out_indices=(4, ),
65
+ channel_attention=True,
66
+ norm_cfg=dict(type='SyncBN'),
67
+ act_cfg=dict(type='SiLU'),
68
+ init_cfg=dict(
69
+ type='Pretrained',
70
+ prefix='backbone.',
71
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
72
+ 'rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa
73
+ )),
74
+ head=dict(
75
+ type='RTMCCHead',
76
+ in_channels=512,
77
+ out_channels=17,
78
+ input_size=codec['input_size'],
79
+ in_featuremap_size=(6, 8),
80
+ simcc_split_ratio=codec['simcc_split_ratio'],
81
+ final_layer_kernel_size=7,
82
+ gau_cfg=dict(
83
+ hidden_dims=256,
84
+ s=128,
85
+ expansion_factor=2,
86
+ dropout_rate=0.,
87
+ drop_path=0.,
88
+ act_fn='SiLU',
89
+ use_rel_bias=False,
90
+ pos_enc=False),
91
+ loss=dict(
92
+ type='KLDiscretLoss',
93
+ use_target_weight=True,
94
+ beta=10.,
95
+ label_softmax=True),
96
+ decoder=codec),
97
+ test_cfg=dict(flip_test=True))
98
+
99
+ # base dataset settings
100
+ dataset_type = 'CocoDataset'
101
+ data_mode = 'topdown'
102
+ data_root = 'data/coco/'
103
+
104
+ backend_args = dict(backend='local')
105
+ # backend_args = dict(
106
+ # backend='petrel',
107
+ # path_mapping=dict({
108
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
109
+ # f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
110
+ # }))
111
+
112
+ # pipelines
113
+ train_pipeline = [
114
+ dict(type='LoadImage', backend_args=backend_args),
115
+ dict(type='GetBBoxCenterScale'),
116
+ dict(type='RandomFlip', direction='horizontal'),
117
+ dict(type='RandomHalfBody'),
118
+ dict(
119
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
120
+ dict(type='TopdownAffine', input_size=codec['input_size']),
121
+ dict(type='mmdet.YOLOXHSVRandomAug'),
122
+ dict(
123
+ type='Albumentation',
124
+ transforms=[
125
+ dict(type='Blur', p=0.1),
126
+ dict(type='MedianBlur', p=0.1),
127
+ dict(
128
+ type='CoarseDropout',
129
+ max_holes=1,
130
+ max_height=0.4,
131
+ max_width=0.4,
132
+ min_holes=1,
133
+ min_height=0.2,
134
+ min_width=0.2,
135
+ p=1.),
136
+ ]),
137
+ dict(type='GenerateTarget', encoder=codec),
138
+ dict(type='PackPoseInputs')
139
+ ]
140
+ val_pipeline = [
141
+ dict(type='LoadImage', backend_args=backend_args),
142
+ dict(type='GetBBoxCenterScale'),
143
+ dict(type='TopdownAffine', input_size=codec['input_size']),
144
+ dict(type='PackPoseInputs')
145
+ ]
146
+
147
+ train_pipeline_stage2 = [
148
+ dict(type='LoadImage', backend_args=backend_args),
149
+ dict(type='GetBBoxCenterScale'),
150
+ dict(type='RandomFlip', direction='horizontal'),
151
+ dict(type='RandomHalfBody'),
152
+ dict(
153
+ type='RandomBBoxTransform',
154
+ shift_factor=0.,
155
+ scale_factor=[0.75, 1.25],
156
+ rotate_factor=60),
157
+ dict(type='TopdownAffine', input_size=codec['input_size']),
158
+ dict(type='mmdet.YOLOXHSVRandomAug'),
159
+ dict(
160
+ type='Albumentation',
161
+ transforms=[
162
+ dict(type='Blur', p=0.1),
163
+ dict(type='MedianBlur', p=0.1),
164
+ dict(
165
+ type='CoarseDropout',
166
+ max_holes=1,
167
+ max_height=0.4,
168
+ max_width=0.4,
169
+ min_holes=1,
170
+ min_height=0.2,
171
+ min_width=0.2,
172
+ p=0.5),
173
+ ]),
174
+ dict(type='GenerateTarget', encoder=codec),
175
+ dict(type='PackPoseInputs')
176
+ ]
177
+
178
+ # data loaders
179
+ train_dataloader = dict(
180
+ batch_size=256,
181
+ num_workers=10,
182
+ persistent_workers=True,
183
+ sampler=dict(type='DefaultSampler', shuffle=True),
184
+ dataset=dict(
185
+ type=dataset_type,
186
+ data_root=data_root,
187
+ data_mode=data_mode,
188
+ ann_file='annotations/person_keypoints_train2017.json',
189
+ data_prefix=dict(img='train2017/'),
190
+ pipeline=train_pipeline,
191
+ ))
192
+ val_dataloader = dict(
193
+ batch_size=64,
194
+ num_workers=10,
195
+ persistent_workers=True,
196
+ drop_last=False,
197
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
198
+ dataset=dict(
199
+ type=dataset_type,
200
+ data_root=data_root,
201
+ data_mode=data_mode,
202
+ ann_file='annotations/person_keypoints_val2017.json',
203
+ # bbox_file=f'{data_root}person_detection_results/'
204
+ # 'COCO_val2017_detections_AP_H_56_person.json',
205
+ data_prefix=dict(img='val2017/'),
206
+ test_mode=True,
207
+ pipeline=val_pipeline,
208
+ ))
209
+ test_dataloader = val_dataloader
210
+
211
+ # hooks
212
+ default_hooks = dict(
213
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
214
+
215
+ custom_hooks = [
216
+ dict(
217
+ type='EMAHook',
218
+ ema_type='ExpMomentumEMA',
219
+ momentum=0.0002,
220
+ update_buffers=True,
221
+ priority=49),
222
+ dict(
223
+ type='mmdet.PipelineSwitchHook',
224
+ switch_epoch=max_epochs - stage2_num_epochs,
225
+ switch_pipeline=train_pipeline_stage2)
226
+ ]
227
+
228
+ # evaluators
229
+ val_evaluator = dict(
230
+ type='CocoMetric',
231
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
232
+ test_evaluator = val_evaluator