# NOTE: file-viewer page residue, kept as a comment; it is not config data.
# Qihang Yu | Add kMaX-DeepLab | a06fad0 | raw | history blame | 2.25 kB
# Model definition: backbone, segmentation head, and the kMaX-DeepLab
# decoders, loss weights and test-time settings.
MODEL:
  # Backbone part.
  BACKBONE:
    FREEZE_AT: 0
    # We customize the momentum and eps in SyncBN to align with the TF
    # implementation.
    NAME: "custom_bn_build_resnet_backbone"
  # Relative path. NOTE(review): presumably resolved against the working
  # directory of the launching process -- confirm.
  WEIGHTS: "../R-50.pkl"
  # Same mean and std for every channel. Assuming the usual
  # (x - mean) / std normalization, this maps 0..255 inputs to [-1, 1].
  PIXEL_MEAN: [127.5, 127.5, 127.5]
  PIXEL_STD: [127.5, 127.5, 127.5]
  RESNETS:
    DEPTH: 50
    STEM_TYPE: "basic"  # not used
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: False
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
    NORM: "SyncBN"
    RES5_MULTI_GRID: [1, 1, 1]  # not used

  # kMaX part.
  META_ARCHITECTURE: "kMaXDeepLab"
  SEM_SEG_HEAD:
    NAME: "kMaXDeepLabHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 133
    LOSS_WEIGHT: 1.0

  KMAX_DEEPLAB:
    SAVE_VIS_NUM: 0
    SHARE_FINAL_MATCHING: True
    DEEP_SUPERVISION: True
    # Written with an explicit mantissa dot so that YAML 1.1 loaders
    # (e.g. PyYAML) resolve it as a float instead of the string "1e-5".
    NO_OBJECT_WEIGHT: 1.0e-5
    CLASS_WEIGHT: 3.0
    DICE_WEIGHT: 3.0
    MASK_WEIGHT: 0.3
    INSDIS_WEIGHT: 1.0
    AUX_SEMANTIC_WEIGHT: 1.0

    PIXEL_DEC:
      NAME: "kMaXPixelDecoder"
      # Same feature names as RESNETS.OUT_FEATURES above.
      IN_FEATURES: ["res2", "res3", "res4", "res5"]
      # The three lists below all have four entries, one per decoder stage.
      DEC_LAYERS: [1, 5, 1, 1]
      LAYER_TYPES: ["axial", "axial", "bottleneck", "bottleneck"]
      DEC_CHANNELS: [512, 256, 128, 64]

    TRANS_DEC:
      NAME: "kMaXTransformerDecoder"
      DEC_LAYERS: [2, 2, 2]
      NUM_OBJECT_QUERIES: 128
      IN_CHANNELS: [2048, 1024, 512]  # [512 * 4, 256 * 4, 128 * 4]
      DROP_PATH_PROB: 0.2

    TEST:
      # Only the panoptic output is enabled.
      SEMANTIC_ON: False
      INSTANCE_ON: False  # Save some time :)
      PANOPTIC_ON: True
      OBJECT_MASK_THRESHOLD: 0.4
      CLASS_THRESHOLD_THING: 0.7
      CLASS_THRESHOLD_STUFF: 0.5
      REORDER_CLASS_WEIGHT: 1.0
      REORDER_MASK_WEIGHT: 1.0
      OVERLAP_THRESHOLD: 0.8
# Dataset names for training and evaluation.
DATASETS:
  # The values are Python tuple literals, which YAML reads as plain strings.
  # NOTE(review): the config loader is presumably expected to evaluate them
  # into tuples -- confirm before converting to YAML sequences.
  TRAIN: ("coco_2017_train_panoptic",)
  TEST: ("coco_2017_val_panoptic",)
# Optimizer, learning-rate schedule and mixed-precision settings.
SOLVER:
  IMS_PER_BATCH: 64
  BASE_LR: 0.0005
  LR_SCHEDULER_NAME: "TF2WarmupPolyLR"
  MAX_ITER: 150000
  WARMUP_ITERS: 5000
  WEIGHT_DECAY: 0.05
  OPTIMIZER: "ADAMW"
  # NOTE(review): presumably scales the learning rate of backbone parameters
  # relative to BASE_LR -- confirm against the optimizer builder.
  BACKBONE_MULTIPLIER: 0.1
  # The two ENABLED flags below belong to different sub-sections; they must
  # stay nested so that one does not overwrite the other.
  CLIP_GRADIENTS:
    ENABLED: False
  AMP:
    ENABLED: True
# Input pipeline: image size, scale range, colour format and dataset mapper.
INPUT:
  IMAGE_SIZE: [1281, 1281]
  MIN_SCALE: 0.2
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "coco_panoptic_lsj"
  # Test-time size bounds use the same value as IMAGE_SIZE.
  MIN_SIZE_TEST: 1281
  MAX_SIZE_TEST: 1281
# Top-level evaluation settings (distinct from MODEL.KMAX_DEEPLAB.TEST).
TEST:
  # With SOLVER.MAX_ITER at 150000 this divides training into 30 periods.
  EVAL_PERIOD: 5000
# Data-loading settings.
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4

# Top-level key, not part of DATALOADER.
# NOTE(review): presumably the config schema version -- confirm.
VERSION: 2