diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..8524e66469ef3356a648c11cddfd7f4e1ecdfffa
--- /dev/null
+++ b/app.py
@@ -0,0 +1,75 @@
+# Install the SysCV/transfiner package at startup, before the detectron2 imports
+# below run. NOTE(review): presumably this package provides the `detectron2` module;
+# the former try/except import guard is disabled, so the install runs on every start.
+import os
+os.system('pip install git+https://github.com/SysCV/transfiner.git')
+
+from matplotlib.pyplot import axis
+import gradio as gr
+import requests
+import numpy as np
+from torch import nn
+import requests
+
+import torch
+
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import MetadataCatalog
+
+'''
+url1 = 'https://cdn.pixabay.com/photo/2014/09/07/21/52/city-438393_1280.jpg'
+r = requests.get(url1, allow_redirects=True)
+open("city1.jpg", 'wb').write(r.content)
+url2 = 'https://cdn.pixabay.com/photo/2016/02/19/11/36/canal-1209808_1280.jpg'
+r = requests.get(url2, allow_redirects=True)
+open("city2.jpg", 'wb').write(r.content)
+'''
+
+model_name='./configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml'
+
+# model = model_zoo.get(model_name, trained=True)
+
+cfg = get_cfg()
+# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+cfg.merge_from_file(model_zoo.get_config_file(model_name))
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model
+# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as w ell
+cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
+
+if not torch.cuda.is_available():
+ cfg.MODEL.DEVICE='cpu'
+
+predictor = DefaultPredictor(cfg)
+
+
+def inference(image):
+ img = np.array(image.resize((1024,1024)))
+ outputs = predictor(img)
+
+ v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
+ out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+
+ return out.get_image()
+
+
+
+title = "Detectron2-MaskRCNN X101"
+description = "demo for Detectron2. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.\
+ Model: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
+article = "
Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation | Detectron model ZOO
"
+
+gr.Interface(
+ inference,
+ [gr.inputs.Image(type="pil", label="Input")],
+ gr.outputs.Image(type="numpy", label="Output"),
+ title=title,
+ description=description,
+ article=article,
+ examples=[
+ ["demo/sample_imgs/000000224200.jpg"],
+ ["demo/sample_imgs/000000344909.jpg"]
+ ]).launch()
+
diff --git a/configs/Base-RCNN-C4.yaml b/configs/Base-RCNN-C4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fbf34a0ea57a587e09997edd94c4012d69d0b6ad
--- /dev/null
+++ b/configs/Base-RCNN-C4.yaml
@@ -0,0 +1,18 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ RPN:
+ PRE_NMS_TOPK_TEST: 6000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "Res5ROIHeads"
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RCNN-DilatedC5.yaml b/configs/Base-RCNN-DilatedC5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0d6d16bdaf532f09e4976f0aa240a49e748da27
--- /dev/null
+++ b/configs/Base-RCNN-DilatedC5.yaml
@@ -0,0 +1,31 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ RESNETS:
+ OUT_FEATURES: ["res5"]
+ RES5_DILATION: 2
+ RPN:
+ IN_FEATURES: ["res5"]
+ PRE_NMS_TOPK_TEST: 6000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["res5"]
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RCNN-FPN-4gpu.yaml b/configs/Base-RCNN-FPN-4gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..628542c2c1d06783b53aa8f68720f58181fc7744
--- /dev/null
+++ b/configs/Base-RCNN-FPN-4gpu.yaml
@@ -0,0 +1,44 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ #TEST: ("coco_2017_val",)
+ #TEST: ("lvis_v0.5_val_cocofied",)
+ TEST: ("coco_2017_test-dev",)
+SOLVER:
+ IMS_PER_BATCH: 16 #8 #16
+ BASE_LR: 0.02 # 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RCNN-FPN.yaml b/configs/Base-RCNN-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45f3f0bb5761f5162b4d9c180f3222aeeb79b1b3
--- /dev/null
+++ b/configs/Base-RCNN-FPN.yaml
@@ -0,0 +1,42 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16 #16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/Base-RetinaNet.yaml b/configs/Base-RetinaNet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b45b982bbf84b34d2a6a172ab0a946b1029f7c8
--- /dev/null
+++ b/configs/Base-RetinaNet.yaml
@@ -0,0 +1,25 @@
+MODEL:
+ META_ARCHITECTURE: "RetinaNet"
+ BACKBONE:
+ NAME: "build_retinanet_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+ FPN:
+ IN_FEATURES: ["res3", "res4", "res5"]
+ RETINANET:
+ IOU_THRESHOLDS: [0.4, 0.5]
+ IOU_LABELS: [0, -1, 1]
+ SMOOTH_L1_LOSS_BETA: 0.0
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ TEST: ("coco_2017_val",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..773ac10e87c626760d00d831bf664ce9ff073c49
--- /dev/null
+++ b/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ LOAD_PROPOSALS: True
+ RESNETS:
+ DEPTH: 50
+ PROPOSAL_GENERATOR:
+ NAME: "PrecomputedProposals"
+DATASETS:
+ TRAIN: ("coco_2017_train",)
+ PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
+ TEST: ("coco_2017_val",)
+ PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+DATALOADER:
+ # proposals are part of the dataset_dicts, and take a lot of RAM
+ NUM_WORKERS: 2
diff --git a/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db142cd671c1841b4f64cf130bee7f7954ecdd28
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bceb6b343618d8cd9a6c414ff9eb86ab31cc230a
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57a098f53ee8c54ecfa354cc96efefd890dc1b72
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f96130105c3ba6ab393e0932870903875f5cb732
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc51bce390a85ee3529ffdcebde05748e1646be0
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0fe96f57febdac5790ea4cec168fa4b97ac4807a
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3262019a1211b910d3b371569199ed1afaacf6a4
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41395182bf5c9dd8ab1241c4414068817298d554
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c9b5ab77157baa581d90d9847c045c19ed6ffa3
--- /dev/null
+++ b/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: False
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4abb1b9a547957aa6afc0b29129e00f89cf98d59
--- /dev/null
+++ b/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_1x.py b/configs/COCO-Detection/retinanet_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..db86b18a9ee03789f5bc0066d470609d3515d524
--- /dev/null
+++ b/configs/COCO-Detection/retinanet_R_50_FPN_1x.py
@@ -0,0 +1,9 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.retinanet import model
+from ..common.train import train
+
+dataloader.train.mapper.use_instance_mask = False  # box-detection config: turn off the mapper's instance-mask option
+model.backbone.bottom_up.freeze_at = 2  # NOTE(review): presumably freezes the stem and first stage of the bottom-up net - confirm
+optimizer.lr = 0.01  # overrides the learning rate of the imported SGD optimizer config
diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a24ce3a9a108a8792e18c8aabfb7b712f0d3725
--- /dev/null
+++ b/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b5412d4a7aef1d6c3f7c1e34f94007de639b833
--- /dev/null
+++ b/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RetinaNet.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/configs/COCO-Detection/rpn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e04821156b0376ba5215d5ce5b7010a36b43e6a1
--- /dev/null
+++ b/configs/COCO-Detection/rpn_R_50_C4_1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ META_ARCHITECTURE: "ProposalNetwork"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ PRE_NMS_TOPK_TEST: 12000
+ POST_NMS_TOPK_TEST: 2000
diff --git a/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a
--- /dev/null
+++ b/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "ProposalNetwork"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ POST_NMS_TOPK_TEST: 2000
diff --git a/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp b/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp
new file mode 100644
index 0000000000000000000000000000000000000000..d3cd42dd54ff5bad2b489ac7aa2e5d54aa651484
Binary files /dev/null and b/configs/COCO-InstanceSegmentation/.mask_rcnn_R_50_FPN_1x_4gpu.yaml.swp differ
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a94cc45a0f2aaa8c92e14871c553b736545e327
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67b70cf4be8c19f5dc735b6f55a8690698f34b69
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..315b95933a4449f1bee6790d31f45bdd180717de
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_a3ec72.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e85038edcce2cfd676e518e01274d7670cfa57a
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_deform.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_a3ec72.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5197d67b143be9a4a1273a9a8983cd32d80e6a7
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x_4gpu_transfiner_lvis.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_a3ec72.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
+DATASETS:
+ TEST: ("lvis_v0.5_val_cocofied",)
+
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e5bf932d9e216c1a866c8ed5d1d571242c97326
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py
@@ -0,0 +1,7 @@
+from ..common.train import train
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_c4 import model
+
+model.backbone.freeze_at = 2  # override on the imported mask_rcnn_c4 model; NOTE(review): presumably freezes stem + first stage - confirm
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38ed867d897dfec839cbcf11a2e2dc8abb92f07c
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b13eefab2a049c48d94d5051c82ceb6dbde40579
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d401016358f967f6619d88b1c9bd5673a1cdeba8
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-DilatedC5.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f216a61c5181fe9aa1c5d1008b51cbc6fb86285
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,7 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2  # override on the imported mask_rcnn_fpn model; NOTE(review): presumably freezes stem + first stage - confirm
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d50fb866ca7811a87b42555c7213f88e00bf6df1
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb896c8fa5971cea94099fbfffc9140418603af8
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6eb97408fee1a5aec65d0985a6eac2598aa9b113
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_4gpu_transfiner.yaml
@@ -0,0 +1,6 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_a54504.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bec680ee17a474fefe527b7b79d26266e75c09f0
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ RPN:
+ BBOX_REG_LOSS_TYPE: "giou"
+ BBOX_REG_LOSS_WEIGHT: 2.0
+ ROI_BOX_HEAD:
+ BBOX_REG_LOSS_TYPE: "giou"
+ BBOX_REG_LOSS_WEIGHT: 10.0
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be7d06b8e0f032ee7fcaabd7c122158518489fd2
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f962edd32eaa6e8ea38d70599e036f8a415fbe6c
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_f10217.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4ef02309eade988e17afcdd2fe4c4c8f96a23e7
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_deform.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_f10217.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3ae4ae125513269daf90df615fa5b0324c3cc61b
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x_4gpu_transfiner_lvis.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "./init_weights/model_final_f10217.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
+DATASETS:
+ TEST: ("lvis_v0.5_val_cocofied",)
+
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d14c63f74383bfc308750f51d51344398b02a239
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: True
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b6fc19397ba59079cd2add4751f96a202b8b1e37
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x_transfiner.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ MASK_ON: True
+ WEIGHTS: "./init_weights/model_final_x101.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7bbdd7d00505f1e51154379c99ab621cb648a6d
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py
@@ -0,0 +1,34 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+
+# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=23,  # depth, w_a, w_0, w_m, group_width: see the pycls config linked above
+ w_a=38.65,
+ w_0=96,
+ w_m=2.43,
+ group_width=40,
+ freeze_at=2,
+ norm="FrozenBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]  # overrides pixel_std on the imported mask_rcnn_fpn model config
+
+optimizer.weight_decay = 5e-5  # overrides the weight decay of the imported SGD optimizer config
+train.init_checkpoint = (  # initialize from the pycls RegNetX-4.0GF checkpoint
+ "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py b/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..72c6b7a5c8939970bd0e1e4a3c1155695943b19a
--- /dev/null
+++ b/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py
@@ -0,0 +1,35 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.train import train
+
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+
+# Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
+# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa
+model.backbone.bottom_up = L(RegNet)(  # L is LazyCall (imported above)
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=22,
+ w_a=31.41,
+ w_0=96,
+ w_m=2.24,
+ group_width=64,
+ se_ratio=0.25,  # NOTE(review): presumably the squeeze-excitation ratio that distinguishes RegNetY from RegNetX - confirm
+ freeze_at=2,
+ norm="FrozenBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]  # NOTE(review): presumably BGR channel order - confirm
+
+optimizer.weight_decay = 5e-5  # overrides the weight decay of the SGD config imported above
+train.init_checkpoint = (  # pycls RegNetY-4.0GF DDS baseline weights
+ "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
+)
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e03944a42d2e497da5ceca17c8fda797dac3f82
--- /dev/null
+++ b/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ KEYPOINT_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 1
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss
+ RPN:
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_train",)
+ TEST: ("keypoints_coco_2017_val",)
diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9309535c57a1aa7d23297aac80a9bd78a6c79fcc
--- /dev/null
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..b74e8ac52d121cfa76ea4f5ec6562552c072ff22
--- /dev/null
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,7 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_keypoint import dataloader
+from ..common.models.keypoint_rcnn_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2  # NOTE(review): presumably freezes the stem plus the first residual stage - confirm
diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335
--- /dev/null
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a07f243f650a497b9372501e3face75194cf0941
--- /dev/null
+++ b/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867
--- /dev/null
+++ b/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -0,0 +1,12 @@
+_BASE_: "Base-Keypoint-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f00d54b760c2b9271c75643e0a1ab1ffc0d9543a
--- /dev/null
+++ b/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ MASK_ON: True
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_train_panoptic_separated",)
+ TEST: ("coco_2017_val_panoptic_separated",)
+DATALOADER:
+ FILTER_EMPTY_ANNOTATIONS: False
diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e01f6fb31e9b00b1857b7de3b5074184d1f4a21
--- /dev/null
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7378e093e12ff854d0c46f1eda9177190d31813
--- /dev/null
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py
@@ -0,0 +1,7 @@
+from ..common.optim import SGD as optimizer
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.data.coco_panoptic_separated import dataloader
+from ..common.models.panoptic_fpn import model
+from ..common.train import train
+
+model.backbone.bottom_up.freeze_at = 2  # NOTE(review): presumably freezes the stem plus the first residual stage - confirm
diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6afa2c1cc92495309ed1553a17359fe5d7d6566e
--- /dev/null
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml
@@ -0,0 +1,5 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
diff --git a/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b956b3f673e78649184fe2c50e2700b3f1f14794
--- /dev/null
+++ b/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e
--- /dev/null
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ # For better, more stable performance initialize from COCO
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+ MASK_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+ MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+ MIN_SIZE_TRAIN_SAMPLING: "choice"
+ MIN_SIZE_TEST: 1024
+ MAX_SIZE_TRAIN: 2048
+ MAX_SIZE_TEST: 2048
+DATASETS:
+ TRAIN: ("cityscapes_fine_instance_seg_train",)
+ TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+ BASE_LR: 0.01
+ STEPS: (18000,)
+ MAX_ITER: 24000
+ IMS_PER_BATCH: 8
+TEST:
+ EVAL_PERIOD: 8000
diff --git a/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b4f2e6545e6920f8d3a84f1c517d79679a848c0
--- /dev/null
+++ b/configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ # For better, more stable performance initialize from COCO
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+ MASK_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+ MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+ MIN_SIZE_TRAIN_SAMPLING: "choice"
+ MIN_SIZE_TEST: 1024
+ MAX_SIZE_TRAIN: 2048
+ MAX_SIZE_TEST: 2048
+DATASETS:
+ TRAIN: ("cityscapes_fine_instance_seg_train",)
+ TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (36000,)
+ MAX_ITER: 48000
+ IMS_PER_BATCH: 4
+TEST:
+ EVAL_PERIOD: 48000
diff --git a/configs/Detectron1-Comparisons/README.md b/configs/Detectron1-Comparisons/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..924fd00af642ddf1a4ff4c4f5947f676134eb7de
--- /dev/null
+++ b/configs/Detectron1-Comparisons/README.md
@@ -0,0 +1,84 @@
+
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+ affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as closely as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+
+
+
+
+
+
+| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
+| :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
+| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
+| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
+
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+ [bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP, which can be
+ compensated for by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation.
+ See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
diff --git a/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ce77f137fa2c4e5254a62b58c18b8b76096f2aa
--- /dev/null
+++ b/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aacf868ba5290c752031c130a2081af48afc0808
--- /dev/null
+++ b/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+ # 1000 proposals per-image is found to hurt box AP.
+ # Therefore we increase it to 1500 per-image.
+ POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_train",)
+ TEST: ("keypoints_coco_2017_val",)
diff --git a/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6
--- /dev/null
+++ b/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ # Detectron1 uses smooth L1 loss with some magic beta values.
+ # The defaults are changed to L1 loss in Detectron2.
+ RPN:
+ SMOOTH_L1_BETA: 0.1111
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ ROI_MASK_HEAD:
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0c3a1bbc0a09e1384de522f30c443ba1e36fafa
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de110d26e773c35504a96d75724545777d2332ee
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "./model_final_824ab5.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 150 # was 300; LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c474187bdf2db5c9662c8b7083ba481ded378fbd
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 150 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8b822c6c006ba642f4caf9b55e7983f6797427a
--- /dev/null
+++ b/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,23 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1230
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v0.5_train",)
+ TEST: ("lvis_v0.5_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca4dd97144561276ecaabbb6c254e3a7737ac157
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f313295ee5f0d553d394ce2efe003810c79af47d
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6528f7c31c8cfbf139c14fd0cae598592d8e898
--- /dev/null
+++ b/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
@@ -0,0 +1,26 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+ PIXEL_STD: [57.375, 57.120, 58.395]
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 101
+ ROI_HEADS:
+ NUM_CLASSES: 1203
+ SCORE_THRESH_TEST: 0.0001
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+ TRAIN: ("lvis_v1_train",)
+ TEST: ("lvis_v1_val",)
+SOLVER:
+ STEPS: (120000, 160000)
+ MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs
+TEST:
+ DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300
+DATALOADER:
+ SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+ REPEAT_THRESHOLD: 0.001
diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abb33b618932e94b66239945ac892f4c84a6e8f8
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
diff --git a/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2201ad5c46ded91ccfa47b7698a521625c5e447
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc117f6b5e3e51558ec2f01b73c5365622e5ce25
--- /dev/null
+++ b/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
@@ -0,0 +1,36 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ MASK_ON: True
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+ RESNETS:
+ STRIDE_IN_1X1: False # this is a C2 model
+ NUM_GROUPS: 32
+ WIDTH_PER_GROUP: 8
+ DEPTH: 152
+ DEFORM_ON_PER_STAGE: [False, True, True, True]
+ ROI_HEADS:
+ NAME: "CascadeROIHeads"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "GN"
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ NUM_CONV: 8
+ NORM: "GN"
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ IMS_PER_BATCH: 128
+ STEPS: (35000, 45000)
+ MAX_ITER: 50000
+ BASE_LR: 0.16
+INPUT:
+ MIN_SIZE_TRAIN: (640, 864)
+ MIN_SIZE_TRAIN_SAMPLING: "range"
+ MAX_SIZE_TRAIN: 1440
+ CROP:
+ ENABLED: True
+TEST:
+ EVAL_PERIOD: 2500
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c3b767ff473bbab7225cc8a4a92608543d78246
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ CLS_AGNOSTIC_MASK: True
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04ff988d073ef9169ee4ca2cbce0d6f030c15232
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68c0ca58d7df97ca728c339da0ca9828fe6be318
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
+SOLVER:
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..699bea11dfa413c0718681752963cd97ab29b52c
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
+ DEFORM_MODULATED: False
+SOLVER:
+ STEPS: (420000, 500000) # (210000, 250000)
+ MAX_ITER: 540000 # 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74d274e5a529b5a8afe186940868f9d48c6112b3
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ NORM: "GN"
+ STRIDE_IN_1X1: False
+ FPN:
+ NORM: "GN"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "GN"
+ ROI_MASK_HEAD:
+ NORM: "GN"
+SOLVER:
+ # 3x schedule
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
diff --git a/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..11ebb076ba529f26c71a0d972e96ca4c2d6a830b
--- /dev/null
+++ b/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
@@ -0,0 +1,24 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ NORM: "SyncBN"
+ STRIDE_IN_1X1: True
+ FPN:
+ NORM: "SyncBN"
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_CONV: 4
+ NUM_FC: 1
+ NORM: "SyncBN"
+ ROI_MASK_HEAD:
+ NORM: "SyncBN"
+SOLVER:
+ # 3x schedule
+ STEPS: (210000, 250000)
+ MAX_ITER: 270000
+TEST:
+ PRECISE_BN:
+ ENABLED: True
diff --git a/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f2464be744c083985898a25f9e71d00104f689d
--- /dev/null
+++ b/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
@@ -0,0 +1,151 @@
+# An example config to train a mmdetection model using detectron2.
+
+from ..common.data.coco import dataloader
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+from detectron2.modeling.mmdet_wrapper import MMDetDetector
+from detectron2.config import LazyCall as L
+
+model = L(MMDetDetector)(  # L is LazyCall (imported above)
+ detector=dict(  # mmdetection-style detector config
+ type="MaskRCNN",
+ pretrained="torchvision://resnet50",  # backbone weights come from here; train.init_checkpoint is None below
+ backbone=dict(
+ type="ResNet",
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type="BN", requires_grad=True),
+ norm_eval=True,
+ style="pytorch",
+ ),
+ neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+ rpn_head=dict(
+ type="RPNHead",
+ in_channels=256,
+ feat_channels=256,
+ anchor_generator=dict(
+ type="AnchorGenerator",
+ scales=[8],
+ ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64],
+ ),
+ bbox_coder=dict(
+ type="DeltaXYWHBBoxCoder",
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0],
+ ),
+ loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+ ),
+ roi_head=dict(
+ type="StandardRoIHead",
+ bbox_roi_extractor=dict(
+ type="SingleRoIExtractor",
+ roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32],
+ ),
+ bbox_head=dict(
+ type="Shared2FCBBoxHead",
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=80,
+ bbox_coder=dict(
+ type="DeltaXYWHBBoxCoder",
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2],
+ ),
+ reg_class_agnostic=False,
+ loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+ loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+ ),
+ mask_roi_extractor=dict(
+ type="SingleRoIExtractor",
+ roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32],
+ ),
+ mask_head=dict(
+ type="FCNMaskHead",
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=80,
+ loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+ ),
+ ),
+ # model training and testing settings
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type="MaxIoUAssigner",
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1,
+ ),
+ sampler=dict(
+ type="RandomSampler",
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False,
+ ),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False,
+ ),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type="nms", iou_threshold=0.7),
+ min_bbox_size=0,
+ ),
+ rcnn=dict(
+ assigner=dict(
+ type="MaxIoUAssigner",
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=True,
+ ignore_iof_thr=-1,
+ ),
+ sampler=dict(
+ type="RandomSampler",
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True,
+ ),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False,
+ ),
+ ),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type="nms", iou_threshold=0.7),
+ min_bbox_size=0,
+ ),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type="nms", iou_threshold=0.5),
+ max_per_img=100,
+ mask_thr_binary=0.5,
+ ),
+ ),
+ ),
+ pixel_mean=[123.675, 116.280, 103.530],  # NOTE(review): presumably RGB order, matching image_format = "RGB" below - confirm
+ pixel_std=[58.395, 57.120, 57.375],
+)
+
+dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model
+train.init_checkpoint = None # pretrained model is loaded inside backbone
diff --git a/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b
--- /dev/null
+++ b/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
@@ -0,0 +1,26 @@
+# A large PanopticFPN for demo purposes.
+# Use GN on backbone to support semantic seg.
+# Use Cascade + Deform Conv to improve localization.
+_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+MODEL:
+ WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+ RESNETS:
+ DEPTH: 101
+ NORM: "GN"
+ DEFORM_ON_PER_STAGE: [False, True, True, True]
+ STRIDE_IN_1X1: False
+ FPN:
+ NORM: "GN"
+ ROI_HEADS:
+ NAME: CascadeROIHeads
+ ROI_BOX_HEAD:
+ CLS_AGNOSTIC_BBOX_REG: True
+ ROI_MASK_HEAD:
+ NORM: "GN"
+ RPN:
+ POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+ STEPS: (105000, 125000)
+ MAX_ITER: 135000
+ IMS_PER_BATCH: 32
+ BASE_LR: 0.04
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3400288cde242fcf66eef7f63b5a9165ca663c5
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
@@ -0,0 +1,13 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+ # Train from random initialization.
+ WEIGHTS: ""
+ # It makes sense to divide by STD when training from scratch
+ # But it seems to make no difference on the results and C2's models didn't do this.
+ # So we keep things consistent with C2.
+ # PIXEL_STD: [57.375, 57.12, 58.395]
+ MASK_ON: True
+ BACKBONE:
+ FREEZE_AT: 0
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d90c9ff0ef4573252ee165b4c958ec5f74178176
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+ PIXEL_STD: [57.375, 57.12, 58.395]
+ WEIGHTS: ""
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False
+ BACKBONE:
+ FREEZE_AT: 0
+SOLVER:
+ # 9x schedule
+ IMS_PER_BATCH: 64 # 4x the standard
+ STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
+ MAX_ITER: 202500 # 90k * 9 / 4
+ BASE_LR: 0.08
+TEST:
+ EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60d4e42330e396a1901437df8e17b262d5ad547a
--- /dev/null
+++ b/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+MODEL:
+ PIXEL_STD: [57.375, 57.12, 58.395]
+ WEIGHTS: ""
+ MASK_ON: True
+ RESNETS:
+ STRIDE_IN_1X1: False
+ BACKBONE:
+ FREEZE_AT: 0
+SOLVER:
+ # 9x schedule
+ IMS_PER_BATCH: 64 # 4x the standard
+ STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k
+ MAX_ITER: 202500 # 90k * 9 / 4
+ BASE_LR: 0.08
+TEST:
+ EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
diff --git a/configs/Misc/semantic_R_50_FPN_1x.yaml b/configs/Misc/semantic_R_50_FPN_1x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac256e1372770ab3d9ae522c962de0fd0dbceeb5
--- /dev/null
+++ b/configs/Misc/semantic_R_50_FPN_1x.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_panoptic_stuffonly",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
diff --git a/configs/Misc/torchvision_imagenet_R_50.py b/configs/Misc/torchvision_imagenet_R_50.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d75305bcf7445b98db84b3d489a1505d2fce5af
--- /dev/null
+++ b/configs/Misc/torchvision_imagenet_R_50.py
@@ -0,0 +1,150 @@
+"""
+An example config file to train an ImageNet classifier with detectron2.
+Model and dataloader both come from torchvision.
+This shows how to use detectron2 as a general engine for any new models and tasks.
+
+To run, use the following command:
+
+python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+ --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+"""
+
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from omegaconf import OmegaConf
+import torchvision
+from torchvision.transforms import transforms as T
+from torchvision.models.resnet import ResNet, Bottleneck
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.solver import WarmupParamScheduler
+from detectron2.solver.build import get_default_optimizer_params
+from detectron2.config import LazyCall as L
+from detectron2.model_zoo import get_config
+from detectron2.data.samplers import TrainingSampler, InferenceSampler
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.utils import comm
+
+
+"""
+Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
+Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+In practice, you might want to put code in your project and import them instead.
+"""
+
+
+def build_data_loader(dataset, batch_size, num_workers, training=True):
+    """Wrap `dataset` in a DataLoader; `training` picks TrainingSampler over InferenceSampler."""
+    sampler_cls = TrainingSampler if training else InferenceSampler
+    # Both sampler classes are constructed from the dataset size alone.
+    sampler = sampler_cls(len(dataset))
+    return torch.utils.data.DataLoader(
+        dataset, sampler=sampler, batch_size=batch_size, num_workers=num_workers, pin_memory=True
+    )
+
+
+class ClassificationNet(nn.Module):
+    """Adapt a classifier to (image, label) inputs: loss when training, predictions otherwise."""
+
+    def __init__(self, model: nn.Module):
+        super().__init__()
+        self.model = model
+
+    @property
+    def device(self):
+        return next(self.model.parameters()).device  # first parameter only, no list copy
+
+    def forward(self, inputs):
+        image, label = inputs
+        pred = self.model(image.to(self.device))
+        if not self.training:
+            return pred
+        return F.cross_entropy(pred, label.to(self.device))
+
+
+class ClassificationAcc(DatasetEvaluator):
+    """Top-1 accuracy: counts argmax hits locally and sums what comm.all_gather returns."""
+
+    def reset(self):
+        self.corr, self.total = 0, 0
+
+    def process(self, inputs, outputs):
+        _, label = inputs
+        self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+        self.total += len(label)
+
+    def evaluate(self):
+        corr, total = map(sum, zip(*comm.all_gather([self.corr, self.total])))
+        return {"accuracy": corr / total}
+
+
+# --- End of code that could be in a project and be imported
+
+
+dataloader = OmegaConf.create()
+dataloader.train = L(build_data_loader)(
+ dataset=L(torchvision.datasets.ImageNet)(
+ root="/path/to/imagenet",
+ split="train",
+ transform=L(T.Compose)(
+ transforms=[
+ L(T.RandomResizedCrop)(size=224),
+ L(T.RandomHorizontalFlip)(),
+ T.ToTensor(),
+ L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+ ]
+ ),
+ ),
+ batch_size=256 // 8,
+ num_workers=4,
+ training=True,
+)
+
+dataloader.test = L(build_data_loader)(
+ dataset=L(torchvision.datasets.ImageNet)(
+ root="${...train.dataset.root}",
+ split="val",
+ transform=L(T.Compose)(
+ transforms=[
+ L(T.Resize)(size=256),
+ L(T.CenterCrop)(size=224),
+ T.ToTensor(),
+ L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+ ]
+ ),
+ ),
+ batch_size=256 // 8,
+ num_workers=4,
+ training=False,
+)
+
+dataloader.evaluator = L(ClassificationAcc)()
+
+# ResNet is wrapped in L(...) as well, so it is built lazily and its arguments stay editable.
+model = L(ClassificationNet)(
+    model=L(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+)
+
+optimizer = L(torch.optim.SGD)(
+ params=L(get_default_optimizer_params)(),
+ lr=0.1,
+ momentum=0.9,
+ weight_decay=1e-4,
+)
+
+lr_multiplier = L(WarmupParamScheduler)(
+ scheduler=L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+ ),
+ warmup_length=1 / 100,
+ warmup_factor=0.1,
+)
+
+
+train = get_config("common/train.py").train  # reuse the shared training settings
+train.init_checkpoint = None  # no checkpoint to initialize from
+train.max_iter = 100 * 1281167 // 256  # presumably 100 epochs at total batch 256 -- confirm
diff --git a/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea2a6baaebd1a186db18f2904430ffb25901898e
--- /dev/null
+++ b/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 20
+INPUT:
+ MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+ MIN_SIZE_TEST: 800
+DATASETS:
+ TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+ TEST: ('voc_2007_test',)
+SOLVER:
+ STEPS: (12000, 16000)
+ MAX_ITER: 18000 # 17.4 epochs
+ WARMUP_ITERS: 100
diff --git a/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e554cab18a358a27b630c1ab0c2359666b0e1514
--- /dev/null
+++ b/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ NUM_CLASSES: 20
+INPUT:
+ MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+ MIN_SIZE_TEST: 800
+DATASETS:
+ TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+ TEST: ('voc_2007_test',)
+SOLVER:
+ STEPS: (12000, 16000)
+ MAX_ITER: 18000 # 17.4 epochs
+ WARMUP_ITERS: 100
diff --git a/configs/common/README.md b/configs/common/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..912cc29927542bfe4258d3208cf52d73cb0ea477
--- /dev/null
+++ b/configs/common/README.md
@@ -0,0 +1,6 @@
+This directory provides definitions for a few common models, dataloaders, schedulers,
+and optimizers that are often used in training.
+The definitions of these objects are provided in the form of lazy instantiation:
+their arguments can be edited by users before constructing the objects.
+
+They can be imported, or loaded by `model_zoo.get_config` API in users' own configs.
diff --git a/configs/common/coco_schedule.py b/configs/common/coco_schedule.py
new file mode 100644
index 0000000000000000000000000000000000000000..355e66a1d213cb599a7ffe55089d854089c8ead2
--- /dev/null
+++ b/configs/common/coco_schedule.py
@@ -0,0 +1,47 @@
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+
+
+def default_X_scheduler(num_X):
+    """
+    Build the LR-multiplier config for an "Nx" multi-step schedule such as "1x" or "3x",
+    as commonly referred to in papers, where every 1x has the total length of 1440k
+    training images (~12 COCO epochs). LR is decayed twice at the end of training
+    following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+    Warmup is linear and spans a 1000 / total_steps_16bs fraction of training.
+
+    Args:
+        num_X: a positive real number
+
+    Returns:
+        DictConfig: configs that define the multiplier for LR during training
+    """
+    # total number of iterations assuming 16 batch size, using 1440000/16=90000
+    total_steps_16bs = num_X * 90000
+
+    if num_X <= 2:
+        # note that scheduler is scale-invariant. This is equivalent to
+        # milestones=[6, 8, 9]
+        decay_points = [60000, 80000, 90000]
+    else:
+        # longer schedules place the decays 60k and 20k iterations before the end
+        decay_points = [total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs]
+    multi_step = L(MultiStepParamScheduler)(
+        values=[1.0, 0.1, 0.01],
+        milestones=decay_points,
+    )
+    return L(WarmupParamScheduler)(
+        scheduler=multi_step,
+        warmup_length=1000 / total_steps_16bs,
+        warmup_method="linear",
+        warmup_factor=0.001,
+    )
+
+
+lr_multiplier_1x = default_X_scheduler(1)
+lr_multiplier_2x = default_X_scheduler(2)
+lr_multiplier_3x = default_X_scheduler(3)
+lr_multiplier_6x = default_X_scheduler(6)
+lr_multiplier_9x = default_X_scheduler(9)
diff --git a/configs/common/data/coco.py b/configs/common/data/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..703c4385c7ddc7eb0759c98d102ab2384d6a9e3e
--- /dev/null
+++ b/configs/common/data/coco.py
@@ -0,0 +1,48 @@
+from omegaconf import OmegaConf
+
+import detectron2.data.transforms as T
+from detectron2.config import LazyCall as L
+from detectron2.data import (
+ DatasetMapper,
+ build_detection_test_loader,
+ build_detection_train_loader,
+ get_detection_dataset_dicts,
+)
+from detectron2.evaluation import COCOEvaluator
+
+dataloader = OmegaConf.create()
+
+dataloader.train = L(build_detection_train_loader)(
+ dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+ mapper=L(DatasetMapper)(
+ is_train=True,
+ augmentations=[
+ L(T.ResizeShortestEdge)(
+ short_edge_length=(640, 672, 704, 736, 768, 800),
+ sample_style="choice",
+ max_size=1333,
+ ),
+ L(T.RandomFlip)(horizontal=True),
+ ],
+ image_format="BGR",
+ use_instance_mask=True,
+ ),
+ total_batch_size=16,
+ num_workers=4,
+)
+
+dataloader.test = L(build_detection_test_loader)(
+ dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+ mapper=L(DatasetMapper)(
+ is_train=False,
+ augmentations=[
+ L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+ ],
+ image_format="${...train.mapper.image_format}",
+ ),
+ num_workers=4,
+)
+
+dataloader.evaluator = L(COCOEvaluator)(
+ dataset_name="${..test.dataset.names}",
+)
diff --git a/configs/common/data/coco_keypoint.py b/configs/common/data/coco_keypoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4ceb066faf696954244205dc75376b767071217
--- /dev/null
+++ b/configs/common/data/coco_keypoint.py
@@ -0,0 +1,13 @@
+from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+from .coco import dataloader
+
+dataloader.train.dataset.min_keypoints = 1
+dataloader.train.dataset.names = "keypoints_coco_2017_train"
+dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+dataloader.train.mapper.update(
+ use_instance_mask=False,
+ use_keypoint=True,
+ keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+)
diff --git a/configs/common/data/coco_panoptic_separated.py b/configs/common/data/coco_panoptic_separated.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d
--- /dev/null
+++ b/configs/common/data/coco_panoptic_separated.py
@@ -0,0 +1,26 @@
+from detectron2.config import LazyCall as L
+from detectron2.evaluation import (
+ COCOEvaluator,
+ COCOPanopticEvaluator,
+ DatasetEvaluators,
+ SemSegEvaluator,
+)
+
+from .coco import dataloader
+
+dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+dataloader.train.dataset.filter_empty = False
+dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+dataloader.evaluator = [
+ L(COCOEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+ L(SemSegEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+ L(COCOPanopticEvaluator)(
+ dataset_name="${...test.dataset.names}",
+ ),
+]
diff --git a/configs/common/models/cascade_rcnn.py b/configs/common/models/cascade_rcnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7372a801dc00d7fec4db8cda8c2612ce281d48a
--- /dev/null
+++ b/configs/common/models/cascade_rcnn.py
@@ -0,0 +1,36 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads
+
+from .mask_rcnn_fpn import model
+
+# arguments that don't exist for Cascade R-CNN
+[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]
+
+model.roi_heads.update(
+ _target_=CascadeROIHeads,
+ box_heads=[
+ L(FastRCNNConvFCHead)(
+ input_shape=ShapeSpec(channels=256, height=7, width=7),
+ conv_dims=[],
+ fc_dims=[1024, 1024],
+ )
+ for k in range(3)
+ ],
+ box_predictors=[
+ L(FastRCNNOutputLayers)(
+ input_shape=ShapeSpec(channels=1024),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
+ cls_agnostic_bbox_reg=True,
+ num_classes="${...num_classes}",
+ )
+ for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
+ ],
+ proposal_matchers=[
+ L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
+ for th in [0.5, 0.6, 0.7]
+ ],
+)
diff --git a/configs/common/models/keypoint_rcnn_fpn.py b/configs/common/models/keypoint_rcnn_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..56b3994df249884d4816fc9a5c7f553a9ab6f400
--- /dev/null
+++ b/configs/common/models/keypoint_rcnn_fpn.py
@@ -0,0 +1,33 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
+
+from .mask_rcnn_fpn import model
+
+[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]
+
+model.roi_heads.update(
+ num_classes=1,
+ keypoint_in_features=["p2", "p3", "p4", "p5"],
+ keypoint_pooler=L(ROIPooler)(
+ output_size=14,
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
+ input_shape=ShapeSpec(channels=256, width=14, height=14),
+ num_keypoints=17,
+ conv_dims=[512] * 8,
+ loss_normalizer="visible",
+ ),
+)
+
+# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
+# 1000 proposals per-image is found to hurt box AP.
+# Therefore we increase it to 1500 per-image.
+model.proposal_generator.post_nms_topk = (1500, 1000)
+
+# Keypoint AP degrades (though box AP improves) when using plain L1 loss
+model.roi_heads.box_predictor.smooth_l1_beta = 0.5
diff --git a/configs/common/models/mask_rcnn_c4.py b/configs/common/models/mask_rcnn_c4.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3dcf8be42a39c6e5f6e76e3ab23adeccb33085d
--- /dev/null
+++ b/configs/common/models/mask_rcnn_c4.py
@@ -0,0 +1,88 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+ FastRCNNOutputLayers,
+ MaskRCNNConvUpsampleHead,
+ Res5ROIHeads,
+)
+
+model = L(GeneralizedRCNN)(
+ backbone=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res4"],
+ ),
+ proposal_generator=L(RPN)(
+ in_features=["res4"],
+ head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[32, 64, 128, 256, 512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[16],
+ offset=0.0,
+ ),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ batch_size_per_image=256,
+ positive_fraction=0.5,
+ pre_nms_topk=(12000, 6000),
+ post_nms_topk=(2000, 1000),
+ nms_thresh=0.7,
+ ),
+ roi_heads=L(Res5ROIHeads)(
+ num_classes=80,
+ batch_size_per_image=512,
+ positive_fraction=0.25,
+ proposal_matcher=L(Matcher)(
+ thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+ ),
+ in_features=["res4"],
+ pooler=L(ROIPooler)(
+ output_size=14,
+ scales=(1.0 / 16,),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ res5=L(ResNet.make_stage)(
+ block_class=BottleneckBlock,
+ num_blocks=3,
+ stride_per_block=[2, 1, 1],
+ in_channels=1024,
+ bottleneck_channels=512,
+ out_channels=2048,
+ norm="FrozenBN",
+ stride_in_1x1=True,
+ ),
+ box_predictor=L(FastRCNNOutputLayers)(
+ input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+ num_classes="${..num_classes}",
+ ),
+ mask_head=L(MaskRCNNConvUpsampleHead)(
+ input_shape=L(ShapeSpec)(
+ channels="${...res5.out_channels}",
+ width="${...pooler.output_size}",
+ height="${...pooler.output_size}",
+ ),
+ num_classes="${..num_classes}",
+ conv_dims=[256],
+ ),
+ ),
+ pixel_mean=[103.530, 116.280, 123.675],
+ pixel_std=[1.0, 1.0, 1.0],
+ input_format="BGR",
+)
diff --git a/configs/common/models/mask_rcnn_fpn.py b/configs/common/models/mask_rcnn_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f87d8da83d93932ddd5e9dc5b38d42786c0cbb4
--- /dev/null
+++ b/configs/common/models/mask_rcnn_fpn.py
@@ -0,0 +1,93 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import GeneralizedRCNN
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelMaxPool
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
+from detectron2.modeling.roi_heads import (
+ StandardROIHeads,
+ FastRCNNOutputLayers,
+ MaskRCNNConvUpsampleHead,
+ FastRCNNConvFCHead,
+)
+
+model = L(GeneralizedRCNN)(
+ backbone=L(FPN)(
+ bottom_up=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res2", "res3", "res4", "res5"],
+ ),
+ in_features="${.bottom_up.out_features}",
+ out_channels=256,
+ top_block=L(LastLevelMaxPool)(),
+ ),
+ proposal_generator=L(RPN)(
+ in_features=["p2", "p3", "p4", "p5", "p6"],
+ head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[32], [64], [128], [256], [512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[4, 8, 16, 32, 64],
+ offset=0.0,
+ ),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ batch_size_per_image=256,
+ positive_fraction=0.5,
+ pre_nms_topk=(2000, 1000),
+ post_nms_topk=(1000, 1000),
+ nms_thresh=0.7,
+ ),
+ roi_heads=L(StandardROIHeads)(
+ num_classes=80,
+ batch_size_per_image=512,
+ positive_fraction=0.25,
+ proposal_matcher=L(Matcher)(
+ thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
+ ),
+ box_in_features=["p2", "p3", "p4", "p5"],
+ box_pooler=L(ROIPooler)(
+ output_size=7,
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ box_head=L(FastRCNNConvFCHead)(
+ input_shape=ShapeSpec(channels=256, height=7, width=7),
+ conv_dims=[],
+ fc_dims=[1024, 1024],
+ ),
+ box_predictor=L(FastRCNNOutputLayers)(
+ input_shape=ShapeSpec(channels=1024),
+ test_score_thresh=0.05,
+ box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
+ num_classes="${..num_classes}",
+ ),
+ mask_in_features=["p2", "p3", "p4", "p5"],
+ mask_pooler=L(ROIPooler)(
+ output_size=14, # ori is 14
+ scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
+ sampling_ratio=0,
+ pooler_type="ROIAlignV2",
+ ),
+ mask_head=L(MaskRCNNConvUpsampleHead)(
+ input_shape=ShapeSpec(channels=256, width=14, height=14),
+ num_classes="${..num_classes}",
+ conv_dims=[256, 256, 256, 256, 256],
+ ),
+ ),
+ pixel_mean=[103.530, 116.280, 123.675],
+ pixel_std=[1.0, 1.0, 1.0],
+ input_format="BGR",
+)
diff --git a/configs/common/models/panoptic_fpn.py b/configs/common/models/panoptic_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..88f55d2ce9db62e61445d6a3700067d9d864ecae
--- /dev/null
+++ b/configs/common/models/panoptic_fpn.py
@@ -0,0 +1,20 @@
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling import PanopticFPN
+from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead
+
+from .mask_rcnn_fpn import model
+
+model._target_ = PanopticFPN
+model.sem_seg_head = L(SemSegFPNHead)(
+ input_shape={
+ f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
+ for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
+ },
+ ignore_value=255,
+ num_classes=54, # COCO stuff + 1
+ conv_dims=128,
+ common_stride=4,
+ loss_weight=0.5,
+ norm="GN",
+)
diff --git a/configs/common/models/retinanet.py b/configs/common/models/retinanet.py
new file mode 100644
index 0000000000000000000000000000000000000000..01d168fe6f054b88933488bdc65516424ce917cd
--- /dev/null
+++ b/configs/common/models/retinanet.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+from detectron2.config import LazyCall as L
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.meta_arch import RetinaNet
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
+from detectron2.modeling.backbone.fpn import LastLevelP6P7
+from detectron2.modeling.backbone import BasicStem, FPN, ResNet
+from detectron2.modeling.box_regression import Box2BoxTransform
+from detectron2.modeling.matcher import Matcher
+from detectron2.modeling.meta_arch.retinanet import RetinaNetHead
+
+model = L(RetinaNet)(
+ backbone=L(FPN)(
+ bottom_up=L(ResNet)(
+ stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
+ stages=L(ResNet.make_default_stages)(
+ depth=50,
+ stride_in_1x1=True,
+ norm="FrozenBN",
+ ),
+ out_features=["res3", "res4", "res5"],
+ ),
+ in_features=["res3", "res4", "res5"],
+ out_channels=256,
+ top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
+ ),
+ head=L(RetinaNetHead)(
+ input_shape=[ShapeSpec(channels=256)],
+ num_classes="${..num_classes}",
+ conv_dims=[256, 256, 256, 256],
+ prior_prob=0.01,
+ num_anchors=9,
+ ),
+ anchor_generator=L(DefaultAnchorGenerator)(
+ sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
+ aspect_ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128],
+ offset=0.0,
+ ),
+ box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
+ anchor_matcher=L(Matcher)(
+ thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
+ ),
+ num_classes=80,
+ head_in_features=["p3", "p4", "p5", "p6", "p7"],
+ focal_loss_alpha=0.25,
+ focal_loss_gamma=2.0,
+ pixel_mean=[103.530, 116.280, 123.675],
+ pixel_std=[1.0, 1.0, 1.0],
+ input_format="BGR",
+)
diff --git a/configs/common/optim.py b/configs/common/optim.py
new file mode 100644
index 0000000000000000000000000000000000000000..d39d3aaa546c17e831d21d1758b69e8c1609415e
--- /dev/null
+++ b/configs/common/optim.py
@@ -0,0 +1,15 @@
+import torch
+
+from detectron2.config import LazyCall as L
+from detectron2.solver.build import get_default_optimizer_params
+
+SGD = L(torch.optim.SGD)(
+ params=L(get_default_optimizer_params)(
+ # params.model is meant to be set to the model object, before instantiating
+ # the optimizer.
+ weight_decay_norm=0.0
+ ),
+ lr=0.02,
+ momentum=0.9,
+ weight_decay=1e-4,
+)
diff --git a/configs/common/train.py b/configs/common/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c63bdb073797e48e0b3640e668ecc1d5c137d59
--- /dev/null
+++ b/configs/common/train.py
@@ -0,0 +1,18 @@
+# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
+# You can use your own instead, together with your own train_net.py
+train = dict(
+ output_dir="./output",
+ init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
+ max_iter=90000,
+ amp=dict(enabled=False), # options for Automatic Mixed Precision
+ ddp=dict( # options for DistributedDataParallel
+ broadcast_buffers=False,
+ find_unused_parameters=False,
+ fp16_compression=False,
+ ),
+ checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer
+ eval_period=5000,
+ log_period=20,
+ device="cuda"
+ # ...
+)
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..3740e9bb08c5f168a9ab3a6d94561678bad1775c
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
@@ -0,0 +1,9 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+model.backbone.bottom_up.stages.depth = 101
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..18e5f0720c568db4ef0c97b59688b5e7866df606
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..63c54ee9a5ce2368494b775cc90fada1439feaa5
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_101_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7a2aedf480ed8dc4aa3645e37420e9b893fae4
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py
@@ -0,0 +1,72 @@
+import detectron2.data.transforms as T
+from detectron2.config.lazy import LazyCall as L
+from detectron2.layers.batch_norm import NaiveSyncBatchNorm
+from detectron2.solver import WarmupParamScheduler
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from ..common.data.coco import dataloader
+from ..common.models.mask_rcnn_fpn import model
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+# train from scratch
+train.init_checkpoint = ""
+train.amp.enabled = True
+train.ddp.fp16_compression = True
+model.backbone.bottom_up.freeze_at = 0
+
+# SyncBN
+# fmt: off
+model.backbone.bottom_up.stem.norm = \
+ model.backbone.bottom_up.stages.norm = \
+ model.backbone.norm = "SyncBN"
+
+# Using NaiveSyncBatchNorm because heads may have empty input. That is not supported by
+# torch.nn.SyncBatchNorm. We can remove this after
+# https://github.com/pytorch/pytorch/issues/36530 is fixed.
+model.roi_heads.box_head.conv_norm = \
+ model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c,
+ stats_mode="N")
+# fmt: on
+
+# 2conv in RPN:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950
+model.proposal_generator.head.conv_dims = [-1, -1]
+
+# 4conv1fc box head
+model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
+model.roi_heads.box_head.fc_dims = [1024]
+
+# resize_and_crop_image in:
+# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950
+image_size = 1024
+dataloader.train.mapper.augmentations = [
+ L(T.ResizeScale)(
+ min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
+ ),
+ L(T.FixedSizeCrop)(crop_size=(image_size, image_size)),
+ L(T.RandomFlip)(horizontal=True),
+]
+
+# recompute boxes due to cropping
+dataloader.train.mapper.recompute_boxes = True
+
+# larger batch-size.
+dataloader.train.total_batch_size = 64
+
+# Equivalent to 100 epochs.
+# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep
+train.max_iter = 184375
+
+lr_multiplier = L(WarmupParamScheduler)(
+ scheduler=L(MultiStepParamScheduler)(
+ values=[1.0, 0.1, 0.01],
+ milestones=[163889, 177546],
+ num_updates=train.max_iter,
+ ),
+ warmup_length=500 / train.max_iter,
+ warmup_factor=0.067,
+)
+
+optimizer.lr = 0.1
+optimizer.weight_decay = 4e-5
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a7c376da5f9269197c44079f3e0f3b09cdc63fa
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..97586b8f5330a9d995a0bffd1f5e7bd5b5656462
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ca1ede262cf5c37a3a54778458c74aff1479411
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter //= 2 # 100ep -> 50ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone // 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..249387fffeed7c02f592ecc84ee5a295533b1ed7
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,29 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=23,
+ w_a=38.65,
+ w_0=96,
+ w_m=2.43,
+ group_width=40,
+ norm="SyncBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..731320e74ebed4d8ceec58c07cb906542b8b021b
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..da94e6f90d823f110e4a2373d7fd16b3d1ab5ac3
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py
@@ -0,0 +1,30 @@
+from .mask_rcnn_R_50_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+from detectron2.config import LazyCall as L
+from detectron2.modeling.backbone import RegNet
+from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
+
+# Config source:
+# https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa
+model.backbone.bottom_up = L(RegNet)(
+ stem_class=SimpleStem,
+ stem_width=32,
+ block_class=ResBottleneckBlock,
+ depth=22,
+ w_a=31.41,
+ w_0=96,
+ w_m=2.24,
+ group_width=64,
+ se_ratio=0.25,
+ norm="SyncBN",
+ out_features=["s1", "s2", "s3", "s4"],
+)
+model.pixel_std = [57.375, 57.120, 58.395]
+
+# RegNets benefit from enabling cudnn benchmark mode
+train.cudnn_benchmark = True
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..b867cc865e5ac4d7b70221da141894efd7cbd75c
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 2 # 100ep -> 200ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 2 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34
--- /dev/null
+++ b/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py
@@ -0,0 +1,14 @@
+from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import (
+ dataloader,
+ lr_multiplier,
+ model,
+ optimizer,
+ train,
+)
+
+train.max_iter *= 4 # 100ep -> 400ep
+
+lr_multiplier.scheduler.milestones = [
+ milestone * 4 for milestone in lr_multiplier.scheduler.milestones
+]
+lr_multiplier.scheduler.num_updates = train.max_iter
diff --git a/configs/quick_schedules/README.md b/configs/quick_schedules/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4e6c82ef3f75a73c7006f33d7c850a0d4781a58f
--- /dev/null
+++ b/configs/quick_schedules/README.md
@@ -0,0 +1,8 @@
+These are quick configs for performance or accuracy regression tracking purposes.
+
+* `*instant_test.yaml`: can train on 2 GPUs. They are used to test whether the training can
+ successfully finish. They are not expected to produce reasonable training results.
+* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify
+ the results are as expected.
+* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy
+ is within the normal range.
diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc5a4116cb096278823049c1f823e99f8e16e97e
--- /dev/null
+++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]]
diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e41a0fe7ffe9c3531741df49e546aa45cfe4fdee
--- /dev/null
+++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2f37e5e2cc2a9e195e13703e9930e67e0f9a896
--- /dev/null
+++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c
--- /dev/null
+++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,15 @@
+_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+ TEST: ("coco_2017_val_100",)
+ PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14cf2aa82aec52ad44e28ead0665dad811d55457
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
+DATASETS:
+ TEST: ("keypoints_coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3dd209f693bd0bfdd46a2c9e7e750dede3abc141
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,16 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ ROI_HEADS:
+ NUM_CLASSES: 1
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val_100",)
+ TEST: ("keypoints_coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b92392f1c4457033ae4c87a521e339fe9e184ce
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
@@ -0,0 +1,30 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
+ LOSS_WEIGHT: 4.0
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss
+ RPN:
+ SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val",)
+ TEST: ("keypoints_coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ WARMUP_FACTOR: 0.33333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bd962878fea64035887c48981beeb8d41bfdbd0
--- /dev/null
+++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,28 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ KEYPOINT_ON: True
+ RESNETS:
+ DEPTH: 50
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ NUM_CLASSES: 1
+ ROI_KEYPOINT_HEAD:
+ POOLER_RESOLUTION: 14
+ POOLER_SAMPLING_RATIO: 2
+ ROI_BOX_HEAD:
+ SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss
+ RPN:
+ SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss
+DATASETS:
+ TRAIN: ("keypoints_coco_2017_val",)
+ TEST: ("keypoints_coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ WARMUP_FACTOR: 0.33333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab6e69812b94ea7e071f29d9a6937d5c70805b5b
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.001
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "value"
+ CLIP_VALUE: 1.0
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2d5b7ff87e069f8c774a230bdfd47b8c12d18a3
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c4f1214efa520944fd941daec082ad45c164a23
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.001
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f68dd8f96c7896b5fc95d694a399f2ce417c1deb
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-C4.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val",)
+ TEST: ("coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (600,)
+ MAX_SIZE_TRAIN: 1000
+ MIN_SIZE_TEST: 800
+ MAX_SIZE_TEST: 1000
+SOLVER:
+ IMS_PER_BATCH: 8 # base uses 16
+ WARMUP_FACTOR: 0.33333
+ WARMUP_ITERS: 100
+ STEPS: (11000, 11600)
+ MAX_ITER: 12000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3ce6cf922ae07fba5b5e01edbac19bf58a8e9dd
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e5454bfd95cc37749c50aec7866f32d9a80ca2b7
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
+ AUG:
+ ENABLED: True
+ MIN_SIZES: (700, 800) # to save some time
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6dbfcde0bf837990634d419a6dda1e2909c3cd7f
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,14 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52f78762bda23331c97afd523cf98a5c118b113e
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
@@ -0,0 +1,6 @@
+_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
+MODEL:
+ ROI_BOX_HEAD:
+ TRAIN_ON_PRED_BOXES: True
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aadae4ce898761e1e40e5af65a9e5ea01053b936
--- /dev/null
+++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_HEADS:
+ BATCH_SIZE_PER_IMAGE: 256
+ MASK_ON: True
+DATASETS:
+ TRAIN: ("coco_2017_val",)
+ TEST: ("coco_2017_val",)
+INPUT:
+ MIN_SIZE_TRAIN: (600,)
+ MAX_SIZE_TRAIN: 1000
+ MIN_SIZE_TEST: 800
+ MAX_SIZE_TEST: 1000
+SOLVER:
+ WARMUP_FACTOR: 0.3333333
+ WARMUP_ITERS: 100
+ STEPS: (5500, 5800)
+ MAX_ITER: 6000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70874e3a92c9034d75cbbebb145b61084ba15e42
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100_panoptic_separated",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_val_100_panoptic_separated",)
+ TEST: ("coco_2017_val_100_panoptic_separated",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 1
diff --git a/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3bbf30196cb35434340d4c343cab0c96283cd4f
--- /dev/null
+++ b/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "PanopticFPN"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: True
+ RESNETS:
+ DEPTH: 50
+ SEM_SEG_HEAD:
+ LOSS_WEIGHT: 0.5
+DATASETS:
+ TRAIN: ("coco_2017_val_panoptic_separated",)
+ TEST: ("coco_2017_val_panoptic_separated",)
+SOLVER:
+ BASE_LR: 0.01
+ WARMUP_FACTOR: 0.001
+ WARMUP_ITERS: 500
+ STEPS: (5500,)
+ MAX_ITER: 7000
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
diff --git a/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb666c1a6b3e351227046bc9c2af8799408858e8
--- /dev/null
+++ b/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
diff --git a/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d95c1f614296716374686b22055a587ccd052b9
--- /dev/null
+++ b/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7c3f908a9e80e98b2d25b6d384a60acaba9d4f8
--- /dev/null
+++ b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
+DATASETS:
+ TEST: ("coco_2017_val_100",)
+TEST:
+ EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
diff --git a/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..402d432477507dc36f04c4a9777cb80fe06b2809
--- /dev/null
+++ b/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("coco_2017_val_100",)
+ TEST: ("coco_2017_val_100",)
+SOLVER:
+ STEPS: (30,)
+ MAX_ITER: 40
+ BASE_LR: 0.005
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bca74987d5218736983617883e0fe37f79d219b7
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+TEST:
+ EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
diff --git a/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14ab606f219b462fe37fcc7d5fbdbe65cb5c2642
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_100_panoptic_stuffonly",)
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+SOLVER:
+ BASE_LR: 0.005
+ STEPS: (30,)
+ MAX_ITER: 40
+ IMS_PER_BATCH: 4
+DATALOADER:
+ NUM_WORKERS: 2
diff --git a/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f78d775889b11e9e76743de5ddb8139198edf61
--- /dev/null
+++ b/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+ META_ARCHITECTURE: "SemanticSegmentor"
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+DATASETS:
+ TRAIN: ("coco_2017_val_panoptic_stuffonly",)
+ TEST: ("coco_2017_val_panoptic_stuffonly",)
+SOLVER:
+ BASE_LR: 0.01
+ WARMUP_FACTOR: 0.001
+ WARMUP_ITERS: 300
+ STEPS: (5500,)
+ MAX_ITER: 7000
+TEST:
+ EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
+INPUT:
+ # no scale augmentation
+ MIN_SIZE_TRAIN: (800, )
diff --git a/demo/README.md b/demo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f11ad3eb72953a7bc05d5e333fca4a62ab633b9c
--- /dev/null
+++ b/demo/README.md
@@ -0,0 +1,5 @@
+
+## Mask Transfiner Demo
+
+For visualization demo, please refer to our [visualization script](https://github.com/SysCV/transfiner#visualization).
+
diff --git a/demo/__pycache__/predictor.cpython-38.pyc b/demo/__pycache__/predictor.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5fed9aa41681040202f3708d1122e5240091b8a9
Binary files /dev/null and b/demo/__pycache__/predictor.cpython-38.pyc differ
diff --git a/demo/demo.py b/demo/demo.py
new file mode 100755
index 0000000000000000000000000000000000000000..a14dfb94c998bd3bfb650004a6fe1a23bf17eda3
--- /dev/null
+++ b/demo/demo.py
@@ -0,0 +1,190 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import argparse
+import glob
+import multiprocessing as mp
+import numpy as np
+import os
+import tempfile
+import time
+import warnings
+import cv2
+import tqdm
+
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.utils.logger import setup_logger
+
+from predictor import VisualizationDemo
+
+# Title of the OpenCV window used to display image and webcam visualizations.
+WINDOW_NAME = "COCO detections"
+
+
+def setup_cfg(args):
+    """Build a frozen config from the parsed command-line arguments.
+
+    Args:
+        args: parsed arguments providing ``config_file``, ``opts`` and
+            ``confidence_threshold``.
+
+    Returns:
+        The config node after merging, threshold overrides and ``freeze()``.
+    """
+    score_threshold = args.confidence_threshold
+
+    cfg = get_cfg()
+    # To use demo for Panoptic-DeepLab, please uncomment the following two lines.
+    # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config  # noqa
+    # add_panoptic_deeplab_config(cfg)
+
+    # The config file is merged first, then the command-line 'KEY VALUE' overrides.
+    cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+
+    # The command-line threshold is written last, so it wins over file and opts values
+    # for every builtin model family.
+    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = score_threshold
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_threshold
+    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = score_threshold
+
+    cfg.freeze()
+    return cfg
+
+
+def get_parser():
+    """Create the command-line argument parser for the demo script.
+
+    Returns:
+        argparse.ArgumentParser: parser with the config-file, input-source
+        (webcam / video / images), output, confidence-threshold and
+        config-override options registered.
+    """
+    arg_parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
+    add = arg_parser.add_argument
+
+    add(
+        "--config-file",
+        metavar="FILE",
+        default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
+        help="path to config file",
+    )
+
+    # Input sources.
+    add("--webcam", action="store_true", help="Take inputs from webcam.")
+    add("--video-input", help="Path to video file.")
+    add(
+        "--input",
+        nargs="+",
+        help="A list of space separated input images; "
+        "or a single glob pattern such as 'directory/*.jpg'",
+    )
+
+    add(
+        "--output",
+        help="A file or directory to save output visualizations. "
+        "If not given, will show output in an OpenCV window.",
+    )
+    add(
+        "--confidence-threshold",
+        default=0.5,
+        type=float,
+        help="Minimum score for instance predictions to be shown",
+    )
+    # REMAINDER: everything after --opts is collected verbatim into the list.
+    add(
+        "--opts",
+        nargs=argparse.REMAINDER,
+        default=[],
+        help="Modify config options using the command-line 'KEY VALUE' pairs",
+    )
+    return arg_parser
+
+
+def test_opencv_video_format(codec, file_ext):
+    """Check whether OpenCV can write a video file with the given codec.
+
+    Writes 30 black 10x10 frames to a file inside a temporary directory and
+    reports whether the file exists afterwards.
+
+    Args:
+        codec (str): FourCC code such as "mp4v"; its characters are unpacked
+            into ``cv2.VideoWriter_fourcc``.
+        file_ext (str): extension appended to the test file name, e.g. ".mp4".
+
+    Returns:
+        bool: True if the output file exists after the writer is released.
+    """
+    with tempfile.TemporaryDirectory(prefix="video_format_test") as tmp_dir:
+        filename = os.path.join(tmp_dir, "test_file" + file_ext)
+        writer = cv2.VideoWriter(
+            filename=filename,
+            fourcc=cv2.VideoWriter_fourcc(*codec),
+            fps=float(30),
+            frameSize=(10, 10),
+            isColor=True,
+        )
+        black_frame = np.zeros((10, 10, 3), np.uint8)
+        for _ in range(30):
+            writer.write(black_frame)
+        writer.release()
+        # Must be checked before the temporary directory is cleaned up.
+        return os.path.isfile(filename)
+
+
+if __name__ == "__main__":
+    # Script entry point: run the demo on images, a webcam stream, or a video file,
+    # depending on which of --input / --webcam / --video-input was given.
+    mp.set_start_method("spawn", force=True)
+    args = get_parser().parse_args()
+    setup_logger(name="fvcore")
+    logger = setup_logger()
+    logger.info("Arguments: " + str(args))
+
+    cfg = setup_cfg(args)
+
+    demo = VisualizationDemo(cfg)
+
+    if args.input:
+        if len(args.input) == 1:
+            # A single argument is treated as a glob pattern (after ~ expansion).
+            args.input = glob.glob(os.path.expanduser(args.input[0]))
+            assert args.input, "The input path(s) was not found"
+        if args.output:
+            # For image input, --output is always used as a directory; create it once
+            # up front instead of re-checking for every image.
+            os.makedirs(args.output, exist_ok=True)
+        for path in tqdm.tqdm(args.input, disable=not args.output):
+            # use PIL, to be consistent with evaluation
+            img = read_image(path, format="BGR")
+            start_time = time.time()
+            predictions, visualized_output = demo.run_on_image(img)
+            logger.info(
+                "{}: {} in {:.2f}s".format(
+                    path,
+                    "detected {} instances".format(len(predictions["instances"]))
+                    if "instances" in predictions
+                    else "finished",
+                    time.time() - start_time,
+                )
+            )
+
+            if args.output:
+                # Each visualization keeps the base name of its input image.
+                out_filename = os.path.join(args.output, os.path.basename(path))
+                visualized_output.save(out_filename)
+            else:
+                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+                # Reverse the channel axis (RGB -> BGR) for OpenCV display.
+                cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
+                if cv2.waitKey(0) == 27:
+                    break  # esc to quit
+    elif args.webcam:
+        assert args.input is None, "Cannot have both --input and --webcam!"
+        assert args.output is None, "output not yet supported with --webcam!"
+        cam = cv2.VideoCapture(0)
+        for vis in tqdm.tqdm(demo.run_on_video(cam)):
+            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
+            cv2.imshow(WINDOW_NAME, vis)
+            if cv2.waitKey(1) == 27:
+                break  # esc to quit
+        cam.release()
+        cv2.destroyAllWindows()
+    elif args.video_input:
+        video = cv2.VideoCapture(args.video_input)
+        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        frames_per_second = video.get(cv2.CAP_PROP_FPS)
+        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        basename = os.path.basename(args.video_input)
+        # Prefer x264/.mkv when this OpenCV build can write it, else fall back to mp4v/.mp4.
+        codec, file_ext = (
+            ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
+        )
+        # The fallback codec string is "mp4v" (no leading dot); comparing against ".mp4v"
+        # could never match, so the warning was never emitted.
+        if codec == "mp4v":
+            warnings.warn("x264 codec not available, switching to mp4v")
+        if args.output:
+            if os.path.isdir(args.output):
+                # Output directory: reuse the input's base name with the chosen extension.
+                output_fname = os.path.join(args.output, basename)
+                output_fname = os.path.splitext(output_fname)[0] + file_ext
+            else:
+                output_fname = args.output
+            # Refuse to overwrite an existing file.
+            assert not os.path.isfile(output_fname), output_fname
+            output_file = cv2.VideoWriter(
+                filename=output_fname,
+                # some installation of opencv may not support x264 (due to its license),
+                # you can try other format (e.g. MPEG)
+                fourcc=cv2.VideoWriter_fourcc(*codec),
+                fps=float(frames_per_second),
+                frameSize=(width, height),
+                isColor=True,
+            )
+        assert os.path.isfile(args.video_input)
+        for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
+            if args.output:
+                output_file.write(vis_frame)
+            else:
+                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
+                cv2.imshow(basename, vis_frame)
+                if cv2.waitKey(1) == 27:
+                    break  # esc to quit
+        video.release()
+        if args.output:
+            output_file.release()
+        else:
+            cv2.destroyAllWindows()
diff --git a/demo/predictor.py b/demo/predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b7ebd3f846850172c1f560f8492d51e5667f76d
--- /dev/null
+++ b/demo/predictor.py
@@ -0,0 +1,220 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import atexit
+import bisect
+import multiprocessing as mp
+from collections import deque
+import cv2
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.utils.video_visualizer import VideoVisualizer
+from detectron2.utils.visualizer import ColorMode, Visualizer
+
+
+class VisualizationDemo(object):
+    """Runs a predictor on images or video frames and draws its predictions."""
+
+    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
+        """
+        Args:
+            cfg (CfgNode): config used to build the predictor. The first entry of
+                ``cfg.DATASETS.TEST`` (or "__unused" if empty) selects the metadata
+                used for drawing.
+            instance_mode (ColorMode): color mode forwarded to the visualizers.
+            parallel (bool): whether to run the model in different processes from visualization.
+                Useful since the visualization logic can be slow.
+        """
+        self.metadata = MetadataCatalog.get(
+            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
+        )
+        self.cpu_device = torch.device("cpu")
+        self.instance_mode = instance_mode
+
+        self.parallel = parallel
+        if parallel:
+            # One worker per visible GPU (AsyncPredictor falls back to CPU when there are none).
+            num_gpu = torch.cuda.device_count()
+            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
+        else:
+            self.predictor = DefaultPredictor(cfg)
+
+    def run_on_image(self, image):
+        """
+        Args:
+            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+                This is the format used by OpenCV.
+
+        Returns:
+            predictions (dict): the output of the model.
+            vis_output (VisImage): the visualized image output, or None when the
+                predictions contain none of "panoptic_seg", "sem_seg" or "instances".
+        """
+        vis_output = None
+        predictions = self.predictor(image)
+        # Convert image from OpenCV BGR format to Matplotlib RGB format.
+        image = image[:, :, ::-1]
+        visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
+        if "panoptic_seg" in predictions:
+            panoptic_seg, segments_info = predictions["panoptic_seg"]
+            vis_output = visualizer.draw_panoptic_seg_predictions(
+                panoptic_seg.to(self.cpu_device), segments_info
+            )
+        else:
+            # Semantic and instance results are independent checks, so both are drawn
+            # with the same visualizer when both are present.
+            if "sem_seg" in predictions:
+                vis_output = visualizer.draw_sem_seg(
+                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+                )
+            if "instances" in predictions:
+                instances = predictions["instances"].to(self.cpu_device)
+                vis_output = visualizer.draw_instance_predictions(predictions=instances)
+
+        return predictions, vis_output
+
+    def _frame_from_video(self, video):
+        """Yield frames from ``video`` until it is closed or a read fails."""
+        while video.isOpened():
+            success, frame = video.read()
+            if success:
+                yield frame
+            else:
+                break
+
+    def run_on_video(self, video):
+        """
+        Visualizes predictions on frames of the input video.
+
+        Args:
+            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
+                either a webcam or a video file.
+
+        Yields:
+            ndarray: BGR visualizations of each video frame.
+
+        Raises:
+            ValueError: if the model output contains none of "panoptic_seg",
+                "instances" or "sem_seg".
+        """
+        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
+
+        def process_predictions(frame, predictions):
+            # Draw one frame's predictions and return the result in BGR order.
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            if "panoptic_seg" in predictions:
+                panoptic_seg, segments_info = predictions["panoptic_seg"]
+                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
+                    frame, panoptic_seg.to(self.cpu_device), segments_info
+                )
+            elif "instances" in predictions:
+                predictions = predictions["instances"].to(self.cpu_device)
+                vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
+            elif "sem_seg" in predictions:
+                vis_frame = video_visualizer.draw_sem_seg(
+                    frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
+                )
+            else:
+                # Without this branch vis_frame would be unbound below and the failure
+                # would surface as an opaque UnboundLocalError.
+                raise ValueError(
+                    "Cannot visualize predictions with keys {}; expected one of "
+                    "'panoptic_seg', 'instances' or 'sem_seg'".format(list(predictions))
+                )
+
+            # Converts Matplotlib RGB format to OpenCV BGR format
+            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
+            return vis_frame
+
+        frame_gen = self._frame_from_video(video)
+        if self.parallel:
+            buffer_size = self.predictor.default_buffer_size
+
+            # Frames waiting for their prediction, in submission order.
+            frame_data = deque()
+
+            for cnt, frame in enumerate(frame_gen):
+                frame_data.append(frame)
+                self.predictor.put(frame)
+
+                # Start consuming results only once buffer_size frames are in flight.
+                if cnt >= buffer_size:
+                    frame = frame_data.popleft()
+                    predictions = self.predictor.get()
+                    yield process_predictions(frame, predictions)
+
+            # Input exhausted: drain the frames still in flight.
+            while len(frame_data):
+                frame = frame_data.popleft()
+                predictions = self.predictor.get()
+                yield process_predictions(frame, predictions)
+        else:
+            for frame in frame_gen:
+                yield process_predictions(frame, self.predictor(frame))
+
+
+class AsyncPredictor:
+    """
+    A predictor that runs the model asynchronously, possibly on >1 GPUs.
+    Because rendering the visualization takes a considerable amount of time,
+    this helps improve throughput a little bit when rendering videos.
+
+    Tasks are tagged with an increasing index on ``put``; ``get`` hands results
+    back in that same order even if workers finish out of order.
+    """
+
+    class _StopToken:
+        """Sentinel placed on the task queue to make a worker exit its loop."""
+
+    class _PredictWorker(mp.Process):
+        """Process that builds its own predictor and serves tasks from the queue."""
+
+        def __init__(self, cfg, task_queue, result_queue):
+            self.cfg = cfg
+            self.task_queue = task_queue
+            self.result_queue = result_queue
+            super().__init__()
+
+        def run(self):
+            model = DefaultPredictor(self.cfg)
+
+            task = self.task_queue.get()
+            while not isinstance(task, AsyncPredictor._StopToken):
+                idx, data = task
+                self.result_queue.put((idx, model(data)))
+                task = self.task_queue.get()
+
+    def __init__(self, cfg, num_gpus: int = 1):
+        """
+        Args:
+            cfg (CfgNode): config cloned for every worker, with MODEL.DEVICE overridden.
+            num_gpus (int): if 0, will run on CPU
+        """
+        worker_count = max(num_gpus, 1)
+        queue_capacity = worker_count * 3
+        self.task_queue = mp.Queue(maxsize=queue_capacity)
+        self.result_queue = mp.Queue(maxsize=queue_capacity)
+
+        self.procs = []
+        for gpu_index in range(worker_count):
+            worker_cfg = cfg.clone()
+            worker_cfg.defrost()
+            worker_cfg.MODEL.DEVICE = "cuda:{}".format(gpu_index) if num_gpus > 0 else "cpu"
+            worker = AsyncPredictor._PredictWorker(
+                worker_cfg, self.task_queue, self.result_queue
+            )
+            self.procs.append(worker)
+
+        # Indices of the last task submitted and the last result handed out.
+        self.put_idx = 0
+        self.get_idx = 0
+        # Results that arrived ahead of their turn, kept sorted by index
+        # (result_rank[i] is the index of result_data[i]).
+        self.result_rank = []
+        self.result_data = []
+
+        for worker in self.procs:
+            worker.start()
+        atexit.register(self.shutdown)
+
+    def put(self, image):
+        """Submit ``image`` for prediction, tagged with the next task index."""
+        self.put_idx += 1
+        self.task_queue.put((self.put_idx, image))
+
+    def get(self):
+        """Return the result of the oldest task not yet returned."""
+        self.get_idx += 1  # the index needed for this request
+        wanted = self.get_idx
+
+        # The wanted result may already be buffered from an earlier out-of-order arrival.
+        if self.result_rank and self.result_rank[0] == wanted:
+            self.result_rank.pop(0)
+            return self.result_data.pop(0)
+
+        # make sure the results are returned in the correct order
+        idx, res = self.result_queue.get()
+        while idx != wanted:
+            slot = bisect.bisect(self.result_rank, idx)
+            self.result_rank.insert(slot, idx)
+            self.result_data.insert(slot, res)
+            idx, res = self.result_queue.get()
+        return res
+
+    def __len__(self):
+        """Number of tasks submitted whose results have not been returned yet."""
+        return self.put_idx - self.get_idx
+
+    def __call__(self, image):
+        """Submit ``image`` and return the next result in order."""
+        self.put(image)
+        return self.get()
+
+    def shutdown(self):
+        """Queue one stop token per worker process."""
+        for _worker in self.procs:
+            self.task_queue.put(AsyncPredictor._StopToken())
+
+    @property
+    def default_buffer_size(self):
+        """Five in-flight frames per worker process."""
+        return 5 * len(self.procs)
diff --git a/demo/sample_imgs/000000008844.jpg b/demo/sample_imgs/000000008844.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d117937ec29e62d694bd6d2dc70eb41d9a92326c
Binary files /dev/null and b/demo/sample_imgs/000000008844.jpg differ
diff --git a/demo/sample_imgs/000000018737.jpg b/demo/sample_imgs/000000018737.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..340c394ff1398a1496c81855ff1128bbf8071842
Binary files /dev/null and b/demo/sample_imgs/000000018737.jpg differ
diff --git a/demo/sample_imgs/000000126137.jpg b/demo/sample_imgs/000000126137.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83c736e918992a085819d8a70103159b80c90998
Binary files /dev/null and b/demo/sample_imgs/000000126137.jpg differ
diff --git a/demo/sample_imgs/000000131444.jpg b/demo/sample_imgs/000000131444.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d4f63ec0b4ce746eb0eba168eceacb0032d1aac3
Binary files /dev/null and b/demo/sample_imgs/000000131444.jpg differ
diff --git a/demo/sample_imgs/000000132408.jpg b/demo/sample_imgs/000000132408.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dac4b04c9fefe52341456fe400d56a3d6ccb367b
Binary files /dev/null and b/demo/sample_imgs/000000132408.jpg differ
diff --git a/demo/sample_imgs/000000157365.jpg b/demo/sample_imgs/000000157365.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..10e719bef58161855d280d7a0034491d12a382f0
Binary files /dev/null and b/demo/sample_imgs/000000157365.jpg differ
diff --git a/demo/sample_imgs/000000176037.jpg b/demo/sample_imgs/000000176037.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0abf887fb73e89869f761c8046c9227bc5bb298a
Binary files /dev/null and b/demo/sample_imgs/000000176037.jpg differ
diff --git a/demo/sample_imgs/000000224200.jpg b/demo/sample_imgs/000000224200.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fc5b3de83c8b3861ec92ecef6263249ad7b11473
Binary files /dev/null and b/demo/sample_imgs/000000224200.jpg differ
diff --git a/demo/sample_imgs/000000244019.jpg b/demo/sample_imgs/000000244019.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..54927eb0e93cf3b5ce55a33aa64f5dd36ebd1008
Binary files /dev/null and b/demo/sample_imgs/000000244019.jpg differ
diff --git a/demo/sample_imgs/000000252776.jpg b/demo/sample_imgs/000000252776.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8b9dd0ef433ad232164a0c92c3414f49dffc6fec
Binary files /dev/null and b/demo/sample_imgs/000000252776.jpg differ
diff --git a/demo/sample_imgs/000000286849.jpg b/demo/sample_imgs/000000286849.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..12d9e147d759e2aeeb4e3903bc129157f71ac642
Binary files /dev/null and b/demo/sample_imgs/000000286849.jpg differ
diff --git a/demo/sample_imgs/000000292997.jpg b/demo/sample_imgs/000000292997.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4d56af9492d02539b68805cb80c075d6efad63e3
Binary files /dev/null and b/demo/sample_imgs/000000292997.jpg differ
diff --git a/demo/sample_imgs/000000321214.jpg b/demo/sample_imgs/000000321214.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..427cdf048ac5bb950bdf808e791e6a52477169b4
Binary files /dev/null and b/demo/sample_imgs/000000321214.jpg differ
diff --git a/demo/sample_imgs/000000344909.jpg b/demo/sample_imgs/000000344909.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6323a743693f7d87c620888e5587edbf545f0f76
Binary files /dev/null and b/demo/sample_imgs/000000344909.jpg differ
diff --git a/demo/sample_imgs/000000360661.jpg b/demo/sample_imgs/000000360661.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c90c058740466131082aed6fee6964cda04a4711
Binary files /dev/null and b/demo/sample_imgs/000000360661.jpg differ
diff --git a/demo/sample_imgs/000000396903.jpg b/demo/sample_imgs/000000396903.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f10456ff9d60df5821d6427e672f9ffe51480d9b
Binary files /dev/null and b/demo/sample_imgs/000000396903.jpg differ
diff --git a/demo/sample_imgs/000000404922.jpg b/demo/sample_imgs/000000404922.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6595f7b259bbfeb5de8d8aa172254db8a0e56645
Binary files /dev/null and b/demo/sample_imgs/000000404922.jpg differ
diff --git a/demo/sample_imgs/000000442836.jpg b/demo/sample_imgs/000000442836.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e24da5924c518e34bc7c56dd7dc1404d58463b3
Binary files /dev/null and b/demo/sample_imgs/000000442836.jpg differ
diff --git a/demo/sample_imgs/000000464144.jpg b/demo/sample_imgs/000000464144.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b59f7e4b7fd684e7d2b47b3ac9036fb592a5457d
Binary files /dev/null and b/demo/sample_imgs/000000464144.jpg differ
diff --git a/demo/sample_imgs/000000482477.jpg b/demo/sample_imgs/000000482477.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..98c5277b190faa54f12e85df99768bef255abfff
Binary files /dev/null and b/demo/sample_imgs/000000482477.jpg differ
diff --git a/demo/sample_imgs/000000495054.jpg b/demo/sample_imgs/000000495054.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..53ae52e0be22fbb426eeec63d14e85c5b2b9fab2
Binary files /dev/null and b/demo/sample_imgs/000000495054.jpg differ
diff --git a/demo/sample_imgs/000000558073.jpg b/demo/sample_imgs/000000558073.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6d0a14a1b5b8765c0df91fde476f6a1d488a6b05
Binary files /dev/null and b/demo/sample_imgs/000000558073.jpg differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f033f4c1ff0e2e7d2ae6a19a0bcb8de6cd658a03
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+pyyaml==5.1
+torch
+torchvision
+opencv-python==4.4.0.40
+scikit-image
+kornia==0.5.11
+
+