
Commit 1d398a2

fix heatmap bug
1 parent c630cf9 commit 1d398a2
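
This change wires an optional "showatt" flag from the command line through Trainer, Build_Model and YOLOv4, so the ContextBlock2d attention (and the beta weights used for the heatmap) is only built and returned when requested. Evaluator.get_bbox() gains a mode argument, the heatmap is shown only in detection mode, and __show_heatmap() now receives the full beta instead of beta[2]. The hard-coded E:\YOLOV4 paths in the config are replaced with paths derived from the repository location, and the annotation files are now read from DATA_PATH.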

9 files changed (+69, -40 lines)


config/yolov4_config.py

Lines changed: 12 additions & 8 deletions
@@ -1,8 +1,12 @@
 # coding=utf-8
 # project
-DATA_PATH = "E:\YOLOV4/data"
-PROJECT_PATH = "E:\YOLOV4/data"
-DETECTION_PATH = "E:\YOLOV4/"
+import os.path as osp
+PROJECT_PATH = osp.abspath(osp.join(osp.dirname(__file__), '..'))
+
+DATA_PATH = osp.join(PROJECT_PATH, 'data')
+# PROJECT_PATH = "E:\YOLOV4/data"
+# PROJECT_PATH = "E:\YOLOV4/"
+
 
 MODEL_TYPE = {
     "TYPE": "Mobilenetv3-YOLOv4"
@@ -14,10 +18,10 @@
 
 # train
 TRAIN = {
-    "DATA_TYPE": "VOC",  # DATA_TYPE: VOC ,COCO or Customer
+    "DATA_TYPE": "Customer",  # DATA_TYPE: VOC ,COCO or Customer
     "TRAIN_IMG_SIZE": 416,
     "AUGMENT": True,
-    "BATCH_SIZE": 2,
+    "BATCH_SIZE": 1,
     "MULTI_SCALE_TRAIN": False,
     "IOU_THRESHOLD_LOSS": 0.5,
     "YOLO_EPOCHS": 50,
@@ -34,7 +38,7 @@
 # val
 VAL = {
     "TEST_IMG_SIZE": 416,
-    "BATCH_SIZE": 2,
+    "BATCH_SIZE": 1,
     "NUMBER_WORKERS": 0,
     "CONF_THRESH": 0.005,
     "NMS_THRESH": 0.45,
@@ -44,8 +48,8 @@
 }
 
 Customer_DATA = {
-    "NUM": 1,  # your dataset number
-    "CLASSES": ["aeroplane"],  # your dataset class
+    "NUM": 3,  # your dataset number
+    "CLASSES": ["unknown", "person", "car"],  # your dataset class
 }
 
 VOC_DATA = {
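
The effect of the path change: PROJECT_PATH is now derived from the location of the config file itself rather than a hard-coded Windows drive, so data is looked up under <repo>/data wherever the repository is checked out. A minimal sketch of how the new values resolve (the checkout path is illustrative):

import os.path as osp

# assume this file is <repo>/config/yolov4_config.py
PROJECT_PATH = osp.abspath(osp.join(osp.dirname(__file__), '..'))  # e.g. /home/user/YOLOv4-pytorch
DATA_PATH = osp.join(PROJECT_PATH, 'data')                         # e.g. /home/user/YOLOv4-pytorch/data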

eval/evaluator.py

Lines changed: 8 additions & 8 deletions
@@ -12,7 +12,7 @@
 
 
 class Evaluator(object):
-    def __init__(self, model, showatt):
+    def __init__(self, model=None, showatt=False):
         if cfg.TRAIN["DATA_TYPE"] == "VOC":
             self.classes = cfg.VOC_DATA["CLASSES"]
         elif cfg.TRAIN["DATA_TYPE"] == "COCO":
@@ -88,32 +88,32 @@ def APs_voc(self, multi_test=False, flip_test=False):
         self.inference_time = 1.0 * self.inference_time / len(img_inds)
         return self.__calc_APs(), self.inference_time
 
-    def get_bbox(self, img, multi_test=False, flip_test=False):
+    def get_bbox(self, img, multi_test=False, flip_test=False, mode=None):
         if multi_test:
             test_input_sizes = range(320, 640, 96)
             bboxes_list = []
             for test_input_size in test_input_sizes:
                 valid_scale = (0, np.inf)
                 bboxes_list.append(
-                    self.__predict(img, test_input_size, valid_scale)
+                    self.__predict(img, test_input_size, valid_scale, mode)
                 )
                 if flip_test:
                     bboxes_flip = self.__predict(
-                        img[:, ::-1], test_input_size, valid_scale
+                        img[:, ::-1], test_input_size, valid_scale, mode
                     )
                     bboxes_flip[:, [0, 2]] = (
                         img.shape[1] - bboxes_flip[:, [2, 0]]
                     )
                     bboxes_list.append(bboxes_flip)
             bboxes = np.row_stack(bboxes_list)
         else:
-            bboxes = self.__predict(img, self.val_shape, (0, np.inf))
+            bboxes = self.__predict(img, self.val_shape, (0, np.inf), mode)
 
         bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh)
 
         return bboxes
 
-    def __predict(self, img, test_shape, valid_scale):
+    def __predict(self, img, test_shape, valid_scale, mode):
         org_img = np.copy(img)
         org_h, org_w, _ = org_img.shape
 
@@ -130,8 +130,8 @@ def __predict(self, img, test_shape, valid_scale):
         bboxes = self.__convert_pred(
             pred_bbox, test_shape, (org_h, org_w), valid_scale
         )
-        if self.showatt and len(img):
-            self.__show_heatmap(beta[2], org_img)
+        if self.showatt and len(img) and mode == 'det':
+            self.__show_heatmap(beta, org_img)
         return bboxes
 
     def __show_heatmap(self, beta, img):
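
Taken together, the heatmap is now drawn only when the model was built with showatt and get_bbox() is called with mode='det'; during validation (mode left as None) no window is opened, and __show_heatmap() receives the full beta tensor rather than beta[2]. A hedged usage sketch (the image path, device and import paths are illustrative, not part of this commit):

import cv2
from model.build_model import Build_Model
from eval.evaluator import Evaluator

model = Build_Model(showatt=True).to("cuda").eval()   # assumes a CUDA device and loaded weights
evaluator = Evaluator(model, showatt=True)

img = cv2.imread("data/demo.jpg")                     # illustrative path
boxes = evaluator.get_bbox(img, mode="det")           # attention heatmap is displayed
boxes = evaluator.get_bbox(img, mode="val")           # no heatmap, detection only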

eval_voc.py

Lines changed: 15 additions & 5 deletions
@@ -18,23 +18,26 @@ def __init__(
         weight_path=None,
         visiual=None,
         eval=False,
+        showatt=False,
+        mode=None
     ):
         self.__num_class = cfg.VOC_DATA["NUM"]
         self.__conf_threshold = cfg.VAL["CONF_THRESH"]
         self.__nms_threshold = cfg.VAL["NMS_THRESH"]
         self.__device = gpu.select_device(gpu_id)
         self.__multi_scale_val = cfg.VAL["MULTI_SCALE_VAL"]
         self.__flip_val = cfg.VAL["FLIP_VAL"]
-
+        self.__showatt = showatt
         self.__visiual = visiual
         self.__eval = eval
+        self.__mode = mode
         self.__classes = cfg.VOC_DATA["CLASSES"]
 
-        self.__model = Build_Model().to(self.__device)
+        self.__model = Build_Model(showatt=self.__showatt).to(self.__device)
 
         self.__load_model_weights(weight_path)
 
-        self.__evalter = Evaluator(self.__model, showatt=False)
+        self.__evalter = Evaluator(self.__model, showatt=self.showatt)
 
     def __load_model_weights(self, weight_path):
         print("loading weight file from : {}".format(weight_path))
@@ -76,7 +79,7 @@ def detection(self):
             img = cv2.imread(path)
             assert img is not None
 
-            bboxes_prd = self.__evalter.get_bbox(img, v)
+            bboxes_prd = self.__evalter.get_bbox(img, v, mode=self.__mode)
             if bboxes_prd.shape[0] != 0:
                 boxes = bboxes_prd[..., :4]
                 class_inds = bboxes_prd[..., 5].astype(np.int32)
@@ -107,7 +110,7 @@ def detection(self):
        help="weight file path",
    )
    parser.add_argument(
-        "--log_val_path", type=str, default="log_val", help="weight file path"
+        "--log_val_path", type=str, default="log_val", help="val log file path"
    )
    parser.add_argument(
        "--gpu_id",
@@ -125,7 +128,10 @@ def detection(self):
        "--eval", action="store_true", default=True, help="eval the mAP or not"
    )
    parser.add_argument("--mode", type=str, default="val", help="val or det")
+    parser.add_argument("--showatt", type=bool, default=True, help="whether to show attention map")
    opt = parser.parse_args()
+    if not os.path.exists(opt.log_val_path):
+        os.mkdir(opt.log_val_path)
    logger = Logger(
        log_file_name=opt.log_val_path + "/log_voc_val.txt",
        log_level=logging.DEBUG,
@@ -138,11 +144,15 @@ def detection(self):
            weight_path=opt.weight_path,
            eval=opt.eval,
            visiual=opt.visiual,
+            showatt=opt.showatt,
+            mode=opt.mode
        ).val()
    else:
        Evaluation(
            gpu_id=opt.gpu_id,
            weight_path=opt.weight_path,
            eval=opt.eval,
            visiual=opt.visiual,
+            showatt=opt.showatt,
+            mode=opt.mode
        ).detection()
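
With the two new flags wired through, running eval_voc.py in detection mode now also controls the attention display; programmatically the bottom of the script is roughly equivalent to this sketch (the weight file and image directory are placeholders):

Evaluation(
    gpu_id=0,
    weight_path="weight/best.pt",   # placeholder
    visiual="data/test_imgs",       # placeholder: directory of images to detect
    eval=False,
    showatt=True,
    mode="det",
).detection()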

model/YOLOv4.py

Lines changed: 9 additions & 6 deletions
@@ -5,7 +5,7 @@
 from .backbones.CSPDarknet53 import _BuildCSPDarknet53
 from .backbones.mobilenetv2 import _BuildMobilenetV2
 from .backbones.mobilenetv3 import _BuildMobilenetV3
-
+from .layers.global_context_block import ContextBlock2d
 
 class Conv(nn.Module):
     def __init__(self, in_channels, out_channels, kernel_size, stride=1):
@@ -245,10 +245,9 @@ def __initialize_weights(self):
 
 
 class YOLOv4(nn.Module):
-    def __init__(self, weight_path=None, out_channels=255, resume=False):
+    def __init__(self, weight_path=None, out_channels=255, resume=False, showatt=False, feature_channels=0):
         super(YOLOv4, self).__init__()
-
-        a = cfg.MODEL_TYPE["TYPE"]
+        self.showatt = showatt
         if cfg.MODEL_TYPE["TYPE"] == "YOLOv4":
             # CSPDarknet53 backbone
             self.backbone, feature_channels = _BuildCSPDarknet53(
@@ -267,6 +266,8 @@ def __init__(self, weight_path=None, out_channels=255, resume=False):
         else:
             assert print("model type must be YOLOv4 or Mobilenet-YOLOv4")
 
+        if self.showatt:
+            self.attention = ContextBlock2d(feature_channels[-1], feature_channels[-1])
         # Spatial Pyramid Pooling
         self.spp = SpatialPyramidPooling(feature_channels)
 
@@ -277,12 +278,14 @@ def __init__(self, weight_path=None, out_channels=255, resume=False):
         self.predict_net = PredictNet(feature_channels, out_channels)
 
     def forward(self, x):
+        beta = None
         features = self.backbone(x)
+        if self.showatt:
+            features[-1], beta = self.attention(features[-1])
         features[-1] = self.spp(features[-1])
         features = self.panet(features)
         predicts = self.predict_net(features)
-
-        return predicts
+        return predicts, beta
 
 
 if __name__ == "__main__":
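
forward() now always returns a (predicts, beta) pair: beta stays None unless showatt is set, in which case the ContextBlock2d attention is applied to the last backbone feature map before SPP and its weights are returned for the heatmap. A small sketch of how a caller unpacks the new return value (the input size is illustrative):

import torch

net = YOLOv4(out_channels=255, showatt=True)       # backbone picked from cfg.MODEL_TYPE
predicts, beta = net(torch.randn(1, 3, 416, 416))  # beta is None when showatt=False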

model/backbones/mobilenetv3.py

Lines changed: 1 addition & 1 deletion
@@ -249,7 +249,7 @@ def __init__(
         weight_path=None,
         resume=False,
         width_mult=1.0,
-        feature_channels=[24, 48, 1024],
+        feature_channels=[24, 48, 1024]
     ):
         super(MobilenetV3, self).__init__()
         self.feature_channels = feature_channels

model/build_model.py

Lines changed: 6 additions & 4 deletions
@@ -14,9 +14,9 @@ class Build_Model(nn.Module):
     Note : int the __init__(), to define the modules should be in order, because of the weight file is order
     """
 
-    def __init__(self, weight_path=None, resume=False):
+    def __init__(self, weight_path=None, resume=False, showatt=False):
         super(Build_Model, self).__init__()
-
+        self.__showatt = showatt
         self.__anchors = torch.FloatTensor(cfg.MODEL["ANCHORS"])
         self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"])
         if cfg.TRAIN["DATA_TYPE"] == "VOC":
@@ -31,6 +31,7 @@ def __init__(self, weight_path=None, resume=False):
             weight_path=weight_path,
             out_channels=self.__out_channel,
             resume=resume,
+            showatt=showatt
         )
         # small
         self.__head_s = Yolo_head(
@@ -47,8 +48,7 @@
 
     def forward(self, x):
         out = []
-
-        x_s, x_m, x_l = self.__yolov4(x)
+        [x_s, x_m, x_l], beta = self.__yolov4(x)
 
         out.append(self.__head_s(x_s))
         out.append(self.__head_m(x_m))
@@ -59,6 +59,8 @@
             return p, p_d  # smalll, medium, large
         else:
             p, p_d = list(zip(*out))
+            if self.__showatt:
+                return p, torch.cat(p_d, 0), beta
             return p, torch.cat(p_d, 0)
 
 
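Because YOLOv4.forward() now returns the extra beta, Build_Model unpacks it and, in eval mode with showatt enabled, returns a third element. Callers that turn showatt on must accept the 3-tuple; a sketch (the input tensor is illustrative):

import torch

model = Build_Model(showatt=True).eval()
x = torch.randn(1, 3, 416, 416)
with torch.no_grad():
    p, p_d, beta = model(x)      # showatt=True in eval mode -> three values
# with the default showatt=False the call stays: p, p_d = model(x)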

train.py

Lines changed: 16 additions & 3 deletions
@@ -30,9 +30,15 @@ def detection_collate(batch):
 
 
 class Trainer(object):
-    def __init__(self, weight_path, resume, gpu_id, accumulate, fp_16):
+    def __init__(self, weight_path=None,
+                 resume=False,
+                 gpu_id=0,
+                 accumulate=1,
+                 fp_16=False,
+                 showatt=False):
         init_seeds(0)
         self.fp_16 = fp_16
+        self.showatt = showatt
         self.device = gpu.select_device(gpu_id)
         self.start_epoch = 0
         self.best_mAP = 0.0
@@ -59,7 +65,7 @@ def __init__(self, weight_path, resume, gpu_id, accumulate, fp_16):
             pin_memory=True,
         )
 
-        self.yolov4 = Build_Model(weight_path=weight_path, resume=resume).to(
+        self.yolov4 = Build_Model(weight_path=weight_path, resume=resume, showatt=self.showatt).to(
             self.device
         )
 
@@ -269,7 +275,7 @@ def train(self):
             logger.info("val img size is {}".format(cfg.VAL["TEST_IMG_SIZE"]))
             with torch.no_grad():
                 APs, inference_time = Evaluator(
-                    self.yolov4, showatt=False
+                    self.yolov4, showatt=self.showatt
                 ).APs_voc()
                 for i in APs:
                     logger.info("{} --> mAP : {}".format(i, APs[i]))
@@ -340,6 +346,12 @@ def train(self):
        default=False,
        help="whither to use fp16 precision",
    )
+    parser.add_argument(
+        "--showatt",
+        type=bool,
+        default=True,
+        help="whether to show attention map"
+    )
    opt = parser.parse_args()
    writer = SummaryWriter(logdir=opt.log_path + "/event")
    logger = Logger(
@@ -354,4 +366,5 @@ def train(self):
        gpu_id=opt.gpu_id,
        accumulate=opt.accumulate,
        fp_16=opt.fp_16,
+        showatt = opt.showatt
    ).train()
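
One caveat with the new flag: argparse's type=bool converts any non-empty string to True, so passing --showatt False on the command line still enables the attention branch; only the default value really matters here. If a proper on/off switch were wanted, the usual pattern is a store_false toggle (a sketch, not part of this commit):

parser.add_argument("--no-showatt", dest="showatt", action="store_false",
                    help="disable the attention heatmap")
parser.set_defaults(showatt=True)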

utils/datasets.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ def __load_annotations(self, anno_type):
             "test",
         ], "You must choice one of the 'train' or 'test' for anno_type parameter"
         anno_path = os.path.join(
-            cfg.PROJECT_PATH, anno_type + "_annotation.txt"
+            cfg.DATA_PATH, anno_type + "_annotation.txt"
         )
         with open(anno_path, "r") as f:
             annotations = list(filter(lambda x: len(x) > 0, f.readlines()))

utils/heatmap.py

Lines changed: 1 addition & 4 deletions
@@ -8,10 +8,7 @@
 def imshowAtt(beta, img=None):
     cv2.namedWindow("img")
     cv2.namedWindow("img1")
-    if img is None:
-        img = cv2.imread(
-            os.path.join("VOCdevkit\VOC2007\JPEGImages/000001.jpg"), 1
-        )  # the same input image
+    assert img is not None
 
     h, w, c = img.shape
     img1 = img.copy()
