From b31d7276ddd388210af282aa2cc5641706f33635 Mon Sep 17 00:00:00 2001
From: Avery <40060351+n719710930@users.noreply.github.com>
Date: Fri, 1 Jul 2022 15:54:23 +0800
Subject: [PATCH 1/3] rewrite rpn operation(from numpy to pytorch)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

我发现frcnn_trainning.py中的numpy操作极大地影响了训练速度，而且会导致CPU利用率太高，所以我将其中绝大部分numpy操作改为了pytorch操作，在我的实验设置下提速10%左右，每个迭代由1.33 it/s 降低为 1.13it/s
---
 nets/frcnn_training.py | 329 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 305 insertions(+), 24 deletions(-)

diff --git a/nets/frcnn_training.py b/nets/frcnn_training.py
index d4eb439..68650bf 100644
--- a/nets/frcnn_training.py
+++ b/nets/frcnn_training.py
@@ -1,4 +1,6 @@
 import math
+import random
+import time
 from functools import partial
 
 import numpy as np
@@ -8,6 +10,33 @@
 
 
 def bbox_iou(bbox_a, bbox_b):
+    if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
+        print(bbox_a, bbox_b)
+        raise IndexError
+
+    tlt = torch.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
+    brt = torch.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
+
+    area_i = torch.prod(brt - tlt, dim=2) * (tlt < brt).all(dim=2)
+    area_a = torch.prod(bbox_a[:, 2:] - bbox_a[:, :2], dim=1)
+    area_b = torch.prod(bbox_b[:, 2:] - bbox_b[:, :2], dim=1)
+
+    iou = area_i / (area_a[:, None] + area_b - area_i)
+
+    return iou
+
+
+def Tbbox_iou(bbox_a, bbox_b):
+    def box_area(box):
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(bbox_a.T)
+    area2 = box_area(bbox_b.T)
+    inter = (torch.min(bbox_a[:, None, 2:], bbox_b[:, 2:]) - torch.max(bbox_a[:, None, :2], bbox_b[:, :2])).clamp(0).prod(2)
+    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
+
+
+def bbox_iounp(bbox_a, bbox_b):
     if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
         print(bbox_a, bbox_b)
         raise IndexError
@@ -19,6 +48,34 @@ def bbox_iou(bbox_a, bbox_b):
     return area_i / (area_a[:, None] + area_b - area_i)
 
 def bbox2loc(src_bbox, dst_bbox):
+    # src_bbox = src_bbox.cpu().numpy()
+    width = src_bbox[:, 2] - src_bbox[:, 0]
+    height = src_bbox[:, 3] - src_bbox[:, 1]
+    ctr_x = src_bbox[:, 0] + 0.5 * width
+    ctr_y = src_bbox[:, 1] + 0.5 * height
+
+    base_width = dst_bbox[:, 2] - dst_bbox[:, 0]
+    base_height = dst_bbox[:, 3] - dst_bbox[:, 1]
+    base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width
+    base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height
+
+    eps = torch.finfo(height.dtype).eps
+    # print(eps)
+    epst = torch.empty(len(height)).cuda()
+    epst.fill_(eps)
+    width = torch.maximum(width, epst)
+    height = torch.maximum(height, epst)
+
+    dx = (base_ctr_x - ctr_x) / width
+    dy = (base_ctr_y - ctr_y) / height
+    dw = torch.log(base_width / width)
+    dh = torch.log(base_height / height)
+
+    loc = torch.cat((dx.unsqueeze(0), dy.unsqueeze(0), dw.unsqueeze(0), dh.unsqueeze(0)), dim=0)
+    return loc.T
+
+
+def bbox2locnp(src_bbox, dst_bbox):
     width = src_bbox[:, 2] - src_bbox[:, 0]
     height = src_bbox[:, 3] - src_bbox[:, 1]
     ctr_x = src_bbox[:, 0] + 0.5 * width
@@ -41,6 +98,7 @@ def bbox2loc(src_bbox, dst_bbox):
     loc = np.vstack((dx, dy, dw, dh)).transpose()
     return loc
 
+
 class AnchorTargetCreator(object):
     def __init__(self, n_sample=256, pos_iou_thresh=0.7, neg_iou_thresh=0.3, pos_ratio=0.5):
         self.n_sample       = n_sample
@@ -54,17 +112,20 @@ def __call__(self, bbox, anchor):
             loc = bbox2loc(anchor, bbox[argmax_ious])
             return loc, label
         else:
-            return np.zeros_like(anchor), label
+            return torch.zeros_like(anchor), label
 
     def _calc_ious(self, anchor, bbox):
         #----------------------------------------------#
         #   anchor和bbox的iou
         #   获得的ious的shape为[num_anchors, num_gt]
         #----------------------------------------------#
+        # iousO = bbox_iou(anchor.cpu().numpy(), bbox.cpu().numpy())
         ious = bbox_iou(anchor, bbox)
+        # print("iou_numpy:", iousO)
+        # print("iou_tensor:", ious)
 
-        if len(bbox)==0:
-            return np.zeros(len(anchor), np.int32), np.zeros(len(anchor)), np.zeros(len(bbox))
+        if len(bbox) == 0:
+            return torch.zeros(len(anchor)), torch.zeros(len(anchor)), torch.zeros(len(bbox))
         #---------------------------------------------------------#
         #   获得每一个先验框最对应的真实框  [num_anchors, ]
         #---------------------------------------------------------#
@@ -72,7 +133,10 @@ def _calc_ious(self, anchor, bbox):
         #---------------------------------------------------------#
         #   找出每一个先验框最对应的真实框的iou  [num_anchors, ]
         #---------------------------------------------------------#
-        max_ious = np.max(ious, axis=1)
+        # max_iousO = np.max(iousO, axis=1)
+        max_ious, _ = torch.max(ious, dim=1)
+        # print("iou_numpy:", max_iousO)
+        # print("iou_tensor:", max_ious)
         #---------------------------------------------------------#
         #   获得每一个真实框最对应的先验框  [num_gt, ]
         #---------------------------------------------------------#
@@ -90,8 +154,8 @@ def _create_label(self, anchor, bbox):
         #   1是正样本，0是负样本，-1忽略
         #   初始化的时候全部设置为-1
         # ------------------------------------------ #
-        label = np.empty((len(anchor),), dtype=np.int32)
-        label.fill(-1)
+        label = torch.empty(len(anchor)).cuda()
+        label.fill_(-1)
 
         # ------------------------------------------------------------------------ #
         #   argmax_ious为每个先验框对应的最大的真实框的序号         [num_anchors, ]
@@ -110,6 +174,96 @@ def _create_label(self, anchor, bbox):
         if len(gt_argmax_ious)>0:
             label[gt_argmax_ious] = 1
 
+        # ----------------------------------------------------- #
+        #   判断正样本数量是否大于128，如果大于则限制在128
+        # ----------------------------------------------------- #
+        n_pos = int(self.pos_ratio * self.n_sample)
+        pos_index = torch.where(label == 1)[0].cpu().numpy()
+        if len(pos_index) > n_pos:
+            disable_index = np.random.choice(pos_index, size=(len(pos_index) - n_pos), replace=False)
+            label[disable_index] = -1
+
+        # ----------------------------------------------------- #
+        #   平衡正负样本，保持总数量为256
+        # ----------------------------------------------------- #
+        n_neg = self.n_sample - torch.sum(label == 1)
+        neg_index = torch.where(label == 0)[0].cpu().numpy()
+        if len(neg_index) > n_neg:
+            disable_index = np.random.choice(neg_index, size=(len(neg_index) - n_neg.item()), replace=False)
+            label[disable_index] = -1
+
+        return argmax_ious, label
+
+
+class AnchorTargetCreatornp(object):
+    def __init__(self, n_sample=256, pos_iou_thresh=0.7, neg_iou_thresh=0.3, pos_ratio=0.5):
+        self.n_sample = n_sample
+        self.pos_iou_thresh = pos_iou_thresh
+        self.neg_iou_thresh = neg_iou_thresh
+        self.pos_ratio = pos_ratio
+
+    def __call__(self, bbox, anchor):
+        argmax_ious, label = self._create_label(anchor, bbox)
+        if (label > 0).any():
+            loc = bbox2locnp(anchor, bbox[argmax_ious])
+            return loc, label
+        else:
+            return np.zeros_like(anchor), label
+
+    def _calc_ious(self, anchor, bbox):
+        # ----------------------------------------------#
+        #   anchor和bbox的iou
+        #   获得的ious的shape为[num_anchors, num_gt]
+        # ----------------------------------------------#
+        ious = bbox_iounp(anchor, bbox)
+
+        if len(bbox) == 0:
+            return np.zeros(len(anchor), np.int32), np.zeros(len(anchor)), np.zeros(len(bbox))
+        # ---------------------------------------------------------#
+        #   获得每一个先验框最对应的真实框  [num_anchors, ]
+        # ---------------------------------------------------------#
+        argmax_ious = ious.argmax(axis=1)
+        # ---------------------------------------------------------#
+        #   找出每一个先验框最对应的真实框的iou  [num_anchors, ]
+        # ---------------------------------------------------------#
+        max_ious = np.max(ious, axis=1)
+        # ---------------------------------------------------------#
+        #   获得每一个真实框最对应的先验框  [num_gt, ]
+        # ---------------------------------------------------------#
+        gt_argmax_ious = ious.argmax(axis=0)
+        # ---------------------------------------------------------#
+        #   保证每一个真实框都存在对应的先验框
+        # ---------------------------------------------------------#
+        for i in range(len(gt_argmax_ious)):
+            argmax_ious[gt_argmax_ious[i]] = i
+
+        return argmax_ious, max_ious, gt_argmax_ious
+
+    def _create_label(self, anchor, bbox):
+        # ------------------------------------------ #
+        #   1是正样本，0是负样本，-1忽略
+        #   初始化的时候全部设置为-1
+        # ------------------------------------------ #
+        label = np.empty((len(anchor),), dtype=np.int32)
+        label.fill(-1)
+
+        # ------------------------------------------------------------------------ #
+        #   argmax_ious为每个先验框对应的最大的真实框的序号         [num_anchors, ]
+        #   max_ious为每个真实框对应的最大的真实框的iou             [num_anchors, ]
+        #   gt_argmax_ious为每一个真实框对应的最大的先验框的序号    [num_gt, ]
+        # ------------------------------------------------------------------------ #
+        argmax_ious, max_ious, gt_argmax_ious = self._calc_ious(anchor, bbox)
+
+        # ----------------------------------------------------- #
+        #   如果小于门限值则设置为负样本
+        #   如果大于门限值则设置为正样本
+        #   每个真实框至少对应一个先验框
+        # ----------------------------------------------------- #
+        label[max_ious < self.neg_iou_thresh] = 0
+        label[max_ious >= self.pos_iou_thresh] = 1
+        if len(gt_argmax_ious) > 0:
+            label[gt_argmax_ious] = 1
+
         # ----------------------------------------------------- #
         #   判断正样本数量是否大于128，如果大于则限制在128
         # ----------------------------------------------------- #
@@ -141,25 +295,25 @@ def __init__(self, n_sample=128, pos_ratio=0.5, pos_iou_thresh=0.5, neg_iou_thre
         self.neg_iou_thresh_low = neg_iou_thresh_low
 
     def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
-        roi = np.concatenate((roi.detach().cpu().numpy(), bbox), axis=0)
+        roi = torch.cat((roi, bbox), dim=0)
+
         # ----------------------------------------------------- #
         #   计算建议框和真实框的重合程度
         # ----------------------------------------------------- #
         iou = bbox_iou(roi, bbox)
-        
-        if len(bbox)==0:
-            gt_assignment = np.zeros(len(roi), np.int32)
-            max_iou = np.zeros(len(roi))
-            gt_roi_label = np.zeros(len(roi))
+        if len(bbox) == 0:
+            gt_assignment = torch.zeros(len(roi))
+            max_iou = torch.zeros(len(roi))
+            gt_roi_label = torch.zeros(len(roi))
         else:
             #---------------------------------------------------------#
             #   获得每一个建议框最对应的真实框  [num_roi, ]
             #---------------------------------------------------------#
-            gt_assignment = iou.argmax(axis=1)
+            gt_assignment = iou.argmax(dim=1)
             #---------------------------------------------------------#
             #   获得每一个建议框最对应的真实框的iou  [num_roi, ]
             #---------------------------------------------------------#
-            max_iou = iou.max(axis=1)
+            max_iou = iou.max(dim=1)[0]
             #---------------------------------------------------------#
             #   真实框的标签要+1因为有背景的存在
             #---------------------------------------------------------#
@@ -169,7 +323,7 @@ def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
         #   满足建议框和真实框重合程度大于neg_iou_thresh_high的作为负样本
         #   将正样本的数量限制在self.pos_roi_per_image以内
         #----------------------------------------------------------------#
-        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
+        pos_index = torch.where(max_iou >= self.pos_iou_thresh)[0].cpu().numpy()
         pos_roi_per_this_image = int(min(self.pos_roi_per_image, pos_index.size))
         if pos_index.size > 0:
             pos_index = np.random.choice(pos_index, size=pos_roi_per_this_image, replace=False)
@@ -178,7 +332,7 @@ def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
         #   满足建议框和真实框重合程度小于neg_iou_thresh_high大于neg_iou_thresh_low作为负样本
         #   将正样本的数量和负样本的数量的总和固定成self.n_sample
         #-----------------------------------------------------------------------------------------------------#
-        neg_index = np.where((max_iou < self.neg_iou_thresh_high) & (max_iou >= self.neg_iou_thresh_low))[0]
+        neg_index = torch.where((max_iou < self.neg_iou_thresh_high) & (max_iou >= self.neg_iou_thresh_low))[0].cpu().numpy()
         neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
         neg_roi_per_this_image = int(min(neg_roi_per_this_image, neg_index.size))
         if neg_index.size > 0:
@@ -189,19 +343,92 @@ def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
         #   gt_roi_loc      [n_sample, 4]
         #   gt_roi_label    [n_sample, ]
         #---------------------------------------------------------#
-        keep_index = np.append(pos_index, neg_index)
+        keep_index = torch.from_numpy(np.append(pos_index, neg_index))
 
         sample_roi = roi[keep_index]
         if len(bbox)==0:
-            return sample_roi, np.zeros_like(sample_roi), gt_roi_label[keep_index]
+            return sample_roi, torch.zeros_like(sample_roi), gt_roi_label[keep_index]
 
         gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
+        gt_roi_loc = (gt_roi_loc / torch.tensor(loc_normalize_std).cuda())
+
+        gt_roi_label = gt_roi_label[keep_index]
+        gt_roi_label[pos_roi_per_this_image:] = 0
+        return sample_roi, gt_roi_loc, gt_roi_label
+
+
+class ProposalTargetCreatornp(object):
+    def __init__(self, n_sample=128, pos_ratio=0.5, pos_iou_thresh=0.5, neg_iou_thresh_high=0.5, neg_iou_thresh_low=0):
+        self.n_sample = n_sample
+        self.pos_ratio = pos_ratio
+        self.pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
+        self.pos_iou_thresh = pos_iou_thresh
+        self.neg_iou_thresh_high = neg_iou_thresh_high
+        self.neg_iou_thresh_low = neg_iou_thresh_low
+
+    def __call__(self, roi, bbox, label, loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
+        roi = np.concatenate((roi.detach().cpu().numpy(), bbox), axis=0)
+        # ----------------------------------------------------- #
+        #   计算建议框和真实框的重合程度
+        # ----------------------------------------------------- #
+        iou = bbox_iounp(roi, bbox)
+
+        if len(bbox) == 0:
+            gt_assignment = np.zeros(len(roi), np.int32)
+            max_iou = np.zeros(len(roi))
+            gt_roi_label = np.zeros(len(roi))
+        else:
+            # ---------------------------------------------------------#
+            #   获得每一个建议框最对应的真实框  [num_roi, ]
+            # ---------------------------------------------------------#
+            gt_assignment = iou.argmax(axis=1)
+            # ---------------------------------------------------------#
+            #   获得每一个建议框最对应的真实框的iou  [num_roi, ]
+            # ---------------------------------------------------------#
+            max_iou = iou.max(axis=1)
+            # ---------------------------------------------------------#
+            #   真实框的标签要+1因为有背景的存在
+            # ---------------------------------------------------------#
+            gt_roi_label = label[gt_assignment] + 1
+
+        # ----------------------------------------------------------------#
+        #   满足建议框和真实框重合程度大于neg_iou_thresh_high的作为负样本
+        #   将正样本的数量限制在self.pos_roi_per_image以内
+        # ----------------------------------------------------------------#
+        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
+        pos_roi_per_this_image = int(min(self.pos_roi_per_image, pos_index.size))
+        if pos_index.size > 0:
+            pos_index = np.random.choice(pos_index, size=pos_roi_per_this_image, replace=False)
+
+        # -----------------------------------------------------------------------------------------------------#
+        #   满足建议框和真实框重合程度小于neg_iou_thresh_high大于neg_iou_thresh_low作为负样本
+        #   将正样本的数量和负样本的数量的总和固定成self.n_sample
+        # -----------------------------------------------------------------------------------------------------#
+        neg_index = np.where((max_iou < self.neg_iou_thresh_high) & (max_iou >= self.neg_iou_thresh_low))[0]
+        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
+        neg_roi_per_this_image = int(min(neg_roi_per_this_image, neg_index.size))
+        if neg_index.size > 0:
+            neg_index = np.random.choice(neg_index, size=neg_roi_per_this_image, replace=False)
+
+        # ---------------------------------------------------------#
+        #   sample_roi      [n_sample, ]
+        #   gt_roi_loc      [n_sample, 4]
+        #   gt_roi_label    [n_sample, ]
+        # ---------------------------------------------------------#
+        keep_index = np.append(pos_index, neg_index)
+
+        sample_roi = roi[keep_index]
+        if len(bbox) == 0:
+            return sample_roi, np.zeros_like(sample_roi), gt_roi_label[keep_index]
+
+        gt_roi_loc = bbox2locnp(sample_roi, bbox[gt_assignment[keep_index]])
         gt_roi_loc = (gt_roi_loc / np.array(loc_normalize_std, np.float32))
 
         gt_roi_label = gt_roi_label[keep_index]
         gt_roi_label[pos_roi_per_this_image:] = 0
         return sample_roi, gt_roi_loc, gt_roi_label
 
+
 class FasterRCNNTrainer(nn.Module):
     def __init__(self, model_train, optimizer):
         super(FasterRCNNTrainer, self).__init__()
@@ -211,8 +438,14 @@ def __init__(self, model_train, optimizer):
         self.rpn_sigma      = 1
         self.roi_sigma      = 1
 
-        self.anchor_target_creator      = AnchorTargetCreator()
-        self.proposal_target_creator    = ProposalTargetCreator()
+        #numpy
+        self.anchor_target_creatornp = AnchorTargetCreatornp()
+        self.proposal_target_creatornp = ProposalTargetCreatornp()
+
+        #pytorch
+        # self.anchor_target_creator      = AnchorTargetCreator()
+        # self.proposal_target_creator    = ProposalTargetCreator()
+
 
         self.loc_normalize_std          = [0.1, 0.1, 0.2, 0.2]
 
@@ -240,12 +473,28 @@ def forward(self, imgs, bboxes, labels, scale):
         #-------------------------------#
         #   获取公用特征层
         #-------------------------------#
+        at = time.time()
         base_feature = self.model_train(imgs, mode = 'extractor')
+        #print("base_feature.shape")
+        #print(base_feature.shape)
+        # -------------------------------#
+        #   获取公用特征层一致性损失
+        # -------------------------------#
+        #print(base_feature[0],base_feature[8])
+        #loss_distance = F.pairwise_distance(base_feature[0:8], base_feature[8:], p=2)
+        #loss_dis = loss_distance.mean()
+        #print(loss_dis)
+        #similarity = torch.cosine_similarity(base_feature[0:8], base_feature[8:], dim=1)
+        #loss_si = 1 - similarity.mean()
+        #print(loss_si)
+        # loss = loss + loss_dis
 
         # -------------------------------------------------- #
         #   利用rpn网络获得调整参数、得分、建议框、先验框
         # -------------------------------------------------- #
+        bt = time.time()
         rpn_locs, rpn_scores, rois, roi_indices, anchor = self.model_train(x = [base_feature, img_size], scale = scale, mode = 'rpn')
+        kt = time.time()
         
         rpn_loc_loss_all, rpn_cls_loss_all, roi_loc_loss_all, roi_cls_loss_all  = 0, 0, 0, 0
         sample_rois, sample_indexes, gt_roi_locs, gt_roi_labels                 = [], [], [], []
@@ -261,9 +510,19 @@ def forward(self, imgs, bboxes, labels, scale):
             #   gt_rpn_loc      [num_anchors, 4]
             #   gt_rpn_label    [num_anchors, ]
             # -------------------------------------------------- #
-            gt_rpn_loc, gt_rpn_label    = self.anchor_target_creator(bbox, anchor[0].cpu().numpy())
-            gt_rpn_loc                  = torch.Tensor(gt_rpn_loc).type_as(rpn_locs)
-            gt_rpn_label                = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long()
+
+            #numpy
+            gt_rpn_loc, gt_rpn_label = self.anchor_target_creatornp(bbox, anchor[0].cpu().numpy())
+            gt_rpn_loc = torch.Tensor(gt_rpn_loc).type_as(rpn_locs)
+            gt_rpn_label = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long()
+
+            #pytorch
+            # bbox = torch.from_numpy(bbox).cuda()
+            # label = torch.from_numpy(label).cuda()
+            # gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(bbox, anchor[0])
+            # gt_rpn_loc = gt_rpn_loc.type_as(rpn_locs)
+            # gt_rpn_label = gt_rpn_label.type_as(rpn_locs).long()
+
             # -------------------------------------------------- #
             #   分别计算建议框网络的回归损失和分类损失
             # -------------------------------------------------- #
@@ -279,15 +538,25 @@ def forward(self, imgs, bboxes, labels, scale):
             #   gt_roi_loc      [n_sample, 4]
             #   gt_roi_label    [n_sample, ]
             # ------------------------------------------------------ #
-            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std)
+            #numpy
+            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creatornp(roi, bbox, label, self.loc_normalize_std)
             sample_rois.append(torch.Tensor(sample_roi).type_as(rpn_locs))
             sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
             gt_roi_locs.append(torch.Tensor(gt_roi_loc).type_as(rpn_locs))
             gt_roi_labels.append(torch.Tensor(gt_roi_label).type_as(rpn_locs).long())
+
+            #pytorch
+            # sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std)
+            # sample_rois.append(sample_roi.type_as(rpn_locs))
+            # sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
+            # gt_roi_locs.append(gt_roi_loc.type_as(rpn_locs))
+            # gt_roi_labels.append(gt_roi_label.type_as(rpn_locs).long())
             
         sample_rois     = torch.stack(sample_rois, dim=0)
         sample_indexes  = torch.stack(sample_indexes, dim=0)
         roi_cls_locs, roi_scores = self.model_train([base_feature, sample_rois, sample_indexes, img_size], mode = 'head')
+
+        ct = time.time()
         for i in range(n):
             # ------------------------------------------------------ #
             #   根据建议框的种类，取出对应的回归预测结果
@@ -310,6 +579,13 @@ def forward(self, imgs, bboxes, labels, scale):
 
             roi_loc_loss_all += roi_loc_loss
             roi_cls_loss_all += roi_cls_loss
+
+        dt = time.time()
+
+        # print("backbone", bt - at)
+        # print("rpn", ct - bt)
+        # print("rpn_net:", kt-bt)
+        # print("head", dt - ct)
             
         losses = [rpn_loc_loss_all/n, rpn_cls_loss_all/n, roi_loc_loss_all/n, roi_cls_loss_all/n]
         losses = losses + [sum(losses)]
@@ -323,8 +599,10 @@ def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None):
             self.optimizer.step()
         else:
             from torch.cuda.amp import autocast
+            at = time.time()
             with autocast():
                 losses = self.forward(imgs, bboxes, labels, scale)
+            bt = time.time()
 
             #----------------------#
             #   反向传播
@@ -332,6 +610,9 @@ def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None):
             scaler.scale(losses[-1]).backward()
             scaler.step(self.optimizer)
             scaler.update()
+            ct = time.time()
+            print("forward", bt-at)
+            print("backward", ct-bt)
             
         return losses
 

From ae3804eec9f76fbc361b70731e01d67afca6fbe5 Mon Sep 17 00:00:00 2001
From: Avery <40060351+n719710930@users.noreply.github.com>
Date: Fri, 1 Jul 2022 16:15:25 +0800
Subject: [PATCH 2/3] Update frcnn_training.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

删除冗余代码
---
 nets/frcnn_training.py | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/nets/frcnn_training.py b/nets/frcnn_training.py
index 68650bf..cef6c1f 100644
--- a/nets/frcnn_training.py
+++ b/nets/frcnn_training.py
@@ -26,16 +26,6 @@ def bbox_iou(bbox_a, bbox_b):
     return iou
 
 
-def Tbbox_iou(bbox_a, bbox_b):
-    def box_area(box):
-        return (box[2] - box[0]) * (box[3] - box[1])
-
-    area1 = box_area(bbox_a.T)
-    area2 = box_area(bbox_b.T)
-    inter = (torch.min(bbox_a[:, None, 2:], bbox_b[:, 2:]) - torch.max(bbox_a[:, None, :2], bbox_b[:, :2])).clamp(0).prod(2)
-    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
-
-
 def bbox_iounp(bbox_a, bbox_b):
     if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
         print(bbox_a, bbox_b)
@@ -473,7 +463,6 @@ def forward(self, imgs, bboxes, labels, scale):
         #-------------------------------#
         #   获取公用特征层
         #-------------------------------#
-        at = time.time()
         base_feature = self.model_train(imgs, mode = 'extractor')
         #print("base_feature.shape")
         #print(base_feature.shape)
@@ -492,9 +481,7 @@ def forward(self, imgs, bboxes, labels, scale):
         # -------------------------------------------------- #
         #   利用rpn网络获得调整参数、得分、建议框、先验框
         # -------------------------------------------------- #
-        bt = time.time()
         rpn_locs, rpn_scores, rois, roi_indices, anchor = self.model_train(x = [base_feature, img_size], scale = scale, mode = 'rpn')
-        kt = time.time()
         
         rpn_loc_loss_all, rpn_cls_loss_all, roi_loc_loss_all, roi_cls_loss_all  = 0, 0, 0, 0
         sample_rois, sample_indexes, gt_roi_locs, gt_roi_labels                 = [], [], [], []
@@ -556,7 +543,6 @@ def forward(self, imgs, bboxes, labels, scale):
         sample_indexes  = torch.stack(sample_indexes, dim=0)
         roi_cls_locs, roi_scores = self.model_train([base_feature, sample_rois, sample_indexes, img_size], mode = 'head')
 
-        ct = time.time()
         for i in range(n):
             # ------------------------------------------------------ #
             #   根据建议框的种类，取出对应的回归预测结果
@@ -579,13 +565,6 @@ def forward(self, imgs, bboxes, labels, scale):
 
             roi_loc_loss_all += roi_loc_loss
             roi_cls_loss_all += roi_cls_loss
-
-        dt = time.time()
-
-        # print("backbone", bt - at)
-        # print("rpn", ct - bt)
-        # print("rpn_net:", kt-bt)
-        # print("head", dt - ct)
             
         losses = [rpn_loc_loss_all/n, rpn_cls_loss_all/n, roi_loc_loss_all/n, roi_cls_loss_all/n]
         losses = losses + [sum(losses)]
@@ -599,10 +578,8 @@ def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None):
             self.optimizer.step()
         else:
             from torch.cuda.amp import autocast
-            at = time.time()
             with autocast():
                 losses = self.forward(imgs, bboxes, labels, scale)
-            bt = time.time()
 
             #----------------------#
             #   反向传播
@@ -610,9 +587,6 @@ def train_step(self, imgs, bboxes, labels, scale, fp16=False, scaler=None):
             scaler.scale(losses[-1]).backward()
             scaler.step(self.optimizer)
             scaler.update()
-            ct = time.time()
-            print("forward", bt-at)
-            print("backward", ct-bt)
             
         return losses
 

From ba5f3b47763fe980c12791f05959c3bbe2d8b6f2 Mon Sep 17 00:00:00 2001
From: Avery <40060351+n719710930@users.noreply.github.com>
Date: Fri, 1 Jul 2022 16:30:35 +0800
Subject: [PATCH 3/3] Update frcnn_training.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

注释numpy操作，启用pytorch计算
---
 nets/frcnn_training.py | 66 ++++++++++++++----------------------------
 1 file changed, 22 insertions(+), 44 deletions(-)

diff --git a/nets/frcnn_training.py b/nets/frcnn_training.py
index cef6c1f..ebd3a5f 100644
--- a/nets/frcnn_training.py
+++ b/nets/frcnn_training.py
@@ -50,7 +50,6 @@ def bbox2loc(src_bbox, dst_bbox):
     base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height
 
     eps = torch.finfo(height.dtype).eps
-    # print(eps)
     epst = torch.empty(len(height)).cuda()
     epst.fill_(eps)
     width = torch.maximum(width, epst)
@@ -109,10 +108,7 @@ def _calc_ious(self, anchor, bbox):
         #   anchor和bbox的iou
         #   获得的ious的shape为[num_anchors, num_gt]
         #----------------------------------------------#
-        # iousO = bbox_iou(anchor.cpu().numpy(), bbox.cpu().numpy())
         ious = bbox_iou(anchor, bbox)
-        # print("iou_numpy:", iousO)
-        # print("iou_tensor:", ious)
 
         if len(bbox) == 0:
             return torch.zeros(len(anchor)), torch.zeros(len(anchor)), torch.zeros(len(bbox))
@@ -123,10 +119,7 @@ def _calc_ious(self, anchor, bbox):
         #---------------------------------------------------------#
         #   找出每一个先验框最对应的真实框的iou  [num_anchors, ]
         #---------------------------------------------------------#
-        # max_iousO = np.max(iousO, axis=1)
         max_ious, _ = torch.max(ious, dim=1)
-        # print("iou_numpy:", max_iousO)
-        # print("iou_tensor:", max_ious)
         #---------------------------------------------------------#
         #   获得每一个真实框最对应的先验框  [num_gt, ]
         #---------------------------------------------------------#
@@ -429,12 +422,12 @@ def __init__(self, model_train, optimizer):
         self.roi_sigma      = 1
 
         #numpy
-        self.anchor_target_creatornp = AnchorTargetCreatornp()
-        self.proposal_target_creatornp = ProposalTargetCreatornp()
+        # self.anchor_target_creatornp = AnchorTargetCreatornp()
+        # self.proposal_target_creatornp = ProposalTargetCreatornp()
 
         #pytorch
-        # self.anchor_target_creator      = AnchorTargetCreator()
-        # self.proposal_target_creator    = ProposalTargetCreator()
+        self.anchor_target_creator      = AnchorTargetCreator()
+        self.proposal_target_creator    = ProposalTargetCreator()
 
 
         self.loc_normalize_std          = [0.1, 0.1, 0.2, 0.2]
@@ -464,25 +457,10 @@ def forward(self, imgs, bboxes, labels, scale):
         #   获取公用特征层
         #-------------------------------#
         base_feature = self.model_train(imgs, mode = 'extractor')
-        #print("base_feature.shape")
-        #print(base_feature.shape)
-        # -------------------------------#
-        #   获取公用特征层一致性损失
-        # -------------------------------#
-        #print(base_feature[0],base_feature[8])
-        #loss_distance = F.pairwise_distance(base_feature[0:8], base_feature[8:], p=2)
-        #loss_dis = loss_distance.mean()
-        #print(loss_dis)
-        #similarity = torch.cosine_similarity(base_feature[0:8], base_feature[8:], dim=1)
-        #loss_si = 1 - similarity.mean()
-        #print(loss_si)
-        # loss = loss + loss_dis
-
         # -------------------------------------------------- #
         #   利用rpn网络获得调整参数、得分、建议框、先验框
         # -------------------------------------------------- #
         rpn_locs, rpn_scores, rois, roi_indices, anchor = self.model_train(x = [base_feature, img_size], scale = scale, mode = 'rpn')
-        
         rpn_loc_loss_all, rpn_cls_loss_all, roi_loc_loss_all, roi_cls_loss_all  = 0, 0, 0, 0
         sample_rois, sample_indexes, gt_roi_locs, gt_roi_labels                 = [], [], [], []
         for i in range(n):
@@ -499,16 +477,16 @@ def forward(self, imgs, bboxes, labels, scale):
             # -------------------------------------------------- #
 
             #numpy
-            gt_rpn_loc, gt_rpn_label = self.anchor_target_creatornp(bbox, anchor[0].cpu().numpy())
-            gt_rpn_loc = torch.Tensor(gt_rpn_loc).type_as(rpn_locs)
-            gt_rpn_label = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long()
+            # gt_rpn_loc, gt_rpn_label = self.anchor_target_creatornp(bbox, anchor[0].cpu().numpy())
+            # gt_rpn_loc = torch.Tensor(gt_rpn_loc).type_as(rpn_locs)
+            # gt_rpn_label = torch.Tensor(gt_rpn_label).type_as(rpn_locs).long()
 
             #pytorch
-            # bbox = torch.from_numpy(bbox).cuda()
-            # label = torch.from_numpy(label).cuda()
-            # gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(bbox, anchor[0])
-            # gt_rpn_loc = gt_rpn_loc.type_as(rpn_locs)
-            # gt_rpn_label = gt_rpn_label.type_as(rpn_locs).long()
+            bbox = torch.from_numpy(bbox).cuda()
+            label = torch.from_numpy(label).cuda()
+            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(bbox, anchor[0])
+            gt_rpn_loc = gt_rpn_loc.type_as(rpn_locs)
+            gt_rpn_label = gt_rpn_label.type_as(rpn_locs).long()
 
             # -------------------------------------------------- #
             #   分别计算建议框网络的回归损失和分类损失
@@ -526,18 +504,18 @@ def forward(self, imgs, bboxes, labels, scale):
             #   gt_roi_label    [n_sample, ]
             # ------------------------------------------------------ #
             #numpy
-            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creatornp(roi, bbox, label, self.loc_normalize_std)
-            sample_rois.append(torch.Tensor(sample_roi).type_as(rpn_locs))
-            sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
-            gt_roi_locs.append(torch.Tensor(gt_roi_loc).type_as(rpn_locs))
-            gt_roi_labels.append(torch.Tensor(gt_roi_label).type_as(rpn_locs).long())
+            # sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creatornp(roi, bbox, label, self.loc_normalize_std)
+            # sample_rois.append(torch.Tensor(sample_roi).type_as(rpn_locs))
+            # sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
+            # gt_roi_locs.append(torch.Tensor(gt_roi_loc).type_as(rpn_locs))
+            # gt_roi_labels.append(torch.Tensor(gt_roi_label).type_as(rpn_locs).long())
 
             #pytorch
-            # sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std)
-            # sample_rois.append(sample_roi.type_as(rpn_locs))
-            # sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
-            # gt_roi_locs.append(gt_roi_loc.type_as(rpn_locs))
-            # gt_roi_labels.append(gt_roi_label.type_as(rpn_locs).long())
+            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(roi, bbox, label, self.loc_normalize_std)
+            sample_rois.append(sample_roi.type_as(rpn_locs))
+            sample_indexes.append(torch.ones(len(sample_roi)).type_as(rpn_locs) * roi_indices[i][0])
+            gt_roi_locs.append(gt_roi_loc.type_as(rpn_locs))
+            gt_roi_labels.append(gt_roi_label.type_as(rpn_locs).long())
             
         sample_rois     = torch.stack(sample_rois, dim=0)
         sample_indexes  = torch.stack(sample_indexes, dim=0)