Notes on understanding maskrcnn_benchmark — the inference process, starting from demo/predictor.py

Published 2019-04-15 18:00

The main entry point is run_on_opencv_image(self, image). It comes down to a handful of steps, recorded one by one below:

```python
predictions = self.compute_prediction(image)   # np.ndarray (an image as returned by OpenCV) -> tensor -> predictions
top_predictions = self.select_top_predictions(predictions)
result = image.copy()
result = self.overlay_boxes(result, top_predictions)
if self.cfg.MODEL.KEYPOINT_ON:
    result = self.overlay_keypoints(result, top_predictions)
result = self.overlay_class_names(result, top_predictions)
return result
```
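Before walking through each step, here is a minimal driver sketch showing how COCODemo is typically used; the config file and image paths are placeholders, so substitute your own:

```python
import cv2

from maskrcnn_benchmark.config import cfg
from predictor import COCODemo

# placeholder paths -- adjust to your checkout and data
config_file = "configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

coco_demo = COCODemo(cfg, confidence_threshold=0.7, min_image_size=800)

image = cv2.imread("test.jpg")                  # BGR np.ndarray, e.g. [480, 640, 3]
result = coco_demo.run_on_opencv_image(image)   # image with boxes/labels drawn on it
cv2.imwrite("result.jpg", result)
```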
  • compute_prediction(self, original_image)

1. Preprocess: image = self.transforms(original_image). The input original_image = [480, 640, 3] with integer data; after the transform, image = [3, 800, 1066] with dtype torch.float32.
2. Convert to an ImageList, padded so that its size is divisible by cfg.DATALOADER.SIZE_DIVISIBILITY, and move it to CUDA (a small sketch of the rounding follows below).
3. Compute the predictions: predictions = self.model(image_list), where self.model = build_detection_model(cfg). This jumps into generalized_rcnn.py, where the model is assembled and run (covered in more detail in the linked post). This part is the heart of the pipeline: building the ResNet+FPN backbone, generating proposals with the RPN, and predicting the final results. It splits into three parts:

```python
self.backbone = build_backbone(cfg)
self.rpn = build_rpn(cfg, self.backbone.out_channels)        # 256 * 4 = 1024
self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels)
```

1) modeling/backbone/backbone.py: extracts the feature maps of each stage; those feature maps then feed the RPN and ROI pooling. With the preprocessed input image:

```python
features = self.backbone(images.tensors)
```
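As an aside on step 2, here is the padding arithmetic as a standalone sketch (my own illustration, not the library code); assuming SIZE_DIVISIBILITY = 32, which matches the feature-map sizes observed below, [3, 800, 1066] pads to [3, 800, 1088]:

```python
import math

def padded_shape(c, h, w, size_divisibility=32):
    # round H and W up to the nearest multiple of size_divisibility,
    # mirroring what to_image_list does before batching
    ph = int(math.ceil(h / size_divisibility) * size_divisibility)
    pw = int(math.ceil(w / size_divisibility) * size_divisibility)
    return (c, ph, pw)

print(padded_shape(3, 800, 1066))  # (3, 800, 1088)
```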
The P2~P6 feature maps then come out with the following shapes:
```
ipdb> p features[0].size()
torch.Size([1, 256, 200, 272])
ipdb> p features[1].size()
torch.Size([1, 256, 100, 136])
ipdb> p features[2].size()
torch.Size([1, 256, 50, 68])
ipdb> p features[3].size()
torch.Size([1, 256, 25, 34])
ipdb> p features[4].size()
torch.Size([1, 256, 13, 17])
```
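These shapes follow directly from the padded input [3, 800, 1088] and the FPN strides 4, 8, 16, 32, 64; a quick sanity check I wrote (not library code):

```python
import math

h, w = 800, 1088  # padded input size
for stride in (4, 8, 16, 32, 64):  # P2..P6
    print((1, 256, math.ceil(h / stride), math.ceil(w / stride)))
# (1, 256, 200, 272)
# (1, 256, 100, 136)
# (1, 256, 50, 68)
# (1, 256, 25, 34)
# (1, 256, 13, 17)
```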
2) modeling/rpn/rpn.py: the RPN turns the features into candidate boxes, keeping 1000 proposals per FPN level before the levels are merged and trimmed to the top 1000 overall:

```python
proposals, proposal_losses = self.rpn(images, features, targets)
```

The proposals come back as [BoxList(num_boxes=1000, image_width=1066, image_height=800, mode=xyxy)].
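The returned BoxList can be poked at in ipdb just like the features; the `objectness` field below is, as far as I can tell, what the RPN attaches to its output boxes in this codebase:

```
ipdb> p proposals[0].bbox.size()
torch.Size([1000, 4])
ipdb> p proposals[0].fields()
['objectness']
```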
3) modeling/roi_heads/roi_heads.py: this is the Fast R-CNN stage:

```python
x, result, detector_losses = self.roi_heads(features, proposals, targets)
```
It is composed of two branches, a detection branch and a segmentation branch (a keypoint branch can be appended in the same way); the heads are assembled like this:

```python
roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels)))
roi_heads = CombinedROIHeads(cfg, roi_heads)
```
In forward() of roi_heads.py, x, detections, loss_box = self.box(features, proposals, targets) produces the detection results.
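Conceptually, CombinedROIHeads just chains the branches, feeding the box branch's detections into the mask/keypoint branches at test time. A simplified sketch of its forward(), paraphrased from the library with details trimmed:

```python
def forward(self, features, proposals, targets=None):
    losses = {}
    # detection branch: classify and regress the RPN proposals
    x, detections, loss_box = self.box(features, proposals, targets)
    losses.update(loss_box)
    if self.cfg.MODEL.MASK_ON:
        # at test time the mask branch runs on the box branch's detections
        x, detections, loss_mask = self.mask(features, detections, targets)
        losses.update(loss_mask)
    if self.cfg.MODEL.KEYPOINT_ON:
        x, detections, loss_keypoint = self.keypoint(features, detections, targets)
        losses.update(loss_keypoint)
    return x, detections, losses
```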
For reference, here is the full demo/predictor.py with my inline notes translated:

```python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import torch
from torchvision import transforms as T

from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util
from tensorboardX import SummaryWriter


class COCODemo(object):
    # COCO categories for pretty print
    CATEGORIES = [
        "__background",
        "person",
        # ... (80 + 1 classes in total)
        "toothbrush",
    ]

    def __init__(
        self,
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        self.cfg = cfg.clone()
        self.model = build_detection_model(cfg)  # jumps into generalized_rcnn.py
        print(self.model)
        self.model.eval()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.model.to(self.device)
        self.min_image_size = min_image_size

        save_dir = cfg.OUTPUT_DIR
        checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()  # image format conversion and preprocessing

        mask_threshold = -1 if show_mask_heatmaps else 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)

        # used to make colors for each class
        gpu_tensor = torch.randn(10, 20).cuda(0)  # put a tensor on the first GPU
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

        self.cpu_device = torch.device("cpu")
        self.confidence_threshold = confidence_threshold
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim

    def build_transform(self):
        """
        Creates a basic transformation that was used to train the models
        """
        cfg = self.cfg

        # we are loading images with OpenCV, so we don't need to convert them
        # to BGR, they are already! So all we need to do is to normalize
        # by 255 if we want to convert to BGR255 format, or flip the channels
        # if we want it to be in RGB in [0-1] range.
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
        )

        transform = T.Compose(
            [
                T.ToPILImage(),
                T.Resize(self.min_image_size),
                T.ToTensor(),
                to_bgr_transform,
                normalize_transform,
            ]
        )
        return transform

    def run_on_opencv_image(self, image):
        """
        Arguments:
            image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        predictions = self.compute_prediction(image)  # np.ndarray (an image as returned by OpenCV) to tensor
        top_predictions = self.select_top_predictions(predictions)

        result = image.copy()
        if self.show_mask_heatmaps:
            return self.create_mask_montage(result, top_predictions)
        result = self.overlay_boxes(result, top_predictions)
        if self.cfg.MODEL.MASK_ON:
            result = self.overlay_mask(result, top_predictions)
        if self.cfg.MODEL.KEYPOINT_ON:
            result = self.overlay_keypoints(result, top_predictions)
        result = self.overlay_class_names(result, top_predictions)

        return result

    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        # -> input original_image = [480, 640, 3], integer data
        # -> after the transform, image = [3, 800, 1066], torch.float32
        image = self.transforms(original_image)  # todo: what is the type of image? type(x), x.type() -> tensor
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)  # 0
        image_list = image_list.to(self.device)
        # compute predictions
        with torch.no_grad():  # operations inside are not tracked by autograd
            predictions = self.model(image_list)
        predictions = [o.to(self.cpu_device) for o in predictions]

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        height, width, channel = original_image.shape[:]
        print(height, width, channel)
        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")  # todo: the fields of prediction
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

    def select_top_predictions(self, predictions):
        """
        Select only predictions which have a `score` > self.confidence_threshold,
        and returns the predictions in descending order of score

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        scores = predictions.get_field("scores")
        keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        _, idx = scores.sort(0, descending=True)
        return predictions[idx]

    def compute_colors_for_labels(self, labels):
        """
        Simple function that adds fixed colors depending on the class
        """
        colors = labels[:, None] * self.palette
        colors = (colors % 255).numpy().astype("uint8")
        return colors

    def overlay_boxes(self, image, predictions):
        """
        Adds the predicted boxes on top of the image

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `labels`.
        """
        labels = predictions.get_field("labels")
        boxes = predictions.bbox

        colors = self.compute_colors_for_labels(labels).tolist()

        for box, color in zip(boxes, colors):
            box = box.to(torch.int64)
            top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
            image = cv2.rectangle(
                image, tuple(top_left), tuple(bottom_right), tuple(color), 1
            )

        return image

    def overlay_mask(self, image, predictions):
        """
        Adds the instances contours for each predicted object.
        Each label has a different color.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`.
        """
        masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels")

        colors = self.compute_colors_for_labels(labels).tolist()

        for mask, color in zip(masks, colors):
            thresh = mask[0, :, :, None]
            contours, hierarchy = cv2_util.findContours(
                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            image = cv2.drawContours(image, contours, -1, color, 3)

        composite = image

        return composite

    def overlay_keypoints(self, image, predictions):  # todo here
        keypoints = predictions.get_field("keypoints")
        kps = keypoints.keypoints
        scores = keypoints.get_field("logits")
        kps = torch.cat((kps[:, :, 0:2], scores[:, :, None]), dim=2).numpy()
        for region in kps:
            image = vis_keypoints(image, region.transpose((1, 0)))
        return image

    def create_mask_montage(self, image, predictions):
        """
        Create a montage showing the probability heatmaps for each one of
        the detected objects

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask`.
        """
        masks = predictions.get_field("mask")
        masks_per_dim = self.masks_per_dim
        masks = L.interpolate(
            masks.float(), scale_factor=1 / masks_per_dim
        ).byte()
        height, width = masks.shape[-2:]
        max_masks = masks_per_dim ** 2
        masks = masks[:max_masks]
        # handle case where we have less detections than max_masks
        if len(masks) < max_masks:
            masks_padded = torch.zeros(max_masks, 1, height, width, dtype=torch.uint8)
            masks_padded[: len(masks)] = masks
            masks = masks_padded
        masks = masks.reshape(masks_per_dim, masks_per_dim, height, width)
        result = torch.zeros(
            (masks_per_dim * height, masks_per_dim * width), dtype=torch.uint8
        )
        for y in range(masks_per_dim):
            start_y = y * height
            end_y = (y + 1) * height
            for x in range(masks_per_dim):
                start_x = x * width
                end_x = (x + 1) * width
                result[start_y:end_y, start_x:end_x] = masks[y, x]
        return cv2.applyColorMap(result.numpy(), cv2.COLORMAP_JET)

    def overlay_class_names(self, image, predictions):
        """
        Adds detected class names and scores in the positions defined by the
        top-left corner of the predicted bounding box

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores` and `labels`.
        """
        scores = predictions.get_field("scores").tolist()
        labels = predictions.get_field("labels").tolist()
        labels = [self.CATEGORIES[i] for i in labels]
        boxes = predictions.bbox

        template = "{}: {:.2f}"
        for box, score, label in zip(boxes, scores, labels):
            x, y = box[:2]
            s = template.format(label, score)
            cv2.putText(
                image, s, (x, y), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
            )

        return image


import numpy as np
import matplotlib.pyplot as plt
from maskrcnn_benchmark.structures.keypoint import PersonKeypoints  # todo here


def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):  # todo here
    """Visualizes keypoints (adapted from vis_one_image).
    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).
    """
    dataset_keypoints = PersonKeypoints.NAMES
    kp_lines = PersonKeypoints.CONNECTIONS

    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]

    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)

    # Draw mid shoulder / mid hip first for better visualization.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(mid_hip),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)

    # Draw the keypoints.
    for l in range(len(kp_lines)):
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = kps[0, i1], kps[1, i1]
        p2 = kps[0, i2], kps[1, i2]
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)

    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
```
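One detail worth calling out from the code above: compute_colors_for_labels derives a deterministic color per class by multiplying the integer label by a fixed palette and taking the result mod 255, so the same class always draws in the same color. A standalone illustration:

```python
import torch

palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
labels = torch.tensor([1, 17])              # e.g. two class ids
colors = (labels[:, None] * palette) % 255  # one BGR triplet per label
print(colors.numpy().astype("uint8"))
```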