# visual.py

# ------------------------------------------------------------------------
# Copyright (c) 2021 megvii-model. All Rights Reserved.
# ------------------------------------------------------------------------

import os
import numpy as np
import random
import argparse
import torch
import torchvision.transforms.functional as F
import cv2
from tqdm import tqdm
from pathlib import Path
from PIL import Image, ImageDraw
from models import build_model
from util.tool import load_model
from util.plot_utils import COCO_CATEGORIES
from main import get_args_parser
from torch.nn.functional import interpolate
from typing import List
import motmetrics as mm
import shutil
import json
import pycocotools.mask as mask_util
from detectron2.structures import Instances
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.layers import paste_masks_in_image
from detectron2.utils.memory import retry_if_cuda_oom


# Seed NumPy's global RNG at import time so NumPy-based randomness is repeatable.
# NOTE: this does not seed Python's `random` module, which plot_one_box uses
# for its fallback colors.
np.random.seed(2020)
# Fixed palette of 3-channel colors (0-255 per channel; channel order is not
# specified here). Despite the "_10" suffix the list holds 75 entries, and some
# values repeat (e.g. (144, 238, 144) and (178, 34, 34) each appear twice).
# NOTE(review): nothing in the visible part of this file reads COLORS_10.
COLORS_10 = [(144, 238, 144), (178, 34, 34), (221, 160, 221), (0, 255, 0), (0, 128, 0), (210, 105, 30), (220, 20, 60),
             (192, 192, 192), (255, 228, 196), (50, 205, 50), (139, 0, 139), (100, 149, 237), (138, 43, 226),
             (238, 130, 238),
             (255, 0, 255), (0, 100, 0), (127, 255, 0), (255, 0, 255), (0, 0, 205), (255, 140, 0), (255, 239, 213),
             (199, 21, 133), (124, 252, 0), (147, 112, 219), (106, 90, 205), (176, 196, 222), (65, 105, 225),
             (173, 255, 47),
             (255, 20, 147), (219, 112, 147), (186, 85, 211), (199, 21, 133), (148, 0, 211), (255, 99, 71),
             (144, 238, 144),
             (255, 255, 0), (230, 230, 250), (0, 0, 255), (128, 128, 0), (189, 183, 107), (255, 255, 224),
             (128, 128, 128),
             (105, 105, 105), (64, 224, 208), (205, 133, 63), (0, 128, 128), (72, 209, 204), (139, 69, 19),
             (255, 245, 238),
             (250, 240, 230), (152, 251, 152), (0, 255, 255), (135, 206, 235), (0, 191, 255), (176, 224, 230),
             (0, 250, 154),
             (245, 255, 250), (240, 230, 140), (245, 222, 179), (0, 139, 139), (143, 188, 143), (255, 0, 0),
             (240, 128, 128),
             (102, 205, 170), (60, 179, 113), (46, 139, 87), (165, 42, 42), (178, 34, 34), (175, 238, 238),
             (255, 248, 220),
             (218, 165, 32), (255, 250, 240), (253, 245, 230), (244, 164, 96), (210, 105, 30)]


def plot_one_box(x, img, color=None, label=None, score=None, line_thickness=None, mask=None):
    """Render the label text and/or mask overlay for a single detection.

    The box outline itself is deliberately not drawn; only a filled label
    background, the label text, an optional score string and an optional
    binary mask are rendered.

    Args:
        x: Box as ``(x1, y1, x2, y2)``; each value is truncated with ``int``.
        img: Image to draw on. The OpenCV calls draw on this object directly.
        color: Fill color for the label background. A random 3-channel color
            is picked when this is falsy.
        label: Text drawn just above the box's top-left corner. Nothing is
            written when this is falsy.
        score: Already-formatted score text, drawn 30 px below the top-left
            corner. Only used when ``label`` is also given.
        line_thickness: Base thickness that controls the font scale
            (``line_thickness / 3``) and the font stroke width. Defaults to 1.
        mask: Optional mask whose first element exposes ``.cpu().numpy()``
            (presumably a torch tensor shaped ``(1, H, W)`` -- TODO confirm).

    Returns:
        ``img`` itself when no mask is given; otherwise the new image produced
        by the detectron2 ``Visualizer`` with the mask blended in.
    """
    # Honour the caller's thickness; previously the parameter was ignored.
    tl = line_thickness or 1
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    if label:
        tf = max(tl - 1, 1)  # font stroke thickness
        font_scale = tl / 3
        text_w, text_h = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=tf)[0]
        # Filled rectangle sized to the text, sitting just above the box corner.
        c2 = c1[0] + text_w, c1[1] - text_h - 3
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img,
                    label, (c1[0], c1[1] - 2),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, [225, 255, 255],
                    thickness=tf,
                    lineType=cv2.LINE_AA)
        if score is not None:
            cv2.putText(img, score, (c1[0], c1[1] + 30), cv2.FONT_HERSHEY_SIMPLEX, font_scale, [225, 255, 255],
                        thickness=tf, lineType=cv2.LINE_AA)
    if mask is not None:
        # The Visualizer renders onto its own canvas, so the returned image is
        # a new array rather than the input ``img``.
        v = Visualizer(img, scale=1)
        vis_mask = v.draw_binary_mask(mask[0].cpu().numpy(), color=None, edge_color=None, text=None)
        img = vis_mask.get_image()
    return img


def draw_bboxes(ori_img, bbox, mask=None, offset=(0, 0), cvt_color=False):
    """Overlay every detection in ``bbox`` (and its mask, if any) on an image.

    Args:
        ori_img: Image to draw on; it is handed to ``plot_one_box`` as is.
        bbox: Iterable of rows. The first four values are ``x1, y1, x2, y2``.
            Rows with at least six values carry a score at index 4 and an
            integer class label at index 5.
        mask: Optional per-detection masks indexed in step with ``bbox``. It is
            ignored when ``None`` or when its first dimension is empty.
        offset: ``(dx, dy)`` added to the box coordinates before drawing.
        cvt_color: Unused; kept so existing callers keep working.

    Returns:
        The image returned by the last ``plot_one_box`` call, or ``ori_img``
        unchanged when ``bbox`` is empty.
    """
    img = ori_img
    dx, dy = offset[0], offset[1]
    for idx, box in enumerate(bbox):
        if mask is not None and mask.shape[0] > 0:
            inst_mask = mask[idx]
        else:
            inst_mask = None
        x1, y1, x2, y2 = [int(v) for v in box[:4]]
        x1 += dx
        x2 += dx
        y1 += dy
        y2 += dy
        if len(box) > 5:
            label = int(box[5])
            # NOTE(review): assumes labels are 1-based positions in
            # COCO_CATEGORIES -- confirm; label 0 wraps to the last entry.
            color = COCO_CATEGORIES[label - 1]['color']
        else:
            # Rows without a class label used to crash on ``None - 1``;
            # let plot_one_box pick its fallback color instead.
            color = None
        # Label text is intentionally not drawn (label=None); only the mask is.
        img = plot_one_box([x1, y1, x2, y2], img, color, None, score=None, mask=inst_mask)
    return img


def draw_points(img: np.ndarray, points: np.ndarray, color=(255, 255, 255)) -> np.ndarray:
    """Stamp a small circle on ``img`` for every ``(x, y)`` row of ``points``.

    Rows with index below 300 use ``color``; every row from index 300 onward
    is drawn in ``(0, 255, 0)``. The meaning of the 300 cut-off is not visible
    here (presumably a query count -- TODO confirm).

    Args:
        img: Image that ``cv2.circle`` draws on directly.
        points: Array with exactly two dimensions and two columns.
        color: Color for the first 300 points.

    Returns:
        The same ``img`` object that was passed in.
    """
    shape = points.shape
    assert len(shape) == 2 and shape[1] == 2, 'invalid points shape: {}'.format(shape)
    late_color = (0, 255, 0)
    for idx, pt in enumerate(points):
        px, py = pt
        dot_color = color if idx < 300 else late_color
        cv2.circle(img, (int(px), int(py)), 2, color=dot_color, thickness=2)
    return img


def tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray:
    """Return ``tensor`` as a NumPy array, detached from autograd and on the CPU."""
    detached = tensor.detach()
    on_host = detached.cpu()
    return on_host.numpy()


class Detector(object):
    """Run a detection model on one image and optionally save a visualization.

    One instance handles one image record: ``detect`` reads the file, runs the
    model and post-processors, filters the detections by score and, when asked,
    writes an annotated copy under ``<args.output_dir>/results``.
    """

    def __init__(self, args, model=None, postprocessors=None, seq_num=2, img_dir=None):
        """Store the model, post-processors and image record.

        Args:
            args: Parsed options; only ``args.output_dir`` is read here.
            model: Callable applied to the preprocessed image batch in
                ``detect``.
            postprocessors: Mapping with a ``'bbox'`` entry and, optionally, a
                ``'segm'`` entry.
            seq_num: Image record with ``'file_name'`` and ``'id'`` keys.
                NOTE: the default ``2`` is not subscriptable, so a record must
                always be supplied.
            img_dir: Directory that contains ``seq_num['file_name']``.
        """
        self.args = args
        self.detr = model
        self.postprocessors = postprocessors
        self.img_dir = img_dir

        self.file_name = seq_num['file_name']
        self.id = seq_num['id']
        # Resize targets used by init_img: shorter side / longer-side cap.
        self.img_height = 800
        self.img_width = 1333
        # Per-channel normalization constants (presumably the ImageNet
        # statistics -- TODO confirm against the training pipeline).
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        self.save_path = os.path.join(self.args.output_dir, 'results')
        os.makedirs(self.save_path, exist_ok=True)

    def init_img(self, img):
        """Resize and normalize ``img`` into a single-image batch.

        The image is scaled so its shorter side equals ``self.img_height``,
        unless that would push the longer side past ``self.img_width``; in that
        case the longer side is scaled to ``self.img_width`` instead. The
        original height and width are stored on ``self.seq_h`` / ``self.seq_w``.

        Args:
            img: Image array whose first two dimensions are height and width.

        Returns:
            ``(batch, ori_img)``: the normalized tensor with a leading batch
            dimension of 1, and an untouched copy of the input image.
        """
        ori_img = img.copy()
        self.seq_h, self.seq_w = img.shape[:2]
        short_side = min(self.seq_h, self.seq_w)
        long_side = max(self.seq_h, self.seq_w)
        scale = self.img_height / short_side
        if long_side * scale > self.img_width:
            scale = self.img_width / long_side
        target_h = int(self.seq_h * scale)
        target_w = int(self.seq_w * scale)
        img = cv2.resize(img, (target_w, target_h))
        img = F.normalize(F.to_tensor(img), self.mean, self.std)
        img = img.unsqueeze(0)
        return img, ori_img

    @staticmethod
    def filter_dt_by_score(dt_instances: Instances, prob_threshold: float) -> Instances:
        """Keep only the detections whose score is strictly above ``prob_threshold``."""
        keep = dt_instances.scores > prob_threshold
        return dt_instances[keep]

    @staticmethod
    def visualize_img_with_bbox(img_path, img, dt_instances: Instances, ref_pts=None, gt_boxes=None):
        """Draw the detections on ``img`` and write the result to ``img_path``.

        Args:
            img_path: Destination file passed to ``cv2.imwrite``.
            img: Image that is converted with ``cv2.COLOR_RGB2BGR`` before
                drawing.
            dt_instances: Detections with ``boxes`` and, optionally,
                ``scores``/``labels`` and ``masks`` fields.
            ref_pts: Optional ``(N, 2)`` points drawn on top of the detections.
            gt_boxes: Unused; kept for interface compatibility.
        """
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        if dt_instances.has('scores'):
            # Rows become [x1, y1, x2, y2, score, label], as draw_bboxes expects.
            dets = np.concatenate(
                [dt_instances.boxes, dt_instances.scores.reshape(-1, 1), dt_instances.labels.reshape(-1, 1)],
                axis=-1)
            masks = dt_instances.masks if dt_instances.has('masks') else None
            img_show = draw_bboxes(img, dets, masks)
        else:
            img_show = draw_bboxes(img, dt_instances.boxes)
        if ref_pts is not None:
            img_show = draw_points(img_show, ref_pts)
        cv2.imwrite(img_path, img_show)

    def detect(self, prob_threshold=0.4, vis=True):
        """Run the model on this instance's image and return the kept detections.

        Args:
            prob_threshold: Detections scoring at or below this are dropped.
            vis: When true, an annotated image is written to
                ``self.save_path`` under the original file name.

        Returns:
            CPU ``Instances`` holding ``boxes``, ``scores``, ``labels`` and,
            when the post-processors produced them, ``masks``.

        Raises:
            OSError: If the image file cannot be read.
        """
        img_path = os.path.join(self.img_dir, self.file_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError('cannot read image: {}'.format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cur_img, ori_img = self.init_img(img)

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            outputs = self.detr(cur_img.cuda().float())
            orig_target_sizes = torch.stack([torch.tensor([self.seq_h, self.seq_w]).cuda()])
            results = self.postprocessors['bbox'](outputs, target_sizes=orig_target_sizes)

            # Use the instance's post-processors, not a module-level global,
            # so the class also works when imported from another module.
            if 'segm' in self.postprocessors:
                # NOTE(review): this passes the nominal 800x1333 size, not the
                # actual resized image size -- confirm that the 'segm'
                # post-processor expects this.
                target_sizes = torch.stack([torch.tensor([self.img_height, self.img_width]).cuda()])
                results = self.postprocessors['segm'](results, outputs, orig_target_sizes, target_sizes)

        dt_instances = Instances((1, 1))
        dt_instances.boxes = results[0]['boxes']
        dt_instances.scores = results[0]['scores']
        dt_instances.labels = results[0]['labels']
        if 'masks' in results[0]:
            # Masks exist only when a 'segm' post-processor ran.
            dt_instances.masks = results[0]['masks']
        dt_instances = dt_instances.to(torch.device('cpu'))
        dt_instances = self.filter_dt_by_score(dt_instances, prob_threshold)

        if vis:
            cur_vis_img_path = os.path.join(self.save_path, self.file_name)
            self.visualize_img_with_bbox(cur_vis_img_path, ori_img, dt_instances, ref_pts=None)
        return dt_instances

if __name__ == '__main__':
    # Script entry point: run the detector over a fixed slice of the COCO
    # val2017 image list and save one visualization per image.

    parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
    args = parser.parse_args()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    # Build the model and load its weights from args.resume. load_model takes
    # the checkpoint path itself, so the file is not read separately here.
    detr, _, postprocessors = build_model(args)
    detr = load_model(detr, args.resume)
    detr.eval()
    detr = detr.cuda()

    ann_path = './data/coco/annotations/instances_val2017.json'
    img_dir = './data/coco/val2017'

    # Close the annotation file as soon as it has been parsed.
    with open(ann_path, 'r') as ann_file:
        annos = json.load(ann_file)
    # Only image records 1000..1999 are visualized.
    images = annos['images'][1000:2000]

    for seq_num in tqdm(images):
        det = Detector(args, model=detr, postprocessors=postprocessors, seq_num=seq_num, img_dir=img_dir)
        det.detect(vis=True)