# boundingbox.py

from __future__ import division

import numbers
import os
import random

import numpy as np
import cv2

import itertools

from commons import minmax
from configs import ADNetConf
from networks import ADNetwork


class Coordinate:
    """Integer 2-D pair (a point or a width/height) with element-wise math.

    Both components are rounded to the nearest integer on construction, so
    the result of every arithmetic operator is an integer pair as well.
    """

    @staticmethod
    def get_imgwh(img):
        """Return the size of ``img`` as ``Coordinate(width, height)``.

        :param img: object whose ``shape`` starts with ``(height, width)``.
        """
        return Coordinate(x=img.shape[1], y=img.shape[0])

    def __init__(self, x, y):
        """Store ``x`` and ``y`` rounded to the nearest integer."""
        self.x = int(round(x))
        self.y = int(round(y))

    def __repr__(self):
        return 'x=%d, y=%d' % (self.x, self.y)

    def __add__(self, other):
        """Add a Coordinate, a scalar, or a 2-element tuple/list/ndarray.

        :raises TypeError: if ``other`` is none of the supported types.
        """
        if isinstance(other, Coordinate):
            return Coordinate(self.x + other.x, self.y + other.y)
        if isinstance(other, numbers.Number):
            return Coordinate(self.x + other, self.y + other)
        if isinstance(other, (tuple, list, np.ndarray)):
            return Coordinate(self.x + other[0], self.y + other[1])
        raise TypeError('cannot add %s to Coordinate' % type(other).__name__)

    def __sub__(self, other):
        """Subtract ``other``; accepts the same operand types as ``+``."""
        if isinstance(other, (tuple, list)):
            # ``sequence * -1`` is an empty sequence, not a negated one, so
            # tuples and lists have to be negated element by element.
            return self.__add__((-other[0], -other[1]))
        return self.__add__(other * -1)

    def __mul__(self, other):
        """Scale by a scalar, or per axis by a Coordinate/tuple/list/ndarray.

        :raises TypeError: if ``other`` is none of the supported types.
        """
        if isinstance(other, Coordinate):
            return Coordinate(self.x * other.x, self.y * other.y)
        if isinstance(other, numbers.Number):
            return Coordinate(self.x * other, self.y * other)
        if isinstance(other, (tuple, list, np.ndarray)):
            return Coordinate(self.x * other[0], self.y * other[1])
        raise TypeError('cannot multiply Coordinate by %s' % type(other).__name__)

    def __floordiv__(self, other):
        return self.__truediv__(other)

    def __truediv__(self, other):
        """Divide by a Coordinate or a scalar.

        Note that ``/`` deliberately behaves like ``//`` here: each component
        is floor-divided before the result is stored.

        :raises TypeError: if ``other`` is none of the supported types.
        """
        if isinstance(other, Coordinate):
            return Coordinate(self.x // other.x, self.y // other.y)
        if isinstance(other, numbers.Number):
            return Coordinate(self.x // other, self.y // other)
        raise TypeError('cannot divide Coordinate by %s' % type(other).__name__)

    def __iter__(self):
        yield self.x
        yield self.y

    def __getitem__(self, item):
        """Return ``x`` for index 0/-2 and ``y`` for index 1/-1.

        :raises IndexError: for any other index.
        """
        if item in (0, -2):
            return self.x
        if item in (1, -1):
            return self.y
        raise IndexError('Coordinate index out of range: %r' % (item,))

    def __eq__(self, other):
        """Compare with a Coordinate or a 2-element tuple/list/ndarray.

        Any other operand yields ``NotImplemented`` so that ``==`` falls back
        to Python's default comparison instead of raising.
        """
        if isinstance(other, Coordinate):
            return self.x == other.x and self.y == other.y
        if isinstance(other, (tuple, list, np.ndarray)):
            return self.x == other[0] and self.y == other[1]
        return NotImplemented

    def max(self, val):
        """Raise both components to at least ``val``, in place."""
        self.x = max(self.x, val)
        self.y = max(self.y, val)


class BoundingBox:
    """Axis-aligned box stored as a top-left corner ``xy`` and a size ``wh``.

    Both are ``Coordinate`` instances, so all four values are rounded to
    integers. ``feat`` is set to ``None`` on construction and is not touched
    by any method of this class.
    """

    # Colour tuples for use with draw().
    COLOR_GT = (0, 255, 0)
    COLOR_PREDICT = (255, 0, 0)
    COLOR_NEGATIVE = (0, 0, 255)

    @staticmethod
    def read_vid_gt(path):
        """Read ground-truth boxes, one ``x y w h`` record per line.

        Fields may be separated by commas and/or whitespace and may be
        integers or decimals (decimals are rounded by ``Coordinate``).
        Blank lines are skipped.

        :param path: ground-truth file, or a directory that contains
            ``groundtruth_rect.txt``.
        :return: list of ``BoundingBox`` in file order.
        :raises ValueError: if a non-blank line does not hold exactly four
            numeric fields.
        """
        if os.path.isdir(path):
            path = os.path.join(path, 'groundtruth_rect.txt')

        boxes = []
        with open(path, 'r') as f:
            for line in f:
                fields = line.replace(',', ' ').split()
                if not fields:
                    continue
                x, y, w, h = [float(field) for field in fields]
                boxes.append(BoundingBox(x, y, w, h))
        return boxes

    @staticmethod
    def get_action_labels(samples, gt_box):
        """Return ``get_action_label(sample, gt_box)`` for every sample."""
        # TODO : vectorize everything
        return [BoundingBox.get_action_label(sample, gt_box) for sample in samples]

    @staticmethod
    def get_action_label(sample, gt_box):
        """Pick the action index that best moves ``sample`` towards ``gt_box``.

        Every action in ``range(ADNetwork.NUM_ACTIONS)`` is applied to
        ``sample`` (without image clamping) and scored by IoU with ``gt_box``.

        * If the stop action's IoU exceeds the configured
          ``predict.stop_iou``, the stop index is returned.
        * Otherwise the best action among all but the last two is returned,
          unless that best score is (within a factor 0.99999) no better than
          stopping, in which case the best over *all* actions is returned.

        NOTE(review): the last two actions are presumably the scale actions;
        confirm against ``ADNetwork.ACTIONS``.
        """
        ious = []
        for i in range(ADNetwork.NUM_ACTIONS):
            moved_box = sample.do_action(imgwh=None, action_idx=i)
            ious.append(gt_box.iou(moved_box))

        stop_iou = ious[ADNetwork.ACTION_IDX_STOP]
        if stop_iou > ADNetConf.get()['predict']['stop_iou']:
            return ADNetwork.ACTION_IDX_STOP
        if max(ious[:-2]) * 0.99999 <= stop_iou:
            return np.argmax(ious)
        return np.argmax(ious[:-2])

    def __init__(self, x, y, w, h):
        """Create a box with top-left corner ``(x, y)`` and size ``(w, h)``."""
        self.xy = Coordinate(x, y)
        self.wh = Coordinate(w, h)
        self.feat = None

    def __repr__(self):
        return 'x=%d, y=%d, w=%d, h=%d' % (self.xy.x, self.xy.y, self.wh.x, self.wh.y)

    def __eq__(self, other):
        """Boxes are equal when both corner and size match (``feat`` is ignored)."""
        if not isinstance(other, BoundingBox):
            return NotImplemented
        return self.xy == other.xy and self.wh == other.wh

    def __add__(self, other):
        """Add a 4-element ``(x, y, w, h)`` sequence or another box, field by field.

        :raises TypeError: for any other operand type.
        """
        if isinstance(other, (tuple, list, np.ndarray)):
            xy = self.xy + other[:2]
            wh = self.wh + other[2:]
            return BoundingBox(xy.x, xy.y, wh.x, wh.y)
        if isinstance(other, BoundingBox):
            xy = self.xy + other.xy
            wh = self.wh + other.wh
            return BoundingBox(xy.x, xy.y, wh.x, wh.y)
        raise TypeError('cannot add %s to BoundingBox' % type(other).__name__)

    def __mul__(self, other):
        """Scale each field by the matching entry of a 4-element sequence.

        :raises TypeError: for any other operand type.
        """
        if isinstance(other, (tuple, list, np.ndarray)):
            # Slices are converted to tuples so list and ndarray factors are
            # handled the same way as tuple factors.
            xy = self.xy * tuple(other[:2])
            wh = self.wh * tuple(other[2:])
            return BoundingBox(xy.x, xy.y, wh.x, wh.y)
        raise TypeError('cannot multiply BoundingBox by %s' % type(other).__name__)

    def __floordiv__(self, other):
        return self.__truediv__(other)

    def __truediv__(self, other):
        """Floor-divide all four fields by a scalar (``/`` behaves like ``//``).

        :raises TypeError: if ``other`` is not a number.
        """
        if isinstance(other, numbers.Number):
            xy = self.xy // other
            wh = self.wh // other
            return BoundingBox(xy.x, xy.y, wh.x, wh.y)
        raise TypeError('cannot divide BoundingBox by %s' % type(other).__name__)

    def get_xy2(self):
        """Return the corner opposite to ``xy``, i.e. ``xy + wh``."""
        return self.xy + self.wh

    def fit_image(self, imgwh):
        """Clamp the box to an image of size ``imgwh``, in place.

        The corner is clamped to be non-negative, the size is first limited
        to ``[10, imgwh - 10]`` and then cut so the box does not extend past
        the image.

        NOTE(review): the corner has no upper clamp, so a corner at or beyond
        the image edge leaves a zero or negative size after the final cut.
        """
        self.xy.x = max(0, self.xy.x)
        self.xy.y = max(0, self.xy.y)

        self.wh.x = max(10, min(self.wh.x, imgwh.x - 10))
        self.wh.y = max(10, min(self.wh.y, imgwh.y - 10))
        self.wh.x = min(self.wh.x, imgwh.x - self.xy.x)
        self.wh.y = min(self.wh.y, imgwh.y - self.xy.y)

    def draw(self, img, color=(255, 255, 255)):
        """
        draw bounding box on image (1 px outline via cv2.rectangle)
        """
        cv2.rectangle(img, tuple(self.xy), tuple(self.get_xy2()), color, 1)

    def iou(self, other):
        """Return the intersection-over-union of this box and ``other``.

        :param other: a ``BoundingBox`` or a sequence starting with
            ``x, y, w, h``.
        :return: IoU as a float; ``0.0`` when the boxes do not overlap.
        :raises TypeError: for any other operand type.
        """
        # reference : https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
        if isinstance(other, BoundingBox):
            other_x, other_y = other.xy.x, other.xy.y
            other_w, other_h = other.wh.x, other.wh.y
        elif isinstance(other, (tuple, list, np.ndarray)):
            other_x, other_y, other_w, other_h = other[:4]
        else:
            raise TypeError('cannot compute iou with %s' % type(other).__name__)

        # corners of the intersection rectangle
        xA = max(self.xy.x, other_x)
        yA = max(self.xy.y, other_y)
        xB = min(self.xy.x + self.wh.x, other_x + other_w)
        yB = min(self.xy.y + self.wh.y, other_y + other_h)

        if xA >= xB or yA >= yB:
            return 0.0

        interArea = (xB - xA) * (yB - yA)
        boxAArea = self.wh.x * self.wh.y
        boxBArea = other_w * other_h

        # union = sum of both areas minus the part counted twice
        return interArea / float(boxAArea + boxBArea - interArea)

    def do_action(self, imgwh, action_idx):
        """Return a new box obtained by applying one action to this box.

        * ``action_idx < 8``: translate by ``ADNetwork.ACTIONS[idx][:2]``
          times the step, where the step is the box size scaled by the
          configured ``action_move`` x/y ratios (at least 1).
        * ``action_idx == 8``: plain copy.
        * otherwise: resize by ``ADNetwork.ACTIONS[idx][2:]`` times the step
          (size scaled by the w/h ratios, at least 2) and shift the corner by
          half of the size change in the opposite direction.

        :param imgwh: image size used to clamp the result via ``fit_image``;
            pass ``None`` to skip clamping.
        """
        action_ratios = tuple(ADNetConf.get()['action_move'][key] for key in 'xywh')

        if action_idx < 8:
            deltas_xy = self.wh * action_ratios[:2]
            deltas_xy.max(1)
            actual_deltas = ADNetwork.ACTIONS[action_idx][:2] * (deltas_xy.x, deltas_xy.y)
            moved_xy = self.xy + actual_deltas
            new_box = BoundingBox(moved_xy.x, moved_xy.y, self.wh.x, self.wh.y)
        elif action_idx == 8:
            new_box = BoundingBox(self.xy.x, self.xy.y, self.wh.x, self.wh.y)
        else:
            deltas_wh = self.wh * action_ratios[2:]
            deltas_wh.max(2)
            deltas_wh_scaled = ADNetwork.ACTIONS[action_idx][2:] * (deltas_wh.x, deltas_wh.y)
            moved_xy = self.xy + -1 * deltas_wh_scaled / 2
            moved_wh = self.wh + deltas_wh_scaled

            new_box = BoundingBox(moved_xy.x, moved_xy.y, moved_wh.x, moved_wh.y)

        if imgwh:
            new_box.fit_image(imgwh)
        return new_box

    def gen_noise_samples(self, imgwh, noise_type, num, **kwargs):
        """Generate ``num`` randomly perturbed boxes, each clamped to the image.

        :param imgwh: image size passed to ``fit_image``.
        :param noise_type: ``'gaussian'`` or ``'uniform'`` (perturb the centre
            and scale of this box), or ``'whole'`` (draw from a grid of
            positions and scales spanning the image; entries may repeat and
            repeated entries are the same object).
        :param num: number of samples to return.
        :param kwargs: optional ``gaussian_translation_f`` (default 0.1),
            ``uniform_translation_f`` (default 1) and ``uniform_scale_f``
            (default 10); other keys are ignored.
        :raises ValueError: for an unknown ``noise_type`` (when ``num > 0``).
        :raises IndexError: for ``'whole'`` when the grid is empty and
            ``num > 0``.
        """
        center_xy = self.xy + self.wh * 0.5
        mean_wh = sum(self.wh) / 2.0

        gaussian_translation_f = kwargs.get('gaussian_translation_f', 0.1)
        uniform_translation_f = kwargs.get('uniform_translation_f', 1)
        uniform_scale_f = kwargs.get('uniform_scale_f', 10)

        if noise_type == 'whole':
            # A side shorter than 5 px would otherwise give range() a zero step.
            step_x = max(1, self.wh.x // 5)
            step_y = max(1, self.wh.y // 5)
            grid_x = range(self.wh.x // 2, imgwh.x - self.wh.x // 2, step_x)
            grid_y = range(self.wh.y // 2, imgwh.y - self.wh.y // 2, step_y)
            candidates = []
            for dx, dy, ds in itertools.product(grid_x, grid_y, range(-5, 5, 1)):
                box = BoundingBox(dx, dy, self.wh.x * (1.05 ** ds), self.wh.y * (1.05 ** ds))
                box.fit_image(imgwh)
                candidates.append(box)
            return [random.choice(candidates) for _ in range(num)]

        samples = []
        for _ in range(num):
            # minmax comes from commons; presumably it clamps its first
            # argument to [-1, 1] here - confirm against its definition.
            if noise_type == 'gaussian':
                dx = gaussian_translation_f * mean_wh * minmax(0.5 * random.normalvariate(0, 1), -1, 1)
                dy = gaussian_translation_f * mean_wh * minmax(0.5 * random.normalvariate(0, 1), -1, 1)
                dwh = 1.05 ** (3 * minmax(0.5 * random.normalvariate(0, 1), -1, 1))
            elif noise_type == 'uniform':
                dx = uniform_translation_f * mean_wh * random.uniform(-1.0, 1.0)
                dy = uniform_translation_f * mean_wh * random.uniform(-1.0, 1.0)
                dwh = 1.05 ** (uniform_scale_f * random.uniform(-1.0, 1.0))
            else:
                raise ValueError('unknown noise_type: %r' % (noise_type,))
            new_cxy = center_xy + (dx, dy)
            new_wh = self.wh * dwh
            # convert the perturbed centre back to a top-left corner
            box = BoundingBox(new_cxy.x - new_wh.x / 2.0, new_cxy.y - new_wh.y / 2.0, new_wh.x, new_wh.y)
            box.fit_image(imgwh)
            samples.append(box)

        return samples

    def get_posneg_samples(self, imgwh, pos_size, neg_size, use_whole=True, **kwargs):
        """Draw positive and negative training boxes around this box.

        Positives are gaussian-noise samples whose IoU with this box exceeds
        ``pos_thresh``; negatives are uniform-noise samples (plus ``'whole'``
        grid samples when ``use_whole`` is true) whose IoU is below
        ``neg_thresh``. Results are drawn with replacement from those pools.

        :param imgwh: image size used to clamp the samples.
        :param pos_size: number of positive samples to return.
        :param neg_size: number of negative samples to return.
        :param use_whole: also use ``'whole'`` samples as negative candidates.
        :param kwargs: ``pos_thresh`` / ``neg_thresh`` override the
            ``initial_finetune`` config values; all keyword arguments are
            also forwarded to ``gen_noise_samples`` so its noise factors can
            be set from here.
        :return: ``(pos_samples, neg_samples)`` lists.
        :raises IndexError: if a requested pool ends up empty.
        """
        pos_thresh = kwargs.get('pos_thresh', ADNetConf.g()['initial_finetune']['pos_thresh'])
        neg_thresh = kwargs.get('neg_thresh', ADNetConf.g()['initial_finetune']['neg_thresh'])

        # Options must be unpacked: passing ``kwargs=kwargs`` would hand
        # gen_noise_samples a single key named 'kwargs' and drop every option.
        gaussian_samples = self.gen_noise_samples(imgwh, 'gaussian', pos_size * 2, **kwargs)
        pos_candidates = [x for x in gaussian_samples if x.iou(self) > pos_thresh]

        uniform_num = neg_size if use_whole else neg_size * 2
        uniform_samples = self.gen_noise_samples(imgwh, 'uniform', uniform_num, **kwargs)
        neg_candidates = [x for x in uniform_samples if x.iou(self) < neg_thresh]

        if use_whole:
            whole_samples = self.gen_noise_samples(imgwh, 'whole', neg_size, **kwargs)
            neg_candidates += [x for x in whole_samples if x.iou(self) < neg_thresh]

        pos_samples = [random.choice(pos_candidates) for _ in range(pos_size)]
        neg_samples = [random.choice(neg_candidates) for _ in range(neg_size)]
        return pos_samples, neg_samples


if __name__ == '__main__':
    # Manual smoke test. It loads ./conf/large.yaml and reads the
    # ./data/freeman1 sequence, so both must exist relative to the
    # working directory, and it opens OpenCV windows.
    ADNetConf.get('./conf/large.yaml')

    # iou test
    outer = BoundingBox(0, 0, 100, 100)
    inner = BoundingBox(0, 0, 50, 10)
    assert outer.iou(inner) == 0.05

    first = BoundingBox(0, 0, 10, 10)
    second = BoundingBox(5, 7, 7, 10)
    partial_overlap = first.iou(second)
    assert 0.096 < partial_overlap < 0.097

    # random generator test
    seq_dir = './data/freeman1/'
    gt_box = BoundingBox.read_vid_gt(seq_dir)[0]
    # force a 30x30 ground-truth box for the sampling checks below
    gt_box.wh.x = 30
    gt_box.wh.y = 30

    imgpath = os.path.join(seq_dir, 'img', '0001.jpg')
    img = cv2.imread(imgpath)

    # Disabled visual check of the three noise generators.
    if False:
        for random_type in ['gaussian', 'uniform', 'whole']:
            noisy_boxes = gt_box.gen_noise_samples(Coordinate.get_imgwh(img), random_type, 20)

            gt_box.draw(img, BoundingBox.COLOR_GT)
            for noisy_box in noisy_boxes:
                noisy_box.draw(img, BoundingBox.COLOR_PREDICT)

            cv2.imshow(random_type, img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # pos-neg sample test
    pos, neg = gt_box.get_posneg_samples(Coordinate.get_imgwh(img), 1, 10)
    # reload the frame so the drawing starts from a clean image
    img = cv2.imread(imgpath)
    for pos_box in pos:
        pos_box.draw(img, BoundingBox.COLOR_PREDICT)
    # for box in neg:
    #     box.draw(img, BoundingBox.COLOR_NEGATIVE)
    gt_box.draw(img, BoundingBox.COLOR_GT)
    actions = BoundingBox.get_action_labels(pos, gt_box)
    cv2.imshow('posneg samples', img)
    cv2.waitKey(10)
    cv2.destroyAllWindows()