diff --git a/configs/yolox/hyp.scratch.yaml b/configs/yolox/hyp.scratch.yaml index f3567c15..d60ac33d 100644 --- a/configs/yolox/hyp.scratch.yaml +++ b/configs/yolox/hyp.scratch.yaml @@ -56,7 +56,6 @@ data: ], [ { func_name: letterbox, scaleup: False }, - { func_name: resample_segments }, { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }, { func_name: fliplr, prob: 0.5 }, { func_name: label_norm, xyxy2xywh_: True }, diff --git a/configs/yolox/yolox-nano.yaml b/configs/yolox/yolox-nano.yaml index 8162e109..ec261748 100644 --- a/configs/yolox/yolox-nano.yaml +++ b/configs/yolox/yolox-nano.yaml @@ -64,10 +64,13 @@ data: stage_epochs: [ 285, 15 ], trans_list: [ [ - { func_name: mosaic, prob: 0.5, mosaic9_prob: 0.0, translate: 0.1, scale: 0.5, degrees: 10.0, shear: 2.0 }, + { func_name: mosaic, prob: 0.5, post_transform: [ + { func_name: resample_segments }, + { func_name: random_perspective, degrees: 10.0, translate: 0.1, scale: 0.5, shear: 2.0 },] }, + { func_name: letterbox, scaleup: False, }, { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }, - { func_name: label_norm, xyxy2xywh_: True }, { func_name: fliplr, prob: 0.5 }, + { func_name: label_norm, xyxy2xywh_: True }, { func_name: label_pad, padding_size: 160, padding_value: -1 }, { func_name: image_norm, scale: 255. }, { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }, diff --git a/configs/yolox/yolox-tiny.yaml b/configs/yolox/yolox-tiny.yaml index 162063de..7413584f 100644 --- a/configs/yolox/yolox-tiny.yaml +++ b/configs/yolox/yolox-tiny.yaml @@ -24,10 +24,12 @@ data: stage_epochs: [ 285, 15 ], trans_list: [ [ - { func_name: mosaic, prob: 1.0, mosaic9_prob: 0.0, translate: 0.1, scale: 0.5, degrees: 10.0, shear: 2.0 }, + { func_name: mosaic, prob: 1.0 }, + { func_name: resample_segments }, + { func_name: random_perspective, prob: 1.0, degrees: 10.0, translate: 0.1, scale: 0.5, shear: 2.0 }, { func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4 }, - { func_name: label_norm, xyxy2xywh_: True }, { func_name: fliplr, prob: 0.5 }, + { func_name: label_norm, xyxy2xywh_: True }, { func_name: label_pad, padding_size: 160, padding_value: -1 }, { func_name: image_norm, scale: 255. }, { func_name: image_transpose, bgr2rgb: True, hwc2chw: True }, diff --git a/mindyolo/data/dataset.py b/mindyolo/data/dataset.py index 4026631c..60fba0a1 100644 --- a/mindyolo/data/dataset.py +++ b/mindyolo/data/dataset.py @@ -342,6 +342,7 @@ def mosaic( self, sample, mosaic9_prob=0.0, + post_transform=None, ): segment_format = sample['segment_format'] bbox_format = sample['bbox_format'] @@ -350,9 +351,17 @@ def mosaic( mosaic9_prob = min(1.0, max(mosaic9_prob, 0.0)) if random.random() < (1 - mosaic9_prob): - return self._mosaic4(sample) + sample = self._mosaic4(sample) else: - return self._mosaic9(sample) + sample = self._mosaic9(sample) + + if post_transform: + for _i, ori_trans in enumerate(post_transform): + _trans = ori_trans.copy() + func_name, prob = _trans.pop("func_name"), _trans.pop("prob", 1.0) + sample = getattr(self, func_name)(sample, **_trans) + + return sample def _mosaic4(self, sample): # loads images in a 4-mosaic @@ -936,7 +945,7 @@ def fliplr(self, sample): sample['bboxes'] = bboxes # flip seg - if 'segments' in sample: + if self.return_segments: segment_format, segments = sample['segment_format'], sample['segments'] assert segment_format == 'polygon', \ f'FlipLR: The segment format should be polygon, but got {segment_format}' @@ -959,16 +968,21 @@ def letterbox(self, sample, new_shape=None, xywhn2xyxy_=True, scaleup=False, onl if not new_shape: new_shape = self.img_size + + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + image = sample['img'] + shape = image.shape[:2] # current shape [height, width] + if shape == new_shape: + return sample + bboxes = sample['bboxes'] ori_shape = sample['ori_shape'] - shape = image.shape[:2] # current shape [height, width] h, w = shape[:] h0, w0 = ori_shape hw_scale = np.array([h / h0, w / w0]) - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])