From 5d220ccc00ea2b90cb6d34828c05a581974e2c05 Mon Sep 17 00:00:00 2001 From: Yunchu Lee Date: Wed, 22 May 2024 16:09:39 +0900 Subject: [PATCH] add augmentation detail page to docs --- .../additional_features/adaptive_training.rst | 2 +- .../augmentations_per_model.rst | 70 +++++++++++++++++++ .../explanation/additional_features/index.rst | 1 + .../visual_prompting/fine_tuning.rst | 2 +- .../algorithms/visual_prompting/index.rst | 2 +- .../algorithms/visual_prompting/zero_shot.rst | 2 +- 6 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 docs/source/guide/explanation/additional_features/augmentations_per_model.rst diff --git a/docs/source/guide/explanation/additional_features/adaptive_training.rst b/docs/source/guide/explanation/additional_features/adaptive_training.rst index c0e989d722c..dcddaeafb31 100644 --- a/docs/source/guide/explanation/additional_features/adaptive_training.rst +++ b/docs/source/guide/explanation/additional_features/adaptive_training.rst @@ -1,5 +1,5 @@ Adaptive Training -================== +================= Adaptive-training focuses to adjust the number of iterations or interval for the validation to achieve the fast training. In the small data regime, we don't need to validate the model at every epoch since there are a few iterations at a single epoch. diff --git a/docs/source/guide/explanation/additional_features/augmentations_per_model.rst b/docs/source/guide/explanation/additional_features/augmentations_per_model.rst new file mode 100644 index 00000000000..207e25eae3e --- /dev/null +++ b/docs/source/guide/explanation/additional_features/augmentations_per_model.rst @@ -0,0 +1,70 @@ +Augmentations per model +======================= + +The following table shows details of the augmentations that are used for each model.
+ ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +| Task | Model | Train | Val | Test | ++=============================+===========================+=====================================================================================+=============================================+=============================================+ +|| Multi Class Classification || Efficientnet-B0 || - RandomResizedCrop (size=224) || - Resize (size=224) || - Resize (size=224) | +|| Multi Label Classification || Efficientnet-V2-S || - RandomFlip (flip_prob=0.5, direction="horizontal") || - Normalize || - Normalize | +|| H-Label Classification || MV3-Large || - Normalize || || | +|| || DeiT || || || | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| Detection || Yolox_l || - Mosaic (img_scale=640, pad_val=114.0) || - MultiScaleFlipAug (img_scale=(640, 640)) || - MultiScaleFlipAug (img_scale=(640, 640)) | +|| || Yolox_s || - RandomAffine || - Resize || - Resize | +|| || || - MixUp (img_scale=640, ratio_range=(0.8, 1.6), pad_val=114.0) || - RandomFlip (flip_prob=0.5) || - RandomFlip (flip_prob=0.5) | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Yolox_x || - YOLOXHSVRandomAug || - Pad (size_divisor=32) || - Pad (size_divisor=32) | +|| || || - RandomFlip (flip_prob=0.5) || - Normalize || - Normalize | +|| || || - Resize (img_scale=640) || || | +|| || || - Pad || || | +|| || || - Normalize || || | 
++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Yolox_tiny || - Mosaic (img_scale=640, pad_val=114.0) || - Resize (img_scale=(416, 416)) || - MultiScaleFlipAug (img_scale=(416, 416)) | +|| || || - RandomAffine || - MultiScaleFlipAug (img_scale=(416, 416)) || - Resize | +|| || || - PhotoMetricDistortion || - RandomFlip || - RandomFlip | +|| || || - RandomFlip (flip_prob=0.5) || - Pad || - Pad | +|| || || - Resize (img_scale=640) || - Normalize || - Normalize | +|| || || - Pad || || | +|| || || - Normalize || || | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Mobilenetv2_atss || - MinIoURandomCrop || - Resize (img_scale=(992, 736)) || - Resize (img_scale=(992, 736)) | +|| || Resnext101_atss || - Resize (img_scale=[(992, 736), (896, 736), (1088, 736), (992, 672), (992, 800)]) || - MultiScaleFlipAug (img_scale=(992, 736)) || - MultiScaleFlipAug (img_scale=(992, 736)) | +|| || || - RandomFlip (flip_prob=0.5) || - RandomFlip || - RandomFlip | +|| || || - Normalize || - Normalize || - Normalize | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Mobilenetv2_ssd || - PhotoMetricDistortion || - Resize (img_scale=(864, 864)) || - MultiScaleFlipAug (img_scale=(864, 864)) | +|| || || - MinIoURandomCrop || - MultiScaleFlipAug (img_scale=(864, 864)) || - Resize | +|| || || - Resize (img_scale=(864, 864)) || - Normalize || - Normalize | +|| || || - Normalize || || | +|| || || - RandomFlip 
(flip_prob=0.5) || || | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Resnet50_Detr || - RandomFlip (flip_prob=0.5) || - MultiScaleFlipAug (img_scale=(1333, 800)) || - MultiScaleFlipAug (img_scale=(1333, 800)) | +|| || Resnet50_dino || - AutoAugment || - Resize || - Resize | +|| || || - Resize || - RandomFlip || - RandomFlip | +|| || || - RandomCrop || - Normalize || - Normalize | +|| || || - Resize || - Pad (size_divisor=32) || - Pad (size_divisor=32) | +|| || || - Normalize || || | +|| || || - Pad (size_divisor=1) || || | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| Instance-segmentation || Convnext_maskrcnn || - Resize (img_scale=1024) || - Resize (img_scale=1024) || - MultiScaleFlipAug (img_scale=1024) | +|| || Efficientnetb2b_maskrcnn || - RandomFlip (flip_prob=0.5) || - MultiScaleFlipAug || - Resize | +|| || Resnet50_maskrcnn || - Normalize || - RandomFlip (flip_prob=0.5) || - RandomFlip (flip_prob=0.5) | +|| || || - Pad (size_divisor=32) || - Normalize || - Normalize | +|| || || || - Pad (size_divisor=32) || - Pad (size_divisor=32) | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| || Maskrcnn_swin_t || - Resize (img_scale=1344) || - Resize (img_scale=1344) || - Resize (img_scale=1344) | +|| || || - RandomFlip (flip_prob=0.5) || - MultiScaleFlipAug || - MultiScaleFlipAug | +|| || || - Normalize || - RandomFlip (flip_prob=0.5) || - RandomFlip (flip_prob=0.5) | +|| || || - Pad
(size_divisor=32) || - Normalize || - Normalize | +|| || || || - Pad (size_divisor=32) || - Pad (size_divisor=32) | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ +|| Segmentation || Segnext_b || - Resize (img_scale=544) || - Resize (img_scale=544) || - Resize (img_scale=544) | +|| || Segnext_s || - RandomCrop (crop_size=512, cat_max_ratio=0.75) || - MultiScaleFlipAug || - MultiScaleFlipAug | +|| || Segnext_t || - RandomFlip (flip_prob=0.5, direction="horizontal") || - RandomFlip || - RandomFlip | +|| || Lite_hrnet_18 || - Normalize || - Normalize || - Normalize | +|| || Lite_hrnet_18_mod2 || - Pad (size=512, pad_val=0, seg_pad_val=255) || || | +|| || Lite_hrnet_s_mod2 || || || | +|| || Lite_hrnet_x_mod3 || || || | ++-----------------------------+---------------------------+-------------------------------------------------------------------------------------+---------------------------------------------+---------------------------------------------+ diff --git a/docs/source/guide/explanation/additional_features/index.rst b/docs/source/guide/explanation/additional_features/index.rst index 0551f916191..3100e11fab5 100644 --- a/docs/source/guide/explanation/additional_features/index.rst +++ b/docs/source/guide/explanation/additional_features/index.rst @@ -15,3 +15,4 @@ Additional Features fast_data_loading tiling config_input_size + augmentations_per_model diff --git a/docs/source/guide/explanation/algorithms/visual_prompting/fine_tuning.rst b/docs/source/guide/explanation/algorithms/visual_prompting/fine_tuning.rst index 67bc77ff4f8..c30a9a2e31c 100644 --- a/docs/source/guide/explanation/algorithms/visual_prompting/fine_tuning.rst +++ b/docs/source/guide/explanation/algorithms/visual_prompting/fine_tuning.rst @@ -1,5 +1,5 @@ Visual Prompting (Fine-tuning) -=================
+============================== Visual prompting is a computer vision task that uses a combination of an image and prompts, such as texts, bounding boxes, points, and so on to troubleshoot problems. Using these useful prompts, the main purpose of this task is to obtain labels from unlabeled datasets, and to use generated label information on particular domains or to develop a new model with the generated information. diff --git a/docs/source/guide/explanation/algorithms/visual_prompting/index.rst b/docs/source/guide/explanation/algorithms/visual_prompting/index.rst index c9d6abac31b..8910b1101b5 100644 --- a/docs/source/guide/explanation/algorithms/visual_prompting/index.rst +++ b/docs/source/guide/explanation/algorithms/visual_prompting/index.rst @@ -1,5 +1,5 @@ Visual Prompting -============ +================ .. toctree:: :maxdepth: 1 diff --git a/docs/source/guide/explanation/algorithms/visual_prompting/zero_shot.rst b/docs/source/guide/explanation/algorithms/visual_prompting/zero_shot.rst index 4923a2b73c6..f3a0679e412 100644 --- a/docs/source/guide/explanation/algorithms/visual_prompting/zero_shot.rst +++ b/docs/source/guide/explanation/algorithms/visual_prompting/zero_shot.rst @@ -1,5 +1,5 @@ Visual Prompting (Zero-shot learning) -================= +===================================== Visual prompting is a computer vision task that uses a combination of an image and prompts, such as texts, bounding boxes, points, and so on to troubleshoot problems. Using these useful prompts, the main purpose of this task is to obtain labels from unlabeled datasets, and to use generated label information on particular domains or to develop a new model with the generated information.