Fix division by zero in class incremental learning for classification (#2606)

* Add empty label to reproduce zero-division error
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix minor typo
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix empty label 4 -> 3
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Prevent division by zero
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Update license
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Update CHANGELOG.md
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix inefficient sampling
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Revert indexing
Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix minor typo
Signed-off-by: Songki Choi <songki.choi@intel.com>

---------

Signed-off-by: Songki Choi <songki.choi@intel.com>
openvinotoolkit · Nov 7, 2023 · 794a814 · 794a814
1 parent 3ec4c95
commit 794a814
Show file tree

Hide file tree

Showing 7 changed files with 25 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
 - Fix IBLoss enablement with DeiT-Tiny when class incremental training (<https://github.com/openvinotoolkit/training_extensions/pull/2595>)
 - Fix mmcls bug not wrapping model in DataParallel on CPUs (<https://github.com/openvinotoolkit/training_extensions/pull/2601>)
 - Fix h-label loss normalization issue w/ exclusive label group of singe label (<https://github.com/openvinotoolkit/training_extensions/pull/2604>)
+- Fix division by zero in class incremental learning for classification (<https://github.com/openvinotoolkit/training_extensions/pull/2606>)
 
 ## \[v1.4.3\]
 

diff --git a/src/otx/algorithms/classification/adapters/mmcls/configurer.py b/src/otx/algorithms/classification/adapters/mmcls/configurer.py
@@ -574,7 +574,7 @@ def _configure_dataloader(cfg):
 CLASS_INC_DATASET = [
     "OTXClsDataset",
     "OTXMultilabelClsDataset",
-    "MPAHierarchicalClsDataset",
+    "OTXHierarchicalClsDataset",
     "ClsTVDataset",
 ]
 WEIGHT_MIX_CLASSIFIER = ["SAMImageClassifier"]

diff --git a/src/otx/algorithms/classification/adapters/mmcls/datasets/otx_datasets.py b/src/otx/algorithms/classification/adapters/mmcls/datasets/otx_datasets.py
@@ -1,6 +1,6 @@
 """Base Dataset for Classification Task."""
 
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2022-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -176,7 +176,10 @@ def class_accuracy(self, results, gt_labels):
         for i in range(self.num_classes):
             cls_pred = pred_label == i
             cls_pred = cls_pred[gt_labels == i]
-            cls_acc = np.sum(cls_pred) / len(cls_pred)
+            if len(cls_pred) > 0:
+                cls_acc = np.sum(cls_pred) / len(cls_pred)
+            else:
+                cls_acc = 0.0
             accracies.append(cls_acc)
         return accracies
 

diff --git a/src/otx/algorithms/classification/adapters/mmcls/models/losses/ib_loss.py b/src/otx/algorithms/classification/adapters/mmcls/models/losses/ib_loss.py
@@ -1,5 +1,5 @@
 """Module for defining IB Loss which alleviate effect of imbalanced dataset."""
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2022-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -48,7 +48,7 @@ def update_weight(self, cls_num_list):
         """Update loss weight per class."""
         if len(cls_num_list) == 0:
             raise ValueError("Cannot compute the IB loss weight with empty cls_num_list.")
-        per_cls_weights = 1.0 / np.array(cls_num_list)
+        per_cls_weights = 1.0 / (np.array(cls_num_list) + self.epsilon)
         per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(cls_num_list)
         per_cls_weights = torch.FloatTensor(per_cls_weights)
         self.weight.data = per_cls_weights.to(device=self.weight.device)

diff --git a/src/otx/algorithms/classification/task.py b/src/otx/algorithms/classification/task.py
@@ -495,7 +495,7 @@ def _generate_training_metrics(self, learning_curves):  # pylint: disable=argume
         elif self._hierarchical:
             metric_key = "val/MHAcc"
         else:
-            metric_key = "val/accuracy_top-1"
+            metric_key = "val/accuracy (%)"
 
         # Learning curves
         best_acc = -1

diff --git a/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py b/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py
@@ -1,4 +1,7 @@
 """Balanced sampler for imbalanced data."""
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
 import math
 
 import numpy as np
@@ -32,24 +35,22 @@ def __init__(self, dataset, batch_size, efficient_mode=True, num_replicas=1, ran
             self.dataset = dataset.dataset
         else:
             self.dataset = dataset
-        self.img_indices = self.dataset.img_indices
+        self.img_indices = {k: v for k, v in self.dataset.img_indices.items() if len(v) > 0}
         self.num_cls = len(self.img_indices.keys())
         self.data_length = len(self.dataset)
         self.num_replicas = num_replicas
         self.rank = rank
         self.drop_last = drop_last
 
+        self.num_trials = int(self.data_length / self.num_cls)
         if efficient_mode:
             # Reduce the # of sampling (sampling data for a single epoch)
-            self.num_tail = min(len(cls_indices) for cls_indices in self.img_indices.values())
-            base = 1 - (1 / self.num_tail)
-            if base == 0:
-                raise ValueError("Required more than one sample per class")
-            self.num_trials = int(math.log(0.001, base))
-            if int(self.data_length / self.num_cls) < self.num_trials:
-                self.num_trials = int(self.data_length / self.num_cls)
-        else:
-            self.num_trials = int(self.data_length / self.num_cls)
+            num_tail = min(len(cls_indices) for cls_indices in self.img_indices.values())
+            if num_tail > 1:
+                base = 1 - (1 / num_tail)
+                num_reduced_trials = int(math.log(0.001, base))
+                self.num_trials = min(num_reduced_trials, self.num_trials)
+
         self.num_samples = self._calculate_num_samples()
 
         logger.info(f"This sampler will select balanced samples {self.num_trials} times")

diff --git a/tests/assets/classification_dataset_class_incremental/3/.gitignore b/tests/assets/classification_dataset_class_incremental/3/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore