Skip to content

Commit

Permalink
Fix division by zero in class incremental learning for classification (#2606)
Browse files Browse the repository at this point in the history

* Add empty label to reproduce zero-division error

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix minor typo

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix empty label 4 -> 3

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Prevent division by zero

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Update license

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Update CHANGELOG.md

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix inefficient sampling

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Revert indexing

Signed-off-by: Songki Choi <songki.choi@intel.com>

* Fix minor typo

Signed-off-by: Songki Choi <songki.choi@intel.com>

---------

Signed-off-by: Songki Choi <songki.choi@intel.com>
  • Loading branch information
goodsong81 authored Nov 7, 2023
1 parent 3ec4c95 commit 794a814
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
- Fix IBLoss enablement with DeiT-Tiny during class incremental training (<https://github.com/openvinotoolkit/training_extensions/pull/2595>)
- Fix mmcls bug not wrapping model in DataParallel on CPUs (<https://github.com/openvinotoolkit/training_extensions/pull/2601>)
- Fix h-label loss normalization issue with exclusive label group of single label (<https://github.com/openvinotoolkit/training_extensions/pull/2604>)
- Fix division by zero in class incremental learning for classification (<https://github.com/openvinotoolkit/training_extensions/pull/2606>)

## \[v1.4.3\]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def _configure_dataloader(cfg):
CLASS_INC_DATASET = [
"OTXClsDataset",
"OTXMultilabelClsDataset",
"MPAHierarchicalClsDataset",
"OTXHierarchicalClsDataset",
"ClsTVDataset",
]
WEIGHT_MIX_CLASSIFIER = ["SAMImageClassifier"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Base Dataset for Classification Task."""

# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

Expand Down Expand Up @@ -176,7 +176,10 @@ def class_accuracy(self, results, gt_labels):
for i in range(self.num_classes):
cls_pred = pred_label == i
cls_pred = cls_pred[gt_labels == i]
cls_acc = np.sum(cls_pred) / len(cls_pred)
if len(cls_pred) > 0:
cls_acc = np.sum(cls_pred) / len(cls_pred)
else:
cls_acc = 0.0
accracies.append(cls_acc)
return accracies

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Module for defining IB Loss which alleviate effect of imbalanced dataset."""
# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

Expand Down Expand Up @@ -48,7 +48,7 @@ def update_weight(self, cls_num_list):
"""Update loss weight per class."""
if len(cls_num_list) == 0:
raise ValueError("Cannot compute the IB loss weight with empty cls_num_list.")
per_cls_weights = 1.0 / np.array(cls_num_list)
per_cls_weights = 1.0 / (np.array(cls_num_list) + self.epsilon)
per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(cls_num_list)
per_cls_weights = torch.FloatTensor(per_cls_weights)
self.weight.data = per_cls_weights.to(device=self.weight.device)
Expand Down
2 changes: 1 addition & 1 deletion src/otx/algorithms/classification/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ def _generate_training_metrics(self, learning_curves): # pylint: disable=argume
elif self._hierarchical:
metric_key = "val/MHAcc"
else:
metric_key = "val/accuracy_top-1"
metric_key = "val/accuracy (%)"

# Learning curves
best_acc = -1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""Balanced sampler for imbalanced data."""
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import math

import numpy as np
Expand Down Expand Up @@ -32,24 +35,22 @@ def __init__(self, dataset, batch_size, efficient_mode=True, num_replicas=1, ran
self.dataset = dataset.dataset
else:
self.dataset = dataset
self.img_indices = self.dataset.img_indices
self.img_indices = {k: v for k, v in self.dataset.img_indices.items() if len(v) > 0}
self.num_cls = len(self.img_indices.keys())
self.data_length = len(self.dataset)
self.num_replicas = num_replicas
self.rank = rank
self.drop_last = drop_last

self.num_trials = int(self.data_length / self.num_cls)
if efficient_mode:
# Reduce the # of sampling (sampling data for a single epoch)
self.num_tail = min(len(cls_indices) for cls_indices in self.img_indices.values())
base = 1 - (1 / self.num_tail)
if base == 0:
raise ValueError("Required more than one sample per class")
self.num_trials = int(math.log(0.001, base))
if int(self.data_length / self.num_cls) < self.num_trials:
self.num_trials = int(self.data_length / self.num_cls)
else:
self.num_trials = int(self.data_length / self.num_cls)
num_tail = min(len(cls_indices) for cls_indices in self.img_indices.values())
if num_tail > 1:
base = 1 - (1 / num_tail)
num_reduced_trials = int(math.log(0.001, base))
self.num_trials = min(num_reduced_trials, self.num_trials)

self.num_samples = self._calculate_num_samples()

logger.info(f"This sampler will select balanced samples {self.num_trials} times")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

0 comments on commit 794a814

Please sign in to comment.