From e780ccabf628ee583b3deec497938106e0e130da Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Tue, 18 Jul 2023 17:43:23 +0900 Subject: [PATCH] Fix F1 auto-threshold to choose best largest confidence (#2371) * Fix F1 auto-threshold to choose best largest confidence * Update license notice * Update change log --------- Signed-off-by: Songki Choi --- CHANGELOG.md | 18 +++++++++++++++ src/otx/api/usecases/evaluation/f_measure.py | 8 +++---- .../api/usecases/evaluation/test_f_measure.py | 22 ++++++------------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2e0c9cec0c..9fa64026755 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,24 @@ All notable changes to this project will be documented in this file. +## \[v1.5.0\] + +### New features + +- + +### Enhancements + +- + +### Bug fixes + +- Fix F1 auto-threshold to choose best largest confidence () + +### Known issues + +- OpenVINO(==2023.0) IR inference is not working well on 2-stage models (e.g. 
Mask-RCNN) exported from torch==1.13.1 + ## \[v1.4.0\] ### New features diff --git a/src/otx/api/usecases/evaluation/f_measure.py b/src/otx/api/usecases/evaluation/f_measure.py index 7de0e08fb08..b8f07522020 100644 --- a/src/otx/api/usecases/evaluation/f_measure.py +++ b/src/otx/api/usecases/evaluation/f_measure.py @@ -1,10 +1,8 @@ """This module contains the f-measure performance provider class.""" - -# Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2021-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # - import logging from typing import Dict, List, Optional, Tuple @@ -363,7 +361,7 @@ def get_results_per_confidence( result.f_measure_curve[class_name].append(result_point[class_name].f_measure) result.precision_curve[class_name].append(result_point[class_name].precision) result.recall_curve[class_name].append(result_point[class_name].recall) - if all_classes_f_measure > result.best_f_measure: + if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure: result.best_f_measure = all_classes_f_measure result.best_threshold = confidence_threshold return result @@ -417,7 +415,7 @@ def get_results_per_nms( result.precision_curve[class_name].append(result_point[class_name].precision) result.recall_curve[class_name].append(result_point[class_name].recall) - if all_classes_f_measure >= result.best_f_measure: + if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure: result.best_f_measure = all_classes_f_measure result.best_threshold = nms_threshold return result diff --git a/tests/unit/api/usecases/evaluation/test_f_measure.py b/tests/unit/api/usecases/evaluation/test_f_measure.py index 8ee9deb2f31..20f3e5bc775 100644 --- a/tests/unit/api/usecases/evaluation/test_f_measure.py +++ b/tests/unit/api/usecases/evaluation/test_f_measure.py @@ -1,16 +1,6 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 # -# Licensed under the 
Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. import datetime from typing import cast @@ -962,7 +952,7 @@ def test_f_measure_calculator_get_results_per_confidence(self): # Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is more # than best f-measure in results_per_confidence expected_results_per_confidence = _AggregatedResults(["class_1", "class_2"]) - for confidence_threshold in np.arange(*[0.6, 0.9]): + for confidence_threshold in np.arange(*[0.6, 0.9, 0.1]): result_point = f_measure_calculator.evaluate_classes( classes=["class_1", "class_2"], iou_threshold=0.7, @@ -978,7 +968,7 @@ def test_f_measure_calculator_get_results_per_confidence(self): actual_results_per_confidence = f_measure_calculator.get_results_per_confidence( classes=["class_1", "class_2"], - confidence_range=[0.6, 0.9], + confidence_range=[0.6, 0.9, 0.1], # arange(0.6, 0.9, 0.1) iou_threshold=0.7, ) assert actual_results_per_confidence.all_classes_f_measure_curve == ( expected_results_per_confidence.all_classes_f_measure_curve ) assert actual_results_per_confidence.f_measure_curve == expected_results_per_confidence.f_measure_curve assert actual_results_per_confidence.recall_curve == expected_results_per_confidence.recall_curve assert actual_results_per_confidence.best_f_measure == 0.5454545454545453 - assert actual_results_per_confidence.best_threshold == 0.6 + # 0.6 -> 0.54, 0.7 -> 0.54, 0.8 -> 0.54, 0.9 -> 0.44 + # Best "LARGEST" threshold 
should be 0.8 (considering numerical error) + assert abs(actual_results_per_confidence.best_threshold - 0.8) < 0.001 # Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is less # than best f-measure in results_per_confidence actual_results_per_confidence = f_measure_calculator.get_results_per_confidence(