"""This module contains the implementation of Accuracy performance provider."""
# Copyright (C) 2021-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
import copy
import logging
from typing import List, Set, Tuple
import numpy as np
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
from otx.api.entities.dataset_item import DatasetItemEntity
from otx.api.entities.datasets import DatasetEntity
from otx.api.entities.label import LabelEntity
from otx.api.entities.label_schema import LabelGroup
from otx.api.entities.metrics import (
BarChartInfo,
BarMetricsGroup,
ColorPalette,
MatrixChartInfo,
MatrixMetric,
MatrixMetricsGroup,
MetricsGroup,
Performance,
ScoreMetric,
)
from otx.api.entities.resultset import ResultSetEntity
from otx.api.usecases.evaluation.averaging import MetricAverageMethod
from otx.api.usecases.evaluation.basic_operations import (
precision_per_class,
recall_per_class,
)
from otx.api.usecases.evaluation.performance_provider_interface import (
IPerformanceProvider,
)
logger = logging.getLogger(__name__)
class Accuracy(IPerformanceProvider):
    """This class is responsible for providing Accuracy measures; mainly for Classification problems.

    The calculation supports both multi-label and binary-label predictions.

    Accuracy is the proportion of correctly predicted labels to the total number of (predicted and actual)
    labels for that instance. Overall accuracy is the average across all instances.

    Args:
        resultset (ResultSetEntity): ResultSet that the score will be computed for
        average (MetricAverageMethod, optional): The averaging method, either MICRO or MACRO
            MICRO: compute the average over all predictions in all label groups
            MACRO: compute accuracy per label group, return the average of the per-label-group accuracy scores
    """

    def __init__(
        self,
        resultset: ResultSetEntity,
        average: MetricAverageMethod = MetricAverageMethod.MICRO,
    ):
        self._unnormalized_matrices: List[MatrixMetric] = compute_unnormalized_confusion_matrices_from_resultset(
            resultset
        )

        # Accuracy computation
        mean_accuracy = self._compute_accuracy(average=average, confusion_matrices=self._unnormalized_matrices)
        self._accuracy = ScoreMetric(value=mean_accuracy, name="Accuracy")

    @property
    def accuracy(self) -> ScoreMetric:
        """Returns the accuracy as a ScoreMetric."""
        return self._accuracy

    def get_performance(self) -> Performance:
        """Returns the performance with accuracy and confusion-matrix metrics."""
        confusion_matrix_dashboard_metrics: List[MetricsGroup] = []

        # Use normalized matrices for the UI
        normalized_matrices: List[MatrixMetric] = copy.deepcopy(self._unnormalized_matrices)
        for matrix in normalized_matrices:
            matrix.normalize()
        confusion_matrix_info = MatrixChartInfo(
            name="Confusion matrix",
            header="confusion",
            row_header="Predicted label",
            column_header="True label",
        )
        confusion_matrix_dashboard_metrics.append(
            MatrixMetricsGroup(metrics=normalized_matrices, visualization_info=confusion_matrix_info)
        )

        # Compute precision and recall MetricsGroups and append them to the dashboard metrics
        for _confusion_matrix in self._unnormalized_matrices:
            confusion_matrix_dashboard_metrics.append(precision_metrics_group(_confusion_matrix))
            confusion_matrix_dashboard_metrics.append(recall_metrics_group(_confusion_matrix))

        return Performance(score=self.accuracy, dashboard_metrics=confusion_matrix_dashboard_metrics)

    @staticmethod
    def _compute_accuracy(average: MetricAverageMethod, confusion_matrices: List[MatrixMetric]) -> float:
        """Compute accuracy using the confusion matrices.

        Args:
            average (MetricAverageMethod): The averaging method, either MICRO or MACRO
                MICRO: compute the average over all predictions in all label groups
                MACRO: compute accuracy per label group, return the average of the per-label-group accuracy scores
            confusion_matrices (List[MatrixMetric]): the confusion matrices to compute accuracy from.
                MUST be unnormalized.

        Raises:
            ValueError: when the ground truth dataset does not contain annotations
            RuntimeError: when the averaging method is not known

        Returns:
            float: the accuracy score for the provided confusion matrices
        """
        # Count correct predictions and total annotations per label group
        correct_per_label_group = [np.trace(mat.matrix_values) for mat in confusion_matrices]
        total_per_label_group = [np.sum(mat.matrix_values) for mat in confusion_matrices]

        # Check that at least one label group has annotations
        if not np.any(total_per_label_group):
            raise ValueError("The ground truth dataset must contain annotations.")

        # Return the micro or macro average
        if average == MetricAverageMethod.MACRO:
            # Compute accuracy for each label group, then average across groups, ignoring groups without annotations
            return np.nanmean(np.divide(correct_per_label_group, total_per_label_group))
        if average == MetricAverageMethod.MICRO:
            # Average over all predictions in all label groups
            return np.sum(correct_per_label_group) / np.sum(total_per_label_group)
        raise RuntimeError(f"Unknown averaging method: {average}")
def precision_metrics_group(confusion_matrix: MatrixMetric) -> MetricsGroup:
    """Computes the precision per class based on a confusion matrix and returns them as ScoreMetrics in a MetricsGroup.

    Args:
        confusion_matrix: matrix to compute the precision per class for

    Returns:
        a BarMetricsGroup with the per-class precision
    """
    labels = confusion_matrix.row_labels
    if labels is None:
        # If no labels are given, just number the classes by index
        if confusion_matrix.matrix_values is not None:
            label_range = confusion_matrix.matrix_values.shape[0]
        else:
            label_range = 0
        labels = np.arange(label_range)
    per_class_precision = [
        ScoreMetric(class_, value=precision)
        for (class_, precision) in zip(labels, precision_per_class(confusion_matrix.matrix_values))
    ]
    return BarMetricsGroup(
        metrics=per_class_precision,
        visualization_info=BarChartInfo(
            name="Precision per class",
            palette=ColorPalette.LABEL,
        ),
    )


def recall_metrics_group(confusion_matrix: MatrixMetric) -> MetricsGroup:
    """Computes the recall per class based on a confusion matrix and returns them as ScoreMetrics in a MetricsGroup.

    Args:
        confusion_matrix: matrix to compute the recall per class for

    Returns:
        a BarMetricsGroup with the per-class recall
    """
    labels = confusion_matrix.row_labels
    if labels is None:
        # If no labels are given, just number the classes by index
        if confusion_matrix.matrix_values is not None:
            label_range = confusion_matrix.matrix_values.shape[0]
        else:
            label_range = 0
        labels = np.arange(label_range)
    per_class_recall = [
        ScoreMetric(class_, value=recall)
        for (class_, recall) in zip(labels, recall_per_class(confusion_matrix.matrix_values))
    ]
    return BarMetricsGroup(
        metrics=per_class_recall,
        visualization_info=BarChartInfo(
            name="Recall per class",
            palette=ColorPalette.LABEL,
        ),
    )
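

# Hedged sketch of the per-class formulas assumed above (precision_per_class and
# recall_per_class are implemented in basic_operations, so the exact orientation is an
# assumption): for an unnormalized confusion matrix M built with the sklearn convention
# of ground truth on rows and predictions on columns,
#
#   precision of class j = M[j, j] / sum over rows i of M[i, j]    (diagonal / column sum)
#   recall    of class i = M[i, i] / sum over columns j of M[i, j] (diagonal / row sum)
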
def __get_gt_and_predicted_label_indices_from_resultset(
    resultset: ResultSetEntity,
) -> Tuple[List[Set[int]], List[Set[int]]]:
    """Returns the label index sets for the ground truth and prediction datasets in a tuple.

    Args:
        resultset (ResultSetEntity): the resultset containing the ground truth and prediction datasets

    Returns:
        a tuple containing two lists. The first list contains the ground truth label indices, and the second contains
        the prediction label indices.
    """
    true_label_idx = []
    predicted_label_idx = []

    gt_dataset: DatasetEntity = resultset.ground_truth_dataset
    pred_dataset: DatasetEntity = resultset.prediction_dataset
    gt_dataset.sort_items()
    pred_dataset.sort_items()

    # Iterate over each dataset item and collect the labels for that item (prediction and ground truth)
    task_labels = resultset.model.configuration.get_label_schema().get_labels(include_empty=True)
    for gt_item, pred_item in zip(gt_dataset, pred_dataset):
        if isinstance(gt_item, DatasetItemEntity) and isinstance(pred_item, DatasetItemEntity):
            true_label_idx.append({task_labels.index(label) for label in gt_item.get_roi_labels(task_labels)})
            predicted_label_idx.append({task_labels.index(label) for label in pred_item.get_roi_labels(task_labels)})

    return true_label_idx, predicted_label_idx
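

# Hedged illustration of the returned structure: with task labels [cat, dog, empty]
# (hypothetical names), a ground-truth item annotated as "cat" and its prediction of
# "dog" contribute {0} to true_label_idx and {1} to predicted_label_idx; multi-label
# items contribute sets with more than one index.
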
def __compute_unnormalized_confusion_matrices_for_label_group(
    true_label_idx: List[Set[int]],
    predicted_label_idx: List[Set[int]],
    label_group: LabelGroup,
    task_labels: List[LabelEntity],
) -> MatrixMetric:
    """Returns the matrix metric for a certain label group.

    Args:
        true_label_idx (List[Set[int]]): list of sets of label indices for the ground truth dataset
        predicted_label_idx (List[Set[int]]): list of sets of label indices for the prediction dataset
        label_group (LabelGroup): label group to compute the confusion matrix for
        task_labels (List[LabelEntity]): list of labels for the task

    Returns:
        MatrixMetric: confusion matrix for the label group
    """
    map_task_labels_idx_to_group_idx = {
        task_labels.index(label): i_group for i_group, label in enumerate(label_group.labels)
    }
    set_group_labels_idx = set(map_task_labels_idx_to_group_idx.keys())
    group_label_names = [task_labels[label_idx].name for label_idx in set_group_labels_idx]

    if len(group_label_names) == 1:
        # Single-class group:
        # use "not" so that the presence of the class maps to index 0 and its absence to index 1
        y_true = [int(not set_group_labels_idx.issubset(true_labels)) for true_labels in true_label_idx]
        y_pred = [int(not set_group_labels_idx.issubset(pred_labels)) for pred_labels in predicted_label_idx]
        group_label_names += [f"~ {group_label_names[0]}"]
        column_labels = group_label_names.copy()
        remove_last_row = False
    else:
        # Multi-class group
        undefined_idx = len(group_label_names)  # index used for missing values

        # Find the intersections between the ground truth labels and the group labels,
        # and between the predicted labels and the group labels
        true_intersections = [true_labels.intersection(set_group_labels_idx) for true_labels in true_label_idx]
        pred_intersections = [pred_labels.intersection(set_group_labels_idx) for pred_labels in predicted_label_idx]

        # Map the intersections to 0-indexed group values
        y_true = [
            map_task_labels_idx_to_group_idx[list(true_intersection)[0]]
            if len(true_intersection) != 0
            else undefined_idx
            for true_intersection in true_intersections
        ]
        y_pred = [
            map_task_labels_idx_to_group_idx[list(pred_intersection)[0]]
            if len(pred_intersection) != 0
            else undefined_idx
            for pred_intersection in pred_intersections
        ]
        column_labels = group_label_names.copy()
        column_labels.append("Other")
        remove_last_row = True

    matrix_data = sklearn_confusion_matrix(y_true, y_pred, labels=list(range(len(column_labels))))

    if remove_last_row:
        # Matrix clean-up: drop the row for items whose ground truth lies outside this label group
        matrix_data = np.delete(matrix_data, -1, 0)
        if sum(matrix_data[:, -1]) == 0:
            # If none of the ground truth items are classified as classes from other groups, drop that column too
            matrix_data = np.delete(matrix_data, -1, 1)
            column_labels.remove(column_labels[-1])

    # Use the unnormalized matrix for statistics computation (accuracy, precision, recall)
    return MatrixMetric(
        name=f"{label_group.name}",
        matrix_values=matrix_data,
        row_labels=group_label_names,
        column_labels=column_labels,
        normalize=False,
    )
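

# Hedged illustration of the single-class branch above: for a label group containing only
# the task-label index 3, a ground-truth set {3} maps to y_true = 0 (class present) and a
# set such as {1} maps to y_true = 1 (class absent), yielding a 2x2 "label vs. ~label"
# confusion matrix. Multi-class groups instead map each item to the group-local index of
# its label, with items outside the group collected under "Other".
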
def compute_unnormalized_confusion_matrices_from_resultset(
    resultset: ResultSetEntity,
) -> List[MatrixMetric]:
    """Computes an (unnormalized) confusion matrix for every label group in the resultset.

    Args:
        resultset: the input resultset

    Raises:
        ValueError: when the ground truth or prediction dataset of the resultset is empty

    Returns:
        the computed unnormalized confusion matrices
    """
    if len(resultset.ground_truth_dataset) == 0 or len(resultset.prediction_dataset) == 0:
        raise ValueError("Cannot compute the confusion matrix of an empty result set.")

    unnormalized_confusion_matrices: List[MatrixMetric] = []
    (
        true_label_idx,
        predicted_label_idx,
    ) = __get_gt_and_predicted_label_indices_from_resultset(resultset)
    task_labels = resultset.model.configuration.get_label_schema().get_labels(include_empty=False)

    # Confusion matrix computation per label group
    for label_group in resultset.model.configuration.get_label_schema().get_groups():
        matrix = __compute_unnormalized_confusion_matrices_for_label_group(
            true_label_idx, predicted_label_idx, label_group, task_labels
        )
        unnormalized_confusion_matrices.append(matrix)

    return unnormalized_confusion_matrices
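

# Usage sketch (illustrative only): `resultset` is assumed to be an already-populated
# ResultSetEntity whose ground-truth and prediction datasets contain matching items.
#
#   metric = Accuracy(resultset=resultset, average=MetricAverageMethod.MACRO)
#   performance = metric.get_performance()
#   print(performance.score.value)             # overall accuracy in [0, 1]
#   print(len(performance.dashboard_metrics))  # confusion matrices plus per-class precision/recall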