From e973b3d38de2e1d5ac58a479b7c1d86475a6fea2 Mon Sep 17 00:00:00 2001
From: Evgeny Tsykunov <e.tsykunov@gmail.com>
Date: Thu, 15 Dec 2022 23:59:06 +0900
Subject: [PATCH] Align saliency map media instantiation across tasks

---
 .../torchreid_tasks/openvino_task.py          | 23 ++++++++++---------
 .../apis/detection/openvino_task.py           | 16 ++++++++-----
 .../apis/segmentation/inference_task.py       |  2 +-
 .../apis/segmentation/openvino_task.py        |  2 +-
 .../mpa_tasks/apis/classification/task.py     |  7 +++---
 .../mpa_tasks/apis/detection/task.py          |  5 ++--
 6 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/external/deep-object-reid/torchreid_tasks/openvino_task.py b/external/deep-object-reid/torchreid_tasks/openvino_task.py
index 6ed46ffba6d..fd894caa432 100644
--- a/external/deep-object-reid/torchreid_tasks/openvino_task.py
+++ b/external/deep-object-reid/torchreid_tasks/openvino_task.py
@@ -199,28 +199,29 @@ def infer(self, dataset: DatasetEntity,
                 if saliency_map.ndim == 2:
                     # Single saliency map per image, support e.g. EigenCAM use case
                     actmap = get_actmap(saliency_map, (dataset_item.width, dataset_item.height))
-                    saliency_media = ResultMediaEntity(name="Saliency Map", type="saliency_map",
+                    saliency_media = ResultMediaEntity(name="Saliency Map",
+                                                       type="saliency_map",
                                                        annotation_scene=dataset_item.annotation_scene,
-                                                       numpy=actmap, roi=dataset_item.roi,
-                                                       label=predicted_scene.annotations[0].get_labels()[0].label)
+                                                       numpy=actmap,
+                                                       roi=dataset_item.roi)
                     dataset_item.append_metadata_item(saliency_media, model=self.model)
                 elif saliency_map.ndim == 3:
                     # Multiple saliency maps per image (class-wise saliency map), support e.g. Recipro-CAM use case
-                    predicted_class_set = set()
-                    for label in predicted_scene.annotations[0].get_labels():
-                        predicted_class_set.add(label.name)
+                    predicted_labels = set()
+                    for scored_label in predicted_scene.annotations[0].get_labels():
+                        predicted_labels.add(scored_label.label)
 
                     for class_id, class_wise_saliency_map in enumerate(saliency_map):
-                        class_name_str = self.task_environment.get_labels()[class_id].name
-                        if class_name_str in predicted_class_set:
+                        label = self.task_environment.get_labels()[class_id]
+                        if label in predicted_labels:
                             # TODO (negvet): Support more advanced use case,
                             #  when all/configurable set of saliency maps is returned
                             actmap = get_actmap(class_wise_saliency_map, (dataset_item.width, dataset_item.height))
-                            label = predicted_scene.annotations[0].get_labels()[0].label
-                            saliency_media = ResultMediaEntity(name=class_name_str,
+                            saliency_media = ResultMediaEntity(name=label.name,
                                                                type="saliency_map",
                                                                annotation_scene=dataset_item.annotation_scene,
-                                                               numpy=actmap, roi=dataset_item.roi,
+                                                               numpy=actmap,
+                                                               roi=dataset_item.roi,
                                                                label=label)
                             dataset_item.append_metadata_item(saliency_media, model=self.model)
                 else:
diff --git a/external/mmdetection/detection_tasks/apis/detection/openvino_task.py b/external/mmdetection/detection_tasks/apis/detection/openvino_task.py
index 37866985096..2e28a00c8cb 100644
--- a/external/mmdetection/detection_tasks/apis/detection/openvino_task.py
+++ b/external/mmdetection/detection_tasks/apis/detection/openvino_task.py
@@ -417,28 +417,32 @@ def infer(
                     dataset_item.append_metadata_item(saliency_media, model=self.model)
                 elif saliency_map.ndim == 3:
                     # Multiple saliency maps per image (class-wise saliency map)
-                    predicted_class_set = set()
+                    predicted_labels = set()
                     for bbox in predicted_scene.annotations:
-                        predicted_class_set.add(bbox.get_labels()[0].name)
+                        scored_label = bbox.get_labels()[0]
+                        predicted_labels.add(scored_label.label)
 
                     labels = self.task_environment.get_labels()
                     num_saliency_maps = saliency_map.shape[0]
                     if num_saliency_maps == len(labels) + 1:
                         # Include the background as the last category
                         labels.append(LabelEntity('background', Domain.DETECTION))
+
                     for class_id, class_wise_saliency_map in enumerate(saliency_map):
-                        class_name_str = labels[class_id].name
-                        if class_name_str in predicted_class_set:
+                        label = labels[class_id]
+                        if label in predicted_labels:
                             # TODO (negvet): Support more advanced use case,
                             #  when all/configurable set of saliency maps is returned
                             actmap = get_actmap(
                                 class_wise_saliency_map, (dataset_item.width, dataset_item.height)
                             )
                             saliency_media = ResultMediaEntity(
-                                name=class_name_str,
+                                name=label.name,
                                 type="saliency_map",
                                 annotation_scene=dataset_item.annotation_scene,
-                                numpy=actmap, roi=dataset_item.roi
+                                numpy=actmap,
+                                roi=dataset_item.roi,
+                                label=label
                             )
                             dataset_item.append_metadata_item(saliency_media, model=self.model)
                 else:
diff --git a/external/mmsegmentation/segmentation_tasks/apis/segmentation/inference_task.py b/external/mmsegmentation/segmentation_tasks/apis/segmentation/inference_task.py
index 02431bdfc74..91bb435810d 100644
--- a/external/mmsegmentation/segmentation_tasks/apis/segmentation/inference_task.py
+++ b/external/mmsegmentation/segmentation_tasks/apis/segmentation/inference_task.py
@@ -229,7 +229,7 @@ def _add_predictions_to_dataset(self, prediction_results, dataset, dump_soft_pre
                     current_label_soft_prediction = soft_prediction[:, :, label_index]
 
                     class_act_map = get_activation_map(current_label_soft_prediction)
-                    result_media = ResultMediaEntity(name='Soft Prediction',
+                    result_media = ResultMediaEntity(name=label.name,
                                                      type='soft_prediction',
                                                      label=label,
                                                      annotation_scene=dataset_item.annotation_scene,
diff --git a/external/mmsegmentation/segmentation_tasks/apis/segmentation/openvino_task.py b/external/mmsegmentation/segmentation_tasks/apis/segmentation/openvino_task.py
index 5d55ac5b314..65cc59032b1 100644
--- a/external/mmsegmentation/segmentation_tasks/apis/segmentation/openvino_task.py
+++ b/external/mmsegmentation/segmentation_tasks/apis/segmentation/openvino_task.py
@@ -185,7 +185,7 @@ def infer(self,
                         continue
                     current_label_soft_prediction = soft_prediction[:, :, label_index]
                     class_act_map = get_activation_map(current_label_soft_prediction)
-                    result_media = ResultMediaEntity(name='Soft Prediction',
+                    result_media = ResultMediaEntity(name=label.name,
                                                      type='soft_prediction',
                                                      label=label,
                                                      annotation_scene=dataset_item.annotation_scene,
diff --git a/external/model-preparation-algorithm/mpa_tasks/apis/classification/task.py b/external/model-preparation-algorithm/mpa_tasks/apis/classification/task.py
index a8c43de6b30..be75116fbf9 100644
--- a/external/model-preparation-algorithm/mpa_tasks/apis/classification/task.py
+++ b/external/model-preparation-algorithm/mpa_tasks/apis/classification/task.py
@@ -289,7 +289,6 @@ def _add_predictions_to_dataset(self, prediction_results, dataset, update_progre
                         annotation_scene=dataset_item.annotation_scene,
                         numpy=saliency_map,
                         roi=dataset_item.roi,
-                        label=item_labels[0].label,
                     )
                     dataset_item.append_metadata_item(saliency_map_media, model=self._task_environment.model)
                 elif saliency_map.ndim == 3:
@@ -298,14 +297,14 @@ def _add_predictions_to_dataset(self, prediction_results, dataset, update_progre
                         class_wise_saliency_map = get_actmap(
                             class_wise_saliency_map, (dataset_item.width, dataset_item.height)
                         )
-                        class_name_str = self._labels[class_id].name
+                        label = self._labels[class_id]
                         saliency_map_media = ResultMediaEntity(
-                            name=class_name_str,
+                            name=label.name,
                             type="saliency_map",
                             annotation_scene=dataset_item.annotation_scene,
                             numpy=class_wise_saliency_map,
                             roi=dataset_item.roi,
-                            label=item_labels[0].label,
+                            label=label,
                         )
                         dataset_item.append_metadata_item(saliency_map_media, model=self._task_environment.model)
                 else:
diff --git a/external/model-preparation-algorithm/mpa_tasks/apis/detection/task.py b/external/model-preparation-algorithm/mpa_tasks/apis/detection/task.py
index fbeddbb8643..e28f0d46a62 100644
--- a/external/model-preparation-algorithm/mpa_tasks/apis/detection/task.py
+++ b/external/model-preparation-algorithm/mpa_tasks/apis/detection/task.py
@@ -327,13 +327,14 @@ def _add_predictions_to_dataset(self, prediction_results, dataset, confidence_th
                         labels.append(LabelEntity("background", Domain.DETECTION))
                     for class_id, class_wise_saliency_map in enumerate(saliency_map):
                         actmap = get_actmap(class_wise_saliency_map, (dataset_item.width, dataset_item.height))
-                        class_name_str = labels[class_id].name
+                        label = labels[class_id]
                         saliency_media = ResultMediaEntity(
-                            name=class_name_str,
+                            name=label.name,
                             type="saliency_map",
                             annotation_scene=dataset_item.annotation_scene,
                             numpy=actmap,
                             roi=dataset_item.roi,
+                            label=label,
                         )
                         dataset_item.append_metadata_item(saliency_media, model=self._task_environment.model)
                 else: