Skip to content

Commit

Permalink
Merge branch 'develop' into pf/adding_anomaly_training_tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pfinashx committed Apr 3, 2022
2 parents 05f1b66 + 73ec03e commit 2c968f5
Show file tree
Hide file tree
Showing 39 changed files with 1,033 additions and 334 deletions.
28 changes: 16 additions & 12 deletions external/anomaly/ote_anomalib/callbacks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,27 @@ def on_predict_epoch_end(self, _trainer: pl.Trainer, pl_module: AnomalyModule, o
for dataset_item, pred_score, pred_label, anomaly_map, pred_mask in zip(
self.ote_dataset, pred_scores, pred_labels, anomaly_maps, pred_masks
):
label = self.anomalous_label if pred_label else self.normal_label
probability = (1 - pred_score) if pred_score < 0.5 else pred_score
dataset_item.append_labels([ScoredLabel(label=label, probability=float(probability))])
probability = pred_score if pred_label else 1 - pred_score
if self.task_type == TaskType.ANOMALY_CLASSIFICATION:
label = self.anomalous_label if pred_label else self.normal_label
if self.task_type == TaskType.ANOMALY_DETECTION:
dataset_item.append_annotations(
annotations=create_detection_annotation_from_anomaly_heatmap(
hard_prediction=pred_mask,
soft_prediction=anomaly_map,
label_map=self.label_map,
)
annotations = create_detection_annotation_from_anomaly_heatmap(
hard_prediction=pred_mask,
soft_prediction=anomaly_map,
label_map=self.label_map,
)
dataset_item.append_annotations(annotations)
label = self.normal_label if len(annotations) == 0 else self.anomalous_label
elif self.task_type == TaskType.ANOMALY_SEGMENTATION:
mask = pred_mask.squeeze().astype(np.uint8)
dataset_item.append_annotations(
create_annotation_from_segmentation_map(mask, anomaly_map.squeeze(), self.label_map)
annotations = create_annotation_from_segmentation_map(
hard_prediction=pred_mask.squeeze().astype(np.uint8),
soft_prediction=anomaly_map.squeeze(),
label_map=self.label_map,
)
dataset_item.append_annotations(annotations)
label = self.normal_label if len(annotations) == 0 else self.anomalous_label

dataset_item.append_labels([ScoredLabel(label=label, probability=float(probability))])
dataset_item.append_metadata_item(
ResultMediaEntity(
name="Anomaly Map",
Expand Down
271 changes: 271 additions & 0 deletions external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

"""Create MVTec AD (CC BY-NC-SA 4.0) JSON Annotations for OTE CLI.
Description:
This script converts MVTec AD dataset masks to OTE CLI annotation format for
classification, detection and segmentation tasks.
License:
MVTec AD dataset is released under the Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License
(CC BY-NC-SA 4.0)(https://creativecommons.org/licenses/by-nc-sa/4.0/).
Reference:
- Paul Bergmann, Kilian Batzner, Michael Fauser, David Sattlegger, Carsten Steger:
The MVTec Anomaly Detection Dataset: A Comprehensive Real-World Dataset for
Unsupervised Anomaly Detection; in: International Journal of Computer Vision
129(4):1038-1059, 2021, DOI: 10.1007/s11263-020-01400-4.
- Paul Bergmann, Michael Fauser, David Sattlegger, Carsten Steger: MVTec AD —
A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection;
in: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR),
9584-9592, 2019, DOI: 10.1109/CVPR.2019.00982.
Example:
Assume that MVTec AD dataset is located in "./data/anomaly/MVTec/" from the root
directory in training_extensions. JSON annotations could be created by running the
following:
>>> import os
>>> os.getcwd()
'~/training_extensions'
>>> os.listdir("./data/anomaly")
['detection', 'shapes', 'segmentation', 'MVTec', 'classification']
The following command generates the classification, detection and segmentation
JSON annotations for each category in the ./data/anomaly/MVTec dataset.
$ python external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py \
    --data_path ./data/anomaly/MVTec/
"""

import json
import os
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Any, Dict, List, Optional

import cv2
import pandas as pd
from anomalib.data.mvtec import make_mvtec_dataset


def create_bboxes_from_mask(mask_path: str) -> List[List[float]]:
    """Create normalized bounding boxes from a binary mask.

    Every connected component of the mask except the background yields one
    box, expressed as fractions of the mask width and height.

    Args:
        mask_path (str): Path to binary mask.

    Returns:
        List[List[float]]: Bounding boxes as ``[x1, y1, x2, y2]``, one per
            connected component.

    Raises:
        FileNotFoundError: When the mask cannot be read from ``mask_path``.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an AttributeError on ``.shape``.
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    height, width = mask.shape

    _, _, stats, _ = cv2.connectedComponentsWithStats(mask)

    bboxes: List[List[float]] = []
    # First row of the stats is always background, so it is skipped.
    # Last column is the area of the connected component and is not needed.
    for comp_x, comp_y, comp_w, comp_h, _area in stats[1:]:
        bboxes.append(
            [
                float(comp_x / width),
                float(comp_y / height),
                float((comp_x + comp_w) / width),
                float((comp_y + comp_h) / height),
            ]
        )

    return bboxes


def create_polygons_from_mask(mask_path: str) -> List[List[float]]:
    """Create a normalized polygon from a binary mask.

    Only the first contour reported by OpenCV is exported; its points are
    expressed as fractions of the mask width and height.

    Args:
        mask_path (str): Path to binary mask.

    Returns:
        List[List[float]]: Polygon coordinates as ``[x, y]`` pairs. Empty when
            the mask contains no contour.

    Raises:
        FileNotFoundError: When the mask cannot be read from ``mask_path``.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    height, width = mask.shape

    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element, so index from the end.
    contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # An all-zero mask has no contour; avoid an IndexError below.
        return []

    first_contour = contours[0]
    return [[float(x / width), float(y / height)] for point in first_contour for (x, y) in point]


def create_classification_json_items(pd_items: pd.DataFrame) -> Dict[str, Any]:
    """Create JSON items for the classification task.

    Args:
        pd_items (pd.DataFrame): MVTec AD samples in pandas DataFrame object.
            Each row is read through its ``path``, ``image_path``, ``label``
            and ``mask_path`` fields.

    Returns:
        Dict[str, Any]: MVTec AD classification JSON items, keyed by the
            stringified row index. ``masks`` only has entries for rows whose
            label is not ``"good"``.
    """

    def _relative_to_root(file_path: str, root: str) -> str:
        # Strip the root only where it is a prefix; str.replace would also
        # delete later occurrences of the same text inside the path.
        if file_path.startswith(root):
            file_path = file_path[len(root):]
        # Drop the separator(s) left between the root and the relative part.
        return file_path.lstrip("/\\")

    json_items: Dict[str, Any] = {"image_path": {}, "label": {}, "masks": {}}
    for index, pd_item in pd_items.iterrows():
        key = str(index)
        json_items["image_path"][key] = _relative_to_root(pd_item.image_path, pd_item.path)
        json_items["label"][key] = pd_item.label
        if pd_item.label != "good":
            json_items["masks"][key] = _relative_to_root(pd_item.mask_path, pd_item.path)

    return json_items


def create_detection_json_items(pd_items: pd.DataFrame) -> Dict[str, Any]:
    """Create JSON items for the detection task.

    Args:
        pd_items (pd.DataFrame): MVTec AD samples in pandas DataFrame object.
            Each row is read through its ``path``, ``image_path``, ``label``
            and ``mask_path`` fields.

    Returns:
        Dict[str, Any]: MVTec AD detection JSON items, keyed by the
            stringified row index. ``bboxes`` only has entries for rows whose
            label is not ``"good"``.
    """

    def _relative_to_root(file_path: str, root: str) -> str:
        # Strip the root only where it is a prefix; str.replace would also
        # delete later occurrences of the same text inside the path.
        if file_path.startswith(root):
            file_path = file_path[len(root):]
        # Drop the separator(s) left between the root and the relative part.
        return file_path.lstrip("/\\")

    json_items: Dict[str, Any] = {"image_path": {}, "label": {}, "bboxes": {}}
    for index, pd_item in pd_items.iterrows():
        key = str(index)
        json_items["image_path"][key] = _relative_to_root(pd_item.image_path, pd_item.path)
        json_items["label"][key] = pd_item.label
        if pd_item.label != "good":
            # Boxes are derived from the ground-truth mask of the anomalous image.
            json_items["bboxes"][key] = create_bboxes_from_mask(pd_item.mask_path)

    return json_items


def create_segmentation_json_items(pd_items: pd.DataFrame) -> Dict[str, Any]:
    """Create JSON items for the segmentation task.

    Args:
        pd_items (pd.DataFrame): MVTec AD samples in pandas DataFrame object.
            Each row is read through its ``path``, ``image_path``, ``label``
            and ``mask_path`` fields.

    Returns:
        Dict[str, Any]: MVTec AD segmentation JSON items, keyed by the
            stringified row index. ``masks`` only has entries for rows whose
            label is not ``"good"``.
    """

    def _relative_to_root(file_path: str, root: str) -> str:
        # Strip the root only where it is a prefix; str.replace would also
        # delete later occurrences of the same text inside the path.
        if file_path.startswith(root):
            file_path = file_path[len(root):]
        # Drop the separator(s) left between the root and the relative part.
        return file_path.lstrip("/\\")

    json_items: Dict[str, Any] = {"image_path": {}, "label": {}, "masks": {}}
    for index, pd_item in pd_items.iterrows():
        key = str(index)
        json_items["image_path"][key] = _relative_to_root(pd_item.image_path, pd_item.path)
        json_items["label"][key] = pd_item.label
        if pd_item.label != "good":
            # Polygons are derived from the ground-truth mask of the anomalous image.
            json_items["masks"][key] = create_polygons_from_mask(pd_item.mask_path)

    return json_items


def save_json_items(json_items: Dict[str, Any], file: str) -> None:
    """Serialize JSON items and write them to disk.

    Args:
        json_items (Dict[str, Any]): MVTec AD JSON items
        file (str): Destination path of the JSON file; it is opened for
            writing with UTF-8 encoding.
    """
    with open(file, "w", encoding="utf-8") as json_file:
        json.dump(json_items, json_file)


def create_task_annotations(task: str, data_path: str, annotation_path: str) -> None:
    """Create MVTec AD category annotations for a given task.

    One ``<split>.json`` file is written per split (train, val, test) into
    ``<annotation_path>/<task>``.

    Args:
        task (str): Task type to save annotations.
        data_path (str): Path to MVTec AD category.
        annotation_path (str): Path to save MVTec AD category JSON annotation items.

    Raises:
        ValueError: When task is not classification, detection or segmentation.
    """
    # Resolve the converter before touching the file system, so an unknown
    # task does not leave an empty directory behind.
    if task == "classification":
        create_json_items = create_classification_json_items
    elif task == "detection":
        create_json_items = create_detection_json_items
    elif task == "segmentation":
        create_json_items = create_segmentation_json_items
    else:
        raise ValueError(f"Unknown task {task}. Available tasks are classification, detection and segmentation.")

    # Write below the requested annotation root. Joining onto ``data_path``
    # here would silently discard the caller's ``annotation_path``.
    task_annotation_path = os.path.join(annotation_path, task)
    os.makedirs(task_annotation_path, exist_ok=True)

    for split in ("train", "val", "test"):
        df_items = make_mvtec_dataset(path=Path(data_path), create_validation_set=True, split=split)
        json_items = create_json_items(df_items)
        save_json_items(json_items, f"{task_annotation_path}/{split}.json")


def create_mvtec_ad_category_annotations(data_path: str, annotation_path: str) -> None:
    """Generate the annotations of one MVTec AD category for every supported task.

    The classification, detection and segmentation annotations are created in
    that order.

    Args:
        data_path (str): Path to MVTec AD category.
        annotation_path (str): Path to save MVTec AD category JSON annotation items.
    """
    supported_tasks = ("classification", "detection", "segmentation")
    for task_name in supported_tasks:
        create_task_annotations(task=task_name, data_path=data_path, annotation_path=annotation_path)


def create_mvtec_ad_annotations(mvtec_data_path: str, mvtec_annotation_path: Optional[str] = None) -> None:
    """Generate JSON annotations for every category of the MVTec AD dataset.

    Args:
        mvtec_data_path (str): Path to MVTec AD dataset.
        mvtec_annotation_path (Optional[str], optional): Path to save JSON annotations.
            Defaults to None, in which case the dataset path is used.
    """
    annotation_root = mvtec_data_path if mvtec_annotation_path is None else mvtec_annotation_path

    category_names = (
        "bottle",
        "cable",
        "capsule",
        "carpet",
        "grid",
        "hazelnut",
        "leather",
        "metal_nut",
        "pill",
        "screw",
        "tile",
        "toothbrush",
        "transistor",
        "wood",
        "zipper",
    )

    for name in category_names:
        print(f"Creating annotations for {name}")
        # Each category lives in its own sub-directory of both roots.
        create_mvtec_ad_category_annotations(
            os.path.join(mvtec_data_path, name),
            os.path.join(annotation_root, name),
        )


def get_args() -> Namespace:
    """Parse the command line arguments of the script.

    Returns:
        Namespace: Parsed arguments, exposing ``data_path`` and ``annotation_path``.
    """
    option_specs = (
        ("--data_path", {"type": str, "default": "./data/anomaly/MVTec/", "help": "Path to Mvtec AD dataset."}),
        ("--annotation_path", {"type": str, "required": False, "help": "Path to create OTE CLI annotations."}),
    )

    parser = ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()


def main():
    """Entry point: read the CLI arguments and create the MVTec AD annotations."""
    cli_args = get_args()
    create_mvtec_ad_annotations(cli_args.data_path, cli_args.annotation_path)


# Run the conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
8 changes: 4 additions & 4 deletions external/anomaly/ote_anomalib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@
import numpy as np
from anomalib.pre_processing import PreProcessor
from omegaconf import DictConfig, ListConfig
from ote_anomalib.data.utils import (
contains_anomalous_images,
split_local_global_dataset,
)
from ote_anomalib.logging import get_logger
from ote_sdk.entities.datasets import DatasetEntity
from ote_sdk.entities.model_template import TaskType
from ote_sdk.entities.shapes.polygon import Polygon
from ote_sdk.entities.subset import Subset
from ote_sdk.utils.dataset_utils import (
contains_anomalous_images,
split_local_global_dataset,
)
from ote_sdk.utils.segmentation_utils import mask_from_dataset_item
from pytorch_lightning.core.datamodule import LightningDataModule
from torch import Tensor
Expand Down
19 changes: 18 additions & 1 deletion external/anomaly/ote_anomalib/data/mvtec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
"""OTE MVTec Dataset facilitate OTE Anomaly Training."""
"""OTE MVTec Dataset facilitate OTE Anomaly Training.
License:
MVTec AD dataset is released under the Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License
(CC BY-NC-SA 4.0)(https://creativecommons.org/licenses/by-nc-sa/4.0/).
Reference:
- Paul Bergmann, Kilian Batzner, Michael Fauser, David Sattlegger, Carsten Steger:
The MVTec Anomaly Detection Dataset: A Comprehensive Real-World Dataset for
Unsupervised Anomaly Detection; in: International Journal of Computer Vision
129(4):1038-1059, 2021, DOI: 10.1007/s11263-020-01400-4.
- Paul Bergmann, Michael Fauser, David Sattlegger, Carsten Steger: MVTec AD —
A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection;
in: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR),
9584-9592, 2019, DOI: 10.1109/CVPR.2019.00982.
"""

# Copyright (C) 2021 Intel Corporation
#
Expand Down
Loading

0 comments on commit 2c968f5

Please sign in to comment.