
Revert "Mvtec 3d" #941

Merged · merged 1 commit on Mar 2, 2023
47 changes: 0 additions & 47 deletions anomalib/data/__init__.py
@@ -13,10 +13,8 @@
 from .base import AnomalibDataModule, AnomalibDataset
 from .btech import BTech
 from .folder import Folder
-from .folder_3d import Folder3D
 from .inference import InferenceDataset
 from .mvtec import MVTec
-from .mvtec_3d import MVTec3D
 from .shanghaitech import ShanghaiTech
 from .task_type import TaskType
 from .ucsd_ped import UCSDped
@@ -61,24 +59,6 @@ def get_datamodule(config: DictConfig | ListConfig) -> AnomalibDataModule:
             val_split_mode=config.dataset.val_split_mode,
             val_split_ratio=config.dataset.val_split_ratio,
         )
-    elif config.dataset.format.lower() == "mvtec_3d":
-        datamodule = MVTec3D(
-            root=config.dataset.path,
-            category=config.dataset.category,
-            image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
-            center_crop=center_crop,
-            normalization=config.dataset.normalization,
-            train_batch_size=config.dataset.train_batch_size,
-            eval_batch_size=config.dataset.eval_batch_size,
-            num_workers=config.dataset.num_workers,
-            task=config.dataset.task,
-            transform_config_train=config.dataset.transform_config.train,
-            transform_config_eval=config.dataset.transform_config.eval,
-            test_split_mode=config.dataset.test_split_mode,
-            test_split_ratio=config.dataset.test_split_ratio,
-            val_split_mode=config.dataset.val_split_mode,
-            val_split_ratio=config.dataset.val_split_ratio,
-        )
     elif config.dataset.format.lower() == "btech":
         datamodule = BTech(
             root=config.dataset.path,
@@ -119,31 +99,6 @@ def get_datamodule(config: DictConfig | ListConfig) -> AnomalibDataModule:
             val_split_mode=config.dataset.val_split_mode,
             val_split_ratio=config.dataset.val_split_ratio,
         )
-    elif config.dataset.format.lower() == "folder_3d":
-        datamodule = Folder3D(
-            root=config.dataset.root,
-            normal_dir=config.dataset.normal_dir,
-            normal_depth_dir=config.dataset.normal_depth_dir,
-            abnormal_dir=config.dataset.abnormal_dir,
-            abnormal_depth_dir=config.dataset.abnormal_depth_dir,
-            task=config.dataset.task,
-            normal_test_dir=config.dataset.normal_test_dir,
-            normal_test_depth_dir=config.dataset.normal_test_depth_dir,
-            mask_dir=config.dataset.mask_dir,
-            extensions=config.dataset.extensions,
-            image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
-            center_crop=center_crop,
-            normalization=config.dataset.normalization,
-            train_batch_size=config.dataset.train_batch_size,
-            eval_batch_size=config.dataset.eval_batch_size,
-            num_workers=config.dataset.num_workers,
-            transform_config_train=config.dataset.transform_config.train,
-            transform_config_eval=config.dataset.transform_config.eval,
-            test_split_mode=config.dataset.test_split_mode,
-            test_split_ratio=config.dataset.test_split_ratio,
-            val_split_mode=config.dataset.val_split_mode,
-            val_split_ratio=config.dataset.val_split_ratio,
-        )
     elif config.dataset.format.lower() == "ucsdped":
         datamodule = UCSDped(
             root=config.dataset.path,
@@ -232,10 +187,8 @@ def get_datamodule(config: DictConfig | ListConfig) -> AnomalibDataModule:
"get_datamodule",
"BTech",
"Folder",
"Folder3D",
"InferenceDataset",
"MVTec",
"MVTec3D",
"Avenue",
"UCSDped",
"TaskType",
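For context, get_datamodule builds the datamodule by dispatching on config.dataset.format, so after this revert a config requesting "mvtec_3d" or "folder_3d" no longer matches any branch. A minimal sketch of that dispatch pattern, simplified and hypothetical (the real function forwards many more config fields to each constructor):

from omegaconf import DictConfig

from anomalib.data import BTech, MVTec


def get_datamodule_sketch(config: DictConfig):
    """Simplified stand-in for get_datamodule: pick a datamodule by format."""
    dataset_format = config.dataset.format.lower()
    if dataset_format == "mvtec":
        return MVTec(root=config.dataset.path, category=config.dataset.category)
    if dataset_format == "btech":
        return BTech(root=config.dataset.path, category=config.dataset.category)
    # "mvtec_3d" and "folder_3d" are no longer valid formats after this revert.
    raise ValueError(f"Unknown dataset format: {dataset_format}")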
9 changes: 1 addition & 8 deletions anomalib/data/base/__init__.py
@@ -6,13 +6,6 @@

 from .datamodule import AnomalibDataModule
 from .dataset import AnomalibDataset
-from .depth import AnomalibDepthDataset
 from .video import AnomalibVideoDataModule, AnomalibVideoDataset
 
-__all__ = [
-    "AnomalibDataset",
-    "AnomalibDataModule",
-    "AnomalibVideoDataset",
-    "AnomalibVideoDataModule",
-    "AnomalibDepthDataset",
-]
+__all__ = ["AnomalibDataset", "AnomalibDataModule", "AnomalibVideoDataset", "AnomalibVideoDataModule"]
2 changes: 1 addition & 1 deletion anomalib/data/base/datamodule.py
@@ -12,7 +12,7 @@
 from pandas import DataFrame
 from pytorch_lightning import LightningDataModule
 from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
-from torch.utils.data.dataloader import DataLoader, default_collate
+from torch.utils.data import DataLoader, default_collate
 
 from anomalib.data.base.dataset import AnomalibDataset
 from anomalib.data.synthetic import SyntheticAnomalyDataset
1 change: 0 additions & 1 deletion anomalib/data/base/dataset.py
@@ -126,7 +126,6 @@ def __getitem__(self, index: int) -> dict[str, str | Tensor]:
         elif self.task in (TaskType.DETECTION, TaskType.SEGMENTATION):
             # Only Anomalous (1) images have masks in anomaly datasets
             # Therefore, create empty mask for Normal (0) images.
-
             if label_index == 0:
                 mask = np.zeros(shape=image.shape[:2])
             else:
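The context above shows how detection and segmentation tasks handle normal images: only anomalous images come with ground-truth masks, so a zero-filled mask matching the image's spatial dimensions stands in for normal samples. A small self-contained sketch of that step:

import numpy as np

# Hypothetical RGB image; only the spatial dims (H, W) matter for the mask.
image = np.zeros((256, 256, 3), dtype=np.uint8)
label_index = 0  # normal sample

if label_index == 0:
    # Normal image: no ground-truth mask exists, so use all zeros.
    mask = np.zeros(shape=image.shape[:2])

assert mask.shape == (256, 256)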
68 changes: 0 additions & 68 deletions anomalib/data/base/depth.py

This file was deleted.

107 changes: 82 additions & 25 deletions anomalib/data/folder.py
@@ -12,6 +12,7 @@

 import albumentations as A
 from pandas import DataFrame
+from torchvision.datasets.folder import IMG_EXTENSIONS
 
 from anomalib.data.base import AnomalibDataModule, AnomalibDataset
 from anomalib.data.task_type import TaskType
@@ -22,7 +23,74 @@
     ValSplitMode,
     get_transforms,
 )
-from anomalib.data.utils.path import _prepare_files_labels, _resolve_path
+
+
+def _check_and_convert_path(path: str | Path) -> Path:
+    """Check an input path, and convert to Pathlib object.
+
+    Args:
+        path (str | Path): Input path.
+
+    Returns:
+        Path: Output path converted to pathlib object.
+    """
+    if not isinstance(path, Path):
+        path = Path(path)
+    return path
+
+
+def _prepare_files_labels(
+    path: str | Path, path_type: str, extensions: tuple[str, ...] | None = None
+) -> tuple[list, list]:
+    """Return a list of filenames and a list of corresponding labels.
+
+    Args:
+        path (str | Path): Path to the directory containing images.
+        path_type (str): Type of images in the provided path ("normal", "abnormal", "normal_test")
+        extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the
+            directory.
+
+    Returns:
+        List, List: Filenames of the images provided in the paths, labels of the images provided in the paths
+    """
+    path = _check_and_convert_path(path)
+    if extensions is None:
+        extensions = IMG_EXTENSIONS
+
+    if isinstance(extensions, str):
+        extensions = (extensions,)
+
+    filenames = [f for f in path.glob(r"**/*") if f.suffix in extensions and not f.is_dir()]
+    if not filenames:
+        raise RuntimeError(f"Found 0 {path_type} images in {path}")
+
+    labels = [path_type] * len(filenames)
+
+    return filenames, labels
+
+
+def _resolve_path(folder: str | Path, root: str | Path | None = None) -> Path:
+    """Combines root and folder and returns the absolute path.
+
+    This allows users to pass either a root directory and relative paths, or absolute paths to each of the
+    image sources. This function makes sure that the samples dataframe always contains absolute paths.
+
+    Args:
+        folder (str | Path | None): Folder location containing image or mask data.
+        root (str | Path | None): Root directory for the dataset.
+    """
+    folder = Path(folder)
+    if folder.is_absolute():
+        # path is absolute; return unmodified
+        path = folder
+    # path is relative.
+    elif root is None:
+        # no root provided; return absolute path
+        path = folder.resolve()
+    else:
+        # root provided; prepend root and return absolute path
+        path = (Path(root) / folder).resolve()
+    return path
 
 
 def make_folder_dataset(
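As a quick sanity check of the re-added helpers above, this is how _resolve_path behaves in the three cases its comments describe (hypothetical POSIX paths, assuming the function is in scope):

from pathlib import Path

# relative folder + root -> root is prepended, result is absolute
assert _resolve_path("images/normal", root="/data/custom") == Path("/data/custom/images/normal")

# absolute folder -> returned unmodified, root is ignored
assert _resolve_path("/data/other/normal", root="/data/custom") == Path("/data/other/normal")

# relative folder, no root -> resolved against the current working directory
assert _resolve_path("images/normal") == (Path.cwd() / "images/normal").resolve()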
@@ -69,42 +137,31 @@ def make_folder_dataset(
     if normal_test_dir:
         dirs = {**dirs, **{"normal_test": normal_test_dir}}
 
-    if mask_dir:
-        dirs = {**dirs, **{"mask_dir": mask_dir}}
-
     for dir_type, path in dirs.items():
         filename, label = _prepare_files_labels(path, dir_type, extensions)
         filenames += filename
         labels += label
 
-    samples = DataFrame({"image_path": filenames, "label": labels})
-    samples = samples.sort_values(by="image_path", ignore_index=True)
+    samples = DataFrame({"image_path": filenames, "label": labels, "mask_path": ""})
 
     # Create label index for normal (0) and abnormal (1) images.
     samples.loc[(samples.label == "normal") | (samples.label == "normal_test"), "label_index"] = 0
     samples.loc[(samples.label == "abnormal"), "label_index"] = 1
-    samples.label_index = samples.label_index.astype("Int64")
+    samples.label_index = samples.label_index.astype(int)
 
     # If a path to mask is provided, add it to the sample dataframe.
     if mask_dir is not None:
-        samples.loc[samples.label == "abnormal", "mask_path"] = samples.loc[
-            samples.label == "mask_dir"
-        ].image_path.values
-        samples = samples.astype({"mask_path": "str"})
-
-        # make sure every rgb image has a corresponding mask image.
-        assert (
-            samples.loc[samples.label_index == 1]
-            .apply(lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, axis=1)
-            .all()
-        ), "Mismatch between anomalous images and mask images. Make sure the mask files \
-            folder follow the same naming convention as the anomalous images in the dataset \
-            (e.g. image: '000.png', mask: '000.png')."
-
-    # remove all the rows with temporal image samples that have already been assigned
-    samples = samples.loc[
-        (samples.label == "normal") | (samples.label == "abnormal") | (samples.label == "normal_test")
-    ]
+        mask_dir = _check_and_convert_path(mask_dir)
+        for index, row in samples.iterrows():
+            if row.label_index == 1:
+                rel_image_path = row.image_path.relative_to(abnormal_dir)
+                samples.loc[index, "mask_path"] = str(mask_dir / rel_image_path)
+
+        # make sure all the files exist
+        # samples.image_path does NOT need to be checked because we build the df based on that
+        assert samples.mask_path.apply(
+            lambda x: Path(x).exists() if x != "" else True
+        ).all(), f"missing mask files, mask_dir={mask_dir}"
 
     # Ensure the pathlib objects are converted to str.
     # This is because torch dataloader doesn't like pathlib.
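The mask handling added back here derives each mask path from the image's location relative to abnormal_dir, so the mask folder must mirror the abnormal folder's structure. A short illustration with hypothetical paths:

from pathlib import Path

# Masks are expected to mirror the abnormal directory tree.
abnormal_dir = Path("dataset/abnormal")
mask_dir = Path("dataset/mask")

image_path = abnormal_dir / "crack" / "000.png"
rel_image_path = image_path.relative_to(abnormal_dir)  # -> crack/000.png

mask_path = mask_dir / rel_image_path
assert mask_path == Path("dataset/mask/crack/000.png")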