From 4095826b22d6bdc320f6a03fc822b35f93804e42 Mon Sep 17 00:00:00 2001
From: a r <alex.riedel@googlemail.com>
Date: Mon, 11 Apr 2022 13:45:31 +0200
Subject: [PATCH 1/4] define test split for folder

---
 anomalib/data/__init__.py |  1 +
 anomalib/data/folder.py   | 81 +++++++++++++++++++++++++++------------
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/anomalib/data/__init__.py b/anomalib/data/__init__.py
index 9a58732e18..7e24fc28b4 100644
--- a/anomalib/data/__init__.py
+++ b/anomalib/data/__init__.py
@@ -72,6 +72,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule
             normal=config.dataset.normal,
             abnormal=config.dataset.abnormal,
             task=config.dataset.task,
+            normal_test=config.dataset.normal_test,
             mask_dir=config.dataset.mask,
             extensions=config.dataset.extensions,
             split_ratio=config.dataset.split_ratio,
diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py
index 7e150ab284..53fb9931a0 100644
--- a/anomalib/data/folder.py
+++ b/anomalib/data/folder.py
@@ -57,9 +57,35 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path:
     return path
 
 
+def _prepare_files_labels(path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None):
+    """Return a list of filenames and list corresponding labels.
+
+    Args:
+        path (Union[str, Path]): Directory that is searched recursively for image files.
+        path_type (str): Label assigned to every image found ("normal", "abnormal" or "normal_test").
+        extensions (Optional[Tuple[str, ...]], optional): File suffixes accepted as images.
+            `IMG_EXTENSIONS` is used when `None`. Defaults to None.
+
+    Returns:
+        List, List: Paths of the images found under `path`, and one `path_type` label per image.
+    """
+    path = _check_and_convert_path(path)
+    if extensions is None:
+        extensions = IMG_EXTENSIONS
+
+    filenames = [f for f in path.glob(r"**/*") if f.suffix in extensions]  # recursive, exact suffix match
+    if len(filenames) == 0:  # fail early rather than return an empty list for this directory
+        raise RuntimeError(f"Found 0 {path_type} images in {path}")
+
+    labels = [path_type] * len(filenames)
+
+    return filenames, labels
+
+
 def make_dataset(
     normal_dir: Union[str, Path],
     abnormal_dir: Union[str, Path],
+    normal_test_dir: Optional[Union[str, Path]] = None,
     mask_dir: Optional[Union[str, Path]] = None,
     split: Optional[str] = None,
     split_ratio: float = 0.2,
@@ -72,6 +98,8 @@ def make_dataset(
     Args:
         normal_dir (Union[str, Path]): Path to the directory containing normal images.
         abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images.
+        normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
+            normal images for the test dataset. Defaults to None.
         mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
             the mask annotations. Defaults to None.
         split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
@@ -87,40 +115,31 @@ def make_dataset(
     Returns:
         DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
     """
-    normal_dir = _check_and_convert_path(normal_dir)
-    abnormal_dir = _check_and_convert_path(abnormal_dir)
-
-    if extensions is None:
-        extensions = IMG_EXTENSIONS
-
-    # Get filenames from normal and abnormal directory.
-    normal_filenames = [f for f in normal_dir.glob(r"**/*") if f.suffix in extensions]
-    abnormal_filenames = [f for f in abnormal_dir.glob(r"**/*") if f.suffix in extensions]
-    filenames = normal_filenames + abnormal_filenames
 
-    if len(normal_filenames) == 0:
-        raise RuntimeError(f"Found 0 normal images in {normal_dir}")
+    filenames = []
+    labels = []
+    dirs = {"normal": normal_dir, "abnormal": abnormal_dir}
 
-    if len(abnormal_filenames) == 0:
-        raise RuntimeError(f"Found 0 annormal images in {abnormal_dir}")
+    if normal_test_dir:  # keep abnormal last: mask paths below are assigned as [normal..., abnormal...]
+        dirs = {"normal": normal_dir, "normal_test": normal_test_dir, "abnormal": abnormal_dir}
 
-    # Add normal and abnormal labels to the samples as `label` column.
-    normal_labels = ["normal"] * len(normal_filenames)
-    abnormal_labels = ["abnormal"] * len(abnormal_filenames)
-    labels = normal_labels + abnormal_labels
+    for dir_type, path in dirs.items():
+        filename, label = _prepare_files_labels(path, dir_type, extensions)
+        filenames += filename
+        labels += label
 
     samples = DataFrame({"image_path": filenames, "label": labels})
 
     # Create label index for normal (0) and abnormal (1) images.
-    samples.loc[(samples.label == "normal"), "label_index"] = 0
+    samples.loc[(samples.label == "normal") | (samples.label == "normal_test"), "label_index"] = 0
     samples.loc[(samples.label == "abnormal"), "label_index"] = 1
     samples.label_index = samples.label_index.astype(int)
 
     # If a path to mask is provided, add it to the sample dataframe.
     if mask_dir is not None:
         mask_dir = _check_and_convert_path(mask_dir)
-        normal_gt = ["" for f in normal_filenames]
-        abnormal_gt = [str(mask_dir / f.name) for f in abnormal_filenames]
+        normal_gt = ["" for f in samples.loc[samples.label_index == 0]["image_path"]]
+        abnormal_gt = [str(mask_dir / f.name) for f in samples.loc[samples.label_index == 1]["image_path"]]
         gt_filenames = normal_gt + abnormal_gt
 
         samples["mask_path"] = gt_filenames
@@ -133,10 +152,12 @@ def make_dataset(
     # By default, all the normal samples are assigned as train.
     #   and all the abnormal samples are test.
     samples.loc[(samples.label == "normal"), "split"] = "train"
-    samples.loc[(samples.label == "abnormal"), "split"] = "test"
-    samples = split_normal_images_in_train_set(
-        samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
-    )
+    samples.loc[(samples.label == "abnormal") | (samples.label == "normal_test"), "split"] = "test"
+
+    if not normal_test_dir:
+        samples = split_normal_images_in_train_set(
+            samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
+        )
 
     # If `create_validation_set` is set to True, the test set is split into half.
     if create_validation_set:
@@ -159,6 +180,7 @@ def __init__(
         abnormal_dir: Union[Path, str],
         split: str,
         pre_process: PreProcessor,
+        normal_test: Optional[Union[Path, str]] = None,
         split_ratio: float = 0.2,
         mask_dir: Optional[Union[Path, str]] = None,
         extensions: Optional[Tuple[str, ...]] = None,
@@ -174,6 +196,8 @@ def __init__(
             split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
             pre_process (Optional[PreProcessor], optional): Image Pro-processor to apply transform.
                 Defaults to None.
+            normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
+                normal images for the test dataset. Defaults to None.
             split_ratio (float, optional): Ratio to split normal training images and add to the
                 test set in case test set doesn't contain any normal images.
                 Defaults to 0.2.
@@ -207,6 +231,7 @@ def __init__(
         self.samples = make_dataset(
             normal_dir=normal_dir,
             abnormal_dir=abnormal_dir,
+            normal_test_dir=normal_test,
             mask_dir=mask_dir,
             split=split,
             split_ratio=split_ratio,
@@ -271,6 +296,7 @@ def __init__(
         normal: str = "normal",
         abnormal: str = "abnormal",
         task: str = "classification",
+        normal_test: Optional[Union[Path, str]] = None,
         mask_dir: Optional[Union[Path, str]] = None,
         extensions: Optional[Tuple[str, ...]] = None,
         split_ratio: float = 0.2,
@@ -293,6 +319,8 @@ def __init__(
                 Defaults to "abnormal".
             task (str, optional): Task type. Could be either classification or segmentation.
                 Defaults to "classification".
+            normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
+                normal images for the test dataset. Defaults to None.
             mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
                 the mask annotations. Defaults to None.
             extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
@@ -384,6 +412,8 @@ def __init__(
         self.root = _check_and_convert_path(root)
         self.normal_dir = self.root / normal
         self.abnormal_dir = self.root / abnormal
+        if normal_test:
+            self.normal_test = self.root / normal_test
         self.mask_dir = mask_dir
         self.extensions = extensions
         self.split_ratio = split_ratio
@@ -457,6 +487,7 @@ def setup(self, stage: Optional[str] = None) -> None:
             normal_dir=self.normal_dir,
             abnormal_dir=self.abnormal_dir,
             split="test",
+            normal_test=self.normal_test,
             split_ratio=self.split_ratio,
             mask_dir=self.mask_dir,
             pre_process=self.pre_process_val,

From 93a9ab664ce9e0c0dd8bc2827332d629c34c584f Mon Sep 17 00:00:00 2001
From: a r <alex.riedel@googlemail.com>
Date: Mon, 11 Apr 2022 13:53:19 +0200
Subject: [PATCH 2/4] Always assign self.normal_test in FolderDataModule

---
 anomalib/data/folder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py
index 53fb9931a0..89ee1a75a8 100644
--- a/anomalib/data/folder.py
+++ b/anomalib/data/folder.py
@@ -319,7 +319,7 @@ def __init__(
                 Defaults to "abnormal".
             task (str, optional): Task type. Could be either classification or segmentation.
                 Defaults to "classification".
-            normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
+            normal_test (Optional[Union[str, Path]], optional): Path to the directory containing
                 normal images for the test dataset. Defaults to None.
             mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
                 the mask annotations. Defaults to None.
@@ -412,6 +412,7 @@ def __init__(
         self.root = _check_and_convert_path(root)
         self.normal_dir = self.root / normal
         self.abnormal_dir = self.root / abnormal
+        self.normal_test = normal_test
         if normal_test:
             self.normal_test = self.root / normal_test
         self.mask_dir = mask_dir

From ea58f7ec59f400a7d68a369aeeadb3928cbe0084 Mon Sep 17 00:00:00 2001
From: alexriedel1 <alex.riedel@googlemail.com>
Date: Tue, 12 Apr 2022 13:27:52 +0200
Subject: [PATCH 3/4] Rename folder directory arguments to *_dir

---
 anomalib/data/__init__.py |  6 +++---
 anomalib/data/folder.py   | 35 +++++++++++++++++++----------------
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/anomalib/data/__init__.py b/anomalib/data/__init__.py
index 7e24fc28b4..9ab65b72ee 100644
--- a/anomalib/data/__init__.py
+++ b/anomalib/data/__init__.py
@@ -69,10 +69,10 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule
     elif config.dataset.format.lower() == "folder":
         datamodule = FolderDataModule(
             root=config.dataset.path,
-            normal=config.dataset.normal,
-            abnormal=config.dataset.abnormal,
+            normal_dir=config.dataset.normal_dir,
+            abnormal_dir=config.dataset.abnormal_dir,
             task=config.dataset.task,
-            normal_test=config.dataset.normal_test,
+            normal_test_dir=config.dataset.normal_test_dir,
             mask_dir=config.dataset.mask,
             extensions=config.dataset.extensions,
             split_ratio=config.dataset.split_ratio,
diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py
index 89ee1a75a8..6f1020b263 100644
--- a/anomalib/data/folder.py
+++ b/anomalib/data/folder.py
@@ -57,7 +57,9 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path:
     return path
 
 
-def _prepare_files_labels(path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None):
+def _prepare_files_labels(
+    path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None
+) -> Tuple[list, list]:
     """Return a list of filenames and list corresponding labels.
 
     Args:
@@ -99,7 +101,8 @@ def make_dataset(
         normal_dir (Union[str, Path]): Path to the directory containing normal images.
         abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images.
         normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
-            normal images for the test dataset. Defaults to None.
+            normal images for the test dataset. Normal test images will be a split of `normal_dir`
+            if `None`. Defaults to None.
         mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
             the mask annotations. Defaults to None.
         split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
@@ -180,7 +183,7 @@ def __init__(
         abnormal_dir: Union[Path, str],
         split: str,
         pre_process: PreProcessor,
-        normal_test: Optional[Union[Path, str]] = None,
+        normal_test_dir: Optional[Union[Path, str]] = None,
         split_ratio: float = 0.2,
         mask_dir: Optional[Union[Path, str]] = None,
         extensions: Optional[Tuple[str, ...]] = None,
@@ -231,7 +234,7 @@ def __init__(
         self.samples = make_dataset(
             normal_dir=normal_dir,
             abnormal_dir=abnormal_dir,
-            normal_test_dir=normal_test,
+            normal_test_dir=normal_test_dir,
             mask_dir=mask_dir,
             split=split,
             split_ratio=split_ratio,
@@ -293,10 +296,10 @@ class FolderDataModule(LightningDataModule):
     def __init__(
         self,
         root: Union[str, Path],
-        normal: str = "normal",
-        abnormal: str = "abnormal",
+        normal_dir: str = "normal",
+        abnormal_dir: str = "abnormal",
         task: str = "classification",
-        normal_test: Optional[Union[Path, str]] = None,
+        normal_test_dir: Optional[Union[Path, str]] = None,
         mask_dir: Optional[Union[Path, str]] = None,
         extensions: Optional[Tuple[str, ...]] = None,
         split_ratio: float = 0.2,
@@ -313,13 +316,13 @@ def __init__(
 
         Args:
             root (Union[str, Path]): Path to the root folder containing normal and abnormal dirs.
-            normal (str, optional): Name of the directory containing normal images.
+            normal_dir (str, optional): Name of the directory containing normal images.
                 Defaults to "normal".
-            abnormal (str, optional): Name of the directory containing abnormal images.
+            abnormal_dir (str, optional): Name of the directory containing abnormal images.
                 Defaults to "abnormal".
             task (str, optional): Task type. Could be either classification or segmentation.
                 Defaults to "classification".
-            normal_test (Optional[Union[str, Path]], optional): Path to the directory containing
+            normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
                 normal images for the test dataset. Defaults to None.
             mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
                 the mask annotations. Defaults to None.
@@ -410,11 +413,11 @@ def __init__(
         super().__init__()
 
         self.root = _check_and_convert_path(root)
-        self.normal_dir = self.root / normal
-        self.abnormal_dir = self.root / abnormal
-        self.normal_test = normal_test
-        if normal_test:
-            self.normal_test = self.root / normal_test
+        self.normal_dir = self.root / normal_dir
+        self.abnormal_dir = self.root / abnormal_dir
+        self.normal_test = normal_test_dir
+        if normal_test_dir:
+            self.normal_test = self.root / normal_test_dir
         self.mask_dir = mask_dir
         self.extensions = extensions
         self.split_ratio = split_ratio
@@ -488,7 +491,7 @@ def setup(self, stage: Optional[str] = None) -> None:
             normal_dir=self.normal_dir,
             abnormal_dir=self.abnormal_dir,
             split="test",
-            normal_test=self.normal_test,
+            normal_test_dir=self.normal_test,
             split_ratio=self.split_ratio,
             mask_dir=self.mask_dir,
             pre_process=self.pre_process_val,

From ef0f23e8fa3e8e23254b18b3c234ce37ef9029f5 Mon Sep 17 00:00:00 2001
From: alexriedel1 <alex.riedel@googlemail.com>
Date: Tue, 12 Apr 2022 14:02:59 +0200
Subject: [PATCH 4/4] Update folder datamodule test for renamed *_dir arguments

---
 tests/pre_merge/datasets/test_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/pre_merge/datasets/test_dataset.py b/tests/pre_merge/datasets/test_dataset.py
index 46cd126f0d..ff17499c27 100644
--- a/tests/pre_merge/datasets/test_dataset.py
+++ b/tests/pre_merge/datasets/test_dataset.py
@@ -56,8 +56,8 @@ def folder_data_module():
     root = get_dataset_path(dataset="bottle")
     datamodule = FolderDataModule(
         root=root,
-        normal="good",
-        abnormal="broken_large",
+        normal_dir="good",
+        abnormal_dir="broken_large",
         mask_dir=os.path.join(root, "ground_truth/broken_large"),
         task="segmentation",
         split_ratio=0.2,