From 4095826b22d6bdc320f6a03fc822b35f93804e42 Mon Sep 17 00:00:00 2001 From: a r Date: Mon, 11 Apr 2022 13:45:31 +0200 Subject: [PATCH 1/4] define test split for folder --- anomalib/data/__init__.py | 1 + anomalib/data/folder.py | 81 +++++++++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/anomalib/data/__init__.py b/anomalib/data/__init__.py index 9a58732e18..7e24fc28b4 100644 --- a/anomalib/data/__init__.py +++ b/anomalib/data/__init__.py @@ -72,6 +72,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule normal=config.dataset.normal, abnormal=config.dataset.abnormal, task=config.dataset.task, + normal_test=config.dataset.normal_test, mask_dir=config.dataset.mask, extensions=config.dataset.extensions, split_ratio=config.dataset.split_ratio, diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py index 7e150ab284..53fb9931a0 100644 --- a/anomalib/data/folder.py +++ b/anomalib/data/folder.py @@ -57,9 +57,35 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path: return path +def _prepare_files_labels(path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None): + """Return a list of filenames and list corresponding labels. + + Args: + path (Union[str, Path]): Path to the directory containing images. + path_type (str): Type of images in the provided path ("normal", "abnormal", "normal_test") + extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the + directory. + + Returns: + List, List: Filenames of the images provided in the paths, labels of the images provided in the paths + """ + path = _check_and_convert_path(path) + if extensions is None: + extensions = IMG_EXTENSIONS + + filenames = [f for f in path.glob(r"**/*") if f.suffix in extensions] + if len(filenames) == 0: + raise RuntimeError(f"Found 0 {path_type} images in {path}") + + labels = [path_type] * len(filenames) + + return filenames, labels + + def make_dataset( normal_dir: Union[str, Path], abnormal_dir: Union[str, Path], + normal_test_dir: Optional[Union[str, Path]] = None, mask_dir: Optional[Union[str, Path]] = None, split: Optional[str] = None, split_ratio: float = 0.2, @@ -72,6 +98,8 @@ def make_dataset( Args: normal_dir (Union[str, Path]): Path to the directory containing normal images. abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images. + normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing + normal images for the test dataset. Defaults to None. mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing the mask annotations. Defaults to None. split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None. @@ -87,40 +115,31 @@ def make_dataset( Returns: DataFrame: an output dataframe containing samples for the requested split (ie., train or test) """ - normal_dir = _check_and_convert_path(normal_dir) - abnormal_dir = _check_and_convert_path(abnormal_dir) - - if extensions is None: - extensions = IMG_EXTENSIONS - - # Get filenames from normal and abnormal directory. - normal_filenames = [f for f in normal_dir.glob(r"**/*") if f.suffix in extensions] - abnormal_filenames = [f for f in abnormal_dir.glob(r"**/*") if f.suffix in extensions] - filenames = normal_filenames + abnormal_filenames - if len(normal_filenames) == 0: - raise RuntimeError(f"Found 0 normal images in {normal_dir}") + filenames = [] + labels = [] + dirs = {"normal": normal_dir, "abnormal": abnormal_dir} - if len(abnormal_filenames) == 0: - raise RuntimeError(f"Found 0 annormal images in {abnormal_dir}") + if normal_test_dir: + dirs = {**dirs, **{"normal_test": normal_test_dir}} - # Add normal and abnormal labels to the samples as `label` column. - normal_labels = ["normal"] * len(normal_filenames) - abnormal_labels = ["abnormal"] * len(abnormal_filenames) - labels = normal_labels + abnormal_labels + for dir_type, path in dirs.items(): + filename, label = _prepare_files_labels(path, dir_type, extensions) + filenames += filename + labels += label samples = DataFrame({"image_path": filenames, "label": labels}) # Create label index for normal (0) and abnormal (1) images. - samples.loc[(samples.label == "normal"), "label_index"] = 0 + samples.loc[(samples.label == "normal") | (samples.label == "normal_test"), "label_index"] = 0 samples.loc[(samples.label == "abnormal"), "label_index"] = 1 samples.label_index = samples.label_index.astype(int) # If a path to mask is provided, add it to the sample dataframe. if mask_dir is not None: mask_dir = _check_and_convert_path(mask_dir) - normal_gt = ["" for f in normal_filenames] - abnormal_gt = [str(mask_dir / f.name) for f in abnormal_filenames] + normal_gt = ["" for f in samples.loc[samples.label_index == 0]["image_path"]] + abnormal_gt = [str(mask_dir / f.name) for f in samples.loc[samples.label_index == 1]["image_path"]] gt_filenames = normal_gt + abnormal_gt samples["mask_path"] = gt_filenames @@ -133,10 +152,12 @@ def make_dataset( # By default, all the normal samples are assigned as train. # and all the abnormal samples are test. samples.loc[(samples.label == "normal"), "split"] = "train" - samples.loc[(samples.label == "abnormal"), "split"] = "test" - samples = split_normal_images_in_train_set( - samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal" - ) + samples.loc[(samples.label == "abnormal") | (samples.label == "normal_test"), "split"] = "test" + + if not normal_test_dir: + samples = split_normal_images_in_train_set( + samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal" + ) # If `create_validation_set` is set to True, the test set is split into half. if create_validation_set: @@ -159,6 +180,7 @@ def __init__( abnormal_dir: Union[Path, str], split: str, pre_process: PreProcessor, + normal_test: Optional[Union[Path, str]] = None, split_ratio: float = 0.2, mask_dir: Optional[Union[Path, str]] = None, extensions: Optional[Tuple[str, ...]] = None, @@ -174,6 +196,8 @@ def __init__( split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None. pre_process (Optional[PreProcessor], optional): Image Pro-processor to apply transform. Defaults to None. + normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing + normal images for the test dataset. Defaults to None. split_ratio (float, optional): Ratio to split normal training images and add to the test set in case test set doesn't contain any normal images. Defaults to 0.2. @@ -207,6 +231,7 @@ def __init__( self.samples = make_dataset( normal_dir=normal_dir, abnormal_dir=abnormal_dir, + normal_test_dir=normal_test, mask_dir=mask_dir, split=split, split_ratio=split_ratio, @@ -271,6 +296,7 @@ def __init__( normal: str = "normal", abnormal: str = "abnormal", task: str = "classification", + normal_test: Optional[Union[Path, str]] = None, mask_dir: Optional[Union[Path, str]] = None, extensions: Optional[Tuple[str, ...]] = None, split_ratio: float = 0.2, @@ -293,6 +319,8 @@ def __init__( Defaults to "abnormal". task (str, optional): Task type. Could be either classification or segmentation. Defaults to "classification". + normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing + normal images for the test dataset. Defaults to None. mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing the mask annotations. Defaults to None. extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the @@ -384,6 +412,8 @@ def __init__( self.root = _check_and_convert_path(root) self.normal_dir = self.root / normal self.abnormal_dir = self.root / abnormal + if normal_test: + self.normal_test = self.root / normal_test self.mask_dir = mask_dir self.extensions = extensions self.split_ratio = split_ratio @@ -457,6 +487,7 @@ def setup(self, stage: Optional[str] = None) -> None: normal_dir=self.normal_dir, abnormal_dir=self.abnormal_dir, split="test", + normal_test=self.normal_test, split_ratio=self.split_ratio, mask_dir=self.mask_dir, pre_process=self.pre_process_val, From 93a9ab664ce9e0c0dd8bc2827332d629c34c584f Mon Sep 17 00:00:00 2001 From: a r Date: Mon, 11 Apr 2022 13:53:19 +0200 Subject: [PATCH 2/4] assign normal_test --- anomalib/data/folder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py index 53fb9931a0..89ee1a75a8 100644 --- a/anomalib/data/folder.py +++ b/anomalib/data/folder.py @@ -319,7 +319,7 @@ def __init__( Defaults to "abnormal". task (str, optional): Task type. Could be either classification or segmentation. Defaults to "classification". - normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing + normal_test (Optional[Union[str, Path]], optional): Path to the directory containing normal images for the test dataset. Defaults to None. mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing the mask annotations. Defaults to None. @@ -412,6 +412,7 @@ def __init__( self.root = _check_and_convert_path(root) self.normal_dir = self.root / normal self.abnormal_dir = self.root / abnormal + self.normal_test = normal_test if normal_test: self.normal_test = self.root / normal_test self.mask_dir = mask_dir From ea58f7ec59f400a7d68a369aeeadb3928cbe0084 Mon Sep 17 00:00:00 2001 From: alexriedel1 Date: Tue, 12 Apr 2022 13:27:52 +0200 Subject: [PATCH 3/4] dir names fixed --- anomalib/data/__init__.py | 6 +++--- anomalib/data/folder.py | 35 +++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/anomalib/data/__init__.py b/anomalib/data/__init__.py index 7e24fc28b4..9ab65b72ee 100644 --- a/anomalib/data/__init__.py +++ b/anomalib/data/__init__.py @@ -69,10 +69,10 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule elif config.dataset.format.lower() == "folder": datamodule = FolderDataModule( root=config.dataset.path, - normal=config.dataset.normal, - abnormal=config.dataset.abnormal, + normal_dir=config.dataset.normal_dir, + abnormal_dir=config.dataset.abnormal_dir, task=config.dataset.task, - normal_test=config.dataset.normal_test, + normal_test_dir=config.dataset.normal_test_dir, mask_dir=config.dataset.mask, extensions=config.dataset.extensions, split_ratio=config.dataset.split_ratio, diff --git a/anomalib/data/folder.py b/anomalib/data/folder.py index 89ee1a75a8..6f1020b263 100644 --- a/anomalib/data/folder.py +++ b/anomalib/data/folder.py @@ -57,7 +57,9 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path: return path -def _prepare_files_labels(path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None): +def _prepare_files_labels( + path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None +) -> Tuple[list, list]: """Return a list of filenames and list corresponding labels. Args: @@ -99,7 +101,8 @@ def make_dataset( normal_dir (Union[str, Path]): Path to the directory containing normal images. abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images. normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing - normal images for the test dataset. Defaults to None. + normal images for the test dataset. Normal test images will be a split of `normal_dir` + if `None`. Defaults to None. mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing the mask annotations. Defaults to None. split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None. @@ -180,7 +183,7 @@ def __init__( abnormal_dir: Union[Path, str], split: str, pre_process: PreProcessor, - normal_test: Optional[Union[Path, str]] = None, + normal_test_dir: Optional[Union[Path, str]] = None, split_ratio: float = 0.2, mask_dir: Optional[Union[Path, str]] = None, extensions: Optional[Tuple[str, ...]] = None, @@ -231,7 +234,7 @@ def __init__( self.samples = make_dataset( normal_dir=normal_dir, abnormal_dir=abnormal_dir, - normal_test_dir=normal_test, + normal_test_dir=normal_test_dir, mask_dir=mask_dir, split=split, split_ratio=split_ratio, @@ -293,10 +296,10 @@ class FolderDataModule(LightningDataModule): def __init__( self, root: Union[str, Path], - normal: str = "normal", - abnormal: str = "abnormal", + normal_dir: str = "normal", + abnormal_dir: str = "abnormal", task: str = "classification", - normal_test: Optional[Union[Path, str]] = None, + normal_test_dir: Optional[Union[Path, str]] = None, mask_dir: Optional[Union[Path, str]] = None, extensions: Optional[Tuple[str, ...]] = None, split_ratio: float = 0.2, @@ -313,13 +316,13 @@ def __init__( Args: root (Union[str, Path]): Path to the root folder containing normal and abnormal dirs. - normal (str, optional): Name of the directory containing normal images. + normal_dir (str, optional): Name of the directory containing normal images. Defaults to "normal". - abnormal (str, optional): Name of the directory containing abnormal images. + abnormal_dir (str, optional): Name of the directory containing abnormal images. Defaults to "abnormal". task (str, optional): Task type. Could be either classification or segmentation. Defaults to "classification". - normal_test (Optional[Union[str, Path]], optional): Path to the directory containing + normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing normal images for the test dataset. Defaults to None. mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing the mask annotations. Defaults to None. @@ -410,11 +413,11 @@ def __init__( super().__init__() self.root = _check_and_convert_path(root) - self.normal_dir = self.root / normal - self.abnormal_dir = self.root / abnormal - self.normal_test = normal_test - if normal_test: - self.normal_test = self.root / normal_test + self.normal_dir = self.root / normal_dir + self.abnormal_dir = self.root / abnormal_dir + self.normal_test = normal_test_dir + if normal_test_dir: + self.normal_test = self.root / normal_test_dir self.mask_dir = mask_dir self.extensions = extensions self.split_ratio = split_ratio @@ -488,7 +491,7 @@ def setup(self, stage: Optional[str] = None) -> None: normal_dir=self.normal_dir, abnormal_dir=self.abnormal_dir, split="test", - normal_test=self.normal_test, + normal_test_dir=self.normal_test, split_ratio=self.split_ratio, mask_dir=self.mask_dir, pre_process=self.pre_process_val, From ef0f23e8fa3e8e23254b18b3c234ce37ef9029f5 Mon Sep 17 00:00:00 2001 From: alexriedel1 Date: Tue, 12 Apr 2022 14:02:59 +0200 Subject: [PATCH 4/4] fix test --- tests/pre_merge/datasets/test_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pre_merge/datasets/test_dataset.py b/tests/pre_merge/datasets/test_dataset.py index 46cd126f0d..ff17499c27 100644 --- a/tests/pre_merge/datasets/test_dataset.py +++ b/tests/pre_merge/datasets/test_dataset.py @@ -56,8 +56,8 @@ def folder_data_module(): root = get_dataset_path(dataset="bottle") datamodule = FolderDataModule( root=root, - normal="good", - abnormal="broken_large", + normal_dir="good", + abnormal_dir="broken_large", mask_dir=os.path.join(root, "ground_truth/broken_large"), task="segmentation", split_ratio=0.2,