Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

assign test split for folder dataset #220

Merged
merged 4 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions anomalib/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> LightningDataModule
normal=config.dataset.normal,
abnormal=config.dataset.abnormal,
task=config.dataset.task,
normal_test=config.dataset.normal_test,
alexriedel1 marked this conversation as resolved.
Show resolved Hide resolved
mask_dir=config.dataset.mask,
extensions=config.dataset.extensions,
split_ratio=config.dataset.split_ratio,
Expand Down
82 changes: 57 additions & 25 deletions anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,35 @@ def _check_and_convert_path(path: Union[str, Path]) -> Path:
return path


def _prepare_files_labels(path: Union[str, Path], path_type: str, extensions: Optional[Tuple[str, ...]] = None):
    """Collect the image files below a directory and tag each one with the same label.

    Args:
        path (Union[str, Path]): Directory that is searched recursively for images.
        path_type (str): Label given to every image found ("normal", "abnormal", "normal_test").
        extensions (Optional[Tuple[str, ...]], optional): File suffixes accepted as images.
            Falls back to ``IMG_EXTENSIONS`` when None.

    Raises:
        RuntimeError: If no file with an accepted suffix is found under ``path``.

    Returns:
        List, List: The matching file paths, and a list of the same length holding ``path_type``.
    """
    path = _check_and_convert_path(path)
    allowed_suffixes = IMG_EXTENSIONS if extensions is None else extensions

    # Walk the whole tree below `path`; an entry is kept purely on its suffix.
    filenames = []
    for candidate in path.glob(r"**/*"):
        if candidate.suffix in allowed_suffixes:
            filenames.append(candidate)

    if not filenames:
        raise RuntimeError(f"Found 0 {path_type} images in {path}")

    # One label per file, in the same order as `filenames`.
    labels = [path_type for _ in filenames]
    return filenames, labels


def make_dataset(
normal_dir: Union[str, Path],
abnormal_dir: Union[str, Path],
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
split: Optional[str] = None,
split_ratio: float = 0.2,
Expand All @@ -72,6 +98,8 @@ def make_dataset(
Args:
normal_dir (Union[str, Path]): Path to the directory containing normal images.
abnormal_dir (Union[str, Path]): Path to the directory containing abnormal images.
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
alexriedel1 marked this conversation as resolved.
Show resolved Hide resolved
mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
the mask annotations. Defaults to None.
split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
Expand All @@ -87,40 +115,31 @@ def make_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
"""
normal_dir = _check_and_convert_path(normal_dir)
abnormal_dir = _check_and_convert_path(abnormal_dir)

if extensions is None:
extensions = IMG_EXTENSIONS

# Get filenames from normal and abnormal directory.
normal_filenames = [f for f in normal_dir.glob(r"**/*") if f.suffix in extensions]
abnormal_filenames = [f for f in abnormal_dir.glob(r"**/*") if f.suffix in extensions]
filenames = normal_filenames + abnormal_filenames

if len(normal_filenames) == 0:
raise RuntimeError(f"Found 0 normal images in {normal_dir}")
filenames = []
labels = []
dirs = {"normal": normal_dir, "abnormal": abnormal_dir}

if len(abnormal_filenames) == 0:
raise RuntimeError(f"Found 0 annormal images in {abnormal_dir}")
if normal_test_dir:
dirs = {**dirs, **{"normal_test": normal_test_dir}}

# Add normal and abnormal labels to the samples as `label` column.
normal_labels = ["normal"] * len(normal_filenames)
abnormal_labels = ["abnormal"] * len(abnormal_filenames)
labels = normal_labels + abnormal_labels
for dir_type, path in dirs.items():
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label

samples = DataFrame({"image_path": filenames, "label": labels})

# Create label index for normal (0) and abnormal (1) images.
samples.loc[(samples.label == "normal"), "label_index"] = 0
samples.loc[(samples.label == "normal") | (samples.label == "normal_test"), "label_index"] = 0
samples.loc[(samples.label == "abnormal"), "label_index"] = 1
samples.label_index = samples.label_index.astype(int)

# If a path to mask is provided, add it to the sample dataframe.
if mask_dir is not None:
mask_dir = _check_and_convert_path(mask_dir)
normal_gt = ["" for f in normal_filenames]
abnormal_gt = [str(mask_dir / f.name) for f in abnormal_filenames]
normal_gt = ["" for f in samples.loc[samples.label_index == 0]["image_path"]]
abnormal_gt = [str(mask_dir / f.name) for f in samples.loc[samples.label_index == 1]["image_path"]]
gt_filenames = normal_gt + abnormal_gt

samples["mask_path"] = gt_filenames
Expand All @@ -133,10 +152,12 @@ def make_dataset(
# By default, all the normal samples are assigned as train.
# and all the abnormal samples are test.
samples.loc[(samples.label == "normal"), "split"] = "train"
samples.loc[(samples.label == "abnormal"), "split"] = "test"
samples = split_normal_images_in_train_set(
samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
)
samples.loc[(samples.label == "abnormal") | (samples.label == "normal_test"), "split"] = "test"

if not normal_test_dir:
samples = split_normal_images_in_train_set(
samples=samples, split_ratio=split_ratio, seed=seed, normal_label="normal"
)

# If `create_validation_set` is set to True, the test set is split into half.
if create_validation_set:
Expand All @@ -159,6 +180,7 @@ def __init__(
abnormal_dir: Union[Path, str],
split: str,
pre_process: PreProcessor,
normal_test: Optional[Union[Path, str]] = None,
alexriedel1 marked this conversation as resolved.
Show resolved Hide resolved
split_ratio: float = 0.2,
mask_dir: Optional[Union[Path, str]] = None,
extensions: Optional[Tuple[str, ...]] = None,
Expand All @@ -174,6 +196,8 @@ def __init__(
split (Optional[str], optional): Dataset split (ie., either train or test). Defaults to None.
pre_process (Optional[PreProcessor], optional): Image pre-processor to apply transform.
Defaults to None.
normal_test (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Defaults to 0.2.
Expand Down Expand Up @@ -207,6 +231,7 @@ def __init__(
self.samples = make_dataset(
normal_dir=normal_dir,
abnormal_dir=abnormal_dir,
normal_test_dir=normal_test,
mask_dir=mask_dir,
split=split,
split_ratio=split_ratio,
Expand Down Expand Up @@ -271,6 +296,7 @@ def __init__(
normal: str = "normal",
abnormal: str = "abnormal",
task: str = "classification",
normal_test: Optional[Union[Path, str]] = None,
mask_dir: Optional[Union[Path, str]] = None,
extensions: Optional[Tuple[str, ...]] = None,
split_ratio: float = 0.2,
Expand All @@ -293,6 +319,8 @@ def __init__(
Defaults to "abnormal".
task (str, optional): Task type. Could be either classification or segmentation.
Defaults to "classification".
normal_test (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (Optional[Union[str, Path]], optional): Path to the directory containing
the mask annotations. Defaults to None.
extensions (Optional[Tuple[str, ...]], optional): Type of the image extensions to read from the
Expand Down Expand Up @@ -384,6 +412,9 @@ def __init__(
self.root = _check_and_convert_path(root)
self.normal_dir = self.root / normal
self.abnormal_dir = self.root / abnormal
self.normal_test = normal_test
if normal_test:
self.normal_test = self.root / normal_test
self.mask_dir = mask_dir
self.extensions = extensions
self.split_ratio = split_ratio
Expand Down Expand Up @@ -457,6 +488,7 @@ def setup(self, stage: Optional[str] = None) -> None:
normal_dir=self.normal_dir,
abnormal_dir=self.abnormal_dir,
split="test",
normal_test=self.normal_test,
split_ratio=self.split_ratio,
mask_dir=self.mask_dir,
pre_process=self.pre_process_val,
Expand Down