add tests

openvinotoolkit · djdameln · Dec 14, 2022 · Sep 9, 2022 · Sep 12, 2022 · Sep 13, 2022
commit dfd2d80266698758bb96c696ad5fa1dd7a98c619
diff --git a/tests/pre_merge/datasets/test_datamodule.py b/tests/pre_merge/datasets/test_datamodule.py
@@ -37,7 +37,7 @@ def make_avenue_data_module(task="classification", batch_size=1, val_split_mode=
     return data_module
 
 
-def make_mvtec_data_module(task="classification", batch_size=1, val_split_mode="from_test"):
+def make_mvtec_data_module(task="classification", batch_size=1, test_split_mode="from_dir", val_split_mode="from_test"):
     data_module = MVTec(
         root=get_dataset_path(dataset="MVTec"),
         category="leather",
@@ -46,14 +46,15 @@ def make_mvtec_data_module(task="classification", batch_size=1, val_split_mode="
         eval_batch_size=batch_size,
         num_workers=0,
         task=task,
+        test_split_mode=test_split_mode,
         val_split_mode=val_split_mode,
     )
     data_module.prepare_data()
     data_module.setup()
     return data_module
 
 
-def make_btech_data_module(task="classification", batch_size=1, val_split_mode="from_test"):
+def make_btech_data_module(task="classification", batch_size=1, test_split_mode="from_dir", val_split_mode="from_test"):
     """Create BTech Data Module."""
     data_module = BTech(
         root=get_dataset_path(dataset="BTech"),
@@ -63,27 +64,38 @@ def make_btech_data_module(task="classification", batch_size=1, val_split_mode="
         eval_batch_size=batch_size,
         num_workers=0,
         task=task,
+        test_split_mode=test_split_mode,
         val_split_mode=val_split_mode,
     )
     data_module.prepare_data()
     data_module.setup()
     return data_module
 
 
-def make_folder_data_module(task="classification", batch_size=1, val_split_mode="from_test"):
+def make_folder_data_module(
+    task="classification",
+    batch_size=1,
+    test_split_mode="from_dir",
+    val_split_mode="from_test",
+    normal_dir="good",
+    abnormal_dir="broken_large",
+    normal_test_dir="good_test",
+):
     """Create Folder Data Module."""
     root = get_dataset_path(dataset="bottle")
     data_module = Folder(
         root=root,
-        normal_dir="good",
-        abnormal_dir="broken_large",
+        normal_dir=normal_dir,
+        abnormal_dir=abnormal_dir,
+        normal_test_dir=normal_test_dir,
         mask_dir=os.path.join(root, "ground_truth/broken_large"),
         normal_split_ratio=0.2,
         image_size=(256, 256),
         train_batch_size=batch_size,
         eval_batch_size=batch_size,
         num_workers=8,
         task=task,
+        test_split_mode=test_split_mode,
         val_split_mode=val_split_mode,
     )
     data_module.setup()
@@ -116,8 +128,8 @@ def make_ucsdped_data_module(task="classification", batch_size=1, val_split_mode
 
 @pytest.fixture(autouse=True)
 def make_data_module():
-    def make(dataset="folder", task="classification", batch_size=1, val_split_mode="from_test"):
-        return DATASETS[dataset](task=task, batch_size=batch_size, val_split_mode=val_split_mode)
+    def make(dataset="folder", **kwargs):
+        return DATASETS[dataset](**kwargs)
 
     return make
 
@@ -271,3 +283,46 @@ def test_image_size(self, input_size, effective_image_size, category="shapes", p
         data_module = get_datamodule(configurable_parameters)
         data_module.setup()
         assert next(iter(data_module.train_dataloader()))["image"].shape[-2:] == effective_image_size
+
+
+class TestSubsetSplitting:
+    """Tests for how the data modules populate their train, val and test subsets."""
+
+    @pytest.mark.parametrize("dataset", ["folder", "mvtec", "btech"])
+    @pytest.mark.parametrize("test_split_mode", ("from_dir", "synthetic"))
+    @pytest.mark.parametrize("val_split_mode", ("from_test", "synthetic"))
+    def test_non_overlapping_splits(self, make_data_module, dataset, test_split_mode, val_split_mode):
+        """Tests that the test split shares no image paths with either the train split or the val split."""
+        datamodule = make_data_module(dataset, test_split_mode=test_split_mode, val_split_mode=val_split_mode)
+        train_paths = set(datamodule.train_data.samples.image_path)
+        val_paths = set(datamodule.val_data.samples.image_path)
+        test_paths = set(datamodule.test_data.samples.image_path)
+        assert train_paths.isdisjoint(test_paths)
+        assert val_paths.isdisjoint(test_paths)
+
+    @pytest.mark.parametrize("dataset", ["folder", "mvtec", "btech"])
+    @pytest.mark.parametrize("test_split_mode", ("from_dir", "synthetic"))
+    def test_equal_splits(self, make_data_module, dataset, test_split_mode):
+        """Tests that every val sample is in test, and test is disjoint from train, for val_split_mode same_as_test."""
+        datamodule = make_data_module(dataset, test_split_mode=test_split_mode, val_split_mode="same_as_test")
+        train_paths = set(datamodule.train_data.samples.image_path)
+        val_samples = datamodule.val_data.samples
+        test_paths = set(datamodule.test_data.samples.image_path)
+        assert not train_paths & test_paths
+        assert len(test_paths.intersection(val_samples.image_path)) == len(val_samples)
+
+    @pytest.mark.parametrize("test_split_mode", ("from_dir", "synthetic"))
+    def test_normal_test_dir_omitted(self, make_data_module, test_split_mode):
+        """The test set should always contain normal samples, even when no normal_test_dir is provided."""
+        datamodule = make_data_module(dataset="folder", test_split_mode=test_split_mode, normal_test_dir=None)
+        assert datamodule.test_data.has_normal
+
+    def test_abnormal_dir_omitted_from_dir(self, make_data_module):
+        """The test set should hold no anomalous samples when abnormal_dir is omitted and split mode is from_dir."""
+        datamodule = make_data_module(dataset="folder", test_split_mode="from_dir", abnormal_dir=None)
+        assert not datamodule.test_data.has_anomalous
+
+    def test_abnormal_dir_omitted_synthetic(self, make_data_module):
+        """The test set should hold anomalous samples when abnormal_dir is omitted and split mode is synthetic."""
+        datamodule = make_data_module(dataset="folder", test_split_mode="synthetic", abnormal_dir=None)
+        assert datamodule.test_data.has_anomalous
diff --git a/tests/pre_merge/datasets/test_synthetic_data.py b/tests/pre_merge/datasets/test_synthetic_data.py
@@ -0,0 +1,93 @@
+"""Tests for synthetic anomalous dataset."""
+import os
+from copy import copy, deepcopy
+from pathlib import Path
+
+import pytest
+
+from anomalib.data import TaskType
+from anomalib.data.folder import FolderDataset
+from anomalib.data.synthetic import SyntheticAnomalyDataset
+from anomalib.pre_processing import PreProcessor
+from tests.helpers.dataset import get_dataset_path
+
+
+def get_folder_dataset():
+    """Build the train split of the "bottle" folder dataset for segmentation and run its setup."""
+    bottle_root = get_dataset_path(dataset="bottle")
+    # The mask directory is addressed relative to the dataset root.
+    mask_dir = os.path.join(bottle_root, "ground_truth/broken_large")
+    folder_dataset = FolderDataset(
+        task="segmentation",
+        pre_process=PreProcessor(image_size=(256, 256)),
+        root=bottle_root,
+        normal_dir="good",
+        abnormal_dir="broken_large",
+        mask_dir=mask_dir,
+        split="train",
+    )
+    folder_dataset.setup()
+    return folder_dataset
+
+
+@pytest.fixture
+def make_synthetic_dataset():
+    """Return a factory that builds a synthetic anomaly dataset from a freshly created folder dataset."""
+
+    # Not autouse: the factory only does work when called, and every consumer requests it by name.
+    def make():
+        """Create a new synthetic dataset on every call."""
+        return SyntheticAnomalyDataset.from_dataset(get_folder_dataset())
+
+    return make
+
+
+@pytest.fixture
+def synthetic_dataset_from_samples():
+    """Create a synthetic anomaly dataset directly from the samples dataframe of a folder dataset."""
+    # Not autouse: only tests that request this fixture by name should pay for building the dataset.
+    folder_dataset = get_folder_dataset()
+    pre_process = PreProcessor(image_size=(256, 256))
+    return SyntheticAnomalyDataset(
+        task=folder_dataset.task, pre_process=pre_process, source_samples=folder_dataset.samples
+    )
+
+
+def test_create_synthetic_dataset(make_synthetic_dataset):
+    """Tests that every image and mask path listed by a dataset built through the factory fixture exists."""
+    dataset = make_synthetic_dataset()  # keep the instance referenced while its paths are checked
+    for path_column in (dataset.samples.image_path, dataset.samples.mask_path):
+        assert all(Path(entry).exists() for entry in path_column)
+
+
+def test_create_from_dataset(synthetic_dataset_from_samples):
+    """Tests that the listed image and mask files exist for a dataset instantiated from a samples dataframe."""
+    samples = synthetic_dataset_from_samples.samples
+    assert all(Path(image_path).exists() for image_path in samples.image_path)
+    assert all(Path(mask_path).exists() for mask_path in samples.mask_path)
+
+
+def test_cleanup(make_synthetic_dataset):
+    """Tests that the dataset's root directory no longer exists once the instance has been deleted."""
+    dataset = make_synthetic_dataset()
+    dataset_root = dataset.root
+    del dataset  # drop this test's reference to the instance
+    assert not dataset_root.exists()
+
+
+def test_copy(make_synthetic_dataset):
+    """Tests that a shallow copy holds equal samples and that its root survives deletion of the original."""
+    original = make_synthetic_dataset()
+    shallow_copy = copy(original)
+    assert original.samples.equals(shallow_copy.samples)  # all(df == df) would only test the column labels
+    del original
+    assert shallow_copy.root.exists()
+
+
+def test_deepcopy(make_synthetic_dataset):
+    """Tests that a deep copy holds equal samples and that its root survives deletion of the original."""
+    original = make_synthetic_dataset()
+    deep_copy = deepcopy(original)
+    assert original.samples.equals(deep_copy.samples)  # all(df == df) would only test the column labels
+    del original
+    assert deep_copy.root.exists()