diff --git a/datasets.py b/datasets.py
index b4e3a7c..704a1e5 100644
--- a/datasets.py
+++ b/datasets.py
@@ -33,9 +33,9 @@ def detection_dataset_builder(*, api_version, rng, num_samples):
 
     dataset = _coco_remove_images_without_annotations(dataset)
 
-    idcs = torch.randperm(len(dataset), generator=rng)[:num_samples]
-    print(f"Caching {num_samples} COCO samples")
-    return [dataset[idx] for idx in tqdm(idcs.tolist())]
+    idcs = torch.randperm(len(dataset), generator=rng)[:num_samples].tolist()
+    print(f"Caching {num_samples} ({idcs[:3]} ... {idcs[-3:]}) COCO samples")
+    return [dataset[idx] for idx in tqdm(idcs)]
 
 
 # everything below is copy-pasted from
diff --git a/main.py b/main.py
index 48fb6d2..a883169 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,5 @@
 import contextlib
+import itertools
 import pathlib
 import string
 import sys
@@ -33,87 +34,87 @@ def main(*, input_types, tasks, num_samples):
     # https://github.com/pytorch/pytorch/blob/19162083f8831be87be01bb84f186310cad1d348/torch/utils/data/_utils/worker.py#L222
     torch.set_num_threads(1)
 
+    dataset_rng = torch.Generator()
+    dataset_rng.manual_seed(0)
+    dataset_rng_state = dataset_rng.get_state()
+
     for task_name in tasks:
         print("#" * 60)
         print(task_name)
         print("#" * 60)
 
         medians = {input_type: {} for input_type in input_types}
-        for input_type in input_types:
-            dataset_rng = torch.Generator()
-            dataset_rng.manual_seed(0)
-            dataset_rng_state = dataset_rng.get_state()
-
-            for api_version in ["v1", "v2"]:
-                dataset_rng.set_state(dataset_rng_state)
-                task = make_task(
-                    task_name,
-                    input_type=input_type,
-                    api_version=api_version,
-                    dataset_rng=dataset_rng,
-                    num_samples=num_samples,
-                )
-                if task is None:
-                    continue
-
-                print(f"{input_type=}, {api_version=}")
-                print()
-                print(f"Results computed for {num_samples:_} samples")
-                print()
-
-                pipeline, dataset = task
-
-                for sample in dataset:
-                    pipeline(sample)
-
-                results = pipeline.extract_times()
-                field_len = max(len(name) for name in results)
-                print(f"{' ' * field_len}  {'median   ':>9}    {'std   ':>9}")
-                medians[input_type][api_version] = 0.0
-                for transform_name, times in results.items():
-                    median = float(times.median())
-                    print(
-                        f"{transform_name:{field_len}}  {median * 1e6:6.0f} µs +- {float(times.std()) * 1e6:6.0f} µs"
-                    )
-                    medians[input_type][api_version] += median
+        for input_type, api_version in itertools.product(input_types, ["v1", "v2"]):
+            dataset_rng.set_state(dataset_rng_state)
+            task = make_task(
+                task_name,
+                input_type=input_type,
+                api_version=api_version,
+                dataset_rng=dataset_rng,
+                num_samples=num_samples,
+            )
+            if task is None:
+                continue
 
-                print(
-                    f"\n{'total':{field_len}}  {medians[input_type][api_version] * 1e6:6.0f} µs"
-                )
-                print("-" * 60)
+            print(f"{input_type=}, {api_version=}")
+            print()
+            print(f"Results computed for {num_samples:_} samples")
+            print()
 
-        print()
-        print("Summaries")
-        print()
+            pipeline, dataset = task
 
-        field_len = max(len(input_type) for input_type in medians)
-        print(f"{' ' * field_len}  v2 / v1")
-        for input_type, api_versions in medians.items():
-            if len(api_versions) < 2:
-                continue
+            torch.manual_seed(0)
+            for sample in dataset:
+                pipeline(sample)
+
+            results = pipeline.extract_times()
+            field_len = max(len(name) for name in results)
+            print(f"{' ' * field_len}  {'median   ':>9}    {'std   ':>9}")
+            medians[input_type][api_version] = 0.0
+            for transform_name, times in results.items():
+                median = float(times.median())
+                print(
+                    f"{transform_name:{field_len}}  {median * 1e6:6.0f} µs +- {float(times.std()) * 1e6:6.0f} µs"
+                )
+                medians[input_type][api_version] += median
 
             print(
-                f"{input_type:{field_len}}  {api_versions['v2'] / api_versions['v1']:>7.2f}"
+                f"\n{'total':{field_len}}  {medians[input_type][api_version] * 1e6:6.0f} µs"
             )
+            print("-" * 60)
 
-        print()
+    print()
+    print("Summaries")
+    print()
 
-        medians_flat = {
-            f"{input_type}, {api_version}": median
-            for input_type, api_versions in medians.items()
-            for api_version, median in api_versions.items()
-        }
-        field_len = max(len(label) for label in medians_flat)
+    field_len = max(len(input_type) for input_type in medians)
+    print(f"{' ' * field_len}  v2 / v1")
+    for input_type, api_versions in medians.items():
+        if len(api_versions) < 2:
+            continue
 
         print(
-            f"{' ' * (field_len + 5)}  {'  '.join(f' [{id}]' for _, id in zip(range(len(medians_flat)), string.ascii_lowercase))}"
+            f"{input_type:{field_len}}  {api_versions['v2'] / api_versions['v1']:>7.2f}"
         )
-        for (label, val), id in zip(medians_flat.items(), string.ascii_lowercase):
-            print(
-                f"{label:>{field_len}}, [{id}]  {'  '.join(f'{val / ref:4.2f}' for ref in medians_flat.values())}"
-            )
-        print()
-        print("Slowdown as row / col")
+
+    print()
+
+    medians_flat = {
+        f"{input_type}, {api_version}": median
+        for input_type, api_versions in medians.items()
+        for api_version, median in api_versions.items()
+    }
+    field_len = max(len(label) for label in medians_flat)
+
+    print(
+        f"{' ' * (field_len + 5)}  {'  '.join(f' [{id}]' for _, id in zip(range(len(medians_flat)), string.ascii_lowercase))}"
+    )
+    for (label, val), id in zip(medians_flat.items(), string.ascii_lowercase):
+        print(
+            f"{label:>{field_len}}, [{id}]  {'  '.join(f'{val / ref:4.2f}' for ref in medians_flat.values())}"
+        )
+    print()
+    print("Slowdown as row / col")
 
 
 if __name__ == "__main__":
diff --git a/results/20230404093341.log b/results/20230404093341.log
new file mode 100644
index 0000000..fbf71c2
--- /dev/null
+++ b/results/20230404093341.log
@@ -0,0 +1,339 @@
+############################################################
+classification-simple
+############################################################
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+PILToTensor                          110 µs +-     10 µs
+RandomResizedCropWithoutResizeV1      52 µs +-      9 µs
+Resize                               636 µs +-    184 µs
+RandomHorizontalFlip                  25 µs +-     10 µs
+ConvertImageDtype                     47 µs +-     11 µs
+Normalize                             75 µs +-     14 µs
+
+total                                945 µs
+------------------------------------------------------------
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+PILToTensor                          116 µs +-      8 µs
+RandomResizedCropWithoutResizeV2      55 µs +-      6 µs
+Resize                               618 µs +-    159 µs
+RandomHorizontalFlip                  35 µs +-     10 µs
+ConvertDtype                          42 µs +-      3 µs
+Normalize                             61 µs +-      4 µs
+
+total                                926 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+RandomResizedCropWithoutResizeV1      76 µs +-     11 µs
+Resize                               548 µs +-    152 µs
+RandomHorizontalFlip                  51 µs +-     22 µs
+PILToTensor                           52 µs +-      5 µs
+ConvertImageDtype                     50 µs +-      6 µs
+Normalize                            438 µs +-     36 µs
+
+total                               1214 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+RandomResizedCropWithoutResizeV2      73 µs +-     10 µs
+Resize                               540 µs +-    150 µs
+RandomHorizontalFlip                  58 µs +-     23 µs
+PILToTensor                           57 µs +-      3 µs
+ConvertDtype                          43 µs +-      3 µs
+Normalize                            417 µs +-     33 µs
+
+total                               1189 µs
+------------------------------------------------------------
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+ToImageTensor                        122 µs +-      9 µs
+RandomResizedCropWithoutResizeV2      60 µs +-      7 µs
+Resize                               619 µs +-    163 µs
+RandomHorizontalFlip                  37 µs +-     12 µs
+ConvertDtype                          45 µs +-      6 µs
+Normalize                             64 µs +-      5 µs
+
+total                                948 µs
+------------------------------------------------------------
+############################################################
+classification-complex
+############################################################
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+PILToTensor                          109 µs +-     12 µs
+RandomResizedCropWithoutResizeV1      53 µs +-      7 µs
+Resize                               630 µs +-    166 µs
+RandomHorizontalFlip                  18 µs +-      8 µs
+AutoAugment                          765 µs +-    623 µs
+RandomErasing                         14 µs +-     36 µs
+ConvertImageDtype                     48 µs +-      5 µs
+Normalize                             74 µs +-      6 µs
+
+total                               1711 µs
+------------------------------------------------------------
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+PILToTensor                          116 µs +-     10 µs
+RandomResizedCropWithoutResizeV2      55 µs +-      7 µs
+Resize                               632 µs +-    166 µs
+RandomHorizontalFlip                  24 µs +-     10 µs
+AutoAugment                          611 µs +-    606 µs
+RandomErasing                         18 µs +-     36 µs
+ConvertDtype                          42 µs +-      3 µs
+Normalize                             62 µs +-      5 µs
+
+total                               1560 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+RandomResizedCropWithoutResizeV1      80 µs +-     15 µs
+Resize                               587 µs +-    156 µs
+RandomHorizontalFlip                  22 µs +-     23 µs
+AutoAugment                          339 µs +-    234 µs
+PILToTensor                           57 µs +-      6 µs
+RandomErasing                         15 µs +-     36 µs
+ConvertImageDtype                     54 µs +-     16 µs
+Normalize                            459 µs +-     39 µs
+
+total                               1613 µs
+------------------------------------------------------------
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+RandomResizedCropWithoutResizeV2      78 µs +-     12 µs
+Resize                               577 µs +-    160 µs
+RandomHorizontalFlip                  27 µs +-     24 µs
+AutoAugment                          307 µs +-    242 µs
+PILToTensor                           64 µs +-      4 µs
+RandomErasing                         18 µs +-     36 µs
+ConvertDtype                          45 µs +-      6 µs
+Normalize                            427 µs +-     34 µs
+
+total                               1543 µs
+------------------------------------------------------------
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                                  median          std   
+ToImageTensor                        125 µs +-      9 µs
+RandomResizedCropWithoutResizeV2      61 µs +-      6 µs
+Resize                               646 µs +-    167 µs
+RandomHorizontalFlip                  22 µs +-     13 µs
+AutoAugment                          630 µs +-    381 µs
+RandomErasing                         18 µs +-     39 µs
+ConvertDtype                          48 µs +-      3 µs
+Normalize                             68 µs +-      6 µs
+
+total                               1617 µs
+------------------------------------------------------------
+############################################################
+detection-ssdlite
+############################################################
+loading annotations into memory...
+Done (t=9.71s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Tensor', api_version='v1'
+
+Results computed for 1_000 samples
+
+                          median          std   
+ConvertCocoPolysToMaskV1    2799 µs +-   4403 µs
+PILToTensorV1                268 µs +-     77 µs
+RandomIoUCropV1              467 µs +-   7166 µs
+RandomHorizontalFlipV1        18 µs +-    218 µs
+ConvertImageDtypeV1          267 µs +-    178 µs
+
+total                       3820 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=8.87s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Tensor', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median          std   
+WrapCocoSampleForTransformsV2    1487 µs +-   2446 µs
+PILToTensor                       748 µs +-   5328 µs
+RandomIoUCrop                    1835 µs +-   7046 µs
+RandomHorizontalFlip              559 µs +-   2322 µs
+ConvertDtype                      760 µs +-   5410 µs
+SanitizeBoundingBox              1004 µs +-   4817 µs
+
+total                            6394 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.84s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='PIL', api_version='v1'
+
+Results computed for 1_000 samples
+
+                          median          std   
+ConvertCocoPolysToMaskV1    2816 µs +-   4427 µs
+RandomIoUCropV1              556 µs +-   7177 µs
+RandomHorizontalFlipV1        20 µs +-    212 µs
+PILToTensorV1                180 µs +-    112 µs
+ConvertImageDtypeV1          281 µs +-    168 µs
+
+total                       3851 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.73s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='PIL', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median          std   
+WrapCocoSampleForTransformsV2    1536 µs +-   2480 µs
+RandomIoUCrop                    1809 µs +-   9065 µs
+RandomHorizontalFlip              582 µs +-   4570 µs
+PILToTensor                       653 µs +-   4991 µs
+ConvertDtype                      777 µs +-   5354 µs
+SanitizeBoundingBox              1012 µs +-   6233 µs
+
+total                            6369 µs
+------------------------------------------------------------
+loading annotations into memory...
+Done (t=9.91s)
+creating index...
+index created!
+Caching 1000 ([89444, 73295, 101719] ... [31395, 96727, 47807]) COCO samples
+input_type='Datapoint', api_version='v2'
+
+Results computed for 1_000 samples
+
+                               median          std   
+WrapCocoSampleForTransformsV2    1537 µs +-   2505 µs
+ToImageTensor                     833 µs +-   2973 µs
+RandomIoUCrop                    1717 µs +-   8842 µs
+RandomHorizontalFlip              547 µs +-   5286 µs
+ConvertDtype                      725 µs +-   6290 µs
+SanitizeBoundingBox              1021 µs +-   5869 µs
+
+total                            6380 µs
+------------------------------------------------------------
+
+Summaries
+
+           v2 / v1
+Tensor        1.67
+PIL           1.65
+
+                     [a]   [b]   [c]   [d]   [e]
+   Tensor, v1, [a]  1.00  0.60  0.99  0.60  0.60
+   Tensor, v2, [b]  1.67  1.00  1.66  1.00  1.00
+      PIL, v1, [c]  1.01  0.60  1.00  0.60  0.60
+      PIL, v2, [d]  1.67  1.00  1.65  1.00  1.00
+Datapoint, v2, [e]  1.67  1.00  1.66  1.00  1.00
+
+Slowdown as row / col
+############################################################
+Collecting environment information...
+PyTorch version: 2.1.0.dev20230403+cpu
+Is debug build: False
+CUDA used to build PyTorch: Could not collect
+ROCM used to build PyTorch: N/A
+
+OS: Arch Linux (x86_64)
+GCC version: (GCC) 12.2.1 20230201
+Clang version: 15.0.7
+CMake version: version 3.25.3
+Libc version: glibc-2.37
+
+Python version: 3.8.16 (default, Mar  2 2023, 03:21:46)  [GCC 11.2.0] (64-bit runtime)
+Python platform: Linux-6.2.6-arch1-1-x86_64-with-glibc2.17
+Is CUDA available: False
+CUDA runtime version: 11.7.99
+CUDA_MODULE_LOADING set to: N/A
+GPU models and configuration: GPU 0: NVIDIA GeForce GTX 1080
+Nvidia driver version: 525.89.02
+cuDNN version: Could not collect
+HIP runtime version: N/A
+MIOpen runtime version: N/A
+Is XNNPACK available: True
+
+CPU:
+Architecture:                    x86_64
+CPU op-mode(s):                  32-bit, 64-bit
+Address sizes:                   48 bits physical, 48 bits virtual
+Byte Order:                      Little Endian
+CPU(s):                          24
+On-line CPU(s) list:             0-23
+Vendor ID:                       AuthenticAMD
+Model name:                      AMD Ryzen 9 5900X 12-Core Processor
+CPU family:                      25
+Model:                           33
+Thread(s) per core:              2
+Core(s) per socket:              12
+Socket(s):                       1
+Stepping:                        0
+Frequency boost:                 enabled
+CPU(s) scaling MHz:              56%
+CPU max MHz:                     4950,1948
+CPU min MHz:                     2200,0000
+BogoMIPS:                        7388,29
+Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
+Virtualization:                  AMD-V
+L1d cache:                       384 KiB (12 instances)
+L1i cache:                       384 KiB (12 instances)
+L2 cache:                        6 MiB (12 instances)
+L3 cache:                        64 MiB (2 instances)
+NUMA node(s):                    1
+NUMA node0 CPU(s):               0-23
+Vulnerability Itlb multihit:     Not affected
+Vulnerability L1tf:              Not affected
+Vulnerability Mds:               Not affected
+Vulnerability Meltdown:          Not affected
+Vulnerability Mmio stale data:   Not affected
+Vulnerability Retbleed:          Not affected
+Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl
+Vulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+Vulnerability Spectre v2:        Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected
+Vulnerability Srbds:             Not affected
+Vulnerability Tsx async abort:   Not affected
+
+Versions of relevant libraries:
+[pip3] light-the-torch==0.7.2
+[pip3] mypy-extensions==1.0.0
+[pip3] numpy==1.24.1
+[pip3] torch==2.1.0.dev20230403+cpu
+[pip3] torchvision==0.16.0.dev20230403+cpu
+[conda] Could not collect
diff --git a/transforms.py b/transforms.py
index 0b1ae97..c337d13 100644
--- a/transforms.py
+++ b/transforms.py
@@ -199,14 +199,19 @@ def _transform(self, inpt, params):
 
 class WrapCocoSampleForTransformsV2:
     def __init__(self):
-        num_samples = 117_266
         wrapper_factory = WRAPPER_FACTORIES[datasets.CocoDetection]
-        mock_dataset = SimpleNamespace(ids=list(range(num_samples)))
+        # The v2 wrapper depends on the `.ids` attribute of a `CocoDetection` dataset.
+        # However, that attribute is lost when images without annotations are
+        # filtered out of the dataset. Thus, we fake it here with a placeholder entry.
+        mock_dataset = SimpleNamespace(ids=["invalid"])
         wrapper = wrapper_factory(mock_dataset)
-        self.wrapper = functools.partial(wrapper, num_samples // 2)
+        # The wrapper gets passed the index alongside the sample to wrap. The former is
+        # only used to retrieve the image ID by accessing the `.ids` attribute. Thus, we
+        # bind 0, which is a valid index into the single-entry `.ids` of the mock.
+        self.wrapper = functools.partial(wrapper, 0)
 
-    def __call__(self, *inputs):
-        return self.wrapper(inputs if len(inputs) > 1 else inputs[0])
+    def __call__(self, image, target):
+        return self.wrapper((image, target))
 
 
 # everything below is copy-pasted from