Skip to content

Commit

Permalink
Merge branch 'main' into pure_Tensor
Browse files Browse the repository at this point in the history
  • Loading branch information
NicolasHug authored Aug 17, 2023
2 parents 94539e0 + 4025fc5 commit 5dd0dfb
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 7 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ jobs:
# cores (`-j auto`). Thus, we limit to a single process (`-j 1`) here.
sed -i -e 's/-j auto/-j 1/' Makefile
make html
mkdir build/html/_generated_ipynb_notebooks
for file in `find build/html/_downloads`; do
if [[ $file == *.ipynb ]]; then
cp $file build/html/_generated_ipynb_notebooks/
fi
done
cp -r build/html "${RUNNER_ARTIFACT_DIR}"
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ Torchvision currently supports the following image backends:
- [libjpeg](http://ijg.org/) - can be installed via conda `conda install jpeg` or any of the package managers for
debian-based and RHEL-based Linux distributions. [libjpeg-turbo](https://libjpeg-turbo.org/) can be used as well.

**Notes:** `libpng` and `libjpeg` must be available at compilation time in order to be available. Make sure that it is
available on the standard library locations, otherwise, add the include and library paths in the environment variables
`TORCHVISION_INCLUDE` and `TORCHVISION_LIBRARY`, respectively.
**Notes:** `libpng` and `libjpeg` are optional dependencies. If any of them is available on the system,
torchvision will provide encoding/decoding image functionalities from `torchvision.io.image`.
When building torchvision from source, `libpng` and `libjpeg` are searched for in the standard library locations.
If they are installed elsewhere, set the `TORCHVISION_INCLUDE` and `TORCHVISION_LIBRARY` environment variables to the appropriate include and library paths.

## Video Backend

Expand Down
1 change: 1 addition & 0 deletions docs/source/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ Conversion
v2.ConvertImageDtype
v2.ToDtype
v2.ConvertBoundingBoxFormat
v2.ToPureTensor

Auto-Augmentation
-----------------
Expand Down
2 changes: 2 additions & 0 deletions gallery/plot_datapoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
Datapoints FAQ
==============
https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/_generated_ipynb_notebooks/plot_datapoints.ipynb
Datapoints are Tensor subclasses introduced together with
``torchvision.transforms.v2``. This example showcases what these datapoints are
and how they behave.
Expand Down
6 changes: 6 additions & 0 deletions references/classification/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def __init__(
if random_erase_prob > 0:
transforms.append(T.RandomErasing(p=random_erase_prob))

if use_v2:
transforms.append(T.ToPureTensor())

self.transforms = T.Compose(transforms)

def __call__(self, img):
Expand Down Expand Up @@ -107,6 +110,9 @@ def __init__(
T.Normalize(mean=mean, std=std),
]

if use_v2:
transforms.append(T.ToPureTensor())

self.transforms = T.Compose(transforms)

def __call__(self, img):
Expand Down
5 changes: 5 additions & 0 deletions references/detection/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def __init__(
transforms += [
T.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.XYXY),
T.SanitizeBoundingBoxes(),
T.ToPureTensor(),
]

self.transforms = T.Compose(transforms)
Expand All @@ -103,6 +104,10 @@ def __init__(self, backend="pil", use_v2=False):
raise ValueError(f"backend can be 'datapoint', 'tensor' or 'pil', but got {backend}")

transforms += [T.ConvertImageDtype(torch.float)]

if use_v2:
transforms += [T.ToPureTensor()]

self.transforms = T.Compose(transforms)

def __call__(self, img, target):
Expand Down
5 changes: 5 additions & 0 deletions references/segmentation/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def __init__(
transforms += [T.ConvertImageDtype(torch.float)]

transforms += [T.Normalize(mean=mean, std=std)]
if use_v2:
transforms += [T.ToPureTensor()]

self.transforms = T.Compose(transforms)

Expand Down Expand Up @@ -98,6 +100,9 @@ def __init__(
T.ConvertImageDtype(torch.float),
T.Normalize(mean=mean, std=std),
]
if use_v2:
transforms += [T.ToPureTensor()]

self.transforms = T.Compose(transforms)

def __call__(self, img, target):
Expand Down
21 changes: 21 additions & 0 deletions test/test_transforms_v2_refactored.py
Original file line number Diff line number Diff line change
Expand Up @@ -2353,3 +2353,24 @@ def test_displacement_error(self, make_input):
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_transform(self, make_input, size, device):
check_transform(transforms.ElasticTransform, make_input(size, device=device))


class TestToPureTensor:
    def test_correctness(self):
        """ToPureTensor must strip the Datapoint subclass from datapoint values
        while leaving non-datapoint values (plain tensors, PIL images, str)
        completely untouched.
        """
        # Renamed from ``input``: that name shadows the ``input`` builtin.
        inputs = {
            "img": make_image(),
            "img_tensor": make_image_tensor(),
            "img_pil": make_image_pil(),
            "mask": make_detection_mask(),
            "video": make_video(),
            "bbox": make_bounding_box(),
            "str": "str",
        }

        out = transforms.ToPureTensor()(inputs)

        # Dicts preserve insertion order, so zipping the values pairs each
        # original entry with its transformed counterpart.
        for input_value, out_value in zip(inputs.values(), out.values()):
            if isinstance(input_value, datapoints.Datapoint):
                # Datapoints are downgraded to plain tensors.
                assert isinstance(out_value, torch.Tensor) and not isinstance(out_value, datapoints.Datapoint)
            else:
                # Everything else passes through with its type preserved.
                assert isinstance(out_value, type(input_value))
2 changes: 1 addition & 1 deletion torchvision/csrc/io/image/cpu/decode_jpeg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
#endif // #if !JPEG_FOUND

int64_t _jpeg_version() {
#ifdef JPEG_FOUND
#if JPEG_FOUND
return JPEG_LIB_VERSION;
#else
return -1;
Expand Down
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
ToDtype,
)
from ._temporal import UniformTemporalSubsample
from ._type_conversion import PILToTensor, ToImage, ToPILImage
from ._type_conversion import PILToTensor, ToImage, ToPILImage, ToPureTensor

from ._deprecated import ToTensor # usort: skip

Expand Down
18 changes: 16 additions & 2 deletions torchvision/transforms/v2/_type_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class ToImage(Transform):
"""[BETA] Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.datapoints.Image`
; this does not scale values.
.. v2betastatus:: ToImageTensor transform
.. v2betastatus:: ToImage transform
This transform does not support torchscript.
"""
Expand All @@ -46,7 +46,7 @@ def _transform(
class ToPILImage(Transform):
"""[BETA] Convert a tensor or an ndarray to PIL Image - this does not scale values.
.. v2betastatus:: ToImagePIL transform
.. v2betastatus:: ToPILImage transform
This transform does not support torchscript.
Expand Down Expand Up @@ -75,3 +75,17 @@ def _transform(
self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any]
) -> PIL.Image.Image:
return F.to_pil_image(inpt, mode=self.mode)


class ToPureTensor(Transform):
    """[BETA] Convert all datapoints to pure tensors, removing associated metadata (if any).

    .. v2betastatus:: ToPureTensor transform

    This doesn't scale or change the values, only the type.
    """

    # Only Datapoint instances are dispatched to ``_transform``; every other
    # input type is passed through by the base ``Transform`` machinery.
    _transformed_types = (datapoints.Datapoint,)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> torch.Tensor:
        # Re-view the datapoint as a plain ``torch.Tensor``. The underlying
        # storage is shared; only the Python type changes.
        plain_tensor = inpt.as_subclass(torch.Tensor)
        return plain_tensor

0 comments on commit 5dd0dfb

Please sign in to comment.