Skip to content

Commit

Permalink
Allow different crs for functions in multiraster.py (#374)
Browse files Browse the repository at this point in the history
* Allow different CRS for stack_rasters and merge_rasters

* Linting

* Add merge_rasters and stack_rasters to the API

* Linting
  • Loading branch information
rhugonnet authored Apr 29, 2023
1 parent 85f13fe commit 18e0d11
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 42 deletions.
10 changes: 10 additions & 0 deletions doc/source/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,16 @@ And reverse operations.
Mask.proximity
```

## Multiple rasters

```{eval-rst}
.. autosummary::
:toctree: gen_modules/
raster.stack_rasters
raster.merge_rasters
```

## Vector

```{eval-rst}
Expand Down
62 changes: 33 additions & 29 deletions geoutils/raster/multiraster.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,28 +117,28 @@ def stack_rasters(
progress: bool = True,
) -> gu.Raster:
"""
Stack a list of rasters into a common grid as a 3D np array with nodata set to Nan.
Stack a list of rasters on their maximum extent into a multi-band raster.
If use_ref_bounds is True, output will have the shape (N, height, width) where N is len(rasters) and \
height and width is equal to reference's shape.
If use_ref_bounds is False, output will have the shape (N, height2, width2) where N is len(rasters) and \
height2 and width2 are set based on reference's resolution and the maximum extent of all rasters.
The input rasters can have any transform or CRS, and will be reprojected to the
reference raster's CRS and resolution.
The output multi-band raster has an extent that is the union of all raster extents,
except if `use_ref_bounds` is used,
and the number of band equal to the number of input rasters.
Use diff=True to return directly the difference to the reference raster.
Note that currently all rasters will be loaded once in memory. However, if rasters data is not loaded prior to \
merge_rasters it will be loaded for reprojection and deleted, therefore avoiding duplication and \
optimizing memory usage.
Note that all rasters will be loaded once in memory. The data is only loaded for
reprojection then deleted to optimize memory usage.
:param rasters: A list of geoutils Raster objects to be stacked.
:param reference: The reference index, in case the reference is to be stacked, or a separate Raster object \
in case the reference should not be stacked. Defaults to the first raster in the list.
:param resampling_method: The resampling method for the raster reprojections.
:param rasters: List of rasters to be stacked.
:param reference: Index of reference raster in the list or separate reference raster.
Defaults to the first raster in the list.
:param resampling_method: Resampling method for reprojection.
:param use_ref_bounds: If True, will use reference bounds, otherwise will use maximum bounds of all rasters.
:param diff: If True, will return the difference to the reference raster.
:param progress: If True, will display a progress bar. Default is True.
:returns: The stacked raster with the same parameters (optionally bounds) as the reference.
:returns: The merged raster with same CRS and resolution (and optionally bounds) as the reference.
"""
# Check resampling method
if isinstance(resampling_method, str):
Expand All @@ -161,7 +161,9 @@ def stack_rasters(
dst_bounds = reference_raster.bounds
else:
dst_bounds = gu.projtools.merge_bounds(
[raster.bounds for raster in rasters], resolution=reference_raster.res[0], return_rio_bbox=True
[raster.get_bounds_projected(out_crs=reference_raster.crs) for raster in rasters],
resolution=reference_raster.res[0],
return_rio_bbox=True,
)

# Make a data list and add all of the reprojected rasters into it.
Expand All @@ -171,7 +173,6 @@ def stack_rasters(
# Check that data is loaded, otherwise temporarily load it
if not raster.is_loaded:
raster.load()
raster.is_loaded = False

nodata = reference_raster.nodata or gu.raster.raster._default_nodata(reference_raster.data.dtype)
# Reproject to reference grid
Expand Down Expand Up @@ -230,23 +231,26 @@ def merge_rasters(
progress: bool = True,
) -> RasterType:
"""
Merge a list of rasters into one larger raster.
Reprojects the rasters to the reference raster CRS and resolution.
Note that currently all rasters will be loaded once in memory. However, if rasters data is not loaded prior to \
merge_rasters it will be loaded for reprojection and deleted, therefore avoiding duplication and \
optimizing memory usage.
:param rasters: A list of geoutils Raster objects to be merged.
:param reference: The reference index, in case the reference is to be merged, or a separate Raster object \
in case the reference should not be merged. Defaults to the first raster in the list.
:param merge_algorithm: The algorithm, or list of algorithms, to merge the rasters with. Defaults to the mean.\
If several algorithms are provided, each result is returned as a separate band.
:param resampling_method: The resampling method for the raster reprojections.
Spatially merge a list of rasters into one larger raster of their maximum extent.
The input rasters can have any transform or CRS, and will be reprojected to the
reference raster's CRS and resolution.
The output merged raster has an extent that is the union of all raster extents,
except if `use_ref_bounds` is used.
Note that all rasters will be loaded once in memory. The data is only loaded for
reprojection then deleted to optimize memory usage.
:param rasters: List of rasters to be merged.
:param reference: Index of reference raster in the list or separate reference raster.
Defaults to the first raster in the list.
:param merge_algorithm: Reductor function (or list of functions) to merge the rasters with. Defaults to the mean.
If several algorithms are provided, each result is returned as a separate band.
:param resampling_method: Resampling method for reprojection.
:param use_ref_bounds: If True, will use reference bounds, otherwise will use maximum bounds of all rasters.
:param progress: If True, will display a progress bar. Default is True.
:returns: The merged raster with the same parameters (excl. bounds) as the reference.
:returns: The merged raster with same CRS and resolution (and optionally bounds) as the reference.
"""
# Make sure merge_algorithm is a list
if not isinstance(merge_algorithm, (list, tuple)):
Expand Down
63 changes: 50 additions & 13 deletions tests/test_multiraster.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Callable

import numpy as np
import pyproj
import pytest
import rasterio as rio

Expand All @@ -22,7 +23,9 @@ class stack_merge_images:
Param `cls` is used to set the type of the output, e.g. gu.Raster (default).
"""

def __init__(self, image: str, cls: Callable[[str], RasterType] = gu.Raster) -> None:
def __init__(
self, image: str, cls: Callable[[str], RasterType] = gu.Raster, different_crs: pyproj.CRS | None = None
) -> None:
img = cls(examples.get_path(image))
self.img = img

Expand All @@ -43,6 +46,8 @@ def __init__(self, image: str, cls: Callable[[str], RasterType] = gu.Raster) ->
left=x_midpoint - img.res[0] * 3, right=img.bounds.right, top=img.bounds.top, bottom=img.bounds.bottom
)
)
if different_crs:
self.img2 = self.img2.reproject(dst_crs=different_crs)

# To check that use_ref_bounds work - create a img that do not cover the whole extent
self.img3 = img.copy()
Expand All @@ -61,6 +66,11 @@ def images_1d(): # type: ignore
return stack_merge_images("everest_landsat_b4")


@pytest.fixture
def images_different_crs(): # type: ignore
return stack_merge_images("everest_landsat_b4", different_crs=4326)


@pytest.fixture
def sat_images(): # type: ignore
return stack_merge_images("everest_landsat_b4", cls=gu.SatelliteImage)
Expand All @@ -74,7 +84,12 @@ def images_3d(): # type: ignore
class TestMultiRaster:
@pytest.mark.parametrize(
"rasters",
[pytest.lazy_fixture("images_1d"), pytest.lazy_fixture("sat_images"), pytest.lazy_fixture("images_3d")],
[
pytest.lazy_fixture("images_1d"),
pytest.lazy_fixture("sat_images"),
pytest.lazy_fixture("images_different_crs"),
pytest.lazy_fixture("images_3d"),
],
) # type: ignore
def test_stack_rasters(self, rasters) -> None: # type: ignore
"""Test stack_rasters"""
Expand Down Expand Up @@ -102,17 +117,22 @@ def test_stack_rasters(self, rasters) -> None: # type: ignore
stacked_img = gu.raster.stack_rasters([rasters.img1, rasters.img2])

assert stacked_img.count == 2
assert rasters.img.shape == stacked_img.shape
# If the rasters were in a different projection, the final shape can vary by 1 pixel
if not all(rast.crs == rasters.img.crs for rast in [rasters.img1, rasters.img2]):
assert rasters.img.height == pytest.approx(stacked_img.height, abs=1)
assert rasters.img.width == pytest.approx(stacked_img.width, abs=1)
else:
assert rasters.img.shape == stacked_img.shape
assert type(stacked_img) == gu.Raster # Check output object is always Raster, whatever input was given
assert np.count_nonzero(np.isnan(stacked_img.data)) == 0 # Check no NaNs introduced

merged_bounds = gu.projtools.merge_bounds(
[rasters.img1.bounds, rasters.img2.bounds], resolution=rasters.img1.res[0]
[rasters.img1.bounds, rasters.img2.get_bounds_projected(rasters.img1.crs)], resolution=rasters.img1.res[0]
)
assert merged_bounds == stacked_img.bounds

# Check that reference works with input Raster
stacked_img = gu.raster.stack_rasters([rasters.img1, rasters.img2], reference=rasters.img)
stacked_img = gu.raster.stack_rasters([rasters.img1, rasters.img2], reference=rasters.img, use_ref_bounds=True)
assert rasters.img.bounds == stacked_img.bounds

# Others than int or gu.Raster should raise a ValueError
Expand All @@ -133,7 +153,12 @@ def test_stack_rasters(self, rasters) -> None: # type: ignore
assert stacked_img2.bounds == rasters.img.bounds

@pytest.mark.parametrize(
"rasters", [pytest.lazy_fixture("images_1d"), pytest.lazy_fixture("images_3d")]
"rasters",
[
pytest.lazy_fixture("images_1d"),
pytest.lazy_fixture("images_3d"),
pytest.lazy_fixture("images_different_crs"),
],
) # type: ignore
def test_merge_rasters(self, rasters) -> None: # type: ignore
"""Test merge_rasters"""
Expand All @@ -153,17 +178,29 @@ def test_merge_rasters(self, rasters) -> None: # type: ignore

merged_img = gu.raster.merge_rasters([rasters.img1, rasters.img2], merge_algorithm=np.nanmean)

assert rasters.img.shape == merged_img.shape
assert rasters.img.bounds == merged_img.bounds
assert np.count_nonzero(np.isnan(merged_img.data)) == 0 # Check no NaNs introduced
if not all(rast.crs == rasters.img.crs for rast in [rasters.img1, rasters.img2]):
assert rasters.img.height == pytest.approx(merged_img.height, abs=1)
assert rasters.img.width == pytest.approx(merged_img.width, abs=1)
else:
assert rasters.img.shape == merged_img.shape

diff = rasters.img.data - merged_img.data
merged_bounds = gu.projtools.merge_bounds(
[rasters.img1.bounds, rasters.img2.get_bounds_projected(rasters.img1.crs)], resolution=rasters.img1.res[0]
)
assert merged_bounds == merged_img.bounds

assert np.count_nonzero(np.isnan(merged_img.data)) == 0 # Check no NaNs introduced

assert np.abs(np.nanmean(diff)) < 1
# Check that only works if CRS were the same
if all(rast.crs == rasters.img.crs for rast in [rasters.img1, rasters.img2]):
diff = rasters.img.data - merged_img.data
assert np.abs(np.nanmean(diff)) < 1

# Check that reference works
merged_img2 = gu.raster.merge_rasters([rasters.img1, rasters.img2], reference=rasters.img)
assert merged_img2 == merged_img
merged_img2 = gu.raster.merge_rasters([rasters.img1, rasters.img2], reference=rasters.img, use_ref_bounds=True)
# Check that only works if CRS were the same
if all(rast.crs == rasters.img.crs for rast in [rasters.img1, rasters.img2]):
assert merged_img2 == merged_img

# Group rasters for for testing `load_multiple_rasters`
# two overlapping, single band rasters
Expand Down

0 comments on commit 18e0d11

Please sign in to comment.