Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Tiling for Multiprocessing #652

Merged
merged 4 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions geoutils/raster/georeferencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from __future__ import annotations

import math
import warnings
from typing import Iterable, Literal

Expand Down Expand Up @@ -284,3 +285,49 @@ def _cast_nodata(out_dtype: DTypeLike, nodata: int | float | None) -> int | floa
nodata = nodata

return nodata


def _generate_tiling_grid(
row_min: int,
col_min: int,
row_max: int,
col_max: int,
row_split: int,
col_split: int,
overlap: int = 0,
) -> NDArrayNum:
"""
Generate a grid of positions by splitting [row_min, row_max] x
[col_min, col_max] into tiles of size row_split x col_split with optional overlap.

:param row_min: Minimum row index of the bounding box to split.
:param col_min: Minimum column index of the bounding box to split.
:param row_max: Maximum row index of the bounding box to split.
:param col_max: Maximum column index of the bounding box to split.
:param row_split: Height of each tile.
:param col_split: Width of each tile.
:param overlap: size of overlapping between tiles (both vertically and horizontally).
:return: A numpy array grid with splits in two dimensions (0: row, 1: column),
where each cell contains [row_min, row_max, col_min, col_max].
"""
# Calculate the number of splits considering overlap
nb_col_split = math.ceil((col_max - col_min) / (col_split - overlap))
nb_row_split = math.ceil((row_max - row_min) / (row_split - overlap))

# Initialize the output grid
tiling_grid = np.zeros(shape=(nb_row_split, nb_col_split, 4), dtype=int)

for row in range(nb_row_split):
for col in range(nb_col_split):
# Calculate the start of the tile
row_start = row_min + row * (row_split - overlap)
col_start = col_min + col * (col_split - overlap)

# Calculate the end of the tile ensuring it doesn't exceed the bounds
row_end = min(row_max, row_start + row_split)
col_end = min(col_max, col_start + col_split)

# Populate the grid with the tile boundaries
tiling_grid[row, col] = [row_start, row_end, col_start, col_end]

return tiling_grid
75 changes: 73 additions & 2 deletions geoutils/raster/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import rioxarray
import xarray as xr
from affine import Affine
from matplotlib.patches import Rectangle
from mpl_toolkits.axes_grid1 import make_axes_locatable
from packaging.version import Version
from rasterio.crs import CRS
Expand Down Expand Up @@ -77,6 +78,7 @@
_cast_pixel_interpretation,
_coords,
_default_nodata,
_generate_tiling_grid,
_ij2xy,
_outside_image,
_res,
Expand Down Expand Up @@ -2655,6 +2657,44 @@ def translate(
raster_copy.transform = translated_transform
return raster_copy

def compute_tiling(
self,
tile_size: int,
raster_ref: RasterType,
overlap: int = 0,
) -> NDArrayNum:
"""
Compute the raster tiling grid to coregister raster by block.

:param tile_size: Size of each tile (square tiles)
:param raster_ref: The other raster to coregister, use to validate the shape
:param overlap: Size of overlap between tiles (optional)
:return: tiling_grid (array of tile boundaries), new_shape (shape of the tiled grid)
"""
if self.shape != raster_ref.shape:
raise Exception("Reference and secondary rasters do not have the same shape")
row_max, col_max = self.shape

# Generate tiling
tiling_grid = _generate_tiling_grid(0, 0, row_max, col_max, tile_size, tile_size, overlap=overlap)
return tiling_grid

def plot_tiling(self, tiling_grid: NDArrayNum) -> None:
"""
Plot raster with its tiling.

:param tiling_grid: tiling given by Raster.compute_tiling.
"""
ax, caxes = self.plot(return_axes=True)
for tile in tiling_grid.reshape(-1, 4):
row_min, row_max, col_min, col_max = tile
x_min, y_min = self.transform * (col_min, row_min) # Bottom-left corner
x_max, y_max = self.transform * (col_max, row_max) # Top-right corne
rect = Rectangle(
(x_min, y_min), x_max - x_min, y_max - y_min, edgecolor="red", facecolor="none", linewidth=1.5
)
ax.add_patch(rect)

def save(
self,
filename: str | pathlib.Path | IO[bytes],
Expand Down Expand Up @@ -2922,6 +2962,38 @@ def intersection(self, raster: str | Raster, match_ref: bool = True) -> tuple[fl
# mypy raises a type issue, not sure how to address the fact that output of merge_bounds can be ()
return intersection # type: ignore

@overload
def plot(
self,
bands: int | tuple[int, ...] | None = None,
cmap: matplotlib.colors.Colormap | str | None = None,
vmin: float | int | None = None,
vmax: float | int | None = None,
alpha: float | int | None = None,
cbar_title: str | None = None,
add_cbar: bool = True,
ax: matplotlib.axes.Axes | Literal["new"] | None = None,
*,
return_axes: Literal[False] = False,
**kwargs: Any,
) -> None: ...

@overload
def plot(
self,
bands: int | tuple[int, ...] | None = None,
cmap: matplotlib.colors.Colormap | str | None = None,
vmin: float | int | None = None,
vmax: float | int | None = None,
alpha: float | int | None = None,
cbar_title: str | None = None,
add_cbar: bool = True,
ax: matplotlib.axes.Axes | Literal["new"] | None = None,
*,
return_axes: Literal[True],
**kwargs: Any,
) -> tuple[matplotlib.axes.Axes, matplotlib.colors.Colormap]: ...

def plot(
self,
bands: int | tuple[int, ...] | None = None,
Expand Down Expand Up @@ -3059,8 +3131,7 @@ def plot(
# If returning axes
if return_axes:
return ax0, cax
else:
return None
return None

def reduce_points(
self,
Expand Down
59 changes: 58 additions & 1 deletion tests/test_raster/test_georeferencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import geoutils as gu
from geoutils import examples
from geoutils.raster.georeferencing import _generate_tiling_grid


class TestGeoreferencing:
Expand Down Expand Up @@ -173,7 +174,7 @@ def test_xy2ij(self) -> None:
@pytest.mark.parametrize("example", [landsat_b4_path, aster_dem_path, landsat_rgb_path]) # type: ignore
def test_coords(self, example: str) -> None:

img = gu.Raster(self.landsat_b4_path)
img = gu.Raster(example)

# With lower left argument
xx0, yy0 = img.coords(grid=False, force_offset="ll")
Expand Down Expand Up @@ -209,3 +210,59 @@ def test_coords(self, example: str) -> None:
xxgrid, yygrid = img.coords(grid=True, force_offset="ll")
assert np.array_equal(xxgrid, np.repeat(xx0[np.newaxis, :], img.height, axis=0))
assert np.array_equal(yygrid, np.flipud(np.repeat(yy0[:, np.newaxis], img.width, axis=1)))

@pytest.mark.parametrize("overlap", [0, 5]) # type: ignore
def test_tiling(self, overlap: int) -> None:

# Test with mock data
tiling_grid_mock = _generate_tiling_grid(0, 0, 100, 100, 50, 50, overlap)
if overlap == 0:
expected_tiling = np.array([[[0, 50, 0, 50], [0, 50, 50, 100]], [[50, 100, 0, 50], [50, 100, 50, 100]]])
assert np.array_equal(tiling_grid_mock, expected_tiling)
elif overlap == 5:
expected_tiling = np.array(
[
[[0, 50, 0, 50], [0, 50, 45, 95], [0, 50, 90, 100]],
[[45, 95, 0, 50], [45, 95, 45, 95], [45, 95, 90, 100]],
[[90, 100, 0, 50], [90, 100, 45, 95], [90, 100, 90, 100]],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have expected the tiling here to yield [0, 55, 0, 55], [0, 55, 45, 100], [45, 100, 0, 55], [45, 100, 45, 100].

The list comprehensions here should normally work, might save you time re-coding this 😉 : https://github.com/iamdonovan/pyddem/blob/main/pyddem/fit_tools.py#L1398

Copy link

@adebardo adebardo Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please be careful using pyddem, it's under GPL-3.0 License (same as RichDem), It is therefore not advisable to import it. However, I will look into it regarding "inspiration."

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks to Sébastien Dinot (https://github.com/sdinot) for proposing legally viable solutions.

  • Even projects under the GNU GPL v3.0 license are rarely written ex nihilo. This function may come from another project released under a permissive license. We need to search it. Without a specialized tool, it's mission impossible, but I have a little idea: it's time to test Software Heritage.

  • If that leads nowhere, someone needs to write an equivalent function without being influenced by the existing code (in other words, it would be best to find someone who has never seen this code). When rewritten in a black-box approach, there's little chance the final result will be identical. This implementation can potentially draw inspiration from an algorithm found in a book or article, in which case the latter should be explicitly cited as the source of inspiration to avoid any ambiguity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm not sure where this function came from originally.
From Git blame, I'm the one who added it to Pyddem 5 years ago, but it might have been inspired from a Stackoverflow response or similar.

]
)
assert np.array_equal(tiling_grid_mock, expected_tiling)

# Test with real data
img = gu.Raster(self.landsat_b4_path)

# Define tiling parameters
row_split, col_split = 100, 100
row_max, col_max = img.shape

# Generate the tiling grid
tiling_grid = _generate_tiling_grid(0, 0, row_max, col_max, row_split, col_split, overlap)

# Calculate expected number of tiles
nb_row_tiles = np.ceil(row_max / (row_split - overlap)).astype(int)
nb_col_tiles = np.ceil(col_max / (col_split - overlap)).astype(int)

# Check that the tiling grid has the expected shape
assert tiling_grid.shape == (nb_row_tiles, nb_col_tiles, 4)

# Check the boundaries of the first and last tile
assert np.array_equal(tiling_grid[0, 0], np.array([0, min(row_split, row_max), 0, min(col_split, col_max)]))
assert np.array_equal(
tiling_grid[-1, -1],
np.array(
[
(nb_row_tiles - 1) * (row_split - overlap),
row_max,
(nb_col_tiles - 1) * (col_split - overlap),
col_max,
]
),
)

# Check if overlap is consistent between tiles
for row in range(nb_row_tiles - 1):
assert tiling_grid[row + 1, 0, 0] == tiling_grid[row, 0, 1] - overlap

for col in range(nb_col_tiles - 1): # Skip last tile in column
assert tiling_grid[0, col + 1, 2] == tiling_grid[0, col, 3] - overlap
Loading