
feat: add array order processes #65

Merged

Changes from 11 commits
2 changes: 1 addition & 1 deletion openeo_processes_dask/core.py
@@ -66,7 +66,7 @@ def wrapper(
else:
resolved_kwargs[k] = arg

special_args = ["axis", "keepdims"]
special_args = ["axis", "keepdims", "source_transposed_axis"]
# Remove 'axis' and keepdims parameter if not expected in function signature.
for arg in special_args:
if arg not in inspect.signature(f).parameters:
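For context, a minimal standalone sketch of the kwarg-stripping pattern this hunk extends; the helper name strip_special_args is hypothetical, the actual logic lives inside the wrapper in core.py:

```python
import inspect

def strip_special_args(f, resolved_kwargs: dict) -> dict:
    # Hypothetical helper: injected arguments are only forwarded if the
    # wrapped process implementation declares them in its signature.
    special_args = ["axis", "keepdims", "source_transposed_axis"]
    for arg in special_args:
        if arg not in inspect.signature(f).parameters:
            resolved_kwargs.pop(arg, None)
    return resolved_kwargs
```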
107 changes: 104 additions & 3 deletions openeo_processes_dask/process_implementations/arrays.py
@@ -27,9 +27,9 @@
"array_labels",
# "first",
# "last",
# "order",
# "rearrange",
# "sort",
"order",
"rearrange",
"sort",
]


@@ -182,3 +182,104 @@ def array_labels(data: ArrayLike) -> ArrayLike:
if len(data.shape) > 1:
raise TooManyDimensions("array_labels is only implemented for 1D arrays.")
return np.arange(len(data))


def order(
data: ArrayLike,
asc: Optional[bool] = True,
nodata: Optional[bool] = None,
axis: Optional[int] = None,
):
if isinstance(data, list):
data = np.asarray(data)
if len(data) == 0:
return data

# See https://github.com/dask/dask/issues/4368
logger.warning(
"order: Dask does not support lazy sorting of arrays, therefore the array is loaded into memory here. This might fail for arrays that don't fit into memory."
)

if asc:
permutation_idxs = np.argsort(data, kind="mergesort", axis=axis)
else: # [::-1] not possible
permutation_idxs = np.argsort(
-data, kind="mergesort", axis=axis
Contributor:
This only works with numerical types, but will fail if trying to sort string or datetime arrays!
Why not just use np.flip on the found indices?

Collaborator Author:
I had used flip before, but switched to this approach because of the following side effect:
Using the example in the documentation, order(data = [6,-1,2,null,7,4,null,8,3,9,9], asc = false, nodata = false) should result in [3, 6, 9, 10, 7, 4, 0, 5, 8, 2, 1].
With np.flip our result becomes [6, 3, 10, 9, 7, 4, 0, 5, 8, 2, 1], so values that are equal end up in the opposite order. Swapping those back after the flip does not seem very convenient to me - what do you think?
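A minimal numpy sketch of the tie-ordering difference discussed here, using the documentation example (nodata handling omitted):

```python
import numpy as np

data = np.array([6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9])

# Flipping a stable ascending argsort reverses the order of the tied 9s
# (index 10 before index 9) and puts the NaN positions first.
print(np.flip(np.argsort(data, kind="mergesort")))
# [ 6  3 10  9  7  4  0  5  8  2  1]

# Negating the data before the stable argsort keeps ties in their
# original order (index 9 before index 10); NaNs still sort last.
print(np.argsort(-data, kind="mergesort"))
# [ 9 10  7  4  0  5  8  2  1  3  6]
```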

Contributor (@LukeWeidenwalker, Feb 24, 2023):
Ah, thanks for this - hmm, we could count the number of NaNs first, flip only array[index_last_nan:], and stitch the pieces back together? Then we should have strings/datetimes covered too!

Collaborator Author (@ValentinaHutter, Feb 24, 2023):
I think what you are referring to would result in an array like [3, 6, 10, 9, 7, 4, 0, 5, 8, 2, 1], so the NaN values would be handled. But if a value occurs twice in the data (like the "9" in the example), the order of those entries is still flipped. (The result in the example should be [3, 6, 9, 10, 7, 4, 0, 5, 8, 2, 1].)

Contributor:
Hmm. @m-mohr what do you think about relaxing the "Ties will be left in their original ordering" requirement in the process spec for order? Is this strictly necessary for some use case? The implementation on our end would be simpler if this didn't have to be guaranteed.

Member:
To get comparable and reproducible results, you can't leave this undefined.

Contributor:
Okay, that's fair, so we cannot drop this requirement completely - what about changing it such that ties keep the original ordering if asc=True and are flipped otherwise?
Otherwise I currently don't see a way to do this so that arrays of strings or datetimes can also be ordered; we'd have to throw an error in those cases.

Member:
@LukeWeidenwalker That's a good point - I think we can clarify that, as that was the original intention anyway. Please open an issue in openeo-processes and I'll prepare a PR for 2.0.

    ) # to get the indices in descending order, the sign of the data is changed

if nodata is None: # ignore np.nan values
if len(data.shape) > 1:
raise ValueError(
"order with nodata=None is not supported for arrays with more than one dimension, as this would result in sparse multi-dimensional arrays."
)
# sort the original data first, to get correct position of no data values
sorted_data = np.take_along_axis(data, permutation_idxs, axis=axis)
return permutation_idxs[~pd.isnull(sorted_data)]
elif nodata is False: # put location/index of np.nan values first
# sort the original data first, to get correct position of no data values
sorted_data = data[permutation_idxs]
return np.append(
permutation_idxs[pd.isnull(sorted_data)],
permutation_idxs[~pd.isnull(sorted_data)],
)
elif nodata is True: # default argsort behaviour, np.nan values are put last
return permutation_idxs
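A usage sketch for the function above, based on the documentation example discussed in the review thread; the expected outputs are taken from the tests added in this PR:

```python
import numpy as np
from openeo_processes_dask.process_implementations.arrays import order

data = [6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9]

print(order(data, asc=True, nodata=None))    # [1 2 8 5 0 4 7 9 10]      (NaN positions dropped)
print(order(data, asc=False, nodata=False))  # [3 6 9 10 7 4 0 5 8 2 1]  (NaN positions first)
print(order(data, asc=False, nodata=True))   # [9 10 7 4 0 5 8 2 1 3 6]  (NaN positions last)
```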


def rearrange(
data: ArrayLike,
order: ArrayLike,
axis: Optional[int] = None,
source_transposed_axis: int = None,
):
if len(data) == 0:
return data
if isinstance(data, list):
data = np.asarray(data)
if isinstance(order, list):
order = np.asarray(order)

if len(data.shape) != len(order.shape):
Collaborator Author:
wouldn't it make sense to also allow len(order.shape) == 1 here? From the examples in the definition https://processes.openeo.org/#rearrange this could also be valid, and I think it makes sense to always take the same element of an array over a specific dimension... this could be handled with np.take automatically :)

Contributor:
Hmm - the spec seems ambiguous about that to me, since all the examples have an equal number of axes. We could add extra axes and broadcast the order array such that it fits the data array. I agree that it's a possible enhancement, but adding this would need more tests for higher-dimensional arrays. I think we can simplify our lives here and just throw an error saying that this isn't supported until someone actually needs exactly this?

Member:
Regardless of what type the elements are, the array is just re-arranged according to the order on the first level.

So if for example you have rearrange(data = [[2,3], [4,3]], order=[1,0]), you'd get [[4,3], [2,3]]. Hope that helps.

Contributor (@LukeWeidenwalker, Feb 27, 2023):
Thanks for this comment - so the order array is always expected to only have a single axis?

Member:
Yes, indeed. It is of type array<integer> so can't be multi-dimensional.
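A small numpy sketch of the "first level only" behaviour described in this thread (plain numpy, not the rearrange implementation below):

```python
import numpy as np

data = np.array([[2, 3], [4, 3]])
order = np.array([1, 0])

# Re-arranging along the first axis only, as in the spec example:
print(np.take(data, order, axis=0))
# [[4 3]
#  [2 3]]
```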

raise ValueError(
f"rearrange: number of axes on data ({len(data.shape)}) != number of axes ({len(order.shape)}) on order. rearrange does not support broadcasting in this case."
)

# This is to allow for the fact that apply_dimension can rearrange dimensions to put core dimensions in the back
if source_transposed_axis is not None:
order = np.moveaxis(order, source_transposed_axis, -1)

logger.warning(
"rearrange: This operation cannot be performed lazily, therefore the array will be loaded into memory here. This might fail for arrays that don't fit into memory."
)

return np.take_along_axis(data, indices=order, axis=axis)
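To illustrate the source_transposed_axis handling above, a small np.moveaxis sketch with made-up shapes:

```python
import numpy as np

# An order array whose re-arranged dimension sits at position 0 in the source cube...
order = np.zeros((4, 5, 6), dtype=int)

# ...is re-aligned so that dimension ends up last, matching how
# apply_dimension moves the core dimension to the back.
print(np.moveaxis(order, 0, -1).shape)  # (5, 6, 4)
```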


def sort(
data: ArrayLike,
asc: Optional[bool] = True,
nodata: Optional[bool] = None,
axis: Optional[int] = None,
):
if isinstance(data, list):
data = np.asarray(data)
if len(data) == 0:
return data
if asc:
data_sorted = np.sort(data, axis=axis)
else: # [::-1] not possible
data_sorted = -np.sort(
-data, axis=axis
    ) # to get the values in descending order, the data is negated, sorted, and negated back

if nodata is None: # ignore np.nan values
nan_idxs = pd.isnull(data_sorted)
return data_sorted[~nan_idxs]
elif nodata == False: # put np.nan values first
nan_idxs = pd.isnull(data_sorted)
data_sorted_flip = np.flip(data_sorted, axis=axis)
nan_idxs_flip = pd.isnull(data_sorted_flip)
data_sorted_flip[~nan_idxs_flip] = data_sorted[~nan_idxs]
return data_sorted_flip
elif nodata == True: # default sort behaviour, np.nan values are put last
return data_sorted
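A plain numpy/pandas sketch of the flip-and-reassign trick used in the nodata=False branch above (ascending case):

```python
import numpy as np
import pandas as pd

data = np.array([6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9])

data_sorted = np.sort(data)          # ascending, NaNs last
flipped = np.flip(data_sorted)       # NaNs now sit at the front
# Overwrite the non-NaN slots with the ascending values again:
flipped[~pd.isnull(flipped)] = data_sorted[~pd.isnull(data_sorted)]
print(flipped)
# [nan nan -1.  2.  3.  4.  6.  7.  8.  9.  9.]
```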
@@ -62,6 +62,7 @@ def apply_dimension(
"named_parameters": named_parameters,
"axis": reordered_data.get_axis_num(dimension),
"keepdims": True,
"source_transposed_axis": data.get_axis_num(dimension),
},
exclude_dims={dimension},
)
70 changes: 70 additions & 0 deletions tests/test_apply.py
@@ -1,5 +1,6 @@
from functools import partial

import dask.array as da
import numpy as np
import pytest
import xarray as xr
@@ -156,3 +157,72 @@ def test_apply_dimension_target_dimension(
verify_crs=False,
expected_results=expected_output,
)


@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_ordering_processes(
temporal_interval, bounding_box, random_raster_data, process_registry
):
input_cube = create_fake_rastercube(
data=random_raster_data,
spatial_extent=bounding_box,
temporal_extent=temporal_interval,
bands=["B02", "B03", "B04", "B08"],
backend="dask",
)

_process_order = partial(
process_registry["order"],
data=ParameterReference(from_parameter="data"),
nodata=True,
)

output_cube_order = apply_dimension(
data=input_cube,
process=_process_order,
dimension="x",
target_dimension="target",
)

expected_output_order = np.argsort(input_cube.data, kind="mergesort", axis=0)

np.testing.assert_array_equal(output_cube_order.data, expected_output_order)
# This is to remind us that currently dask arrays don't support sorting and notify us should that change in a future version.
assert isinstance(output_cube_order.data, np.ndarray)

_process_rearrange = partial(
process_registry["rearrange"],
data=ParameterReference(from_parameter="data"),
order=da.from_array(expected_output_order),
)

output_cube_rearrange = apply_dimension(
data=input_cube, process=_process_rearrange, dimension="x", target_dimension="x"
)

expected_output_rearrange = np.take_along_axis(
input_cube.data, indices=expected_output_order, axis=0
)

np.testing.assert_array_equal(output_cube_rearrange.data, expected_output_rearrange)
# This is to remind us that currently dask arrays don't support sorting and notify us should that change in a future version.
assert isinstance(output_cube_rearrange.data, np.ndarray)

_process_sort = partial(
process_registry["sort"],
data=ParameterReference(from_parameter="data"),
nodata=True,
)

output_cube_sort = apply_dimension(
data=input_cube, process=_process_sort, dimension="x", target_dimension="target"
)

expected_output_sort = np.sort(input_cube.data, axis=0)

np.testing.assert_array_equal(output_cube_sort.data, expected_output_sort)
# This is to remind us that currently dask arrays don't support sorting and notify us should that change in a future version.
assert isinstance(output_cube_sort.data, np.ndarray)

np.testing.assert_array_equal(output_cube_sort.data, output_cube_rearrange.data)
103 changes: 103 additions & 0 deletions tests/test_arrays.py
@@ -201,6 +201,109 @@ def test_array_labels():
array_labels(np.array([[1, 0, 3, 2], [5, 0, 6, 4]]))


@pytest.mark.parametrize(
"data, asc, nodata, expected",
[
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
True,
None,
[1, 2, 8, 5, 0, 4, 7, 9, 10],
),
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
True,
True,
[1, 2, 8, 5, 0, 4, 7, 9, 10, 3, 6],
),
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
False,
True,
[9, 10, 7, 4, 0, 5, 8, 2, 1, 3, 6],
),
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
False,
False,
[3, 6, 9, 10, 7, 4, 0, 5, 8, 2, 1],
),
],
)
def test_order(data, asc, nodata, expected):
np.testing.assert_array_equal(order(data=data, asc=asc, nodata=nodata), expected)
np.testing.assert_array_equal(
order(data=np.array(data), asc=asc, nodata=nodata), np.array(expected)
)
np.testing.assert_array_equal(
order(data=da.from_array(np.array(data)), asc=asc, nodata=nodata),
da.from_array(np.array(expected)),
)


@pytest.mark.parametrize(
"data, order, axis, expected",
[
([5, 4, 3], [2, 1, 0], None, [3, 4, 5]),
([5, 4, 3, 2], [0, 2, 1, 3], 0, [5, 3, 4, 2]),
([5, 4, 3, 2], [1, 3], 0, [4, 2]),
([[5, 4, 3, 2], [5, 4, 3, 2]], [[1, 3]], 1, [[4, 2], [4, 2]]),
],
)
def test_rearrange(data, order, axis, expected):
np.testing.assert_array_equal(
rearrange(data=data, order=order, axis=axis), expected
)
np.testing.assert_array_equal(
rearrange(data=np.array(data), order=order, axis=axis), np.array(expected)
)
np.testing.assert_array_equal(
rearrange(data=da.from_array(np.array(data)), order=order, axis=axis),
da.from_array(np.array(expected)),
)


def test_rearrange_mismatched_shape():
with pytest.raises(ValueError):
rearrange(data=[[5, 4, 3, 2], [5, 4, 3, 2]], order=[1, 3], axis=1)


@pytest.mark.parametrize(
"data, asc, nodata, expected",
[
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
True,
None,
[-1, 2, 3, 4, 6, 7, 8, 9, 9],
),
(
[6, -1, 2, np.nan, 7, 4, np.nan, 8, 3, 9, 9],
False,
True,
[9, 9, 8, 7, 6, 4, 3, 2, -1, np.nan, np.nan],
),
],
)
def test_sort(data, asc, nodata, expected):
"""Tests `sort` function."""
assert np.isclose(
sort(data=data, asc=asc, nodata=nodata),
expected,
equal_nan=True,
).all()
assert np.isclose(
sort(data=np.array(data), asc=asc, nodata=nodata),
expected,
equal_nan=True,
).all()
assert np.isclose(
sort(data=da.from_array(np.array(data)), asc=asc, nodata=nodata),
expected,
equal_nan=True,
).all()


@pytest.mark.parametrize("size", [(3, 3, 2, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_reduce_dimension(