From 185a4e9d3e510d1879fa80d14991d51c4639ebfa Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 31 May 2021 14:45:45 +0200 Subject: [PATCH 1/4] reduce_spatial #226 and clarifications #260 --- CHANGELOG.md | 3 +- aggregate_spatial.json | 2 +- proposals/aggregate_spatial_binary.json | 2 +- proposals/reduce_dimension_binary.json | 2 +- proposals/reduce_spatial.json | 79 +++++++++++++++++++++++++ reduce_dimension.json | 2 +- 6 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 proposals/reduce_spatial.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 9366d2ed..6748ec82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `date_shift` - `is_infinite` - `nan` + - `reduce_spatial` - Added return value details (property `returns`) for the schemas with the subtype `process-graph`. [API#350](https://github.com/Open-EO/openeo-api/issues/350) - `apply_neighborhood`: Clarify behavior for data cubes returned by the child processes and for that add the exception `DataCubePropertiesImmutable`. @@ -28,7 +29,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `run_udf` and `run_udf_externally`: Specify specific (extensible) protocols for UDF URIs. ### Deprecated - - `GeometryCollection`s are discouraged in all relevant processes. ### Fixed @@ -48,6 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Clarified disallowed characters in subtype `file-path`. - Clarified that UDF source code must contain a newline/line-break (affects `run_udf`). - `aggregate_spatial`, `aggregate_spatial_binary`: Clarified that Features, Geometries and GeometryCollections are a single entity in computations. Only FeatureCollections are multiple entities. 
[#252](https://github.com/Open-EO/openeo-processes/issues/252) +- `aggregate_spatial`: Clarified that the values have no predefined order and reducers such as `first`, `last` and `median` return unpredictable results. [#260](https://github.com/Open-EO/openeo-processes/issues/260) - `load_collection`, parameter `spatial_extent`: Clarified that all pixels that are inside the bounding box of the given polygons but do not intersect with any polygon have to be set to no-data (`null`). [#256](https://github.com/Open-EO/openeo-processes/issues/256) - `load_collection`: Clarified that the parameters are recommended to be used in favor of `filter_*` processes. diff --git a/aggregate_spatial.json b/aggregate_spatial.json index 45f36e9d..1cded4e5 100644 --- a/aggregate_spatial.json +++ b/aggregate_spatial.json @@ -1,7 +1,7 @@ { "id": "aggregate_spatial", "summary": "Zonal statistics for geometries", - "description": "Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) over the spatial dimensions. This process passes a list of values to the reducer. In contrast, ``aggregate_spatial_binary()`` passes two values, which may be better suited especially for UDFs in case the number of values gets too large to be processed at once.\n\nThe data cube must have been reduced to only contain two spatial dimensions and a third dimension the values are aggregated for, for example the temporal dimension to get a time series. Otherwise, this process fails with the `TooManyDimensions` exception.\n\nThe number of total and valid pixels is returned together with the calculated values.", + "description": "Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) over the spatial dimensions.\n\nThis process passes a list of values to the reducer. The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results. 
In contrast, ``aggregate_spatial_binary()`` passes two values, which may be better suited especially for UDFs in case the number of values gets too large to be processed at once. An 'unbounded' aggregation over the full extent of the horizontal spatial dimensions can be computed with the process ``reduce_spatial()``.\n\nThe data cube must have been reduced to only contain two spatial dimensions and a third dimension the values are aggregated for, for example the temporal dimension to get a time series. Otherwise, this process fails with the `TooManyDimensions` exception.\n\nThe number of total and valid pixels is returned together with the calculated values.", "categories": [ "cubes", "aggregate & resample" diff --git a/proposals/aggregate_spatial_binary.json b/proposals/aggregate_spatial_binary.json index f09909a7..d85f80a4 100644 --- a/proposals/aggregate_spatial_binary.json +++ b/proposals/aggregate_spatial_binary.json @@ -1,7 +1,7 @@ { "id": "aggregate_spatial_binary", "summary": "Zonal statistics for geometries by binary aggregation", - "description": "Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) over the spatial dimensions. This process consecutively passes a pair of values to the reducer. This may be better suited especially for UDFs in case the number of values gets too large to be processed at once. In contrast, ``aggregate_spatial()`` passes a list of values.\n\nThe data cube must have been reduced to only contain two raster dimensions and a third dimension the values are aggregated for, for example the temporal dimension to get a time series. Otherwise, this process fails with the `TooManyDimensions` exception.\n\nThe number of total and valid pixels is returned together with the calculated values.", + "description": "Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) over the spatial dimensions.\n\nThis process consecutively passes a pair of values to the reducer. 
This may be better suited especially for UDFs in case the number of values gets too large to be processed at once. In contrast, ``aggregate_spatial()`` passes a list of values. An 'unbounded' aggregation over the full extent of the horizontal spatial dimensions can be computed with the process ``reduce_spatial()``, which passes a list of values to the reducer.\n\nThe data cube must have been reduced to only contain two raster dimensions and a third dimension the values are aggregated for, for example the temporal dimension to get a time series. Otherwise, this process fails with the `TooManyDimensions` exception.\n\nThe number of total and valid pixels is returned together with the calculated values.", "categories": [ "cubes", "aggregate & resample" diff --git a/proposals/reduce_dimension_binary.json b/proposals/reduce_dimension_binary.json index 3ca58341..98e5477a 100644 --- a/proposals/reduce_dimension_binary.json +++ b/proposals/reduce_dimension_binary.json @@ -1,7 +1,7 @@ { "id": "reduce_dimension_binary", "summary": "Reduce dimensions using binary reduction", - "description": "Applies a binary reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer. This process consecutively passes a pair of values to the reducer. This may be better suited especially for UDFs in case the number of values gets too large to be processed at once. In contrast, ``reduce_dimension()`` passes a list of values.\n\nThe dimension is dropped. To avoid this, use ``apply_dimension()`` instead.", + "description": "Applies a binary reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer. The dimension is dropped.\n\nThis process consecutively passes a pair of values to the reducer. This may be better suited especially for UDFs in case the number of values gets too large to be processed at once. In contrast, ``reduce_dimension()`` passes a list of values. 
An aggregation over certain spatial areas can be computed with the process ``aggregate_spatial_binary()``.", "categories": [ "cubes", "reducer" diff --git a/proposals/reduce_spatial.json b/proposals/reduce_spatial.json new file mode 100644 index 00000000..8924ee41 --- /dev/null +++ b/proposals/reduce_spatial.json @@ -0,0 +1,79 @@ +{ + "id": "reduce_spatial", + "summary": "Reduce horizontal spatial dimensions", + "description": "Applies a unary reducer to a data cube dimension by collapsing all the pixel values along the horizontal spatial dimensions (i.e. axes `x` and `y`) into an output value computed by the reducer. The horizontal spatial dimensions are dropped.\n\nThis process passes a list of values to the reducer. The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results.\n\nIn contrast to this process, ``reduce_spatial_binary()`` passes two values, which may be better suited especially for UDFs in case the number of values gets too large to be processed at once. An aggregation over certain spatial areas can be computed with the process ``aggregate_spatial()``.", + "categories": [ + "aggregate & resample", + "cubes", + "reducer" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "A data cube.", + "schema": { + "type": "object", + "subtype": "raster-cube" + } + }, + { + "name": "reducer", + "description": "A reducer to apply on the horizontal spatial dimensions. A reducer is a single process such as ``mean()`` or a set of processes, which computes a single value for a list of values, see the category 'reducer' for such processes.", + "schema": { + "type": "object", + "subtype": "process-graph", + "parameters": [ + { + "name": "data", + "description": "An array with elements of any type.", + "schema": { + "type": "array", + "items": { + "description": "Any data type." 
+ } + } + }, + { + "name": "context", + "description": "Additional data passed by the user.", + "schema": { + "description": "Any data type." + }, + "optional": true, + "default": null + } + ], + "returns": { + "description": "The value to be set in the new data cube.", + "schema": { + "description": "Any data type." + } + } + } + }, + { + "name": "context", + "description": "Additional data to be passed to the reducer.", + "schema": { + "description": "Any data type." + }, + "optional": true, + "default": null + } + ], + "returns": { + "description": "A data cube with the newly computed values. It is missing the horizontal spatial dimensions, the number of dimensions decreases by two. The dimension properties (name, type, labels, reference system and resolution) for all other dimensions remain unchanged.", + "schema": { + "type": "object", + "subtype": "raster-cube" + } + }, + "links": [ + { + "href": "https://openeo.org/documentation/1.0/datacubes.html#reduce", + "rel": "about", + "title": "Reducers explained in the openEO documentation" + } + ] +} \ No newline at end of file diff --git a/reduce_dimension.json b/reduce_dimension.json index e52eaf52..34ddc060 100644 --- a/reduce_dimension.json +++ b/reduce_dimension.json @@ -1,7 +1,7 @@ { "id": "reduce_dimension", "summary": "Reduce dimensions", - "description": "Applies a unary reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer. This process passes a list of values to the reducer. In contrast, ``reduce_dimension_binary()`` passes two values, which may be better suited especially for UDFs in case the number of values gets too large to be processed at once.\n\nThe dimension is dropped. To avoid this, use ``apply_dimension()`` instead.", + "description": "Applies a unary reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer. 
The dimension is dropped. To avoid this, use ``apply_dimension()`` instead.\n\nThis process passes a list of values to the reducer. In contrast, ``reduce_dimension_binary()`` passes two values, which may be better suited especially for UDFs in case the number of values gets too large to be processed at once.", "categories": [ "cubes", "reducer" From 1169a1c2103b6b60618ccb1017efcbda64fadd7a Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 1 Jun 2021 15:51:41 +0200 Subject: [PATCH 2/4] Incorporated feedback from review --- CHANGELOG.md | 2 +- proposals/reduce_spatial.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3517415d..5b1f1e38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Clarified disallowed characters in subtype `file-path`. - Clarified that UDF source code must contain a newline/line-break (affects `run_udf`). - `aggregate_spatial`, `aggregate_spatial_binary`: Clarified that Features, Geometries and GeometryCollections are a single entity in computations. Only FeatureCollections are multiple entities. [#252](https://github.com/Open-EO/openeo-processes/issues/252) -- `aggregate_spatial`: Clarified that the values have no predefined order and reducers such as `first`, `last` and `median` return unpredictable results. [#260](https://github.com/Open-EO/openeo-processes/issues/260) +- `aggregate_spatial`: Clarified that the values have no predefined order and reducers such as `first` and `last` return unpredictable results. [#260](https://github.com/Open-EO/openeo-processes/issues/260) - `load_collection`, parameter `spatial_extent`: Clarified that all pixels that are inside the bounding box of the given polygons but do not intersect with any polygon have to be set to no-data (`null`). 
[#256](https://github.com/Open-EO/openeo-processes/issues/256) - `load_collection`: Clarified that the parameters are recommended to be used in favor of `filter_*` processes. diff --git a/proposals/reduce_spatial.json b/proposals/reduce_spatial.json index 660d28f4..1564d036 100644 --- a/proposals/reduce_spatial.json +++ b/proposals/reduce_spatial.json @@ -1,7 +1,7 @@ { "id": "reduce_spatial", - "summary": "Reduce horizontal spatial dimensions", - "description": "Applies a unary reducer to a data cube dimension by collapsing all the pixel values along the horizontal spatial dimensions (i.e. axes `x` and `y`) into an output value computed by the reducer. The horizontal spatial dimensions are dropped.\n\nAn aggregation over certain spatial areas can be computed with the process ``aggregate_spatial()``.\n\nThis process passes a list of values to the reducer. The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results.", + "summary": "Reduce spatial dimensions 'x' and 'y", + "description": "Applies a unary reducer to a data cube by collapsing all the pixel values along the horizontal spatial dimensions (i.e. axes `x` and `y`) into an output value computed by the reducer. The horizontal spatial dimensions are dropped.\n\nAn aggregation over certain spatial areas can be computed with the process ``aggregate_spatial()``.\n\nThis process passes a list of values to the reducer. 
The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results.", "categories": [ "aggregate & resample", "cubes", From 302cc56ae69f70381ebaa004b07b38f10ad99769 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 2 Jun 2021 11:59:00 +0200 Subject: [PATCH 3/4] Replace "unary" with better, easier-to-understand language --- apply.json | 4 ++-- array_apply.json | 6 +++--- proposals/reduce_spatial.json | 4 ++-- reduce_dimension.json | 2 +- tests/.words | 3 +-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/apply.json b/apply.json index 3a24080b..7090a009 100644 --- a/apply.json +++ b/apply.json @@ -1,7 +1,7 @@ { "id": "apply", "summary": "Apply a process to each pixel", - "description": "Applies a *unary* process to each pixel value in the data cube (i.e. a local operation). A unary process takes a single value and returns a single value, for example ``abs()`` or ``linear_scale_range()``. In contrast, the process ``apply_dimension()`` applies a process to all pixel values along a particular dimension.", + "description": "Applies a process to each pixel value in the data cube (i.e. a local operation). In contrast, the process ``apply_dimension()`` applies a process to all pixel values along a particular dimension.", "categories": [ "cubes" ], @@ -16,7 +16,7 @@ }, { "name": "process", - "description": "A unary process to be applied on each value, may consist of multiple sub-processes.", + "description": "A process to be applied on each individual value, may consist of multiple sub-processes. 
The process must accept and return a single value, for example ``abs()`` or ``linear_scale_range()``.", "schema": { "type": "object", "subtype": "process-graph", diff --git a/array_apply.json b/array_apply.json index 61ec1d3e..885b3a64 100644 --- a/array_apply.json +++ b/array_apply.json @@ -1,7 +1,7 @@ { "id": "array_apply", - "summary": "Apply a unary process to each array element", - "description": "Applies a **unary** process which takes a single value such as `abs` or `sqrt` to each value in the array. This is basically what other languages call either a `for each` loop or a `map` function.", + "summary": "Apply a process to each array element", + "description": "Applies a process to each individual value in the array. This is basically what other languages call either a `for each` loop or a `map` function.", "categories": [ "arrays" ], @@ -18,7 +18,7 @@ }, { "name": "process", - "description": "A process to be applied on each value, may consist of multiple sub-processes. The specified process must be unary meaning that it must work on a single value.", + "description": "A process to be applied on each value, may consist of multiple sub-processes. The process must accept and return a single value, for example ``abs()`` or ``sqrt()``.", "schema": { "type": "object", "subtype": "process-graph", diff --git a/proposals/reduce_spatial.json b/proposals/reduce_spatial.json index 1564d036..d9a2fb56 100644 --- a/proposals/reduce_spatial.json +++ b/proposals/reduce_spatial.json @@ -1,7 +1,7 @@ { "id": "reduce_spatial", - "summary": "Reduce spatial dimensions 'x' and 'y", - "description": "Applies a unary reducer to a data cube by collapsing all the pixel values along the horizontal spatial dimensions (i.e. axes `x` and `y`) into an output value computed by the reducer. 
The horizontal spatial dimensions are dropped.\n\nAn aggregation over certain spatial areas can be computed with the process ``aggregate_spatial()``.\n\nThis process passes a list of values to the reducer. The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results.", + "summary": "Reduce spatial dimensions 'x' and 'y'", + "description": "Applies a reducer to a data cube by collapsing all the pixel values along the horizontal spatial dimensions (i.e. axes `x` and `y`) into an output value computed by the reducer. The horizontal spatial dimensions are dropped.\n\nAn aggregation over certain spatial areas can be computed with the process ``aggregate_spatial()``.\n\nThis process passes a list of values to the reducer. The list of values has an undefined order, therefore processes such as ``last()`` and ``first()`` that depend on the order of the values will lead to unpredictable results.", "categories": [ "aggregate & resample", "cubes", diff --git a/reduce_dimension.json b/reduce_dimension.json index e4d360d3..27ed34de 100644 --- a/reduce_dimension.json +++ b/reduce_dimension.json @@ -1,7 +1,7 @@ { "id": "reduce_dimension", "summary": "Reduce dimensions", - "description": "Applies a unary reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer.\n\nThe dimension is dropped. To avoid this, use ``apply_dimension()`` instead.", + "description": "Applies a reducer to a data cube dimension by collapsing all the pixel values along the specified dimension into an output value computed by the reducer.\n\nThe dimension is dropped. 
To avoid this, use ``apply_dimension()`` instead.", "categories": [ "cubes", "reducer" diff --git a/tests/.words b/tests/.words index 568fd814..5c8a96df 100644 --- a/tests/.words +++ b/tests/.words @@ -30,5 +30,4 @@ Sentinel-2B signum STAC summand -UDFs -unary \ No newline at end of file +UDFs \ No newline at end of file From 5f76884edb7722928e353961fd66f0db913c2cc5 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 4 Jun 2021 14:02:46 +0200 Subject: [PATCH 4/4] Improved wording around process to be applied on pixels --- apply.json | 2 +- array_apply.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apply.json b/apply.json index 7090a009..a39292e0 100644 --- a/apply.json +++ b/apply.json @@ -16,7 +16,7 @@ }, { "name": "process", - "description": "A process to be applied on each individual value, may consist of multiple sub-processes. The process must accept and return a single value, for example ``abs()`` or ``linear_scale_range()``.", + "description": "A process that accepts and returns a single value and is applied on each individual value in the data cube. The process may consist of multiple sub-processes and could, for example, consist of processes such as ``absolute()`` or ``linear_scale_range()``.", "schema": { "type": "object", "subtype": "process-graph", diff --git a/array_apply.json b/array_apply.json index 885b3a64..a0e248fa 100644 --- a/array_apply.json +++ b/array_apply.json @@ -18,7 +18,7 @@ }, { "name": "process", - "description": "A process to be applied on each value, may consist of multiple sub-processes. The process must accept and return a single value, for example ``abs()`` or ``sqrt()``.", + "description": "A process that accepts and returns a single value and is applied on each individual value in the array. The process may consist of multiple sub-processes and could, for example, consist of processes such as ``absolute()`` or ``linear_scale_range()``.", "schema": { "type": "object", "subtype": "process-graph",