From 4ff110f05e9f42f4c1587d9b294b44103fd41028 Mon Sep 17 00:00:00 2001 From: Justin Poehnelt Date: Wed, 15 Aug 2018 15:50:39 -0700 Subject: [PATCH 01/14] dataset schema --- dataset-spec/json-schema/dataset.json | 239 ++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 dataset-spec/json-schema/dataset.json diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json new file mode 100644 index 000000000..ef2f1331f --- /dev/null +++ b/dataset-spec/json-schema/dataset.json @@ -0,0 +1,239 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "id": "dataset.json#", + "title": "Dataset Item", + "type": "object", + "description": "This object represents the dataset in a SpatioTemporal Asset Catalog.", + "additionalProperties": true, + "anyOf": [ + { + "$ref": "#/definitions/core" + }, + { + "$ref": "#/definitions/eo" + }, + { + "$ref": "#/definitions/raster" + }, + { + "$ref": "#/definitions/vector" + } + ], + "definitions": { + "core": { + "allOf": [ + { + "type": "object", + "required": [ + "id", + "title", + "description", + "version", + "license_name" + ], + "properties": { + "id": { + "title": "Provider ID", + "description": "Provider item ID", + "type": "string", + "pattern": "^[A-Za-z0-9_\\-\/]+$" + }, + "title": { + "title": "Title", + "description": "Title for the dataset", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Detailed multi-line descrtion to fully explain the entity. 
CommonMark 0.28 syntax may be used for rich text representation.", + "type": "string" + }, + "keywords": { + "title": "Keywords", + "description": "List of keywords describing the dataset", + "type": "array", + "items": { + "type": "string" + } + }, + "license_name": { + "title": "License Name", + "description": "License name based on SPDX License Identifier", + "type": "string" + }, + "license_url": { + "title": "License URL", + "description": "License url must be specified if license_name does not contain a SPDX License Identifier", + "type": "string" + }, + "provider": { + "type": "array", + "description": "The organization the creates the content of the dataset", + "items": { + "$ref": "#/definitions/provider" + } + }, + "host": { + "allOf": [ + { + "description": "The organization the hosts the content of the dataset" + }, + { + "$ref": "#/definitions/provider" + } + ] + }, + "process_graph": { + "title": "Processing Graph", + "properties": { + "chain": { + "type": "array", + "items": { + "$ref": "#/definitions/process" + } + } + } + }, + "version": { + "title": "Version", + "description": "Version of the dataset", + "type": "string" + }, + "datetime": { + "title": "Date and Time", + "description": "The searchable date/time of the assets, in UTC (Formatted in RFC 3339) ", + "type": "string", + "format": "date-time" + }, + "geometry": { + "properties": { + "type": { + "enum": [ + "Polygon", + "MultiPolygon" + ] + } + } + } + } + } + ] + }, + "sci": { + "properties": { + "doi": { + "title": "DOI", + "description": "Digital Object Identifier", + "type": "string" + }, + "citation": { + "type": "string" + }, + "publication_doi": { + "type": "string" + }, + "publication_citation": { + "type": "string" + } + } + }, + "raster": { + "allOf": [ + { + "$ref": "#/definitions/core" + } + ] + }, + "vector": { + "allOf": [ + { + "$ref": "#/definitions/core" + } + ] + }, + "eo": { + "allOf": [ + { + "$ref": "#/definitions/raster" + }, + { + "properties": { + "periodicity": { 
+ "type": "string" + }, + "pyramid": { + "title": "Cadence", + "description": "Time interval of collection", + "type": "string" + }, + "nodata": { + "title": "nodata", + "type": "number" + }, + "asset_schema": { + "description": "", + "type": "array", + "items": { + "type": "object" + } + } + } + } + ] + }, + "provider": { + "properties": { + "provider:name": { + "title": "Organization Name", + "description": "Name of the provider" + }, + "provider:url": { + "title": "URL", + "description": "url to provider homepage", + "type": "string" + } + } + }, + "host": { + "required": [ + "id", + "scheme" + ], + "properties": { + "id": { + "title": "Identifirer", + "description": "url to bucket or storage location", + "type": "string" + }, + "scheme": { + "type": "string", + "enum": [ + "S3", + "GCS", + "URL", + "OTHER" + ] + }, + "description": { + "title": "Description", + "description": "Detailed multi-line descrtion to fully explain the entity. CommonMark 0.28 syntax may be used for rich text representation.", + "type": "string" + }, + "region": { + "title": "Region", + "type": "string", + "description": "Provider specific region" + }, + "requester_pays": { + "title": "Requester Pays", + "type": "boolean", + "description": "True if requester pays, false if host pays" + } + } + }, + "process": { + "required": [], + "properties": {} + } + } +} \ No newline at end of file From 2980c0730a3b172a170313bd71b7d8b77afe2a12 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 20 Aug 2018 09:42:25 +0200 Subject: [PATCH 02/14] Added human-readable specification for datasets. 
--- dataset-spec/README.md | 92 +++++++++++++++++++++++++++ dataset-spec/json-schema/dataset.json | 33 ++-------- 2 files changed, 96 insertions(+), 29 deletions(-) create mode 100644 dataset-spec/README.md diff --git a/dataset-spec/README.md b/dataset-spec/README.md new file mode 100644 index 000000000..540c3d767 --- /dev/null +++ b/dataset-spec/README.md @@ -0,0 +1,92 @@ +# Dataset Spec for STAC + +## Introduction + +One topic of interest has been the search of datasets*, instead of within a dataset, i.e. in (sub-)catalogs, items and assets. [STAC](https://github.com/radiantearth/stac-spec) is focused on search within a dataset, but it includes some simple constructs to catalog datasets. This could be an independent spec that STAC uses, and others can also independently use, to describe datasets in a lightweight way. + +*\* There is no standardized name for the concept we are describing here. Others called it: dataset series (ISO 19115), collection (CNES, NASA), dataset (JAXA), dataset series (ESA), product (JAXA).* + +## Core + +| Element | Type | Name | Description | +| ------------- | ------------------------------------- | ------------------------------- | ------------------------------------------------------------ | +| id | string | Dataset ID (required) | Identifier for the dataset that is unique across the provider. MUST follow the pattern ` ^[A-Za-z0-9_\-\/]+$ `. TODO: Allow slash? | +| title | string | Title | A short descriptive one-line title for the dataset. | +| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| keywords | [string] | Keywords | List of keywords describing the dataset. | +| version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. 
| +| license_name | string | Dataset License Name (required) | Dataset's license name based on [SPDX License Identifier](https://spdx.org/licenses/) or `proprietary` (see `license_url`). TODO: How to handle non SPDX? internal, proprietary, ...? | +| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license_name` does not contain a SPDX License Identifier. | +| provider | [Provider Object] | Data Provider | The organization that creates the content of the dataset. | +| host | Host Object | Storage Provider | The organization that hosts the dataset. | +| geometry | [GeoJSON Object](http://geojson.org/) | Spatial extent (required) | The spatial extent covered by the dataset as [GeoJSON](http://geojson.org/) object. | +| datetime | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are not supported by ISO 8601 and MUST be encoded as proposed by [Dublin Core Collection Description: Open Date Range Format](http://www.ukoln.ac.uk/metadata/dcmi/date-dccd-odrf/2005-08-13/). | +| process_graph | Process Graph Object | Processing chain | ... | +| dimensions | Dimension Object | Dimensions | ... | +| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised. | + +### Provider Object + +| Element | Type | Name | Description | +| ------- | ------ | --------------------- | ----------- | +| name | string | Organization name | | +| url | string | Organization homepage | | + +### Host Object + +| Element | Type | Name | Description | +| -------------- | ------- | --------------------- | ------------------------------------------------------------ | +| description | string | Description | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. 
| +| scheme | string | Scheme (required) | Values: S3, GCS, URL, OTHER | +| id | string | Identifier (required) | Host-specific identifier such as an URL or asset id. | +| region | string | Region | Provider specific region where the data is stored. | +| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. | + +### Link Object + +TODO: Should be compatible with STAC or remove if catalog is revised. + +| Element | Type | Name | Description | +| ------- | ------ | ------------------------------ | ----------------------------------- | +| href | string | Hyperlink reference (required) | | +| rel | string | Relation (required) | | +| type | string | MIME-type | MIME-type of the referenced entity. | +| title | string | Title | Human-readable title for the link. | + +## Process graph extension (pg) - Items and Datasets + +| Element | Type | Name | Description | +| ----------- | ------ | ------------- | ------------------------------------------------------------ | +| description | string | Description | Detailed multi-line description to fully explain the processing step. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| chain | object | Process chain | TODO | + +## EO extension (eo) - Items and Datasets + +TODO + +We follow the STAC EO extension, but propose additional fields: + +| Element | Type | Name | Description | +| ------------ | -------- | ------------------ | --------------------- | +| unit | ? | | | +| asset_schema | object | | | +| nodata | [number] | Nodata values | The no data value(s). | +| pyramid | ? 
| Pyramid parameters | | +| periodicity | string | Periodicity | ISO8601 | + +#### Bands + +TODO + +We follow the STAC EO extension for bands, but propose additional fields: + +| Element | Type | Name | Description | +| --------- | -------- | ------------- | ------------------------------------------------------------ | +| nodata | [number] | Nodata values | The no data value(s). | +| data_type | string | Data Type | Data type for band values including its bit size. Values: uint8, uint16, uint32, uint64, int8, int16, int32, int64, float16, float32, float64 | +| offset | number | Offset | offset to convert band values to the actual measurement scale | +| scale | number | Scale | scale to convert band values to the actual measurement scale. | + +## Dimensions extension (dim) + +Data can have different dimensions, e.g. in meteorology. The properties of these dimensions can be defined with several of the properties from core, EO extension etc. (TODO) diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json index ef2f1331f..677276354 100644 --- a/dataset-spec/json-schema/dataset.json +++ b/dataset-spec/json-schema/dataset.json @@ -83,17 +83,6 @@ } ] }, - "process_graph": { - "title": "Processing Graph", - "properties": { - "chain": { - "type": "array", - "items": { - "$ref": "#/definitions/process" - } - } - } - }, "version": { "title": "Version", "description": "Version of the dataset", @@ -109,7 +98,11 @@ "properties": { "type": { "enum": [ + "Point", + "LineString", "Polygon", + "MultiPoint", + "MultiLineString", "MultiPolygon" ] } @@ -119,24 +112,6 @@ } ] }, - "sci": { - "properties": { - "doi": { - "title": "DOI", - "description": "Digital Object Identifier", - "type": "string" - }, - "citation": { - "type": "string" - }, - "publication_doi": { - "type": "string" - }, - "publication_citation": { - "type": "string" - } - } - }, "raster": { "allOf": [ { From c4b6e94ca992c1200af42d1b1f9837b899506e29 Mon Sep 17 00:00:00 2001 From: 
Matthias Mohr Date: Tue, 21 Aug 2018 10:11:55 +0200 Subject: [PATCH 03/14] Renamed extents, improved docs and schema, moved extensions. --- dataset-spec/README.md | 51 +++++++++----------- dataset-spec/json-schema/dataset.json | 59 +++++------------------ extensions/stac-dimension-spec.md | 3 ++ extensions/stac-processgraph-extension.md | 6 +++ 4 files changed, 43 insertions(+), 76 deletions(-) create mode 100644 extensions/stac-dimension-spec.md create mode 100644 extensions/stac-processgraph-extension.md diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 540c3d767..64abdef52 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -8,29 +8,27 @@ One topic of interest has been the search of datasets*, instead of within a data ## Core -| Element | Type | Name | Description | -| ------------- | ------------------------------------- | ------------------------------- | ------------------------------------------------------------ | -| id | string | Dataset ID (required) | Identifier for the dataset that is unique across the provider. MUST follow the pattern ` ^[A-Za-z0-9_\-\/]+$ `. TODO: Allow slash? | -| title | string | Title | A short descriptive one-line title for the dataset. | -| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| keywords | [string] | Keywords | List of keywords describing the dataset. | -| version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | -| license_name | string | Dataset License Name (required) | Dataset's license name based on [SPDX License Identifier](https://spdx.org/licenses/) or `proprietary` (see `license_url`). TODO: How to handle non SPDX? internal, proprietary, ...? 
| -| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license_name` does not contain a SPDX License Identifier. | -| provider | [Provider Object] | Data Provider | The organization that creates the content of the dataset. | -| host | Host Object | Storage Provider | The organization that hosts the dataset. | -| geometry | [GeoJSON Object](http://geojson.org/) | Spatial extent (required) | The spatial extent covered by the dataset as [GeoJSON](http://geojson.org/) object. | -| datetime | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are not supported by ISO 8601 and MUST be encoded as proposed by [Dublin Core Collection Description: Open Date Range Format](http://www.ukoln.ac.uk/metadata/dcmi/date-dccd-odrf/2005-08-13/). | -| process_graph | Process Graph Object | Processing chain | ... | -| dimensions | Dimension Object | Dimensions | ... | -| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised. | +| Element | Type | Name | Description | +| --------------- | ------------------------------------- | ------------------------------- | ------------------------------------------------------------ | +| id | string | Dataset ID (required) | Identifier for the dataset that is unique across the provider. MUST follow the pattern ` ^[A-Za-z0-9_\-\/]+$ `. TODO: Allow slash? | +| title | string | Title | A short descriptive one-line title for the dataset. | +| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| keywords | [string] | Keywords | List of keywords describing the dataset. | +| version | string | Dataset Version | Version of the dataset. 
[Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | +| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. See `license_url` for more information. | +| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license` is set to `proprietary`. | +| provider | [Provider Object] | Data Provider | The organizations that created the content of the dataset. | +| host | Host Object | Storage Provider | The organization that hosts the dataset. | +| spatial_extent | [GeoJSON Object](http://geojson.org/) | Spatial extent (required) | The spatial extent covered by the dataset as [GeoJSON](http://geojson.org/) object. | +| temporal_extent | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are not supported by ISO 8601 and MUST be encoded as proposed by [Dublin Core Collection Description: Open Date Range Format](http://www.ukoln.ac.uk/metadata/dcmi/date-dccd-odrf/2005-08-13/). | +| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised. | ### Provider Object -| Element | Type | Name | Description | -| ------- | ------ | --------------------- | ----------- | -| name | string | Organization name | | -| url | string | Organization homepage | | +| Element | Type | Name | Description | +| ------- | ------ | --------------------- | ----------------------------------------------- | +| name | string | Organization name | The name of the organization or the individual. | +| url | string | Organization homepage | Homepage of the provider. | ### Host Object @@ -53,13 +51,6 @@ TODO: Should be compatible with STAC or remove if catalog is revised. 
| type | string | MIME-type | MIME-type of the referenced entity. | | title | string | Title | Human-readable title for the link. | -## Process graph extension (pg) - Items and Datasets - -| Element | Type | Name | Description | -| ----------- | ------ | ------------- | ------------------------------------------------------------ | -| description | string | Description | Detailed multi-line description to fully explain the processing step. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| chain | object | Process chain | TODO | - ## EO extension (eo) - Items and Datasets TODO @@ -87,6 +78,8 @@ We follow the STAC EO extension for bands, but propose additional fields: | offset | number | Offset | offset to convert band values to the actual measurement scale | | scale | number | Scale | scale to convert band values to the actual measurement scale. | -## Dimensions extension (dim) +Other related extensions developed for the dataset spec: -Data can have different dimensions, e.g. in meteorology. The properties of these dimensions can be defined with several of the properties from core, EO extension etc. (TODO) +* Dimension extension (TBD) +* Scientific extension (currently in review) +* Process graph extension (TBD) \ No newline at end of file diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json index 677276354..6c3073069 100644 --- a/dataset-spec/json-schema/dataset.json +++ b/dataset-spec/json-schema/dataset.json @@ -29,84 +29,56 @@ "title", "description", "version", - "license_name" + "license" ], "properties": { "id": { "title": "Provider ID", - "description": "Provider item ID", "type": "string", "pattern": "^[A-Za-z0-9_\\-\/]+$" }, "title": { "title": "Title", - "description": "Title for the dataset", "type": "string" }, "description": { "title": "Description", - "description": "Detailed multi-line descrtion to fully explain the entity. 
CommonMark 0.28 syntax may be used for rich text representation.", "type": "string" }, "keywords": { "title": "Keywords", - "description": "List of keywords describing the dataset", "type": "array", "items": { "type": "string" } }, - "license_name": { + "license": { "title": "License Name", - "description": "License name based on SPDX License Identifier", "type": "string" }, "license_url": { "title": "License URL", - "description": "License url must be specified if license_name does not contain a SPDX License Identifier", "type": "string" }, "provider": { "type": "array", - "description": "The organization the creates the content of the dataset", "items": { "$ref": "#/definitions/provider" } }, "host": { - "allOf": [ - { - "description": "The organization the hosts the content of the dataset" - }, - { - "$ref": "#/definitions/provider" - } - ] + "$ref": "#/definitions/host" }, "version": { "title": "Version", - "description": "Version of the dataset", "type": "string" }, - "datetime": { - "title": "Date and Time", - "description": "The searchable date/time of the assets, in UTC (Formatted in RFC 3339) ", - "type": "string", - "format": "date-time" + "temporal_extent": { + "title": "Temporal extent", + "type": "string" }, - "geometry": { - "properties": { - "type": { - "enum": [ - "Point", - "LineString", - "Polygon", - "MultiPoint", - "MultiLineString", - "MultiPolygon" - ] - } - } + "spatial_extent": { + "type": "object" } } } @@ -138,7 +110,6 @@ }, "pyramid": { "title": "Cadence", - "description": "Time interval of collection", "type": "string" }, "nodata": { @@ -146,7 +117,6 @@ "type": "number" }, "asset_schema": { - "description": "", "type": "array", "items": { "type": "object" @@ -158,13 +128,12 @@ }, "provider": { "properties": { - "provider:name": { + "name": { "title": "Organization Name", - "description": "Name of the provider" + "type": "string" }, - "provider:url": { - "title": "URL", - "description": "url to provider homepage", + "url": { + "title": 
"Organization homepage", "type": "string" } } @@ -205,10 +174,6 @@ "description": "True if requester pays, false if host pays" } } - }, - "process": { - "required": [], - "properties": {} } } } \ No newline at end of file diff --git a/extensions/stac-dimension-spec.md b/extensions/stac-dimension-spec.md new file mode 100644 index 000000000..73c091a12 --- /dev/null +++ b/extensions/stac-dimension-spec.md @@ -0,0 +1,3 @@ +## Dimensions extension (dim) + +Data can have different dimensions, e.g. in meteorology. The properties of these dimensions can be defined with several of the properties from core, EO extension etc. (TODO) diff --git a/extensions/stac-processgraph-extension.md b/extensions/stac-processgraph-extension.md new file mode 100644 index 000000000..99e322b61 --- /dev/null +++ b/extensions/stac-processgraph-extension.md @@ -0,0 +1,6 @@ +## Process graph extension (pg) - Items and Datasets + +| Element | Type | Name | Description | +| ----------- | ------ | ------------- | ------------------------------------------------------------ | +| description | string | Description | Detailed multi-line description to fully explain the processing step. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| chain | object | Process chain | TODO \ No newline at end of file From 8dab2acc942fcd41e931c29b17b239ba69f16e46 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 21 Aug 2018 15:02:06 +0200 Subject: [PATCH 04/14] Improvements to EO extension, restructured extensions, minor improvements to dataset spec. 
--- dataset-spec/README.md | 35 ++++++++++--------- .../README.md} | 0 extensions/stac-processgraph-extension.md | 6 ---- 3 files changed, 18 insertions(+), 23 deletions(-) rename extensions/{stac-dimension-spec.md => dimension/README.md} (100%) delete mode 100644 extensions/stac-processgraph-extension.md diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 64abdef52..5cd2bd0fe 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -40,6 +40,8 @@ One topic of interest has been the search of datasets*, instead of within a data | region | string | Region | Provider specific region where the data is stored. | | requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. | +**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec. + ### Link Object TODO: Should be compatible with STAC or remove if catalog is revised. @@ -53,33 +55,32 @@ TODO: Should be compatible with STAC or remove if catalog is revised. ## EO extension (eo) - Items and Datasets -TODO - -We follow the STAC EO extension, but propose additional fields: +We follow the STAC EO extension where meaningful (see below), but propose additional fields: | Element | Type | Name | Description | | ------------ | -------- | ------------------ | --------------------- | -| unit | ? | | | -| asset_schema | object | | | +| asset_schema | object | Asset Schema | TODO | | nodata | [number] | Nodata values | The no data value(s). | -| pyramid | ? | Pyramid parameters | | +| pyramid | object | Pyramid parameters | TODO | | periodicity | string | Periodicity | ISO8601 | -#### Bands +Some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and SHOULD not be used in datasets. 
-TODO +#### Bands We follow the STAC EO extension for bands, but propose additional fields: -| Element | Type | Name | Description | -| --------- | -------- | ------------- | ------------------------------------------------------------ | -| nodata | [number] | Nodata values | The no data value(s). | -| data_type | string | Data Type | Data type for band values including its bit size. Values: uint8, uint16, uint32, uint64, int8, int16, int32, int64, float16, float32, float64 | -| offset | number | Offset | offset to convert band values to the actual measurement scale | -| scale | number | Scale | scale to convert band values to the actual measurement scale. | +| Element | Type | Name | Description | +| ------- | -------- | ------------- | ------------------------------------------------------------ | +| nodata | [number] | Nodata values | The no data value(s). | +| offset | number | Offset | Offset to convert band values to the actual measurement scale. | +| scale | number | Scale | Scale to convert band values to the actual measurement scale. | +| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). 
| + +## Other extensions Other related extensions developed for the dataset spec: -* Dimension extension (TBD) -* Scientific extension (currently in review) -* Process graph extension (TBD) \ No newline at end of file +* [Dimension extension](../extensions/dimension) (WIP) +* [Scientific extension](../extensions/scientific) (currently in review, see [PR #186](https://github.com/radiantearth/stac-spec/pull/186)) +* Process graph extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file diff --git a/extensions/stac-dimension-spec.md b/extensions/dimension/README.md similarity index 100% rename from extensions/stac-dimension-spec.md rename to extensions/dimension/README.md diff --git a/extensions/stac-processgraph-extension.md b/extensions/stac-processgraph-extension.md deleted file mode 100644 index 99e322b61..000000000 --- a/extensions/stac-processgraph-extension.md +++ /dev/null @@ -1,6 +0,0 @@ -## Process graph extension (pg) - Items and Datasets - -| Element | Type | Name | Description | -| ----------- | ------ | ------------- | ------------------------------------------------------------ | -| description | string | Description | Detailed multi-line description to fully explain the processing step. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| chain | object | Process chain | TODO \ No newline at end of file From 9e9414ba9d07c2bef3cdc772a735fae06d43aabc Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 22 Aug 2018 09:51:45 +0200 Subject: [PATCH 05/14] Added first draft of the dimensions extension. 
--- extensions/dimension/README.md | 20 +++++++++++++++-- extensions/dimension/example.json | 23 ++++++++++++++++++++ extensions/dimension/schema.json | 36 +++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 extensions/dimension/example.json create mode 100644 extensions/dimension/schema.json diff --git a/extensions/dimension/README.md b/extensions/dimension/README.md index 73c091a12..a4c841b6b 100644 --- a/extensions/dimension/README.md +++ b/extensions/dimension/README.md @@ -1,3 +1,19 @@ -## Dimensions extension (dim) +# STAC Dimensions Extension Spec -Data can have different dimensions, e.g. in meteorology. The properties of these dimensions can be defined with several of the properties from core, EO extension etc. (TODO) +This document explains the fields of the STAC Dimensions Extension (dim) to a STAC `Dataset`. Data can have different dimensions (= axes), e.g. in meteorology. The properties of these dimensions can be defined with this extension. + +## Dimensions Extension Description + +This is the field that extends the `Dataset` object: + +| Element | Type | Name | Description | +| ---------------- | -------------------- | ------------------------- | ------------------------------------------------------------ | +| dim:dimensions | [Dimension Object] | Dimensions | Dimensions of the data. If the dimensions have an order, the order SHOULD be reflected in the order of the array. | + +### Dimension Object + +| Element | Type | Name | Description | +| ------- | ---------------- | ------------------- | ------------------------------------------------------------ | +| label | string | Label (required) | Human-readable label for the dimension. | +| unit | string | Unit of Measurement | Unit of measurement, preferably SI. ToDo: Any standard to express this, e.g. [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or this [dict](https://www.unc.edu/~rowlett/units/)? 
| +| extent | [number\|string] | Data Extent | Specifies the extent of the data, i.e. the lower bound as the first element and the upper bound as the second element of the array. | diff --git a/extensions/dimension/example.json b/extensions/dimension/example.json new file mode 100644 index 000000000..33ffd44b3 --- /dev/null +++ b/extensions/dimension/example.json @@ -0,0 +1,23 @@ +{ + "dim:dimensions": [ + { + "label": "Longitude", + "unit": "°", + "extent": [-180, 180] + }, + { + "label": "Latitude", + "unit": "°", + "extent": [-90, 90] + }, + { + "label": "Temperature", + "unit": "°C", + "extent": [-20, 60] + }, + { + "label": "Date", + "extent": ["2018-01-01T00:00:00Z", "2018-01-31T23:59:59Z"] + } + ] +} \ No newline at end of file diff --git a/extensions/dimension/schema.json b/extensions/dimension/schema.json new file mode 100644 index 000000000..03e7dec37 --- /dev/null +++ b/extensions/dimension/schema.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "STAC Dimensions Extension Spec", + "properties": { + "dim:dimensions": { + "type": "array", + "title": "Dimensions", + "items": { + "type": "object", + "required": [ + "label" + ], + "properties": { + "label": { + "type": "string", + "title": "Label" + }, + "unit": { + "type": "string", + "title": "Unit of Measurement" + }, + "extent": { + "type": "array", + "title": "Data Extent", + "minItems": 2, + "maxItems": 2, + "items": { + "type": ["number", "string"] + } + } + } + } + } + } +} \ No newline at end of file From d32c1e2ebc2c37d2baab1337c0990f8703881e9c Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 22 Aug 2018 16:34:18 +0200 Subject: [PATCH 06/14] Moved EO extension and improved Dataset spec and adapted and fixed schema. Fixed tables. 
--- dataset-spec/README.md | 54 ++----- dataset-spec/json-schema/dataset.json | 217 +++++++++++--------------- extensions/stac-collection-spec.md | 10 +- extensions/stac-eo-spec.md | 72 +++++---- 4 files changed, 147 insertions(+), 206 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 5cd2bd0fe..1c470b5a1 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -20,8 +20,8 @@ One topic of interest has been the search of datasets*, instead of within a data | provider | [Provider Object] | Data Provider | The organizations that created the content of the dataset. | | host | Host Object | Storage Provider | The organization that hosts the dataset. | | spatial_extent | [GeoJSON Object](http://geojson.org/) | Spatial extent (required) | The spatial extent covered by the dataset as [GeoJSON](http://geojson.org/) object. | -| temporal_extent | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are not supported by ISO 8601 and MUST be encoded as proposed by [Dublin Core Collection Description: Open Date Range Format](http://www.ukoln.ac.uk/metadata/dcmi/date-dccd-odrf/2005-08-13/). | -| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised. | +| temporal_extent | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. ToDo: Support open date ranges | +| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised and links are specified on the catalog level. 
| ### Provider Object @@ -38,49 +38,25 @@ One topic of interest has been the search of datasets*, instead of within a data | scheme | string | Scheme (required) | Values: S3, GCS, URL, OTHER | | id | string | Identifier (required) | Host-specific identifier such as an URL or asset id. | | region | string | Region | Provider specific region where the data is stored. | -| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. | +| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. Defaults to `false`. | **Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec. ### Link Object -TODO: Should be compatible with STAC or remove if catalog is revised. +| Element | Type | Name | Description | +| ------- | ------ | ------------------- | ------------------------------------------------------------ | +| href | string | Link (required) | The actual link in the format of an URL. Relative and absolute links are both allowed. | +| rel | string | Relation (required) | Relationship between the current document and the linked document. | +| type | string | MIME-type | MIME-type of the referenced entity. | +| title | string | Title | Human-readable title for the link. | -| Element | Type | Name | Description | -| ------- | ------ | ------------------------------ | ----------------------------------- | -| href | string | Hyperlink reference (required) | | -| rel | string | Relation (required) | | -| type | string | MIME-type | MIME-type of the referenced entity. | -| title | string | Title | Human-readable title for the link. 
| +## Extensions -## EO extension (eo) - Items and Datasets +Related extensions to be used with the dataset spec: -We follow the STAC EO extension where meaningful (see below), but propose additional fields: - -| Element | Type | Name | Description | -| ------------ | -------- | ------------------ | --------------------- | -| asset_schema | object | Asset Schema | TODO | -| nodata | [number] | Nodata values | The no data value(s). | -| pyramid | object | Pyramid parameters | TODO | -| periodicity | string | Periodicity | ISO8601 | - -Some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and SHOULD not be used in datasets. - -#### Bands - -We follow the STAC EO extension for bands, but propose additional fields: - -| Element | Type | Name | Description | -| ------- | -------- | ------------- | ------------------------------------------------------------ | -| nodata | [number] | Nodata values | The no data value(s). | -| offset | number | Offset | Offset to convert band values to the actual measurement scale. | -| scale | number | Scale | Scale to convert band values to the actual measurement scale. | -| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). | - -## Other extensions - -Other related extensions developed for the dataset spec: - -* [Dimension extension](../extensions/dimension) (WIP) +* [EO extension](../extensions/stac-eo-spec.md) + Please note that some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and MUST not be used in datasets. 
+* [Dimensions extension](../extensions/dimension) (currently in review, see [PR #164](https://github.com/radiantearth/stac-spec/pull/164)) * [Scientific extension](../extensions/scientific) (currently in review, see [PR #186](https://github.com/radiantearth/stac-spec/pull/186)) -* Process graph extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file +* Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json index 6c3073069..1e4b59332 100644 --- a/dataset-spec/json-schema/dataset.json +++ b/dataset-spec/json-schema/dataset.json @@ -2,139 +2,59 @@ "$schema": "http://json-schema.org/draft-06/schema#", "id": "dataset.json#", "title": "Dataset Item", - "type": "object", "description": "This object represents the dataset in a SpatioTemporal Asset Catalog.", - "additionalProperties": true, - "anyOf": [ - { - "$ref": "#/definitions/core" - }, - { - "$ref": "#/definitions/eo" + "type": "object", + "required": [ + "id", + "description", + "license", + "spatial_extent", + "temporal_extent", + "links" + ], + "properties": { + "id": { + "title": "Provider ID", + "type": "string", + "pattern": "^[A-Za-z0-9_\\-\/]+$" }, - { - "$ref": "#/definitions/raster" + "title": { + "title": "Title", + "type": "string" }, - { - "$ref": "#/definitions/vector" - } - ], - "definitions": { - "core": { - "allOf": [ - { - "type": "object", - "required": [ - "id", - "title", - "description", - "version", - "license" - ], - "properties": { - "id": { - "title": "Provider ID", - "type": "string", - "pattern": "^[A-Za-z0-9_\\-\/]+$" - }, - "title": { - "title": "Title", - "type": "string" - }, - "description": { - "title": "Description", - "type": "string" - }, - "keywords": { - "title": "Keywords", - "type": "array", - "items": { - "type": "string" - } - }, - "license": { - 
"title": "License Name", - "type": "string" - }, - "license_url": { - "title": "License URL", - "type": "string" - }, - "provider": { - "type": "array", - "items": { - "$ref": "#/definitions/provider" - } - }, - "host": { - "$ref": "#/definitions/host" - }, - "version": { - "title": "Version", - "type": "string" - }, - "temporal_extent": { - "title": "Temporal extent", - "type": "string" - }, - "spatial_extent": { - "type": "object" - } - } - } - ] + "description": { + "title": "Description", + "type": "string" }, - "raster": { - "allOf": [ - { - "$ref": "#/definitions/core" - } - ] + "keywords": { + "title": "Keywords", + "type": "array", + "items": { + "type": "string" + } }, - "vector": { - "allOf": [ - { - "$ref": "#/definitions/core" - } - ] + "license": { + "title": "License Name", + "type": "string" }, - "eo": { - "allOf": [ - { - "$ref": "#/definitions/raster" - }, - { - "properties": { - "periodicity": { - "type": "string" - }, - "pyramid": { - "title": "Cadence", - "type": "string" - }, - "nodata": { - "title": "nodata", - "type": "number" - }, - "asset_schema": { - "type": "array", - "items": { - "type": "object" - } - } - } - } - ] + "license_url": { + "title": "License URL", + "type": "string", + "format": "url" }, "provider": { - "properties": { - "name": { - "title": "Organization Name", - "type": "string" - }, - "url": { - "title": "Organization homepage", - "type": "string" + "type": "array", + "items": { + "properties": { + "name": { + "title": "Organization Name", + "type": "string" + }, + "url": { + "title": "Organization homepage", + "type": "string", + "format": "url" + } } } }, @@ -146,10 +66,10 @@ "properties": { "id": { "title": "Identifirer", - "description": "url to bucket or storage location", "type": "string" }, "scheme": { + "title": "Scheme", "type": "string", "enum": [ "S3", @@ -160,18 +80,55 @@ }, "description": { "title": "Description", - "description": "Detailed multi-line descrtion to fully explain the entity. 
CommonMark 0.28 syntax may be used for rich text representation.", "type": "string" }, "region": { "title": "Region", - "type": "string", - "description": "Provider specific region" + "type": "string" }, "requester_pays": { "title": "Requester Pays", "type": "boolean", - "description": "True if requester pays, false if host pays" + "default": false + } + } + }, + "version": { + "title": "Version", + "type": "string" + }, + "temporal_extent": { + "title": "Temporal extent", + "type": "string" + }, + "spatial_extent": { + "type": "object" + }, + "links": { + "type": "array", + "items": { + "type": "object", + "required": [ + "href", + "rel" + ], + "properties": { + "href": { + "title": "Link", + "type": "string" + }, + "rel": { + "title": "Relation", + "type": "string" + }, + "type": { + "title": "type", + "type": "string" + }, + "title": { + "title": "Title", + "type": "string" + } } } } diff --git a/extensions/stac-collection-spec.md b/extensions/stac-collection-spec.md index a3b7ad457..068cc987f 100644 --- a/extensions/stac-collection-spec.md +++ b/extensions/stac-collection-spec.md @@ -4,11 +4,11 @@ A group of STAC `Item` objects from a single source can share a lot of common me ## Collection Extension Description -| element | type info | name | description | -|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| -| c:id | string | Collection ID | Machine readable ID for the collection -| c:name | string (optional) | Collection Name | A name given to the Collection, used for display -| c:description | string (optional) | Collection Description | A human readable description of the collection +| element | type info | name | description | +| ------------- | ----------------- | ---------------------- | ------------------------------------------------ | +| c:id | string | Collection ID | Machine readable ID for the collection | +| c:name | string 
(optional) | Collection Name | A name given to the Collection, used for display | +| c:description | string (optional) | Collection Description | A human readable description of the collection | A `Collection` does not have many specific fields, as it may contain any fields that are in the core spec as well as any other extension. This provides maximum flexibility to data providers, as some the set of common metadata fields can vary between different types of data. For instance, Landsat and Sentinel data always has a eo:off_nadir value of 0, because those satellites are always pointed downward (i.e., nadir), while satellite that can be pointed will have varying eo:off_nadir values. diff --git a/extensions/stac-eo-spec.md b/extensions/stac-eo-spec.md index 00575b7ca..e381215d8 100644 --- a/extensions/stac-eo-spec.md +++ b/extensions/stac-eo-spec.md @@ -10,19 +10,23 @@ A lot of EO data will have common metadata across many `Items`. It is not necess These are fields that extend the `Item` object ## `Item` additions -| element | type info | name | description | -|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| -| eo:gsd* | float | Ground Sample distance | The nominal distance between pixel centers available, in meters | -| eo:platform* | string | Unique name of platform | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | -| eo:constellation* | string | constellation the platform belongs to | Name of the group or constellation the platform belongs to | -| eo:instrument* | string | Instrument used | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | -| eo:bands* | dictionary | Band Info | Band specific metadata (see below) -| eo:epsg | unsigned int | EPSG code | EPSG code of the datasource, null if no EPSG code | -| eo:cloud_cover | integer (optional) | Cloud Cover Pct | Percent of cloud cover (0-100) | -| 
eo:off_nadir | float (optional) | Off nadir | Viewing angle. 0-90 degrees, measured from nadir -| eo:azimuth | float (optional) | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north -| eo:sun_azimuth | float (optional) | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north -| eo:sun_elevation | float (optional) | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon +| element | type info | name | description | +| ----------------- | ------------------ | ------------------------------------- | ------------------------------------------------------------ | +| eo:gsd* | float | Ground Sample distance | The nominal distance between pixel centers available, in meters | +| eo:platform* | string | Unique name of platform | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | +| eo:constellation* | string | constellation the platform belongs to | Name of the group or constellation the platform belongs to | +| eo:instrument* | string | Instrument used | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | +| eo:bands* | dictionary | Band Info | Band specific metadata (see below) | +| eo:epsg | unsigned int | EPSG code | EPSG code of the datasource, null if no EPSG code | +| eo:cloud_cover | integer (optional) | Cloud Cover Pct | Percent of cloud cover (0-100) | +| eo:off_nadir | float (optional) | Off nadir | Viewing angle. 0-90 degrees, measured from nadir | +| eo:azimuth | float (optional) | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north | +| eo:sun_azimuth | float (optional) | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north | +| eo:sun_elevation | float (optional) | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon | +| eo:asset_schema | object | Asset Schema | TODO | +| eo:nodata | [number] | Nodata values | The no data value(s). 
| +| eo:pyramid | object | Pyramid parameters | TODO | +| eo:periodicity | string | Periodicity | ISO8601 | ## `Item` Field Descriptions @@ -51,13 +55,17 @@ These are fields that extend the `Item` object ## `Item:eo:bands` The bands field of a `Item` is a dictionary where the index identifies a specific band. This is often a band number (e.g., 1, B1, B01), but could be any unique identifier. -| element | type info | name | description | -|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| -| common_name | string (optional) | Common name | The name commonly used to refer to this specific band (see below) -| gsd | float (optional) | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. Defaults to eo:gsd if not provided -| accuracy | float (optional) | Geolocation Accuracy | The expected accuracy of the scene registration, in meters -| center_wavelength | float (optional) | Center wavelength | The center wavelength of the band, in microns -| full_width_half_max | float (optional) | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns +| element | type info | name | description | +| ------------------- | ----------------- | -------------------------- | ------------------------------------------------------------ | +| common_name | string (optional) | Common name | The name commonly used to refer to this specific band (see below) | +| gsd | float (optional) | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. 
Defaults to eo:gsd if not provided | +| accuracy | float (optional) | Geolocation Accuracy | The expected accuracy of the scene registration, in meters | +| center_wavelength | float (optional) | Center wavelength | The center wavelength of the band, in microns | +| full_width_half_max | float (optional) | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns | +| nodata | [number] | Nodata values | The no data value(s). | +| offset | number | Offset | Offset to convert band values to the actual measurement scale. | +| scale | number | Scale | Scale to convert band values to the actual measurement scale. | +| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). | ## `Item:eo:bands` Field Descriptions @@ -75,15 +83,15 @@ The bands field of a `Item` is a dictionary where the index identifies a specifi The band's common_name is the name that is commonly used to refer to that band's spectral properties. The table below shows the common name based on the average band range for the band numbers of several popular instruments. 
| Common Name | Band Range (μm) | Landsat 5 | Landsat 7 | Landsat 8 | Sentinel 2 | MODIS | -|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------|------------------------------------|------------------------------------|------------------------------------| -| coastal | 0.40 - 0.45 | | | 1 | 1 | -|blue | 0.45 - 0.5 | 1 | 1 | 2 | 2 | 3 -|green | 0.5 - 0.6 | 2 | 2 | 3 | 3 | 4 -|red | 0.6 - 0.7 | 3 | 3 | 4 | 4 | 1 -|pan | 0.5 - 0.7 | | 8 | 8 | | -|nir | 0.77 - 1.00 | 4 | 4 | 5 | 8 | 2 -|cirrus | 1.35 - 1.40 | | | 9 | 10 | 26 -|swir16 | 1.55 - 1.75 | 5 | 5 | 6 | 11 | 6 -|swir22 |2.1 - 2.3 | 7 | 7 | 7 | 12 | 7 -|lwir11 | 10.5 - 11.5 | | | 10 | | 31 -|lwir12 | 11.5 - 12.5 | | | 11 | | 32 +|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------|------------------------------------|------------------------------------|------------------------------------| +| coastal | 0.40 - 0.45 | | | 1 | 1 | +|blue | 0.45 - 0.5 | 1 | 1 | 2 | 2 | 3 | +|green | 0.5 - 0.6 | 2 | 2 | 3 | 3 | 4 | +|red | 0.6 - 0.7 | 3 | 3 | 4 | 4 | 1 | +|pan | 0.5 - 0.7 | | 8 | 8 | | | +|nir | 0.77 - 1.00 | 4 | 4 | 5 | 8 | 2 | +|cirrus | 1.35 - 1.40 | | | 9 | 10 | 26 | +|swir16 | 1.55 - 1.75 | 5 | 5 | 6 | 11 | 6 | +|swir22 |2.1 - 2.3 | 7 | 7 | 7 | 12 | 7 | +|lwir11 | 10.5 - 11.5 | | | 10 | | 31 | +|lwir12 | 11.5 - 12.5 | | | 11 | | 32 | From 6224a833568f45bddb614a6aa647f147d4d06ec2 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 24 Aug 2018 19:24:02 +0200 Subject: [PATCH 07/14] Changes to EO extension, made dataset spec more compliant with WFS3. 
--- dataset-spec/README.md | 34 +++++++++++-------- extensions/stac-eo-spec.md | 68 +++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 1c470b5a1..a32e1896c 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -8,20 +8,26 @@ One topic of interest has been the search of datasets*, instead of within a data ## Core -| Element | Type | Name | Description | -| --------------- | ------------------------------------- | ------------------------------- | ------------------------------------------------------------ | -| id | string | Dataset ID (required) | Identifier for the dataset that is unique across the provider. MUST follow the pattern ` ^[A-Za-z0-9_\-\/]+$ `. TODO: Allow slash? | -| title | string | Title | A short descriptive one-line title for the dataset. | -| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| keywords | [string] | Keywords | List of keywords describing the dataset. | -| version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | -| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. See `license_url` for more information. | -| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license` is set to `proprietary`. | -| provider | [Provider Object] | Data Provider | The organizations that created the content of the dataset. | -| host | Host Object | Storage Provider | The organization that hosts the dataset. 
| -| spatial_extent | [GeoJSON Object](http://geojson.org/) | Spatial extent (required) | The spatial extent covered by the dataset as [GeoJSON](http://geojson.org/) object. | -| temporal_extent | string | Temporal extent (required) | Temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. ToDo: Support open date ranges | -| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. TODO: Remove if catalog is revised and links are specified on the catalog level. | +| Element | Type | Name | Description | +| ----------- | ----------------- | ------------------------------- | ------------------------------------------------------------ | +| name | string | Identifier (required) | Identifier for the dataset that is unique across the provider. The identi | +| title | string | Title | A short descriptive one-line title for the dataset. | +| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| keywords | [string] | Keywords | List of keywords describing the dataset. | +| version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | +| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. See `license_url` for more information. | +| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license` is set to `proprietary`. | +| provider | [Provider Object] | Data Provider | The organizations that created the content of the dataset. | +| host | Host Object | Storage Provider | The organization that hosts the dataset. 
| +| extent | [Extent Object] | Extents (required) | Spatial and temporal extents. | +| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. | + +### Extent Object + +| Element | Type | Name | Description | +| -------- | -------- | -------------------------- | ------------------------------------------------------------ | +| spatial | [number] | Spatial extent (required) | Potential spatial extent covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. ToDo: The list of four numbers can be extended to six numbers to support a 3D spatial extent. | +| temporal | string | Temporal extent (required) | Potential temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. ToDo: Support open date ranges | ### Provider Object diff --git a/extensions/stac-eo-spec.md b/extensions/stac-eo-spec.md index e6e0cd9b9..6b522b5f9 100644 --- a/extensions/stac-eo-spec.md +++ b/extensions/stac-eo-spec.md @@ -7,28 +7,28 @@ This document explains the fields of the STAC Earth Observation (EO) Extension t A lot of EO data will have common metadata across many `Items`. It is not necessary, but recommended to use the [Collections extension](stac-collection-spec.md). While the exact metadata that would appear in a `Collection` record will vary depending on the dataset, the most common collection-level metadata fields are indicated with an * in the tables below. 
## EO Extension Description -These are fields that extend the `Item` object -## `Item` additions - -| element | type info | name | description | -| ----------------- | ------------------ | ------------------------------------- | ------------------------------------------------------------ | -| eo:gsd* | float | Ground Sample distance | The nominal distance between pixel centers available, in meters | -| eo:platform* | string | Unique name of platform | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | -| eo:constellation* | string | constellation the platform belongs to | Name of the group or constellation the platform belongs to | -| eo:instrument* | string | Instrument used | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | -| eo:bands* | dictionary | Band Info | Band specific metadata (see below) | -| eo:epsg | unsigned int | EPSG code | EPSG code of the datasource, null if no EPSG code | -| eo:cloud_cover | integer (optional) | Cloud Cover Pct | Percent of cloud cover (0-100) | -| eo:off_nadir | float (optional) | Off nadir | Viewing angle. 0-90 degrees, measured from nadir | -| eo:azimuth | float (optional) | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north | -| eo:sun_azimuth | float (optional) | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north | -| eo:sun_elevation | float (optional) | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon | -| eo:asset_schema | object | Asset Schema | TODO | -| eo:nodata | [number] | Nodata values | The no data value(s). 
| -| eo:pyramid | object | Pyramid parameters | TODO | -| eo:periodicity | string | Periodicity | ISO8601 | - -## `Item` Field Descriptions +These are fields that extend the `Item` and `Dataset` object +## `Item` and `Dataset` additions + +| element | type info | name | description | scopes | +| ---------------- | --------------------- | ------------------------------------------------ | ------------------------------------------------------------ | -------------- | +| eo:gsd | number | Ground Sample distance (required) | The nominal distance between pixel centers available, in meters | Item | +| eo:platform | string | Unique name of platform (required) | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | Item + Dataset | +| eo:constellation | string | constellation the platform belongs to (required) | Name of the group or constellation the platform belongs to | Item + Dataset | +| eo:instrument | string | Instrument used (required) | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | Item + Dataset | +| eo:bands | {Band Object} | Band Info (required) | Band specific metadata (see below) | Item + Dataset | +| eo:epsg | number | EPSG code | EPSG code of the datasource, null if no EPSG code | Item + Dataset | +| eo:cloud_cover | number | Cloud Cover | Percent of cloud cover (0-100) | Item | +| eo:off_nadir | number | Off nadir | Viewing angle. 0-90 degrees, measured from nadir | Item + Dataset | +| eo:azimuth | number | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north | Item + Dataset | +| eo:sun_azimuth | number | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north | Item | +| eo:sun_elevation | number | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon | Item | +| eo:asset_schema | {Asset Schema Object} | Asset Schema | TODO | Dataset | +| eo:nodata | [number] | Nodata values | The no data value(s). 
| Dataset | +| eo:pyramid | {Pyramid Object} | Pyramid parameters | TODO | Dataset | +| eo:periodicity | string | Periodicity | ISO8601 | Dataset | + +## `Item` and `Dataset` Field Descriptions **eo:gsd** is the nominal Ground Sample Distance for the data, as measured in meters on the ground. Since GSD can vary across a scene depending on projection, this should be the average or most commonly used GSD in the center of the image. If the data includes multiple bands with different GSD values, this should be the value for the greatest number or most common bands. For instance, Landsat optical and short-wave IR bands are all 30 meters, but the panchromatic band is 15 meters. The eo:gsd should be 30 meters in this case since those are the bands most commonly used. @@ -52,20 +52,20 @@ These are fields that extend the `Item` object **eo:sun_elevation**: This is the angle from the tangent of ths scene center point to the sun. Measured in degrees (0-90). -## `Item:eo:bands` +## `eo:bands` The bands field of a `Item` is a dictionary where the index identifies a specific band. This is often a band number (e.g., 1, B1, B01), but could be any unique identifier. -| element | type info | name | description | -| ------------------- | ----------------- | -------------------------- | ------------------------------------------------------------ | -| common_name | string (optional) | Common name | The name commonly used to refer to this specific band (see below) | -| gsd | float (optional) | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. 
Defaults to eo:gsd if not provided | -| accuracy | float (optional) | Geolocation Accuracy | The expected accuracy of the scene registration, in meters | -| center_wavelength | float (optional) | Center wavelength | The center wavelength of the band, in microns | -| full_width_half_max | float (optional) | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns | -| nodata | [number] | Nodata values | The no data value(s). | -| offset | number | Offset | Offset to convert band values to the actual measurement scale. | -| scale | number | Scale | Scale to convert band values to the actual measurement scale. | -| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). | +| element | type info | name | description | +| ------------------- | --------- | -------------------------- | ------------------------------------------------------------ | +| common_name | string | Common name | The name commonly used to refer to this specific band (see below) | +| gsd | number | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. Defaults to eo:gsd if not provided | +| accuracy | number | Geolocation Accuracy | The expected accuracy of the scene registration, in meters | +| center_wavelength | number | Center wavelength | The center wavelength of the band, in microns | +| full_width_half_max | number | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns | +| nodata | [number] | Nodata values | The no data value(s). | +| offset | number | Offset | Offset to convert band values to the actual measurement scale. | +| scale | number | Scale | Scale to convert band values to the actual measurement scale. 
| +| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). | ## `Item:eo:bands` Field Descriptions From 28d25fc22e91f880b21b7aaa7da4c10a92fde757 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 27 Aug 2018 13:09:31 +0200 Subject: [PATCH 08/14] Improved the dataset spec (descriptions, adopted to current discussions, added rel types, ...), updated JSON schema and added an example. --- dataset-spec/README.md | 87 ++++++++++++++++++--------- dataset-spec/example-s2.json | 47 +++++++++++++++ dataset-spec/json-schema/dataset.json | 82 ++++++++++++++----------- 3 files changed, 151 insertions(+), 65 deletions(-) create mode 100644 dataset-spec/example-s2.json diff --git a/dataset-spec/README.md b/dataset-spec/README.md index a32e1896c..6f3d7d8bf 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -1,68 +1,95 @@ # Dataset Spec for STAC -## Introduction +[STAC Items](https://github.com/radiantearth/stac-spec/json-spec/) are focused on search within a dataset*. Another topic of interest is the search of datasets, instead of within a dataset. The Dataset Spec is an independent spec that STAC Items are *strongly recommended* to use. Other parties can also independently use this spec to describe datasets in a lightweight way. -One topic of interest has been the search of datasets*, instead of within a dataset, i.e. in (sub-)catalogs, items and assets. [STAC](https://github.com/radiantearth/stac-spec) is focused on search within a dataset, but it includes some simple constructs to catalog datasets. This could be an independent spec that STAC uses, and others can also independently use, to describe datasets in a lightweight way. +The Datasets Spec is a superset of the [Catalog Spec](../static-catalog/). It shares the same fields and therefore every Dataset is also a valid Catalog.
Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. + +A Dataset can be represented in JSON format. Every field described here is a property in a JSON object. + +* [Example (Sentinel 2)](example-s2.json) +* [JSON Schema](json-schema/dataset.json) *\* There is no standardized name for the concept we are describing here. Others called it: dataset series (ISO 19115), collection (CNES, NASA), dataset (JAXA), dataset series (ESA), product (JAXA).* -## Core +## Dataset fields | Element | Type | Name | Description | | ----------- | ----------------- | ------------------------------- | ------------------------------------------------------------ | -| name | string | Identifier (required) | Identifier for the dataset that is unique across the provider. The identi | +| name | string | Identifier (required) | Identifier for the dataset that is unique across the provider. | | title | string | Title | A short descriptive one-line title for the dataset. | | description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | | keywords | [string] | Keywords | List of keywords describing the dataset. | | version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | -| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. See `license_url` for more information. | -| license_url | string | Dataset License URL | Dataset's license URL SHOULD be specified if `license` is set to `proprietary`. | -| provider | [Provider Object] | Data Provider | The organizations that created the content of the dataset. 
| +| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. Proprietary licenses SHOULD add a link to the license text, see the `license` relation type. | +| provider | [Provider Object] | Data Provider | The organizations that influenced the content of the dataset. | | host | Host Object | Storage Provider | The organization that hosts the dataset. | | extent | [Extent Object] | Extents (required) | Spatial and temporal extents. | | links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. | ### Extent Object +The object describes the spatio-temporal extents of the dataset. Both spatial and temporal extents are required to be specified. + +**Note:** STAC datasets try to be compliant with [WFS 3.0](https://github.com/opengeospatial/WFS_FES), but there are still issues to be solved. The WFS specification is in draft state and may change, especially regarding [3D support](https://github.com/opengeospatial/WFS_FES/issues/143) for spatial extents or the handling of [open date ranges](https://github.com/opengeospatial/WFS_FES/issues/155) for temporal extents. Therefore, it is also likely that the following fields change over time. + | Element | Type | Name | Description | | -------- | -------- | -------------------------- | ------------------------------------------------------------ | -| spatial | [number] | Spatial extent (required) | Potential spatial extent covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. ToDo: The list of four numbers can be extended to six numbers to support a 3D spatial extent. | -| temporal | string | Temporal extent (required) | Potential temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601.
ToDo: Support open date ranges | +| spatial | [number] | Spatial extent (required) | Potential spatial extent covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. The list of four numbers can be extended to six numbers to support a 3D spatial extent. | +| temporal | string | Temporal extent (required) | Potential temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are supported by omitting either the start or the end time. Example for data from the beginning of 2009 until now: `2009-01-01T00:00:00Z/`. | ### Provider Object -| Element | Type | Name | Description | -| ------- | ------ | --------------------- | ----------------------------------------------- | -| name | string | Organization name | The name of the organization or the individual. | -| url | string | Organization homepage | Homepage of the provider. | +The object provides information about a provider. A provider is any of the organizations that created or processed the content of the dataset and therefore influenced the data offered by this dataset. + +| Element | Type | Name | Description | +| ------- | ------ | ---------------------------- | ----------------------------------------------- | +| name | string | Organization name (required) | The name of the organization or the individual. | +| url | string | Organization homepage | Homepage of the provider. | ### Host Object -| Element | Type | Name | Description | -| -------------- | ------- | --------------------- | ------------------------------------------------------------ | -| description | string | Description | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation.
| -| scheme | string | Scheme (required) | Values: S3, GCS, URL, OTHER | -| id | string | Identifier (required) | Host-specific identifier such as an URL or asset id. | -| region | string | Region | Provider specific region where the data is stored. | -| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. Defaults to `false`. | +The object provides information about the storage provider hosting the data. -**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec. +**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec once this concept is introduced to STAC. + +| Element | Type | Name | Description | +| -------------- | ------- | ---------------------------- | ------------------------------------------------------------ | +| name | string | Organization name (required) | The name of the organization or the individual hosting the data. | +| description | string | Description | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| scheme | string | Scheme (required) | Values: S3, GCS, URL, OTHER | +| id | string | Identifier (required) | Host-specific identifier such as an URL or asset id. | +| region | string | Region | Provider specific region where the data is stored. | +| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. Defaults to `false`. | ### Link Object -| Element | Type | Name | Description | -| ------- | ------ | ------------------- | ------------------------------------------------------------ | -| href | string | Link (required) | The actual link in the format of an URL. Relative and absolute links are both allowed.
| -| rel | string | Relation (required) | Relationship between the current document and the linked document. | -| type | string | MIME-type | MIME-type of the referenced entity. | -| title | string | Title | Human-readable title for the link. | +This object describes a relationship with other entities. Data providers are advised to be liberal with links. + +| Element | Type | Name | Description | +| ------- | ------ | ------------------------ | ------------------------------------------------------------ | +| href | string | Link (required) | The actual link in the format of an URL. Relative and absolute links are both allowed. | +| rel | string | Relation type (required) | Relationship between the current document and the linked document. See chapter "Relation types" for more information. | +| type | string | MIME-type | MIME-type of the referenced entity. | + +#### Relation types + +The following types are commonly used as `rel` types in the Link Object of a Dataset: + +| Type | Description | +| --------------- | ------------------------------------------------------------ | +| self (required) | *Absolute* URL to the dataset file itself. This is required, to represent the location that the file can be found online. This is particularly useful when in a download package that includes metadata, so that the downstream user can know where the data has come from. | +| root | URL to the root [STAC Catalog](../static-catalog/) or Dataset. | +| parent | URL to the parent [STAC Catalog](../static-catalog/) or Dataset. | +| child | URL to a child [STAC Catalog](../static-catalog/) or Dataset. | +| item | URL to a [STAC Item](../json-spec/). | +| license | The license URL for the dataset SHOULD be specified if the `license` field is set to `proprietary`. If there is no public license URL available, it is RECOMMENDED to supplement the STAC dataset with the license text in separate file and link to this file. 
| ## Extensions -Related extensions to be used with the dataset spec: +Related extensions for the dataset spec: * [EO extension](../extensions/stac-eo-spec.md) Please note that some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and MUST not be used in datasets. -* [Dimensions extension](../extensions/dimension) (currently in review, see [PR #164](https://github.com/radiantearth/stac-spec/pull/164)) -* [Scientific extension](../extensions/scientific) (currently in review, see [PR #186](https://github.com/radiantearth/stac-spec/pull/186)) +* Dimensions extension (currently in review) +* [Scientific extension](../extensions/scientific) * Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file diff --git a/dataset-spec/example-s2.json b/dataset-spec/example-s2.json new file mode 100644 index 000000000..6e4bf463f --- /dev/null +++ b/dataset-spec/example-s2.json @@ -0,0 +1,47 @@ +{ + "name": "COPERNICUS/S2", + "title": "Sentinel-2 MSI: MultiSpectral Instrument, Level-1C", + "description": "Sentinel-2 is a wide-swath, high-resolution, multi-spectral\nimaging mission supporting Copernicus Land Monitoring studies,\nincluding the monitoring of vegetation, soil and water cover,\nas well as observation of inland waterways and coastal areas.\n\nThe Sentinel-2 data contain 13 UINT16 spectral bands representing\nTOA reflectance scaled by 10000. See the [Sentinel-2 User Handbook](https://sentinel.esa.int/documents/247904/685211/Sentinel-2_User_Handbook)\nfor details. In addition, three QA bands are present where one\n(QA60) is a bitmask band with cloud mask information. For more\ndetails, [see the full explanation of how cloud masks are computed.](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-2-msi/level-1c/cloud-masks)\n\nEach Sentinel-2 product (zip archive) may contain multiple\ngranules. 
Each granule becomes a separate Earth Engine asset.\nEE asset ids for Sentinel-2 assets have the following format:\nCOPERNICUS/S2/20151128T002653_20151128T102149_T56MNN. Here the\nfirst numeric part represents the sensing date and time, the\nsecond numeric part represents the product generation date and\ntime, and the final 6-character string is a unique granule identifier\nindicating its UTM grid reference (see [MGRS](https://en.wikipedia.org/wiki/Military_Grid_Reference_System)).\n\nFor more details on Sentinel-2 radiometric resolution, [see this page](https://earth.esa.int/web/sentinel/user-guides/sentinel-2-msi/resolutions/radiometric).\n", + "license": "proprietary", + "keywords": [ + "copernicus", + "esa", + "eu", + "msi", + "radiance", + "sentinel" + ], + "provider": [ + { + "name": "European Union/ESA/Copernicus", + "url": "https://sentinel.esa.int/web/sentinel/user-guides/sentinel-2-msi" + } + ], + "extent": { + "spatial": [ + 180.0, + -56.0, + -180.0, + 83.0 + ], + "temporal": "2015-06-23T00:00:00/" + }, + "links": [ + { + "rel": "self", + "href": "https://storage.cloud.google.com/earthengine-test/catalog/COPERNICUS_S2.json" + }, + { + "rel": "parent", + "href": "https://storage.cloud.google.com/earthengine-test/catalog/catalog.json" + }, + { + "rel": "root", + "href": "https://storage.cloud.google.com/earthengine-test/catalog/catalog.json" + }, + { + "rel": "license", + "href": "https://scihub.copernicus.eu/twiki/pub/SciHubWebPortal/TermsConditions/Sentinel_Data_Terms_and_Conditions.pdf" + } + ] +} \ No newline at end of file diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json index 1e4b59332..774f35999 100644 --- a/dataset-spec/json-schema/dataset.json +++ b/dataset-spec/json-schema/dataset.json @@ -5,18 +5,17 @@ "description": "This object represents the dataset in a SpatioTemporal Asset Catalog.", "type": "object", "required": [ - "id", + "name", "description", "license", - "spatial_extent", - "temporal_extent", +
"extent", "links" ], + "additionalProperties": true, "properties": { - "id": { - "title": "Provider ID", - "type": "string", - "pattern": "^[A-Za-z0-9_\\-\/]+$" + "name": { + "title": "Identifier", + "type": "string" }, "title": { "title": "Title", @@ -33,14 +32,13 @@ "type": "string" } }, - "license": { - "title": "License Name", + "version": { + "title": "Dataset Version", "type": "string" }, - "license_url": { - "title": "License URL", - "type": "string", - "format": "url" + "license": { + "title": "Dataset License Name", + "type": "string" }, "provider": { "type": "array", @@ -60,12 +58,17 @@ }, "host": { "required": [ - "id", - "scheme" + "name", + "scheme", + "id" ], "properties": { - "id": { - "title": "Identifirer", + "name": { + "title": "Organization name", + "type": "string" + }, + "description": { + "title": "Description", "type": "string" }, "scheme": { @@ -78,8 +81,8 @@ "OTHER" ] }, - "description": { - "title": "Description", + "id": { + "title": "Identifirer", "type": "string" }, "region": { @@ -91,18 +94,30 @@ "type": "boolean", "default": false } - } + }, + "additionalProperties": true }, - "version": { - "title": "Version", - "type": "string" - }, - "temporal_extent": { - "title": "Temporal extent", - "type": "string" - }, - "spatial_extent": { - "type": "object" + "extent": { + "title": "Extents", + "type": "object", + "required": [ + "spatial", + "temporal" + ], + "properties": { + "spatial": { + "title": "Spatial extent", + "type": "array", + "items": { + "type": "number" + } + }, + "temporal": { + "title": "Temporal extent", + "type": "string" + } + }, + "additionalProperties": true }, "links": { "type": "array", @@ -124,12 +139,9 @@ "type": { "title": "type", "type": "string" - }, - "title": { - "title": "Title", - "type": "string" } - } + }, + "additionalProperties": true } } } From f4ccca66d1d5c5eaa3bf74b620fd148a803197fe Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 27 Aug 2018 13:22:10 +0200 Subject: [PATCH 09/14] Reverted 
changes to EO and collection extension. Will propose them in a separate PR. --- dataset-spec/README.md | 2 +- extensions/stac-collection-spec.md | 10 ++-- extensions/stac-eo-spec.md | 84 ++++++++++++++---------------- 3 files changed, 44 insertions(+), 52 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 6f3d7d8bf..3b15d1cba 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -90,6 +90,6 @@ Related extensions for the dataset spec: * [EO extension](../extensions/stac-eo-spec.md) Please note that some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and MUST not be used in datasets. -* Dimensions extension (currently in review) +* [Dimensions extension](../extensions/dimensions) * [Scientific extension](../extensions/scientific) * Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file diff --git a/extensions/stac-collection-spec.md b/extensions/stac-collection-spec.md index bad0ba4b2..1e8f11f2a 100644 --- a/extensions/stac-collection-spec.md +++ b/extensions/stac-collection-spec.md @@ -4,11 +4,11 @@ A group of STAC `Item` objects from a single source can share a lot of common me ## Collection Extension Description -| element | type info | name | description | -| ------------- | ----------------- | ---------------------- | ------------------------------------------------ | -| c:id | string | Collection ID | Machine readable ID for the collection | -| c:name | string (optional) | Collection Name | A name given to the Collection, used for display | -| c:description | string (optional) | Collection Description | A human readable description of the collection. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. 
| +| element | type info | name | description | +|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| +| c:id | string | Collection ID | Machine readable ID for the collection +| c:name | string (optional) | Collection Name | A name given to the Collection, used for display +| c:description | string (optional) | Collection Description | A human readable description of the collection. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. A `Collection` does not have many specific fields, as it may contain any fields that are in the core spec as well as any other extension. This provides maximum flexibility to data providers, as some the set of common metadata fields can vary between different types of data. For instance, Landsat and Sentinel data always has a eo:off_nadir value of 0, because those satellites are always pointed downward (i.e., nadir), while satellite that can be pointed will have varying eo:off_nadir values. diff --git a/extensions/stac-eo-spec.md b/extensions/stac-eo-spec.md index 6b522b5f9..6e934c88b 100644 --- a/extensions/stac-eo-spec.md +++ b/extensions/stac-eo-spec.md @@ -7,28 +7,24 @@ This document explains the fields of the STAC Earth Observation (EO) Extension t A lot of EO data will have common metadata across many `Items`. It is not necessary, but recommended to use the [Collections extension](stac-collection-spec.md). While the exact metadata that would appear in a `Collection` record will vary depending on the dataset, the most common collection-level metadata fields are indicated with an * in the tables below. 
## EO Extension Description -These are fields that extend the `Item` and `Dataset` object -## `Item` and `Dataset` additions - -| element | type info | name | description | scopes | -| ---------------- | --------------------- | ------------------------------------------------ | ------------------------------------------------------------ | -------------- | -| eo:gsd | number | Ground Sample distance (required) | The nominal distance between pixel centers available, in meters | Item | -| eo:platform | string | Unique name of platform (required) | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | Item + Dataset | -| eo:constellation | string | constellation the platform belongs to (required) | Name of the group or constellation the platform belongs to | Item + Dataset | -| eo:instrument | string | Instrument used (required) | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | Item + Dataset | -| eo:bands | {Band Object} | Band Info (required) | Band specific metadata (see below) | Item + Dataset | -| eo:epsg | number | EPSG code | EPSG code of the datasource, null if no EPSG code | Item + Dataset | -| eo:cloud_cover | number | Cloud Cover | Percent of cloud cover (0-100) | Item | -| eo:off_nadir | number | Off nadir | Viewing angle. 0-90 degrees, measured from nadir | Item + Dataset | -| eo:azimuth | number | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north | Item + Dataset | -| eo:sun_azimuth | number | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north | Item | -| eo:sun_elevation | number | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon | Item | -| eo:asset_schema | {Asset Schema Object} | Asset Schema | TODO | Dataset | -| eo:nodata | [number] | Nodata values | The no data value(s). 
| Dataset | -| eo:pyramid | {Pyramid Object} | Pyramid parameters | TODO | Dataset | -| eo:periodicity | string | Periodicity | ISO8601 | Dataset | - -## `Item` and `Dataset` Field Descriptions +These are fields that extend the `Item` object +## `Item` additions + +| element | type info | name | description | +|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| +| eo:gsd* | float | Ground Sample distance | The nominal distance between pixel centers available, in meters | +| eo:platform* | string | Unique name of platform | Specific name of the platform (e.g., landsat-8, sentinel-2A, larrysdrone) | +| eo:constellation* | string | constellation the platform belongs to | Name of the group or constellation the platform belongs to | +| eo:instrument* | string | Instrument used | Name of instrument or sensor (e.g., MODIS, ASTER, OLI, Canon F-1) | +| eo:bands* | dictionary | Band Info | Band specific metadata (see below) +| eo:epsg | unsigned int | EPSG code | EPSG code of the datasource, null if no EPSG code | +| eo:cloud_cover | integer (optional) | Cloud Cover Pct | Percent of cloud cover (0-100) | +| eo:off_nadir | float (optional) | Off nadir | Viewing angle. 0-90 degrees, measured from nadir +| eo:azimuth | float (optional) | Azimuth | Viewing azimuth angle. 0-360 degrees, measured clockwise from north +| eo:sun_azimuth | float (optional) | Sun Azimuth | Sun azimuth angle. 0-360 degrees, measured clockwise from north +| eo:sun_elevation | float (optional) | Sun Elevation | Sun elevation angle. 0-90 degrees measured from horizon + +## `Item` Field Descriptions **eo:gsd** is the nominal Ground Sample Distance for the data, as measured in meters on the ground. Since GSD can vary across a scene depending on projection, this should be the average or most commonly used GSD in the center of the image. 
If the data includes multiple bands with different GSD values, this should be the value for the greatest number or most common bands. For instance, Landsat optical and short-wave IR bands are all 30 meters, but the panchromatic band is 15 meters. The eo:gsd should be 30 meters in this case since those are the bands most commonly used. @@ -52,20 +48,16 @@ These are fields that extend the `Item` and `Dataset` object **eo:sun_elevation**: This is the angle from the tangent of ths scene center point to the sun. Measured in degrees (0-90). -## `eo:bands` +## `Item:eo:bands` The bands field of a `Item` is a dictionary where the index identifies a specific band. This is often a band number (e.g., 1, B1, B01), but could be any unique identifier. -| element | type info | name | description | -| ------------------- | --------- | -------------------------- | ------------------------------------------------------------ | -| common_name | string | Common name | The name commonly used to refer to this specific band (see below) | -| gsd | number | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. Defaults to eo:gsd if not provided | -| accuracy | number | Geolocation Accuracy | The expected accuracy of the scene registration, in meters | -| center_wavelength | number | Center wavelength | The center wavelength of the band, in microns | -| full_width_half_max | number | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns | -| nodata | [number] | Nodata values | The no data value(s). | -| offset | number | Offset | Offset to convert band values to the actual measurement scale. | -| scale | number | Scale | Scale to convert band values to the actual measurement scale. | -| unit | string | Unit | The unit of measurement, preferably SI. TODO: Check what units are allowed, e.g. 
link to [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or [the dictionary of UoM](https://www.unc.edu/~rowlett/units/). | +| element | type info | name | description | +|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------| +| common_name | string (optional) | Common name | The name commonly used to refer to this specific band (see below) +| gsd | float (optional) | Ground sample distance | The average distance between pixel centers as measured in meters on the ground. Defaults to eo:gsd if not provided +| accuracy | float (optional) | Geolocation Accuracy | The expected accuracy of the scene registration, in meters +| center_wavelength | float (optional) | Center wavelength | The center wavelength of the band, in microns +| full_width_half_max | float (optional) | Full width at half maximum | The width of the band, as measured at half the maximum transmission, in microns ## `Item:eo:bands` Field Descriptions @@ -83,15 +75,15 @@ The bands field of a `Item` is a dictionary where the index identifies a specifi The band's common_name is the name that is commonly used to refer to that band's spectral properties. The table below shows the common name based on the average band range for the band numbers of several popular instruments. 
| Common Name | Band Range (μm) | Landsat 5 | Landsat 7 | Landsat 8 | Sentinel 2 | MODIS | -|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------|------------------------------------|------------------------------------|------------------------------------| -| coastal | 0.40 - 0.45 | | | 1 | 1 | -|blue | 0.45 - 0.5 | 1 | 1 | 2 | 2 | 3 | -|green | 0.5 - 0.6 | 2 | 2 | 3 | 3 | 4 | -|red | 0.6 - 0.7 | 3 | 3 | 4 | 4 | 1 | -|pan | 0.5 - 0.7 | | 8 | 8 | | | -|nir | 0.77 - 1.00 | 4 | 4 | 5 | 8 | 2 | -|cirrus | 1.35 - 1.40 | | | 9 | 10 | 26 | -|swir16 | 1.55 - 1.75 | 5 | 5 | 6 | 11 | 6 | -|swir22 |2.1 - 2.3 | 7 | 7 | 7 | 12 | 7 | -|lwir11 | 10.5 - 11.5 | | | 10 | | 31 | -|lwir12 | 11.5 - 12.5 | | | 11 | | 32 | +|----------------------|---------------------------|-------------------------|---------------------------------------------------------------------------------------------|------------------------------------|------------------------------------|------------------------------------| +| coastal | 0.40 - 0.45 | | | 1 | 1 | +|blue | 0.45 - 0.5 | 1 | 1 | 2 | 2 | 3 +|green | 0.5 - 0.6 | 2 | 2 | 3 | 3 | 4 +|red | 0.6 - 0.7 | 3 | 3 | 4 | 4 | 1 +|pan | 0.5 - 0.7 | | 8 | 8 | | +|nir | 0.77 - 1.00 | 4 | 4 | 5 | 8 | 2 +|cirrus | 1.35 - 1.40 | | | 9 | 10 | 26 +|swir16 | 1.55 - 1.75 | 5 | 5 | 6 | 11 | 6 +|swir22 |2.1 - 2.3 | 7 | 7 | 7 | 12 | 7 +|lwir11 | 10.5 - 11.5 | | | 10 | | 31 +|lwir12 | 11.5 - 12.5 | | | 11 | | 32 From e7a9e5cbd673068e2eb152181ee4ff5ad856d4a9 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 27 Aug 2018 14:45:55 +0200 Subject: [PATCH 10/14] Changes to formatting (trying to make all specs look the same). 
--- dataset-spec/README.md | 100 ++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 3b15d1cba..a183d1a59 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -1,30 +1,36 @@ -# Dataset Spec for STAC +# STAC Dataset Spec [STAC Items](https://github.com/radiantearth/stac-spec/json-spec/) are focused on search within a dataset*. Another topic of interest is the search of datasets, instead of within a dataset. The Dataset Spec is an independent spec that STAC Items are *strongly recommended* to use. Other parties can also independently use this spec to describe datasets in a lightweight way. The Datasets Spec is a superset of the [Catalog Spec](../static-catalog/). I shares the same fields and therefore every Dataset is also a valid Catalog. Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. -A Dataset can be represented in JSON format. Every field described here is a property in a JSON object. +A Dataset can be represented in JSON format. Any JSON object that contains all the required fields is a valid STAC Dataset and Catalog. * [Example (Sentinel 2)](example-s2.json) * [JSON Schema](json-schema/dataset.json) *\* There is no standardized name for the concept we are describing here. Others called it: dataset series (ISO 19115), collection (CNES, NASA), dataset (JAXA), dataset series (ESA), product (JAXA).* +## WARNING + +**This is still an early version of the STAC spec, expect that there may be some changes before everything is finalized.** + +Implementations are encouraged, however, as good effort will be made to not change anything too drastically. Using the specification now will ensure that needed changes can be made before everything is locked in. So now is an ideal time to implement, as your feedback will be directly incorporated. 
+ ## Dataset fields -| Element | Type | Name | Description | -| ----------- | ----------------- | ------------------------------- | ------------------------------------------------------------ | -| name | string | Identifier (required) | Identifier for the dataset that is unique across the provider. | -| title | string | Title | A short descriptive one-line title for the dataset. | -| description | string | Description (required) | Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| keywords | [string] | Keywords | List of keywords describing the dataset. | -| version | string | Dataset Version | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | -| license | string | Dataset License Name (required) | Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. Proprietary licenses SHOULD add a link to the license text, see the `license` relation type. | -| provider | [Provider Object] | Data Provider | The organizations that influenced the content of the dataset. | -| host | Host Object | Storage Provider | The organization that hosts the dataset. | -| extent | [Extent Object] | Extents (required) | Spatial and temporal extents. | -| links | [Link Object] | Links (required) | A list of references to other documents, see Link Object for further documentation. | +| Element | Type | Description | +| ----------- | ----------------- | ------------------------------------------------------------ | +| name | string | **REQUIRED.** Identifier for the dataset that is unique across the provider. | +| title | string | A short descriptive one-line title for the dataset. | +| description | string | **REQUIRED.** Detailed multi-line description to fully explain the entity. 
[CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| keywords | [string] | List of keywords describing the dataset. | +| version | string | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | +| license | string | **REQUIRED.** Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. Proprietary licensed data SHOULD add a link to the license text, see the `license` relation type. | +| provider | [Provider Object] | Data provider, the organizations which influenced the content of the dataset. | +| host | Host Object | Storage provider, the organization that hosts the dataset. | +| extent | [Extent Object] | **REQUIRED.** Spatial and temporal extents. | +| links | [Link Object] | **REQUIRED.** A list of references to other documents. | ### Extent Object @@ -32,19 +38,19 @@ The object describes the spatio-temporal extents of the dataset. Both spatial an **Note:** STAC datasets tries to be compliant to [WFS 3.0](https://github.com/opengeospatial/WFS_FES), but there are still issues to be solved. The WFS specification is in draft state any may change, especially regarding [3D support](https://github.com/opengeospatial/WFS_FES/issues/143) for spatial extents or the handling of [open date ranges](https://github.com/opengeospatial/WFS_FES/issues/155) for temporal extents. Therefore, It is also likely that the following fields change over time. -| Element | Type | Name | Description | -| -------- | -------- | -------------------------- | ------------------------------------------------------------ | -| spatial | [number] | Spatial extent (required) | Potential spatial extent covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. The list of four numbers can be extended to six numbers to support a 3D spatial extent. 
| -| temporal | string | Temporal extent (required) | Potential temporal extent covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are supported by omitting either the start or the end time. Example for data from the beginning of 2019 until now: `2009-01-01T00:00:00Z/`. | +| Element | Type | Description | +| -------- | -------- | ------------------------------------------------------------ | +| spatial | [number] | **REQUIRED.** Potential *spatial extent* covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. The list of four numbers can be extended to six numbers to support a 3D spatial extent. | +| temporal | string | **REQUIRED.** Potential *temporal extent* covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. Open date ranges are supported by omitting either the start or the end time. Example for data from the beginning of 2019 until now: `2009-01-01T00:00:00Z/`. | ### Provider Object The object provides information about a provider. A provider is any of the organizations that created or processed the content of the dataset and therefore influenced the data offered by this dataset. -| Element | Type | Name | Description | -| ------- | ------ | ---------------------------- | ----------------------------------------------- | -| name | string | Organization name (required) | The name of the organization or the individual. | -| url | string | Organization homepage | Homepage of the provider. | +| Field Name | Type | Description | +| ---------- | ------ | ------------------------------------------------------------ | +| name | string | **REQUIRED.** The name of the organization or the individual. | +| url | string | Homepage of the provider. | ### Host Object @@ -52,44 +58,46 @@ The objects provides information about the storage provider hosting the data. 
**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec once this concept id introduced to STAC. -| Element | Type | Name | Description | -| -------------- | ------- | ---------------------------- | ------------------------------------------------------------ | -| name | string | Organization name (required) | The name of the organization or the individual hosting the data. | -| description | string | Description | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | -| scheme | string | Scheme (required) | Values: S3, GCS, URL, OTHER | -| id | string | Identifier (required) | Host-specific identifier such as an URL or asset id. | -| region | string | Region | Provider specific region where the data is stored. | -| requester_pays | boolean | Requester pays | `true` if requester pays, `false` if host pays. Defaults to `false`. | +| Field Name | Type | Description | +| -------------- | ------- | ------------------------------------------------------------ | +| name | string | **REQUIRED.** The name of the organization or the individual hosting the data. | +| description | string | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| scheme | string | **REQUIRED.** The protocol/scheme used to access the data. Any of: `S3`, `GCS`, `URL`, `OTHER` | +| id | string | **REQUIRED.** Host-specific identifier such as an URL or asset id. | +| region | string | Provider specific region where the data is stored. | +| requester_pays | boolean | `true` if requester pays, `false` if host pays. Defaults to `false`. | ### Link Object -This object describes a relationship with other entities. Data providers are advised to be liberal with links. 
+This object describes a relationship with another entity. Data providers are advised to be liberal with links. -| Element | Type | Name | Description | -| ------- | ------ | ------------------------ | ------------------------------------------------------------ | -| href | string | Link (required) | The actual link in the format of an URL. Relative and absolute links are both allowed. | -| rel | string | Relation type (required) | Relationship between the current document and the linked document. See chapter "Relation types" for more information. | -| type | string | MIME-type | MIME-type of the referenced entity. | +| Field Name | Type | Description | +| ---------- | ------ | ------------------------------------------------------------ | +| href | string | **REQUIRED.** The actual link in the format of an URL. Relative and absolute links are both allowed. | +| rel | string | **REQUIRED.** Relationship between the current document and the linked document. See chapter "Relation types" for more information. | +| type | string | MIME-type of the referenced entity. | #### Relation types The following types are commonly used as `rel` types in the Link Object of a Dataset: -| Type | Description | -| --------------- | ------------------------------------------------------------ | -| self (required) | *Absolute* URL to the dataset file itself. This is required, to represent the location that the file can be found online. This is particularly useful when in a download package that includes metadata, so that the downstream user can know where the data has come from. | -| root | URL to the root [STAC Catalog](../static-catalog/) or Dataset. | -| parent | URL to the parent [STAC Catalog](../static-catalog/) or Dataset. | -| child | URL to a child [STAC Catalog](../static-catalog/) or Dataset. | -| item | URL to a [STAC Item](../json-spec/). | -| license | The license URL for the dataset SHOULD be specified if the `license` field is set to `proprietary`. 
If there is no public license URL available, it is RECOMMENDED to supplement the STAC dataset with the license text in separate file and link to this file. | +| Type | Description | +| ------- | ------------------------------------------------------------ | +| self | **REQUIRED.** *Absolute* URL to the dataset file itself. This is required, to represent the location where the file can be found online. This is particularly useful when in a download package that includes metadata, so that the downstream user can know where the data has come from. | +| root | URL to the root [STAC Catalog](../static-catalog/) or Dataset. | +| parent | URL to the parent [STAC Catalog](../static-catalog/) or Dataset. | +| child | URL to a child [STAC Catalog](../static-catalog/) or Dataset. | +| item | URL to a [STAC Item](../json-spec/). | +| license | The license URL for the dataset SHOULD be specified if the `license` field is set to `proprietary`. If there is no public license URL available, it is RECOMMENDED to supplement the STAC catalog with the license text in a separate file and link to this file. | ## Extensions -Related extensions for the dataset spec: +Important related extensions for the dataset spec: * [EO extension](../extensions/stac-eo-spec.md) Please note that some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and MUST not be used in datasets. * [Dimensions extension](../extensions/dimensions) * [Scientific extension](../extensions/scientific) -* Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) \ No newline at end of file +* Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) + +The [extensions page](../extensions/) gives a full overview about relevant extensions for STAC Datasets.
\ No newline at end of file From 89e35a908872ccc58a5ddb4120419b229e864eff Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 29 Aug 2018 10:17:37 +0200 Subject: [PATCH 11/14] Improved descriptions and fixed several minor issues, --- dataset-spec/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index a183d1a59..812c3c39d 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -1,8 +1,8 @@ # STAC Dataset Spec -[STAC Items](https://github.com/radiantearth/stac-spec/json-spec/) are focused on search within a dataset*. Another topic of interest is the search of datasets, instead of within a dataset. The Dataset Spec is an independent spec that STAC Items are *strongly recommended* to use. Other parties can also independently use this spec to describe datasets in a lightweight way. +[STAC Items](https://github.com/radiantearth/stac-spec/json-spec/) are focused on search within a dataset*. Another topic of interest is the search of datasets, instead of within a dataset. The Dataset Spec is an independent spec that STAC Items are *strongly recommended* to provide a link to a dataset definition. Other parties can also independently use this spec to describe datasets in a lightweight way. -The Datasets Spec is a superset of the [Catalog Spec](../static-catalog/). I shares the same fields and therefore every Dataset is also a valid Catalog. Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. +The Datasets Spec extends the [Catalog Spec](../static-catalog/) with additional fields to describe the set of items in the catalog. It shares the same fields and therefore every Dataset is also a valid Catalog. Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. A Dataset can be represented in JSON format. Any JSON object that contains all the required fields is a valid STAC Dataset and Catalog. 
@@ -23,11 +23,11 @@ Implementations are encouraged, however, as good effort will be made to not chan | ----------- | ----------------- | ------------------------------------------------------------ | | name | string | **REQUIRED.** Identifier for the dataset that is unique across the provider. | | title | string | A short descriptive one-line title for the dataset. | -| description | string | **REQUIRED.** Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| description | string | **REQUIRED.** Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | | keywords | [string] | List of keywords describing the dataset. | | version | string | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | | license | string | **REQUIRED.** Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. Proprietary licensed data SHOULD add a link to the license text, see the `license` relation type. | -| provider | [Provider Object] | Data provider, the organizations which influenced the content of the dataset. | +| provider | [Provider Object] | A list of data providers, the organizations which influenced the content of the dataset. Providers should be listed in chronological order with the most recent provider being the last element of the list. | | host | Host Object | Storage provider, the organization that hosts the dataset. | | extent | [Extent Object] | **REQUIRED.** Spatial and temporal extents. | | links | [Link Object] | **REQUIRED.** A list of references to other documents. | @@ -56,12 +56,12 @@ The object provides information about a provider. 
A provider is any of the organ The objects provides information about the storage provider hosting the data. -**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec once this concept id introduced to STAC. +**Note:** The idea of storage profiles is currently [discussed](https://github.com/radiantearth/stac-spec/issues/148). Therefore, scheme, id and region may be removed from the final spec once this concept is introduced to STAC. | Field Name | Type | Description | | -------------- | ------- | ------------------------------------------------------------ | | name | string | **REQUIRED.** The name of the organization or the individual hosting the data. | -| description | string | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | +| description | string | Detailed description to explain the hosting details. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | | scheme | string | **REQUIRED.** The protocol/scheme used to access the data. Any of: `S3`, `GCS`, `URL`, `OTHER` | | id | string | **REQUIRED.** Host-specific identifier such as an URL or asset id. | | region | string | Provider specific region where the data is stored. | From 6a32acaac93f6379c83852b851dc42d940edea03 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 29 Aug 2018 11:32:31 +0200 Subject: [PATCH 12/14] Minor change to the license spec. --- dataset-spec/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 812c3c39d..1ad2b42cb 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -2,7 +2,7 @@ [STAC Items](https://github.com/radiantearth/stac-spec/json-spec/) are focused on search within a dataset*. 
Another topic of interest is the search of datasets, instead of within a dataset. The Dataset Spec is an independent spec that STAC Items are *strongly recommended* to provide a link to a dataset definition. Other parties can also independently use this spec to describe datasets in a lightweight way. -The Datasets Spec extends the [Catalog Spec](../static-catalog/) with additional fields to describe the set of items in the catalog. It shares the same fields and therefore every Dataset is also a valid Catalog. Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. +The Datasets Spec extends the [Catalog Spec](../static-catalog/) with additional fields to describe the set of items in the catalog. It shares the same fields and therefore every Dataset is also a valid Catalog. Datasets can have both parent Catalogs and Datasets and child Items, Catalogs and Datasets. A Dataset can be represented in JSON format. Any JSON object that contains all the required fields is a valid STAC Dataset and Catalog. @@ -26,7 +26,7 @@ Implementations are encouraged, however, as good effort will be made to not chan | description | string | **REQUIRED.** Detailed multi-line description to fully explain the entity. [CommonMark 0.28](http://commonmark.org/) syntax MAY be used for rich text representation. | | keywords | [string] | List of keywords describing the dataset. | | version | string | Version of the dataset. [Semantic Versioning (SemVer)](https://semver.org/) SHOULD be followed. | -| license | string | **REQUIRED.** Dataset's license(s) as a [SPDX License identifier or expression](https://spdx.org/licenses/) or `proprietary` if the license is not on the SPDX license list. Proprietary licensed data SHOULD add a link to the license text, see the `license` relation type. 
| +| license | string | **REQUIRED.** Dataset's license(s) as a SPDX [License identifier](https://spdx.org/licenses/) or [expression](https://spdx.org/spdx-specification-21-web-version#h.jxpfx0ykyb60) or `proprietary` if the license is not on the SPDX license list. Proprietary licensed data SHOULD add a link to the license text, see the `license` relation type. | | provider | [Provider Object] | A list of data providers, the organizations which influenced the content of the dataset. Providers should be listed in chronological order with the most recent provider being the last element of the list. | | host | Host Object | Storage provider, the organization that hosts the dataset. | | extent | [Extent Object] | **REQUIRED.** Spatial and temporal extents. | From af1b16ae8dc51e44cbbc81a36b15be8929a4b7db Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 29 Aug 2018 17:19:24 +0200 Subject: [PATCH 13/14] Changed temporal extent to be more WFS3 like. --- dataset-spec/README.md | 2 +- dataset-spec/example-s2.json | 5 ++++- dataset-spec/json-schema/dataset.json | 11 ++++++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 1ad2b42cb..9aa051134 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -41,7 +41,7 @@ The object describes the spatio-temporal extents of the dataset. Both spatial an | Element | Type | Description | | -------- | -------- | ------------------------------------------------------------ | | spatial | [number] | **REQUIRED.** Potential *spatial extent* covered by the dataset. West, north, east, south edges of the spatial extent. Only WGS84 longitude/latitude is supported. The list of four numbers can be extended to six numbers to support a 3D spatial extent. | -| temporal | string | **REQUIRED.** Potential *temporal extent* covered by the dataset. Date/time intervals MUST be formatted according to ISO 8601. 
Open date ranges are supported by omitting either the start or the end time. Example for data from the beginning of 2019 until now: `2009-01-01T00:00:00Z/`. | +| temporal | [string\|null] | **REQUIRED.** Potential *temporal extent* covered by the dataset. A list of two timestamps, which MUST be formatted according to [RFC 3339, section 5.6](https://tools.ietf.org/html/rfc3339#section-5.6). Open date ranges are supported by setting either the start or the end time to `null`. Example for data from the beginning of 2009 until now: `["2009-01-01T00:00:00Z", null]`. | ### Provider Object diff --git a/dataset-spec/example-s2.json b/dataset-spec/example-s2.json index 6e4bf463f..4f73c9c37 100644 --- a/dataset-spec/example-s2.json +++ b/dataset-spec/example-s2.json @@ -24,7 +24,10 @@ -180.0, 83.0 ], - "temporal": "2015-06-23T00:00:00/" + "temporal": [ + "2015-06-23T00:00:00", + null + ] }, "links": [ { diff --git a/dataset-spec/json-schema/dataset.json b/dataset-spec/json-schema/dataset.json index 774f35999..289323798 100644 --- a/dataset-spec/json-schema/dataset.json +++ b/dataset-spec/json-schema/dataset.json @@ -114,7 +114,16 @@ }, "temporal": { "title": "Temporal extent", - "type": "string" + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": { + "type": [ + "string", + "null" + ], + "format": "date-time" + } } }, "additionalProperties": true From a592be63f74597f40695e009a489687e48dc5b34 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 30 Aug 2018 10:06:32 +0200 Subject: [PATCH 14/14] Moved dimensions extension to a separate PR --- dataset-spec/README.md | 4 ++-- extensions/README.md | 14 ++++++------ extensions/dimension/README.md | 19 ---------------- extensions/dimension/example.json | 23 -------------------- extensions/dimension/schema.json | 36 ------------------------------- 5 files changed, 9 insertions(+), 87 deletions(-) delete mode 100644
extensions/dimension/schema.json diff --git a/dataset-spec/README.md b/dataset-spec/README.md index 9aa051134..58969076c 100644 --- a/dataset-spec/README.md +++ b/dataset-spec/README.md @@ -96,8 +96,8 @@ Important related extensions for the dataset spec: * [EO extension](../extensions/stac-eo-spec.md) Please note that some fields such as `eo:sun_elevation ` or `eo:sun_azimuth` are only meaningful on the item level and MUST not be used in datasets. -* [Dimensions extension](../extensions/dimensions) +* Dimensions extension (proposed, see [PR #227](https://github.com/radiantearth/stac-spec/pull/227)) * [Scientific extension](../extensions/scientific) -* Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) +* Provenance extension (planned, see [issue #179](https://github.com/radiantearth/stac-spec/issues/179)) The [extensions page](../extensions/) gives a full overview about relevant extensions for STAC Datasets. \ No newline at end of file diff --git a/extensions/README.md b/extensions/README.md index 579ee779f..62130645a 100644 --- a/extensions/README.md +++ b/extensions/README.md @@ -11,13 +11,13 @@ them they can create a shared extension and include it in the STAC repository. ## List of official extensions -| Extension Name (Prefix) | Scope | Description | -| ------------------------------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [Collection](stac-collection-spec.md) (`c`) | Item | Provides a way to specify data fields that are common across a collection of STAC Items, so that each does not need to repeat all the same information. 
| -| [EO](stac-eo-spec.md) (`eo`) | Item | Covers data that represents a snapshot of the earth for a single date and time. It could consist of multiple spectral bands in any part of the electromagnetic spectrum. Examples of EO data include sensors with visible bands, IR bands as well as SAR instruments. The extension provides common fields like bands, cloud cover, off nadir, sun angle + elevation, gsd and more. | -| [Scientific](scientific/) (`sci`) | Catalog | Scientific metadata is considered to be data that indicate from which publication a dataset originates and how the dataset itself should be cited or referenced. | -| [Start end datetime](stac-start-end-datetime-spec.md) (`set`) | Item | An extension to provide start and end datetime stamps in a consistent way. | -| [Transaction](transaction/) | API | Provides an API extension to support the creation, editing, and deleting of items on a specific WFS3 collection. | +| Extension Name (Prefix) | Scope | Description | +| ------------------------------------------------------------ | ---------------- | ------------------------------------------------------------ | +| [Collection](stac-collection-spec.md) (`c`) | Item | Provides a way to specify data fields that are common across a collection of STAC Items, so that each does not need to repeat all the same information. | +| [EO](stac-eo-spec.md) (`eo`) | Item | Covers data that represents a snapshot of the earth for a single date and time. It could consist of multiple spectral bands in any part of the electromagnetic spectrum. Examples of EO data include sensors with visible bands, IR bands as well as SAR instruments. The extension provides common fields like bands, cloud cover, off nadir, sun angle + elevation, gsd and more. | +| [Scientific](scientific/) (`sci`) | Catalog +Dataset | Scientific metadata is considered to be data that indicate from which publication a dataset originates and how the dataset itself should be cited or referenced. 
| +| [Start end datetime](stac-start-end-datetime-spec.md) (`set`) | Item | An extension to provide start and end datetime stamps in a consistent way. | +| [Transaction](transaction/) | API | Provides an API extension to support the creation, editing, and deleting of items on a specific WFS3 collection. | ## Third-party / vendor extensions diff --git a/extensions/dimension/README.md b/extensions/dimension/README.md deleted file mode 100644 index a4c841b6b..000000000 --- a/extensions/dimension/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# STAC Dimensions Extension Spec - -This document explains the fields of the STAC Dimensions Extension (dim) to a STAC `Dataset`. Data can have different dimensions (= axes), e.g. in meteorology. The properties of these dimensions can be defined with this extension. - -## Dimensions Extension Description - -This is the field that extends the `Dataset` object: - -| Element | Type | Name | Description | -| ---------------- | -------------------- | ------------------------- | ------------------------------------------------------------ | -| dim:dimensions | [Dimension Object] | Dimensions | Dimensions of the data. If the dimensions have an order, the order SHOULD be reflected in the order of the array. | - -### Dimension Object - -| Element | Type | Name | Description | -| ------- | ---------------- | ------------------- | ------------------------------------------------------------ | -| label | string | Label (required) | Human-readable label for the dimension. | -| unit | string | Unit of Measurement | Unit of measurement, preferably SI. ToDo: Any standard to express this, e.g. [UDUNITS](https://www.unidata.ucar.edu/software/udunits/) or this [dict](https://www.unc.edu/~rowlett/units/)? | -| extent | [number\|string] | Data Extent | Specifies the extent of the data, i.e. the lower bound as the first element and the upper bound as the second element of the array. 
| diff --git a/extensions/dimension/example.json b/extensions/dimension/example.json deleted file mode 100644 index 33ffd44b3..000000000 --- a/extensions/dimension/example.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "dim:dimensions": [ - { - "label": "Longitude", - "unit": "°", - "extent": [-180, 180] - }, - { - "label": "Latitude", - "unit": "°", - "extent": [-90, 90] - }, - { - "label": "Temperature", - "unit": "°C", - "extent": [-20, 60] - }, - { - "label": "Date", - "extent": ["2018-01-01T00:00:00Z", "2018-01-31T23:59:59Z"] - } - ] -} \ No newline at end of file diff --git a/extensions/dimension/schema.json b/extensions/dimension/schema.json deleted file mode 100644 index 03e7dec37..000000000 --- a/extensions/dimension/schema.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "title": "STAC Dimensions Extension Spec", - "properties": { - "dim:dimensions": { - "type": "array", - "title": "Dimensions", - "items": { - "type": "object", - "required": [ - "label" - ], - "properties": { - "label": { - "type": "string", - "title": "Label" - }, - "unit": { - "type": "string", - "title": "Unit of Measurement" - }, - "extent": { - "type": "array", - "title": "Data Extent", - "minItems": 2, - "maxItems": 2, - "items": { - "type": ["number", "string"] - } - } - } - } - } - } -} \ No newline at end of file