From e2f5c49a2c1a75963aa488c67269647b188e8060 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 20 Apr 2021 16:43:45 +0200 Subject: [PATCH] First try at supporting JSON Schema in summaries #1045 --- CHANGELOG.md | 7 ++++++- catalog-spec/catalog-spec.md | 2 +- catalog-spec/json-schema/catalog-core.json | 15 ++++++++++++--- collection-spec/collection-spec.md | 21 +++++++++++++++------ examples/collection-only/collection.json | 8 +++++--- package.json | 3 ++- schema.json | 0 7 files changed, 41 insertions(+), 15 deletions(-) delete mode 100644 schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e5a4424b..f9f0f2c6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Summaries are allowed to specify JSON Schema in addition to ranges and sets of values. ([#1045](https://github.com/radiantearth/stac-spec/issues/1045)) + ### Changed - The first extent in a Collection is always the overall extent, followed by more specific extents. ([#1064](https://github.com/radiantearth/stac-spec/issues/1064), [opengeospatial/ogcapi-features#520](https://github.com/opengeospatial/ogcapi-features/pull/520)) - Updated examples for automatic collection creation from code and validation ([#1080](https://github.com/radiantearth/stac-spec/pull/1080) - Clarified that stac_extensions should also list extensions that are used in Collection summaries. ([#1077](https://github.com/radiantearth/stac-spec/issues/1077)) +- The Stats Object for Summaries has been renamed to Range Object (no functional change). ## [v1.0.0-rc.2] - 2021-03-30 @@ -46,7 +51,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed -- The [Stats Object](collection-spec/collection-spec.md#stats-object) for Collection `summaries` changed `min` to `minimum` and `max` to `maximum` to align with JSON Schema. 
([#967](https://github.com/radiantearth/stac-spec/pull/967)) +- The [Stats Object](collection-spec/collection-spec.md#range-object) for Collection `summaries` changed `min` to `minimum` and `max` to `maximum` to align with JSON Schema. ([#967](https://github.com/radiantearth/stac-spec/pull/967)) - URIs (usually found int properties like `href`, `url`) are now validated using the `iri-reference` format in JSON Schema (allows international characters in URIs) ([#953](https://github.com/radiantearth/stac-spec/pull/953)) - Enhanced the way the spec talks about ID's to encourage more global uniqueness. ([#883](https://github.com/radiantearth/stac-spec/pull/883)) - Clarified how collection-level asset object properties do not remove the need for item-level asset object properties in the `item-assets` extension ([#880](https://github.com/radiantearth/stac-spec/pull/880)) diff --git a/catalog-spec/catalog-spec.md b/catalog-spec/catalog-spec.md index 573e848a9..6cdd6d556 100644 --- a/catalog-spec/catalog-spec.md +++ b/catalog-spec/catalog-spec.md @@ -49,7 +49,7 @@ also a valid STAC Catalog. | id | string | **REQUIRED.** Identifier for the Catalog. | | title | string | A short descriptive one-line title for the Catalog. | | description | string | **REQUIRED.** Detailed multi-line description to fully explain the Catalog. [CommonMark 0.29](http://commonmark.org/) syntax MAY be used for rich text representation. | -| summaries | Map | A map of property summaries, either a set of values or statistics such as a range. More info in the [Collection spec](../collection-spec/collection-spec.md#summaries). | +| summaries | Map | A map of property summaries, either a set of values, a range of values or a [JSON Schema](https://json-schema.org). More info in the [Collection spec](../collection-spec/collection-spec.md#summaries). | | links | [[Link Object](#link-object)] | **REQUIRED.** A list of references to other documents. 
| ### Additional Field Information diff --git a/catalog-spec/json-schema/catalog-core.json b/catalog-spec/json-schema/catalog-core.json index 503d7a66f..dec52cf9d 100644 --- a/catalog-spec/json-schema/catalog-core.json +++ b/catalog-spec/json-schema/catalog-core.json @@ -99,9 +99,18 @@ "summaries": { "type": "object", "additionalProperties": { - "oneOf": [ + "anyOf": [ { - "title": "Stats", + "title": "JSON Schema", + "type": "object", + "allOf": [ + { + "$ref": "http://json-schema.org/draft-07/schema" + } + ] + }, + { + "title": "Range", "type": "object", "required": [ "minimum", @@ -129,7 +138,7 @@ "type": "array", "minItems": 1, "items": { - "description": "Any data type could occur." + "description": "For each field only the original data type of the property can occur (except for arrays), but we can't validate that in JSON Schema yet. See the summary description in the STAC specification for details." } } ] diff --git a/collection-spec/collection-spec.md b/collection-spec/collection-spec.md index f4e331c78..203eb8b1e 100644 --- a/collection-spec/collection-spec.md +++ b/collection-spec/collection-spec.md @@ -16,7 +16,8 @@ - [Link Object](#link-object) - [Relation types](#relation-types) - [Asset Object](#asset-object) - - [Stats Object](#stats-object) + - [Range Object](#range-object) + - [JSON Schema Object](#json-schema-object) - [Media Type for STAC Collections](#media-type-for-stac-collections) - [Standalone Collections](#standalone-collections) @@ -54,7 +55,7 @@ specified in [*OGC API - Features*](https://ogcapi.ogc.org/features/), but they | license | string | **REQUIRED.** Collection's license(s), either a SPDX [License identifier](https://spdx.org/licenses/), `various` if multiple licenses apply or `proprietary` for all other cases. | | providers | \[[Provider Object](#provider-object)] | A list of providers, which may include all organizations capturing or processing the data or the hosting provider. 
Providers should be listed in chronological order with the most recent provider being the last element of the list. | | extent | [Extent Object](#extent-object) | **REQUIRED.** Spatial and temporal extents. | -| summaries | Map | STRONGLY RECOMMENDED. A map of property summaries, either a set of values or statistics such as a range. | +| summaries | Map | STRONGLY RECOMMENDED. A map of property summaries, either a set of values, a range of values or a [JSON Schema](https://json-schema.org). | | links | \[[Link Object](#link-object)] | **REQUIRED.** A list of references to other documents. | | assets | Map | Dictionary of asset objects that can be downloaded, each with a unique key. | @@ -95,12 +96,12 @@ Summaries help to fully define Collections, especially if they don't link to any build tailored user interfaces for querying the data, by presenting the potential values that are available. Summaries should summarize all values in every Item underneath the collection, including in any nested sub-Catalogs. -A summary for a field can be specified in two ways: +A summary for a field can be specified in three ways: 1. A set of all distinct values in an array: The set of values must contain at least one element and it is strongly recommended to list all values. If the field summarizes an array (e.g. [`instruments`](../item-spec/common-metadata.md#instrument)), the field's array elements of each Item must be merged to a single array with unique elements. -2. Statistics in a [Stats Object](#stats-object): Statistics by default only specify the range (minimum and maximum values), +2. A Range in a [Range Object](#range-object): Statistics by default only specify the range (minimum and maximum values), but can optionally be accompanied by additional statistical values. The range specified by the `minimum` and `maximum` properties can specify the potential range of values, but it is recommended to be as precise as possible. @@ -281,9 +282,9 @@ or streamed. 
The definition provided here, at the Collection level, is the same | type | string | [Media type](../item-spec/item-spec.md#asset-media-type) of the asset. See the [common media types](../best-practices.md#common-media-types-in-stac) in the best practice doc for commonly used asset types. | | roles | \[string] | The [semantic roles](../item-spec/item-spec.md#asset-role-types) of the asset, similar to the use of `rel` in links. | -### Stats Object +### Range Object -For a good understanding of the summarized field, statistics can be added. +For summaries with a lot of continuous values, statistics can be added. By default, only ranges with a minimum and a maximum value can be specified. Ranges can be specified for [ordinal](https://en.wikipedia.org/wiki/Level_of_measurement#Ordinal_scale) values only, which means they need to have a rank order. @@ -295,6 +296,14 @@ Implementors are free to add other derived statistical values to the object, for | minimum | number\|string | **REQUIRED.** Minimum value. | | maximum | number\|string | **REQUIRED.** Maximum value. | +### JSON Schema Object + +For a full understanding of a summarized field, a JSON Schema can be provided as its summary. +This allows very fine-grained information for each field and each value as JSON Schema is also extensible. + +It is recommended to use [JSON Schema draft-07](https://json-schema.org/specification-links.html#draft-7) +to align with the JSON Schemas provided by STAC. 
+ ## Media Type for STAC Collections A STAC Collection is a JSON file ([RFC 8259](https://tools.ietf.org/html/rfc8259)), and thus should use the diff --git a/examples/collection-only/collection.json b/examples/collection-only/collection.json index 9a8da7c35..3d394e897 100644 --- a/examples/collection-only/collection.json +++ b/examples/collection-only/collection.json @@ -78,9 +78,11 @@ "minimum": 6.78, "maximum": 89.9 }, - "sci:citation": [ - "Copernicus Sentinel data [Year]" - ], + "sci:citation": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "string", + "pattern": "Copernicus Sentinel data \\d{4}" + }, "gsd": [ 10, 30, diff --git a/package.json b/package.json index a787cad3e..c6bac44d1 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "publish-schemas": "node .circleci/publish-schemas.js" }, "dependencies": { + "fs-extra": "^8.1.0", "gh-pages": "^3.0.0", "klaw-sync": "^6.0.0", "remark-cli": "^8.0.0", @@ -21,6 +22,6 @@ "remark-preset-lint-markdown-style-guide": "^3.0.0", "remark-preset-lint-recommended": "^4.0.0", "remark-validate-links": "^10.0.0", - "stac-node-validator": "^1.0.1" + "stac-node-validator": "^1.1.0" } } diff --git a/schema.json b/schema.json deleted file mode 100644 index e69de29bb..000000000