From b03e651090444078944bdc70a9bf301009e62c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Fri, 17 Jan 2025 01:57:30 -0800 Subject: [PATCH] new schema format/spec --- SCHEMA-SPEC.md | 10 +- schema/cargo.kdl | 172 ++++++++++++ schema/ksl-schema.kdl | 613 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 794 insertions(+), 1 deletion(-) create mode 100644 schema/cargo.kdl create mode 100644 schema/ksl-schema.kdl diff --git a/SCHEMA-SPEC.md b/SCHEMA-SPEC.md index 907c1af..d453edd 100644 --- a/SCHEMA-SPEC.md +++ b/SCHEMA-SPEC.md @@ -6,7 +6,7 @@ constrain the allowed semantics of a KDL document. This can be used for many purposes: documentation for users, automated verification, or even automated generation of bindings! -This document describes KDL Schema version `1.0.0`. It was released on September 11, 2021. +This document describes KDL Schema version `2.0.0`. It is unreleased. ## The Formal Schema @@ -39,6 +39,14 @@ None. * `tag-names` (optional): [Validations](#validation-nodes) to apply to the _names_ of tags of child nodes. * `other-tags-allowed` (optional): Whether to allow node tags other than the ones explicitly listed here. Defaults to `#false`. +#### Example + +```kdl +document { + +} +``` + ### `info` node The `info` node describes the schema itself. diff --git a/schema/cargo.kdl b/schema/cargo.kdl new file mode 100644 index 0000000..f5497ee --- /dev/null +++ b/schema/cargo.kdl @@ -0,0 +1,172 @@ +@kdl:schema "https://github.com/kdl-org/kdl/blob/main/schema/kdl-schema.kdl" + +metadata { + // TODO: update this link when we're ready to release something. + link "https://github.com/kdl-org/kdl/blob/main/schema/cargo.kdl" rel=self + title "Cargo Schema" lang=en + description "KDL-based translation of the Cargo.toml schema." 
lang=en + author "Kat Marchán" { + link "https://github.com/zkat" rel=self + } + link "https://github.com/kdl-org/kdl" rel=documentation + link "https://doc.rust-lang.org/cargo/reference/manifest.html" rel=documentation + license "Creative Commons Attribution-ShareAlike 4.0 International License" spdx=CC-BY-SA-4.0 { + link "https://creativecommons.org/licenses/by-sa/4.0/" lang=en + } +} + +children { + node package title="Describes a package" { + children { + node name title="The name of the package" { + required + arg { + type string + pattern #"^[a-zA-Z0-9\-_]+$"# + } + } + node version title="The version of the package." { + arg { + type string + // From https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string + pattern #"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"# + } + } + node authors title="The authors of the package." { + repeatable + args { + distinct + type string + } + children { + node - { + repeatable + arg title="Name" { + type string + } + prop email title="Email address" { + type string + format email + } + prop about title="Brief note about author (role, etc)" { + type string + } + } + } + } + node edition title="The Rust edition." { + arg { + type string + enum "2015" "2018" "2021" "2024" + } + } + node rust-version title="The minimal supported Rust version." { + arg { + type string + } + } + node description title="A description of the package." { + arg { + type string + } + } + node documentation title="URL of the package documentation." { + arg { + type string + format url + } + } + node readme title="Path to the package’s README file." { + arg { + type string boolean + } + } + node homepage title="URL of the package homepage." { + arg { + type string + format url + } + } + node repository title="URL of the package source repository." 
{ + arg { + type string + format url + } + } + node license title="The package license." { + arg { + type string + } + } + node license-file title="Path to the text of the license." { + arg { + type string + } + } + node keywords title="Keywords for the package." { + args { + type string + // No pattern because keyword restrictions are only on + // crates.io + } + } + node categories title="Categories of the package." { + args { + type string + // No pattern because category restrictions are only on + // crates.io + } + } + node workspace title="Path to the workspace for the package." { + arg { + type string + } + } + node build title="Path to the package build script." { + arg { + type string boolean + } + } + node links title="Name of the native library the package links with." { + arg { + type string + } + } + node exclude title="Files to exclude when publishing." { + args { + type string + } + } + node include title="Files to include when publishing." { + args { + type string + } + } + node publish title="Can be used to prevent publishing the package." { + // TODO: This is a good example of where we might need smarter + // constraints ("either a single boolean, or 1+ strings") + args { + type string boolean + } + } + node metadata title="Extra settings for external tools." { + repeatable + args + props { + allow-others + } + } + node default-run title="The default binary to run by cargo run." { + arg { + type string + } + } + node no-autolib title="Disables library auto discovery." + node no-autobins title="Disables binary auto discovery." + node no-autoexamples title="Disables example auto discovery." + node no-autotests title="Disables test auto discovery." + node no-autobenches title="Disables bench auto discovery." + node resolver title="Sets the dependency resolver to use." 
+ } + } +} diff --git a/schema/ksl-schema.kdl b/schema/ksl-schema.kdl new file mode 100644 index 0000000..ef3dd71 --- /dev/null +++ b/schema/ksl-schema.kdl @@ -0,0 +1,613 @@ +// TODO: +// * examples +// * dependentRequired +// * dependentSchema +// * if-then-else +// * composition (anyOf, allOf, oneOf, not, etc: https://json-schema.org/understanding-json-schema/reference/combining) +// * followed-by (I think this might be useful: declaring relationships between children) +// * requires (a more general-purpose version of followed-by that uses kpath+children?) + +// simplify everything? If you want to deprecate an argument or option, you have +// to make a new node, basically. Which seems sensible, tbh. But might be too +// strict. + +@ksl:schema "https://github.com/kdl-org/kdl/blob/main/examples/ksl-schema.kdl" + +metadata { + // TODO: update this link when we're ready to release something. + id "https://github.com/kdl-org/kdl/blob/main/examples/ksl-schema.kdl" + title "KDL Schema" lang=en + description "KDL Schema schema using KDL Schema" lang=en + author "Kat Marchán" { + link "https://github.com/zkat" rel=self + } + contributor "Lars Willighagen" { + link "https://github.com/larsgw" rel=self + } + link "https://github.com/kdl-org/kdl" rel=documentation + license "Creative Commons Attribution-ShareAlike 4.0 International License" spdx=CC-BY-SA-4.0 { + link "https://creativecommons.org/licenses/by-sa/4.0/" lang=en + } + published "2021-08-31" + modified "2021-09-01" +} + +children { + node metadata title="Schema metadata" description="Contains metadata about the schema itself." { + required + children { + node id title="Schema identifier" description="The unique identifier for this schema. MUST be a valid URL/IRL. Implementations MAY attempt to visit it, but MUST NOT assume it is valid." 
{ + arg { + type string + format url irl + } + } + node title title="Schema title" description="The title of the schema or the format it describes" { + arg title="The title text" { + type string + } + prop lang id=metadata-lang title="Title language" description="The (human) language of the text" { + type string + } + } + node description title="Schema description" description="A description of the schema or the format it validates, which may include its purposes, its usage, and even examples." { + arg title="Description text" { + type string + } + prop title="Description language" { + @ksl:ref metadata-lang + } + } + node author id=metadata-author title="Schema author" description="An author for the schema" { + repeatable + arg id=metadata-person-name description="Person name" { + optional + type string + } + prop orcid id=metadata-orcid description="The ORCID of the person" { + type string + pattern #"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$"# + } + children { + node link { + @ksl:ref metadata-link + } + } + } + node contributor title="Schema contributor" description="A contributor to the schema who is not considered an author." { + @ksl:ref metadata-author + } + node link id=metadata-link title="External link" description="Link to an external resource of some sort, such as the schema itself (`rel=self`) or documentation (`rel=documentation`). Implementations MAY visit the URL, but MUST NOT assume it is valid." { + repeatable + arg title="Link URL" description="A URL that the link points to" { + type string + format url irl + } + prop rel title="Link relationship" description="The relation between the current entity and the URL." { + required + type string + enum self documentation + } + prop title="Link language" description="Language of the destination document." 
{ + @ksl:ref metadata-lang + } + } + node license title="Schema license" description="The license(s) that the schema is licensed under" { + repeatable + arg description="Name of the used license" { + type string + } + prop spdx description="An SPDX license identifier" { + type string + // TODO: validation? + } + children { + node link { + @ksl:ref metadata-link + } + } + } + node published title="Schema publication date" description="When the schema was published" { + arg title="Publication date" { + type string + format date date-time + } + } + node modified title="Schema modification date" description="When the schema was modified. If used multiple times, the most recent date will be considered 'latest'." { + repeatable + args title="Modification date" { + type string + format date date-time + } + } + node version title="Schema semver version" description="The version number of this version of the schema, in semver format." { + arg title="Semver version number" { + type string + pattern #"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"# + } + } + } + } + node definitions title="Inert validation definitions" description="An optional set of definitions that may be referenced elsewhere in the schema. They will be inert (that is, not directly apply to the document) unless referenced by another node." + node document { + @ksl:ref children-node + default { + node @ksl:schema title="Schema reference" description=""" + Reference(s) for the schema(s) describing this document. They + MUST be properly-formatted URLs. Implementations MAY attempt to + visit them, but MUST NOT assume they are valid. + + If multiple URLs are provided, or if multiple `@ksl:schema` + nodes are present, ALL schemas MUST successfully validate in + order for the document to validate, unless `warn` is `#true`. 
+ + In such a case, implementations SHOULD warn that validation has + failed and report which schema failed to pass--they SHOULD + include more details about what the specific failure was, but + MAY simply indicate that certain schema(s) failed to validate. + """ { + repeatable + prop warn { + type boolean + default #false + } + args { + min 1 + type string + format url irl + } + } + } + children { + node children id=children-node title="Node children" description="Validations and definitions used for all nodes in this scope. Children are only allowed on nodes (or the toplevel document) if at least one `children` node is present in their definitions." { + children { + node names title="Child node name validations" description="Validations to apply to all node names in this scope." { + @ksl:ref string-validations + repeatable + } + node allow-others title="Allow other children" description=""" + If present, allows child nodes in this scope other than the ones + explicitly listed and those allowed by `names`. + """ { + arg { + type boolean + default #true + } + } + node node title="A KDL node" description="A KDL node belonging either to the top-level document or to another `node`'s children." { + repeatable + arg title="Node name" description="The name of the node." { + type string + } + default description="All KDL Schema nodes will include these components. To prevent conflicts, they are all prefixed with `@ksl:`" { + prop @ksl:title title="Node title" description="Short descriptor for node." { + type string + } + prop @ksl:desc title="Node description" description="A longer description of this item's purpose and behavior." { + type string + } + prop @ksl:id title="Node identifier" description="A schema-unique ID/anchor for this node." { + type string + } + children { + node @ksl:title title="Short description" description="A short descriptor for node." 
{ + arg { + type string + } + } + node @ksl:desc title="Long description" description="A longer description of this item's purpose and behavior." { + arg { + type string + } + } + node @ksl:ref title="Node reference" description=""" + A reference to a separately-defined node. + + Each `ref` child will be interpreted in order of + appearance. Any overlapping definitions will replace + preceding instances, with each subsequent `ref` + replacing any duplicate node components. + + The replacement rules are as follows, and apply recursively: + * node properties MUST be replaced by key. + * node arguments MUST be replaced by order of appearance. + * `prop` definitions MUST be replaced by key (their first argument) + * `arg` definitions MUST be replaced based on _order of + appearance_. That is, the first `arg` in @ksl:ref `B` will be + merged into the first `arg` in preceding @ksl:ref `A`. + * For all other components: + * If the definition specified is marked as + `repeatable`, then all definitions using that node + will be concatenated, with later `ref`s + concatenating definitions after the previous `ref`'s + definitions. + * If the definition is NOT marked as `repeatable`, + it will be replaced by subsequent `ref`s. + + Once all `ref` children are resolved, the containing + node's own items will override anything defined by + `ref`s, using the same rules as above (essentially, the + current node is treated as a 'final `ref`'). + + If both an ID argument and a `path` are provided, + the ID will take precedence and, if not found, fall + back to the path. For `id` and `path` children, + precedence is in order of appearance, regardless of + whether the child is an `id` or a `path`. + + If no items resolve into a valid reference, + validation MUST error, unless the @ksl:ref is configured + as `optional`, in which case validation MAY warn, + but MUST NOT fail. 
+ """ { + + repeatable + arg title="ID reference to another node" { + type string + optional + } + prop path title="KPath reference to another node." { + type string + format kpath + } + prop base title="Base schema" description=""" + The schema to resolve references against. If not + provided, the base schema SHALL be the one + defined in `metadata > id` for the current + schema. + + Relative schema references SHALL be resolved + against `metadata > id`. + """ { + type string + format url-reference irl-reference + } + children { + node id title="ID reference to another node." { + repeatable + arg { + type string + } + + } + node path title="KPath reference to another node." { + repeatable + arg { + type string + format kpath + } + } + } + } + } + } + children { + // TODO: be more precise and clear about merge + override behavior. + // TODO: provide a way to "undefine" the defaults? + node default title="Definition default" description=""" + Any children of this node will be merged into the node + definition itself, as if written directly into the node. + Any conflicting definitions will be overridden by + definitions within the rest of the node. + """ { + children { + allow-others + } + } + node required title="Node is required" description="By default, all declared child nodes are optional. Including this option will require that this node always appear in its parent's children block." { + arg { + type boolean + default #true + } + } + node repeatable title="Node is repeatable" description="By default, each node in a `children` block may only appear once in its scope. When this option is present, the node will be allowed to have multiple instances within the same scope." { + prop min title="Minimum node count" description="Minimum number of repeated instances of this node that must appear in the same scope." 
{ + arg { + gte 0 + type integer + } + } + prop max title="Maximum node count" description="Maximum number of repeated instances of this node that may appear in the same scope." { + arg { + gte 0 + type integer + } + } + } + node deprecated title="Mark node as deprecated" description="When present, this node will be considered a deprecated part of the API. You may optionally supply a message, and/or a reference to a node that should be used instead." { + arg { + optional + type boolean + default #true + } + prop message title="Deprecation message" description="A helpful deprecation message that may explain why the node was deprecated and other information, such as when the node will be removed altogether. Users SHOULD use `by=` and `by-kpath` to specify what node this will be replaced with instead of including it in the `message` itself." { + type string + } + prop by title="Deprecated by this node `id`" { + type string + } + prop by-kpath title="Deprecated by this node KPath" { + type string + format kpath + } + } + node annotations title="Node type annotations" description="Validations to apply specifically to arbitrary node type annotation names" { + @ksl:ref string-validations + repeatable + } + node prop title="Node property" description="A node property key/value pair. Properties declared with `prop` are always optional, unless marked as `required` or included in `props:required`" { + @ksl:ref value-validations + repeatable + arg description="The property key." { + type string + } + children description="Property-specific validations." { + node required description="Whether this property is required in the node." + } + } + node props description="Validations to apply to all properties of this node." { + @ksl:ref value-validations + children { + node names description="Validations to apply to all property names." { + @ksl:ref string-validations + } + node min title="Minimum property count" description="Minimum number of properties this node must have." 
{ + arg { + gte 0 + type integer + } + } + node max title="Maximum property count" description="Maximum number of properties this node may have." { + arg { + gte 0 + type integer + } + } + node required title="List of required props" description=""" + List of property names that must be present on \ + the node. Individual `prop` nodes may specify \ + additional required properties beyond those \ + specified in this list. Properties listed here \ + which already have a `prop` node marked as \ + `required` are allowed, but are redundant. + """ { + args { + min 1 + type string + } + } + node allow-others title="Allow other properties" description="If present, allows other properties that don't match this validator." { + arg { + type boolean + default #true + } + } + } + } + node arg title="Single node argument" description=""" + Specifies validations for a single node argument. + + Each nth instance of this node will specify validations \ + for the corresponding nth instance of the arg. Every \ + specified `arg` is required, in the given order, unless \ + marked as `optional`. + """ { + @ksl:ref value-validations + repeatable + children { + node optional title="Argument is not required" description=""" + Whether this argument is optional. Specified \ + `arg`s are required by default. + + Note: `optional` only applies to *presence*: \ + an existing argument in an optional `arg` \ + \"slot\" that fails validation will fail \ + normally, even though it is optional. As such, \ + `optional` is only really useful if it is on the \ + last `arg`, or is only followed by optional \ + `arg`s. + """ { + arg { + type boolean + default #true + } + } + } + } + // TODO: add a feature that will let us specify that `args` + // MUST be after any existing `arg` nodes in the current + // scope. i.e. you can't do `node x { args; arg }` + node args title="Validations for all args" description="Specifies validations for all arguments. Can be used in conjunction with `arg`. 
If this node is not present, and if there are no `arg` nodes, no arguments will be allowed on the node at all" { + @ksl:ref value-validations + children { + // TODO: opportunity for mutual requirements here + node min title="Minimum argument count" description="Minimum number of arguments that must be present in a node. Must be less than or equal to `max`, if the latter is present." { + arg { + gte 0 + type integer + } + } + node max title="Maximum argument count" description="Maximum number of arguments that may be present in a node. Must be greater than or equal to `min`, if the latter is present." { + arg { + gte 0 + type integer + } + } + node distinct title="All arguments must be distinct" description="If present, all of this node's arguments need to be distinct values." { + arg { + type boolean + default #true + } + } + } + } + node children { + @ksl:ref children-node + } + } + } + } + } + } + } +} +definitions { + node string-validations id=string-validations description="String-related validations" { + @ksl:ref shared-validations + children { + node pattern title="Regex validations" description="EcmaScript-compatible Regex pattern or patterns to test string values against." { + args { + min 1 + type string + } + } + node min-length title="Minimum string length" description="Minimum length of the value, if it's a string." { + arg { + gte 0 + type integer + } + } + node max-length title="Maximum string length" description="Maximum length of the value, if it's a string." { + arg { + gte 0 + type integer + } + } + node format title="Value format" description=""" + Specifies the format of the value. + + Any supported type annotation from the KDL spec may be specified. It is up to implementations whether they validate this node. They SHOULD document the ones they support, if any. + + Any format that the implementation supports MUST be compliant with the specified reserved format in the KDL spec, and only apply it to the specified data types (e.g. 
`u8` can only apply to items of type `integer`, not to `string` or `number`). If the checked value is not of an applicable type, the implementation MUST skip applying this to the given type. It MAY choose to warn about skipping the format check. + + If a value specifies multiple `type`s, any `format`s are checked as usual against the matrix of compatible `type`/`format` values. + + Implementations MAY choose to either error or simply warn about format violations. They SHOULD document the behavior, and MAY provide configuration for it. + """ { + repeatable + args { + min 1 + type string + // https://json-schema.org/understanding-json-schema/reference/string.html#format + // TODO: Make sure this is up to date with the types listed in the spec. + enum date-time date time duration decimal currency country-2 country-3 \ + country-subdivision email idn-email hostname idn-hostname ipv4 ipv6 url \ + url-reference irl irl-reference url-template regex uuid kpath i8 i16 \ + i32 i64 i128 u8 u16 u32 u64 u128 isize usize f32 f64 decimal64 decimal128 + } + } + node media-type title="MIME type" description="MIME type of string value. May be applied to 'deserialized' data if value format is base64/base85 or some other stringly binary encoding." { + repeatable + args { + min 1 + type string + } + } + } + } + // Number-specific validations + node number-validations id=number-validations { + @ksl:ref shared-validations + children { + node div description="Only used for numeric values. Constrains them to be multiples of the given number(s)" { + repeatable + args { + min 1 + type number + } + } + node gt description="Only used for numeric values. Constrains them to be greater than the given number" { + arg { + type number + } + } + node gte description="Only used for numeric values. Constrains them to be greater than or equal to the given number" { + arg { + type number + } + } + node lt description="Only used for numeric values. 
Constrains them to be less than the given number" { + arg { + type number + } + } + node lte description="Only used for numeric values. Constrains them to be less than or equal to the given number" { + arg { + type number + } + } + } + } + // Validations shared across all types. + node shared-validations id=shared-validations { + children { + node type description="The type for this value. Multiple arguments signify a sum type." { + repeatable + args { + min 1 + type string + enum string boolean number integer #null + distinct + } + } + node const title="Specific value" description="Exact value that this value must match. Equivalent to a single-value `enum` validation." { + arg description="Constant value." + } + node enum title="Enumeration of values" description="An enumeration of possible values" { + repeatable + args description="Enumeration choices" { + min 1 + } + children description="Enumeration choices" { + node allow-others title="Allow other choices" description=""" + Whether other values than those explicitly enumerated + may be provided, so long as they pass other validations + in the node. + + While apparently redundant, this option may be useful in + cases where there's a set of suggested values, but + others are acceptable. This information can then be used + by tooling to e.g. suggest completion items. + """ { + arg { + type boolean + default #true + } + } + node - description="Enumeration choice" { + prop description description="Documentation for this enumerated item." + arg description="Enum value" + } + } + } + } + } + // General value validations + node value-validations id=value-validations { + @ksl:ref string-validations number-validations + children { + node annotations @ksl:title="Value type annotations" @ksl:desc="Validations for the type annotations that can be applied to this value." { + @ksl:ref string-validations + } + node default title="Default value" description="Sets a default value when optional. 
That is, it requires `optional` for `arg` nodes, and doesn't do anything useful if a `prop` is marked `required`, though it is not invalid to do so." { + arg + } + } + } + node node-validations id=node-validations description="Validations that can be applied to nodes themselves." { + children { + node min title="Minimum node instance count" description="Minimum number of instances of this node in its parent's children." { + // TODO: + arg { + gte 0 + type integer + } + } + node max title="Maximum node instance count" description="Maximum number of instances of this node in its parent's children." { + arg { + gte 0 + type integer + } + } + } + } +}