From 35761e6120466e81bb4f78be21510110b83e83ef Mon Sep 17 00:00:00 2001 From: Christian Siefkes Date: Fri, 27 Oct 2023 17:52:06 +0200 Subject: [PATCH 1/2] Extended bare key ranges to include all emojis. Also explain better what's allowed in bare keys and remove the emoji example, since (though it's possible) we don't advise using emojis as bare keys. --- toml.abnf | 7 +++++-- toml.md | 14 +++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/toml.abnf b/toml.abnf index 0446f8b6..580a1f13 100644 --- a/toml.abnf +++ b/toml.abnf @@ -58,8 +58,11 @@ unquoted-key-char =/ %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fr unquoted-key-char =/ %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block unquoted-key-char =/ %x37F-1FFF ; exclude GREEK QUESTION MARK, which is basically a semi-colon unquoted-key-char =/ %x200C-200D / %x203F-2040 ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ -unquoted-key-char =/ %x2070-218F / %x2460-24FF ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics -unquoted-key-char =/ %x2C00-2FEF / %x3001-D7FF ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces +unquoted-key-char =/ %x2070-21FF / %x2300-24FF ; skip math operators +unquoted-key-char =/ %x25A0-268B / %x2690-2757 ; skip box drawing, block elements, and some yin-yang symbols +unquoted-key-char =/ %x2762-2767 / %x2776-27E5 ; skip some Dingbat punctuation +unquoted-key-char =/ %x2801-297F ; skip some math brackets and arrows, and braille blank +unquoted-key-char =/ %x2B00-2FFF / %x3001-D7FF ; skip various math operators and symbols, and ideographic space unquoted-key-char =/ %xF900-FDCF / %xFDF0-FFFD ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode) unquoted-key-char =/ %x10000-EFFFF ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF) diff --git a/toml.md b/toml.md index 
8862f0ea..0c640690 100644 --- a/toml.md +++ b/toml.md @@ -104,10 +104,15 @@ first = "Tom" last = "Preston-Werner" # INVALID A key may be either bare, quoted, or dotted. **Bare keys** may contain any letter-like or number-like Unicode character from -any Unicode script, as well as ASCII digits, dashes and underscores. -Punctuation, spaces, arrows, box drawing and private use characters are not -allowed. Note that bare keys are allowed to be composed of only ASCII digits, -e.g. 1234, but are always interpreted as strings. +any Unicode script, as well as digits, dashes and underscores. Various symbols +(such as emojis or arrows) and punctuation marks from outside the ASCII range +are also allowed, while whitespace and private use characters are forbidden. It +is advisable to use only words (in arbitrary languages) as bare keys, as they +are generally accepted, while not all symbols and punctuation marks are. If you +want to use a bare key made up of several words, use a suitable separator +character (such as a underscore or hyphen) between the words, as spaces are not +allowed. Note that bare keys are allowed to be composed of only digits, e.g. +1234, but are always interpreted as strings. ℹ️ The exact ranges of allowed code points can be found in the [ABNF grammar file][abnf]. @@ -118,7 +123,6 @@ bare_key = "value" bare-key = "value" 1234 = "value" Fuß = "value" -😂 = "value" 汉语大字典 = "value" 辭源 = "value" பெண்டிரேம் = "value" From 673bc59239f6faa436ea64f6d9bd9a58cff66b77 Mon Sep 17 00:00:00 2001 From: Christian Siefkes Date: Fri, 27 Oct 2023 18:20:46 +0200 Subject: [PATCH 2/2] Fix typo. --- toml.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml.md b/toml.md index 0c640690..33bde080 100644 --- a/toml.md +++ b/toml.md @@ -110,7 +110,7 @@ are also allowed, while whitespace and private use characters are forbidden. 
It is advisable to use only words (in arbitrary languages) as bare keys, as they are generally accepted, while not all symbols and punctuation marks are. If you want to use a bare key made up of several words, use a suitable separator -character (such as a underscore or hyphen) between the words, as spaces are not +character (such as an underscore or hyphen) between the words, as spaces are not allowed. Note that bare keys are allowed to be composed of only digits, e.g. 1234, but are always interpreted as strings.