Skip to content

Commit

Permalink
Allow parsing of \u0000 and \x00
Browse files Browse the repository at this point in the history
Previously the JSON parser was deliberately designed to choke
	if it encountered \x00 or \u0000, since those represent the NUL character,
	which is treated as the terminating character
	in any string passed from Notepad++ to a plugin or vice versa.
However, I have decided that this limitation does not make much sense,
	because when JsonTools reformats JSON, it represents all control characters
	using the \u escape (so it is internally consistent and correct to represent NUL as \u0000),
	and thus most plugin commands will not cause problems when working with JSON that contains
	\u0000 or \x00.

WARNINGS:
1. JsonTools is still incapable of parsing text that contains a literal NUL character (as opposed to the \x00 or \u0000 escape sequences),
	because when Notepad++ sends the text of a file to a plugin, it truncates that text at the first NUL character.
2. The "Dump JSON string(s) as raw text" plugin command (and any other plugin commands that return the raw text of a JSON string),
	when used to dump a JSON string that contains \x00 or \u0000 escapes,
	will instead dump all the raw text in that string *up until the first \x00 or \u0000*,
	because JsonTools sends Notepad++ a string containing a literal NUL character, which Notepad++ treats as the end of the string.
  • Loading branch information
molsonkiko committed Nov 29, 2024
1 parent 7c6ce50 commit f3df446
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 68 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed

1. When a file is [pretty-printed or compressed](/docs/README.md#the-basics) or [edited with RemesPath](/docs/RemesPath.md#editing-with-assignment-expressions), the caret will be scrolled into view. Previously, if the file contained very long lines and word wrapping was turned off, [the user might have to manually scroll to the left](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/84) after pretty-printing or compressing.
2. NUL characters written as the escape sequences `\u0000` or `\x00` can now be parsed (literal NUL characters in the document are still not supported). Previously [these escape sequences raised a fatal error](https://github.com/molsonkiko/JsonToolsNppPlugin/issues/85).

## [8.2.0] - 2024-11-09

Expand Down
1 change: 1 addition & 0 deletions JsonToolsNppPlugin/JSONTools/JNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ public static void CharToSb(StringBuilder sb, char c)
{
case '\\': sb.Append("\\\\" ); break;
case '"': sb.Append("\\\"" ); break;
case '\x00': sb.Append("\\u0000"); break;
case '\x01': sb.Append("\\u0001"); break;
case '\x02': sb.Append("\\u0002"); break;
case '\x03': sb.Append("\\u0003"); break;
Expand Down
5 changes: 1 addition & 4 deletions JsonToolsNppPlugin/JSONTools/JsonParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ public string TranslateMessageIfDesired(bool translated)
// FATAL messages
case JsonLintType.FATAL_EXPECTED_JAVASCRIPT_COMMENT: return Translator.TranslateLintMessage(translated, lintType, "Expected JavaScript comment after '/'");
case JsonLintType.FATAL_HEXADECIMAL_TOO_SHORT: return TryTranslateWithOneParam(translated, lintType, "Could not find valid hexadecimal of length {0}", param1);
case JsonLintType.FATAL_NUL_CHAR: return Translator.TranslateLintMessage(translated, lintType, "'\\x00' is the null character, which is illegal in JsonTools");
case JsonLintType.FATAL_UNTERMINATED_KEY: return Translator.TranslateLintMessage(translated, lintType, "Unterminated object key");
case JsonLintType.FATAL_INVALID_STARTSWITH_n: return Translator.TranslateLintMessage(translated, lintType, "Expected literal starting with 'n' to be null or nan");
case JsonLintType.FATAL_PLUS_OR_MINUS_AT_EOF: return TryTranslateWithOneParam(translated, lintType, "'{0}' sign at end of document", param1);
Expand Down Expand Up @@ -366,7 +365,7 @@ public enum JsonLintType : short
/// param1 = expected_hex_length (int)
/// </summary>
FATAL_HEXADECIMAL_TOO_SHORT = FATAL_EXPECTED_JAVASCRIPT_COMMENT + 1,
FATAL_NUL_CHAR = FATAL_EXPECTED_JAVASCRIPT_COMMENT + 2,

FATAL_UNTERMINATED_KEY = FATAL_EXPECTED_JAVASCRIPT_COMMENT + 3,
FATAL_INVALID_STARTSWITH_n = FATAL_EXPECTED_JAVASCRIPT_COMMENT + 4,
/// <summary>
Expand Down Expand Up @@ -888,8 +887,6 @@ private bool HandleCharErrors(int c, string inp, int ii)
{
if (c == '\n')
return HandleError(JsonLintType.BAD_STRING_CONTAINS_NEWLINE, inp, ii, ParserState.BAD);
if (c == 0)
return HandleError(JsonLintType.FATAL_NUL_CHAR, inp, ii);
if (c < 0)
return true;
return HandleError(JsonLintType.OK_CONTROL_CHAR, inp, ii);
Expand Down
6 changes: 4 additions & 2 deletions JsonToolsNppPlugin/JSONTools/YamlDumper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ private string EscapeBackslash(string s)
{
switch (c)
{
case '\x00': sb.Append("\\0"); break;
case '\n': sb.Append("\\n"); break;
case '\t': sb.Append(@"\\t"); break;
case '\\': sb.Append(@"\\"); break;
Expand All @@ -83,6 +84,7 @@ private string EscapeBackslashKey(string s)
{
switch (c)
{
case '\x00': sb.Append("\\0"); break;
case '\n': sb.Append("\\n"); break;
case '\t': sb.Append(@"\\t"); break;
case '\\': sb.Append(@"\\"); break;
Expand Down Expand Up @@ -112,7 +114,7 @@ private string YamlKeyRepr(string k)
// a YAML parser will recognize that it is not actually a number.
return "'" + k + "'";
}
Regex forbiddenKeyChars = new Regex(@"[\t :]");
Regex forbiddenKeyChars = new Regex(@"[\t :\x00]");
if (forbiddenKeyChars.IsMatch(k))
{
// '\t', ' ', ':', and '\x00' (NUL) are all illegal inside a YAML key. We will escape those out
Expand Down Expand Up @@ -141,7 +143,7 @@ private string YamlValRepr(JNode v)
{
return $"\"{strv}\"";
}
Regex backslash = new Regex("([\\\\:\"'\r\t\n\f\b])");
Regex backslash = new Regex("([\\\\:\"'\r\t\n\f\b\x00])");
if (backslash.IsMatch(strv))
{
return EscapeBackslash(strv);
Expand Down
4 changes: 2 additions & 2 deletions JsonToolsNppPlugin/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
// Build Number
// Revision
//
[assembly: AssemblyVersion("8.2.0.1")]
[assembly: AssemblyFileVersion("8.2.0.1")]
[assembly: AssemblyVersion("8.2.0.2")]
[assembly: AssemblyFileVersion("8.2.0.2")]
12 changes: 8 additions & 4 deletions JsonToolsNppPlugin/Tests/JsonParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -947,7 +947,7 @@ public static bool TestLinter()
"Whitespace characters other than ' ', '\\t', '\\r', and '\\n' are only allowed in JSON5",
}),
("{foo: 1, $baz: 2, 草: 2, _quЯ: 3, \\ud83d\\ude00_$\\u1ed3: 4, a\\uff6acf: 5, \\u0008\\u000a: 6, f\\u0000o: 1}",
"{\"foo\": 1, \"$baz\": 2, \"\": 2, \"_quЯ\": 3, \"😀_$ồ\": 4, \"aェcf\": 5, \"\\b\\n\": 6}",
"{\"foo\": 1, \"$baz\": 2, \"\": 2, \"_quЯ\": 3, \"😀_$ồ\": 4, \"aェcf\": 5, \"\\b\\n\": 6, \"f\\u0000o\": 1}",
new string[]{
"Unquoted keys are only allowed in JSON5",
"Unquoted keys are only allowed in JSON5",
Expand All @@ -959,7 +959,7 @@ public static bool TestLinter()
"Control characters (ASCII code less than 0x20) are disallowed inside strings under the strict JSON specification",
"String literal contains newline", // the \u000a in \\b\\u000a is secretly a newline
"Unquoted keys are only allowed in JSON5",
"'\\x00' is the null character, which is illegal in JsonTools"
"Control characters (ASCII code less than 0x20) are disallowed inside strings under the strict JSON specification"
}),
("[1,\"b\\\nb\\\rb\\\r\nb\"]", "[1, \"bbbb\"]",
new string[]
Expand All @@ -975,8 +975,12 @@ public static bool TestLinter()
"Escaped newline characters are only allowed in JSON5",
"Escaped newline characters are only allowed in JSON5",
}),
("[\"a\\x00b\", 1]", "[\"a\"]", new string[]{"'\\x00' is the null character, which is illegal in JsonTools"}),
("[\"a\\u0000b\", 1]", "[\"a\"]", new string[]{"'\\x00' is the null character, which is illegal in JsonTools"}),
("[\"a\\x00b\", 1]", "[\"a\\u0000b\", 1]", new string[]
{
"Control characters (ASCII code less than 0x20) are disallowed inside strings under the strict JSON specification",
"\\x escapes are only allowed in JSON5",
}),
("[\"a\\u0000b\", 1]", "[\"a\\u0000b\", 1]", new string[]{"Control characters (ASCII code less than 0x20) are disallowed inside strings under the strict JSON specification"}),
("{\"\\1\\A\": \"\\7\\B\"}", "{\"1A\": \"7B\"}",
new string[]{
"Escaped char '1' is only allowed in JSON5",
Expand Down
2 changes: 2 additions & 0 deletions JsonToolsNppPlugin/Tests/YamlDumperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ public static bool Test()
"a:\n -\n - 1\n - 2.0\n -\n '3':\n - '5'\n'2': 6\n",
"nested iterables" },
new string[] { "{\"a\": \"a: b\"}", "a: \"a: b\"\n", "value contains colon" },
new string[] { "{\"a\\u0000\": \"a: b\"}", "\"a\\0\": \"a: b\"\n", "value contains colon and key contains NUL" },
new string[] { "{\"zwg\": \"a\\u0000b\"}", "zwg: \"a\\0b\"\n", "value contains NUL" },
new string[] { "{\"a: b\": \"a\"}", "\"a: b\": a\n", "key contains colon" },
new string[] { "{\"a\": \"RT @blah: MondayMo\\\"r\'ing\"}", "a: \'RT @blah: MondayMo\"r\'\'ing\'\n", "Value contains quotes and colon" },
new string[] { "{\"a\": \"a\\n\'big\'\\ndog\"}", "a: \"a\\n\'big\'\\ndog\"\n", "Value contains quotes and newline" },
Expand Down
Loading

0 comments on commit f3df446

Please sign in to comment.