Skip to content

Commit

Permalink
unquote should check for invalid UTF-8 code points (#3595)
Browse files Browse the repository at this point in the history
Quoted tokens can contain both UTF-8 byte literals and code point literals
that should be interpreted when the token is quoted. However, we need to check
whether the interpreted literals are valid UTF-8 code points.
This check now happens in unquote.

Signed-off-by: George Robinson <george.robinson@grafana.com>
  • Loading branch information
grobinson-grafana authored Nov 13, 2023
1 parent ce6efba commit f96ba1b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
24 changes: 24 additions & 0 deletions matchers/parse/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ func TestMatchers(t *testing.T) {
name: "equals unicode emoji in quotes",
input: "{\"foo\"=\"🙂\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode emoji as bytes in quotes",
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode emoji as code points in quotes",
input: "{\"foo\"=\"\\U0001f642\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode sentence in quotes",
input: "{\"foo\"=\"🙂bar\"}",
Expand Down Expand Up @@ -199,6 +207,10 @@ func TestMatchers(t *testing.T) {
name: "no unquoted escape sequences",
input: "{foo=bar\\n}",
error: "8:9: \\: invalid input: expected a comma or close brace",
}, {
name: "invalid unicode",
input: "{\"foo\"=\"\\xf0\\x9f\"}",
error: "7:17: \"\\xf0\\x9f\": invalid input",
}}

for _, test := range tests {
Expand Down Expand Up @@ -244,6 +256,14 @@ func TestMatcher(t *testing.T) {
name: "equals unicode emoji",
input: "{foo=🙂}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode emoji as bytes in quotes",
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode emoji as code points in quotes",
input: "{\"foo\"=\"\\U0001f642\"}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode sentence",
input: "{foo=🙂bar}",
Expand Down Expand Up @@ -331,6 +351,10 @@ func TestMatcher(t *testing.T) {
name: "two or more returns error",
input: "foo=bar,bar=baz",
error: "expected 1 matcher, found 2",
}, {
name: "invalid unicode",
input: "foo=\"\\xf0\\x9f\"",
error: "4:14: \"\\xf0\\x9f\": invalid input",
}}

for _, test := range tests {
Expand Down
11 changes: 10 additions & 1 deletion matchers/parse/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
package parse

import (
"errors"
"fmt"
"strconv"
"unicode/utf8"
)

// tokenKind is an integer-backed type used to classify tokens.
// NOTE(review): its values (e.g. tokenQuoted) are presumably declared in a
// const block not visible here — confirm.
type tokenKind int
Expand Down Expand Up @@ -82,7 +84,14 @@ func (t token) isOneOf(kinds ...tokenKind) bool {
// unquote returns the value of the token with its quoting removed.
//
// If the token is of kind tokenQuoted, the value is passed through
// strconv.Unquote, which interprets escape sequences such as \xf0 or
// \U0001f642. The interpreted result is then checked with utf8.ValidString,
// because byte escapes can yield sequences that are not valid UTF-8. An error
// is returned if the value cannot be unquoted or if the result is not valid
// UTF-8.
//
// Tokens of any other kind are returned unmodified.
func (t token) unquote() (string, error) {
	if t.kind != tokenQuoted {
		return t.value, nil
	}
	unquoted, err := strconv.Unquote(t.value)
	if err != nil {
		return "", err
	}
	// strconv.Unquote accepts byte escapes like "\xf0\x9f" that do not form
	// a complete UTF-8 sequence, so the result must be validated separately.
	if !utf8.ValidString(unquoted) {
		return "", errors.New("quoted string contains invalid UTF-8 code points")
	}
	return unquoted, nil
}
Expand Down

0 comments on commit f96ba1b

Please sign in to comment.