Skip to content

Commit

Permalink
Use strings.Builder in lexer (#438)
Browse files Browse the repository at this point in the history
Replace all string building operations in the lexer with
strings.Builder. Doing so shows significant performance improvements.
BurntSushi still has a slight edge in CPU performance, but there's still
much work to do on memory performance.

name                       old time/op    new time/op    delta
ParseToml-2                   311µs ± 0%     273µs ± 3%  -12.29%  (p=0.008 n=5+5)
UnmarshalToml-2               386µs ± 4%     349µs ± 3%   -9.63%  (p=0.008 n=5+5)
UnmarshalBurntSushiToml-2     368µs ± 8%     341µs ± 2%     ~     (p=0.056 n=5+5)

name                       old alloc/op   new alloc/op   delta
ParseToml-2                   132kB ± 0%     118kB ± 0%  -11.07%  (p=0.008 n=5+5)
UnmarshalToml-2               147kB ± 0%     133kB ± 0%   -9.92%  (p=0.008 n=5+5)
UnmarshalBurntSushiToml-2    82.6kB ± 0%    82.6kB ± 0%     ~     (p=1.000 n=5+5)

name                       old allocs/op  new allocs/op  delta
ParseToml-2                   3.19k ± 0%     1.91k ± 0%  -40.19%  (p=0.008 n=5+5)
UnmarshalToml-2               4.03k ± 0%     2.75k ± 0%  -31.83%  (p=0.008 n=5+5)
UnmarshalBurntSushiToml-2     1.73k ± 0%     1.73k ± 0%     ~     (all equal)

Out of curiosity, I benchmarked the results of updating each function
along the way to see how each change affected the overall performance:

name \ time/op             master       lexKey       lexLitStringAsString  lexStringAsString
ParseToml-2                 311µs ± 0%   299µs ± 1%            290µs ± 3%         273µs ± 3%
UnmarshalToml-2             386µs ± 4%   381µs ± 2%            364µs ± 2%         349µs ± 3%
UnmarshalBurntSushiToml-2   368µs ± 8%   341µs ± 2%            345µs ± 5%         341µs ± 2%

name \ alloc/op            master       lexKey       lexLitStringAsString  lexStringAsString
ParseToml-2                 132kB ± 0%   132kB ± 0%            125kB ± 0%         118kB ± 0%
UnmarshalToml-2             147kB ± 0%   146kB ± 0%            140kB ± 0%         133kB ± 0%
UnmarshalBurntSushiToml-2  82.6kB ± 0%  82.6kB ± 0%           82.6kB ± 0%        82.6kB ± 0%

name \ allocs/op           master       lexKey       lexLitStringAsString  lexStringAsString
ParseToml-2                 3.19k ± 0%   2.86k ± 0%            2.49k ± 0%         1.91k ± 0%
UnmarshalToml-2             4.03k ± 0%   3.70k ± 0%            3.33k ± 0%         2.75k ± 0%
UnmarshalBurntSushiToml-2   1.73k ± 0%   1.73k ± 0%            1.73k ± 0%         1.73k ± 0%

Benchmarks were run from the benchmark/ directory using:

go test -bench=.*Toml -benchmem -count=5 ./...
  • Loading branch information
moorereason authored Sep 12, 2020
1 parent b76eb62 commit 5c94d86
Showing 1 changed file with 53 additions and 47 deletions.
100 changes: 53 additions & 47 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (l *tomlLexer) lexComma() tomlLexStateFn {
// Parses the key and emits its value without escape sequences.
// Bare keys, basic string keys and literal string keys are supported.
func (l *tomlLexer) lexKey() tomlLexStateFn {
growingString := ""
var sb strings.Builder

for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
if r == '"' {
Expand All @@ -315,7 +315,9 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
if err != nil {
return l.errorf(err.Error())
}
growingString += "\"" + str + "\""
sb.WriteString("\"")
sb.WriteString(str)
sb.WriteString("\"")
l.next()
continue
} else if r == '\'' {
Expand All @@ -324,41 +326,45 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
if err != nil {
return l.errorf(err.Error())
}
growingString += "'" + str + "'"
sb.WriteString("'")
sb.WriteString(str)
sb.WriteString("'")
l.next()
continue
} else if r == '\n' {
return l.errorf("keys cannot contain new lines")
} else if isSpace(r) {
str := " "
var str strings.Builder
str.WriteString(" ")

// skip trailing whitespace
l.next()
for r = l.peek(); isSpace(r); r = l.peek() {
str += string(r)
str.WriteRune(r)
l.next()
}
// break loop if not a dot
if r != '.' {
break
}
str += "."
str.WriteString(".")
// skip trailing whitespace after dot
l.next()
for r = l.peek(); isSpace(r); r = l.peek() {
str += string(r)
str.WriteRune(r)
l.next()
}
growingString += str
sb.WriteString(str.String())
continue
} else if r == '.' {
// skip
} else if !isValidBareChar(r) {
return l.errorf("keys cannot contain %c character", r)
}
growingString += string(r)
sb.WriteRune(r)
l.next()
}
l.emitWithValue(tokenKey, growingString)
l.emitWithValue(tokenKey, sb.String())
return l.lexVoid
}

Expand All @@ -383,7 +389,7 @@ func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
}

func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
growingString := ""
var sb strings.Builder

if discardLeadingNewLine {
if l.follow("\r\n") {
Expand All @@ -397,14 +403,14 @@ func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNe
// find end of string
for {
if l.follow(terminator) {
return growingString, nil
return sb.String(), nil
}

next := l.peek()
if next == eof {
break
}
growingString += string(l.next())
sb.WriteRune(l.next())
}

return "", errors.New("unclosed string")
Expand Down Expand Up @@ -438,7 +444,7 @@ func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
growingString := ""
var sb strings.Builder

if discardLeadingNewLine {
if l.follow("\r\n") {
Expand All @@ -451,7 +457,7 @@ func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine,

for {
if l.follow(terminator) {
return growingString, nil
return sb.String(), nil
}

if l.follow("\\") {
Expand All @@ -469,61 +475,61 @@ func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine,
l.next()
}
case '"':
growingString += "\""
sb.WriteString("\"")
l.next()
case 'n':
growingString += "\n"
sb.WriteString("\n")
l.next()
case 'b':
growingString += "\b"
sb.WriteString("\b")
l.next()
case 'f':
growingString += "\f"
sb.WriteString("\f")
l.next()
case '/':
growingString += "/"
sb.WriteString("/")
l.next()
case 't':
growingString += "\t"
sb.WriteString("\t")
l.next()
case 'r':
growingString += "\r"
sb.WriteString("\r")
l.next()
case '\\':
growingString += "\\"
sb.WriteString("\\")
l.next()
case 'u':
l.next()
code := ""
var code strings.Builder
for i := 0; i < 4; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
code.WriteRune(c)
}
intcode, err := strconv.ParseInt(code, 16, 32)
intcode, err := strconv.ParseInt(code.String(), 16, 32)
if err != nil {
return "", errors.New("invalid unicode escape: \\u" + code)
return "", errors.New("invalid unicode escape: \\u" + code.String())
}
growingString += string(rune(intcode))
sb.WriteRune(rune(intcode))
case 'U':
l.next()
code := ""
var code strings.Builder
for i := 0; i < 8; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
code.WriteRune(c)
}
intcode, err := strconv.ParseInt(code, 16, 64)
intcode, err := strconv.ParseInt(code.String(), 16, 64)
if err != nil {
return "", errors.New("invalid unicode escape: \\U" + code)
return "", errors.New("invalid unicode escape: \\U" + code.String())
}
growingString += string(rune(intcode))
sb.WriteRune(rune(intcode))
default:
return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
}
Expand All @@ -534,7 +540,7 @@ func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine,
return "", fmt.Errorf("unescaped control character %U", r)
}
l.next()
growingString += string(r)
sb.WriteRune(r)
}

if l.peek() == eof {
Expand Down Expand Up @@ -769,19 +775,19 @@ func init() {
// /!\ also matches the empty string
//
// Example matches:
//1979-05-27T07:32:00Z
//1979-05-27T00:32:00-07:00
//1979-05-27T00:32:00.999999-07:00
//1979-05-27 07:32:00Z
//1979-05-27 00:32:00-07:00
//1979-05-27 00:32:00.999999-07:00
//1979-05-27T07:32:00
//1979-05-27T00:32:00.999999
//1979-05-27 07:32:00
//1979-05-27 00:32:00.999999
//1979-05-27
//07:32:00
//00:32:00.999999
// 1979-05-27T07:32:00Z
// 1979-05-27T00:32:00-07:00
// 1979-05-27T00:32:00.999999-07:00
// 1979-05-27 07:32:00Z
// 1979-05-27 00:32:00-07:00
// 1979-05-27 00:32:00.999999-07:00
// 1979-05-27T07:32:00
// 1979-05-27T00:32:00.999999
// 1979-05-27 07:32:00
// 1979-05-27 00:32:00.999999
// 1979-05-27
// 07:32:00
// 00:32:00.999999
dateRegexp = regexp.MustCompile(`^(?:\d{1,4}-\d{2}-\d{2})?(?:[T ]?\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})?)?`)
}

Expand Down

0 comments on commit 5c94d86

Please sign in to comment.