Skip to content

Commit

Permalink
pkg/trace/agent: improve NormalizeTag (#2951)
Browse files Browse the repository at this point in the history
Improves the performance of `agent.NormalizeTag`

	name                      old time/op    new time/op    delta
	NormalizeTag/ok-4            129ns ± 0%      69ns ± 0%  -46.74%
	NormalizeTag/trim-4          167ns ± 0%      92ns ± 0%  -44.97%
	NormalizeTag/trim-both-4     197ns ± 0%     160ns ± 0%  -18.78%
	NormalizeTag/plenty-4        193ns ± 0%     147ns ± 0%  -23.83%
	NormalizeTag/more-4          261ns ± 0%     264ns ± 0%   +1.15%

	name                      old alloc/op   new alloc/op   delta
	NormalizeTag/ok-4             136B ± 0%       24B ± 0%  -82.35%
	NormalizeTag/trim-4           160B ± 0%       32B ± 0%  -80.00%
	NormalizeTag/trim-both-4      176B ± 0%       64B ± 0%  -63.64%
	NormalizeTag/plenty-4         160B ± 0%       64B ± 0%  -60.00%
	NormalizeTag/more-4           176B ± 0%      128B ± 0%  -27.27%

	name                      old allocs/op  new allocs/op  delta
	NormalizeTag/ok-4             3.00 ± 0%      2.00 ± 0%  -33.33%
	NormalizeTag/trim-4           3.00 ± 0%      2.00 ± 0%  -33.33%
	NormalizeTag/trim-both-4      3.00 ± 0%      3.00 ± 0%    0.00%
	NormalizeTag/plenty-4         3.00 ± 0%      3.00 ± 0%    0.00%
	NormalizeTag/more-4           3.00 ± 0%      4.00 ± 0%  +33.33%
  • Loading branch information
gbbr authored Jan 28, 2019
1 parent 73bd3f6 commit c6012c3
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 32 deletions.
100 changes: 68 additions & 32 deletions pkg/trace/agent/tags.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package agent

import (
"bytes"
"sort"
"strings"
"unicode"
Expand Down Expand Up @@ -251,54 +250,91 @@ func FilterTags(tags, groups []string) []string {
// backend requirements
// taken from dd-go.model.NormalizeTag
func NormalizeTag(tag string) string {
// unless you just throw out unicode, this is already as fast as it gets

buf := bytes.NewBuffer(make([]byte, 0, 2*len(tag)))
lastWasUnderscore := false

for _, c := range tag {
// fast path for len check
if buf.Len() >= maxTagLength {
var (
trim int // start character (if trimming)
wiping bool // true when the previous character has been discarded
wipe [][2]int // sections to discard: (start, end) pairs
chars int // number of characters processed
)
var (
i int // current byte
c rune // current rune
)
norm := []byte(tag)
for i, c = range tag {
if chars >= maxTagLength {
// we've reached the maximum
break
}
// fast path for ascii alphabetic chars
// fast path; all letters are ok
switch {
case c >= 'a' && c <= 'z':
buf.WriteRune(c)
lastWasUnderscore = false
chars++
wiping = false
continue
case c >= 'A' && c <= 'Z':
c -= 'A' - 'a'
buf.WriteRune(c)
lastWasUnderscore = false
// lower-case
norm[i] += 'a' - 'A'
chars++
wiping = false
continue
}

c = unicode.ToLower(c)
switch {
// handle always valid cases
case unicode.IsLetter(c) || c == ':':
buf.WriteRune(c)
lastWasUnderscore = false
// skip any characters that can't start the string
case buf.Len() == 0:
chars++
wiping = false
case chars == 0:
// this character can not start the string, trim
trim = i + 1
continue
// handle valid characters that can't start the string.
case unicode.IsDigit(c) || c == '.' || c == '/' || c == '-':
buf.WriteRune(c)
lastWasUnderscore = false
// convert anything else to underscores (including underscores), but only allow one in a row.
case !lastWasUnderscore:
buf.WriteRune('_')
lastWasUnderscore = true
chars++
wiping = false
default:
// illegal character
if !wiping {
// start a new cut
wipe = append(wipe, [2]int{i, i + 1})
wiping = true
} else {
// lengthen current cut
wipe[len(wipe)-1][1]++
}
}
}

// strip trailing underscores
if lastWasUnderscore {
b := buf.Bytes()
return string(b[:len(b)-1])
norm = norm[trim : i+1] // trim start and end
if len(wipe) == 0 {
// tag was ok, return it as it is
return string(norm)
}
delta := trim // cut offsets delta
for _, cut := range wipe {
// start and end of cut, including delta from previous cuts:
start, end := cut[0]-delta, cut[1]-delta

if end >= len(norm) {
// this cut includes the end of the string; discard it
// completely and finish the loop.
norm = norm[:start]
break
}
// replace the beginning of the cut with '_'
norm[start] = '_'
if end-start == 1 {
// nothing to discard
continue
}
// discard remaining characters in the cut
copy(norm[start+1:], norm[end:])

return buf.String()
// shorten the slice
norm = norm[:len(norm)-(end-start)+1]

// count the new delta for future cuts
delta += cut[1] - cut[0] - 1
}
return string(norm)
}
38 changes: 38 additions & 0 deletions pkg/trace/agent/tags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,41 @@ func TestTagSetKey(t *testing.T) {
ts := NewTagSetFromString("a:b,a:b:c,abc")
assert.Equal(t, ":abc,a:b,a:b:c", ts.Key())
}

func TestNormalizeTag(t *testing.T) {
for _, tt := range []struct{ in, out string }{
{in: "ok", out: "ok"},
{in: "AlsO:ök", out: "also:ök"},
{in: ":still_ok", out: ":still_ok"},
{in: "___trim", out: "trim"},
{in: "12.:trim@", out: ":trim"},
{in: "12.:trim@@", out: ":trim"},
{in: "fun:ky__tag/1", out: "fun:ky_tag/1"},
{in: "fun:ky@tag/2", out: "fun:ky_tag/2"},
{in: "fun:ky@@@tag/3", out: "fun:ky_tag/3"},
{in: "tag:1/2.3", out: "tag:1/2.3"},
{in: "---fun:k####y_ta@#g/1_@@#", out: "fun:k_y_ta_g/1"},
{in: "AlsO:œ#@ö))œk", out: "also:œ_ö_œk"},
} {
t.Run("", func(t *testing.T) {
assert.Equal(t, tt.out, NormalizeTag(tt.in), tt.in)
})
}
}

func benchNormalizeTag(tag string) func(b *testing.B) {
return func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
NormalizeTag(tag)
}
}
}

func BenchmarkNormalizeTag(b *testing.B) {
b.Run("ok", benchNormalizeTag("good_tag"))
b.Run("trim", benchNormalizeTag("___trim_left"))
b.Run("trim-both", benchNormalizeTag("___trim_right@@#!"))
b.Run("plenty", benchNormalizeTag("fun:ky_ta@#g/1"))
b.Run("more", benchNormalizeTag("fun:k####y_ta@#g/1_@@#"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
enhancements:
- |
APM: improve performance of NormalizeTag function.

0 comments on commit c6012c3

Please sign in to comment.