diff --git a/pkg/trace/agent/tags.go b/pkg/trace/agent/tags.go index e3e3ba64f899c..3e7ea34f93205 100644 --- a/pkg/trace/agent/tags.go +++ b/pkg/trace/agent/tags.go @@ -1,7 +1,6 @@ package agent import ( - "bytes" "sort" "strings" "unicode" @@ -251,54 +250,91 @@ func FilterTags(tags, groups []string) []string { // backend requirements // taken from dd-go.model.NormalizeTag func NormalizeTag(tag string) string { - // unless you just throw out unicode, this is already as fast as it gets - - buf := bytes.NewBuffer(make([]byte, 0, 2*len(tag))) - lastWasUnderscore := false - - for _, c := range tag { - // fast path for len check - if buf.Len() >= maxTagLength { + var ( + trim int // start character (if trimming) + wiping bool // true when the previous character has been discarded + wipe [][2]int // sections to discard: (start, end) pairs + chars int // number of characters processed + ) + var ( + i int // current byte + c rune // current rune + ) + norm := []byte(tag) + for i, c = range tag { + if chars >= maxTagLength { + // we've reached the maximum break } - // fast path for ascii alphabetic chars + // fast path; all letters are ok switch { case c >= 'a' && c <= 'z': - buf.WriteRune(c) - lastWasUnderscore = false + chars++ + wiping = false continue case c >= 'A' && c <= 'Z': - c -= 'A' - 'a' - buf.WriteRune(c) - lastWasUnderscore = false + // lower-case + norm[i] += 'a' - 'A' + chars++ + wiping = false continue } c = unicode.ToLower(c) switch { - // handle always valid cases case unicode.IsLetter(c) || c == ':': - buf.WriteRune(c) - lastWasUnderscore = false - // skip any characters that can't start the string - case buf.Len() == 0: + chars++ + wiping = false + case chars == 0: + // this character can not start the string, trim + trim = i + 1 continue - // handle valid characters that can't start the string. case unicode.IsDigit(c) || c == '.' || c == '/' || c == '-': - buf.WriteRune(c) - lastWasUnderscore = false - // convert anything else to underscores (including underscores), but only allow one in a row. - case !lastWasUnderscore: - buf.WriteRune('_') - lastWasUnderscore = true + chars++ + wiping = false + default: + // illegal character + if !wiping { + // start a new cut + wipe = append(wipe, [2]int{i, i + 1}) + wiping = true + } else { + // lengthen current cut + wipe[len(wipe)-1][1]++ + } } } - // strip trailing underscores - if lastWasUnderscore { - b := buf.Bytes() - return string(b[:len(b)-1]) + norm = norm[trim : i+1] // trim start and end + if len(wipe) == 0 { + // tag was ok, return it as it is + return string(norm) } + delta := trim // cut offsets delta + for _, cut := range wipe { + // start and end of cut, including delta from previous cuts: + start, end := cut[0]-delta, cut[1]-delta + + if end >= len(norm) { + // this cut includes the end of the string; discard it + // completely and finish the loop. + norm = norm[:start] + break + } + // replace the beginning of the cut with '_' + norm[start] = '_' + if end-start == 1 { + // nothing to discard + continue + } + // discard remaining characters in the cut + copy(norm[start+1:], norm[end:]) - return buf.String() + // shorten the slice + norm = norm[:len(norm)-(end-start)+1] + + // count the new delta for future cuts + delta += cut[1] - cut[0] - 1 + } + return string(norm) } diff --git a/pkg/trace/agent/tags_test.go b/pkg/trace/agent/tags_test.go index 927921b8e9089..2368c2a819f98 100644 --- a/pkg/trace/agent/tags_test.go +++ b/pkg/trace/agent/tags_test.go @@ -145,3 +145,41 @@ func TestTagSetKey(t *testing.T) { ts := NewTagSetFromString("a:b,a:b:c,abc") assert.Equal(t, ":abc,a:b,a:b:c", ts.Key()) } + +func TestNormalizeTag(t *testing.T) { + for _, tt := range []struct{ in, out string }{ + {in: "ok", out: "ok"}, + {in: "AlsO:ök", out: "also:ök"}, + {in: ":still_ok", out: ":still_ok"}, + {in: "___trim", out: "trim"}, + {in: "12.:trim@", out: ":trim"}, + {in: "12.:trim@@", out: ":trim"}, + {in: "fun:ky__tag/1", out: "fun:ky_tag/1"}, + {in: "fun:ky@tag/2", out: "fun:ky_tag/2"}, + {in: "fun:ky@@@tag/3", out: "fun:ky_tag/3"}, + {in: "tag:1/2.3", out: "tag:1/2.3"}, + {in: "---fun:k####y_ta@#g/1_@@#", out: "fun:k_y_ta_g/1"}, + {in: "AlsO:œ#@ö))œk", out: "also:œ_ö_œk"}, + } { + t.Run("", func(t *testing.T) { + assert.Equal(t, tt.out, NormalizeTag(tt.in), tt.in) + }) + } +} + +func benchNormalizeTag(tag string) func(b *testing.B) { + return func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + NormalizeTag(tag) + } + } +} + +func BenchmarkNormalizeTag(b *testing.B) { + b.Run("ok", benchNormalizeTag("good_tag")) + b.Run("trim", benchNormalizeTag("___trim_left")) + b.Run("trim-both", benchNormalizeTag("___trim_right@@#!")) + b.Run("plenty", benchNormalizeTag("fun:ky_ta@#g/1")) + b.Run("more", benchNormalizeTag("fun:k####y_ta@#g/1_@@#")) +} diff --git a/releasenotes/notes/improve-performance-of-NormalizeTag-function-7eba70c13f0bdad7.yaml b/releasenotes/notes/improve-performance-of-NormalizeTag-function-7eba70c13f0bdad7.yaml new file mode 100644 index 0000000000000..90b144602fe5a --- /dev/null +++ b/releasenotes/notes/improve-performance-of-NormalizeTag-function-7eba70c13f0bdad7.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + APM: improve performance of NormalizeTag function.