From 3f0a4e87333d250ad4351d2f727a031f4f5fc213 Mon Sep 17 00:00:00 2001 From: braydonk Date: Tue, 28 May 2024 14:27:12 +0000 Subject: [PATCH] matching code rewritten --- cmd/mdatagen/templates/config.go.tmpl | 24 +- cmd/mdatagen/templates/match.go.tmpl | 567 +++++++++++++++------ cmd/mdatagen/templates/match_test.go.tmpl | 571 +++++++++++++++------- 3 files changed, 806 insertions(+), 356 deletions(-) diff --git a/cmd/mdatagen/templates/config.go.tmpl b/cmd/mdatagen/templates/config.go.tmpl index 06653db8367..0f9262eb7f6 100644 --- a/cmd/mdatagen/templates/config.go.tmpl +++ b/cmd/mdatagen/templates/config.go.tmpl @@ -56,13 +56,13 @@ func (msc *MetricsConfig) Unmarshal(parser *confmap.Conf) error { if parser == nil { return nil } - if ContainsPattern(parser.AllKeys()) { - confStrMap := parser.ToStringMap() - expandedConfig := ExpandPatternMap(confStrMap, MetricNames) - newParser := confmap.NewFromStringMap(expandedConfig) - return newParser.Unmarshal(msc) + confStrMap := parser.ToStringMap() + expandedConfig, err := expandPatternMap(confStrMap, MetricNames) + if err != nil { + return err } - return parser.Unmarshal(msc) + newParser := confmap.NewFromStringMap(expandedConfig) + return newParser.Unmarshal(msc) } {{- end }} @@ -122,13 +122,13 @@ func (rasc *ResourceAttributesConfig) Unmarshal(parser *confmap.Conf) error { if parser == nil { return nil } - if ContainsPattern(parser.AllKeys()) { - confStrMap := parser.ToStringMap() - expandedConfig := ExpandPatternMap(confStrMap, ResourceAttributeNames) - newParser := confmap.NewFromStringMap(expandedConfig) - return newParser.Unmarshal(rasc) + confStrMap := parser.ToStringMap() + expandedConfig, err := expandPatternMap(confStrMap, ResourceAttributeNames) + if err != nil { + return err } - return parser.Unmarshal(rasc) + newParser := confmap.NewFromStringMap(expandedConfig) + return newParser.Unmarshal(rasc) } {{- end }} diff --git a/cmd/mdatagen/templates/match.go.tmpl b/cmd/mdatagen/templates/match.go.tmpl index 6491a652626..979b8b8f28b 100644 --- a/cmd/mdatagen/templates/match.go.tmpl +++ b/cmd/mdatagen/templates/match.go.tmpl @@ -4,231 +4,484 @@ package metadata import ( "errors" + "fmt" "io" "sort" "strings" + "unicode" ) -var ErrNotMultimatch = errors.New("this index doesn't represent a valid multimatch pattern") +func expandPatternMap(patternMap map[string]any, matchNames []string) (map[string]any, error) { + patterns := patternBlocks{} -type Pattern struct { - pattern string - level int - hasWildcard bool - valueAssign any -} + patternStrBlocks := []patternStringBlock{} + for patternStr, data := range patternMap { + patternStrBlocks = append(patternStrBlocks, patternStringBlock{ + ptStr: patternStr, + data: data, + }) + } -func NewPattern(pattern string, valueAssign any) Pattern { - level := 0 - for _, c := range pattern { - if c == '.' { - level++ + for _, patternStrBlock := range patternStrBlocks { + patternBlock, err := patternBlockFromStringBlock(patternStrBlock) + if err != nil { + return nil, err } + patterns.add(patternBlock) } - wildcard := strings.Contains(pattern, "*") - return Pattern{ - pattern: pattern, - level: level, - hasWildcard: wildcard, - valueAssign: valueAssign, - } -} -func (p Pattern) Match(s string) bool { - if len(s) == 0 { - return true + expandedMap, err := matchAllPatterns(patterns, matchNames) + if err != nil { + return nil, err } - lastWildcard := -1 - patternScan := &scanner{str: p.pattern} - strScan := &scanner{str: s} - for !strScan.isFinished() { - strChar, err := strScan.current() - if err != nil { - return false - } - patternChar, err := patternScan.current() - if err != nil { - return false - } - switch patternChar { - case '*': - if patternScan.isLast() { - return true - } - patternNext, err := patternScan.peek() - if err == nil && strChar == patternNext { - lastWildcard = patternScan.idx - patternScan.idx += 2 - } + return expandedMap, nil +} - case '{': - allowedMatches, err := patternScan.parseMultimatch() - if err == nil { - if !strScan.tryMultimatch(allowedMatches) { - return false - } - // Breaks the switch statement - break - } - // Fallthrough if the multimatch parsing failed - fallthrough +type groupType int - default: - if strChar != patternChar { - if lastWildcard == -1 { - return false - } - patternScan.idx = lastWildcard - lastWildcard = -1 - } else { - patternScan.idx++ - } - } +const ( + groupWildcard groupType = iota + groupMultimatch + groupIdentifier +) - strScan.idx++ - } - return patternScan.isFinished() +type group struct { + typ groupType + values []string } -type Patterns []Pattern +type pattern struct { + originalStr string + groups []group +} -func AddPattern(ps *Patterns, p Pattern) { - newPatterns := append(*ps, p) - sort.SliceStable(newPatterns, func(i, j int) bool { - if newPatterns[i].hasWildcard && newPatterns[j].hasWildcard { - return newPatterns[i].level < newPatterns[j].level - } - return newPatterns[i].hasWildcard - }) - *ps = newPatterns +func newPatternFromString(str string) (*pattern, error) { + p := &parser{str: str} + return p.parse() } -type scanner struct { +type parser struct { str string idx int } -func (s *scanner) current() (rune, error) { - if s.isFinished() { - return 0, io.EOF - } +func (p *parser) parse() (*pattern, error) { + // parse: + pt := &pattern{originalStr: p.str} + var parseErr error +parseLoop: + for !p.isFinished() { + curr, err := p.current() + if err != nil { + break + } + switch curr { + case '*': + g, err := p.parseWildcard() + if err != nil { + parseErr = err + break parseLoop + } + pt.groups = append(pt.groups, g) + case '{': + g, err := p.parseMultimatch() + if err != nil { + parseErr = err + break parseLoop + } + pt.groups = append(pt.groups, g) + default: + identifier, err := p.parseIdentifier() + if err != nil { + parseErr = err + break parseLoop + } - return rune(s.str[s.idx]), nil -} + pt.groups = append( + pt.groups, + group{typ: groupIdentifier, values: []string{identifier}}, + ) + } -func (s *scanner) peek() (rune, error) { - if s.idx >= len(s.str)-1 { - return 0, io.EOF + // After a valid group, only a '.' and another group + // or end of input are valid. + if !p.isFinished() { + curr, _ := p.current() + if curr != '.' { + parseErr = p.parseError(errUnexpectedToken) + break parseLoop + } + p.idx++ + if p.isFinished() { + parseErr = p.parseError(errUnexpectedEndOfInput) + break parseLoop + } + } } - return rune(s.str[s.idx+1]), nil + return pt, parseErr } -func (s *scanner) isLast() bool { - return s.idx == len(s.str)-1 -} +func (p *parser) parseWildcard() (group, error) { + // If a wildcard is found, the only valid next steps + // are a group separator or the end of the pattern. -func (s *scanner) isFinished() bool { - return s.idx >= len(s.str) + curr, err := p.current() + if err != nil { + return group{}, p.internalError(errUnexpectedEndOfInput) + } + if curr != '*' { + return group{}, p.internalError(errUnexpectedToken) + } + + p.idx++ + return group{typ: groupWildcard}, nil } -func (s *scanner) parseMultimatch() ([]string, error) { - current, err := s.current() - if err != nil || current != '{' { - return nil, ErrNotMultimatch +func (p *parser) parseMultimatch() (group, error) { + // A multimatch is defined by this rule in EBNF: + // + // multimatch: '{' (otel_identifier ((',' ' '*)? otel_identifier)*) '}' + // + // It is a comma-separated list of identifiers enclosed in braces. + // It cannot be empty. + // Whitespace after a comma is allowed and has no significance. + + // Consume the opening brace. + curr, err := p.current() + if err != nil { + return group{}, p.internalError(errUnexpectedEndOfInput) } - startIdx := s.idx - s.idx++ - matchStrings := []string{} - currentStr := "" - for !s.isFinished() { - c, err := s.current() + if curr != '{' { + return group{}, p.internalError(errUnexpectedToken) + } + p.idx++ + + var parseErr error + identifiers := []string{} +parseLoop: + for !p.isFinished() { + curr, err := p.current() if err != nil { - return nil, err + parseErr = p.parseError(errUnexpectedEndOfInput) + break } - switch c { + switch curr { case '}': - matchStrings = append(matchStrings, currentStr) - s.idx++ - return matchStrings, nil - + // The multimatch is complete. + p.idx++ + break parseLoop case ',': - matchStrings = append(matchStrings, currentStr) - currentStr = "" - + // After a comma, only whitespace or the start of an identifier + // (a CHARACTER) is valid. + p.idx++ + curr, err := p.current() + if err != nil { + parseErr = p.parseError(errUnexpectedEndOfInput) + break parseLoop + } + if curr != ' ' && !validIdentifierChar(curr) { + parseErr = p.parseError(errUnexpectedToken) + break parseLoop + } + case ' ': + // Whitespace is silently consumed. + p.idx++ default: - currentStr += string(c) + if !validIdentifierChar(curr) { + parseErr = p.parseError(errUnexpectedToken) + break parseLoop + } + identifier, err := p.parseIdentifier() + if err != nil { + parseErr = err + break parseLoop + } + identifiers = append(identifiers, identifier) } + } - s.idx++ + if len(identifiers) == 0 { + parseErr = p.parseError(errEmptyMultimatch) } - s.idx = startIdx - return nil, ErrNotMultimatch + return group{ + typ: groupMultimatch, + values: identifiers, + }, parseErr } -func (s *scanner) tryMultimatch(multimatch []string) bool { - if len(multimatch) == 0 { - return true +func (p *parser) parseIdentifier() (string, error) { + // An identifier is defined by this rule in EBNF: + // + // otel_identifier: CHARACTER (UNDERSCORE? CHARACTER)* + // + // It can only contain characters and underscores, and cannot + // start or end with an underscore. + var identifier string + var parseErr error + + // Consume the starting CHARACTER. + curr, err := p.current() + if err != nil { + return "", p.internalError(errUnexpectedEndOfInput) + } + if !validIdentifierChar(curr) { + return "", p.parseError(errStartAlphabetic) } - startIdx := s.idx - for _, m := range multimatch { - matchScan := &scanner{str: m} - for !s.isFinished() { - matchChar, err := matchScan.current() + p.idx++ + + identifier += string(curr) + + // Consume the rest of the identifier with the following procedure: + // + // If a character is found, add it to the identifier. + // + // If an underscore is found, add it to the identifier and consume the next + // token ensuring it is a character. + // + // If any other character or the end of input is found, assume the identifier + // is complete. + for !p.isFinished() { + curr, err := p.current() + if err != nil { + break + } + + if validIdentifierChar(curr) { + identifier += string(curr) + p.idx++ + continue + } + if curr == '_' { + identifier += string(curr) + p.idx++ + afterUnderscore, err := p.current() if err != nil { + parseErr = p.parseError(errUnexpectedEndOfInput) break } - strChar, err := s.current() - if err != nil { + if validIdentifierChar(afterUnderscore) { + identifier += string(afterUnderscore) + p.idx++ + continue + } else { + parseErr = p.parseError(errUnexpectedToken) break } + } - if matchChar != strChar { - s.idx = startIdx - break + break + } + + return identifier, parseErr +} + +func (p *parser) current() (rune, error) { + if p.isFinished() { + return 0, io.EOF + } + + return rune(p.str[p.idx]), nil +} + +func (p *parser) isFinished() bool { + return p.idx >= len(p.str) +} + +func (p *parser) parseError(err error) error { + return &patternParseError{ + p: p.str, + idx: p.idx, + err: fmt.Errorf("invalid pattern: %w", err), + } +} + +func (p *parser) internalError(err error) error { + return &patternParseError{ + p: p.str, + idx: p.idx, + err: fmt.Errorf("internal parser error: %w", err), + } +} + +func validIdentifierChar(c rune) bool { + return unicode.IsLetter(c) || unicode.IsDigit(c) +} + +var ( + errUnexpectedEndOfInput = errors.New("unexpected end of input") + errUnexpectedToken = errors.New("unexpected token") + errEmptyMultimatch = errors.New("empty multimatch") + errStartAlphabetic = errors.New("identifier must start with an alphabetic character") +) + +type patternParseError struct { + p string + idx int + err error +} + +func (e *patternParseError) Error() string { + msg := "pattern parsing error:\n" + msg += e.p + "\n" + msg += strings.Repeat(" ", e.idx) + "^" + "\n" + msg += e.err.Error() + return msg +} + +type patternStringBlock struct { + ptStr string + data any +} + +type patternBlock struct { + pt *pattern + data any +} + +func patternBlockFromStringBlock(pb patternStringBlock) (patternBlock, error) { + pt, err := newPatternFromString(pb.ptStr) + if err != nil { + return patternBlock{}, err + } + return patternBlock{ + pt: pt, + data: pb.data, + }, nil +} + +type patternBlocks []patternBlock + +func (pbs *patternBlocks) add(pb patternBlock) { + newPbs := append(*pbs, pb) + sort.SliceStable(newPbs, func(i, j int) bool { + // The pattern with less groups will be applied first. + if len(newPbs[i].pt.groups) != len(newPbs[j].pt.groups) { + return len(newPbs[i].pt.groups) < len(newPbs[j].pt.groups) + } + + // Checking if one has a matcher and one doesn't, or one has an + // earlier matcher than another. + earliestMatcherI := -1 + earliestMatcherJ := -1 + for groupIdx := 0; groupIdx < len(newPbs[i].pt.groups); groupIdx++ { + iGroup := newPbs[i].pt.groups[groupIdx] + jGroup := newPbs[j].pt.groups[groupIdx] + + if (iGroup.typ == groupWildcard || iGroup.typ == groupMultimatch) && earliestMatcherI == -1 { + earliestMatcherI = groupIdx } - matchScan.idx++ - if matchScan.isFinished() { - return true + if (jGroup.typ == groupWildcard || jGroup.typ == groupMultimatch) && earliestMatcherJ == -1 { + earliestMatcherJ = groupIdx } - s.idx++ } - } - return false -} -func ContainsPattern(keys []string) bool { - for _, key := range keys { - if strings.Contains(key, "*") || strings.Contains(key, "{") { + // If neither has a matcher, jump to lexicographic ordering. + if earliestMatcherI == -1 && earliestMatcherJ == -1 { + goto lexicographic + } + + // If one has a matcher and one doesn't, that will be applied first. + if earliestMatcherI != -1 && earliestMatcherJ == -1 { return true } + if earliestMatcherJ != -1 && earliestMatcherI == -1 { + return false + } + + // If they both have matchers but aren't the same, the pattern with the + // earlier matcher will be applied first. + if earliestMatcherI != earliestMatcherJ { + return earliestMatcherI < earliestMatcherJ + } + + // If they both have their earliest matchers at the same index, + // wildcards are applied first. + if newPbs[i].pt.groups[earliestMatcherI].typ != newPbs[j].pt.groups[earliestMatcherJ].typ { + return newPbs[i].pt.groups[earliestMatcherI].typ == groupWildcard + } + + lexicographic: + // If the earliest present matcher is the same, the final sorting is determined + // lexicographically. + return strings.Compare(newPbs[i].pt.originalStr, newPbs[j].pt.originalStr) < 0 + }) + *pbs = newPbs +} + +func matchAllPatterns(patterns patternBlocks, names []string) (map[string]any, error) { + matchedNameData := map[string]any{} + for _, patternBlock := range patterns { + matches, err := matchNames(patternBlock.pt, names) + if err != nil { + return nil, err + } + for _, match := range matches { + matchedNameData[match] = patternBlock.data + } } - return false + return matchedNameData, nil } -func ExpandPatternMap(patternMap map[string]any, matchNames []string) map[string]any { - expandedMap := map[string]any{} - patterns := Patterns{} +func matchNames(p *pattern, names []string) ([]string, error) { + matches := []string{} + for _, nameStr := range names { + name, err := newPatternFromString(nameStr) + if err != nil { + return nil, err + } - for pattern, assign := range patternMap { - AddPattern(&patterns, NewPattern(pattern, assign)) + pGroupIdx := 0 + matched := true + for nameGroupIdx, nameGroup := range name.groups { + currPatternGroup := p.groups[pGroupIdx] + if matchGroup(currPatternGroup, nameGroup) { + if pGroupIdx == len(p.groups)-1 { // If this is the last group in the pattern + // If this last group is not a wildcard + if currPatternGroup.typ != groupWildcard { + // Check that we are at the last group in the name, if + // we are not then the match fails. + if nameGroupIdx != len(name.groups)-1 { + matched = false + break + } + } + } else if nameGroupIdx == len(name.groups)-1 { // If this is the last group in the name but not the pattern + matched = false + break + } else { // Otherwise move to the next group in the patter + pGroupIdx++ + } + } else { + matched = false + break + } + } + if matched { + matches = append(matches, nameStr) + } } - for _, pattern := range patterns { - matched := []string{} - for _, name := range matchNames { - if pattern.Match(name) { - matched = append(matched, name) + return matches, nil +} + +func matchGroup(patternGroup, matchGroup group) bool { + switch patternGroup.typ { + case groupWildcard: + return true + case groupMultimatch: + for _, value := range patternGroup.values { + if value == matchGroup.values[0] { + return true } } - for _, name := range matched { - expandedMap[name] = pattern.valueAssign - } + case groupIdentifier: + return patternGroup.values[0] == matchGroup.values[0] } - return expandedMap + return false } diff --git a/cmd/mdatagen/templates/match_test.go.tmpl b/cmd/mdatagen/templates/match_test.go.tmpl index 6269396da73..edf266ee569 100644 --- a/cmd/mdatagen/templates/match_test.go.tmpl +++ b/cmd/mdatagen/templates/match_test.go.tmpl @@ -1,249 +1,446 @@ -package metadata +// Code generated by mdatagen. DO NOT EDIT. + +package {{ .Package }} import ( + "reflect" "testing" ) -func TestMatch(t *testing.T) { - type tryMatch struct { - text string - matched bool - } +func TestParsePatternValid(t *testing.T) { testCases := []struct { - pattern string - tryMatches []tryMatch + name string + input string + pattern *pattern }{ { - pattern: "process.memory.*", - tryMatches: []tryMatch{ - { - text: "process.memory.rss", - matched: true, - }, - { - text: "process.memory.virtual", - matched: true, + name: "basic", + input: "*", + pattern: &pattern{ + groups: []group{ + { + typ: groupWildcard, + }, }, }, }, { - pattern: "*.memory.usage", - tryMatches: []tryMatch{ - { - text: "process.memory.usage", - matched: true, - }, - { - text: "process.memory.rss", - matched: false, - }, - { - text: ".memory.usage", - matched: true, + name: "wildcard end", + input: "x.*", + pattern: &pattern{ + groups: []group{ + { + typ: groupIdentifier, + values: []string{ + "x", + }, + }, + { + typ: groupWildcard, + }, }, }, }, { - pattern: "process.memory.{rss,virtual}", - tryMatches: []tryMatch{ - { - text: "process.memory.rss", - matched: true, - }, - { - text: "process.memory.virtual", - matched: true, - }, - { - text: "process.memory.usage", - matched: false, + name: "wildcard start", + input: "*.x", + pattern: &pattern{ + groups: []group{ + { + typ: groupWildcard, + }, + { + typ: groupIdentifier, + values: []string{ + "x", + }, + }, }, }, }, { - pattern: "{process,system}.memory.utilization", - tryMatches: []tryMatch{ - { - text: "process.memory.utilization", - matched: true, - }, - { - text: "system.memory.utilization", - matched: true, - }, - { - text: "process.memory.rss", - matched: false, - }, - { - text: "some.random.metric", - matched: false, + name: "multimatch", + input: "a_b.{one,two}", + pattern: &pattern{ + groups: []group{ + { + typ: groupIdentifier, + values: []string{ + "a_b", + }, + }, + { + typ: groupMultimatch, + values: []string{ + "one", + "two", + }, + }, }, }, }, { - pattern: "illegal.multimatch.{pattern", - tryMatches: []tryMatch{ - { - text: "illegal.multimatch.pattern", - matched: false, - }, - { - text: "illegal.multimatch.{pattern", - matched: true, + name: "identifier wildcard and multimatch", + input: "x.*.{first_metric,second_metric}", + pattern: &pattern{ + groups: []group{ + { + typ: groupIdentifier, + values: []string{ + "x", + }, + }, + { + typ: groupWildcard, + }, + { + typ: groupMultimatch, + values: []string{ + "first_metric", + "second_metric", + }, + }, }, }, }, { - pattern: "process.{memory,cpu}.*", - tryMatches: []tryMatch{ - { - text: "process.memory.rss", - matched: true, + name: "identifier multimatch and wildcard", + input: "x.{first_metric,second_metric}.*", + pattern: &pattern{ + groups: []group{ + { + typ: groupIdentifier, + values: []string{ + "x", + }, + }, + { + typ: groupMultimatch, + values: []string{ + "first_metric", + "second_metric", + }, + }, + { + typ: groupWildcard, + }, }, - { - text: "process.cpu.utilization", - matched: true, + }, + }, + { + name: "with numeric character", + input: "x1", + pattern: &pattern{ + groups: []group{ + { + typ: groupIdentifier, + values: []string{ + "x1", + }, + }, }, }, }, } + for _, tc := range testCases { - p := NewPattern(tc.pattern, false) - for _, m := range tc.tryMatches { - matched := p.Match(m.text) - if matched != m.matched { - t.Errorf("pattern=%q, text=%q = %v, want %v", tc.pattern, m.text, matched, m.matched) + t.Run(tc.name, func(t *testing.T) { + p, err := newPatternFromString(tc.input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !comparePatterns(p, tc.pattern) { + t.Errorf("expected %v, got %v", tc.pattern, p) } + }) + } +} + +func comparePatterns(a, b *pattern) bool { + if len(a.groups) != len(b.groups) { + return false + } + for i := range a.groups { + if a.groups[i].typ != b.groups[i].typ { + return false + } + if !arrayCompare(a.groups[i].values, b.groups[i].values) { + return false } } + return true } -func TestPatternPriority(t *testing.T) { - type tryPattern struct { - pattern string - value int +func arrayCompare[T comparable](a, b []T) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } } - type tryMetric struct { + return true +} + +func TestParsePatternInvalid(t *testing.T) { + testCases := []struct { name string - value int + input string + }{ + { + name: "identifier underscore ending", + input: "x_", + }, + { + name: "identifier underscore starting", + input: "_x", + }, + { + name: "wildcard in the middle of identifier", + input: "x*y", + }, + { + name: "multimatch in the middle of identifier", + input: "x{one,two}y", + }, + { + name: "invalid character in multimatch option", + input: "{a.b,c}", + }, + { + name: "invalid ending to multimatch", + input: "x.{a,", + }, + { + name: "invalid start to multimatch", + input: "x.b}", + }, + { + name: "starts with .", + input: ".x", + }, + { + name: "ends with .", + input: "x.", + }, + { + name: "random character", + input: "x.f^fl.x", + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + _, err := newPatternFromString(tc.input) + // Use -v to check out the error message. + t.Logf("error output:\n---\n%v\n---\n", err) + if err == nil { + t.Fatalf("expected error, got nil") + } + }) } +} + +func TestMatchNames(t *testing.T) { testCases := []struct { - patterns []tryPattern - metrics []tryMetric + name string + pattern string + names []string + expectedMatches []string + expectError bool }{ { - patterns: []tryPattern{ - { - pattern: "processes.*", - value: 1, - }, - { - pattern: "*", - value: 0, - }, + name: "simple match", + pattern: "a", + names: []string{"a"}, + expectedMatches: []string{"a"}, + }, + { + name: "simple wildcard match", + pattern: "*", + names: []string{"a", "a.a", "a.a.a"}, + expectedMatches: []string{"a", "a.a", "a.a.a"}, + }, + { + name: "simple multimatch", + pattern: "a.{b,c}", + names: []string{"a.b", "a.c", "a.d"}, + expectedMatches: []string{"a.b", "a.c"}, + }, + { + name: "wildcard match in prefix", + pattern: "*.a", + names: []string{"a", "b.a", "c.a"}, + expectedMatches: []string{"b.a", "c.a"}, + }, + { + name: "wildcard and multimatch", + pattern: "a.{b,c,d}.*", + names: []string{"a.b.a", "a.c.x", "a.d.c.d.f", "a.e.x"}, + expectedMatches: []string{"a.b.a", "a.c.x", "a.d.c.d.f"}, + }, + { + name: "shorter name than pattern", + pattern: "{b,c}", + names: []string{"a.b", "a.c"}, + expectedMatches: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p, err := newPatternFromString(tc.pattern) + if err != nil { + t.Fatalf("failed to parse pattern: %v", err) + } + matches, err := matchNames(p, tc.names) + if tc.expectError && err == nil { + t.Fatalf("expected error, got none") + } else { + if !arrayCompare(matches, tc.expectedMatches) { + t.Fatalf("expected %v, got %v", tc.expectedMatches, matches) + } + } + }) + } +} + +func TestPatternPriorityAndDeterminism(t *testing.T) { + testCases := []struct { + name string + patterns []string + expectedOrder []string + }{ + { + name: "less groups applied first", + patterns: []string{"b.a", "a"}, + expectedOrder: []string{"a", "b.a"}, + }, + { + name: "earlier matcher applied first", + patterns: []string{"a.*", "*.a", "*.*.a", "a.a.*"}, + expectedOrder: []string{"*.a", "a.*", "*.*.a", "a.a.*"}, + }, + { + name: "wildcard before multimatch", + patterns: []string{"{a,b}.*", "*.{a,b}"}, + expectedOrder: []string{"*.{a,b}", "{a,b}.*"}, + }, + { + name: "lexicographical order", + patterns: []string{"b", "c", "a"}, + expectedOrder: []string{"a", "b", "c"}, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + testPatternPriorityOrder(t, tc.patterns, tc.expectedOrder) + }) + } +} + +func stringsToPatternBlocks(t *testing.T, patterns []string) patternBlocks { + t.Helper() + + ptBlocks := patternBlocks{} + for _, p := range patterns { + ptStrBlock := patternStringBlock{ptStr: p} + ptBlock, err := patternBlockFromStringBlock(ptStrBlock) + if err != nil { + t.Fatalf("failed to parse pattern: %v", err) + } + ptBlocks.add(ptBlock) + } + return ptBlocks +} + +func testPatternPriorityOrder(t *testing.T, input []string, expectedOrder []string) { + t.Helper() + + ptBlocks := stringsToPatternBlocks(t, input) + resultOrder := []string{} + for _, ptBlock := range ptBlocks { + resultOrder = append(resultOrder, ptBlock.pt.originalStr) + } + if !arrayCompare(resultOrder, expectedOrder) { + t.Fatalf("expected %v, got %v", expectedOrder, resultOrder) + } +} + +var httpMetrics = []string{ + "http.server.request.duration", + "http.server.active_requests", + "http.server.request.body.size", + "http.server.response.body.size", + "http.client.request.duration", + "http.client.request.body.size", + "http.client.response.body.size", + "http.client.open_connections", + "http.client.connection.duration", + "http.client.active_requests", +} + +func TestApplicationScenarios(t *testing.T) { + testCases := []struct { + name string + patterns map[string]any + names []string + expectedMatches map[string]any + }{ + { + name: "simple match", + patterns: map[string]any{ + "a": 1, }, - metrics: []tryMetric{ - { - name: "process.memory.rss", - value: 0, - }, - { - name: "process.memory.usage", - value: 0, - }, - { - name: "process.memory.utilization", - value: 0, - }, - { - name: "processes.count", - value: 1, - }, - { - name: "processes.created", - value: 1, - }, + names: []string{"a"}, + expectedMatches: map[string]any{ + "a": 1, }, }, { - patterns: []tryPattern{ - { - pattern: "process.memory.*", - value: 1, - }, - { - pattern: "process.memory.rss", - value: 0, - }, + name: "http metrics wildcard", + names: httpMetrics, + patterns: map[string]any{ + "*": 0, + "http.client.*": 1, }, - metrics: []tryMetric{ - { - name: "process.memory.vm", - value: 1, - }, - { - name: "process.memory.rss", - value: 0, - }, + expectedMatches: map[string]any{ + "http.server.request.duration": 0, + "http.server.active_requests": 0, + "http.server.request.body.size": 0, + "http.server.response.body.size": 0, + "http.client.request.duration": 1, + "http.client.request.body.size": 1, + "http.client.response.body.size": 1, + "http.client.open_connections": 1, + "http.client.connection.duration": 1, + "http.client.active_requests": 1, }, }, { - patterns: []tryPattern{ - { - pattern: "*", - value: 1, - }, - { - pattern: "process.memory.rss", - value: 0, - }, + name: "http metrics complex", + names: httpMetrics, + patterns: map[string]any{ + "http.*.{request,response}.body.size": 1, }, - metrics: []tryMetric{ - { - name: "process.memory.rss", - value: 0, - }, - { - name: "process.memory.usage", - value: 1, - }, + expectedMatches: map[string]any{ + "http.client.request.body.size": 1, + "http.client.response.body.size": 1, + "http.server.request.body.size": 1, + "http.server.response.body.size": 1, }, }, } - for _, tc := range testCases { - patterns := Patterns{} - for _, p := range tc.patterns { - AddPattern(&patterns, NewPattern(p.pattern, p.value)) - } - metrics := map[string]any{} - for _, m := range tc.metrics { - metrics[m.name] = false - } - for _, p := range patterns { - for _, m := range tc.metrics { - if p.Match(m.name) { - metrics[m.name] = p.valueAssign - } - } - } - - for _, m := range tc.metrics { - valueAny, ok := metrics[m.name] - if !ok { - t.Errorf("metric=%q not found", m.name) - continue - } - value, ok := valueAny.(int) - if !ok { - t.Errorf("invalid test data, metric=%q value is not an int", m.name) - continue + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := expandPatternMap(tc.patterns, tc.names) + if err != nil { + t.Fatalf("failed to match: %v", err) } - - if value != m.value { - t.Fatalf("metric=%q, got value=%v, want %v", m.name, metrics[m.name], m.value) + if !reflect.DeepEqual(result, tc.expectedMatches) { + t.Fatalf("expected %v, got %v", tc.expectedMatches, result) } - } + }) } }