Skip to content

Commit

Permalink
Make * and + non-greedy to double regex filter speed. (#4775)
Browse files Browse the repository at this point in the history
* Optimize regular expressions in filters.

* Run original RE2 tests.

* Run both tests.

* Update pkg/logql/log/filter.go

Co-authored-by: Bryan Boreham <bjboreham@gmail.com>

* Update pkg/logql/log/filter.go

Co-authored-by: Bryan Boreham <bjboreham@gmail.com>

* Fix tests.

* Format code.

Co-authored-by: Bryan Boreham <bjboreham@gmail.com>
  • Loading branch information
jeschkies and bboreham authored Nov 19, 2021
1 parent 53fc2d7 commit 4c9e4f5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 14 deletions.
22 changes: 21 additions & 1 deletion pkg/logql/log/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,14 +335,34 @@ func parseRegexpFilter(re string, match bool) (Filterer, error) {
// attempt to improve regex with tricks
f, ok := simplify(reg)
if !ok {
return newRegexpFilter(re, match)
allNonGreedy(reg)
return newRegexpFilter(reg.String(), match)
}
if match {
return f, nil
}
return newNotFilter(f), nil
}

// allNonGreedy turns greedy quantifiers such as `.*` and `.+` into non-greedy ones. This is the same effect as writing
// `.*?` and `.+?`. This is only safe because we use `Match`. If we were to find the exact position and length of the match
// we would not be allowed to make this optimization. `Match` can return quicker because it is not looking for the longest match.
// Prepending the expression with `(?U)` or passing `NonGreedy` to the expression compiler is not enough since it will
// just negate `.*` and `.*?`.
func allNonGreedy(regs ...*syntax.Regexp) {
clearCapture(regs...)
for _, re := range regs {
switch re.Op {
case syntax.OpCapture, syntax.OpConcat, syntax.OpAlternate:
allNonGreedy(re.Sub...)
case syntax.OpStar, syntax.OpPlus:
re.Flags = re.Flags | syntax.NonGreedy
default:
continue
}
}
}

// simplify a regexp expression by replacing it, when possible, with a succession of literal filters.
// For example `(foo|bar)` will be replaced by `containsFilter(foo) or containsFilter(bar)`
func simplify(reg *syntax.Regexp) (Filterer, bool) {
Expand Down
28 changes: 15 additions & 13 deletions pkg/logql/log/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,19 @@ func Test_SimplifiedRegex(t *testing.T) {
{"(?i)foo", true, newContainsFilter([]byte("foo"), true), true},

// regex we are not supporting.
{"[a-z]+foo", false, nil, false},
{".+foo", false, nil, false},
{".*fo.*o", false, nil, false},
{`\d`, false, nil, false},
{`\sfoo`, false, nil, false},
{"[a-z]+foo", true, nil, false},
{".+foo", true, nil, false},
{".*fo.*o", true, nil, false},
{`\d`, true, nil, false},
{`\sfoo`, true, nil, false},
{`foo?`, false, nil, false},
{`foo{1,2}bar{2,3}`, false, nil, false},
{`foo|\d*bar`, false, nil, false},
{`foo|fo{1,2}`, false, nil, false},
{`foo|fo\d*`, false, nil, false},
{`foo|fo\d+`, false, nil, false},
{`(\w\d+)`, false, nil, false},
{`.*f.*oo|fo{1,2}`, false, nil, false},
{`foo{1,2}bar{2,3}`, true, nil, false},
{`foo|\d*bar`, true, nil, false},
{`foo|fo{1,2}`, true, nil, false},
{`foo|fo\d*`, true, nil, false},
{`foo|fo\d+`, true, nil, false},
{`(\w\d+)`, true, nil, false},
{`.*f.*oo|fo{1,2}`, true, nil, false},
} {
t.Run(test.re, func(t *testing.T) {
d, err := newRegexpFilter(test.re, test.match)
Expand All @@ -83,7 +83,9 @@ func Test_SimplifiedRegex(t *testing.T) {
}
// otherwise ensure we have different filter
require.NotEqual(t, f, d)
require.Equal(t, test.expected, f)
if test.expected != nil {
require.Equal(t, test.expected, f)
}
// tests all lines with both filter, they should have the same result.
for _, line := range fixtures {
l := []byte(line)
Expand Down

0 comments on commit 4c9e4f5

Please sign in to comment.