Skip to content

Commit

Permalink
Add success cache minimum TTL for DNS responses (elastic#18986)
Browse files Browse the repository at this point in the history
An enhancement to add a minimum alternative cache TTL to the libbeat dns processor for successful DNS responses. This ensures that TTL=0 successful reverse DNS responses can be cached to avoid sending the same reverse DNS request again within a short period of time.

The libbeat dns processor is used as a reverse DNS annotator for auditbeat events. Some of these IP addresses respond to reverse DNS requests with TTL=0 in the responses. These were causing load issues for my systems when I had the reverse DNS processor enabled for auditbeat.

The new setting is `success_cache.min_ttl`.

Closes elastic#18709

Signed-off-by: Peter Ansell <p_ansell@yahoo.com>
Co-authored-by: Marc Guasch <marc.guasch@elastic.co>
  • Loading branch information
ansell and marc-gr authored Aug 10, 2020
1 parent dec042c commit 72da5a6
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,12 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add support to trim captured values in the dissect processor. {pull}19464[19464]
- Added the `max_cached_sessions` option to the script processor. {pull}19562[19562]
- Add support for DNS over TLS for the dns_processor. {pull}19321[19321]
- Add minimum cache TTL for successful DNS responses. {pull}18986[18986]
- Set index.max_docvalue_fields_search in index template to increase value to 200 fields. {issue}20215[20215]
- Add leader election for Kubernetes autodiscover. {pull}20281[20281]
- Add capability of enriching process metadata with container id also for non-privileged containers in `add_process_metadata` processor. {pull}19767[19767]


*Auditbeat*

- Reference kubernetes manifests include configuration for auditd and enrichment with kubernetes metadata. {pull}17431[17431]
Expand Down
24 changes: 14 additions & 10 deletions libbeat/processors/dns/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ func (r ptrRecord) IsExpired(now time.Time) bool {

type ptrCache struct {
sync.RWMutex
data map[string]ptrRecord
maxSize int
data map[string]ptrRecord
maxSize int
minSuccessTTL time.Duration
}

func (c *ptrCache) set(now time.Time, key string, ptr *PTR) {
Expand Down Expand Up @@ -135,11 +136,10 @@ func (ce *cachedError) Cause() error { return ce.err }
// reverse DNS queries. It caches the results of queries regardless of their
// outcome (success or failure).
type PTRLookupCache struct {
success *ptrCache
failure *failureCache
failureTTL time.Duration
resolver PTRResolver
stats cacheStats
success *ptrCache
failure *failureCache
resolver PTRResolver
stats cacheStats
}

type cacheStats struct {
Expand All @@ -155,8 +155,9 @@ func NewPTRLookupCache(reg *monitoring.Registry, conf CacheConfig, resolver PTRR

c := &PTRLookupCache{
success: &ptrCache{
data: make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
maxSize: conf.SuccessCache.MaxCapacity,
data: make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
maxSize: conf.SuccessCache.MaxCapacity,
minSuccessTTL: conf.SuccessCache.MinTTL,
},
failure: &failureCache{
data: make(map[string]failureRecord, conf.FailureCache.InitialCapacity),
Expand Down Expand Up @@ -198,11 +199,14 @@ func (c PTRLookupCache) LookupPTR(ip string) (*PTR, error) {
return nil, err
}

// We set the ptr.TTL to the minimum TTL in case it is less than that.
ptr.TTL = max(ptr.TTL, uint32(c.success.minSuccessTTL/time.Second))

c.success.set(now, ip, ptr)
return ptr, nil
}

func max(a, b int) int {
func max(a, b uint32) uint32 {
if a >= b {
return a
}
Expand Down
35 changes: 31 additions & 4 deletions libbeat/processors/dns/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ package dns

import (
"io"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"

Expand All @@ -30,12 +30,14 @@ import (
type stubResolver struct{}

func (r *stubResolver) LookupPTR(ip string) (*PTR, error) {
if ip == gatewayIP {
switch ip {
case gatewayIP:
return &PTR{Host: gatewayName, TTL: gatewayTTL}, nil
} else if strings.HasSuffix(ip, "11") {
case gatewayIP + "1":
return nil, io.ErrUnexpectedEOF
case gatewayIP + "2":
return &PTR{Host: gatewayName, TTL: 0}, nil
}

return nil, &dnsError{"fake lookup returned NXDOMAIN"}
}

Expand Down Expand Up @@ -98,4 +100,29 @@ func TestCache(t *testing.T) {
assert.EqualValues(t, 3, c.stats.Hit.Get())
assert.EqualValues(t, 3, c.stats.Miss.Get()) // Cache miss.
}

minTTL := defaultConfig.CacheConfig.SuccessCache.MinTTL
// Initial success returned TTL=0 with MinTTL.
ptr, err = c.LookupPTR(gatewayIP + "2")
if assert.NoError(t, err) {
assert.EqualValues(t, gatewayName, ptr.Host)

assert.EqualValues(t, minTTL/time.Second, ptr.TTL)
assert.EqualValues(t, 3, c.stats.Hit.Get())
assert.EqualValues(t, 4, c.stats.Miss.Get())

expectedExpire := time.Now().Add(minTTL).Unix()
gotExpire := c.success.data[gatewayIP+"2"].expires.Unix()
assert.InDelta(t, expectedExpire, gotExpire, 1)
}

// Cached success from a previous TTL=0 response.
ptr, err = c.LookupPTR(gatewayIP + "2")
if assert.NoError(t, err) {
assert.EqualValues(t, gatewayName, ptr.Host)
// TTL counts down while in cache.
assert.InDelta(t, minTTL/time.Second, ptr.TTL, 1)
assert.EqualValues(t, 4, c.stats.Hit.Get())
assert.EqualValues(t, 4, c.stats.Miss.Get())
}
}
7 changes: 7 additions & 0 deletions libbeat/processors/dns/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ type CacheSettings struct {
// from the DNS record.
TTL time.Duration `config:"ttl"`

// Minimum TTL value for successful DNS responses.
MinTTL time.Duration `config:"min_ttl" validate:"min=1"`

// Initial capacity. How much space is allocated at initialization.
InitialCapacity int `config:"capacity.initial" validate:"min=0"`

Expand Down Expand Up @@ -131,6 +134,9 @@ func (c *Config) Validate() error {

// Validate validates the data contained in the CacheConfig.
func (c *CacheConfig) Validate() error {
if c.SuccessCache.MinTTL <= 0 {
return errors.Errorf("success_cache.min_ttl must be > 0")
}
if c.FailureCache.TTL <= 0 {
return errors.Errorf("failure_cache.ttl must be > 0")
}
Expand All @@ -155,6 +161,7 @@ func (c *CacheConfig) Validate() error {
var defaultConfig = Config{
CacheConfig: CacheConfig{
SuccessCache: CacheSettings{
MinTTL: time.Minute,
InitialCapacity: 1000,
MaxCapacity: 10000,
},
Expand Down
4 changes: 4 additions & 0 deletions libbeat/processors/dns/docs/dns.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ processors:
success_cache:
capacity.initial: 1000
capacity.max: 10000
min_ttl: 1m
failure_cache:
capacity.initial: 1000
capacity.max: 10000
Expand Down Expand Up @@ -81,6 +82,9 @@ the memory for this number of items. Default value is `1000`.
cache can hold. When the maximum capacity is reached a random item is evicted.
Default value is `10000`.

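`success_cache.min_ttl`:: The minimum TTL applied to cached successful DNS responses. When a successful response carries a TTL lower than this value, this value is used instead, which ensures that successful reverse DNS responses with `TTL=0` can still be cached. The value is applied in whole seconds, so any sub-second part is discarded.
Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`.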

`failure_cache.capacity.initial`:: The initial number of items that the failure
cache will be allocated to hold. When initialized the processor will allocate
the memory for this number of items. Default value is `1000`.
Expand Down

0 comments on commit 72da5a6

Please sign in to comment.