From 89ee6b2fde2f44c6fe4e3feeac31ef516f43dfbb Mon Sep 17 00:00:00 2001
From: Peter Ansell
Date: Tue, 11 Aug 2020 07:15:27 +1000
Subject: [PATCH 1/2] Add success cache minimum TTL for DNS responses (#18986)

An enhancement to add a minimum alternative cache TTL to the libbeat dns
processor for successful DNS responses. This ensures that TTL=0 successful
reverse DNS responses can be cached to avoid sending the same reverse DNS
request again within a short period of time.

The libbeat dns processor is used as a reverse DNS annotator for auditbeat
events. Some of the IP addresses in those events respond to reverse DNS
requests with TTL=0, and these responses were causing load issues for my
systems when I had the reverse DNS processor enabled for auditbeat.

The new setting is `success_cache.min_ttl`; a sample configuration is
sketched in the note after the patches below.

Closes #18709

Signed-off-by: Peter Ansell
Co-authored-by: Marc Guasch

(cherry picked from commit 72da5a623b80e55af83078ca1fcb982284fde1e5)
---
 CHANGELOG.next.asciidoc                  |  3 ++
 libbeat/processors/dns/cache.go          | 24 +++++++++-------
 libbeat/processors/dns/cache_test.go     | 35 +++++++++++++++++++++---
 libbeat/processors/dns/config.go         |  7 +++++
 libbeat/processors/dns/docs/dns.asciidoc |  4 +++
 5 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index a151b7b507b..fa6d084ce1a 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -433,9 +433,12 @@ field. You can revert this change by configuring tags for the module and omittin
 - Add the `overwrite_keys` configuration option to the dissect processor. {pull}19464[19464]
 - Add support to trim captured values in the dissect processor. {pull}19464[19464]
 - Added the `max_cached_sessions` option to the script processor. {pull}19562[19562]
+- Add support for DNS over TLS for the dns_processor. {pull}19321[19321]
+- Add minimum cache TTL for successful DNS responses. {pull}18986[18986]
 - Set index.max_docvalue_fields_search in index template to increase value to 200 fields. {issue}20215[20215]
 - Add capability of enriching process metadata with contianer id also for non-privileged containers in `add_process_metadata` processor. {pull}19767[19767]
+
 
 *Auditbeat*
 
 - Reference kubernetes manifests include configuration for auditd and enrichment with kubernetes metadata. {pull}17431[17431]
diff --git a/libbeat/processors/dns/cache.go b/libbeat/processors/dns/cache.go
index 6bd6b373db9..d8be672d920 100644
--- a/libbeat/processors/dns/cache.go
+++ b/libbeat/processors/dns/cache.go
@@ -35,8 +35,9 @@ func (r ptrRecord) IsExpired(now time.Time) bool {
 
 type ptrCache struct {
     sync.RWMutex
-    data    map[string]ptrRecord
-    maxSize int
+    data          map[string]ptrRecord
+    maxSize       int
+    minSuccessTTL time.Duration
 }
 
 func (c *ptrCache) set(now time.Time, key string, ptr *PTR) {
@@ -135,11 +136,10 @@ func (ce *cachedError) Cause() error { return ce.err }
 // reverse DNS queries. It caches the results of queries regardless of their
 // outcome (success or failure).
 type PTRLookupCache struct {
-    success    *ptrCache
-    failure    *failureCache
-    failureTTL time.Duration
-    resolver   PTRResolver
-    stats      cacheStats
+    success  *ptrCache
+    failure  *failureCache
+    resolver PTRResolver
+    stats    cacheStats
 }
 
 type cacheStats struct {
@@ -155,8 +155,9 @@ func NewPTRLookupCache(reg *monitoring.Registry, conf CacheConfig, resolver PTRR
 
     c := &PTRLookupCache{
         success: &ptrCache{
-            data:    make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
-            maxSize: conf.SuccessCache.MaxCapacity,
+            data:          make(map[string]ptrRecord, conf.SuccessCache.InitialCapacity),
+            maxSize:       conf.SuccessCache.MaxCapacity,
+            minSuccessTTL: conf.SuccessCache.MinTTL,
         },
         failure: &failureCache{
             data: make(map[string]failureRecord, conf.FailureCache.InitialCapacity),
@@ -198,11 +199,14 @@ func (c PTRLookupCache) LookupPTR(ip string) (*PTR, error) {
         return nil, err
     }
 
+    // We set the ptr.TTL to the minimum TTL in case it is less than that.
+    ptr.TTL = max(ptr.TTL, uint32(c.success.minSuccessTTL/time.Second))
+
     c.success.set(now, ip, ptr)
     return ptr, nil
 }
 
-func max(a, b int) int {
+func max(a, b uint32) uint32 {
     if a >= b {
         return a
     }
diff --git a/libbeat/processors/dns/cache_test.go b/libbeat/processors/dns/cache_test.go
index d64dbd460b4..2242d1527c0 100644
--- a/libbeat/processors/dns/cache_test.go
+++ b/libbeat/processors/dns/cache_test.go
@@ -19,8 +19,8 @@ package dns
 
 import (
     "io"
-    "strings"
     "testing"
+    "time"
 
     "github.com/stretchr/testify/assert"
 
@@ -30,12 +30,14 @@
 type stubResolver struct{}
 
 func (r *stubResolver) LookupPTR(ip string) (*PTR, error) {
-    if ip == gatewayIP {
+    switch ip {
+    case gatewayIP:
         return &PTR{Host: gatewayName, TTL: gatewayTTL}, nil
-    } else if strings.HasSuffix(ip, "11") {
+    case gatewayIP + "1":
         return nil, io.ErrUnexpectedEOF
+    case gatewayIP + "2":
+        return &PTR{Host: gatewayName, TTL: 0}, nil
     }
-
     return nil, &dnsError{"fake lookup returned NXDOMAIN"}
 }
 
@@ -98,4 +100,29 @@ func TestCache(t *testing.T) {
         assert.EqualValues(t, 3, c.stats.Hit.Get())
         assert.EqualValues(t, 3, c.stats.Miss.Get()) // Cache miss.
     }
+
+    minTTL := defaultConfig.CacheConfig.SuccessCache.MinTTL
+    // Initial success returned TTL=0 with MinTTL.
+    ptr, err = c.LookupPTR(gatewayIP + "2")
+    if assert.NoError(t, err) {
+        assert.EqualValues(t, gatewayName, ptr.Host)
+
+        assert.EqualValues(t, minTTL/time.Second, ptr.TTL)
+        assert.EqualValues(t, 3, c.stats.Hit.Get())
+        assert.EqualValues(t, 4, c.stats.Miss.Get())
+
+        expectedExpire := time.Now().Add(minTTL).Unix()
+        gotExpire := c.success.data[gatewayIP+"2"].expires.Unix()
+        assert.InDelta(t, expectedExpire, gotExpire, 1)
+    }
+
+    // Cached success from a previous TTL=0 response.
+    ptr, err = c.LookupPTR(gatewayIP + "2")
+    if assert.NoError(t, err) {
+        assert.EqualValues(t, gatewayName, ptr.Host)
+        // TTL counts down while in cache.
+        assert.InDelta(t, minTTL/time.Second, ptr.TTL, 1)
+        assert.EqualValues(t, 4, c.stats.Hit.Get())
+        assert.EqualValues(t, 4, c.stats.Miss.Get())
+    }
 }
diff --git a/libbeat/processors/dns/config.go b/libbeat/processors/dns/config.go
index ae447a20c72..2c2bcedda24 100644
--- a/libbeat/processors/dns/config.go
+++ b/libbeat/processors/dns/config.go
@@ -87,6 +87,9 @@ type CacheSettings struct {
     // from the DNS record.
     TTL time.Duration `config:"ttl"`
 
+    // Minimum TTL value for successful DNS responses.
+    MinTTL time.Duration `config:"min_ttl" validate:"min=1"`
+
     // Initial capacity. How much space is allocated at initialization.
     InitialCapacity int `config:"capacity.initial" validate:"min=0"`
 
@@ -122,6 +125,9 @@ func (c *Config) Validate() error {
 
 // Validate validates the data contained in the CacheConfig.
 func (c *CacheConfig) Validate() error {
+    if c.SuccessCache.MinTTL <= 0 {
+        return errors.Errorf("success_cache.min_ttl must be > 0")
+    }
     if c.FailureCache.TTL <= 0 {
         return errors.Errorf("failure_cache.ttl must be > 0")
     }
@@ -146,6 +152,7 @@ func (c *CacheConfig) Validate() error {
 var defaultConfig = Config{
     CacheConfig: CacheConfig{
         SuccessCache: CacheSettings{
+            MinTTL:          time.Minute,
             InitialCapacity: 1000,
             MaxCapacity:     10000,
         },
diff --git a/libbeat/processors/dns/docs/dns.asciidoc b/libbeat/processors/dns/docs/dns.asciidoc
index b75fb8bf87a..10063a0ca2e 100644
--- a/libbeat/processors/dns/docs/dns.asciidoc
+++ b/libbeat/processors/dns/docs/dns.asciidoc
@@ -51,6 +51,7 @@ processors:
       success_cache:
         capacity.initial: 1000
         capacity.max: 10000
+        min_ttl: 1m
       failure_cache:
         capacity.initial: 1000
         capacity.max: 10000
@@ -80,6 +81,9 @@ the memory for this number of items. Default value is `1000`.
 cache can hold. When the maximum capacity is reached a random item is evicted.
 Default value is `10000`.
 
+`success_cache.min_ttl`:: The duration of the minimum alternative cache TTL for successful DNS responses. Ensures that `TTL=0` successful reverse DNS responses can be cached.
+Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`.
+
 `failure_cache.capacity.initial`:: The initial number of items that the failure
 cache will be allocated to hold. When initialized the processor will allocate
 the memory for this number of items. Default value is `1000`.

From 55f42b8331099ea7f0916c9b6ba238cff65bfaf6 Mon Sep 17 00:00:00 2001
From: Andrew Kroh
Date: Tue, 11 Aug 2020 07:18:26 -0400
Subject: [PATCH 2/2] Update CHANGELOG.next.asciidoc

---
 CHANGELOG.next.asciidoc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index fa6d084ce1a..d5eb0474918 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -433,12 +433,10 @@ field. You can revert this change by configuring tags for the module and omittin
 - Add the `overwrite_keys` configuration option to the dissect processor. {pull}19464[19464]
 - Add support to trim captured values in the dissect processor. {pull}19464[19464]
 - Added the `max_cached_sessions` option to the script processor. {pull}19562[19562]
-- Add support for DNS over TLS for the dns_processor. {pull}19321[19321]
 - Add minimum cache TTL for successful DNS responses. {pull}18986[18986]
 - Set index.max_docvalue_fields_search in index template to increase value to 200 fields. {issue}20215[20215]
 - Add capability of enriching process metadata with contianer id also for non-privileged containers in `add_process_metadata` processor. {pull}19767[19767]
-
 
 *Auditbeat*
 
 - Reference kubernetes manifests include configuration for auditd and enrichment with kubernetes metadata. {pull}17431[17431]
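
For reference, a minimal processor configuration that exercises the new option could look like the sketch below. Only `success_cache.min_ttl` is introduced by this patch; the field mapping, the nameserver address, and the `5m` value are illustrative, and the option falls back to its `1m` default when omitted.

    processors:
      - dns:
          type: reverse
          fields:
            source.ip: source.hostname
          success_cache:
            capacity.initial: 1000
            capacity.max: 10000
            # Keep TTL=0 answers cached for at least five minutes.
            min_ttl: 5m
          failure_cache:
            capacity.initial: 1000
            capacity.max: 10000
          nameservers: ["192.0.2.1"]

With this in place, a successful reverse lookup that returns TTL=0 is still kept in the success cache for at least `min_ttl`, so repeated events for the same IP within that window are answered from the cache instead of triggering another reverse DNS query.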