From 459408869882a9363d94f99bf701eb6fba8845f1 Mon Sep 17 00:00:00 2001 From: Alex K <8418476+fearful-symmetry@users.noreply.github.com> Date: Wed, 20 Mar 2024 07:17:50 -0700 Subject: [PATCH] Add metrics-monitoring beats to resource monitoring (#4326) * add monitoring beats to resources * finish tests, use constant for names in monitoring * fix headers, add changelog * fix tests * refactor tests * oops * refine es query * use const for tests * formatting * adjust document check --- ...28530-add-monitoring-beats-to-metrics.yaml | 32 + .../application/monitoring/v1_monitor.go | 49 +- .../application/monitoring/v1_monitor_test.go | 29 +- pkg/testing/tools/estools/elasticsearch.go | 16 +- pkg/testing/tools/kibana.go | 32 + .../agent_long_running_leak_test.go | 26 +- .../integration/metrics_monitoring_test.go | 145 ++++ .../integration/system_integration_setup.json | 789 ++++++++++++++++++ 8 files changed, 1059 insertions(+), 59 deletions(-) create mode 100644 changelog/fragments/1708628530-add-monitoring-beats-to-metrics.yaml create mode 100644 testing/integration/metrics_monitoring_test.go create mode 100644 testing/integration/system_integration_setup.json diff --git a/changelog/fragments/1708628530-add-monitoring-beats-to-metrics.yaml b/changelog/fragments/1708628530-add-monitoring-beats-to-metrics.yaml new file mode 100644 index 00000000000..58f6b85ac1a --- /dev/null +++ b/changelog/fragments/1708628530-add-monitoring-beats-to-metrics.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. 
+# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; an 80ish-character description of the change. +summary: add monitoring beats to usage metrics reporting + +# Long description; in case the summary is not enough to describe the change, +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: monitoring + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/elastic-agent/pull/4326 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled by the tooling with the issue linked to the PR number. 
+issue: https://github.com/elastic/elastic-agent/issues/4082 diff --git a/internal/pkg/agent/application/monitoring/v1_monitor.go b/internal/pkg/agent/application/monitoring/v1_monitor.go index 485595eda19..bf8d33a16bc 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor.go @@ -51,6 +51,9 @@ const ( defaultMonitoringNamespace = "default" agentName = "elastic-agent" + monitoringMetricsUnitID = "metrics-monitoring" + monitoringFilesUnitsID = "filestream-monitoring" + windowsOS = "windows" // metricset execution period used for the monitoring metrics inputs @@ -301,7 +304,7 @@ func (b *BeatsMonitor) injectLogsInput(cfg map[string]interface{}, components [] streams := []interface{}{ map[string]interface{}{ - idKey: "filestream-monitoring-agent", + idKey: fmt.Sprintf("%s-agent", monitoringFilesUnitsID), "type": "filestream", "paths": []interface{}{ filepath.Join(logsDrop, agentName+"-*.ndjson"), @@ -439,7 +442,7 @@ func (b *BeatsMonitor) injectLogsInput(cfg map[string]interface{}, components [] fixedBinaryName := strings.ReplaceAll(strings.ReplaceAll(comp.InputSpec.BinaryName, "-", "_"), "/", "_") // conform with index naming policy dataset := fmt.Sprintf("elastic_agent.%s", fixedBinaryName) streams = append(streams, map[string]interface{}{ - idKey: fmt.Sprintf("filestream-monitoring-%s", comp.ID), + idKey: fmt.Sprintf("%s-%s", monitoringFilesUnitsID, comp.ID), "type": "filestream", "paths": []interface{}{ comp.InputSpec.Spec.Service.Log.Path, @@ -492,8 +495,8 @@ func (b *BeatsMonitor) injectLogsInput(cfg map[string]interface{}, components [] inputs := []interface{}{ map[string]interface{}{ - idKey: "filestream-monitoring-agent", - "name": "filestream-monitoring-agent", + idKey: fmt.Sprintf("%s-agent", monitoringFilesUnitsID), + "name": fmt.Sprintf("%s-agent", monitoringFilesUnitsID), "type": "filestream", useOutputKey: monitoringOutput, "streams": streams, @@ -522,14 +525,13 @@ func (b 
*BeatsMonitor) monitoringNamespace() string { } func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentIDToBinary map[string]string, monitoringOutputName string, componentList []component.Component) error { - metricsCollectionIntervalString := metricsCollectionInterval.String() monitoringNamespace := b.monitoringNamespace() fixedAgentName := strings.ReplaceAll(agentName, "-", "_") beatsStreams := make([]interface{}, 0, len(componentIDToBinary)) streams := []interface{}{ map[string]interface{}{ - idKey: "metrics-monitoring-agent", + idKey: fmt.Sprintf("%s-agent", monitoringMetricsUnitID), "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", fixedAgentName), @@ -606,7 +608,18 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI }, }, } - for unit, binaryName := range componentIDToBinary { + + //create a new map with the monitoring beats included + componentListWithMonitoring := map[string]string{ + fmt.Sprintf("beat/%s", monitoringMetricsUnitID): "metricbeat", + fmt.Sprintf("http/%s", monitoringMetricsUnitID): "metricbeat", + monitoringFilesUnitsID: "filebeat", + } + for k, v := range componentIDToBinary { + componentListWithMonitoring[k] = v + } + + for unit, binaryName := range componentListWithMonitoring { if !isSupportedMetricsBinary(binaryName) { continue } @@ -616,7 +629,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI if isSupportedBeatsBinary(binaryName) { beatsStreams = append(beatsStreams, map[string]interface{}{ - idKey: "metrics-monitoring-" + name, + idKey: fmt.Sprintf("%s-", monitoringMetricsUnitID) + name, "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", name), @@ -678,7 +691,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI } streams = append(streams, map[string]interface{}{ - idKey: "metrics-monitoring-" + name + "-1", + 
idKey: fmt.Sprintf("%s-%s-1", monitoringMetricsUnitID, name), "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", fixedAgentName), @@ -748,7 +761,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI if strings.EqualFold(name, "filebeat") { fbDataStreamName := "filebeat_input" streams = append(streams, map[string]interface{}{ - idKey: "metrics-monitoring-" + name + "-1", + idKey: fmt.Sprintf("%s-%s-1", monitoringMetricsUnitID, name), "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", fbDataStreamName), @@ -832,7 +845,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI // note: this doesn't fetch anything from the /state endpoint, as it doesn't report much beyond name/version, // the equivalent of the beat /state metrics end up in /shipper shipperHTTPStreams = append(shipperHTTPStreams, map[string]interface{}{ - idKey: "metrics-monitoring-shipper", + idKey: fmt.Sprintf("%s-shipper", monitoringMetricsUnitID), "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", name), @@ -846,7 +859,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "processors": createProcessorsForJSONInput(name, comp.ID, monitoringNamespace, b.agentInfo), }, map[string]interface{}{ - idKey: "metrics-monitoring-shipper-stats", + idKey: fmt.Sprintf("%s-shipper-stats", monitoringMetricsUnitID), "data_stream": map[string]interface{}{ "type": "metrics", "dataset": fmt.Sprintf("elastic_agent.%s", name), @@ -864,8 +877,8 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI inputs := []interface{}{ map[string]interface{}{ - idKey: "metrics-monitoring-beats", - "name": "metrics-monitoring-beats", + idKey: fmt.Sprintf("%s-beats", monitoringMetricsUnitID), + "name": fmt.Sprintf("%s-beats", monitoringMetricsUnitID), "type": 
"beat/metrics", useOutputKey: monitoringOutput, "data_stream": map[string]interface{}{ @@ -874,8 +887,8 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "streams": beatsStreams, }, map[string]interface{}{ - idKey: "metrics-monitoring-agent", - "name": "metrics-monitoring-agent", + idKey: fmt.Sprintf("%s-agent", monitoringMetricsUnitID), + "name": fmt.Sprintf("%s-agent", monitoringMetricsUnitID), "type": "http/metrics", useOutputKey: monitoringOutput, "data_stream": map[string]interface{}{ @@ -888,8 +901,8 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI // if we have shipper data, inject the extra inputs if len(shipperHTTPStreams) > 0 { inputs = append(inputs, map[string]interface{}{ - idKey: "metrics-monitoring-shipper", - "name": "metrics-monitoring-shipper", + idKey: fmt.Sprintf("%s-shipper", monitoringMetricsUnitID), + "name": fmt.Sprintf("%s-shipper", monitoringMetricsUnitID), "type": "http/metrics", useOutputKey: monitoringOutput, "data_stream": map[string]interface{}{ diff --git a/internal/pkg/agent/application/monitoring/v1_monitor_test.go b/internal/pkg/agent/application/monitoring/v1_monitor_test.go index 3f852d44200..f18a459b4cc 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor_test.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor_test.go @@ -165,16 +165,16 @@ func TestMonitoringConfigComponentFields(t *testing.T) { if _, exists := processor["add_fields"]; !exists { continue } - p := Processor{} - if err := json.Unmarshal([]byte(mapstr.M(processor).String()), &p); err != nil { + streamProc := Processor{} + if err := json.Unmarshal([]byte(mapstr.M(processor).String()), &streamProc); err != nil { t.Errorf("could not decode processor config: %q, err: %s", "foo", err) } - if p.AddFields.Target != "component" { + if streamProc.AddFields.Target != "component" { continue } - binary := p.AddFields.Fields.Binary - componentID := p.AddFields.Fields.ID + binary := 
streamProc.AddFields.Fields.Binary + componentID := streamProc.AddFields.Fields.ID // The elastic-Agent is a special case, handle it first if strings.Contains(streamID, "monitoring-agent") { @@ -186,11 +186,20 @@ func TestMonitoringConfigComponentFields(t *testing.T) { } continue } - if binary != "filebeat" { - t.Errorf("expecting fields['binary'] = 'filebeat', got %q", binary) - } - if componentID != "filestream-default" { - t.Errorf("expecting fields['id'] = 'filestream-default', got %q", componentID) + if !strings.Contains(componentID, "monitoring") { + if binary != "filebeat" { + t.Errorf("expecting fields['binary'] = 'filebeat', got %q", binary) + } + if componentID != "filestream-default" { + t.Errorf("expecting fields['id'] = 'filestream-default', got %q", componentID) + } + } else { + if binary != "filebeat" && binary != "metricbeat" { + t.Errorf("expected monitoring compoent to be metricbeat or filebeat, got %s", binary) + } + if componentID != monitoringFilesUnitsID && componentID != "beat/metrics-monitoring" && componentID != "http/metrics-monitoring" { + t.Errorf("got unxpected monitoring component ID: %s", componentID) + } } } diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 8fcb4e41c0b..67902a08e2c 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -222,7 +222,7 @@ func GetLatestDocumentMatchingQuery(ctx context.Context, client elastictransport return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, indexPattern, client) + return PerformQueryForRawQuery(ctx, queryRaw, indexPattern, client) } // GetIndexTemplatesForPattern lists all index templates on the system @@ -362,7 +362,7 @@ func FindMatchingLogLinesWithContext(ctx context.Context, client elastictranspor return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, 
"logs-elastic_agent*", client) + return PerformQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) } @@ -434,7 +434,7 @@ func CheckForErrorsInLogsWithContext(ctx context.Context, client elastictranspor return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) + return PerformQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) } // GetLogsForDataset returns any logs associated with the datastream @@ -525,7 +525,7 @@ func GetLogsForDatasetWithContext(ctx context.Context, client elastictransport.I }, } - return performQueryForRawQuery(ctx, indexQuery, "logs-elastic_agent*", client) + return PerformQueryForRawQuery(ctx, indexQuery, "logs-elastic_agent*", client) } // GetLogsForIndexWithContext returns any logs that match the given condition @@ -536,7 +536,7 @@ func GetLogsForIndexWithContext(ctx context.Context, client elastictransport.Int }, } - return performQueryForRawQuery(ctx, indexQuery, index, client) + return PerformQueryForRawQuery(ctx, indexQuery, index, client) } // GetPing performs a basic ping and returns ES config info @@ -561,7 +561,8 @@ func GetPing(ctx context.Context, client elastictransport.Interface) (Ping, erro } -func performQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{}, index string, client elastictransport.Interface) (Documents, error) { +// PerformQueryForRawQuery executes the ES query specified by queryRaw +func PerformQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{}, index string, client elastictransport.Interface) (Documents, error) { var buf bytes.Buffer err := json.NewEncoder(&buf).Encode(queryRaw) if err != nil { @@ -576,6 +577,7 @@ func performQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{ es.Search.WithTrackTotalHits(true), es.Search.WithPretty(), es.Search.WithContext(ctx), + es.Search.WithSize(300), ) if err != nil { return Documents{}, fmt.Errorf("error 
performing ES search: %w", err) @@ -613,7 +615,7 @@ func FindMatchingLogLinesForAgentWithContext(ctx context.Context, client elastic return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) + return PerformQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) } // GetLogsForDatastream returns any logs associated with the datastream diff --git a/pkg/testing/tools/kibana.go b/pkg/testing/tools/kibana.go index bfb804c6177..4addc9746ef 100644 --- a/pkg/testing/tools/kibana.go +++ b/pkg/testing/tools/kibana.go @@ -10,6 +10,7 @@ import ( "fmt" "net/http" "net/url" + "os" "time" "github.com/elastic/elastic-agent-libs/kibana" @@ -89,3 +90,34 @@ func GetDashboards(ctx context.Context, client *kibana.Client) ([]Dashboard, err return dashboards, nil } + +// InstallPackageFromDefaultFile allows for a test idiom where a JSON policy file can be loaded, and then updated with variables that are specific to a given test. +// This can allow a single JSON policy file to be reused across multiple tests. 
+// existingPolicyID should be the ID of an agent policy that was already created with InstallAgentWithPolicy() +func InstallPackageFromDefaultFile(ctx context.Context, client *kibana.Client, packagePolicyName string, packageVersion string, policyJsonPath string, policyUUID string, existingPolicyID string) (kibana.PackagePolicyResponse, error) { + installPackage := kibana.PackagePolicyRequest{} + + jsonRaw, err := os.ReadFile(policyJsonPath) + if err != nil { + return kibana.PackagePolicyResponse{}, fmt.Errorf("error reading JSON policy file: %w", err) + } + + err = json.Unmarshal(jsonRaw, &installPackage) + if err != nil { + return kibana.PackagePolicyResponse{}, fmt.Errorf("error unmarshaling json: %w", err) + } + + installPackage.Package.Version = packageVersion + installPackage.ID = policyUUID + installPackage.PolicyID = existingPolicyID + installPackage.Namespace = "default" + installPackage.Name = fmt.Sprintf("%s-test-%s", packagePolicyName, policyUUID) + installPackage.Vars = map[string]interface{}{} + + resp, err := client.InstallFleetPackage(ctx, installPackage) + if err != nil { + return kibana.PackagePolicyResponse{}, fmt.Errorf("error installing fleet package: %w", err) + } + + return resp, nil +} diff --git a/testing/integration/agent_long_running_leak_test.go b/testing/integration/agent_long_running_leak_test.go index 7b2c5035e0b..1673e9337e4 100644 --- a/testing/integration/agent_long_running_leak_test.go +++ b/testing/integration/agent_long_running_leak_test.go @@ -9,7 +9,6 @@ package integration import ( "context" "encoding/json" - "fmt" "io" "net" "net/http" @@ -122,33 +121,12 @@ func (runner *ExtendedRunner) SetupSuite() { policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) require.NoError(runner.T(), err) - // install system package - runner.InstallPackage(ctx, "system", "1.53.1", "agent_long_test_base_system_integ.json", uuid.New().String(), policyResp.ID) - - 
// install cef - runner.InstallPackage(ctx, "apache", "1.17.0", "agent_long_test_apache.json", uuid.New().String(), policyResp.ID) - -} - -func (runner *ExtendedRunner) InstallPackage(ctx context.Context, name string, version string, cfgFile string, policyUUID string, policyID string) { - installPackage := kibana.PackagePolicyRequest{} - - jsonRaw, err := os.ReadFile(cfgFile) + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "system", "1.53.1", "agent_long_test_base_system_integ.json", uuid.New().String(), policyResp.ID) require.NoError(runner.T(), err) - err = json.Unmarshal(jsonRaw, &installPackage) + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "apache", "1.17.0", "agent_long_test_apache.json", uuid.New().String(), policyResp.ID) require.NoError(runner.T(), err) - installPackage.Package.Version = version - installPackage.ID = policyUUID - installPackage.PolicyID = policyID - installPackage.Namespace = "default" - installPackage.Name = fmt.Sprintf("%s-long-test-%s", name, policyUUID) - installPackage.Vars = map[string]interface{}{} - - runner.T().Logf("Installing %s package....", name) - _, err = runner.info.KibanaClient.InstallFleetPackage(ctx, installPackage) - require.NoError(runner.T(), err, "error creating fleet package") } func (runner *ExtendedRunner) TestHandleLeak() { diff --git a/testing/integration/metrics_monitoring_test.go b/testing/integration/metrics_monitoring_test.go new file mode 100644 index 00000000000..4e8fc4072ef --- /dev/null +++ b/testing/integration/metrics_monitoring_test.go @@ -0,0 +1,145 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/elastic/elastic-agent-libs/kibana" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/testing/tools/estools" +) + +type MetricsRunner struct { + suite.Suite + info *define.Info + agentFixture *atesting.Fixture + + ESHost string +} + +func TestMetricsMonitoringCorrectBinaries(t *testing.T) { + info := define.Require(t, define.Requirements{ + Group: Fleet, + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + OS: []define.OS{ + {Type: define.Linux}, + {Type: define.Windows}, + }, + }) + + suite.Run(t, &MetricsRunner{info: info}) +} + +func (runner *MetricsRunner) SetupSuite() { + fixture, err := define.NewFixture(runner.T(), define.Version()) + require.NoError(runner.T(), err) + runner.agentFixture = fixture + + policyUUID := uuid.New().String() + basePolicy := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + + unpr := false + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Force: true, + Unprivileged: &unpr, + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + policyResp, err := tools.InstallAgentWithPolicy(ctx, runner.T(), installOpts, runner.agentFixture, runner.info.KibanaClient, basePolicy) + require.NoError(runner.T(), err) + + _, err = tools.InstallPackageFromDefaultFile(ctx, runner.info.KibanaClient, "system", "1.53.1", "system_integration_setup.json", 
uuid.New().String(), policyResp.ID) + require.NoError(runner.T(), err) + +} + +func (runner *MetricsRunner) TestBeatsMetrics() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) + defer cancel() + agentStatus, err := runner.agentFixture.ExecStatus(ctx) + require.NoError(runner.T(), err) + + componentIds := []string{ + "system/metrics-default", + "log-default", + "beat/metrics-monitoring", + "elastic-agent", + "http/metrics-monitoring", + "filestream-monitoring", + } + + require.Eventually(runner.T(), func() bool { + for _, cid := range componentIds { + query := genESQuery(agentStatus.Info.ID, cid) + res, err := estools.PerformQueryForRawQuery(ctx, query, "metrics-elastic_agent*", runner.info.ESClient) + require.NoError(runner.T(), err) + runner.T().Logf("Fetched metrics for %s, got %d hits", cid, res.Hits.Total.Value) + if res.Hits.Total.Value < 1 { + return false + } + + } + return true + }, time.Minute*10, time.Second*10, "could not fetch metrics for all known beats in default install: %v", componentIds) +} + +func genESQuery(agentID string, componentID string) map[string]interface{} { + // see https://github.com/elastic/kibana/blob/main/x-pack/plugins/fleet/server/services/agents/agent_metrics.ts + queryRaw := map[string]interface{}{ + "query": map[string]interface{}{ + "bool": map[string]interface{}{ + "must": []map[string]interface{}{ + { + "match": map[string]interface{}{ + "agent.id": agentID, + }, + }, + { + "match": map[string]interface{}{ + "component.id": componentID, + }, + }, + // make sure we fetch documents that have the metric field used by fleet monitoring + { + "exists": map[string]interface{}{ + "field": "system.process.cpu.total.value", + }, + }, + { + "exists": map[string]interface{}{ + "field": "system.process.memory.size", + }, + }, + }, + }, + }, + } + + return queryRaw +} diff --git a/testing/integration/system_integration_setup.json b/testing/integration/system_integration_setup.json new file mode 100644 index 
00000000000..1f202d51f0f --- /dev/null +++ b/testing/integration/system_integration_setup.json @@ -0,0 +1,789 @@ +{ + "id": "9bf446fc-58d4-4767-b42d-3450815d5d3d", + "version": "WzYzMSwxXQ==", + "name": "system-1", + "namespace": "default", + "package": { + "name": "system", + "title": "System", + "version": "1.53.0" + }, + "enabled": true, + "policy_id": "0a4f6c12-446a-401a-b0eb-96afea6ca92d", + "inputs": [ + { + "type": "logfile", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.auth" + }, + "vars": { + "ignore_older": { + "value": "72h", + "type": "text" + }, + "paths": { + "value": [ + "/var/log/auth.log*", + "/var/log/secure*" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "tags": { + "value": [ + "system-auth" + ], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "logfile-system.auth-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "ignore_older": "72h", + "paths": [ + "/var/log/auth.log*", + "/var/log/secure*" + ], + "exclude_files": [ + ".gz$" + ], + "multiline": { + "pattern": "^\\s", + "match": "after" + }, + "tags": [ + "system-auth" + ], + "processors": [ + { + "add_locale": null + }, + { + "rename": { + "fields": [ + { + "from": "message", + "to": "event.original" + } + ], + "ignore_missing": true, + "fail_on_error": false + } + }, + { + "syslog": { + "field": "event.original", + "ignore_missing": true, + "ignore_failure": true + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.syslog" + }, + "vars": { + "paths": { + "value": [ + "/var/log/messages*", + "/var/log/syslog*", + "/var/log/system*" + ], + "type": "text" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + }, + "ignore_older": { + "value": "72h", + 
"type": "text" + }, + "exclude_files": { + "value": [ + "\\.gz$" + ], + "type": "text" + } + }, + "id": "logfile-system.syslog-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "paths": [ + "/var/log/messages*", + "/var/log/syslog*", + "/var/log/system*" + ], + "exclude_files": [ + "\\.gz$" + ], + "multiline": { + "pattern": "^\\s", + "match": "after" + }, + "processors": [ + { + "add_locale": null + } + ], + "tags": null, + "ignore_older": "72h" + } + } + ] + }, + { + "type": "winlog", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.application" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.application-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "Application", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.security" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + "event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.security-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "Security", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + }, + { + "enabled": true, + "data_stream": { + "type": "logs", + "dataset": "system.system" + }, + "vars": { + "preserve_original_event": { + "value": false, + "type": "bool" + }, + 
"event_id": { + "type": "text" + }, + "ignore_older": { + "value": "72h", + "type": "text" + }, + "language": { + "value": 0, + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "winlog-system.system-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "name": "System", + "condition": "${host.platform} == 'windows'", + "ignore_older": "72h" + } + } + ] + }, + { + "type": "system/metrics", + "policy_template": "system", + "enabled": true, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "metrics", + "dataset": "system.core" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "core.metrics": { + "value": [ + "percentages" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.core-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.cpu" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "cpu.metrics": { + "value": [ + "percentages", + "normalized_percentages" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.cpu-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "cpu" + ], + "cpu.metrics": [ + "percentages", + "normalized_percentages" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.diskio" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "diskio.include_devices": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + } + }, + "id": "system/metrics-system.diskio-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "diskio" + ], + "diskio.include_devices": null, + "period": "1s" + } + 
}, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.filesystem" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "filesystem.ignore_types": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "value": "\"\"", + "type": "yaml" + } + }, + "id": "system/metrics-system.filesystem-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "filesystem" + ], + "period": "1s", + "processors": [ + { + "drop_event.when.regexp": { + "system.filesystem.mount_point": "^/(sys|cgroup|proc|dev|etc|host|lib|snap)($|/)" + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.fsstat" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "value": "\"\"", + "type": "yaml" + } + }, + "id": "system/metrics-system.fsstat-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "fsstat" + ], + "period": "1s", + "processors": [ + { + "drop_event.when.regexp": { + "system.fsstat.mount_point": "^/(sys|cgroup|proc|dev|etc|host|lib|snap)($|/)" + } + } + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.load" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.load-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "load" + ], + "condition": "${host.platform} != 'windows'", + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.memory" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": 
"system/metrics-system.memory-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "memory" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.network" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "network.interfaces": { + "value": [], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.network-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "network" + ], + "period": "1s", + "network.interfaces": null + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.process" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "process.include_top_n.by_cpu": { + "value": 5, + "type": "integer" + }, + "process.include_top_n.by_memory": { + "value": 5, + "type": "integer" + }, + "process.cmdline.cache.enabled": { + "value": true, + "type": "bool" + }, + "process.cgroups.enabled": { + "value": false, + "type": "bool" + }, + "process.env.whitelist": { + "value": [], + "type": "text" + }, + "process.include_cpu_ticks": { + "value": false, + "type": "bool" + }, + "processes": { + "value": [ + ".*" + ], + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.process-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "process" + ], + "period": "1s", + "process.include_top_n.by_cpu": 5, + "process.include_top_n.by_memory": 5, + "process.cmdline.cache.enabled": true, + "process.cgroups.enabled": false, + "process.include_cpu_ticks": false, + "processes": [ + ".*" + ] + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.process.summary" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": 
[], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.process.summary-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "process_summary" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.socket_summary" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.socket_summary-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "socket_summary" + ], + "period": "1s" + } + }, + { + "enabled": true, + "data_stream": { + "type": "metrics", + "dataset": "system.uptime" + }, + "vars": { + "period": { + "value": "1s", + "type": "text" + }, + "tags": { + "value": [], + "type": "text" + }, + "processors": { + "type": "yaml" + } + }, + "id": "system/metrics-system.uptime-9bf446fc-58d4-4767-b42d-3450815d5d3d", + "compiled_stream": { + "metricsets": [ + "uptime" + ], + "period": "1s" + } + } + ], + "vars": { + "system.hostfs": { + "type": "text" + } + } + }, + { + "type": "httpjson", + "policy_template": "system", + "enabled": false, + "streams": [ + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.application" + }, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:Application\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + } + }, + "id": "httpjson-system.application-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.security" + }, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:Security\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + 
} + }, + "id": "httpjson-system.security-9bf446fc-58d4-4767-b42d-3450815d5d3d" + }, + { + "enabled": false, + "data_stream": { + "type": "logs", + "dataset": "system.system" + }, + "vars": { + "interval": { + "value": "1s", + "type": "text" + }, + "search": { + "value": "search sourcetype=\"XmlWinEventLog:System\"", + "type": "text" + }, + "tags": { + "value": [ + "forwarded" + ], + "type": "text" + } + }, + "id": "httpjson-system.system-9bf446fc-58d4-4767-b42d-3450815d5d3d" + } + ], + "vars": { + "url": { + "value": "https://server.example.com:8089", + "type": "text" + }, + "enable_request_tracer": { + "type": "bool" + }, + "username": { + "type": "text" + }, + "password": { + "type": "password" + }, + "token": { + "type": "password" + }, + "preserve_original_event": { + "value": false, + "type": "bool" + } + } + } + ] + } \ No newline at end of file