Skip to content

Commit

Permalink
change default for process checks in core agent (#34489)
Browse files Browse the repository at this point in the history
  • Loading branch information
brycekahle authored Mar 3, 2025
1 parent c99883d commit 369a8db
Show file tree
Hide file tree
Showing 29 changed files with 158 additions and 114 deletions.
1 change: 1 addition & 0 deletions cmd/agent/subcommands/flare/command_other_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ func NewSystemProbeTestServer(_ http.Handler) (*httptest.Server, error) {
func InjectConnectionFailures(mockSysProbeConfig model.Config, _ model.Config) {
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", "/opt/datadog-agent/run/sysprobe-bad.sock")
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)
}

// CheckExpectedConnectionFailures checks the expected errors after simulated
Expand Down
2 changes: 2 additions & 0 deletions cmd/agent/subcommands/flare/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ func (c *commandTestSuite) TestReadProfileData() {
if runtime.GOOS != "darwin" {
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", c.sysprobeSocketPath)
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)
}

profiler := getProfiler(t, mockConfig, mockSysProbeConfig)
Expand Down Expand Up @@ -214,6 +215,7 @@ func (c *commandTestSuite) TestReadProfileDataNoTraceAgent() {
mockSysProbeConfig := configmock.NewSystemProbe(t)
mockSysProbeConfig.SetWithoutSource("system_probe_config.enabled", true)
mockSysProbeConfig.SetWithoutSource("system_probe_config.sysprobe_socket", c.sysprobeSocketPath)
mockSysProbeConfig.SetWithoutSource("network_config.enabled", true)

profiler := getProfiler(t, mockConfig, mockSysProbeConfig)
data, err := profiler.ReadProfileData(10, func(string, ...interface{}) error { return nil })
Expand Down
8 changes: 7 additions & 1 deletion comp/core/profiler/impl/profiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,15 @@ func (profiler) securityAgentEnabled() bool {
}

func (p profiler) processAgentEnabled() bool {
return p.cfg.GetBool("process_config.enabled") ||
processChecksEnabled := p.cfg.GetBool("process_config.enabled") ||
p.cfg.GetBool("process_config.container_collection.enabled") ||
p.cfg.GetBool("process_config.process_collection.enabled")
processChecksInProcessAgent := !p.cfg.GetBool("process_config.run_in_core_agent.enabled") &&
processChecksEnabled
npmEnabled := p.sysProbeCfg.GetBool("network_config.enabled")
usmEnabled := p.sysProbeCfg.GetBool("service_monitoring_config.enabled")

return processChecksInProcessAgent || npmEnabled || usmEnabled
}

func (p profiler) apmEnabled() bool {
Expand Down
35 changes: 33 additions & 2 deletions comp/core/profiler/impl/profiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ func createGenericConfig(t *testing.T) model.Config {
mockConfig.SetWithoutSource("process_config.expvar_port", port)
mockConfig.SetWithoutSource("security_agent.expvar_port", port)

mockConfig.SetWithoutSource("process_config.run_in_core_agent.enabled", false)
mockConfig.SetWithoutSource("process_config.enabled", false)
mockConfig.SetWithoutSource("process_config.container_collection.enabled", false)
mockConfig.SetWithoutSource("process_config.process_collection.enabled", false)
mockConfig.SetWithoutSource("apm_config.enabled", false)
mockConfig.SetWithoutSource("system_probe_config.enabled", false)

return mockConfig
}
Expand Down Expand Up @@ -212,14 +212,45 @@ func TestTimeout(t *testing.T) {
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 6*(10*time.Second),
},
{
name: "Process Agent Checks in Core Agent",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 4*(10*time.Second),
},
{
name: "Process Agent Enabled, via NPM",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{
"network_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 8*(10*time.Second),
},
{
name: "Process Agent Enabled, via USM",
extraCfgs: map[string]interface{}{
"process_config.run_in_core_agent.enabled": true,
},
extraSysCfgs: map[string]interface{}{
"service_monitoring_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 8*(10*time.Second),
},
{
name: "SysProbe Enabled",
extraCfgs: map[string]interface{}{},
extraSysCfgs: map[string]interface{}{
"system_probe_config.enabled": true,
},
profileDuration: 10 * time.Second,
expTimeout: baseTimeout + 6*(10*time.Second),
expTimeout: baseTimeout + 8*(10*time.Second), // config enables NPM, which enables process agent
},
{
name: "Everything Enabled",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ func TestCollection(t *testing.T) {
t.Run(test.name, func(t *testing.T) {

overrides := map[string]interface{}{
"language_detection.enabled": true,
"language_detection.enabled": true,
"process_config.run_in_core_agent.enabled": false,
}

// We do not inject any collectors here; we instantiate
Expand Down
4 changes: 2 additions & 2 deletions pkg/config/config_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1556,9 +1556,9 @@ api_key:
## @param run_in_core_agent - custom object - optional
## Controls whether the process Agent or core Agent collects process and/or container information (Linux only).
# run_in_core_agent:
## @param enabled - boolean - optional - default: false
## @param enabled - boolean - optional - default: true
## Enables process/container collection on the core Agent instead of the process Agent.
# enabled: false
# enabled: true
{{ end }}

## @param process_collection - custom object - optional
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/setup/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func setupProcesses(config pkgconfigmodel.Setup) {
procBindEnvAndSetDefault(config, "process_config.process_collection.enabled", false)

// This allows for the process check to run in the core agent but is for linux only
procBindEnvAndSetDefault(config, "process_config.run_in_core_agent.enabled", false)
procBindEnvAndSetDefault(config, "process_config.run_in_core_agent.enabled", runtime.GOOS == "linux")

config.BindEnv("process_config.process_dd_url",
"DD_PROCESS_CONFIG_PROCESS_DD_URL",
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/setup/process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestProcessDefaultConfig(t *testing.T) {
},
{
key: "process_config.run_in_core_agent.enabled",
defaultValue: false,
defaultValue: runtime.GOOS == "linux",
},
{
key: "process_config.queue_size",
Expand Down
6 changes: 3 additions & 3 deletions pkg/flare/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ func provideExtraFiles(fb flaretypes.FlareBuilder) error {
fb.AddFile("status.log", []byte("unable to get the status of the agent, is it running?")) //nolint:errcheck
fb.AddFile("config-check.log", []byte("unable to get loaded checks config, is the agent running?")) //nolint:errcheck
} else {
fb.AddFileFromFunc("tagger-list.json", getAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("workload-list.log", getAgentWorkloadList) //nolint:errcheck
fb.AddFileFromFunc("process-agent_tagger-list.json", getProcessAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("tagger-list.json", getAgentTaggerList) //nolint:errcheck
fb.AddFileFromFunc("workload-list.log", getAgentWorkloadList) //nolint:errcheck
if !pkgconfigsetup.Datadog().GetBool("process_config.run_in_core_agent.enabled") {
fb.AddFileFromFunc("process-agent_tagger-list.json", getProcessAgentTaggerList) //nolint:errcheck
getChecksFromProcessAgent(fb, getProcessAPIAddressPort)
}
}
Expand Down
20 changes: 16 additions & 4 deletions pkg/process/metadata/workloadmeta/collector/process_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,34 +171,45 @@ func TestProcessCollector(t *testing.T) {
// the remote process collector is enabled.
func TestEnabled(t *testing.T) {
type testCase struct {
name string
processCollectionEnabled, remoteProcessCollectorEnabled bool
expectEnabled bool
flavor string
name string
processCollectionEnabled, remoteProcessCollectorEnabled, runInCoreAgentEnabled bool
expectEnabled bool
flavor string
}

testCases := []testCase{
{
name: "process check enabled",
processCollectionEnabled: true,
remoteProcessCollectorEnabled: false,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
{
name: "remote collector disabled",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: false,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
{
name: "collector enabled",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: true,
runInCoreAgentEnabled: false,
flavor: flavor.ProcessAgent,
expectEnabled: true,
},
{
name: "collector enabled but in core agent",
processCollectionEnabled: false,
remoteProcessCollectorEnabled: true,
runInCoreAgentEnabled: true,
flavor: flavor.ProcessAgent,
expectEnabled: false,
},
}

for _, tc := range testCases {
Expand All @@ -208,6 +219,7 @@ func TestEnabled(t *testing.T) {
cfg := configmock.New(t)
cfg.SetWithoutSource("process_config.process_collection.enabled", tc.processCollectionEnabled)
cfg.SetWithoutSource("language_detection.enabled", tc.remoteProcessCollectorEnabled)
cfg.SetWithoutSource("process_config.run_in_core_agent.enabled", tc.runInCoreAgentEnabled)

assert.Equal(t, tc.expectEnabled, Enabled(cfg))
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Each section from every release note are combined when the
# CHANGELOG.rst is rendered. So the text needs to be worded so that
# it does not depend on any information only available in another
# section. This may mean repeating some details, but each section
# must be readable independently of the other.
#
# Each section note must be formatted as reStructuredText.
---
enhancements:
- |
Process checks now run in the core Agent by default on Linux. Process Agent will only run if needed for other configured features.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ security_agent:
cmd_port: {{.SecurityCmdPort}}

process_config:
run_in_core_agent:
enabled: false
process_collection:
enabled: true
cmd_port: {{.ProcessCmdPort}}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ security_agent:
cmd_port: {{.SecurityCmdPort}}

process_config:
run_in_core_agent:
enabled: false
process_collection:
enabled: true
cmd_port: {{.ProcessCmdPort}}
17 changes: 12 additions & 5 deletions test/new-e2e/tests/agent-subcommands/flare/flare_common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package flare

import (
"runtime"
"time"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -40,9 +41,12 @@ func (v *baseFlareSuite) TestFlareDefaultFiles() {
assertLogsFolderOnlyContainsLogFile(v.T(), flare)
assertEtcFolderOnlyContainsConfigFile(v.T(), flare)

assertFileContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")
if runtime.GOOS != "linux" {
assertFilesExist(v.T(), flare, []string{"process-agent_tagger-list.json"})
assertFileContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileNotContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")
}
}

func (v *baseFlareSuite) TestLocalFlareDefaultFiles() {
Expand Down Expand Up @@ -72,9 +76,12 @@ func (v *baseFlareSuite) TestFlareProfiling() {
assert.Contains(v.T(), logs, "Setting runtime_block_profile_rate to 5000")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from core.")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from security-agent.")
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from process.")

assertFilesExist(v.T(), flare, profilingFiles)

if runtime.GOOS != "linux" {
assert.Contains(v.T(), logs, "Getting a 31s profile snapshot from process.")
assertFilesExist(v.T(), flare, profilingNonLinuxFiles)
}
}

func requestAgentFlareAndFetchFromFakeIntake(v *baseFlareSuite, flareArgs ...agentclient.AgentArgsOption) (flare.Flare, string) {
Expand Down
14 changes: 8 additions & 6 deletions test/new-e2e/tests/agent-subcommands/flare/flare_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ var nonLocalMetadataFlareFiles = []string{
}

var nonLocalFlareFiles = []string{
"process-agent_tagger-list.json",
"tagger-list.json",
"workload-list.log",
"agent_open_files.txt",
Expand Down Expand Up @@ -94,18 +93,21 @@ var profilingFiles = []string{
"profiles/core-block.pprof",
"profiles/core-cpu.pprof",
"profiles/core-mutex.pprof",
"profiles/process-1st-heap.pprof",
"profiles/process-2nd-heap.pprof",
"profiles/process-block.pprof",
"profiles/process-cpu.pprof",
"profiles/process-mutex.pprof",
"profiles/trace-1st-heap.pprof",
"profiles/trace-2nd-heap.pprof",
"profiles/trace-block.pprof",
"profiles/trace-cpu.pprof",
"profiles/trace-mutex.pprof",
}

var profilingNonLinuxFiles = []string{
"profiles/process-1st-heap.pprof",
"profiles/process-2nd-heap.pprof",
"profiles/process-block.pprof",
"profiles/process-cpu.pprof",
"profiles/process-mutex.pprof",
}

// untestedFiles contains some untested files that needs specific scenario which should be added later.
//
//nolint:unused
Expand Down
4 changes: 0 additions & 4 deletions test/new-e2e/tests/agent-subcommands/flare/flare_nix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ func (v *linuxFlareSuite) TestzzzFlareWithAllConfiguration() {
extraCustomConfigFiles := []string{"etc/confd/dist/test.yaml", "etc/confd/dist/test.yml", "etc/confd/dist/test.yml.test", "etc/confd/checksd/test.yml"}
assertFilesExist(v.T(), flare, extraCustomConfigFiles)

assertFileNotContains(v.T(), flare, "process_check_output.json", "'process_config.process_collection.enabled' is disabled")
assertFileContains(v.T(), flare, "container_check_output.json", "'process_config.container_collection.enabled' is disabled")
assertFileContains(v.T(), flare, "process_discovery_check_output.json", "'process_config.process_discovery.enabled' is disabled")

filesRegistredInPermissionsLog := append(systemProbeDummyFiles, "/etc/datadog-agent/auth_token")
assertFileContains(v.T(), flare, "permissions.log", filesRegistredInPermissionsLog...)
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func fetchAndCheckStatus(v *baseStatusSuite, expectedSections []expectedSection)
}, 2*time.Minute, 20*time.Second)
}

func (v *baseStatusSuite) TestDefaultInstallStatus() {
func (v *baseStatusSuite) testDefaultInstallStatus(processAgentContain, processAgentNotContain []string) {
expectedSections := []expectedSection{
{
name: `Agent \(.*\)`, // TODO: verify that the right version is output
Expand Down Expand Up @@ -176,7 +176,8 @@ func (v *baseStatusSuite) TestDefaultInstallStatus() {
{
name: "Process Agent",
shouldBePresent: true,
shouldNotContain: []string{"Status: Not running or unreachable"},
shouldContain: processAgentContain,
shouldNotContain: processAgentNotContain,
},
{
name: "Remote Configuration",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,7 @@ func (v *linuxStatusSuite) TestChecksMetadataUnix() {

fetchAndCheckStatus(&v.baseStatusSuite, expectedSections)
}

func (v *linuxStatusSuite) TestDefaultInstallStatus() {
v.testDefaultInstallStatus([]string{"Status: Not running or unreachable"}, nil)
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ func (v *windowsStatusSuite) TestChecksMetadataWindows() {

fetchAndCheckStatus(&v.baseStatusSuite, expectedSections)
}

func (v *windowsStatusSuite) TestDefaultInstallStatus() {
v.testDefaultInstallStatus(nil, []string{"Status: Not running or unreachable"})
}
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
network_config:
enabled: true
runtime_security_config:
enabled: true
enabled: true
8 changes: 5 additions & 3 deletions test/new-e2e/tests/installer/script/default_script_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ import (
"os"
"strings"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/client"
e2eos "github.com/DataDog/test-infra-definitions/components/os"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"

"github.com/DataDog/datadog-agent/pkg/util/testutil/flake"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/utils/e2e/client"
)

type installScriptDefaultSuite struct {
Expand All @@ -40,6 +41,7 @@ func (s *installScriptDefaultSuite) TestInstall() {
"DD_SITE=datadoghq.com",
"DD_APM_INSTRUMENTATION_LIBRARIES=java:1,python:2,js:5,dotnet:3",
"DD_APM_INSTRUMENTATION_ENABLED=host",
"DD_NETWORK_CONFIG_ENABLED=true", // necessary to get process-agent running
"DD_RUNTIME_SECURITY_CONFIG_ENABLED=true",
"DD_SBOM_CONTAINER_IMAGE_ENABLED=true",
"DD_SBOM_HOST_ENABLED=true",
Expand Down
Loading

0 comments on commit 369a8db

Please sign in to comment.