diff --git a/.chloggen/fix_supervisor-windows-signalling.yaml b/.chloggen/fix_supervisor-windows-signalling.yaml new file mode 100644 index 000000000000..525a96c7e08f --- /dev/null +++ b/.chloggen/fix_supervisor-windows-signalling.yaml @@ -0,0 +1,13 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: cmd/opampsupervisor + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Fix supervisor support for Windows." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [34570] diff --git a/.github/workflows/e2e-tests-windows.yml b/.github/workflows/e2e-tests-windows.yml new file mode 100644 index 000000000000..a04211b05421 --- /dev/null +++ b/.github/workflows/e2e-tests-windows.yml @@ -0,0 +1,82 @@ +name: e2e-tests-windows + +on: + push: + branches: + - main + tags: + - "v[0-9]+.[0-9]+.[0-9]+*" + paths-ignore: + - "**/README.md" + pull_request: + paths-ignore: + - "**/README.md" + merge_group: + +env: + # Make sure to exit early if cache segment download times out after 2 minutes. + # We limit cache download as a whole to 5 minutes. + SEGMENT_DOWNLOAD_TIMEOUT_MINS: 2 + +jobs: + collector-build: + runs-on: windows-latest + if: ${{ github.actor != 'dependabot[bot]' && (contains(github.event.pull_request.labels.*.name, 'Run Windows') || github.event_name == 'push' || github.event_name == 'merge_group') }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.21.12" + cache: false + - name: Cache Go + id: go-mod-cache + timeout-minutes: 25 + uses: actions/cache@v4 + with: + path: | + ~\go\pkg\mod + ~\AppData\Local\go-build + key: go-build-cache-${{ runner.os }}-${{ matrix.group }}-go-${{ hashFiles('**/go.sum') }} + - name: Install dependencies + if: steps.go-mod-cache.outputs.cache-hit != 'true' + run: make -j2 gomoddownload + - name: Build Collector + run: make otelcontribcol + - name: Upload Collector Binary + uses: actions/upload-artifact@v4 + with: + name: collector-binary + path: ./bin/* + + supervisor-test: + runs-on: windows-latest + if: ${{ github.actor != 'dependabot[bot]' && (contains(github.event.pull_request.labels.*.name, 'Run Windows') || github.event_name == 'push' || github.event_name == 'merge_group') }} + needs: [collector-build] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.21.12" + cache: false + - name: Cache Go + id: go-mod-cache + timeout-minutes: 25 + uses: actions/cache@v4 + with: + path: | + ~\go\pkg\mod + ~\AppData\Local\go-build + key: go-build-cache-${{ runner.os }}-${{ matrix.group }}-go-${{ hashFiles('**/go.sum') }} + - name: Install dependencies + if: steps.go-mod-cache.outputs.cache-hit != 'true' + run: make -j2 gomoddownload + - name: Download Collector Binary + uses: actions/download-artifact@v4 + with: + name: collector-binary + path: bin/ + - name: Run opampsupervisor e2e tests + run: | + cd cmd/opampsupervisor + go test -v --tags=e2e diff --git a/cmd/opampsupervisor/e2e_test.go b/cmd/opampsupervisor/e2e_test.go index c02ab346f455..22a1bcb755bd 100644 --- a/cmd/opampsupervisor/e2e_test.go +++ b/cmd/opampsupervisor/e2e_test.go @@ -172,6 +172,7 @@ func getSupervisorConfig(t *testing.T, configType string, extraConfigData map[st if runtime.GOOS == "windows" { extension = ".exe" } + configData := map[string]string{ "goos": runtime.GOOS, "goarch": runtime.GOARCH, @@ -184,7 +185,10 @@ func getSupervisorConfig(t *testing.T, configType string, extraConfigData map[st } err = templ.Execute(&buf, configData) require.NoError(t, err) - cfgFile, _ := os.CreateTemp(t.TempDir(), "config_*.yaml") + cfgFile, err := os.CreateTemp(t.TempDir(), "config_*.yaml") + require.NoError(t, err) + t.Cleanup(func() { cfgFile.Close() }) + _, err = cfgFile.Write(buf.Bytes()) require.NoError(t, err) @@ -617,11 +621,13 @@ func TestSupervisorReportsEffectiveConfig(t *testing.T) { tempDir := t.TempDir() testKeyFile, err := os.CreateTemp(tempDir, "confKey") require.NoError(t, err) + t.Cleanup(func() { testKeyFile.Close() }) + n, err := testKeyFile.Write([]byte(testKeyFile.Name())) require.NoError(t, err) require.NotZero(t, n) - colCfgTpl, err := os.ReadFile(path.Join("testdata", "collector", "split_config.yaml")) + colCfgTpl, err := os.ReadFile(filepath.Join("testdata", "collector", "split_config.yaml")) require.NoError(t, err) templ, err := template.New("").Parse(string(colCfgTpl)) @@ -770,9 +776,11 @@ func createSimplePipelineCollectorConf(t *testing.T) (*bytes.Buffer, []byte, *os tempDir := t.TempDir() inputFile, err := os.CreateTemp(tempDir, "input_*.yaml") require.NoError(t, err) + t.Cleanup(func() { inputFile.Close() }) outputFile, err := os.CreateTemp(tempDir, "output_*.yaml") require.NoError(t, err) + t.Cleanup(func() { outputFile.Close() }) colCfgTpl, err := os.ReadFile(path.Join(wd, "testdata", "collector", "simple_pipeline.yaml")) require.NoError(t, err) diff --git a/cmd/opampsupervisor/go.mod b/cmd/opampsupervisor/go.mod index 8ef28bc2b858..65290228e47d 100644 --- a/cmd/opampsupervisor/go.mod +++ b/cmd/opampsupervisor/go.mod @@ -17,6 +17,7 @@ require ( go.opentelemetry.io/collector/semconv v0.107.1-0.20240816132030-9fd84668bb02 go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 + golang.org/x/sys v0.21.0 google.golang.org/protobuf v1.34.2 gopkg.in/yaml.v3 v3.0.1 ) @@ -32,5 +33,4 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.21.0 // indirect ) diff --git a/cmd/opampsupervisor/supervisor/commander/commander.go b/cmd/opampsupervisor/supervisor/commander/commander.go index b4e4c74c03b2..c9891d7b7d60 100644 --- a/cmd/opampsupervisor/supervisor/commander/commander.go +++ b/cmd/opampsupervisor/supervisor/commander/commander.go @@ -11,7 +11,6 @@ import ( "os/exec" "path/filepath" "sync/atomic" - "syscall" "time" "go.uber.org/zap" @@ -73,26 +72,28 @@ func (c *Commander) Start(ctx context.Context) error { c.logger.Debug("Starting agent", zap.String("agent", c.cfg.Executable)) logFilePath := filepath.Join(c.logsDir, "agent.log") - logFile, err := os.Create(logFilePath) + stdoutFile, err := os.Create(logFilePath) if err != nil { return fmt.Errorf("cannot create %s: %w", logFilePath, err) } c.cmd = exec.CommandContext(ctx, c.cfg.Executable, c.args...) // #nosec G204 + c.cmd.SysProcAttr = sysProcAttrs() // Capture standard output and standard error. // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/21072 - c.cmd.Stdout = logFile - c.cmd.Stderr = logFile + c.cmd.Stdout = stdoutFile + c.cmd.Stderr = stdoutFile if err := c.cmd.Start(); err != nil { + stdoutFile.Close() return err } c.logger.Debug("Agent process started", zap.Int("pid", c.cmd.Process.Pid)) c.running.Store(1) - go c.watch() + go c.watch(stdoutFile) return nil } @@ -106,7 +107,9 @@ func (c *Commander) Restart(ctx context.Context) error { return c.Start(ctx) } -func (c *Commander) watch() { +func (c *Commander) watch(stdoutFile *os.File) { + defer stdoutFile.Close() + err := c.cmd.Wait() // cmd.Wait returns an exec.ExitError when the Collector exits unsuccessfully or stops @@ -160,7 +163,7 @@ func (c *Commander) Stop(ctx context.Context) error { c.logger.Debug("Stopping agent process", zap.Int("pid", pid)) // Gracefully signal process to stop. - if err := c.cmd.Process.Signal(syscall.SIGTERM); err != nil { + if err := sendShutdownSignal(c.cmd.Process); err != nil { return err } @@ -181,7 +184,7 @@ func (c *Commander) Stop(ctx context.Context) error { c.logger.Debug( "Agent process is not responding to SIGTERM. Sending SIGKILL to kill forcibly.", zap.Int("pid", pid)) - if innerErr = c.cmd.Process.Signal(syscall.SIGKILL); innerErr != nil { + if innerErr = c.cmd.Process.Signal(os.Kill); innerErr != nil { return } }() diff --git a/cmd/opampsupervisor/supervisor/commander/commander_others.go b/cmd/opampsupervisor/supervisor/commander/commander_others.go new file mode 100644 index 000000000000..15fe0f7e97c4 --- /dev/null +++ b/cmd/opampsupervisor/supervisor/commander/commander_others.go @@ -0,0 +1,20 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !windows + +package commander + +import ( + "os" + "syscall" +) + +func sendShutdownSignal(process *os.Process) error { + return process.Signal(os.Interrupt) +} + +func sysProcAttrs() *syscall.SysProcAttr { + // On non-windows systems, no extra attributes are needed. + return nil +} diff --git a/cmd/opampsupervisor/supervisor/commander/commander_windows.go b/cmd/opampsupervisor/supervisor/commander/commander_windows.go new file mode 100644 index 000000000000..3256f1d570da --- /dev/null +++ b/cmd/opampsupervisor/supervisor/commander/commander_windows.go @@ -0,0 +1,40 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build windows + +package commander + +import ( + "os" + "syscall" + + "golang.org/x/sys/windows" +) + +var ( + kernel32API = windows.NewLazySystemDLL("kernel32.dll") + + ctrlEventProc = kernel32API.NewProc("GenerateConsoleCtrlEvent") +) + +func sendShutdownSignal(process *os.Process) error { + // signaling with os.Interrupt is not supported on windows systems, + // so we need to use the windows API to properly send a graceful shutdown signal. + // See: https://learn.microsoft.com/en-us/windows/console/generateconsolectrlevent + r, _, e := ctrlEventProc.Call(syscall.CTRL_BREAK_EVENT, uintptr(process.Pid)) + if r == 0 { + return e + } + + return nil +} + +func sysProcAttrs() *syscall.SysProcAttr { + // By default, a ctrl-break event applies to a whole process group, which ends up + // shutting down the supervisor. Instead, we spawn the collector in its own process + // group, so that sending a ctrl-break event shuts down just the collector. + return &syscall.SysProcAttr{ + CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP, + } +} diff --git a/cmd/opampsupervisor/supervisor/supervisor.go b/cmd/opampsupervisor/supervisor/supervisor.go index 1d7150f6b2dd..804face7d1ae 100644 --- a/cmd/opampsupervisor/supervisor/supervisor.go +++ b/cmd/opampsupervisor/supervisor/supervisor.go @@ -459,17 +459,11 @@ func (s *Supervisor) startOpAMPClient() error { func (s *Supervisor) startOpAMPServer() error { s.opampServer = server.New(newLoggerFromZap(s.logger)) - var err error - s.opampServerPort, err = s.findRandomPort() - if err != nil { - return err - } - s.logger.Debug("Starting OpAMP server...") connected := &atomic.Bool{} - err = s.opampServer.Start(flattenedSettings{ + err := s.opampServer.Start(flattenedSettings{ endpoint: fmt.Sprintf("localhost:%d", s.opampServerPort), onConnectingFunc: func(_ *http.Request) (bool, int) { // Only allow one agent to be connected the this server at a time. diff --git a/cmd/opampsupervisor/testdata/collector/split_config.yaml b/cmd/opampsupervisor/testdata/collector/split_config.yaml index 1d2e4c7c1cc0..f3e688203e82 100644 --- a/cmd/opampsupervisor/testdata/collector/split_config.yaml +++ b/cmd/opampsupervisor/testdata/collector/split_config.yaml @@ -11,4 +11,4 @@ service: exporters: [nop] telemetry: resource: - test_key: "${file:{{.TestKeyFile}}}" \ No newline at end of file + test_key: '${file:{{.TestKeyFile}}}' \ No newline at end of file diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml index e86ab6cb25f0..6fba120fffc6 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml @@ -12,7 +12,7 @@ capabilities: accepts_opamp_connection_settings: true storage: - directory: "{{.storage_dir}}" + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_agent_description.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_agent_description.yaml index be601485ee9b..404fa3bb500b 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_agent_description.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_agent_description.yaml @@ -12,7 +12,7 @@ capabilities: accepts_restart_command: true storage: - directory: "{{.storage_dir}}" + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml index 75490189b904..f713be57c24b 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml @@ -12,7 +12,7 @@ capabilities: accepts_restart_command: true storage: - directory: "{{.storage_dir}}" + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml index ca0d9378887d..34c45f4d752c 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml @@ -11,7 +11,7 @@ capabilities: reports_remote_config: false storage: - directory: "{{.storage_dir}}" + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_persistence.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_persistence.yaml index 7595f758f851..600e0c19f7bb 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_persistence.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_persistence.yaml @@ -11,7 +11,7 @@ capabilities: reports_remote_config: true storage: - directory: {{.storage_dir}} + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}} diff --git a/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml b/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml index 68881cd4970c..8c5f028e3f6e 100644 --- a/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml +++ b/cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml @@ -11,7 +11,7 @@ capabilities: reports_remote_config: true storage: - directory: "{{.storage_dir}}" + directory: '{{.storage_dir}}' agent: executable: ../../bin/otelcontribcol_darwin_arm64