Skip to content

Commit

Permalink
Set env vars for service instead of machine (#3930)
Browse files Browse the repository at this point in the history
* Set env vars for service instead of machine

* Fix copy and pasted typo

* Remove unused variable

* Update chocolatey to use env vars at the service scope

* Fix check for "SPLUNK_ACCESS_TOKEN"

* Handle upgrade in chocolatey

* Get-Package not available on all Windows versions

* Add messages to track stage on chocolatey install

* Adding more messages to chocolatey script

* Fix service on the registry but no env

* Restore $regKey on install.ps1

* Add helper allow running the collector manually on Windows

* Add breaking change to CHANGELOG.md

* Add -config_path switch to install.ps1

* Fix pipeline collector for Windows ZC IIS test

* MSI to set SPLUNK_CONFIG at the service scope

* Minor doc updates

* Update ansible tests for Windows env vars

* Correct type of *_ENABLE_PROFILING value

* ansible: Windows deployment updates

* fix yamllint issues

* fix yamllint

* Output vagrant.err in case of failure

* Fix splunk_trace_url check

* Always try to display the vagrant errors

* Check if event log has some info about failure

* Remove event log steps: running on mac not Windows

* Launch vagrant

* Init vagrant

* Drop launch vagrant step

* Fix ansible Windows

* Fix yamllint issues

* Remove vagrant init

* Updates to pass local run of molecule tests

* Update chef deployment

* Proper data type for multi_string registry entry

* Fix var name typo

* Fix order of collector environment variables

* Experiment custom_vars test

* Pay attention to conditional variables

* First try with Puppet

* Fix syntax for Puppet

* Back to curly brackets

* Try extension fixes

* Fix conditional

* Remove to_s

* Use empty instead of is_empty

* Fix re-declaration of registry_key

* Fix puppet lint issue

* Do not re-use registry_key

* Use array instead of multi_string

* Ensure side effects

* Update test to use REG_MULTI_SZ

* Immutability?

* Fix lint error

* Fix test

* Removing name re-use

* Update CHANGELOG and README for Puppet

* Updates to mass deployments docs

* Update last version deploying with machine wide env vars

* Sort service environment variables set via PowerShell

* Improve handling of collector restart in Ansible

* Remove extra double-quotes in SPLUNK_CONFIG for Ansible

* Fix Ansible verify SPLUNK_CONFIG strings

* Fix Windows testing checking for legacy versions of the collector

* Update CHANGELOG.md

Co-authored-by: Ryan Fitzpatrick <10867373+rmfitzpatrick@users.noreply.github.com>

* Update last version still setting machine wide env vars

---------

Co-authored-by: Ryan Fitzpatrick <10867373+rmfitzpatrick@users.noreply.github.com>
  • Loading branch information
pjanotti and rmfitzpatrick authored Jan 23, 2024
1 parent 3d2d87b commit 1da0e2c
Show file tree
Hide file tree
Showing 33 changed files with 529 additions and 569 deletions.
45 changes: 31 additions & 14 deletions .github/workflows/scripts/win-test-services.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,25 @@ param (
$ErrorActionPreference = 'Stop'
Set-PSDebug -Trace 1

function check_regkey([string]$name, [string]$value) {
$actual = Get-ItemPropertyValue -PATH "HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" -name "$name"
if ( "$value" -ne "$actual" ) {
throw "Environment variable $name is not properly set. Found: '$actual', Expected '$value'"
# Verifies that the splunk-otel-collector service sees the expected environment variables.
#
# The variables are read from the "Environment" value of the service registry key,
# which is iterated as a list of "NAME=value" strings. If that value cannot be read,
# the collector is assumed to be an older version and the machine-scope environment
# variables are checked instead.
#
# Parameters:
#   expected_env_vars - hashtable mapping each variable name to its expected value.
#
# Throws if any expected variable is missing or has a different value.
function check_collector_svc_environment([hashtable]$expected_env_vars) {
    $actual_env_vars = @{}
    $env_array = $null

    # Only the registry read is guarded: a failure here is what signals an older
    # install. Parsing problems must not be mistaken for that case.
    try {
        $env_array = Get-ItemPropertyValue -Path "HKLM:\SYSTEM\CurrentControlSet\Services\splunk-otel-collector" -Name "Environment"
    } catch {
        Write-Host "Assuming an old version of the collector with environment variables at the machine scope"
        $actual_env_vars = [Environment]::GetEnvironmentVariables("Machine")
    }

    if ($null -ne $env_array) {
        foreach ($entry in $env_array) {
            # Split on the first "=" only, since values (e.g. URLs) may contain "=".
            $key, $value = $entry.Split("=", 2)
            # .Add throws on a duplicate name, surfacing a malformed service environment.
            $actual_env_vars.Add($key, $value)
        }
    }

    foreach ($key in $expected_env_vars.Keys) {
        $expected_value = $expected_env_vars[$key]
        $actual_value = $actual_env_vars[$key]
        if ($expected_value -ne $actual_value) {
            throw "Environment variable $key is not properly set. Found: '$actual_value', Expected '$expected_value'"
        }
    }
}

Expand All @@ -24,16 +39,18 @@ function service_running([string]$name) {
$api_url = "https://api.${realm}.signalfx.com"
$ingest_url = "https://ingest.${realm}.signalfx.com"

check_regkey -name "SPLUNK_CONFIG" -value "${env:PROGRAMDATA}\Splunk\OpenTelemetry Collector\${mode}_config.yaml"
check_regkey -name "SPLUNK_ACCESS_TOKEN" -value "$access_token"
check_regkey -name "SPLUNK_REALM" -value "$realm"
check_regkey -name "SPLUNK_API_URL" -value "$api_url"
check_regkey -name "SPLUNK_INGEST_URL" -value "$ingest_url"
check_regkey -name "SPLUNK_TRACE_URL" -value "${ingest_url}/v2/trace"
check_regkey -name "SPLUNK_HEC_URL" -value "${ingest_url}/v1/log"
check_regkey -name "SPLUNK_HEC_TOKEN" -value "$access_token"
check_regkey -name "SPLUNK_BUNDLE_DIR" -value "${env:PROGRAMFILES}\Splunk\OpenTelemetry Collector\agent-bundle"
check_regkey -name "SPLUNK_MEMORY_TOTAL_MIB" -value "$memory"
check_collector_svc_environment @{
"SPLUNK_CONFIG" = "${env:PROGRAMDATA}\Splunk\OpenTelemetry Collector\${mode}_config.yaml";
"SPLUNK_ACCESS_TOKEN" = "$access_token";
"SPLUNK_REALM" = "$realm";
"SPLUNK_API_URL" = "$api_url";
"SPLUNK_INGEST_URL" = "$ingest_url";
"SPLUNK_TRACE_URL" = "${ingest_url}/v2/trace";
"SPLUNK_HEC_URL" = "${ingest_url}/v1/log";
"SPLUNK_HEC_TOKEN" = "$access_token";
"SPLUNK_BUNDLE_DIR" = "${env:PROGRAMFILES}\Splunk\OpenTelemetry Collector\agent-bundle";
"SPLUNK_MEMORY_TOTAL_MIB" = "$memory";
}

if ((service_running -name "splunk-otel-collector")) {
write-host "splunk-otel-collector service is running."
Expand Down
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

### 🛑 Breaking changes 🛑

- (Splunk) On Windows the `SPLUNK_*` environment variables were moved from the machine scope to the collector service scope. This avoids collisions with other agents and instrumentation. If any of these environment variables are required by your apps, please adopt them directly. ([#3930](https://github.com/signalfx/splunk-otel-collector/pull/3930))

## v0.92.0

This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.92.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.92.0) and the [opentelemetry-collector-contrib v0.92.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.92.0) releases where appropriate.
Expand Down Expand Up @@ -86,6 +90,7 @@ This Splunk OpenTelemetry Collector release includes changes from the [opentelem
- (Core) `otlpexporter`: remove dependency of otlphttpreceiver on otlpexporter ([#6454](https://github.com/open-telemetry/opentelemetry-collector/issues/6454))

## v0.91.3

- (Splunk) Properly sign and associate changelog to release. This should be otherwise identical to v0.91.2

## v0.91.2
Expand Down Expand Up @@ -113,7 +118,6 @@ This Splunk OpenTelemetry Collector release includes changes from the [opentelem
- (Splunk) Adopt `awss3` exporter ([#4117](https://github.com/signalfx/splunk-otel-collector/pull/4117))
- (Splunk) Convert loglevel to verbosity on logging exporter ([#4097](https://github.com/signalfx/splunk-otel-collector/pull/4097))


## v0.91.1

### 💡 Enhancements 💡
Expand Down Expand Up @@ -301,7 +305,6 @@ This Splunk OpenTelemetry Collector release includes changes from the [opentelem
- (Contrib) `zipkinreceiver`: Return BadRequest in case of permanent errors ([#4335](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/4335))
- (Core) `exporterhelper`: fix bug with queue size and capacity metrics ([#8682](https://github.com/open-telemetry/opentelemetry-collector/issues/8682))


## v0.88.0

This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.88.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.88.0) and the [opentelemetry-collector-contrib v0.88.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.88.0) releases where appropriate.
Expand Down
3 changes: 3 additions & 0 deletions deployments/ansible/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## unreleased

- On Windows the `SPLUNK_*` environment variables were moved from the machine scope to the collector service scope.
It is possible that some instrumentations are relying on the machine-wide environment variables set by the installation. ([#3930](https://github.com/signalfx/splunk-otel-collector/pull/3930))

## ansible-v0.24.0

### 🚩 Deprecations 🚩
Expand Down
4 changes: 4 additions & 0 deletions deployments/ansible/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Currently, the following Windows versions are supported:
- Windows Server 2019 64-bit
- Windows Server 2022 64-bit

On Windows, the collector is installed as a Windows service and its environment
variables are set at the service scope, i.e., they are only available to the
collector service and not to the entire machine.

Ansible requires PowerShell 3.0 or newer and at least .NET 4.0 to be installed on the Windows host.
A WinRM listener should be created and activated.
For setting up the Windows host, refer to the [Ansible Docs](https://docs.ansible.com/ansible/latest/user_guide/windows_setup.html).
Expand Down
30 changes: 19 additions & 11 deletions deployments/ansible/molecule/custom_vars/windows-verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
gather_facts: true
become: no
vars:
reg_values:
collector_reg_values:
SPLUNK_CONFIG: '{{ ansible_env.ProgramData }}\Splunk\OpenTelemetry Collector\custom_config.yml'
SPLUNK_INGEST_URL: https://fake-splunk-ingest.com
SPLUNK_API_URL: https://fake-splunk-api.com
Expand All @@ -15,11 +15,10 @@
SPLUNK_BALLAST_SIZE_MIB: "100"
MY_CUSTOM_VAR1: value1
MY_CUSTOM_VAR2: value2
SIGNALFX_DOTNET_TRACER_HOME: '{{ ansible_env.ProgramFiles }}\SignalFx\.NET Tracing\'
dotnet_reg_values:
COR_ENABLE_PROFILING: "true"
iis_reg_values:
COR_ENABLE_PROFILING: "1"
COR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}"
CORECLR_ENABLE_PROFILING: "true"
CORECLR_ENABLE_PROFILING: "1"
CORECLR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}"
SIGNALFX_ENV: test-environment
SIGNALFX_SERVICE_NAME: test-service-name
Expand All @@ -28,6 +27,8 @@
SIGNALFX_GLOBAL_TAGS: splunk.zc.method:signalfx-dotnet-tracing-1.0.0,dotnet-tag:dotnet-tag-value
SIGNALFX_DOTNET_VAR1: dotnet-value1
SIGNALFX_DOTNET_VAR2: dotnet-value2
machine_reg_values:
SIGNALFX_DOTNET_TRACER_HOME: '{{ ansible_env.ProgramFiles }}\SignalFx\.NET Tracing\'
tasks:
- name: Check splunk-otel-collector service
ansible.windows.win_service:
Expand All @@ -36,10 +37,6 @@
check_mode: yes
register: service_status

- name: Assert splunk-otel-collector service status
assert:
that: not service_status.changed

- name: Check fluentdwinsvc service
ansible.windows.win_service:
name: fluentdwinsvc
Expand Down Expand Up @@ -124,12 +121,23 @@
- name: Verify IIS env vars
assert:
that: (item.key + '=' + item.value) in iis_env.value
loop: "{{ dotnet_reg_values | dict2items }}"
loop: "{{ iis_reg_values | dict2items }}"

- name: Get splunk-otel-collector service env vars
ansible.windows.win_reg_stat:
path: HKLM:\SYSTEM\CurrentControlSet\Services\splunk-otel-collector
name: Environment
register: collector_env

- name: Verify splunk-otel-collector service env vars
assert:
that: (item.key + '=' + item.value) in collector_env.value
loop: "{{ collector_reg_values | dict2items }}"

- name: Verify env vars
include_tasks: ../shared/verify_registry_key.yml
vars:
path: HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment
name: "{{ item.key }}"
value: "{{ item.value }}"
loop: "{{ reg_values | combine(dotnet_reg_values) | dict2items }}"
loop: "{{ machine_reg_values | dict2items }}"
23 changes: 11 additions & 12 deletions deployments/ansible/molecule/default/windows-verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
gather_facts: true
become: no
vars:
reg_values:
collector_reg_values:
SPLUNK_CONFIG: '{{ ansible_env.ProgramData }}\Splunk\OpenTelemetry Collector\agent_config.yaml'
SPLUNK_ACCESS_TOKEN: fake-token
SPLUNK_REALM: fake-realm
Expand All @@ -21,10 +21,6 @@
check_mode: yes
register: service_status

- name: Assert splunk-otel-collector service status
assert:
that: not service_status.changed

- name: Check fluentdwinsvc service
ansible.windows.win_service:
name: fluentdwinsvc
Expand All @@ -36,10 +32,13 @@
assert:
that: not service_status.exists

- name: Verify env vars
include_tasks: ../shared/verify_registry_key.yml
vars:
path: HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment
name: "{{ item.key }}"
value: "{{ item.value }}"
loop: "{{ reg_values | dict2items }}"
- name: Get splunk-otel-collector service env vars
ansible.windows.win_reg_stat:
path: HKLM:\SYSTEM\CurrentControlSet\Services\splunk-otel-collector
name: Environment
register: collector_env

- name: Verify splunk-otel-collector service env vars
assert:
that: (item.key + '=' + item.value) in collector_env.value
loop: "{{ collector_reg_values | dict2items }}"
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
gather_facts: true
become: no
vars:
reg_values:
collector_reg_values:
SPLUNK_CONFIG: '{{ ansible_env.ProgramData }}\Splunk\OpenTelemetry Collector\agent_config.yaml'
SPLUNK_ACCESS_TOKEN: fake-token
SPLUNK_REALM: fake-realm
Expand All @@ -13,16 +13,17 @@
SPLUNK_HEC_URL: https://ingest.fake-realm.signalfx.com/v1/log
SPLUNK_INGEST_URL: https://ingest.fake-realm.signalfx.com
SPLUNK_TRACE_URL: https://ingest.fake-realm.signalfx.com/v2/trace
SIGNALFX_DOTNET_TRACER_HOME: '{{ ansible_env.ProgramFiles }}\SignalFx\.NET Tracing\'
dotnet_reg_values:
COR_ENABLE_PROFILING: "true"
iis_reg_values:
COR_ENABLE_PROFILING: "1"
COR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}"
CORECLR_ENABLE_PROFILING: "true"
CORECLR_ENABLE_PROFILING: "1"
CORECLR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}"
SIGNALFX_ENV: ""
SIGNALFX_SERVICE_NAME: ""
SIGNALFX_PROFILER_ENABLED: "false"
SIGNALFX_PROFILER_MEMORY_ENABLED: "false"
machine_reg_values:
SIGNALFX_DOTNET_TRACER_HOME: '{{ ansible_env.ProgramFiles }}\SignalFx\.NET Tracing\'
tasks:
- name: Check splunk-otel-collector service
ansible.windows.win_service:
Expand All @@ -31,10 +32,6 @@
check_mode: yes
register: service_status

- name: Assert splunk-otel-collector service status
assert:
that: not service_status.changed

- name: Get installed signalfx-dotnet-tracing MSI version
ansible.windows.win_shell: |
$msi_version = ""
Expand All @@ -44,19 +41,11 @@
echo $msi_version
register: msi_version

- name: Add SIGNALFX_GLOBAL_TAGS to dotnet_reg_values
- name: Add SIGNALFX_GLOBAL_TAGS to iis_reg_values
set_fact:
dotnet_reg_values: |-
iis_reg_values: |-
{%- set tags = "splunk.zc.method:signalfx-dotnet-tracing-" + (msi_version.stdout | trim) -%}
{{ dotnet_reg_values | combine({"SIGNALFX_GLOBAL_TAGS": tags}) }}
- name: Verify collector env vars
include_tasks: ../shared/verify_registry_key.yml
vars:
path: HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment
name: "{{ item.key }}"
value: "{{ item.value }}"
loop: "{{ reg_values | dict2items }}"
{{ iis_reg_values | combine({"SIGNALFX_GLOBAL_TAGS": tags}) }}
- name: Get IIS env vars
ansible.windows.win_reg_stat:
Expand All @@ -67,12 +56,31 @@
- name: Verify IIS env vars
assert:
that: (item.key + '=' + item.value) in iis_env.value
loop: "{{ dotnet_reg_values | dict2items }}"
loop: "{{ iis_reg_values | dict2items }}"

- name: Get splunk-otel-collector service env vars
ansible.windows.win_reg_stat:
path: HKLM:\SYSTEM\CurrentControlSet\Services\splunk-otel-collector
name: Environment
register: collector_env

- name: Verify splunk-otel-collector service env vars
assert:
that: (item.key + '=' + item.value) in collector_env.value
loop: "{{ collector_reg_values | dict2items }}"

- name: Verify .NET tracing env vars were not added to the system
include_tasks: ../shared/verify_registry_key.yml
vars:
path: HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment
name: "{{ item.key }}"
exists: false
loop: "{{ dotnet_reg_values | dict2items }}"
loop: "{{ iis_reg_values | dict2items }}"

- name: Verify .NET tracing MSI env vars were added to the system
include_tasks: ../shared/verify_registry_key.yml
vars:
path: HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment
name: "{{ item.key }}"
value: "{{ item.value }}"
loop: "{{ machine_reg_values | dict2items }}"
4 changes: 2 additions & 2 deletions deployments/ansible/roles/collector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,9 @@ For proxy options, see the [Windows Proxy](#windows-proxy) section.
`signalfx_dotnet_auto_instrumentation_additional_options` option to
enable/configure auto instrumentation for ***only*** IIS applications:
```yaml
COR_ENABLE_PROFILING: true # Required
COR_ENABLE_PROFILING: "1" # Required
COR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}" # Required
CORECLR_ENABLE_PROFILING: true # Required
CORECLR_ENABLE_PROFILING: "1" # Required
CORECLR_PROFILER: "{B4C89B0F-9908-4F73-9F59-0D77C5A06874}" # Required
SIGNALFX_ENV: "{{ signalfx_dotnet_auto_instrumentation_environment }}"
SIGNALFX_GLOBAL_TAGS: "{{ signalfx_dotnet_auto_instrumentation_global_tags }}"
Expand Down
21 changes: 18 additions & 3 deletions deployments/ansible/roles/collector/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,24 @@
- (start_service | default(true) | bool)

- name: Restart Splunk OpenTelemetry Collector for windows
ansible.windows.win_service:
name: splunk-otel-collector
state: restarted
ansible.windows.win_shell: |
Try {
Restart-Service splunk-otel-collector
} Catch {
# Try to get some more helpful information given that the error message is not very helpful
Write-Host "Error restarting splunk-otel-collector service: $_"
Write-Host "Splunk OpenTelemetry Collector service registry entry:"
Get-Item HKLM:\SYSTEM\CurrentControlSet\Services\splunk-otel-collector
Write-Host "Last 15 Application log events:"
Get-WinEvent -Log Application -MaxEvents 15 | Format-List TimeCreated, ProviderName, Message
Write-Host "Last 15 System log events:"
Get-WinEvent -Log System -MaxEvents 15 | Format-List TimeCreated, ProviderName, Message
Throw $_
}
listen: "restart windows splunk-otel-collector"
when:
- (start_service | default(true) | bool)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
ansible.windows.win_package:
path: "{{otel_msi_package.dest}}"
state: present
notify: "restart windows splunk-otel-collector"

- name: Merge custom config into the default config
ansible.builtin.import_tasks: config_override.yml
Expand All @@ -48,11 +47,9 @@
content: '{{ updated_config | to_nice_yaml (indent=2) }}'
dest: "{{ splunk_otel_collector_config }}"
when: splunk_config_override != ''
notify: "restart windows splunk-otel-collector"

- name: Push Custom Config file for splunk-otel-collector, If provided
ansible.windows.win_template:
src: "{{splunk_otel_collector_config_source}}"
dest: "{{splunk_otel_collector_config}}"
when: splunk_otel_collector_config_source != ""
notify: "restart windows splunk-otel-collector"
Loading

0 comments on commit 1da0e2c

Please sign in to comment.