Skip to content

Commit

Permalink
chore: Add OTEL_EXCLUDE_METRICS (#11317)
Browse files Browse the repository at this point in the history
Adds an OTEL_EXCLUDE_METRICS env var that excludes selected metrics from export,
reducing noise and cost. The metrics to exclude are given as a comma-separated
list of metric-name prefixes.
  • Loading branch information
spalladino authored Jan 20, 2025
1 parent e385ea9 commit 37d4fa8
Show file tree
Hide file tree
Showing 13 changed files with 83 additions and 2 deletions.
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.bootNode.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.bootNode.service.nodePort }}
- containerPort: {{ .Values.bootNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/faucet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ spec:
value: faucet
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.faucet.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.faucet.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ spec:
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverAgent.otelExcludeMetrics }}"
resources:
{{- toYaml .Values.proverAgent.resources | nindent 12 }}
{{- end }}
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-broker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ spec:
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverBroker.otelExcludeMetrics }}"
resources:
{{- toYaml .Values.proverBroker.resources | nindent 12 }}
volumes:
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.proverNode.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.proverNode.service.nodePort }}
- containerPort: {{ .Values.proverNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ spec:
value: "{{ .Values.aztec.realProofs }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.pxe.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.pxe.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/transaction-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ spec:
value: "{{ .Values.bot.stopIfUnhealthy }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.bot.otelExcludeMetrics }}"
ports:
- name: http
containerPort: {{ .Values.bot.service.nodePort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ spec:
value: "{{ .Values.storage.worldStateMapSize }}"
- name: USE_GCLOUD_OBSERVABILITY
value: "{{ .Values.telemetry.useGcloudObservability }}"
- name: OTEL_EXCLUDE_METRICS
value: "{{ .Values.validator.otelExcludeMetrics }}"
ports:
- containerPort: {{ .Values.validator.service.nodePort }}
- containerPort: {{ .Values.validator.service.p2pTcpPort }}
Expand Down
9 changes: 9 additions & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ bootNode:
stakingAssetAddress: ""
storageSize: "1Gi"
dataDir: "/data"
otelExcludeMetrics: ""

validator:
# If true, the validator will use its peers to serve as the boot node.
Expand Down Expand Up @@ -130,6 +131,7 @@ validator:
dataDir: "/data"
l1FixedPriorityFeePerGas: ""
l1GasLimitBufferPercentage: ""
otelExcludeMetrics: ""

proverNode:
proverPublisherPrivateKey: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
Expand Down Expand Up @@ -166,6 +168,7 @@ proverNode:
intervalMs: 1000
maxParallelRequests: 100
failedProofStore: "gs://aztec-develop/spartan/failed-proofs"
otelExcludeMetrics: ""

pxe:
logLevel: "debug; info: aztec:simulator, json-rpc"
Expand All @@ -182,6 +185,7 @@ pxe:
requests:
memory: "4Gi"
cpu: "1"
otelExcludeMetrics: ""

bot:
enabled: true
Expand Down Expand Up @@ -211,6 +215,7 @@ bot:
requests:
memory: "4Gi"
cpu: "1"
otelExcludeMetrics: ""

ethereum:
externalHost: ""
Expand All @@ -237,6 +242,7 @@ ethereum:
cpu: "1"
storageSize: "80Gi"
deployL1ContractsPrivateKey:
otelExcludeMetrics: ""

proverAgent:
service:
Expand All @@ -255,6 +261,7 @@ proverAgent:
memory: "4Gi"
cpu: "1"
pollInterval: 200
otelExcludeMetrics: ""

proverBroker:
service:
Expand All @@ -272,6 +279,7 @@ proverBroker:
memory: "4Gi"
cpu: "1"
maxOldSpaceSize: "3584"
otelExcludeMetrics: ""

jobs:
deployL1Verifier:
Expand All @@ -289,3 +297,4 @@ faucet:
requests:
memory: "2Gi"
cpu: "200m"
otelExcludeMetrics: ""
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export type EnvVar =
| 'OTEL_EXPORTER_OTLP_LOGS_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'OTEL_COLLECT_INTERVAL_MS'
| 'OTEL_EXCLUDE_METRICS'
| 'OTEL_EXPORT_TIMEOUT_MS'
| 'OUTBOX_CONTRACT_ADDRESS'
| 'P2P_BLOCK_CHECK_INTERVAL_MS'
Expand Down
13 changes: 13 additions & 0 deletions yarn-project/telemetry-client/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export interface TelemetryClientConfig {
k8sPodUid?: string;
k8sPodName?: string;
k8sNamespaceName?: string;
otelExcludeMetrics?: string[];
}

export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientConfig> = {
Expand Down Expand Up @@ -57,6 +58,18 @@ export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientCo
defaultValue: 30000, // Default extracted from otel client
parseEnv: (val: string) => parseInt(val),
},
otelExcludeMetrics: {
env: 'OTEL_EXCLUDE_METRICS',
description: 'A list of metric prefixes to exclude from export',
parseEnv: (val: string) =>
val
? val
.split(',')
.map(s => s.trim())
.filter(s => s.length > 0)
: [],
defaultValue: [],
},
k8sPodUid: {
env: 'K8S_POD_UID',
description: 'The UID of the Kubernetes pod (injected automatically by k8s)',
Expand Down
8 changes: 6 additions & 2 deletions yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic
import { type TelemetryClientConfig } from './config.js';
import { EventLoopMonitor } from './event_loop_monitor.js';
import { linearBuckets } from './histogram_utils.js';
import { OtelFilterMetricExporter } from './otel_filter_metric_exporter.js';
import { registerOtelLoggerProvider } from './otel_logger_provider.js';
import { getOtelResource } from './otel_resource.js';
import { type Gauge, type TelemetryClient } from './telemetry.js';
Expand Down Expand Up @@ -247,7 +248,7 @@ export class OpenTelemetryClient implements TelemetryClient {
tracerProvider.register();

const meterProvider = OpenTelemetryClient.createMeterProvider(resource, {
exporter: new GoogleCloudMetricExporter(),
exporter: new OtelFilterMetricExporter(new GoogleCloudMetricExporter(), config.otelExcludeMetrics ?? []),
exportTimeoutMillis: config.otelExportTimeoutMs,
exportIntervalMillis: config.otelCollectIntervalMs,
});
Expand All @@ -269,7 +270,10 @@ export class OpenTelemetryClient implements TelemetryClient {

const meterProvider = OpenTelemetryClient.createMeterProvider(resource, {
exporter: config.metricsCollectorUrl
? new OTLPMetricExporter({ url: config.metricsCollectorUrl.href })
? new OtelFilterMetricExporter(
new OTLPMetricExporter({ url: config.metricsCollectorUrl.href }),
config.otelExcludeMetrics ?? [],
)
: undefined,
exportTimeoutMillis: config.otelExportTimeoutMs,
exportIntervalMillis: config.otelCollectIntervalMs,
Expand Down
38 changes: 38 additions & 0 deletions yarn-project/telemetry-client/src/otel_filter_metric_exporter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { type ExportResult } from '@opentelemetry/core';
import { type MetricData, type PushMetricExporter, type ResourceMetrics } from '@opentelemetry/sdk-metrics';

/**
 * A {@link PushMetricExporter} wrapper that drops metrics by name prefix before
 * delegating to the wrapped exporter.
 *
 * A metric is excluded when its descriptor name starts with any of the configured
 * prefixes. Scopes that end up with no metrics after filtering are removed from the
 * exported payload. `forceFlush` and `shutdown` are forwarded unchanged.
 */
export class OtelFilterMetricExporter implements PushMetricExporter {
  /** Non-empty name prefixes; a metric whose name starts with any of them is not exported. */
  private readonly excludeMetricPrefixes: string[];

  /**
   * @param exporter - The exporter that receives the filtered metrics.
   * @param excludeMetricPrefixes - Metric-name prefixes to exclude. Empty strings are ignored.
   */
  constructor(private readonly exporter: PushMetricExporter, excludeMetricPrefixes: string[]) {
    // `name.startsWith('')` is true for every name, so a stray empty prefix would
    // silently suppress ALL metrics. Discard empty entries up front.
    this.excludeMetricPrefixes = excludeMetricPrefixes.filter(prefix => prefix.length > 0);

    // Forward the aggregation hooks only when the wrapped exporter defines them, so this
    // wrapper exposes exactly the hooks the wrapped exporter has.
    if (exporter.selectAggregation) {
      (this as PushMetricExporter).selectAggregation = exporter.selectAggregation.bind(exporter);
    }
    if (exporter.selectAggregationTemporality) {
      (this as PushMetricExporter).selectAggregationTemporality = exporter.selectAggregationTemporality.bind(exporter);
    }
  }

  /**
   * Filters out excluded metrics and hands the remainder to the wrapped exporter.
   *
   * The input object is not mutated; a new `ResourceMetrics` is built that shares the
   * original `resource` and `scope` references. The wrapped exporter is invoked even
   * when no metrics remain after filtering.
   *
   * @param metrics - The collected metrics to export.
   * @param resultCallback - Passed through to the wrapped exporter.
   */
  public export(metrics: ResourceMetrics, resultCallback: (result: ExportResult) => void): void {
    const filteredMetrics: ResourceMetrics = {
      resource: metrics.resource,
      scopeMetrics: metrics.scopeMetrics
        .map(({ scope, metrics }) => ({ scope, metrics: this.filterMetrics(metrics) }))
        // Drop scopes that have nothing left to report.
        .filter(({ metrics }) => metrics.length > 0),
    };

    this.exporter.export(filteredMetrics, resultCallback);
  }

  /** Returns the metrics whose name does not start with any excluded prefix. */
  private filterMetrics(metrics: MetricData[]): MetricData[] {
    return metrics.filter(
      metric => !this.excludeMetricPrefixes.some(prefix => metric.descriptor.name.startsWith(prefix)),
    );
  }

  /** Delegates to the wrapped exporter's `forceFlush`. */
  public forceFlush(): Promise<void> {
    return this.exporter.forceFlush();
  }

  /** Delegates to the wrapped exporter's `shutdown`. */
  public shutdown(): Promise<void> {
    return this.exporter.shutdown();
  }
}

0 comments on commit 37d4fa8

Please sign in to comment.