From 1726603ff0f49230e9ccc2effd03b2e08c442609 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Mon, 30 Sep 2024 19:01:31 +0200 Subject: [PATCH 01/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- .../job/MonitoringRunListener.java | 17 +++++++------- .../OtelContextAwareAbstractRunListener.java | 22 +++++++++---------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index a67201986..07ba0145b 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -153,7 +153,7 @@ public CauseHandler getCauseHandler(@NonNull Cause cause) throws NoSuchElementEx } @Override - public void _onInitialize(@NonNull Run run) { + public void _onInitialize(@NonNull Run run) { LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - onInitialize"); activeRunGauge.incrementAndGet(); @@ -232,14 +232,14 @@ public void _onInitialize(@NonNull Run run) { }); // CAUSES - List causesDescriptions = ((List) run.getCauses()).stream().map(c -> getCauseHandler(c).getStructuredDescription(c)).collect(Collectors.toList()); + List causesDescriptions = run.getCauses().stream().map(c -> getCauseHandler(c).getStructuredDescription(c)).collect(Collectors.toList()); rootSpanBuilder.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_CAUSE, causesDescriptions); - Optional optCause = run.getCauses().stream().findFirst(); + Optional optCause = run.getCauses().stream().findFirst(); optCause.ifPresent(cause -> { if (cause instanceof Cause.UpstreamCause) { Cause.UpstreamCause upstreamCause = (Cause.UpstreamCause) cause; - Run upstreamRun = upstreamCause.getUpstreamRun(); + Run upstreamRun = upstreamCause.getUpstreamRun(); if (upstreamRun == null) { // 
hudson.model.Cause.UpstreamCause.getUpstreamRun() can return null, probably if upstream job or build has been deleted. } else { @@ -299,7 +299,7 @@ public String get(@Nullable Map carrier, String key) { } @Override - public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) { + public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) { try (Scope parentScope = endPipelinePhaseSpan(run)) { Span runSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_RUN_NAME).setParent(Context.current()).startSpan(); LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(runSpan)); @@ -311,7 +311,7 @@ public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) { } @Override - public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) { + public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) { try (Scope parentScope = endPipelinePhaseSpan(run)) { Span finalizeSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_FINALIZE_NAME).setParent(Context.current()).startSpan(); LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(finalizeSpan)); @@ -323,7 +323,7 @@ public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) { @MustBeClosed @NonNull - protected Scope endPipelinePhaseSpan(@NonNull Run run) { + protected Scope endPipelinePhaseSpan(@NonNull Run run) { Span pipelinePhaseSpan = verifyNotNull(Span.current(), "No pipelinePhaseSpan found in context"); pipelinePhaseSpan.end(); LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - end " + OtelUtils.toDebugString(pipelinePhaseSpan)); @@ -334,7 +334,7 @@ protected Scope endPipelinePhaseSpan(@NonNull Run run) { } @Override - public void _onFinalized(@NonNull Run run) { + public void _onFinalized(@NonNull Run run) { try (Scope parentScope = endPipelinePhaseSpan(run)) { Span parentSpan = Span.current(); 
@@ -381,7 +381,6 @@ public void _onFinalized(@NonNull Run run) { this.getTraceService().purgeRun(run); - Result result = verifyNotNull(run.getResult(), "%s", run); if (result.isCompleteBuild()) { diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java index 44fb6c4b5..1e099d7a3 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java @@ -30,7 +30,7 @@ * {@link RunListener} that setups the OpenTelemetry {@link io.opentelemetry.context.Context} * with the current {@link Span}. */ -public abstract class OtelContextAwareAbstractRunListener extends RunListener { +public abstract class OtelContextAwareAbstractRunListener extends RunListener> { private final static Logger LOGGER = Logger.getLogger(OtelContextAwareAbstractRunListener.class.getName()); @@ -56,18 +56,18 @@ public final void setOpenTelemetry(@NonNull ReconfigurableOpenTelemetry jenkinsC } @Override - public final void onCompleted(@NonNull Run run, @NonNull TaskListener listener) { + public final void onCompleted(@NonNull Run run, @NonNull TaskListener listener) { Span span = getTraceService().getSpan(run); try (Scope scope = span.makeCurrent()) { this._onCompleted(run, listener); } } - public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) { + public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) { } @Override - public final void onFinalized(@NonNull Run run) { + public final void onFinalized(@NonNull Run run) { Span span = getTraceService().getSpan(run); try (Scope scope = span.makeCurrent()) { this._onFinalized(run); @@ -75,26 +75,26 @@ public final void onFinalized(@NonNull Run run) { } - public void _onFinalized(Run run) { + 
public void _onFinalized(Run run) { } @Override - public final void onInitialize(@NonNull Run run) { + public final void onInitialize(@NonNull Run run) { this._onInitialize(run); } - public void _onInitialize(@NonNull Run run) { + public void _onInitialize(@NonNull Run run) { } @Override - public final void onStarted(@NonNull Run run, @NonNull TaskListener listener) { + public final void onStarted(@NonNull Run run, @NonNull TaskListener listener) { Span span = getTraceService().getSpan(run); try (Scope scope = span.makeCurrent()) { this._onStarted(run, listener); } } - public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) { + public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) { } @Override @@ -112,14 +112,14 @@ public Environment _setUpEnvironment(@NonNull AbstractBuild build, @NonNull Laun } @Override - public final void onDeleted(@NonNull Run run) { + public final void onDeleted(@NonNull Run run) { Span span = getTraceService().getSpan(run); try (Scope ignored = span.makeCurrent()) { this._onDeleted(run); } } - public void _onDeleted(@NonNull Run run) { + public void _onDeleted(@NonNull Run run) { } @NonNull From aed54524f82d8e7c0fbaeeeb73ba60156cda1061 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Tue, 1 Oct 2024 06:33:05 -0400 Subject: [PATCH 02/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- .../job/MonitoringRunListener.java | 108 ++++++++++++------ .../JenkinsOtelSemanticAttributes.java | 6 + 2 files changed, 81 insertions(+), 33 deletions(-) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index 07ba0145b..010a08c2a 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -28,6 +28,8 @@ import 
io.jenkins.plugins.opentelemetry.queue.RemoteSpanAction; import io.jenkins.plugins.opentelemetry.semconv.JenkinsOtelSemanticAttributes; import io.jenkins.plugins.opentelemetry.semconv.JenkinsSemanticMetrics; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.api.trace.Span; @@ -58,10 +60,12 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static com.google.common.base.Verify.verifyNotNull; @@ -72,10 +76,15 @@ @Extension(dynamicLoadable = YesNoMaybe.YES, optional = true) public class MonitoringRunListener extends OtelContextAwareAbstractRunListener implements OpenTelemetryLifecycleListener { + static final Pattern MATCH_ANYTHING = Pattern.compile(".*"); + static final Pattern MATCH_NOTHING = Pattern.compile(""); // FIXME check regex + + protected static final Logger LOGGER = Logger.getLogger(MonitoringRunListener.class.getName()); private AtomicInteger activeRunGauge; private List causeHandlers; + private DoubleHistogram runDurationHistogram; private LongCounter runLaunchedCounter; private LongCounter runStartedCounter; private LongCounter runCompletedCounter; @@ -83,6 +92,8 @@ public class MonitoringRunListener extends OtelContextAwareAbstractRunListener i private LongCounter runSuccessCounter; private LongCounter runFailedCounter; private List runHandlers; + private Pattern runDurationHistogramAllowList; + private Pattern runDurationHistogramDenyList; @PostConstruct public void postConstruct() { @@ -104,42 +115,60 @@ public void postConstruct() { // METRICS activeRunGauge = new AtomicInteger(); + + 
runDurationHistogram = meter.histogramBuilder("ci.pipeline.run.duration") + .setUnit("s") + .build(); + runDurationHistogramAllowList = MATCH_ANYTHING; // allow all + runDurationHistogramDenyList = MATCH_NOTHING; // deny nothing + meter.gaugeBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ACTIVE) .ofLongs() .setDescription("Gauge of active jobs") .setUnit("{jobs}") .buildWithCallback(valueObserver -> valueObserver.record(this.activeRunGauge.get())); runLaunchedCounter = - meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_LAUNCHED) - .setDescription("Job launched") - .setUnit("{jobs}") - .build(); + meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_LAUNCHED) + .setDescription("Job launched") + .setUnit("{jobs}") + .build(); runStartedCounter = - meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_STARTED) - .setDescription("Job started") - .setUnit("{jobs}") - .build(); + meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_STARTED) + .setDescription("Job started") + .setUnit("{jobs}") + .build(); runSuccessCounter = - meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_SUCCESS) - .setDescription("Job succeed") - .setUnit("{jobs}") - .build(); + meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_SUCCESS) + .setDescription("Job succeed") + .setUnit("{jobs}") + .build(); runFailedCounter = meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_FAILED) .setDescription("Job failed") .setUnit("{jobs}") .build(); runAbortedCounter = - meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ABORTED) - .setDescription("Job aborted") - .setUnit("{jobs}") - .build(); + meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ABORTED) + .setDescription("Job aborted") + .setUnit("{jobs}") + .build(); runCompletedCounter = - meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_COMPLETED) - .setDescription("Job completed") - .setUnit("{jobs}") - .build(); + 
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_COMPLETED) + .setDescription("Job completed") + .setUnit("{jobs}") + .build(); + } + @Override + public void afterConfiguration(ConfigProperties configProperties) { + this.runDurationHistogramAllowList = Optional + .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST)) + .map(Pattern::compile) + .orElse(MATCH_ANYTHING); + this.runDurationHistogramDenyList = Optional + .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST)) + .map(Pattern::compile) + .orElse(MATCH_NOTHING); } @NonNull @@ -149,7 +178,7 @@ public List getCauseHandlers() { @NonNull public CauseHandler getCauseHandler(@NonNull Cause cause) throws NoSuchElementException { - return getCauseHandlers().stream().filter(ch -> ch.isSupported(cause)).findFirst().get(); + return getCauseHandlers().stream().filter(ch -> ch.isSupported(cause)).findFirst().orElseThrow(); } @Override @@ -167,28 +196,28 @@ public void _onInitialize(@NonNull Run run) { // TODO move this to a pluggable span enrichment API with implementations for different observability backends rootSpanBuilder - .setAttribute(JenkinsOtelSemanticAttributes.ELASTIC_TRANSACTION_TYPE, "job"); + .setAttribute(JenkinsOtelSemanticAttributes.ELASTIC_TRANSACTION_TYPE, "job"); rootSpanBuilder - .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, run.getParent().getFullName()) - .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_NAME, run.getParent().getFullDisplayName()) - .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_URL, runUrl) - .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_NUMBER, (long) run.getNumber()) - .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_TYPE, OtelUtils.getProjectType(run)); + .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, run.getParent().getFullName()) + 
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_NAME, run.getParent().getFullDisplayName()) + .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_URL, runUrl) + .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_NUMBER, (long) run.getNumber()) + .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_TYPE, OtelUtils.getProjectType(run)); // CULPRITS Set culpritIds; if (run instanceof WorkflowRun) { culpritIds = ((WorkflowRun) run).getCulprits(); } else if (run instanceof AbstractBuild) { - culpritIds = ((AbstractBuild) run).getCulprits(); + culpritIds = ((AbstractBuild) run).getCulprits(); } else { culpritIds = null; } if (culpritIds != null) { rootSpanBuilder .setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMMITTERS, - culpritIds.stream().map(p -> p.getId()).collect(Collectors.toList())); + culpritIds.stream().map(User::getId).collect(Collectors.toList())); } // PARAMETERS @@ -287,8 +316,8 @@ public String get(@Nullable Map carrier, String key) { // START initialize span Span startSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_START_NAME) - .setParent(Context.current().with(rootSpan)) - .startSpan(); + .setParent(Context.current().with(rootSpan)) + .startSpan(); LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(startSpan)); this.getTraceService().putRunPhaseSpan(run, startSpan); @@ -357,7 +386,7 @@ public void _onFinalized(@NonNull Run run) { if (Result.SUCCESS.equals(runResult)) { parentSpan.setStatus(StatusCode.OK, runResult.toString()); - } else if (Result.FAILURE.equals(runResult) || Result.UNSTABLE.equals(runResult)){ + } else if (Result.FAILURE.equals(runResult) || Result.UNSTABLE.equals(runResult)) { parentSpan.setAttribute(ExceptionAttributes.EXCEPTION_TYPE, "PIPELINE_" + runResult); parentSpan.setAttribute(ExceptionAttributes.EXCEPTION_MESSAGE, "PIPELINE_" + runResult); parentSpan.setStatus(StatusCode.ERROR, runResult.toString()); @@ 
-367,7 +396,7 @@ public void _onFinalized(@NonNull Run run) { } // NODE if (run instanceof AbstractBuild) { - Node node = ((AbstractBuild) run).getBuiltOn(); + Node node = ((AbstractBuild) run).getBuiltOn(); if (node != null) { parentSpan.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_AGENT_LABEL, node.getLabelString()); parentSpan.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_AGENT_ID, node.getNodeName()); @@ -394,6 +423,19 @@ public void _onFinalized(@NonNull Run run) { } else { this.runAbortedCounter.add(1); } + + String pipelineId = + runDurationHistogramAllowList.matcher(run.getParent().getFullName()).matches() + && + !runDurationHistogramDenyList.matcher(run.getParent().getFullName()).matches() ? + run.getParent().getFullName() : "#other#"; + runDurationHistogram.record( + TimeUnit.SECONDS.convert(run.getDuration(), TimeUnit.MILLISECONDS), + Attributes.of( + JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, pipelineId, + JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_RESULT, result.toString(), + JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMPLETED, result.isCompleteBuild()) + ); } finally { activeRunGauge.decrementAndGet(); } diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java index f03d31996..d9ce95e68 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java @@ -7,6 +7,7 @@ import hudson.PluginWrapper; import hudson.model.Computer; +import hudson.model.Job; import io.jenkins.plugins.opentelemetry.api.semconv.JenkinsAttributes; import io.opentelemetry.api.common.AttributeKey; import jenkins.model.Jenkins; @@ -21,6 +22,9 @@ public final class JenkinsOtelSemanticAttributes extends JenkinsAttributes { public static final AttributeKey CI_PIPELINE_TYPE = 
AttributeKey.stringKey("ci.pipeline.type"); public static final AttributeKey CI_PIPELINE_MULTIBRANCH_TYPE = AttributeKey.stringKey("ci.pipeline.multibranch.type"); + /** + * @see Job#getFullName() + */ public static final AttributeKey CI_PIPELINE_ID = AttributeKey.stringKey("ci.pipeline.id"); public static final AttributeKey CI_PIPELINE_NAME = AttributeKey.stringKey("ci.pipeline.name"); public static final AttributeKey CI_PIPELINE_TEMPLATE_ID = AttributeKey.stringKey("ci.pipeline.template.id"); @@ -135,6 +139,8 @@ public final class JenkinsOtelSemanticAttributes extends JenkinsAttributes { public static final String OTEL_INSTRUMENTATION_JENKINS_WEB_ENABLED = "otel.instrumentation.jenkins.web.enabled"; public static final String OTEL_INSTRUMENTATION_JENKINS_REMOTE_SPAN_ENABLED = "otel.instrumentation.jenkins.remote.span.enabled"; + public static final String OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST = "otel.instrumentation.jenkins.run.metric.duration.allow_list"; + public static final String OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST = "otel.instrumentation.jenkins.run.metric.duration.deny_list"; /** * Instrument Jenkins Remoting from the Jenkins controller to Jenkins build agents */ From 7b51ae3e699ea114e7a5d0a59a3050592e5867e0 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Wed, 2 Oct 2024 11:03:58 -0400 Subject: [PATCH 03/11] Update src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java --- .../jenkins/plugins/opentelemetry/job/MonitoringRunListener.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index 010a08c2a..7ce333ca4 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -117,6 +117,7 @@ public void postConstruct() { 
activeRunGauge = new AtomicInteger(); runDurationHistogram = meter.histogramBuilder("ci.pipeline.run.duration") + // TODO clarify histogram buckets. .setUnit("s") .build(); runDurationHistogramAllowList = MATCH_ANYTHING; // allow all From 37b6b0ef90554d325e472d02c9caf68876bd414b Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Wed, 2 Oct 2024 11:15:08 -0400 Subject: [PATCH 04/11] Update src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java --- .../jenkins/plugins/opentelemetry/job/MonitoringRunListener.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index 7ce333ca4..b897c2a7e 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -435,6 +435,7 @@ public void _onFinalized(@NonNull Run run) { Attributes.of( JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, pipelineId, JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_RESULT, result.toString(), + // TODO do we need this `completed` dimension that we captured on spans? Can't this inferred from the `result` attribute ? 
JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMPLETED, result.isCompleteBuild()) ); } finally { From 86ea5dbf62afa63a83d02c4e6201c5d748ecee71 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Fri, 25 Oct 2024 22:08:57 +0530 Subject: [PATCH 05/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- docs/monitoring-metrics.md | 100 +++++++++++------- ...nkinsOpenTelemetryPluginConfiguration.java | 2 +- .../computer/MonitoringCloudListener.java | 4 +- .../JenkinsExecutorMonitoringInitializer.java | 2 +- .../job/MonitoringRunListener.java | 59 +++++++---- .../OtelContextAwareAbstractRunListener.java | 2 +- .../JenkinsOtelSemanticAttributes.java | 4 + .../semconv/JenkinsSemanticMetrics.java | 1 + .../job/MonitoringRunListenerTest.java | 67 ++++++++++++ 9 files changed, 181 insertions(+), 60 deletions(-) create mode 100644 src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java diff --git a/docs/monitoring-metrics.md b/docs/monitoring-metrics.md index 76c7f3843..19128f962 100644 --- a/docs/monitoring-metrics.md +++ b/docs/monitoring-metrics.md @@ -24,7 +24,21 @@ or APIs ([here](https://www.elastic.co/guide/en/kibana/current/dashboard-import- |------------------------------------------------|----------------------------------| | Jenkins Health Dashboard with Elastic Kibana | Jenkins Agent Provisioning Health Dashboard with Elastic Kibana | -## Jenkins Health Metrics +## Build Duration + +* Name: `ci.pipeline.run.duration` +* Type: Histogram with buckets: `1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192` (buckets subject to change) +* Unit: `s` +* Attributes: + * `ci.pipeline.id`: The full name of the Jenkins job if complying with the allow and deny lists specified through + configuration parameters documented below, otherwise `#other#` to limit the cardinality of the metric. + Example: `my-team/my-app/main`. See `hudson.model.AbstractItem#getFullName()`. 
+ * `ci.pipeline.run.result`: `SUCCESS`, `UNSTABLE`, `FAILURE`, `NOT_BUILT`, `ABORTED`. See `hudson.model.Run#getResult()`. +* Configuration parameters to control the cardinality of the `ci.pipeline.id` attribute: + * `otel.instrumentation.jenkins.run.metric.duration.allow_list`: Java regex. Example `jenkins_folder_a/.*|jenkins_folder_b/.*` + * `otel.instrumentation.jenkins.run.metric.duration.deny_list`: Java regex. Example `.*test.*` + +## Jenkins Build & Health Metrics Inventory of health metrics collected by the Jenkins OpenTelemetry integration: @@ -35,128 +49,142 @@ Inventory of health metrics collected by the Jenkins OpenTelemetry integration: + + + + + + - + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -189,42 +217,42 @@ Inventory of health metrics collected by the Jenkins OpenTelemetry integration: - + - + - + - + - + - + @@ -243,7 +271,7 @@ Inventory of health metrics collected by the Jenkins OpenTelemetry integration: - + - + - + - + - + @@ -304,7 +332,7 @@ See OpenTelemetry [Semantic Conventions for Runtime Environment Metrics](https:/ - + @@ -435,8 +463,8 @@ See OpenTelemetry [Semantic Conventions for Runtime Environment Metrics](https:/ ## Jenkins Security Metrics -| Metrics | Unit | Attribute Key | Attribute value | Description | -|----------------------------------|-------|-----------------------|-------------------------|------------------------| -| login | 1 | | | Login count | -| login_success | 1 | | | Successful login count | -| login_failure | 1 | | | Failed login count | +| Metrics | Unit | Attribute Key | Attribute value | Description | +|----------------------------------|-------------|-----------------------|-------------------------|------------------------| +| login | ${logins} | | | Login count | +| login_success | ${logins} | | | Successful login count | +| login_failure | ${logins} | | | Failed login count | diff --git 
a/src/main/java/io/jenkins/plugins/opentelemetry/JenkinsOpenTelemetryPluginConfiguration.java b/src/main/java/io/jenkins/plugins/opentelemetry/JenkinsOpenTelemetryPluginConfiguration.java index 77f8bdcc3..e8d6ce5b2 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/JenkinsOpenTelemetryPluginConfiguration.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/JenkinsOpenTelemetryPluginConfiguration.java @@ -179,7 +179,7 @@ public boolean configure(StaplerRequest req, JSONObject json) throws FormExcepti try { configureOpenTelemetrySdk(); save(); - } catch (ConfigurationException e) { + } catch (RuntimeException e) { LOGGER.log(Level.WARNING, "Exception configuring OpenTelemetry SDK", e); throw new FormException("Exception configuring OpenTelemetry SDK: " + e.getMessage(), e, "endpoint"); } diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/computer/MonitoringCloudListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/computer/MonitoringCloudListener.java index c6034ead2..c32002996 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/computer/MonitoringCloudListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/computer/MonitoringCloudListener.java @@ -39,11 +39,11 @@ public void postConstruct() { failureCloudCounter = meter.counterBuilder(JenkinsSemanticMetrics.JENKINS_CLOUD_AGENTS_FAILURE) .setDescription("Number of failed cloud agents when provisioning") - .setUnit("1") + .setUnit("{agents}") .build(); totalCloudCount = meter.counterBuilder(JenkinsSemanticMetrics.JENKINS_CLOUD_AGENTS_COMPLETED) .setDescription("Number of provisioned cloud agents") - .setUnit("1") + .setUnit("{agents}") .build(); } diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/init/JenkinsExecutorMonitoringInitializer.java b/src/main/java/io/jenkins/plugins/opentelemetry/init/JenkinsExecutorMonitoringInitializer.java index 9cdf14dae..89734b063 100644 --- 
a/src/main/java/io/jenkins/plugins/opentelemetry/init/JenkinsExecutorMonitoringInitializer.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/init/JenkinsExecutorMonitoringInitializer.java @@ -48,7 +48,7 @@ public void postConstruct() { final ObservableLongMeasurement onlineExecutors = meter.gaugeBuilder(JENKINS_EXECUTOR_ONLINE).setUnit("${executors}").setDescription("Online executors").ofLongs().buildObserver(); final ObservableLongMeasurement connectingExecutors = meter.gaugeBuilder(JENKINS_EXECUTOR_CONNECTING).setUnit("${executors}").setDescription("Connecting executors").ofLongs().buildObserver(); final ObservableLongMeasurement definedExecutors = meter.gaugeBuilder(JENKINS_EXECUTOR_DEFINED).setUnit("${executors}").setDescription("Defined executors").ofLongs().buildObserver(); - final ObservableLongMeasurement queueLength = meter.gaugeBuilder(JENKINS_EXECUTOR_QUEUE).setUnit("${executors}").setDescription("Defined executors").ofLongs().buildObserver(); + final ObservableLongMeasurement queueLength = meter.gaugeBuilder(JENKINS_EXECUTOR_QUEUE).setUnit("${items}").setDescription("Executors queue items").ofLongs().buildObserver(); logger.log(Level.FINER, () -> "Metrics: " + availableExecutors + ", " + busyExecutors + ", " + idleExecutors + ", " + onlineExecutors + ", " + connectingExecutors + ", " + definedExecutors + ", " + queueLength); meter.batchCallback(() -> { diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index b897c2a7e..91155b961 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -5,6 +5,10 @@ package io.jenkins.plugins.opentelemetry.job; +import static java.util.Arrays.asList; +import static java.util.Collections.unmodifiableList; + +import com.google.common.annotations.VisibleForTesting; import 
com.google.common.base.Preconditions; import com.google.errorprone.annotations.MustBeClosed; import edu.umd.cs.findbugs.annotations.NonNull; @@ -66,6 +70,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import java.util.stream.Collectors; import static com.google.common.base.Verify.verifyNotNull; @@ -77,8 +82,11 @@ public class MonitoringRunListener extends OtelContextAwareAbstractRunListener implements OpenTelemetryLifecycleListener { static final Pattern MATCH_ANYTHING = Pattern.compile(".*"); - static final Pattern MATCH_NOTHING = Pattern.compile(""); // FIXME check regex + static final Pattern MATCH_NOTHING = Pattern.compile("$^"); + static final List DURATION_SECONDS_BUCKETS = + unmodifiableList( + asList(1D, 2D, 4D, 8D, 16D, 32D, 64D, 128D, 256D, 512D, 1024D, 2048D, 4096D, 8192D)); protected static final Logger LOGGER = Logger.getLogger(MonitoringRunListener.class.getName()); @@ -92,8 +100,10 @@ public class MonitoringRunListener extends OtelContextAwareAbstractRunListener i private LongCounter runSuccessCounter; private LongCounter runFailedCounter; private List runHandlers; - private Pattern runDurationHistogramAllowList; - private Pattern runDurationHistogramDenyList; + @VisibleForTesting + Pattern runDurationHistogramAllowList; + @VisibleForTesting + Pattern runDurationHistogramDenyList; @PostConstruct public void postConstruct() { @@ -116,9 +126,9 @@ public void postConstruct() { // METRICS activeRunGauge = new AtomicInteger(); - runDurationHistogram = meter.histogramBuilder("ci.pipeline.run.duration") - // TODO clarify histogram buckets. 
+ runDurationHistogram = meter.histogramBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_DURATION) .setUnit("s") + .setExplicitBucketBoundariesAdvice(DURATION_SECONDS_BUCKETS) .build(); runDurationHistogramAllowList = MATCH_ANYTHING; // allow all runDurationHistogramDenyList = MATCH_NOTHING; // deny nothing @@ -162,14 +172,26 @@ public void postConstruct() { @Override public void afterConfiguration(ConfigProperties configProperties) { - this.runDurationHistogramAllowList = Optional - .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST)) - .map(Pattern::compile) - .orElse(MATCH_ANYTHING); - this.runDurationHistogramDenyList = Optional - .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST)) - .map(Pattern::compile) - .orElse(MATCH_NOTHING); + try { + this.runDurationHistogramAllowList = Optional + .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST)) + .map(Pattern::compile) + .orElse(MATCH_NOTHING); + } catch (PatternSyntaxException e) { + this.runDurationHistogramAllowList = MATCH_NOTHING; + throw new IllegalArgumentException("Invalid regex for '" + + JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST + "'", e); + } + try { + this.runDurationHistogramDenyList = Optional + .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST)) + .map(Pattern::compile) + .orElse(MATCH_NOTHING); + } catch (PatternSyntaxException e) { + this.runDurationHistogramDenyList = MATCH_NOTHING; + throw new IllegalArgumentException("Invalid regex for '" + + JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST + "'", e); + } } @NonNull @@ -425,18 +447,17 @@ public void _onFinalized(@NonNull Run run) { this.runAbortedCounter.add(1); } + String jobFullName = 
run.getParent().getFullName(); String pipelineId = - runDurationHistogramAllowList.matcher(run.getParent().getFullName()).matches() + runDurationHistogramAllowList.matcher(jobFullName).matches() && - !runDurationHistogramDenyList.matcher(run.getParent().getFullName()).matches() ? - run.getParent().getFullName() : "#other#"; + !runDurationHistogramDenyList.matcher(jobFullName).matches() ? + jobFullName : "#other#"; runDurationHistogram.record( TimeUnit.SECONDS.convert(run.getDuration(), TimeUnit.MILLISECONDS), Attributes.of( JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, pipelineId, - JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_RESULT, result.toString(), - // TODO do we need this `completed` dimension that we captured on spans? Can't this inferred from the `result` attribute ? - JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMPLETED, result.isCompleteBuild()) + JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_RESULT, result.toString()) ); } finally { activeRunGauge.decrementAndGet(); diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java index 1e099d7a3..886654965 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/opentelemetry/OtelContextAwareAbstractRunListener.java @@ -137,7 +137,7 @@ public Meter getMeter() { return meter; } - public ConfigProperties getConfigProperties() { + protected ConfigProperties getConfigProperties() { return configProperties; } } diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java index d9ce95e68..71f2ca239 100644 --- 
a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsOtelSemanticAttributes.java @@ -8,6 +8,7 @@ import hudson.PluginWrapper; import hudson.model.Computer; import hudson.model.Job; +import hudson.model.Run; import io.jenkins.plugins.opentelemetry.api.semconv.JenkinsAttributes; import io.opentelemetry.api.common.AttributeKey; import jenkins.model.Jenkins; @@ -46,6 +47,9 @@ public final class JenkinsOtelSemanticAttributes extends JenkinsAttributes { public static final AttributeKey> CI_PIPELINE_RUN_PARAMETER_IS_SENSITIVE = AttributeKey.booleanArrayKey("ci.pipeline.parameter.sensitive"); public static final AttributeKey> CI_PIPELINE_RUN_PARAMETER_NAME = AttributeKey.stringArrayKey("ci.pipeline.parameter.name"); public static final AttributeKey> CI_PIPELINE_RUN_PARAMETER_VALUE = AttributeKey.stringArrayKey("ci.pipeline.parameter.value"); + /** + * @see Run#getResult() + */ public static final AttributeKey CI_PIPELINE_RUN_RESULT = AttributeKey.stringKey("ci.pipeline.run.result"); public static final AttributeKey CI_PIPELINE_RUN_URL = AttributeKey.stringKey("ci.pipeline.run.url"); public static final AttributeKey CI_PIPELINE_RUN_USER = AttributeKey.stringKey("ci.pipeline.run.user"); diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsSemanticMetrics.java b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsSemanticMetrics.java index 1234cf00d..fb0bfc437 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsSemanticMetrics.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/semconv/JenkinsSemanticMetrics.java @@ -7,6 +7,7 @@ public class JenkinsSemanticMetrics { public static final String CI_PIPELINE_RUN_ACTIVE = "ci.pipeline.run.active"; + public static final String CI_PIPELINE_RUN_DURATION = "ci.pipeline.run.duration"; public static final String CI_PIPELINE_RUN_LAUNCHED = "ci.pipeline.run.launched"; 
public static final String CI_PIPELINE_RUN_STARTED = "ci.pipeline.run.started"; public static final String CI_PIPELINE_RUN_COMPLETED = "ci.pipeline.run.completed"; diff --git a/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java b/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java new file mode 100644 index 000000000..66f229ded --- /dev/null +++ b/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java @@ -0,0 +1,67 @@ +/* + * Copyright The Original Author or Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.jenkins.plugins.opentelemetry.job; + +import io.opentelemetry.sdk.autoconfigure.spi.internal.DefaultConfigProperties; +import org.junit.Test; + +import java.util.Map; + +import static org.junit.Assert.*; + +public class MonitoringRunListenerTest { + + @Test + public void test_default_allow_deny_list() { + MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); + Map configProperties = Map.of(); + monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); + String jobFullName = "my-team/my-war/main"; + assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); + + } + + @Test + public void test_deny_list_matching() { + MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.deny_list", "my-team/.*"); + monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); + String jobFullName = "my-team/my-war/main"; + assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertTrue(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); + } + @Test + public void 
test_deny_list_not_matching() { + MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.deny_list", "my-team/.*"); + monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); + String jobFullName = "another-team/my-war/main"; + assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); + } + + + @Test + public void test_allow_list_matching() { + MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.allow_list", "my-team/.*"); + monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); + String jobFullName = "my-team/my-war/main"; + assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); + } + @Test + public void test_allow_list_not_matching() { + MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.allow_list", "my-team/.*"); + monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); + String jobFullName = "another-team/my-war/main"; + assertFalse(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); + } + +} \ No newline at end of file From 3580d84d953fccc016fe28ee0309c56657836c4a Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Fri, 25 Oct 2024 22:19:55 +0530 Subject: [PATCH 06/11] Add ci.pipeline.run.duration metric Signed-off-by: 
Cyrille Le Clerc --- .../opentelemetry/job/MonitoringRunListener.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index 91155b961..3c39a087c 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -172,26 +172,28 @@ public void postConstruct() { @Override public void afterConfiguration(ConfigProperties configProperties) { + Pattern newRunDurationHistogramAllowList; + Pattern newRunDurationHistogramDenyList; try { - this.runDurationHistogramAllowList = Optional + newRunDurationHistogramAllowList = Optional .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST)) .map(Pattern::compile) .orElse(MATCH_NOTHING); } catch (PatternSyntaxException e) { - this.runDurationHistogramAllowList = MATCH_NOTHING; throw new IllegalArgumentException("Invalid regex for '" + JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST + "'", e); } try { - this.runDurationHistogramDenyList = Optional + newRunDurationHistogramDenyList = Optional .ofNullable(configProperties.getString(JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST)) .map(Pattern::compile) .orElse(MATCH_NOTHING); } catch (PatternSyntaxException e) { - this.runDurationHistogramDenyList = MATCH_NOTHING; throw new IllegalArgumentException("Invalid regex for '" + JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST + "'", e); } + this.runDurationHistogramAllowList = newRunDurationHistogramAllowList; + this.runDurationHistogramDenyList = newRunDurationHistogramDenyList; } @NonNull From 247c6e0bcdbd0739d698fbddd722b53ea9ee087e Mon Sep 17 00:00:00 2001 From: Cyrille Le 
Clerc Date: Sun, 27 Oct 2024 15:56:14 +0530 Subject: [PATCH 07/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- docs/monitoring-metrics.md | 4 ++-- .../job/MonitoringRunListenerTest.java | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/monitoring-metrics.md b/docs/monitoring-metrics.md index 19128f962..13e1e1490 100644 --- a/docs/monitoring-metrics.md +++ b/docs/monitoring-metrics.md @@ -35,8 +35,8 @@ or APIs ([here](https://www.elastic.co/guide/en/kibana/current/dashboard-import- Example: `my-team/my-app/main`. See `hudson.model.AbstractItem#getFullName()`. * `ci.pipeline.run.result`: `SUCCESS`, `UNSTABLE`, `FAILURE`, `NOT_BUILT`, `ABORTED`. See `hudson.model.Run#getResult()`. * Configuration parameters to control the cardinality of the `ci.pipeline.id` attribute: - * `otel.instrumentation.jenkins.run.metric.duration.allow_list`: Java regex. Example `jenkins_folder_a/.*|jenkins_folder_b/.*` - * `otel.instrumentation.jenkins.run.metric.duration.deny_list`: Java regex. Example `.*test.*` + * `otel.instrumentation.jenkins.run.metric.duration.allow_list`: Java regex, default value: `$^` (ie match nothing). Example `jenkins_folder_a/.*|jenkins_folder_b/.*` + * `otel.instrumentation.jenkins.run.metric.duration.deny_list`: Java regex, default value: `$^` (ie match nothing). 
Example `.*test.*` ## Jenkins Build & Health Metrics diff --git a/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java b/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java index 66f229ded..7e3b3ab08 100644 --- a/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java +++ b/src/test/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListenerTest.java @@ -20,7 +20,7 @@ public void test_default_allow_deny_list() { Map configProperties = Map.of(); monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); String jobFullName = "my-team/my-war/main"; - assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); + assertFalse(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); } @@ -28,18 +28,21 @@ public void test_default_allow_deny_list() { @Test public void test_deny_list_matching() { MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); - Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.deny_list", "my-team/.*"); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.allow_list", "my-team/.*", + "otel.instrumentation.jenkins.run.metric.duration.deny_list", ".*test.*"); monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); - String jobFullName = "my-team/my-war/main"; + String jobFullName = "my-team/my-war/test-123"; assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); assertTrue(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); } + @Test public void test_deny_list_not_matching() { MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); - Map configProperties = 
Map.of("otel.instrumentation.jenkins.run.metric.duration.deny_list", "my-team/.*"); + Map configProperties = Map.of("otel.instrumentation.jenkins.run.metric.duration.allow_list", "my-team/.*", + "otel.instrumentation.jenkins.run.metric.duration.deny_list", ".*test.*"); monitoringRunListener.afterConfiguration(DefaultConfigProperties.createFromMap(configProperties)); - String jobFullName = "another-team/my-war/main"; + String jobFullName = "my-team/my-war/main"; assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); } @@ -54,6 +57,7 @@ public void test_allow_list_matching() { assertTrue(monitoringRunListener.runDurationHistogramAllowList.matcher(jobFullName).matches()); assertFalse(monitoringRunListener.runDurationHistogramDenyList.matcher(jobFullName).matches()); } + @Test public void test_allow_list_not_matching() { MonitoringRunListener monitoringRunListener = new MonitoringRunListener(); From f0786d62f63d8a7002ba64345a60e97bcf57379c Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Sun, 27 Oct 2024 22:25:18 +0530 Subject: [PATCH 08/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- docs/monitoring-metrics.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/monitoring-metrics.md b/docs/monitoring-metrics.md index 13e1e1490..9713a3056 100644 --- a/docs/monitoring-metrics.md +++ b/docs/monitoring-metrics.md @@ -26,6 +26,11 @@ or APIs ([here](https://www.elastic.co/guide/en/kibana/current/dashboard-import- ## Build Duration +:information: In order to control metrics cardinality, the `ci.pipeline.run.duration` metrics are enabled by default +aggregating the durations of all the jobs/pipelines under the umbrella `ci.pipeline.id=#other#`. 
+To enable per job/pipeline metrics, use the allow and deny list setting the configuration parameters +`otel.instrumentation.jenkins.run.metric.duration.allow_list` and `otel.instrumentation.jenkins.run.metric.duration.deny_list`. + * Name: `ci.pipeline.run.duration` * Type: Histogram with buckets: `1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192` (buckets subject to change) * Unit: `s` From 88f0a152f0aa72749a62800f69900f8a7e614d17 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Sun, 27 Oct 2024 22:27:36 +0530 Subject: [PATCH 09/11] Update monitoring-metrics.md --- docs/monitoring-metrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/monitoring-metrics.md b/docs/monitoring-metrics.md index 9713a3056..14728a52e 100644 --- a/docs/monitoring-metrics.md +++ b/docs/monitoring-metrics.md @@ -26,10 +26,10 @@ or APIs ([here](https://www.elastic.co/guide/en/kibana/current/dashboard-import- ## Build Duration -:information: In order to control metrics cardinality, the `ci.pipeline.run.duration` metrics are enabled by default +**⚠️ In order to control metrics cardinality, the `ci.pipeline.run.duration` metrics are enabled by default aggregating the durations of all the jobs/pipelines under the umbrella `ci.pipeline.id=#other#`. To enable per job/pipeline metrics, use the allow and deny list setting the configuration parameters -`otel.instrumentation.jenkins.run.metric.duration.allow_list` and `otel.instrumentation.jenkins.run.metric.duration.deny_list`. 
+`otel.instrumentation.jenkins.run.metric.duration.allow_list` and `otel.instrumentation.jenkins.run.metric.duration.deny_list`.** * Name: `ci.pipeline.run.duration` * Type: Histogram with buckets: `1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192` (buckets subject to change) From b8062ba915c740b719aea9d9ed48eb328a2b58f9 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Mon, 28 Oct 2024 11:20:15 +0530 Subject: [PATCH 10/11] Update docs/monitoring-metrics.md Co-authored-by: Christophe Kamphaus <44020965+christophe-kamphaus-jemmic@users.noreply.github.com> --- docs/monitoring-metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/monitoring-metrics.md b/docs/monitoring-metrics.md index 14728a52e..5985bbbef 100644 --- a/docs/monitoring-metrics.md +++ b/docs/monitoring-metrics.md @@ -28,7 +28,7 @@ or APIs ([here](https://www.elastic.co/guide/en/kibana/current/dashboard-import- **⚠️ In order to control metrics cardinality, the `ci.pipeline.run.duration` metrics are enabled by default aggregating the durations of all the jobs/pipelines under the umbrella `ci.pipeline.id=#other#`. 
-To enable per job/pipeline metrics, use the allow and deny list setting the configuration parameters +To enable per job/pipeline metrics, use the allow and deny list setting the configuration parameters `otel.instrumentation.jenkins.run.metric.duration.allow_list` and `otel.instrumentation.jenkins.run.metric.duration.deny_list`.** * Name: `ci.pipeline.run.duration` From e4f5bb99c57b74f7ca9eebd70b1e31810889d191 Mon Sep 17 00:00:00 2001 From: Cyrille Le Clerc Date: Mon, 28 Oct 2024 11:19:48 +0530 Subject: [PATCH 11/11] Add ci.pipeline.run.duration metric Signed-off-by: Cyrille Le Clerc --- .../plugins/opentelemetry/job/MonitoringRunListener.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java index 3c39a087c..6647a9447 100644 --- a/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java +++ b/src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java @@ -84,6 +84,8 @@ public class MonitoringRunListener extends OtelContextAwareAbstractRunListener i static final Pattern MATCH_ANYTHING = Pattern.compile(".*"); static final Pattern MATCH_NOTHING = Pattern.compile("$^"); + // TODO support configurability of these histogram buckets. Note that the conversion from a string to a list of + // doubles will require boilerplate so we are interested in getting user feedback before implementing this. static final List DURATION_SECONDS_BUCKETS = unmodifiableList( asList(1D, 2D, 4D, 8D, 16D, 32D, 64D, 128D, 256D, 512D, 1024D, 2048D, 4096D, 8192D));
Attribute value Description
ci.pipeline.run.duration `s` Duration of runs
ci.pipeline.run.active1`{jobs}`Gauge of active jobs
ci.pipeline.run.active`{jobs}` Gauge of active jobs
ci.pipeline.run.launched1`{jobs}` Job launched
ci.pipeline.run.started1`{jobs}` Job started
ci.pipeline.run.completed1`{jobs}` Job completed
ci.pipeline.run.aborted1`{jobs}` Job aborted
ci.pipeline.run.success1`{jobs}` Job successful
ci.pipeline.run.failed1`{jobs}` Job failed
jenkins.executor.available1`${executors}` label
jenkins.executor.busy1`${executors}` label
jenkins.executor.idle1`${executors}` label
jenkins.executor.online1`${executors}` label
jenkins.executor.connecting1`${executors}` label
jenkins.executor.defined1`${executors}` label
jenkins.executor.queue1`${items}` label
jenkins.queue.waiting1`${items}` Number of tasks in the queue with the status 'buildable' or 'pending' (see `Queue#getUnblockedItems()`)
jenkins.queue.blocked1`${items}` Number of blocked tasks in the queue. Note that waiting for an executor to be available is not a reason to be counted as blocked. (see `QueueListener#onEnterBlocked() - QueueListener#onLeaveBlocked()`)
jenkins.queue.buildable1`${items}` Number of tasks in the queue with the status 'buildable' or 'pending' (see `Queue#getBuildableItems()`)
jenkins.queue.left1`${items}` Total count of tasks that have been processed (see `QueueListener#onLeft`)
jenkins.agents.total1`{agents}` Number of agents
jenkins.agents.online1`{agents}` Number of online agents
jenkins.agents.offline1`{agents}` Number of offline agents
jenkins.agents.launch.failure1`{agents}` Number of failed launched agents
jenkins.cloud.agents.completed1`{agents}` Number of provisioned cloud agents
jenkins.cloud.agents.launch.failure1`{agents}` Number of failed cloud agents
github.api.rate_limit.remaining_requests1`{requests}` Always reported: github.api.url, github.authentication
For user-based authentication: enduser.id
@@ -261,28 +289,28 @@ Inventory of health metrics collected by the Jenkins OpenTelemetry integration:
jenkins.scm.event.pool_size1`{events}` Thread pool size of the SCM Event queue processor
jenkins.scm.event.active_threads1`{threads}` Number of active threads of the SCM events thread pool
jenkins.scm.event.queued_tasks1`{tasks}` Number of events in the SCM event queue
jenkins.scm.event.completed_tasks1`{tasks}` Number of processed SCM events
process.runtime.jvm.buffer.count The number of buffers in the pool gauge pool direct, mapped, mapped - 'non-volatile memory'