Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ci.pipeline.run.duration metric #959

Merged
merged 15 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import io.jenkins.plugins.opentelemetry.queue.RemoteSpanAction;
import io.jenkins.plugins.opentelemetry.semconv.JenkinsOtelSemanticAttributes;
import io.jenkins.plugins.opentelemetry.semconv.JenkinsSemanticMetrics;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongCounter;
import io.opentelemetry.api.metrics.Meter;
import io.opentelemetry.api.trace.Span;
Expand Down Expand Up @@ -58,10 +60,12 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static com.google.common.base.Verify.verifyNotNull;
Expand All @@ -72,17 +76,24 @@
@Extension(dynamicLoadable = YesNoMaybe.YES, optional = true)
public class MonitoringRunListener extends OtelContextAwareAbstractRunListener implements OpenTelemetryLifecycleListener {

/**
 * Default allow list for the run duration histogram: accepts any single-line input,
 * including the empty string.
 */
static final Pattern MATCH_ANYTHING = Pattern.compile(".*");
/**
 * Default deny list for the run duration histogram: a pattern that never matches.
 * <p>
 * {@code (?!)} is an empty negative lookahead, which fails at every position, so
 * {@code matcher(x).matches()} is {@code false} for every input. The empty pattern {@code ""}
 * is not suitable here because it fully matches the empty string.
 */
static final Pattern MATCH_NOTHING = Pattern.compile("(?!)");

Check warning on line 80 in src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java

View check run for this annotation

ci.jenkins.io / Open Tasks Scanner

FIXME

HIGH: check regex


protected static final Logger LOGGER = Logger.getLogger(MonitoringRunListener.class.getName());

private AtomicInteger activeRunGauge;
private List<CauseHandler> causeHandlers;
private DoubleHistogram runDurationHistogram;
private LongCounter runLaunchedCounter;
private LongCounter runStartedCounter;
private LongCounter runCompletedCounter;
private LongCounter runAbortedCounter;
private LongCounter runSuccessCounter;
private LongCounter runFailedCounter;
private List<RunHandler> runHandlers;
private Pattern runDurationHistogramAllowList;
private Pattern runDurationHistogramDenyList;

@PostConstruct
public void postConstruct() {
Expand All @@ -104,42 +115,60 @@

// METRICS
activeRunGauge = new AtomicInteger();

runDurationHistogram = meter.histogramBuilder("ci.pipeline.run.duration")
cyrille-leclerc marked this conversation as resolved.
Show resolved Hide resolved
.setUnit("s")
.build();
runDurationHistogramAllowList = MATCH_ANYTHING; // allow all
runDurationHistogramDenyList = MATCH_NOTHING; // deny nothing

meter.gaugeBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ACTIVE)
.ofLongs()
.setDescription("Gauge of active jobs")
.setUnit("{jobs}")
.buildWithCallback(valueObserver -> valueObserver.record(this.activeRunGauge.get()));
runLaunchedCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_LAUNCHED)
.setDescription("Job launched")
.setUnit("{jobs}")
.build();
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_LAUNCHED)
.setDescription("Job launched")
.setUnit("{jobs}")
.build();
runStartedCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_STARTED)
.setDescription("Job started")
.setUnit("{jobs}")
.build();
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_STARTED)
.setDescription("Job started")
.setUnit("{jobs}")
.build();
runSuccessCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_SUCCESS)
.setDescription("Job succeed")
.setUnit("{jobs}")
.build();
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_SUCCESS)
.setDescription("Job succeed")
.setUnit("{jobs}")
.build();
runFailedCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_FAILED)
.setDescription("Job failed")
.setUnit("{jobs}")
.build();
runAbortedCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ABORTED)
.setDescription("Job aborted")
.setUnit("{jobs}")
.build();
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_ABORTED)
.setDescription("Job aborted")
.setUnit("{jobs}")
.build();
runCompletedCounter =
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_COMPLETED)
.setDescription("Job completed")
.setUnit("{jobs}")
.build();
meter.counterBuilder(JenkinsSemanticMetrics.CI_PIPELINE_RUN_COMPLETED)
.setDescription("Job completed")
.setUnit("{jobs}")
.build();
}

@Override
public void afterConfiguration(ConfigProperties configProperties) {
    // Allow list: the configured regex when the property is set, otherwise accept every pipeline.
    String allowListRegex = configProperties.getString(
        JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST);
    if (allowListRegex == null) {
        this.runDurationHistogramAllowList = MATCH_ANYTHING;
    } else {
        this.runDurationHistogramAllowList = Pattern.compile(allowListRegex);
    }

    // Deny list: the configured regex when the property is set, otherwise deny no pipeline.
    String denyListRegex = configProperties.getString(
        JenkinsOtelSemanticAttributes.OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST);
    if (denyListRegex == null) {
        this.runDurationHistogramDenyList = MATCH_NOTHING;
    } else {
        this.runDurationHistogramDenyList = Pattern.compile(denyListRegex);
    }
}

@NonNull
Expand All @@ -149,11 +178,11 @@

@NonNull
public CauseHandler getCauseHandler(@NonNull Cause cause) throws NoSuchElementException {
return getCauseHandlers().stream().filter(ch -> ch.isSupported(cause)).findFirst().get();
return getCauseHandlers().stream().filter(ch -> ch.isSupported(cause)).findFirst().orElseThrow();
}

@Override
public void _onInitialize(@NonNull Run run) {
public void _onInitialize(@NonNull Run<?, ?> run) {
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - onInitialize");

activeRunGauge.incrementAndGet();
Expand All @@ -165,30 +194,30 @@
rootSpanBuilder.setSpanKind(SpanKind.SERVER);
String runUrl = Objects.toString(Jenkins.get().getRootUrl(), "") + run.getUrl();

// TODO move this to a pluggable span enrichment API with implementations for different observability backends

Check warning on line 197 in src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java

View check run for this annotation

ci.jenkins.io / Open Tasks Scanner

TODO

NORMAL: move this to a pluggable span enrichment API with implementations for different observability backends
rootSpanBuilder
.setAttribute(JenkinsOtelSemanticAttributes.ELASTIC_TRANSACTION_TYPE, "job");
.setAttribute(JenkinsOtelSemanticAttributes.ELASTIC_TRANSACTION_TYPE, "job");

rootSpanBuilder
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, run.getParent().getFullName())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_NAME, run.getParent().getFullDisplayName())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_URL, runUrl)
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_NUMBER, (long) run.getNumber())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_TYPE, OtelUtils.getProjectType(run));
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, run.getParent().getFullName())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_NAME, run.getParent().getFullDisplayName())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_URL, runUrl)
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_NUMBER, (long) run.getNumber())
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_TYPE, OtelUtils.getProjectType(run));

// CULPRITS
Set<User> culpritIds;
if (run instanceof WorkflowRun) {
culpritIds = ((WorkflowRun) run).getCulprits();
} else if (run instanceof AbstractBuild) {
culpritIds = ((AbstractBuild) run).getCulprits();
culpritIds = ((AbstractBuild<?, ?>) run).getCulprits();
} else {
culpritIds = null;
}
if (culpritIds != null) {
rootSpanBuilder
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMMITTERS,
culpritIds.stream().map(p -> p.getId()).collect(Collectors.toList()));
culpritIds.stream().map(User::getId).collect(Collectors.toList()));
}

// PARAMETERS
Expand Down Expand Up @@ -232,14 +261,14 @@
});

// CAUSES
List<String> causesDescriptions = ((List<Cause>) run.getCauses()).stream().map(c -> getCauseHandler(c).getStructuredDescription(c)).collect(Collectors.toList());
List<String> causesDescriptions = run.getCauses().stream().map(c -> getCauseHandler(c).getStructuredDescription(c)).collect(Collectors.toList());
rootSpanBuilder.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_CAUSE, causesDescriptions);

Optional optCause = run.getCauses().stream().findFirst();
Optional<Cause> optCause = run.getCauses().stream().findFirst();
optCause.ifPresent(cause -> {
if (cause instanceof Cause.UpstreamCause) {
Cause.UpstreamCause upstreamCause = (Cause.UpstreamCause) cause;
Run upstreamRun = upstreamCause.getUpstreamRun();
Run<?, ?> upstreamRun = upstreamCause.getUpstreamRun();
if (upstreamRun == null) {
// hudson.model.Cause.UpstreamCause.getUpstreamRun() can return null, probably if upstream job or build has been deleted.
} else {
Expand Down Expand Up @@ -287,8 +316,8 @@

// START initialize span
Span startSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_START_NAME)
.setParent(Context.current().with(rootSpan))
.startSpan();
.setParent(Context.current().with(rootSpan))
.startSpan();
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(startSpan));

this.getTraceService().putRunPhaseSpan(run, startSpan);
Expand All @@ -299,7 +328,7 @@
}

@Override
public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) {
public void _onStarted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
try (Scope parentScope = endPipelinePhaseSpan(run)) {
Span runSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_RUN_NAME).setParent(Context.current()).startSpan();
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(runSpan));
Expand All @@ -311,7 +340,7 @@
}

@Override
public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) {
public void _onCompleted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
try (Scope parentScope = endPipelinePhaseSpan(run)) {
Span finalizeSpan = getTracer().spanBuilder(JenkinsOtelSemanticAttributes.JENKINS_JOB_SPAN_PHASE_FINALIZE_NAME).setParent(Context.current()).startSpan();
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - begin " + OtelUtils.toDebugString(finalizeSpan));
Expand All @@ -323,7 +352,7 @@

@MustBeClosed
@NonNull
protected Scope endPipelinePhaseSpan(@NonNull Run run) {
protected Scope endPipelinePhaseSpan(@NonNull Run<?, ?> run) {
Span pipelinePhaseSpan = verifyNotNull(Span.current(), "No pipelinePhaseSpan found in context");
pipelinePhaseSpan.end();
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - end " + OtelUtils.toDebugString(pipelinePhaseSpan));
Expand All @@ -334,7 +363,7 @@
}

@Override
public void _onFinalized(@NonNull Run run) {
public void _onFinalized(@NonNull Run<?, ?> run) {

try (Scope parentScope = endPipelinePhaseSpan(run)) {
Span parentSpan = Span.current();
Expand All @@ -357,7 +386,7 @@

if (Result.SUCCESS.equals(runResult)) {
parentSpan.setStatus(StatusCode.OK, runResult.toString());
} else if (Result.FAILURE.equals(runResult) || Result.UNSTABLE.equals(runResult)){
} else if (Result.FAILURE.equals(runResult) || Result.UNSTABLE.equals(runResult)) {
parentSpan.setAttribute(ExceptionAttributes.EXCEPTION_TYPE, "PIPELINE_" + runResult);
parentSpan.setAttribute(ExceptionAttributes.EXCEPTION_MESSAGE, "PIPELINE_" + runResult);
parentSpan.setStatus(StatusCode.ERROR, runResult.toString());
Expand All @@ -367,7 +396,7 @@
}
// NODE
if (run instanceof AbstractBuild) {
Node node = ((AbstractBuild) run).getBuiltOn();
Node node = ((AbstractBuild<?, ?>) run).getBuiltOn();
if (node != null) {
parentSpan.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_AGENT_LABEL, node.getLabelString());
parentSpan.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_AGENT_ID, node.getNodeName());
Expand All @@ -381,7 +410,6 @@

this.getTraceService().purgeRun(run);


Result result = verifyNotNull(run.getResult(), "%s", run);

if (result.isCompleteBuild()) {
Expand All @@ -395,6 +423,19 @@
} else {
this.runAbortedCounter.add(1);
}

String pipelineId =
runDurationHistogramAllowList.matcher(run.getParent().getFullName()).matches()

Check warning on line 428 in src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 428 is only partially covered, one branch is missing
&&
!runDurationHistogramDenyList.matcher(run.getParent().getFullName()).matches() ?

Check warning on line 430 in src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringRunListener.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 430 is only partially covered, one branch is missing
run.getParent().getFullName() : "#other#";
runDurationHistogram.record(
TimeUnit.SECONDS.convert(run.getDuration(), TimeUnit.MILLISECONDS),
Attributes.of(
JenkinsOtelSemanticAttributes.CI_PIPELINE_ID, pipelineId,
JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_RESULT, result.toString(),
JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_COMPLETED, result.isCompleteBuild())
cyrille-leclerc marked this conversation as resolved.
Show resolved Hide resolved
);
} finally {
activeRunGauge.decrementAndGet();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* {@link RunListener} that sets up the OpenTelemetry {@link io.opentelemetry.context.Context}
* with the current {@link Span}.
*/
public abstract class OtelContextAwareAbstractRunListener extends RunListener<Run> {
public abstract class OtelContextAwareAbstractRunListener extends RunListener<Run<?, ?>> {

private final static Logger LOGGER = Logger.getLogger(OtelContextAwareAbstractRunListener.class.getName());

Expand All @@ -56,45 +56,45 @@ public final void setOpenTelemetry(@NonNull ReconfigurableOpenTelemetry jenkinsC
}

@Override
public final void onCompleted(@NonNull Run run, @NonNull TaskListener listener) {
public final void onCompleted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
Span span = getTraceService().getSpan(run);
try (Scope scope = span.makeCurrent()) {
this._onCompleted(run, listener);
}
}

public void _onCompleted(@NonNull Run run, @NonNull TaskListener listener) {
public void _onCompleted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
}

@Override
public final void onFinalized(@NonNull Run run) {
public final void onFinalized(@NonNull Run<?, ?> run) {
Span span = getTraceService().getSpan(run);
try (Scope scope = span.makeCurrent()) {
this._onFinalized(run);
}
}


public void _onFinalized(Run run) {
public void _onFinalized(Run<?, ?> run) {
}

@Override
public final void onInitialize(@NonNull Run run) {
public final void onInitialize(@NonNull Run<?, ?> run) {
this._onInitialize(run);
}

public void _onInitialize(@NonNull Run run) {
public void _onInitialize(@NonNull Run<?, ?> run) {
}

@Override
public final void onStarted(@NonNull Run run, @NonNull TaskListener listener) {
public final void onStarted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
Span span = getTraceService().getSpan(run);
try (Scope scope = span.makeCurrent()) {
this._onStarted(run, listener);
}
}

public void _onStarted(@NonNull Run run, @NonNull TaskListener listener) {
public void _onStarted(@NonNull Run<?, ?> run, @NonNull TaskListener listener) {
}

@Override
Expand All @@ -112,14 +112,14 @@ public Environment _setUpEnvironment(@NonNull AbstractBuild build, @NonNull Laun
}

@Override
public final void onDeleted(@NonNull Run run) {
public final void onDeleted(@NonNull Run<?, ?> run) {
Span span = getTraceService().getSpan(run);
try (Scope ignored = span.makeCurrent()) {
this._onDeleted(run);
}
}

public void _onDeleted(@NonNull Run run) {
public void _onDeleted(@NonNull Run<?, ?> run) {
}

@NonNull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import hudson.PluginWrapper;
import hudson.model.Computer;
import hudson.model.Job;
import io.jenkins.plugins.opentelemetry.api.semconv.JenkinsAttributes;
import io.opentelemetry.api.common.AttributeKey;
import jenkins.model.Jenkins;
Expand All @@ -21,6 +22,9 @@
public final class JenkinsOtelSemanticAttributes extends JenkinsAttributes {
public static final AttributeKey<String> CI_PIPELINE_TYPE = AttributeKey.stringKey("ci.pipeline.type");
public static final AttributeKey<String> CI_PIPELINE_MULTIBRANCH_TYPE = AttributeKey.stringKey("ci.pipeline.multibranch.type");
/**
* @see Job#getFullName()
*/
public static final AttributeKey<String> CI_PIPELINE_ID = AttributeKey.stringKey("ci.pipeline.id");
public static final AttributeKey<String> CI_PIPELINE_NAME = AttributeKey.stringKey("ci.pipeline.name");
public static final AttributeKey<String> CI_PIPELINE_TEMPLATE_ID = AttributeKey.stringKey("ci.pipeline.template.id");
Expand Down Expand Up @@ -135,6 +139,8 @@ public final class JenkinsOtelSemanticAttributes extends JenkinsAttributes {

public static final String OTEL_INSTRUMENTATION_JENKINS_WEB_ENABLED = "otel.instrumentation.jenkins.web.enabled";
public static final String OTEL_INSTRUMENTATION_JENKINS_REMOTE_SPAN_ENABLED = "otel.instrumentation.jenkins.remote.span.enabled";
/**
 * Name of the configuration property holding the allow list of the {@code ci.pipeline.run.duration} histogram.
 * The value is compiled as a {@link java.util.regex.Pattern} and must fully match the job full name
 * ({@link Job#getFullName()}) for the run duration to be recorded under its own {@code ci.pipeline.id};
 * other runs are recorded with the {@code ci.pipeline.id} value {@code "#other#"}.
 * When the property is not set, all pipelines are allowed.
 */
public static final String OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_ALLOW_LIST = "otel.instrumentation.jenkins.run.metric.duration.allow_list";
/**
 * Name of the configuration property holding the deny list of the {@code ci.pipeline.run.duration} histogram.
 * The value is compiled as a {@link java.util.regex.Pattern}; a run whose job full name
 * ({@link Job#getFullName()}) fully matches it is recorded with the {@code ci.pipeline.id} value
 * {@code "#other#"}, even if it also matches the allow list.
 * When the property is not set, no pipeline is intended to be denied.
 */
public static final String OTEL_INSTRUMENTATION_JENKINS_RUN_DURATION_DENY_LIST = "otel.instrumentation.jenkins.run.metric.duration.deny_list";
/**
* Instrument Jenkins Remoting from the Jenkins controller to Jenkins build agents
*/
Expand Down
Loading