diff --git a/pom.xml b/pom.xml index 2a25db396c..2ba659ea2d 100644 --- a/pom.xml +++ b/pom.xml @@ -126,6 +126,11 @@ workflow-cps true + + org.jenkins-ci.plugins + metrics + 4.0.2.6 + diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java index 86d3a75ded..71e0039dc6 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesCloud.java @@ -29,6 +29,7 @@ import hudson.model.ItemGroup; import hudson.model.Node; import hudson.util.XStream2; +import jenkins.metrics.api.Metrics; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.csanchez.jenkins.plugins.kubernetes.pipeline.PodTemplateMap; @@ -72,6 +73,8 @@ import jenkins.model.JenkinsLocationConfiguration; import jenkins.authentication.tokens.api.AuthenticationTokens; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.csanchez.jenkins.plugins.kubernetes.MetricNames.metricNameForLabel; + import jenkins.websocket.WebSockets; /** @@ -529,6 +532,7 @@ public KubernetesClient connect() throws KubernetesAuthException, IOException { @Override public synchronized Collection provision(@NonNull final Cloud.CloudState state, final int excessWorkload) { try { + Metrics.metricRegistry().meter(metricNameForLabel(state.getLabel())).mark(excessWorkload); Label label = state.getLabel(); int plannedCapacity = state.getAdditionalPlannedCapacity(); // Planned nodes, will be launched on the next round of NodeProvisioner Set allInProvisioning = InProvisioning.getAllInProvisioning(label); // Nodes being launched @@ -562,15 +566,23 @@ public synchronized Collection provision(@NonNull f if (!plannedNodes.isEmpty()) { // Return early when a matching template was found and nodes were planned LOGGER.log(Level.FINEST, "Planned {0} Kubernetes agents with template \"{1}\"", 
new Object[]{plannedNodes.size(), podTemplate.getName()}); + Metrics.metricRegistry().counter(MetricNames.PROVISION_NODES).inc(plannedNodes.size()); + if (plannedNodes.size() == provisioningLimit && plannedNodes.size() < toBeProvisioned) { + Metrics.metricRegistry().counter(MetricNames.REACHED_POD_CAP).inc(); + } + return plannedNodes; } } } else { LOGGER.log(Level.INFO, "No slot left for provisioning (global limit)"); + Metrics.metricRegistry().counter(MetricNames.REACHED_GLOBAL_CAP).inc(); } } + Metrics.metricRegistry().counter(MetricNames.PROVISION_NODES).inc(plannedNodes.size()); return plannedNodes; } catch (KubernetesClientException e) { + Metrics.metricRegistry().counter(MetricNames.PROVISION_FAILED).inc(); Throwable cause = e.getCause(); if (cause instanceof SocketTimeoutException || cause instanceof ConnectException || cause instanceof UnknownHostException) { LOGGER.log(Level.WARNING, "Failed to connect to Kubernetes at {0}: {1}", diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java index 13c17ace5e..0c327d764b 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java @@ -39,6 +39,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.fabric8.kubernetes.client.KubernetesClientException; +import jenkins.metrics.api.Metrics; import org.apache.commons.lang.StringUtils; import org.kohsuke.stapler.DataBoundConstructor; @@ -132,6 +133,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { try { pod = client.pods().inNamespace(namespace).create(pod); } catch (KubernetesClientException e) { + Metrics.metricRegistry().counter(MetricNames.CREATION_FAILED).inc(); int httpCode = e.getCode(); if (400 <= httpCode && httpCode < 500) { // 4xx runListener.getLogger().printf("ERROR: Unable to 
create pod %s/%s.%n%s%n", namespace, pod.getMetadata().getName(), e.getMessage()); @@ -145,6 +147,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { } LOGGER.log(INFO, "Created Pod: {0}/{1}", new Object[] { namespace, podName }); listener.getLogger().printf("Created Pod: %s/%s%n", namespace, podName); + Metrics.metricRegistry().counter(MetricNames.PODS_CREATED).inc(); runListener.getLogger().printf("Created Pod: %s/%s%n", namespace, podName); kubernetesComputer.setLaunching(true); @@ -173,6 +176,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { for (waitedForSlave = 0; waitedForSlave < waitForSlaveToConnect; waitedForSlave++) { slaveComputer = slave.getComputer(); if (slaveComputer == null) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); throw new IllegalStateException("Node was deleted, computer is null"); } if (slaveComputer.isOnline()) { @@ -182,10 +186,13 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { // Check that the pod hasn't failed already pod = client.pods().inNamespace(namespace).withName(podName).get(); if (pod == null) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); throw new IllegalStateException("Pod no longer exists: " + podName); } status = pod.getStatus().getPhase(); if (!validStates.contains(status)) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + Metrics.metricRegistry().counter(MetricNames.metricNameForPodStatus(status)).inc(); break; } @@ -199,6 +206,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { new Object[] { podName, info.getState().getTerminated(), info.getName() }); listener.getLogger().printf("Container is terminated %1$s [%3$s]: %2$s%n", podName, info.getState().getTerminated(), info.getName()); + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); terminatedContainers.add(info); } } @@ -216,6 +224,9 @@ 
public synchronized void launch(SlaveComputer computer, TaskListener listener) { Thread.sleep(1000); } if (slaveComputer == null || slaveComputer.isOffline()) { + Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc(); + Metrics.metricRegistry().counter(MetricNames.FAILED_TIMEOUT).inc(); + logLastLines(containerStatuses, podName, namespace, slave, null, client); throw new IllegalStateException( "Agent is not connected after " + waitedForSlave + " seconds, status: " + status); @@ -229,6 +240,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { } catch (IOException e) { LOGGER.log(Level.WARNING, "Could not save() agent: " + e.getMessage(), e); } + Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).inc(); } catch (Throwable ex) { setProblem(ex); LOGGER.log(Level.WARNING, String.format("Error in provisioning; agent=%s, template=%s", slave, template), ex); diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java index 7fa177201b..9956bc1465 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java @@ -20,13 +20,13 @@ import hudson.slaves.SlaveComputer; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.client.utils.Serialization; +import jenkins.metrics.api.Metrics; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.Validate; import org.csanchez.jenkins.plugins.kubernetes.pod.retention.PodRetention; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException; -import org.jvnet.localizer.Localizable; import org.jvnet.localizer.ResourceBundleHolder; import org.kohsuke.stapler.DataBoundConstructor; @@ -334,6 +334,7 @@ 
protected void _terminate(TaskListener listener) throws IOException, Interrupted if (deletePod) { deleteSlavePod(listener, client); + Metrics.metricRegistry().counter(MetricNames.PODS_TERMINATED).inc(); } else { // Log warning, as the slave pod may still be running LOGGER.log(Level.WARNING, "Slave pod {0} was not deleted due to retention policy {1}.", diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/MetricNames.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/MetricNames.java new file mode 100644 index 0000000000..5d3c6d7262 --- /dev/null +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/MetricNames.java @@ -0,0 +1,28 @@ +package org.csanchez.jenkins.plugins.kubernetes; + +import hudson.model.Label; + +public class MetricNames { + private static final String PREFIX = "kubernetes.cloud"; + + public static final String CREATION_FAILED = PREFIX + ".pods.creation.failed"; + public static final String PODS_CREATED = PREFIX + ".pods.created"; + public static final String LAUNCH_FAILED = PREFIX + ".pods.launch.failed"; + public static final String PODS_TERMINATED = PREFIX + ".pods.terminated"; + public static final String REACHED_POD_CAP = PREFIX + ".provision.reached.pod.cap"; + public static final String REACHED_GLOBAL_CAP = PREFIX + ".provision.reached.global.cap"; + public static final String FAILED_TIMEOUT = PREFIX + ".pods.launch.failed.timeout"; + public static final String PROVISION_NODES = PREFIX + ".provision.nodes"; + public static final String PROVISION_FAILED = PREFIX + ".provision.failed"; + public static final String PODS_LAUNCHED = PREFIX + ".pods.launched"; + + public static String metricNameForPodStatus(String status) { + String formattedStatus = status == null ? "null" : status.toLowerCase(java.util.Locale.ROOT); /* Locale.ROOT keeps the metric name identical under any JVM default locale (e.g. Turkish dotless-i lowercasing) */ + return PREFIX + ".pods.launch.status." + formattedStatus; + } + + public static String metricNameForLabel(Label label) { + String labelText = (label == null) ? 
"nolabel" : label.getDisplayName(); + return String.format("%s.%s.provision.request", PREFIX, labelText); + } +} diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/MetricNamesTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/MetricNamesTest.java new file mode 100644 index 0000000000..73ee0cbc39 --- /dev/null +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/MetricNamesTest.java @@ -0,0 +1,50 @@ +package org.csanchez.jenkins.plugins.kubernetes; + +import antlr.ANTLRException; +import hudson.model.Label; +import hudson.model.labels.LabelAtom; +import org.junit.Assert; +import org.junit.Test; + +public class MetricNamesTest { + + @Test + public void metricNameForPodStatusAddsNullWhenStatusIsNull() { + String expected = "kubernetes.cloud.pods.launch.status.null"; + String actual = MetricNames.metricNameForPodStatus(null); + + Assert.assertEquals(expected, actual); + } + + @Test + public void metricNameForPodStatusAddsStatusValueIfNotNull() { + String expected = "kubernetes.cloud.pods.launch.status.running"; + String actual = MetricNames.metricNameForPodStatus("RUNNING"); + + Assert.assertEquals(expected, actual); + } + + @Test + public void metricNameForPodStatusChangeStatusToLowercase() { + String expected = "kubernetes.cloud.pods.launch.status.failed"; + String actual = MetricNames.metricNameForPodStatus("FaIlEd"); + + Assert.assertEquals(expected, actual); + } + + @Test + public void metricNameForLabelAddsNoLabelIfLabelIsNull() { + String expected = "kubernetes.cloud.nolabel.provision.request"; + String actual = MetricNames.metricNameForLabel(null); + + Assert.assertEquals(expected, actual); + } + + @Test + public void metricNameForLabelAddsLabelValue() { + String expected = "kubernetes.cloud.java.provision.request"; + String actual = MetricNames.metricNameForLabel(new LabelAtom("java")); + + Assert.assertEquals(expected, actual); + } +} \ No newline at end of file diff --git 
a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java index 3220e56b47..8975ce37d2 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java @@ -62,9 +62,11 @@ import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodList; import io.fabric8.kubernetes.client.KubernetesClientException; +import jenkins.metrics.api.Metrics; import jenkins.model.Jenkins; import org.csanchez.jenkins.plugins.kubernetes.ContainerTemplate; import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave; +import org.csanchez.jenkins.plugins.kubernetes.MetricNames; import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation; import org.csanchez.jenkins.plugins.kubernetes.PodTemplate; import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils; @@ -211,6 +213,9 @@ public void runInPod() throws Exception { filter(lr -> lr.getLevel().intValue() >= Level.WARNING.intValue()). // TODO .record(…, WARNING) does not accomplish this map(lr -> lr.getSourceClassName() + "." + lr.getSourceMethodName() + ": " + lr.getMessage()).collect(Collectors.toList()), // LogRecord does not override toString emptyIterable()); + + assertTrue(Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).getCount() > 0); + assertTrue(Metrics.metricRegistry().meter(MetricNames.metricNameForLabel(Label.parseExpression("runInPod"))).getCount() > 0); } @Test