Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pr 872 follow-up #893

Merged
merged 5 commits into from
Nov 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@
<artifactId>workflow-cps</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.jenkins-ci.plugins</groupId>
<artifactId>metrics</artifactId>
<version>4.0.2.6</version>
</dependency>

<!-- for testing -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import hudson.model.ItemGroup;
import hudson.model.Node;
import hudson.util.XStream2;
import jenkins.metrics.api.Metrics;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.csanchez.jenkins.plugins.kubernetes.pipeline.PodTemplateMap;
Expand Down Expand Up @@ -72,6 +73,8 @@
import jenkins.model.JenkinsLocationConfiguration;
import jenkins.authentication.tokens.api.AuthenticationTokens;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.csanchez.jenkins.plugins.kubernetes.MetricNames.metricNameForLabel;

import jenkins.websocket.WebSockets;

/**
Expand Down Expand Up @@ -529,6 +532,7 @@ public KubernetesClient connect() throws KubernetesAuthException, IOException {
@Override
public synchronized Collection<NodeProvisioner.PlannedNode> provision(@NonNull final Cloud.CloudState state, final int excessWorkload) {
try {
Metrics.metricRegistry().meter(metricNameForLabel(state.getLabel())).mark(excessWorkload);
Label label = state.getLabel();
int plannedCapacity = state.getAdditionalPlannedCapacity(); // Planned nodes, will be launched on the next round of NodeProvisioner
Set<String> allInProvisioning = InProvisioning.getAllInProvisioning(label); // Nodes being launched
Expand Down Expand Up @@ -562,15 +566,23 @@ public synchronized Collection<NodeProvisioner.PlannedNode> provision(@NonNull f
if (!plannedNodes.isEmpty()) {
// Return early when a matching template was found and nodes were planned
LOGGER.log(Level.FINEST, "Planned {0} Kubernetes agents with template \"{1}\"", new Object[]{plannedNodes.size(), podTemplate.getName()});
Metrics.metricRegistry().counter(MetricNames.PROVISION_NODES).inc(plannedNodes.size());
if (plannedNodes.size() == provisioningLimit && plannedNodes.size() < toBeProvisioned) {
Metrics.metricRegistry().counter(MetricNames.REACHED_POD_CAP).inc();
}

return plannedNodes;
}
}
} else {
LOGGER.log(Level.INFO, "No slot left for provisioning (global limit)");
Metrics.metricRegistry().counter(MetricNames.REACHED_GLOBAL_CAP).inc();
}
}
Metrics.metricRegistry().counter(MetricNames.PROVISION_NODES).inc(plannedNodes.size());
return plannedNodes;
} catch (KubernetesClientException e) {
Metrics.metricRegistry().counter(MetricNames.PROVISION_FAILED).inc();
Throwable cause = e.getCause();
if (cause instanceof SocketTimeoutException || cause instanceof ConnectException || cause instanceof UnknownHostException) {
LOGGER.log(Level.WARNING, "Failed to connect to Kubernetes at {0}: {1}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import io.fabric8.kubernetes.client.KubernetesClientException;
import jenkins.metrics.api.Metrics;
import org.apache.commons.lang.StringUtils;
import org.kohsuke.stapler.DataBoundConstructor;

Expand Down Expand Up @@ -132,6 +133,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
try {
pod = client.pods().inNamespace(namespace).create(pod);
} catch (KubernetesClientException e) {
Metrics.metricRegistry().counter(MetricNames.CREATION_FAILED).inc();
int httpCode = e.getCode();
if (400 <= httpCode && httpCode < 500) { // 4xx
runListener.getLogger().printf("ERROR: Unable to create pod %s/%s.%n%s%n", namespace, pod.getMetadata().getName(), e.getMessage());
Expand All @@ -145,6 +147,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
}
LOGGER.log(INFO, "Created Pod: {0}/{1}", new Object[] { namespace, podName });
listener.getLogger().printf("Created Pod: %s/%s%n", namespace, podName);
Metrics.metricRegistry().counter(MetricNames.PODS_CREATED).inc();

runListener.getLogger().printf("Created Pod: %s/%s%n", namespace, podName);
kubernetesComputer.setLaunching(true);
Expand Down Expand Up @@ -173,6 +176,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
for (waitedForSlave = 0; waitedForSlave < waitForSlaveToConnect; waitedForSlave++) {
slaveComputer = slave.getComputer();
if (slaveComputer == null) {
Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc();
throw new IllegalStateException("Node was deleted, computer is null");
}
if (slaveComputer.isOnline()) {
Expand All @@ -182,10 +186,13 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
// Check that the pod hasn't failed already
pod = client.pods().inNamespace(namespace).withName(podName).get();
if (pod == null) {
Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc();
throw new IllegalStateException("Pod no longer exists: " + podName);
}
status = pod.getStatus().getPhase();
if (!validStates.contains(status)) {
Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc();
Metrics.metricRegistry().counter(MetricNames.metricNameForPodStatus(status)).inc();
break;
}

Expand All @@ -199,6 +206,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
new Object[] { podName, info.getState().getTerminated(), info.getName() });
listener.getLogger().printf("Container is terminated %1$s [%3$s]: %2$s%n", podName,
info.getState().getTerminated(), info.getName());
Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc();
terminatedContainers.add(info);
}
}
Expand All @@ -216,6 +224,9 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
Thread.sleep(1000);
}
if (slaveComputer == null || slaveComputer.isOffline()) {
Metrics.metricRegistry().counter(MetricNames.LAUNCH_FAILED).inc();
Metrics.metricRegistry().counter(MetricNames.FAILED_TIMEOUT).inc();

logLastLines(containerStatuses, podName, namespace, slave, null, client);
throw new IllegalStateException(
"Agent is not connected after " + waitedForSlave + " seconds, status: " + status);
Expand All @@ -229,6 +240,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) {
} catch (IOException e) {
LOGGER.log(Level.WARNING, "Could not save() agent: " + e.getMessage(), e);
}
Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).inc();
} catch (Throwable ex) {
setProblem(ex);
LOGGER.log(Level.WARNING, String.format("Error in provisioning; agent=%s, template=%s", slave, template), ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
import hudson.slaves.SlaveComputer;
import io.fabric8.kubernetes.api.model.Container;
import io.fabric8.kubernetes.client.utils.Serialization;
import jenkins.metrics.api.Metrics;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.csanchez.jenkins.plugins.kubernetes.pod.retention.PodRetention;
import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy;
import org.jenkinsci.plugins.kubernetes.auth.KubernetesAuthException;
import org.jvnet.localizer.Localizable;
import org.jvnet.localizer.ResourceBundleHolder;
import org.kohsuke.stapler.DataBoundConstructor;

Expand Down Expand Up @@ -334,6 +334,7 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted

if (deletePod) {
deleteSlavePod(listener, client);
Metrics.metricRegistry().counter(MetricNames.PODS_TERMINATED).inc();
} else {
// Log warning, as the slave pod may still be running
LOGGER.log(Level.WARNING, "Slave pod {0} was not deleted due to retention policy {1}.",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package org.csanchez.jenkins.plugins.kubernetes;

import hudson.model.Label;

/**
 * Central catalogue of the metric names published by the Kubernetes cloud
 * integration. Every name shares the {@code kubernetes.cloud} prefix so the
 * metrics can be grouped together in the registry.
 */
public class MetricNames {
    /** Common prefix for every metric name produced by this class. */
    private static final String PREFIX = "kubernetes.cloud";

    // Pod lifecycle metrics.
    public static final String CREATION_FAILED = PREFIX + ".pods.creation.failed";
    public static final String PODS_CREATED = PREFIX + ".pods.created";
    public static final String LAUNCH_FAILED = PREFIX + ".pods.launch.failed";
    public static final String PODS_TERMINATED = PREFIX + ".pods.terminated";
    // Provisioning metrics.
    public static final String REACHED_POD_CAP = PREFIX + ".provision.reached.pod.cap";
    public static final String REACHED_GLOBAL_CAP = PREFIX + ".provision.reached.global.cap";
    public static final String FAILED_TIMEOUT = PREFIX + ".pods.launch.failed.timeout";
    public static final String PROVISION_NODES = PREFIX + ".provision.nodes";
    public static final String PROVISION_FAILED = PREFIX + ".provision.failed";
    public static final String PODS_LAUNCHED = PREFIX + ".pods.launched";

    /**
     * Builds the metric name used to count pod launches that ended in the given
     * pod status.
     *
     * @param status the pod status/phase text; may be {@code null}
     * @return {@code kubernetes.cloud.pods.launch.status.<status>} with the status
     *         lower-cased, or with the literal segment {@code null} when
     *         {@code status} is {@code null}
     */
    public static String metricNameForPodStatus(String status) {
        // Lower-case with Locale.ROOT: the no-arg toLowerCase() follows the JVM
        // default locale, which would yield a different metric name on e.g. a
        // Turkish-locale controller ("FAILED" -> "faıled" with a dotless i).
        String formattedStatus = status == null ? "null" : status.toLowerCase(java.util.Locale.ROOT);
        return PREFIX + ".pods.launch.status." + formattedStatus;
    }

    /**
     * Builds the metric name used to record provisioning requests for a label.
     *
     * @param label the requested label; may be {@code null}
     * @return {@code kubernetes.cloud.<label>.provision.request}, where
     *         {@code <label>} is the label's display name, or {@code nolabel}
     *         when {@code label} is {@code null}
     */
    public static String metricNameForLabel(Label label) {
        String labelText = (label == null) ? "nolabel" : label.getDisplayName();
        return String.format("%s.%s.provision.request", PREFIX, labelText);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package org.csanchez.jenkins.plugins.kubernetes;

import antlr.ANTLRException;
import hudson.model.Label;
import hudson.model.labels.LabelAtom;
import org.junit.Assert;
import org.junit.Test;

/**
 * Unit tests for the metric-name builders in {@link MetricNames}.
 */
public class MetricNamesTest {

    /** A {@code null} pod status is rendered as the literal segment "null". */
    @Test
    public void metricNameForPodStatusAddsNullWhenStatusIsNull() {
        Assert.assertEquals(
                "kubernetes.cloud.pods.launch.status.null",
                MetricNames.metricNameForPodStatus(null));
    }

    /** A non-null pod status is appended as the last segment of the name. */
    @Test
    public void metricNameForPodStatusAddsStatusValueIfNotNull() {
        Assert.assertEquals(
                "kubernetes.cloud.pods.launch.status.running",
                MetricNames.metricNameForPodStatus("RUNNING"));
    }

    /** Mixed-case pod statuses are normalised to lower case. */
    @Test
    public void metricNameForPodStatusChangeStatusToLowercase() {
        Assert.assertEquals(
                "kubernetes.cloud.pods.launch.status.failed",
                MetricNames.metricNameForPodStatus("FaIlEd"));
    }

    /** A {@code null} label falls back to the "nolabel" segment. */
    @Test
    public void metricNameForLabelAddsNoLabelIfLabelIsNull() {
        Assert.assertEquals(
                "kubernetes.cloud.nolabel.provision.request",
                MetricNames.metricNameForLabel(null));
    }

    /** A concrete label contributes its display name to the metric name. */
    @Test
    public void metricNameForLabelAddsLabelValue() {
        final LabelAtom javaLabel = new LabelAtom("java");
        Assert.assertEquals(
                "kubernetes.cloud.java.provision.request",
                MetricNames.metricNameForLabel(javaLabel));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.api.model.PodList;
import io.fabric8.kubernetes.client.KubernetesClientException;
import jenkins.metrics.api.Metrics;
import jenkins.model.Jenkins;
import org.csanchez.jenkins.plugins.kubernetes.ContainerTemplate;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;
import org.csanchez.jenkins.plugins.kubernetes.MetricNames;
import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplate;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils;
Expand Down Expand Up @@ -211,6 +213,9 @@ public void runInPod() throws Exception {
filter(lr -> lr.getLevel().intValue() >= Level.WARNING.intValue()). // TODO .record(…, WARNING) does not accomplish this
map(lr -> lr.getSourceClassName() + "." + lr.getSourceMethodName() + ": " + lr.getMessage()).collect(Collectors.toList()), // LogRecord does not override toString
emptyIterable());

assertTrue(Metrics.metricRegistry().counter(MetricNames.PODS_LAUNCHED).getCount() > 0);
assertTrue(Metrics.metricRegistry().meter(MetricNames.metricNameForLabel(Label.parseExpression("runInPod"))).getCount() > 0);
}

@Test
Expand Down