diff --git a/Jenkinsfile b/Jenkinsfile
index b3e75c465c..3063ec25ed 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1 +1,4 @@
-buildPlugin(configurations: buildPlugin.recommendedConfigurations().findAll { it.platform == 'linux' })
+// Build and test on Linux only, once with JDK 8 and once with JDK 11.
+// NOTE(review): `jenkins: null` presumably means "use the Jenkins version declared in pom.xml" - confirm against the buildPlugin documentation.
+buildPlugin(configurations: [
+ [platform: 'linux', jdk: '8', jenkins: null],
+ [platform: 'linux', jdk: '11', jenkins: null],
+])
diff --git a/pom.xml b/pom.xml
index 63e658fc46..9d1541dd45 100644
--- a/pom.xml
+++ b/pom.xml
@@ -46,11 +46,12 @@
8
- 2.138.4
+ 2.176.1
false
true
0
1.3.7
+ 3.3
2.20
1.7.26
@@ -145,19 +146,19 @@
org.jenkins-ci.plugins.workflow
workflow-support
- 3.3
+ ${workflow-support-plugin.version}
test
org.jenkins-ci.plugins.workflow
workflow-durable-task-step
- 2.28
+ 2.32
test
org.jenkins-ci.plugins.workflow
workflow-support
- 3.0
+ ${workflow-support-plugin.version}
tests
test
diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
new file mode 100644
index 0000000000..8621731706
--- /dev/null
+++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pod/retention/Reaper.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2019 CloudBees, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.csanchez.jenkins.plugins.kubernetes.pod.retention;
+
+import hudson.Extension;
+import hudson.model.Computer;
+import hudson.model.Node;
+import hudson.model.TaskListener;
+import hudson.slaves.Cloud;
+import hudson.slaves.ComputerListener;
+import hudson.slaves.EphemeralNode;
+import io.fabric8.kubernetes.api.model.Pod;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import io.fabric8.kubernetes.client.KubernetesClientException;
+import io.fabric8.kubernetes.client.Watcher;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import jenkins.model.Jenkins;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesComputer;
+import org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave;
+
+/**
+ * Checks for deleted pods corresponding to {@link KubernetesSlave} and ensures the node is removed from Jenkins too.
+ *
+ * <p>If the pod has been deleted, all of the associated state (running user processes, workspace, etc.) must also be gone;
+ * so there is no point in retaining this agent definition any further.
+ * ({@link KubernetesSlave} is not an {@link EphemeralNode}: it does support running across Jenkins restarts.)
+ *
+ * <p>Note that pod retention policies other than the default {@link Never} may disable this system,
+ * unless some external process or garbage collection policy results in pod deletion.
+ */
+@Extension
+public class Reaper extends ComputerListener implements Watcher<Pod> {
+
+    private static final Logger LOGGER = Logger.getLogger(Reaper.class.getName());
+
+    /**
+     * Activate this feature only if and when some Kubernetes agent is actually used.
+     * Avoids touching the API server when this plugin is not even in use.
+     * Reset to {@code false} by {@link #onClose} so that a closed watch gets set up again.
+     */
+    private final AtomicBoolean activated = new AtomicBoolean();
+
+    /**
+     * Runs {@link #activate} the first time a {@link KubernetesComputer} comes online,
+     * and again the first time one comes online after a watch was reported closed.
+     *
+     * @param c the computer which just came online
+     * @param listener not used
+     */
+    @Override
+    public void onOnline(Computer c, TaskListener listener) throws IOException, InterruptedException {
+        if (c instanceof KubernetesComputer && activated.compareAndSet(false, true)) {
+            activate();
+        }
+    }
+
+    /**
+     * Performs the catch-up scan for pods that are already gone, then starts watching for subsequent deletions.
+     */
+    private void activate() {
+        LOGGER.fine("Activating reaper");
+        // First check all existing nodes to see if they still have active pods.
+        reapAgentsWithoutPods();
+        // Now set up a watch for any subsequent pod deletions.
+        watchClouds();
+    }
+
+    /**
+     * Looks up the pod of every existing {@link KubernetesSlave} and removes the agent from Jenkins if the pod is not found.
+     * (We may have missed deletion events while Jenkins was shut off,
+     * or pods may have been deleted before any Kubernetes agent was brought online.)
+     * A failure while checking one agent is logged and does not stop the remaining agents from being checked.
+     */
+    private void reapAgentsWithoutPods() {
+        // Iterate over a snapshot, since nodes may be removed from Jenkins inside the loop.
+        for (Node n : new ArrayList<>(Jenkins.get().getNodes())) {
+            if (!(n instanceof KubernetesSlave)) {
+                continue;
+            }
+            KubernetesSlave ks = (KubernetesSlave) n;
+            String ns = ks.getNamespace();
+            String name = ks.getPodName();
+            try {
+                // TODO more efficient to do a single (or paged) list request, but tricky since there may be multiple clouds,
+                // and even within a single cloud an agent pod is permitted to use a nondefault namespace,
+                // yet we do not want to do an unnamespaced pod list for RBAC reasons.
+                // Could use a hybrid approach: first list all pods in the configured namespace for all clouds;
+                // then go back and individually check any unmatched agents with their configured namespace.
+                if (ks.getKubernetesCloud().connect().pods().inNamespace(ns).withName(name).get() == null) {
+                    LOGGER.info(() -> ns + "/" + name + " seems to have been deleted, so removing corresponding Jenkins agent");
+                    Jenkins.get().removeNode(ks);
+                } else {
+                    LOGGER.fine(() -> ns + "/" + name + " still seems to exist, OK");
+                }
+            } catch (Exception x) {
+                LOGGER.log(Level.WARNING, "failed to do initial reap check for " + ns + "/" + name, x);
+            }
+        }
+    }
+
+    /**
+     * Registers this object as a pod watcher on every {@link KubernetesCloud}, in the namespace reported by that cloud's client.
+     * A failure for one cloud is logged and does not stop the remaining clouds from being watched.
+     */
+    private void watchClouds() {
+        for (Cloud c : Jenkins.get().clouds) {
+            if (!(c instanceof KubernetesCloud)) {
+                continue;
+            }
+            KubernetesCloud kc = (KubernetesCloud) c;
+            try {
+                KubernetesClient client = kc.connect();
+                client.pods().inNamespace(client.getNamespace()).watch(this);
+            } catch (Exception x) {
+                LOGGER.log(Level.WARNING, "failed to set up watcher on " + kc.getDisplayName(), x);
+            }
+        }
+    }
+
+    /**
+     * Removes the Jenkins agent corresponding to a pod which was just deleted; all other kinds of events are ignored.
+     *
+     * @param action the kind of event; only {@link Watcher.Action#DELETED} is acted upon
+     * @param pod the pod the event refers to, matched against agents by namespace and name
+     */
+    @Override
+    public void eventReceived(Watcher.Action action, Pod pod) {
+        if (action != Watcher.Action.DELETED) {
+            return;
+        }
+        String ns = pod.getMetadata().getNamespace();
+        String name = pod.getMetadata().getName();
+        // Iterate over a snapshot, since the matching node is removed from Jenkins inside the loop.
+        for (Node n : new ArrayList<>(Jenkins.get().getNodes())) {
+            if (!(n instanceof KubernetesSlave)) {
+                continue;
+            }
+            KubernetesSlave ks = (KubernetesSlave) n;
+            if (ks.getNamespace().equals(ns) && ks.getPodName().equals(name)) {
+                LOGGER.info(() -> ns + "/" + name + " was just deleted, so removing corresponding Jenkins agent");
+                try {
+                    Jenkins.get().removeNode(ks);
+                } catch (Exception x) {
+                    LOGGER.log(Level.WARNING, "failed to reap " + ns + "/" + name, x);
+                }
+                // The agent was found, whether or not its removal succeeded,
+                // so do not fall through to the message below claiming that no agent corresponds to the pod.
+                return;
+            }
+        }
+        LOGGER.fine(() -> "received deletion notice for " + ns + "/" + name + " which does not seem to correspond to any Jenkins agent");
+    }
+
+    /**
+     * Called when a watch registered by {@link #activate} has been closed.
+     * Nothing else would reattach the watcher, so the reaper is marked inactive again:
+     * the next {@link KubernetesComputer} coming online then reruns {@link #activate},
+     * which repeats the scan for deletions missed in the meantime and registers fresh watches.
+     *
+     * @param cause the reason for the closure, only logged here;
+     *              NOTE(review): presumably null for an orderly close - confirm against the client documentation
+     */
+    @Override
+    public void onClose(KubernetesClientException cause) {
+        // AllContainersRunningPodWatcher is not reattached, but this is expected to be short-lived,
+        // useful only until the containers of a single pod start running.
+        // (At least when using kubernetes-client/java, the connection gets closed after 2m on GKE
+        // and you need to rerun the watch. Does the fabric8io client wrap this?)
+        // TODO with several clouds, watches which are still open get registered a second time on reactivation.
+        // eventReceived tolerates that (a duplicate notice simply finds no matching agent), but it is wasteful;
+        // tracking one watch per cloud would allow reattaching only the one which was closed.
+        LOGGER.log(Level.FINE, "pod watch closed; reaper will be reactivated when the next Kubernetes agent comes online", cause);
+        activated.set(false);
+    }
+
+}
diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
index 7ac3dced7f..bdbff5e519 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineTest.java
@@ -46,6 +46,7 @@
import org.csanchez.jenkins.plugins.kubernetes.PodAnnotation;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplate;
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
+import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution;
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
import org.junit.Before;
import org.junit.Rule;
@@ -352,6 +353,15 @@ public void runInPodWithRetention() throws Exception {
assertTrue(deletePods(cloud.connect(), getLabels(this, name), true));
}
+ /**
+ * Checks that a running build is aborted when its agent pod is deleted underneath it.
+ * Waits until the build log shows the {@code sleep} command running, deletes the pods carrying this test's labels,
+ * then expects the build to finish as {@link Result#ABORTED} with the short description of
+ * {@link ExecutorStepExecution.RemovedNodeCause} in its log.
+ * NOTE(review): the build {@code b} is presumably started by the test setup from {@code terminatedPod.groovy} - confirm.
+ */
+ @Issue("JENKINS-49707")
+ @Test
+ public void terminatedPod() throws Exception {
+ // Make sure the step is actually running inside the pod before pulling the pod away.
+ r.waitForMessage("+ sleep", b);
+ deletePods(cloud.connect(), getLabels(this, name), false);
+ r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
+ r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
+ }
+
@Test
public void computerCantBeConfigured() throws Exception {
r.jenkins.setSecurityRealm(r.createDummySecurityRealm());
diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
index 301ebf88e4..ffb3129d94 100644
--- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
+++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/RestartPipelineTest.java
@@ -47,6 +47,7 @@
import org.csanchez.jenkins.plugins.kubernetes.model.TemplateEnvVar;
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
+import org.jenkinsci.plugins.workflow.support.steps.ExecutorStepExecution;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
@@ -54,11 +55,13 @@
import org.junit.rules.TemporaryFolder;
import org.junit.rules.TestName;
import org.jvnet.hudson.test.BuildWatcher;
+import org.jvnet.hudson.test.Issue;
import org.jvnet.hudson.test.JenkinsRule;
import org.jvnet.hudson.test.LoggerRule;
import org.jvnet.hudson.test.RestartableJenkinsNonLocalhostRule;
import hudson.model.Node;
+import hudson.model.Result;
import hudson.slaves.DumbSlave;
import hudson.slaves.JNLPLauncher;
import hudson.slaves.NodeProperty;
@@ -188,6 +191,34 @@ public void runInPodWithRestartWithLongSleep() throws Exception {
});
}
+    /**
+     * Checks that a build is aborted when its agent pod is deleted after a Jenkins restart.
+     * The first step configures the cloud, starts the build and waits until the {@code sleep} command is running;
+     * the second step, run after the restart, waits for the build to resume, deletes the pods carrying this test's labels,
+     * and expects the build to finish as {@link Result#ABORTED} with the short description of
+     * {@link ExecutorStepExecution.RemovedNodeCause} in its log.
+     */
+    @Issue("JENKINS-49707")
+    @Test
+    public void terminatedPodAfterRestart() throws Exception {
+        // Carries the job name from the first session into the second one.
+        AtomicReference<String> projectName = new AtomicReference<>();
+        story.then(r -> {
+            configureCloud();
+            WorkflowRun b = getPipelineJobThenScheduleRun(r);
+            projectName.set(b.getParent().getFullName());
+            r.waitForMessage("+ sleep", b);
+        });
+        story.then(r -> {
+            WorkflowRun b = r.jenkins.getItemByFullName(projectName.get(), WorkflowJob.class).getBuildByNumber(1);
+            r.waitForMessage("Ready to run", b);
+            // Note that the test is cheating here slightly.
+            // The watch in Reaper is still running across the in-JVM restarts,
+            // whereas in production it would have been cancelled during the shutdown.
+            // But it does not matter since we are waiting for the agent to come back online after the restart,
+            // which is sufficient trigger to reactivate the reaper.
+            // Indeed we get two Reaper instances running, which independently remove the node.
+            deletePods(cloud.connect(), getLabels(this, name), false);
+            r.assertBuildStatus(Result.ABORTED, r.waitForCompletion(b));
+            r.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b);
+            // Currently the logic in ExecutorStepExecution cannot handle a Jenkins restart so it prints the following.
+            // It does not matter since DurableTaskStep redundantly implements the same check.
+            r.assertLogContains(" was deleted, but do not have a node body to cancel", b);
+        });
+    }
+
@Test
public void getContainerLogWithRestart() throws Exception {
AtomicReference projectName = new AtomicReference<>();
diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
new file mode 100644
index 0000000000..e0b1c9f982
--- /dev/null
+++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPod.groovy
@@ -0,0 +1,9 @@
+// Pipeline which runs a practically endless sleep inside the busybox container of a pod,
+// so that a test can delete the pod while the sh step is still running (see KubernetesPipelineTest#terminatedPod).
+// NOTE(review): '$NAME' is presumably replaced with a per-test label by the test harness before the script runs - confirm.
+podTemplate(label: '$NAME', containers: [
+ containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'),
+ ]) {
+ node ('$NAME') {
+ container('busybox') {
+ sh 'sleep 9999999'
+ }
+ }
+}
diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy
new file mode 100644
index 0000000000..d1eca4791b
--- /dev/null
+++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/terminatedPodAfterRestart.groovy
@@ -0,0 +1,11 @@
+package org.csanchez.jenkins.plugins.kubernetes.pipeline
+
+// Pipeline which runs a practically endless sleep inside the busybox container of a pod,
+// so that a test can restart Jenkins and then delete the pod while the sh step is still running
+// (see RestartPipelineTest#terminatedPodAfterRestart).
+// NOTE(review): '$NAME' is presumably replaced with a per-test label by the test harness before the script runs - confirm.
+podTemplate(label: '$NAME', containers: [
+ containerTemplate(name: 'busybox', image: 'busybox', ttyEnabled: true, command: '/bin/cat'),
+]) {
+ node ('$NAME') {
+ container('busybox') {
+ sh 'sleep 9999999'
+ }
+ }
+}