diff --git a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java index 29e703ea0a..c281fdab28 100644 --- a/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java +++ b/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesLauncher.java @@ -47,7 +47,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -68,7 +67,7 @@ public class KubernetesLauncher extends JNLPLauncher { private static final Logger LOGGER = Logger.getLogger(KubernetesLauncher.class.getName()); - private final AtomicBoolean launched = new AtomicBoolean(false); + private volatile boolean launched = false; /** * Provisioning exception if any. @@ -87,7 +86,7 @@ public KubernetesLauncher() { @Override public boolean isLaunchSupported() { - return !launched.get(); + return !launched; } @Override @@ -104,7 +103,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { if (node == null) { throw new IllegalStateException("Node has been removed, cannot launch " + computer.getName()); } - if (launched.get()) { + if (launched) { LOGGER.log(INFO, "Agent has already been launched, activating: {0}", node.getNodeName()); computer.setAcceptingTasks(true); return; @@ -129,51 +128,71 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { .orElse(null); node.setNamespace(namespace); - LOGGER.log(FINE, () -> "Creating Pod: " + cloudName + " " + namespace + "/" + podName); - try { - pod = client.pods().inNamespace(namespace).create(pod); - } catch (KubernetesClientException e) { - Metrics.metricRegistry().counter(MetricNames.CREATION_FAILED).inc(); - int httpCode = e.getCode(); - if (400 <= httpCode && httpCode < 500) { // 4xx - if (httpCode == 403 && e.getMessage().contains("is forbidden: exceeded quota")) { - node.getRunListener() - .getLogger() - .printf( - "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota exceeded. %n%s%nRetrying...%n%n", - cloudName, namespace, pod.getMetadata().getName(), e.getMessage()); - } else if (httpCode == 409 - && e.getMessage().contains("Operation cannot be fulfilled on resourcequotas")) { - // See: https://github.com/kubernetes/kubernetes/issues/67761 ; A retry usually works. - node.getRunListener() - .getLogger() - .printf( - "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota update conflict. %n%s%nRetrying...%n%n", - cloudName, namespace, pod.getMetadata().getName(), e.getMessage()); + // if the controller was interrupted after creating the pod but before it connected back, then + // the pod might already exist and the creating logic must be skipped. + Pod existingPod = + client.pods().inNamespace(namespace).withName(podName).get(); + if (existingPod == null) { + LOGGER.log(FINE, () -> "Creating Pod: " + cloudName + " " + namespace + "/" + podName); + try { + pod = client.pods().inNamespace(namespace).create(pod); + } catch (KubernetesClientException e) { + Metrics.metricRegistry() + .counter(MetricNames.CREATION_FAILED) + .inc(); + int httpCode = e.getCode(); + if (400 <= httpCode && httpCode < 500) { // 4xx + if (httpCode == 403 && e.getMessage().contains("is forbidden: exceeded quota")) { + node.getRunListener() + .getLogger() + .printf( + "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota exceeded. %n%s%nRetrying...%n%n", + cloudName, + namespace, + pod.getMetadata().getName(), + e.getMessage()); + } else if (httpCode == 409 + && e.getMessage().contains("Operation cannot be fulfilled on resourcequotas")) { + // See: https://github.com/kubernetes/kubernetes/issues/67761 ; A retry usually works. + node.getRunListener() + .getLogger() + .printf( + "WARNING: Unable to create pod: %s %s/%s because kubernetes resource quota update conflict. %n%s%nRetrying...%n%n", + cloudName, + namespace, + pod.getMetadata().getName(), + e.getMessage()); + } else { + node.getRunListener() + .getLogger() + .printf( + "ERROR: Unable to create pod %s %s/%s.%n%s%n", + cloudName, + namespace, + pod.getMetadata().getName(), + e.getMessage()); + PodUtils.cancelQueueItemFor(pod, e.getMessage()); + } + } else if (500 <= httpCode && httpCode < 600) { // 5xx + LOGGER.log(FINE, "Kubernetes returned HTTP code {0} {1}. Retrying...", new Object[] { + e.getCode(), e.getStatus() + }); } else { - node.getRunListener() - .getLogger() - .printf( - "ERROR: Unable to create pod %s %s/%s.%n%s%n", - cloudName, namespace, pod.getMetadata().getName(), e.getMessage()); - PodUtils.cancelQueueItemFor(pod, e.getMessage()); + LOGGER.log(WARNING, "Kubernetes returned unhandled HTTP code {0} {1}", new Object[] { + e.getCode(), e.getStatus() + }); } - } else if (500 <= httpCode && httpCode < 600) { // 5xx - LOGGER.log(FINE, "Kubernetes returned HTTP code {0} {1}. Retrying...", new Object[] { - e.getCode(), e.getStatus() - }); - } else { - LOGGER.log(WARNING, "Kubernetes returned unhandled HTTP code {0} {1}", new Object[] { - e.getCode(), e.getStatus() - }); + throw e; } - throw e; + LOGGER.log(INFO, () -> "Created Pod: " + cloudName + " " + namespace + "/" + podName); + listener.getLogger().printf("Created Pod: %s %s/%s%n", cloudName, namespace, podName); + Metrics.metricRegistry().counter(MetricNames.PODS_CREATED).inc(); + + node.getRunListener().getLogger().printf("Created Pod: %s %s/%s%n", cloudName, namespace, podName); + } else { + LOGGER.log(INFO, () -> "Pod already exists: " + cloudName + " " + namespace + "/" + podName); + listener.getLogger().printf("Pod already exists: %s %s/%s%n", cloudName, namespace, podName); } - LOGGER.log(INFO, () -> "Created Pod: " + cloudName + " " + namespace + "/" + podName); - listener.getLogger().printf("Created Pod: %s %s/%s%n", cloudName, namespace, podName); - Metrics.metricRegistry().counter(MetricNames.PODS_CREATED).inc(); - - node.getRunListener().getLogger().printf("Created Pod: %s %s/%s%n", cloudName, namespace, podName); kubernetesComputer.setLaunching(true); ObjectMeta podMetadata = pod.getMetadata(); @@ -268,7 +287,7 @@ public synchronized void launch(SlaveComputer computer, TaskListener listener) { } computer.setAcceptingTasks(true); - launched.set(true); + launched = true; try { // We need to persist the "launched" setting... node.save(); diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/AbstractKubernetesPipelineRJRTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/AbstractKubernetesPipelineRJRTest.java index 0e3b769033..9c16a66add 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/AbstractKubernetesPipelineRJRTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/AbstractKubernetesPipelineRJRTest.java @@ -35,8 +35,6 @@ public abstract class AbstractKubernetesPipelineRJRTest { } } - protected RunId runId; - private SetupCloud setup; public AbstractKubernetesPipelineRJRTest(SetupCloud setup) { @@ -52,7 +50,10 @@ public static void isKubernetesConfigured() throws Exception { public void setUp() throws Throwable { rjr.startJenkins(); rjr.runRemotely(setup); - runId = rjr.runRemotely(new CreateWorkflowJobThenScheduleRun( + } + + protected RunId createWorkflowJobThenScheduleRun() throws Throwable { + return rjr.runRemotely(new CreateWorkflowJobThenScheduleRun( KubernetesTestUtil.loadPipelineScript(getClass(), name.getMethodName() + ".groovy"))); } } diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesDeclarativeAgentRJRTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesDeclarativeAgentRJRTest.java index 24916dd665..5b41ad3864 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesDeclarativeAgentRJRTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesDeclarativeAgentRJRTest.java @@ -18,6 +18,7 @@ import java.net.UnknownHostException; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.AssertBuildStatusSuccess; +import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.RunId; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.SetupCloud; import org.junit.Test; @@ -29,6 +30,7 @@ public KubernetesDeclarativeAgentRJRTest() throws UnknownHostException { @Test public void declarative() throws Throwable { + RunId runId = createWorkflowJobThenScheduleRun(); rjr.runRemotely(new AssertBuildStatusSuccess(runId)); } } diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineRJRTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineRJRTest.java index 4b3c3a1be4..71e4a484ef 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineRJRTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineRJRTest.java @@ -1,7 +1,14 @@ package org.csanchez.jenkins.plugins.kubernetes.pipeline; +import io.fabric8.kubernetes.api.model.NodeBuilder; +import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClientBuilder; import java.net.UnknownHostException; +import org.csanchez.jenkins.plugins.kubernetes.KubernetesTestUtil; +import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.AssertBuildLogMessage; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.AssertBuildStatusSuccess; +import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.CreateWorkflowJobThenScheduleTask; +import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.RunId; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.SetupCloud; import org.junit.Test; @@ -12,6 +19,44 @@ public KubernetesPipelineRJRTest() throws UnknownHostException { @Test public void basicPipeline() throws Throwable { + RunId runId = createWorkflowJobThenScheduleRun(); rjr.runRemotely(new AssertBuildStatusSuccess(runId)); } + + @Test + public void restartDuringPodLaunch() throws Throwable { + // try to run something on a pod which is not schedulable (disktype=special) + RunId build = rjr.runRemotely(new CreateWorkflowJobThenScheduleTask( + KubernetesTestUtil.loadPipelineScript(getClass(), name.getMethodName() + ".groovy"))); + // the pod is created, but not connected yet + rjr.runRemotely(new AssertBuildLogMessage("Created Pod", build)); + // restart + rjr.stopJenkins(); + rjr.startJenkins(); + // update k8s to make a node suitable to schedule (add disktype=special to the node) + System.out.println("Adding label to node...."); + try (KubernetesClient client = new KubernetesClientBuilder().build()) { + String nodeName = + client.nodes().list().getItems().get(0).getMetadata().getName(); + client.nodes().withName(nodeName).edit(n -> new NodeBuilder(n) + .editMetadata() + .addToLabels("disktype", "special") + .endMetadata() + .build()); + + // pod connects back and the build finishes correctly + rjr.runRemotely(new AssertBuildStatusSuccess(build)); + } finally { + // clean up + try (KubernetesClient client = new KubernetesClientBuilder().build()) { + String nodeName = + client.nodes().list().getItems().get(0).getMetadata().getName(); + client.nodes().withName(nodeName).edit(n -> new NodeBuilder(n) + .editMetadata() + .removeFromLabels("disktype") + .endMetadata() + .build()); + } + } + } } diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineWebsocketRJRTest.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineWebsocketRJRTest.java index f8ae629994..6536ee8e6a 100644 --- a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineWebsocketRJRTest.java +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/KubernetesPipelineWebsocketRJRTest.java @@ -2,6 +2,7 @@ import java.net.UnknownHostException; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.AssertBuildStatusSuccess; +import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.RunId; import org.csanchez.jenkins.plugins.kubernetes.pipeline.steps.SetupCloud; import org.junit.Test; @@ -13,6 +14,7 @@ public KubernetesPipelineWebsocketRJRTest() throws UnknownHostException { @Test public void basicPipeline() throws Throwable { + RunId runId = createWorkflowJobThenScheduleRun(); rjr.runRemotely(new AssertBuildStatusSuccess(runId)); } } diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/AssertBuildLogMessage.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/AssertBuildLogMessage.java new file mode 100644 index 0000000000..9adf22084c --- /dev/null +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/AssertBuildLogMessage.java @@ -0,0 +1,24 @@ +package org.csanchez.jenkins.plugins.kubernetes.pipeline.steps; + +import org.jenkinsci.plugins.workflow.job.WorkflowJob; +import org.jenkinsci.plugins.workflow.job.WorkflowRun; +import org.jvnet.hudson.test.JenkinsRule; +import org.jvnet.hudson.test.RealJenkinsRule; + +public class AssertBuildLogMessage implements RealJenkinsRule.Step { + + private final String message; + private final RunId runId; + + public AssertBuildLogMessage(String message, RunId runId) { + this.message = message; + this.runId = runId; + } + + @Override + public void run(JenkinsRule r) throws Throwable { + WorkflowJob p = r.jenkins.getItemByFullName(runId.name, WorkflowJob.class); + WorkflowRun b = p.getBuildByNumber(runId.number); + r.waitForMessage(message, b); + } +} diff --git a/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/CreateWorkflowJobThenScheduleTask.java b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/CreateWorkflowJobThenScheduleTask.java new file mode 100644 index 0000000000..882fad33ef --- /dev/null +++ b/src/test/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/steps/CreateWorkflowJobThenScheduleTask.java @@ -0,0 +1,29 @@ +package org.csanchez.jenkins.plugins.kubernetes.pipeline.steps; + +import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; +import org.jenkinsci.plugins.workflow.job.WorkflowJob; +import org.jenkinsci.plugins.workflow.job.WorkflowRun; +import org.jvnet.hudson.test.JenkinsRule; +import org.jvnet.hudson.test.RealJenkinsRule; + +/** + * Creates a workflow job using the specified script, then schedules it and returns a reference to the run. + */ +public class CreateWorkflowJobThenScheduleTask implements RealJenkinsRule.Step2 { + private String script; + + public CreateWorkflowJobThenScheduleTask(String script) { + this.script = script; + } + + @Override + public RunId run(JenkinsRule r) throws Throwable { + WorkflowJob project = r.createProject(WorkflowJob.class); + project.setDefinition(new CpsFlowDefinition(script, true)); + project.save(); + System.out.println("Scheduling build..."); + WorkflowRun b = project.scheduleBuild2(0).getStartCondition().get(); + System.out.println("Build scheduled..."); + return new RunId(project.getFullName(), b.number); + } +} diff --git a/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/restartDuringPodLaunch.groovy b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/restartDuringPodLaunch.groovy new file mode 100644 index 0000000000..024d21f83b --- /dev/null +++ b/src/test/resources/org/csanchez/jenkins/plugins/kubernetes/pipeline/restartDuringPodLaunch.groovy @@ -0,0 +1,11 @@ +podTemplate(yaml: ''' +apiVersion: v1 +kind: Pod +spec: + nodeSelector: + disktype: special +''') { + node(POD_LABEL) { + sh 'true' + } +} \ No newline at end of file diff --git a/test-in-k8s.yaml b/test-in-k8s.yaml index 08afad3b72..c82c30f0b6 100644 --- a/test-in-k8s.yaml +++ b/test-in-k8s.yaml @@ -69,7 +69,7 @@ rules: verbs: ["watch"] - apiGroups: [""] resources: ["nodes"] - verbs: ["list"] + verbs: ["list","get","patch","update"] # KubernetesPipelineRJRTest.restartDuringPodLaunch - apiGroups: [""] resources: ["secrets"] verbs: ["create","delete","get","list","patch","update","watch"]