From 2eac3f9b252453a2079d3902941c050800d29739 Mon Sep 17 00:00:00 2001
From: Nicolas Busseneau
Date: Fri, 15 Oct 2021 16:12:45 +0200
Subject: [PATCH] workflows: update AKS workflows with new taints

Re-impacted from: https://github.com/cilium/cilium/pull/17529

Context: we recommend that users taint all node pools with
`node.cilium.io/agent-not-ready=true:NoSchedule` to prevent application
pods from being managed by the default AKS CNI plugin.

To this end, the workflow we proposed users follow when installing
Cilium on AKS was to replace the initial AKS node pool with a new
tainted system node pool, since it is not possible to taint the initial
AKS node pool, cf. Azure/AKS#1402.

AKS recently pushed a change on the API side that forbids setting
custom taints on system node pools, cf. Azure/AKS#2578. It is therefore
no longer possible for us to recommend that users taint all node pools
with `node.cilium.io/agent-not-ready=true:NoSchedule` to prevent
application pods from being managed by the default AKS CNI plugin.

To work around this new limitation, we propose the following workflow
instead:

- Replace the initial node pool with a system node pool tainted with
  `CriticalAddonsOnly=true:NoSchedule`, preventing application pods
  from being scheduled on it.
- Create a secondary user node pool tainted with
  `node.cilium.io/agent-not-ready=true:NoSchedule` to prevent
  application pods from being scheduled on the user node pool until
  Cilium is ready to manage them.

Signed-off-by: Nicolas Busseneau
---
 .github/workflows/aks.yaml | 58 ++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/aks.yaml b/.github/workflows/aks.yaml
index f369af0ea4..91e68f6c59 100644
--- a/.github/workflows/aks.yaml
+++ b/.github/workflows/aks.yaml
@@ -28,6 +28,7 @@ concurrency:
 env:
   name: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}
   location: westeurope
+  cost_reduction: --node-vm-size Standard_B2s --node-osdisk-size 30
 
 jobs:
   installation-and-connectivity:
@@ -69,7 +70,6 @@ jobs:
           echo ::set-output name=owner::${OWNER}
 
       - name: Create AKS cluster
-        id: cluster-creation
         run: |
           # Create group
           az group create \
@@ -77,37 +77,46 @@ jobs:
             --location ${{ env.location }} \
             --tags usage=${{ github.repository_owner }}-${{ github.event.repository.name }} owner=${{ steps.vars.outputs.owner }}
 
-          # Create cluster with a 1 node-count (we will remove this node pool
-          # afterwards)
-          # Details: Basic load balancers are not supported with multiple node
-          # pools. Create a cluster with standard load balancer selected to use
-          # multiple node pools, learn more at https://aka.ms/aks/nodepools.
+          # Create AKS cluster
           az aks create \
             --resource-group ${{ env.name }} \
             --name ${{ env.name }} \
             --location ${{ env.location }} \
             --network-plugin azure \
             --node-count 1 \
-            --load-balancer-sku standard \
-            --node-vm-size Standard_B2s \
-            --node-osdisk-size 30 \
+            ${{ env.cost_reduction }} \
             --generate-ssh-keys
 
-          # Get the name of the node pool that we will delete afterwards
-          echo ::set-output name=nodepool_to_delete::$(az aks nodepool list --cluster-name ${{ env.name }} -g ${{ env.name }} -o json | jq -r '.[0].name')
+          # Get name of initial system node pool
+          nodepool_to_delete=$(az aks nodepool list --resource-group ${{ env.name }} --cluster-name ${{ env.name }} --output tsv --query "[0].name")
 
-          # Create a node pool with the taint 'node.cilium.io/agent-not-ready=true:NoSchedule'
-          # and with 'mode=system' as it it the same mode used for the nodepool
-          # created with the cluster.
+          # Create system node pool tainted with `CriticalAddonsOnly=true:NoSchedule`
           az aks nodepool add \
-            --name nodepool2 \
+            --resource-group ${{ env.name }} \
             --cluster-name ${{ env.name }} \
+            --name systempool \
+            --mode system \
+            --node-count 1 \
+            --node-taints "CriticalAddonsOnly=true:NoSchedule" \
+            ${{ env.cost_reduction }} \
+            --no-wait
+
+          # Create user node pool tainted with `node.cilium.io/agent-not-ready=true:NoSchedule`
+          az aks nodepool add \
             --resource-group ${{ env.name }} \
+            --cluster-name ${{ env.name }} \
+            --name userpool \
+            --mode user \
             --node-count 2 \
-            --node-vm-size Standard_B2s \
-            --node-osdisk-size 30 \
-            --mode system \
-            --node-taints node.cilium.io/agent-not-ready=true:NoSchedule
+            --node-taints "node.cilium.io/agent-not-ready=true:NoSchedule" \
+            ${{ env.cost_reduction }} \
+            --no-wait
+
+          # Delete the initial system node pool
+          az aks nodepool delete \
+            --resource-group ${{ env.name }} \
+            --cluster-name ${{ env.name }} \
+            --name "${nodepool_to_delete}"
 
       - name: Get cluster credentials
         run: |
@@ -180,17 +189,6 @@ jobs:
           exit ${EXIT_CODE}
         shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
 
-      - name: Delete the first node pool
-        run: |
-          # We can only delete the first node pool after Cilium is installed
-          # because some pods have Pod Disruption Budgets set. If we try to
-          # delete the first node pool without the second node pool being ready,
-          # AKS will not succeed with the pool deletion because some Deployments
-          # can't cease to exist in the cluster.
-          az aks nodepool delete --name ${{ steps.cluster-creation.outputs.nodepool_to_delete }} \
-            --cluster-name ${{ env.name }} \
-            --resource-group ${{ env.name }}
-
       - name: Load test script in configmap
         run: |
           kubectl create configmap cilium-cli-test-script -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/aks.sh
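
For reference, the node pool workflow described in the commit message can also be
run by hand with the Azure CLI. The commands below are a minimal sketch of that
sequence under assumed placeholder values: $NAME is used for both the resource
group and the cluster name, $LOCATION for the region, and the pool names
systempool/userpool simply mirror the ones used in the patch (the cost_reduction
VM size and disk options are omitted here):

  # Create the cluster; its initial system node pool cannot carry custom taints.
  az aks create --resource-group "$NAME" --name "$NAME" --location "$LOCATION" \
    --network-plugin azure --node-count 1 --generate-ssh-keys

  # Remember the initial system node pool so it can be deleted at the end.
  nodepool_to_delete=$(az aks nodepool list --resource-group "$NAME" \
    --cluster-name "$NAME" --output tsv --query "[0].name")

  # Replacement system node pool: CriticalAddonsOnly keeps application pods off it.
  az aks nodepool add --resource-group "$NAME" --cluster-name "$NAME" \
    --name systempool --mode system --node-count 1 \
    --node-taints "CriticalAddonsOnly=true:NoSchedule" --no-wait

  # User node pool: the agent-not-ready taint holds application pods back until
  # Cilium is ready to manage them.
  az aks nodepool add --resource-group "$NAME" --cluster-name "$NAME" \
    --name userpool --mode user --node-count 2 \
    --node-taints "node.cilium.io/agent-not-ready=true:NoSchedule" --no-wait

  # Drop the untainted initial pool once the replacement pools exist.
  az aks nodepool delete --resource-group "$NAME" --cluster-name "$NAME" \
    --name "$nodepool_to_delete"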