From 66819791153db99e99c47462724b9c5e79a8f007 Mon Sep 17 00:00:00 2001 From: Knative Prow Robot Date: Sun, 13 Feb 2022 09:58:20 -0800 Subject: [PATCH] Address 503s when the autoscaler is being rolled (#12621) The activator's readiness depends on the status of its WebSocket connection to the autoscaler. When the connection is down, the activator will report ready=false. This can occur when the autoscaler deployment is updating. PR #12614 made the activator's readiness probe fail aggressively after a single failure. This didn't seem to impact Istio, but with Contour it started returning 503s since the activator started to report ready=false immediately. This PR does two things to mitigate 503s: - Bump the readiness probe failure threshold to give the autoscaler more time to roll out/start up. This still remains lower than the drain duration. - Update the autoscaler rollout strategy so we spin up a new instance prior to bringing down the older one. This is done using maxUnavailable=0. Co-authored-by: dprotaso --- config/core/deployments/activator.yaml | 2 +- config/core/deployments/autoscaler.yaml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/config/core/deployments/activator.yaml b/config/core/deployments/activator.yaml index 2376144320ec..2d369ab10e70 100644 --- a/config/core/deployments/activator.yaml +++ b/config/core/deployments/activator.yaml @@ -105,7 +105,7 @@ spec: - name: k-kubelet-probe value: "activator" periodSeconds: 5 - failureThreshold: 1 + failureThreshold: 5 livenessProbe: httpGet: port: 8012 diff --git a/config/core/deployments/autoscaler.yaml b/config/core/deployments/autoscaler.yaml index 8565ce957ba4..df1ee6f46983 100644 --- a/config/core/deployments/autoscaler.yaml +++ b/config/core/deployments/autoscaler.yaml @@ -27,6 +27,10 @@ spec: selector: matchLabels: app: autoscaler + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 template: metadata: annotations: