Skip to content

Commit

Permalink
Automatic switch to emergency mode when metrics unavailable (#424)
Browse files Browse the repository at this point in the history
* upgrade kubebuilder to plugin/v4

* add test utils

* fix controller test

* fix gha test

* chmod tortoisectl test

* edit tortoisectl

* fix lint

* fix lint

* add lint-fix to ci

* go mod tidy

* add make dependencies

* remove lint-fix

* upgrade tools

* lint-fix

* add tool chain version

* change toolchain to 1.22

* add timeout

* remove lint-fix

* edit licenses

* remove chmod

* automatic emergency mode trigger when kube metrics unavailable for hpa

* add return statement

* clean up code

* clean up code

* add hpa test and try to fix controller test

* fix old controller tests

* add controller test and fix checkHPAStatus function

* clean up code

* remove autoemergency phase and use emergency instead

* fix lint

* refactor tortoisephase change into tortoise service and write unit tests

* fix lint

* fix lint

* fix review comments

* fix nits

* fix nits

* fix nits

* add back to normal test for automatic emergency mode

* Update internal/controller/tortoise_controller_test.go

Co-authored-by: Kensei Nakada <handbomusic@gmail.com>

* fix test name

---------

Co-authored-by: Kensei Nakada <handbomusic@gmail.com>
  • Loading branch information
randytqwjp and sanposhiho authored Jan 17, 2025
1 parent 310df59 commit 860456d
Show file tree
Hide file tree
Showing 73 changed files with 2,787 additions and 120 deletions.
19 changes: 19 additions & 0 deletions internal/controller/testdata/deletion-no-delete/before/hpa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ metadata:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
status:
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
spec:
behavior:
scaleDown:
Expand Down
19 changes: 19 additions & 0 deletions internal/controller/testdata/deletion-policy-all/before/hpa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ metadata:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
status:
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
spec:
behavior:
scaleDown:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,21 @@ spec:
kind: Deployment
name: mercari-app
status:
currentMetrics: null
desiredReplicas: 0
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ metadata:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
status:
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
spec:
behavior:
scaleDown:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ spec:
strategy: {}
template:
metadata:
annotations:
kubectl.kubernetes.io/restartedAt: "2023-01-01T00:00:00Z"
annotations: null
creationTimestamp: null
labels:
app: mercari
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,110 +20,97 @@ status:
cpu: Vertical
memory: Vertical
conditions:
containerResourceRequests:
- containerName: app
resource:
cpu: "10"
memory: 10Gi
- containerName: istio-proxy
resource:
cpu: "4"
memory: 4Gi
tortoiseConditions:
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
status: "False"
type: FailedToReconcile
containerRecommendationFromVPA:
- containerName: app
maxRecommendation:
cpu:
quantity: "3"
updatedAt: "2023-01-01T00:00:00Z"
quantity: "10"
updatedAt: null
memory:
quantity: 3Gi
updatedAt: "2023-01-01T00:00:00Z"
quantity: 10Gi
updatedAt: null
recommendation:
cpu:
quantity: "3"
updatedAt: "2023-01-01T00:00:00Z"
quantity: "10"
updatedAt: null
memory:
quantity: 3Gi
updatedAt: "2023-01-01T00:00:00Z"
quantity: 10Gi
updatedAt: null
- containerName: istio-proxy
maxRecommendation:
cpu:
quantity: "3"
updatedAt: "2023-01-01T00:00:00Z"
quantity: "4"
updatedAt: null
memory:
quantity: 3Gi
updatedAt: "2023-01-01T00:00:00Z"
quantity: 4Gi
updatedAt: null
recommendation:
cpu:
quantity: "3"
updatedAt: "2023-01-01T00:00:00Z"
quantity: "4"
updatedAt: null
memory:
quantity: 3Gi
updatedAt: "2023-01-01T00:00:00Z"
containerResourceRequests:
- containerName: app
resource:
cpu: "10"
memory: 3Gi
- containerName: istio-proxy
resource:
cpu: "3"
memory: 3Gi
tortoiseConditions:
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: the current number of replicas is not bigger than the preferred max
replica number
reason: ScaledUpBasedOnPreferredMaxReplicas
status: "False"
type: ScaledUpBasedOnPreferredMaxReplicas
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: The recommendation is provided
status: "True"
type: VerticalRecommendationUpdated
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
status: "False"
type: FailedToReconcile
containerResourcePhases:
- containerName: app
resourcePhases:
cpu:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: GatheringData
memory:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
- containerName: istio-proxy
resourcePhases:
cpu:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
memory:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
quantity: 4Gi
updatedAt: null
recommendations:
horizontal:
maxReplicas:
- from: 0
timezone: Local
to: 24
updatedAt: "2023-01-01T00:00:00Z"
updatedAt: "2023-10-06T01:15:47Z"
value: 20
minReplicas:
- from: 0
timezone: Local
to: 24
updatedAt: "2023-01-01T00:00:00Z"
updatedAt: "2023-10-06T01:15:47Z"
value: 5
targetUtilizations:
- containerName: app
targetUtilization:
cpu: 70
targetUtilization: {}
- containerName: istio-proxy
targetUtilization: {}
vertical:
containerResourceRecommendation:
- RecommendedResource:
cpu: "10"
memory: 3Gi
memory: 10Gi
containerName: app
- RecommendedResource:
cpu: "3"
memory: 3Gi
cpu: "4"
memory: 4Gi
containerName: istio-proxy
containerResourcePhases:
- containerName: app
resourcePhases:
cpu:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: GatheringData
memory:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
- containerName: istio-proxy
resourcePhases:
cpu:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
memory:
lastTransitionTime: "2023-01-01T00:00:00Z"
phase: Working
targets:
horizontalPodAutoscaler: tortoise-hpa-mercari
scaleTargetRef:
Expand All @@ -132,4 +119,4 @@ status:
verticalPodAutoscalers:
- name: tortoise-monitor-mercari
role: Monitor
tortoisePhase: Working
tortoisePhase: PartlyWorking
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,21 @@ spec:
kind: Deployment
name: mercari-app
status:
currentMetrics: null
desiredReplicas: 0
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ metadata:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
status:
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
spec:
behavior:
scaleDown:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ metadata:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
status:
conditions:
- status: "True"
type: AbleToScale
message: "recommended size matches current size"
- status: "True"
type: ScalingActive
message: "the HPA was able to compute the replica count"
currentMetrics:
- containerResource:
container: app
name: cpu
current:
value: 3
- containerResource:
container: istio-proxy
name: cpu
current:
value: 3
spec:
behavior:
scaleDown:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Deployment-shaped test fixture for the `mercari-app` workload in the
# `default` namespace. No `apiVersion`/`kind` keys appear in this file.
#
# The pod template carries a `kubectl.kubernetes.io/restartedAt` annotation
# and declares two containers, `app` and `istio-proxy`, each with explicit
# CPU and memory requests.
#
# NOTE(review): the nesting below was reconstructed from the Kubernetes
# Deployment schema because the extracted text had lost all indentation;
# confirm against the repository copy.
metadata:
  name: mercari-app
  namespace: default
spec:
  selector:
    matchLabels:
      app: mercari
  strategy: {}
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/restartedAt: "2023-01-01T00:00:00Z"
      creationTimestamp: null
      labels:
        # Must equal spec.selector.matchLabels so the selector matches the pods.
        app: mercari
    spec:
      containers:
      - image: awesome-mercari-app-image
        name: app
        resources:
          requests:
            # Quoted so the quantity stays a string rather than an integer.
            cpu: "10"
            memory: 10Gi
      - image: awesome-istio-proxy-image
        name: istio-proxy
        resources:
          requests:
            cpu: "4"
            memory: 4Gi
status: {}
Loading

0 comments on commit 860456d

Please sign in to comment.