From aeb14a14e57047d1c1b651a73e2d62ee3379bfce Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Thu, 16 Jan 2025 12:05:01 -0800
Subject: [PATCH 01/16] feat: on-demand capacity reservation support

---
 .../karpenter.k8s.aws_ec2nodeclasses.yaml | 88 ++++++++-
 .../templates/karpenter.sh_nodeclaims.yaml | 2 +-
 .../templates/karpenter.sh_nodepools.yaml | 4 +-
 cmd/controller/main.go | 1 +
 hack/docs/instancetypes_gen/main.go | 14 +-
 hack/tools/launchtemplate_counter/main.go | 14 +-
 hack/validation/labels.sh | 2 +-
 hack/validation/requirements.sh | 2 +-
 .../karpenter.k8s.aws_ec2nodeclasses.yaml | 88 ++++++++-
 pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 2 +-
 pkg/apis/crds/karpenter.sh_nodepools.yaml | 4 +-
 pkg/apis/v1/doc.go | 3 +
 pkg/apis/v1/ec2nodeclass.go | 34 +++-
 pkg/apis/v1/ec2nodeclass_hash_test.go | 27 ++-
 pkg/apis/v1/ec2nodeclass_status.go | 47 ++++-
 .../v1/ec2nodeclass_validation_cel_test.go | 104 +++++++++++
 pkg/apis/v1/labels.go | 18 +-
 pkg/apis/v1/zz_generated.deepcopy.go | 55 ++++++
 pkg/aws/sdk.go | 1 +
 pkg/cache/unavailableofferings.go | 14 +-
 pkg/cloudprovider/suite_test.go | 6 +-
 pkg/controllers/controllers.go | 7 +-
 pkg/controllers/interruption/controller.go | 3 +-
 .../nodeclass/capacityreservation.go | 133 ++++++++++++++
 pkg/controllers/nodeclass/controller.go | 31 +++-
 pkg/controllers/nodeclass/readiness_test.go | 2 +-
 pkg/controllers/nodeclass/suite_test.go | 5 +-
 pkg/operator/operator.go | 70 ++++----
 pkg/providers/amifamily/resolver.go | 125 ++++++++-----
 pkg/providers/capacityreservation/provider.go | 102 +++++++++++
 pkg/providers/capacityreservation/types.go | 105 +++++++++++
 pkg/providers/instance/instance.go | 69 ++++++--
 pkg/providers/instance/types.go | 47 +++--
 pkg/providers/instancetype/instancetype.go | 115 ++++++------
 pkg/providers/instancetype/metrics.go | 27 ---
 .../instancetype/offering/metrics.go | 61 +++++++
 .../instancetype/offering/provider.go | 167 ++++++++++++++++++
 pkg/providers/instancetype/suite_test.go | 69 +++++++-
 pkg/providers/instancetype/types.go | 158 ++++++++---------
 .../launchtemplate/launchtemplate.go | 24 ++-
 pkg/providers/launchtemplate/suite_test.go | 8 +-
 pkg/test/environment.go | 98 +++++-----
 42 files changed, 1535 insertions(+), 421 deletions(-)
 create mode 100644 pkg/controllers/nodeclass/capacityreservation.go
 create mode 100644 pkg/providers/capacityreservation/provider.go
 create mode 100644 pkg/providers/capacityreservation/types.go
 create mode 100644 pkg/providers/instancetype/offering/metrics.go
 create mode 100644 pkg/providers/instancetype/offering/provider.go

diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml
index 75021e7afb46..6e179257a4d9 100644
--- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml
+++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml
@@ -239,6 +239,39 @@ spec:
             x-kubernetes-validations:
             - message: must have only one blockDeviceMappings with rootVolume
               rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1
+          capacityReservationSelectorTerms:
+            description: |-
+              CapacityReservationSelectorTerms is a list of capacity reservation selector terms. The terms are ORed together
+              to determine the set of eligible capacity reservations.
+            items:
+              description: |-
+                CapacityReservationSelectorTerm defines selection logic for a capacity reservation used by Karpenter to launch nodes.
+                If multiple fields are used for selection, the requirements are ANDed.
+              properties:
+                id:
+                  description: ID is the capacity reservation id in EC2
+                  pattern: ^cr-[0-9a-z]+$
+                  type: string
+                ownerID:
+                  description: OwnerID is the owner id for the capacity reservation.
+                  pattern: ^[0-9]{12}$
+                  type: string
+                tags:
+                  additionalProperties:
+                    type: string
+                  description: |-
+                    Tags is a map of key/value tags used to select capacity reservations.
+                    Specifying '*' for a value selects all values for a given tag key.
+                  maxProperties: 20
+                  type: object
+                  x-kubernetes-validations:
+                  - message: empty tag keys or values aren't supported
+                    rule: self.all(k, k != '' && self[k] != '')
+              type: object
+            maxItems: 30
+            type: array
+            x-kubernetes-validations:
+            - message: expected at least one, got none, ['tags', 'id']
+              rule: self.all(x, has(x.tags) || has(x.id))
+            - message: '''id'' is mutually exclusive, cannot be set along with tags or ownerID in a capacity reservation selector term'
+              rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))'
           context:
             description: |-
               Context is a Reserved field in EC2 APIs
@@ -469,7 +502,7 @@ spec:
             - message: immutable field changed
               rule: self == oldSelf
           securityGroupSelectorTerms:
-            description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed.
+            description: SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed.
             items:
               description: |-
                 SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes.
@@ -503,12 +536,12 @@ spec:
               rule: self.size() != 0
             - message: expected at least one, got none, ['tags', 'id', 'name']
               rule: self.all(x, has(x.tags) || has(x.id) || has(x.name))
-            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
+            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
              rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))'
-            - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
+            - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
              rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))'
           subnetSelectorTerms:
-            description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed.
+            description: SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed.
             items:
              description: |-
                SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes.
@@ -537,7 +570,7 @@ spec:
              rule: self.size() != 0
            - message: expected at least one, got none, ['tags', 'id']
              rule: self.all(x, has(x.tags) || has(x.id))
-            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms'
+            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term'
              rule: '!self.all(x, has(x.id) && has(x.tags))'
           tags:
             additionalProperties:
@@ -640,6 +673,51 @@ spec:
               - requirements
             type: object
           type: array
+        capacityReservations:
+          description: |-
+            CapacityReservations contains the current capacity reservation values that are
+            available to the cluster under the CapacityReservationSelectorTerms.
+          items:
+            description: CapacityReservation contains resolved capacity reservation values utilized for node launch
+            properties:
+              availabilityZone:
+                description: The availability zone the capacity reservation is available in.
+                type: string
+              availableInstanceCount:
+                description: The last known available instance count for the capacity reservation.
+                type: integer
+              endTime:
+                description: |-
+                  The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter
+                  will no longer be able to launch instances into that reservation.
+                format: date-time
+                type: string
+              id:
+                description: The id for the capacity reservation.
+ pattern: ^cr-[0-9a-z]+$ + type: string + instanceMatchCriteria: + description: Indicates the type of instance launches the capacity reservation accepts. + enum: + - open + - targeted + type: string + instanceType: + description: The instance type for the capacity reservation. + type: string + ownerID: + description: The ID of the AWS account that owns the capacity reservation. + pattern: ^[0-9]{12}$ + type: string + totalInstanceCount: + description: The total instance count for the capacity reservation. + type: integer + required: + - availabilityZone + - availableInstanceCount + - id + - instanceMatchCriteria + - instanceType + - ownerID + - totalInstanceCount + type: object + type: array conditions: description: Conditions contains signals for health and readiness items: diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml index e0a0184f30d7..5b9b22b4932c 100644 --- a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml +++ b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml @@ -137,7 +137,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml index dac45d671119..59ddb62b12b1 
100644 --- a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml +++ b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml @@ -210,7 +210,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: |- @@ -283,7 +283,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || 
!self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 52075ecd984b..ea7df8a5fd6e 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -69,6 +69,7 @@ func main() { op.LaunchTemplateProvider, op.VersionProvider, op.InstanceTypesProvider, + op.CapacityReservationProvider, )...). Start(ctx) } diff --git a/hack/docs/instancetypes_gen/main.go b/hack/docs/instancetypes_gen/main.go index e22064fb43aa..ae79769ee64f 100644 --- a/hack/docs/instancetypes_gen/main.go +++ b/hack/docs/instancetypes_gen/main.go @@ -134,15 +134,15 @@ below are the resources available with some assumptions and after the instance o cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, + pricing.NewDefaultProvider( + ctx, + pricing.NewAPI(cfg), + ec2api, + cfg.Region, + ), + awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, - pricing.NewDefaultProvider( - ctx, - pricing.NewAPI(cfg), - ec2api, - cfg.Region, - ), - awscache.NewUnavailableOfferings(), ), ) if err = instanceTypeProvider.UpdateInstanceTypes(ctx); err != nil { diff --git a/hack/tools/launchtemplate_counter/main.go b/hack/tools/launchtemplate_counter/main.go index 2ea169ef2dd4..ef56f53e08ca 100644 --- a/hack/tools/launchtemplate_counter/main.go +++ b/hack/tools/launchtemplate_counter/main.go @@ -61,15 +61,15 @@ func main() { cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, + pricing.NewDefaultProvider( + ctx, + pricing.NewAPI(cfg), + ec2api, + cfg.Region, + ), + awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, - pricing.NewDefaultProvider( - ctx, - pricing.NewAPI(cfg), - ec2api, - cfg.Region, - ), - awscache.NewUnavailableOfferings(), ), ) if err := instanceTypeProvider.UpdateInstanceTypes(ctx); err != nil { diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh index 23aa02d7ee4c..d4d9b0c2c93e 100755 --- a/hack/validation/labels.sh +++ b/hack/validation/labels.sh @@ -2,7 +2,7 @@ function injectDomainLabelRestrictions() { domain=$1 - rule="self.all(x, x in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", 
\"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"${domain}\"))" + rule="self.all(x, x in [\"${domain}/capacity-reservation-id\", \"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"${domain}\"))" message="label domain \"${domain}\" is restricted" MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml } diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh index ccc70e2575fe..f4dc91867a09 100755 --- a/hack/validation/requirements.sh +++ b/hack/validation/requirements.sh @@ -2,7 +2,7 @@ function injectDomainRequirementRestrictions() { domain=$1 - rule="self in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"${domain}\")" + rule="self in [\"${domain}/capacity-reservation-id\", \"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", 
\"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"${domain}\")" message="label domain \"${domain}\" is restricted" MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 3b915b075962..152dd1ac92e9 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -236,6 +236,39 @@ spec: x-kubernetes-validations: - message: must have only one blockDeviceMappings with rootVolume rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 + capacityReservationSelectorTerms: + description: |- + CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to + determine the set of eligible capacity reservations. + items: + properties: + id: + description: ID is the capacity reservation id in EC2 + pattern: ^cr-[0-9a-z]+$ + type: string + ownerID: + description: Owner is the owner id for the ami. + pattern: ^[0-9]{12}$ + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select capacity reservations. + Specifying '*' for a value selects all values for a given tag key. + maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))' context: description: |- Context is a Reserved field in EC2 APIs @@ -466,7 +499,7 @@ spec: - message: immutable field changed rule: self == oldSelf securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + description: SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed. items: description: |- SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. 
@@ -500,12 +533,12 @@ spec:
               rule: self.size() != 0
             - message: expected at least one, got none, ['tags', 'id', 'name']
               rule: self.all(x, has(x.tags) || has(x.id) || has(x.name))
-            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
+            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
               rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))'
-            - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms'
+            - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term'
               rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))'
           subnetSelectorTerms:
-            description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed.
+            description: SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed.
             items:
               description: |-
                 SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes.
@@ -534,7 +567,7 @@ spec:
               rule: self.size() != 0
             - message: expected at least one, got none, ['tags', 'id']
               rule: self.all(x, has(x.tags) || has(x.id))
-            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms'
+            - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term'
               rule: '!self.all(x, has(x.id) && has(x.tags))'
           tags:
             additionalProperties:
@@ -637,6 +670,51 @@ spec:
               - requirements
             type: object
           type: array
+        capacityReservations:
+          description: |-
+            CapacityReservations contains the current capacity reservation values that are
+            available to the cluster under the CapacityReservationSelectorTerms.
+          items:
+            description: CapacityReservation contains resolved capacity reservation values utilized for node launch
+            properties:
+              availabilityZone:
+                description: The availability zone the capacity reservation is available in.
+                type: string
+              availableInstanceCount:
+                description: The last known available instance count for the capacity reservation.
+                type: integer
+              endTime:
+                description: |-
+                  The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter
+                  will no longer be able to launch instances into that reservation.
+                format: date-time
+                type: string
+              id:
+                description: The id for the capacity reservation.
+                pattern: ^cr-[0-9a-z]+$
+                type: string
+              instanceMatchCriteria:
+                description: Indicates the type of instance launches the capacity reservation accepts.
+                enum:
+                - open
+                - targeted
+                type: string
+              instanceType:
+                description: The instance type for the capacity reservation.
+                type: string
+              ownerID:
+                description: The ID of the AWS account that owns the capacity reservation.
+                pattern: ^[0-9]{12}$
+                type: string
+              totalInstanceCount:
+                description: The total instance count for the capacity reservation.
+ type: integer + required: + - availabilityZone + - availableInstanceCount + - id + - instanceMatchCriteria + - instanceType + - ownerID + - totalInstanceCount + type: object + type: array conditions: description: Conditions contains signals for health and readiness items: diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index e255d9c894fc..cf065565c848 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -134,7 +134,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 155bd626c067..6fc612d83918 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -207,7 +207,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", 
"karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: |- @@ -280,7 +280,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", 
"karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/v1/doc.go b/pkg/apis/v1/doc.go index 44692b28c362..22a855b23ba2 100644 --- a/pkg/apis/v1/doc.go +++ b/pkg/apis/v1/doc.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/karpenter/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/apis" ) @@ -33,4 +34,6 @@ func init() { &EC2NodeClass{}, &EC2NodeClassList{}, ) + + cloudprovider.ReservationIDLabel = LabelCapacityReservationID } diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 8d61dc78d156..fdc76e33458f 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -28,21 +28,28 @@ import ( // EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. // This will contain configuration necessary to launch instances in AWS. type EC2NodeClassSpec struct { - // SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + // SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed. // +kubebuilder:validation:XValidation:message="subnetSelectorTerms cannot be empty",rule="self.size() != 0" // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id']",rule="self.all(x, has(x.tags) || has(x.id))" - // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms",rule="!self.all(x, has(x.id) && has(x.tags))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term",rule="!self.all(x, has(x.id) && has(x.tags))" // +kubebuilder:validation:MaxItems:=30 // +required SubnetSelectorTerms []SubnetSelectorTerm `json:"subnetSelectorTerms" hash:"ignore"` - // SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + // SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed. 
// +kubebuilder:validation:XValidation:message="securityGroupSelectorTerms cannot be empty",rule="self.size() != 0" // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id', 'name']",rule="self.all(x, has(x.tags) || has(x.id) || has(x.name))" - // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))" - // +kubebuilder:validation:XValidation:message="'name' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms",rule="!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))" + // +kubebuilder:validation:XValidation:message="'name' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term",rule="!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))" // +kubebuilder:validation:MaxItems:=30 // +required SecurityGroupSelectorTerms []SecurityGroupSelectorTerm `json:"securityGroupSelectorTerms" hash:"ignore"` + // CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to + // determine the set of eligible capacity reservations. + // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id']",rule="self.all(x, has(x.tags) || has(x.id))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))" + // +kubebuilder:validation:MaxItems:=30 + // +optional + CapacityReservationSelectorTerms []CapacityReservationSelectorTerm `json:"capacityReservationSelectorTerms" hash:"ignore"` // AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. // +optional AssociatePublicIPAddress *bool `json:"associatePublicIPAddress,omitempty"` @@ -169,6 +176,23 @@ type SecurityGroupSelectorTerm struct { Name string `json:"name,omitempty"` } +type CapacityReservationSelectorTerm struct { + // Tags is a map of key/value tags used to select capacity reservations. + // Specifying '*' for a value selects all values for a given tag key. + // +kubebuilder:validation:XValidation:message="empty tag keys or values aren't supported",rule="self.all(k, k != '' && self[k] != '')" + // +kubebuilder:validation:MaxProperties:=20 + // +optional + Tags map[string]string `json:"tags,omitempty"` + // ID is the capacity reservation id in EC2 + // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" + // +optional + ID string `json:"id,omitempty"` + // Owner is the owner id for the ami. + // +kubebuilder:validation:Pattern:="^[0-9]{12}$" + // +optional + OwnerID string `json:"ownerID,omitempty"` +} + // AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. // If multiple fields are used for selection, the requirements are ANDed. 
 type AMISelectorTerm struct {
diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go
index d88d561c2b34..a523cfd8189b 100644
--- a/pkg/apis/v1/ec2nodeclass_hash_test.go
+++ b/pkg/apis/v1/ec2nodeclass_hash_test.go
@@ -184,21 +184,18 @@ var _ = Describe("Hash", func() {
 			hash := nodeClass.Hash()
 			// Update a behavior/dynamic field
-			nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{
-				{
-					Tags: map[string]string{"subnet-test-key": "subnet-test-value"},
-				},
-			}
-			nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{
-				{
-					Tags: map[string]string{"sg-test-key": "sg-test-value"},
-				},
-			}
-			nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{
-				{
-					Tags: map[string]string{"ami-test-key": "ami-test-value"},
-				},
-			}
+			nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{
+				Tags: map[string]string{"subnet-test-key": "subnet-test-value"},
+			}}
+			nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{{
+				Tags: map[string]string{"sg-test-key": "sg-test-value"},
+			}}
+			nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{
+				Tags: map[string]string{"ami-test-key": "ami-test-value"},
+			}}
+			nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				Tags: map[string]string{"cr-test-key": "cr-test-value"},
+			}}
 			updatedHash := nodeClass.Hash()
 			Expect(hash).To(Equal(updatedHash))
 		})
diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go
index 4c210ef81789..532fd48f4341 100644
--- a/pkg/apis/v1/ec2nodeclass_status.go
+++ b/pkg/apis/v1/ec2nodeclass_status.go
@@ -17,14 +17,16 @@ package v1
 import (
 	"github.com/awslabs/operatorpkg/status"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 const (
-	ConditionTypeSubnetsReady = "SubnetsReady"
-	ConditionTypeSecurityGroupsReady = "SecurityGroupsReady"
-	ConditionTypeAMIsReady = "AMIsReady"
-	ConditionTypeInstanceProfileReady = "InstanceProfileReady"
-	ConditionTypeValidationSucceeded = "ValidationSucceeded"
+	ConditionTypeSubnetsReady = "SubnetsReady"
+	ConditionTypeSecurityGroupsReady = "SecurityGroupsReady"
+	ConditionTypeAMIsReady = "AMIsReady"
+	ConditionTypeInstanceProfileReady = "InstanceProfileReady"
+	ConditionTypeCapacityReservationsReady = "CapacityReservationsReady"
+	ConditionTypeValidationSucceeded = "ValidationSucceeded"
 )
 
 // Subnet contains resolved Subnet selector values utilized for node launch
@@ -66,6 +68,37 @@ type AMI struct {
 	Requirements []corev1.NodeSelectorRequirement `json:"requirements"`
 }
 
+// CapacityReservation contains resolved capacity reservation values utilized for node launch
+type CapacityReservation struct {
+	// The availability zone the capacity reservation is available in.
+	// +required
+	AvailabilityZone string `json:"availabilityZone"`
+	// The last known available instance count for the capacity reservation.
+	// +required
+	AvailableInstanceCount int `json:"availableInstanceCount" hash:"ignore"`
+	// The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter
+	// will no longer be able to launch instances into that reservation.
+	// +optional
+	EndTime *metav1.Time `json:"endTime,omitempty" hash:"ignore"`
+	// The id for the capacity reservation.
+	// +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$"
+	// +required
+	ID string `json:"id"`
+	// Indicates the type of instance launches the capacity reservation accepts.
+	// +kubebuilder:validation:Enum:={open,targeted}
+	// +required
+	InstanceMatchCriteria string `json:"instanceMatchCriteria"`
+	// The instance type for the capacity reservation.
+	// +required
+	InstanceType string `json:"instanceType"`
+	// The ID of the AWS account that owns the capacity reservation.
+	// +kubebuilder:validation:Pattern:="^[0-9]{12}$"
+	// +required
+	OwnerID string `json:"ownerID"`
+	// The total instance count for the capacity reservation.
+	// +required
+	TotalInstanceCount int `json:"totalInstanceCount" hash:"ignore"`
+}
+
 // EC2NodeClassStatus contains the resolved state of the EC2NodeClass
 type EC2NodeClassStatus struct {
 	// Subnets contains the current subnet values that are available to the
@@ -75,7 +108,8 @@ type EC2NodeClassStatus struct {
 	// SecurityGroups contains the current security group values that are available to the
 	// cluster under the SecurityGroups selectors.
 	// +optional
 	SecurityGroups []SecurityGroup `json:"securityGroups,omitempty"`
+	// CapacityReservations contains the current capacity reservation values that are
+	// available to the cluster under the CapacityReservationSelectorTerms.
+	// +optional
+	CapacityReservations []CapacityReservation `json:"capacityReservations,omitempty"`
 	// AMI contains the current AMI values that are available to the
 	// cluster under the AMI selectors.
 	// +optional
@@ -94,6 +128,7 @@ func (in *EC2NodeClass) StatusConditions() status.ConditionSet {
 		ConditionTypeSubnetsReady,
 		ConditionTypeSecurityGroupsReady,
 		ConditionTypeInstanceProfileReady,
+		ConditionTypeCapacityReservationsReady,
 		ConditionTypeValidationSucceeded,
 	).For(in)
 }
diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go
index 7685f1546118..898ba9dc7ad1 100644
--- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go
+++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go
@@ -435,6 +435,110 @@ var _ = Describe("CEL/Validation", func() {
 			Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
 		})
 	})
+	Context("CapacityReservationSelectorTerms", func() {
+		It("should succeed with a valid capacity reservation selector on tags", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				Tags: map[string]string{
+					"test": "testvalue",
+				},
+			}}
+			Expect(env.Client.Create(ctx, nc)).To(Succeed())
+		})
+		It("should succeed with a valid capacity reservation selector on id", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				ID: "cr-12345749",
+			}}
+			Expect(env.Client.Create(ctx, nc)).To(Succeed())
+		})
+		It("should fail with a capacity reservation selector on a malformed id", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				ID: "r-12345749",
+			}}
+			Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+		})
+		It("should succeed when capacity reservation selector terms are set to nil", func() {
+			nc.Spec.CapacityReservationSelectorTerms = nil
+			Expect(env.Client.Create(ctx, nc)).To(Succeed())
+		})
+		It("should fail when a capacity reservation selector term has no values", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{}}
+			Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+		})
+		It("should fail when a capacity reservation selector term has no tag map values", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				Tags: map[string]string{},
+			}}
+			Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+		})
+		It("should fail when a capacity reservation selector term has a tag map key that is empty", func() {
+			nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{
+				Tags: map[string]string{
+					"": "testvalue",
+				},
+			}}
+			Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+		})
+		
It("should fail when a capacity reservation selector term has a tag map value that is empty", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{ + "": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when the last capacity reservation selector is invalid", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{ + { + Tags: map[string]string{ + "test": "testvalue", + }, + }, + { + Tags: map[string]string{ + "test2": "testvalue2", + }, + }, + { + Tags: map[string]string{ + "test3": "testvalue3", + }, + }, + { + Tags: map[string]string{ + "": "testvalue4", + }, + }, + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when specifying id with tags in a single term", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + ID: "cr-12345749", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should succeed for a valid ownerID", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should fail when the ownerID is malformed", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "01234567890", // OwnerID must be 12 digits, this is 11 + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) Context("AMISelectorTerms", func() { It("should succeed with a valid ami selector on alias", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index 7090f623c9ab..1783be8cbda8 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -29,6 +29,7 @@ import ( func init() { karpv1.RestrictedLabelDomains = karpv1.RestrictedLabelDomains.Insert(RestrictedLabelDomains...) 
karpv1.WellKnownLabels = karpv1.WellKnownLabels.Insert( + LabelCapacityReservationID, LabelInstanceHypervisor, LabelInstanceEncryptionInTransitSupported, LabelInstanceCategory, @@ -97,10 +98,7 @@ var ( ResourcePrivateIPv4Address corev1.ResourceName = "vpc.amazonaws.com/PrivateIPv4Address" ResourceEFA corev1.ResourceName = "vpc.amazonaws.com/efa" - LabelNodeClass = apis.Group + "/ec2nodeclass" - - LabelTopologyZoneID = "topology.k8s.aws/zone-id" - + LabelCapacityReservationID = apis.Group + "/capacity-reservation-id" LabelInstanceHypervisor = apis.Group + "/instance-hypervisor" LabelInstanceEncryptionInTransitSupported = apis.Group + "/instance-encryption-in-transit-supported" LabelInstanceCategory = apis.Group + "/instance-category" @@ -121,10 +119,14 @@ var ( LabelInstanceAcceleratorName = apis.Group + "/instance-accelerator-name" LabelInstanceAcceleratorManufacturer = apis.Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = apis.Group + "/instance-accelerator-count" - AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" - AnnotationClusterNameTaggedCompatability = apis.CompatibilityGroup + "/cluster-name-tagged" - AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" - AnnotationInstanceTagged = apis.Group + "/tagged" + LabelNodeClass = apis.Group + "/ec2nodeclass" + + LabelTopologyZoneID = "topology.k8s.aws/zone-id" + + AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" + AnnotationClusterNameTaggedCompatability = apis.CompatibilityGroup + "/cluster-name-tagged" + AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" + AnnotationInstanceTagged = apis.Group + "/tagged" NodeClaimTagKey = coreapis.Group + "/nodeclaim" NameTagKey = "Name" diff --git a/pkg/apis/v1/zz_generated.deepcopy.go b/pkg/apis/v1/zz_generated.deepcopy.go index 802b4929776c..344bb1c23917 100644 --- a/pkg/apis/v1/zz_generated.deepcopy.go +++ b/pkg/apis/v1/zz_generated.deepcopy.go @@ -164,6 +164,47 @@ func (in *BlockDeviceMapping) DeepCopy() *BlockDeviceMapping { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) { + *out = *in + if in.EndTime != nil { + in, out := &in.EndTime, &out.EndTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation. +func (in *CapacityReservation) DeepCopy() *CapacityReservation { + if in == nil { + return nil + } + out := new(CapacityReservation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CapacityReservationSelectorTerm) DeepCopyInto(out *CapacityReservationSelectorTerm) { + *out = *in + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservationSelectorTerm. +func (in *CapacityReservationSelectorTerm) DeepCopy() *CapacityReservationSelectorTerm { + if in == nil { + return nil + } + out := new(CapacityReservationSelectorTerm) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EC2NodeClass) DeepCopyInto(out *EC2NodeClass) {
 	*out = *in
@@ -240,6 +281,13 @@ func (in *EC2NodeClassSpec) DeepCopyInto(out *EC2NodeClassSpec) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.CapacityReservationSelectorTerms != nil {
+		in, out := &in.CapacityReservationSelectorTerms, &out.CapacityReservationSelectorTerms
+		*out = make([]CapacityReservationSelectorTerm, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 	if in.AssociatePublicIPAddress != nil {
 		in, out := &in.AssociatePublicIPAddress, &out.AssociatePublicIPAddress
 		*out = new(bool)
@@ -335,6 +383,13 @@ func (in *EC2NodeClassStatus) DeepCopyInto(out *EC2NodeClassStatus) {
 		*out = make([]SecurityGroup, len(*in))
 		copy(*out, *in)
 	}
+	if in.CapacityReservations != nil {
+		in, out := &in.CapacityReservations, &out.CapacityReservations
+		*out = make([]CapacityReservation, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 	if in.AMIs != nil {
 		in, out := &in.AMIs, &out.AMIs
 		*out = make([]AMI, len(*in))
diff --git a/pkg/aws/sdk.go b/pkg/aws/sdk.go
index b6449125714b..b807b7922761 100644
--- a/pkg/aws/sdk.go
+++ b/pkg/aws/sdk.go
@@ -27,6 +27,7 @@ import (
 )
 
 type EC2API interface {
+	DescribeCapacityReservations(context.Context, *ec2.DescribeCapacityReservationsInput, ...func(*ec2.Options)) (*ec2.DescribeCapacityReservationsOutput, error)
 	DescribeImages(context.Context, *ec2.DescribeImagesInput, ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error)
 	DescribeLaunchTemplates(context.Context, *ec2.DescribeLaunchTemplatesInput, ...func(*ec2.Options)) (*ec2.DescribeLaunchTemplatesOutput, error)
 	DescribeSubnets(context.Context, *ec2.DescribeSubnetsInput, ...func(*ec2.Options)) (*ec2.DescribeSubnetsOutput, error)
diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go
index fc78412dfd75..8efd3a2e5c42 100644
--- a/pkg/cache/unavailableofferings.go
+++ b/pkg/cache/unavailableofferings.go
@@ -48,13 +48,17 @@ func NewUnavailableOfferings() *UnavailableOfferings {
 }
 
 // IsUnavailable returns true if the offering appears in the cache
-func (u *UnavailableOfferings) IsUnavailable(instanceType ec2types.InstanceType, zone, capacityType string) bool {
+func (u *UnavailableOfferings) IsUnavailable(instanceType string, zone, capacityType string) bool {
 	_, found := u.cache.Get(u.key(instanceType, zone, capacityType))
 	return found
 }
 
+// IsReservationUnavailable returns true if the given capacity reservation appears in the cache.
+// Note: this is currently a stub; reservations are always reported as available.
+func (u *UnavailableOfferings) IsReservationUnavailable(reservationID string) bool {
+	return false
+}
+
 // MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings
-func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason string, instanceType ec2types.InstanceType, zone, capacityType string) {
+func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) {
 	// even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL
 	log.FromContext(ctx).WithValues(
 		"reason", unavailableReason,
@@ -69,10 +73,10 @@ func (u *UnavailableOfferings) MarkUnavailableForFleetErr(ctx context.Context, fleetErr ec2types.CreateFleetError, capacityType string) {
 	instanceType := fleetErr.LaunchTemplateAndOverrides.Overrides.InstanceType
 	zone := aws.ToString(fleetErr.LaunchTemplateAndOverrides.Overrides.AvailabilityZone)
-	u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), instanceType, zone, capacityType)
+	
u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), string(instanceType), zone, capacityType) } -func (u *UnavailableOfferings) Delete(instanceType ec2types.InstanceType, zone string, capacityType string) { +func (u *UnavailableOfferings) Delete(instanceType, zone, capacityType string) { u.cache.Delete(u.key(instanceType, zone, capacityType)) } @@ -81,6 +85,6 @@ func (u *UnavailableOfferings) Flush() { } // key returns the cache key for all offerings in the cache -func (u *UnavailableOfferings) key(instanceType ec2types.InstanceType, zone string, capacityType string) string { +func (u *UnavailableOfferings) key(instanceType, zone, capacityType string) string { return fmt.Sprintf("%s:%s:%s", capacityType, instanceType, zone) } diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 9e055ee0f194..55fd0dc8059f 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -1158,7 +1158,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(100), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectApplied(ctx, env.Client, nodePool, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"}}) @@ -1175,7 +1175,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(11), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ MaxPods: aws.Int32(1), } @@ -1216,7 +1216,7 @@ var _ = Describe("CloudProvider", func() { }}) nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, 
awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index a9de8ba97fa9..2c17f7013794 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -34,6 +34,7 @@ import ( controllerspricing "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/pricing" ssminvalidation "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/ssm/invalidation" controllersversion "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/version" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/version" @@ -78,10 +79,12 @@ func NewControllers( amiProvider amifamily.Provider, launchTemplateProvider launchtemplate.Provider, versionProvider *version.DefaultProvider, - instanceTypeProvider *instancetype.DefaultProvider) []controller.Controller { + instanceTypeProvider *instancetype.DefaultProvider, + capacityReservationProvider capacityreservation.Provider, +) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), - nodeclass.NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, ec2api), + nodeclass.NewController(clk, kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, capacityReservationProvider, ec2api), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider), controllerspricing.NewController(pricingProvider), diff --git a/pkg/controllers/interruption/controller.go b/pkg/controllers/interruption/controller.go index 51899c9084e0..b5205ab57232 100644 --- a/pkg/controllers/interruption/controller.go +++ b/pkg/controllers/interruption/controller.go @@ -22,7 +22,6 @@ import ( "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/metrics" - ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" sqstypes "github.com/aws/aws-sdk-go-v2/service/sqs/types" "github.com/awslabs/operatorpkg/singleton" "go.uber.org/multierr" @@ -208,7 +207,7 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message, zone := nodeClaim.Labels[corev1.LabelTopologyZone] instanceType := nodeClaim.Labels[corev1.LabelInstanceTypeStable] if zone != "" && instanceType != "" { - c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), ec2types.InstanceType(instanceType), zone, karpv1.CapacityTypeSpot) + c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, karpv1.CapacityTypeSpot) } } if action != NoAction { diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go new file mode 100644 index 000000000000..dac3c238ba36 --- /dev/null +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -0,0 +1,133 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nodeclass
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"time"
+
+	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/awslabs/operatorpkg/singleton"
+	"github.com/samber/lo"
+	"go.uber.org/multierr"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/clock"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/karpenter/pkg/utils/pretty"
+
+	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
+	"github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation"
+)
+
+const capacityReservationPollPeriod = time.Minute
+
+// CapacityReservation reconciles the EC2NodeClass's capacityReservationSelectorTerms against EC2 and stores the
+// matching reservations in the NodeClass's status.
+type CapacityReservation struct {
+	provider capacityreservation.Provider
+	clk      clock.Clock
+	cm       *pretty.ChangeMonitor
+}
+
+func NewCapacityReservationReconciler(clk clock.Clock, provider capacityreservation.Provider) *CapacityReservation {
+	return &CapacityReservation{
+		provider: provider,
+		clk:      clk,
+		cm:       pretty.NewChangeMonitor(),
+	}
+}
+
+func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass) (reconcile.Result, error) {
+	reservations, err := c.provider.List(ctx, nc.Spec.CapacityReservationSelectorTerms...)
+	if err != nil {
+		return reconcile.Result{}, fmt.Errorf("getting capacity reservations, %w", err)
+	}
+	if len(reservations) == 0 {
+		nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady)
+		return reconcile.Result{RequeueAfter: capacityReservationPollPeriod}, nil
+	}
+
+	if ids := lo.Map(reservations, func(r *ec2types.CapacityReservation, _ int) string {
+		return *r.CapacityReservationId
+	}); c.cm.HasChanged(nc.Name, ids) {
+		log.FromContext(ctx).V(1).WithValues("ids", ids).Info("discovered capacity reservations")
+	}
+	sort.Slice(reservations, func(i, j int) bool {
+		return *reservations[i].CapacityReservationId < *reservations[j].CapacityReservationId
+	})
+	errs := []error{}
+	nc.Status.CapacityReservations = []v1.CapacityReservation{}
+	for _, r := range reservations {
+		reservation, err := capacityReservationFromEC2(r)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+		nc.Status.CapacityReservations = append(nc.Status.CapacityReservations, reservation)
+	}
+	if len(errs) != 0 {
+		// logr doesn't printf-format its message argument, so format the message up front
+		log.FromContext(ctx).Error(multierr.Combine(errs...), fmt.Sprintf("failed to update status with %d of %d capacity reservations", len(errs), len(reservations)))
+	}
+	nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady)
+	return reconcile.Result{RequeueAfter: c.requeueAfter(reservations...)}, nil
+}
+
+func capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) {
+	if !lo.Contains([]ec2types.InstanceMatchCriteria{
+		ec2types.InstanceMatchCriteriaOpen,
+		ec2types.InstanceMatchCriteriaTargeted,
+	}, cr.InstanceMatchCriteria) {
+		return v1.CapacityReservation{}, fmt.Errorf("capacity reservation %s has an unsupported instance match criteria %q", *cr.CapacityReservationId, cr.InstanceMatchCriteria)
+	}
+	var endTime *metav1.Time
+	if cr.EndDate != nil {
+		endTime = lo.ToPtr(metav1.NewTime(*cr.EndDate))
+	}
+
+	return v1.CapacityReservation{
+		AvailabilityZone:       
*cr.AvailabilityZone, + AvailableInstanceCount: int(*cr.AvailableInstanceCount), + EndTime: endTime, + ID: *cr.CapacityReservationId, + InstanceMatchCriteria: string(cr.InstanceMatchCriteria), + InstanceType: *cr.InstanceType, + OwnerID: *cr.OwnerId, + TotalInstanceCount: int(*cr.TotalInstanceCount), + }, nil +} + +func (c *CapacityReservation) requeueAfter(reservations ...*ec2types.CapacityReservation) time.Duration { + var next *time.Time + for _, reservation := range reservations { + if reservation.EndDate == nil { + continue + } + if next == nil { + next = reservation.EndDate + continue + } + if next.After(*reservation.EndDate) { + next = reservation.EndDate + } + } + if next == nil { + return capacityReservationPollPeriod + } + if d := next.Sub(c.clk.Now()); d < capacityReservationPollPeriod { + return lo.Ternary(d < 0, singleton.RequeueImmediately, d) + } + return capacityReservationPollPeriod +} diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index ff56928d7f94..0afad54158d5 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -21,6 +21,7 @@ import ( "go.uber.org/multierr" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/utils/clock" "sigs.k8s.io/karpenter/pkg/operator/injection" nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" "sigs.k8s.io/karpenter/pkg/utils/result" @@ -46,6 +47,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" sdk "github.com/aws/karpenter-provider-aws/pkg/aws" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" @@ -61,22 +63,34 @@ type Controller struct { recorder events.Recorder launchTemplateProvider launchtemplate.Provider - ami *AMI - instanceProfile *InstanceProfile - subnet *Subnet - securityGroup *SecurityGroup - validation *Validation - readiness *Readiness //TODO : Remove this when we have sub status conditions + ami *AMI + capacityReservation *CapacityReservation + instanceProfile *InstanceProfile + subnet *Subnet + securityGroup *SecurityGroup + validation *Validation + readiness *Readiness //TODO : Remove this when we have sub status conditions } -func NewController(kubeClient client.Client, recorder events.Recorder, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, - amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider, ec2api sdk.EC2API) *Controller { +func NewController( + clk clock.Clock, + kubeClient client.Client, + recorder events.Recorder, + subnetProvider subnet.Provider, + securityGroupProvider securitygroup.Provider, + amiProvider amifamily.Provider, + instanceProfileProvider instanceprofile.Provider, + launchTemplateProvider launchtemplate.Provider, + capacityReservationProvider capacityreservation.Provider, + ec2api sdk.EC2API, +) *Controller { return &Controller{ kubeClient: kubeClient, recorder: recorder, launchTemplateProvider: launchTemplateProvider, ami: NewAMIReconciler(amiProvider), + capacityReservation: NewCapacityReservationReconciler(clk, capacityReservationProvider), subnet: &Subnet{subnetProvider: subnetProvider}, securityGroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, instanceProfile: 
&InstanceProfile{instanceProfileProvider: instanceProfileProvider}, @@ -116,6 +130,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) var errs error for _, reconciler := range []nodeClassReconciler{ c.ami, + c.capacityReservation, c.subnet, c.securityGroup, c.instanceProfile, diff --git a/pkg/controllers/nodeclass/readiness_test.go b/pkg/controllers/nodeclass/readiness_test.go index fdd5f3f95010..645253d732be 100644 --- a/pkg/controllers/nodeclass/readiness_test.go +++ b/pkg/controllers/nodeclass/readiness_test.go @@ -53,7 +53,7 @@ var _ = Describe("NodeClass Status Condition Controller", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(6)) + Expect(nodeClass.Status.Conditions).To(HaveLen(7)) Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) }) It("should update status condition as Not Ready", func() { diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 2a7e8813db9b..8ffda4b98615 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -67,12 +67,15 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) controller = nodeclass.NewController( - env.Client, events.NewRecorder(&record.FakeRecorder{}), + awsEnv.Clock, + env.Client, + events.NewRecorder(&record.FakeRecorder{}), awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, + awsEnv.CapacityReservationProvider, awsEnv.EC2API, ) }) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index abbb0a56d365..398183a7ae63 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -58,6 +58,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" @@ -77,21 +78,22 @@ func init() { // Operator is injected into the AWS CloudProvider's factories type Operator struct { *operator.Operator - Config aws.Config - UnavailableOfferingsCache *awscache.UnavailableOfferings - SSMCache *cache.Cache - SubnetProvider subnet.Provider - SecurityGroupProvider securitygroup.Provider - InstanceProfileProvider instanceprofile.Provider - AMIProvider amifamily.Provider - AMIResolver amifamily.Resolver - LaunchTemplateProvider launchtemplate.Provider - PricingProvider pricing.Provider - VersionProvider *version.DefaultProvider - InstanceTypesProvider *instancetype.DefaultProvider - InstanceProvider instance.Provider - SSMProvider ssmp.Provider - EC2API *ec2.Client + Config aws.Config + UnavailableOfferingsCache *awscache.UnavailableOfferings + SSMCache *cache.Cache + SubnetProvider subnet.Provider + SecurityGroupProvider securitygroup.Provider + InstanceProfileProvider instanceprofile.Provider + AMIProvider amifamily.Provider + AMIResolver amifamily.Resolver + LaunchTemplateProvider launchtemplate.Provider + PricingProvider pricing.Provider + VersionProvider *version.DefaultProvider + InstanceTypesProvider 
*instancetype.DefaultProvider + InstanceProvider instance.Provider + SSMProvider ssmp.Provider + CapacityReservationProvider capacityreservation.Provider + EC2API *ec2.Client } func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) { @@ -177,7 +179,9 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, - instancetype.NewDefaultResolver(cfg.Region, pricingProvider, unavailableOfferingsCache), + pricingProvider, + unavailableOfferingsCache, + instancetype.NewDefaultResolver(cfg.Region), ) instanceProvider := instance.NewDefaultProvider( ctx, @@ -187,28 +191,30 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont subnetProvider, launchTemplateProvider, ) + capacityReservationProvider := capacityreservation.NewProvider(ec2api, operator.Clock, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) // Setup field indexers on instanceID -- specifically for the interruption controller if options.FromContext(ctx).InterruptionQueue != "" { SetupIndexers(ctx, operator.Manager) } return ctx, &Operator{ - Operator: operator, - Config: cfg, - UnavailableOfferingsCache: unavailableOfferingsCache, - SSMCache: ssmCache, - SubnetProvider: subnetProvider, - SecurityGroupProvider: securityGroupProvider, - InstanceProfileProvider: instanceProfileProvider, - AMIProvider: amiProvider, - AMIResolver: amiResolver, - VersionProvider: versionProvider, - LaunchTemplateProvider: launchTemplateProvider, - PricingProvider: pricingProvider, - InstanceTypesProvider: instanceTypeProvider, - InstanceProvider: instanceProvider, - SSMProvider: ssmProvider, - EC2API: ec2api, + Operator: operator, + Config: cfg, + UnavailableOfferingsCache: unavailableOfferingsCache, + SSMCache: ssmCache, + SubnetProvider: subnetProvider, + SecurityGroupProvider: securityGroupProvider, + InstanceProfileProvider: instanceProfileProvider, + AMIProvider: amiProvider, + AMIResolver: amiResolver, + VersionProvider: versionProvider, + LaunchTemplateProvider: launchTemplateProvider, + PricingProvider: pricingProvider, + InstanceTypesProvider: instanceTypeProvider, + InstanceProvider: instanceProvider, + SSMProvider: ssmProvider, + CapacityReservationProvider: capacityReservationProvider, + EC2API: ec2api, } } diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 8566b05a4d8a..84d53eaa1f7b 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "strings" "github.com/aws/aws-sdk-go-v2/aws" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" @@ -68,14 +69,15 @@ type Options struct { // LaunchTemplate holds the dynamically generated launch template parameters type LaunchTemplate struct { *Options - UserData bootstrap.Bootstrapper - BlockDeviceMappings []*v1.BlockDeviceMapping - MetadataOptions *v1.MetadataOptions - AMIID string - InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` - DetailedMonitoring bool - EFACount int - CapacityType string + UserData bootstrap.Bootstrapper + BlockDeviceMappings []*v1.BlockDeviceMapping + MetadataOptions *v1.MetadataOptions + AMIID string + InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` + DetailedMonitoring bool + EFACount int + CapacityType string + CapacityReservationID string } // AMIFamily can be implemented to override the default logic for generating 
dynamic launch template parameters
@@ -134,25 +136,43 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N
 	// In order to support reserved ENIs for CNI custom networking setups,
 	// we need to pass down the max-pods calculation to the kubelet.
 	// This requires that we resolve a unique launch template per max-pods value.
-	// Similarly, instance types configured with EfAs require unique launch templates depending on the number of
+	// Similarly, instance types configured with EFAs require unique launch templates depending on the number of
 	// EFAs they support.
+	// Reservation IDs are also included since we need to create a separate LaunchTemplate per reservation ID when
+	// launching reserved capacity. If it's a reserved capacity launch, we've already filtered the instance types
+	// further up the call stack.
 	type launchTemplateParams struct {
-		efaCount int
-		maxPods  int
+		efaCount       int
+		maxPods        int
+		reservationIDs string
 	}
-	paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(instanceType *cloudprovider.InstanceType) launchTemplateParams {
+	paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(it *cloudprovider.InstanceType) launchTemplateParams {
+		var reservationIDs []string
+		if capacityType == karpv1.CapacityTypeReserved {
+			for i := range it.Offerings {
+				if it.Offerings[i].Requirements.Get(karpv1.CapacityTypeLabelKey).Any() != karpv1.CapacityTypeReserved {
+					continue
+				}
+				reservationIDs = append(reservationIDs, it.Offerings[i].Requirements.Get(cloudprovider.ReservationIDLabel).Any())
+			}
+		}
 		return launchTemplateParams{
 			efaCount: lo.Ternary(
 				lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA),
-				int(lo.ToPtr(instanceType.Capacity[v1.ResourceEFA]).Value()),
+				int(lo.ToPtr(it.Capacity[v1.ResourceEFA]).Value()),
 				0,
 			),
-			maxPods: int(instanceType.Capacity.Pods().Value()),
+			maxPods: int(it.Capacity.Pods().Value()),
+			// If we're dealing with reserved instances, there's only going to be a single instance type per group. This
+			// invariant is due to reservation IDs not being shared across instance types. Because of this, we don't need
+			// to worry about ordering in this string.
+			reservationIDs: strings.Join(reservationIDs, ","),
 		}
 	})
+
 	for params, instanceTypes := range paramsToInstanceTypes {
-		resolved := r.resolveLaunchTemplate(nodeClass, nodeClaim, instanceTypes, capacityType, amiFamily, amiID, params.maxPods, params.efaCount, options)
-		resolvedTemplates = append(resolvedTemplates, resolved)
+		reservationIDs := strings.Split(params.reservationIDs, ",")
+		resolvedTemplates = append(resolvedTemplates, r.resolveLaunchTemplates(nodeClass, nodeClaim, instanceTypes, capacityType, amiFamily, amiID, params.maxPods, params.efaCount, reservationIDs, options)...)
} } return resolvedTemplates, nil @@ -201,8 +221,18 @@ func (r DefaultResolver) defaultClusterDNS(opts *Options, kubeletConfig *v1.Kube return newKubeletConfig } -func (r DefaultResolver) resolveLaunchTemplate(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, - amiFamily AMIFamily, amiID string, maxPods int, efaCount int, options *Options) *LaunchTemplate { +func (r DefaultResolver) resolveLaunchTemplates( + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + capacityType string, + amiFamily AMIFamily, + amiID string, + maxPods int, + efaCount int, + capacityReservationIDs []string, + options *Options, +) []*LaunchTemplate { kubeletConfig := &v1.KubeletConfiguration{} if nodeClass.Spec.Kubelet != nil { kubeletConfig = nodeClass.Spec.Kubelet.DeepCopy() @@ -222,31 +252,38 @@ func (r DefaultResolver) resolveLaunchTemplate(nodeClass *v1.EC2NodeClass, nodeC }); !found { taints = append(taints, karpv1.UnregisteredNoExecuteTaint) } - - resolved := &LaunchTemplate{ - Options: options, - UserData: amiFamily.UserData( - r.defaultClusterDNS(options, kubeletConfig), - taints, - options.Labels, - options.CABundle, - instanceTypes, - nodeClass.Spec.UserData, - options.InstanceStorePolicy, - ), - BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings, - MetadataOptions: nodeClass.Spec.MetadataOptions, - DetailedMonitoring: aws.ToBool(nodeClass.Spec.DetailedMonitoring), - AMIID: amiID, - InstanceTypes: instanceTypes, - EFACount: efaCount, - CapacityType: capacityType, - } - if len(resolved.BlockDeviceMappings) == 0 { - resolved.BlockDeviceMappings = amiFamily.DefaultBlockDeviceMappings() - } - if resolved.MetadataOptions == nil { - resolved.MetadataOptions = amiFamily.DefaultMetadataOptions() + // If no reservation IDs are provided, insert an empty string so the end result is a single launch template with no + // associated capacity reservation. + if len(capacityReservationIDs) == 0 { + capacityReservationIDs = append(capacityReservationIDs, "") } - return resolved + return lo.Map(capacityReservationIDs, func(id string, _ int) *LaunchTemplate { + resolved := &LaunchTemplate{ + Options: options, + UserData: amiFamily.UserData( + r.defaultClusterDNS(options, kubeletConfig), + taints, + options.Labels, + options.CABundle, + instanceTypes, + nodeClass.Spec.UserData, + options.InstanceStorePolicy, + ), + BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings, + MetadataOptions: nodeClass.Spec.MetadataOptions, + DetailedMonitoring: aws.ToBool(nodeClass.Spec.DetailedMonitoring), + AMIID: amiID, + InstanceTypes: instanceTypes, + EFACount: efaCount, + CapacityType: capacityType, + CapacityReservationID: id, + } + if len(resolved.BlockDeviceMappings) == 0 { + resolved.BlockDeviceMappings = amiFamily.DefaultBlockDeviceMappings() + } + if resolved.MetadataOptions == nil { + resolved.MetadataOptions = amiFamily.DefaultMetadataOptions() + } + return resolved + }) } diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go new file mode 100644 index 000000000000..753df300cf80 --- /dev/null +++ b/pkg/providers/capacityreservation/provider.go @@ -0,0 +1,102 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation + +import ( + "context" + "fmt" + "sync" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/patrickmn/go-cache" + "github.com/samber/lo" + "k8s.io/utils/clock" + "sigs.k8s.io/karpenter/pkg/utils/pretty" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" +) + +type Provider interface { + List(context.Context, ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) +} + +type DefaultProvider struct { + sync.RWMutex + + ec2api sdk.EC2API + clk clock.Clock + cache *cache.Cache + cm *pretty.ChangeMonitor +} + +func NewProvider(ec2api sdk.EC2API, clk clock.Clock, cache *cache.Cache) *DefaultProvider { + return &DefaultProvider{ + ec2api: ec2api, + clk: clk, + cache: cache, + cm: pretty.NewChangeMonitor(), + } +} + +func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) { + queries := QueriesFromSelectorTerms(selectorTerms...) + reservations, remainingQueries := func() ([]*ec2types.CapacityReservation, []*Query) { + p.RLock() + defer p.RUnlock() + reservations := []*ec2types.CapacityReservation{} + remaining := []*Query{} + for _, query := range queries { + if value, ok := p.cache.Get(query.CacheKey()); ok { + reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) + } else { + remaining = append(remaining, query) + } + } + return reservations, remaining + }() + if len(remainingQueries) == 0 { + return p.filterReservations(reservations), nil + } + + p.Lock() + defer p.Unlock() + for _, query := range remainingQueries { + paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, query.DescribeCapacityReservationsInput()) + for paginator.HasMorePages() { + out, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("listing capacity reservations, %w", err) + } + queryReservations := lo.ToSlicePtr(out.CapacityReservations) + p.cache.SetDefault(query.CacheKey(), queryReservations) + reservations = append(reservations, queryReservations...) + } + } + return p.filterReservations(reservations), nil +} + +// filterReservations removes duplicate and expired reservations +func (p *DefaultProvider) filterReservations(reservations []*ec2types.CapacityReservation) []*ec2types.CapacityReservation { + return lo.Filter(lo.UniqBy(reservations, func(r *ec2types.CapacityReservation) string { + return *r.CapacityReservationId + }), func(r *ec2types.CapacityReservation, _ int) bool { + if r.EndDate == nil { + return true + } + return r.EndDate.After(p.clk.Now()) + }) +} diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go new file mode 100644 index 000000000000..ae8b857e2d0d --- /dev/null +++ b/pkg/providers/capacityreservation/types.go @@ -0,0 +1,105 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package capacityreservation
+
+import (
+	"fmt"
+
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/mitchellh/hashstructure/v2"
+	"github.com/samber/lo"
+
+	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
+)
+
+type Query struct {
+	ids     []string
+	ownerID string
+	tags    map[string]string
+}
+
+func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Query {
+	queries := []*Query{}
+	ids := []string{}
+	for i := range terms {
+		// 'id' is mutually exclusive with the other term fields, so an ID-based term only contributes to the single
+		// batched ID query. Without the continue, an ID-only term would also emit an empty filter query, which would
+		// match every active reservation in the account.
+		if terms[i].ID != "" {
+			ids = append(ids, terms[i].ID)
+			continue
+		}
+		queries = append(queries, &Query{
+			ownerID: terms[i].OwnerID,
+			tags:    terms[i].Tags,
+		})
+	}
+	if len(ids) != 0 {
+		queries = append(queries, &Query{ids: ids})
+	}
+	return queries
+}
+
+func (q *Query) CacheKey() string {
+	return fmt.Sprintf("%d", lo.Must(hashstructure.Hash(q, hashstructure.FormatV2, &hashstructure.HashOptions{
+		SlicesAsSets: true,
+	})))
+}
+
+func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReservationsInput {
+	if len(q.ids) != 0 {
+		return &ec2.DescribeCapacityReservationsInput{
+			Filters:                []ec2types.Filter{lo.Must(q.stateFilter())[0]},
+			CapacityReservationIds: q.ids,
+		}
+	}
+	type filterProvider func() ([]ec2types.Filter, bool)
+	return &ec2.DescribeCapacityReservationsInput{
+		Filters: lo.Flatten(lo.FilterMap([]filterProvider{
+			q.stateFilter,
+			q.ownerIDFilter,
+			q.tagsFilter,
+		}, func(f filterProvider, _ int) ([]ec2types.Filter, bool) {
+			return f()
+		})),
+	}
+}
+
+func (q *Query) stateFilter() ([]ec2types.Filter, bool) {
+	return []ec2types.Filter{{
+		Name:   lo.ToPtr("state"),
+		Values: []string{string(ec2types.CapacityReservationStateActive)},
+	}}, true
+}
+
+func (q *Query) ownerIDFilter() ([]ec2types.Filter, bool) {
+	return []ec2types.Filter{{
+		Name:   lo.ToPtr("owner-id"),
+		Values: []string{q.ownerID},
+	}}, q.ownerID != ""
+}
+
+func (q *Query) tagsFilter() ([]ec2types.Filter, bool) {
+	return lo.MapToSlice(q.tags, func(k, v string) ec2types.Filter {
+		if v == "*" {
+			return ec2types.Filter{
+				Name:   lo.ToPtr("tag-key"),
+				Values: []string{k},
+			}
+		}
+		return ec2types.Filter{
+			Name:   lo.ToPtr(fmt.Sprintf("tag:%s", k)),
+			Values: []string{v},
+		}
+	}), len(q.tags) != 0
+}
diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go
index 74c0a6c11bec..3d66525ecb8e 100644
--- a/pkg/providers/instance/instance.go
+++ b/pkg/providers/instance/instance.go
@@ -103,6 +103,9 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass
 	if !schedulingRequirements.HasMinValues() {
 		instanceTypes = p.filterInstanceTypes(nodeClaim, instanceTypes)
 	}
+	// We filter out non-reserved instance types regardless of the min-values setting, since if the launch is
+	// eligible for reserved capacity, those offerings are all we'll include in our fleet request.
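+	// If the NodeClaim isn't compatible with reserved capacity, or no reserved offering is currently available,
+	// filterReservedInstanceTypes leaves the instance type set unchanged (see its definition below).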
+ instanceTypes = p.filterReservedInstanceTypes(nodeClaim, instanceTypes) instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) if err != nil { return nil, cloudprovider.NewCreateError(fmt.Errorf("truncating instance types, %w", err), "InstanceTypeResolutionFailed", "Error truncating instance types based on the passed-in requirements") @@ -262,8 +265,13 @@ func GetCreateFleetInput(nodeClass *v1.EC2NodeClass, capacityType string, tags m Context: nodeClass.Spec.Context, LaunchTemplateConfigs: launchTemplateConfigs, TargetCapacitySpecification: &ec2types.TargetCapacitySpecificationRequest{ - DefaultTargetCapacityType: ec2types.DefaultTargetCapacityType(capacityType), - TotalTargetCapacity: aws.Int32(1), + DefaultTargetCapacityType: func() ec2types.DefaultTargetCapacityType { + if capacityType == karpv1.CapacityTypeReserved { + return ec2types.DefaultTargetCapacityType(karpv1.CapacityTypeOnDemand) + } + return ec2types.DefaultTargetCapacityType(capacityType) + }(), + TotalTargetCapacity: aws.Int32(1), }, TagSpecifications: []ec2types.TagSpecification{ {ResourceType: ec2types.ResourceTypeInstance, Tags: utils.MergeTags(tags)}, @@ -293,8 +301,15 @@ func (p *DefaultProvider) checkODFallback(nodeClaim *karpv1.NodeClaim, instanceT return nil } -func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, - instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, capacityType string, tags map[string]string) ([]ec2types.FleetLaunchTemplateConfigRequest, error) { +func (p *DefaultProvider) getLaunchTemplateConfigs( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + zonalSubnets map[string]*subnet.Subnet, + capacityType string, + tags map[string]string, +) ([]ec2types.FleetLaunchTemplateConfigRequest, error) { var launchTemplateConfigs []ec2types.FleetLaunchTemplateConfigRequest launchTemplates, err := p.launchTemplateProvider.EnsureAll(ctx, nodeClass, nodeClaim, instanceTypes, capacityType, tags) if err != nil { @@ -326,12 +341,12 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { - cloudprovider.Offering + *cloudprovider.Offering parentInstanceTypeName ec2types.InstanceType } var unwrappedOfferings []offeringWithParentName for _, it := range instanceTypes { - ofs := lo.Map(it.Offerings.Available(), func(of cloudprovider.Offering, _ int) offeringWithParentName { + ofs := lo.Map(it.Offerings.Available(), func(of *cloudprovider.Offering, _ int) offeringWithParentName { return offeringWithParentName{ Offering: of, parentInstanceTypeName: ec2types.InstanceType(it.Name), @@ -351,7 +366,7 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy overrides = append(overrides, ec2types.FleetLaunchTemplateOverridesRequest{ InstanceType: offering.parentInstanceTypeName, SubnetId: lo.ToPtr(subnet.ID), - ImageId: aws.String(image), + ImageId: lo.ToPtr(image), // This is technically redundant, but is useful if we have to parse insufficient capacity errors from // CreateFleet so that we can figure out the zone rather than additional API calls to look up the subnet AvailabilityZone: lo.ToPtr(subnet.Zone), @@ -368,24 +383,42 @@ func (p *DefaultProvider) 
updateUnavailableOfferingsCache(ctx context.Context, e
 	}
 }
 
-// getCapacityType selects spot if both constraints are flexible and there is an
-// available offering. The AWS Cloud Provider defaults to [ on-demand ], so spot
-// must be explicitly included in capacity type requirements.
+// getCapacityType selects the capacity type based on the flexibility of the NodeClaim and the available offerings.
+// Prioritization is as follows: reserved, spot, on-demand.
 func (p *DefaultProvider) getCapacityType(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) string {
-	requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
-	if requirements.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeSpot) {
-		requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeSpot)
-		for _, instanceType := range instanceTypes {
-			for _, offering := range instanceType.Offerings.Available() {
-				if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil {
-					return karpv1.CapacityTypeSpot
-				}
+	for _, capacityType := range []string{karpv1.CapacityTypeReserved, karpv1.CapacityTypeSpot} {
+		requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
+		if !requirements.Get(karpv1.CapacityTypeLabelKey).Has(capacityType) {
+			continue
+		}
+		requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType)
+		for _, it := range instanceTypes {
+			if len(it.Offerings.Available().Compatible(requirements)) != 0 {
+				return capacityType
 			}
 		}
 	}
 	return karpv1.CapacityTypeOnDemand
 }
 
+// filterReservedInstanceTypes filters the provided instance types down to those with available reserved offerings,
+// but only when the NodeClaim is compatible with reserved capacity. If there are no available reserved offerings,
+// no filtering is applied.
+func (*DefaultProvider) filterReservedInstanceTypes(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType {
+	requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
+	if !requirements.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) {
+		return instanceTypes
+	}
+	// Constrain the NodeClaim's capacity type requirement to reserved before filtering for offering availability. If we
+	// don't perform this step, it's possible the only reserved instance available could have an incompatible reservation
+	// ID.
+	requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved)
+	reservedInstances := lo.Filter(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool {
+		return len(it.Offerings.Available().Compatible(requirements)) != 0
+	})
+	return lo.Ternary(len(reservedInstances) != 0, reservedInstances, instanceTypes)
+}
+
 // filterInstanceTypes is used to provide filtering on the list of potential instance types to further limit it to those
 // that make the most sense given our specific AWS cloudprovider. 
func (p *DefaultProvider) filterInstanceTypes(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType { diff --git a/pkg/providers/instance/types.go b/pkg/providers/instance/types.go index 49f1ea7aaec7..62fb8cf11408 100644 --- a/pkg/providers/instance/types.go +++ b/pkg/providers/instance/types.go @@ -27,28 +27,39 @@ import ( // Instance is an internal data representation of either an ec2.Instance or an ec2.FleetInstance // It contains all the common data that is needed to inject into the Machine from either of these responses type Instance struct { - LaunchTime time.Time - State ec2types.InstanceStateName - ID string - ImageID string - Type ec2types.InstanceType - Zone string - CapacityType string - SecurityGroupIDs []string - SubnetID string - Tags map[string]string - EFAEnabled bool + LaunchTime time.Time + State ec2types.InstanceStateName + ID string + ImageID string + Type ec2types.InstanceType + Zone string + CapacityType string + CapacityReservationID string + SecurityGroupIDs []string + SubnetID string + Tags map[string]string + EFAEnabled bool } func NewInstance(out ec2types.Instance) *Instance { return &Instance{ - LaunchTime: aws.ToTime(out.LaunchTime), - State: out.State.Name, - ID: aws.ToString(out.InstanceId), - ImageID: aws.ToString(out.ImageId), - Type: out.InstanceType, - Zone: aws.ToString(out.Placement.AvailabilityZone), - CapacityType: lo.Ternary(out.SpotInstanceRequestId != nil, karpv1.CapacityTypeSpot, karpv1.CapacityTypeOnDemand), + LaunchTime: aws.ToTime(out.LaunchTime), + State: out.State.Name, + ID: aws.ToString(out.InstanceId), + ImageID: aws.ToString(out.ImageId), + Type: out.InstanceType, + Zone: aws.ToString(out.Placement.AvailabilityZone), + CapacityType: func() string { + switch { + case out.SpotInstanceRequestId != nil: + return karpv1.CapacityTypeSpot + case out.CapacityReservationId != nil: + return karpv1.CapacityTypeReserved + default: + return karpv1.CapacityTypeOnDemand + } + }(), + CapacityReservationID: lo.FromPtr(out.CapacityReservationId), SecurityGroupIDs: lo.Map(out.SecurityGroups, func(securitygroup ec2types.GroupIdentifier, _ int) string { return aws.ToString(securitygroup.GroupId) }), diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 1b6ec4604571..46045ff0191e 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -23,7 +23,10 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/karpenter/pkg/scheduling" + awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype/offering" + "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" @@ -65,6 +68,7 @@ type DefaultProvider struct { muInstanceTypesOfferings sync.RWMutex instanceTypesOfferings map[string]sets.Set[string] + allZones sets.Set[string] instanceTypesCache *cache.Cache discoveredCapacityCache *cache.Cache @@ -73,9 +77,19 @@ type DefaultProvider struct { instanceTypesSeqNum uint64 // instanceTypesOfferingsSeqNum is a monotonically increasing change counter used to avoid the expensive hashing operation on instance types instanceTypesOfferingsSeqNum uint64 + + offeringProvider *offering.DefaultProvider } -func NewDefaultProvider(instanceTypesCache *cache.Cache, discoveredCapacityCache *cache.Cache, ec2api sdk.EC2API, 
subnetProvider subnet.Provider, instanceTypesResolver Resolver) *DefaultProvider { +func NewDefaultProvider( + instanceTypesCache *cache.Cache, + discoveredCapacityCache *cache.Cache, + ec2api sdk.EC2API, + subnetProvider subnet.Provider, + pricingProvider pricing.Provider, + unavailableOfferingsCache *awscache.UnavailableOfferings, + instanceTypesResolver Resolver, +) *DefaultProvider { return &DefaultProvider{ ec2api: ec2api, subnetProvider: subnetProvider, @@ -86,6 +100,8 @@ func NewDefaultProvider(instanceTypesCache *cache.Cache, discoveredCapacityCache discoveredCapacityCache: discoveredCapacityCache, cm: pretty.NewChangeMonitor(), instanceTypesSeqNum: 0, + + offeringProvider: offering.NewDefaultProvider(unavailableOfferingsCache, pricingProvider), } } @@ -112,10 +128,8 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) // Compute fully initialized instance types hash key subnetZonesHash, _ := hashstructure.Hash(subnetZones, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - // Compute hash key against node class AMIs (used to force cache rebuild when AMIs change) amiHash, _ := hashstructure.Hash(nodeClass.Status.AMIs, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - key := fmt.Sprintf("%d-%d-%016x-%016x-%016x", p.instanceTypesSeqNum, p.instanceTypesOfferingsSeqNum, @@ -123,80 +137,53 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) subnetZonesHash, p.instanceTypesResolver.CacheKey(nodeClass), ) + var instanceTypes []*cloudprovider.InstanceType if item, ok := p.instanceTypesCache.Get(key); ok { // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself) // so that modifications to the ordering of the data don't affect the original - return append([]*cloudprovider.InstanceType{}, item.([]*cloudprovider.InstanceType)...), nil + instanceTypes = item.([]*cloudprovider.InstanceType) + } else { + instanceTypes = p.resolveInstanceTypes(ctx, nodeClass, amiHash) + p.instanceTypesCache.SetDefault(key, instanceTypes) } + return p.offeringProvider.InjectOfferings( + instanceTypes, + nodeClass, + p.allZones, + ), nil +} - // Get all zones across all offerings - // We don't use this in the cache key since this is produced from our instanceTypesOfferings which we do cache - allZones := sets.New[string]() - for _, offeringZones := range p.instanceTypesOfferings { - for zone := range offeringZones { - allZones.Insert(zone) - } - } - if p.cm.HasChanged("zones", allZones) { - log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") - } - subnetZoneToID := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) { +func (p *DefaultProvider) resolveInstanceTypes( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + amiHash uint64, +) []*cloudprovider.InstanceType { + zonesToZoneIDs := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) { return s.Zone, s.ZoneID }) - result := lo.Map(p.instanceTypesInfo, func(i ec2types.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { - InstanceTypeVCPU.Set(float64(lo.FromPtr(i.VCpuInfo.DefaultVCpus)), map[string]string{ - instanceTypeLabel: string(i.InstanceType), - }) - InstanceTypeMemory.Set(float64(lo.FromPtr(i.MemoryInfo.SizeInMiB)*1024*1024), map[string]string{ - instanceTypeLabel: string(i.InstanceType), - }) - - zoneData := lo.Map(allZones.UnsortedList(), func(zoneName string, _ int) ZoneData { - if 
!p.instanceTypesOfferings[string(i.InstanceType)].Has(zoneName) || !subnetZones.Has(zoneName) { - return ZoneData{ - Name: zoneName, - Available: false, - } - } - return ZoneData{ - Name: zoneName, - ID: subnetZoneToID[zoneName], - Available: true, - } - }) - - it := p.instanceTypesResolver.Resolve(ctx, i, zoneData, nodeClass) + return lo.Map(p.instanceTypesInfo, func(info ec2types.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { + it := p.instanceTypesResolver.Resolve(ctx, info, p.instanceTypesOfferings[string(info.InstanceType)].UnsortedList(), zonesToZoneIDs, nodeClass) if cached, ok := p.discoveredCapacityCache.Get(fmt.Sprintf("%s-%016x", it.Name, amiHash)); ok { it.Capacity[corev1.ResourceMemory] = cached.(resource.Quantity) } - for _, of := range it.Offerings { - InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{ - instanceTypeLabel: it.Name, - capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), - zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), - }) - InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{ - instanceTypeLabel: it.Name, - capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), - zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), - }) - } + InstanceTypeVCPU.Set(float64(lo.FromPtr(info.VCpuInfo.DefaultVCpus)), map[string]string{ + instanceTypeLabel: string(info.InstanceType), + }) + InstanceTypeMemory.Set(float64(lo.FromPtr(info.MemoryInfo.SizeInMiB)*1024*1024), map[string]string{ + instanceTypeLabel: string(info.InstanceType), + }) return it }) - p.instanceTypesCache.SetDefault(key, result) - return result, nil } func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // DO NOT REMOVE THIS LOCK ---------------------------------------------------------------------------- // We lock here so that multiple callers to getInstanceTypeOfferings do not result in cache misses and multiple // calls to EC2 when we could have just made one call. - // TODO @joinnis: This can be made more efficient by holding a Read lock and only obtaining the Write if not in cache p.muInstanceTypesInfo.Lock() defer p.muInstanceTypesInfo.Unlock() - var instanceTypes []ec2types.InstanceTypeInfo - + instanceTypes := []ec2types.InstanceTypeInfo{} paginator := ec2.NewDescribeInstanceTypesPaginator(p.ec2api, &ec2.DescribeInstanceTypesInput{ Filters: []ec2types.Filter{ { @@ -209,13 +196,11 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { }, }, }) - for paginator.HasMorePages() { page, err := paginator.NextPage(ctx) if err != nil { return fmt.Errorf("describing instance types, %w", err) } - instanceTypes = append(instanceTypes, page.InstanceTypes...) 
} @@ -223,8 +208,7 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // Only update instanceTypesSeqNun with the instance types have been changed // This is to not create new keys with duplicate instance types option atomic.AddUint64(&p.instanceTypesSeqNum, 1) - log.FromContext(ctx).WithValues( - "count", len(instanceTypes)).V(1).Info("discovered instance types") + log.FromContext(ctx).WithValues("count", len(instanceTypes)).V(1).Info("discovered instance types") } p.instanceTypesInfo = instanceTypes return nil @@ -267,6 +251,17 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error log.FromContext(ctx).WithValues("instance-type-count", len(instanceTypeOfferings)).V(1).Info("discovered offerings for instance types") } p.instanceTypesOfferings = instanceTypeOfferings + + allZones := sets.New[string]() + for _, offeringZones := range instanceTypeOfferings { + for zone := range offeringZones { + allZones.Insert(zone) + } + } + if p.cm.HasChanged("zones", allZones) { + log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") + } + p.allZones = allZones return nil } diff --git a/pkg/providers/instancetype/metrics.go b/pkg/providers/instancetype/metrics.go index 726c851e13b8..050c1a99b844 100644 --- a/pkg/providers/instancetype/metrics.go +++ b/pkg/providers/instancetype/metrics.go @@ -54,31 +54,4 @@ var ( instanceTypeLabel, }, ) - InstanceTypeOfferingAvailable = opmetrics.NewPrometheusGauge( - crmetrics.Registry, - prometheus.GaugeOpts{ - Namespace: metrics.Namespace, - Subsystem: cloudProviderSubsystem, - Name: "instance_type_offering_available", - Help: "Instance type offering availability, based on instance type, capacity type, and zone", - }, - []string{ - instanceTypeLabel, - capacityTypeLabel, - zoneLabel, - }, - ) - InstanceTypeOfferingPriceEstimate = opmetrics.NewPrometheusGauge( - crmetrics.Registry, - prometheus.GaugeOpts{ - Namespace: metrics.Namespace, - Subsystem: cloudProviderSubsystem, - Name: "instance_type_offering_price_estimate", - Help: "Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.", - }, - []string{ - instanceTypeLabel, - capacityTypeLabel, - zoneLabel, - }) ) diff --git a/pkg/providers/instancetype/offering/metrics.go b/pkg/providers/instancetype/offering/metrics.go new file mode 100644 index 000000000000..a4c70f1713b3 --- /dev/null +++ b/pkg/providers/instancetype/offering/metrics.go @@ -0,0 +1,61 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package offering + +import ( + opmetrics "github.com/awslabs/operatorpkg/metrics" + "github.com/prometheus/client_golang/prometheus" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "sigs.k8s.io/karpenter/pkg/metrics" +) + +const ( + cloudProviderSubsystem = "cloudprovider" + instanceTypeLabel = "instance_type" + capacityTypeLabel = "capacity_type" + zoneLabel = "zone" +) + +var ( + InstanceTypeOfferingAvailable = opmetrics.NewPrometheusGauge( + crmetrics.Registry, + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_available", + Help: "Instance type offering availability, based on instance type, capacity type, and zone", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, + }, + ) + InstanceTypeOfferingPriceEstimate = opmetrics.NewPrometheusGauge( + crmetrics.Registry, + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_price_estimate", + Help: "Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, + }, + ) +) diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go new file mode 100644 index 000000000000..fd91cd4bcd5c --- /dev/null +++ b/pkg/providers/instancetype/offering/provider.go @@ -0,0 +1,167 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package offering
+
+import (
+	"fmt"
+
+	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/samber/lo"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	"sigs.k8s.io/karpenter/pkg/cloudprovider"
+	"sigs.k8s.io/karpenter/pkg/scheduling"
+
+	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
+	awscache "github.com/aws/karpenter-provider-aws/pkg/cache"
+	"github.com/aws/karpenter-provider-aws/pkg/providers/pricing"
+)
+
+// Provider decorates instance types with their offerings: price, availability, zone, and any matching capacity
+// reservations from the EC2NodeClass's status.
+type Provider interface {
+	InjectOfferings([]*cloudprovider.InstanceType, *v1.EC2NodeClass, sets.Set[string]) []*cloudprovider.InstanceType
+}
+
+type DefaultProvider struct {
+	unavailableOfferings *awscache.UnavailableOfferings
+	pricingProvider      pricing.Provider
+}
+
+func NewDefaultProvider(unavailableOfferingsCache *awscache.UnavailableOfferings, pricingProvider pricing.Provider) *DefaultProvider {
+	return &DefaultProvider{
+		unavailableOfferings: unavailableOfferingsCache,
+		pricingProvider:      pricingProvider,
+	}
+}
+
+func (p *DefaultProvider) InjectOfferings(
+	instanceTypes []*cloudprovider.InstanceType,
+	nodeClass *v1.EC2NodeClass,
+	allZones sets.Set[string],
+) []*cloudprovider.InstanceType {
+	subnetZones := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) {
+		return s.Zone, s.ZoneID
+	})
+	its := []*cloudprovider.InstanceType{}
+	for _, it := range instanceTypes {
+		offerings := p.createOfferings(
+			it,
+			nodeClass,
+			allZones,
+			subnetZones,
+		)
+		for _, of := range offerings {
+			InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{
+				instanceTypeLabel: it.Name,
+				capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(),
+				zoneLabel:         of.Requirements.Get(corev1.LabelTopologyZone).Any(),
+			})
+			InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{
+				instanceTypeLabel: it.Name,
+				capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(),
+				zoneLabel:         of.Requirements.Get(corev1.LabelTopologyZone).Any(),
+			})
+		}
+
+		its = append(its, &cloudprovider.InstanceType{
+			Name:         it.Name,
+			Requirements: it.Requirements,
+			Offerings:    offerings,
+			Capacity:     it.Capacity,
+			Overhead:     it.Overhead,
+		})
+	}
+	return its
+}
+
+//nolint:gocyclo
+func (p *DefaultProvider) createOfferings(
+	it *cloudprovider.InstanceType,
+	nodeClass *v1.EC2NodeClass,
+	allZones sets.Set[string],
+	subnetZones map[string]string,
+) cloudprovider.Offerings {
+	itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...)
+
+	offerings := []*cloudprovider.Offering{}
+	for zone := range allZones {
+		for _, capacityType := range it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values() {
+			// Reserved capacity types are constructed separately, skip them for now.
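+			// Reserved offerings are built from nodeClass.Status.CapacityReservations in the second loop below.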
+ if capacityType == karpv1.CapacityTypeReserved { + continue + } + + isUnavailable := p.unavailableOfferings.IsUnavailable(it.Name, zone, capacityType) + _, hasSubnetZone := subnetZones[zone] + var price float64 + var hasPrice bool + switch capacityType { + case karpv1.CapacityTypeOnDemand: + price, hasPrice = p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)) + case karpv1.CapacityTypeSpot: + price, hasPrice = p.pricingProvider.SpotPrice(ec2types.InstanceType(it.Name), zone) + default: + panic(fmt.Sprintf("invalid capacity type %q in requirements for instance type %q", capacityType, it.Name)) + } + offering := &cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone), + scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist), + ), + Price: price, + Available: !isUnavailable && hasPrice && itZones.Has(zone) && hasSubnetZone, + } + if id, ok := subnetZones[zone]; ok { + offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) + } + offerings = append(offerings, offering) + } + } + + for i := range nodeClass.Status.CapacityReservations { + if nodeClass.Status.CapacityReservations[i].InstanceType != it.Name { + continue + } + reservation := &nodeClass.Status.CapacityReservations[i] + + isUnavailable := p.unavailableOfferings.IsReservationUnavailable(reservation.ID) + _, hasSubnetZone := subnetZones[reservation.AvailabilityZone] + price := 0.0 + if odPrice, ok := p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)); ok { + // Divide the on-demand price by a sufficiently large constant. This allows us to treat the reservation as "free", + // while maintaining relative ordering for consolidation. If the pricing details are unavailable for whatever reason, + // still succeed to create the offering and leave the price at zero. This will break consolidation, but will allow + // users to utilize the instances they're already paying for. 
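+			// e.g. a $1.00/hr on-demand rate resolves to a $0.0000001/hr reserved offering.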
+ price = odPrice / 10_000_000.0 + } + offering := &cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, reservation.AvailabilityZone), + scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, reservation.ID), + ), + Price: price, + Available: !isUnavailable && itZones.Has(reservation.AvailabilityZone) && hasSubnetZone, + ReservationCapacity: reservation.AvailableInstanceCount, + } + if id, ok := subnetZones[reservation.AvailabilityZone]; ok { + offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) + } + offerings = append(offerings, offering) + } + return offerings +} diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 3c5fb0992ce5..77b2343c3cb1 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -266,7 +266,10 @@ var _ = Describe("InstanceTypeProvider", func() { } // Ensure that we're exercising all well known labels - Expect(lo.Keys(nodeSelector)).To(ContainElements(append(karpv1.WellKnownLabels.UnsortedList(), lo.Keys(karpv1.NormalizedLabels)...))) + Expect(lo.Keys(nodeSelector)).To(ContainElements(append(karpv1.WellKnownLabels.Difference(sets.New( + // TODO: add back to test with a preconfigured reserved instance type + v1.LabelCapacityReservationID, + )).UnsortedList(), lo.Keys(karpv1.NormalizedLabels)...))) var pods []*corev1.Pod for key, value := range nodeSelector { @@ -317,10 +320,11 @@ var _ = Describe("InstanceTypeProvider", func() { "topology.ebs.csi.aws.com/zone": "test-zone-1a", } - // Ensure that we're exercising all well known labels except for accelerator labels + // Ensure that we're exercising all well known labels except for the accelerator and capacity reservation labels Expect(lo.Keys(nodeSelector)).To(ContainElements( append( karpv1.WellKnownLabels.Difference(sets.New( + v1.LabelCapacityReservationID, v1.LabelInstanceAcceleratorCount, v1.LabelInstanceAcceleratorName, v1.LabelInstanceAcceleratorManufacturer, @@ -369,8 +373,9 @@ var _ = Describe("InstanceTypeProvider", func() { "topology.ebs.csi.aws.com/zone": "test-zone-1a", } - // Ensure that we're exercising all well known labels except for gpu labels and nvme + // Ensure that we're exercising all well known labels except for the gpu, nvme and capacity reservation id labels expectedLabels := append(karpv1.WellKnownLabels.Difference(sets.New( + v1.LabelCapacityReservationID, v1.LabelInstanceGPUCount, v1.LabelInstanceGPUName, v1.LabelInstanceGPUManufacturer, @@ -952,6 +957,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -974,6 +981,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, windowsNodeClass.Spec.BlockDeviceMappings, windowsNodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1093,6 +1102,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1119,6 
+1130,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1141,6 +1154,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1172,6 +1187,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1210,6 +1227,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1238,6 +1257,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1266,6 +1287,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1294,6 +1317,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1324,6 +1349,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1355,6 +1382,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1383,6 +1412,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1415,6 +1446,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1434,6 +1467,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1467,6 +1502,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1498,6 +1535,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, 
nodeClass.Spec.Kubelet.MaxPods, @@ -1529,6 +1568,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1552,6 +1593,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1569,6 +1612,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1594,6 +1639,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1618,6 +1665,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1647,6 +1696,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, t3Large, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1681,6 +1732,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, t3Large, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1711,6 +1764,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1736,6 +1791,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1761,6 +1818,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1787,6 +1846,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1804,6 +1865,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 8f178418e690..65a0eb0ea8fb 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -28,15 +28,11 @@ import ( "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/util/sets" - "sigs.k8s.io/controller-runtime/pkg/log" karpv1 
"sigs.k8s.io/karpenter/pkg/apis/v1" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" - "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" @@ -62,20 +58,16 @@ type Resolver interface { // CacheKey tells the InstanceType cache if something changes about the InstanceTypes or Offerings based on the NodeClass. CacheKey(nodeClass *v1.EC2NodeClass) string // Resolve generates an InstanceType based on raw InstanceTypeInfo and NodeClass setting data - Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zoneData []ZoneData, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType + Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zones []string, zonesToZoneIDs map[string]string, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType } type DefaultResolver struct { - region string - pricingProvider pricing.Provider - unavailableOfferings *awscache.UnavailableOfferings + region string } -func NewDefaultResolver(region string, pricingProvider pricing.Provider, unavailableOfferingsCache *awscache.UnavailableOfferings) *DefaultResolver { +func NewDefaultResolver(region string) *DefaultResolver { return &DefaultResolver{ - region: region, - pricingProvider: pricingProvider, - unavailableOfferings: unavailableOfferingsCache, + region: region, } } @@ -86,16 +78,18 @@ func (d *DefaultResolver) CacheKey(nodeClass *v1.EC2NodeClass) string { } kcHash, _ := hashstructure.Hash(kc, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) blockDeviceMappingsHash, _ := hashstructure.Hash(nodeClass.Spec.BlockDeviceMappings, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - return fmt.Sprintf("%016x-%016x-%s-%s-%d", + capacityReservationHash, _ := hashstructure.Hash(nodeClass.Status.CapacityReservations, hashstructure.FormatV2, nil) + return fmt.Sprintf( + "%016x-%016x-%016x-%s-%s", kcHash, blockDeviceMappingsHash, + capacityReservationHash, lo.FromPtr((*string)(nodeClass.Spec.InstanceStorePolicy)), nodeClass.AMIFamily(), - d.unavailableOfferings.SeqNum, ) } -func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zoneData []ZoneData, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType { +func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zones []string, zonesToZoneIDs map[string]string, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType { // !!! Important !!! // Any changes to the values passed into the NewInstanceType method will require making updates to the cache key // so that Karpenter is able to cache the set of InstanceTypes based on values that alter the set of instance types @@ -104,68 +98,48 @@ func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTyp if nodeClass.Spec.Kubelet != nil { kc = nodeClass.Spec.Kubelet } - return NewInstanceType(ctx, info, d.region, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, kc.MaxPods, kc.PodsPerCore, kc.KubeReserved, - kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, nodeClass.AMIFamily(), d.createOfferings(ctx, info, zoneData)) -} - -// createOfferings creates a set of mutually exclusive offerings for a given instance type. This provider maintains an -// invariant that each offering is mutually exclusive. 
Specifically, there is an offering for each permutation of zone -// and capacity type. ZoneID is also injected into the offering requirements, when available, but there is a 1-1 -// mapping between zone and zoneID so this does not change the number of offerings. -// -// Each requirement on the offering is guaranteed to have a single value. To get the value for a requirement on an -// offering, you can do the following thanks to this invariant: -// -// offering.Requirements.Get(v1.TopologyLabelZone).Any() -func (d *DefaultResolver) createOfferings(ctx context.Context, instanceType ec2types.InstanceTypeInfo, zoneData []ZoneData) []cloudprovider.Offering { - var offerings []cloudprovider.Offering - for _, zone := range zoneData { - // while usage classes should be a distinct set, there's no guarantee of that - for capacityType := range sets.New((instanceType.SupportedUsageClasses)...) { - // exclude any offerings that have recently seen an insufficient capacity error from EC2 - isUnavailable := d.unavailableOfferings.IsUnavailable(instanceType.InstanceType, zone.Name, string(capacityType)) - var price float64 - var ok bool - switch capacityType { - case ec2types.UsageClassTypeSpot: - price, ok = d.pricingProvider.SpotPrice(instanceType.InstanceType, zone.Name) - case ec2types.UsageClassTypeOnDemand: - price, ok = d.pricingProvider.OnDemandPrice(instanceType.InstanceType) - case "capacity-block": - // ignore since karpenter doesn't support it yet, but do not log an unknown capacity type error - continue - default: - log.FromContext(ctx).WithValues("capacity-type", capacityType, "instance-type", instanceType.InstanceType).Error(fmt.Errorf("received unknown capacity type"), "failed parsing offering") - continue - } - available := !isUnavailable && ok && zone.Available - offering := cloudprovider.Offering{ - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, string(capacityType)), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone.Name), - ), - Price: price, - Available: available, - } - if zone.ID != "" { - offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, zone.ID)) - } - offerings = append(offerings, offering) - } - } - return offerings + return NewInstanceType( + ctx, + info, + d.region, + zones, + zonesToZoneIDs, + nodeClass.Spec.BlockDeviceMappings, + nodeClass.Spec.InstanceStorePolicy, + kc.MaxPods, + kc.PodsPerCore, + kc.KubeReserved, + kc.SystemReserved, + kc.EvictionHard, + kc.EvictionSoft, + nodeClass.AMIFamily(), + lo.Filter(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) bool { + return cr.InstanceType == string(info.InstanceType) + }), + ) } -func NewInstanceType(ctx context.Context, info ec2types.InstanceTypeInfo, region string, - blockDeviceMappings []*v1.BlockDeviceMapping, instanceStorePolicy *v1.InstanceStorePolicy, maxPods *int32, podsPerCore *int32, - kubeReserved map[string]string, systemReserved map[string]string, evictionHard map[string]string, evictionSoft map[string]string, - amiFamilyType string, offerings cloudprovider.Offerings) *cloudprovider.InstanceType { - +func NewInstanceType( + ctx context.Context, + info ec2types.InstanceTypeInfo, + region string, + zones []string, + zonesToZoneIDs map[string]string, + blockDeviceMappings []*v1.BlockDeviceMapping, + instanceStorePolicy *v1.InstanceStorePolicy, + maxPods *int32, + podsPerCore *int32, + kubeReserved map[string]string, + 
systemReserved map[string]string, + evictionHard map[string]string, + evictionSoft map[string]string, + amiFamilyType string, + capacityReservations []v1.CapacityReservation, +) *cloudprovider.InstanceType { amiFamily := amifamily.GetAMIFamily(amiFamilyType, &amifamily.Options{}) it := &cloudprovider.InstanceType{ Name: string(info.InstanceType), - Requirements: computeRequirements(info, offerings, region, amiFamily), - Offerings: offerings, + Requirements: computeRequirements(info, region, zones, zonesToZoneIDs, amiFamily, capacityReservations), Capacity: computeCapacity(ctx, info, amiFamily, blockDeviceMappings, instanceStorePolicy, maxPods, podsPerCore), Overhead: &cloudprovider.InstanceTypeOverhead{ KubeReserved: kubeReservedResources(cpu(info), pods(ctx, info, amiFamily, maxPods, podsPerCore), ENILimitedPods(ctx, info), amiFamily, kubeReserved), @@ -180,21 +154,34 @@ func NewInstanceType(ctx context.Context, info ec2types.InstanceTypeInfo, region } //nolint:gocyclo -func computeRequirements(info ec2types.InstanceTypeInfo, offerings cloudprovider.Offerings, region string, amiFamily amifamily.AMIFamily) scheduling.Requirements { +func computeRequirements( + info ec2types.InstanceTypeInfo, + region string, + zones []string, + zonesToZoneIDs map[string]string, + amiFamily amifamily.AMIFamily, + capacityReservations []v1.CapacityReservation, +) scheduling.Requirements { + capacityTypes := lo.FilterMap(info.SupportedUsageClasses, func(uc ec2types.UsageClassType, _ int) (string, bool) { + if uc != ec2types.UsageClassTypeOnDemand && uc != ec2types.UsageClassTypeSpot { + return "", false + } + return string(uc), true + }) + if len(capacityReservations) != 0 { + capacityTypes = append(capacityTypes, karpv1.CapacityTypeReserved) + } + requirements := scheduling.NewRequirements( // Well Known Upstream scheduling.NewRequirement(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, string(info.InstanceType)), scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(corev1.LabelTopologyZone).Any() - })...), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zones...), scheduling.NewRequirement(corev1.LabelTopologyRegion, corev1.NodeSelectorOpIn, region), scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter - scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(karpv1.CapacityTypeLabelKey).Any() - })...), + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityTypes...), // Well Known to AWS scheduling.NewRequirement(v1.LabelInstanceCPU, corev1.NodeSelectorOpIn, fmt.Sprint(lo.FromPtr(info.VCpuInfo.DefaultVCpus))), scheduling.NewRequirement(v1.LabelInstanceCPUManufacturer, corev1.NodeSelectorOpDoesNotExist), @@ -219,12 +206,19 @@ func computeRequirements(info ec2types.InstanceTypeInfo, offerings cloudprovider ) // Only add zone-id label when available in offerings. It may not be available if a user has upgraded from a // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. 
- if zoneIDs := lo.FilterMap(offerings.Available(), func(o cloudprovider.Offering, _ int) (string, bool) { - zoneID := o.Requirements.Get(v1.LabelTopologyZoneID).Any() - return zoneID, zoneID != "" + if zoneIDs := lo.FilterMap(zones, func(zone string, _ int) (string, bool) { + id, ok := zonesToZoneIDs[zone] + return id, ok }); len(zoneIDs) != 0 { requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, zoneIDs...)) } + if len(capacityReservations) != 0 { + requirements.Add(scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, lo.Map(capacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })...)) + } else { + requirements.Add(scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist)) + } // Instance Type Labels instanceFamilyParts := instanceTypeScheme.FindStringSubmatch(string(info.InstanceType)) if len(instanceFamilyParts) == 4 { diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 4e1fe5b9a653..49c8a72dc769 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -109,8 +109,14 @@ func NewDefaultProvider(ctx context.Context, cache *cache.Cache, ec2api sdk.EC2A }() return l } -func (p *DefaultProvider) EnsureAll(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, - instanceTypes []*cloudprovider.InstanceType, capacityType string, tags map[string]string) ([]*LaunchTemplate, error) { +func (p *DefaultProvider) EnsureAll( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + capacityType string, + tags map[string]string, +) ([]*LaunchTemplate, error) { p.Lock() defer p.Unlock() options, err := p.createAMIOptions(ctx, nodeClass, lo.Assign(nodeClaim.Labels, map[string]string{karpv1.CapacityTypeLabelKey: capacityType}), tags) @@ -241,6 +247,20 @@ func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFa LaunchTemplateName: aws.String(LaunchTemplateName(options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: blockDeviceMappings(options.BlockDeviceMappings), + CapacityReservationSpecification: &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ + CapacityReservationPreference: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + ec2types.CapacityReservationPreferenceCapacityReservationsOnly, + ec2types.CapacityReservationPreferenceNone, + ), + CapacityReservationTarget: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + &ec2types.CapacityReservationTarget{ + CapacityReservationId: &options.CapacityReservationID, + }, + nil, + ), + }, IamInstanceProfile: &ec2types.LaunchTemplateIamInstanceProfileSpecificationRequest{ Name: aws.String(options.InstanceProfile), }, diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index b31b02177039..596a2146eae8 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -1016,6 +1016,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1067,6 +1069,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := 
instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1092,6 +1096,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -2036,7 +2042,7 @@ essential = true nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyCustom) nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} ExpectApplied(ctx, env.Client, nodeClass) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{ { diff --git a/pkg/test/environment.go b/pkg/test/environment.go index a03b6081fb33..357af1245341 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -32,6 +32,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" @@ -76,19 +77,21 @@ type Environment struct { InstanceProfileCache *cache.Cache SSMCache *cache.Cache DiscoveredCapacityCache *cache.Cache + CapacityReservationCache *cache.Cache // Providers - InstanceTypesResolver *instancetype.DefaultResolver - InstanceTypesProvider *instancetype.DefaultProvider - InstanceProvider *instance.DefaultProvider - SubnetProvider *subnet.DefaultProvider - SecurityGroupProvider *securitygroup.DefaultProvider - InstanceProfileProvider *instanceprofile.DefaultProvider - PricingProvider *pricing.DefaultProvider - AMIProvider *amifamily.DefaultProvider - AMIResolver *amifamily.DefaultResolver - VersionProvider *version.DefaultProvider - LaunchTemplateProvider *launchtemplate.DefaultProvider + CapacityReservationProvider *capacityreservation.DefaultProvider + InstanceTypesResolver *instancetype.DefaultResolver + InstanceTypesProvider *instancetype.DefaultProvider + InstanceProvider *instance.DefaultProvider + SubnetProvider *subnet.DefaultProvider + SecurityGroupProvider *securitygroup.DefaultProvider + InstanceProfileProvider *instanceprofile.DefaultProvider + PricingProvider *pricing.DefaultProvider + AMIProvider *amifamily.DefaultProvider + AMIResolver *amifamily.DefaultResolver + VersionProvider *version.DefaultProvider + LaunchTemplateProvider *launchtemplate.DefaultProvider } func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment { @@ -113,6 +116,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment securityGroupCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) instanceProfileCache := cache.New(awscache.DefaultTTL, 
awscache.DefaultCleanupInterval) ssmCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + capacityReservationCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) fakePricingAPI := &fake.PricingAPI{} // Providers @@ -128,30 +132,30 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment ssmProvider := ssmp.NewDefaultProvider(ssmapi, ssmCache) amiProvider := amifamily.NewDefaultProvider(clock, versionProvider, ssmProvider, ec2api, ec2Cache) amiResolver := amifamily.NewDefaultResolver() - instanceTypesResolver := instancetype.NewDefaultResolver(fake.DefaultRegion, pricingProvider, unavailableOfferingsCache) - instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, discoveredCapacityCache, ec2api, subnetProvider, instanceTypesResolver) - launchTemplateProvider := - launchtemplate.NewDefaultProvider( - ctx, - launchTemplateCache, - ec2api, - eksapi, - amiResolver, - securityGroupProvider, - subnetProvider, - lo.ToPtr("ca-bundle"), - make(chan struct{}), - net.ParseIP("10.0.100.10"), - "https://test-cluster", - ) - instanceProvider := - instance.NewDefaultProvider(ctx, - "", - ec2api, - unavailableOfferingsCache, - subnetProvider, - launchTemplateProvider, - ) + instanceTypesResolver := instancetype.NewDefaultResolver(fake.DefaultRegion) + instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, discoveredCapacityCache, ec2api, subnetProvider, pricingProvider, unavailableOfferingsCache, instanceTypesResolver) + launchTemplateProvider := launchtemplate.NewDefaultProvider( + ctx, + launchTemplateCache, + ec2api, + eksapi, + amiResolver, + securityGroupProvider, + subnetProvider, + lo.ToPtr("ca-bundle"), + make(chan struct{}), + net.ParseIP("10.0.100.10"), + "https://test-cluster", + ) + instanceProvider := instance.NewDefaultProvider( + ctx, + "", + ec2api, + unavailableOfferingsCache, + subnetProvider, + launchTemplateProvider, + ) + capacityReservationProvider := capacityreservation.NewProvider(ec2api, clock, capacityReservationCache) return &Environment{ Clock: clock, @@ -173,18 +177,20 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment UnavailableOfferingsCache: unavailableOfferingsCache, SSMCache: ssmCache, DiscoveredCapacityCache: discoveredCapacityCache, + CapacityReservationCache: capacityReservationCache, - InstanceTypesResolver: instanceTypesResolver, - InstanceTypesProvider: instanceTypesProvider, - InstanceProvider: instanceProvider, - SubnetProvider: subnetProvider, - SecurityGroupProvider: securityGroupProvider, - LaunchTemplateProvider: launchTemplateProvider, - InstanceProfileProvider: instanceProfileProvider, - PricingProvider: pricingProvider, - AMIProvider: amiProvider, - AMIResolver: amiResolver, - VersionProvider: versionProvider, + CapacityReservationProvider: capacityReservationProvider, + InstanceTypesResolver: instanceTypesResolver, + InstanceTypesProvider: instanceTypesProvider, + InstanceProvider: instanceProvider, + SubnetProvider: subnetProvider, + SecurityGroupProvider: securityGroupProvider, + LaunchTemplateProvider: launchTemplateProvider, + InstanceProfileProvider: instanceProfileProvider, + PricingProvider: pricingProvider, + AMIProvider: amiProvider, + AMIResolver: amiResolver, + VersionProvider: versionProvider, } } From 777339764854401ebf2fa4c86ab9087f78ee79a8 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 21 Feb 2025 02:57:57 -0800 Subject: [PATCH 02/16] checkpoint --- Makefile | 3 +- 
charts/karpenter/templates/deployment.yaml | 2 +- charts/karpenter/values.yaml | 9 +- cmd/controller/main.go | 1 + go.mod | 2 + go.sum | 4 +- hack/tools/allocatable_diff/main.go | 1 + pkg/apis/v1/ec2nodeclass_status.go | 2 +- pkg/cache/unavailableofferings.go | 13 +- pkg/cloudprovider/cloudprovider.go | 40 +++-- pkg/cloudprovider/suite_test.go | 2 +- pkg/controllers/controllers.go | 4 +- pkg/controllers/interruption/suite_test.go | 2 +- .../nodeclaim/garbagecollection/suite_test.go | 2 +- .../nodeclaim/tagging/suite_test.go | 2 +- .../nodeclass/capacityreservation.go | 16 +- .../instancetype/capacity/suite_test.go | 2 +- pkg/errors/errors.go | 7 + pkg/operator/operator.go | 4 +- pkg/providers/amifamily/resolver.go | 34 ++-- pkg/providers/capacityreservation/provider.go | 59 ++++--- pkg/providers/capacityreservation/types.go | 80 +++++++++ pkg/providers/instance/instance.go | 162 +++++++++++++----- pkg/providers/instance/suite_test.go | 2 +- pkg/providers/instance/types.go | 57 +++--- .../instancetype/offering/provider.go | 16 +- pkg/providers/instancetype/suite_test.go | 4 +- .../launchtemplate/launchtemplate.go | 14 +- pkg/providers/launchtemplate/suite_test.go | 2 +- pkg/test/environment.go | 54 +++--- 30 files changed, 411 insertions(+), 191 deletions(-) diff --git a/Makefile b/Makefile index 0fcce58d5b9b..1d0bac23058b 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,9 @@ HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${K --set controller.resources.requests.memory=1Gi \ --set controller.resources.limits.cpu=1 \ --set controller.resources.limits.memory=1Gi \ - --set settings.featureGates.spotToSpotConsolidation=true \ --set settings.featureGates.nodeRepair=true \ + --set settings.featureGates.reservedCapacity=true \ + --set settings.featureGates.spotToSpotConsolidation=true \ --create-namespace # CR for local builds of Karpenter diff --git a/charts/karpenter/templates/deployment.yaml b/charts/karpenter/templates/deployment.yaml index edd2aea0429a..0f2a69e89592 100644 --- a/charts/karpenter/templates/deployment.yaml +++ b/charts/karpenter/templates/deployment.yaml @@ -107,7 +107,7 @@ spec: divisor: "0" resource: limits.memory - name: FEATURE_GATES - value: "SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}" + value: "ReservedCapacity={{ .Values.settings.featureGates.reservedCapacity }},SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}" {{- with .Values.settings.batchMaxDuration }} - name: BATCH_MAX_DURATION value: "{{ . }}" diff --git a/charts/karpenter/values.yaml b/charts/karpenter/values.yaml index bc24a5852ee9..da1680e49beb 100644 --- a/charts/karpenter/values.yaml +++ b/charts/karpenter/values.yaml @@ -184,9 +184,12 @@ settings: # -- Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates # in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features featureGates: - # -- spotToSpotConsolidation is ALPHA and is disabled by default. - # Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation. - spotToSpotConsolidation: false # -- nodeRepair is ALPHA and is disabled by default. # Setting this to true will enable node repair. 
nodeRepair: false + # -- reservedCapacity is ALPHA and is disabled by default. + # Setting this will enable native on-demand capacity reservation support. + reservedCapacity: false + # -- spotToSpotConsolidation is ALPHA and is disabled by default. + # Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation. + spotToSpotConsolidation: false diff --git a/cmd/controller/main.go b/cmd/controller/main.go index ea7df8a5fd6e..7761bdfebed1 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -35,6 +35,7 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, + op.CapacityReservationProvider, ) cloudProvider := metrics.Decorate(awsCloudProvider) clusterState := state.NewCluster(op.Clock, op.GetClient(), cloudProvider) diff --git a/go.mod b/go.mod index cadcca7e944c..a7671b761385 100644 --- a/go.mod +++ b/go.mod @@ -119,3 +119,5 @@ require ( sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect ) + +replace sigs.k8s.io/karpenter => github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8 diff --git a/go.sum b/go.sum index 0db46a26c9d5..aa288ae00a06 100644 --- a/go.sum +++ b/go.sum @@ -116,6 +116,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8 h1:K89kW02bTZkegQnJPlOHSTt+a7WXGQOfrt+pP7lBJos= +github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1 h1:gmpuckrozJ3lfKqSIia9YMGh0caoQmEY7mQP5MsnbTM= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1/go.mod h1:168XvZFghCqo32ISSWnTXwdlMKzEq+x9TqdfswCjkrQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -342,8 +344,6 @@ sigs.k8s.io/controller-runtime v0.20.2 h1:/439OZVxoEc02psi1h4QO3bHzTgu49bb347Xp4 sigs.k8s.io/controller-runtime v0.20.2/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/karpenter v1.2.1-0.20250221214626-1c6342c8405e h1:UQLz2DYoXnrIN/WVjKQWS6beVS6di3r7KRVimHp/x6s= -sigs.k8s.io/karpenter v1.2.1-0.20250221214626-1c6342c8405e/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/tools/allocatable_diff/main.go b/hack/tools/allocatable_diff/main.go index d686d9330740..385c5e78598f 100644 --- a/hack/tools/allocatable_diff/main.go +++ b/hack/tools/allocatable_diff/main.go @@ -78,6 +78,7 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, + op.CapacityReservationProvider, ) instanceTypes := lo.Must(cloudProvider.GetInstanceTypes(ctx, nil)) diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 
532fd48f4341..28308900e2bc 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -74,7 +74,7 @@ type CapacityReservation struct { AvailabilityZone string `json:"availabilityZone"` // The last known available instance count for the capacity reservation. // +required - AvailableInstanceCount int `json:"availableInstanceCount" hash:"ignore"` + AvailableInstanceCount int `json:"availableInstanceCount,omitempty" hash:"ignore"` // The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter // will no longer be able to launch instances into that reservation. // +optional diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go index 8efd3a2e5c42..e9b10154de35 100644 --- a/pkg/cache/unavailableofferings.go +++ b/pkg/cache/unavailableofferings.go @@ -53,10 +53,6 @@ func (u *UnavailableOfferings) IsUnavailable(instanceType string, zone, capacity return found } -func (u *UnavailableOfferings) IsReservationUnavailable(reservationID string) bool { - return false -} - // MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) { // even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL @@ -65,7 +61,8 @@ func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableR "instance-type", instanceType, "zone", zone, "capacity-type", capacityType, - "ttl", UnavailableOfferingsTTL).V(1).Info("removing offering from offerings") + "ttl", UnavailableOfferingsTTL, + ).V(1).Info("removing offering from offerings") u.cache.SetDefault(u.key(instanceType, zone, capacityType), struct{}{}) atomic.AddUint64(&u.SeqNum, 1) } @@ -76,7 +73,7 @@ func (u *UnavailableOfferings) MarkUnavailableForFleetErr(ctx context.Context, f u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), string(instanceType), zone, capacityType) } -func (u *UnavailableOfferings) Delete(instanceType, zone, capacityType string) { +func (u *UnavailableOfferings) DeleteOffering(instanceType, zone, capacityType string) { u.cache.Delete(u.key(instanceType, zone, capacityType)) } @@ -85,6 +82,6 @@ func (u *UnavailableOfferings) Flush() { } // key returns the cache key for all offerings in the cache -func (u *UnavailableOfferings) key(instanceType, zone, capacityType string) string { - return fmt.Sprintf("%s:%s:%s", capacityType, instanceType, zone) +func (*UnavailableOfferings) key(instanceType, zone, capacityType string) string { + return fmt.Sprintf("o:%s:%s:%s", capacityType, instanceType, zone) } diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 6bb838cd67e5..11c32ce04a30 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -46,6 +46,7 @@ import ( cloudproviderevents "github.com/aws/karpenter-provider-aws/pkg/cloudprovider/events" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" @@ -59,21 +60,30 @@ type CloudProvider struct { kubeClient client.Client recorder events.Recorder - instanceTypeProvider instancetype.Provider - instanceProvider 
instance.Provider - amiProvider amifamily.Provider - securityGroupProvider securitygroup.Provider + instanceTypeProvider instancetype.Provider + instanceProvider instance.Provider + amiProvider amifamily.Provider + securityGroupProvider securitygroup.Provider + capacityReservationProvider capacityreservation.Provider } -func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.Provider, recorder events.Recorder, - kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider) *CloudProvider { +func New( + instanceTypeProvider instancetype.Provider, + instanceProvider instance.Provider, + recorder events.Recorder, + kubeClient client.Client, + amiProvider amifamily.Provider, + securityGroupProvider securitygroup.Provider, + capacityReservationProvider capacityreservation.Provider, +) *CloudProvider { return &CloudProvider{ - instanceTypeProvider: instanceTypeProvider, - instanceProvider: instanceProvider, - kubeClient: kubeClient, - amiProvider: amiProvider, - securityGroupProvider: securityGroupProvider, - recorder: recorder, + instanceTypeProvider: instanceTypeProvider, + instanceProvider: instanceProvider, + kubeClient: kubeClient, + amiProvider: amiProvider, + securityGroupProvider: securityGroupProvider, + capacityReservationProvider: capacityReservationProvider, + recorder: recorder, } } @@ -111,6 +121,9 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) if err != nil { return nil, fmt.Errorf("creating instance, %w", err) } + if instance.CapacityType == karpv1.CapacityTypeReserved { + c.capacityReservationProvider.MarkLaunched(instance.CapacityReservationID) + } instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool { return i.Name == string(instance.Type) }) @@ -399,6 +412,9 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * } } labels[karpv1.CapacityTypeLabelKey] = i.CapacityType + if i.CapacityType == karpv1.CapacityTypeReserved { + labels[cloudprovider.ReservationIDLabel] = i.CapacityReservationID + } if v, ok := i.Tags[karpv1.NodePoolLabelKey]; ok { labels[karpv1.NodePoolLabelKey] = v } diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 55fd0dc8059f..39d84ba1bb7a 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -88,7 +88,7 @@ var _ = BeforeSuite(func() { fakeClock = clock.NewFakeClock(time.Now()) recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster, fakeClock) }) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 2c17f7013794..74d8058eab4c 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -34,7 +34,7 @@ import ( controllerspricing "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/pricing" ssminvalidation "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/ssm/invalidation" controllersversion "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/version" - "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" + 
capacityreservationprovider "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/version" @@ -80,7 +80,7 @@ func NewControllers( launchTemplateProvider launchtemplate.Provider, versionProvider *version.DefaultProvider, instanceTypeProvider *instancetype.DefaultProvider, - capacityReservationProvider capacityreservation.Provider, + capacityReservationProvider capacityreservationprovider.Provider, ) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), diff --git a/pkg/controllers/interruption/suite_test.go b/pkg/controllers/interruption/suite_test.go index 042131d03164..cb119a240ff5 100644 --- a/pkg/controllers/interruption/suite_test.go +++ b/pkg/controllers/interruption/suite_test.go @@ -91,7 +91,7 @@ var _ = BeforeSuite(func() { sqsapi = &fake.SQSAPI{} sqsProvider = lo.Must(sqs.NewDefaultProvider(sqsapi, fmt.Sprintf("https://sqs.%s.amazonaws.com/%s/test-cluster", fake.DefaultRegion, fake.DefaultAccount))) cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) controller = interruption.NewController(env.Client, cloudProvider, fakeClock, events.NewRecorder(&record.FakeRecorder{}), sqsProvider, unavailableOfferingsCache) }) diff --git a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go index db2b76bf6f2b..2ab7dca2c3f7 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go @@ -65,7 +65,7 @@ var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider) }) diff --git a/pkg/controllers/nodeclaim/tagging/suite_test.go b/pkg/controllers/nodeclaim/tagging/suite_test.go index 5f284108543f..627d1e6c8ddb 100644 --- a/pkg/controllers/nodeclaim/tagging/suite_test.go +++ b/pkg/controllers/nodeclaim/tagging/suite_test.go @@ -64,7 +64,7 @@ var _ = BeforeSuite(func() { ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) taggingController = tagging.NewController(env.Client, cloudProvider, awsEnv.InstanceProvider) }) var _ = AfterSuite(func() { diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go index dac3c238ba36..dbcad52186aa 100644 --- a/pkg/controllers/nodeclass/capacityreservation.go +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -98,14 +98,14 @@ func 
capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityRe } return v1.CapacityReservation{ - AvailabilityZone: *cr.AvailabilityZone, - AvailableInstanceCount: int(*cr.AvailableInstanceCount), - EndTime: endTime, - ID: *cr.CapacityReservationId, - InstanceMatchCriteria: string(cr.InstanceMatchCriteria), - InstanceType: *cr.InstanceType, - OwnerID: *cr.OwnerId, - TotalInstanceCount: int(*cr.TotalInstanceCount), + AvailabilityZone: *cr.AvailabilityZone, + // AvailableInstanceCount: int(*cr.AvailableInstanceCount), + EndTime: endTime, + ID: *cr.CapacityReservationId, + InstanceMatchCriteria: string(cr.InstanceMatchCriteria), + InstanceType: *cr.InstanceType, + OwnerID: *cr.OwnerId, + TotalInstanceCount: int(*cr.TotalInstanceCount), }, nil } diff --git a/pkg/controllers/providers/instancetype/capacity/suite_test.go b/pkg/controllers/providers/instancetype/capacity/suite_test.go index 246d1656d71d..b6a6e5f272d7 100644 --- a/pkg/controllers/providers/instancetype/capacity/suite_test.go +++ b/pkg/controllers/providers/instancetype/capacity/suite_test.go @@ -80,7 +80,7 @@ var _ = BeforeSuite(func() { nodeClaim = coretest.NodeClaim() node = coretest.Node() cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) controller = controllersinstancetypecapacity.NewController(env.Client, cloudProvider, awsEnv.InstanceTypesProvider) }) diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index d3dfe9e1c8ea..22d57c569cc3 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -42,6 +42,8 @@ var ( "EntityAlreadyExists", ) + reservationCapacityExceededErrorCode = "ReservationCapacityExceeded" + // unfulfillableCapacityErrorCodes signify that capacity is temporarily unable to be launched unfulfillableCapacityErrorCodes = sets.New[string]( "InsufficientInstanceCapacity", @@ -50,6 +52,7 @@ var ( "UnfulfillableCapacity", "Unsupported", "InsufficientFreeAddressesInSubnet", + reservationCapacityExceededErrorCode, ) ) @@ -135,6 +138,10 @@ func IsUnfulfillableCapacity(err ec2types.CreateFleetError) bool { return unfulfillableCapacityErrorCodes.Has(*err.ErrorCode) } +func IsReservationCapacityExceeded(err ec2types.CreateFleetError) bool { + return *err.ErrorCode == reservationCapacityExceededErrorCode +} + func IsLaunchTemplateNotFound(err error) bool { if err == nil { return false diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 398183a7ae63..ac4dfc71d4de 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -23,6 +23,7 @@ import ( "net" "os" "strings" + "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/aws/middleware" @@ -183,6 +184,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont unavailableOfferingsCache, instancetype.NewDefaultResolver(cfg.Region), ) + capacityReservationProvider := capacityreservation.NewProvider(ec2api, operator.Clock, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), cache.New(time.Hour*24, awscache.DefaultCleanupInterval)) instanceProvider := instance.NewDefaultProvider( ctx, cfg.Region, @@ -190,8 +192,8 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont unavailableOfferingsCache, subnetProvider, launchTemplateProvider, + capacityReservationProvider, ) - capacityReservationProvider 
:= capacityreservation.NewProvider(ec2api, operator.Clock, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) // Setup field indexers on instanceID -- specifically for the interruption controller if options.FromContext(ctx).InterruptionQueue != "" { diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 84d53eaa1f7b..e8a9e213558d 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -147,15 +147,6 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N reservationIDs string } paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(it *cloudprovider.InstanceType) launchTemplateParams { - var reservationIDs []string - if capacityType == karpv1.CapacityTypeReserved { - for i := range it.Offerings { - if it.Offerings[i].Requirements.Get(karpv1.CapacityTypeLabelKey).Any() != karpv1.CapacityTypeReserved { - continue - } - reservationIDs = append(reservationIDs, it.Offerings[i].Requirements.Get(cloudprovider.ReservationIDLabel).Any()) - } - } return launchTemplateParams{ efaCount: lo.Ternary( lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA), @@ -166,7 +157,11 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N // If we're dealing with reserved instances, there's only going to be a single instance per group. This invariant // is due to reservation IDs not being shared across instance types. Because of this, we don't need to worry about // ordering in this string. - reservationIDs: strings.Join(reservationIDs, ","), + reservationIDs: lo.Ternary( + capacityType == karpv1.CapacityTypeReserved, + strings.Join(selectReservationIDs(it, nodeClaim), ","), + "", + ), } }) @@ -178,6 +173,25 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N return resolvedTemplates, nil } +// selectReservationIDs filters the set of reservation IDs available on the given instance type to only include those +// that are compatible with the given NodeClaim. Additionally, if there are multiple reservations available in the same +// zone, only the reservation with the greatest availability is selected. This is to address a limitation in the +// CreateFleet interface, where you can only provide one override for a given instance-zone combination. 
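+//
+// For example (hypothetical IDs), if an m5.large has reservations cr-aaa (5 instances available) and cr-bbb
+// (2 available) in the same zone, only cr-aaa is selected for that zone; a reservation in a different zone is
+// selected independently.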
+func selectReservationIDs(it *cloudprovider.InstanceType, nodeClaim *karpv1.NodeClaim) []string { + zonalOfferings := map[string]*cloudprovider.Offering{} + for _, o := range it.Offerings.Available().Compatible(scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)) { + if o.CapacityType() != karpv1.CapacityTypeReserved { + continue + } + if current, ok := zonalOfferings[o.Zone()]; !ok || current.ReservationCapacity < o.ReservationCapacity { + zonalOfferings[o.Zone()] = o + } + } + return lo.Map(lo.Values(zonalOfferings), func(o *cloudprovider.Offering, _ int) string { + return o.ReservationID() + }) +} + func GetAMIFamily(amiFamily string, options *Options) AMIFamily { switch amiFamily { case v1.AMIFamilyBottlerocket: diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go index 753df300cf80..2a136debaaa3 100644 --- a/pkg/providers/capacityreservation/provider.go +++ b/pkg/providers/capacityreservation/provider.go @@ -17,7 +17,6 @@ package capacityreservation import ( "context" "fmt" - "sync" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" @@ -32,48 +31,50 @@ import ( type Provider interface { List(context.Context, ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) + GetAvailableInstanceCount(string) int + MarkLaunched(string) + MarkTerminated(string) + MarkUnavailable(...string) } type DefaultProvider struct { - sync.RWMutex + availabilityCache - ec2api sdk.EC2API - clk clock.Clock - cache *cache.Cache - cm *pretty.ChangeMonitor + ec2api sdk.EC2API + clk clock.Clock + reservationCache *cache.Cache + cm *pretty.ChangeMonitor } -func NewProvider(ec2api sdk.EC2API, clk clock.Clock, cache *cache.Cache) *DefaultProvider { +func NewProvider(ec2api sdk.EC2API, clk clock.Clock, reservationCache, reservationAvailabilityCache *cache.Cache) *DefaultProvider { return &DefaultProvider{ - ec2api: ec2api, - clk: clk, - cache: cache, - cm: pretty.NewChangeMonitor(), + availabilityCache: availabilityCache{ + cache: reservationAvailabilityCache, + clk: clk, + }, + ec2api: ec2api, + clk: clk, + reservationCache: reservationCache, + cm: pretty.NewChangeMonitor(), } } func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) { queries := QueriesFromSelectorTerms(selectorTerms...) - reservations, remainingQueries := func() ([]*ec2types.CapacityReservation, []*Query) { - p.RLock() - defer p.RUnlock() - reservations := []*ec2types.CapacityReservation{} - remaining := []*Query{} - for _, query := range queries { - if value, ok := p.cache.Get(query.CacheKey()); ok { - reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) - } else { - remaining = append(remaining, query) - } + + var reservations []*ec2types.CapacityReservation + var remainingQueries []*Query + for _, query := range queries { + if value, ok := p.reservationCache.Get(query.CacheKey()); ok { + reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) 
+ } else { + remainingQueries = append(remainingQueries, query) } - return reservations, remaining - }() + } if len(remainingQueries) == 0 { return p.filterReservations(reservations), nil } - p.Lock() - defer p.Unlock() for _, query := range remainingQueries { paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, query.DescribeCapacityReservationsInput()) for paginator.HasMorePages() { @@ -82,10 +83,14 @@ func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.Capacity return nil, fmt.Errorf("listing capacity reservations, %w", err) } queryReservations := lo.ToSlicePtr(out.CapacityReservations) - p.cache.SetDefault(query.CacheKey(), queryReservations) + p.reservationCache.SetDefault(query.CacheKey(), queryReservations) reservations = append(reservations, queryReservations...) + p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { + return *r.CapacityReservationId, int(*r.AvailableInstanceCount) + })) } } + return p.filterReservations(reservations), nil } diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go index ae8b857e2d0d..bd970b0c33b4 100644 --- a/pkg/providers/capacityreservation/types.go +++ b/pkg/providers/capacityreservation/types.go @@ -16,11 +16,15 @@ package capacityreservation import ( "fmt" + "sync" + "time" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/mitchellh/hashstructure/v2" + "github.com/patrickmn/go-cache" "github.com/samber/lo" + "k8s.io/utils/clock" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" ) @@ -103,3 +107,79 @@ func (q *Query) tagsFilter() ([]ec2types.Filter, bool) { }), len(q.tags) != 0 } + +type availabilityCache struct { + mu sync.RWMutex + cache *cache.Cache + clk clock.Clock +} + +type availabilityCacheEntry struct { + count int + syncTime time.Time +} + +func (c *availabilityCache) syncAvailability(availability map[string]int) { + now := c.clk.Now() + c.mu.Lock() + defer c.mu.Unlock() + for id, count := range availability { + c.cache.SetDefault(id, &availabilityCacheEntry{ + count: count, + syncTime: now, + }) + } +} + +func (c *availabilityCache) MarkLaunched(reservationID string) { + now := c.clk.Now() + c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.cache.Get(reservationID) + if !ok { + return + } + // Only count the launch if it occurred before the last sync from EC2. In the worst case, this will lead to us + // overestimating availability if there's an eventual consistency delay with EC2, but we'd rather overestimate than + // underestimate. + if entry.(*availabilityCacheEntry).syncTime.After(now) { + return + } + + if entry.(*availabilityCacheEntry).count != 0 { + entry.(*availabilityCacheEntry).count -= 1 + } +} + +func (c *availabilityCache) MarkTerminated(reservationID string) { + // We don't do a time based comparison for CountTerminated because the reservation becomes available some time between + // the termination call and the instance state transitioning to terminated. This can be a pretty big gap, so a time + // based comparison would have limited value. In the worst case, this can result in us overestimating the available + // capacity, but we'd rather overestimate than underestimate. 
+ c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.cache.Get(reservationID) + if !ok { + return + } + entry.(*availabilityCacheEntry).count += 1 +} + +func (c *availabilityCache) GetAvailableInstanceCount(reservationID string) int { + c.mu.RLock() + defer c.mu.RUnlock() + entry, ok := c.cache.Get(reservationID) + return lo.Ternary(ok, entry.(*availabilityCacheEntry).count, 0) +} + +func (c *availabilityCache) MarkUnavailable(reservationIDs ...string) { + c.mu.Lock() + defer c.mu.Unlock() + for _, id := range reservationIDs { + entry, ok := c.cache.Get(id) + if !ok { + continue + } + entry.(*availabilityCacheEntry).count = 0 + } +} diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 3d66525ecb8e..8a52c8a56ee7 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -43,6 +43,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/cache" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" @@ -77,23 +78,32 @@ type Provider interface { } type DefaultProvider struct { - region string - ec2api sdk.EC2API - unavailableOfferings *cache.UnavailableOfferings - subnetProvider subnet.Provider - launchTemplateProvider launchtemplate.Provider - ec2Batcher *batcher.EC2API + region string + ec2api sdk.EC2API + unavailableOfferings *cache.UnavailableOfferings + subnetProvider subnet.Provider + launchTemplateProvider launchtemplate.Provider + ec2Batcher *batcher.EC2API + capacityReservationProvider capacityreservation.Provider } -func NewDefaultProvider(ctx context.Context, region string, ec2api sdk.EC2API, unavailableOfferings *cache.UnavailableOfferings, - subnetProvider subnet.Provider, launchTemplateProvider launchtemplate.Provider) *DefaultProvider { +func NewDefaultProvider( + ctx context.Context, + region string, + ec2api sdk.EC2API, + unavailableOfferings *cache.UnavailableOfferings, + subnetProvider subnet.Provider, + launchTemplateProvider launchtemplate.Provider, + capacityReservationProvider capacityreservation.Provider, +) *DefaultProvider { return &DefaultProvider{ - region: region, - ec2api: ec2api, - unavailableOfferings: unavailableOfferings, - subnetProvider: subnetProvider, - launchTemplateProvider: launchTemplateProvider, - ec2Batcher: batcher.EC2(ctx, ec2api), + region: region, + ec2api: ec2api, + unavailableOfferings: unavailableOfferings, + subnetProvider: subnetProvider, + launchTemplateProvider: launchTemplateProvider, + ec2Batcher: batcher.EC2(ctx, ec2api), + capacityReservationProvider: capacityReservationProvider, } } @@ -105,7 +115,9 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass } // We filter out non-reserved instances regardless of the min-values settings, since if the launch is eligible for // reserved instances that's all we'll include in our fleet request. 
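// A sketch of the availability bookkeeping defined above, with the timing rule
// spelled out as the code implements it: a launch observed after the most
// recent EC2 sync decrements the cached count (the sync could not have seen it
// yet), a launch the sync already reflects is skipped, terminations credit the
// count back with no time check, and MarkUnavailable zeroes it until the next
// sync. The reservation ID and counts are illustrative.
func availabilityLifecycle(c *availabilityCache) {
	c.syncAvailability(map[string]int{"cr-0123456789abcdef0": 4})
	c.MarkLaunched("cr-0123456789abcdef0")    // 4 -> 3
	c.MarkTerminated("cr-0123456789abcdef0")  // 3 -> 4
	c.MarkUnavailable("cr-0123456789abcdef0") // 4 -> 0
	_ = c.GetAvailableInstanceCount("cr-0123456789abcdef0") // 0 until the next sync
}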
- instanceTypes = p.filterReservedInstanceTypes(nodeClaim, instanceTypes) + if reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...); reqs.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) { + instanceTypes = p.filterReservedInstanceTypes(reqs, instanceTypes) + } instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) if err != nil { return nil, cloudprovider.NewCreateError(fmt.Errorf("truncating instance types, %w", err), "InstanceTypeResolutionFailed", "Error truncating instance types based on the passed-in requirements") @@ -119,8 +131,23 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass if err != nil { return nil, err } - efaEnabled := lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA) - return NewInstanceFromFleet(fleetInstance, tags, efaEnabled), nil + + capacityType := p.getCapacityType(nodeClaim, instanceTypes) + var capacityReservation string + if capacityType == karpv1.CapacityTypeReserved { + capacityReservation = p.getCapacityReservationForInstance( + string(fleetInstance.InstanceType), + *fleetInstance.LaunchTemplateAndOverrides.Overrides.AvailabilityZone, + instanceTypes, + ) + } + return NewInstanceFromFleet( + fleetInstance, + tags, + capacityType, + capacityReservation, + lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA), + ), nil } func (p *DefaultProvider) Get(ctx context.Context, id string) (*Instance, error) { @@ -252,7 +279,7 @@ func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1.EC2N } return ec2types.CreateFleetInstance{}, cloudprovider.NewCreateError(fmt.Errorf("creating fleet request, %w", err), reason, fmt.Sprintf("Error creating fleet request: %s", message)) } - p.updateUnavailableOfferingsCache(ctx, createFleetOutput.Errors, capacityType) + p.updateUnavailableOfferingsCache(ctx, createFleetOutput.Errors, capacityType, instanceTypes) if len(createFleetOutput.Instances) == 0 || len(createFleetOutput.Instances[0].InstanceIds) == 0 { return ec2types.CreateFleetInstance{}, combineFleetErrors(createFleetOutput.Errors) } @@ -265,12 +292,11 @@ func GetCreateFleetInput(nodeClass *v1.EC2NodeClass, capacityType string, tags m Context: nodeClass.Spec.Context, LaunchTemplateConfigs: launchTemplateConfigs, TargetCapacitySpecification: &ec2types.TargetCapacitySpecificationRequest{ - DefaultTargetCapacityType: func() ec2types.DefaultTargetCapacityType { - if capacityType == karpv1.CapacityTypeReserved { - return ec2types.DefaultTargetCapacityType(karpv1.CapacityTypeOnDemand) - } - return ec2types.DefaultTargetCapacityType(capacityType) - }(), + DefaultTargetCapacityType: lo.Ternary( + capacityType == karpv1.CapacityTypeReserved, + ec2types.DefaultTargetCapacityType(karpv1.CapacityTypeOnDemand), + ec2types.DefaultTargetCapacityType(capacityType), + ), TotalTargetCapacity: aws.Int32(1), }, TagSpecifications: []ec2types.TagSpecification{ @@ -319,7 +345,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs( requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType) for _, launchTemplate := range launchTemplates { launchTemplateConfig := ec2types.FleetLaunchTemplateConfigRequest{ - Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, launchTemplate.ImageID), + Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, 
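// A sketch of the capacity-type translation above: CreateFleet has no
// "reserved" target capacity type, so reserved launches are submitted as
// on-demand and are steered into the reservation by the launch template's
// capacity reservation target instead (see the launch template changes later
// in this series).
func defaultTargetCapacityType(capacityType string) ec2types.DefaultTargetCapacityType {
	if capacityType == karpv1.CapacityTypeReserved {
		return ec2types.DefaultTargetCapacityType(karpv1.CapacityTypeOnDemand)
	}
	return ec2types.DefaultTargetCapacityType(capacityType)
}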
launchTemplate.ImageID, launchTemplate.CapacityReservationID), LaunchTemplateSpecification: &ec2types.FleetLaunchTemplateSpecificationRequest{ LaunchTemplateName: aws.String(launchTemplate.Name), Version: aws.String("$Latest"), @@ -337,7 +363,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs( // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image string) []ec2types.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image, capacityReservationID string) []ec2types.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { @@ -356,6 +382,9 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy } var overrides []ec2types.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { + if capacityReservationID != "" && offering.ReservationID() != capacityReservationID { + continue + } if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } @@ -375,12 +404,53 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy return overrides } -func (p *DefaultProvider) updateUnavailableOfferingsCache(ctx context.Context, errors []ec2types.CreateFleetError, capacityType string) { - for _, err := range errors { - if awserrors.IsUnfulfillableCapacity(err) { - p.unavailableOfferings.MarkUnavailableForFleetErr(ctx, err, capacityType) +func (p *DefaultProvider) updateUnavailableOfferingsCache( + ctx context.Context, + errs []ec2types.CreateFleetError, + capacityType string, + instanceTypes []*cloudprovider.InstanceType, +) { + if capacityType != karpv1.CapacityTypeReserved { + for _, err := range errs { + if awserrors.IsUnfulfillableCapacity(err) { + p.unavailableOfferings.MarkUnavailableForFleetErr(ctx, err, capacityType) + } + } + return + } + + reservationIDs := make([]string, 0, len(errs)) + for i := range errs { + id := p.getCapacityReservationForInstance( + string(errs[i].LaunchTemplateAndOverrides.Overrides.InstanceType), + lo.FromPtr(errs[i].LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + instanceTypes, + ) + reservationIDs = append(reservationIDs, id) + log.FromContext(ctx).WithValues( + "reason", lo.FromPtr(errs[i].ErrorCode), + "instance-type", errs[i].LaunchTemplateAndOverrides.Overrides.InstanceType, + "zone", lo.FromPtr(errs[i].LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + "capacity-reservation-id", id, + ).V(1).Info("marking capacity reservation unavailable") + } + p.capacityReservationProvider.MarkUnavailable(reservationIDs...) 
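// A sketch of the override filtering above: when a launch template is pinned
// to one capacity reservation, only offerings for that reservation may become
// CreateFleet overrides; an empty ID means the template is not reservation
// scoped and all offerings pass through. offeringStub is a simplified stand-in
// for cloudprovider.Offering.
type offeringStub struct {
	ReservationID string
	Zone          string
}

func overridesFor(offerings []offeringStub, capacityReservationID string) []offeringStub {
	if capacityReservationID == "" {
		return offerings
	}
	var out []offeringStub
	for _, o := range offerings {
		if o.ReservationID == capacityReservationID {
			out = append(out, o)
		}
	}
	return out
}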
+} + +func (p *DefaultProvider) getCapacityReservationForInstance(instance, zone string, instanceTypes []*cloudprovider.InstanceType) string { + for _, it := range instanceTypes { + if it.Name != instance { + continue + } + for _, o := range it.Offerings { + if o.CapacityType() != karpv1.CapacityTypeReserved || o.Zone() != zone { + continue + } + return o.ReservationID() } } + // note: this is an invariant that the caller must enforce, should not occur at runtime + panic("reservation ID doesn't exist for reserved launch") } // getCapacityType selects the capacity type based on the flexibility of the NodeClaim and the available offerings. @@ -404,19 +474,29 @@ func (p *DefaultProvider) getCapacityType(nodeClaim *karpv1.NodeClaim, instanceT // filterReservedInstanceTypes is used to filter the provided set of instance types to only include those with // available reserved offerings if the nodeclaim is compatible. If there are no available reserved offerings, no // filtering is applied. -func (*DefaultProvider) filterReservedInstanceTypes(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType { - requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) - if !requirements.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) { +func (*DefaultProvider) filterReservedInstanceTypes(nodeClaimRequirements scheduling.Requirements, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType { + nodeClaimRequirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved) + var reservedInstanceTypes []*cloudprovider.InstanceType + for _, it := range instanceTypes { + // We only want to include a single offering per pool (instance type / AZ combo). This is due to a limitation in the + // CreateFleet API, which limits calls to specifying a single override per pool. We'll choose to launch into the pool + // with the most capacity. + zonalOfferings := map[string]*cloudprovider.Offering{} + for _, o := range it.Offerings.Available().Compatible(nodeClaimRequirements) { + if current, ok := zonalOfferings[o.Zone()]; !ok || o.ReservationCapacity > current.ReservationCapacity { + zonalOfferings[o.Zone()] = o + } + } + if len(zonalOfferings) == 0 { + continue + } + it.Offerings = lo.Values(zonalOfferings) + reservedInstanceTypes = append(reservedInstanceTypes, it) + } + if len(reservedInstanceTypes) == 0 { return instanceTypes } - // Constrain the NodeClaim's capacity type requirement to reserved before filtering for offering availability. If we - // don't perform this step, it's possible the only reserved instance available could have an incompatible reservation - // ID. 
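// A sketch of the error path above: each CreateFleet error names the
// (instance type, zone) pool that failed, and for reserved launches that pool
// maps back to exactly one reservation ID (the invariant the panic enforces),
// whose cached availability is then zeroed so the scheduler stops targeting it
// until the next sync. The pools map is a simplified stand-in for the walk
// over instance type offerings.
func reservationsForErrors(pools map[[2]string]string, failed [][2]string) []string {
	ids := make([]string, 0, len(failed))
	for _, pool := range failed {
		id, ok := pools[pool]
		if !ok {
			// invariant: a reserved launch always resolves to a reservation
			panic("reservation ID doesn't exist for reserved launch")
		}
		ids = append(ids, id)
	}
	return ids
}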
- requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved) - reservedInstances := lo.Filter(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool { - return len(it.Offerings.Available().Compatible(requirements)) != 0 - }) - return lo.Ternary(len(reservedInstances) != 0, reservedInstances, instanceTypes) + return reservedInstanceTypes } // filterInstanceTypes is used to provide filtering on the list of potential instance types to further limit it to those diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 58b3ebdecf63..dd29355697fa 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -66,7 +66,7 @@ var _ = BeforeSuite(func() { ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) }) var _ = AfterSuite(func() { diff --git a/pkg/providers/instance/types.go b/pkg/providers/instance/types.go index 62fb8cf11408..f8ff74557255 100644 --- a/pkg/providers/instance/types.go +++ b/pkg/providers/instance/types.go @@ -17,7 +17,6 @@ package instance import ( "time" - "github.com/aws/aws-sdk-go-v2/aws" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/samber/lo" @@ -43,28 +42,21 @@ type Instance struct { func NewInstance(out ec2types.Instance) *Instance { return &Instance{ - LaunchTime: aws.ToTime(out.LaunchTime), + LaunchTime: lo.FromPtr(out.LaunchTime), State: out.State.Name, - ID: aws.ToString(out.InstanceId), - ImageID: aws.ToString(out.ImageId), + ID: lo.FromPtr(out.InstanceId), + ImageID: lo.FromPtr(out.ImageId), Type: out.InstanceType, - Zone: aws.ToString(out.Placement.AvailabilityZone), - CapacityType: func() string { - switch { - case out.SpotInstanceRequestId != nil: - return karpv1.CapacityTypeSpot - case out.CapacityReservationId != nil: - return karpv1.CapacityTypeReserved - default: - return karpv1.CapacityTypeOnDemand - } - }(), + Zone: lo.FromPtr(out.Placement.AvailabilityZone), + CapacityType: lo.If(out.SpotInstanceRequestId != nil, karpv1.CapacityTypeSpot). + ElseIf(out.CapacityReservationId != nil, karpv1.CapacityTypeReserved). 
+ Else(karpv1.CapacityTypeOnDemand), CapacityReservationID: lo.FromPtr(out.CapacityReservationId), SecurityGroupIDs: lo.Map(out.SecurityGroups, func(securitygroup ec2types.GroupIdentifier, _ int) string { - return aws.ToString(securitygroup.GroupId) + return lo.FromPtr(securitygroup.GroupId) }), - SubnetID: aws.ToString(out.SubnetId), - Tags: lo.SliceToMap(out.Tags, func(t ec2types.Tag) (string, string) { return aws.ToString(t.Key), aws.ToString(t.Value) }), + SubnetID: lo.FromPtr(out.SubnetId), + Tags: lo.SliceToMap(out.Tags, func(t ec2types.Tag) (string, string) { return lo.FromPtr(t.Key), lo.FromPtr(t.Value) }), EFAEnabled: lo.ContainsBy(out.NetworkInterfaces, func(item ec2types.InstanceNetworkInterface) bool { return item.InterfaceType != nil && *item.InterfaceType == string(ec2types.NetworkInterfaceTypeEfa) }), @@ -72,17 +64,24 @@ func NewInstance(out ec2types.Instance) *Instance { } -func NewInstanceFromFleet(out ec2types.CreateFleetInstance, tags map[string]string, efaEnabled bool) *Instance { +func NewInstanceFromFleet( + out ec2types.CreateFleetInstance, + tags map[string]string, + capacityType string, + capacityReservationID string, + efaEnabled bool, +) *Instance { return &Instance{ - LaunchTime: time.Now(), // estimate the launch time since we just launched - State: ec2types.InstanceStateNamePending, - ID: out.InstanceIds[0], - ImageID: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.ImageId), - Type: out.InstanceType, - Zone: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.AvailabilityZone), - CapacityType: string(out.Lifecycle), - SubnetID: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.SubnetId), - Tags: tags, - EFAEnabled: efaEnabled, + LaunchTime: time.Now(), // estimate the launch time since we just launched + State: ec2types.InstanceStateNamePending, + ID: out.InstanceIds[0], + ImageID: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.ImageId), + Type: out.InstanceType, + Zone: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + CapacityType: capacityType, + CapacityReservationID: capacityReservationID, + SubnetID: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.SubnetId), + Tags: tags, + EFAEnabled: efaEnabled, } } diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index fd91cd4bcd5c..74f3211aeed6 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -28,6 +28,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awscache "github.com/aws/karpenter-provider-aws/pkg/cache" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" ) @@ -36,8 +37,9 @@ type Provider interface { } type DefaultProvider struct { - unavailableOfferings *awscache.UnavailableOfferings - pricingProvider pricing.Provider + unavailableOfferings *awscache.UnavailableOfferings + pricingProvider pricing.Provider + capacityReservationProvider capacityreservation.Provider } func NewDefaultProvider(unavailableOfferingsCache *awscache.UnavailableOfferings, pricingProvider pricing.Provider) *DefaultProvider { @@ -96,10 +98,10 @@ func (p *DefaultProvider) createOfferings( ) cloudprovider.Offerings { itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...) 
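// The capacity-type inference above, spelled out as the switch it replaces: a
// spot request ID marks spot, a capacity reservation ID marks reserved, and
// anything else is on-demand.
func capacityTypeOf(out ec2types.Instance) string {
	switch {
	case out.SpotInstanceRequestId != nil:
		return karpv1.CapacityTypeSpot
	case out.CapacityReservationId != nil:
		return karpv1.CapacityTypeReserved
	default:
		return karpv1.CapacityTypeOnDemand
	}
}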
- offerings := []*cloudprovider.Offering{} + var offerings []*cloudprovider.Offering for zone := range allZones { for _, capacityType := range it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values() { - // Reserved capacity types are constructed separately, skip them for now. + // Reserved capacity types are constructed separately if capacityType == karpv1.CapacityTypeReserved { continue } @@ -138,7 +140,6 @@ func (p *DefaultProvider) createOfferings( } reservation := &nodeClass.Status.CapacityReservations[i] - isUnavailable := p.unavailableOfferings.IsReservationUnavailable(reservation.ID) _, hasSubnetZone := subnetZones[reservation.AvailabilityZone] price := 0.0 if odPrice, ok := p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)); ok { @@ -148,6 +149,7 @@ func (p *DefaultProvider) createOfferings( // users to utilize the instances they're already paying for. price = odPrice / 10_000_000.0 } + reservationCapacity := p.capacityReservationProvider.GetAvailableInstanceCount(reservation.ID) offering := &cloudprovider.Offering{ Requirements: scheduling.NewRequirements( scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved), @@ -155,8 +157,8 @@ func (p *DefaultProvider) createOfferings( scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, reservation.ID), ), Price: price, - Available: !isUnavailable && itZones.Has(reservation.AvailabilityZone) && hasSubnetZone, - ReservationCapacity: reservation.AvailableInstanceCount, + Available: reservationCapacity != 0 && itZones.Has(reservation.AvailabilityZone) && hasSubnetZone, + ReservationCapacity: reservationCapacity, } if id, ok := subnetZones[reservation.AvailabilityZone]; ok { offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 77b2343c3cb1..db25bf6b21f6 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -88,7 +88,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock) }) @@ -2079,7 +2079,7 @@ var _ = Describe("InstanceTypeProvider", func() { ExpectNotScheduled(ctx, env.Client, pod) // capacity shortage is over - expire the item from the cache and try again awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{}) - awsEnv.UnavailableOfferingsCache.Delete("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) + awsEnv.UnavailableOfferingsCache.DeleteOffering("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) node := ExpectScheduled(ctx, env.Client, pod) Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "inf2.24xlarge")) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 49c8a72dc769..d07c37b09bb3 100644 --- 
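// A sketch of the pricing rule above with concrete numbers: a reserved
// offering is priced at one ten-millionth of the on-demand rate, so for an
// illustrative $0.10/hr instance type the reserved offering costs $0.00000001.
// That undercuts every spot and on-demand offering, making the scheduler
// consume already-paid-for capacity first, while still ranking reservations of
// different instance types relative to each other.
func reservedPrice(onDemandPrice float64) float64 {
	return onDemandPrice / 10_000_000.0
}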
a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -60,9 +60,10 @@ type Provider interface { ResolveClusterCIDR(context.Context) error } type LaunchTemplate struct { - Name string - InstanceTypes []*cloudprovider.InstanceType - ImageID string + Name string + InstanceTypes []*cloudprovider.InstanceType + ImageID string + CapacityReservationID string } type DefaultProvider struct { @@ -134,7 +135,12 @@ func (p *DefaultProvider) EnsureAll( if err != nil { return nil, err } - launchTemplates = append(launchTemplates, &LaunchTemplate{Name: *ec2LaunchTemplate.LaunchTemplateName, InstanceTypes: resolvedLaunchTemplate.InstanceTypes, ImageID: resolvedLaunchTemplate.AMIID}) + launchTemplates = append(launchTemplates, &LaunchTemplate{ + Name: *ec2LaunchTemplate.LaunchTemplateName, + InstanceTypes: resolvedLaunchTemplate.InstanceTypes, + ImageID: resolvedLaunchTemplate.AMIID, + CapacityReservationID: resolvedLaunchTemplate.CapacityReservationID, + }) } return launchTemplates, nil } diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 596a2146eae8..cc08e153970f 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -99,7 +99,7 @@ var _ = BeforeSuite(func() { fakeClock = &clock.FakeClock{} recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster, fakeClock) }) diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 357af1245341..b8f91f397b31 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -66,18 +66,19 @@ type Environment struct { PricingAPI *fake.PricingAPI // Cache - EC2Cache *cache.Cache - InstanceTypeCache *cache.Cache - UnavailableOfferingsCache *awscache.UnavailableOfferings - LaunchTemplateCache *cache.Cache - SubnetCache *cache.Cache - AvailableIPAdressCache *cache.Cache - AssociatePublicIPAddressCache *cache.Cache - SecurityGroupCache *cache.Cache - InstanceProfileCache *cache.Cache - SSMCache *cache.Cache - DiscoveredCapacityCache *cache.Cache - CapacityReservationCache *cache.Cache + EC2Cache *cache.Cache + InstanceTypeCache *cache.Cache + UnavailableOfferingsCache *awscache.UnavailableOfferings + LaunchTemplateCache *cache.Cache + SubnetCache *cache.Cache + AvailableIPAdressCache *cache.Cache + AssociatePublicIPAddressCache *cache.Cache + SecurityGroupCache *cache.Cache + InstanceProfileCache *cache.Cache + SSMCache *cache.Cache + DiscoveredCapacityCache *cache.Cache + CapacityReservationCache *cache.Cache + CapacityReservationAvailabilityCache *cache.Cache // Providers CapacityReservationProvider *capacityreservation.DefaultProvider @@ -117,6 +118,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment instanceProfileCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) ssmCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) capacityReservationCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + capacityReservationAvailabilityCache := cache.New(24*time.Hour, awscache.DefaultCleanupInterval) fakePricingAPI := 
&fake.PricingAPI{} // Providers @@ -147,6 +149,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment net.ParseIP("10.0.100.10"), "https://test-cluster", ) + capacityReservationProvider := capacityreservation.NewProvider(ec2api, clock, capacityReservationCache, capacityReservationAvailabilityCache) instanceProvider := instance.NewDefaultProvider( ctx, "", @@ -154,8 +157,8 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment unavailableOfferingsCache, subnetProvider, launchTemplateProvider, + capacityReservationProvider, ) - capacityReservationProvider := capacityreservation.NewProvider(ec2api, clock, capacityReservationCache) return &Environment{ Clock: clock, @@ -166,18 +169,19 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment IAMAPI: iamapi, PricingAPI: fakePricingAPI, - EC2Cache: ec2Cache, - InstanceTypeCache: instanceTypeCache, - LaunchTemplateCache: launchTemplateCache, - SubnetCache: subnetCache, - AvailableIPAdressCache: availableIPAdressCache, - AssociatePublicIPAddressCache: associatePublicIPAddressCache, - SecurityGroupCache: securityGroupCache, - InstanceProfileCache: instanceProfileCache, - UnavailableOfferingsCache: unavailableOfferingsCache, - SSMCache: ssmCache, - DiscoveredCapacityCache: discoveredCapacityCache, - CapacityReservationCache: capacityReservationCache, + EC2Cache: ec2Cache, + InstanceTypeCache: instanceTypeCache, + LaunchTemplateCache: launchTemplateCache, + SubnetCache: subnetCache, + AvailableIPAdressCache: availableIPAdressCache, + AssociatePublicIPAddressCache: associatePublicIPAddressCache, + SecurityGroupCache: securityGroupCache, + InstanceProfileCache: instanceProfileCache, + UnavailableOfferingsCache: unavailableOfferingsCache, + SSMCache: ssmCache, + DiscoveredCapacityCache: discoveredCapacityCache, + CapacityReservationCache: capacityReservationCache, + CapacityReservationAvailabilityCache: capacityReservationAvailabilityCache, CapacityReservationProvider: capacityReservationProvider, InstanceTypesResolver: instanceTypesResolver, From 112b9b38c112ee7d5a837649e8fac632fe574196 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 21 Feb 2025 06:43:31 -0800 Subject: [PATCH 03/16] checkpoint review feedback --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 8 --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 8 --- pkg/apis/v1/ec2nodeclass_hash_test.go | 2 +- pkg/apis/v1/ec2nodeclass_status.go | 6 -- .../v1/ec2nodeclass_validation_cel_test.go | 18 +++--- pkg/cache/unavailableofferings.go | 15 +++-- pkg/controllers/interruption/controller.go | 3 +- .../nodeclass/capacityreservation.go | 10 ++- pkg/controllers/nodeclass/validation.go | 2 +- pkg/providers/amifamily/resolver.go | 23 +------ pkg/providers/capacityreservation/provider.go | 39 +++++++----- pkg/providers/capacityreservation/types.go | 63 ++++++++----------- pkg/providers/instance/instance.go | 3 + pkg/providers/instancetype/instancetype.go | 2 +- .../instancetype/offering/provider.go | 2 +- pkg/providers/instancetype/suite_test.go | 2 +- .../launchtemplate/launchtemplate.go | 46 +++++++++----- 17 files changed, 112 insertions(+), 140 deletions(-) diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 6e179257a4d9..b1f48ae85d79 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -679,9 +679,6 @@ spec: 
availabilityZone: description: The availability zone the capacity reservation is available in. type: string - availableInstanceCount: - description: The last known available instance count for the capacity reservation. - type: integer endTime: description: |- The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter @@ -705,17 +702,12 @@ spec: description: The ID of the AWS account that owns the capacity reservation. pattern: ^[0-9]{12}$ type: string - totalInstanceCount: - description: The total instance count for the capacity reservation. - type: integer required: - availabilityZone - - availableInstanceCount - id - instanceMatchCriteria - instanceType - ownerID - - totalInstanceCount type: object type: array conditions: diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 152dd1ac92e9..bd817cc477ec 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -676,9 +676,6 @@ spec: availabilityZone: description: The availability zone the capacity reservation is available in. type: string - availableInstanceCount: - description: The last known available instance count for the capacity reservation. - type: integer endTime: description: |- The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter @@ -702,17 +699,12 @@ spec: description: The ID of the AWS account that owns the capacity reservation. pattern: ^[0-9]{12}$ type: string - totalInstanceCount: - description: The total instance count for the capacity reservation. - type: integer required: - availabilityZone - - availableInstanceCount - id - instanceMatchCriteria - instanceType - ownerID - - totalInstanceCount type: object type: array conditions: diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go index a523cfd8189b..87ad4de0b5af 100644 --- a/pkg/apis/v1/ec2nodeclass_hash_test.go +++ b/pkg/apis/v1/ec2nodeclass_hash_test.go @@ -193,7 +193,7 @@ var _ = Describe("Hash", func() { nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ Tags: map[string]string{"ami-test-key": "ami-test-value"}, }} - nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ Tags: map[string]string{"cr-test-key": "cr-test-value"}, }} updatedHash := nodeClass.Hash() diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 28308900e2bc..b89e5e9b6bcb 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -72,9 +72,6 @@ type CapacityReservation struct { // The availability zone the capacity reservation is available in. // +required AvailabilityZone string `json:"availabilityZone"` - // The last known available instance count for the capacity reservation. - // +required - AvailableInstanceCount int `json:"availableInstanceCount,omitempty" hash:"ignore"` // The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter // will no longer be able to launch instances into that reservation. // +optional @@ -94,9 +91,6 @@ type CapacityReservation struct { // +kubebuilder:validation:Pattern:="^[0-9]{12}$" // +required OwnerID string `json:"ownerID"` - // The total instance count for the capacity reservation. 
- // +required - TotalInstanceCount int `json:"totalInstanceCount" hash:"ignore"` } // EC2NodeClassStatus contains the resolved state of the EC2NodeClass diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index 898ba9dc7ad1..bbf844aea15c 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -450,6 +450,15 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).To(Succeed()) }) + It("should succeed for a valid ownerID", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) It("should fail with a capacity reservation selector on a malformed id", func() { nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ ID: "r-12345749", @@ -520,15 +529,6 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) - It("should succeed for a valid ownerID", func() { - nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ - OwnerID: "012345678901", - Tags: map[string]string{ - "test": "testvalue", - }, - }} - Expect(env.Client.Create(ctx, nc)).To(Succeed()) - }) It("should fail when the ownerID is malformed", func() { nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ OwnerID: "01234567890", // OwnerID must be 12 digits, this is 11 diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go index e9b10154de35..fc78412dfd75 100644 --- a/pkg/cache/unavailableofferings.go +++ b/pkg/cache/unavailableofferings.go @@ -48,21 +48,20 @@ func NewUnavailableOfferings() *UnavailableOfferings { } // IsUnavailable returns true if the offering appears in the cache -func (u *UnavailableOfferings) IsUnavailable(instanceType string, zone, capacityType string) bool { +func (u *UnavailableOfferings) IsUnavailable(instanceType ec2types.InstanceType, zone, capacityType string) bool { _, found := u.cache.Get(u.key(instanceType, zone, capacityType)) return found } // MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings -func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) { +func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason string, instanceType ec2types.InstanceType, zone, capacityType string) { // even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL log.FromContext(ctx).WithValues( "reason", unavailableReason, "instance-type", instanceType, "zone", zone, "capacity-type", capacityType, - "ttl", UnavailableOfferingsTTL, - ).V(1).Info("removing offering from offerings") + "ttl", UnavailableOfferingsTTL).V(1).Info("removing offering from offerings") u.cache.SetDefault(u.key(instanceType, zone, capacityType), struct{}{}) atomic.AddUint64(&u.SeqNum, 1) } @@ -70,10 +69,10 @@ func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableR func (u *UnavailableOfferings) MarkUnavailableForFleetErr(ctx context.Context, fleetErr ec2types.CreateFleetError, capacityType string) { instanceType := fleetErr.LaunchTemplateAndOverrides.Overrides.InstanceType zone := aws.ToString(fleetErr.LaunchTemplateAndOverrides.Overrides.AvailabilityZone) - 
u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), string(instanceType), zone, capacityType) + u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), instanceType, zone, capacityType) } -func (u *UnavailableOfferings) DeleteOffering(instanceType, zone, capacityType string) { +func (u *UnavailableOfferings) Delete(instanceType ec2types.InstanceType, zone string, capacityType string) { u.cache.Delete(u.key(instanceType, zone, capacityType)) } @@ -82,6 +81,6 @@ func (u *UnavailableOfferings) Flush() { } // key returns the cache key for all offerings in the cache -func (*UnavailableOfferings) key(instanceType, zone, capacityType string) string { - return fmt.Sprintf("o:%s:%s:%s", capacityType, instanceType, zone) +func (u *UnavailableOfferings) key(instanceType ec2types.InstanceType, zone string, capacityType string) string { + return fmt.Sprintf("%s:%s:%s", capacityType, instanceType, zone) } diff --git a/pkg/controllers/interruption/controller.go b/pkg/controllers/interruption/controller.go index b5205ab57232..51899c9084e0 100644 --- a/pkg/controllers/interruption/controller.go +++ b/pkg/controllers/interruption/controller.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/metrics" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" sqstypes "github.com/aws/aws-sdk-go-v2/service/sqs/types" "github.com/awslabs/operatorpkg/singleton" "go.uber.org/multierr" @@ -207,7 +208,7 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message, zone := nodeClaim.Labels[corev1.LabelTopologyZone] instanceType := nodeClaim.Labels[corev1.LabelInstanceTypeStable] if zone != "" && instanceType != "" { - c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, karpv1.CapacityTypeSpot) + c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), ec2types.InstanceType(instanceType), zone, karpv1.CapacityTypeSpot) } } if action != NoAction { diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go index dbcad52186aa..0d862ba31d66 100644 --- a/pkg/controllers/nodeclass/capacityreservation.go +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -56,6 +56,7 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass return reconcile.Result{}, fmt.Errorf("getting capacity reservations, %w", err) } if len(reservations) == 0 { + nc.Status.CapacityReservations = nil nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady) return reconcile.Result{RequeueAfter: capacityReservationPollPeriod}, nil } @@ -86,6 +87,8 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass } func capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) { + // Guard against new instance match criteria added in the future. See https://github.com/kubernetes-sigs/karpenter/issues/806 + // for a similar issue. 
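// A usage sketch for the typed unavailable-offerings cache above (the reason
// string and pool values are illustrative): untyped string constants convert
// implicitly to ec2types.InstanceType, so call sites only change where they
// held a string variable, as in the interruption controller hunk above.
func exampleUnavailable(ctx context.Context, u *awscache.UnavailableOfferings) {
	u.MarkUnavailable(ctx, "InsufficientInstanceCapacity", "m5.large", "us-west-2a", karpv1.CapacityTypeSpot)
	_ = u.IsUnavailable("m5.large", "us-west-2a", karpv1.CapacityTypeSpot) // true until the TTL lapses
	u.Delete("m5.large", "us-west-2a", karpv1.CapacityTypeSpot)
}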
if !lo.Contains([]ec2types.InstanceMatchCriteria{ ec2types.InstanceMatchCriteriaOpen, ec2types.InstanceMatchCriteriaTargeted, @@ -98,17 +101,18 @@ func capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityRe } return v1.CapacityReservation{ - AvailabilityZone: *cr.AvailabilityZone, - // AvailableInstanceCount: int(*cr.AvailableInstanceCount), + AvailabilityZone: *cr.AvailabilityZone, EndTime: endTime, ID: *cr.CapacityReservationId, InstanceMatchCriteria: string(cr.InstanceMatchCriteria), InstanceType: *cr.InstanceType, OwnerID: *cr.OwnerId, - TotalInstanceCount: int(*cr.TotalInstanceCount), }, nil } +// requeueAfter determines the duration until the next target reconciliation time based on the provided reservations. If +// any reservations are expected to expire before we would typically requeue, the duration will be based on the +// nearest expiration time. func (c *CapacityReservation) requeueAfter(reservations ...*ec2types.CapacityReservation) time.Duration { var next *time.Time for _, reservation := range reservations { diff --git a/pkg/controllers/nodeclass/validation.go b/pkg/controllers/nodeclass/validation.go index 8131b66047dc..3621e2044b89 100644 --- a/pkg/controllers/nodeclass/validation.go +++ b/pkg/controllers/nodeclass/validation.go @@ -91,7 +91,7 @@ func (n Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{}, nil } - createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") + createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(ctx, mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") createLaunchTemplateInput.DryRun = aws.Bool(true) if _, err := n.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput); awserrors.IgnoreDryRunError(err) != nil { diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index e8a9e213558d..43ce2afd0ec1 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -159,7 +159,9 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N // ordering in this string. reservationIDs: lo.Ternary( capacityType == karpv1.CapacityTypeReserved, - strings.Join(selectReservationIDs(it, nodeClaim), ","), + strings.Join(lo.FilterMap(it.Offerings, func(o *cloudprovider.Offering, _ int) (string, bool) { + return o.ReservationID(), o.CapacityType() == karpv1.CapacityTypeReserved + }), ","), "", ), } @@ -173,25 +175,6 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N return resolvedTemplates, nil } -// selectReservationIDs filters the set of reservation IDs available on the given instance type to only include those -// that are compatible with the given NodeClaim. Additionally, if there are multiple reservations available in the same -// zone, only the reservation with the greatest availability is selected. This is to address a limitation in the -// CreateFleet interface, where you can only provide one override for a given instance-zone combination. 
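// A sketch of the requeue rule described above, assuming a fixed poll period,
// with expirations standing in for reservation end times: requeue at the usual
// poll interval unless a reservation expires sooner, in which case requeue at
// that expiration.
func nextRequeue(now time.Time, poll time.Duration, expirations ...time.Time) time.Duration {
	next := now.Add(poll)
	for _, e := range expirations {
		if e.After(now) && e.Before(next) {
			next = e
		}
	}
	return next.Sub(now)
}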
-func selectReservationIDs(it *cloudprovider.InstanceType, nodeClaim *karpv1.NodeClaim) []string { - zonalOfferings := map[string]*cloudprovider.Offering{} - for _, o := range it.Offerings.Available().Compatible(scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)) { - if o.CapacityType() != karpv1.CapacityTypeReserved { - continue - } - if current, ok := zonalOfferings[o.Zone()]; !ok || current.ReservationCapacity < o.ReservationCapacity { - zonalOfferings[o.Zone()] = o - } - } - return lo.Map(lo.Values(zonalOfferings), func(o *cloudprovider.Offering, _ int) string { - return o.ReservationID() - }) -} - func GetAMIFamily(amiFamily string, options *Options) AMIFamily { switch amiFamily { case v1.AMIFamilyBottlerocket: diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go index 2a136debaaa3..e16f5f867b8a 100644 --- a/pkg/providers/capacityreservation/provider.go +++ b/pkg/providers/capacityreservation/provider.go @@ -46,7 +46,11 @@ type DefaultProvider struct { cm *pretty.ChangeMonitor } -func NewProvider(ec2api sdk.EC2API, clk clock.Clock, reservationCache, reservationAvailabilityCache *cache.Cache) *DefaultProvider { +func NewProvider( + ec2api sdk.EC2API, + clk clock.Clock, + reservationCache, reservationAvailabilityCache *cache.Cache, +) *DefaultProvider { return &DefaultProvider{ availabilityCache: availabilityCache{ cache: reservationAvailabilityCache, @@ -60,40 +64,41 @@ func NewProvider(ec2api sdk.EC2API, clk clock.Clock, reservationCache, reservati } func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) { - queries := QueriesFromSelectorTerms(selectorTerms...) - var reservations []*ec2types.CapacityReservation - var remainingQueries []*Query - for _, query := range queries { - if value, ok := p.reservationCache.Get(query.CacheKey()); ok { - reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) - } else { - remainingQueries = append(remainingQueries, query) - } - } - if len(remainingQueries) == 0 { + queries := QueriesFromSelectorTerms(selectorTerms...) + reservations, queries = p.resolveCachedQueries(queries...) + if len(queries) == 0 { return p.filterReservations(reservations), nil } - - for _, query := range remainingQueries { - paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, query.DescribeCapacityReservationsInput()) + for _, q := range queries { + paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, q.DescribeCapacityReservationsInput()) for paginator.HasMorePages() { out, err := paginator.NextPage(ctx) if err != nil { return nil, fmt.Errorf("listing capacity reservations, %w", err) } queryReservations := lo.ToSlicePtr(out.CapacityReservations) - p.reservationCache.SetDefault(query.CacheKey(), queryReservations) + p.reservationCache.SetDefault(q.CacheKey(), queryReservations) reservations = append(reservations, queryReservations...) 
p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { return *r.CapacityReservationId, int(*r.AvailableInstanceCount) })) } } - return p.filterReservations(reservations), nil } +func (p *DefaultProvider) resolveCachedQueries(queries ...*Query) (reservations []*ec2types.CapacityReservation, remainingQueries []*Query) { + for _, q := range queries { + if value, ok := p.reservationCache.Get(q.CacheKey()); ok { + reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) + } else { + remainingQueries = append(remainingQueries, q) + } + } + return reservations, remainingQueries +} + // filterReservations removes duplicate and expired reservations func (p *DefaultProvider) filterReservations(reservations []*ec2types.CapacityReservation) []*ec2types.CapacityReservation { return lo.Filter(lo.UniqBy(reservations, func(r *ec2types.CapacityReservation) string { diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go index bd970b0c33b4..d5ec2c0c5461 100644 --- a/pkg/providers/capacityreservation/types.go +++ b/pkg/providers/capacityreservation/types.go @@ -60,52 +60,39 @@ func (q *Query) CacheKey() string { } func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReservationsInput { + filters := []ec2types.Filter{{ + Name: lo.ToPtr("state"), + Values: []string{string(ec2types.CapacityReservationStateActive)}, + }} if len(q.ids) != 0 { return &ec2.DescribeCapacityReservationsInput{ - Filters: []ec2types.Filter{lo.Must(q.stateFilter())[0]}, + Filters: filters, CapacityReservationIds: q.ids, } } - type filterProvider func() ([]ec2types.Filter, bool) - return &ec2.DescribeCapacityReservationsInput{ - Filters: lo.Flatten(lo.FilterMap([]filterProvider{ - q.stateFilter, - q.ownerIDFilter, - q.tagsFilter, - }, func(f filterProvider, _ int) ([]ec2types.Filter, bool) { - return f() - })), + if q.ownerID != "" { + filters = append(filters, ec2types.Filter{ + Name: lo.ToPtr("owner-id"), + Values: []string{q.ownerID}, + }) } -} - -func (q *Query) stateFilter() ([]ec2types.Filter, bool) { - return []ec2types.Filter{{ - Name: lo.ToPtr("state"), - Values: []string{string(ec2types.CapacityReservationStateActive)}, - }}, true -} - -func (q *Query) ownerIDFilter() ([]ec2types.Filter, bool) { - return []ec2types.Filter{{ - Name: lo.ToPtr("owner-id"), - Values: []string{q.ownerID}, - }}, q.ownerID != "" -} - -func (q *Query) tagsFilter() ([]ec2types.Filter, bool) { - return lo.MapToSlice(q.tags, func(k, v string) ec2types.Filter { - if v == "*" { + if len(q.tags) != 0 { + filters = append(filters, lo.MapToSlice(q.tags, func(k, v string) ec2types.Filter { + if v == "*" { + return ec2types.Filter{ + Name: lo.ToPtr("tag-key"), + Values: []string{k}, + } + } return ec2types.Filter{ - Name: lo.ToPtr("tag-key"), - Values: []string{k}, + Name: lo.ToPtr(fmt.Sprintf("tag:%s", k)), + Values: []string{v}, } - } - return ec2types.Filter{ - Name: lo.ToPtr(fmt.Sprintf("tag:%s", k)), - Values: []string{v}, - } - }), len(q.tags) != 0 - + })...) 
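// A sketch of the tag-term translation above: a '*' value selects on key
// presence via EC2's tag-key filter, while any other value must match the
// tag:<key> filter exactly.
func tagFilter(key, value string) ec2types.Filter {
	if value == "*" {
		return ec2types.Filter{Name: lo.ToPtr("tag-key"), Values: []string{key}}
	}
	return ec2types.Filter{Name: lo.ToPtr(fmt.Sprintf("tag:%s", key)), Values: []string{value}}
}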
+ } + return &ec2.DescribeCapacityReservationsInput{ + Filters: filters, + } } type availabilityCache struct { diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 8a52c8a56ee7..df81a676a29e 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -117,6 +117,9 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass // reserved instances that's all we'll include in our fleet request. if reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...); reqs.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) { instanceTypes = p.filterReservedInstanceTypes(reqs, instanceTypes) + if _, err := cloudprovider.InstanceTypes(instanceTypes).SatisfiesMinValues(schedulingRequirements); err != nil { + return nil, cloudprovider.NewCreateError(fmt.Errorf("failed to construct CreateFleet request while respecting minValues requirements"), "CreateFleetRequestConstructionFailed", "Failed to construct CreateFleet request while respecting minValues") + } } instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) if err != nil { diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 46045ff0191e..2ab95c5714b2 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -183,7 +183,7 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { p.muInstanceTypesInfo.Lock() defer p.muInstanceTypesInfo.Unlock() - instanceTypes := []ec2types.InstanceTypeInfo{} + var instanceTypes []ec2types.InstanceTypeInfo paginator := ec2.NewDescribeInstanceTypesPaginator(p.ec2api, &ec2.DescribeInstanceTypesInput{ Filters: []ec2types.Filter{ { diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index 74f3211aeed6..ae5e667f145d 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -106,7 +106,7 @@ func (p *DefaultProvider) createOfferings( continue } - isUnavailable := p.unavailableOfferings.IsUnavailable(it.Name, zone, capacityType) + isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) _, hasSubnetZone := subnetZones[zone] var price float64 var hasPrice bool diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index db25bf6b21f6..eb162c9c4cd2 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -2079,7 +2079,7 @@ var _ = Describe("InstanceTypeProvider", func() { ExpectNotScheduled(ctx, env.Client, pod) // capacity shortage is over - expire the item from the cache and try again awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{}) - awsEnv.UnavailableOfferingsCache.DeleteOffering("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) + awsEnv.UnavailableOfferingsCache.Delete("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) node := ExpectScheduled(ctx, env.Client, pod) Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "inf2.24xlarge")) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index d07c37b09bb3..103173b7c72a 100644 --- 
a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -38,6 +38,8 @@ import ( "k8s.io/apimachinery/pkg/api/resource" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpoptions "sigs.k8s.io/karpenter/pkg/operator/options" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -231,7 +233,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami if err != nil { return ec2types.LaunchTemplate{}, err } - createLaunchTemplateInput := GetCreateLaunchTemplateInput(options, p.ClusterIPFamily, userData) + createLaunchTemplateInput := GetCreateLaunchTemplateInput(ctx, options, p.ClusterIPFamily, userData) output, err := p.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput) if err != nil { return ec2types.LaunchTemplate{}, err @@ -241,7 +243,12 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami } // you need UserData, AmiID, tags, blockdevicemappings, instance profile, -func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFamily corev1.IPFamily, userData string) *ec2.CreateLaunchTemplateInput { +func GetCreateLaunchTemplateInput( + ctx context.Context, + options *amifamily.LaunchTemplate, + ClusterIPFamily corev1.IPFamily, + userData string, +) *ec2.CreateLaunchTemplateInput { launchTemplateDataTags := []ec2types.LaunchTemplateTagSpecificationRequest{ {ResourceType: ec2types.ResourceTypeNetworkInterface, Tags: utils.MergeTags(options.Tags)}, } @@ -249,24 +256,10 @@ func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFa launchTemplateDataTags = append(launchTemplateDataTags, ec2types.LaunchTemplateTagSpecificationRequest{ResourceType: ec2types.ResourceTypeSpotInstancesRequest, Tags: utils.MergeTags(options.Tags)}) } networkInterfaces := generateNetworkInterfaces(options, ClusterIPFamily) - return &ec2.CreateLaunchTemplateInput{ + lt := &ec2.CreateLaunchTemplateInput{ LaunchTemplateName: aws.String(LaunchTemplateName(options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: blockDeviceMappings(options.BlockDeviceMappings), - CapacityReservationSpecification: &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ - CapacityReservationPreference: lo.Ternary( - options.CapacityType == karpv1.CapacityTypeReserved, - ec2types.CapacityReservationPreferenceCapacityReservationsOnly, - ec2types.CapacityReservationPreferenceNone, - ), - CapacityReservationTarget: lo.Ternary( - options.CapacityType == karpv1.CapacityTypeReserved, - &ec2types.CapacityReservationTarget{ - CapacityReservationId: &options.CapacityReservationID, - }, - nil, - ), - }, IamInstanceProfile: &ec2types.LaunchTemplateIamInstanceProfileSpecificationRequest{ Name: aws.String(options.InstanceProfile), }, @@ -301,6 +294,25 @@ func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFa }, }, } + // Gate this specifically since the update to CapacityReservationPreference will opt od / spot launches out of open + // ODCRs, which is a breaking change from the pre-native ODCR support behavior. 
+ if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity { + lt.LaunchTemplateData.CapacityReservationSpecification = &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ + CapacityReservationPreference: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + ec2types.CapacityReservationPreferenceCapacityReservationsOnly, + ec2types.CapacityReservationPreferenceNone, + ), + CapacityReservationTarget: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + &ec2types.CapacityReservationTarget{ + CapacityReservationId: &options.CapacityReservationID, + }, + nil, + ), + } + } + return lt } // generateNetworkInterfaces generates network interfaces for the launch template. From bcdee30ede6d637a989ab24f21be4acfc8e308af Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Sun, 23 Feb 2025 01:08:34 -0800 Subject: [PATCH 04/16] checkpointing --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 3 + cmd/controller/main.go | 6 + hack/docs/instancetypes_gen/main.go | 8 +- hack/tools/launchtemplate_counter/main.go | 8 +- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 3 + pkg/apis/v1/ec2nodeclass_status.go | 18 ++- .../v1/ec2nodeclass_validation_cel_test.go | 13 ++ pkg/cache/cache.go | 8 +- pkg/cloudprovider/cloudprovider.go | 6 +- pkg/cloudprovider/suite_test.go | 6 +- pkg/controllers/controllers.go | 4 +- .../capacityreservation/controller.go | 151 ++++++++++++++++++ .../nodeclass/capacityreservation.go | 5 +- pkg/controllers/nodeclass/controller.go | 59 +++---- pkg/controllers/nodeclass/instanceprofile.go | 7 - pkg/controllers/nodeclass/readiness_test.go | 2 +- pkg/controllers/nodeclass/suite_test.go | 1 + pkg/errors/errors.go | 7 +- pkg/operator/operator.go | 12 +- pkg/providers/amifamily/resolver.go | 8 +- pkg/providers/capacityreservation/provider.go | 19 ++- pkg/providers/instance/instance.go | 62 ++++--- pkg/providers/instancetype/instancetype.go | 12 +- .../instancetype/offering/provider.go | 116 +++++++++----- pkg/providers/instancetype/suite_test.go | 56 ++++--- pkg/providers/launchtemplate/suite_test.go | 2 +- pkg/test/environment.go | 13 +- 27 files changed, 453 insertions(+), 162 deletions(-) create mode 100644 pkg/controllers/nodeclaim/capacityreservation/controller.go diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index b1f48ae85d79..32c21066e067 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -674,6 +674,9 @@ spec: type: object type: array capacityReservations: + description: |- + CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + CapacityReservation selectors. items: properties: availabilityZone: diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 7761bdfebed1..b75b56f35b90 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -15,6 +15,7 @@ limitations under the License. 
package main import ( + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/controllers" "github.com/aws/karpenter-provider-aws/pkg/operator" @@ -23,6 +24,7 @@ import ( corecontrollers "sigs.k8s.io/karpenter/pkg/controllers" "sigs.k8s.io/karpenter/pkg/controllers/state" coreoperator "sigs.k8s.io/karpenter/pkg/operator" + karpoptions "sigs.k8s.io/karpenter/pkg/operator/options" ) func main() { @@ -40,6 +42,10 @@ func main() { cloudProvider := metrics.Decorate(awsCloudProvider) clusterState := state.NewCluster(op.Clock, op.GetClient(), cloudProvider) + if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity { + v1.CapacityReservationsEnabled = true + } + op. WithControllers(ctx, corecontrollers.NewControllers( ctx, diff --git a/hack/docs/instancetypes_gen/main.go b/hack/docs/instancetypes_gen/main.go index ae79769ee64f..fdaa4212f90b 100644 --- a/hack/docs/instancetypes_gen/main.go +++ b/hack/docs/instancetypes_gen/main.go @@ -89,7 +89,9 @@ func main() { lo.Must0(os.Setenv("SYSTEM_NAMESPACE", "karpenter")) lo.Must0(os.Setenv("AWS_SDK_LOAD_CONFIG", "true")) - ctx := coreoptions.ToContext(context.Background(), coretest.Options()) + ctx := coreoptions.ToContext(context.Background(), coretest.Options(coretest.OptionsFields{ + FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(false)}, + })) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ ClusterName: lo.ToPtr("docs-gen"), ClusterEndpoint: lo.ToPtr("https://docs-gen.aws"), @@ -130,7 +132,8 @@ below are the resources available with some assumptions and after the instance o ec2api := ec2.NewFromConfig(cfg) subnetProvider := subnet.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AvailableIPAddressTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AssociatePublicIPAddressTTL, awscache.DefaultCleanupInterval)) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, @@ -140,6 +143,7 @@ below are the resources available with some assumptions and after the instance o ec2api, cfg.Region, ), + nil, awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, diff --git a/hack/tools/launchtemplate_counter/main.go b/hack/tools/launchtemplate_counter/main.go index ef56f53e08ca..1ac6bf229278 100644 --- a/hack/tools/launchtemplate_counter/main.go +++ b/hack/tools/launchtemplate_counter/main.go @@ -45,7 +45,9 @@ import ( func main() { lo.Must0(os.Setenv("AWS_SDK_LOAD_CONFIG", "true")) - ctx := coreoptions.ToContext(context.Background(), coretest.Options()) + ctx := coreoptions.ToContext(context.Background(), coretest.Options(coretest.OptionsFields{ + FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(false)}, + })) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ ClusterName: lo.ToPtr("docs-gen"), ClusterEndpoint: lo.ToPtr("https://docs-gen.aws"), @@ -57,7 +59,8 @@ func main() { ec2api := ec2.NewFromConfig(cfg) subnetProvider := subnet.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), 
cache.New(awscache.AvailableIPAddressTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AssociatePublicIPAddressTTL, awscache.DefaultCleanupInterval)) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, @@ -67,6 +70,7 @@ func main() { ec2api, cfg.Region, ), + nil, awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index bd817cc477ec..9dbc2c2817b6 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -671,6 +671,9 @@ spec: type: object type: array capacityReservations: + description: |- + CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + CapacityReservation selectors. items: properties: availabilityZone: diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index b89e5e9b6bcb..2b56635c1060 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -20,6 +20,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +var ( + CapacityReservationsEnabled = false +) + const ( ConditionTypeSubnetsReady = "SubnetsReady" ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" @@ -102,7 +106,10 @@ type EC2NodeClassStatus struct { // SecurityGroups contains the current security group values that are available to the // cluster under the SecurityGroups selectors. // +optional - SecurityGroups []SecurityGroup `json:"securityGroups,omitempty"` + SecurityGroups []SecurityGroup `json:"securityGroups,omitempty"` + // CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + // CapacityReservation selectors. + // +optional CapacityReservations []CapacityReservation `json:"capacityReservations,omitempty"` // AMI contains the current AMI values that are available to the // cluster under the AMI selectors. 
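
A note on the new CapacityReservations status field above: it is populated from DescribeCapacityReservations results. The following is only a rough sketch of that projection, using a local struct that mirrors the CRD fields (id, availabilityZone, availableInstanceCount, endTime); the struct and helper names are hypothetical, not the PR's actual v1.CapacityReservation type.

    package sketch // illustrative only

    import (
        "github.com/aws/aws-sdk-go-v2/aws"
        ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // reservationStatus is a local mirror of the CRD status fields shown earlier in this patch.
    type reservationStatus struct {
        ID                     string       `json:"id"`
        AvailabilityZone       string       `json:"availabilityZone"`
        AvailableInstanceCount int          `json:"availableInstanceCount"`
        EndTime                *metav1.Time `json:"endTime,omitempty"`
    }

    // reservationStatusOf (hypothetical name) projects an EC2 capacity reservation onto that status shape.
    func reservationStatusOf(cr ec2types.CapacityReservation) reservationStatus {
        out := reservationStatus{
            ID:                     aws.ToString(cr.CapacityReservationId),
            AvailabilityZone:       aws.ToString(cr.AvailabilityZone),
            AvailableInstanceCount: int(aws.ToInt32(cr.AvailableInstanceCount)),
        }
        // EndDate is nil for reservations with no scheduled expiration.
        if cr.EndDate != nil {
            out.EndTime = &metav1.Time{Time: *cr.EndDate}
        }
        return out
    }
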
@@ -117,14 +124,17 @@ type EC2NodeClassStatus struct { } func (in *EC2NodeClass) StatusConditions() status.ConditionSet { - return status.NewReadyConditions( + conds := []string{ ConditionTypeAMIsReady, ConditionTypeSubnetsReady, ConditionTypeSecurityGroupsReady, ConditionTypeInstanceProfileReady, - ConditionTypeCapacityReservationsReady, ConditionTypeValidationSucceeded, - ).For(in) + } + if CapacityReservationsEnabled { + conds = append(conds, ConditionTypeCapacityReservationsReady) + } + return status.NewReadyConditions(conds...).For(in) } func (in *EC2NodeClass) GetConditions() []status.Condition { diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index bbf844aea15c..296eb17a2e2d 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -529,6 +529,13 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) + It("should fail when specifying id with ownerID in a single term", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + ID: "cr-12345749", + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) It("should fail when the ownerID is malformed", func() { nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ OwnerID: "01234567890", // OwnerID must be 12 digits, this is 11 @@ -538,6 +545,12 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) + It("should fail when the ownerID is set by itself", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) }) Context("AMISelectorTerms", func() { It("should succeed with a valid ami selector on alias", func() { diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index ce13687cb550..1fda37b6da03 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -27,8 +27,12 @@ const ( // UnavailableOfferingsTTL is the time before offerings that were marked as unavailable // are removed from the cache and are available for launch again UnavailableOfferingsTTL = 3 * time.Minute - // InstanceTypesAndZonesTTL is the time before we refresh instance types and zones at EC2 - InstanceTypesAndZonesTTL = 5 * time.Minute + // CapacityReservationAvailabilityTTL is the time we will persist cached capacity availability. Nominally, this is + // updated every minute, but we want to persist the data longer in the event of an EC2 API outage. 24 hours was the + // compromise made between API outage resiliency and garbage collecting entries for orphaned reservations.
+ CapacityReservationAvailabilityTTL = 24 * time.Hour + // InstanceTypesZonesAndOfferingsTTL is the time before we refresh instance types, zones, and offerings at EC2 + InstanceTypesZonesAndOfferingsTTL = 5 * time.Minute // InstanceProfileTTL is the time before we refresh checking instance profile existence at IAM InstanceProfileTTL = 15 * time.Minute // AvailableIPAddressTTL is time to drop AvailableIPAddress data if it is not updated within the TTL diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 11c32ce04a30..2ad32737c96f 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -202,7 +202,11 @@ func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *karpv1.NodeClaim) return fmt.Errorf("getting instance ID, %w", err) } ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id)) - return c.instanceProvider.Delete(ctx, id) + err = c.instanceProvider.Delete(ctx, id) + if id := nodeClaim.Labels[cloudprovider.ReservationIDLabel]; id != "" && cloudprovider.IsNodeClaimNotFoundError(err) { + c.capacityReservationProvider.MarkTerminated(id) + } + return err } func (c *CloudProvider) DisruptionReasons() []karpv1.DisruptionReason { diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 39d84ba1bb7a..91d3df302ee7 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -1158,7 +1158,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(100), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectApplied(ctx, env.Client, nodePool, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"}}) @@ -1175,7 +1175,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(11), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ MaxPods: aws.Int32(1), } @@ -1216,7 +1216,7 @@ var _ = Describe("CloudProvider", func() { }}) 
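
A note on the Delete change in pkg/cloudprovider/cloudprovider.go above: it closes the accounting loop for reserved capacity. Once EC2 reports the instance already gone (the IsNodeClaimNotFoundError case) and the NodeClaim carries a reservation-id label, MarkTerminated hands the slot back so later scheduling rounds can target the reservation again. The provider internals are not shown in this hunk, so the following is only a sketch of the counter semantics that MarkTerminated/MarkUnavailable imply, with assumed field names.

    package sketch // illustrative only

    import "sync"

    // availability approximates per-reservation capacity bookkeeping, seeded from
    // the AvailableInstanceCount values returned by DescribeCapacityReservations.
    type availability struct {
        mu     sync.Mutex
        counts map[string]int // capacity reservation id -> available instance count
    }

    // MarkTerminated returns capacity to a reservation once an instance launched
    // into it is confirmed terminated.
    func (a *availability) MarkTerminated(ids ...string) {
        a.mu.Lock()
        defer a.mu.Unlock()
        for _, id := range ids {
            a.counts[id]++
        }
    }

    // MarkUnavailable zeroes a reservation after CreateFleet reports
    // ReservationCapacityExceeded, so offerings stop targeting it until the next
    // DescribeCapacityReservations sync overwrites the count.
    func (a *availability) MarkUnavailable(ids ...string) {
        a.mu.Lock()
        defer a.mu.Unlock()
        for _, id := range ids {
            a.counts[id] = 0
        }
    }
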
nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 74d8058eab4c..34fba8215827 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -47,6 +47,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption" + "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/capacityreservation" nodeclaimgarbagecollection "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/garbagecollection" nodeclaimtagging "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/tagging" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -84,7 +85,7 @@ ) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), - nodeclass.NewController(clk, kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, capacityReservationProvider, ec2api), + nodeclass.NewController(ctx, clk, kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, capacityReservationProvider, ec2api), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider), controllerspricing.NewController(pricingProvider), @@ -93,6 +94,7 @@ ssminvalidation.NewController(ssmCache, amiProvider), status.NewController[*v1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter"), status.EmitDeprecatedMetrics), controllersversion.NewController(versionProvider, versionProvider.UpdateVersionWithValidation), + capacityreservation.NewController(kubeClient, cloudProvider), } if options.FromContext(ctx).InterruptionQueue != "" { sqsapi := servicesqs.NewFromConfig(cfg) diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go new file mode 100644 index 000000000000..32efa849791d --- /dev/null +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -0,0 +1,151 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation + +import ( + "context" + "fmt" + "time" + + "github.com/awslabs/operatorpkg/singleton" + "github.com/samber/lo" + "go.uber.org/multierr" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/operator/injection" + nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" +) + +type Controller struct { + cp cloudprovider.CloudProvider + kubeClient client.Client +} + +func NewController(kubeClient client.Client, cp cloudprovider.CloudProvider) *Controller { + return &Controller{kubeClient: kubeClient, cp: cp} +} + +func (*Controller) Name() string { + return "nodeclaim.capacityreservation" } + +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named(c.Name()). + WatchesRawSource(singleton.Source()). + Complete(singleton.AsReconciler(c)) +} + +func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, c.Name()) + + cpNodeClaims, err := c.cp.List(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("listing nodeclaims from cloudprovider, %w", err) + } + cpNodeClaimIndex := lo.SliceToMap(cpNodeClaims, func(nc *karpv1.NodeClaim) (string, *karpv1.NodeClaim) { + return nc.Status.ProviderID, nc + }) + + ncs := &karpv1.NodeClaimList{} + if err := c.kubeClient.List(ctx, ncs); err != nil { + return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) + } + updatedNodeClaims := sets.New[string]() + var errs []error + for i := range ncs.Items { + cpNC, ok := cpNodeClaimIndex[ncs.Items[i].Status.ProviderID] + if !ok { + continue + } + updated, err := c.syncCapacityType(ctx, cpNC.Labels[karpv1.CapacityTypeLabelKey], &ncs.Items[i]) + if err != nil { + errs = append(errs, err) + } + if updated { + updatedNodeClaims.Insert(ncs.Items[i].Name) + } + } + log.FromContext(ctx).WithValues("NodeClaims", lo.Map(updatedNodeClaims.UnsortedList(), func(name string, _ int) klog.ObjectRef { + return klog.KRef("", name) + })).V(1).Info("updated capacity type for nodeclaims") + + if len(errs) != 0 { + if lo.EveryBy(errs, func(err error) bool { return errors.IsConflict(err) }) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, multierr.Combine(errs...) + } + return reconcile.Result{RequeueAfter: time.Minute}, nil +} + +// syncCapacityType will update the capacity type for the given NodeClaim. This accounts for the fact that capacity +// reservations will expire, demoting NodeClaims with capacity type "reserved" to "on-demand". +func (c *Controller) syncCapacityType(ctx context.Context, capacityType string, nc *karpv1.NodeClaim) (bool, error) { + // We won't be able to sync deleting NodeClaims, and there's no real need to either as they're already draining. + if !nc.DeletionTimestamp.IsZero() { + return false, nil + } + + // For now we only account for the case where a reserved NodeClaim becomes an on-demand NodeClaim. This does not + // account for on-demand NodeClaims being promoted to reserved since that is not natively supported by Karpenter.
+ if capacityType != karpv1.CapacityTypeOnDemand { + return false, nil + } + if nc.Labels[karpv1.CapacityTypeLabelKey] == karpv1.CapacityTypeReserved { + stored := nc.DeepCopy() + nc.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand + delete(nc.Labels, cloudprovider.ReservationIDLabel) + if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); err != nil { + return false, fmt.Errorf("patching nodeclaim %q, %w", nc.Name, err) + } + } + + // If the reservation expired before the NodeClaim became registered, there may not be a Node on the cluster. Note + // that there should never be duplicate Nodes for a given NodeClaim, but handling this user-induced error is more + // straightforward than handling the duplicate error. + nodes, err := nodeclaimutils.AllNodesForNodeClaim(ctx, c.kubeClient, nc) + if err != nil { + return false, fmt.Errorf("listing nodes for nodeclaim %q, %w", nc.Name, err) + } + for _, n := range nodes { + if !n.DeletionTimestamp.IsZero() { + continue + } + // Skip Nodes which haven't been registered since we still may not have synced labels. We'll get it on the next + // iteration. + if n.Labels[karpv1.NodeRegisteredLabelKey] != "true" { + continue + } + if n.Labels[karpv1.CapacityTypeLabelKey] != karpv1.CapacityTypeReserved { + continue + } + stored := n.DeepCopy() + n.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand + delete(n.Labels, cloudprovider.ReservationIDLabel) + if err := c.kubeClient.Patch(ctx, n, client.MergeFrom(stored)); err != nil { + return false, fmt.Errorf("patching node %q, %w", n.Name, err) + } + } + return true, nil +} diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go index 0d862ba31d66..7f9d3b149807 100644 --- a/pkg/controllers/nodeclass/capacityreservation.go +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -80,7 +80,10 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass nc.Status.CapacityReservations = append(nc.Status.CapacityReservations, reservation) } if len(errors) != 0 { - log.FromContext(ctx).Error(multierr.Combine(errors...), "failed to update status with %d of %d capacity reservations", len(errors), len(reservations)) + log.FromContext(ctx).WithValues( + "error-count", len(errors), + "total-count", len(reservations), + ).Error(multierr.Combine(errors...), "failed to parse discovered capacity reservations") } nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady) return reconcile.Result{RequeueAfter: c.requeueAfter(reservations...)}, nil diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 0afad54158d5..09ef21a6a798 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/utils/clock" "sigs.k8s.io/karpenter/pkg/operator/injection" + "sigs.k8s.io/karpenter/pkg/operator/options" nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" "sigs.k8s.io/karpenter/pkg/utils/result" @@ -59,20 +60,15 @@ } type Controller struct { - kubeClient client.Client - recorder events.Recorder - launchTemplateProvider launchtemplate.Provider - - ami *AMI - capacityReservation *CapacityReservation - instanceProfile *InstanceProfile - subnet *Subnet - securityGroup *SecurityGroup - validation *Validation - readiness *Readiness //TODO : Remove this when we have sub status conditions + kubeClient client.Client
+ recorder events.Recorder + launchTemplateProvider launchtemplate.Provider + instanceProfileProvider instanceprofile.Provider + reconcilers []nodeClassReconciler } func NewController( + ctx context.Context, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, @@ -84,18 +80,23 @@ func NewController( capacityReservationProvider capacityreservation.Provider, ec2api sdk.EC2API, ) *Controller { - + reconcilers := []nodeClassReconciler{ + NewAMIReconciler(amiProvider), + &Subnet{subnetProvider: subnetProvider}, + &SecurityGroup{securityGroupProvider: securityGroupProvider}, + &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, + &Validation{ec2api: ec2api, amiProvider: amiProvider}, + &Readiness{launchTemplateProvider: launchTemplateProvider}, + } + if options.FromContext(ctx).FeatureGates.ReservedCapacity { + reconcilers = append(reconcilers, NewCapacityReservationReconciler(clk, capacityReservationProvider)) + } return &Controller{ - kubeClient: kubeClient, - recorder: recorder, - launchTemplateProvider: launchTemplateProvider, - ami: NewAMIReconciler(amiProvider), - capacityReservation: NewCapacityReservationReconciler(clk, capacityReservationProvider), - subnet: &Subnet{subnetProvider: subnetProvider}, - securityGroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, - instanceProfile: &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - validation: &Validation{ec2api: ec2api, amiProvider: amiProvider}, - readiness: &Readiness{launchTemplateProvider: launchTemplateProvider}, + kubeClient: kubeClient, + recorder: recorder, + launchTemplateProvider: launchTemplateProvider, + instanceProfileProvider: instanceProfileProvider, + reconcilers: reconcilers, } } @@ -128,15 +129,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) var results []reconcile.Result var errs error - for _, reconciler := range []nodeClassReconciler{ - c.ami, - c.capacityReservation, - c.subnet, - c.securityGroup, - c.instanceProfile, - c.validation, - c.readiness, - } { + for _, reconciler := range c.reconcilers { res, err := reconciler.Reconcile(ctx, nodeClass) errs = multierr.Append(errs, err) results = append(results, res) @@ -173,8 +166,8 @@ func (c *Controller) finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{RequeueAfter: time.Minute * 10}, nil // periodically fire the event } if nodeClass.Spec.Role != "" { - if _, err := c.instanceProfile.Finalize(ctx, nodeClass); err != nil { - return reconcile.Result{}, err + if err := c.instanceProfileProvider.Delete(ctx, nodeClass); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting instance profile, %w", err) } } if err := c.launchTemplateProvider.DeleteAll(ctx, nodeClass); err != nil { diff --git a/pkg/controllers/nodeclass/instanceprofile.go b/pkg/controllers/nodeclass/instanceprofile.go index 15402ea618ea..ab2322183e73 100644 --- a/pkg/controllers/nodeclass/instanceprofile.go +++ b/pkg/controllers/nodeclass/instanceprofile.go @@ -42,10 +42,3 @@ func (ip *InstanceProfile) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeC nodeClass.StatusConditions().SetTrue(v1.ConditionTypeInstanceProfileReady) return reconcile.Result{}, nil } - -func (ip *InstanceProfile) Finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { - if err := ip.instanceProfileProvider.Delete(ctx, nodeClass); err != nil { - return reconcile.Result{}, fmt.Errorf("deleting instance profile, %w", err) - } - return reconcile.Result{}, nil -} 
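
A note on the reconciler refactor above: the named reconciler fields become a slice assembled once in the constructor, with the capacity reservation reconciler appended only behind the ReservedCapacity feature gate; Reconcile then folds the sub-results together via the core result utilities imported in this hunk. The following is only a rough sketch of that folding contract, assuming "shortest requeue wins" semantics equivalent to the core result.Min helper.

    package sketch // illustrative only

    import "sigs.k8s.io/controller-runtime/pkg/reconcile"

    // minResult folds sub-reconciler results: an aggregate error surfaces as-is,
    // otherwise the shortest non-zero RequeueAfter wins so the controller wakes
    // up in time for the most urgent sub-reconciler.
    func minResult(results []reconcile.Result, errs error) (reconcile.Result, error) {
        if errs != nil {
            return reconcile.Result{}, errs
        }
        var out reconcile.Result
        for _, r := range results {
            out.Requeue = out.Requeue || r.Requeue
            if r.RequeueAfter > 0 && (out.RequeueAfter == 0 || r.RequeueAfter < out.RequeueAfter) {
                out.RequeueAfter = r.RequeueAfter
            }
        }
        return out, nil
    }
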
diff --git a/pkg/controllers/nodeclass/readiness_test.go b/pkg/controllers/nodeclass/readiness_test.go index 645253d732be..fdd5f3f95010 100644 --- a/pkg/controllers/nodeclass/readiness_test.go +++ b/pkg/controllers/nodeclass/readiness_test.go @@ -53,7 +53,7 @@ var _ = Describe("NodeClass Status Condition Controller", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(7)) + Expect(nodeClass.Status.Conditions).To(HaveLen(6)) Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) }) It("should update status condition as Not Ready", func() { diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 8ffda4b98615..d11266ba119d 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -67,6 +67,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) controller = nodeclass.NewController( + ctx, awsEnv.Clock, env.Client, events.NewRecorder(&record.FakeRecorder{}), diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 22d57c569cc3..b9984b34a20e 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -131,13 +131,14 @@ func IgnoreUnauthorizedOperationError(err error) error { return err } -// IsUnfulfillableCapacity returns true if the Fleet err means -// capacity is temporarily unavailable for launching. -// This could be due to account limits, insufficient ec2 capacity, etc. +// IsUnfulfillableCapacity returns true if the Fleet err means capacity is temporarily unavailable for launching. This +// could be due to account limits, insufficient ec2 capacity, etc. func IsUnfulfillableCapacity(err ec2types.CreateFleetError) bool { return unfulfillableCapacityErrorCodes.Has(*err.ErrorCode) } +// IsReservationCapacityExceeded returns true if the fleet error means there is no remaining capacity for the provided +// capacity reservation. 
func IsReservationCapacityExceeded(err ec2types.CreateFleetError) bool { return *err.ErrorCode == reservationCapacityExceededErrorCode } diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index ac4dfc71d4de..897239f74aaa 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -23,7 +23,6 @@ import ( "net" "os" "strings" - "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/aws/middleware" @@ -175,16 +174,23 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont kubeDNSIP, clusterEndpoint, ) + capacityReservationProvider := capacityreservation.NewProvider( + ec2api, + operator.Clock, + cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.CapacityReservationAvailabilityTTL, awscache.DefaultCleanupInterval), + ) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, pricingProvider, + capacityReservationProvider, unavailableOfferingsCache, instancetype.NewDefaultResolver(cfg.Region), ) - capacityReservationProvider := capacityreservation.NewProvider(ec2api, operator.Clock, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), cache.New(time.Hour*24, awscache.DefaultCleanupInterval)) instanceProvider := instance.NewDefaultProvider( ctx, cfg.Region, diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 43ce2afd0ec1..a47d3db3ebdc 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -142,8 +142,9 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N // launching reserved capacity. If it's a reserved capacity launch, we've already filtered the instance types // further up the call stack. type launchTemplateParams struct { - efaCount int - maxPods int + efaCount int + maxPods int + // reservationIDs is encoded as a string rather than a slice to ensure this type is comparable for use by `lo.GroupBy`. reservationIDs string } paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(it *cloudprovider.InstanceType) launchTemplateParams { @@ -251,6 +252,9 @@ func (r DefaultResolver) resolveLaunchTemplates( } // If no reservation IDs are provided, insert an empty string so the end result is a single launch template with no // associated capacity reservation. + // TODO: We can simplify this by creating an initial lt, and then copying it for each cr. However, this requires a deep + // copy of the LT struct, which contains an interface causing problems for deepcopy-gen. 
See review comment for context: + // https://github.com/aws/karpenter-provider-aws/pull/7726#discussion_r1955280055 if len(capacityReservationIDs) == 0 { capacityReservationIDs = append(capacityReservationIDs, "") } diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go index e16f5f867b8a..8a7332c40c6a 100644 --- a/pkg/providers/capacityreservation/provider.go +++ b/pkg/providers/capacityreservation/provider.go @@ -17,6 +17,7 @@ package capacityreservation import ( "context" "fmt" + "sync" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" @@ -39,6 +40,7 @@ type Provider interface { type DefaultProvider struct { availabilityCache + sync.Mutex ec2api sdk.EC2API clk clock.Clock @@ -64,6 +66,10 @@ } func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) { + // Take a write lock over the entire List operation to minimize duplicate DescribeCapacityReservations calls + p.Lock() + defer p.Unlock() + var reservations []*ec2types.CapacityReservation queries := QueriesFromSelectorTerms(selectorTerms...) reservations, queries = p.resolveCachedQueries(queries...) @@ -72,18 +78,19 @@ } for _, q := range queries { paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, q.DescribeCapacityReservationsInput()) + var queryReservations []*ec2types.CapacityReservation for paginator.HasMorePages() { out, err := paginator.NextPage(ctx) if err != nil { return nil, fmt.Errorf("listing capacity reservations, %w", err) } - queryReservations := lo.ToSlicePtr(out.CapacityReservations) - p.reservationCache.SetDefault(q.CacheKey(), queryReservations) - reservations = append(reservations, queryReservations...) - p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { - return *r.CapacityReservationId, int(*r.AvailableInstanceCount) - })) + queryReservations = append(queryReservations, lo.ToSlicePtr(out.CapacityReservations)...) } + p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { + return *r.CapacityReservationId, int(*r.AvailableInstanceCount) + })) + p.reservationCache.SetDefault(q.CacheKey(), queryReservations) + reservations = append(reservations, queryReservations...) } return p.filterReservations(reservations), nil } diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index df81a676a29e..13808b5225d8 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -125,20 +125,20 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass if err != nil { return nil, cloudprovider.NewCreateError(fmt.Errorf("truncating instance types, %w", err), "InstanceTypeResolutionFailed", "Error truncating instance types based on the passed-in requirements") } - fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags) + capacityType := p.getCapacityType(nodeClaim, instanceTypes) + fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, capacityType, instanceTypes, tags) if awserrors.IsLaunchTemplateNotFound(err) { // retry once if launch template is not found.
This allows karpenter to generate a new LT if the // cache was out-of-sync on the first try - fleetInstance, err = p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags) + fleetInstance, err = p.launchInstance(ctx, nodeClass, nodeClaim, capacityType, instanceTypes, tags) } if err != nil { return nil, err } - capacityType := p.getCapacityType(nodeClaim, instanceTypes) var capacityReservation string if capacityType == karpv1.CapacityTypeReserved { - capacityReservation = p.getCapacityReservationForInstance( + capacityReservation = p.getCapacityReservationIDForInstance( string(fleetInstance.InstanceType), *fleetInstance.LaunchTemplateAndOverrides.Overrides.AvailabilityZone, instanceTypes, @@ -242,8 +242,14 @@ func (p *DefaultProvider) CreateTags(ctx context.Context, id string, tags map[st return nil } -func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, tags map[string]string) (ec2types.CreateFleetInstance, error) { - capacityType := p.getCapacityType(nodeClaim, instanceTypes) +func (p *DefaultProvider) launchInstance( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + capacityType string, + instanceTypes []*cloudprovider.InstanceType, + tags map[string]string, +) (ec2types.CreateFleetInstance, error) { zonalSubnets, err := p.subnetProvider.ZonalSubnetsForLaunch(ctx, nodeClass, instanceTypes, capacityType) if err != nil { return ec2types.CreateFleetInstance{}, cloudprovider.NewCreateError(fmt.Errorf("getting subnets, %w", err), "SubnetResolutionFailed", "Error getting subnets") @@ -366,32 +372,40 @@ func (p *DefaultProvider) getLaunchTemplateConfigs( // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image, capacityReservationID string) []ec2types.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides( + instanceTypes []*cloudprovider.InstanceType, + zonalSubnets map[string]*subnet.Subnet, + reqs scheduling.Requirements, + image, capacityReservationID string, +) []ec2types.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { *cloudprovider.Offering parentInstanceTypeName ec2types.InstanceType } - var unwrappedOfferings []offeringWithParentName + var filteredOfferings []offeringWithParentName for _, it := range instanceTypes { - ofs := lo.Map(it.Offerings.Available(), func(of *cloudprovider.Offering, _ int) offeringWithParentName { - return offeringWithParentName{ - Offering: of, + ofs := it.Offerings.Available().Compatible(reqs) + // If we are generating a launch template for a specific capacity reservation, we only want to include the offering + // for that capacity reservation when generating overrides. 
+ if capacityReservationID != "" { + ofs = ofs.Compatible(scheduling.NewRequirements(scheduling.NewRequirement( + cloudprovider.ReservationIDLabel, + corev1.NodeSelectorOpIn, + capacityReservationID, + ))) + } + for _, o := range ofs { + filteredOfferings = append(filteredOfferings, offeringWithParentName{ + Offering: o, parentInstanceTypeName: ec2types.InstanceType(it.Name), - } - }) - unwrappedOfferings = append(unwrappedOfferings, ofs...) + }) + } } var overrides []ec2types.FleetLaunchTemplateOverridesRequest - for _, offering := range unwrappedOfferings { - if capacityReservationID != "" && offering.ReservationID() != capacityReservationID { - continue - } - if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { - continue - } - subnet, ok := zonalSubnets[offering.Requirements.Get(corev1.LabelTopologyZone).Any()] + for _, offering := range filteredOfferings { + subnet, ok := zonalSubnets[offering.Zone()] if !ok { continue } @@ -424,7 +438,7 @@ func (p *DefaultProvider) updateUnavailableOfferingsCache( reservationIDs := make([]string, 0, len(errs)) for i := range errs { - id := p.getCapacityReservationForInstance( + id := p.getCapacityReservationIDForInstance( string(errs[i].LaunchTemplateAndOverrides.Overrides.InstanceType), lo.FromPtr(errs[i].LaunchTemplateAndOverrides.Overrides.AvailabilityZone), instanceTypes, @@ -440,7 +454,7 @@ func (p *DefaultProvider) updateUnavailableOfferingsCache( p.capacityReservationProvider.MarkUnavailable(reservationIDs...) } -func (p *DefaultProvider) getCapacityReservationForInstance(instance, zone string, instanceTypes []*cloudprovider.InstanceType) string { +func (p *DefaultProvider) getCapacityReservationIDForInstance(instance, zone string, instanceTypes []*cloudprovider.InstanceType) string { for _, it := range instanceTypes { if it.Name != instance { continue diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 2ab95c5714b2..24dd96deeb08 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -25,6 +25,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype/offering" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" @@ -83,10 +84,12 @@ type DefaultProvider struct { func NewDefaultProvider( instanceTypesCache *cache.Cache, + offeringCache *cache.Cache, discoveredCapacityCache *cache.Cache, ec2api sdk.EC2API, subnetProvider subnet.Provider, pricingProvider pricing.Provider, + capacityReservationProvider capacityreservation.Provider, unavailableOfferingsCache *awscache.UnavailableOfferings, instanceTypesResolver Resolver, ) *DefaultProvider { @@ -100,8 +103,12 @@ func NewDefaultProvider( discoveredCapacityCache: discoveredCapacityCache, cm: pretty.NewChangeMonitor(), instanceTypesSeqNum: 0, - - offeringProvider: offering.NewDefaultProvider(unavailableOfferingsCache, pricingProvider), + offeringProvider: offering.NewDefaultProvider( + pricingProvider, + capacityReservationProvider, + unavailableOfferingsCache, + offeringCache, + ), } } @@ -147,6 +154,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) p.instanceTypesCache.SetDefault(key, instanceTypes) } return p.offeringProvider.InjectOfferings( + ctx, instanceTypes, nodeClass, p.allZones, diff 
--git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index ae5e667f145d..602a5c9b5180 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -19,11 +19,14 @@ import ( "fmt" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/mitchellh/hashstructure/v2" + "github.com/patrickmn/go-cache" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/scheduling" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -37,19 +40,28 @@ type Provider interface { } type DefaultProvider struct { - unavailableOfferings *awscache.UnavailableOfferings pricingProvider pricing.Provider capacityReservationProvider capacityreservation.Provider + unavailableOfferings *awscache.UnavailableOfferings + cache *cache.Cache } -func NewDefaultProvider(unavailableOfferingsCache *awscache.UnavailableOfferings, pricingProvider pricing.Provider) *DefaultProvider { +func NewDefaultProvider( + pricingProvider pricing.Provider, + capacityReservationProvider capacityreservation.Provider, + unavailableOfferingsCache *awscache.UnavailableOfferings, + offeringCache *cache.Cache, +) *DefaultProvider { return &DefaultProvider{ - unavailableOfferings: unavailableOfferingsCache, - pricingProvider: pricingProvider, + pricingProvider: pricingProvider, + capacityReservationProvider: capacityReservationProvider, + unavailableOfferings: unavailableOfferingsCache, + cache: offeringCache, } } func (p *DefaultProvider) InjectOfferings( + ctx context.Context, instanceTypes []*cloudprovider.InstanceType, nodeClass *v1.EC2NodeClass, allZones sets.Set[string], @@ -60,6 +72,7 @@ func (p *DefaultProvider) InjectOfferings( its := []*cloudprovider.InstanceType{} for _, it := range instanceTypes { offerings := p.createOfferings( + ctx, it, nodeClass, allZones, @@ -91,47 +104,58 @@ func (p *DefaultProvider) InjectOfferings( //nolint:gocyclo func (p *DefaultProvider) createOfferings( + ctx context.Context, it *cloudprovider.InstanceType, nodeClass *v1.EC2NodeClass, allZones sets.Set[string], subnetZones map[string]string, ) cloudprovider.Offerings { - itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...) - var offerings []*cloudprovider.Offering - for zone := range allZones { - for _, capacityType := range it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values() { - // Reserved capacity types are constructed separately - if capacityType == karpv1.CapacityTypeReserved { - continue - } + itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...) - isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) - _, hasSubnetZone := subnetZones[zone] - var price float64 - var hasPrice bool - switch capacityType { - case karpv1.CapacityTypeOnDemand: - price, hasPrice = p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)) - case karpv1.CapacityTypeSpot: - price, hasPrice = p.pricingProvider.SpotPrice(ec2types.InstanceType(it.Name), zone) - default: - panic(fmt.Sprintf("invalid capacity type %q in requirements for instance type %q", capacityType, it.Name)) + if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it)); ok { + offerings = append(offerings, ofs.([]*cloudprovider.Offering)...) 
+ } else { + var cachedOfferings []*cloudprovider.Offering + for zone := range allZones { + for _, capacityType := range it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values() { + // Reserved capacity types are constructed separately + if capacityType == karpv1.CapacityTypeReserved { + continue + } + isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) + _, hasSubnetZone := subnetZones[zone] + var price float64 + var hasPrice bool + switch capacityType { + case karpv1.CapacityTypeOnDemand: + price, hasPrice = p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)) + case karpv1.CapacityTypeSpot: + price, hasPrice = p.pricingProvider.SpotPrice(ec2types.InstanceType(it.Name), zone) + default: + panic(fmt.Sprintf("invalid capacity type %q in requirements for instance type %q", capacityType, it.Name)) + } + offering := &cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone), + scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist), + ), + Price: price, + Available: !isUnavailable && hasPrice && itZones.Has(zone) && hasSubnetZone, + } + if id, ok := subnetZones[zone]; ok { + offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) + } + cachedOfferings = append(cachedOfferings, offering) + offerings = append(cachedOfferings, offering) } - offering := &cloudprovider.Offering{ - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone), - scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist), - ), - Price: price, - Available: !isUnavailable && hasPrice && itZones.Has(zone) && hasSubnetZone, - } - if id, ok := subnetZones[zone]; ok { - offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) - } - offerings = append(offerings, offering) } + p.cache.SetDefault(p.cacheKeyFromInstanceType(it), cachedOfferings) + offerings = append(offerings, cachedOfferings...) 
+ } + if !options.FromContext(ctx).FeatureGates.ReservedCapacity { + return offerings } for i := range nodeClass.Status.CapacityReservations { @@ -167,3 +191,23 @@ func (p *DefaultProvider) createOfferings( } return offerings } + +func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType) string { + zoneHash, _ := hashstructure.Hash( + it.Requirements.Get(corev1.LabelTopologyZone).Values(), + hashstructure.FormatV2, + &hashstructure.HashOptions{SlicesAsSets: true}, + ) + ctHash, _ := hashstructure.Hash( + it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values(), + hashstructure.FormatV2, + &hashstructure.HashOptions{SlicesAsSets: true}, + ) + return fmt.Sprintf( + "%s-%016x-%016x-%d", + it.Name, + zoneHash, + ctHash, + p.unavailableOfferings.SeqNum, + ) +} diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index eb162c9c4cd2..e504866e62e0 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -2448,7 +2448,7 @@ var _ = Describe("InstanceTypeProvider", func() { nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, RootVolume: false, }, } @@ -2457,32 +2457,39 @@ var _ = Describe("InstanceTypeProvider", func() { {Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}, {Spec: v1.EC2NodeClassSpec{AMISelectorTerms: []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}}}}, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { - DeviceName: lo.ToPtr("/dev/sda1"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, - RootVolume: true, + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{ + { + DeviceName: lo.ToPtr("/dev/xvda"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, + RootVolume: false, + }, + { + DeviceName: lo.ToPtr("/dev/sda1"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, + RootVolume: true, + }, }, }, - }}, + }, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{{ DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, - RootVolume: true, - }, + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(15, resource.Giga)}, + RootVolume: false, + }}, }, - }}, + }, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { - DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, - RootVolume: false, - }, + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{{ + DeviceName: lo.ToPtr("/dev/yvda"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(25, resource.Giga)}, + RootVolume: true, + }}, }, - }}, + }, } var instanceTypeResult [][]*corecloudprovider.InstanceType ExpectApplied(ctx, env.Client, nodeClass) @@ -2502,12 +2509,12 @@ var _ = Describe("InstanceTypeProvider", func() { _, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) // We are making sure to pull from the cache - instanetypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass) + its, err := 
awsEnv.InstanceTypesProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) - sort.Slice(instanetypes, func(x int, y int) bool { - return instanetypes[x].Name < instanetypes[y].Name + sort.Slice(its, func(x int, y int) bool { + return its[x].Name < its[y].Name }) - instanceTypeResult = append(instanceTypeResult, instanetypes) + instanceTypeResult = append(instanceTypeResult, its) } // Based on the nodeclass configuration, we expect to have 5 unique set of instance types @@ -2552,6 +2559,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) func uniqueInstanceTypeList(instanceTypesLists [][]*corecloudprovider.InstanceType) { + GinkgoHelper() for x := range instanceTypesLists { for y := range instanceTypesLists { if x == y { diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index cc08e153970f..e73e50d254ae 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -2042,7 +2042,7 @@ essential = true nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyCustom) nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} ExpectApplied(ctx, env.Client, nodeClass) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{ { diff --git a/pkg/test/environment.go b/pkg/test/environment.go index b8f91f397b31..e0199a8552a7 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -68,6 +68,7 @@ type Environment struct { // Cache EC2Cache *cache.Cache InstanceTypeCache *cache.Cache + OfferingCache *cache.Cache UnavailableOfferingsCache *awscache.UnavailableOfferings LaunchTemplateCache *cache.Cache SubnetCache *cache.Cache @@ -108,6 +109,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment // cache ec2Cache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) instanceTypeCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + offeringCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) discoveredCapacityCache := cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval) unavailableOfferingsCache := awscache.NewUnavailableOfferings() launchTemplateCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) @@ -135,7 +137,8 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment amiProvider := amifamily.NewDefaultProvider(clock, versionProvider, ssmProvider, ec2api, ec2Cache) amiResolver := amifamily.NewDefaultResolver() instanceTypesResolver := instancetype.NewDefaultResolver(fake.DefaultRegion) - instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, discoveredCapacityCache, ec2api, subnetProvider, pricingProvider, unavailableOfferingsCache, instanceTypesResolver) + capacityReservationProvider := capacityreservation.NewProvider(ec2api, clock, capacityReservationCache, 
capacityReservationAvailabilityCache)
+	instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, offeringCache, discoveredCapacityCache, ec2api, subnetProvider, pricingProvider, capacityReservationProvider, unavailableOfferingsCache, instanceTypesResolver)
 	launchTemplateProvider := launchtemplate.NewDefaultProvider(
 		ctx,
 		launchTemplateCache,
@@ -149,7 +152,6 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment
 		net.ParseIP("10.0.100.10"),
 		"https://test-cluster",
 	)
-	capacityReservationProvider := capacityreservation.NewProvider(ec2api, clock, capacityReservationCache, capacityReservationAvailabilityCache)
 	instanceProvider := instance.NewDefaultProvider(
 		ctx,
 		"",
@@ -169,8 +171,10 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment
 		IAMAPI:     iamapi,
 		PricingAPI: fakePricingAPI,

-		EC2Cache:          ec2Cache,
-		InstanceTypeCache: instanceTypeCache,
+		EC2Cache:            ec2Cache,
+		InstanceTypeCache:   instanceTypeCache,
+		OfferingCache:       offeringCache,
+
 		LaunchTemplateCache: launchTemplateCache,
 		SubnetCache: subnetCache,
 		AvailableIPAdressCache: availableIPAdressCache,
@@ -210,6 +214,7 @@ func (env *Environment) Reset() {
 	env.EC2Cache.Flush()
 	env.UnavailableOfferingsCache.Flush()
+	env.OfferingCache.Flush()
 	env.LaunchTemplateCache.Flush()
 	env.SubnetCache.Flush()
 	env.AssociatePublicIPAddressCache.Flush()

From a9e83b6955ab9829ce2f3bd585bdcd44d8c8baf5 Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Sun, 23 Feb 2025 23:02:15 -0800
Subject: [PATCH 05/16] offering cache key fix

The offering cache key needs to incorporate subnet zones, since NodeClaims
don't get zonal requirements injected from the NodeClass.
---
 .../instancetype/offering/provider.go | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go
index 602a5c9b5180..a5d156930ca5 100644
--- a/pkg/providers/instancetype/offering/provider.go
+++ b/pkg/providers/instancetype/offering/provider.go
@@ -113,7 +113,7 @@ func (p *DefaultProvider) createOfferings(
 	var offerings []*cloudprovider.Offering
 	itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...)

-	if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it)); ok {
+	if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it, subnetZones)); ok {
 		offerings = append(offerings, ofs.([]*cloudprovider.Offering)...)
 	} else {
 		var cachedOfferings []*cloudprovider.Offering
@@ -151,7 +151,7 @@ func (p *DefaultProvider) createOfferings(
 			offerings = append(cachedOfferings, offering)
 		}
 	}
-	p.cache.SetDefault(p.cacheKeyFromInstanceType(it), cachedOfferings)
+	p.cache.SetDefault(p.cacheKeyFromInstanceType(it, subnetZones), cachedOfferings)
 	offerings = append(offerings, cachedOfferings...)
} if !options.FromContext(ctx).FeatureGates.ReservedCapacity { @@ -192,22 +192,28 @@ func (p *DefaultProvider) createOfferings( return offerings } -func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType) string { - zoneHash, _ := hashstructure.Hash( +func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType, subnetZones map[string]string) string { + zonesHash, _ := hashstructure.Hash( it.Requirements.Get(corev1.LabelTopologyZone).Values(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}, ) - ctHash, _ := hashstructure.Hash( + subnetZonesHash, _ := hashstructure.Hash( + subnetZones, + hashstructure.FormatV2, + &hashstructure.HashOptions{SlicesAsSets: true}, + ) + capacityTypesHash, _ := hashstructure.Hash( it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}, ) return fmt.Sprintf( - "%s-%016x-%016x-%d", + "%s-%016x-%016x-%016x-%d", it.Name, - zoneHash, - ctHash, + zonesHash, + subnetZonesHash, + capacityTypesHash, p.unavailableOfferings.SeqNum, ) } From a5048330c9482cd09ff042c9723374e721cdf582 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 24 Feb 2025 02:16:57 -0800 Subject: [PATCH 06/16] remaining feedback --- pkg/cloudprovider/drift.go | 27 +++++++++++++++---- pkg/cloudprovider/suite_test.go | 4 +-- pkg/controllers/interruption/suite_test.go | 2 +- .../nodeclaim/tagging/suite_test.go | 2 +- pkg/controllers/nodeclass/controller.go | 8 ++---- pkg/controllers/nodeclass/hash/suite_test.go | 4 +-- pkg/controllers/nodeclass/readiness_test.go | 2 +- pkg/controllers/nodeclass/suite_test.go | 4 +-- .../instancetype/capacity/suite_test.go | 2 +- .../providers/instancetype/suite_test.go | 4 +-- .../providers/pricing/suite_test.go | 4 +-- .../providers/ssm/invalidation/suite_test.go | 4 +-- .../providers/version/suite_test.go | 4 +-- pkg/providers/amifamily/suite_test.go | 2 +- pkg/providers/instance/instance.go | 3 +++ pkg/providers/instance/suite_test.go | 4 +-- pkg/providers/instanceprofile/suite_test.go | 5 ++-- pkg/providers/instancetype/instancetype.go | 4 +++ .../instancetype/offering/provider.go | 25 +++++++++++++---- pkg/providers/instancetype/suite_test.go | 4 +-- pkg/providers/launchtemplate/suite_test.go | 4 +-- pkg/providers/securitygroup/suite_test.go | 4 +-- pkg/providers/subnet/suite_test.go | 4 +-- pkg/providers/version/suite_test.go | 4 ++- 24 files changed, 86 insertions(+), 48 deletions(-) diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index 0f58e6d06b26..276e6ce7c26d 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -32,10 +32,11 @@ import ( ) const ( - AMIDrift cloudprovider.DriftReason = "AMIDrift" - SubnetDrift cloudprovider.DriftReason = "SubnetDrift" - SecurityGroupDrift cloudprovider.DriftReason = "SecurityGroupDrift" - NodeClassDrift cloudprovider.DriftReason = "NodeClassDrift" + AMIDrift cloudprovider.DriftReason = "AMIDrift" + SubnetDrift cloudprovider.DriftReason = "SubnetDrift" + SecurityGroupDrift cloudprovider.DriftReason = "SecurityGroupDrift" + CapacityReservationDrift cloudprovider.DriftReason = "CapacityReservationDrift" + NodeClassDrift cloudprovider.DriftReason = "NodeClassDrift" ) func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodePool *karpv1.NodePool, nodeClass *v1.EC2NodeClass) (cloudprovider.DriftReason, error) { @@ -59,7 +60,13 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim 
*karpv if err != nil { return "", fmt.Errorf("calculating subnet drift, %w", err) } - drifted := lo.FindOrElse([]cloudprovider.DriftReason{amiDrifted, securitygroupDrifted, subnetDrifted}, "", func(i cloudprovider.DriftReason) bool { + capacityReservationsDrifted := c.isCapacityReservationDrifted(instance, nodeClass) + drifted := lo.FindOrElse([]cloudprovider.DriftReason{ + amiDrifted, + securitygroupDrifted, + subnetDrifted, + capacityReservationsDrifted, + }, "", func(i cloudprovider.DriftReason) bool { return string(i) != "" }) return drifted, nil @@ -119,6 +126,16 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, return "", nil } +// Checks if capacity reservations are drifted, by comparing the capacity reservations persisted to the NodeClass to +// the instance's capacity reservation. +func (c *CloudProvider) isCapacityReservationDrifted(instance *instance.Instance, nodeClass *v1.EC2NodeClass) cloudprovider.DriftReason { + capacityReservationIDs := sets.New(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { return cr.ID })...) + if instance.CapacityReservationID != "" && !capacityReservationIDs.Has(instance.CapacityReservationID) { + return CapacityReservationDrift + } + return "" +} + func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *karpv1.NodeClaim, nodeClass *v1.EC2NodeClass) cloudprovider.DriftReason { nodeClassHash, foundNodeClassHash := nodeClass.Annotations[v1.AnnotationEC2NodeClassHash] nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1.AnnotationEC2NodeClassHashVersion] diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 91d3df302ee7..d6ec65996cb8 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -81,7 +81,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -99,7 +99,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() diff --git a/pkg/controllers/interruption/suite_test.go b/pkg/controllers/interruption/suite_test.go index cb119a240ff5..52850e9d28d9 100644 --- a/pkg/controllers/interruption/suite_test.go +++ b/pkg/controllers/interruption/suite_test.go @@ -100,7 +100,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) unavailableOfferingsCache.Flush() sqsapi.Reset() }) diff --git a/pkg/controllers/nodeclaim/tagging/suite_test.go b/pkg/controllers/nodeclaim/tagging/suite_test.go index 627d1e6c8ddb..783cc8710af0 100644 --- a/pkg/controllers/nodeclaim/tagging/suite_test.go +++ b/pkg/controllers/nodeclaim/tagging/suite_test.go @@ -60,7 +60,7 @@ func 
TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 09ef21a6a798..b2e73cb5c55f 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -55,16 +55,12 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) -type nodeClassReconciler interface { - Reconcile(context.Context, *v1.EC2NodeClass) (reconcile.Result, error) -} - type Controller struct { kubeClient client.Client recorder events.Recorder launchTemplateProvider launchtemplate.Provider instanceProfileProvider instanceprofile.Provider - reconcilers []nodeClassReconciler + reconcilers []reconcile.TypedReconciler[*v1.EC2NodeClass] } func NewController( @@ -80,7 +76,7 @@ func NewController( capacityReservationProvider capacityreservation.Provider, ec2api sdk.EC2API, ) *Controller { - reconcilers := []nodeClassReconciler{ + reconcilers := []reconcile.TypedReconciler[*v1.EC2NodeClass]{ NewAMIReconciler(amiProvider), &Subnet{subnetProvider: subnetProvider}, &SecurityGroup{securityGroupProvider: securityGroupProvider}, diff --git a/pkg/controllers/nodeclass/hash/suite_test.go b/pkg/controllers/nodeclass/hash/suite_test.go index 4d21e0a6f8e3..eebd2cecabd9 100644 --- a/pkg/controllers/nodeclass/hash/suite_test.go +++ b/pkg/controllers/nodeclass/hash/suite_test.go @@ -56,7 +56,7 @@ func TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) @@ -68,7 +68,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) awsEnv.Reset() }) diff --git a/pkg/controllers/nodeclass/readiness_test.go b/pkg/controllers/nodeclass/readiness_test.go index fdd5f3f95010..645253d732be 100644 --- a/pkg/controllers/nodeclass/readiness_test.go +++ b/pkg/controllers/nodeclass/readiness_test.go @@ -53,7 +53,7 @@ var _ = Describe("NodeClass Status Condition Controller", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(6)) + Expect(nodeClass.Status.Conditions).To(HaveLen(7)) Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) }) It("should update status condition as Not Ready", func() { diff --git a/pkg/controllers/nodeclass/suite_test.go 
b/pkg/controllers/nodeclass/suite_test.go index d11266ba119d..5a7af82be21a 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -62,7 +62,7 @@ func TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) @@ -86,7 +86,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) nodeClass = test.EC2NodeClass() awsEnv.Reset() }) diff --git a/pkg/controllers/providers/instancetype/capacity/suite_test.go b/pkg/controllers/providers/instancetype/capacity/suite_test.go index b6a6e5f272d7..5b38a7cb23a9 100644 --- a/pkg/controllers/providers/instancetype/capacity/suite_test.go +++ b/pkg/controllers/providers/instancetype/capacity/suite_test.go @@ -70,7 +70,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimProviderIDFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ VMMemoryOverheadPercent: lo.ToPtr[float64](0.075), })) diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 479ee549cc0a..b07707eb5609 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -55,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -68,7 +68,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/controllers/providers/pricing/suite_test.go b/pkg/controllers/providers/pricing/suite_test.go index e9f51d155bf2..a6261e7f7e75 100644 --- a/pkg/controllers/providers/pricing/suite_test.go +++ b/pkg/controllers/providers/pricing/suite_test.go @@ -57,7 +57,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, 
coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -70,7 +70,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/controllers/providers/ssm/invalidation/suite_test.go b/pkg/controllers/providers/ssm/invalidation/suite_test.go index 1e99a24a1d5c..39894a65d113 100644 --- a/pkg/controllers/providers/ssm/invalidation/suite_test.go +++ b/pkg/controllers/providers/ssm/invalidation/suite_test.go @@ -53,7 +53,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -67,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() }) diff --git a/pkg/controllers/providers/version/suite_test.go b/pkg/controllers/providers/version/suite_test.go index 005c45d6d8e4..e68a9cc96f68 100644 --- a/pkg/controllers/providers/version/suite_test.go +++ b/pkg/controllers/providers/version/suite_test.go @@ -52,7 +52,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -65,7 +65,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 4adc0ea4c558..1a0343b4d13a 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -67,7 +67,7 @@ const ( var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) }) diff --git 
a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 13808b5225d8..df230d51c24a 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -507,6 +507,9 @@ func (*DefaultProvider) filterReservedInstanceTypes(nodeClaimRequirements schedu if len(zonalOfferings) == 0 { continue } + // WARNING: It is only safe to mutate the slice containing the offerings, not the offerings themselves. The individual + // offerings are cached, but not the slice storing them. This helps keep the launch path simple, but changes to the + // caching strategy employed by the InstanceType provider could result in unexpected behavior. it.Offerings = lo.Values(zonalOfferings) reservedInstanceTypes = append(reservedInstanceTypes, it) } diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index dd29355697fa..4ccf9bc52907 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -62,7 +62,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), @@ -74,7 +74,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() }) diff --git a/pkg/providers/instanceprofile/suite_test.go b/pkg/providers/instanceprofile/suite_test.go index 0cb00fde75c6..0991dd32f949 100644 --- a/pkg/providers/instanceprofile/suite_test.go +++ b/pkg/providers/instanceprofile/suite_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/samber/lo" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -54,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -66,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) nodeClass = test.TestNodeClass{ EC2NodeClass: v1.EC2NodeClass{ diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 24dd96deeb08..a059d7697833 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -153,6 +153,10 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) 
 		instanceTypes = p.resolveInstanceTypes(ctx, nodeClass, amiHash)
 		p.instanceTypesCache.SetDefault(key, instanceTypes)
 	}
+	// Offerings aren't cached along with the rest of the instance type info because reserved offerings need to have
+	// up-to-date capacity information. Rather than incurring a cache miss each time an instance is launched into a
+	// reserved offering (or terminated), offerings are injected into the cached instance types on each call. Note that
+	// on-demand and spot offerings are still cached - only reserved offerings are generated each time.
 	return p.offeringProvider.InjectOfferings(
 		ctx,
 		instanceTypes,
diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go
index a5d156930ca5..e68e8ea06801 100644
--- a/pkg/providers/instancetype/offering/provider.go
+++ b/pkg/providers/instancetype/offering/provider.go
@@ -69,7 +69,7 @@ func (p *DefaultProvider) InjectOfferings(
 	subnetZones := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) {
 		return s.Zone, s.ZoneID
 	})
-	its := []*cloudprovider.InstanceType{}
+	var its []*cloudprovider.InstanceType
 	for _, it := range instanceTypes {
 		offerings := p.createOfferings(
 			ctx,
@@ -78,16 +78,31 @@ func (p *DefaultProvider) InjectOfferings(
 			allZones,
 			subnetZones,
 		)
+
+		reservedAvailability := map[string]bool{}
 		for _, of := range offerings {
-			InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{
+			// If the capacity type is reserved we need to determine if any of the reserved offerings are available. Otherwise,
+			// we can update the availability metric directly.
+			if of.CapacityType() == karpv1.CapacityTypeReserved {
+				reservedAvailability[of.Zone()] = reservedAvailability[of.Zone()] || of.Available
+			} else {
+				InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{
+					instanceTypeLabel: it.Name,
+					capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(),
+					zoneLabel:         of.Requirements.Get(corev1.LabelTopologyZone).Any(),
+				})
+			}
+			InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{
 				instanceTypeLabel: it.Name,
 				capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(),
 				zoneLabel:         of.Requirements.Get(corev1.LabelTopologyZone).Any(),
 			})
-			InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{
+		}
+		for zone := range allZones {
+			InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(reservedAvailability[zone], 1, 0)), map[string]string{
 				instanceTypeLabel: it.Name,
-				capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(),
-				zoneLabel:         of.Requirements.Get(corev1.LabelTopologyZone).Any(),
+				capacityTypeLabel: karpv1.CapacityTypeReserved,
+				zoneLabel:         zone,
 			})
 		}
diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go
index e504866e62e0..8f6359cd64be 100644
--- a/pkg/providers/instancetype/suite_test.go
+++ b/pkg/providers/instancetype/suite_test.go
@@ -83,7 +83,7 @@ func TestAWS(t *testing.T) {
 var _ = BeforeSuite(func() {
 	env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...))
-	ctx = coreoptions.ToContext(ctx, coretest.Options())
+	ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}}))
 	ctx = options.ToContext(ctx, test.Options())
 	awsEnv = test.NewEnvironment(ctx, env)
 	fakeClock = &clock.FakeClock{}
@@ -98,7 +98,7 @@ var _ = AfterSuite(func() {
 })
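A note on the InjectOfferings change above: availability for reserved capacity is reported per zone rather than per offering, and a zone counts as available if at least one reserved offering in it is available. The fold below is a minimal sketch of that logic using simplified stand-in types (offering here is not the real cloudprovider.Offering, and the "reserved" string stands in for karpv1.CapacityTypeReserved):

package main

import "fmt"

// offering is a simplified stand-in for cloudprovider.Offering.
type offering struct {
	capacityType string
	zone         string
	available    bool
}

// reservedAvailabilityByZone reports, per zone, whether any reserved offering
// is available, mirroring the reservedAvailability map built in
// InjectOfferings before the availability gauge is set.
func reservedAvailabilityByZone(offerings []offering) map[string]bool {
	avail := map[string]bool{}
	for _, of := range offerings {
		if of.capacityType != "reserved" {
			continue
		}
		avail[of.zone] = avail[of.zone] || of.available
	}
	return avail
}

func main() {
	fmt.Println(reservedAvailabilityByZone([]offering{
		{capacityType: "reserved", zone: "test-zone-1a", available: false},
		{capacityType: "reserved", zone: "test-zone-1a", available: true},
		{capacityType: "reserved", zone: "test-zone-1b", available: false},
	})) // map[test-zone-1a:true test-zone-1b:false]
}

The real code then ranges over allZones when emitting the gauge, so zones with no reserved offerings at all are still explicitly reported as unavailable.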
var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() awsEnv.Reset() diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index e73e50d254ae..c969455ee5fd 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -91,7 +91,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -110,7 +110,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() awsEnv.Reset() diff --git a/pkg/providers/securitygroup/suite_test.go b/pkg/providers/securitygroup/suite_test.go index 629ebd4ade56..fee91aca534e 100644 --- a/pkg/providers/securitygroup/suite_test.go +++ b/pkg/providers/securitygroup/suite_test.go @@ -55,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -67,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 7c16a485a55d..6c142dddd912 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -53,7 +53,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -65,7 +65,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, 
test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ diff --git a/pkg/providers/version/suite_test.go b/pkg/providers/version/suite_test.go index 29910f8e04e0..56bbc5086bc5 100644 --- a/pkg/providers/version/suite_test.go +++ b/pkg/providers/version/suite_test.go @@ -23,6 +23,8 @@ import ( coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" coretest "sigs.k8s.io/karpenter/pkg/test" + "github.com/samber/lo" + "github.com/aws/karpenter-provider-aws/pkg/apis" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -52,7 +54,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) From 9996026d7c101ff0b63e7c1936587e8c05f4bf73 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 24 Feb 2025 11:56:57 -0800 Subject: [PATCH 07/16] test: capacity reservation status tests --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 4 +- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 4 +- pkg/apis/v1/ec2nodeclass.go | 2 +- pkg/apis/v1/ec2nodeclass_status.go | 2 +- .../nodeclass/capacityreservation_test.go | 182 ++++++++++++++++++ pkg/controllers/nodeclass/controller.go | 25 +-- pkg/controllers/nodeclass/readiness_test.go | 21 +- pkg/fake/ec2api.go | 16 +- pkg/fake/utils.go | 33 +++- pkg/providers/capacityreservation/types.go | 30 +-- pkg/test/environment.go | 1 + 11 files changed, 276 insertions(+), 44 deletions(-) create mode 100644 pkg/controllers/nodeclass/capacityreservation_test.go diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 32c21066e067..6021dac17fde 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -247,7 +247,7 @@ spec: properties: id: description: ID is the capacity reservation id in EC2 - pattern: ^cr-[0-9a-z]+$ + pattern: ^cr-.+$ type: string ownerID: description: Owner is the owner id for the ami. @@ -690,7 +690,7 @@ spec: type: string id: description: The id for the capacity reservation. - pattern: ^cr-[0-9a-z]+$ + pattern: ^cr-.+$ type: string instanceMatchCriteria: description: Indicates the type of instance launches the capacity reservation accepts. diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 9dbc2c2817b6..09e845096d29 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -244,7 +244,7 @@ spec: properties: id: description: ID is the capacity reservation id in EC2 - pattern: ^cr-[0-9a-z]+$ + pattern: ^cr-.+$ type: string ownerID: description: Owner is the owner id for the ami. @@ -687,7 +687,7 @@ spec: type: string id: description: The id for the capacity reservation. - pattern: ^cr-[0-9a-z]+$ + pattern: ^cr-.+$ type: string instanceMatchCriteria: description: Indicates the type of instance launches the capacity reservation accepts. 
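Both generated CRDs above relax the reservation ID pattern from ^cr-[0-9a-z]+$ to ^cr-.+$, and the Go API types below make the matching change. The stricter pattern rejects any ID with characters outside [0-9a-z] after the "cr-" prefix, including the descriptive fixture IDs used by the tests in this patch (e.g. cr-m5.large-1a-1, which contains '.' and '-'). A quick check of the two patterns, as a sketch assuming the API server enforces kubebuilder patterns with Go's RE2 regexp engine:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	strict := regexp.MustCompile(`^cr-[0-9a-z]+$`)
	relaxed := regexp.MustCompile(`^cr-.+$`)
	for _, id := range []string{
		"cr-0123456789abcdef0", // realistic EC2 reservation ID: matches both
		"cr-m5.large-1a-1",     // descriptive test fixture: fails the strict pattern
	} {
		fmt.Printf("%-22s strict=%t relaxed=%t\n", id, strict.MatchString(id), relaxed.MatchString(id))
	}
}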
diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index fdc76e33458f..717cee67a7e9 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -184,7 +184,7 @@ type CapacityReservationSelectorTerm struct { // +optional Tags map[string]string `json:"tags,omitempty"` // ID is the capacity reservation id in EC2 - // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" + // +kubebuilder:validation:Pattern:="^cr-.+$" // +optional ID string `json:"id,omitempty"` // Owner is the owner id for the ami. diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 2b56635c1060..062fcac1ed8d 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -81,7 +81,7 @@ type CapacityReservation struct { // +optional EndTime *metav1.Time `json:"endTime,omitempty" hash:"ignore"` // The id for the capacity reservation. - // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" + // +kubebuilder:validation:Pattern:="^cr-.+$" // +required ID string `json:"id"` // Indicates the type of instance launches the capacity reservation accepts. diff --git a/pkg/controllers/nodeclass/capacityreservation_test.go b/pkg/controllers/nodeclass/capacityreservation_test.go new file mode 100644 index 000000000000..a8d115cd1212 --- /dev/null +++ b/pkg/controllers/nodeclass/capacityreservation_test.go @@ -0,0 +1,182 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeclass_test + +import ( + "time" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + . 
"sigs.k8s.io/karpenter/pkg/test/expectations" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" +) + +const selfOwnerID = "012345678901" +const altOwnerID = "123456789012" + +var discoveryTags = map[string]string{ + "karpenter.sh/discovery": "test", +} + +var _ = Describe("NodeClass Capacity Reservation Reconciler", func() { + BeforeEach(func() { + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](10), + Tags: toEC2Tags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-1"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(altOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-2"), + AvailableInstanceCount: lo.ToPtr[int32](15), + Tags: toEC2Tags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + }) + It("should resolve capacity reservations by ID", func() { + const targetID = "cr-m5.large-1a-1" + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(nodeClass.Status.CapacityReservations[0]).To(Equal(v1.CapacityReservation{ + ID: targetID, + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + OwnerID: selfOwnerID, + InstanceType: "m5.large", + AvailabilityZone: "test-zone-1a", + EndTime: nil, + })) + }) + It("should resolve capacity reservations by tags", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(2)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + 
})).To(ContainElements("cr-m5.large-1a-2", "cr-m5.large-1b-2")) + }) + It("should resolve capacity reservations by tags + owner", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + OwnerID: selfOwnerID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })).To(ContainElements("cr-m5.large-1a-2")) + }) + It("should exclude expired capacity reservations", func() { + out := awsEnv.EC2API.DescribeCapacityReservationsOutput.Clone() + targetReservationID := *out.CapacityReservations[0].CapacityReservationId + out.CapacityReservations[0].EndDate = lo.ToPtr(awsEnv.Clock.Now().Add(time.Hour)) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(out) + + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetReservationID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })).To(ContainElements(targetReservationID)) + + awsEnv.Clock.Step(2 * time.Hour) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(0)) + }) + DescribeTable( + "should exclude non-active capacity reservations", + func(state ec2types.CapacityReservationState) { + out := awsEnv.EC2API.DescribeCapacityReservationsOutput.Clone() + targetReservationID := *out.CapacityReservations[0].CapacityReservationId + out.CapacityReservations[0].State = state + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(out) + + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetReservationID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(0)) + }, + lo.FilterMap(ec2types.CapacityReservationStateActive.Values(), func(state ec2types.CapacityReservationState, _ int) (TableEntry, bool) { + return Entry(string(state), state), state != ec2types.CapacityReservationStateActive + }), + ) +}) + +func toEC2Tags(tags map[string]string) []ec2types.Tag { + return lo.MapToSlice(tags, func(key, value string) ec2types.Tag { + return ec2types.Tag{ + Key: lo.ToPtr(key), + Value: lo.ToPtr(value), + } + }) +} diff --git 
a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index b2e73cb5c55f..842599cb9850 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -76,23 +76,20 @@ func NewController( capacityReservationProvider capacityreservation.Provider, ec2api sdk.EC2API, ) *Controller { - reconcilers := []reconcile.TypedReconciler[*v1.EC2NodeClass]{ - NewAMIReconciler(amiProvider), - &Subnet{subnetProvider: subnetProvider}, - &SecurityGroup{securityGroupProvider: securityGroupProvider}, - &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - &Validation{ec2api: ec2api, amiProvider: amiProvider}, - &Readiness{launchTemplateProvider: launchTemplateProvider}, - } - if options.FromContext(ctx).FeatureGates.ReservedCapacity { - reconcilers = append(reconcilers, NewCapacityReservationReconciler(clk, capacityReservationProvider)) - } return &Controller{ kubeClient: kubeClient, recorder: recorder, launchTemplateProvider: launchTemplateProvider, instanceProfileProvider: instanceProfileProvider, - reconcilers: reconcilers, + reconcilers: []reconcile.TypedReconciler[*v1.EC2NodeClass]{ + NewAMIReconciler(amiProvider), + NewCapacityReservationReconciler(clk, capacityReservationProvider), + &Subnet{subnetProvider: subnetProvider}, + &SecurityGroup{securityGroupProvider: securityGroupProvider}, + &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, + &Validation{ec2api: ec2api, amiProvider: amiProvider}, + &Readiness{launchTemplateProvider: launchTemplateProvider}, + }, } } @@ -100,6 +97,7 @@ func (c *Controller) Name() string { return "nodeclass" } +//nolint:gocyclo func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, c.Name()) @@ -126,6 +124,9 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) var results []reconcile.Result var errs error for _, reconciler := range c.reconcilers { + if _, ok := reconciler.(*CapacityReservation); ok && !options.FromContext(ctx).FeatureGates.ReservedCapacity { + continue + } res, err := reconciler.Reconcile(ctx, nodeClass) errs = multierr.Append(errs, err) results = append(results, res) diff --git a/pkg/controllers/nodeclass/readiness_test.go b/pkg/controllers/nodeclass/readiness_test.go index 645253d732be..6891e0c93ea4 100644 --- a/pkg/controllers/nodeclass/readiness_test.go +++ b/pkg/controllers/nodeclass/readiness_test.go @@ -17,6 +17,7 @@ package nodeclass_test import ( "github.com/awslabs/operatorpkg/status" "github.com/samber/lo" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -49,13 +50,19 @@ var _ = Describe("NodeClass Status Condition Controller", func() { }, }) }) - It("should update status condition on nodeClass as Ready", func() { - ExpectApplied(ctx, env.Client, nodeClass) - ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) - nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(7)) - Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) - }) + DescribeTable( + "should update status condition on nodeClass as Ready", + func(reservedCapacity bool) { + coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = reservedCapacity + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + 
nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.Conditions).To(HaveLen(lo.Ternary(reservedCapacity, 7, 6))) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) + }, + Entry("when reserved capacity feature flag is enabled", true), + Entry("when reserved capacity feature flag is disabled", false), + ) It("should update status condition as Not Ready", func() { nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{ { diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index c6f6556de0b0..672cd47ed10a 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -46,6 +46,7 @@ type CapacityPool struct { // EC2Behavior must be reset between tests otherwise tests will // pollute each other. type EC2Behavior struct { + DescribeCapacityReservationsOutput AtomicPtr[ec2.DescribeCapacityReservationsOutput] DescribeImagesOutput AtomicPtr[ec2.DescribeImagesOutput] DescribeLaunchTemplatesOutput AtomicPtr[ec2.DescribeLaunchTemplatesOutput] DescribeSubnetsOutput AtomicPtr[ec2.DescribeSubnetsOutput] @@ -340,6 +341,19 @@ func filterInstances(instances []ec2types.Instance, filters []ec2types.Filter) [ return ret } +func (e *EC2API) DescribeCapacityReservations(ctx context.Context, input *ec2.DescribeCapacityReservationsInput, _ ...func(*ec2.Options)) (*ec2.DescribeCapacityReservationsOutput, error) { + if !e.NextError.IsNil() { + defer e.NextError.Reset() + return nil, e.NextError.Get() + } + if !e.DescribeCapacityReservationsOutput.IsNil() { + out := e.DescribeCapacityReservationsOutput.Clone() + out.CapacityReservations = FilterDescribeCapacityReservations(out.CapacityReservations, input.CapacityReservationIds, input.Filters) + return out, nil + } + return &ec2.DescribeCapacityReservationsOutput{}, nil +} + func (e *EC2API) DescribeImages(ctx context.Context, input *ec2.DescribeImagesInput, _ ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() @@ -379,7 +393,7 @@ func (e *EC2API) DescribeLaunchTemplates(_ context.Context, input *ec2.DescribeL output := &ec2.DescribeLaunchTemplatesOutput{} e.LaunchTemplates.Range(func(key, value interface{}) bool { launchTemplate := value.(ec2types.LaunchTemplate) - if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), launchTemplate.Tags) { + if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), "", launchTemplate.Tags) { output.LaunchTemplates = append(output.LaunchTemplates, launchTemplate) } return true diff --git a/pkg/fake/utils.go b/pkg/fake/utils.go index 7941b0c0b33e..7f1bc6170b9e 100644 --- a/pkg/fake/utils.go +++ b/pkg/fake/utils.go @@ -90,7 +90,7 @@ func SubnetsFromFleetRequest(createFleetInput *ec2.CreateFleetInput) []string { // Filters are chained with a logical "AND" func FilterDescribeSecurtyGroups(sgs []ec2types.SecurityGroup, filters []ec2types.Filter) []ec2types.SecurityGroup { return lo.Filter(sgs, func(group ec2types.SecurityGroup, _ int) bool { - return Filter(filters, *group.GroupId, *group.GroupName, group.Tags) + return Filter(filters, *group.GroupId, *group.GroupName, "", group.Tags) }) } @@ -98,7 +98,26 @@ func FilterDescribeSecurtyGroups(sgs 
[]ec2types.SecurityGroup, filters []ec2type // Filters are chained with a logical "AND" func FilterDescribeSubnets(subnets []ec2types.Subnet, filters []ec2types.Filter) []ec2types.Subnet { return lo.Filter(subnets, func(subnet ec2types.Subnet, _ int) bool { - return Filter(filters, *subnet.SubnetId, "", subnet.Tags) + return Filter(filters, *subnet.SubnetId, "", "", subnet.Tags) + }) +} + +func FilterDescribeCapacityReservations(crs []ec2types.CapacityReservation, ids []string, filters []ec2types.Filter) []ec2types.CapacityReservation { + idSet := sets.New[string](ids...) + return lo.Filter(crs, func(cr ec2types.CapacityReservation, _ int) bool { + if len(ids) != 0 && !idSet.Has(*cr.CapacityReservationId) { + return false + } + if stateFilter, ok := lo.Find(filters, func(f ec2types.Filter) bool { + return lo.FromPtr(f.Name) == "state" + }); ok { + if !lo.Contains(stateFilter.Values, string(cr.State)) { + return false + } + } + return Filter(lo.Reject(filters, func(f ec2types.Filter, _ int) bool { + return lo.FromPtr(f.Name) == "state" + }), *cr.CapacityReservationId, "", *cr.OwnerId, cr.Tags) }) } @@ -113,12 +132,12 @@ func FilterDescribeImages(images []ec2types.Image, filters []ec2types.Filter) [] } return Filter(lo.Reject(filters, func(f ec2types.Filter, _ int) bool { return lo.FromPtr(f.Name) == "state" - }), *image.ImageId, *image.Name, image.Tags) + }), *image.ImageId, *image.Name, "", image.Tags) }) } //nolint:gocyclo -func Filter(filters []ec2types.Filter, id, name string, tags []ec2types.Tag) bool { +func Filter(filters []ec2types.Filter, id, name, owner string, tags []ec2types.Tag) bool { return lo.EveryBy(filters, func(filter ec2types.Filter) bool { switch filterName := aws.ToString(filter.Name); { case filterName == "subnet-id" || filterName == "group-id" || filterName == "image-id": @@ -133,6 +152,12 @@ func Filter(filters []ec2types.Filter, id, name string, tags []ec2types.Tag) boo return true } } + case filterName == "owner-id": + for _, val := range filter.Values { + if owner == val { + return true + } + } case strings.HasPrefix(filterName, "tag"): if matchTags(tags, filter) { return true diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go index d5ec2c0c5461..c2de9cbd182c 100644 --- a/pkg/providers/capacityreservation/types.go +++ b/pkg/providers/capacityreservation/types.go @@ -30,9 +30,9 @@ import ( ) type Query struct { - ids []string - ownerID string - tags map[string]string + IDs []string + OwnerID string + Tags map[string]string } func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Query { @@ -42,13 +42,15 @@ func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Qu if terms[i].ID != "" { ids = append(ids, terms[i].ID) } - queries = append(queries, &Query{ - ownerID: terms[i].OwnerID, - tags: terms[i].Tags, - }) + if len(terms[i].Tags) != 0 { + queries = append(queries, &Query{ + OwnerID: terms[i].OwnerID, + Tags: terms[i].Tags, + }) + } } if len(ids) != 0 { - queries = append(queries, &Query{ids: ids}) + queries = append(queries, &Query{IDs: ids}) } return queries } @@ -64,20 +66,20 @@ func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReserva Name: lo.ToPtr("state"), Values: []string{string(ec2types.CapacityReservationStateActive)}, }} - if len(q.ids) != 0 { + if len(q.IDs) != 0 { return &ec2.DescribeCapacityReservationsInput{ Filters: filters, - CapacityReservationIds: q.ids, + CapacityReservationIds: q.IDs, } } - if q.ownerID != "" { + if 
q.OwnerID != "" { filters = append(filters, ec2types.Filter{ Name: lo.ToPtr("owner-id"), - Values: []string{q.ownerID}, + Values: []string{q.OwnerID}, }) } - if len(q.tags) != 0 { - filters = append(filters, lo.MapToSlice(q.tags, func(k, v string) ec2types.Filter { + if len(q.Tags) != 0 { + filters = append(filters, lo.MapToSlice(q.Tags, func(k, v string) ec2types.Filter { if v == "*" { return ec2types.Filter{ Name: lo.ToPtr("tag-key"), diff --git a/pkg/test/environment.go b/pkg/test/environment.go index e0199a8552a7..a5db8743083c 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -223,6 +223,7 @@ func (env *Environment) Reset() { env.InstanceProfileCache.Flush() env.SSMCache.Flush() env.DiscoveredCapacityCache.Flush() + env.CapacityReservationCache.Flush() mfs, err := crmetrics.Registry.Gather() if err != nil { for _, mf := range mfs { From f56642523f4ffd03e9e7ed21539a7f6be38e995b Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 24 Feb 2025 14:39:19 -0800 Subject: [PATCH 08/16] instance tests --- pkg/fake/ec2api.go | 63 +++++++++-- pkg/providers/capacityreservation/types.go | 15 ++- pkg/providers/instance/suite_test.go | 119 +++++++++++++++++++++ 3 files changed, 189 insertions(+), 8 deletions(-) diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 672cd47ed10a..112c441aed07 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -38,9 +38,10 @@ import ( ) type CapacityPool struct { - CapacityType string - InstanceType string - Zone string + CapacityType string + InstanceType string + Zone string + ReservationID string } // EC2Behavior must be reset between tests otherwise tests will @@ -127,13 +128,23 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . return nil, fmt.Errorf("missing launch template name") } var instanceIds []string - var skippedPools []CapacityPool + var icedPools []CapacityPool + var reservationExceededPools []CapacityPool var spotInstanceRequestID *string if string(input.TargetCapacitySpecification.DefaultTargetCapacityType) == karpv1.CapacityTypeSpot { spotInstanceRequestID = aws.String(test.RandomName()) } + launchTemplates := map[string]*ec2.CreateLaunchTemplateInput{} + for e.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { + lt := e.CreateLaunchTemplateBehavior.CalledWithInput.Pop() + launchTemplates[*lt.LaunchTemplateName] = lt + } + for _, ltInput := range launchTemplates { + e.CreateLaunchTemplateBehavior.CalledWithInput.Add(ltInput) + } + fulfilled := 0 for _, ltc := range input.LaunchTemplateConfigs { for _, override := range ltc.Overrides { @@ -142,7 +153,7 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . if pool.InstanceType == string(override.InstanceType) && pool.Zone == aws.ToString(override.AvailabilityZone) && pool.CapacityType == string(input.TargetCapacitySpecification.DefaultTargetCapacityType) { - skippedPools = append(skippedPools, pool) + icedPools = append(icedPools, pool) skipInstance = true return false } @@ -151,7 +162,34 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . 
if skipInstance { continue } - amiID := aws.String("") + amiID := lo.ToPtr("") + var capacityReservationID *string + if lt, ok := launchTemplates[lo.FromPtr(ltc.LaunchTemplateSpecification.LaunchTemplateName)]; ok { + amiID = lt.LaunchTemplateData.ImageId + if crs := lt.LaunchTemplateData.CapacityReservationSpecification; crs != nil && crs.CapacityReservationPreference == ec2types.CapacityReservationPreferenceCapacityReservationsOnly { + id := crs.CapacityReservationTarget.CapacityReservationId + if id == nil { + panic("received a launch template targeting capacity reservations without a provided ID") + } + capacityReservationID = id + } + } + if capacityReservationID != nil { + if cr, ok := lo.Find(e.DescribeCapacityReservationsOutput.Clone().CapacityReservations, func(cr ec2types.CapacityReservation) bool { + return *cr.CapacityReservationId == *capacityReservationID + }); !ok || *cr.AvailableInstanceCount == 0 { + reservationExceededPools = append(reservationExceededPools, CapacityPool{ + InstanceType: string(override.InstanceType), + Zone: lo.FromPtr(override.AvailabilityZone), + CapacityType: karpv1.CapacityTypeReserved, + ReservationID: *capacityReservationID, + }) + skipInstance = true + } + } + if skipInstance { + continue + } if e.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { lt := e.CreateLaunchTemplateBehavior.CalledWithInput.Pop() amiID = lt.LaunchTemplateData.ImageId @@ -193,7 +231,7 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . }, }, }} - for _, pool := range skippedPools { + for _, pool := range icedPools { result.Errors = append(result.Errors, ec2types.CreateFleetError{ ErrorCode: aws.String("InsufficientInstanceCapacity"), LaunchTemplateAndOverrides: &ec2types.LaunchTemplateAndOverridesResponse{ @@ -204,6 +242,17 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . 
}, }) } + for _, pool := range reservationExceededPools { + result.Errors = append(result.Errors, ec2types.CreateFleetError{ + ErrorCode: lo.ToPtr("ReservationCapacityExceeded"), + LaunchTemplateAndOverrides: &ec2types.LaunchTemplateAndOverridesResponse{ + Overrides: &ec2types.FleetLaunchTemplateOverrides{ + InstanceType: ec2types.InstanceType(pool.InstanceType), + AvailabilityZone: lo.ToPtr(pool.Zone), + }, + }, + }) + } return result, nil }) } diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go index c2de9cbd182c..c1eb8d5b156c 100644 --- a/pkg/providers/capacityreservation/types.go +++ b/pkg/providers/capacityreservation/types.go @@ -158,7 +158,20 @@ func (c *availabilityCache) GetAvailableInstanceCount(reservationID string) int c.mu.RLock() defer c.mu.RUnlock() entry, ok := c.cache.Get(reservationID) - return lo.Ternary(ok, entry.(*availabilityCacheEntry).count, 0) + if !ok { + return 0 + } + return entry.(*availabilityCacheEntry).count +} + +// TODO: Determine better abstraction for setting availability in tests without reconciling the nodeclass controller +func (c *availabilityCache) SetAvailableInstanceCount(reservationID string, count int) { + c.mu.Lock() + defer c.mu.Unlock() + c.cache.SetDefault(reservationID, &availabilityCacheEntry{ + count: count, + syncTime: c.clk.Now(), + }) } func (c *availabilityCache) MarkUnavailable(reservationIDs ...string) { diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 4ccf9bc52907..9146d9b96a0a 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -23,9 +23,11 @@ import ( "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/awslabs/operatorpkg/object" "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" @@ -137,6 +139,123 @@ var _ = Describe("InstanceProvider", func() { Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue()) Expect(instance).To(BeNil()) }) + It("should return an ICE error when all attempted instance types return a ReservedCapacityReservation error", func() { + const targetReservationID = "cr-m5.large-1a-1" + // Ensure that Karpenter believes a reservation is available, but the API returns no capacity when attempting to launch + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(targetReservationID, 1) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(targetReservationID), + AvailableInstanceCount: lo.ToPtr[int32](0), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, v1.CapacityReservation{ + ID: "cr-m5.large-1a-1", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }) + nodeClaim.Spec.Requirements = append( + nodeClaim.Spec.Requirements, + 
karpv1.NodeSelectorRequirementWithMinValues{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}, + ) + ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass) + + instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool) + Expect(err).ToNot(HaveOccurred()) + instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, nil, instanceTypes) + Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue()) + Expect(instance).To(BeNil()) + + // Ensure we marked the reservation as unavailable after encountering the error + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(targetReservationID)).To(Equal(0)) + }) + It("should filter compatible reserved offerings such that only one offering per capacity pool is included in the CreateFleet request", func() { + const targetReservationID = "cr-m5.large-1a-2" + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](1), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(targetReservationID), + AvailableInstanceCount: lo.ToPtr[int32](2), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-m5.large-1a-1", 1) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(targetReservationID, 2) + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, []v1.CapacityReservation{ + { + ID: "cr-m5.large-1a-1", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + { + ID: "cr-m5.large-1a-2", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + }...) 
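+		// Both reservations above occupy the same capacity pool (m5.large in test-zone-1a); the provider
+		// is expected to collapse them into a single offering backed by the reservation with more capacity.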
+ + nodeClaim.Spec.Requirements = append( + nodeClaim.Spec.Requirements, + karpv1.NodeSelectorRequirementWithMinValues{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}, + ) + ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass) + + instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool) + Expect(err).ToNot(HaveOccurred()) + instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, nil, instanceTypes) + Expect(err).ToNot(HaveOccurred()) + Expect(instance.CapacityType).To(Equal(karpv1.CapacityTypeReserved)) + Expect(instance.CapacityReservationID).To(Equal(targetReservationID)) + + // We should have only created a single launch template, for the single capacity reservation we're attempting to launch + var launchTemplates []*ec2.CreateLaunchTemplateInput + for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { + launchTemplates = append(launchTemplates, awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop()) + } + Expect(launchTemplates).To(HaveLen(1)) + Expect(*launchTemplates[0].LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId).To(Equal(targetReservationID)) + + Expect(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Len()).ToNot(Equal(0)) + createFleetInput := awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop() + Expect(createFleetInput.TargetCapacitySpecification.DefaultTargetCapacityType).To(Equal(ec2types.DefaultTargetCapacityTypeOnDemand)) + Expect(createFleetInput.LaunchTemplateConfigs).To(HaveLen(1)) + Expect(createFleetInput.LaunchTemplateConfigs[0].Overrides).To(HaveLen(1)) + }) It("should return all NodePool-owned instances from List", func() { ids := sets.New[string]() // Provision instances that have the karpenter.sh/nodepool key From 423b3bbeacb9e4d8830434c2823fab03b6ad4095 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 24 Feb 2025 16:52:04 -0800 Subject: [PATCH 09/16] checkpoint feedback + lt tests --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- go.mod | 2 +- go.sum | 4 +- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- pkg/apis/v1/ec2nodeclass.go | 2 +- pkg/cloudprovider/drift.go | 3 + .../capacityreservation/controller.go | 4 +- .../nodeclass/capacityreservation.go | 4 +- .../nodeclass/capacityreservation_test.go | 14 +- pkg/controllers/nodeclass/suite_test.go | 6 +- pkg/fake/ec2api.go | 47 +++---- pkg/fake/utils.go | 34 ++--- .../capacityreservation/suite_test.go | 109 +++++++++++++++ pkg/providers/instance/suite_test.go | 2 +- .../instancetype/offering/provider.go | 21 +-- pkg/providers/instancetype/types.go | 20 +-- pkg/providers/launchtemplate/suite_test.go | 125 +++++++++++++++++- pkg/test/utils.go | 15 +++ 18 files changed, 318 insertions(+), 98 deletions(-) create mode 100644 pkg/providers/capacityreservation/suite_test.go diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 6021dac17fde..0289a71a179a 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -247,7 +247,7 @@ spec: properties: id: description: ID is the capacity reservation id in EC2 - pattern: ^cr-.+$ + pattern: ^cr-[0-9a-z]+$ type: string ownerID: description: Owner is the owner id for the ami. 
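Editor's note, not part of the patch: the tightened "^cr-[0-9a-z]+$" pattern above rejects the descriptive
reservation IDs the test suites fabricate, which is why this patch also adds the test-only
DisableCapacityReservationIDValidation helper in pkg/test/utils.go (below). A minimal, illustrative check:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// The production CRD pattern for capacity reservation IDs.
	idPattern := regexp.MustCompile(`^cr-[0-9a-z]+$`)

	fmt.Println(idPattern.MatchString("cr-0123456789abcdef0")) // true: realistic EC2-style ID
	// false: '.' and the extra '-' runs fall outside [0-9a-z], so descriptive test IDs
	// like this one need the relaxed ^cr-.+$ pattern installed by the test helper.
	fmt.Println(idPattern.MatchString("cr-m5.large-1a-1"))
}
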
diff --git a/go.mod b/go.mod index a7671b761385..b73f68ee91c0 100644 --- a/go.mod +++ b/go.mod @@ -120,4 +120,4 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect ) -replace sigs.k8s.io/karpenter => github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8 +replace sigs.k8s.io/karpenter => github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a diff --git a/go.sum b/go.sum index aa288ae00a06..6f9a7509d44f 100644 --- a/go.sum +++ b/go.sum @@ -116,8 +116,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8 h1:K89kW02bTZkegQnJPlOHSTt+a7WXGQOfrt+pP7lBJos= -github.com/jmdeal/karpenter v0.0.0-20250221104820-4c25410338d8/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= +github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a h1:r7gPnoafSMfTjVmirGrkQu/3Suo6wiVDSElWyWIscXk= +github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1 h1:gmpuckrozJ3lfKqSIia9YMGh0caoQmEY7mQP5MsnbTM= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1/go.mod h1:168XvZFghCqo32ISSWnTXwdlMKzEq+x9TqdfswCjkrQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 09e845096d29..d8680a337e0f 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -244,7 +244,7 @@ spec: properties: id: description: ID is the capacity reservation id in EC2 - pattern: ^cr-.+$ + pattern: ^cr-[0-9a-z]+$ type: string ownerID: description: Owner is the owner id for the ami. diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 717cee67a7e9..fdc76e33458f 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -184,7 +184,7 @@ type CapacityReservationSelectorTerm struct { // +optional Tags map[string]string `json:"tags,omitempty"` // ID is the capacity reservation id in EC2 - // +kubebuilder:validation:Pattern:="^cr-.+$" + // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" // +optional ID string `json:"id,omitempty"` // Owner is the owner id for the ami. diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index 276e6ce7c26d..dd04549f4b9c 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -128,6 +128,9 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, // Checks if capacity reservations are drifted, by comparing the capacity reservations persisted to the NodeClass to // the instance's capacity reservation. +// NOTE: We handle drift dynamically for capacity reservations rather than relying on the offerings inducing drift since +// a reserved instance may fall back to on-demand. Relying on offerings could result in drift occurring before fallback +// would cancel it out. 
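+// For example, when a reservation expires its instances keep running as regular on-demand capacity; the
+// nodeclaim capacityreservation controller relabels them to on-demand rather than letting them drift.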
func (c *CloudProvider) isCapacityReservationDrifted(instance *instance.Instance, nodeClass *v1.EC2NodeClass) cloudprovider.DriftReason { capacityReservationIDs := sets.New(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { return cr.ID })...) if instance.CapacityReservationID != "" && !capacityReservationIDs.Has(instance.CapacityReservationID) { diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go index 32efa849791d..ed1821748a77 100644 --- a/pkg/controllers/nodeclaim/capacityreservation/controller.go +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -68,7 +68,9 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { }) ncs := &karpv1.NodeClaimList{} - if err := c.kubeClient.List(ctx, ncs); err != nil { + if err := c.kubeClient.List(ctx, ncs, client.MatchingLabels{ + karpv1.NodeRegisteredLabelKey: "true", + }); err != nil { return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) } updatedNodeClaims := sets.New[string]() diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go index 7f9d3b149807..a7ee60275b9e 100644 --- a/pkg/controllers/nodeclass/capacityreservation.go +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -72,7 +72,7 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass errors := []error{} nc.Status.CapacityReservations = []v1.CapacityReservation{} for _, r := range reservations { - reservation, err := capacityReservationFromEC2(r) + reservation, err := CapacityReservationFromEC2(r) if err != nil { errors = append(errors, err) continue @@ -89,7 +89,7 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass return reconcile.Result{RequeueAfter: c.requeueAfter(reservations...)}, nil } -func capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) { +func CapacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) { // Guard against new instance match criteria added in the future. See https://github.com/kubernetes-sigs/karpenter/issues/806 // for a similar issue. if !lo.Contains([]ec2types.InstanceMatchCriteria{ diff --git a/pkg/controllers/nodeclass/capacityreservation_test.go b/pkg/controllers/nodeclass/capacityreservation_test.go index a8d115cd1212..f8909b2b8cf9 100644 --- a/pkg/controllers/nodeclass/capacityreservation_test.go +++ b/pkg/controllers/nodeclass/capacityreservation_test.go @@ -25,6 +25,7 @@ import ( . 
"sigs.k8s.io/karpenter/pkg/test/expectations" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/utils" ) const selfOwnerID = "012345678901" @@ -54,7 +55,7 @@ var _ = Describe("NodeClass Capacity Reservation Reconciler", func() { InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), AvailableInstanceCount: lo.ToPtr[int32](10), - Tags: toEC2Tags(discoveryTags), + Tags: utils.MergeTags(discoveryTags), State: ec2types.CapacityReservationStateActive, }, { @@ -73,7 +74,7 @@ var _ = Describe("NodeClass Capacity Reservation Reconciler", func() { InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, CapacityReservationId: lo.ToPtr("cr-m5.large-1b-2"), AvailableInstanceCount: lo.ToPtr[int32](15), - Tags: toEC2Tags(discoveryTags), + Tags: utils.MergeTags(discoveryTags), State: ec2types.CapacityReservationStateActive, }, }, @@ -171,12 +172,3 @@ var _ = Describe("NodeClass Capacity Reservation Reconciler", func() { }), ) }) - -func toEC2Tags(tags map[string]string) []ec2types.Tag { - return lo.MapToSlice(tags, func(key, value string) ec2types.Tag { - return ec2types.Tag{ - Key: lo.ToPtr(key), - Value: lo.ToPtr(value), - } - }) -} diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 5a7af82be21a..7bf0e2f4506b 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -61,7 +61,11 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx)), + ) ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 112c441aed07..194a1c2954fa 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -68,6 +68,9 @@ type EC2Behavior struct { LaunchTemplates sync.Map InsufficientCapacityPools atomic.Slice[CapacityPool] NextError AtomicError + + // Tracks the capacity reservations associated with launch templates, if applicable + launchTemplateCapacityReservationIndex sync.Map } type EC2API struct { @@ -109,6 +112,11 @@ func (e *EC2API) Reset() { }) e.InsufficientCapacityPools.Reset() e.NextError.Reset() + + e.launchTemplateCapacityReservationIndex.Range(func(k, _ any) bool { + e.launchTemplateCapacityReservationIndex.Delete(k) + return true + }) } // nolint: gocyclo @@ -136,15 +144,6 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . 
spotInstanceRequestID = aws.String(test.RandomName()) } - launchTemplates := map[string]*ec2.CreateLaunchTemplateInput{} - for e.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { - lt := e.CreateLaunchTemplateBehavior.CalledWithInput.Pop() - launchTemplates[*lt.LaunchTemplateName] = lt - } - for _, ltInput := range launchTemplates { - e.CreateLaunchTemplateBehavior.CalledWithInput.Add(ltInput) - } - fulfilled := 0 for _, ltc := range input.LaunchTemplateConfigs { for _, override := range ltc.Overrides { @@ -162,34 +161,21 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . if skipInstance { continue } - amiID := lo.ToPtr("") - var capacityReservationID *string - if lt, ok := launchTemplates[lo.FromPtr(ltc.LaunchTemplateSpecification.LaunchTemplateName)]; ok { - amiID = lt.LaunchTemplateData.ImageId - if crs := lt.LaunchTemplateData.CapacityReservationSpecification; crs != nil && crs.CapacityReservationPreference == ec2types.CapacityReservationPreferenceCapacityReservationsOnly { - id := crs.CapacityReservationTarget.CapacityReservationId - if id == nil { - panic("received a launch template targeting capacity reservations without a provided ID") - } - capacityReservationID = id - } - } - if capacityReservationID != nil { + + if crID, ok := e.launchTemplateCapacityReservationIndex.Load(*ltc.LaunchTemplateSpecification.LaunchTemplateName); ok { if cr, ok := lo.Find(e.DescribeCapacityReservationsOutput.Clone().CapacityReservations, func(cr ec2types.CapacityReservation) bool { - return *cr.CapacityReservationId == *capacityReservationID + return *cr.CapacityReservationId == crID.(string) }); !ok || *cr.AvailableInstanceCount == 0 { reservationExceededPools = append(reservationExceededPools, CapacityPool{ InstanceType: string(override.InstanceType), Zone: lo.FromPtr(override.AvailabilityZone), CapacityType: karpv1.CapacityTypeReserved, - ReservationID: *capacityReservationID, + ReservationID: crID.(string), }) - skipInstance = true + continue } } - if skipInstance { - continue - } + amiID := lo.ToPtr("") if e.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { lt := e.CreateLaunchTemplateBehavior.CalledWithInput.Pop() amiID = lt.LaunchTemplateData.ImageId @@ -292,6 +278,9 @@ func (e *EC2API) CreateLaunchTemplate(ctx context.Context, input *ec2.CreateLaun } launchTemplate := ec2types.LaunchTemplate{LaunchTemplateName: input.LaunchTemplateName} e.LaunchTemplates.Store(input.LaunchTemplateName, launchTemplate) + if crs := input.LaunchTemplateData.CapacityReservationSpecification; crs != nil && crs.CapacityReservationPreference == ec2types.CapacityReservationPreferenceCapacityReservationsOnly { + e.launchTemplateCapacityReservationIndex.Store(*input.LaunchTemplateName, *crs.CapacityReservationTarget.CapacityReservationId) + } return &ec2.CreateLaunchTemplateOutput{LaunchTemplate: lo.ToPtr(launchTemplate)}, nil }) } @@ -442,7 +431,7 @@ func (e *EC2API) DescribeLaunchTemplates(_ context.Context, input *ec2.DescribeL output := &ec2.DescribeLaunchTemplatesOutput{} e.LaunchTemplates.Range(func(key, value interface{}) bool { launchTemplate := value.(ec2types.LaunchTemplate) - if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), "", launchTemplate.Tags) { + if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && 
Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), "", "", launchTemplate.Tags) { output.LaunchTemplates = append(output.LaunchTemplates, launchTemplate) } return true diff --git a/pkg/fake/utils.go b/pkg/fake/utils.go index 7f1bc6170b9e..539d778c689d 100644 --- a/pkg/fake/utils.go +++ b/pkg/fake/utils.go @@ -90,7 +90,7 @@ func SubnetsFromFleetRequest(createFleetInput *ec2.CreateFleetInput) []string { // Filters are chained with a logical "AND" func FilterDescribeSecurtyGroups(sgs []ec2types.SecurityGroup, filters []ec2types.Filter) []ec2types.SecurityGroup { return lo.Filter(sgs, func(group ec2types.SecurityGroup, _ int) bool { - return Filter(filters, *group.GroupId, *group.GroupName, "", group.Tags) + return Filter(filters, *group.GroupId, *group.GroupName, "", "", group.Tags) }) } @@ -98,7 +98,7 @@ func FilterDescribeSecurtyGroups(sgs []ec2types.SecurityGroup, filters []ec2type // Filters are chained with a logical "AND" func FilterDescribeSubnets(subnets []ec2types.Subnet, filters []ec2types.Filter) []ec2types.Subnet { return lo.Filter(subnets, func(subnet ec2types.Subnet, _ int) bool { - return Filter(filters, *subnet.SubnetId, "", "", subnet.Tags) + return Filter(filters, *subnet.SubnetId, "", "", "", subnet.Tags) }) } @@ -108,38 +108,26 @@ func FilterDescribeCapacityReservations(crs []ec2types.CapacityReservation, ids if len(ids) != 0 && !idSet.Has(*cr.CapacityReservationId) { return false } - if stateFilter, ok := lo.Find(filters, func(f ec2types.Filter) bool { - return lo.FromPtr(f.Name) == "state" - }); ok { - if !lo.Contains(stateFilter.Values, string(cr.State)) { - return false - } - } - return Filter(lo.Reject(filters, func(f ec2types.Filter, _ int) bool { - return lo.FromPtr(f.Name) == "state" - }), *cr.CapacityReservationId, "", *cr.OwnerId, cr.Tags) + return Filter(filters, *cr.CapacityReservationId, "", *cr.OwnerId, string(cr.State), cr.Tags) }) } func FilterDescribeImages(images []ec2types.Image, filters []ec2types.Filter) []ec2types.Image { return lo.Filter(images, func(image ec2types.Image, _ int) bool { - if stateFilter, ok := lo.Find(filters, func(f ec2types.Filter) bool { - return lo.FromPtr(f.Name) == "state" - }); ok { - if !lo.Contains(stateFilter.Values, string(image.State)) { - return false - } - } - return Filter(lo.Reject(filters, func(f ec2types.Filter, _ int) bool { - return lo.FromPtr(f.Name) == "state" - }), *image.ImageId, *image.Name, "", image.Tags) + return Filter(filters, *image.ImageId, *image.Name, "", string(image.State), image.Tags) }) } //nolint:gocyclo -func Filter(filters []ec2types.Filter, id, name, owner string, tags []ec2types.Tag) bool { +func Filter(filters []ec2types.Filter, id, name, owner, state string, tags []ec2types.Tag) bool { return lo.EveryBy(filters, func(filter ec2types.Filter) bool { switch filterName := aws.ToString(filter.Name); { + case filterName == "state": + for _, val := range filter.Values { + if state == val { + return true + } + } case filterName == "subnet-id" || filterName == "group-id" || filterName == "image-id": for _, val := range filter.Values { if id == val { diff --git a/pkg/providers/capacityreservation/suite_test.go b/pkg/providers/capacityreservation/suite_test.go new file mode 100644 index 000000000000..620b6669223c --- /dev/null +++ b/pkg/providers/capacityreservation/suite_test.go @@ -0,0 +1,109 @@ +package capacityreservation_test + +import ( + "context" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types 
"github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/karpenter-provider-aws/pkg/apis" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/test" + "github.com/aws/karpenter-provider-aws/pkg/utils" + "github.com/samber/lo" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" + coretest "sigs.k8s.io/karpenter/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . "sigs.k8s.io/karpenter/pkg/utils/testing" +) + +var ctx context.Context +var env *coretest.Environment +var awsEnv *test.Environment + +func TestAPIs(t *testing.T) { + ctx = TestContextWithLogger(t) + RegisterFailHandler(Fail) + RunSpecs(t, "EC2NodeClass") +} + +var _ = BeforeSuite(func() { + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + ) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) + ctx = options.ToContext(ctx, test.Options()) + awsEnv = test.NewEnvironment(ctx, env) +}) + +// NOTE: Tests for different selector terms can be found in the nodeclass reconciler tests +var _ = Describe("Capacity Reservation Provider", func() { + var discoveryTags map[string]string + var reservations map[string]int + + BeforeEach(func() { + discoveryTags = map[string]string{ + "karpenter.sh/discovery": "test", + } + crs := []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](15), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + } + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: crs, + }) + reservations = make(map[string]int) + for _, cr := range crs { + reservations[*cr.CapacityReservationId] = int(*cr.AvailableInstanceCount) + } + }) + Context("Availability Cache", func() { + It("should sync availability cache when listing reservations", func() { + crs, err := awsEnv.CapacityReservationProvider.List(ctx, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(crs).To(HaveLen(2)) + for id, count := range reservations { + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(id)).To(Equal(count)) + } + }) + It("should decrement availability when reservation is marked as launched", func() { + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-test", 5) + awsEnv.CapacityReservationProvider.MarkLaunched("cr-test-2") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(5)) + awsEnv.CapacityReservationProvider.MarkLaunched("cr-test") + 
Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(4)) + }) + It("should increment availability when reservation is marked as terminated", func() { + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-test", 5) + awsEnv.CapacityReservationProvider.MarkTerminated("cr-test-2") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(5)) + awsEnv.CapacityReservationProvider.MarkTerminated("cr-test") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(6)) + }) + }) +}) diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 9146d9b96a0a..1c7e633a3369 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -63,7 +63,7 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) + env = coretest.NewEnvironment(coretest.WithCRDs(test.DisableCapacityReservationIDValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index e68e8ea06801..3bbcfc6b47ad 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -128,7 +128,7 @@ func (p *DefaultProvider) createOfferings( var offerings []*cloudprovider.Offering itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...) - if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it, subnetZones)); ok { + if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it)); ok { offerings = append(offerings, ofs.([]*cloudprovider.Offering)...) } else { var cachedOfferings []*cloudprovider.Offering @@ -139,7 +139,6 @@ func (p *DefaultProvider) createOfferings( continue } isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) - _, hasSubnetZone := subnetZones[zone] var price float64 var hasPrice bool switch capacityType { @@ -157,7 +156,7 @@ func (p *DefaultProvider) createOfferings( scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist), ), Price: price, - Available: !isUnavailable && hasPrice && itZones.Has(zone) && hasSubnetZone, + Available: !isUnavailable && hasPrice && itZones.Has(zone), } if id, ok := subnetZones[zone]; ok { offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) @@ -166,7 +165,7 @@ func (p *DefaultProvider) createOfferings( offerings = append(cachedOfferings, offering) } } - p.cache.SetDefault(p.cacheKeyFromInstanceType(it, subnetZones), cachedOfferings) + p.cache.SetDefault(p.cacheKeyFromInstanceType(it), cachedOfferings) offerings = append(offerings, cachedOfferings...) } if !options.FromContext(ctx).FeatureGates.ReservedCapacity { @@ -178,8 +177,6 @@ func (p *DefaultProvider) createOfferings( continue } reservation := &nodeClass.Status.CapacityReservations[i] - - _, hasSubnetZone := subnetZones[reservation.AvailabilityZone] price := 0.0 if odPrice, ok := p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)); ok { // Divide the on-demand price by a sufficiently large constant. 
This allows us to treat the reservation as "free", @@ -196,7 +193,7 @@ func (p *DefaultProvider) createOfferings( scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, reservation.ID), ), Price: price, - Available: reservationCapacity != 0 && itZones.Has(reservation.AvailabilityZone) && hasSubnetZone, + Available: reservationCapacity != 0 && itZones.Has(reservation.AvailabilityZone), ReservationCapacity: reservationCapacity, } if id, ok := subnetZones[reservation.AvailabilityZone]; ok { @@ -207,27 +204,21 @@ func (p *DefaultProvider) createOfferings( return offerings } -func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType, subnetZones map[string]string) string { +func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType) string { zonesHash, _ := hashstructure.Hash( it.Requirements.Get(corev1.LabelTopologyZone).Values(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}, ) - subnetZonesHash, _ := hashstructure.Hash( - subnetZones, - hashstructure.FormatV2, - &hashstructure.HashOptions{SlicesAsSets: true}, - ) capacityTypesHash, _ := hashstructure.Hash( it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}, ) return fmt.Sprintf( - "%s-%016x-%016x-%016x-%d", + "%s-%016x-%016x-%d", it.Name, zonesHash, - subnetZonesHash, capacityTypesHash, p.unavailableOfferings.SeqNum, ) diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 65a0eb0ea8fb..7e9178981cfb 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -28,6 +28,7 @@ import ( "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/sets" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -123,8 +124,8 @@ func NewInstanceType( ctx context.Context, info ec2types.InstanceTypeInfo, region string, - zones []string, - zonesToZoneIDs map[string]string, + offeringZones []string, + subnetZonesToZoneIDs map[string]string, blockDeviceMappings []*v1.BlockDeviceMapping, instanceStorePolicy *v1.InstanceStorePolicy, maxPods *int32, @@ -139,7 +140,7 @@ func NewInstanceType( amiFamily := amifamily.GetAMIFamily(amiFamilyType, &amifamily.Options{}) it := &cloudprovider.InstanceType{ Name: string(info.InstanceType), - Requirements: computeRequirements(info, region, zones, zonesToZoneIDs, amiFamily, capacityReservations), + Requirements: computeRequirements(info, region, offeringZones, subnetZonesToZoneIDs, amiFamily, capacityReservations), Capacity: computeCapacity(ctx, info, amiFamily, blockDeviceMappings, instanceStorePolicy, maxPods, podsPerCore), Overhead: &cloudprovider.InstanceTypeOverhead{ KubeReserved: kubeReservedResources(cpu(info), pods(ctx, info, amiFamily, maxPods, podsPerCore), ENILimitedPods(ctx, info), amiFamily, kubeReserved), @@ -157,8 +158,8 @@ func NewInstanceType( func computeRequirements( info ec2types.InstanceTypeInfo, region string, - zones []string, - zonesToZoneIDs map[string]string, + offeringZones []string, + subnetZonesToZoneIDs map[string]string, amiFamily amifamily.AMIFamily, capacityReservations []v1.CapacityReservation, ) scheduling.Requirements { @@ -172,12 +173,15 @@ func computeRequirements( capacityTypes = append(capacityTypes, karpv1.CapacityTypeReserved) } + // Available zones is the set intersection between zones where the instance type is available, and 
zones which are + // available via the provided EC2NodeClass. + availableZones := sets.New(offeringZones...).Intersection(sets.New(lo.Keys(subnetZonesToZoneIDs)...)) requirements := scheduling.NewRequirements( // Well Known Upstream scheduling.NewRequirement(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, string(info.InstanceType)), scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zones...), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, availableZones.UnsortedList()...), scheduling.NewRequirement(corev1.LabelTopologyRegion, corev1.NodeSelectorOpIn, region), scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter @@ -206,8 +210,8 @@ func computeRequirements( ) // Only add zone-id label when available in offerings. It may not be available if a user has upgraded from a // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. - if zoneIDs := lo.FilterMap(zones, func(zone string, _ int) (string, bool) { - id, ok := zonesToZoneIDs[zone] + if zoneIDs := lo.FilterMap(availableZones.UnsortedList(), func(zone string, _ int) (string, bool) { + id, ok := subnetZonesToZoneIDs[zone] return id, ok }); len(zoneIDs) != 0 { requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, zoneIDs...)) diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index c969455ee5fd..71d2c5966c69 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -90,7 +90,7 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) + env = coretest.NewEnvironment(coretest.WithCRDs(test.DisableCapacityReservationIDValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) @@ -2300,6 +2300,129 @@ essential = true ) }) }) + It("should generate a unique launch template per capacity reservation", func() { + crs := []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-1"), + AvailableInstanceCount: 
lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.xlarge"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.xlarge-1b-1"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + } + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: crs, + }) + for _, cr := range crs { + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, lo.Must(nodeclass.CapacityReservationFromEC2(&cr))) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(*cr.CapacityReservationId, int(*cr.AvailableInstanceCount)) + } + + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}} + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, pod, nodePool, nodeClass) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + + launchTemplates := map[string]*ec2.CreateLaunchTemplateInput{} + for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() != 0 { + lt := awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop() + launchTemplates[*lt.LaunchTemplateName] = lt + } + // We should have created 3 launch templates, rather than 4 since we only create 1 launch template per capacity pool + Expect(launchTemplates).To(HaveLen(3)) + reservationIDs := lo.Uniq(lo.Map(lo.Values(launchTemplates), func(input *ec2.CreateLaunchTemplateInput, _ int) string { + return *input.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId + })) + Expect(reservationIDs).To(HaveLen(3)) + Expect(reservationIDs).To(ConsistOf( + // We don't include the m5.large offering in 1a because we select the zonal offering with the highest capacity + "cr-m5.large-1a-2", + "cr-m5.large-1b-1", + "cr-m5.xlarge-1b-1", + )) + for _, input := range launchTemplates { + Expect(input.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationPreference).To(Equal(ec2types.CapacityReservationPreferenceCapacityReservationsOnly)) + } + + // Validate that we generate one override per launch template, and the override is for the instance pool associated + // with the capacity reservation. 
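+		// Restricting each launch template to a single override matters because CreateFleet may satisfy a
+		// launch template with any of its overrides, and a reservation is only valid for its own instance
+		// type and zone.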
+
+		Expect(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Len()).ToNot(Equal(0))
+		createFleetInput := awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop()
+		Expect(createFleetInput.LaunchTemplateConfigs).To(HaveLen(3))
+		for _, ltc := range createFleetInput.LaunchTemplateConfigs {
+			Expect(ltc.Overrides).To(HaveLen(1))
+			Expect(launchTemplates).To(HaveKey(*ltc.LaunchTemplateSpecification.LaunchTemplateName))
+			lt := launchTemplates[*ltc.LaunchTemplateSpecification.LaunchTemplateName]
+			cr, ok := lo.Find(crs, func(cr ec2types.CapacityReservation) bool {
+				return *cr.CapacityReservationId == *lt.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId
+			})
+			Expect(ok).To(BeTrue())
+			Expect(*ltc.Overrides[0].AvailabilityZone).To(Equal(*cr.AvailabilityZone))
+			Expect(ltc.Overrides[0].InstanceType).To(Equal(ec2types.InstanceType(*cr.InstanceType)))
+		}
+	})
+	DescribeTable(
+		"should set the capacity reservation specification according to the capacity reservation feature flag",
+		func(enabled bool) {
+			coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = enabled
+
+			pod := coretest.UnschedulablePod()
+			ExpectApplied(ctx, env.Client, pod, nodePool, nodeClass)
+			ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
+			ExpectScheduled(ctx, env.Client, pod)
+
+			var launchTemplates []*ec2.CreateLaunchTemplateInput
+			for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() != 0 {
+				launchTemplates = append(launchTemplates, awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop())
+			}
+			for _, input := range launchTemplates {
+				crs := input.LaunchTemplateData.CapacityReservationSpecification
+				if !enabled {
+					Expect(crs).To(BeNil())
+				} else {
+					Expect(*crs).To(Equal(ec2types.LaunchTemplateCapacityReservationSpecificationRequest{
+						CapacityReservationPreference: ec2types.CapacityReservationPreferenceNone,
+					}))
+				}
+			}
+		},
+		Entry("enabled", true),
+		Entry("disabled", false),
+	)
 })
 
 // ExpectTags verifies that the expected tags are a subset of the tags found
diff --git a/pkg/test/utils.go b/pkg/test/utils.go
index 4e4adebd5752..17b7dc075f2f 100644
--- a/pkg/test/utils.go
+++ b/pkg/test/utils.go
@@ -31,3 +31,18 @@ func RemoveNodeClassTagValidation(crds []*apiextensionsv1.CustomResourceDefiniti
 	}
 	return crds
 }
+
+// DisableCapacityReservationIDValidation updates the regex validation used for capacity reservation IDs to allow any
+// string after the "cr-" prefix. This enables us to embed useful debugging information in the reservation ID, such as
+// the instance type and zone.
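+// (e.g. "cr-m5.large-1a-1", which the production ^cr-[0-9a-z]+$ pattern would reject).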
+func DisableCapacityReservationIDValidation(crds []*apiextensionsv1.CustomResourceDefinition) []*apiextensionsv1.CustomResourceDefinition { + for _, crd := range crds { + if crd.Name != "ec2nodeclasses.karpenter.k8s.aws" { + continue + } + idProps := crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] + idProps.Pattern = `^cr-.+$` + crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] = idProps + } + return crds +} From e3d1cddc2ba0349c03850e05d4c9d6592ba4c0eb Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 25 Feb 2025 14:10:16 -0500 Subject: [PATCH 10/16] test: cloudprovider functional tests --- pkg/cloudprovider/suite_test.go | 94 ++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index d6ec65996cb8..de1eb3bddcb3 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -80,7 +80,10 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + ) ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) @@ -870,6 +873,31 @@ var _ = Describe("CloudProvider", func() { Expect(err).ToNot(HaveOccurred()) Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift)) }) + It("should dynamically drift nodeclaims for capacity reservations", func() { + nodeClass.Status.CapacityReservations = []v1.CapacityReservation{ + { + AvailabilityZone: "test-zone-1a", + ID: "cr-foo", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + } + setReservationID := func(id string) { + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = lo.ToPtr(id) + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + } + setReservationID("cr-foo") + ExpectApplied(ctx, env.Client, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).ToNot(HaveOccurred()) + Expect(isDrifted).To(Equal(corecloudprovider.DriftReason(""))) + setReservationID("cr-bar") + isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).ToNot(HaveOccurred()) + Expect(isDrifted).To(Equal(cloudprovider.CapacityReservationDrift)) + }) It("should not return drifted if the security groups match", func() { isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) Expect(err).ToNot(HaveOccurred()) @@ -1343,4 +1371,68 @@ var _ = Describe("CloudProvider", func() { Expect(lo.Keys(cloudProviderNodeClaim.Status.Allocatable)).ToNot(ContainElement(v1.ResourceEFA)) }) }) + Context("Capacity Reservations", func() { + var reservationID string + BeforeEach(func() { + reservationID = "cr-m5.large-1a-1" + cr := ec2types.CapacityReservation{ + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: 
ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(reservationID), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + } + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(reservationID, 10) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{cr}, + }) + nodeClass.Status.CapacityReservations = []v1.CapacityReservation{ + lo.Must(nodeclass.CapacityReservationFromEC2(&cr)), + } + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}} + }) + It("should mark capacity reservations as launched", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(9)) + }) + It("should mark capacity reservations as terminated", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + ncs := ExpectNodeClaims(ctx, env.Client) + Expect(ncs).To(HaveLen(1)) + + // Attempt the first delete - since the instance still exists we shouldn't increment the availability count + err := cloudProvider.Delete(ctx, ncs[0]) + Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeFalse()) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(9)) + + // Attempt again after clearing the instance from the EC2 output. Now that we get a NotFound error, expect + // availability to be incremented. 
+ awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{}) + err = cloudProvider.Delete(ctx, ncs[0]) + Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue()) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(10)) + }) + It("should include capacity reservation labels", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + ncs := ExpectNodeClaims(ctx, env.Client) + Expect(ncs).To(HaveLen(1)) + Expect(ncs[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(ncs[0].Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + }) + }) }) From 9d6c1b00848c0e18c0a009b93686d788452801ca Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 25 Feb 2025 17:38:21 -0500 Subject: [PATCH 11/16] remaining functional + feedback --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- pkg/apis/v1/ec2nodeclass_status.go | 2 +- pkg/cloudprovider/suite_test.go | 8 +- pkg/controllers/controllers.go | 2 +- .../capacityreservation/controller.go | 28 +-- .../capacityreservation/suite_test.go | 169 ++++++++++++++++++ pkg/fake/ec2api.go | 13 +- .../capacityreservation/suite_test.go | 25 ++- pkg/providers/launchtemplate/suite_test.go | 2 +- pkg/test/utils.go | 8 +- 11 files changed, 226 insertions(+), 35 deletions(-) create mode 100644 pkg/controllers/nodeclaim/capacityreservation/suite_test.go diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 0289a71a179a..32c21066e067 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -690,7 +690,7 @@ spec: type: string id: description: The id for the capacity reservation. - pattern: ^cr-.+$ + pattern: ^cr-[0-9a-z]+$ type: string instanceMatchCriteria: description: Indicates the type of instance launches the capacity reservation accepts. diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index d8680a337e0f..9dbc2c2817b6 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -687,7 +687,7 @@ spec: type: string id: description: The id for the capacity reservation. - pattern: ^cr-.+$ + pattern: ^cr-[0-9a-z]+$ type: string instanceMatchCriteria: description: Indicates the type of instance launches the capacity reservation accepts. diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 062fcac1ed8d..2b56635c1060 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -81,7 +81,7 @@ type CapacityReservation struct { // +optional EndTime *metav1.Time `json:"endTime,omitempty" hash:"ignore"` // The id for the capacity reservation. - // +kubebuilder:validation:Pattern:="^cr-.+$" + // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" // +required ID string `json:"id"` // Indicates the type of instance launches the capacity reservation accepts. 
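Editor's note, not part of the patch: the controller change below (pkg/controllers/nodeclaim/capacityreservation)
is what makes the reserved-to-on-demand fallback observable to the scheduler. A minimal sketch of the relabeling
it performs, using stand-in strings for karpv1.CapacityTypeLabelKey and cloudprovider.ReservationIDLabel (the real
code patches the NodeClaim and Node objects through the kube client):

package main

import "fmt"

// demote mirrors syncCapacityType's on-demand branch: flip the capacity-type label and
// drop the reservation-id label once the backing instance is no longer reservation-backed.
func demote(labels map[string]string) {
	labels["karpenter.sh/capacity-type"] = "on-demand"
	delete(labels, "karpenter.k8s.aws/capacity-reservation-id")
}

func main() {
	labels := map[string]string{
		"karpenter.sh/capacity-type":                "reserved",
		"karpenter.k8s.aws/capacity-reservation-id": "cr-foo",
	}
	demote(labels)
	fmt.Println(labels) // map[karpenter.sh/capacity-type:on-demand]
}
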
diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index de1eb3bddcb3..9237b192d444 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -876,11 +876,11 @@ var _ = Describe("CloudProvider", func() { It("should dynamically drift nodeclaims for capacity reservations", func() { nodeClass.Status.CapacityReservations = []v1.CapacityReservation{ { - AvailabilityZone: "test-zone-1a", - ID: "cr-foo", + AvailabilityZone: "test-zone-1a", + ID: "cr-foo", InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), - InstanceType: "m5.large", - OwnerID: "012345678901", + InstanceType: "m5.large", + OwnerID: "012345678901", }, } setReservationID := func(id string) { diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 34fba8215827..509ed647f48d 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -94,7 +94,7 @@ func NewControllers( ssminvalidation.NewController(ssmCache, amiProvider), status.NewController[*v1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter"), status.EmitDeprecatedMetrics), controllersversion.NewController(versionProvider, versionProvider.UpdateVersionWithValidation), - capacityreservation.NewController(), + capacityreservation.NewController(kubeClient, cloudProvider), } if options.FromContext(ctx).InterruptionQueue != "" { sqsapi := servicesqs.NewFromConfig(cfg) diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go index ed1821748a77..0003f28a0e73 100644 --- a/pkg/controllers/nodeclaim/capacityreservation/controller.go +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -41,8 +41,11 @@ type Controller struct { kubeClient client.Client } -func NewController() *Controller { - return nil +func NewController(kubeClient client.Client, cp cloudprovider.CloudProvider) *Controller { + return &Controller{ + cp: cp, + kubeClient: kubeClient, + } } func (*Controller) Name() string { @@ -58,15 +61,13 @@ func (c *Controller) Register(_ context.Context, m manager.Manager) error { func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, c.Name()) - cpNodeClaims, err := c.cp.List(ctx) if err != nil { return reconcile.Result{}, fmt.Errorf("listing instance types, %w", err) } - cpNodeClaimIndex := lo.SliceToMap(cpNodeClaims, func(nc *karpv1.NodeClaim) (string, *karpv1.NodeClaim) { + providerIDsToCPNodeClaims := lo.SliceToMap(cpNodeClaims, func(nc *karpv1.NodeClaim) (string, *karpv1.NodeClaim) { return nc.Status.ProviderID, nc }) - ncs := &karpv1.NodeClaimList{} if err := c.kubeClient.List(ctx, ncs, client.MatchingLabels{ karpv1.NodeRegisteredLabelKey: "true", @@ -76,7 +77,7 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { updatedNodeClaims := sets.New[string]() var errs []error for i := range ncs.Items { - cpNC, ok := cpNodeClaimIndex[ncs.Items[i].Status.ProviderID] + cpNC, ok := providerIDsToCPNodeClaims[ncs.Items[i].Status.ProviderID] if !ok { continue } @@ -88,10 +89,11 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { updatedNodeClaims.Insert(ncs.Items[i].Name) } } - log.FromContext(ctx).WithValues("NodeClaims", lo.Map(updatedNodeClaims.UnsortedList(), func(name string, _ int) klog.ObjectRef { - return klog.KRef("", name) - })).V(1).Info("updated capacity type for nodeclaims") - + if len(updatedNodeClaims) != 0 { + 
log.FromContext(ctx).WithValues("NodeClaims", lo.Map(updatedNodeClaims.UnsortedList(), func(name string, _ int) klog.ObjectRef { + return klog.KRef("", name) + })).V(1).Info("updated capacity type for nodeclaims") + } if len(errs) != 0 { if lo.EveryBy(errs, func(err error) bool { return errors.IsConflict(err) }) { return reconcile.Result{Requeue: true}, nil @@ -118,7 +120,7 @@ func (c *Controller) syncCapacityType(ctx context.Context, capacityType string, stored := nc.DeepCopy() nc.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand delete(nc.Labels, cloudprovider.ReservationIDLabel) - if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); err != nil { + if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); client.IgnoreNotFound(err) != nil { return false, fmt.Errorf("patching nodeclaim %q, %w", nc.Name, err) } } @@ -144,8 +146,8 @@ func (c *Controller) syncCapacityType(ctx context.Context, capacityType string, } stored := n.DeepCopy() n.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand - delete(nc.Labels, cloudprovider.ReservationIDLabel) - if err := c.kubeClient.Patch(ctx, n, client.MergeFrom(stored)); err != nil { + delete(n.Labels, cloudprovider.ReservationIDLabel) + if err := c.kubeClient.Patch(ctx, n, client.MergeFrom(stored)); client.IgnoreNotFound(err) != nil { return false, fmt.Errorf("patching node %q, %w", n.Name, err) } } diff --git a/pkg/controllers/nodeclaim/capacityreservation/suite_test.go b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go new file mode 100644 index 000000000000..096ec945c3d0 --- /dev/null +++ b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go @@ -0,0 +1,169 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation_test + +import ( + "context" + "fmt" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/events" + coretest "sigs.k8s.io/karpenter/pkg/test" + + "github.com/aws/karpenter-provider-aws/pkg/apis" + "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" + "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/capacityreservation" + "github.com/aws/karpenter-provider-aws/pkg/fake" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + . "sigs.k8s.io/karpenter/pkg/test/expectations" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" +) + +var ctx context.Context +var stop context.CancelFunc +var env *coretest.Environment +var awsEnv *test.Environment +var controller *capacityreservation.Controller + +func TestAWS(t *testing.T) { + ctx = TestContextWithLogger(t) + RegisterFailHandler(Fail) + RunSpecs(t, "SSM Invalidation Controller") +} + +var _ = BeforeSuite(func() { + env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeProviderIDFieldIndexer(ctx))) + ctx = options.ToContext(ctx, test.Options()) + ctx, stop = context.WithCancel(ctx) + awsEnv = test.NewEnvironment(ctx, env) + + cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) + controller = capacityreservation.NewController(env.Client, cloudProvider) +}) + +var _ = AfterSuite(func() { + stop() + Expect(env.Stop()).To(Succeed(), "Failed to stop environment") +}) + +var _ = Describe("Capacity Reservation NodeClaim Controller", func() { + var nodeClaim *karpv1.NodeClaim + var node *corev1.Node + var reservationID string + BeforeEach(func() { + reservationID = "cr-foo" + instance := ec2types.Instance{ + ImageId: lo.ToPtr(fake.ImageID()), + InstanceType: ec2types.InstanceType("m5.large"), + SubnetId: lo.ToPtr(fake.SubnetID()), + SpotInstanceRequestId: nil, + State: &ec2types.InstanceState{ + Name: ec2types.InstanceStateNameRunning, + }, + InstanceId: lo.ToPtr(fake.InstanceID()), + CapacityReservationId: &reservationID, + Placement: &ec2types.Placement{ + AvailabilityZone: lo.ToPtr("test-zone-1a"), + }, + SecurityGroups: []ec2types.GroupIdentifier{{GroupId: lo.ToPtr(fake.SecurityGroupID())}}, + } + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{ + Reservations: []ec2types.Reservation{{Instances: []ec2types.Instance{instance}}}, + }) + + nodeClaim = coretest.NodeClaim(karpv1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + karpv1.CapacityTypeLabelKey: karpv1.CapacityTypeReserved, + corecloudprovider.ReservationIDLabel: reservationID, + karpv1.NodeRegisteredLabelKey: "true", + }, + }, + Status: karpv1.NodeClaimStatus{ + ProviderID: fmt.Sprintf("aws:///test-zone-1a/%s", *instance.InstanceId), + }, + }) + node = coretest.NodeClaimLinkedNode(nodeClaim) + }) + It("should demote nodeclaims and nodes from reserved to on-demand", func() { + ExpectApplied(ctx, env.Client, nodeClaim, node) + ExpectSingletonReconciled(ctx, controller) + + // Since the backing instance is still under a capacity reservation, we shouldn't demote the nodeclaim or node + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(node.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = nil + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + + // Now that the backing instance is no longer part of a capacity reservation, we should 
demote the resources by + // updating the capacity type to on-demand and removing the reservation ID label. + ExpectSingletonReconciled(ctx, controller) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(nodeClaim.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(node.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + }) + It("should demote nodes from reserved to on-demand even if their nodeclaim was demoted previously", func() { + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = nil + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + + ExpectApplied(ctx, env.Client, nodeClaim) + ExpectSingletonReconciled(ctx, controller) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(nodeClaim.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + + ExpectApplied(ctx, env.Client, node) + ExpectSingletonReconciled(ctx, controller) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(node.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + }) + It("should ignore nodeclaims which aren't registered", func() { + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = nil + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + delete(nodeClaim.Labels, karpv1.NodeRegisteredLabelKey) + + ExpectApplied(ctx, env.Client, nodeClaim) + ExpectSingletonReconciled(ctx, controller) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + }) +}) diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 194a1c2954fa..a01f05c004f6 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -65,12 +65,11 @@ type EC2Behavior struct { CreateLaunchTemplateBehavior MockedFunction[ec2.CreateLaunchTemplateInput, ec2.CreateLaunchTemplateOutput] CalledWithDescribeImagesInput AtomicPtrSlice[ec2.DescribeImagesInput] Instances sync.Map - LaunchTemplates sync.Map InsufficientCapacityPools atomic.Slice[CapacityPool] NextError AtomicError - // Tracks the capacity reservations associated with launch templates, if applicable - launchTemplateCapacityReservationIndex sync.Map + LaunchTemplates sync.Map + launchTemplatesToCapacityReservations sync.Map // map[lt-name]cr-id } type EC2API struct { @@ -113,8 +112,8 @@ func (e *EC2API) Reset() { e.InsufficientCapacityPools.Reset() e.NextError.Reset() - e.launchTemplateCapacityReservationIndex.Range(func(k, _ any) bool { - e.launchTemplateCapacityReservationIndex.Delete(k) + e.launchTemplatesToCapacityReservations.Range(func(k, _ any) bool { + e.launchTemplatesToCapacityReservations.Delete(k) return true }) } @@ -162,7 +161,7 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . 
continue } - if crID, ok := e.launchTemplateCapacityReservationIndex.Load(*ltc.LaunchTemplateSpecification.LaunchTemplateName); ok { + if crID, ok := e.launchTemplatesToCapacityReservations.Load(*ltc.LaunchTemplateSpecification.LaunchTemplateName); ok { if cr, ok := lo.Find(e.DescribeCapacityReservationsOutput.Clone().CapacityReservations, func(cr ec2types.CapacityReservation) bool { return *cr.CapacityReservationId == crID.(string) }); !ok || *cr.AvailableInstanceCount == 0 { @@ -279,7 +278,7 @@ func (e *EC2API) CreateLaunchTemplate(ctx context.Context, input *ec2.CreateLaun launchTemplate := ec2types.LaunchTemplate{LaunchTemplateName: input.LaunchTemplateName} e.LaunchTemplates.Store(input.LaunchTemplateName, launchTemplate) if crs := input.LaunchTemplateData.CapacityReservationSpecification; crs != nil && crs.CapacityReservationPreference == ec2types.CapacityReservationPreferenceCapacityReservationsOnly { - e.launchTemplateCapacityReservationIndex.Store(*input.LaunchTemplateName, *crs.CapacityReservationTarget.CapacityReservationId) + e.launchTemplatesToCapacityReservations.Store(*input.LaunchTemplateName, *crs.CapacityReservationTarget.CapacityReservationId) } return &ec2.CreateLaunchTemplateOutput{LaunchTemplate: lo.ToPtr(launchTemplate)}, nil }) diff --git a/pkg/providers/capacityreservation/suite_test.go b/pkg/providers/capacityreservation/suite_test.go index 620b6669223c..bf2771f11a24 100644 --- a/pkg/providers/capacityreservation/suite_test.go +++ b/pkg/providers/capacityreservation/suite_test.go @@ -1,3 +1,17 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package capacityreservation_test import ( @@ -6,14 +20,15 @@ import ( "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" + coretest "sigs.k8s.io/karpenter/pkg/test" + "github.com/aws/karpenter-provider-aws/pkg/apis" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/test" "github.com/aws/karpenter-provider-aws/pkg/utils" - "github.com/samber/lo" - coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" - coretest "sigs.k8s.io/karpenter/pkg/test" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -58,7 +73,7 @@ var _ = Describe("Capacity Reservation Provider", func() { InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), AvailableInstanceCount: lo.ToPtr[int32](10), - Tags: utils.MergeTags(discoveryTags), + Tags: utils.MergeTags(discoveryTags), State: ec2types.CapacityReservationStateActive, }, { @@ -68,7 +83,7 @@ var _ = Describe("Capacity Reservation Provider", func() { InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), AvailableInstanceCount: lo.ToPtr[int32](15), - Tags: utils.MergeTags(discoveryTags), + Tags: utils.MergeTags(discoveryTags), State: ec2types.CapacityReservationStateActive, }, } diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 71d2c5966c69..ccfadbb368c1 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -2396,7 +2396,7 @@ essential = true } }) DescribeTable( - "should set the capacity reservation specification accoriding to the capacity reservation feature flag", + "should set the capacity reservation specification according to the capacity reservation feature flag", func(enabled bool) { coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = enabled diff --git a/pkg/test/utils.go b/pkg/test/utils.go index 17b7dc075f2f..11496fcc510e 100644 --- a/pkg/test/utils.go +++ b/pkg/test/utils.go @@ -40,9 +40,15 @@ func DisableCapacityReservationIDValidation(crds []*apiextensionsv1.CustomResour if crd.Name != "ec2nodeclasses.karpenter.k8s.aws" { continue } + // Disable validation for the selector terms idProps := crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] - idProps.Pattern = `^cr-.+$` + idProps.Pattern = "" crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] = idProps + + // Disable validation for the status + idProps = crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["status"].Properties["capacityReservations"].Items.Schema.Properties["id"] + idProps.Pattern = "" + crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["status"].Properties["capacityReservations"].Items.Schema.Properties["id"] = idProps } return crds } From 1f35782f80abb2d409ce244cc8f77e9e068d0d02 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 25 Feb 2025 20:41:33 -0500 Subject: [PATCH 12/16] e2e tests --- test/hack/e2e_scripts/install_karpenter.sh | 1 + test/pkg/environment/aws/expectations.go | 34 +++++++ test/suites/scheduling/suite_test.go | 103 +++++++++++++++++++++ 3 files changed, 138 insertions(+) diff --git a/test/hack/e2e_scripts/install_karpenter.sh b/test/hack/e2e_scripts/install_karpenter.sh index 935746be83cb..9684efbf6a64 100755 --- a/test/hack/e2e_scripts/install_karpenter.sh +++ b/test/hack/e2e_scripts/install_karpenter.sh @@ -18,6 +18,7 @@ helm upgrade --install karpenter "${CHART}" \ --set settings.interruptionQueue="$CLUSTER_NAME" \ --set settings.featureGates.spotToSpotConsolidation=true \ --set settings.featureGates.nodeRepair=true \ + --set settings.featureGates.reservedCapacity=true \ --set controller.resources.requests.cpu=5 \ --set controller.resources.requests.memory=3Gi \ --set controller.resources.limits.cpu=5 \ diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 
00da2aab74c6..6b35e1b5eb1e 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -15,6 +15,7 @@ limitations under the License. package aws import ( + "context" "fmt" "net" "strconv" @@ -41,6 +42,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" + "github.com/aws/karpenter-provider-aws/pkg/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -525,3 +527,35 @@ func ignoreAlreadyContainsRole(err error) error { } return err } + +func ExpectCapacityReservationCreated( + ctx context.Context, + ec2api *ec2.Client, + instanceType ec2types.InstanceType, + zone string, + capacity int32, + endDate *time.Time, + tags map[string]string, +) (id string, cleanup func()) { + GinkgoHelper() + out, err := ec2api.CreateCapacityReservation(ctx, &ec2.CreateCapacityReservationInput{ + InstanceCount: lo.ToPtr(capacity), + InstanceType: lo.ToPtr(string(instanceType)), + InstancePlatform: ec2types.CapacityReservationInstancePlatformLinuxUnix, + AvailabilityZone: lo.ToPtr(zone), + EndDate: endDate, + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + TagSpecifications: lo.Ternary(len(tags) != 0, []ec2types.TagSpecification{{ + ResourceType: ec2types.ResourceTypeCapacityReservation, + Tags: utils.MergeTags(tags), + }}, nil), + }) + Expect(err).ToNot(HaveOccurred()) + return lo.FromPtr(out.CapacityReservation.CapacityReservationId), func() { + GinkgoHelper() + _, err := ec2api.CancelCapacityReservation(ctx, &ec2.CancelCapacityReservationInput{ + CapacityReservationId: lo.ToPtr(*out.CapacityReservation.CapacityReservationId), + }) + Expect(err).ToNot(HaveOccurred()) + } +} diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go index 724bbfdb3ee5..876b87ffb227 100644 --- a/test/suites/scheduling/suite_test.go +++ b/test/suites/scheduling/suite_test.go @@ -706,6 +706,109 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { } }) }) + + FContext("Capacity Reservations", func() { + var largeCapacityReservationID, xlargeCapacityReservationID string + var cleanupFuncs []func() + BeforeAll(func() { + var cleanupFunc func() + largeCapacityReservationID, cleanupFunc = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + cleanupFuncs = append(cleanupFuncs, cleanupFunc) + xlargeCapacityReservationID, cleanupFunc = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Xlarge, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + cleanupFuncs = append(cleanupFuncs, cleanupFunc) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{ + { + ID: largeCapacityReservationID, + }, + { + ID: xlargeCapacityReservationID, + }, + } + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, + }}} + }) + AfterAll(func() { + for _, f := range cleanupFuncs { + f() + } + }) + It("should schedule against a specific reservation ID", func() { + pod := test.Pod(test.PodOptions{ + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: v1.LabelCapacityReservationID, + Operator: corev1.NodeSelectorOpIn, + Values: 
[]string{xlargeCapacityReservationID}, + }}, + }) + env.ExpectCreated(nodePool, nodeClass, pod) + + nc := env.EventuallyExpectNodeClaimCount("==", 1)[0] + req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool { + return req.Key == v1.LabelCapacityReservationID + }) + Expect(ok).To(BeTrue()) + Expect(req.Values).To(ConsistOf(xlargeCapacityReservationID)) + + n := env.EventuallyExpectNodeCount("==", 1)[0] + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) + }) + FIt("should fall back when compatible capacity reservations are exhausted", func() { + podLabels := map[string]string{"foo": "bar"} + pods := test.Pods(2, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabels, + }, + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelInstanceTypeStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{string(ec2types.InstanceTypeM5Large)}, + }}, + PodAntiRequirements: []corev1.PodAffinityTerm{{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: podLabels, + }, + }}, + }) + env.ExpectCreated(nodePool, nodeClass, pods[0], pods[1]) + + reservedCount := 0 + for _, nc := range env.EventuallyExpectNodeClaimCount("==", 2) { + req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool { + return req.Key == v1.LabelCapacityReservationID + }) + if ok { + reservedCount += 1 + Expect(req.Values).To(ConsistOf(xlargeCapacityReservationID)) + } + } + Expect(reservedCount).To(Equal(1)) + env.EventuallyExpectNodeCount("==", 2) + }) + It("should demote reserved instances when the reservation is canceled", func() { + }) + }) }) func ephemeralInitContainer(requirements corev1.ResourceRequirements) corev1.Container { From f6198e25fd8c04518594e1dd3d14439c9bb765af Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 25 Feb 2025 21:46:46 -0500 Subject: [PATCH 13/16] checkpoint --- .../capacityreservation/controller.go | 7 +-- test/suites/scheduling/suite_test.go | 45 ++++++++++++++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go index 0003f28a0e73..d25245c15807 100644 --- a/pkg/controllers/nodeclaim/capacityreservation/controller.go +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -69,11 +69,12 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { return nc.Status.ProviderID, nc }) ncs := &karpv1.NodeClaimList{} - if err := c.kubeClient.List(ctx, ncs, client.MatchingLabels{ - karpv1.NodeRegisteredLabelKey: "true", - }); err != nil { + if err := c.kubeClient.List(ctx, ncs); err != nil { return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) } + log.FromContext(ctx).WithValues("provider-ids", lo.Keys(providerIDsToCPNodeClaims), "nodeclaims", lo.Map(ncs.Items, func(nc karpv1.NodeClaim, _ int) string { + return nc.Name + })).Info("evaluating") updatedNodeClaims := sets.New[string]() var errs []error for i := range ncs.Items { diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go index 876b87ffb227..01dc7a10db60 100644 --- a/test/suites/scheduling/suite_test.go +++ b/test/suites/scheduling/suite_test.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/labels" 
"k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/controller-runtime/pkg/client" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/test" @@ -707,7 +708,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }) }) - FContext("Capacity Reservations", func() { + Context("Capacity Reservations", func() { var largeCapacityReservationID, xlargeCapacityReservationID string var cleanupFuncs []func() BeforeAll(func() { @@ -742,9 +743,9 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }, } nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ - Key: karpv1.CapacityTypeLabelKey, + Key: karpv1.CapacityTypeLabelKey, Operator: corev1.NodeSelectorOpIn, - Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, + Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, }}} }) AfterAll(func() { @@ -773,7 +774,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) }) - FIt("should fall back when compatible capacity reservations are exhausted", func() { + It("should fall back when compatible capacity reservations are exhausted", func() { podLabels := map[string]string{"foo": "bar"} pods := test.Pods(2, test.PodOptions{ ObjectMeta: metav1.ObjectMeta{ @@ -800,13 +801,47 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }) if ok { reservedCount += 1 - Expect(req.Values).To(ConsistOf(xlargeCapacityReservationID)) + Expect(req.Values).To(ConsistOf(largeCapacityReservationID)) } } Expect(reservedCount).To(Equal(1)) env.EventuallyExpectNodeCount("==", 2) }) It("should demote reserved instances when the reservation is canceled", func() { + id, cleanup := environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: id}} + pod := test.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + + nc := env.EventuallyExpectNodeClaimCount("==", 1)[0] + req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool { + return req.Key == v1.LabelCapacityReservationID + }) + Expect(ok).To(BeTrue()) + Expect(req.Values).To(ConsistOf(id)) + n := env.EventuallyExpectNodeCount("==", 1)[0] + + cleanup() + + Eventually(func(g Gomega) { + updatedNodeClaim := &karpv1.NodeClaim{} + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), updatedNodeClaim)).To(BeNil()) + g.Expect(updatedNodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + g.Expect(updatedNodeClaim.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID)) + + updatedNode := &corev1.Node{} + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(n), updatedNode)).To(BeNil()) + g.Expect(updatedNodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + g.Expect(updatedNodeClaim.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID)) + }).Should(Succeed()) }) }) }) From 18d6949912e7212a2c68f340d3f03f81ac73ad86 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 26 Feb 2025 05:01:36 -0500 Subject: [PATCH 14/16] remaining e2es + feedback 
--- .../capacityreservation/controller.go | 3 - .../capacityreservation/suite_test.go | 12 -- .../instancetype/offering/provider.go | 3 + test/pkg/environment/aws/expectations.go | 12 +- test/suites/consolidation/suite_test.go | 199 +++++++++++++++--- test/suites/drift/suite_test.go | 110 +++++++++- test/suites/scheduling/suite_test.go | 45 ++-- .../cloudformation.yaml | 2 + 8 files changed, 321 insertions(+), 65 deletions(-) diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go index d25245c15807..ada90ef2c46c 100644 --- a/pkg/controllers/nodeclaim/capacityreservation/controller.go +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -72,9 +72,6 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { if err := c.kubeClient.List(ctx, ncs); err != nil { return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) } - log.FromContext(ctx).WithValues("provider-ids", lo.Keys(providerIDsToCPNodeClaims), "nodeclaims", lo.Map(ncs.Items, func(nc karpv1.NodeClaim, _ int) string { - return nc.Name - })).Info("evaluating") updatedNodeClaims := sets.New[string]() var errs []error for i := range ncs.Items { diff --git a/pkg/controllers/nodeclaim/capacityreservation/suite_test.go b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go index 096ec945c3d0..49d8ef9fb98d 100644 --- a/pkg/controllers/nodeclaim/capacityreservation/suite_test.go +++ b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go @@ -154,16 +154,4 @@ var _ = Describe("Capacity Reservation NodeClaim Controller", func() { Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) Expect(node.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) }) - It("should ignore nodeclaims which aren't registered", func() { - out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() - out.Reservations[0].Instances[0].CapacityReservationId = nil - awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) - delete(nodeClaim.Labels, karpv1.NodeRegisteredLabelKey) - - ExpectApplied(ctx, env.Client, nodeClaim) - ExpectSingletonReconciled(ctx, controller) - nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) - Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) - Expect(nodeClaim.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) - }) }) diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index 3bbcfc6b47ad..375a5f2c5edf 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -106,6 +106,9 @@ func (p *DefaultProvider) InjectOfferings( }) } + // NOTE: By making this copy one level deep, we can modify the offerings without mutating the results from previous + // GetInstanceTypes calls. 
This should still be done with caution - it is currently done here in the provider, and + // once in the instance provider (filterReservedInstanceTypes) its = append(its, &cloudprovider.InstanceType{ Name: it.Name, Requirements: it.Requirements, diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 6b35e1b5eb1e..76bd9b2dce0a 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -536,7 +536,7 @@ func ExpectCapacityReservationCreated( capacity int32, endDate *time.Time, tags map[string]string, -) (id string, cleanup func()) { +) string { GinkgoHelper() out, err := ec2api.CreateCapacityReservation(ctx, &ec2.CreateCapacityReservationInput{ InstanceCount: lo.ToPtr(capacity), @@ -551,10 +551,14 @@ func ExpectCapacityReservationCreated( }}, nil), }) Expect(err).ToNot(HaveOccurred()) - return lo.FromPtr(out.CapacityReservation.CapacityReservationId), func() { - GinkgoHelper() + return *out.CapacityReservation.CapacityReservationId +} + +func ExpectCapacityReservationsCanceled(ctx context.Context, ec2api *ec2.Client, reservationIDs ...string) { + GinkgoHelper() + for _, id := range reservationIDs { _, err := ec2api.CancelCapacityReservation(ctx, &ec2.CancelCapacityReservationInput{ - CapacityReservationId: lo.ToPtr(*out.CapacityReservation.CapacityReservationId), + CapacityReservationId: &id, }) Expect(err).ToNot(HaveOccurred()) } diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go index 19b990397a7f..cba85ba2c181 100644 --- a/test/suites/consolidation/suite_test.go +++ b/test/suites/consolidation/suite_test.go @@ -21,6 +21,7 @@ import ( "time" "github.com/aws/aws-sdk-go-v2/aws" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/awslabs/operatorpkg/object" "github.com/samber/lo" appsv1 "k8s.io/api/apps/v1" @@ -32,7 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - "sigs.k8s.io/karpenter/pkg/test" + coretest "sigs.k8s.io/karpenter/pkg/test" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -66,7 +67,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Consolidation", func() { +var _ = Describe("Consolidation", Ordered, func() { Context("LastPodEventTime", func() { var nodePool *karpv1.NodePool BeforeEach(func() { @@ -76,9 +77,9 @@ var _ = Describe("Consolidation", func() { }) It("should update lastPodEventTime when pods are scheduled and removed", func() { var numPods int32 = 5 - dep := test.Deployment(test.DeploymentOptions{ + dep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "regular-app"}, }, @@ -129,7 +130,7 @@ var _ = Describe("Consolidation", func() { }) It("should update lastPodEventTime when pods go terminal", func() { podLabels := map[string]string{"app": "regular-app"} - pod := test.Pod(test.PodOptions{ + pod := coretest.Pod(coretest.PodOptions{ // use a non-pause image so that we can have a sleep Image: "alpine:3.20.2", Command: []string{"/bin/sh", "-c", "sleep 30"}, @@ -143,7 +144,7 @@ var _ = Describe("Consolidation", func() { }) job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: test.RandomName(), + Name: coretest.RandomName(), Namespace: "default", }, Spec: batchv1.JobSpec{ @@ -190,9 +191,9 @@ var _ = 
Describe("Consolidation", func() { nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") numPods = 5 - dep = test.Deployment(test.DeploymentOptions{ + dep = coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "regular-app"}, }, @@ -245,7 +246,7 @@ var _ = Describe("Consolidation", func() { // This test will hold consolidation until we are ready to execute it nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") - nodePool = test.ReplaceRequirements(nodePool, + nodePool = coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceSize, Operator: corev1.NodeSelectorOpIn, @@ -258,9 +259,9 @@ var _ = Describe("Consolidation", func() { Nodes: "50%", }} numPods = 9 - dep = test.Deployment(test.DeploymentOptions{ + dep = coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -306,7 +307,7 @@ var _ = Describe("Consolidation", func() { // This test will hold consolidation until we are ready to execute it nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") - nodePool = test.ReplaceRequirements(nodePool, + nodePool = coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ Key: v1.LabelInstanceSize, @@ -328,9 +329,9 @@ var _ = Describe("Consolidation", func() { Nodes: "3", }} - ds := test.DaemonSet(test.DaemonSetOptions{ + ds := coretest.DaemonSet(coretest.DaemonSetOptions{ Selector: appLabels, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, }, @@ -352,9 +353,9 @@ var _ = Describe("Consolidation", func() { numPods = 5 deployments := make([]*appsv1.Deployment, numPods) for i := range lo.Range(int(numPods)) { - deployments[i] = test.Deployment(test.DeploymentOptions{ + deployments[i] = coretest.Deployment(coretest.DeploymentOptions{ Replicas: 1, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, }, @@ -485,7 +486,7 @@ var _ = Describe("Consolidation", func() { }) DescribeTable("should consolidate nodes (delete)", Label(debug.NoWatch), Label(debug.NoEvents), func(spotToSpot bool) { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -531,9 +532,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 100 - dep := test.Deployment(test.DeploymentOptions{ + dep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -568,7 +569,7 @@ var _ = Describe("Consolidation", func() { ) DescribeTable("should consolidate nodes (replace)", func(spotToSpot bool) { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: 
karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -621,9 +622,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 3 - largeDep := test.Deployment(test.DeploymentOptions{ + largeDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -644,9 +645,9 @@ var _ = Describe("Consolidation", func() { }, }, }) - smallDep := test.Deployment(test.DeploymentOptions{ + smallDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "small-app"}, }, @@ -723,7 +724,7 @@ var _ = Describe("Consolidation", func() { Entry("if the nodes are spot nodes", true), ) It("should consolidate on-demand nodes to spot (replace)", func() { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -768,9 +769,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 2 - smallDep := test.Deployment(test.DeploymentOptions{ + smallDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "small-app"}, }, @@ -809,7 +810,7 @@ var _ = Describe("Consolidation", func() { // Expect the node to consolidate to a spot instance as it will be a cheaper // instance than on-demand nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") - test.ReplaceRequirements(nodePool, + coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ Key: karpv1.CapacityTypeLabelKey, @@ -853,4 +854,146 @@ var _ = Describe("Consolidation", func() { env.ExpectDeleted(smallDep) }) + Context("Capacity Reservations", func() { + var largeCapacityReservationID, xlargeCapacityReservationID string + var nodePool *karpv1.NodePool + BeforeAll(func() { + largeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + xlargeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Xlarge, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + }) + AfterAll(func() { + environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID) + }) + BeforeEach(func() { + nodePool = coretest.NodePool(karpv1.NodePool{ + Spec: karpv1.NodePoolSpec{ + Disruption: karpv1.Disruption{ + ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, + ConsolidateAfter: karpv1.MustParseNillableDuration("0s"), + }, + Template: karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimTemplateSpec{ + Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ + { + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, + }, + }, + }, + NodeClassRef: &karpv1.NodeClassReference{ + Group: object.GVK(nodeClass).Group, + Kind: 
object.GVK(nodeClass).Kind, + Name: nodeClass.Name, + }, + }, + }, + }, + }) + }) + It("should consolidate into a reserved offering", func() { + dep := coretest.Deployment(coretest.DeploymentOptions{ + PodOptions: coretest.PodOptions{ + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelInstanceTypeStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{ + // Should result in an m5.large initially + string(ec2types.InstanceTypeM5Large), + // Should consolidate to the m5.xlarge when we add the reservation to the nodeclass + string(ec2types.InstanceTypeM5Xlarge), + }, + }}, + }, + Replicas: 1, + }) + env.ExpectCreated(nodePool, nodeClass, dep) + env.EventuallyExpectNodeClaimsReady(env.EventuallyExpectNodeClaimCount("==", 1)...) + n := env.EventuallyExpectNodeCount("==", int(1))[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectUpdated(nodeClass) + + // Eventually expect the m5.large on-demand node to be replaced with an m5.xlarge reserved node. We should prioritize + // the reserved instance since it's already been paid for. + Eventually(func(g Gomega) { + var nodes corev1.NodeList + g.Expect(env.Client.List(env.Context, &nodes)).To(Succeed()) + filtered := lo.Filter(nodes.Items, func(n corev1.Node, _ int) bool { + if val, ok := n.Labels[karpv1.NodePoolLabelKey]; !ok || val != nodePool.Name { + return false + } + return true + }) + g.Expect(filtered).To(HaveLen(1)) + + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Xlarge))) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) + }, time.Minute*10).Should(Succeed()) + }) + It("should consolidate between reserved offerings", func() { + dep := coretest.Deployment(coretest.DeploymentOptions{ + PodOptions: coretest.PodOptions{ + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelInstanceTypeStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{ + string(ec2types.InstanceTypeM5Large), + string(ec2types.InstanceTypeM5Xlarge), + }, + }}, + }, + Replicas: 1, + }) + + // Start by only enabling the m5.xlarge capacity reservation, ensuring it's provisioned + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectCreated(nodePool, nodeClass, dep) + env.EventuallyExpectNodeClaimsReady(env.EventuallyExpectNodeClaimCount("==", 1)...) + n := env.EventuallyExpectNodeCount("==", int(1))[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Xlarge))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) + + // Add the m5.large capacity reservation to the nodeclass. We should consolidate from the xlarge instance to the large. 
+ nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: largeCapacityReservationID, + }) + env.ExpectUpdated(nodeClass) + Eventually(func(g Gomega) { + var nodes corev1.NodeList + g.Expect(env.Client.List(env.Context, &nodes)).To(Succeed()) + filtered := lo.Filter(nodes.Items, func(n corev1.Node, _ int) bool { + if val, ok := n.Labels[karpv1.NodePoolLabelKey]; !ok || val != nodePool.Name { + return false + } + return true + }) + g.Expect(filtered).To(HaveLen(1)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, largeCapacityReservationID)) + }, time.Minute*10).Should(Succeed()) + }) + }) }) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 7e04968090dd..58f3a77cb294 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -72,7 +72,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Drift", func() { +var _ = Describe("Drift", Ordered, func() { var dep *appsv1.Deployment var selector labels.Selector var numPods int @@ -955,4 +955,112 @@ var _ = Describe("Drift", func() { env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) }) }) + Context("Capacity Reservations", func() { + var largeCapacityReservationID, xlargeCapacityReservationID string + BeforeAll(func() { + largeCapacityReservationID = aws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + xlargeCapacityReservationID = aws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Xlarge, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + }) + AfterAll(func() { + aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID) + }) + BeforeEach(func() { + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{ + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }, + }} + }) + It("should drift nodeclaim when the reservation is no longer selected by the nodeclass", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: largeCapacityReservationID}} + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + nc := env.EventuallyExpectNodeClaimCount("==", 1)[0] + env.EventuallyExpectNodeClaimsReady(nc) + n := env.EventuallyExpectCreatedNodeCount("==", 1)[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, largeCapacityReservationID)) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectUpdated(nodeClass) + env.EventuallyExpectDrifted(nc) + }) + It("should drift nodeclaim when the nodeclaim is demoted to on-demand", func() { + 
var canceled bool
+			capacityReservationID := aws.ExpectCapacityReservationCreated(
+				env.Context,
+				env.EC2API,
+				ec2types.InstanceTypeM5Large,
+				env.ZoneInfo[0].Zone,
+				1,
+				nil,
+				nil,
+			)
+			DeferCleanup(func() {
+				if !canceled {
+					aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
+				}
+			})
+
+			nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: capacityReservationID}}
+			// Prevent drift from being executed by marking the pod as do-not-disrupt. Without this, the nodeclaim may be replaced
+			// in-between polling intervals for the eventually block.
+			pod := coretest.Pod(coretest.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						karpv1.DoNotDisruptAnnotationKey: "true",
+					},
+				},
+			})
+			env.ExpectCreated(nodePool, nodeClass, pod)
+
+			nc := env.EventuallyExpectNodeClaimCount("==", 1)[0]
+			req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool {
+				return req.Key == v1.LabelCapacityReservationID
+			})
+			Expect(ok).To(BeTrue())
+			Expect(req.Values).To(ConsistOf(capacityReservationID))
+			n := env.EventuallyExpectNodeCount("==", 1)[0]
+
+			aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
+			canceled = true
+
+			// The NodeClaim capacity reservation controller runs once every minute, we'll give a little extra time to avoid
+			// a failure from a small delay, but the capacity type label should be updated and the reservation-id label should
+			// be removed within a minute of the reservation being canceled.
+			Eventually(func(g Gomega) {
+				updatedNodeClaim := &karpv1.NodeClaim{}
+				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), updatedNodeClaim)).To(BeNil())
+				g.Expect(updatedNodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
+				g.Expect(updatedNodeClaim.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
+
+				updatedNode := &corev1.Node{}
+				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(n), updatedNode)).To(BeNil())
+				g.Expect(updatedNode.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
+				g.Expect(updatedNode.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
+			}).WithTimeout(75 * time.Second).Should(Succeed())
+
+			// Since the nodeclaim is only compatible with reserved instances, we should drift the node when it's demoted to on-demand
+			env.EventuallyExpectDrifted(nc)
+		})
+	})
 })
diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go
index 01dc7a10db60..c10b316caa32 100644
--- a/test/suites/scheduling/suite_test.go
+++ b/test/suites/scheduling/suite_test.go
@@ -710,10 +710,8 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 
 	Context("Capacity Reservations", func() {
 		var largeCapacityReservationID, xlargeCapacityReservationID string
-		var cleanupFuncs []func()
 		BeforeAll(func() {
-			var cleanupFunc func()
-			largeCapacityReservationID, cleanupFunc = environmentaws.ExpectCapacityReservationCreated(
+			largeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated(
 				env.Context,
 				env.EC2API,
 				ec2types.InstanceTypeM5Large,
@@ -722,8 +720,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 				nil,
 				nil,
 			)
-			cleanupFuncs = append(cleanupFuncs, cleanupFunc)
-			xlargeCapacityReservationID, cleanupFunc = environmentaws.ExpectCapacityReservationCreated(
+			xlargeCapacityReservationID = 
environmentaws.ExpectCapacityReservationCreated( env.Context, env.EC2API, ec2types.InstanceTypeM5Xlarge, @@ -732,8 +729,11 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { nil, nil, ) - cleanupFuncs = append(cleanupFuncs, cleanupFunc) - + }) + AfterAll(func() { + environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID) + }) + BeforeEach(func() { nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{ { ID: largeCapacityReservationID, @@ -748,11 +748,6 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, }}} }) - AfterAll(func() { - for _, f := range cleanupFuncs { - f() - } - }) It("should schedule against a specific reservation ID", func() { pod := test.Pod(test.PodOptions{ NodeRequirements: []corev1.NodeSelectorRequirement{{ @@ -770,11 +765,16 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { Expect(ok).To(BeTrue()) Expect(req.Values).To(ConsistOf(xlargeCapacityReservationID)) + env.EventuallyExpectNodeClaimsReady(nc) n := env.EventuallyExpectNodeCount("==", 1)[0] Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) }) It("should fall back when compatible capacity reservations are exhausted", func() { + // We create two pods with self anti-affinity and a node selector on a specific instance type. The anti-affinity term + // ensures that we must provision 2 nodes, and the node selector selects upon an instance type with a single reserved + // instance available. As such, we should create a reserved NodeClaim for one pod, and an on-demand NodeClaim for the + // other. 
podLabels := map[string]string{"foo": "bar"} pods := test.Pods(2, test.PodOptions{ ObjectMeta: metav1.ObjectMeta{ @@ -808,7 +808,8 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.EventuallyExpectNodeCount("==", 2) }) It("should demote reserved instances when the reservation is canceled", func() { - id, cleanup := environmentaws.ExpectCapacityReservationCreated( + var canceled bool + capacityReservationID := environmentaws.ExpectCapacityReservationCreated( env.Context, env.EC2API, ec2types.InstanceTypeM5Large, @@ -817,7 +818,13 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { nil, nil, ) - nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: id}} + DeferCleanup(func() { + if !canceled { + environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID) + } + }) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: capacityReservationID}} pod := test.Pod() env.ExpectCreated(nodePool, nodeClass, pod) @@ -826,11 +833,15 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { return req.Key == v1.LabelCapacityReservationID }) Expect(ok).To(BeTrue()) - Expect(req.Values).To(ConsistOf(id)) + Expect(req.Values).To(ConsistOf(capacityReservationID)) n := env.EventuallyExpectNodeCount("==", 1)[0] - cleanup() + environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID) + canceled = true + // The NodeClaim capacity reservation controller runs once every minute, we'll give a little extra time to avoid + // a failure from a small delay, but the capacity type label should be updated and the reservation-id label should + // be removed within a minute of the reservation being canceled. 
Eventually(func(g Gomega) {
 				updatedNodeClaim := &karpv1.NodeClaim{}
 				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), updatedNodeClaim)).To(BeNil())
@@ -841,7 +852,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(n), updatedNode)).To(BeNil())
 				g.Expect(updatedNode.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
 				g.Expect(updatedNode.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
-			}).Should(Succeed())
+			}).WithTimeout(75 * time.Second).Should(Succeed())
 		})
 	})
 })
diff --git a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
index aa3030334dd6..c29eec9378c8 100644
--- a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
+++ b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
@@ -75,6 +75,7 @@ Resources:
                   "arn:${AWS::Partition}:ec2:${AWS::Region}:*:network-interface/*",
                   "arn:${AWS::Partition}:ec2:${AWS::Region}:*:launch-template/*",
-                  "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*"
+                  "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*",
+                  "arn:${AWS::Partition}:ec2:${AWS::Region}:*:capacity-reservation/*"
                 ],
                 "Action": [
                   "ec2:RunInstances",
@@ -167,6 +168,7 @@ Resources:
               "Effect": "Allow",
               "Resource": "*",
               "Action": [
+                "ec2:DescribeCapacityReservations",
                 "ec2:DescribeImages",
                 "ec2:DescribeInstances",
                 "ec2:DescribeInstanceTypeOfferings",
From 61aa5f5f4bffce7908f6bc6f54ff4bb26cd4a633 Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Wed, 26 Feb 2025 05:49:37 -0500
Subject: [PATCH 15/16] not found error fix

---
 designs/odcr.md                               | 34 ++++++++-----------
 pkg/errors/errors.go                          |  1 +
 pkg/providers/capacityreservation/provider.go |  6 ++++
 pkg/providers/capacityreservation/types.go    | 12 +++----
 .../cloudformation.yaml                       |  2 +-
 5 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/designs/odcr.md b/designs/odcr.md
index b4a3f72e49db..c06fe5963334 100644
--- a/designs/odcr.md
+++ b/designs/odcr.md
@@ -58,14 +58,14 @@ AWS also supports grouping Capacity Reservation into [Capacity Reservation group
 
 ## Goals
 
-1. Allow selection of targeted and open ODCRs with Karpenter 
-2. Ensure multiple ODCRs can be selected from a single NodePool 
+1. Allow selection of targeted and open ODCRs with Karpenter
+2. Ensure multiple ODCRs can be selected from a single NodePool
 3. Ensure that we only launch capacity into an ODCR in a cluster when an application requires the capacity, ensuring ODCR sharing between clusters and accounts
-4. Ensure ODCRs are prioritized over regular OD and spot capacity 
-5. Ensure Karpenter consolidates regular OD and spot instances to ODCR capacity when it is available 
+4. Ensure ODCRs are prioritized over regular OD and spot capacity
+5. Ensure Karpenter consolidates regular OD and spot instances to ODCR capacity when it is available
 6. Ensure Karpenter consolidates between ODCRs when a smaller/cheaper ODCR is available
-7. Allow users to constrain a NodePool to only launch into ODCR capacity without fallback 
-8. Allow users to fallback from ODCR to spot capacity and from ODCR to standard OD capacity 
+7. Allow users to constrain a NodePool to only launch into ODCR capacity without fallback
+8. Allow users to fallback from ODCR to spot capacity and from ODCR to standard OD capacity
 9. 
 
 ## Non-Goals
 
@@ -96,11 +96,9 @@ spec:
   # All other fields are not mutually exclusive and can be combined
   capacityReservationSelectorTerms:
   - # The id for the Capacity Reservation
-    # Specifying '*' for this field selects all ids
     id: String | None
     # The id of the AWS account that owns the Capacity Reservation
-    # If no ownerID is specified, only ODCRs owned by the current account will be used
-    # Specifying '*' for this field selects all ownerIDs
+    # If no ownerID is specified, any ODCRs available to the current account will be used
    ownerID: String | None
     # Tags is a map of key/value tags used to select capacity reservations
     # Specifying '*' for a value selects all values for a given tag key.
@@ -109,8 +107,6 @@ status:
   capacityReservations:
   - # AvailabilityZone for the Capacity Reservation
    availabilityZone: String
-   # Available Instance Count for the Capacity Reservation
-   availableInstanceCount: Integer
    # The time at which the Capacity Reservation expires. When a Capacity
    # Reservation expires, the reserved capacity is released and you can no longer
    # launch instances into it. The Capacity Reservation's state changes to expired
@@ -136,15 +132,13 @@ status:
    instanceType: String
    # The id of the AWS account that owns the Capacity Reservation
    ownerID: String
-   # Total Instance Count for the Capacity Reservation
-   totalInstanceCount: Integer
 ```
 
 This API follows closely with how [DescribeCapacityReservations](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeCapacityReservations.html) can filter capacity reservations -- allowing Karpenter to receive the server-side filtered version of the capacity reservations to store in its status.
 
 ### NodePool API
 
-The EC2NodeClass API allows selection on capacity reservations, which give additional options to the scheduler to choose from when launching instance types; however, it does not offer a mechanism to scope-down whether instances in a NodePool should only launch into an ODCR, fallback between a capacity reservation to on-demand if none is available, or fallback between a capacity reservation to spot and then finally to on-demand. 
+The EC2NodeClass API allows selection on capacity reservations, which gives the scheduler additional options to choose from when launching instance types; however, it does not offer a mechanism to scope down whether instances in a NodePool should only launch into an ODCR, fall back from a capacity reservation to on-demand if none is available, or fall back from a capacity reservation to spot and then finally to on-demand.
 
 This RFC proposes the addition of a new `karpenter.sh/capacity-type` label value, called `reserved`. A cluster admin could then select to support only launching ODCR capacity, or to fall back from ODCR capacity to on-demand capacity.
 
 _NOTE: This option requires any applications (pods) that are using node selection on `karpenter.sh/capacity-type: "on-demand"` to expand their selection to include `reserved` or to update it to perform a `NotIn` node affinity on `karpenter.sh/capacity-type: spot`_
 
@@ -206,7 +200,7 @@ To solve for this problem, Karpenter will implement special handling for `karpen
 
 We can surface ODCR capacity as additional offerings attached to each instance type. Offerings currently allow us to track the pricing of variants of a specific instance type, primarily based on capacity type and availability zone today.
 
-To track reservation capacity, we can add additional offerings to an instance type when there is a capacity reservation that is matched on by an EC2NodeClass's `capacityReservationSelectorTerms`. This offering will have a price near 0 to model the fact that the reservation is already paid-for and to ensure the offering is prioritized ahead of other offerings. 
+To track reservation capacity, we can add additional offerings to an instance type when there is a capacity reservation that is matched on by an EC2NodeClass's `capacityReservationSelectorTerms`. This offering will have a price near 0 to model the fact that the reservation is already paid for and to ensure the offering is prioritized ahead of other offerings.
 
 When there are multiple capacity reservation offerings for an instance type in different AZs, we will produce separate offerings for these different zones. When there are multiple capacity reservation offerings for an instance type in the same AZ, we will only produce a single offering.
 
 With this change, an example instance type offerings set will look like the following:
 
@@ -253,9 +247,9 @@ offerings:
 
 ### Representing ODCR Available Instance Counts in Instance Type Offerings
 
-ODCRs (unlike spot and on-demand capacity) have much more defined, constrained capacity ceilings. For instance, in an extreme example, a user may select on a capacity reservation with only a single available instance but launch 10,000 pods that contain hostname anti-affinity. The scheduler would do work to determine that it needs to launch 10,000 instances for these pods; however, without any kind of cap on the number of times the capacity reservation offering could be used, the scheduler would think that it could launch 10,000 instances into the capacity reservation offering. 
+ODCRs (unlike spot and on-demand capacity) have much more defined, constrained capacity ceilings. For instance, in an extreme example, a user may select on a capacity reservation with only a single available instance but launch 10,000 pods that contain hostname anti-affinity. The scheduler would do work to determine that it needs to launch 10,000 instances for these pods; however, without any kind of cap on the number of times the capacity reservation offering could be used, the scheduler would think that it could launch 10,000 instances into the capacity reservation offering.
 
-Attempting to launch this would result in a success for a single instance and an ICE error for the other 9,999. The next scheduling loop would remediate this, but this results in a lot of extra, unneeded work. 
+Attempting to launch this would result in a success for a single instance and an ICE error for the other 9,999. The next scheduling loop would remediate this, but this results in a lot of extra, unneeded work.
 
 A better way to model this would be to track the available instance count as a numerical value associated with an instance type offering. In this modeling, the scheduler could count the number of simulated NodeClaims that might use the offering and know that it can't simulate NodeClaims into particular offerings once they hit their cap.
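
As a minimal sketch of this bookkeeping (the types, field names, and zone value here are illustrative assumptions, not the actual Karpenter scheduler code), a reservation-backed offering can carry its remaining instance count and refuse further simulated NodeClaims once that count reaches zero:

```go
package main

import "fmt"

// Offering is an illustrative stand-in for an instance type offering that
// carries a reservation-backed capacity cap. A negative Available models the
// effectively unbounded case for spot and on-demand offerings.
type Offering struct {
	CapacityType string // "spot", "on-demand", or "reserved"
	Zone         string
	Available    int // remaining instance count; -1 means no cap
}

// Reserve consumes one instance from the offering, returning false once a
// capped offering is exhausted. A scheduling simulation would call this once
// per NodeClaim it plans against the offering.
func (o *Offering) Reserve() bool {
	if o.Available < 0 {
		return true // uncapped offering, always schedulable
	}
	if o.Available == 0 {
		return false // cap reached; the simulation must pick another offering
	}
	o.Available--
	return true
}

func main() {
	odcr := &Offering{CapacityType: "reserved", Zone: "us-west-2a", Available: 1}
	for i := 0; i < 3; i++ {
		// Prints true once, then false: only one simulated NodeClaim lands in
		// the reservation, and the rest fall through to other offerings.
		fmt.Printf("NodeClaim %d into ODCR offering: %v\n", i, odcr.Reserve())
	}
}
```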
@@ -306,7 +300,7 @@ offerings:
 
 ## CloudProvider Launch Behavior
 
-When a NodeClaim is passed to the CloudProvider `Create()` call that selects the `reserved` capacity type, the AWS Cloud Provider will prioritize launching into the `reserved` capacity type before attempting other capacity types. 
+When a NodeClaim is passed to the CloudProvider `Create()` call that selects the `reserved` capacity type, the AWS Cloud Provider will prioritize launching into the `reserved` capacity type before attempting other capacity types.
 
 Practically, this means that when a NodeClaim allows for the `reserved` capacity type, Karpenter will know that this NodeClaim is requesting to launch into an ODCR and leverage available ODCR offerings from this NodePool that match the instance type and availability zone requirements passed through the NodeClaim.
 
@@ -360,7 +354,7 @@ reservation first then fall back into other instances. Because of this reserved
 
 If we track Capacity Reservation usage, we can optimize the cluster configuration by moving non-Capacity Reserved instances into Capacity Reserved instances. We would need to match the instance type, platform and availability zone prior to doing this.
 
-This would be done by the standard consolidation algorithm and should work with minimal changes, since consolidation already optimizes for cost. 
+This would be done by the standard consolidation algorithm and should work with minimal changes, since consolidation already optimizes for cost.
 
 #### Consolidating between Capacity Reservations
 
@@ -405,4 +399,4 @@
 
 In this case, there is no existing mechanism in Karpenter that would catch this. CreateFleet will reject the call outright since you are not allowed to specify duplicate instance type/availability zone combinations, even if the launch templates contain different data -- such as different capacity reservation ids.
 
-> For more information on CreateFleet's handling when specifying different `usageStrategy` and `capacityReservationPreference` values, see https://docs.aws.amazon.com/emr/latest/ManagementGuide/on-demand-capacity-reservations.html.
\ No newline at end of file
+> For more information on CreateFleet's handling when specifying different `usageStrategy` and `capacityReservationPreference` values, see https://docs.aws.amazon.com/emr/latest/ManagementGuide/on-demand-capacity-reservations.html.
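
To tie the launch and fallback behavior described in the design above to configuration, a NodePool that prefers reserved capacity but can still fall back to standard on-demand could look like the following sketch (the NodePool name and EC2NodeClass reference are illustrative; the requirement uses the `karpenter.sh/capacity-type` key and `reserved` value proposed in this design):

```yaml
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: reserved-first
spec:
  template:
    spec:
      nodeClassRef:
        group: karpenter.k8s.aws
        kind: EC2NodeClass
        name: default
      requirements:
        # Allowing both values lets the scheduler prefer the near-zero-priced
        # reserved offerings and fall back to standard on-demand capacity once
        # the selected ODCRs are exhausted.
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["reserved", "on-demand"]
```

Omitting `on-demand` from the values list would constrain the NodePool to ODCR capacity only, with no fallback.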
diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go
index b9984b34a20e..9b012888aed4 100644
--- a/pkg/errors/errors.go
+++ b/pkg/errors/errors.go
@@ -32,6 +32,7 @@ const (
 var (
 	// This is not an exhaustive list, add to it as needed
 	notFoundErrorCodes = sets.New[string](
+		"InvalidCapacityReservationId.NotFound",
 		"InvalidInstanceID.NotFound",
 		launchTemplateNameNotFoundCode,
 		"InvalidLaunchTemplateId.NotFound",
diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go
index 8a7332c40c6a..f0ec5b1effc9 100644
--- a/pkg/providers/capacityreservation/provider.go
+++ b/pkg/providers/capacityreservation/provider.go
@@ -28,6 +28,7 @@ import (
 
 	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
 	sdk "github.com/aws/karpenter-provider-aws/pkg/aws"
+	awserrors "github.com/aws/karpenter-provider-aws/pkg/errors"
 )
 
 type Provider interface {
@@ -82,6 +83,11 @@ func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.Capacity
 	for paginator.HasMorePages() {
 		out, err := paginator.NextPage(ctx)
 		if err != nil {
+			if awserrors.IsNotFound(err) {
+				// Note: we only receive this error when requesting a single ID, in which case we will only ever get a single page.
+				// Replacing this with a continue will result in an infinite loop as HasMorePages will always return true.
+				break
+			}
 			return nil, fmt.Errorf("listing capacity reservations, %w", err)
 		}
 		queryReservations = append(queryReservations, lo.ToSlicePtr(out.CapacityReservations)...)
diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go
index c1eb8d5b156c..5f72882fb383 100644
--- a/pkg/providers/capacityreservation/types.go
+++ b/pkg/providers/capacityreservation/types.go
@@ -30,7 +30,7 @@ import (
 )
 
 type Query struct {
-	IDs     []string
+	ID      string
 	OwnerID string
 	Tags    map[string]string
 }
@@ -49,9 +49,9 @@ func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Qu
 			})
 		}
 	}
-	if len(ids) != 0 {
-		queries = append(queries, &Query{IDs: ids})
-	}
+	queries = append(queries, lo.Map(ids, func(id string, _ int) *Query {
+		return &Query{ID: id}
+	})...)
 	return queries
 }
 
@@ -66,10 +66,10 @@ func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReserva
 		Name:   lo.ToPtr("state"),
 		Values: []string{string(ec2types.CapacityReservationStateActive)},
 	}}
-	if len(q.IDs) != 0 {
+	if len(q.ID) != 0 {
 		return &ec2.DescribeCapacityReservationsInput{
 			Filters:                filters,
-			CapacityReservationIds: q.IDs,
+			CapacityReservationIds: []string{q.ID},
 		}
 	}
 	if q.OwnerID != "" {
diff --git a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
index c29eec9378c8..896dbe0d2a9a 100644
--- a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
+++ b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
@@ -74,7 +74,7 @@ Resources:
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:volume/*",
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:network-interface/*",
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:launch-template/*",
-                "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*"
+                "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*",
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:capacity-reservation/*"
               ],
               "Action": [

From 199602b8e08669db30fbacb6fbe9e40e047b9e4e Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Wed, 26 Feb 2025 06:46:40 -0500
Subject: [PATCH 16/16] checkpoint feedback

---
 pkg/providers/capacityreservation/types.go |  8 +---
 test/suites/drift/suite_test.go            | 15 ++++---
 test/suites/scheduling/suite_test.go       | 48 ----------------------
 3 files changed, 11 insertions(+), 60 deletions(-)

diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go
index 5f72882fb383..7d9f14e1248c 100644
--- a/pkg/providers/capacityreservation/types.go
+++ b/pkg/providers/capacityreservation/types.go
@@ -37,10 +37,9 @@ type Query struct {
 
 func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Query {
 	queries := []*Query{}
-	ids := []string{}
 	for i := range terms {
-		if terms[i].ID != "" {
-			ids = append(ids, terms[i].ID)
+		if id := terms[i].ID; id != "" {
+			queries = append(queries, &Query{ID: id})
 		}
 		if len(terms[i].Tags) != 0 {
 			queries = append(queries, &Query{
@@ -49,9 +48,6 @@ func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Qu
 			})
 		}
 	}
-	queries = append(queries, lo.Map(ids, func(id string, _ int) *Query {
-		return &Query{ID: id}
-	})...)
 
 	return queries
 }
diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go
index 58f3a77cb294..af176867cbd5 100644
--- a/test/suites/drift/suite_test.go
+++ b/test/suites/drift/suite_test.go
@@ -991,7 +991,14 @@ var _ = Describe("Drift", Ordered, func() {
 		})
 		It("should drift nodeclaim when the reservation is no longer selected by the nodeclass", func() {
 			nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: largeCapacityReservationID}}
-			pod := coretest.Pod()
+			// Include the do-not-disrupt annotation to prevent replacement NodeClaims from leaking between tests
+			pod := coretest.Pod(coretest.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						karpv1.DoNotDisruptAnnotationKey: "true",
+					},
+				},
+			})
 			env.ExpectCreated(nodePool, nodeClass, pod)
 			nc := env.EventuallyExpectNodeClaimCount("==", 1)[0]
 			env.EventuallyExpectNodeClaimsReady(nc)
@@ -1005,7 +1012,6 @@ var _ = Describe("Drift", Ordered, func() {
 			env.EventuallyExpectDrifted(nc)
 		})
 		It("should drift nodeclaim when the nodeclaim is demoted to on-demand", func() {
-			var canceled bool
 			capacityReservationID := aws.ExpectCapacityReservationCreated(
 				env.Context,
 				env.EC2API,
@@ -1016,9 +1022,7 @@ var _ = Describe("Drift", Ordered, func() {
 				nil,
 			)
 			DeferCleanup(func() {
-				if !canceled {
-					aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
-				}
+				aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
 			})
 
 			nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: capacityReservationID}}
@@ -1042,7 +1046,6 @@ var _ = Describe("Drift", Ordered, func() {
 			n := env.EventuallyExpectNodeCount("==", 1)[0]
 
 			aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
-			canceled = true
 
 			// The NodeClaim capacity reservation controller runs once every minute; we'll give a little extra time to avoid
 			// a failure from a small delay, but the capacity type label should be updated and the reservation-id label should
 			// be removed within a minute of the reservation being canceled.
diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go
index c10b316caa32..89603c88c798 100644
--- a/test/suites/scheduling/suite_test.go
+++ b/test/suites/scheduling/suite_test.go
@@ -29,7 +29,6 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/sets"
 
-	"sigs.k8s.io/controller-runtime/pkg/client"
 	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 	"sigs.k8s.io/karpenter/pkg/test"
 
@@ -807,53 +806,6 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 		Expect(reservedCount).To(Equal(1))
 		env.EventuallyExpectNodeCount("==", 2)
 	})
-	It("should demote reserved instances when the reservation is canceled", func() {
-		var canceled bool
-		capacityReservationID := environmentaws.ExpectCapacityReservationCreated(
-			env.Context,
-			env.EC2API,
-			ec2types.InstanceTypeM5Large,
-			env.ZoneInfo[0].Zone,
-			1,
-			nil,
-			nil,
-		)
-		DeferCleanup(func() {
-			if !canceled {
-				environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
-			}
-		})
-
-		nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: capacityReservationID}}
-		pod := test.Pod()
-		env.ExpectCreated(nodePool, nodeClass, pod)
-
-		nc := env.EventuallyExpectNodeClaimCount("==", 1)[0]
-		req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool {
-			return req.Key == v1.LabelCapacityReservationID
-		})
-		Expect(ok).To(BeTrue())
-
-		Expect(req.Values).To(ConsistOf(capacityReservationID))
-		n := env.EventuallyExpectNodeCount("==", 1)[0]
-
-		environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID)
-		canceled = true
-
-		// The NodeClaim capacity reservation controller runs once every minute; we'll give a little extra time to avoid
-		// a failure from a small delay, but the capacity type label should be updated and the reservation-id label should
-		// be removed within a minute of the reservation being canceled.
-		Eventually(func(g Gomega) {
-			updatedNodeClaim := &karpv1.NodeClaim{}
-			g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), updatedNodeClaim)).To(BeNil())
-			g.Expect(updatedNodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
-			g.Expect(updatedNodeClaim.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
-
-			updatedNode := &corev1.Node{}
-			g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(n), updatedNode)).To(BeNil())
-			g.Expect(updatedNode.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
-			g.Expect(updatedNode.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
-		}).WithTimeout(75 * time.Second).Should(Succeed())
-	})
 	})
 })