diff --git a/Makefile b/Makefile index 0fcce58d5b9b..1d0bac23058b 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,9 @@ HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${K --set controller.resources.requests.memory=1Gi \ --set controller.resources.limits.cpu=1 \ --set controller.resources.limits.memory=1Gi \ - --set settings.featureGates.spotToSpotConsolidation=true \ --set settings.featureGates.nodeRepair=true \ + --set settings.featureGates.reservedCapacity=true \ + --set settings.featureGates.spotToSpotConsolidation=true \ --create-namespace # CR for local builds of Karpenter diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 75021e7afb46..32c21066e067 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -239,6 +239,39 @@ spec: x-kubernetes-validations: - message: must have only one blockDeviceMappings with rootVolume rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 + capacityReservationSelectorTerms: + description: |- + CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to + determine the set of eligible capacity reservations. + items: + properties: + id: + description: ID is the capacity reservation id in EC2 + pattern: ^cr-[0-9a-z]+$ + type: string + ownerID: + description: Owner is the owner id for the ami. + pattern: ^[0-9]{12}$ + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select capacity reservations. + Specifying '*' for a value selects all values for a given tag key. + maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))' context: description: |- Context is a Reserved field in EC2 APIs @@ -469,7 +502,7 @@ spec: - message: immutable field changed rule: self == oldSelf securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + description: SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed. items: description: |- SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. 
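For orientation, the CEL rules introduced in the hunk above constrain which capacityReservationSelectorTerms shapes the API accepts. The sketch below is illustrative only; the reservation id, tags, and account id are placeholder values, not part of this change:

```yaml
# Terms are ORed together; within a single term, 'id' stands alone.
capacityReservationSelectorTerms:
  - id: cr-0123456789abcdef0            # valid: select a single reservation by id
  - tags:
      app: inference                     # valid: select by tags, optionally scoped to an owner account
    ownerID: "012345678901"
  # - id: cr-0123456789abcdef0           # rejected: 'id' cannot be combined with tags or ownerID
  #   tags:
  #     app: inference
  # - ownerID: "012345678901"            # rejected: every term needs 'tags' or 'id'
```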
@@ -503,12 +536,12 @@ spec: rule: self.size() != 0 - message: expected at least one, got none, ['tags', 'id', 'name'] rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term' rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term' rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + description: SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed. items: description: |- SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. @@ -537,7 +570,7 @@ spec: rule: self.size() != 0 - message: expected at least one, got none, ['tags', 'id'] rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term' rule: '!self.all(x, has(x.id) && has(x.tags))' tags: additionalProperties: @@ -640,6 +673,46 @@ spec: - requirements type: object type: array + capacityReservations: + description: |- + CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + CapacityReservation selectors. + items: + properties: + availabilityZone: + description: The availability zone the capacity reservation is available in. + type: string + endTime: + description: |- + The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter + will no longer be able to launch instances into that reservation. + format: date-time + type: string + id: + description: The id for the capacity reservation. + pattern: ^cr-[0-9a-z]+$ + type: string + instanceMatchCriteria: + description: Indicates the type of instance launches the capacity reservation accepts. + enum: + - open + - targeted + type: string + instanceType: + description: The instance type for the capacity reservation. + type: string + ownerID: + description: The ID of the AWS account that owns the capacity reservation. 
+ pattern: ^[0-9]{12}$ + type: string + required: + - availabilityZone + - id + - instanceMatchCriteria + - instanceType + - ownerID + type: object + type: array conditions: description: Conditions contains signals for health and readiness items: diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml index e0a0184f30d7..5b9b22b4932c 100644 --- a/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml +++ b/charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml @@ -137,7 +137,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml index dac45d671119..59ddb62b12b1 100644 --- a/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml +++ b/charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml @@ -210,7 +210,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", 
"karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: |- @@ -283,7 +283,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", 
"karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/charts/karpenter/templates/deployment.yaml b/charts/karpenter/templates/deployment.yaml index edd2aea0429a..0f2a69e89592 100644 --- a/charts/karpenter/templates/deployment.yaml +++ b/charts/karpenter/templates/deployment.yaml @@ -107,7 +107,7 @@ spec: divisor: "0" resource: limits.memory - name: FEATURE_GATES - value: "SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}" + value: "ReservedCapacity={{ .Values.settings.featureGates.reservedCapacity }},SpotToSpotConsolidation={{ .Values.settings.featureGates.spotToSpotConsolidation }},NodeRepair={{ .Values.settings.featureGates.nodeRepair }}" {{- with .Values.settings.batchMaxDuration }} - name: BATCH_MAX_DURATION value: "{{ . }}" diff --git a/charts/karpenter/values.yaml b/charts/karpenter/values.yaml index bc24a5852ee9..da1680e49beb 100644 --- a/charts/karpenter/values.yaml +++ b/charts/karpenter/values.yaml @@ -184,9 +184,12 @@ settings: # -- Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates # in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features featureGates: - # -- spotToSpotConsolidation is ALPHA and is disabled by default. - # Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation. - spotToSpotConsolidation: false # -- nodeRepair is ALPHA and is disabled by default. # Setting this to true will enable node repair. nodeRepair: false + # -- reservedCapacity is ALPHA and is disabled by default. + # Setting this will enable native on-demand capacity reservation support. + reservedCapacity: false + # -- spotToSpotConsolidation is ALPHA and is disabled by default. + # Setting this to true will enable spot replacement consolidation for both single and multi-node consolidation. + spotToSpotConsolidation: false diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 52075ecd984b..b75b56f35b90 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -15,6 +15,7 @@ limitations under the License. 
package main import ( + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/controllers" "github.com/aws/karpenter-provider-aws/pkg/operator" @@ -23,6 +24,7 @@ import ( corecontrollers "sigs.k8s.io/karpenter/pkg/controllers" "sigs.k8s.io/karpenter/pkg/controllers/state" coreoperator "sigs.k8s.io/karpenter/pkg/operator" + karpoptions "sigs.k8s.io/karpenter/pkg/operator/options" ) func main() { @@ -35,10 +37,15 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, + op.CapacityReservationProvider, ) cloudProvider := metrics.Decorate(awsCloudProvider) clusterState := state.NewCluster(op.Clock, op.GetClient(), cloudProvider) + if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity { + v1.CapacityReservationsEnabled = true + } + op. WithControllers(ctx, corecontrollers.NewControllers( ctx, @@ -69,6 +76,7 @@ func main() { op.LaunchTemplateProvider, op.VersionProvider, op.InstanceTypesProvider, + op.CapacityReservationProvider, )...). Start(ctx) } diff --git a/designs/odcr.md b/designs/odcr.md index b4a3f72e49db..c06fe5963334 100644 --- a/designs/odcr.md +++ b/designs/odcr.md @@ -58,14 +58,14 @@ AWS also supports grouping Capacity Reservation into [Capacity Reservation group ## Goals -1. Allow selection of targeted and open ODCRs with Karpenter -2. Ensure multiple ODCRs can be selected from a single NodePool +1. Allow selection of targeted and open ODCRs with Karpenter +2. Ensure multiple ODCRs can be selected from a single NodePool 3. Ensure that we only launch capacity into an ODCR in a cluster when an application requires the capacity, ensuring ODCR sharing between clusters and accounts -4. Ensure ODCRs are prioritized over regular OD and spot capacity -5. Ensure Karpenter consolidates regular OD and spot instances to ODCR capacity when it is available +4. Ensure ODCRs are prioritized over regular OD and spot capacity +5. Ensure Karpenter consolidates regular OD and spot instances to ODCR capacity when it is available 6. Ensure Karpenter consolidates between ODCRs when a smaller/cheaper ODCR is available -7. Allow users to constrain a NodePool to only launch into ODCR capacity without fallback -8. Allow users to fallback from ODCR to spot capacity and from ODCR to standard OD capacity +7. Allow users to constrain a NodePool to only launch into ODCR capacity without fallback +8. Allow users to fallback from ODCR to spot capacity and from ODCR to standard OD capacity 9. Ensure OD capacity is not automatically drifted to new capacity when a capacity reservation expires or is canceled to reduce workload disruption ## Non-Goals @@ -96,11 +96,9 @@ spec: # All other fields are not mutually exclusive and can be combined capacityReservationSelectorTerms: - # The id for the Capacity Reservation - # Specifying '*' for this field selects all ids id: String | None # The id of the AWS account that owns the Capacity Reservation - # If no ownerID is specified, only ODCRs owned by the current account will be used - # Specifying '*' for this field selects all ownerIDs + # If no ownerID is specified, any ODCRs available to the current account will be used ownerID: String | None # Tags is a map of key/value tags used to select capacity reservations # Specifying '*' for a value selects all values for a given tag key. 
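As a concrete companion to the spec fields sketched above, a hypothetical EC2NodeClass selecting ODCRs might look like the following; the cluster discovery tag, role name, reservation id, and owner account are all placeholders:

```yaml
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: reserved-example
spec:
  role: KarpenterNodeRole-my-cluster          # placeholder IAM role
  amiSelectorTerms:
    - alias: al2023@latest
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: my-cluster    # placeholder discovery tag
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: my-cluster
  capacityReservationSelectorTerms:
    - id: cr-0123456789abcdef0                # placeholder reservation id
    - tags:
        team: data-platform                   # placeholder tag selector
      ownerID: "012345678901"                 # placeholder shared-ODCR owner account
```

Matching reservations then surface under status.capacityReservations, as shown in the next hunk.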
@@ -109,8 +107,6 @@ status: capacityReservations: - # AvailabilityZone for the Capacity Reservation availabilityZone: String - # Available Instance Count for the Capacity Reservation - availableInstanceCount: Integer # The time at which the Capacity Reservation expires. When a Capacity # Reservation expires, the reserved capacity is released and you can no longer # launch instances into it. The Capacity Reservation's state changes to expired @@ -136,15 +132,13 @@ status: instanceType: String # The id of the AWS account that owns the Capacity Reservation ownerID: String - # Total Instance Count for the Capacity Reservation - totalInstanceCount: Integer ``` This API follows closely with how [DescribeCapacityReservations](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeCapacityReservations.html) can filter capacity reservations -- allowing Karpenter to receive the server-side filtered version of the capacity reservations to store in its status. ### NodePool API -The EC2NodeClass API allows selection on capacity reservations, which give additional options to the scheduler to choose from when launching instance types; however, it does not offer a mechanism to scope-down whether instances in a NodePool should only launch into an ODCR, fallback between a capacity reservation to on-demand if none is available, or fallback between a capacity reservation to spot and then finally to on-demand. +The EC2NodeClass API allows selection on capacity reservations, which give additional options to the scheduler to choose from when launching instance types; however, it does not offer a mechanism to scope-down whether instances in a NodePool should only launch into an ODCR, fallback between a capacity reservation to on-demand if none is available, or fallback between a capacity reservation to spot and then finally to on-demand. This RFC proposes the addition of a new `karpenter.sh/capacity-type` label value, called `reserved`. A cluster admin could then select to support only launching ODCR capacity and falling back between ODCR capacity to on-demand capacity respectively. _NOTE: This option requires any applications (pods) that are using node selection on `karpenter.sh/capacity-type: "on-demand"` to expand their selection to include `reserved` or to update it to perform a `NotIn` node affinity on `karpenter.sh/capacity-type: spot`_ @@ -206,7 +200,7 @@ To solve for this problem, Karpenter will implement special handling for `karpen We can surface ODCR capacity as additional offerings attached to each instance type. Offerings currently allow us to track the pricing of variants of a specific instance type, primarily based on capacity type and availability zone today. -To track reservation capacity, we can add additional offerings to an instance type when there is a capacity reservation that is matched on by an EC2NodeClass's `capacityReservationSelectorTerms`. This offering will have a price near 0 to model the fact that the reservation is already paid-for and to ensure the offering is prioritized ahead of other offerings. +To track reservation capacity, we can add additional offerings to an instance type when there is a capacity reservation that is matched on by an EC2NodeClass's `capacityReservationSelectorTerms`. This offering will have a price near 0 to model the fact that the reservation is already paid-for and to ensure the offering is prioritized ahead of other offerings. 
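Returning to the NodePool API discussion earlier in this section: the `reserved` capacity type and its fallback behavior are expressed entirely through requirements. A hedged sketch, with placeholder names:

```yaml
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: reserved-first
spec:
  template:
    spec:
      nodeClassRef:
        group: karpenter.k8s.aws
        kind: EC2NodeClass
        name: reserved-example               # placeholder EC2NodeClass name
      requirements:
        # Allow reserved capacity with fallback to regular on-demand.
        # Dropping "on-demand" constrains the NodePool to ODCR capacity only;
        # adding "spot" allows falling back to spot as well.
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["reserved", "on-demand"]
```

Because reserved offerings are priced near zero, scheduling and consolidation naturally prefer them whenever the NodeClaim's requirements admit them.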
When there are multiple capacity reservation offerings for an instance type for different AZs, we will produce separate offerings for these different zones. When there are multiple capacity reservation offerings for instance type in the same AZ, we will only produce a single offering. With this change, an example instance type offerings set will look like the following @@ -253,9 +247,9 @@ offerings: ### Representing ODCR Available Instance Counts in Instance Type Offerings -ODCRs (unlike spot and on-demand capacity) have much more defined, constrained capacity ceilings. For instance, in an extreme example, a user may select on a capacity reservation with only a single available instance but launch 10,000 pods that contain hostname anti-affinity. The scheduler would do work to determine that it needs to launch 10,000 instances for these pods; however, without any kind of cap on the number of times the capacity reservation offering could be used, the scheduler would think that it could launch 10,000 instances into the capacity reservation offering. +ODCRs (unlike spot and on-demand capacity) have much more defined, constrained capacity ceilings. For instance, in an extreme example, a user may select on a capacity reservation with only a single available instance but launch 10,000 pods that contain hostname anti-affinity. The scheduler would do work to determine that it needs to launch 10,000 instances for these pods; however, without any kind of cap on the number of times the capacity reservation offering could be used, the scheduler would think that it could launch 10,000 instances into the capacity reservation offering. -Attempting to launch this would result in a success for a single instance and an ICE error for the other 9,999. The next scheduling loop would remediate this, but this results in a lot of extra, unneeded work. +Attempting to launch this would result in a success for a single instance and an ICE error for the other 9,999. The next scheduling loop would remediate this, but this results in a lot of extra, unneeded work. A better way to model this would be to track the available instance count as a numerical value associated with an instance type offering. In this modeling, the scheduler could count the number of simulated NodeClaims that might use the offering and know that it can't simulate NodeClaims into particular offerings once they hit their cap. @@ -306,7 +300,7 @@ offerings: ## CloudProvider Launch Behavior -When a NodeClaim is passed to the CloudProvider `Create()` call that selects the `reserved` capacity type, the AWS Cloud Provider will prioritize launching into the `reserved` capacity type before attempting other capacity types. +When a NodeClaim is passed to the CloudProvider `Create()` call that selects the `reserved` capacity type, the AWS Cloud Provider will prioritize launching into the `reserved` capacity type before attempting other capacity types. Practically, this means that when a NodeClaim allows for the `reserved` capacity type, Karpenter will know that this NodeClaim is requesting to launch into an ODCR and leverage available ODCR offerings from this NodePool that match the instance type and availability zone requirements passed through the NodeClaim. @@ -360,7 +354,7 @@ reservation first then fall back into other instances. Because of this reserved If we track Capacity Reservation usage, we can optimize the cluster configuration by moving non-Capacity Reserved instances into Capacity Reserved instances. 
We would need to match the instance type, platform and availability zone prior to doing this. -This would be done by the standard consolidation algorithm and should work with minimal changes, since consolidation already optimizes for cost. +This would be done by the standard consolidation algorithm and should work with minimal changes, since consolidation already optimizes for cost. #### Consolidating between Capacity Reservations @@ -405,4 +399,4 @@ In this case, there is no existing mechanism in Karpenter that would catch this. CreateFleet will reject the call outright since you are not allowed to specify duplicate instance type/availability zone combinations, even if the launch templates contain different data -- such as different capacity reservation ids. -> For more information on CreateFleet's handling when specifying different `usageStrategy` and `capacityReservationPreference` values, see https://docs.aws.amazon.com/emr/latest/ManagementGuide/on-demand-capacity-reservations.html. \ No newline at end of file +> For more information on CreateFleet's handling when specifying different `usageStrategy` and `capacityReservationPreference` values, see https://docs.aws.amazon.com/emr/latest/ManagementGuide/on-demand-capacity-reservations.html. diff --git a/go.mod b/go.mod index cadcca7e944c..b73f68ee91c0 100644 --- a/go.mod +++ b/go.mod @@ -119,3 +119,5 @@ require ( sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect ) + +replace sigs.k8s.io/karpenter => github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a diff --git a/go.sum b/go.sum index 0db46a26c9d5..6f9a7509d44f 100644 --- a/go.sum +++ b/go.sum @@ -116,6 +116,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a h1:r7gPnoafSMfTjVmirGrkQu/3Suo6wiVDSElWyWIscXk= +github.com/jmdeal/karpenter v0.0.0-20250225003856-d34d71584c1a/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1 h1:gmpuckrozJ3lfKqSIia9YMGh0caoQmEY7mQP5MsnbTM= github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1/go.mod h1:168XvZFghCqo32ISSWnTXwdlMKzEq+x9TqdfswCjkrQ= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -342,8 +344,6 @@ sigs.k8s.io/controller-runtime v0.20.2 h1:/439OZVxoEc02psi1h4QO3bHzTgu49bb347Xp4 sigs.k8s.io/controller-runtime v0.20.2/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/karpenter v1.2.1-0.20250221214626-1c6342c8405e h1:UQLz2DYoXnrIN/WVjKQWS6beVS6di3r7KRVimHp/x6s= -sigs.k8s.io/karpenter v1.2.1-0.20250221214626-1c6342c8405e/go.mod h1:/FgjYrt+hwAMcvY46hku76st/aeP4KjOib6RLEj312g= sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/docs/instancetypes_gen/main.go 
b/hack/docs/instancetypes_gen/main.go index e22064fb43aa..fdaa4212f90b 100644 --- a/hack/docs/instancetypes_gen/main.go +++ b/hack/docs/instancetypes_gen/main.go @@ -89,7 +89,9 @@ func main() { lo.Must0(os.Setenv("SYSTEM_NAMESPACE", "karpenter")) lo.Must0(os.Setenv("AWS_SDK_LOAD_CONFIG", "true")) - ctx := coreoptions.ToContext(context.Background(), coretest.Options()) + ctx := coreoptions.ToContext(context.Background(), coretest.Options(coretest.OptionsFields{ + FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(false)}, + })) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ ClusterName: lo.ToPtr("docs-gen"), ClusterEndpoint: lo.ToPtr("https://docs-gen.aws"), @@ -130,19 +132,21 @@ below are the resources available with some assumptions and after the instance o ec2api := ec2.NewFromConfig(cfg) subnetProvider := subnet.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AvailableIPAddressTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AssociatePublicIPAddressTTL, awscache.DefaultCleanupInterval)) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, + pricing.NewDefaultProvider( + ctx, + pricing.NewAPI(cfg), + ec2api, + cfg.Region, + ), + nil, + awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, - pricing.NewDefaultProvider( - ctx, - pricing.NewAPI(cfg), - ec2api, - cfg.Region, - ), - awscache.NewUnavailableOfferings(), ), ) if err = instanceTypeProvider.UpdateInstanceTypes(ctx); err != nil { diff --git a/hack/tools/allocatable_diff/main.go b/hack/tools/allocatable_diff/main.go index d686d9330740..385c5e78598f 100644 --- a/hack/tools/allocatable_diff/main.go +++ b/hack/tools/allocatable_diff/main.go @@ -78,6 +78,7 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, + op.CapacityReservationProvider, ) instanceTypes := lo.Must(cloudProvider.GetInstanceTypes(ctx, nil)) diff --git a/hack/tools/launchtemplate_counter/main.go b/hack/tools/launchtemplate_counter/main.go index 2ea169ef2dd4..1ac6bf229278 100644 --- a/hack/tools/launchtemplate_counter/main.go +++ b/hack/tools/launchtemplate_counter/main.go @@ -45,7 +45,9 @@ import ( func main() { lo.Must0(os.Setenv("AWS_SDK_LOAD_CONFIG", "true")) - ctx := coreoptions.ToContext(context.Background(), coretest.Options()) + ctx := coreoptions.ToContext(context.Background(), coretest.Options(coretest.OptionsFields{ + FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(false)}, + })) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ ClusterName: lo.ToPtr("docs-gen"), ClusterEndpoint: lo.ToPtr("https://docs-gen.aws"), @@ -57,19 +59,21 @@ func main() { ec2api := ec2.NewFromConfig(cfg) subnetProvider := subnet.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AvailableIPAddressTTL, awscache.DefaultCleanupInterval), cache.New(awscache.AssociatePublicIPAddressTTL, awscache.DefaultCleanupInterval)) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + 
cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, + pricing.NewDefaultProvider( + ctx, + pricing.NewAPI(cfg), + ec2api, + cfg.Region, + ), + nil, + awscache.NewUnavailableOfferings(), instancetype.NewDefaultResolver( region, - pricing.NewDefaultProvider( - ctx, - pricing.NewAPI(cfg), - ec2api, - cfg.Region, - ), - awscache.NewUnavailableOfferings(), ), ) if err := instanceTypeProvider.UpdateInstanceTypes(ctx); err != nil { diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh index 23aa02d7ee4c..d4d9b0c2c93e 100755 --- a/hack/validation/labels.sh +++ b/hack/validation/labels.sh @@ -2,7 +2,7 @@ function injectDomainLabelRestrictions() { domain=$1 - rule="self.all(x, x in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"${domain}\"))" + rule="self.all(x, x in [\"${domain}/capacity-reservation-id\", \"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"${domain}\"))" message="label domain \"${domain}\" is restricted" MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml } diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh index ccc70e2575fe..f4dc91867a09 100755 --- a/hack/validation/requirements.sh +++ b/hack/validation/requirements.sh @@ -2,7 +2,7 @@ function injectDomainRequirementRestrictions() { domain=$1 - rule="self in [\"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", 
\"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"${domain}\")" + rule="self in [\"${domain}/capacity-reservation-id\", \"${domain}/ec2nodeclass\", \"${domain}/instance-encryption-in-transit-supported\", \"${domain}/instance-category\", \"${domain}/instance-hypervisor\", \"${domain}/instance-family\", \"${domain}/instance-generation\", \"${domain}/instance-local-nvme\", \"${domain}/instance-size\", \"${domain}/instance-cpu\", \"${domain}/instance-cpu-manufacturer\", \"${domain}/instance-cpu-sustained-clock-speed-mhz\", \"${domain}/instance-memory\", \"${domain}/instance-ebs-bandwidth\", \"${domain}/instance-network-bandwidth\", \"${domain}/instance-gpu-name\", \"${domain}/instance-gpu-manufacturer\", \"${domain}/instance-gpu-count\", \"${domain}/instance-gpu-memory\", \"${domain}/instance-accelerator-name\", \"${domain}/instance-accelerator-manufacturer\", \"${domain}/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"${domain}\")" message="label domain \"${domain}\" is restricted" MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml MSG="${message}" RULE="${rule}" yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [{"message": strenv(MSG), "rule": strenv(RULE)}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 3b915b075962..9dbc2c2817b6 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -236,6 +236,39 @@ spec: x-kubernetes-validations: - message: must have only one blockDeviceMappings with rootVolume rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 + capacityReservationSelectorTerms: + description: |- + CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to + determine the set of eligible capacity reservations. + items: + properties: + id: + description: ID is the capacity reservation id in EC2 + pattern: ^cr-[0-9a-z]+$ + type: string + ownerID: + description: Owner is the owner id for the ami. + pattern: ^[0-9]{12}$ + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select capacity reservations. + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))' context: description: |- Context is a Reserved field in EC2 APIs @@ -466,7 +499,7 @@ spec: - message: immutable field changed rule: self == oldSelf securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + description: SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed. items: description: |- SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. @@ -500,12 +533,12 @@ spec: rule: self.size() != 0 - message: expected at least one, got none, ['tags', 'id', 'name'] rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term' rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term' rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + description: SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed. items: description: |- SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. @@ -534,7 +567,7 @@ spec: rule: self.size() != 0 - message: expected at least one, got none, ['tags', 'id'] rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term' rule: '!self.all(x, has(x.id) && has(x.tags))' tags: additionalProperties: @@ -637,6 +670,46 @@ spec: - requirements type: object type: array + capacityReservations: + description: |- + CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + CapacityReservation selectors. + items: + properties: + availabilityZone: + description: The availability zone the capacity reservation is available in. + type: string + endTime: + description: |- + The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter + will no longer be able to launch instances into that reservation. + format: date-time + type: string + id: + description: The id for the capacity reservation. + pattern: ^cr-[0-9a-z]+$ + type: string + instanceMatchCriteria: + description: Indicates the type of instance launches the capacity reservation accepts. 
+ enum: + - open + - targeted + type: string + instanceType: + description: The instance type for the capacity reservation. + type: string + ownerID: + description: The ID of the AWS account that owns the capacity reservation. + pattern: ^[0-9]{12}$ + type: string + required: + - availabilityZone + - id + - instanceMatchCriteria + - instanceType + - ownerID + type: object + type: array conditions: description: Conditions contains signals for health and readiness items: diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index e255d9c894fc..cf065565c848 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -134,7 +134,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 155bd626c067..6fc612d83918 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -207,7 +207,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", 
"karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: |- @@ -280,7 +280,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", "karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/capacity-reservation-id", "karpenter.k8s.aws/ec2nodeclass", "karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu", 
"karpenter.k8s.aws/instance-cpu-manufacturer", "karpenter.k8s.aws/instance-cpu-sustained-clock-speed-mhz", "karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time diff --git a/pkg/apis/v1/doc.go b/pkg/apis/v1/doc.go index 44692b28c362..22a855b23ba2 100644 --- a/pkg/apis/v1/doc.go +++ b/pkg/apis/v1/doc.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/karpenter/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/apis" ) @@ -33,4 +34,6 @@ func init() { &EC2NodeClass{}, &EC2NodeClassList{}, ) + + cloudprovider.ReservationIDLabel = LabelCapacityReservationID } diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 8d61dc78d156..fdc76e33458f 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -28,21 +28,28 @@ import ( // EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. // This will contain configuration necessary to launch instances in AWS. type EC2NodeClassSpec struct { - // SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + // SubnetSelectorTerms is a list of subnet selector terms. The terms are ORed. // +kubebuilder:validation:XValidation:message="subnetSelectorTerms cannot be empty",rule="self.size() != 0" // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id']",rule="self.all(x, has(x.tags) || has(x.id))" - // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms",rule="!self.all(x, has(x.id) && has(x.tags))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in a subnet selector term",rule="!self.all(x, has(x.id) && has(x.tags))" // +kubebuilder:validation:MaxItems:=30 // +required SubnetSelectorTerms []SubnetSelectorTerm `json:"subnetSelectorTerms" hash:"ignore"` - // SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + // SecurityGroupSelectorTerms is a list of security group selector terms. The terms are ORed. 
// +kubebuilder:validation:XValidation:message="securityGroupSelectorTerms cannot be empty",rule="self.size() != 0" // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id', 'name']",rule="self.all(x, has(x.tags) || has(x.id) || has(x.name))" - // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))" - // +kubebuilder:validation:XValidation:message="'name' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms",rule="!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))" + // +kubebuilder:validation:XValidation:message="'name' is mutually exclusive, cannot be set with a combination of other fields in a security group selector term",rule="!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))" // +kubebuilder:validation:MaxItems:=30 // +required SecurityGroupSelectorTerms []SecurityGroupSelectorTerm `json:"securityGroupSelectorTerms" hash:"ignore"` + // CapacityReservationSelectorTerms is a list of capacity reservation selector terms. Each term is ORed together to + // determine the set of eligible capacity reservations. + // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id']",rule="self.all(x, has(x.tags) || has(x.id))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set along with tags in a capacity reservation selector term",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.ownerID)))" + // +kubebuilder:validation:MaxItems:=30 + // +optional + CapacityReservationSelectorTerms []CapacityReservationSelectorTerm `json:"capacityReservationSelectorTerms" hash:"ignore"` // AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. // +optional AssociatePublicIPAddress *bool `json:"associatePublicIPAddress,omitempty"` @@ -169,6 +176,23 @@ type SecurityGroupSelectorTerm struct { Name string `json:"name,omitempty"` } +type CapacityReservationSelectorTerm struct { + // Tags is a map of key/value tags used to select capacity reservations. + // Specifying '*' for a value selects all values for a given tag key. + // +kubebuilder:validation:XValidation:message="empty tag keys or values aren't supported",rule="self.all(k, k != '' && self[k] != '')" + // +kubebuilder:validation:MaxProperties:=20 + // +optional + Tags map[string]string `json:"tags,omitempty"` + // ID is the capacity reservation id in EC2 + // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" + // +optional + ID string `json:"id,omitempty"` + // Owner is the owner id for the ami. + // +kubebuilder:validation:Pattern:="^[0-9]{12}$" + // +optional + OwnerID string `json:"ownerID,omitempty"` +} + // AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. // If multiple fields are used for selection, the requirements are ANDed. 
type AMISelectorTerm struct { diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go index d88d561c2b34..87ad4de0b5af 100644 --- a/pkg/apis/v1/ec2nodeclass_hash_test.go +++ b/pkg/apis/v1/ec2nodeclass_hash_test.go @@ -184,21 +184,18 @@ var _ = Describe("Hash", func() { hash := nodeClass.Hash() // Update a behavior/dynamic field - nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{ - { - Tags: map[string]string{"subnet-test-key": "subnet-test-value"}, - }, - } - nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{ - { - Tags: map[string]string{"sg-test-key": "sg-test-value"}, - }, - } - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - Tags: map[string]string{"ami-test-key": "ami-test-value"}, - }, - } + nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ + Tags: map[string]string{"subnet-test-key": "subnet-test-value"}, + }} + nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{{ + Tags: map[string]string{"sg-test-key": "sg-test-value"}, + }} + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ + Tags: map[string]string{"ami-test-key": "ami-test-value"}, + }} + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{"cr-test-key": "cr-test-value"}, + }} updatedHash := nodeClass.Hash() Expect(hash).To(Equal(updatedHash)) }) diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 4c210ef81789..2b56635c1060 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -17,14 +17,20 @@ package v1 import ( "github.com/awslabs/operatorpkg/status" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + CapacityReservationsEnabled = false ) const ( - ConditionTypeSubnetsReady = "SubnetsReady" - ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" - ConditionTypeAMIsReady = "AMIsReady" - ConditionTypeInstanceProfileReady = "InstanceProfileReady" - ConditionTypeValidationSucceeded = "ValidationSucceeded" + ConditionTypeSubnetsReady = "SubnetsReady" + ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" + ConditionTypeAMIsReady = "AMIsReady" + ConditionTypeInstanceProfileReady = "InstanceProfileReady" + ConditionTypeCapacityReservationsReady = "CapacityReservationsReady" + ConditionTypeValidationSucceeded = "ValidationSucceeded" ) // Subnet contains resolved Subnet selector values utilized for node launch @@ -66,6 +72,31 @@ type AMI struct { Requirements []corev1.NodeSelectorRequirement `json:"requirements"` } +type CapacityReservation struct { + // The availability zone the capacity reservation is available in. + // +required + AvailabilityZone string `json:"availabilityZone"` + // The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter + // will no longer be able to launch instances into that reservation. + // +optional + EndTime *metav1.Time `json:"endTime,omitempty" hash:"ignore"` + // The id for the capacity reservation. + // +kubebuilder:validation:Pattern:="^cr-[0-9a-z]+$" + // +required + ID string `json:"id"` + // Indicates the type of instance launches the capacity reservation accepts. + // +kubebuilder:validation:Enum:={open,targeted} + // +required + InstanceMatchCriteria string `json:"instanceMatchCriteria"` + // The instance type for the capacity reservation. 
+ // +required + InstanceType string `json:"instanceType"` + // The ID of the AWS account that owns the capacity reservation. + // +kubebuilder:validation:Pattern:="^[0-9]{12}$" + // +required + OwnerID string `json:"ownerID"` +} + // EC2NodeClassStatus contains the resolved state of the EC2NodeClass type EC2NodeClassStatus struct { // Subnets contains the current subnet values that are available to the @@ -76,6 +107,10 @@ type EC2NodeClassStatus struct { // cluster under the SecurityGroups selectors. // +optional SecurityGroups []SecurityGroup `json:"securityGroups,omitempty"` + // CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the + // CapacityReservation selectors. + // +optional + CapacityReservations []CapacityReservation `json:"capacityReservations,omitempty"` // AMI contains the current AMI values that are available to the // cluster under the AMI selectors. // +optional @@ -89,13 +124,17 @@ type EC2NodeClassStatus struct { } func (in *EC2NodeClass) StatusConditions() status.ConditionSet { - return status.NewReadyConditions( + conds := []string{ ConditionTypeAMIsReady, ConditionTypeSubnetsReady, ConditionTypeSecurityGroupsReady, ConditionTypeInstanceProfileReady, ConditionTypeValidationSucceeded, - ).For(in) + } + if CapacityReservationsEnabled { + conds = append(conds, ConditionTypeCapacityReservationsReady) + } + return status.NewReadyConditions(conds...).For(in) } func (in *EC2NodeClass) GetConditions() []status.Condition { diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index 7685f1546118..296eb17a2e2d 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -435,6 +435,123 @@ var _ = Describe("CEL/Validation", func() { Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) }) + Context("CapacityReservationSelectorTerms", func() { + It("should succeed with a valid capacity reservation selector on tags", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should succeed with a valid capacity reservation selector on id", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + ID: "cr-12345749", + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should succeed for a valid ownerID", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should fail with a capacity reservation selector on a malformed id", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + ID: "r-12345749", + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should succeed when capacity group selector terms is set to nil", func() { + nc.Spec.CapacityReservationSelectorTerms = nil + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should fail when a capacity reservation selector term has no values", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{}} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when a capacity reservation selector term has no tag map values", func() { + 
nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{}, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when a capacity reservation selector term has a tag map value that is empty", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{ + "test": "", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when a capacity reservation selector term has a tag map key that is empty", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + Tags: map[string]string{ + "": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when the last capacity reservation selector is invalid", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{ + { + Tags: map[string]string{ + "test": "testvalue", + }, + }, + { + Tags: map[string]string{ + "test2": "testvalue2", + }, + }, + { + Tags: map[string]string{ + "test3": "testvalue3", + }, + }, + { + Tags: map[string]string{ + "": "testvalue4", + }, + }, + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when specifying id with tags in a single term", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + ID: "cr-12345749", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when specifying id with ownerID in a single term", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + ID: "cr-12345749", + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when the ownerID is malformed", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "01234567890", // OwnerID must be 12 digits, this is 11 + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when the ownerID is set by itself", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + }} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) Context("AMISelectorTerms", func() { It("should succeed with a valid ami selector on alias", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index 7090f623c9ab..1783be8cbda8 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -29,6 +29,7 @@ import ( func init() { karpv1.RestrictedLabelDomains = karpv1.RestrictedLabelDomains.Insert(RestrictedLabelDomains...)
karpv1.WellKnownLabels = karpv1.WellKnownLabels.Insert( + LabelCapacityReservationID, LabelInstanceHypervisor, LabelInstanceEncryptionInTransitSupported, LabelInstanceCategory, @@ -97,10 +98,7 @@ var ( ResourcePrivateIPv4Address corev1.ResourceName = "vpc.amazonaws.com/PrivateIPv4Address" ResourceEFA corev1.ResourceName = "vpc.amazonaws.com/efa" - LabelNodeClass = apis.Group + "/ec2nodeclass" - - LabelTopologyZoneID = "topology.k8s.aws/zone-id" - + LabelCapacityReservationID = apis.Group + "/capacity-reservation-id" LabelInstanceHypervisor = apis.Group + "/instance-hypervisor" LabelInstanceEncryptionInTransitSupported = apis.Group + "/instance-encryption-in-transit-supported" LabelInstanceCategory = apis.Group + "/instance-category" @@ -121,10 +119,14 @@ var ( LabelInstanceAcceleratorName = apis.Group + "/instance-accelerator-name" LabelInstanceAcceleratorManufacturer = apis.Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = apis.Group + "/instance-accelerator-count" - AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" - AnnotationClusterNameTaggedCompatability = apis.CompatibilityGroup + "/cluster-name-tagged" - AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" - AnnotationInstanceTagged = apis.Group + "/tagged" + LabelNodeClass = apis.Group + "/ec2nodeclass" + + LabelTopologyZoneID = "topology.k8s.aws/zone-id" + + AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" + AnnotationClusterNameTaggedCompatability = apis.CompatibilityGroup + "/cluster-name-tagged" + AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" + AnnotationInstanceTagged = apis.Group + "/tagged" NodeClaimTagKey = coreapis.Group + "/nodeclaim" NameTagKey = "Name" diff --git a/pkg/apis/v1/zz_generated.deepcopy.go b/pkg/apis/v1/zz_generated.deepcopy.go index 802b4929776c..344bb1c23917 100644 --- a/pkg/apis/v1/zz_generated.deepcopy.go +++ b/pkg/apis/v1/zz_generated.deepcopy.go @@ -164,6 +164,47 @@ func (in *BlockDeviceMapping) DeepCopy() *BlockDeviceMapping { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) { + *out = *in + if in.EndTime != nil { + in, out := &in.EndTime, &out.EndTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation. +func (in *CapacityReservation) DeepCopy() *CapacityReservation { + if in == nil { + return nil + } + out := new(CapacityReservation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CapacityReservationSelectorTerm) DeepCopyInto(out *CapacityReservationSelectorTerm) { + *out = *in + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservationSelectorTerm. +func (in *CapacityReservationSelectorTerm) DeepCopy() *CapacityReservationSelectorTerm { + if in == nil { + return nil + } + out := new(CapacityReservationSelectorTerm) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EC2NodeClass) DeepCopyInto(out *EC2NodeClass) { *out = *in @@ -240,6 +281,13 @@ func (in *EC2NodeClassSpec) DeepCopyInto(out *EC2NodeClassSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.CapacityReservationSelectorTerms != nil { + in, out := &in.CapacityReservationSelectorTerms, &out.CapacityReservationSelectorTerms + *out = make([]CapacityReservationSelectorTerm, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.AssociatePublicIPAddress != nil { in, out := &in.AssociatePublicIPAddress, &out.AssociatePublicIPAddress *out = new(bool) @@ -335,6 +383,13 @@ func (in *EC2NodeClassStatus) DeepCopyInto(out *EC2NodeClassStatus) { *out = make([]SecurityGroup, len(*in)) copy(*out, *in) } + if in.CapacityReservations != nil { + in, out := &in.CapacityReservations, &out.CapacityReservations + *out = make([]CapacityReservation, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.AMIs != nil { in, out := &in.AMIs, &out.AMIs *out = make([]AMI, len(*in)) diff --git a/pkg/aws/sdk.go b/pkg/aws/sdk.go index b6449125714b..b807b7922761 100644 --- a/pkg/aws/sdk.go +++ b/pkg/aws/sdk.go @@ -27,6 +27,7 @@ import ( ) type EC2API interface { + DescribeCapacityReservations(context.Context, *ec2.DescribeCapacityReservationsInput, ...func(*ec2.Options)) (*ec2.DescribeCapacityReservationsOutput, error) DescribeImages(context.Context, *ec2.DescribeImagesInput, ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) DescribeLaunchTemplates(context.Context, *ec2.DescribeLaunchTemplatesInput, ...func(*ec2.Options)) (*ec2.DescribeLaunchTemplatesOutput, error) DescribeSubnets(context.Context, *ec2.DescribeSubnetsInput, ...func(*ec2.Options)) (*ec2.DescribeSubnetsOutput, error) diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index ce13687cb550..1fda37b6da03 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -27,8 +27,12 @@ const ( // UnavailableOfferingsTTL is the time before offerings that were marked as unavailable // are removed from the cache and are available for launch again UnavailableOfferingsTTL = 3 * time.Minute - // InstanceTypesAndZonesTTL is the time before we refresh instance types and zones at EC2 - InstanceTypesAndZonesTTL = 5 * time.Minute + // CapacityReservationAvailabilityTTL is the time we will persist cached capacity availability. Nominally, this is + // updated every minute, but we want to persist the data longer in the event of an EC2 API outage. 24 hours was the + // compromise made between API outage resiliency and garbage collecting entries for orphaned reservations.
+ CapacityReservationAvailabilityTTL = 24 * time.Hour + // InstanceTypesZonesAndOfferingsTTL is the time before we refresh instance types, zones, and offerings at EC2 + InstanceTypesZonesAndOfferingsTTL = 5 * time.Minute // InstanceProfileTTL is the time before we refresh checking instance profile existence at IAM InstanceProfileTTL = 15 * time.Minute // AvailableIPAddressTTL is time to drop AvailableIPAddress data if it is not updated within the TTL diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 6bb838cd67e5..2ad32737c96f 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -46,6 +46,7 @@ import ( cloudproviderevents "github.com/aws/karpenter-provider-aws/pkg/cloudprovider/events" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" @@ -59,21 +60,30 @@ type CloudProvider struct { kubeClient client.Client recorder events.Recorder - instanceTypeProvider instancetype.Provider - instanceProvider instance.Provider - amiProvider amifamily.Provider - securityGroupProvider securitygroup.Provider + instanceTypeProvider instancetype.Provider + instanceProvider instance.Provider + amiProvider amifamily.Provider + securityGroupProvider securitygroup.Provider + capacityReservationProvider capacityreservation.Provider } -func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.Provider, recorder events.Recorder, - kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider) *CloudProvider { +func New( + instanceTypeProvider instancetype.Provider, + instanceProvider instance.Provider, + recorder events.Recorder, + kubeClient client.Client, + amiProvider amifamily.Provider, + securityGroupProvider securitygroup.Provider, + capacityReservationProvider capacityreservation.Provider, +) *CloudProvider { return &CloudProvider{ - instanceTypeProvider: instanceTypeProvider, - instanceProvider: instanceProvider, - kubeClient: kubeClient, - amiProvider: amiProvider, - securityGroupProvider: securityGroupProvider, - recorder: recorder, + instanceTypeProvider: instanceTypeProvider, + instanceProvider: instanceProvider, + kubeClient: kubeClient, + amiProvider: amiProvider, + securityGroupProvider: securityGroupProvider, + capacityReservationProvider: capacityReservationProvider, + recorder: recorder, } } @@ -111,6 +121,9 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) if err != nil { return nil, fmt.Errorf("creating instance, %w", err) } + if instance.CapacityType == karpv1.CapacityTypeReserved { + c.capacityReservationProvider.MarkLaunched(instance.CapacityReservationID) + } instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool { return i.Name == string(instance.Type) }) @@ -189,7 +202,11 @@ func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *karpv1.NodeClaim) return fmt.Errorf("getting instance ID, %w", err) } ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id)) - return c.instanceProvider.Delete(ctx, id) + err = c.instanceProvider.Delete(ctx, id) + if id := nodeClaim.Labels[cloudprovider.ReservationIDLabel]; id != "" && cloudprovider.IsNodeClaimNotFoundError(err) { + 
c.capacityReservationProvider.MarkTerminated(id) + } + return err } func (c *CloudProvider) DisruptionReasons() []karpv1.DisruptionReason { @@ -399,6 +416,9 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * } } labels[karpv1.CapacityTypeLabelKey] = i.CapacityType + if i.CapacityType == karpv1.CapacityTypeReserved { + labels[cloudprovider.ReservationIDLabel] = i.CapacityReservationID + } if v, ok := i.Tags[karpv1.NodePoolLabelKey]; ok { labels[karpv1.NodePoolLabelKey] = v } diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index 0f58e6d06b26..dd04549f4b9c 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -32,10 +32,11 @@ import ( ) const ( - AMIDrift cloudprovider.DriftReason = "AMIDrift" - SubnetDrift cloudprovider.DriftReason = "SubnetDrift" - SecurityGroupDrift cloudprovider.DriftReason = "SecurityGroupDrift" - NodeClassDrift cloudprovider.DriftReason = "NodeClassDrift" + AMIDrift cloudprovider.DriftReason = "AMIDrift" + SubnetDrift cloudprovider.DriftReason = "SubnetDrift" + SecurityGroupDrift cloudprovider.DriftReason = "SecurityGroupDrift" + CapacityReservationDrift cloudprovider.DriftReason = "CapacityReservationDrift" + NodeClassDrift cloudprovider.DriftReason = "NodeClassDrift" ) func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodePool *karpv1.NodePool, nodeClass *v1.EC2NodeClass) (cloudprovider.DriftReason, error) { @@ -59,7 +60,13 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *karpv if err != nil { return "", fmt.Errorf("calculating subnet drift, %w", err) } - drifted := lo.FindOrElse([]cloudprovider.DriftReason{amiDrifted, securitygroupDrifted, subnetDrifted}, "", func(i cloudprovider.DriftReason) bool { + capacityReservationsDrifted := c.isCapacityReservationDrifted(instance, nodeClass) + drifted := lo.FindOrElse([]cloudprovider.DriftReason{ + amiDrifted, + securitygroupDrifted, + subnetDrifted, + capacityReservationsDrifted, + }, "", func(i cloudprovider.DriftReason) bool { return string(i) != "" }) return drifted, nil @@ -119,6 +126,19 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, return "", nil } +// Checks if capacity reservations are drifted, by comparing the capacity reservations persisted to the NodeClass to +// the instance's capacity reservation. +// NOTE: We handle drift dynamically for capacity reservations rather than relying on the offerings inducing drift since +// a reserved instance may fall back to on-demand. Relying on offerings could result in drift occurring before fallback +// would cancel it out. +func (c *CloudProvider) isCapacityReservationDrifted(instance *instance.Instance, nodeClass *v1.EC2NodeClass) cloudprovider.DriftReason { + capacityReservationIDs := sets.New(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { return cr.ID })...) 
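+ // e.g. an instance launched into "cr-foo" is considered drifted once "cr-foo" no longer appears in the NodeClass status (illustrative id)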
+ if instance.CapacityReservationID != "" && !capacityReservationIDs.Has(instance.CapacityReservationID) { + return CapacityReservationDrift + } + return "" +} + func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *karpv1.NodeClaim, nodeClass *v1.EC2NodeClass) cloudprovider.DriftReason { nodeClassHash, foundNodeClassHash := nodeClass.Annotations[v1.AnnotationEC2NodeClassHash] nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1.AnnotationEC2NodeClassHashVersion] diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 9e055ee0f194..9237b192d444 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -80,15 +80,18 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + ) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) fakeClock = clock.NewFakeClock(time.Now()) recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster, fakeClock) }) @@ -99,7 +102,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() @@ -870,6 +873,31 @@ var _ = Describe("CloudProvider", func() { Expect(err).ToNot(HaveOccurred()) Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift)) }) + It("should dynamically drift nodeclaims for capacity reservations", func() { + nodeClass.Status.CapacityReservations = []v1.CapacityReservation{ + { + AvailabilityZone: "test-zone-1a", + ID: "cr-foo", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + } + setReservationID := func(id string) { + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = lo.ToPtr(id) + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + } + setReservationID("cr-foo") + ExpectApplied(ctx, env.Client, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).ToNot(HaveOccurred()) + Expect(isDrifted).To(Equal(corecloudprovider.DriftReason(""))) + setReservationID("cr-bar") + isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).ToNot(HaveOccurred()) + Expect(isDrifted).To(Equal(cloudprovider.CapacityReservationDrift)) + }) It("should not return drifted if the security groups match", func() { isDrifted, err := 
cloudProvider.IsDrifted(ctx, nodeClaim) Expect(err).ToNot(HaveOccurred()) @@ -1158,7 +1186,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(100), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectApplied(ctx, env.Client, nodePool, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"}}) @@ -1175,7 +1203,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(11), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ MaxPods: aws.Int32(1), } @@ -1216,7 +1244,7 @@ var _ = Describe("CloudProvider", func() { }}) nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) @@ -1343,4 +1371,68 @@ var _ = Describe("CloudProvider", func() { Expect(lo.Keys(cloudProviderNodeClaim.Status.Allocatable)).ToNot(ContainElement(v1.ResourceEFA)) }) }) + Context("Capacity Reservations", func() { + var reservationID string + BeforeEach(func() { + reservationID = "cr-m5.large-1a-1" + cr := ec2types.CapacityReservation{ + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(reservationID), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: 
ec2types.CapacityReservationStateActive, + } + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(reservationID, 10) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{cr}, + }) + nodeClass.Status.CapacityReservations = []v1.CapacityReservation{ + lo.Must(nodeclass.CapacityReservationFromEC2(&cr)), + } + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}} + }) + It("should mark capacity reservations as launched", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(9)) + }) + It("should mark capacity reservations as terminated", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + ncs := ExpectNodeClaims(ctx, env.Client) + Expect(ncs).To(HaveLen(1)) + + // Attempt the first delete - since the instance still exists we shouldn't increment the availability count + err := cloudProvider.Delete(ctx, ncs[0]) + Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeFalse()) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(9)) + + // Attempt again after clearing the instance from the EC2 output. Now that we get a NotFound error, expect + // availability to be incremented. 
+ awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{}) + err = cloudProvider.Delete(ctx, ncs[0]) + Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue()) + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(reservationID)).To(Equal(10)) + }) + It("should include capacity reservation labels", func() { + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, nodePool, nodeClass, pod) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + ncs := ExpectNodeClaims(ctx, env.Client) + Expect(ncs).To(HaveLen(1)) + Expect(ncs[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(ncs[0].Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + }) + }) }) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index a9de8ba97fa9..509ed647f48d 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -34,6 +34,7 @@ import ( controllerspricing "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/pricing" ssminvalidation "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/ssm/invalidation" controllersversion "github.com/aws/karpenter-provider-aws/pkg/controllers/providers/version" + capacityreservationprovider "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/version" @@ -46,6 +47,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption" + "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/capacityreservation" nodeclaimgarbagecollection "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/garbagecollection" nodeclaimtagging "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/tagging" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -78,10 +80,12 @@ func NewControllers( amiProvider amifamily.Provider, launchTemplateProvider launchtemplate.Provider, versionProvider *version.DefaultProvider, - instanceTypeProvider *instancetype.DefaultProvider) []controller.Controller { + instanceTypeProvider *instancetype.DefaultProvider, + capacityReservationProvider capacityreservationprovider.Provider, +) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), - nodeclass.NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, ec2api), + nodeclass.NewController(ctx, clk, kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider, capacityReservationProvider, ec2api), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider), controllerspricing.NewController(pricingProvider), @@ -90,6 +94,7 @@ func NewControllers( ssminvalidation.NewController(ssmCache, amiProvider), status.NewController[*v1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter"), status.EmitDeprecatedMetrics), controllersversion.NewController(versionProvider, versionProvider.UpdateVersionWithValidation), + capacityreservation.NewController(kubeClient, cloudProvider), } if options.FromContext(ctx).InterruptionQueue != "" 
{ sqsapi := servicesqs.NewFromConfig(cfg) diff --git a/pkg/controllers/interruption/suite_test.go b/pkg/controllers/interruption/suite_test.go index 042131d03164..52850e9d28d9 100644 --- a/pkg/controllers/interruption/suite_test.go +++ b/pkg/controllers/interruption/suite_test.go @@ -91,7 +91,7 @@ var _ = BeforeSuite(func() { sqsapi = &fake.SQSAPI{} sqsProvider = lo.Must(sqs.NewDefaultProvider(sqsapi, fmt.Sprintf("https://sqs.%s.amazonaws.com/%s/test-cluster", fake.DefaultRegion, fake.DefaultAccount))) cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) controller = interruption.NewController(env.Client, cloudProvider, fakeClock, events.NewRecorder(&record.FakeRecorder{}), sqsProvider, unavailableOfferingsCache) }) @@ -100,7 +100,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) unavailableOfferingsCache.Flush() sqsapi.Reset() }) diff --git a/pkg/controllers/nodeclaim/capacityreservation/controller.go b/pkg/controllers/nodeclaim/capacityreservation/controller.go new file mode 100644 index 000000000000..ada90ef2c46c --- /dev/null +++ b/pkg/controllers/nodeclaim/capacityreservation/controller.go @@ -0,0 +1,153 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation + +import ( + "context" + "fmt" + "time" + + "github.com/awslabs/operatorpkg/singleton" + "github.com/samber/lo" + "go.uber.org/multierr" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/operator/injection" + nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" +) + +type Controller struct { + cp cloudprovider.CloudProvider + kubeClient client.Client +} + +func NewController(kubeClient client.Client, cp cloudprovider.CloudProvider) *Controller { + return &Controller{ + cp: cp, + kubeClient: kubeClient, + } +} + +func (*Controller) Name() string { + return "nodeclaim.capacityreservation" +} + +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named(c.Name()). + WatchesRawSource(singleton.Source()). 
+ Complete(singleton.AsReconciler(c)) +} + +func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, c.Name()) + cpNodeClaims, err := c.cp.List(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("listing instance types, %w", err) + } + providerIDsToCPNodeClaims := lo.SliceToMap(cpNodeClaims, func(nc *karpv1.NodeClaim) (string, *karpv1.NodeClaim) { + return nc.Status.ProviderID, nc + }) + ncs := &karpv1.NodeClaimList{} + if err := c.kubeClient.List(ctx, ncs); err != nil { + return reconcile.Result{}, fmt.Errorf("listing nodeclaims, %w", err) + } + updatedNodeClaims := sets.New[string]() + var errs []error + for i := range ncs.Items { + cpNC, ok := providerIDsToCPNodeClaims[ncs.Items[i].Status.ProviderID] + if !ok { + continue + } + updated, err := c.syncCapacityType(ctx, cpNC.Labels[karpv1.CapacityTypeLabelKey], &ncs.Items[i]) + if err != nil { + errs = append(errs, err) + } + if updated { + updatedNodeClaims.Insert(ncs.Items[i].Name) + } + } + if len(updatedNodeClaims) != 0 { + log.FromContext(ctx).WithValues("NodeClaims", lo.Map(updatedNodeClaims.UnsortedList(), func(name string, _ int) klog.ObjectRef { + return klog.KRef("", name) + })).V(1).Info("updated capacity type for nodeclaims") + } + if len(errs) != 0 { + if lo.EveryBy(errs, func(err error) bool { return errors.IsConflict(err) }) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, multierr.Combine(errs...) + } + return reconcile.Result{RequeueAfter: time.Minute}, nil +} + +// syncCapacityType will update the capacity type for the given NodeClaim. This accounts for the fact that capacity +// reservations will expire, demoting NodeClaims with capacity type "reserved" to "on-demand". +func (c *Controller) syncCapacityType(ctx context.Context, capacityType string, nc *karpv1.NodeClaim) (bool, error) { + // We won't be able to sync deleting NodeClaims, and there's no real need to either as they're already draining. + if !nc.DeletionTimestamp.IsZero() { + return false, nil + } + + // For now we only account for the case where a reserved NodeClaim becomes an on-demand NodeClaim. This does not + // account for on-demand NodeClaims being promoted to reserved since that is not natively supported by Karpenter. + if capacityType != karpv1.CapacityTypeOnDemand { + return false, nil + } + if nc.Labels[karpv1.CapacityTypeLabelKey] == karpv1.CapacityTypeReserved { + stored := nc.DeepCopy() + nc.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand + delete(nc.Labels, cloudprovider.ReservationIDLabel) + if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); client.IgnoreNotFound(err) != nil { + return false, fmt.Errorf("patching nodeclaim %q, %w", nc.Name, err) + } + } + + // If the reservation expired before the NodeClaim became registered, there may not be a Node on the cluster. Note + // that there should never be duplicate Nodes for a given NodeClaim, but handling this user-induced error is more + // straightforward than handling the duplicate error. + nodes, err := nodeclaimutils.AllNodesForNodeClaim(ctx, c.kubeClient, nc) + if err != nil { + return false, fmt.Errorf("listing nodes for nodeclaim %q, %w", nc.Name, err) + } + for _, n := range nodes { + if !n.DeletionTimestamp.IsZero() { + continue + } + // Skip Nodes which haven't been registered since we still may not have synced labels. We'll get it on the next + // iteration. 
+ if n.Labels[karpv1.NodeRegisteredLabelKey] != "true" { + continue + } + if n.Labels[karpv1.CapacityTypeLabelKey] != karpv1.CapacityTypeReserved { + continue + } + stored := n.DeepCopy() + n.Labels[karpv1.CapacityTypeLabelKey] = karpv1.CapacityTypeOnDemand + delete(n.Labels, cloudprovider.ReservationIDLabel) + if err := c.kubeClient.Patch(ctx, n, client.MergeFrom(stored)); client.IgnoreNotFound(err) != nil { + return false, fmt.Errorf("patching node %q, %w", n.Name, err) + } + } + return true, nil +} diff --git a/pkg/controllers/nodeclaim/capacityreservation/suite_test.go b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go new file mode 100644 index 000000000000..49d8ef9fb98d --- /dev/null +++ b/pkg/controllers/nodeclaim/capacityreservation/suite_test.go @@ -0,0 +1,157 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation_test + +import ( + "context" + "fmt" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/events" + coretest "sigs.k8s.io/karpenter/pkg/test" + + "github.com/aws/karpenter-provider-aws/pkg/apis" + "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" + "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclaim/capacityreservation" + "github.com/aws/karpenter-provider-aws/pkg/fake" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + . "sigs.k8s.io/karpenter/pkg/test/expectations" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" +) + +var ctx context.Context +var stop context.CancelFunc +var env *coretest.Environment +var awsEnv *test.Environment +var controller *capacityreservation.Controller + +func TestAWS(t *testing.T) { + ctx = TestContextWithLogger(t) + RegisterFailHandler(Fail) + RunSpecs(t, "SSM Invalidation Controller") +} + +var _ = BeforeSuite(func() { + env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeProviderIDFieldIndexer(ctx))) + ctx = options.ToContext(ctx, test.Options()) + ctx, stop = context.WithCancel(ctx) + awsEnv = test.NewEnvironment(ctx, env) + + cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) + controller = capacityreservation.NewController(env.Client, cloudProvider) +}) + +var _ = AfterSuite(func() { + stop() + Expect(env.Stop()).To(Succeed(), "Failed to stop environment") +}) + +var _ = Describe("Capacity Reservation NodeClaim Controller", func() { + var nodeClaim *karpv1.NodeClaim + var node *corev1.Node + var reservationID string + BeforeEach(func() { + reservationID = "cr-foo" + instance := ec2types.Instance{ + ImageId: lo.ToPtr(fake.ImageID()), + InstanceType: ec2types.InstanceType("m5.large"), + SubnetId: lo.ToPtr(fake.SubnetID()), + SpotInstanceRequestId: nil, + State: &ec2types.InstanceState{ + Name: ec2types.InstanceStateNameRunning, + }, + InstanceId: lo.ToPtr(fake.InstanceID()), + CapacityReservationId: &reservationID, + Placement: &ec2types.Placement{ + AvailabilityZone: lo.ToPtr("test-zone-1a"), + }, + SecurityGroups: []ec2types.GroupIdentifier{{GroupId: lo.ToPtr(fake.SecurityGroupID())}}, + } + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{ + Reservations: []ec2types.Reservation{{Instances: []ec2types.Instance{instance}}}, + }) + + nodeClaim = coretest.NodeClaim(karpv1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + karpv1.CapacityTypeLabelKey: karpv1.CapacityTypeReserved, + corecloudprovider.ReservationIDLabel: reservationID, + karpv1.NodeRegisteredLabelKey: "true", + }, + }, + Status: karpv1.NodeClaimStatus{ + ProviderID: fmt.Sprintf("aws:///test-zone-1a/%s", *instance.InstanceId), + }, + }) + node = coretest.NodeClaimLinkedNode(nodeClaim) + }) + It("should demote nodeclaims and nodes from reserved to on-demand", func() { + ExpectApplied(ctx, env.Client, nodeClaim, node) + ExpectSingletonReconciled(ctx, controller) + + // Since the backing instance is still under a capacity reservation, we shouldn't demote the nodeclaim or node + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(node.Labels).To(HaveKeyWithValue(corecloudprovider.ReservationIDLabel, reservationID)) + + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = nil + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + + // Now that the backing instance is no longer part of a capacity reservation, we should 
demote the resources by + // updating the capacity type to on-demand and removing the reservation ID label. + ExpectSingletonReconciled(ctx, controller) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(nodeClaim.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(node.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + }) + It("should demote nodes from reserved to on-demand even if their nodeclaim was demoted previously", func() { + out := awsEnv.EC2API.DescribeInstancesBehavior.Output.Clone() + out.Reservations[0].Instances[0].CapacityReservationId = nil + awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(out) + + ExpectApplied(ctx, env.Client, nodeClaim) + ExpectSingletonReconciled(ctx, controller) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + Expect(nodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(nodeClaim.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + + ExpectApplied(ctx, env.Client, node) + ExpectSingletonReconciled(ctx, controller) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + Expect(node.Labels).ToNot(HaveKey(corecloudprovider.ReservationIDLabel)) + }) +}) diff --git a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go index db2b76bf6f2b..2ab7dca2c3f7 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go @@ -65,7 +65,7 @@ var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider) }) diff --git a/pkg/controllers/nodeclaim/tagging/suite_test.go b/pkg/controllers/nodeclaim/tagging/suite_test.go index 5f284108543f..783cc8710af0 100644 --- a/pkg/controllers/nodeclaim/tagging/suite_test.go +++ b/pkg/controllers/nodeclaim/tagging/suite_test.go @@ -60,11 +60,11 @@ func TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) taggingController = tagging.NewController(env.Client, cloudProvider, awsEnv.InstanceProvider) 
}) var _ = AfterSuite(func() { diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go new file mode 100644 index 000000000000..a7ee60275b9e --- /dev/null +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -0,0 +1,140 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeclass + +import ( + "context" + "fmt" + "sort" + "time" + + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/awslabs/operatorpkg/singleton" + "github.com/samber/lo" + "go.uber.org/multierr" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/clock" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/karpenter/pkg/utils/pretty" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" +) + +const capacityReservationPollPeriod = time.Minute + +type CapacityReservation struct { + provider capacityreservation.Provider + clk clock.Clock + cm *pretty.ChangeMonitor +} + +func NewCapacityReservationReconciler(clk clock.Clock, provider capacityreservation.Provider) *CapacityReservation { + return &CapacityReservation{ + provider: provider, + clk: clk, + cm: pretty.NewChangeMonitor(), + } +} + +func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass) (reconcile.Result, error) { + reservations, err := c.provider.List(ctx, nc.Spec.CapacityReservationSelectorTerms...) 
+ if err != nil { + return reconcile.Result{}, fmt.Errorf("getting capacity reservations, %w", err) + } + if len(reservations) == 0 { + nc.Status.CapacityReservations = nil + nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady) + return reconcile.Result{RequeueAfter: capacityReservationPollPeriod}, nil + } + + if ids := lo.Map(reservations, func(r *ec2types.CapacityReservation, _ int) string { + return *r.CapacityReservationId + }); c.cm.HasChanged(nc.Name, ids) { + log.FromContext(ctx).V(1).WithValues("ids", ids).Info("discovered capacity reservations") + } + sort.Slice(reservations, func(i, j int) bool { + return *reservations[i].CapacityReservationId < *reservations[j].CapacityReservationId + }) + errors := []error{} + nc.Status.CapacityReservations = []v1.CapacityReservation{} + for _, r := range reservations { + reservation, err := CapacityReservationFromEC2(r) + if err != nil { + errors = append(errors, err) + continue + } + nc.Status.CapacityReservations = append(nc.Status.CapacityReservations, reservation) + } + if len(errors) != 0 { + log.FromContext(ctx).WithValues( + "error-count", len(errors), + "total-count", len(reservations), + ).Error(multierr.Combine(errors...), "failed to parse discovered capacity reservations") + } + nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady) + return reconcile.Result{RequeueAfter: c.requeueAfter(reservations...)}, nil +} + +func CapacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) { + // Guard against new instance match criteria added in the future. See https://github.com/kubernetes-sigs/karpenter/issues/806 + // for a similar issue. + if !lo.Contains([]ec2types.InstanceMatchCriteria{ + ec2types.InstanceMatchCriteriaOpen, + ec2types.InstanceMatchCriteriaTargeted, + }, cr.InstanceMatchCriteria) { + return v1.CapacityReservation{}, fmt.Errorf("capacity reservation %s has an unsupported instance match criteria %q", *cr.CapacityReservationId, cr.InstanceMatchCriteria) + } + var endTime *metav1.Time + if cr.EndDate != nil { + endTime = lo.ToPtr(metav1.NewTime(*cr.EndDate)) + } + + return v1.CapacityReservation{ + AvailabilityZone: *cr.AvailabilityZone, + EndTime: endTime, + ID: *cr.CapacityReservationId, + InstanceMatchCriteria: string(cr.InstanceMatchCriteria), + InstanceType: *cr.InstanceType, + OwnerID: *cr.OwnerId, + }, nil +} + +// requeueAfter determines the duration until the next target reconciliation time based on the provided reservations. If +// any reservations are expected to expire before we would typically requeue, the duration will be based on the +// nearest expiration time. 
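+ // For example (hypothetical timings): with a one minute poll period and reservations ending in 30 seconds and two
+ // hours, the reconciler requeues after 30 seconds; if the nearest end time has already passed, it requeues immediately.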
+func (c *CapacityReservation) requeueAfter(reservations ...*ec2types.CapacityReservation) time.Duration { + var next *time.Time + for _, reservation := range reservations { + if reservation.EndDate == nil { + continue + } + if next == nil { + next = reservation.EndDate + continue + } + if next.After(*reservation.EndDate) { + next = reservation.EndDate + } + } + if next == nil { + return capacityReservationPollPeriod + } + if d := next.Sub(c.clk.Now()); d < capacityReservationPollPeriod { + return lo.Ternary(d < 0, singleton.RequeueImmediately, d) + } + return capacityReservationPollPeriod +} diff --git a/pkg/controllers/nodeclass/capacityreservation_test.go b/pkg/controllers/nodeclass/capacityreservation_test.go new file mode 100644 index 000000000000..f8909b2b8cf9 --- /dev/null +++ b/pkg/controllers/nodeclass/capacityreservation_test.go @@ -0,0 +1,174 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeclass_test + +import ( + "time" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + . "sigs.k8s.io/karpenter/pkg/test/expectations" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/utils" +) + +const selfOwnerID = "012345678901" +const altOwnerID = "123456789012" + +var discoveryTags = map[string]string{ + "karpenter.sh/discovery": "test", +} + +var _ = Describe("NodeClass Capacity Reservation Reconciler", func() { + BeforeEach(func() { + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](10), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(selfOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-1"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr(altOwnerID), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-2"), + AvailableInstanceCount: 
lo.ToPtr[int32](15), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + }) + It("should resolve capacity reservations by ID", func() { + const targetID = "cr-m5.large-1a-1" + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(nodeClass.Status.CapacityReservations[0]).To(Equal(v1.CapacityReservation{ + ID: targetID, + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + OwnerID: selfOwnerID, + InstanceType: "m5.large", + AvailabilityZone: "test-zone-1a", + EndTime: nil, + })) + }) + It("should resolve capacity reservations by tags", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(2)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })).To(ContainElements("cr-m5.large-1a-2", "cr-m5.large-1b-2")) + }) + It("should resolve capacity reservations by tags + owner", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + OwnerID: selfOwnerID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })).To(ContainElements("cr-m5.large-1a-2")) + }) + It("should exclude expired capacity reservations", func() { + out := awsEnv.EC2API.DescribeCapacityReservationsOutput.Clone() + targetReservationID := *out.CapacityReservations[0].CapacityReservationId + out.CapacityReservations[0].EndDate = lo.ToPtr(awsEnv.Clock.Now().Add(time.Hour)) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(out) + + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetReservationID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(1)) + Expect(lo.Map(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + 
})).To(ContainElements(targetReservationID)) + + awsEnv.Clock.Step(2 * time.Hour) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(0)) + }) + DescribeTable( + "should exclude non-active capacity reservations", + func(state ec2types.CapacityReservationState) { + out := awsEnv.EC2API.DescribeCapacityReservationsOutput.Clone() + targetReservationID := *out.CapacityReservations[0].CapacityReservationId + out.CapacityReservations[0].State = state + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(out) + + nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: targetReservationID, + }) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeCapacityReservationsReady).IsTrue()).To(BeTrue()) + Expect(nodeClass.Status.CapacityReservations).To(HaveLen(0)) + }, + lo.FilterMap(ec2types.CapacityReservationStateActive.Values(), func(state ec2types.CapacityReservationState, _ int) (TableEntry, bool) { + return Entry(string(state), state), state != ec2types.CapacityReservationStateActive + }), + ) +}) diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index ff56928d7f94..842599cb9850 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -21,7 +21,9 @@ import ( "go.uber.org/multierr" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/utils/clock" "sigs.k8s.io/karpenter/pkg/operator/injection" + "sigs.k8s.io/karpenter/pkg/operator/options" nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" "sigs.k8s.io/karpenter/pkg/utils/result" @@ -46,42 +48,48 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" sdk "github.com/aws/karpenter-provider-aws/pkg/aws" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) -type nodeClassReconciler interface { - Reconcile(context.Context, *v1.EC2NodeClass) (reconcile.Result, error) -} - type Controller struct { - kubeClient client.Client - recorder events.Recorder - launchTemplateProvider launchtemplate.Provider - - ami *AMI - instanceProfile *InstanceProfile - subnet *Subnet - securityGroup *SecurityGroup - validation *Validation - readiness *Readiness //TODO : Remove this when we have sub status conditions + kubeClient client.Client + recorder events.Recorder + launchTemplateProvider launchtemplate.Provider + instanceProfileProvider instanceprofile.Provider + reconcilers []reconcile.TypedReconciler[*v1.EC2NodeClass] } -func NewController(kubeClient client.Client, recorder events.Recorder, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, - amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider, ec2api sdk.EC2API) *Controller { 
- +func NewController( + ctx context.Context, + clk clock.Clock, + kubeClient client.Client, + recorder events.Recorder, + subnetProvider subnet.Provider, + securityGroupProvider securitygroup.Provider, + amiProvider amifamily.Provider, + instanceProfileProvider instanceprofile.Provider, + launchTemplateProvider launchtemplate.Provider, + capacityReservationProvider capacityreservation.Provider, + ec2api sdk.EC2API, +) *Controller { return &Controller{ - kubeClient: kubeClient, - recorder: recorder, - launchTemplateProvider: launchTemplateProvider, - ami: NewAMIReconciler(amiProvider), - subnet: &Subnet{subnetProvider: subnetProvider}, - securityGroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, - instanceProfile: &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - validation: &Validation{ec2api: ec2api, amiProvider: amiProvider}, - readiness: &Readiness{launchTemplateProvider: launchTemplateProvider}, + kubeClient: kubeClient, + recorder: recorder, + launchTemplateProvider: launchTemplateProvider, + instanceProfileProvider: instanceProfileProvider, + reconcilers: []reconcile.TypedReconciler[*v1.EC2NodeClass]{ + NewAMIReconciler(amiProvider), + NewCapacityReservationReconciler(clk, capacityReservationProvider), + &Subnet{subnetProvider: subnetProvider}, + &SecurityGroup{securityGroupProvider: securityGroupProvider}, + &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, + &Validation{ec2api: ec2api, amiProvider: amiProvider}, + &Readiness{launchTemplateProvider: launchTemplateProvider}, + }, } } @@ -89,6 +97,7 @@ func (c *Controller) Name() string { return "nodeclass" } +//nolint:gocyclo func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, c.Name()) @@ -114,14 +123,10 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) var results []reconcile.Result var errs error - for _, reconciler := range []nodeClassReconciler{ - c.ami, - c.subnet, - c.securityGroup, - c.instanceProfile, - c.validation, - c.readiness, - } { + for _, reconciler := range c.reconcilers { + if _, ok := reconciler.(*CapacityReservation); ok && !options.FromContext(ctx).FeatureGates.ReservedCapacity { + continue + } res, err := reconciler.Reconcile(ctx, nodeClass) errs = multierr.Append(errs, err) results = append(results, res) @@ -158,8 +163,8 @@ func (c *Controller) finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{RequeueAfter: time.Minute * 10}, nil // periodically fire the event } if nodeClass.Spec.Role != "" { - if _, err := c.instanceProfile.Finalize(ctx, nodeClass); err != nil { - return reconcile.Result{}, err + if err := c.instanceProfileProvider.Delete(ctx, nodeClass); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting instance profile, %w", err) } } if err := c.launchTemplateProvider.DeleteAll(ctx, nodeClass); err != nil { diff --git a/pkg/controllers/nodeclass/hash/suite_test.go b/pkg/controllers/nodeclass/hash/suite_test.go index 4d21e0a6f8e3..eebd2cecabd9 100644 --- a/pkg/controllers/nodeclass/hash/suite_test.go +++ b/pkg/controllers/nodeclass/hash/suite_test.go @@ -56,7 +56,7 @@ func TestAPIs(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = 
coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) @@ -68,7 +68,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) awsEnv.Reset() }) diff --git a/pkg/controllers/nodeclass/instanceprofile.go b/pkg/controllers/nodeclass/instanceprofile.go index 15402ea618ea..ab2322183e73 100644 --- a/pkg/controllers/nodeclass/instanceprofile.go +++ b/pkg/controllers/nodeclass/instanceprofile.go @@ -42,10 +42,3 @@ func (ip *InstanceProfile) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeC nodeClass.StatusConditions().SetTrue(v1.ConditionTypeInstanceProfileReady) return reconcile.Result{}, nil } - -func (ip *InstanceProfile) Finalize(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { - if err := ip.instanceProfileProvider.Delete(ctx, nodeClass); err != nil { - return reconcile.Result{}, fmt.Errorf("deleting instance profile, %w", err) - } - return reconcile.Result{}, nil -} diff --git a/pkg/controllers/nodeclass/readiness_test.go b/pkg/controllers/nodeclass/readiness_test.go index fdd5f3f95010..6891e0c93ea4 100644 --- a/pkg/controllers/nodeclass/readiness_test.go +++ b/pkg/controllers/nodeclass/readiness_test.go @@ -17,6 +17,7 @@ package nodeclass_test import ( "github.com/awslabs/operatorpkg/status" "github.com/samber/lo" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -49,13 +50,19 @@ var _ = Describe("NodeClass Status Condition Controller", func() { }, }) }) - It("should update status condition on nodeClass as Ready", func() { - ExpectApplied(ctx, env.Client, nodeClass) - ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) - nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.Conditions).To(HaveLen(6)) - Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) - }) + DescribeTable( + "should update status condition on nodeClass as Ready", + func(reservedCapacity bool) { + coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = reservedCapacity + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.Conditions).To(HaveLen(lo.Ternary(reservedCapacity, 7, 6))) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) + }, + Entry("when reserved capacity feature flag is enabled", true), + Entry("when reserved capacity feature flag is disabled", false), + ) It("should update status condition as Not Ready", func() { nodeClass.Spec.SecurityGroupSelectorTerms = []v1.SecurityGroupSelectorTerm{ { diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 2a7e8813db9b..7bf0e2f4506b 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -61,18 +61,26 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(test.RemoveNodeClassTagValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...), 
coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + coretest.WithFieldIndexers(coretest.NodeClaimNodeClassRefFieldIndexer(ctx)), + ) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) controller = nodeclass.NewController( - env.Client, events.NewRecorder(&record.FakeRecorder{}), + ctx, + awsEnv.Clock, + env.Client, + events.NewRecorder(&record.FakeRecorder{}), awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, + awsEnv.CapacityReservationProvider, awsEnv.EC2API, ) }) @@ -82,7 +90,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) nodeClass = test.EC2NodeClass() awsEnv.Reset() }) diff --git a/pkg/controllers/nodeclass/validation.go b/pkg/controllers/nodeclass/validation.go index 8131b66047dc..3621e2044b89 100644 --- a/pkg/controllers/nodeclass/validation.go +++ b/pkg/controllers/nodeclass/validation.go @@ -91,7 +91,7 @@ func (n Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{}, nil } - createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") + createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(ctx, mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") createLaunchTemplateInput.DryRun = aws.Bool(true) if _, err := n.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput); awserrors.IgnoreDryRunError(err) != nil { diff --git a/pkg/controllers/providers/instancetype/capacity/suite_test.go b/pkg/controllers/providers/instancetype/capacity/suite_test.go index 246d1656d71d..5b38a7cb23a9 100644 --- a/pkg/controllers/providers/instancetype/capacity/suite_test.go +++ b/pkg/controllers/providers/instancetype/capacity/suite_test.go @@ -70,7 +70,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...), coretest.WithFieldIndexers(coretest.NodeClaimProviderIDFieldIndexer(ctx))) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ VMMemoryOverheadPercent: lo.ToPtr[float64](0.075), })) @@ -80,7 +80,7 @@ var _ = BeforeSuite(func() { nodeClaim = coretest.NodeClaim() node = coretest.Node() cloudProvider := cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) controller = controllersinstancetypecapacity.NewController(env.Client, cloudProvider, 
awsEnv.InstanceTypesProvider) }) diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 479ee549cc0a..b07707eb5609 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -55,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -68,7 +68,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/controllers/providers/pricing/suite_test.go b/pkg/controllers/providers/pricing/suite_test.go index e9f51d155bf2..a6261e7f7e75 100644 --- a/pkg/controllers/providers/pricing/suite_test.go +++ b/pkg/controllers/providers/pricing/suite_test.go @@ -57,7 +57,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -70,7 +70,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/controllers/providers/ssm/invalidation/suite_test.go b/pkg/controllers/providers/ssm/invalidation/suite_test.go index 1e99a24a1d5c..39894a65d113 100644 --- a/pkg/controllers/providers/ssm/invalidation/suite_test.go +++ b/pkg/controllers/providers/ssm/invalidation/suite_test.go @@ -53,7 +53,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -67,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() }) diff --git a/pkg/controllers/providers/version/suite_test.go b/pkg/controllers/providers/version/suite_test.go index 005c45d6d8e4..e68a9cc96f68 100644 --- 
a/pkg/controllers/providers/version/suite_test.go +++ b/pkg/controllers/providers/version/suite_test.go @@ -52,7 +52,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -65,7 +65,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index d3dfe9e1c8ea..9b012888aed4 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -32,6 +32,7 @@ const ( var ( // This is not an exhaustive list, add to it as needed notFoundErrorCodes = sets.New[string]( + "InvalidCapacityReservationId.NotFound", "InvalidInstanceID.NotFound", launchTemplateNameNotFoundCode, "InvalidLaunchTemplateId.NotFound", @@ -42,6 +43,8 @@ var ( "EntityAlreadyExists", ) + reservationCapacityExceededErrorCode = "ReservationCapacityExceeded" + // unfulfillableCapacityErrorCodes signify that capacity is temporarily unable to be launched unfulfillableCapacityErrorCodes = sets.New[string]( "InsufficientInstanceCapacity", @@ -50,6 +53,7 @@ var ( "UnfulfillableCapacity", "Unsupported", "InsufficientFreeAddressesInSubnet", + reservationCapacityExceededErrorCode, ) ) @@ -128,13 +132,18 @@ func IgnoreUnauthorizedOperationError(err error) error { return err } -// IsUnfulfillableCapacity returns true if the Fleet err means -// capacity is temporarily unavailable for launching. -// This could be due to account limits, insufficient ec2 capacity, etc. +// IsUnfulfillableCapacity returns true if the Fleet err means capacity is temporarily unavailable for launching. This +// could be due to account limits, insufficient ec2 capacity, etc. func IsUnfulfillableCapacity(err ec2types.CreateFleetError) bool { return unfulfillableCapacityErrorCodes.Has(*err.ErrorCode) } +// IsReservationCapacityExceeded returns true if the fleet error means there is no remaining capacity for the provided +// capacity reservation. +func IsReservationCapacityExceeded(err ec2types.CreateFleetError) bool { + return *err.ErrorCode == reservationCapacityExceededErrorCode +} + func IsLaunchTemplateNotFound(err error) bool { if err == nil { return false diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index c6f6556de0b0..a01f05c004f6 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -38,14 +38,16 @@ import ( ) type CapacityPool struct { - CapacityType string - InstanceType string - Zone string + CapacityType string + InstanceType string + Zone string + ReservationID string } // EC2Behavior must be reset between tests otherwise tests will // pollute each other. 
type EC2Behavior struct { + DescribeCapacityReservationsOutput AtomicPtr[ec2.DescribeCapacityReservationsOutput] DescribeImagesOutput AtomicPtr[ec2.DescribeImagesOutput] DescribeLaunchTemplatesOutput AtomicPtr[ec2.DescribeLaunchTemplatesOutput] DescribeSubnetsOutput AtomicPtr[ec2.DescribeSubnetsOutput] @@ -63,9 +65,11 @@ type EC2Behavior struct { CreateLaunchTemplateBehavior MockedFunction[ec2.CreateLaunchTemplateInput, ec2.CreateLaunchTemplateOutput] CalledWithDescribeImagesInput AtomicPtrSlice[ec2.DescribeImagesInput] Instances sync.Map - LaunchTemplates sync.Map InsufficientCapacityPools atomic.Slice[CapacityPool] NextError AtomicError + + LaunchTemplates sync.Map + launchTemplatesToCapacityReservations sync.Map // map[lt-name]cr-id } type EC2API struct { @@ -107,6 +111,11 @@ func (e *EC2API) Reset() { }) e.InsufficientCapacityPools.Reset() e.NextError.Reset() + + e.launchTemplatesToCapacityReservations.Range(func(k, _ any) bool { + e.launchTemplatesToCapacityReservations.Delete(k) + return true + }) } // nolint: gocyclo @@ -126,7 +135,8 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . return nil, fmt.Errorf("missing launch template name") } var instanceIds []string - var skippedPools []CapacityPool + var icedPools []CapacityPool + var reservationExceededPools []CapacityPool var spotInstanceRequestID *string if string(input.TargetCapacitySpecification.DefaultTargetCapacityType) == karpv1.CapacityTypeSpot { @@ -141,7 +151,7 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . if pool.InstanceType == string(override.InstanceType) && pool.Zone == aws.ToString(override.AvailabilityZone) && pool.CapacityType == string(input.TargetCapacitySpecification.DefaultTargetCapacityType) { - skippedPools = append(skippedPools, pool) + icedPools = append(icedPools, pool) skipInstance = true return false } @@ -150,7 +160,21 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . if skipInstance { continue } - amiID := aws.String("") + + if crID, ok := e.launchTemplatesToCapacityReservations.Load(*ltc.LaunchTemplateSpecification.LaunchTemplateName); ok { + if cr, ok := lo.Find(e.DescribeCapacityReservationsOutput.Clone().CapacityReservations, func(cr ec2types.CapacityReservation) bool { + return *cr.CapacityReservationId == crID.(string) + }); !ok || *cr.AvailableInstanceCount == 0 { + reservationExceededPools = append(reservationExceededPools, CapacityPool{ + InstanceType: string(override.InstanceType), + Zone: lo.FromPtr(override.AvailabilityZone), + CapacityType: karpv1.CapacityTypeReserved, + ReservationID: crID.(string), + }) + continue + } + } + amiID := lo.ToPtr("") if e.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { lt := e.CreateLaunchTemplateBehavior.CalledWithInput.Pop() amiID = lt.LaunchTemplateData.ImageId @@ -192,7 +216,7 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . }, }, }} - for _, pool := range skippedPools { + for _, pool := range icedPools { result.Errors = append(result.Errors, ec2types.CreateFleetError{ ErrorCode: aws.String("InsufficientInstanceCapacity"), LaunchTemplateAndOverrides: &ec2types.LaunchTemplateAndOverridesResponse{ @@ -203,6 +227,17 @@ func (e *EC2API) CreateFleet(_ context.Context, input *ec2.CreateFleetInput, _ . 
}, }) } + for _, pool := range reservationExceededPools { + result.Errors = append(result.Errors, ec2types.CreateFleetError{ + ErrorCode: lo.ToPtr("ReservationCapacityExceeded"), + LaunchTemplateAndOverrides: &ec2types.LaunchTemplateAndOverridesResponse{ + Overrides: &ec2types.FleetLaunchTemplateOverrides{ + InstanceType: ec2types.InstanceType(pool.InstanceType), + AvailabilityZone: lo.ToPtr(pool.Zone), + }, + }, + }) + } return result, nil }) } @@ -242,6 +277,9 @@ func (e *EC2API) CreateLaunchTemplate(ctx context.Context, input *ec2.CreateLaun } launchTemplate := ec2types.LaunchTemplate{LaunchTemplateName: input.LaunchTemplateName} e.LaunchTemplates.Store(input.LaunchTemplateName, launchTemplate) + if crs := input.LaunchTemplateData.CapacityReservationSpecification; crs != nil && crs.CapacityReservationPreference == ec2types.CapacityReservationPreferenceCapacityReservationsOnly { + e.launchTemplatesToCapacityReservations.Store(*input.LaunchTemplateName, *crs.CapacityReservationTarget.CapacityReservationId) + } return &ec2.CreateLaunchTemplateOutput{LaunchTemplate: lo.ToPtr(launchTemplate)}, nil }) } @@ -340,6 +378,19 @@ func filterInstances(instances []ec2types.Instance, filters []ec2types.Filter) [ return ret } +func (e *EC2API) DescribeCapacityReservations(ctx context.Context, input *ec2.DescribeCapacityReservationsInput, _ ...func(*ec2.Options)) (*ec2.DescribeCapacityReservationsOutput, error) { + if !e.NextError.IsNil() { + defer e.NextError.Reset() + return nil, e.NextError.Get() + } + if !e.DescribeCapacityReservationsOutput.IsNil() { + out := e.DescribeCapacityReservationsOutput.Clone() + out.CapacityReservations = FilterDescribeCapacityReservations(out.CapacityReservations, input.CapacityReservationIds, input.Filters) + return out, nil + } + return &ec2.DescribeCapacityReservationsOutput{}, nil +} + func (e *EC2API) DescribeImages(ctx context.Context, input *ec2.DescribeImagesInput, _ ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() @@ -379,7 +430,7 @@ func (e *EC2API) DescribeLaunchTemplates(_ context.Context, input *ec2.DescribeL output := &ec2.DescribeLaunchTemplatesOutput{} e.LaunchTemplates.Range(func(key, value interface{}) bool { launchTemplate := value.(ec2types.LaunchTemplate) - if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), launchTemplate.Tags) { + if lo.Contains(input.LaunchTemplateNames, lo.FromPtr(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.ToString(launchTemplate.LaunchTemplateId), aws.ToString(launchTemplate.LaunchTemplateName), "", "", launchTemplate.Tags) { output.LaunchTemplates = append(output.LaunchTemplates, launchTemplate) } return true diff --git a/pkg/fake/utils.go b/pkg/fake/utils.go index 7941b0c0b33e..539d778c689d 100644 --- a/pkg/fake/utils.go +++ b/pkg/fake/utils.go @@ -90,7 +90,7 @@ func SubnetsFromFleetRequest(createFleetInput *ec2.CreateFleetInput) []string { // Filters are chained with a logical "AND" func FilterDescribeSecurtyGroups(sgs []ec2types.SecurityGroup, filters []ec2types.Filter) []ec2types.SecurityGroup { return lo.Filter(sgs, func(group ec2types.SecurityGroup, _ int) bool { - return Filter(filters, *group.GroupId, *group.GroupName, group.Tags) + return Filter(filters, *group.GroupId, *group.GroupName, "", "", group.Tags) }) } @@ -98,29 
+98,36 @@ func FilterDescribeSecurtyGroups(sgs []ec2types.SecurityGroup, filters []ec2type // Filters are chained with a logical "AND" func FilterDescribeSubnets(subnets []ec2types.Subnet, filters []ec2types.Filter) []ec2types.Subnet { return lo.Filter(subnets, func(subnet ec2types.Subnet, _ int) bool { - return Filter(filters, *subnet.SubnetId, "", subnet.Tags) + return Filter(filters, *subnet.SubnetId, "", "", "", subnet.Tags) + }) +} + +func FilterDescribeCapacityReservations(crs []ec2types.CapacityReservation, ids []string, filters []ec2types.Filter) []ec2types.CapacityReservation { + idSet := sets.New[string](ids...) + return lo.Filter(crs, func(cr ec2types.CapacityReservation, _ int) bool { + if len(ids) != 0 && !idSet.Has(*cr.CapacityReservationId) { + return false + } + return Filter(filters, *cr.CapacityReservationId, "", *cr.OwnerId, string(cr.State), cr.Tags) }) } func FilterDescribeImages(images []ec2types.Image, filters []ec2types.Filter) []ec2types.Image { return lo.Filter(images, func(image ec2types.Image, _ int) bool { - if stateFilter, ok := lo.Find(filters, func(f ec2types.Filter) bool { - return lo.FromPtr(f.Name) == "state" - }); ok { - if !lo.Contains(stateFilter.Values, string(image.State)) { - return false - } - } - return Filter(lo.Reject(filters, func(f ec2types.Filter, _ int) bool { - return lo.FromPtr(f.Name) == "state" - }), *image.ImageId, *image.Name, image.Tags) + return Filter(filters, *image.ImageId, *image.Name, "", string(image.State), image.Tags) }) } //nolint:gocyclo -func Filter(filters []ec2types.Filter, id, name string, tags []ec2types.Tag) bool { +func Filter(filters []ec2types.Filter, id, name, owner, state string, tags []ec2types.Tag) bool { return lo.EveryBy(filters, func(filter ec2types.Filter) bool { switch filterName := aws.ToString(filter.Name); { + case filterName == "state": + for _, val := range filter.Values { + if state == val { + return true + } + } case filterName == "subnet-id" || filterName == "group-id" || filterName == "image-id": for _, val := range filter.Values { if id == val { @@ -133,6 +140,12 @@ func Filter(filters []ec2types.Filter, id, name string, tags []ec2types.Tag) boo return true } } + case filterName == "owner-id": + for _, val := range filter.Values { + if owner == val { + return true + } + } case strings.HasPrefix(filterName, "tag"): if matchTags(tags, filter) { return true diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index abbb0a56d365..897239f74aaa 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -58,6 +58,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" @@ -77,21 +78,22 @@ func init() { // Operator is injected into the AWS CloudProvider's factories type Operator struct { *operator.Operator - Config aws.Config - UnavailableOfferingsCache *awscache.UnavailableOfferings - SSMCache *cache.Cache - SubnetProvider subnet.Provider - SecurityGroupProvider securitygroup.Provider - InstanceProfileProvider instanceprofile.Provider - AMIProvider amifamily.Provider - AMIResolver amifamily.Resolver - LaunchTemplateProvider launchtemplate.Provider - PricingProvider 
pricing.Provider - VersionProvider *version.DefaultProvider - InstanceTypesProvider *instancetype.DefaultProvider - InstanceProvider instance.Provider - SSMProvider ssmp.Provider - EC2API *ec2.Client + Config aws.Config + UnavailableOfferingsCache *awscache.UnavailableOfferings + SSMCache *cache.Cache + SubnetProvider subnet.Provider + SecurityGroupProvider securitygroup.Provider + InstanceProfileProvider instanceprofile.Provider + AMIProvider amifamily.Provider + AMIResolver amifamily.Resolver + LaunchTemplateProvider launchtemplate.Provider + PricingProvider pricing.Provider + VersionProvider *version.DefaultProvider + InstanceTypesProvider *instancetype.DefaultProvider + InstanceProvider instance.Provider + SSMProvider ssmp.Provider + CapacityReservationProvider capacityreservation.Provider + EC2API *ec2.Client } func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) { @@ -172,12 +174,22 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont kubeDNSIP, clusterEndpoint, ) + capacityReservationProvider := capacityreservation.NewProvider( + ec2api, + operator.Clock, + cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.CapacityReservationAvailabilityTTL, awscache.DefaultCleanupInterval), + ) instanceTypeProvider := instancetype.NewDefaultProvider( - cache.New(awscache.InstanceTypesAndZonesTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), + cache.New(awscache.InstanceTypesZonesAndOfferingsTTL, awscache.DefaultCleanupInterval), cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval), ec2api, subnetProvider, - instancetype.NewDefaultResolver(cfg.Region, pricingProvider, unavailableOfferingsCache), + pricingProvider, + capacityReservationProvider, + unavailableOfferingsCache, + instancetype.NewDefaultResolver(cfg.Region), ) instanceProvider := instance.NewDefaultProvider( ctx, @@ -186,6 +198,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont unavailableOfferingsCache, subnetProvider, launchTemplateProvider, + capacityReservationProvider, ) // Setup field indexers on instanceID -- specifically for the interruption controller @@ -193,22 +206,23 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont SetupIndexers(ctx, operator.Manager) } return ctx, &Operator{ - Operator: operator, - Config: cfg, - UnavailableOfferingsCache: unavailableOfferingsCache, - SSMCache: ssmCache, - SubnetProvider: subnetProvider, - SecurityGroupProvider: securityGroupProvider, - InstanceProfileProvider: instanceProfileProvider, - AMIProvider: amiProvider, - AMIResolver: amiResolver, - VersionProvider: versionProvider, - LaunchTemplateProvider: launchTemplateProvider, - PricingProvider: pricingProvider, - InstanceTypesProvider: instanceTypeProvider, - InstanceProvider: instanceProvider, - SSMProvider: ssmProvider, - EC2API: ec2api, + Operator: operator, + Config: cfg, + UnavailableOfferingsCache: unavailableOfferingsCache, + SSMCache: ssmCache, + SubnetProvider: subnetProvider, + SecurityGroupProvider: securityGroupProvider, + InstanceProfileProvider: instanceProfileProvider, + AMIProvider: amiProvider, + AMIResolver: amiResolver, + VersionProvider: versionProvider, + LaunchTemplateProvider: launchTemplateProvider, + PricingProvider: pricingProvider, + InstanceTypesProvider: instanceTypeProvider, + InstanceProvider: instanceProvider, + SSMProvider: 
ssmProvider, + CapacityReservationProvider: capacityReservationProvider, + EC2API: ec2api, } } diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 8566b05a4d8a..a47d3db3ebdc 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "strings" "github.com/aws/aws-sdk-go-v2/aws" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" @@ -68,14 +69,15 @@ type Options struct { // LaunchTemplate holds the dynamically generated launch template parameters type LaunchTemplate struct { *Options - UserData bootstrap.Bootstrapper - BlockDeviceMappings []*v1.BlockDeviceMapping - MetadataOptions *v1.MetadataOptions - AMIID string - InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` - DetailedMonitoring bool - EFACount int - CapacityType string + UserData bootstrap.Bootstrapper + BlockDeviceMappings []*v1.BlockDeviceMapping + MetadataOptions *v1.MetadataOptions + AMIID string + InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` + DetailedMonitoring bool + EFACount int + CapacityType string + CapacityReservationID string } // AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters @@ -134,25 +136,41 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N // In order to support reserved ENIs for CNI custom networking setups, // we need to pass down the max-pods calculation to the kubelet. // This requires that we resolve a unique launch template per max-pods value. - // Similarly, instance types configured with EfAs require unique launch templates depending on the number of + // Similarly, instance types configured with EFAs require unique launch templates depending on the number of // EFAs they support. + // Reservation IDs are also included since we need to create a separate LaunchTemplate per reservation ID when + // launching reserved capacity. If it's a reserved capacity launch, we've already filtered the instance types + // further up the call stack. type launchTemplateParams struct { efaCount int maxPods int + // reservationIDs is encoded as a string rather than a slice to ensure this type is comparable for use by `lo.GroupBy`. + reservationIDs string } - paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(instanceType *cloudprovider.InstanceType) launchTemplateParams { + paramsToInstanceTypes := lo.GroupBy(instanceTypes, func(it *cloudprovider.InstanceType) launchTemplateParams { return launchTemplateParams{ efaCount: lo.Ternary( lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA), - int(lo.ToPtr(instanceType.Capacity[v1.ResourceEFA]).Value()), + int(lo.ToPtr(it.Capacity[v1.ResourceEFA]).Value()), 0, ), - maxPods: int(instanceType.Capacity.Pods().Value()), + maxPods: int(it.Capacity.Pods().Value()), + // If we're dealing with reserved instances, there's only going to be a single instance type per group. This invariant + // is due to reservation IDs not being shared across instance types. Because of this, we don't need to worry about + // ordering in this string.
+ reservationIDs: lo.Ternary( + capacityType == karpv1.CapacityTypeReserved, + strings.Join(lo.FilterMap(it.Offerings, func(o *cloudprovider.Offering, _ int) (string, bool) { + return o.ReservationID(), o.CapacityType() == karpv1.CapacityTypeReserved + }), ","), + "", + ), } }) + for params, instanceTypes := range paramsToInstanceTypes { - resolved := r.resolveLaunchTemplate(nodeClass, nodeClaim, instanceTypes, capacityType, amiFamily, amiID, params.maxPods, params.efaCount, options) - resolvedTemplates = append(resolvedTemplates, resolved) + reservationIDs := strings.Split(params.reservationIDs, ",") + resolvedTemplates = append(resolvedTemplates, r.resolveLaunchTemplates(nodeClass, nodeClaim, instanceTypes, capacityType, amiFamily, amiID, params.maxPods, params.efaCount, reservationIDs, options)...) } } return resolvedTemplates, nil @@ -201,8 +219,18 @@ func (r DefaultResolver) defaultClusterDNS(opts *Options, kubeletConfig *v1.Kube return newKubeletConfig } -func (r DefaultResolver) resolveLaunchTemplate(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, - amiFamily AMIFamily, amiID string, maxPods int, efaCount int, options *Options) *LaunchTemplate { +func (r DefaultResolver) resolveLaunchTemplates( + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + capacityType string, + amiFamily AMIFamily, + amiID string, + maxPods int, + efaCount int, + capacityReservationIDs []string, + options *Options, +) []*LaunchTemplate { kubeletConfig := &v1.KubeletConfiguration{} if nodeClass.Spec.Kubelet != nil { kubeletConfig = nodeClass.Spec.Kubelet.DeepCopy() @@ -222,31 +250,41 @@ func (r DefaultResolver) resolveLaunchTemplate(nodeClass *v1.EC2NodeClass, nodeC }); !found { taints = append(taints, karpv1.UnregisteredNoExecuteTaint) } - - resolved := &LaunchTemplate{ - Options: options, - UserData: amiFamily.UserData( - r.defaultClusterDNS(options, kubeletConfig), - taints, - options.Labels, - options.CABundle, - instanceTypes, - nodeClass.Spec.UserData, - options.InstanceStorePolicy, - ), - BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings, - MetadataOptions: nodeClass.Spec.MetadataOptions, - DetailedMonitoring: aws.ToBool(nodeClass.Spec.DetailedMonitoring), - AMIID: amiID, - InstanceTypes: instanceTypes, - EFACount: efaCount, - CapacityType: capacityType, - } - if len(resolved.BlockDeviceMappings) == 0 { - resolved.BlockDeviceMappings = amiFamily.DefaultBlockDeviceMappings() - } - if resolved.MetadataOptions == nil { - resolved.MetadataOptions = amiFamily.DefaultMetadataOptions() + // If no reservation IDs are provided, insert an empty string so the end result is a single launch template with no + // associated capacity reservation. + // TODO: We can simplify this by creating an initial lt, and then copying it for each cr. However, this requires a deep + // copy of the LT struct, which contains an interface causing problems for deepcopy-gen. 
See review comment for context: + // https://github.com/aws/karpenter-provider-aws/pull/7726#discussion_r1955280055 + if len(capacityReservationIDs) == 0 { + capacityReservationIDs = append(capacityReservationIDs, "") } - return resolved + return lo.Map(capacityReservationIDs, func(id string, _ int) *LaunchTemplate { + resolved := &LaunchTemplate{ + Options: options, + UserData: amiFamily.UserData( + r.defaultClusterDNS(options, kubeletConfig), + taints, + options.Labels, + options.CABundle, + instanceTypes, + nodeClass.Spec.UserData, + options.InstanceStorePolicy, + ), + BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings, + MetadataOptions: nodeClass.Spec.MetadataOptions, + DetailedMonitoring: aws.ToBool(nodeClass.Spec.DetailedMonitoring), + AMIID: amiID, + InstanceTypes: instanceTypes, + EFACount: efaCount, + CapacityType: capacityType, + CapacityReservationID: id, + } + if len(resolved.BlockDeviceMappings) == 0 { + resolved.BlockDeviceMappings = amiFamily.DefaultBlockDeviceMappings() + } + if resolved.MetadataOptions == nil { + resolved.MetadataOptions = amiFamily.DefaultMetadataOptions() + } + return resolved + }) } diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 4adc0ea4c558..1a0343b4d13a 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -67,7 +67,7 @@ const ( var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) }) diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go new file mode 100644 index 000000000000..f0ec5b1effc9 --- /dev/null +++ b/pkg/providers/capacityreservation/provider.go @@ -0,0 +1,125 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package capacityreservation + +import ( + "context" + "fmt" + "sync" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/patrickmn/go-cache" + "github.com/samber/lo" + "k8s.io/utils/clock" + "sigs.k8s.io/karpenter/pkg/utils/pretty" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" + awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" +) + +type Provider interface { + List(context.Context, ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) + GetAvailableInstanceCount(string) int + MarkLaunched(string) + MarkTerminated(string) + MarkUnavailable(...string) +} + +type DefaultProvider struct { + availabilityCache + sync.Mutex + + ec2api sdk.EC2API + clk clock.Clock + reservationCache *cache.Cache + cm *pretty.ChangeMonitor +} + +func NewProvider( + ec2api sdk.EC2API, + clk clock.Clock, + reservationCache, reservationAvailabilityCache *cache.Cache, +) *DefaultProvider { + return &DefaultProvider{ + availabilityCache: availabilityCache{ + cache: reservationAvailabilityCache, + clk: clk, + }, + ec2api: ec2api, + clk: clk, + reservationCache: reservationCache, + cm: pretty.NewChangeMonitor(), + } +} + +func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, error) { + // Take a write lock over the entire List operation to minimize duplicate DescribeCapacityReservations calls + p.Lock() + defer p.Unlock() + + var reservations []*ec2types.CapacityReservation + queries := QueriesFromSelectorTerms(selectorTerms...) + reservations, queries = p.resolveCachedQueries(queries...) + if len(queries) == 0 { + return p.filterReservations(reservations), nil + } + for _, q := range queries { + paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, q.DescribeCapacityReservationsInput()) + var queryReservations []*ec2types.CapacityReservation + for paginator.HasMorePages() { + out, err := paginator.NextPage(ctx) + if err != nil { + if awserrors.IsNotFound(err) { + // Note: we only receive this error when requesting a single ID, in which case we will only ever get a single page. + // Replacing this with a continue will result in an infinite loop as HasMorePages will always return true. + break + } + return nil, fmt.Errorf("listing capacity reservations, %w", err) + } + queryReservations = append(queryReservations, lo.ToSlicePtr(out.CapacityReservations)...) + } + p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { + return *r.CapacityReservationId, int(*r.AvailableInstanceCount) + })) + p.reservationCache.SetDefault(q.CacheKey(), queryReservations) + reservations = append(reservations, queryReservations...) + } + return p.filterReservations(reservations), nil +} + +func (p *DefaultProvider) resolveCachedQueries(queries ...*Query) (reservations []*ec2types.CapacityReservation, remainingQueries []*Query) { + for _, q := range queries { + if value, ok := p.reservationCache.Get(q.CacheKey()); ok { + reservations = append(reservations, value.([]*ec2types.CapacityReservation)...)
+ } else { + remainingQueries = append(remainingQueries, q) + } + } + return reservations, remainingQueries +} + +// filterReservations removes duplicate and expired reservations +func (p *DefaultProvider) filterReservations(reservations []*ec2types.CapacityReservation) []*ec2types.CapacityReservation { + return lo.Filter(lo.UniqBy(reservations, func(r *ec2types.CapacityReservation) string { + return *r.CapacityReservationId + }), func(r *ec2types.CapacityReservation, _ int) bool { + if r.EndDate == nil { + return true + } + return r.EndDate.After(p.clk.Now()) + }) +} diff --git a/pkg/providers/capacityreservation/suite_test.go b/pkg/providers/capacityreservation/suite_test.go new file mode 100644 index 000000000000..bf2771f11a24 --- /dev/null +++ b/pkg/providers/capacityreservation/suite_test.go @@ -0,0 +1,124 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation_test + +import ( + "context" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" + coretest "sigs.k8s.io/karpenter/pkg/test" + + "github.com/aws/karpenter-provider-aws/pkg/apis" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/test" + "github.com/aws/karpenter-provider-aws/pkg/utils" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" +) + +var ctx context.Context +var env *coretest.Environment +var awsEnv *test.Environment + +func TestAPIs(t *testing.T) { + ctx = TestContextWithLogger(t) + RegisterFailHandler(Fail) + RunSpecs(t, "EC2NodeClass") +} + +var _ = BeforeSuite(func() { + env = coretest.NewEnvironment( + coretest.WithCRDs(test.DisableCapacityReservationIDValidation(test.RemoveNodeClassTagValidation(apis.CRDs))...), + coretest.WithCRDs(v1alpha1.CRDs...), + ) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) + ctx = options.ToContext(ctx, test.Options()) + awsEnv = test.NewEnvironment(ctx, env) +}) + +// NOTE: Tests for different selector terms can be found in the nodeclass reconciler tests +var _ = Describe("Capacity Reservation Provider", func() { + var discoveryTags map[string]string + var reservations map[string]int + + BeforeEach(func() { + discoveryTags = map[string]string{ + "karpenter.sh/discovery": "test", + } + crs := []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](15), + Tags: utils.MergeTags(discoveryTags), + State: ec2types.CapacityReservationStateActive, + }, + } + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: crs, + }) + reservations = make(map[string]int) + for _, cr := range crs { + reservations[*cr.CapacityReservationId] = int(*cr.AvailableInstanceCount) + } + }) + Context("Availability Cache", func() { + It("should sync availability cache when listing reservations", func() { + crs, err := awsEnv.CapacityReservationProvider.List(ctx, v1.CapacityReservationSelectorTerm{ + Tags: discoveryTags, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(crs).To(HaveLen(2)) + for id, count := range reservations { + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(id)).To(Equal(count)) + } + }) + It("should decrement availability when reservation is marked as launched", func() { + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-test", 5) + awsEnv.CapacityReservationProvider.MarkLaunched("cr-test-2") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(5)) + awsEnv.CapacityReservationProvider.MarkLaunched("cr-test") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(4)) + }) + It("should increment availability when reservation is marked as terminated", func() { + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-test", 5) + awsEnv.CapacityReservationProvider.MarkTerminated("cr-test-2") + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(5)) + awsEnv.CapacityReservationProvider.MarkTerminated("cr-test") + 
Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount("cr-test")).To(Equal(6)) + }) + }) +}) diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go new file mode 100644 index 000000000000..7d9f14e1248c --- /dev/null +++ b/pkg/providers/capacityreservation/types.go @@ -0,0 +1,183 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package capacityreservation + +import ( + "fmt" + "sync" + "time" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/mitchellh/hashstructure/v2" + "github.com/patrickmn/go-cache" + "github.com/samber/lo" + "k8s.io/utils/clock" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" +) + +type Query struct { + ID string + OwnerID string + Tags map[string]string +} + +func QueriesFromSelectorTerms(terms ...v1.CapacityReservationSelectorTerm) []*Query { + queries := []*Query{} + for i := range terms { + if id := terms[i].ID; id != "" { + queries = append(queries, &Query{ID: id}) + } + if len(terms[i].Tags) != 0 { + queries = append(queries, &Query{ + OwnerID: terms[i].OwnerID, + Tags: terms[i].Tags, + }) + } + } + return queries +} + +func (q *Query) CacheKey() string { + return fmt.Sprintf("%d", lo.Must(hashstructure.Hash(q, hashstructure.FormatV2, &hashstructure.HashOptions{ + SlicesAsSets: true, + }))) +} + +func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReservationsInput { + filters := []ec2types.Filter{{ + Name: lo.ToPtr("state"), + Values: []string{string(ec2types.CapacityReservationStateActive)}, + }} + if len(q.ID) != 0 { + return &ec2.DescribeCapacityReservationsInput{ + Filters: filters, + CapacityReservationIds: []string{q.ID}, + } + } + if q.OwnerID != "" { + filters = append(filters, ec2types.Filter{ + Name: lo.ToPtr("owner-id"), + Values: []string{q.OwnerID}, + }) + } + if len(q.Tags) != 0 { + filters = append(filters, lo.MapToSlice(q.Tags, func(k, v string) ec2types.Filter { + if v == "*" { + return ec2types.Filter{ + Name: lo.ToPtr("tag-key"), + Values: []string{k}, + } + } + return ec2types.Filter{ + Name: lo.ToPtr(fmt.Sprintf("tag:%s", k)), + Values: []string{v}, + } + })...) + } + return &ec2.DescribeCapacityReservationsInput{ + Filters: filters, + } +} + +type availabilityCache struct { + mu sync.RWMutex + cache *cache.Cache + clk clock.Clock +} + +type availabilityCacheEntry struct { + count int + syncTime time.Time +} + +func (c *availabilityCache) syncAvailability(availability map[string]int) { + now := c.clk.Now() + c.mu.Lock() + defer c.mu.Unlock() + for id, count := range availability { + c.cache.SetDefault(id, &availabilityCacheEntry{ + count: count, + syncTime: now, + }) + } +} + +func (c *availabilityCache) MarkLaunched(reservationID string) { + now := c.clk.Now() + c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.cache.Get(reservationID) + if !ok { + return + } + // Only count the launch if it occurred after the last sync from EC2.
In the worst case, this will lead to us + // overestimating availability if there's an eventual consistency delay with EC2, but we'd rather overestimate than + // underestimate. + if entry.(*availabilityCacheEntry).syncTime.After(now) { + return + } + + if entry.(*availabilityCacheEntry).count != 0 { + entry.(*availabilityCacheEntry).count -= 1 + } +} + +func (c *availabilityCache) MarkTerminated(reservationID string) { + // We don't do a time based comparison for CountTerminated because the reservation becomes available some time between + // the termination call and the instance state transitioning to terminated. This can be a pretty big gap, so a time + // based comparison would have limited value. In the worst case, this can result in us overestimating the available + // capacity, but we'd rather overestimate than underestimate. + c.mu.Lock() + defer c.mu.Unlock() + entry, ok := c.cache.Get(reservationID) + if !ok { + return + } + entry.(*availabilityCacheEntry).count += 1 +} + +func (c *availabilityCache) GetAvailableInstanceCount(reservationID string) int { + c.mu.RLock() + defer c.mu.RUnlock() + entry, ok := c.cache.Get(reservationID) + if !ok { + return 0 + } + return entry.(*availabilityCacheEntry).count +} + +// TODO: Determine better abstraction for setting availability in tests without reconciling the nodeclass controller +func (c *availabilityCache) SetAvailableInstanceCount(reservationID string, count int) { + c.mu.Lock() + defer c.mu.Unlock() + c.cache.SetDefault(reservationID, &availabilityCacheEntry{ + count: count, + syncTime: c.clk.Now(), + }) +} + +func (c *availabilityCache) MarkUnavailable(reservationIDs ...string) { + c.mu.Lock() + defer c.mu.Unlock() + for _, id := range reservationIDs { + entry, ok := c.cache.Get(id) + if !ok { + continue + } + entry.(*availabilityCacheEntry).count = 0 + } +} diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 74c0a6c11bec..df230d51c24a 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -43,6 +43,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/cache" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/operator/options" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" @@ -77,23 +78,32 @@ type Provider interface { } type DefaultProvider struct { - region string - ec2api sdk.EC2API - unavailableOfferings *cache.UnavailableOfferings - subnetProvider subnet.Provider - launchTemplateProvider launchtemplate.Provider - ec2Batcher *batcher.EC2API + region string + ec2api sdk.EC2API + unavailableOfferings *cache.UnavailableOfferings + subnetProvider subnet.Provider + launchTemplateProvider launchtemplate.Provider + ec2Batcher *batcher.EC2API + capacityReservationProvider capacityreservation.Provider } -func NewDefaultProvider(ctx context.Context, region string, ec2api sdk.EC2API, unavailableOfferings *cache.UnavailableOfferings, - subnetProvider subnet.Provider, launchTemplateProvider launchtemplate.Provider) *DefaultProvider { +func NewDefaultProvider( + ctx context.Context, + region string, + ec2api sdk.EC2API, + unavailableOfferings *cache.UnavailableOfferings, + subnetProvider subnet.Provider, + launchTemplateProvider launchtemplate.Provider, + capacityReservationProvider capacityreservation.Provider, +) *DefaultProvider { return 
&DefaultProvider{ - region: region, - ec2api: ec2api, - unavailableOfferings: unavailableOfferings, - subnetProvider: subnetProvider, - launchTemplateProvider: launchTemplateProvider, - ec2Batcher: batcher.EC2(ctx, ec2api), + region: region, + ec2api: ec2api, + unavailableOfferings: unavailableOfferings, + subnetProvider: subnetProvider, + launchTemplateProvider: launchTemplateProvider, + ec2Batcher: batcher.EC2(ctx, ec2api), + capacityReservationProvider: capacityReservationProvider, } } @@ -103,21 +113,44 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass if !schedulingRequirements.HasMinValues() { instanceTypes = p.filterInstanceTypes(nodeClaim, instanceTypes) } + // We filter out non-reserved instances regardless of the min-values settings, since if the launch is eligible for + // reserved instances that's all we'll include in our fleet request. + if reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...); reqs.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) { + instanceTypes = p.filterReservedInstanceTypes(reqs, instanceTypes) + if _, err := cloudprovider.InstanceTypes(instanceTypes).SatisfiesMinValues(schedulingRequirements); err != nil { + return nil, cloudprovider.NewCreateError(fmt.Errorf("failed to construct CreateFleet request while respecting minValues requirements"), "CreateFleetRequestConstructionFailed", "Failed to construct CreateFleet request while respecting minValues") + } + } instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) if err != nil { return nil, cloudprovider.NewCreateError(fmt.Errorf("truncating instance types, %w", err), "InstanceTypeResolutionFailed", "Error truncating instance types based on the passed-in requirements") } - fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags) + capacityType := p.getCapacityType(nodeClaim, instanceTypes) + fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, capacityType, instanceTypes, tags) if awserrors.IsLaunchTemplateNotFound(err) { // retry once if launch template is not found. 
This allows karpenter to generate a new LT if the // cache was out-of-sync on the first try - fleetInstance, err = p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags) + fleetInstance, err = p.launchInstance(ctx, nodeClass, nodeClaim, capacityType, instanceTypes, tags) } if err != nil { return nil, err } - efaEnabled := lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA) - return NewInstanceFromFleet(fleetInstance, tags, efaEnabled), nil + + var capacityReservation string + if capacityType == karpv1.CapacityTypeReserved { + capacityReservation = p.getCapacityReservationIDForInstance( + string(fleetInstance.InstanceType), + *fleetInstance.LaunchTemplateAndOverrides.Overrides.AvailabilityZone, + instanceTypes, + ) + } + return NewInstanceFromFleet( + fleetInstance, + tags, + capacityType, + capacityReservation, + lo.Contains(lo.Keys(nodeClaim.Spec.Resources.Requests), v1.ResourceEFA), + ), nil } func (p *DefaultProvider) Get(ctx context.Context, id string) (*Instance, error) { @@ -209,8 +242,14 @@ func (p *DefaultProvider) CreateTags(ctx context.Context, id string, tags map[st return nil } -func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, tags map[string]string) (ec2types.CreateFleetInstance, error) { - capacityType := p.getCapacityType(nodeClaim, instanceTypes) +func (p *DefaultProvider) launchInstance( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + capacityType string, + instanceTypes []*cloudprovider.InstanceType, + tags map[string]string, +) (ec2types.CreateFleetInstance, error) { zonalSubnets, err := p.subnetProvider.ZonalSubnetsForLaunch(ctx, nodeClass, instanceTypes, capacityType) if err != nil { return ec2types.CreateFleetInstance{}, cloudprovider.NewCreateError(fmt.Errorf("getting subnets, %w", err), "SubnetResolutionFailed", "Error getting subnets") @@ -249,7 +288,7 @@ func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1.EC2N } return ec2types.CreateFleetInstance{}, cloudprovider.NewCreateError(fmt.Errorf("creating fleet request, %w", err), reason, fmt.Sprintf("Error creating fleet request: %s", message)) } - p.updateUnavailableOfferingsCache(ctx, createFleetOutput.Errors, capacityType) + p.updateUnavailableOfferingsCache(ctx, createFleetOutput.Errors, capacityType, instanceTypes) if len(createFleetOutput.Instances) == 0 || len(createFleetOutput.Instances[0].InstanceIds) == 0 { return ec2types.CreateFleetInstance{}, combineFleetErrors(createFleetOutput.Errors) } @@ -262,8 +301,12 @@ func GetCreateFleetInput(nodeClass *v1.EC2NodeClass, capacityType string, tags m Context: nodeClass.Spec.Context, LaunchTemplateConfigs: launchTemplateConfigs, TargetCapacitySpecification: &ec2types.TargetCapacitySpecificationRequest{ - DefaultTargetCapacityType: ec2types.DefaultTargetCapacityType(capacityType), - TotalTargetCapacity: aws.Int32(1), + DefaultTargetCapacityType: lo.Ternary( + capacityType == karpv1.CapacityTypeReserved, + ec2types.DefaultTargetCapacityType(karpv1.CapacityTypeOnDemand), + ec2types.DefaultTargetCapacityType(capacityType), + ), + TotalTargetCapacity: aws.Int32(1), }, TagSpecifications: []ec2types.TagSpecification{ {ResourceType: ec2types.ResourceTypeInstance, Tags: utils.MergeTags(tags)}, @@ -293,8 +336,15 @@ func (p *DefaultProvider) checkODFallback(nodeClaim *karpv1.NodeClaim, instanceT return nil } -func (p *DefaultProvider) getLaunchTemplateConfigs(ctx 
context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, - instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, capacityType string, tags map[string]string) ([]ec2types.FleetLaunchTemplateConfigRequest, error) { +func (p *DefaultProvider) getLaunchTemplateConfigs( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + zonalSubnets map[string]*subnet.Subnet, + capacityType string, + tags map[string]string, +) ([]ec2types.FleetLaunchTemplateConfigRequest, error) { var launchTemplateConfigs []ec2types.FleetLaunchTemplateConfigRequest launchTemplates, err := p.launchTemplateProvider.EnsureAll(ctx, nodeClass, nodeClaim, instanceTypes, capacityType, tags) if err != nil { @@ -304,7 +354,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType) for _, launchTemplate := range launchTemplates { launchTemplateConfig := ec2types.FleetLaunchTemplateConfigRequest{ - Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, launchTemplate.ImageID), + Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, launchTemplate.ImageID, launchTemplate.CapacityReservationID), LaunchTemplateSpecification: &ec2types.FleetLaunchTemplateSpecificationRequest{ LaunchTemplateName: aws.String(launchTemplate.Name), Version: aws.String("$Latest"), @@ -322,36 +372,47 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image string) []ec2types.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides( + instanceTypes []*cloudprovider.InstanceType, + zonalSubnets map[string]*subnet.Subnet, + reqs scheduling.Requirements, + image, capacityReservationID string, +) []ec2types.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { - cloudprovider.Offering + *cloudprovider.Offering parentInstanceTypeName ec2types.InstanceType } - var unwrappedOfferings []offeringWithParentName + var filteredOfferings []offeringWithParentName for _, it := range instanceTypes { - ofs := lo.Map(it.Offerings.Available(), func(of cloudprovider.Offering, _ int) offeringWithParentName { - return offeringWithParentName{ - Offering: of, + ofs := it.Offerings.Available().Compatible(reqs) + // If we are generating a launch template for a specific capacity reservation, we only want to include the offering + // for that capacity reservation when generating overrides. + if capacityReservationID != "" { + ofs = ofs.Compatible(scheduling.NewRequirements(scheduling.NewRequirement( + cloudprovider.ReservationIDLabel, + corev1.NodeSelectorOpIn, + capacityReservationID, + ))) + } + for _, o := range ofs { + filteredOfferings = append(filteredOfferings, offeringWithParentName{ + Offering: o, parentInstanceTypeName: ec2types.InstanceType(it.Name), - } - }) - unwrappedOfferings = append(unwrappedOfferings, ofs...) 
+ }) + } } var overrides []ec2types.FleetLaunchTemplateOverridesRequest - for _, offering := range unwrappedOfferings { - if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { - continue - } - subnet, ok := zonalSubnets[offering.Requirements.Get(corev1.LabelTopologyZone).Any()] + for _, offering := range filteredOfferings { + subnet, ok := zonalSubnets[offering.Zone()] if !ok { continue } overrides = append(overrides, ec2types.FleetLaunchTemplateOverridesRequest{ InstanceType: offering.parentInstanceTypeName, SubnetId: lo.ToPtr(subnet.ID), - ImageId: aws.String(image), + ImageId: lo.ToPtr(image), // This is technically redundant, but is useful if we have to parse insufficient capacity errors from // CreateFleet so that we can figure out the zone rather than additional API calls to look up the subnet AvailabilityZone: lo.ToPtr(subnet.Zone), @@ -360,32 +421,104 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy return overrides } -func (p *DefaultProvider) updateUnavailableOfferingsCache(ctx context.Context, errors []ec2types.CreateFleetError, capacityType string) { - for _, err := range errors { - if awserrors.IsUnfulfillableCapacity(err) { - p.unavailableOfferings.MarkUnavailableForFleetErr(ctx, err, capacityType) +func (p *DefaultProvider) updateUnavailableOfferingsCache( + ctx context.Context, + errs []ec2types.CreateFleetError, + capacityType string, + instanceTypes []*cloudprovider.InstanceType, +) { + if capacityType != karpv1.CapacityTypeReserved { + for _, err := range errs { + if awserrors.IsUnfulfillableCapacity(err) { + p.unavailableOfferings.MarkUnavailableForFleetErr(ctx, err, capacityType) + } + } + return + } + + reservationIDs := make([]string, 0, len(errs)) + for i := range errs { + id := p.getCapacityReservationIDForInstance( + string(errs[i].LaunchTemplateAndOverrides.Overrides.InstanceType), + lo.FromPtr(errs[i].LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + instanceTypes, + ) + reservationIDs = append(reservationIDs, id) + log.FromContext(ctx).WithValues( + "reason", lo.FromPtr(errs[i].ErrorCode), + "instance-type", errs[i].LaunchTemplateAndOverrides.Overrides.InstanceType, + "zone", lo.FromPtr(errs[i].LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + "capacity-reservation-id", id, + ).V(1).Info("marking capacity reservation unavailable") + } + p.capacityReservationProvider.MarkUnavailable(reservationIDs...) +} + +func (p *DefaultProvider) getCapacityReservationIDForInstance(instance, zone string, instanceTypes []*cloudprovider.InstanceType) string { + for _, it := range instanceTypes { + if it.Name != instance { + continue + } + for _, o := range it.Offerings { + if o.CapacityType() != karpv1.CapacityTypeReserved || o.Zone() != zone { + continue + } + return o.ReservationID() } } + // note: this is an invariant that the caller must enforce, should not occur at runtime + panic("reservation ID doesn't exist for reserved launch") } -// getCapacityType selects spot if both constraints are flexible and there is an -// available offering. The AWS Cloud Provider defaults to [ on-demand ], so spot -// must be explicitly included in capacity type requirements. +// getCapacityType selects the capacity type based on the flexibility of the NodeClaim and the available offerings. +// Prioritization is as follows: reserved, spot, on-demand. 
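The selection order documented above (reserved, then spot, then on-demand) is easiest to see stripped of Karpenter's scheduling machinery. Below is a minimal, self-contained sketch of that ordering; the types, the pickCapacityType helper, and the example numbers are illustrative only and are not code from this patch.

package main

import "fmt"

// Preference order mirrors the doc comment above: reserved first, then spot,
// with on-demand as the fallback.
var capacityTypesByPreference = []string{"reserved", "spot"}

// pickCapacityType returns the first preferred capacity type that the claim
// requests and that has at least one available offering.
func pickCapacityType(requested map[string]bool, availableOfferings map[string]int) string {
	for _, ct := range capacityTypesByPreference {
		if requested[ct] && availableOfferings[ct] > 0 {
			return ct
		}
	}
	return "on-demand"
}

func main() {
	requested := map[string]bool{"reserved": true, "spot": true, "on-demand": true}
	availableOfferings := map[string]int{"spot": 3, "on-demand": 12} // no reserved capacity left
	fmt.Println(pickCapacityType(requested, availableOfferings))     // prints "spot"
}

As in getCapacityType below, a capacity type is only chosen when the NodeClaim's requirements permit it and at least one compatible offering is actually available; otherwise the search falls through to on-demand.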
func (p *DefaultProvider) getCapacityType(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) string { - requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) - if requirements.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeSpot) { - requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeSpot) - for _, instanceType := range instanceTypes { - for _, offering := range instanceType.Offerings.Available() { - if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil { - return karpv1.CapacityTypeSpot - } + for _, capacityType := range []string{karpv1.CapacityTypeReserved, karpv1.CapacityTypeSpot} { + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) + if !requirements.Get(karpv1.CapacityTypeLabelKey).Has(capacityType) { + continue + } + requirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType) + for _, it := range instanceTypes { + if len(it.Offerings.Available().Compatible(requirements)) != 0 { + return capacityType } } } return karpv1.CapacityTypeOnDemand } +// filterReservedInstanceTypes is used to filter the provided set of instance types to only include those with +// available reserved offerings if the nodeclaim is compatible. If there are no available reserved offerings, no +// filtering is applied. +func (*DefaultProvider) filterReservedInstanceTypes(nodeClaimRequirements scheduling.Requirements, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType { + nodeClaimRequirements[karpv1.CapacityTypeLabelKey] = scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved) + var reservedInstanceTypes []*cloudprovider.InstanceType + for _, it := range instanceTypes { + // We only want to include a single offering per pool (instance type / AZ combo). This is due to a limitation in the + // CreateFleet API, which limits calls to specifying a single override per pool. We'll choose to launch into the pool + // with the most capacity. + zonalOfferings := map[string]*cloudprovider.Offering{} + for _, o := range it.Offerings.Available().Compatible(nodeClaimRequirements) { + if current, ok := zonalOfferings[o.Zone()]; !ok || o.ReservationCapacity > current.ReservationCapacity { + zonalOfferings[o.Zone()] = o + } + } + if len(zonalOfferings) == 0 { + continue + } + // WARNING: It is only safe to mutate the slice containing the offerings, not the offerings themselves. The individual + // offerings are cached, but not the slice storing them. This helps keep the launch path simple, but changes to the + // caching strategy employed by the InstanceType provider could result in unexpected behavior. + it.Offerings = lo.Values(zonalOfferings) + reservedInstanceTypes = append(reservedInstanceTypes, it) + } + if len(reservedInstanceTypes) == 0 { + return instanceTypes + } + return reservedInstanceTypes +} + // filterInstanceTypes is used to provide filtering on the list of potential instance types to further limit it to those // that make the most sense given our specific AWS cloudprovider. 
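The per-pool selection in filterReservedInstanceTypes above (keep a single reserved offering per instance type / zone pair, preferring the one with the most remaining capacity, because CreateFleet accepts only one override per pool) can be sketched independently of the provider's types. The reservedOffering struct and bestPerZone helper below are illustrative stand-ins rather than code from this patch.

package main

import "fmt"

// reservedOffering is a pared-down stand-in for a reserved cloudprovider.Offering.
type reservedOffering struct {
	reservationID string
	zone          string
	capacity      int
}

// bestPerZone keeps only the offering with the most remaining capacity in each
// zone, mirroring the single-override-per-pool constraint of CreateFleet.
func bestPerZone(offerings []reservedOffering) map[string]reservedOffering {
	best := map[string]reservedOffering{}
	for _, o := range offerings {
		if current, ok := best[o.zone]; !ok || o.capacity > current.capacity {
			best[o.zone] = o
		}
	}
	return best
}

func main() {
	offerings := []reservedOffering{
		{reservationID: "cr-aaa", zone: "us-west-2a", capacity: 1},
		{reservationID: "cr-bbb", zone: "us-west-2a", capacity: 4},
		{reservationID: "cr-ccc", zone: "us-west-2b", capacity: 2},
	}
	for zone, o := range bestPerZone(offerings) {
		fmt.Println(zone, o.reservationID, o.capacity)
	}
}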
func (p *DefaultProvider) filterInstanceTypes(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) []*cloudprovider.InstanceType { diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index 58b3ebdecf63..1c7e633a3369 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -23,9 +23,11 @@ import ( "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/awslabs/operatorpkg/object" "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" @@ -61,12 +63,12 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + env = coretest.NewEnvironment(coretest.WithCRDs(test.DisableCapacityReservationIDValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) }) var _ = AfterSuite(func() { @@ -74,7 +76,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv.Reset() }) @@ -137,6 +139,123 @@ var _ = Describe("InstanceProvider", func() { Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue()) Expect(instance).To(BeNil()) }) + It("should return an ICE error when all attempted instance types return a ReservedCapacityReservation error", func() { + const targetReservationID = "cr-m5.large-1a-1" + // Ensure that Karpenter believes a reservation is available, but the API returns no capacity when attempting to launch + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(targetReservationID, 1) + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(targetReservationID), + AvailableInstanceCount: lo.ToPtr[int32](0), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, v1.CapacityReservation{ + ID: "cr-m5.large-1a-1", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }) + nodeClaim.Spec.Requirements = append( + nodeClaim.Spec.Requirements, + 
karpv1.NodeSelectorRequirementWithMinValues{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}, + ) + ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass) + + instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool) + Expect(err).ToNot(HaveOccurred()) + instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, nil, instanceTypes) + Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue()) + Expect(instance).To(BeNil()) + + // Ensure we marked the reservation as unavailable after encountering the error + Expect(awsEnv.CapacityReservationProvider.GetAvailableInstanceCount(targetReservationID)).To(Equal(0)) + }) + It("should filter compatible reserved offerings such that only one offering per capacity pool is included in the CreateFleet request", func() { + const targetReservationID = "cr-m5.large-1a-2" + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: []ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](1), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr(targetReservationID), + AvailableInstanceCount: lo.ToPtr[int32](2), + State: ec2types.CapacityReservationStateActive, + }, + }, + }) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount("cr-m5.large-1a-1", 1) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(targetReservationID, 2) + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, []v1.CapacityReservation{ + { + ID: "cr-m5.large-1a-1", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + { + ID: "cr-m5.large-1a-2", + AvailabilityZone: "test-zone-1a", + InstanceMatchCriteria: string(ec2types.InstanceMatchCriteriaTargeted), + InstanceType: "m5.large", + OwnerID: "012345678901", + }, + }...) 
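+ // Note: both reservations above sit in the same capacity pool (m5.large in test-zone-1a). With 1 vs 2 instances available, only cr-m5.large-1a-2 should survive the per-pool filtering and appear as an override.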
+ + nodeClaim.Spec.Requirements = append( + nodeClaim.Spec.Requirements, + karpv1.NodeSelectorRequirementWithMinValues{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}, + ) + ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass) + + instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool) + Expect(err).ToNot(HaveOccurred()) + instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, nil, instanceTypes) + Expect(err).ToNot(HaveOccurred()) + Expect(instance.CapacityType).To(Equal(karpv1.CapacityTypeReserved)) + Expect(instance.CapacityReservationID).To(Equal(targetReservationID)) + + // We should have only created a single launch template, for the single capacity reservation we're attempting to launch + var launchTemplates []*ec2.CreateLaunchTemplateInput + for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() > 0 { + launchTemplates = append(launchTemplates, awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop()) + } + Expect(launchTemplates).To(HaveLen(1)) + Expect(*launchTemplates[0].LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId).To(Equal(targetReservationID)) + + Expect(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Len()).ToNot(Equal(0)) + createFleetInput := awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop() + Expect(createFleetInput.TargetCapacitySpecification.DefaultTargetCapacityType).To(Equal(ec2types.DefaultTargetCapacityTypeOnDemand)) + Expect(createFleetInput.LaunchTemplateConfigs).To(HaveLen(1)) + Expect(createFleetInput.LaunchTemplateConfigs[0].Overrides).To(HaveLen(1)) + }) It("should return all NodePool-owned instances from List", func() { ids := sets.New[string]() // Provision instances that have the karpenter.sh/nodepool key diff --git a/pkg/providers/instance/types.go b/pkg/providers/instance/types.go index 49f1ea7aaec7..f8ff74557255 100644 --- a/pkg/providers/instance/types.go +++ b/pkg/providers/instance/types.go @@ -17,7 +17,6 @@ package instance import ( "time" - "github.com/aws/aws-sdk-go-v2/aws" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/samber/lo" @@ -27,33 +26,37 @@ import ( // Instance is an internal data representation of either an ec2.Instance or an ec2.FleetInstance // It contains all the common data that is needed to inject into the Machine from either of these responses type Instance struct { - LaunchTime time.Time - State ec2types.InstanceStateName - ID string - ImageID string - Type ec2types.InstanceType - Zone string - CapacityType string - SecurityGroupIDs []string - SubnetID string - Tags map[string]string - EFAEnabled bool + LaunchTime time.Time + State ec2types.InstanceStateName + ID string + ImageID string + Type ec2types.InstanceType + Zone string + CapacityType string + CapacityReservationID string + SecurityGroupIDs []string + SubnetID string + Tags map[string]string + EFAEnabled bool } func NewInstance(out ec2types.Instance) *Instance { return &Instance{ - LaunchTime: aws.ToTime(out.LaunchTime), - State: out.State.Name, - ID: aws.ToString(out.InstanceId), - ImageID: aws.ToString(out.ImageId), - Type: out.InstanceType, - Zone: aws.ToString(out.Placement.AvailabilityZone), - CapacityType: lo.Ternary(out.SpotInstanceRequestId != nil, karpv1.CapacityTypeSpot, karpv1.CapacityTypeOnDemand), + LaunchTime: lo.FromPtr(out.LaunchTime), + State: out.State.Name, + ID: 
lo.FromPtr(out.InstanceId), + ImageID: lo.FromPtr(out.ImageId), + Type: out.InstanceType, + Zone: lo.FromPtr(out.Placement.AvailabilityZone), + CapacityType: lo.If(out.SpotInstanceRequestId != nil, karpv1.CapacityTypeSpot). + ElseIf(out.CapacityReservationId != nil, karpv1.CapacityTypeReserved). + Else(karpv1.CapacityTypeOnDemand), + CapacityReservationID: lo.FromPtr(out.CapacityReservationId), SecurityGroupIDs: lo.Map(out.SecurityGroups, func(securitygroup ec2types.GroupIdentifier, _ int) string { - return aws.ToString(securitygroup.GroupId) + return lo.FromPtr(securitygroup.GroupId) }), - SubnetID: aws.ToString(out.SubnetId), - Tags: lo.SliceToMap(out.Tags, func(t ec2types.Tag) (string, string) { return aws.ToString(t.Key), aws.ToString(t.Value) }), + SubnetID: lo.FromPtr(out.SubnetId), + Tags: lo.SliceToMap(out.Tags, func(t ec2types.Tag) (string, string) { return lo.FromPtr(t.Key), lo.FromPtr(t.Value) }), EFAEnabled: lo.ContainsBy(out.NetworkInterfaces, func(item ec2types.InstanceNetworkInterface) bool { return item.InterfaceType != nil && *item.InterfaceType == string(ec2types.NetworkInterfaceTypeEfa) }), @@ -61,17 +64,24 @@ func NewInstance(out ec2types.Instance) *Instance { } -func NewInstanceFromFleet(out ec2types.CreateFleetInstance, tags map[string]string, efaEnabled bool) *Instance { +func NewInstanceFromFleet( + out ec2types.CreateFleetInstance, + tags map[string]string, + capacityType string, + capacityReservationID string, + efaEnabled bool, +) *Instance { return &Instance{ - LaunchTime: time.Now(), // estimate the launch time since we just launched - State: ec2types.InstanceStateNamePending, - ID: out.InstanceIds[0], - ImageID: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.ImageId), - Type: out.InstanceType, - Zone: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.AvailabilityZone), - CapacityType: string(out.Lifecycle), - SubnetID: aws.ToString(out.LaunchTemplateAndOverrides.Overrides.SubnetId), - Tags: tags, - EFAEnabled: efaEnabled, + LaunchTime: time.Now(), // estimate the launch time since we just launched + State: ec2types.InstanceStateNamePending, + ID: out.InstanceIds[0], + ImageID: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.ImageId), + Type: out.InstanceType, + Zone: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.AvailabilityZone), + CapacityType: capacityType, + CapacityReservationID: capacityReservationID, + SubnetID: lo.FromPtr(out.LaunchTemplateAndOverrides.Overrides.SubnetId), + Tags: tags, + EFAEnabled: efaEnabled, } } diff --git a/pkg/providers/instanceprofile/suite_test.go b/pkg/providers/instanceprofile/suite_test.go index 0cb00fde75c6..0991dd32f949 100644 --- a/pkg/providers/instanceprofile/suite_test.go +++ b/pkg/providers/instanceprofile/suite_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/samber/lo" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -54,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -66,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = 
coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) nodeClass = test.TestNodeClass{ EC2NodeClass: v1.EC2NodeClass{ diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 1b6ec4604571..a059d7697833 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -23,7 +23,11 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/karpenter/pkg/scheduling" + awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" + "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype/offering" + "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" @@ -65,6 +69,7 @@ type DefaultProvider struct { muInstanceTypesOfferings sync.RWMutex instanceTypesOfferings map[string]sets.Set[string] + allZones sets.Set[string] instanceTypesCache *cache.Cache discoveredCapacityCache *cache.Cache @@ -73,9 +78,21 @@ type DefaultProvider struct { instanceTypesSeqNum uint64 // instanceTypesOfferingsSeqNum is a monotonically increasing change counter used to avoid the expensive hashing operation on instance types instanceTypesOfferingsSeqNum uint64 + + offeringProvider *offering.DefaultProvider } -func NewDefaultProvider(instanceTypesCache *cache.Cache, discoveredCapacityCache *cache.Cache, ec2api sdk.EC2API, subnetProvider subnet.Provider, instanceTypesResolver Resolver) *DefaultProvider { +func NewDefaultProvider( + instanceTypesCache *cache.Cache, + offeringCache *cache.Cache, + discoveredCapacityCache *cache.Cache, + ec2api sdk.EC2API, + subnetProvider subnet.Provider, + pricingProvider pricing.Provider, + capacityReservationProvider capacityreservation.Provider, + unavailableOfferingsCache *awscache.UnavailableOfferings, + instanceTypesResolver Resolver, +) *DefaultProvider { return &DefaultProvider{ ec2api: ec2api, subnetProvider: subnetProvider, @@ -86,6 +103,12 @@ func NewDefaultProvider(instanceTypesCache *cache.Cache, discoveredCapacityCache discoveredCapacityCache: discoveredCapacityCache, cm: pretty.NewChangeMonitor(), instanceTypesSeqNum: 0, + offeringProvider: offering.NewDefaultProvider( + pricingProvider, + capacityReservationProvider, + unavailableOfferingsCache, + offeringCache, + ), } } @@ -112,10 +135,8 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) // Compute fully initialized instance types hash key subnetZonesHash, _ := hashstructure.Hash(subnetZones, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - // Compute hash key against node class AMIs (used to force cache rebuild when AMIs change) amiHash, _ := hashstructure.Hash(nodeClass.Status.AMIs, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - key := fmt.Sprintf("%d-%d-%016x-%016x-%016x", p.instanceTypesSeqNum, p.instanceTypesOfferingsSeqNum, @@ -123,80 +144,58 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) subnetZonesHash, p.instanceTypesResolver.CacheKey(nodeClass), ) + var instanceTypes []*cloudprovider.InstanceType if item, ok := p.instanceTypesCache.Get(key); ok { // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data 
itself) // so that modifications to the ordering of the data don't affect the original - return append([]*cloudprovider.InstanceType{}, item.([]*cloudprovider.InstanceType)...), nil + instanceTypes = item.([]*cloudprovider.InstanceType) + } else { + instanceTypes = p.resolveInstanceTypes(ctx, nodeClass, amiHash) + p.instanceTypesCache.SetDefault(key, instanceTypes) } + // Offerings aren't cached along with the rest of the instance type info because reserved offerings need to have up to + // date capacity information. Rather than incurring a cache miss each time an instance is launched into a reserved + // offering (or terminated), offerings are injected to the cached instance types on each call. Note that on-demand and + // spot offerings are still cached - only reserved offerings are generated each time. + return p.offeringProvider.InjectOfferings( + ctx, + instanceTypes, + nodeClass, + p.allZones, + ), nil +} - // Get all zones across all offerings - // We don't use this in the cache key since this is produced from our instanceTypesOfferings which we do cache - allZones := sets.New[string]() - for _, offeringZones := range p.instanceTypesOfferings { - for zone := range offeringZones { - allZones.Insert(zone) - } - } - if p.cm.HasChanged("zones", allZones) { - log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") - } - subnetZoneToID := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) { +func (p *DefaultProvider) resolveInstanceTypes( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + amiHash uint64, +) []*cloudprovider.InstanceType { + zonesToZoneIDs := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) { return s.Zone, s.ZoneID }) - result := lo.Map(p.instanceTypesInfo, func(i ec2types.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { - InstanceTypeVCPU.Set(float64(lo.FromPtr(i.VCpuInfo.DefaultVCpus)), map[string]string{ - instanceTypeLabel: string(i.InstanceType), - }) - InstanceTypeMemory.Set(float64(lo.FromPtr(i.MemoryInfo.SizeInMiB)*1024*1024), map[string]string{ - instanceTypeLabel: string(i.InstanceType), - }) - - zoneData := lo.Map(allZones.UnsortedList(), func(zoneName string, _ int) ZoneData { - if !p.instanceTypesOfferings[string(i.InstanceType)].Has(zoneName) || !subnetZones.Has(zoneName) { - return ZoneData{ - Name: zoneName, - Available: false, - } - } - return ZoneData{ - Name: zoneName, - ID: subnetZoneToID[zoneName], - Available: true, - } - }) - - it := p.instanceTypesResolver.Resolve(ctx, i, zoneData, nodeClass) + return lo.Map(p.instanceTypesInfo, func(info ec2types.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { + it := p.instanceTypesResolver.Resolve(ctx, info, p.instanceTypesOfferings[string(info.InstanceType)].UnsortedList(), zonesToZoneIDs, nodeClass) if cached, ok := p.discoveredCapacityCache.Get(fmt.Sprintf("%s-%016x", it.Name, amiHash)); ok { it.Capacity[corev1.ResourceMemory] = cached.(resource.Quantity) } - for _, of := range it.Offerings { - InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{ - instanceTypeLabel: it.Name, - capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), - zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), - }) - InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{ - instanceTypeLabel: it.Name, - capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), - zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), - }) 
- } + InstanceTypeVCPU.Set(float64(lo.FromPtr(info.VCpuInfo.DefaultVCpus)), map[string]string{ + instanceTypeLabel: string(info.InstanceType), + }) + InstanceTypeMemory.Set(float64(lo.FromPtr(info.MemoryInfo.SizeInMiB)*1024*1024), map[string]string{ + instanceTypeLabel: string(info.InstanceType), + }) return it }) - p.instanceTypesCache.SetDefault(key, result) - return result, nil } func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // DO NOT REMOVE THIS LOCK ---------------------------------------------------------------------------- // We lock here so that multiple callers to getInstanceTypeOfferings do not result in cache misses and multiple // calls to EC2 when we could have just made one call. - // TODO @joinnis: This can be made more efficient by holding a Read lock and only obtaining the Write if not in cache p.muInstanceTypesInfo.Lock() defer p.muInstanceTypesInfo.Unlock() var instanceTypes []ec2types.InstanceTypeInfo - paginator := ec2.NewDescribeInstanceTypesPaginator(p.ec2api, &ec2.DescribeInstanceTypesInput{ Filters: []ec2types.Filter{ { @@ -209,13 +208,11 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { }, }, }) - for paginator.HasMorePages() { page, err := paginator.NextPage(ctx) if err != nil { return fmt.Errorf("describing instance types, %w", err) } - instanceTypes = append(instanceTypes, page.InstanceTypes...) } @@ -223,8 +220,7 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // Only update instanceTypesSeqNun with the instance types have been changed // This is to not create new keys with duplicate instance types option atomic.AddUint64(&p.instanceTypesSeqNum, 1) - log.FromContext(ctx).WithValues( - "count", len(instanceTypes)).V(1).Info("discovered instance types") + log.FromContext(ctx).WithValues("count", len(instanceTypes)).V(1).Info("discovered instance types") } p.instanceTypesInfo = instanceTypes return nil @@ -267,6 +263,17 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error log.FromContext(ctx).WithValues("instance-type-count", len(instanceTypeOfferings)).V(1).Info("discovered offerings for instance types") } p.instanceTypesOfferings = instanceTypeOfferings + + allZones := sets.New[string]() + for _, offeringZones := range instanceTypeOfferings { + for zone := range offeringZones { + allZones.Insert(zone) + } + } + if p.cm.HasChanged("zones", allZones) { + log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") + } + p.allZones = allZones return nil } diff --git a/pkg/providers/instancetype/metrics.go b/pkg/providers/instancetype/metrics.go index 726c851e13b8..050c1a99b844 100644 --- a/pkg/providers/instancetype/metrics.go +++ b/pkg/providers/instancetype/metrics.go @@ -54,31 +54,4 @@ var ( instanceTypeLabel, }, ) - InstanceTypeOfferingAvailable = opmetrics.NewPrometheusGauge( - crmetrics.Registry, - prometheus.GaugeOpts{ - Namespace: metrics.Namespace, - Subsystem: cloudProviderSubsystem, - Name: "instance_type_offering_available", - Help: "Instance type offering availability, based on instance type, capacity type, and zone", - }, - []string{ - instanceTypeLabel, - capacityTypeLabel, - zoneLabel, - }, - ) - InstanceTypeOfferingPriceEstimate = opmetrics.NewPrometheusGauge( - crmetrics.Registry, - prometheus.GaugeOpts{ - Namespace: metrics.Namespace, - Subsystem: cloudProviderSubsystem, - Name: "instance_type_offering_price_estimate", - Help: "Instance type offering estimated hourly price used when making 
informed decisions on node cost calculation, based on instance type, capacity type, and zone.", - }, - []string{ - instanceTypeLabel, - capacityTypeLabel, - zoneLabel, - }) ) diff --git a/pkg/providers/instancetype/offering/metrics.go b/pkg/providers/instancetype/offering/metrics.go new file mode 100644 index 000000000000..a4c70f1713b3 --- /dev/null +++ b/pkg/providers/instancetype/offering/metrics.go @@ -0,0 +1,61 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package offering + +import ( + opmetrics "github.com/awslabs/operatorpkg/metrics" + "github.com/prometheus/client_golang/prometheus" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "sigs.k8s.io/karpenter/pkg/metrics" +) + +const ( + cloudProviderSubsystem = "cloudprovider" + instanceTypeLabel = "instance_type" + capacityTypeLabel = "capacity_type" + zoneLabel = "zone" +) + +var ( + InstanceTypeOfferingAvailable = opmetrics.NewPrometheusGauge( + crmetrics.Registry, + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_available", + Help: "Instance type offering availability, based on instance type, capacity type, and zone", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, + }, + ) + InstanceTypeOfferingPriceEstimate = opmetrics.NewPrometheusGauge( + crmetrics.Registry, + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_price_estimate", + Help: "Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, + }, + ) +) diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go new file mode 100644 index 000000000000..375a5f2c5edf --- /dev/null +++ b/pkg/providers/instancetype/offering/provider.go @@ -0,0 +1,228 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package offering + +import ( + "context" + "fmt" + + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/mitchellh/hashstructure/v2" + "github.com/patrickmn/go-cache" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/operator/options" + "sigs.k8s.io/karpenter/pkg/scheduling" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + awscache "github.com/aws/karpenter-provider-aws/pkg/cache" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" + "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" +) + +type Provider interface { + InjectOfferings(context.Context, []*cloudprovider.InstanceType, *v1.EC2NodeClass, []string) []*cloudprovider.InstanceType +} + +type DefaultProvider struct { + pricingProvider pricing.Provider + capacityReservationProvider capacityreservation.Provider + unavailableOfferings *awscache.UnavailableOfferings + cache *cache.Cache +} + +func NewDefaultProvider( + pricingProvider pricing.Provider, + capacityReservationProvider capacityreservation.Provider, + unavailableOfferingsCache *awscache.UnavailableOfferings, + offeringCache *cache.Cache, +) *DefaultProvider { + return &DefaultProvider{ + pricingProvider: pricingProvider, + capacityReservationProvider: capacityReservationProvider, + unavailableOfferings: unavailableOfferingsCache, + cache: offeringCache, + } +} + +func (p *DefaultProvider) InjectOfferings( + ctx context.Context, + instanceTypes []*cloudprovider.InstanceType, + nodeClass *v1.EC2NodeClass, + allZones sets.Set[string], +) []*cloudprovider.InstanceType { + subnetZones := lo.SliceToMap(nodeClass.Status.Subnets, func(s v1.Subnet) (string, string) { + return s.Zone, s.ZoneID + }) + var its []*cloudprovider.InstanceType + for _, it := range instanceTypes { + offerings := p.createOfferings( + ctx, + it, + nodeClass, + allZones, + subnetZones, + ) + + reservedAvailability := map[string]bool{} + for _, of := range offerings { + // If the capacity type is reserved we need to determine if any of the reserved offerings are available. Otherwise, + // we can update the availability metric directly. + if of.CapacityType() == karpv1.CapacityTypeReserved { + reservedAvailability[of.Zone()] = reservedAvailability[of.Zone()] || of.Available + } else { + InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(of.Available, 1, 0)), map[string]string{ + instanceTypeLabel: it.Name, + capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), + zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), + }) + } + InstanceTypeOfferingPriceEstimate.Set(of.Price, map[string]string{ + instanceTypeLabel: it.Name, + capacityTypeLabel: of.Requirements.Get(karpv1.CapacityTypeLabelKey).Any(), + zoneLabel: of.Requirements.Get(corev1.LabelTopologyZone).Any(), + }) + } + for zone := range allZones { + InstanceTypeOfferingAvailable.Set(float64(lo.Ternary(reservedAvailability[zone], 1, 0)), map[string]string{ + instanceTypeLabel: it.Name, + capacityTypeLabel: karpv1.CapacityTypeReserved, + zoneLabel: zone, + }) + } + + // NOTE: By making this copy one level deep, we can modify the offerings without mutating the results from previous + // GetInstanceTypes calls. 
This should still be done with caution - it is currently done here in the provider, and + // once in the instance provider (filterReservedInstanceTypes) + its = append(its, &cloudprovider.InstanceType{ + Name: it.Name, + Requirements: it.Requirements, + Offerings: offerings, + Capacity: it.Capacity, + Overhead: it.Overhead, + }) + } + return its +} + +//nolint:gocyclo +func (p *DefaultProvider) createOfferings( + ctx context.Context, + it *cloudprovider.InstanceType, + nodeClass *v1.EC2NodeClass, + allZones sets.Set[string], + subnetZones map[string]string, +) cloudprovider.Offerings { + var offerings []*cloudprovider.Offering + itZones := sets.New(it.Requirements.Get(corev1.LabelTopologyZone).Values()...) + + if ofs, ok := p.cache.Get(p.cacheKeyFromInstanceType(it)); ok { + offerings = append(offerings, ofs.([]*cloudprovider.Offering)...) + } else { + var cachedOfferings []*cloudprovider.Offering + for zone := range allZones { + for _, capacityType := range it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values() { + // Reserved capacity types are constructed separately + if capacityType == karpv1.CapacityTypeReserved { + continue + } + isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) + var price float64 + var hasPrice bool + switch capacityType { + case karpv1.CapacityTypeOnDemand: + price, hasPrice = p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)) + case karpv1.CapacityTypeSpot: + price, hasPrice = p.pricingProvider.SpotPrice(ec2types.InstanceType(it.Name), zone) + default: + panic(fmt.Sprintf("invalid capacity type %q in requirements for instance type %q", capacityType, it.Name)) + } + offering := &cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone), + scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist), + ), + Price: price, + Available: !isUnavailable && hasPrice && itZones.Has(zone), + } + if id, ok := subnetZones[zone]; ok { + offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) + } + cachedOfferings = append(cachedOfferings, offering) + offerings = append(cachedOfferings, offering) + } + } + p.cache.SetDefault(p.cacheKeyFromInstanceType(it), cachedOfferings) + offerings = append(offerings, cachedOfferings...) + } + if !options.FromContext(ctx).FeatureGates.ReservedCapacity { + return offerings + } + + for i := range nodeClass.Status.CapacityReservations { + if nodeClass.Status.CapacityReservations[i].InstanceType != it.Name { + continue + } + reservation := &nodeClass.Status.CapacityReservations[i] + price := 0.0 + if odPrice, ok := p.pricingProvider.OnDemandPrice(ec2types.InstanceType(it.Name)); ok { + // Divide the on-demand price by a sufficiently large constant. This allows us to treat the reservation as "free", + // while maintaining relative ordering for consolidation. If the pricing details are unavailable for whatever reason, + // still succeed to create the offering and leave the price at zero. This will break consolidation, but will allow + // users to utilize the instances they're already paying for. 
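+ // Illustrative math (not exact pricing): an on-demand rate of roughly $0.096/hr for m5.large would yield a reserved price of about $9.6e-9/hr here, effectively free, yet still ordered consistently relative to other reserved offerings derived the same way.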
+ price = odPrice / 10_000_000.0 + } + reservationCapacity := p.capacityReservationProvider.GetAvailableInstanceCount(reservation.ID) + offering := &cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeReserved), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, reservation.AvailabilityZone), + scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, reservation.ID), + ), + Price: price, + Available: reservationCapacity != 0 && itZones.Has(reservation.AvailabilityZone), + ReservationCapacity: reservationCapacity, + } + if id, ok := subnetZones[reservation.AvailabilityZone]; ok { + offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, id)) + } + offerings = append(offerings, offering) + } + return offerings +} + +func (p *DefaultProvider) cacheKeyFromInstanceType(it *cloudprovider.InstanceType) string { + zonesHash, _ := hashstructure.Hash( + it.Requirements.Get(corev1.LabelTopologyZone).Values(), + hashstructure.FormatV2, + &hashstructure.HashOptions{SlicesAsSets: true}, + ) + capacityTypesHash, _ := hashstructure.Hash( + it.Requirements.Get(karpv1.CapacityTypeLabelKey).Values(), + hashstructure.FormatV2, + &hashstructure.HashOptions{SlicesAsSets: true}, + ) + return fmt.Sprintf( + "%s-%016x-%016x-%d", + it.Name, + zonesHash, + capacityTypesHash, + p.unavailableOfferings.SeqNum, + ) +} diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 3c5fb0992ce5..8f6359cd64be 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -83,12 +83,12 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster, fakeClock) }) @@ -98,7 +98,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() awsEnv.Reset() @@ -266,7 +266,10 @@ var _ = Describe("InstanceTypeProvider", func() { } // Ensure that we're exercising all well known labels - Expect(lo.Keys(nodeSelector)).To(ContainElements(append(karpv1.WellKnownLabels.UnsortedList(), lo.Keys(karpv1.NormalizedLabels)...))) + Expect(lo.Keys(nodeSelector)).To(ContainElements(append(karpv1.WellKnownLabels.Difference(sets.New( + // TODO: add back to test with a preconfigured reserved instance type + 
v1.LabelCapacityReservationID, + )).UnsortedList(), lo.Keys(karpv1.NormalizedLabels)...))) var pods []*corev1.Pod for key, value := range nodeSelector { @@ -317,10 +320,11 @@ var _ = Describe("InstanceTypeProvider", func() { "topology.ebs.csi.aws.com/zone": "test-zone-1a", } - // Ensure that we're exercising all well known labels except for accelerator labels + // Ensure that we're exercising all well known labels except for the accelerator and capacity reservation labels Expect(lo.Keys(nodeSelector)).To(ContainElements( append( karpv1.WellKnownLabels.Difference(sets.New( + v1.LabelCapacityReservationID, v1.LabelInstanceAcceleratorCount, v1.LabelInstanceAcceleratorName, v1.LabelInstanceAcceleratorManufacturer, @@ -369,8 +373,9 @@ var _ = Describe("InstanceTypeProvider", func() { "topology.ebs.csi.aws.com/zone": "test-zone-1a", } - // Ensure that we're exercising all well known labels except for gpu labels and nvme + // Ensure that we're exercising all well known labels except for the gpu, nvme and capacity reservation id labels expectedLabels := append(karpv1.WellKnownLabels.Difference(sets.New( + v1.LabelCapacityReservationID, v1.LabelInstanceGPUCount, v1.LabelInstanceGPUName, v1.LabelInstanceGPUManufacturer, @@ -952,6 +957,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -974,6 +981,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, windowsNodeClass.Spec.BlockDeviceMappings, windowsNodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1093,6 +1102,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1119,6 +1130,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1141,6 +1154,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1172,6 +1187,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1210,6 +1227,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1238,6 +1257,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1266,6 +1287,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1294,6 
+1317,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1324,6 +1349,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1355,6 +1382,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1383,6 +1412,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1415,6 +1446,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1434,6 +1467,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1467,6 +1502,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1498,6 +1535,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1529,6 +1568,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1552,6 +1593,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1569,6 +1612,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1594,6 +1639,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1618,6 +1665,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1647,6 +1696,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, t3Large, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, 
nodeClass.Spec.Kubelet.MaxPods, @@ -1681,6 +1732,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, t3Large, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1711,6 +1764,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1736,6 +1791,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1761,6 +1818,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1787,6 +1846,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1804,6 +1865,8 @@ var _ = Describe("InstanceTypeProvider", func() { it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -2385,7 +2448,7 @@ var _ = Describe("InstanceTypeProvider", func() { nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, RootVolume: false, }, } @@ -2394,32 +2457,39 @@ var _ = Describe("InstanceTypeProvider", func() { {Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}, {Spec: v1.EC2NodeClassSpec{AMISelectorTerms: []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}}}}, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { - DeviceName: lo.ToPtr("/dev/sda1"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, - RootVolume: true, + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{ + { + DeviceName: lo.ToPtr("/dev/xvda"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, + RootVolume: false, + }, + { + DeviceName: lo.ToPtr("/dev/sda1"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, + RootVolume: true, + }, }, }, - }}, + }, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{{ DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(10, resource.Giga)}, - RootVolume: true, - }, + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(15, resource.Giga)}, + RootVolume: false, + }}, }, - }}, + }, { - Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { - DeviceName: lo.ToPtr("/dev/xvda"), - EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(20, resource.Giga)}, - RootVolume: false, - }, + Spec: v1.EC2NodeClassSpec{ + BlockDeviceMappings: []*v1.BlockDeviceMapping{{ + 
DeviceName: lo.ToPtr("/dev/yvda"), + EBS: &v1.BlockDevice{VolumeSize: resource.NewScaledQuantity(25, resource.Giga)}, + RootVolume: true, + }}, }, - }}, + }, } var instanceTypeResult [][]*corecloudprovider.InstanceType ExpectApplied(ctx, env.Client, nodeClass) @@ -2439,12 +2509,12 @@ var _ = Describe("InstanceTypeProvider", func() { _, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) // We are making sure to pull from the cache - instanetypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass) + its, err := awsEnv.InstanceTypesProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) - sort.Slice(instanetypes, func(x int, y int) bool { - return instanetypes[x].Name < instanetypes[y].Name + sort.Slice(its, func(x int, y int) bool { + return its[x].Name < its[y].Name }) - instanceTypeResult = append(instanceTypeResult, instanetypes) + instanceTypeResult = append(instanceTypeResult, its) } // Based on the nodeclass configuration, we expect to have 5 unique set of instance types @@ -2489,6 +2559,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) func uniqueInstanceTypeList(instanceTypesLists [][]*corecloudprovider.InstanceType) { + GinkgoHelper() for x := range instanceTypesLists { for y := range instanceTypesLists { if x == y { diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 8f178418e690..7e9178981cfb 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -29,14 +29,11 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" - "sigs.k8s.io/controller-runtime/pkg/log" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" - "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" @@ -62,20 +59,16 @@ type Resolver interface { // CacheKey tells the InstanceType cache if something changes about the InstanceTypes or Offerings based on the NodeClass. 
CacheKey(nodeClass *v1.EC2NodeClass) string // Resolve generates an InstanceType based on raw InstanceTypeInfo and NodeClass setting data - Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zoneData []ZoneData, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType + Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zones []string, zonesToZoneIDs map[string]string, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType } type DefaultResolver struct { - region string - pricingProvider pricing.Provider - unavailableOfferings *awscache.UnavailableOfferings + region string } -func NewDefaultResolver(region string, pricingProvider pricing.Provider, unavailableOfferingsCache *awscache.UnavailableOfferings) *DefaultResolver { +func NewDefaultResolver(region string) *DefaultResolver { return &DefaultResolver{ - region: region, - pricingProvider: pricingProvider, - unavailableOfferings: unavailableOfferingsCache, + region: region, } } @@ -86,16 +79,18 @@ func (d *DefaultResolver) CacheKey(nodeClass *v1.EC2NodeClass) string { } kcHash, _ := hashstructure.Hash(kc, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) blockDeviceMappingsHash, _ := hashstructure.Hash(nodeClass.Spec.BlockDeviceMappings, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - return fmt.Sprintf("%016x-%016x-%s-%s-%d", + capacityReservationHash, _ := hashstructure.Hash(nodeClass.Status.CapacityReservations, hashstructure.FormatV2, nil) + return fmt.Sprintf( + "%016x-%016x-%016x-%s-%s", kcHash, blockDeviceMappingsHash, + capacityReservationHash, lo.FromPtr((*string)(nodeClass.Spec.InstanceStorePolicy)), nodeClass.AMIFamily(), - d.unavailableOfferings.SeqNum, ) } -func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zoneData []ZoneData, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType { +func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTypeInfo, zones []string, zonesToZoneIDs map[string]string, nodeClass *v1.EC2NodeClass) *cloudprovider.InstanceType { // !!! Important !!! // Any changes to the values passed into the NewInstanceType method will require making updates to the cache key // so that Karpenter is able to cache the set of InstanceTypes based on values that alter the set of instance types @@ -104,68 +99,48 @@ func (d *DefaultResolver) Resolve(ctx context.Context, info ec2types.InstanceTyp if nodeClass.Spec.Kubelet != nil { kc = nodeClass.Spec.Kubelet } - return NewInstanceType(ctx, info, d.region, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, kc.MaxPods, kc.PodsPerCore, kc.KubeReserved, - kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, nodeClass.AMIFamily(), d.createOfferings(ctx, info, zoneData)) -} - -// createOfferings creates a set of mutually exclusive offerings for a given instance type. This provider maintains an -// invariant that each offering is mutually exclusive. Specifically, there is an offering for each permutation of zone -// and capacity type. ZoneID is also injected into the offering requirements, when available, but there is a 1-1 -// mapping between zone and zoneID so this does not change the number of offerings. -// -// Each requirement on the offering is guaranteed to have a single value. 
To get the value for a requirement on an -// offering, you can do the following thanks to this invariant: -// -// offering.Requirements.Get(v1.TopologyLabelZone).Any() -func (d *DefaultResolver) createOfferings(ctx context.Context, instanceType ec2types.InstanceTypeInfo, zoneData []ZoneData) []cloudprovider.Offering { - var offerings []cloudprovider.Offering - for _, zone := range zoneData { - // while usage classes should be a distinct set, there's no guarantee of that - for capacityType := range sets.New((instanceType.SupportedUsageClasses)...) { - // exclude any offerings that have recently seen an insufficient capacity error from EC2 - isUnavailable := d.unavailableOfferings.IsUnavailable(instanceType.InstanceType, zone.Name, string(capacityType)) - var price float64 - var ok bool - switch capacityType { - case ec2types.UsageClassTypeSpot: - price, ok = d.pricingProvider.SpotPrice(instanceType.InstanceType, zone.Name) - case ec2types.UsageClassTypeOnDemand: - price, ok = d.pricingProvider.OnDemandPrice(instanceType.InstanceType) - case "capacity-block": - // ignore since karpenter doesn't support it yet, but do not log an unknown capacity type error - continue - default: - log.FromContext(ctx).WithValues("capacity-type", capacityType, "instance-type", instanceType.InstanceType).Error(fmt.Errorf("received unknown capacity type"), "failed parsing offering") - continue - } - available := !isUnavailable && ok && zone.Available - offering := cloudprovider.Offering{ - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, string(capacityType)), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zone.Name), - ), - Price: price, - Available: available, - } - if zone.ID != "" { - offering.Requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, zone.ID)) - } - offerings = append(offerings, offering) - } - } - return offerings + return NewInstanceType( + ctx, + info, + d.region, + zones, + zonesToZoneIDs, + nodeClass.Spec.BlockDeviceMappings, + nodeClass.Spec.InstanceStorePolicy, + kc.MaxPods, + kc.PodsPerCore, + kc.KubeReserved, + kc.SystemReserved, + kc.EvictionHard, + kc.EvictionSoft, + nodeClass.AMIFamily(), + lo.Filter(nodeClass.Status.CapacityReservations, func(cr v1.CapacityReservation, _ int) bool { + return cr.InstanceType == string(info.InstanceType) + }), + ) } -func NewInstanceType(ctx context.Context, info ec2types.InstanceTypeInfo, region string, - blockDeviceMappings []*v1.BlockDeviceMapping, instanceStorePolicy *v1.InstanceStorePolicy, maxPods *int32, podsPerCore *int32, - kubeReserved map[string]string, systemReserved map[string]string, evictionHard map[string]string, evictionSoft map[string]string, - amiFamilyType string, offerings cloudprovider.Offerings) *cloudprovider.InstanceType { - +func NewInstanceType( + ctx context.Context, + info ec2types.InstanceTypeInfo, + region string, + offeringZones []string, + subnetZonesToZoneIDs map[string]string, + blockDeviceMappings []*v1.BlockDeviceMapping, + instanceStorePolicy *v1.InstanceStorePolicy, + maxPods *int32, + podsPerCore *int32, + kubeReserved map[string]string, + systemReserved map[string]string, + evictionHard map[string]string, + evictionSoft map[string]string, + amiFamilyType string, + capacityReservations []v1.CapacityReservation, +) *cloudprovider.InstanceType { amiFamily := amifamily.GetAMIFamily(amiFamilyType, &amifamily.Options{}) it := &cloudprovider.InstanceType{ Name: 
string(info.InstanceType), - Requirements: computeRequirements(info, offerings, region, amiFamily), - Offerings: offerings, + Requirements: computeRequirements(info, region, offeringZones, subnetZonesToZoneIDs, amiFamily, capacityReservations), Capacity: computeCapacity(ctx, info, amiFamily, blockDeviceMappings, instanceStorePolicy, maxPods, podsPerCore), Overhead: &cloudprovider.InstanceTypeOverhead{ KubeReserved: kubeReservedResources(cpu(info), pods(ctx, info, amiFamily, maxPods, podsPerCore), ENILimitedPods(ctx, info), amiFamily, kubeReserved), @@ -180,21 +155,37 @@ func NewInstanceType(ctx context.Context, info ec2types.InstanceTypeInfo, region } //nolint:gocyclo -func computeRequirements(info ec2types.InstanceTypeInfo, offerings cloudprovider.Offerings, region string, amiFamily amifamily.AMIFamily) scheduling.Requirements { +func computeRequirements( + info ec2types.InstanceTypeInfo, + region string, + offeringZones []string, + subnetZonesToZoneIDs map[string]string, + amiFamily amifamily.AMIFamily, + capacityReservations []v1.CapacityReservation, +) scheduling.Requirements { + capacityTypes := lo.FilterMap(info.SupportedUsageClasses, func(uc ec2types.UsageClassType, _ int) (string, bool) { + if uc != ec2types.UsageClassTypeOnDemand && uc != ec2types.UsageClassTypeSpot { + return "", false + } + return string(uc), true + }) + if len(capacityReservations) != 0 { + capacityTypes = append(capacityTypes, karpv1.CapacityTypeReserved) + } + + // Available zones is the set intersection between zones where the instance type is available, and zones which are + // available via the provided EC2NodeClass. + availableZones := sets.New(offeringZones...).Intersection(sets.New(lo.Keys(subnetZonesToZoneIDs)...)) requirements := scheduling.NewRequirements( // Well Known Upstream scheduling.NewRequirement(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, string(info.InstanceType)), scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(corev1.LabelTopologyZone).Any() - })...), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, availableZones.UnsortedList()...), scheduling.NewRequirement(corev1.LabelTopologyRegion, corev1.NodeSelectorOpIn, region), scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter - scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { - return o.Requirements.Get(karpv1.CapacityTypeLabelKey).Any() - })...), + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityTypes...), // Well Known to AWS scheduling.NewRequirement(v1.LabelInstanceCPU, corev1.NodeSelectorOpIn, fmt.Sprint(lo.FromPtr(info.VCpuInfo.DefaultVCpus))), scheduling.NewRequirement(v1.LabelInstanceCPUManufacturer, corev1.NodeSelectorOpDoesNotExist), @@ -219,12 +210,19 @@ func computeRequirements(info ec2types.InstanceTypeInfo, offerings cloudprovider ) // Only add zone-id label when available in offerings. It may not be available if a user has upgraded from a // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. 
- if zoneIDs := lo.FilterMap(offerings.Available(), func(o cloudprovider.Offering, _ int) (string, bool) { - zoneID := o.Requirements.Get(v1.LabelTopologyZoneID).Any() - return zoneID, zoneID != "" + if zoneIDs := lo.FilterMap(availableZones.UnsortedList(), func(zone string, _ int) (string, bool) { + id, ok := subnetZonesToZoneIDs[zone] + return id, ok }); len(zoneIDs) != 0 { requirements.Add(scheduling.NewRequirement(v1.LabelTopologyZoneID, corev1.NodeSelectorOpIn, zoneIDs...)) } + if len(capacityReservations) != 0 { + requirements.Add(scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpIn, lo.Map(capacityReservations, func(cr v1.CapacityReservation, _ int) string { + return cr.ID + })...)) + } else { + requirements.Add(scheduling.NewRequirement(cloudprovider.ReservationIDLabel, corev1.NodeSelectorOpDoesNotExist)) + } // Instance Type Labels instanceFamilyParts := instanceTypeScheme.FindStringSubmatch(string(info.InstanceType)) if len(instanceFamilyParts) == 4 { diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 4e1fe5b9a653..103173b7c72a 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -38,6 +38,8 @@ import ( "k8s.io/apimachinery/pkg/api/resource" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpoptions "sigs.k8s.io/karpenter/pkg/operator/options" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -60,9 +62,10 @@ type Provider interface { ResolveClusterCIDR(context.Context) error } type LaunchTemplate struct { - Name string - InstanceTypes []*cloudprovider.InstanceType - ImageID string + Name string + InstanceTypes []*cloudprovider.InstanceType + ImageID string + CapacityReservationID string } type DefaultProvider struct { @@ -109,8 +112,14 @@ func NewDefaultProvider(ctx context.Context, cache *cache.Cache, ec2api sdk.EC2A }() return l } -func (p *DefaultProvider) EnsureAll(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, - instanceTypes []*cloudprovider.InstanceType, capacityType string, tags map[string]string) ([]*LaunchTemplate, error) { +func (p *DefaultProvider) EnsureAll( + ctx context.Context, + nodeClass *v1.EC2NodeClass, + nodeClaim *karpv1.NodeClaim, + instanceTypes []*cloudprovider.InstanceType, + capacityType string, + tags map[string]string, +) ([]*LaunchTemplate, error) { p.Lock() defer p.Unlock() options, err := p.createAMIOptions(ctx, nodeClass, lo.Assign(nodeClaim.Labels, map[string]string{karpv1.CapacityTypeLabelKey: capacityType}), tags) @@ -128,7 +137,12 @@ func (p *DefaultProvider) EnsureAll(ctx context.Context, nodeClass *v1.EC2NodeCl if err != nil { return nil, err } - launchTemplates = append(launchTemplates, &LaunchTemplate{Name: *ec2LaunchTemplate.LaunchTemplateName, InstanceTypes: resolvedLaunchTemplate.InstanceTypes, ImageID: resolvedLaunchTemplate.AMIID}) + launchTemplates = append(launchTemplates, &LaunchTemplate{ + Name: *ec2LaunchTemplate.LaunchTemplateName, + InstanceTypes: resolvedLaunchTemplate.InstanceTypes, + ImageID: resolvedLaunchTemplate.AMIID, + CapacityReservationID: resolvedLaunchTemplate.CapacityReservationID, + }) } return launchTemplates, nil } @@ -219,7 +233,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami if err != nil { return ec2types.LaunchTemplate{}, err } - createLaunchTemplateInput := 
GetCreateLaunchTemplateInput(options, p.ClusterIPFamily, userData) + createLaunchTemplateInput := GetCreateLaunchTemplateInput(ctx, options, p.ClusterIPFamily, userData) output, err := p.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput) if err != nil { return ec2types.LaunchTemplate{}, err @@ -229,7 +243,12 @@ } // you need UserData, AmiID, tags, blockdevicemappings, instance profile, -func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFamily corev1.IPFamily, userData string) *ec2.CreateLaunchTemplateInput { +func GetCreateLaunchTemplateInput( + ctx context.Context, + options *amifamily.LaunchTemplate, + ClusterIPFamily corev1.IPFamily, + userData string, +) *ec2.CreateLaunchTemplateInput { launchTemplateDataTags := []ec2types.LaunchTemplateTagSpecificationRequest{ {ResourceType: ec2types.ResourceTypeNetworkInterface, Tags: utils.MergeTags(options.Tags)}, } @@ -237,7 +256,7 @@ launchTemplateDataTags = append(launchTemplateDataTags, ec2types.LaunchTemplateTagSpecificationRequest{ResourceType: ec2types.ResourceTypeSpotInstancesRequest, Tags: utils.MergeTags(options.Tags)}) } networkInterfaces := generateNetworkInterfaces(options, ClusterIPFamily) - return &ec2.CreateLaunchTemplateInput{ + lt := &ec2.CreateLaunchTemplateInput{ LaunchTemplateName: aws.String(LaunchTemplateName(options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: blockDeviceMappings(options.BlockDeviceMappings), @@ -275,6 +294,25 @@ }, }, } + // Gate this specifically since the update to CapacityReservationPreference will opt on-demand/spot launches out of open + // ODCRs, which is a breaking change from the pre-native ODCR support behavior. + if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity { + lt.LaunchTemplateData.CapacityReservationSpecification = &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ + CapacityReservationPreference: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + ec2types.CapacityReservationPreferenceCapacityReservationsOnly, + ec2types.CapacityReservationPreferenceNone, + ), + CapacityReservationTarget: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + &ec2types.CapacityReservationTarget{ + CapacityReservationId: &options.CapacityReservationID, + }, + nil, + ), + } + } + return lt } // generateNetworkInterfaces generates network interfaces for the launch template.
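Aside: the feature-gated block above is easier to reason about in isolation. Below is a minimal sketch, not part of this diff, of the same launch-template capacity-reservation logic; the package and helper names are invented for illustration, while the ec2types constants, lo.Ternary, and karpv1.CapacityTypeReserved are taken from the change itself. Reserved launches target a specific reservation ID; with the ReservedCapacity feature gate enabled, every other launch is given the "none" preference so it no longer lands in open ODCRs.

package sketch

import (
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
	"github.com/samber/lo"
	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
)

// capacityReservationSpec is an illustrative stand-in for the gated block in GetCreateLaunchTemplateInput:
// reserved-capacity launches target the given reservation, all other launches are opted out of open ODCRs.
func capacityReservationSpec(capacityType, reservationID string) *ec2types.LaunchTemplateCapacityReservationSpecificationRequest {
	reserved := capacityType == karpv1.CapacityTypeReserved
	return &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{
		// reservations-only preference for reserved launches, none otherwise
		CapacityReservationPreference: lo.Ternary(
			reserved,
			ec2types.CapacityReservationPreferenceCapacityReservationsOnly,
			ec2types.CapacityReservationPreferenceNone,
		),
		// only reserved launches carry a target; nil leaves the target unset for on-demand/spot
		CapacityReservationTarget: lo.Ternary(
			reserved,
			&ec2types.CapacityReservationTarget{CapacityReservationId: &reservationID},
			nil,
		),
	}
}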
diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index b31b02177039..ccfadbb368c1 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -90,8 +90,8 @@ func TestAWS(t *testing.T) { } var _ = BeforeSuite(func() { - env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + env = coretest.NewEnvironment(coretest.WithCRDs(test.DisableCapacityReservationIDValidation(apis.CRDs)...), coretest.WithCRDs(v1alpha1.CRDs...)) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -99,7 +99,7 @@ var _ = BeforeSuite(func() { fakeClock = &clock.FakeClock{} recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.CapacityReservationProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster, fakeClock) }) @@ -110,7 +110,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) cluster.Reset() awsEnv.Reset() @@ -1016,6 +1016,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1067,6 +1069,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -1092,6 +1096,8 @@ var _ = Describe("LaunchTemplate Provider", func() { it := instancetype.NewInstanceType(ctx, info, "", + nil, + nil, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, nodeClass.Spec.Kubelet.MaxPods, @@ -2036,7 +2042,7 @@ essential = true nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyCustom) nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} ExpectApplied(ctx, env.Client, nodeClass) - controller := nodeclass.NewController(env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.EC2API) + controller := nodeclass.NewController(ctx, awsEnv.Clock, env.Client, recorder, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{ { @@ -2294,6 +2300,129 @@ essential = true ) }) }) + It("should generate a unique launch template per capacity reservation", func() { + crs := 
[]ec2types.CapacityReservation{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1a-2"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.large"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.large-1b-1"), + AvailableInstanceCount: lo.ToPtr[int32](10), + State: ec2types.CapacityReservationStateActive, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + InstanceType: lo.ToPtr("m5.xlarge"), + OwnerId: lo.ToPtr("012345678901"), + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + CapacityReservationId: lo.ToPtr("cr-m5.xlarge-1b-1"), + AvailableInstanceCount: lo.ToPtr[int32](15), + State: ec2types.CapacityReservationStateActive, + }, + } + awsEnv.EC2API.DescribeCapacityReservationsOutput.Set(&ec2.DescribeCapacityReservationsOutput{ + CapacityReservations: crs, + }) + for _, cr := range crs { + nodeClass.Status.CapacityReservations = append(nodeClass.Status.CapacityReservations, lo.Must(nodeclass.CapacityReservationFromEC2(&cr))) + awsEnv.CapacityReservationProvider.SetAvailableInstanceCount(*cr.CapacityReservationId, int(*cr.AvailableInstanceCount)) + } + + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }}} + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, pod, nodePool, nodeClass) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + + launchTemplates := map[string]*ec2.CreateLaunchTemplateInput{} + for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() != 0 { + lt := awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop() + launchTemplates[*lt.LaunchTemplateName] = lt + } + // We should have created 3 launch templates, rather than 4 since we only create 1 launch template per capacity pool + Expect(launchTemplates).To(HaveLen(3)) + reservationIDs := lo.Uniq(lo.Map(lo.Values(launchTemplates), func(input *ec2.CreateLaunchTemplateInput, _ int) string { + return *input.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId + })) + Expect(reservationIDs).To(HaveLen(3)) + Expect(reservationIDs).To(ConsistOf( + // We don't include the m5.large offering in 1a because we select the zonal offering with the highest capacity + "cr-m5.large-1a-2", + "cr-m5.large-1b-1", + "cr-m5.xlarge-1b-1", + )) + for _, input := range launchTemplates { + Expect(input.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationPreference).To(Equal(ec2types.CapacityReservationPreferenceCapacityReservationsOnly)) + } + + // Validate that we generate one override per launch 
template, and the override is for the instance pool associated + // with the capacity reservation. + Expect(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Len()).ToNot(Equal(0)) + createFleetInput := awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop() + Expect(createFleetInput.LaunchTemplateConfigs).To(HaveLen(3)) + for _, ltc := range createFleetInput.LaunchTemplateConfigs { + Expect(ltc.Overrides).To(HaveLen(1)) + Expect(launchTemplates).To(HaveKey(*ltc.LaunchTemplateSpecification.LaunchTemplateName)) + lt := launchTemplates[*ltc.LaunchTemplateSpecification.LaunchTemplateName] + cr, ok := lo.Find(crs, func(cr ec2types.CapacityReservation) bool { + return *cr.CapacityReservationId == *lt.LaunchTemplateData.CapacityReservationSpecification.CapacityReservationTarget.CapacityReservationId + }) + Expect(ok).To(BeTrue()) + Expect(*ltc.Overrides[0].AvailabilityZone).To(Equal(*cr.AvailabilityZone)) + Expect(ltc.Overrides[0].InstanceType).To(Equal(ec2types.InstanceType(*cr.InstanceType))) + } + }) + DescribeTable( + "should set the capacity reservation specification according to the capacity reservation feature flag", + func(enabled bool) { + coreoptions.FromContext(ctx).FeatureGates.ReservedCapacity = enabled + + pod := coretest.UnschedulablePod() + ExpectApplied(ctx, env.Client, pod, nodePool, nodeClass) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + + var launchTemplates []*ec2.CreateLaunchTemplateInput + for awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len() != 0 { + launchTemplates = append(launchTemplates, awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Pop()) + } + for _, input := range launchTemplates { + crs := input.LaunchTemplateData.CapacityReservationSpecification + if !enabled { + Expect(crs).To(BeNil()) + } else { + Expect(*crs).To(Equal(ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ + CapacityReservationPreference: ec2types.CapacityReservationPreferenceNone, + })) + } + } + }, + Entry("enabled", true), + Entry("disabled", false), + ) }) // ExpectTags verifies that the expected tags are a subset of the tags found diff --git a/pkg/providers/securitygroup/suite_test.go b/pkg/providers/securitygroup/suite_test.go index 629ebd4ade56..fee91aca534e 100644 --- a/pkg/providers/securitygroup/suite_test.go +++ b/pkg/providers/securitygroup/suite_test.go @@ -55,7 +55,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -67,7 +67,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 7c16a485a55d..6c142dddd912 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -53,7 +53,7 @@ func TestAWS(t 
*testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) @@ -65,7 +65,7 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ diff --git a/pkg/providers/version/suite_test.go b/pkg/providers/version/suite_test.go index 29910f8e04e0..56bbc5086bc5 100644 --- a/pkg/providers/version/suite_test.go +++ b/pkg/providers/version/suite_test.go @@ -23,6 +23,8 @@ import ( coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" coretest "sigs.k8s.io/karpenter/pkg/test" + "github.com/samber/lo" + "github.com/aws/karpenter-provider-aws/pkg/apis" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -52,7 +54,7 @@ func TestAWS(t *testing.T) { var _ = BeforeSuite(func() { env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...)) - ctx = coreoptions.ToContext(ctx, coretest.Options()) + ctx = coreoptions.ToContext(ctx, coretest.Options(coretest.OptionsFields{FeatureGates: coretest.FeatureGates{ReservedCapacity: lo.ToPtr(true)}})) ctx = options.ToContext(ctx, test.Options()) ctx, stop = context.WithCancel(ctx) awsEnv = test.NewEnvironment(ctx, env) diff --git a/pkg/test/environment.go b/pkg/test/environment.go index a03b6081fb33..a5db8743083c 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -32,6 +32,7 @@ import ( awscache "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" + "github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation" "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" @@ -65,30 +66,34 @@ type Environment struct { PricingAPI *fake.PricingAPI // Cache - EC2Cache *cache.Cache - InstanceTypeCache *cache.Cache - UnavailableOfferingsCache *awscache.UnavailableOfferings - LaunchTemplateCache *cache.Cache - SubnetCache *cache.Cache - AvailableIPAdressCache *cache.Cache - AssociatePublicIPAddressCache *cache.Cache - SecurityGroupCache *cache.Cache - InstanceProfileCache *cache.Cache - SSMCache *cache.Cache - DiscoveredCapacityCache *cache.Cache + EC2Cache *cache.Cache + InstanceTypeCache *cache.Cache + OfferingCache *cache.Cache + UnavailableOfferingsCache *awscache.UnavailableOfferings + LaunchTemplateCache *cache.Cache + SubnetCache *cache.Cache + AvailableIPAdressCache *cache.Cache + AssociatePublicIPAddressCache *cache.Cache + SecurityGroupCache *cache.Cache + InstanceProfileCache *cache.Cache + SSMCache *cache.Cache + DiscoveredCapacityCache *cache.Cache + CapacityReservationCache *cache.Cache + CapacityReservationAvailabilityCache *cache.Cache // Providers - InstanceTypesResolver 
*instancetype.DefaultResolver - InstanceTypesProvider *instancetype.DefaultProvider - InstanceProvider *instance.DefaultProvider - SubnetProvider *subnet.DefaultProvider - SecurityGroupProvider *securitygroup.DefaultProvider - InstanceProfileProvider *instanceprofile.DefaultProvider - PricingProvider *pricing.DefaultProvider - AMIProvider *amifamily.DefaultProvider - AMIResolver *amifamily.DefaultResolver - VersionProvider *version.DefaultProvider - LaunchTemplateProvider *launchtemplate.DefaultProvider + CapacityReservationProvider *capacityreservation.DefaultProvider + InstanceTypesResolver *instancetype.DefaultResolver + InstanceTypesProvider *instancetype.DefaultProvider + InstanceProvider *instance.DefaultProvider + SubnetProvider *subnet.DefaultProvider + SecurityGroupProvider *securitygroup.DefaultProvider + InstanceProfileProvider *instanceprofile.DefaultProvider + PricingProvider *pricing.DefaultProvider + AMIProvider *amifamily.DefaultProvider + AMIResolver *amifamily.DefaultResolver + VersionProvider *version.DefaultProvider + LaunchTemplateProvider *launchtemplate.DefaultProvider } func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment { @@ -104,6 +109,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment // cache ec2Cache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) instanceTypeCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + offeringCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) discoveredCapacityCache := cache.New(awscache.DiscoveredCapacityCacheTTL, awscache.DefaultCleanupInterval) unavailableOfferingsCache := awscache.NewUnavailableOfferings() launchTemplateCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) @@ -113,6 +119,8 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment securityGroupCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) instanceProfileCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) ssmCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + capacityReservationCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + capacityReservationAvailabilityCache := cache.New(24*time.Hour, awscache.DefaultCleanupInterval) fakePricingAPI := &fake.PricingAPI{} // Providers @@ -128,30 +136,31 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment ssmProvider := ssmp.NewDefaultProvider(ssmapi, ssmCache) amiProvider := amifamily.NewDefaultProvider(clock, versionProvider, ssmProvider, ec2api, ec2Cache) amiResolver := amifamily.NewDefaultResolver() - instanceTypesResolver := instancetype.NewDefaultResolver(fake.DefaultRegion, pricingProvider, unavailableOfferingsCache) - instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, discoveredCapacityCache, ec2api, subnetProvider, instanceTypesResolver) - launchTemplateProvider := - launchtemplate.NewDefaultProvider( - ctx, - launchTemplateCache, - ec2api, - eksapi, - amiResolver, - securityGroupProvider, - subnetProvider, - lo.ToPtr("ca-bundle"), - make(chan struct{}), - net.ParseIP("10.0.100.10"), - "https://test-cluster", - ) - instanceProvider := - instance.NewDefaultProvider(ctx, - "", - ec2api, - unavailableOfferingsCache, - subnetProvider, - launchTemplateProvider, - ) + instanceTypesResolver := instancetype.NewDefaultResolver(fake.DefaultRegion) + capacityReservationProvider := 
capacityreservation.NewProvider(ec2api, clock, capacityReservationCache, capacityReservationAvailabilityCache) + instanceTypesProvider := instancetype.NewDefaultProvider(instanceTypeCache, offeringCache, discoveredCapacityCache, ec2api, subnetProvider, pricingProvider, capacityReservationProvider, unavailableOfferingsCache, instanceTypesResolver) + launchTemplateProvider := launchtemplate.NewDefaultProvider( + ctx, + launchTemplateCache, + ec2api, + eksapi, + amiResolver, + securityGroupProvider, + subnetProvider, + lo.ToPtr("ca-bundle"), + make(chan struct{}), + net.ParseIP("10.0.100.10"), + "https://test-cluster", + ) + instanceProvider := instance.NewDefaultProvider( + ctx, + "", + ec2api, + unavailableOfferingsCache, + subnetProvider, + launchTemplateProvider, + capacityReservationProvider, + ) return &Environment{ Clock: clock, @@ -162,29 +171,34 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment IAMAPI: iamapi, PricingAPI: fakePricingAPI, - EC2Cache: ec2Cache, - InstanceTypeCache: instanceTypeCache, - LaunchTemplateCache: launchTemplateCache, - SubnetCache: subnetCache, - AvailableIPAdressCache: availableIPAdressCache, - AssociatePublicIPAddressCache: associatePublicIPAddressCache, - SecurityGroupCache: securityGroupCache, - InstanceProfileCache: instanceProfileCache, - UnavailableOfferingsCache: unavailableOfferingsCache, - SSMCache: ssmCache, - DiscoveredCapacityCache: discoveredCapacityCache, - - InstanceTypesResolver: instanceTypesResolver, - InstanceTypesProvider: instanceTypesProvider, - InstanceProvider: instanceProvider, - SubnetProvider: subnetProvider, - SecurityGroupProvider: securityGroupProvider, - LaunchTemplateProvider: launchTemplateProvider, - InstanceProfileProvider: instanceProfileProvider, - PricingProvider: pricingProvider, - AMIProvider: amiProvider, - AMIResolver: amiResolver, - VersionProvider: versionProvider, + EC2Cache: ec2Cache, + InstanceTypeCache: instanceTypeCache, + OfferingCache: offeringCache, + + LaunchTemplateCache: launchTemplateCache, + SubnetCache: subnetCache, + AvailableIPAdressCache: availableIPAdressCache, + AssociatePublicIPAddressCache: associatePublicIPAddressCache, + SecurityGroupCache: securityGroupCache, + InstanceProfileCache: instanceProfileCache, + UnavailableOfferingsCache: unavailableOfferingsCache, + SSMCache: ssmCache, + DiscoveredCapacityCache: discoveredCapacityCache, + CapacityReservationCache: capacityReservationCache, + CapacityReservationAvailabilityCache: capacityReservationAvailabilityCache, + + CapacityReservationProvider: capacityReservationProvider, + InstanceTypesResolver: instanceTypesResolver, + InstanceTypesProvider: instanceTypesProvider, + InstanceProvider: instanceProvider, + SubnetProvider: subnetProvider, + SecurityGroupProvider: securityGroupProvider, + LaunchTemplateProvider: launchTemplateProvider, + InstanceProfileProvider: instanceProfileProvider, + PricingProvider: pricingProvider, + AMIProvider: amiProvider, + AMIResolver: amiResolver, + VersionProvider: versionProvider, } } @@ -200,6 +214,7 @@ func (env *Environment) Reset() { env.EC2Cache.Flush() env.UnavailableOfferingsCache.Flush() + env.OfferingCache.Flush() env.LaunchTemplateCache.Flush() env.SubnetCache.Flush() env.AssociatePublicIPAddressCache.Flush() @@ -208,6 +223,7 @@ func (env *Environment) Reset() { env.InstanceProfileCache.Flush() env.SSMCache.Flush() env.DiscoveredCapacityCache.Flush() + env.CapacityReservationCache.Flush() mfs, err := crmetrics.Registry.Gather() if err != nil { for _, mf := range mfs { 
diff --git a/pkg/test/utils.go b/pkg/test/utils.go index 4e4adebd5752..11496fcc510e 100644 --- a/pkg/test/utils.go +++ b/pkg/test/utils.go @@ -31,3 +31,24 @@ func RemoveNodeClassTagValidation(crds []*apiextensionsv1.CustomResourceDefiniti } return crds } + +// DisableCapacityReservationIDValidation updates the regex validation used for capacity reservation IDs to allow any +// string after the "cr-" prefix. This enables us to embed useful debugging information in the reservation ID, such as +// the instance type and zone. +func DisableCapacityReservationIDValidation(crds []*apiextensionsv1.CustomResourceDefinition) []*apiextensionsv1.CustomResourceDefinition { + for _, crd := range crds { + if crd.Name != "ec2nodeclasses.karpenter.k8s.aws" { + continue + } + // Disable validation for the selector terms + idProps := crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] + idProps.Pattern = "" + crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["spec"].Properties["capacityReservationSelectorTerms"].Items.Schema.Properties["id"] = idProps + + // Disable validation for the status + idProps = crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["status"].Properties["capacityReservations"].Items.Schema.Properties["id"] + idProps.Pattern = "" + crd.Spec.Versions[0].Schema.OpenAPIV3Schema.Properties["status"].Properties["capacityReservations"].Items.Schema.Properties["id"] = idProps + } + return crds +} diff --git a/test/hack/e2e_scripts/install_karpenter.sh b/test/hack/e2e_scripts/install_karpenter.sh index 935746be83cb..9684efbf6a64 100755 --- a/test/hack/e2e_scripts/install_karpenter.sh +++ b/test/hack/e2e_scripts/install_karpenter.sh @@ -18,6 +18,7 @@ helm upgrade --install karpenter "${CHART}" \ --set settings.interruptionQueue="$CLUSTER_NAME" \ --set settings.featureGates.spotToSpotConsolidation=true \ --set settings.featureGates.nodeRepair=true \ + --set settings.featureGates.reservedCapacity=true \ --set controller.resources.requests.cpu=5 \ --set controller.resources.requests.memory=3Gi \ --set controller.resources.limits.cpu=5 \ diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 00da2aab74c6..76bd9b2dce0a 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -15,6 +15,7 @@ limitations under the License. package aws import ( + "context" "fmt" "net" "strconv" @@ -41,6 +42,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" + "github.com/aws/karpenter-provider-aws/pkg/utils" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -525,3 +527,39 @@ func ignoreAlreadyContainsRole(err error) error { } return err } + +func ExpectCapacityReservationCreated( + ctx context.Context, + ec2api *ec2.Client, + instanceType ec2types.InstanceType, + zone string, + capacity int32, + endDate *time.Time, + tags map[string]string, +) string { + GinkgoHelper() + out, err := ec2api.CreateCapacityReservation(ctx, &ec2.CreateCapacityReservationInput{ + InstanceCount: lo.ToPtr(capacity), + InstanceType: lo.ToPtr(string(instanceType)), + InstancePlatform: ec2types.CapacityReservationInstancePlatformLinuxUnix, + AvailabilityZone: lo.ToPtr(zone), + EndDate: endDate, + InstanceMatchCriteria: ec2types.InstanceMatchCriteriaTargeted, + TagSpecifications: lo.Ternary(len(tags) != 0, []ec2types.TagSpecification{{ + ResourceType: ec2types.ResourceTypeCapacityReservation, + Tags: utils.MergeTags(tags), + }}, nil), + }) + Expect(err).ToNot(HaveOccurred()) + return *out.CapacityReservation.CapacityReservationId +} + +func ExpectCapacityReservationsCanceled(ctx context.Context, ec2api *ec2.Client, reservationIDs ...string) { + GinkgoHelper() + for _, id := range reservationIDs { + _, err := ec2api.CancelCapacityReservation(ctx, &ec2.CancelCapacityReservationInput{ + CapacityReservationId: &id, + }) + Expect(err).ToNot(HaveOccurred()) + } +} diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go index 19b990397a7f..cba85ba2c181 100644 --- a/test/suites/consolidation/suite_test.go +++ b/test/suites/consolidation/suite_test.go @@ -21,6 +21,7 @@ import ( "time" "github.com/aws/aws-sdk-go-v2/aws" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/awslabs/operatorpkg/object" "github.com/samber/lo" appsv1 "k8s.io/api/apps/v1" @@ -32,7 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - "sigs.k8s.io/karpenter/pkg/test" + coretest "sigs.k8s.io/karpenter/pkg/test" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -66,7 +67,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Consolidation", func() { +var _ = Describe("Consolidation", Ordered, func() { Context("LastPodEventTime", func() { var nodePool *karpv1.NodePool BeforeEach(func() { @@ -76,9 +77,9 @@ var _ = Describe("Consolidation", func() { }) It("should update lastPodEventTime when pods are scheduled and removed", func() { var numPods int32 = 5 - dep := test.Deployment(test.DeploymentOptions{ + dep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "regular-app"}, }, @@ -129,7 +130,7 @@ var _ = Describe("Consolidation", func() { }) It("should update lastPodEventTime when pods go terminal", func() { podLabels := map[string]string{"app": "regular-app"} - pod := test.Pod(test.PodOptions{ + pod := coretest.Pod(coretest.PodOptions{ // use a non-pause image so that we can have a sleep Image: "alpine:3.20.2", Command: []string{"/bin/sh", "-c", "sleep 30"}, @@ -143,7 +144,7 @@ var _ = Describe("Consolidation", func() { }) job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: test.RandomName(), + Name: coretest.RandomName(), Namespace: "default", }, Spec: batchv1.JobSpec{ @@ -190,9 +191,9 @@ var _ = Describe("Consolidation", func() { nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") numPods = 
5 - dep = test.Deployment(test.DeploymentOptions{ + dep = coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "regular-app"}, }, @@ -245,7 +246,7 @@ var _ = Describe("Consolidation", func() { // This test will hold consolidation until we are ready to execute it nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") - nodePool = test.ReplaceRequirements(nodePool, + nodePool = coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceSize, Operator: corev1.NodeSelectorOpIn, @@ -258,9 +259,9 @@ var _ = Describe("Consolidation", func() { Nodes: "50%", }} numPods = 9 - dep = test.Deployment(test.DeploymentOptions{ + dep = coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -306,7 +307,7 @@ var _ = Describe("Consolidation", func() { // This test will hold consolidation until we are ready to execute it nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") - nodePool = test.ReplaceRequirements(nodePool, + nodePool = coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ Key: v1.LabelInstanceSize, @@ -328,9 +329,9 @@ var _ = Describe("Consolidation", func() { Nodes: "3", }} - ds := test.DaemonSet(test.DaemonSetOptions{ + ds := coretest.DaemonSet(coretest.DaemonSetOptions{ Selector: appLabels, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, }, @@ -352,9 +353,9 @@ var _ = Describe("Consolidation", func() { numPods = 5 deployments := make([]*appsv1.Deployment, numPods) for i := range lo.Range(int(numPods)) { - deployments[i] = test.Deployment(test.DeploymentOptions{ + deployments[i] = coretest.Deployment(coretest.DeploymentOptions{ Replicas: 1, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, }, @@ -485,7 +486,7 @@ var _ = Describe("Consolidation", func() { }) DescribeTable("should consolidate nodes (delete)", Label(debug.NoWatch), Label(debug.NoEvents), func(spotToSpot bool) { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -531,9 +532,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 100 - dep := test.Deployment(test.DeploymentOptions{ + dep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -568,7 +569,7 @@ var _ = Describe("Consolidation", func() { ) DescribeTable("should consolidate nodes (replace)", func(spotToSpot bool) { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -621,9 +622,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 3 - largeDep := 
test.Deployment(test.DeploymentOptions{ + largeDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "large-app"}, }, @@ -644,9 +645,9 @@ var _ = Describe("Consolidation", func() { }, }, }) - smallDep := test.Deployment(test.DeploymentOptions{ + smallDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "small-app"}, }, @@ -723,7 +724,7 @@ var _ = Describe("Consolidation", func() { Entry("if the nodes are spot nodes", true), ) It("should consolidate on-demand nodes to spot (replace)", func() { - nodePool := test.NodePool(karpv1.NodePool{ + nodePool := coretest.NodePool(karpv1.NodePool{ Spec: karpv1.NodePoolSpec{ Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, @@ -768,9 +769,9 @@ var _ = Describe("Consolidation", func() { }) var numPods int32 = 2 - smallDep := test.Deployment(test.DeploymentOptions{ + smallDep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: numPods, - PodOptions: test.PodOptions{ + PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "small-app"}, }, @@ -809,7 +810,7 @@ var _ = Describe("Consolidation", func() { // Expect the node to consolidate to a spot instance as it will be a cheaper // instance than on-demand nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") - test.ReplaceRequirements(nodePool, + coretest.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ Key: karpv1.CapacityTypeLabelKey, @@ -853,4 +854,146 @@ var _ = Describe("Consolidation", func() { env.ExpectDeleted(smallDep) }) + Context("Capacity Reservations", func() { + var largeCapacityReservationID, xlargeCapacityReservationID string + var nodePool *karpv1.NodePool + BeforeAll(func() { + largeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + xlargeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Xlarge, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + }) + AfterAll(func() { + environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID) + }) + BeforeEach(func() { + nodePool = coretest.NodePool(karpv1.NodePool{ + Spec: karpv1.NodePoolSpec{ + Disruption: karpv1.Disruption{ + ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, + ConsolidateAfter: karpv1.MustParseNillableDuration("0s"), + }, + Template: karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimTemplateSpec{ + Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ + { + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved}, + }, + }, + }, + NodeClassRef: &karpv1.NodeClassReference{ + Group: object.GVK(nodeClass).Group, + Kind: object.GVK(nodeClass).Kind, + Name: nodeClass.Name, + }, + }, + }, + }, + }) + }) + It("should consolidate into a reserved offering", func() { + dep := 
coretest.Deployment(coretest.DeploymentOptions{ + PodOptions: coretest.PodOptions{ + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelInstanceTypeStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{ + // Should result in an m5.large initially + string(ec2types.InstanceTypeM5Large), + // Should consolidate to the m5.xlarge when we add the reservation to the nodeclass + string(ec2types.InstanceTypeM5Xlarge), + }, + }}, + }, + Replicas: 1, + }) + env.ExpectCreated(nodePool, nodeClass, dep) + env.EventuallyExpectNodeClaimsReady(env.EventuallyExpectNodeClaimCount("==", 1)...) + n := env.EventuallyExpectNodeCount("==", int(1))[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand)) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectUpdated(nodeClass) + + // Eventually expect the m5.large on-demand node to be replaced with an m5.xlarge reserved node. We should prioritize + // the reserved instance since it's already been paid for. + Eventually(func(g Gomega) { + var nodes corev1.NodeList + g.Expect(env.Client.List(env.Context, &nodes)).To(Succeed()) + filtered := lo.Filter(nodes.Items, func(n corev1.Node, _ int) bool { + if val, ok := n.Labels[karpv1.NodePoolLabelKey]; !ok || val != nodePool.Name { + return false + } + return true + }) + g.Expect(filtered).To(HaveLen(1)) + + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Xlarge))) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) + }, time.Minute*10).Should(Succeed()) + }) + It("should consolidate between reserved offerings", func() { + dep := coretest.Deployment(coretest.DeploymentOptions{ + PodOptions: coretest.PodOptions{ + NodeRequirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelInstanceTypeStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{ + string(ec2types.InstanceTypeM5Large), + string(ec2types.InstanceTypeM5Xlarge), + }, + }}, + }, + Replicas: 1, + }) + + // Start by only enabling the m5.xlarge capacity reservation, ensuring it's provisioned + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectCreated(nodePool, nodeClass, dep) + env.EventuallyExpectNodeClaimsReady(env.EventuallyExpectNodeClaimCount("==", 1)...) + n := env.EventuallyExpectNodeCount("==", int(1))[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Xlarge))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID)) + + // Add the m5.large capacity reservation to the nodeclass. We should consolidate from the xlarge instance to the large. 
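+			// The pod still fits on the smaller m5.large, so Karpenter should replace the m5.xlarge reserved node with one
+			// backed by the m5.large reservation once it becomes selectable.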
+ nodeClass.Spec.CapacityReservationSelectorTerms = append(nodeClass.Spec.CapacityReservationSelectorTerms, v1.CapacityReservationSelectorTerm{ + ID: largeCapacityReservationID, + }) + env.ExpectUpdated(nodeClass) + Eventually(func(g Gomega) { + var nodes corev1.NodeList + g.Expect(env.Client.List(env.Context, &nodes)).To(Succeed()) + filtered := lo.Filter(nodes.Items, func(n corev1.Node, _ int) bool { + if val, ok := n.Labels[karpv1.NodePoolLabelKey]; !ok || val != nodePool.Name { + return false + } + return true + }) + g.Expect(filtered).To(HaveLen(1)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + g.Expect(filtered[0].Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, largeCapacityReservationID)) + }, time.Minute*10).Should(Succeed()) + }) + }) }) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 7e04968090dd..af176867cbd5 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -72,7 +72,7 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Drift", func() { +var _ = Describe("Drift", Ordered, func() { var dep *appsv1.Deployment var selector labels.Selector var numPods int @@ -955,4 +955,115 @@ var _ = Describe("Drift", func() { env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) }) }) + Context("Capacity Reservations", func() { + var largeCapacityReservationID, xlargeCapacityReservationID string + BeforeAll(func() { + largeCapacityReservationID = aws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + xlargeCapacityReservationID = aws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Xlarge, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + }) + AfterAll(func() { + aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID) + }) + BeforeEach(func() { + nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{ + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeReserved}, + }, + }} + }) + It("should drift nodeclaim when the reservation is no longer selected by the nodeclass", func() { + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: largeCapacityReservationID}} + // Include the do-not-disrupt annotation to prevent replacement NodeClaims from leaking between tests + pod := coretest.Pod(coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + karpv1.DoNotDisruptAnnotationKey: "true", + }, + }, + }) + env.ExpectCreated(nodePool, nodeClass, pod) + nc := env.EventuallyExpectNodeClaimCount("==", 1)[0] + env.EventuallyExpectNodeClaimsReady(nc) + n := env.EventuallyExpectCreatedNodeCount("==", 1)[0] + Expect(n.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, string(ec2types.InstanceTypeM5Large))) + Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved)) + Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, largeCapacityReservationID)) + + 
nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: xlargeCapacityReservationID}} + env.ExpectUpdated(nodeClass) + env.EventuallyExpectDrifted(nc) + }) + It("should drift nodeclaim when the nodeclaim is demoted to on-demand", func() { + capacityReservationID := aws.ExpectCapacityReservationCreated( + env.Context, + env.EC2API, + ec2types.InstanceTypeM5Large, + env.ZoneInfo[0].Zone, + 1, + nil, + nil, + ) + DeferCleanup(func() { + aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID) + }) + + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ID: capacityReservationID}} + // Prevent drift from being executed by marking the pod as do-not-disrupt. Without this, the nodeclaim may be replaced + // in-between polling intervals for the eventually block. + pod := coretest.Pod(coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + karpv1.DoNotDisruptAnnotationKey: "true", + }, + }, + }) + env.ExpectCreated(nodePool, nodeClass, pod) + + nc := env.EventuallyExpectNodeClaimCount("==", 1)[0] + req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool { + return req.Key == v1.LabelCapacityReservationID + }) + Expect(ok).To(BeTrue()) + Expect(req.Values).To(ConsistOf(capacityReservationID)) + n := env.EventuallyExpectNodeCount("==", 1)[0] + + aws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, capacityReservationID) + + // The NodeClaim capacity reservation controller runs once every minute, we'll give a little extra time to avoid + // a failure from a small delay, but the capacity type label should be updated and the reservation-id label should + // be removed within a minute of the reservation being canceled. 
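+			// Both the NodeClaim and its Node should have the capacity type label flipped to on-demand and the
+			// capacity reservation ID label removed.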
+			Eventually(func(g Gomega) {
+				updatedNodeClaim := &karpv1.NodeClaim{}
+				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), updatedNodeClaim)).To(Succeed())
+				g.Expect(updatedNodeClaim.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
+				g.Expect(updatedNodeClaim.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
+
+				updatedNode := &corev1.Node{}
+				g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(n), updatedNode)).To(Succeed())
+				g.Expect(updatedNode.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeOnDemand))
+				g.Expect(updatedNode.Labels).ToNot(HaveKey(v1.LabelCapacityReservationID))
+			}).WithTimeout(75 * time.Second).Should(Succeed())
+
+			// Since the nodeclaim is only compatible with reserved instances, we should drift the node when it's demoted to on-demand
+			env.EventuallyExpectDrifted(nc)
+		})
+	})
 })
diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go
index 724bbfdb3ee5..89603c88c798 100644
--- a/test/suites/scheduling/suite_test.go
+++ b/test/suites/scheduling/suite_test.go
@@ -706,6 +706,107 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() {
 			}
 		})
 	})
+
+	Context("Capacity Reservations", func() {
+		var largeCapacityReservationID, xlargeCapacityReservationID string
+		BeforeAll(func() {
+			largeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated(
+				env.Context,
+				env.EC2API,
+				ec2types.InstanceTypeM5Large,
+				env.ZoneInfo[0].Zone,
+				1,
+				nil,
+				nil,
+			)
+			xlargeCapacityReservationID = environmentaws.ExpectCapacityReservationCreated(
+				env.Context,
+				env.EC2API,
+				ec2types.InstanceTypeM5Xlarge,
+				env.ZoneInfo[0].Zone,
+				1,
+				nil,
+				nil,
+			)
+		})
+		AfterAll(func() {
+			environmentaws.ExpectCapacityReservationsCanceled(env.Context, env.EC2API, largeCapacityReservationID, xlargeCapacityReservationID)
+		})
+		BeforeEach(func() {
+			nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{
+				{
+					ID: largeCapacityReservationID,
+				},
+				{
+					ID: xlargeCapacityReservationID,
+				},
+			}
+			nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: corev1.NodeSelectorRequirement{
+				Key:      karpv1.CapacityTypeLabelKey,
+				Operator: corev1.NodeSelectorOpIn,
+				Values:   []string{karpv1.CapacityTypeOnDemand, karpv1.CapacityTypeReserved},
+			}}}
+		})
+		It("should schedule against a specific reservation ID", func() {
+			pod := test.Pod(test.PodOptions{
+				NodeRequirements: []corev1.NodeSelectorRequirement{{
+					Key:      v1.LabelCapacityReservationID,
+					Operator: corev1.NodeSelectorOpIn,
+					Values:   []string{xlargeCapacityReservationID},
+				}},
+			})
+			env.ExpectCreated(nodePool, nodeClass, pod)
+
+			nc := env.EventuallyExpectNodeClaimCount("==", 1)[0]
+			req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool {
+				return req.Key == v1.LabelCapacityReservationID
+			})
+			Expect(ok).To(BeTrue())
+			Expect(req.Values).To(ConsistOf(xlargeCapacityReservationID))
+
+			env.EventuallyExpectNodeClaimsReady(nc)
+			n := env.EventuallyExpectNodeCount("==", 1)[0]
+			Expect(n.Labels).To(HaveKeyWithValue(karpv1.CapacityTypeLabelKey, karpv1.CapacityTypeReserved))
+			Expect(n.Labels).To(HaveKeyWithValue(v1.LabelCapacityReservationID, xlargeCapacityReservationID))
+		})
+		It("should fall back when compatible capacity reservations are exhausted", func() {
+			// We create two pods with self anti-affinity and a node selector on a specific instance type. The anti-affinity term
+			// ensures that we must provision 2 nodes, and the node selector selects upon an instance type with a single reserved
+			// instance available. As such, we should create a reserved NodeClaim for one pod, and an on-demand NodeClaim for the
+			// other.
+			podLabels := map[string]string{"foo": "bar"}
+			pods := test.Pods(2, test.PodOptions{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: podLabels,
+				},
+				NodeRequirements: []corev1.NodeSelectorRequirement{{
+					Key:      corev1.LabelInstanceTypeStable,
+					Operator: corev1.NodeSelectorOpIn,
+					Values:   []string{string(ec2types.InstanceTypeM5Large)},
+				}},
+				PodAntiRequirements: []corev1.PodAffinityTerm{{
+					TopologyKey: corev1.LabelHostname,
+					LabelSelector: &metav1.LabelSelector{
+						MatchLabels: podLabels,
+					},
+				}},
+			})
+			env.ExpectCreated(nodePool, nodeClass, pods[0], pods[1])
+
+			reservedCount := 0
+			for _, nc := range env.EventuallyExpectNodeClaimCount("==", 2) {
+				req, ok := lo.Find(nc.Spec.Requirements, func(req karpv1.NodeSelectorRequirementWithMinValues) bool {
+					return req.Key == v1.LabelCapacityReservationID
+				})
+				if ok {
+					reservedCount++
+					Expect(req.Values).To(ConsistOf(largeCapacityReservationID))
+				}
+			}
+			Expect(reservedCount).To(Equal(1))
+			env.EventuallyExpectNodeCount("==", 2)
+		})
+	})
 })
 
 func ephemeralInitContainer(requirements corev1.ResourceRequirements) corev1.Container {
diff --git a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
index aa3030334dd6..896dbe0d2a9a 100644
--- a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
+++ b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
@@ -74,7 +74,8 @@ Resources:
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:volume/*",
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:network-interface/*",
                 "arn:${AWS::Partition}:ec2:${AWS::Region}:*:launch-template/*",
-                "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*"
+                "arn:${AWS::Partition}:ec2:${AWS::Region}:*:spot-instances-request/*",
+                "arn:${AWS::Partition}:ec2:${AWS::Region}:*:capacity-reservation/*"
               ],
               "Action": [
                 "ec2:RunInstances",
@@ -167,6 +168,7 @@ Resources:
               "Effect": "Allow",
               "Resource": "*",
               "Action": [
+                "ec2:DescribeCapacityReservations",
                 "ec2:DescribeImages",
                 "ec2:DescribeInstances",
                 "ec2:DescribeInstanceTypeOfferings",