From 2e27014af3064bfaec2918bbb1183ce59a7e89ef Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 21 Feb 2025 06:43:31 -0800 Subject: [PATCH] checkpoint review feedback --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 8 --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 8 --- pkg/apis/v1/ec2nodeclass_hash_test.go | 2 +- pkg/apis/v1/ec2nodeclass_status.go | 6 -- .../v1/ec2nodeclass_validation_cel_test.go | 18 +++--- pkg/cache/unavailableofferings.go | 15 +++-- pkg/controllers/interruption/controller.go | 3 +- .../nodeclass/capacityreservation.go | 10 ++- pkg/controllers/nodeclass/validation.go | 2 +- pkg/providers/amifamily/resolver.go | 23 +------ pkg/providers/capacityreservation/provider.go | 39 +++++++----- pkg/providers/capacityreservation/types.go | 63 ++++++++----------- pkg/providers/instance/instance.go | 3 + pkg/providers/instancetype/instancetype.go | 2 +- .../instancetype/offering/provider.go | 2 +- pkg/providers/instancetype/suite_test.go | 2 +- .../launchtemplate/launchtemplate.go | 46 +++++++++----- 17 files changed, 112 insertions(+), 140 deletions(-) diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 6e179257a4d9..b1f48ae85d79 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -679,9 +679,6 @@ spec: availabilityZone: description: The availability zone the capacity reservation is available in. type: string - availableInstanceCount: - description: The last known available instance count for the capacity reservation. - type: integer endTime: description: |- The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter @@ -705,17 +702,12 @@ spec: description: The ID of the AWS account that owns the capacity reservation. 
pattern: ^[0-9]{12}$ type: string - totalInstanceCount: - description: The total instance count for the capacity reservation. - type: integer required: - availabilityZone - - availableInstanceCount - id - instanceMatchCriteria - instanceType - ownerID - - totalInstanceCount type: object type: array conditions: diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 152dd1ac92e9..bd817cc477ec 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -676,9 +676,6 @@ spec: availabilityZone: description: The availability zone the capacity reservation is available in. type: string - availableInstanceCount: - description: The last known available instance count for the capacity reservation. - type: integer endTime: description: |- The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter @@ -702,17 +699,12 @@ spec: description: The ID of the AWS account that owns the capacity reservation. pattern: ^[0-9]{12}$ type: string - totalInstanceCount: - description: The total instance count for the capacity reservation. 
- type: integer required: - availabilityZone - - availableInstanceCount - id - instanceMatchCriteria - instanceType - ownerID - - totalInstanceCount type: object type: array conditions: diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go index a523cfd8189b..87ad4de0b5af 100644 --- a/pkg/apis/v1/ec2nodeclass_hash_test.go +++ b/pkg/apis/v1/ec2nodeclass_hash_test.go @@ -193,7 +193,7 @@ var _ = Describe("Hash", func() { nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ Tags: map[string]string{"ami-test-key": "ami-test-value"}, }} - nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ + nodeClass.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ Tags: map[string]string{"cr-test-key": "cr-test-value"}, }} updatedHash := nodeClass.Hash() diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 28308900e2bc..b89e5e9b6bcb 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -72,9 +72,6 @@ type CapacityReservation struct { // The availability zone the capacity reservation is available in. // +required AvailabilityZone string `json:"availabilityZone"` - // The last known available instance count for the capacity reservation. - // +required - AvailableInstanceCount int `json:"availableInstanceCount,omitempty" hash:"ignore"` // The time at which the capacity reservation expires. Once expired, the reserved capacity is released and Karpenter // will no longer be able to launch instances into that reservation. // +optional @@ -94,9 +91,6 @@ type CapacityReservation struct { // +kubebuilder:validation:Pattern:="^[0-9]{12}$" // +required OwnerID string `json:"ownerID"` - // The total instance count for the capacity reservation. 
- // +required - TotalInstanceCount int `json:"totalInstanceCount" hash:"ignore"` } // EC2NodeClassStatus contains the resolved state of the EC2NodeClass diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index 898ba9dc7ad1..bbf844aea15c 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -450,6 +450,15 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).To(Succeed()) }) + It("should succeed for a valid ownerID", func() { + nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ + OwnerID: "012345678901", + Tags: map[string]string{ + "test": "testvalue", + }, + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) It("should fail with a capacity reservation selector on a malformed id", func() { nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ ID: "r-12345749", @@ -520,15 +529,6 @@ var _ = Describe("CEL/Validation", func() { }} Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) - It("should succeed for a valid ownerID", func() { - nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ - OwnerID: "012345678901", - Tags: map[string]string{ - "test": "testvalue", - }, - }} - Expect(env.Client.Create(ctx, nc)).To(Succeed()) - }) It("should fail when the ownerID is malformed", func() { nc.Spec.CapacityReservationSelectorTerms = []v1.CapacityReservationSelectorTerm{{ OwnerID: "01234567890", // OwnerID must be 12 digits, this is 11 diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go index e9b10154de35..fc78412dfd75 100644 --- a/pkg/cache/unavailableofferings.go +++ b/pkg/cache/unavailableofferings.go @@ -48,21 +48,20 @@ func NewUnavailableOfferings() *UnavailableOfferings { } // IsUnavailable returns true if the offering appears in the cache -func (u *UnavailableOfferings) 
IsUnavailable(instanceType string, zone, capacityType string) bool { +func (u *UnavailableOfferings) IsUnavailable(instanceType ec2types.InstanceType, zone, capacityType string) bool { _, found := u.cache.Get(u.key(instanceType, zone, capacityType)) return found } // MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings -func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) { +func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason string, instanceType ec2types.InstanceType, zone, capacityType string) { // even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL log.FromContext(ctx).WithValues( "reason", unavailableReason, "instance-type", instanceType, "zone", zone, "capacity-type", capacityType, - "ttl", UnavailableOfferingsTTL, - ).V(1).Info("removing offering from offerings") + "ttl", UnavailableOfferingsTTL).V(1).Info("removing offering from offerings") u.cache.SetDefault(u.key(instanceType, zone, capacityType), struct{}{}) atomic.AddUint64(&u.SeqNum, 1) } @@ -70,10 +69,10 @@ func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableR func (u *UnavailableOfferings) MarkUnavailableForFleetErr(ctx context.Context, fleetErr ec2types.CreateFleetError, capacityType string) { instanceType := fleetErr.LaunchTemplateAndOverrides.Overrides.InstanceType zone := aws.ToString(fleetErr.LaunchTemplateAndOverrides.Overrides.AvailabilityZone) - u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), string(instanceType), zone, capacityType) + u.MarkUnavailable(ctx, lo.FromPtr(fleetErr.ErrorCode), instanceType, zone, capacityType) } -func (u *UnavailableOfferings) DeleteOffering(instanceType, zone, capacityType string) { +func (u *UnavailableOfferings) Delete(instanceType ec2types.InstanceType, zone string, capacityType string) { u.cache.Delete(u.key(instanceType, 
zone, capacityType)) } @@ -82,6 +81,6 @@ func (u *UnavailableOfferings) Flush() { } // key returns the cache key for all offerings in the cache -func (*UnavailableOfferings) key(instanceType, zone, capacityType string) string { - return fmt.Sprintf("o:%s:%s:%s", capacityType, instanceType, zone) +func (u *UnavailableOfferings) key(instanceType ec2types.InstanceType, zone string, capacityType string) string { + return fmt.Sprintf("%s:%s:%s", capacityType, instanceType, zone) } diff --git a/pkg/controllers/interruption/controller.go b/pkg/controllers/interruption/controller.go index 64694475348a..81a91054a0ec 100644 --- a/pkg/controllers/interruption/controller.go +++ b/pkg/controllers/interruption/controller.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/metrics" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" sqstypes "github.com/aws/aws-sdk-go-v2/service/sqs/types" "github.com/awslabs/operatorpkg/singleton" "go.uber.org/multierr" @@ -207,7 +208,7 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message, zone := nodeClaim.Labels[corev1.LabelTopologyZone] instanceType := nodeClaim.Labels[corev1.LabelInstanceTypeStable] if zone != "" && instanceType != "" { - c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, karpv1.CapacityTypeSpot) + c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), ec2types.InstanceType(instanceType), zone, karpv1.CapacityTypeSpot) } } if action != NoAction { diff --git a/pkg/controllers/nodeclass/capacityreservation.go b/pkg/controllers/nodeclass/capacityreservation.go index dbcad52186aa..0d862ba31d66 100644 --- a/pkg/controllers/nodeclass/capacityreservation.go +++ b/pkg/controllers/nodeclass/capacityreservation.go @@ -56,6 +56,7 @@ func (c *CapacityReservation) Reconcile(ctx context.Context, nc *v1.EC2NodeClass return reconcile.Result{}, fmt.Errorf("getting capacity reservations, %w", err) } if 
len(reservations) == 0 { + nc.Status.CapacityReservations = nil + nc.StatusConditions().SetTrue(v1.ConditionTypeCapacityReservationsReady) + return reconcile.Result{RequeueAfter: capacityReservationPollPeriod}, nil } @@ -86,6 +87,8 @@ } func capacityReservationFromEC2(cr *ec2types.CapacityReservation) (v1.CapacityReservation, error) { + // Guard against new instance match criteria added in the future. See https://github.com/kubernetes-sigs/karpenter/issues/806 + // for a similar issue. if !lo.Contains([]ec2types.InstanceMatchCriteria{ ec2types.InstanceMatchCriteriaOpen, ec2types.InstanceMatchCriteriaTargeted, @@ -98,17 +101,18 @@ } return v1.CapacityReservation{ - AvailabilityZone: *cr.AvailabilityZone, - // AvailableInstanceCount: int(*cr.AvailableInstanceCount), + AvailabilityZone: *cr.AvailabilityZone, EndTime: endTime, ID: *cr.CapacityReservationId, InstanceMatchCriteria: string(cr.InstanceMatchCriteria), InstanceType: *cr.InstanceType, OwnerID: *cr.OwnerId, - TotalInstanceCount: int(*cr.TotalInstanceCount), }, nil } +// requeueAfter determines the duration until the next target reconciliation time based on the provided reservations. If +// any reservations are expected to expire before we would typically requeue, the duration will be based on the +// nearest expiration time. 
func (c *CapacityReservation) requeueAfter(reservations ...*ec2types.CapacityReservation) time.Duration { var next *time.Time for _, reservation := range reservations { diff --git a/pkg/controllers/nodeclass/validation.go b/pkg/controllers/nodeclass/validation.go index 8131b66047dc..3621e2044b89 100644 --- a/pkg/controllers/nodeclass/validation.go +++ b/pkg/controllers/nodeclass/validation.go @@ -91,7 +91,7 @@ func (n Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) ( return reconcile.Result{}, nil } - createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") + createLaunchTemplateInput := launchtemplate.GetCreateLaunchTemplateInput(ctx, mockOptions(*nodeClaim, nodeClass, tags), corev1.IPv4Protocol, "") createLaunchTemplateInput.DryRun = aws.Bool(true) if _, err := n.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput); awserrors.IgnoreDryRunError(err) != nil { diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index e8a9e213558d..43ce2afd0ec1 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -159,7 +159,9 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N // ordering in this string. reservationIDs: lo.Ternary( capacityType == karpv1.CapacityTypeReserved, - strings.Join(selectReservationIDs(it, nodeClaim), ","), + strings.Join(lo.FilterMap(it.Offerings, func(o *cloudprovider.Offering, _ int) (string, bool) { + return o.ReservationID(), o.CapacityType() == karpv1.CapacityTypeReserved + }), ","), "", ), } @@ -173,25 +175,6 @@ func (r DefaultResolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.N return resolvedTemplates, nil } -// selectReservationIDs filters the set of reservation IDs available on the given instance type to only include those -// that are compatible with the given NodeClaim. 
Additionally, if there are multiple reservations available in the same -// zone, only the reservation with the greatest availability is selected. This is to address a limitation in the -// CreateFleet interface, where you can only provide one override for a given instance-zone combination. -func selectReservationIDs(it *cloudprovider.InstanceType, nodeClaim *karpv1.NodeClaim) []string { - zonalOfferings := map[string]*cloudprovider.Offering{} - for _, o := range it.Offerings.Available().Compatible(scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)) { - if o.CapacityType() != karpv1.CapacityTypeReserved { - continue - } - if current, ok := zonalOfferings[o.Zone()]; !ok || current.ReservationCapacity < o.ReservationCapacity { - zonalOfferings[o.Zone()] = o - } - } - return lo.Map(lo.Values(zonalOfferings), func(o *cloudprovider.Offering, _ int) string { - return o.ReservationID() - }) -} - func GetAMIFamily(amiFamily string, options *Options) AMIFamily { switch amiFamily { case v1.AMIFamilyBottlerocket: diff --git a/pkg/providers/capacityreservation/provider.go b/pkg/providers/capacityreservation/provider.go index 2a136debaaa3..e16f5f867b8a 100644 --- a/pkg/providers/capacityreservation/provider.go +++ b/pkg/providers/capacityreservation/provider.go @@ -46,7 +46,11 @@ type DefaultProvider struct { cm *pretty.ChangeMonitor } -func NewProvider(ec2api sdk.EC2API, clk clock.Clock, reservationCache, reservationAvailabilityCache *cache.Cache) *DefaultProvider { +func NewProvider( + ec2api sdk.EC2API, + clk clock.Clock, + reservationCache, reservationAvailabilityCache *cache.Cache, +) *DefaultProvider { return &DefaultProvider{ availabilityCache: availabilityCache{ cache: reservationAvailabilityCache, @@ -60,40 +64,41 @@ func NewProvider(ec2api sdk.EC2API, clk clock.Clock, reservationCache, reservati } func (p *DefaultProvider) List(ctx context.Context, selectorTerms ...v1.CapacityReservationSelectorTerm) ([]*ec2types.CapacityReservation, 
error) { - queries := QueriesFromSelectorTerms(selectorTerms...) - var reservations []*ec2types.CapacityReservation - var remainingQueries []*Query - for _, query := range queries { - if value, ok := p.reservationCache.Get(query.CacheKey()); ok { - reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) - } else { - remainingQueries = append(remainingQueries, query) - } - } - if len(remainingQueries) == 0 { + queries := QueriesFromSelectorTerms(selectorTerms...) + reservations, queries = p.resolveCachedQueries(queries...) + if len(queries) == 0 { return p.filterReservations(reservations), nil } - - for _, query := range remainingQueries { - paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, query.DescribeCapacityReservationsInput()) + for _, q := range queries { + paginator := ec2.NewDescribeCapacityReservationsPaginator(p.ec2api, q.DescribeCapacityReservationsInput()) for paginator.HasMorePages() { out, err := paginator.NextPage(ctx) if err != nil { return nil, fmt.Errorf("listing capacity reservations, %w", err) } queryReservations := lo.ToSlicePtr(out.CapacityReservations) - p.reservationCache.SetDefault(query.CacheKey(), queryReservations) + p.reservationCache.SetDefault(q.CacheKey(), queryReservations) reservations = append(reservations, queryReservations...) p.syncAvailability(lo.SliceToMap(queryReservations, func(r *ec2types.CapacityReservation) (string, int) { return *r.CapacityReservationId, int(*r.AvailableInstanceCount) })) } } - return p.filterReservations(reservations), nil } +func (p *DefaultProvider) resolveCachedQueries(queries ...*Query) (reservations []*ec2types.CapacityReservation, remainingQueries []*Query) { + for _, q := range queries { + if value, ok := p.reservationCache.Get(q.CacheKey()); ok { + reservations = append(reservations, value.([]*ec2types.CapacityReservation)...) 
+ } else { + remainingQueries = append(remainingQueries, q) + } + } + return reservations, remainingQueries +} + // filterReservations removes duplicate and expired reservations func (p *DefaultProvider) filterReservations(reservations []*ec2types.CapacityReservation) []*ec2types.CapacityReservation { return lo.Filter(lo.UniqBy(reservations, func(r *ec2types.CapacityReservation) string { diff --git a/pkg/providers/capacityreservation/types.go b/pkg/providers/capacityreservation/types.go index bd970b0c33b4..d5ec2c0c5461 100644 --- a/pkg/providers/capacityreservation/types.go +++ b/pkg/providers/capacityreservation/types.go @@ -60,52 +60,39 @@ func (q *Query) CacheKey() string { } func (q *Query) DescribeCapacityReservationsInput() *ec2.DescribeCapacityReservationsInput { + filters := []ec2types.Filter{{ + Name: lo.ToPtr("state"), + Values: []string{string(ec2types.CapacityReservationStateActive)}, + }} if len(q.ids) != 0 { return &ec2.DescribeCapacityReservationsInput{ - Filters: []ec2types.Filter{lo.Must(q.stateFilter())[0]}, + Filters: filters, CapacityReservationIds: q.ids, } } - type filterProvider func() ([]ec2types.Filter, bool) - return &ec2.DescribeCapacityReservationsInput{ - Filters: lo.Flatten(lo.FilterMap([]filterProvider{ - q.stateFilter, - q.ownerIDFilter, - q.tagsFilter, - }, func(f filterProvider, _ int) ([]ec2types.Filter, bool) { - return f() - })), + if q.ownerID != "" { + filters = append(filters, ec2types.Filter{ + Name: lo.ToPtr("owner-id"), + Values: []string{q.ownerID}, + }) } -} - -func (q *Query) stateFilter() ([]ec2types.Filter, bool) { - return []ec2types.Filter{{ - Name: lo.ToPtr("state"), - Values: []string{string(ec2types.CapacityReservationStateActive)}, - }}, true -} - -func (q *Query) ownerIDFilter() ([]ec2types.Filter, bool) { - return []ec2types.Filter{{ - Name: lo.ToPtr("owner-id"), - Values: []string{q.ownerID}, - }}, q.ownerID != "" -} - -func (q *Query) tagsFilter() ([]ec2types.Filter, bool) { - return lo.MapToSlice(q.tags, 
func(k, v string) ec2types.Filter { - if v == "*" { + if len(q.tags) != 0 { + filters = append(filters, lo.MapToSlice(q.tags, func(k, v string) ec2types.Filter { + if v == "*" { + return ec2types.Filter{ + Name: lo.ToPtr("tag-key"), + Values: []string{k}, + } + } return ec2types.Filter{ - Name: lo.ToPtr("tag-key"), - Values: []string{k}, + Name: lo.ToPtr(fmt.Sprintf("tag:%s", k)), + Values: []string{v}, } - } - return ec2types.Filter{ - Name: lo.ToPtr(fmt.Sprintf("tag:%s", k)), - Values: []string{v}, - } - }), len(q.tags) != 0 - + })...) + } + return &ec2.DescribeCapacityReservationsInput{ + Filters: filters, + } } type availabilityCache struct { diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 8a52c8a56ee7..df81a676a29e 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -117,6 +117,9 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass // reserved instances that's all we'll include in our fleet request. 
if reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...); reqs.Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeReserved) { instanceTypes = p.filterReservedInstanceTypes(reqs, instanceTypes) + if _, err := cloudprovider.InstanceTypes(instanceTypes).SatisfiesMinValues(schedulingRequirements); err != nil { + return nil, cloudprovider.NewCreateError(fmt.Errorf("failed to construct CreateFleet request while respecting minValues requirements"), "CreateFleetRequestConstructionFailed", "Failed to construct CreateFleet request while respecting minValues") + } } instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) if err != nil { diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 46045ff0191e..2ab95c5714b2 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -183,7 +183,7 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { p.muInstanceTypesInfo.Lock() defer p.muInstanceTypesInfo.Unlock() - instanceTypes := []ec2types.InstanceTypeInfo{} + var instanceTypes []ec2types.InstanceTypeInfo paginator := ec2.NewDescribeInstanceTypesPaginator(p.ec2api, &ec2.DescribeInstanceTypesInput{ Filters: []ec2types.Filter{ { diff --git a/pkg/providers/instancetype/offering/provider.go b/pkg/providers/instancetype/offering/provider.go index 74f3211aeed6..ae5e667f145d 100644 --- a/pkg/providers/instancetype/offering/provider.go +++ b/pkg/providers/instancetype/offering/provider.go @@ -106,7 +106,7 @@ func (p *DefaultProvider) createOfferings( continue } - isUnavailable := p.unavailableOfferings.IsUnavailable(it.Name, zone, capacityType) + isUnavailable := p.unavailableOfferings.IsUnavailable(ec2types.InstanceType(it.Name), zone, capacityType) _, hasSubnetZone := subnetZones[zone] var price float64 var hasPrice bool diff --git 
a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index db25bf6b21f6..eb162c9c4cd2 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -2079,7 +2079,7 @@ var _ = Describe("InstanceTypeProvider", func() { ExpectNotScheduled(ctx, env.Client, pod) // capacity shortage is over - expire the item from the cache and try again awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{}) - awsEnv.UnavailableOfferingsCache.DeleteOffering("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) + awsEnv.UnavailableOfferingsCache.Delete("inf2.24xlarge", "test-zone-1a", karpv1.CapacityTypeOnDemand) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) node := ExpectScheduled(ctx, env.Client, pod) Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "inf2.24xlarge")) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index d07c37b09bb3..103173b7c72a 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -38,6 +38,8 @@ import ( "k8s.io/apimachinery/pkg/api/resource" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpoptions "sigs.k8s.io/karpenter/pkg/operator/options" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -231,7 +233,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami if err != nil { return ec2types.LaunchTemplate{}, err } - createLaunchTemplateInput := GetCreateLaunchTemplateInput(options, p.ClusterIPFamily, userData) + createLaunchTemplateInput := GetCreateLaunchTemplateInput(ctx, options, p.ClusterIPFamily, userData) output, err := p.ec2api.CreateLaunchTemplate(ctx, createLaunchTemplateInput) if err != nil { return ec2types.LaunchTemplate{}, err @@ -241,7 
+243,12 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami } // you need UserData, AmiID, tags, blockdevicemappings, instance profile, -func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFamily corev1.IPFamily, userData string) *ec2.CreateLaunchTemplateInput { +func GetCreateLaunchTemplateInput( + ctx context.Context, + options *amifamily.LaunchTemplate, + ClusterIPFamily corev1.IPFamily, + userData string, +) *ec2.CreateLaunchTemplateInput { launchTemplateDataTags := []ec2types.LaunchTemplateTagSpecificationRequest{ {ResourceType: ec2types.ResourceTypeNetworkInterface, Tags: utils.MergeTags(options.Tags)}, } @@ -249,24 +256,10 @@ func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFa launchTemplateDataTags = append(launchTemplateDataTags, ec2types.LaunchTemplateTagSpecificationRequest{ResourceType: ec2types.ResourceTypeSpotInstancesRequest, Tags: utils.MergeTags(options.Tags)}) } networkInterfaces := generateNetworkInterfaces(options, ClusterIPFamily) - return &ec2.CreateLaunchTemplateInput{ + lt := &ec2.CreateLaunchTemplateInput{ LaunchTemplateName: aws.String(LaunchTemplateName(options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: blockDeviceMappings(options.BlockDeviceMappings), - CapacityReservationSpecification: &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ - CapacityReservationPreference: lo.Ternary( - options.CapacityType == karpv1.CapacityTypeReserved, - ec2types.CapacityReservationPreferenceCapacityReservationsOnly, - ec2types.CapacityReservationPreferenceNone, - ), - CapacityReservationTarget: lo.Ternary( - options.CapacityType == karpv1.CapacityTypeReserved, - &ec2types.CapacityReservationTarget{ - CapacityReservationId: &options.CapacityReservationID, - }, - nil, - ), - }, IamInstanceProfile: &ec2types.LaunchTemplateIamInstanceProfileSpecificationRequest{ Name: aws.String(options.InstanceProfile), }, @@ -301,6 
+294,25 @@ func GetCreateLaunchTemplateInput(options *amifamily.LaunchTemplate, ClusterIPFa }, }, } + // Gate this specifically since the update to CapacityReservationPreference will opt od / spot launches out of open + // ODCRs, which is a breaking change from the pre-native ODCR support behavior. + if karpoptions.FromContext(ctx).FeatureGates.ReservedCapacity { + lt.LaunchTemplateData.CapacityReservationSpecification = &ec2types.LaunchTemplateCapacityReservationSpecificationRequest{ + CapacityReservationPreference: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + ec2types.CapacityReservationPreferenceCapacityReservationsOnly, + ec2types.CapacityReservationPreferenceNone, + ), + CapacityReservationTarget: lo.Ternary( + options.CapacityType == karpv1.CapacityTypeReserved, + &ec2types.CapacityReservationTarget{ + CapacityReservationId: &options.CapacityReservationID, + }, + nil, + ), + } + } + return lt } // generateNetworkInterfaces generates network interfaces for the launch template.