From 5123e88eb1cf48b1f8908c83f2893cd4a39489bf Mon Sep 17 00:00:00 2001 From: AlexKaracaoglu Date: Fri, 13 Sep 2024 13:37:18 -0400 Subject: [PATCH 1/5] chore(eks): update nodegroup gpu check and add gpu instance type --- packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts | 6 ++++++ packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts b/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts index f5fa84835d910..c9aa4d94213f0 100644 --- a/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts +++ b/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts @@ -900,6 +900,11 @@ export enum InstanceClass { */ G6 = 'g6', + /** + * Graphics-optimized instances powered by AMD EPYC processors and NVIDIA L40S Tensor Core GPUs, 6th generation + */ + G6E = 'g6e', + /** * Parallel-processing optimized instances, 2nd generation */ @@ -1589,6 +1594,7 @@ export class InstanceType { [InstanceClass.GRAPHICS5_GRAVITON2]: 'g5g', [InstanceClass.G5G]: 'g5g', [InstanceClass.GRAPHICS6]: 'g6', + [InstanceClass.G6E]: 'g6e', [InstanceClass.G6]: 'g6', [InstanceClass.PARALLEL2]: 'p2', [InstanceClass.P2]: 'p2', diff --git a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts index fe0df0fd7cc37..7de27f42dbd8a 100644 --- a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts +++ b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts @@ -605,7 +605,7 @@ const gpuAmiTypes: NodegroupAmiType[] = [ function isGpuInstanceType(instanceType: InstanceType): boolean { //compare instanceType to known GPU InstanceTypes const knownGpuInstanceTypes = [InstanceClass.P2, InstanceClass.P3, InstanceClass.P3DN, InstanceClass.P4DE, InstanceClass.P4D, - InstanceClass.G3S, InstanceClass.G3, InstanceClass.G4DN, InstanceClass.G4AD, InstanceClass.G5, InstanceClass.G5G, + InstanceClass.G3S, InstanceClass.G3, InstanceClass.G4DN, InstanceClass.G4AD, InstanceClass.G5, InstanceClass.G5G, InstanceClass.G6, InstanceClass.G6E, InstanceClass.INF1, InstanceClass.INF2]; return knownGpuInstanceTypes.some((c) => instanceType.sameInstanceClassAs(InstanceType.of(c, InstanceSize.LARGE))); } From dc79508be527bbb9ed8beb532fe78a2e9bc0a68e Mon Sep 17 00:00:00 2001 From: AlexKaracaoglu Date: Fri, 13 Sep 2024 16:26:55 -0400 Subject: [PATCH 2/5] chore(eks): update nodegroup gpu check and add gpu instance type --- packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts | 4 ++-- tools/@aws-cdk/cdk-build-tools/package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts index 7de27f42dbd8a..4d49685b43615 100644 --- a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts +++ b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts @@ -605,8 +605,8 @@ const gpuAmiTypes: NodegroupAmiType[] = [ function isGpuInstanceType(instanceType: InstanceType): boolean { //compare instanceType to known GPU InstanceTypes const knownGpuInstanceTypes = [InstanceClass.P2, InstanceClass.P3, InstanceClass.P3DN, InstanceClass.P4DE, InstanceClass.P4D, - InstanceClass.G3S, InstanceClass.G3, InstanceClass.G4DN, InstanceClass.G4AD, InstanceClass.G5, InstanceClass.G5G, InstanceClass.G6, InstanceClass.G6E, - InstanceClass.INF1, InstanceClass.INF2]; + InstanceClass.G3S, InstanceClass.G3, InstanceClass.G4DN, InstanceClass.G4AD, InstanceClass.G5, InstanceClass.G5G, InstanceClass.G6, + InstanceClass.G6E, InstanceClass.INF1, InstanceClass.INF2]; return knownGpuInstanceTypes.some((c) => instanceType.sameInstanceClassAs(InstanceType.of(c, InstanceSize.LARGE))); } diff --git a/tools/@aws-cdk/cdk-build-tools/package.json b/tools/@aws-cdk/cdk-build-tools/package.json index ba8558a15a8dd..7c5c0eed19e03 100644 --- a/tools/@aws-cdk/cdk-build-tools/package.json +++ b/tools/@aws-cdk/cdk-build-tools/package.json @@ -87,4 +87,4 @@ "ubergen": { "exclude": true } -} \ No newline at end of file +} From 8e88f82f07867b5c8c9329dfec90eae41b10cf9d Mon Sep 17 00:00:00 2001 From: AlexKaracaoglu Date: Mon, 16 Sep 2024 09:01:22 -0400 Subject: [PATCH 3/5] chore(eks): update nodegroup gpu check and add gpu instance type --- tools/@aws-cdk/cdk-build-tools/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/@aws-cdk/cdk-build-tools/package.json b/tools/@aws-cdk/cdk-build-tools/package.json index 7c5c0eed19e03..ba8558a15a8dd 100644 --- a/tools/@aws-cdk/cdk-build-tools/package.json +++ b/tools/@aws-cdk/cdk-build-tools/package.json @@ -87,4 +87,4 @@ "ubergen": { "exclude": true } -} +} \ No newline at end of file From 7f82fd51a6e0b447ff7036039de669ccb25a0035 Mon Sep 17 00:00:00 2001 From: AlexKaracaoglu Date: Fri, 11 Oct 2024 15:07:46 -0400 Subject: [PATCH 4/5] chore(eks): add isGpuInstanceType tests --- .../aws-cdk-lib/aws-ec2/lib/instance-types.ts | 1 - .../aws-eks/lib/managed-nodegroup.ts | 2 +- .../aws-eks/test/nodegroup.test.ts | 41 +++++++++++++++++-- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts b/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts index f7ea73264887f..098bfdfa124cb 100644 --- a/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts +++ b/packages/aws-cdk-lib/aws-ec2/lib/instance-types.ts @@ -1599,7 +1599,6 @@ export class InstanceType { [InstanceClass.GRAPHICS5_GRAVITON2]: 'g5g', [InstanceClass.G5G]: 'g5g', [InstanceClass.GRAPHICS6]: 'g6', - [InstanceClass.G6E]: 'g6e', [InstanceClass.G6]: 'g6', [InstanceClass.GRAPHICS6_EFFICIENT]: 'g6e', [InstanceClass.G6E]: 'g6e', diff --git a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts index 38878ce413ded..5fbcb7332a75c 100644 --- a/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts +++ b/packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts @@ -602,7 +602,7 @@ const gpuAmiTypes: NodegroupAmiType[] = [ * This function check if the instanceType is GPU instance. * @param instanceType The EC2 instance type */ -function isGpuInstanceType(instanceType: InstanceType): boolean { +export function isGpuInstanceType(instanceType: InstanceType): boolean { //compare instanceType to known GPU InstanceTypes const knownGpuInstanceTypes = [InstanceClass.P2, InstanceClass.P3, InstanceClass.P3DN, InstanceClass.P4DE, InstanceClass.P4D, InstanceClass.G3S, InstanceClass.G3, InstanceClass.G4DN, InstanceClass.G4AD, InstanceClass.G5, InstanceClass.G5G, InstanceClass.G6, diff --git a/packages/aws-cdk-lib/aws-eks/test/nodegroup.test.ts b/packages/aws-cdk-lib/aws-eks/test/nodegroup.test.ts index b4ba3fd8b3275..23b013800f0aa 100644 --- a/packages/aws-cdk-lib/aws-eks/test/nodegroup.test.ts +++ b/packages/aws-cdk-lib/aws-eks/test/nodegroup.test.ts @@ -6,7 +6,7 @@ import * as iam from '../../aws-iam'; import * as cdk from '../../core'; import * as cxapi from '../../cx-api'; import * as eks from '../lib'; -import { NodegroupAmiType, TaintEffect } from '../lib'; +import { isGpuInstanceType, NodegroupAmiType, TaintEffect } from '../lib'; /* eslint-disable max-len */ @@ -617,8 +617,8 @@ describe('node group', () => { new eks.Nodegroup(stack, 'Nodegroup', { cluster, instanceTypes: [ - new ec2.InstanceType('p3.large'), - new ec2.InstanceType('g3.large'), + new ec2.InstanceType('g6e.large'), + new ec2.InstanceType('g5.large'), ], }); @@ -1735,3 +1735,38 @@ describe('node group', () => { expect(() => cluster.addNodegroupCapacity('ng', { maxUnavailablePercentage: 101 })).toThrow(/maxUnavailablePercentage must be between 1 and 100/); }); }); + +describe('isGpuInstanceType', () => { + it('should return true for known GPU instance types', () => { + const gpuInstanceTypes = [ + ec2.InstanceType.of(ec2.InstanceClass.P2, ec2.InstanceSize.XLARGE), + ec2.InstanceType.of(ec2.InstanceClass.G3, ec2.InstanceSize.XLARGE), + ec2.InstanceType.of(ec2.InstanceClass.P4D, ec2.InstanceSize.LARGE), + ec2.InstanceType.of(ec2.InstanceClass.G6, ec2.InstanceSize.MEDIUM), + ec2.InstanceType.of(ec2.InstanceClass.G6E, ec2.InstanceSize.XLARGE2), + ]; + gpuInstanceTypes.forEach(instanceType => { + expect(isGpuInstanceType(instanceType)).toBe(true); + }); + }); + it('should return false for non-GPU instance types', () => { + const nonGpuInstanceTypes = [ + ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MICRO), + ec2.InstanceType.of(ec2.InstanceClass.M5, ec2.InstanceSize.LARGE), + ec2.InstanceType.of(ec2.InstanceClass.C5, ec2.InstanceSize.XLARGE), + ]; + nonGpuInstanceTypes.forEach(instanceType => { + expect(isGpuInstanceType(instanceType)).toBe(false); + }); + }); + it('should return true for different sizes of GPU instance types', () => { + const gpuInstanceTypes = [ + ec2.InstanceType.of(ec2.InstanceClass.G6, ec2.InstanceSize.XLARGE), + ec2.InstanceType.of(ec2.InstanceClass.G6, ec2.InstanceSize.XLARGE16), + ec2.InstanceType.of(ec2.InstanceClass.G6, ec2.InstanceSize.XLARGE48), + ]; + gpuInstanceTypes.forEach(instanceType => { + expect(isGpuInstanceType(instanceType)).toBe(true); + }); + }); +}); From f696b9d857feb55155e01f96987655eab9c16a3e Mon Sep 17 00:00:00 2001 From: AlexKaracaoglu Date: Fri, 22 Nov 2024 09:31:07 -0500 Subject: [PATCH 5/5] feat(eks): add nodegroup gpu integ test --- .../test/aws-eks/test/integ.eks-cluster.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/@aws-cdk-testing/framework-integ/test/aws-eks/test/integ.eks-cluster.ts b/packages/@aws-cdk-testing/framework-integ/test/aws-eks/test/integ.eks-cluster.ts index 59011249edfa7..c43ef29fbfd0b 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/aws-eks/test/integ.eks-cluster.ts +++ b/packages/@aws-cdk-testing/framework-integ/test/aws-eks/test/integ.eks-cluster.ts @@ -75,6 +75,8 @@ class EksClusterStack extends Stack { this.assertNodeGroupCustomAmi(); + this.assertNodeGroupGpu(); + this.assertSimpleManifest(); this.assertManifestWithoutValidation(); @@ -277,6 +279,19 @@ class EksClusterStack extends Stack { nodeRole: this.cluster.defaultCapacity ? this.cluster.defaultCapacity.role : undefined, }); } + private assertNodeGroupGpu() { + // add a GPU nodegroup + this.cluster.addNodegroupCapacity('extra-ng-gpu', { + instanceTypes: [ + new ec2.InstanceType('p2.xlarge'), + new ec2.InstanceType('g5.xlarge'), + new ec2.InstanceType('g6e.xlarge'), + ], + minSize: 1, + // reusing the default capacity nodegroup instance role when available + nodeRole: this.cluster.defaultCapacity ? this.cluster.defaultCapacity.role : undefined, + }); + } private assertSpotCapacity() { // spot instances (up to 10) this.cluster.addAutoScalingGroupCapacity('spot', {