Skip to content

Commit

Permalink
feat(glue): support Ray jobs (#23822)
Browse files Browse the repository at this point in the history
Glue launched a new job type: `AWS Glue for Ray`.
https://aws.amazon.com/about-aws/whats-new/2022/11/aws-glue-ray-preview/

This PR is to support the Ray job type in AWS CDK.

----

### All Submissions:

* [x] Have you followed the guidelines in our [Contributing guide?](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md)

### Adding new Construct Runtime Dependencies:

* [ ] This PR adds new construct runtime dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-construct-runtime-dependencies)

### New Features

* [ ] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)?
	* [ ] Did you use `yarn integ` to deploy the infrastructure and generate the snapshot (i.e. `yarn integ` without `--dry-run`)?

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
moomindani authored Feb 9, 2023
1 parent 57770bb commit 8de50d6
Show file tree
Hide file tree
Showing 10 changed files with 497 additions and 16 deletions.
17 changes: 17 additions & 0 deletions packages/@aws-cdk/aws-glue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,23 @@ new glue.Job(this, 'PythonShellJob', {
});
```

### Ray Jobs

These jobs run in a Ray environment managed by AWS Glue.

```ts
new glue.Job(this, 'RayJob', {
executable: glue.JobExecutable.pythonRay({
glueVersion: glue.GlueVersion.V4_0,
pythonVersion: glue.PythonVersion.THREE_NINE,
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
}),
workerType: glue.WorkerType.Z_2X,
workerCount: 2,
description: 'an example Ray job'
});
```

See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.

## Connection
Expand Down
44 changes: 39 additions & 5 deletions packages/@aws-cdk/aws-glue/lib/job-executable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ export enum PythonVersion {
*/
export class JobType {
/**
* Command for running a Glue ETL job.
* Command for running a Glue Spark job.
*/
public static readonly ETL = new JobType('glueetl');

/**
* Command for running a Glue streaming job.
* Command for running a Glue Spark streaming job.
*/
public static readonly STREAMING = new JobType('gluestreaming');

Expand All @@ -109,6 +109,11 @@ export class JobType {
*/
public static readonly PYTHON_SHELL = new JobType('pythonshell');

/**
* Command for running a Glue Ray job.
*/
public static readonly RAY = new JobType('glueray');

/**
* Custom type name
* @param name type name
Expand Down Expand Up @@ -211,6 +216,11 @@ export interface PythonSparkJobExecutableProps extends SharedSparkJobExecutableP
*/
export interface PythonShellExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}

/**
* Props for creating a Python Ray job executable
*/
export interface PythonRayExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}

/**
* The executable properties related to the Glue job's GlueVersion, JobType and code
*/
Expand Down Expand Up @@ -281,6 +291,19 @@ export class JobExecutable {
});
}

/**
* Create Python executable props for Ray jobs.
*
* @param props Ray Job props.
*/
public static pythonRay(props: PythonRayExecutableProps): JobExecutable {
return new JobExecutable({
...props,
type: JobType.RAY,
language: JobLanguage.PYTHON,
});
}

/**
* Create a custom JobExecutable.
*
Expand All @@ -297,10 +320,18 @@ export class JobExecutable {
if (config.language !== JobLanguage.PYTHON) {
throw new Error('Python shell requires the language to be set to Python');
}
if ([GlueVersion.V0_9, GlueVersion.V2_0, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
if ([GlueVersion.V0_9, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Python Shell`);
}
}
if (JobType.RAY === config.type) {
if (config.language !== JobLanguage.PYTHON) {
throw new Error('Ray requires the language to be set to Python');
}
if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Ray`);
}
}
if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support extraJarsFirst`);
}
Expand All @@ -310,8 +341,11 @@ export class JobExecutable {
if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) {
throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON');
}
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL) {
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell');
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL && config.type !== JobType.RAY) {
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray');
}
if (config.pythonVersion === PythonVersion.THREE && config.type === JobType.RAY) {
throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray');
}
this.config = config;
}
Expand Down
12 changes: 12 additions & 0 deletions packages/@aws-cdk/aws-glue/lib/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ export class WorkerType {
*/
public static readonly G_2X = new WorkerType('G.2X');

/**
* Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs.
*/
public static readonly G_025X = new WorkerType('G.025X');

/**
* Each worker maps to 2 high-memory DPU [M-DPU] (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs.
*/
public static readonly Z_2X = new WorkerType('Z.2X');

/**
* Custom worker type
* @param workerType custom worker type
Expand Down Expand Up @@ -726,6 +736,8 @@ export class Job extends JobBase {
private setupSparkUI(executable: JobExecutableConfig, role: iam.IRole, props: SparkUIProps) {
if (JobType.PYTHON_SHELL === executable.type) {
throw new Error('Spark UI is not available for JobType.PYTHON_SHELL jobs');
} else if (JobType.RAY === executable.type) {
throw new Error('Spark UI is not available for JobType.RAY jobs');
}

const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
}
}
},
"977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2": {
"b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce": {
"source": {
"path": "aws-glue-job.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"objectKey": "b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,11 @@
},
"GlueVersion": "2.0",
"Name": "StreamingJob2.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"EtlJob30ServiceRole8E675579": {
Expand Down Expand Up @@ -705,9 +707,11 @@
},
"GlueVersion": "3.0",
"Name": "StreamingJob3.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"EtlJob40ServiceRoleBDD9998A": {
Expand Down Expand Up @@ -1060,9 +1064,11 @@
},
"GlueVersion": "4.0",
"Name": "StreamingJob4.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"ShellJobServiceRoleCF97BC4B": {
Expand Down Expand Up @@ -1314,6 +1320,133 @@
"key": "value"
}
}
},
"RayJobServiceRole51433C3D": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": "glue.amazonaws.com"
}
}
],
"Version": "2012-10-17"
},
"ManagedPolicyArns": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":iam::aws:policy/service-role/AWSGlueServiceRole"
]
]
}
]
}
},
"RayJobServiceRoleDefaultPolicyA615640D": {
"Type": "AWS::IAM::Policy",
"Properties": {
"PolicyDocument": {
"Statement": [
{
"Action": [
"s3:GetBucket*",
"s3:GetObject*",
"s3:List*"
],
"Effect": "Allow",
"Resource": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":s3:::",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"/*"
]
]
},
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":s3:::",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
}
]
]
}
]
}
],
"Version": "2012-10-17"
},
"PolicyName": "RayJobServiceRoleDefaultPolicyA615640D",
"Roles": [
{
"Ref": "RayJobServiceRole51433C3D"
}
]
}
},
"RayJob2F7864D9": {
"Type": "AWS::Glue::Job",
"Properties": {
"Command": {
"Name": "glueray",
"PythonVersion": "3.9",
"ScriptLocation": {
"Fn::Join": [
"",
[
"s3://",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py"
]
]
}
},
"Role": {
"Fn::GetAtt": [
"RayJobServiceRole51433C3D",
"Arn"
]
},
"DefaultArguments": {
"--job-language": "python",
"arg1": "value1",
"arg2": "value2"
},
"GlueVersion": "4.0",
"Name": "RayJob",
"NumberOfWorkers": 2,
"Tags": {
"key": "value"
},
"WorkerType": "Z.2X"
}
}
},
"Parameters": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down Expand Up @@ -213,6 +213,24 @@
"data": "ShellJob390C141361"
}
],
"/aws-glue-job/RayJob/ServiceRole/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJobServiceRole51433C3D"
}
],
"/aws-glue-job/RayJob/ServiceRole/DefaultPolicy/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJobServiceRoleDefaultPolicyA615640D"
}
],
"/aws-glue-job/RayJob/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJob2F7864D9"
}
],
"/aws-glue-job/BootstrapVersion": [
{
"type": "aws:cdk:logicalId",
Expand Down
Loading

0 comments on commit 8de50d6

Please sign in to comment.