Skip to content

Commit

Permalink
fix(glue): --conf parameter is no longer a reserved keyword for glue …
Browse files Browse the repository at this point in the history
…jobs (#23673)

AWS Glue has changed its public description for the job parameter `--conf`.

The latest description:
https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
> Controls Spark config parameters. It is for advanced use cases.

Given this change, AWS CDK no longer needs to block the use of this parameter.

This PR removes the validation for the parameter `--conf`.

## Background

The parameter `--conf` is required in order to use several Spark capabilities. One typical use case is native data lake table format support.
https://aws.amazon.com/about-aws/whats-new/2022/11/aws-glue-apache-spark-native-data-lake-frameworks-apache-hudi-iceberg-delta-lake/

Public sample notebooks (e.g. https://github.com/aws-samples/aws-glue-samples/blob/master/examples/notebooks/native_hudi_sql.ipynb) use the parameter `--conf`.

----

### All Submissions:

* [x] Have you followed the guidelines in our [Contributing guide?](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md)

### Adding new Construct Runtime Dependencies:

* [ ] This PR adds new construct runtime dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-construct-runtime-dependencies)

### New Features

* [ ] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)?
	* [ ] Did you use `yarn integ` to deploy the infrastructure and generate the snapshot (i.e. `yarn integ` without `--dry-run`)?

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
moomindani authored Jan 18, 2023
1 parent f5a3a29 commit 3d0f4ba
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 95 deletions.
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-glue/lib/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ export class Job extends JobBase {
*/
private checkNoReservedArgs(defaultArguments?: { [key: string]: string }) {
if (defaultArguments) {
const reservedArgs = new Set(['--conf', '--debug', '--mode', '--JOB_NAME']);
const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']);
Object.keys(defaultArguments).forEach((arg) => {
if (reservedArgs.has(arg)) {
throw new Error(`The ${arg} argument is reserved by Glue. Don't set it`);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "22.0.0",
"version": "29.0.0",
"files": {
"432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": {
"source": {
Expand All @@ -14,15 +14,15 @@
}
}
},
"0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4": {
"977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2": {
"source": {
"path": "aws-glue-job.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4.json",
"objectKey": "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"ExecutionProperty": {
"MaxConcurrentRuns": 2
Expand Down Expand Up @@ -527,7 +528,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"ExecutionProperty": {
"MaxConcurrentRuns": 2
Expand Down Expand Up @@ -881,7 +883,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"ExecutionProperty": {
"MaxConcurrentRuns": 2
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version":"22.0.0"}
{"version":"29.0.0"}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "22.0.0",
"version": "29.0.0",
"testCases": {
"integ.job": {
"stacks": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "22.0.0",
"version": "29.0.0",
"artifacts": {
"aws-glue-job.assets": {
"type": "cdk:asset-manifest",
Expand All @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down Expand Up @@ -224,78 +224,6 @@
"type": "aws:cdk:logicalId",
"data": "CheckBootstrapVersion"
}
],
"EtlJobServiceRole837F781B": [
{
"type": "aws:cdk:logicalId",
"data": "EtlJobServiceRole837F781B",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"EtlJobServiceRoleDefaultPolicy8BFE343B": [
{
"type": "aws:cdk:logicalId",
"data": "EtlJobServiceRoleDefaultPolicy8BFE343B",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"EtlJobSparkUIBucketBF23744B": [
{
"type": "aws:cdk:logicalId",
"data": "EtlJobSparkUIBucketBF23744B",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"EtlJob7FC88E45": [
{
"type": "aws:cdk:logicalId",
"data": "EtlJob7FC88E45",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"EtlJobSuccessMetricRuleA72A3EF6": [
{
"type": "aws:cdk:logicalId",
"data": "EtlJobSuccessMetricRuleA72A3EF6",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"StreamingJobServiceRole1B4B8BF9": [
{
"type": "aws:cdk:logicalId",
"data": "StreamingJobServiceRole1B4B8BF9",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"StreamingJobServiceRoleDefaultPolicyA0CC4C68": [
{
"type": "aws:cdk:logicalId",
"data": "StreamingJobServiceRoleDefaultPolicyA0CC4C68",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
],
"StreamingJob3783CC17": [
{
"type": "aws:cdk:logicalId",
"data": "StreamingJob3783CC17",
"trace": [
"!!DESTRUCTIVE_CHANGES: WILL_DESTROY"
]
}
]
},
"displayName": "aws-glue-job"
Expand Down
21 changes: 12 additions & 9 deletions packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/tree.json
Original file line number Diff line number Diff line change
Expand Up @@ -203,21 +203,21 @@
"version": "0.0.0"
}
},
"Code93a4952ea654434aca8481fb2bc2a836": {
"id": "Code93a4952ea654434aca8481fb2bc2a836",
"path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836",
"Codebeaf1c9f157c9b396ec6972f85317dbc": {
"id": "Codebeaf1c9f157c9b396ec6972f85317dbc",
"path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc",
"children": {
"Stage": {
"id": "Stage",
"path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836/Stage",
"path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc/Stage",
"constructInfo": {
"fqn": "@aws-cdk/core.AssetStaging",
"version": "0.0.0"
}
},
"AssetBucket": {
"id": "AssetBucket",
"path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836/AssetBucket",
"path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc/AssetBucket",
"constructInfo": {
"fqn": "@aws-cdk/aws-s3.BucketBase",
"version": "0.0.0"
Expand Down Expand Up @@ -275,7 +275,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"executionProperty": {
"maxConcurrentRuns": 2
Expand Down Expand Up @@ -788,7 +789,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"executionProperty": {
"maxConcurrentRuns": 2
Expand Down Expand Up @@ -1301,7 +1303,8 @@
]
},
"arg1": "value1",
"arg2": "value2"
"arg2": "value2",
"--conf": "valueConf"
},
"executionProperty": {
"maxConcurrentRuns": 2
Expand Down Expand Up @@ -1974,7 +1977,7 @@
"path": "Tree",
"constructInfo": {
"fqn": "constructs.Construct",
"version": "10.1.168"
"version": "10.1.209"
}
}
},
Expand Down
5 changes: 3 additions & 2 deletions packages/@aws-cdk/aws-glue/test/integ.job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.
timeout: cdk.Duration.minutes(5),
notifyDelayAfter: cdk.Duration.minutes(1),
defaultArguments: {
arg1: 'value1',
arg2: 'value2',
'arg1': 'value1',
'arg2': 'value2',
'--conf': 'valueConf',
},
sparkUI: {
enabled: true,
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-glue/test/job.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ describe('Job', () => {
});

test('with reserved args should throw', () => {
['--conf', '--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => {
['--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => {
const defaultArguments: {[key: string]: string} = {};
defaultArguments[arg] = 'random value';

Expand Down

0 comments on commit 3d0f4ba

Please sign in to comment.