diff --git a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/plugins/cfn-templates-plugin.js b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/plugins/cfn-templates-plugin.js index 476bd67999..24b48ce009 100644 --- a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/plugins/cfn-templates-plugin.js +++ b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/plugins/cfn-templates-plugin.js @@ -19,7 +19,6 @@ import ec2RStudioInstance from '../templates/ec2-rstudio-instance.cfn.yml'; import ec2LinuxInstance from '../templates/ec2-linux-instance.cfn.yml'; import ec2WindowsInstance from '../templates/ec2-windows-instance.cfn.yml'; import sagemakerInstance from '../templates/sagemaker-notebook-instance.cfn.yml'; -import emrCluster from '../templates/emr-cluster.cfn.yml'; import onboardAccount from '../templates/onboard-account.cfn.yml'; import storageGatewayNetworkInfra from '../templates/storage-gateway/network-infrastructure.cfn.yml'; @@ -31,7 +30,6 @@ const templates = [ add('ec2-linux-instance', ec2LinuxInstance), add('ec2-windows-instance', ec2WindowsInstance), add('sagemaker-notebook-instance', sagemakerInstance), - add('emr-cluster', emrCluster), add('onboard-account', onboardAccount), add('storage-gateway-network-infra', storageGatewayNetworkInfra), ]; diff --git a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/emr-cluster.cfn.yml b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/emr-cluster.cfn.yml deleted file mode 100644 index 7bee27e2ea..0000000000 --- a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/emr-cluster.cfn.yml +++ /dev/null @@ -1,314 +0,0 @@ -AWSTemplateFormatVersion: 2010-09-09 - -Description: Service-Workbench-on-AWS EMR-Hail-Jupyter - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: EMR Options - Parameters: - - Namespace - - KeyName - - VPC - - Subnet - - CoreNodeCount - - DiskSizeGB - - MasterInstanceType - - WorkerInstanceType - - WorkerBidPrice - - AccessFromCIDRBlock - - AmiId - - Label: - default: Tags - Parameters: - - NameTag - - OwnerTag - - PurposeTag - -Parameters: - Namespace: - Type: String - Description: An environment name that will be prefixed to resource names - KeyName: - Description: SSH key pair to use for EMR node login - Type: AWS::EC2::KeyPair::KeyName - VPC: - Description: VPC for EMR nodes. - Type: AWS::EC2::VPC::Id - Subnet: - Description: Subnet for EMR nodes, from the VPC selected above - Type: AWS::EC2::Subnet::Id - CoreNodeCount: - Description: Number of core nodes to provision (1-80) - Type: Number - MinValue: '1' - MaxValue: '80' - Default: '5' - DiskSizeGB: - Description: EBS Volume size (GB) for each node - Type: Number - MinValue: '10' - MaxValue: '1000' - Default: '20' - MasterInstanceType: - Type: String - Default: m5.xlarge - Description: EMR node ec2 instance type. - WorkerInstanceType: - Type: String - Default: m5.xlarge - Description: EMR node ec2 instance type. - Market: - Type: String - Default: ON_DEMAND - Description: Which market to purchase workers on - ON_DEMAND or SPOT. - WorkerBidPrice: - Type: String - Description: Bid price for the worker spot nodes. This is only applicable when Market = SPOT. Specify 0 for Market = ON_DEMAND. - AccessFromCIDRBlock: - Type: String - MinLength: 9 - Description: Restrict WebUI access to specified address or range - AmiId: - Type: String - Description: Ami Id to use for the cluster - EnvironmentInstanceFiles: - Type: String - Description: >- - An S3 URI (starting with "s3://") that specifies the location of files to be copied to - the environment instance, including any bootstrap scripts - S3Mounts: - Type: String - Description: A JSON array of objects with name, bucket and prefix properties used to mount data - IamPolicyDocument: - Type: String - Description: The IAM policy to be associated with the launched workstation - EncryptionKeyArn: - Type: String - Description: The ARN of the KMS encryption Key used to encrypt data in the cluster - -Conditions: - IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] - IsOnDemandCondition: !Equals [!Ref Market, ON_DEMAND] - -Resources: - # TODO: Use one bucket for EMR logs per account, so shift deployment to account on-boarding and pass here as param - LogBucket: - Type: AWS::S3::Bucket - DeletionPolicy: Retain - Properties: - PublicAccessBlockConfiguration: # Block all public access configuration for the S3 bucket - BlockPublicAcls: true - BlockPublicPolicy: true - IgnorePublicAcls: true - RestrictPublicBuckets: true - - MasterSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Jupyter - VpcId: - Ref: VPC - SecurityGroupIngress: - - IpProtocol: tcp - FromPort: 8192 - ToPort: 8192 - CidrIp: - Ref: AccessFromCIDRBlock - - InstanceProfile: - Properties: - Path: '/' - Roles: - - Ref: Ec2Role - Type: AWS::IAM::InstanceProfile - - Ec2Role: - Type: 'AWS::IAM::Role' - Properties: - RoleName: !Join ['-', [Ref: Namespace, 'ec2-role']] - Path: '/' - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Principal: - Service: - - 'ec2.amazonaws.com' - Action: - - 'sts:AssumeRole' - Policies: - - !If - - IamPolicyEmpty - - !Ref 'AWS::NoValue' - - PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] - PolicyDocument: !Ref IamPolicyDocument - - PolicyName: !Join ['-', [Ref: Namespace, 's3-bootstrap-script-policy']] - PolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Action: 's3:GetObject' - Resource: - - 'arn:aws:s3:::us-east-1.elasticmapreduce/bootstrap-actions/run-if' - - !Sub - - 'arn:aws:s3:::${S3Location}/*' - # Remove "s3://" prefix from EnvironmentInstanceFiles - - S3Location: !Select [1, !Split ['s3://', !Ref EnvironmentInstanceFiles]] - - Effect: 'Allow' - Action: 's3:ListBucket' - Resource: !Sub - - 'arn:aws:s3:::${S3Bucket}' - - S3Bucket: !Select [2, !Split ['/', !Ref EnvironmentInstanceFiles]] - Condition: - StringLike: - s3:prefix: !Sub - - '${S3Prefix}/*' - - S3Prefix: !Select [3, !Split ['/', !Ref EnvironmentInstanceFiles]] - - ServiceRole: - Type: AWS::IAM::Role - Properties: - Path: '/' - AssumeRolePolicyDocument: - Statement: - - Action: - - sts:AssumeRole - Effect: Allow - Principal: - Service: - - elasticmapreduce.amazonaws.com - Version: '2012-10-17' - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole - - EmrSecurityConfiguration: - Type: AWS::EMR::SecurityConfiguration - Properties: - SecurityConfiguration: - { - 'EncryptionConfiguration': - { - 'AtRestEncryptionConfiguration': - { - 'LocalDiskEncryptionConfiguration': - { - 'EncryptionKeyProviderType': 'AwsKms', - 'AwsKmsKey': { 'Ref': 'EncryptionKeyArn' }, - 'EnableEbsEncryption': true, - }, - }, - 'EnableInTransitEncryption': false, - 'EnableAtRestEncryption': true, - }, - } - - # TODO: customise jupyter password from launch ui - # TODO: Add security configuration to cluster - # TODO: Can we make the jupyter use https? - # TODO: Also change notebook owner to hadoop on launch - EmrCluster: - Type: AWS::EMR::Cluster - Properties: - Applications: - - Name: Hadoop - - Name: Hive - - Name: Spark - BootstrapActions: - - Name: Run-Python-Jupyter - ScriptBootstrapAction: - Path: s3://us-east-1.elasticmapreduce/bootstrap-actions/run-if - Args: - - 'instance.isMaster=true' - - '/opt/hail-on-AWS-spot-instances/src/jupyter_run.sh' - - Name: Mount-S3-Resources - ScriptBootstrapAction: - Path: !Sub '${EnvironmentInstanceFiles}/get_bootstrap.sh' - Args: - - !Ref EnvironmentInstanceFiles - - !Ref S3Mounts - CustomAmiId: - Ref: AmiId - Configurations: - - Classification: spark - ConfigurationProperties: - maximizeResourceAllocation: true - - Classification: yarn-site - ConfigurationProperties: - yarn.nodemanager.vmem-check-enabled: false - - Classification: spark-defaults - ConfigurationProperties: - spark.hadoop.io.compression.codecs: 'org.apache.hadoop.io.compress.DefaultCodec,is.hail.io.compress.BGzipCodec,org.apache.hadoop.io.compress.GzipCodec' - spark.serializer: 'org.apache.spark.serializer.KryoSerializer' - spark.hadoop.parquet.block.size: '1099511627776' - spark.sql.files.maxPartitionBytes: '1099511627776' - spark.sql.files.openCostInBytes: '1099511627776' - Configurations: [] - Instances: - AdditionalMasterSecurityGroups: - - Fn::GetAtt: - - MasterSecurityGroup - - GroupId - Ec2KeyName: - Ref: KeyName - Ec2SubnetId: - Ref: Subnet - MasterInstanceGroup: - InstanceCount: 1 - InstanceType: - Ref: MasterInstanceType - CoreInstanceGroup: !If - - IsOnDemandCondition - - InstanceCount: - Ref: CoreNodeCount - InstanceType: - Ref: WorkerInstanceType - Market: - Ref: Market - EbsConfiguration: - EbsOptimized: true - EbsBlockDeviceConfigs: - - VolumeSpecification: - SizeInGB: - Ref: DiskSizeGB - VolumeType: gp2 - - InstanceCount: - Ref: CoreNodeCount - InstanceType: - Ref: WorkerInstanceType - Market: - Ref: Market - BidPrice: - Ref: WorkerBidPrice - EbsConfiguration: - EbsOptimized: true - EbsBlockDeviceConfigs: - - VolumeSpecification: - SizeInGB: - Ref: DiskSizeGB - VolumeType: gp2 - JobFlowRole: - Ref: InstanceProfile - Name: !Sub '${Namespace}-emr' - Tags: # Add Name tag so EC2 instances are easily identifiable - - Key: Name - Value: !Sub '${Namespace}-emr' - ServiceRole: - Ref: ServiceRole - ReleaseLabel: emr-5.27.0 - # This has to be true because we assume a new user each time. - VisibleToAllUsers: true - SecurityConfiguration: !Ref EmrSecurityConfiguration - LogUri: !Sub 's3://${LogBucket}' - -Outputs: - JupyterUrl: - Description: Open Jupyter on your new EMR cluster - Value: !Sub 'http://${EmrCluster.MasterPublicDNS}:8192' - LogBucket: - Description: EMR Scratch data and Logs bucket - Value: !Ref LogBucket - WorkspaceInstanceRoleArn: - Description: IAM role assumed by the EMR workspace instances - Value: !GetAtt Ec2Role.Arn diff --git a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/external/emr.cfn.yml b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/external/emr.cfn.yml deleted file mode 100644 index 1a2670296a..0000000000 --- a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/external/emr.cfn.yml +++ /dev/null @@ -1,336 +0,0 @@ -AWSTemplateFormatVersion: 2010-09-09 - -Description: Service-Workbench-on-AWS EMR-Hail-Jupyter - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: EMR Options - Parameters: - - Namespace - - KeyName - - VPC - - Subnet - - CoreNodeCount - - DiskSizeGB - - MasterInstanceType - - WorkerInstanceType - - WorkerBidPrice - - AccessFromCIDRBlock - - AmiId - - Label: - default: Tags - Parameters: - - NameTag - - OwnerTag - - PurposeTag - -Parameters: - Namespace: - Type: String - Description: An environment name that will be prefixed to resource names - KeyName: - Description: SSH key pair to use for EMR node login - Type: AWS::EC2::KeyPair::KeyName - VPC: - Description: VPC for EMR nodes. - Type: AWS::EC2::VPC::Id - Subnet: - Description: Subnet for EMR nodes, from the VPC selected above - Type: AWS::EC2::Subnet::Id - CoreNodeCount: - Description: Number of core nodes to provision (1-80) - Type: Number - MinValue: '1' - MaxValue: '80' - Default: '5' - DiskSizeGB: - Description: EBS Volume size (GB) for each node - Type: Number - MinValue: '10' - MaxValue: '1000' - Default: '20' - MasterInstanceType: - Type: String - Default: m5.xlarge - Description: EMR node ec2 instance type. - WorkerInstanceType: - Type: String - Default: m5.xlarge - Description: EMR node ec2 instance type. - Market: - Type: String - Default: ON_DEMAND - Description: Which market to purchase workers on - ON_DEMAND or SPOT. - WorkerBidPrice: - Type: String - Description: Bid price for the worker spot nodes. - AccessFromCIDRBlock: - Type: String - MinLength: 9 - Description: Restrict WebUI access to specified address or range - AmiId: - Type: String - Description: Ami Id to use for the cluster - EnvironmentInstanceFiles: - Type: String - Description: >- - An S3 URI (starting with "s3://") that specifies the location of files to be copied to - the environment instance, including any bootstrap scripts - S3Mounts: - Type: String - Description: A JSON array of objects with name, bucket and prefix properties used to mount data - IamPolicyDocument: - Type: String - Description: The IAM policy to be associated with the launched workstation - -Conditions: - IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] - IsOnDemandCondition: !Equals [!Ref Market, ON_DEMAND] - -Resources: - # TODO: Use one bucket for EMR logs per account, so shift deployment to account on-boarding and pass here as param - LogBucket: - Type: AWS::S3::Bucket - DeletionPolicy: Retain - Properties: - PublicAccessBlockConfiguration: # Block all public access configuration for the S3 bucket - BlockPublicAcls: true - BlockPublicPolicy: true - IgnorePublicAcls: true - RestrictPublicBuckets: true - - EncryptionKey: - Type: AWS::KMS::Key - Properties: - Description: 'This is the key used to secure resources in this account' - EnableKeyRotation: True - KeyPolicy: - Version: '2012-10-17' - Statement: - - Sid: Allow root access - Effect: 'Allow' - Principal: - AWS: !Sub arn:aws:iam::${AWS::AccountId}:root - Action: - - 'kms:*' - Resource: '*' - - Sid: Allow use of the key by this account - Effect: 'Allow' - Principal: - AWS: '*' - Action: - - 'kms:DescribeKey' - - 'kms:Encrypt' - - 'kms:Decrypt' - - 'kms:ReEncrypt*' - - 'kms:GenerateDataKey' - - 'kms:GenerateDataKeyWithoutPlaintext' - - 'kms:CreateGrant' - - 'kms:RevokeGrant' - Resource: '*' - Condition: - StringEquals: - kms:CallerAccount: !Ref 'AWS::AccountId' - - MasterSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Jupyter - VpcId: - Ref: VPC - SecurityGroupIngress: - - IpProtocol: tcp - FromPort: 8192 - ToPort: 8192 - CidrIp: - Ref: AccessFromCIDRBlock - - InstanceProfile: - Properties: - Path: '/' - Roles: - - Ref: Ec2Role - Type: AWS::IAM::InstanceProfile - - Ec2Role: - Type: 'AWS::IAM::Role' - Properties: - RoleName: !Join ['-', [Ref: Namespace, 'ec2-role']] - Path: '/' - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Principal: - Service: - - 'ec2.amazonaws.com' - Action: - - 'sts:AssumeRole' - Policies: - - !If - - IamPolicyEmpty - - !Ref 'AWS::NoValue' - - PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] - PolicyDocument: !Ref IamPolicyDocument - - PolicyName: !Join ['-', [Ref: Namespace, 's3-bootstrap-script-policy']] - PolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: 'Allow' - Action: 's3:GetObject' - Resource: - - 'arn:aws:s3:::us-east-1.elasticmapreduce/bootstrap-actions/run-if' - - !Sub - - 'arn:aws:s3:::${S3Location}/*' - # Remove "s3://" prefix from EnvironmentInstanceFiles - - S3Location: !Select [1, !Split ['s3://', !Ref EnvironmentInstanceFiles]] - - Effect: 'Allow' - Action: 's3:ListBucket' - Resource: !Sub - - 'arn:aws:s3:::${S3Bucket}' - - S3Bucket: !Select [2, !Split ['/', !Ref EnvironmentInstanceFiles]] - Condition: - StringLike: - s3:prefix: !Sub - - '${S3Prefix}/*' - - S3Prefix: !Select [3, !Split ['/', !Ref EnvironmentInstanceFiles]] - - ServiceRole: - Type: AWS::IAM::Role - Properties: - Path: '/' - AssumeRolePolicyDocument: - Statement: - - Action: - - sts:AssumeRole - Effect: Allow - Principal: - Service: - - elasticmapreduce.amazonaws.com - Version: '2012-10-17' - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole - - EmrSecurityConfiguration: - Type: AWS::EMR::SecurityConfiguration - Properties: - SecurityConfiguration: - EncryptionConfiguration: - EnableInTransitEncryption: false - EnableAtRestEncryption: true - AtRestEncryptionConfiguration: - LocalDiskEncryptionConfiguration: - EncryptionKeyProviderType: AwsKms - AwsKmsKey: !GetAtt EncryptionKey.Arn - EnableEbsEncryption: true - - # TODO: customise jupyter password from launch ui - # TODO: Add security configuration to cluster - # TODO: Can we make the jupyter use https? - # TODO: Also change notebook owner to hadoop on launch - EmrCluster: - Type: AWS::EMR::Cluster - Properties: - Applications: - - Name: Hadoop - - Name: Hive - - Name: Spark - BootstrapActions: - - Name: Run-Python-Jupyter - ScriptBootstrapAction: - Path: s3://us-east-1.elasticmapreduce/bootstrap-actions/run-if - Args: - - 'instance.isMaster=true' - - '/opt/hail-on-AWS-spot-instances/src/jupyter_run.sh' - - Name: Mount-S3-Resources - ScriptBootstrapAction: - Path: !Sub '${EnvironmentInstanceFiles}/get_bootstrap.sh' - Args: - - !Ref EnvironmentInstanceFiles - - !Ref S3Mounts - CustomAmiId: - Ref: AmiId - Configurations: - - Classification: spark - ConfigurationProperties: - maximizeResourceAllocation: true - - Classification: yarn-site - ConfigurationProperties: - yarn.nodemanager.vmem-check-enabled: false - - Classification: spark-defaults - ConfigurationProperties: - spark.hadoop.io.compression.codecs: 'org.apache.hadoop.io.compress.DefaultCodec,is.hail.io.compress.BGzipCodec,org.apache.hadoop.io.compress.GzipCodec' - spark.serializer: 'org.apache.spark.serializer.KryoSerializer' - spark.hadoop.parquet.block.size: '1099511627776' - spark.sql.files.maxPartitionBytes: '1099511627776' - spark.sql.files.openCostInBytes: '1099511627776' - Configurations: [] - Instances: - AdditionalMasterSecurityGroups: - - Fn::GetAtt: - - MasterSecurityGroup - - GroupId - Ec2KeyName: - Ref: KeyName - Ec2SubnetId: - Ref: Subnet - MasterInstanceGroup: - InstanceCount: 1 - InstanceType: - Ref: MasterInstanceType - CoreInstanceGroup: !If - - IsOnDemandCondition - - InstanceCount: - Ref: CoreNodeCount - InstanceType: - Ref: WorkerInstanceType - Market: - Ref: Market - EbsConfiguration: - EbsOptimized: true - EbsBlockDeviceConfigs: - - VolumeSpecification: - SizeInGB: - Ref: DiskSizeGB - VolumeType: gp2 - - InstanceCount: - Ref: CoreNodeCount - InstanceType: - Ref: WorkerInstanceType - Market: - Ref: Market - BidPrice: - Ref: WorkerBidPrice - EbsConfiguration: - EbsOptimized: true - EbsBlockDeviceConfigs: - - VolumeSpecification: - SizeInGB: - Ref: DiskSizeGB - VolumeType: gp2 - JobFlowRole: - Ref: InstanceProfile - Name: !Sub '${Namespace}-emr' - Tags: # Add Name tag so EC2 instances are easily identifiable - - Key: Name - Value: !Sub '${Namespace}-emr' - ServiceRole: - Ref: ServiceRole - ReleaseLabel: emr-5.27.0 - # This has to be true because we assume a new user each time. - VisibleToAllUsers: true - SecurityConfiguration: !Ref EmrSecurityConfiguration - LogUri: !Sub 's3://${LogBucket}' - -Outputs: - JupyterUrl: - Description: Open Jupyter on your new EMR cluster - Value: !Sub 'http://${EmrCluster.MasterPublicDNS}:8192' - LogBucket: - Description: EMR Scratch data and Logs bucket - Value: !Ref LogBucket - WorkspaceInstanceRoleArn: - Description: IAM role assumed by the EMR workspace instances - Value: !GetAtt Ec2Role.Arn diff --git a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/service-catalog/emr-cluster.cfn.yml b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/service-catalog/emr-cluster.cfn.yml index 9722b18d93..91d927945e 100644 --- a/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/service-catalog/emr-cluster.cfn.yml +++ b/addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/service-catalog/emr-cluster.cfn.yml @@ -103,6 +103,10 @@ Resources: BlockPublicPolicy: true IgnorePublicAcls: true RestrictPublicBuckets: true + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: AES256 MasterSecurityGroup: Type: AWS::EC2::SecurityGroup diff --git a/main/cicd/cicd-pipeline/config/infra/cloudformation.yml b/main/cicd/cicd-pipeline/config/infra/cloudformation.yml index 89d47a9b2c..cbc426710a 100644 --- a/main/cicd/cicd-pipeline/config/infra/cloudformation.yml +++ b/main/cicd/cicd-pipeline/config/infra/cloudformation.yml @@ -97,6 +97,11 @@ Resources: Type: AWS::S3::Bucket Properties: BucketName: ${self:custom.settings.cicdAppArtifactBucketName} + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: AES256 + BucketKeyEnabled: true VersioningConfiguration: Status: Enabled PublicAccessBlockConfiguration: # Block all public access configuration for the S3 bucket diff --git a/main/solution/post-deployment/config/infra/cloudformation.yml b/main/solution/post-deployment/config/infra/cloudformation.yml index 1c4e1b1d24..ac83beeb5a 100644 --- a/main/solution/post-deployment/config/infra/cloudformation.yml +++ b/main/solution/post-deployment/config/infra/cloudformation.yml @@ -297,6 +297,7 @@ Resources: - s3:CreateBucket - s3:PutBucketPublicAccessBlock - s3:PutBucketTagging + - s3:PutEncryptionConfiguration Resource: 'arn:aws:s3:::sc-*-logbucket-*' - Effect: Allow Action: