From b3d82e2fa5b03192d3cde857259a36fee4a3aa60 Mon Sep 17 00:00:00 2001 From: lmouhib Date: Fri, 24 Nov 2023 18:04:58 +0000 Subject: [PATCH] feat: SparkEmrContainers runtime (#191) * add EMR on EKS construct --------- Co-authored-by: Jerome Van Der Linden Co-authored-by: Vincent Gromakowski --- .github/workflows/_build.yml | 5 +- .gitignore | 2 + .projen/deps.json | 5 + .projenrc.ts | 44 +- examples/dsf-quickstart/.projen/tasks.json | 5 +- .../spark-data-lake/infra/.projen/tasks.json | 5 +- framework/.projen/deps.json | 4 + framework/.projen/tasks.json | 25 +- framework/API.md | 1023 ++++++++++++++++- framework/package.json | 4 +- framework/src/processing/README.md | 23 + ...park-emr-runtime-containers-default.lit.ts | 57 + framework/src/processing/index.ts | 1 - framework/src/processing/lib/index.ts | 1 + .../src/processing/lib/karpenter-releases.ts | 11 + .../spark-job/pyspark-application-package.ts | 2 +- .../emr-containers/eks-cluster-helpers.ts | 210 ++++ .../emr-containers/eks-controllers-version.ts | 28 + .../emr-containers/eks-karpenter-helpers.ts | 411 +++++++ .../emr-virtual-cluster-props.ts | 23 + .../lib/spark-runtime/emr-containers/index.ts | 7 + .../alb/iam-policy-alb-v2.5.json | 241 ++++ .../alb/iam-policy-alb-v2.6.json | 241 ++++ .../iam-policy-ebs-csi-driver.json | 122 ++ .../k8s/emr-eks-config/critical.json | 32 + .../notebook-pod-template-ready.json | 32 + .../resources/k8s/emr-eks-config/shared.json | 32 + .../v0.32.1/critical-provisioner.yml | 94 ++ .../v0.32.1/notebook-driver-provisioner.yml | 95 ++ .../v0.32.1/notebook-executor-provisioner.yml | 100 ++ .../v0.32.1/shared-driver-provisioner.yml | 91 ++ .../v0.32.1/shared-executor-provisioner.yml | 95 ++ .../v0.32.1/tooling-provisioner.yml | 86 ++ .../k8s/network-policy-pod2pod-internet.yml | 17 + .../k8s/pod-template/critical-driver.yaml | 15 + .../k8s/pod-template/critical-executor.yaml | 22 + .../k8s/pod-template/notebook-driver.yaml | 16 + .../k8s/pod-template/notebook-executor.yaml | 20 + .../k8s/pod-template/shared-driver.yaml | 16 + .../k8s/pod-template/shared-executor.yaml | 20 + .../k8s/rbac/emr-containers-rbac.yaml | 44 + .../resources/k8s/resource-management.yaml | 22 + .../spark-emr-containers-runtime-props.ts | 106 ++ .../spark-emr-containers-runtime.ts | 530 +++++++++ .../src/processing/lib/spark-runtime/index.ts | 1 + framework/src/utils/lib/index.ts | 1 + framework/src/utils/lib/utils.ts | 72 ++ framework/src/utils/lib/vpc-helper.ts | 6 +- .../e2e/spark-containers-runtime.e2e.test.ts | 87 ++ .../nag-spark-runtime-containers.test.ts | 200 ++++ .../spark-runtime-containers.test.ts | 960 ++++++++++++++++ framework/yarn.lock | 5 + package.json | 4 + .../01-spark-emr-serverless-runtime.mdx | 2 +- .../02-spark-emr-containers-runtime.mdx | 9 + .../03-spark-emr-serverless-job.mdx | 2 +- .../04-pyspark-application-package.mdx | 2 +- .../03-Processing/05-spark-cicd-pipeline.mdx | 2 +- ...rocessing-spark-emr-runtime-containers.mdx | 127 ++ yarn.lock | 23 +- 60 files changed, 5436 insertions(+), 52 deletions(-) create mode 100644 framework/src/processing/examples/spark-emr-runtime-containers-default.lit.ts create mode 100644 framework/src/processing/lib/karpenter-releases.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/eks-cluster-helpers.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/eks-controllers-version.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts create mode 100644 
framework/src/processing/lib/spark-runtime/emr-containers/emr-virtual-cluster-props.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/index.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.5.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.6.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/iam-policy-ebs-csi-driver.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/critical.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/notebook-pod-template-ready.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/shared.json create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/critical-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-driver-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-executor-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-driver-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-executor-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/tooling-provisioner.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/network-policy-pod2pod-internet.yml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-driver.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-executor.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-driver.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-executor.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-driver.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-executor.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/rbac/emr-containers-rbac.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/resource-management.yaml create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime-props.ts create mode 100644 framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime.ts create mode 100644 framework/src/utils/lib/utils.ts create mode 100644 framework/test/e2e/spark-containers-runtime.e2e.test.ts create mode 100644 framework/test/unit/nag/processing/nag-spark-runtime-containers.test.ts create mode 100644 
framework/test/unit/processing/spark-runtime-containers.test.ts create mode 100644 website/docs/constructs/library/03-Processing/02-spark-emr-containers-runtime.mdx create mode 100644 website/docs/constructs/library/generated/_processing-spark-emr-runtime-containers.mdx diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml index c3e5fbcd1..f20b8e404 100644 --- a/.github/workflows/_build.yml +++ b/.github/workflows/_build.yml @@ -47,7 +47,4 @@ jobs: run: |- echo "::error::Files were changed during build (see build log). If this was triggered from a fork, you will need to update your branch." cat .repo.patch - exit 1 - - name: Validate code examples - working-directory: ./framework - run: npx projen validate-examples \ No newline at end of file + exit 1 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 39f458573..d7f4cc9b3 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,8 @@ dist __pycache__ .devcontainer .venv +cdk.out +.DS_Store !/.projenrc.js !/.mergify.yml !/.github/dependabot.yml diff --git a/.projen/deps.json b/.projen/deps.json index 67c277210..d287658c9 100644 --- a/.projen/deps.json +++ b/.projen/deps.json @@ -5,6 +5,11 @@ "version": "16.0.0", "type": "build" }, + { + "name": "glob", + "version": "^10.3.6", + "type": "build" + }, { "name": "lerna", "type": "build" diff --git a/.projenrc.ts b/.projenrc.ts index 63c3c574a..d8b7e22dc 100644 --- a/.projenrc.ts +++ b/.projenrc.ts @@ -2,6 +2,8 @@ import { LernaProject } from 'lerna-projen'; import { awscdk, Task } from 'projen'; import { DependabotScheduleInterval } from 'projen/lib/github'; import { Transform } from "projen/lib/javascript"; +import { dirname } from 'path'; +import { globSync } from 'glob'; const CDK_VERSION = '2.109.0'; const CDK_CONSTRUCTS_VERSION = '10.3.0'; @@ -35,6 +37,7 @@ const rootProject = new LernaProject({ 'lerna-projen', 'ts-node', 'typescript', + 'glob@^10.3.6' ], peerDeps: [ '@types/node@^16', @@ -67,7 +70,9 @@ const rootProject = new LernaProject({ 'dist', '__pycache__', '.devcontainer', - '.venv' + '.venv', + 'cdk.out', + '.DS_Store' ], projenrcTs: true, @@ -75,6 +80,10 @@ const rootProject = new LernaProject({ jest: false }); +rootProject.package.addField('resolutions', { + 'wide-align': '1.1.5', +}); + const fwkProject = new awscdk.AwsCdkConstructLibrary({ name: 'framework', description: 'L3 CDK Constructs used to build data solutions with AWS', @@ -122,13 +131,14 @@ const fwkProject = new awscdk.AwsCdkConstructLibrary({ 'jest-runner-groups', `@aws-cdk/cli-lib-alpha@${CDK_VERSION}-alpha.0`, 'rosetta', - `@aws-cdk/lambda-layer-kubectl-${KUBECTL_LAYER_VERSION}`, + `@aws-cdk/lambda-layer-kubectl-${KUBECTL_LAYER_VERSION}` ], bundledDeps: [ 'js-yaml', + '@types/js-yaml', 'simple-base', - 'semver', + 'semver' ], jestOptions: { @@ -177,15 +187,27 @@ fwkProject.addTask('test:e2e', { exec: 'jest --passWithNoTests --updateSnapshot --group=e2e' }); +/** + * Task copy `resources` directories from `src` to `lib` + * This is to package YAML files part of the dist + */ + +const copyResourcesToLibTask = fwkProject.addTask('copy-resources', { + description: 'Copy all resources directories from src to lib', +}); + +for (const from of globSync('src/**/resources', { cwd: './framework/', root: '.' 
})) { + const to = dirname(from.replace('src', 'lib')); + const cpCommand = `rsync -avr --exclude '*.ts' --exclude '*.js' ${from} ${to}`; + copyResourcesToLibTask.exec(cpCommand); +}; + +fwkProject.compileTask.exec('npx projen copy-resources'); + fwkProject.postCompileTask.prependExec('rm -f .jsii.tabl.json && jsii-rosetta extract .jsii && node generate_doc.mjs'); fwkProject.tasks.tryFind('release')!.prependSpawn(new Task('install:ci')); -fwkProject.tasks.addTask('validate-examples', { - description: 'Validating examples using jsii-rosetta', - exec: 'jsii-rosetta extract --fail .jsii' -}); - const sparkDataLakeInfraExampleApp = new awscdk.AwsCdkPythonApp({ name: 'spark-data-lake-infra-example', moduleName: 'stacks', @@ -232,7 +254,8 @@ sparkDataLakeInfraExampleApp.addTask('test:e2e', { }); const synthTask = sparkDataLakeInfraExampleApp.tasks.tryFind('synth:silent'); synthTask?.reset(); -synthTask?.exec(`npx -y cdk@${CDK_VERSION} synth -q -c prod=PLACEHOLDER -c staging=PLACEHOLDER`); +synthTask?.prependExec(`cdk --version || npm install -g cdk@${CDK_VERSION}`); +synthTask?.exec('cdk synth -q -c prod=PLACEHOLDER -c staging=PLACEHOLDER'); const buildExampleTask = sparkDataLakeInfraExampleApp.addTask('build-example', { steps: [ { exec: `pip install --ignore-installed --no-deps --no-index --find-links ../../../framework/dist/python aws_dsf` }, @@ -288,7 +311,8 @@ adsfQuickstart.addTask('test:e2e', { }); const adsfQuickstartSynthTask = adsfQuickstart.tasks.tryFind('synth:silent'); adsfQuickstartSynthTask?.reset(); -adsfQuickstartSynthTask?.exec(`npx -y cdk@${CDK_VERSION} synth -q`); +adsfQuickstartSynthTask?.prependExec(`cdk --version || npm install -g cdk@${CDK_VERSION}`); +adsfQuickstartSynthTask?.exec('cdk synth -q'); const buildAdsfQuickstartTask = adsfQuickstart.addTask('build-example', { steps: [ { exec: `pip install --ignore-installed --no-deps --no-index --find-links ../../framework/dist/python aws_dsf` }, diff --git a/examples/dsf-quickstart/.projen/tasks.json b/examples/dsf-quickstart/.projen/tasks.json index e724866ed..41ef8d9df 100644 --- a/examples/dsf-quickstart/.projen/tasks.json +++ b/examples/dsf-quickstart/.projen/tasks.json @@ -80,7 +80,10 @@ "description": "Synthesizes your cdk app into cdk.out and suppresses the template in stdout (part of \"yarn build\")", "steps": [ { - "exec": "npx -y cdk@2.109.0 synth -q" + "exec": "cdk --version || npm install -g cdk@2.109.0" + }, + { + "exec": "cdk synth -q" } ] }, diff --git a/examples/spark-data-lake/infra/.projen/tasks.json b/examples/spark-data-lake/infra/.projen/tasks.json index 4f531d705..7fd82a619 100644 --- a/examples/spark-data-lake/infra/.projen/tasks.json +++ b/examples/spark-data-lake/infra/.projen/tasks.json @@ -80,7 +80,10 @@ "description": "Synthesizes your cdk app into cdk.out and suppresses the template in stdout (part of \"yarn build\")", "steps": [ { - "exec": "npx -y cdk@2.109.0 synth -q -c prod=PLACEHOLDER -c staging=PLACEHOLDER" + "exec": "cdk --version || npm install -g cdk@2.109.0" + }, + { + "exec": "cdk synth -q -c prod=PLACEHOLDER -c staging=PLACEHOLDER" } ] }, diff --git a/framework/.projen/deps.json b/framework/.projen/deps.json index 0fd2d2a19..bffd63670 100644 --- a/framework/.projen/deps.json +++ b/framework/.projen/deps.json @@ -125,6 +125,10 @@ "name": "typescript", "type": "build" }, + { + "name": "@types/js-yaml", + "type": "bundled" + }, { "name": "js-yaml", "type": "bundled" diff --git a/framework/.projen/tasks.json b/framework/.projen/tasks.json index 1a9f7eafb..3291d6973 100644 --- 
a/framework/.projen/tasks.json +++ b/framework/.projen/tasks.json @@ -53,6 +53,18 @@ "steps": [ { "exec": "jsii --silence-warnings=reserved-word" + }, + { + "exec": "npx projen copy-resources" + } + ] + }, + "copy-resources": { + "name": "copy-resources", + "description": "Copy all resources directories from src to lib", + "steps": [ + { + "exec": "rsync -avr --exclude '*.ts' --exclude '*.js' src/processing/lib/spark-runtime/emr-containers/resources lib/processing/lib/spark-runtime/emr-containers" } ] }, @@ -245,13 +257,13 @@ "exec": "yarn upgrade npm-check-updates" }, { - "exec": "npm-check-updates --upgrade --target=minor --filter=@aws-cdk/lambda-layer-kubectl-v27,@jest/globals,@types/jest,@types/node,@typescript-eslint/eslint-plugin,@typescript-eslint/parser,cdk-nag,eslint-import-resolver-node,eslint-import-resolver-typescript,eslint-plugin-import,eslint,jest,jest-junit,jest-runner-groups,jsii-diff,jsii-docgen,jsii-pacmak,npm-check-updates,projen,rosetta,standard-version,ts-jest,typescript,js-yaml,semver,simple-base,aws-cdk-lib,constructs" + "exec": "npm-check-updates --upgrade --target=minor --filter=@aws-cdk/lambda-layer-kubectl-v27,@jest/globals,@types/jest,@types/node,@typescript-eslint/eslint-plugin,@typescript-eslint/parser,cdk-nag,eslint-import-resolver-node,eslint-import-resolver-typescript,eslint-plugin-import,eslint,jest,jest-junit,jest-runner-groups,jsii-diff,jsii-docgen,jsii-pacmak,npm-check-updates,projen,rosetta,standard-version,ts-jest,typescript,@types/js-yaml,js-yaml,semver,simple-base,aws-cdk-lib,constructs" }, { "exec": "yarn install --check-files" }, { - "exec": "yarn upgrade @aws-cdk/lambda-layer-kubectl-v27 @jest/globals @types/jest @types/node @typescript-eslint/eslint-plugin @typescript-eslint/parser cdk-nag eslint-import-resolver-node eslint-import-resolver-typescript eslint-plugin-import eslint jest jest-junit jest-runner-groups jsii-diff jsii-docgen jsii-pacmak npm-check-updates projen rosetta standard-version ts-jest typescript js-yaml semver simple-base aws-cdk-lib constructs" + "exec": "yarn upgrade @aws-cdk/lambda-layer-kubectl-v27 @jest/globals @types/jest @types/node @typescript-eslint/eslint-plugin @typescript-eslint/parser cdk-nag eslint-import-resolver-node eslint-import-resolver-typescript eslint-plugin-import eslint jest jest-junit jest-runner-groups jsii-diff jsii-docgen jsii-pacmak npm-check-updates projen rosetta standard-version ts-jest typescript @types/js-yaml js-yaml semver simple-base aws-cdk-lib constructs" }, { "exec": "npx projen" @@ -261,15 +273,6 @@ } ] }, - "validate-examples": { - "name": "validate-examples", - "description": "Validating examples using jsii-rosetta", - "steps": [ - { - "exec": "jsii-rosetta extract --fail .jsii" - } - ] - }, "watch": { "name": "watch", "description": "Watch & compile in the background", diff --git a/framework/API.md b/framework/API.md index e5d790377..372933a10 100644 --- a/framework/API.md +++ b/framework/API.md @@ -3923,6 +3923,578 @@ public readonly DSF_TRACKING_CODE: string; --- +### SparkEmrContainersRuntime + +A construct to create an EKS cluster, configure it and enable it with EMR on EKS. + +> [https://awslabs.github.io/aws-data-solutions-framework/docs/constructs/library/spark-emr-containers-runtime](https://awslabs.github.io/aws-data-solutions-framework/docs/constructs/library/spark-emr-containers-runtime) + +#### Methods + +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. 
|
+| addEmrVirtualCluster | Add a new Amazon EMR Virtual Cluster linked to the Amazon EKS cluster. |
+| addKarpenterProvisioner | Apply the provided manifest and add the CDK dependency on the EKS cluster. |
+| createExecutionRole | Create and configure a new Amazon IAM Role usable as an execution role. |
+| retrieveVersion | Retrieve DSF package.json version. |
+| uploadPodTemplate | Upload podTemplates to the Amazon S3 location used by the cluster. |
+
+---
+
+##### `toString`
+
+```typescript
+public toString(): string
+```
+
+Returns a string representation of this construct.
+
+##### `addEmrVirtualCluster`
+
+```typescript
+public addEmrVirtualCluster(scope: Construct, options: EmrVirtualClusterProps): CfnVirtualCluster
+```
+
+Add a new Amazon EMR Virtual Cluster linked to the Amazon EKS cluster.
+
+###### `scope`Required
+
+- *Type:* constructs.Construct
+
+of the stack where the virtual cluster is deployed.
+
+---
+
+###### `options`Required
+
+- *Type:* aws-dsf.processing.EmrVirtualClusterProps
+
+the EmrVirtualClusterProps [properties]{@link EmrVirtualClusterProps}.
+
+---
+
+##### `addKarpenterProvisioner`
+
+```typescript
+public addKarpenterProvisioner(id: string, manifest: any): any
+```
+
+Apply the provided manifest and add the CDK dependency on the EKS cluster.
+
+###### `id`Required
+
+- *Type:* string
+
+the unique ID of the CDK resource.
+
+---
+
+###### `manifest`Required
+
+- *Type:* any
+
+The manifest to apply.
+
+You can use the Utils class that offers methods to read a YAML file and load it as a manifest.
+
+---
+
+##### `createExecutionRole`
+
+```typescript
+public createExecutionRole(scope: Construct, id: string, policy: IManagedPolicy, eksNamespace: string, name: string): Role
+```
+
+Create and configure a new Amazon IAM Role usable as an execution role.
+
+This method makes the created role assumable by the Amazon EKS cluster OpenID Connect provider.
+
+###### `scope`Required
+
+- *Type:* constructs.Construct
+
+of the IAM role.
+
+---
+
+###### `id`Required
+
+- *Type:* string
+
+of the CDK resource to be created, it should be unique across the stack.
+
+---
+
+###### `policy`Required
+
+- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy
+
+the execution policy to attach to the role.
+
+---
+
+###### `eksNamespace`Required
+
+- *Type:* string
+
+The namespace from which the role is going to be used.
+
+MUST be the same as the namespace of the Virtual Cluster from which the job is submitted.
+
+---
+
+###### `name`Required
+
+- *Type:* string
+
+Name to use for the role. Required, and used to scope the IAM role.
+
+---
+
+##### `retrieveVersion`
+
+```typescript
+public retrieveVersion(): any
+```
+
+Retrieve DSF package.json version.
+
+##### `uploadPodTemplate`
+
+```typescript
+public uploadPodTemplate(id: string, filePath: string, removalPolicy: RemovalPolicy): void
+```
+
+Upload podTemplates to the Amazon S3 location used by the cluster.
+
+###### `id`Required
+
+- *Type:* string
+
+the unique ID of the CDK resource.
+
+---
+
+###### `filePath`Required
+
+- *Type:* string
+
+The local path of the YAML podTemplate files to upload.
+
+---
+
+###### `removalPolicy`Required
+
+- *Type:* aws-cdk-lib.RemovalPolicy
+
+---
+
+#### Static Functions
+
+| **Name** | **Description** |
+| --- | --- |
+| isConstruct | Checks if `x` is a construct. |
+| getOrCreate | Get an existing EmrEksCluster based on the cluster name property, or create a new one. Only one EKS cluster can exist per stack. 
|
+
+---
+
+##### `isConstruct`
+
+```typescript
+import { processing } from 'aws-dsf'
+
+processing.SparkEmrContainersRuntime.isConstruct(x: any)
+```
+
+Checks if `x` is a construct.
+
+Use this method instead of `instanceof` to properly detect `Construct`
+instances, even when the construct library is symlinked.
+
+Explanation: in JavaScript, multiple copies of the `constructs` library on
+disk are seen as independent, completely different libraries. As a
+consequence, the class `Construct` in each copy of the `constructs` library
+is seen as a different class, and an instance of one class will not test as
+`instanceof` the other class. `npm install` will not create installations
+like this, but users may manually symlink construct libraries together or
+use a monorepo tool: in those cases, multiple copies of the `constructs`
+library can be accidentally installed, and `instanceof` will behave
+unpredictably. It is safest to avoid using `instanceof`, and using
+this type-testing method instead.
+
+###### `x`Required
+
+- *Type:* any
+
+Any object.
+
+---
+
+##### `getOrCreate`
+
+```typescript
+import { processing } from 'aws-dsf'
+
+processing.SparkEmrContainersRuntime.getOrCreate(scope: Construct, props: SparkEmrContainersRuntimeProps)
+```
+
+Get an existing EmrEksCluster based on the cluster name property, or create a new one. Only one EKS cluster can exist per stack.
+
+###### `scope`Required
+
+- *Type:* constructs.Construct
+
+the CDK scope used to search or create the cluster.
+
+---
+
+###### `props`Required
+
+- *Type:* aws-dsf.processing.SparkEmrContainersRuntimeProps
+
+the EmrEksClusterProps [properties]{@link EmrEksClusterProps } used if the cluster is created.
+
+---
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| node | constructs.Node | The tree node. |
+| ec2InstanceNodeGroupRole | aws-cdk-lib.aws_iam.IRole | IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like the EBS CSI driver and CoreDNS. |
+| eksCluster | aws-cdk-lib.aws_eks.Cluster | The EKS cluster created by the construct if it is not provided. |
+| assetBucket | aws-cdk-lib.aws_s3.IBucket | The bucket holding pod templates referenced in the configuration override for the job. |
+| awsNodeRole | aws-cdk-lib.aws_iam.IRole | IAM Role used by IRSA for the aws-node daemonset. |
+| criticalDefaultConfig | string | The configuration override for the Spark application to use with the default nodes for critical jobs. |
+| csiDriverIrsa | aws-cdk-lib.aws_iam.IRole | *No description.* |
+| csiDriverIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM Role created for the EBS CSI controller. |
+| karpenterEventRules | aws-cdk-lib.aws_events.IRule[] | Rules used by Karpenter to track node health. The rules are defined in the CloudFormation template at https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml. |
+| karpenterIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM role created for the Karpenter controller. |
+| karpenterQueue | aws-cdk-lib.aws_sqs.IQueue | SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes. |
+| karpenterSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The security group used by the EC2NodeClass of the default nodes. |
+| notebookDefaultConfig | string | The configuration override for the Spark application to use with the default nodes dedicated for notebooks. 
|
+| podTemplateS3LocationCriticalDriver | string | The S3 location holding the driver pod template for critical nodes. |
+| podTemplateS3LocationCriticalExecutor | string | The S3 location holding the executor pod template for critical nodes. |
+| podTemplateS3LocationDriverShared | string | The S3 location holding the driver pod template for shared nodes. |
+| podTemplateS3LocationExecutorShared | string | The S3 location holding the executor pod template for shared nodes. |
+| podTemplateS3LocationNotebookDriver | string | The S3 location holding the driver pod template for interactive sessions. |
+| podTemplateS3LocationNotebookExecutor | string | The S3 location holding the executor pod template for interactive sessions. |
+| sharedDefaultConfig | string | The configuration override for the Spark application to use with the default nodes for non-critical jobs. |
+
+---
+
+##### `node`Required
+
+```typescript
+public readonly node: Node;
+```
+
+- *Type:* constructs.Node
+
+The tree node.
+
+---
+
+##### `ec2InstanceNodeGroupRole`Required
+
+```typescript
+public readonly ec2InstanceNodeGroupRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like the EBS CSI driver and CoreDNS.
+
+---
+
+##### `eksCluster`Required
+
+```typescript
+public readonly eksCluster: Cluster;
+```
+
+- *Type:* aws-cdk-lib.aws_eks.Cluster
+
+The EKS cluster created by the construct if it is not provided.
+
+---
+
+##### `assetBucket`Optional
+
+```typescript
+public readonly assetBucket: IBucket;
+```
+
+- *Type:* aws-cdk-lib.aws_s3.IBucket
+
+The bucket holding pod templates referenced in the configuration override for the job.
+
+---
+
+##### `awsNodeRole`Optional
+
+```typescript
+public readonly awsNodeRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+IAM Role used by IRSA for the aws-node daemonset.
+
+---
+
+##### `criticalDefaultConfig`Optional
+
+```typescript
+public readonly criticalDefaultConfig: string;
+```
+
+- *Type:* string
+
+The configuration override for the Spark application to use with the default nodes for critical jobs.
+
+---
+
+##### `csiDriverIrsa`Optional
+
+```typescript
+public readonly csiDriverIrsa: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+---
+
+##### `csiDriverIrsaRole`Optional
+
+```typescript
+public readonly csiDriverIrsaRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM Role created for the EBS CSI controller.
+
+---
+
+##### `karpenterEventRules`Optional
+
+```typescript
+public readonly karpenterEventRules: IRule[];
+```
+
+- *Type:* aws-cdk-lib.aws_events.IRule[]
+
+Rules used by Karpenter to track node health. The rules are defined in the CloudFormation template at https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml.
+
+---
+
+##### `karpenterIrsaRole`Optional
+
+```typescript
+public readonly karpenterIrsaRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM role created for the Karpenter controller.
+
+---
+
+##### `karpenterQueue`Optional
+
+```typescript
+public readonly karpenterQueue: IQueue;
+```
+
+- *Type:* aws-cdk-lib.aws_sqs.IQueue
+
+SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes.
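+
+As a monitoring sketch only (not part of the construct), the queue can feed a CloudWatch alarm; the `runtime` variable and the threshold below are assumptions for illustration:
+
+```typescript
+import { Alarm } from 'aws-cdk-lib/aws-cloudwatch';
+
+// Hypothetical alarm: a backlog of interruption events can signal Spot
+// interruptions impacting Spark executors on the default nodes.
+new Alarm(this, 'KarpenterInterruptionBacklog', {
+  metric: runtime.karpenterQueue!.metricApproximateNumberOfMessagesVisible(),
+  threshold: 10,
+  evaluationPeriods: 1,
+});
+```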
+
+---
+
+##### `karpenterSecurityGroup`Optional
+
+```typescript
+public readonly karpenterSecurityGroup: ISecurityGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup
+
+The security group used by the EC2NodeClass of the default nodes.
+
+---
+
+##### `notebookDefaultConfig`Optional
+
+```typescript
+public readonly notebookDefaultConfig: string;
+```
+
+- *Type:* string
+
+The configuration override for the Spark application to use with the default nodes dedicated for notebooks.
+
+---
+
+##### `podTemplateS3LocationCriticalDriver`Optional
+
+```typescript
+public readonly podTemplateS3LocationCriticalDriver: string;
+```
+
+- *Type:* string
+
+The S3 location holding the driver pod template for critical nodes.
+
+---
+
+##### `podTemplateS3LocationCriticalExecutor`Optional
+
+```typescript
+public readonly podTemplateS3LocationCriticalExecutor: string;
+```
+
+- *Type:* string
+
+The S3 location holding the executor pod template for critical nodes.
+
+---
+
+##### `podTemplateS3LocationDriverShared`Optional
+
+```typescript
+public readonly podTemplateS3LocationDriverShared: string;
+```
+
+- *Type:* string
+
+The S3 location holding the driver pod template for shared nodes.
+
+---
+
+##### `podTemplateS3LocationExecutorShared`Optional
+
+```typescript
+public readonly podTemplateS3LocationExecutorShared: string;
+```
+
+- *Type:* string
+
+The S3 location holding the executor pod template for shared nodes.
+
+---
+
+##### `podTemplateS3LocationNotebookDriver`Optional
+
+```typescript
+public readonly podTemplateS3LocationNotebookDriver: string;
+```
+
+- *Type:* string
+
+The S3 location holding the driver pod template for interactive sessions.
+
+---
+
+##### `podTemplateS3LocationNotebookExecutor`Optional
+
+```typescript
+public readonly podTemplateS3LocationNotebookExecutor: string;
+```
+
+- *Type:* string
+
+The S3 location holding the executor pod template for interactive sessions.
+
+---
+
+##### `sharedDefaultConfig`Optional
+
+```typescript
+public readonly sharedDefaultConfig: string;
+```
+
+- *Type:* string
+
+The configuration override for the Spark application to use with the default nodes for non-critical jobs.
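+
+A minimal sketch of consuming these generated overrides, assuming a `runtime` instance created with `defaultNodes` enabled (the properties are undefined otherwise):
+
+```typescript
+// Hypothetical usage: surface the generated configuration override so a job
+// submission pipeline (e.g. EMR on EKS StartJobRun) can reuse it as-is.
+new cdk.CfnOutput(this, 'CriticalJobConfig', {
+  value: runtime.criticalDefaultConfig!,
+});
+```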
+ +--- + +#### Constants + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DEFAULT_CLUSTER_NAME | string | *No description.* | +| DEFAULT_EKS_VERSION | aws-cdk-lib.aws_eks.KubernetesVersion | *No description.* | +| DEFAULT_EMR_EKS_VERSION | aws-dsf.processing.EmrRuntimeVersion | *No description.* | +| DEFAULT_VPC_CIDR | string | *No description.* | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | + +--- + +##### `DEFAULT_CLUSTER_NAME`Required + +```typescript +public readonly DEFAULT_CLUSTER_NAME: string; +``` + +- *Type:* string + +--- + +##### `DEFAULT_EKS_VERSION`Required + +```typescript +public readonly DEFAULT_EKS_VERSION: KubernetesVersion; +``` + +- *Type:* aws-cdk-lib.aws_eks.KubernetesVersion + +--- + +##### `DEFAULT_EMR_EKS_VERSION`Required + +```typescript +public readonly DEFAULT_EMR_EKS_VERSION: EmrRuntimeVersion; +``` + +- *Type:* aws-dsf.processing.EmrRuntimeVersion + +--- + +##### `DEFAULT_VPC_CIDR`Required + +```typescript +public readonly DEFAULT_VPC_CIDR: string; +``` + +- *Type:* string + +--- + +##### `DSF_OWNED_TAG`Required + +```typescript +public readonly DSF_OWNED_TAG: string; +``` + +- *Type:* string + +--- + +##### `DSF_TRACKING_CODE`Required + +```typescript +public readonly DSF_TRACKING_CODE: string; +``` + +- *Type:* string + +--- + ### SparkEmrEksJob A construct to run Spark Jobs using EMR on EKS. @@ -5901,38 +6473,98 @@ Otherwise, the removalPolicy is reverted to RETAIN. public readonly silverBucketArchiveDelay: number; ``` -- *Type:* number -- *Default:* Objects are not archived to Glacier. +- *Type:* number +- *Default:* Objects are not archived to Glacier. + +Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class). + +--- + +##### `silverBucketInfrequentAccessDelay`Optional + +```typescript +public readonly silverBucketInfrequentAccessDelay: number; +``` + +- *Type:* number +- *Default:* Move objects to Infrequent Access after 90 days. + +Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class). + +--- + +##### `silverBucketName`Optional + +```typescript +public readonly silverBucketName: string; +``` + +- *Type:* string +- *Default:* `silver---` will be used. + +Name of the Silver bucket. + +Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended). + +--- + +### EmrVirtualClusterProps + +The properties for the EmrVirtualCluster Construct class. + +#### Initializer + +```typescript +import { processing } from 'aws-dsf' + +const emrVirtualClusterProps: processing.EmrVirtualClusterProps = { ... } +``` + +#### Properties + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| name | string | name of the Amazon Emr virtual cluster to be created. | +| createNamespace | boolean | creates Amazon EKS namespace. | +| eksNamespace | string | name of the Amazon EKS namespace to be linked to the Amazon EMR virtual cluster. | + +--- + +##### `name`Required + +```typescript +public readonly name: string; +``` + +- *Type:* string -Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class). +name of the Amazon Emr virtual cluster to be created. --- -##### `silverBucketInfrequentAccessDelay`Optional +##### `createNamespace`Optional ```typescript -public readonly silverBucketInfrequentAccessDelay: number; +public readonly createNamespace: boolean; ``` -- *Type:* number -- *Default:* Move objects to Infrequent Access after 90 days. 
+- *Type:* boolean
+- *Default:* Do not create the namespace

-Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class).
+creates Amazon EKS namespace.

---

-##### `silverBucketName`Optional
+##### `eksNamespace`Optional

```typescript
-public readonly silverBucketName: string;
+public readonly eksNamespace: string;
```

 - *Type:* string
-- *Default:* `silver---` will be used.
-
-Name of the Silver bucket.
+- *Default:* Use the default namespace

-Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended).
+name of the Amazon EKS namespace to be linked to the Amazon EMR virtual cluster.

---

@@ -6273,6 +6905,232 @@ The EMR Spark image to use to run the unit tests.
 
 ---
 
+### SparkEmrContainersRuntimeProps
+
+The properties for the EmrEksCluster Construct class.
+
+#### Initializer
+
+```typescript
+import { processing } from 'aws-dsf'
+
+const sparkEmrContainersRuntimeProps: processing.SparkEmrContainersRuntimeProps = { ... }
+```
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| kubectlLambdaLayer | aws-cdk-lib.aws_lambda.ILayerVersion | Starting with k8s 1.22, CDK no longer bundles the kubectl layer with the code due to breaking npm package size. A layer needs to be passed to the Construct. |
+| publicAccessCIDRs | string[] | The CIDR blocks that are allowed access to your cluster’s public Kubernetes API server endpoint. |
+| createEmrOnEksServiceLinkedRole | boolean | Whether we need to create an EMR on EKS Service Linked Role. |
+| defaultNodes | boolean | If set to true, the Construct will create default EKS nodegroups or node provisioners (based on the autoscaler mechanism used). |
+| ec2InstanceRole | aws-cdk-lib.aws_iam.IRole | The role used for the cluster nodes instance profile. |
+| eksAdminRole | aws-cdk-lib.aws_iam.IRole | Amazon IAM Role to be added to the Amazon EKS master roles that gives access to the Kubernetes cluster from the AWS console UI. |
+| eksCluster | aws-cdk-lib.aws_eks.Cluster | The EKS cluster to set up EMR on. |
+| eksClusterName | string | Name of the Amazon EKS cluster to be created. |
+| eksVpc | aws-cdk-lib.aws_ec2.IVpc | The VPC to use when creating the EKS cluster. |
+| karpenterVersion | aws-dsf.processing.KarpenterVersion | The version of Karpenter to pass to Helm. |
+| kubernetesVersion | aws-cdk-lib.aws_eks.KubernetesVersion | Kubernetes version for the Amazon EKS cluster that will be created. The default is changed as new versions of k8s on EKS become available. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |
+| vpcCidr | string | The CIDR of the VPC to use when creating the EKS cluster. |
+
+---
+
+##### `kubectlLambdaLayer`Required
+
+```typescript
+public readonly kubectlLambdaLayer: ILayerVersion;
+```
+
+- *Type:* aws-cdk-lib.aws_lambda.ILayerVersion
+
+Starting with k8s 1.22, CDK no longer bundles the kubectl layer with the code due to breaking npm package size. A layer needs to be passed to the Construct.
+
+The CDK [documentation](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_eks.KubernetesVersion.html#static-v1_22)
+contains the libraries that you should add for the right Kubernetes version.
+
+---
+
+##### `publicAccessCIDRs`Required
+
+```typescript
+public readonly publicAccessCIDRs: string[];
+```
+
+- *Type:* string[]
+
+The CIDR blocks that are allowed access to your cluster’s public Kubernetes API server endpoint.
+
+---
+
+##### `createEmrOnEksServiceLinkedRole`Optional
+
+```typescript
+public readonly createEmrOnEksServiceLinkedRole: boolean;
+```
+
+- *Type:* boolean
+- *Default:* true
+
+Whether we need to create an EMR on EKS Service Linked Role.
+
+---
+
+##### `defaultNodes`Optional
+
+```typescript
+public readonly defaultNodes: boolean;
+```
+
+- *Type:* boolean
+- *Default:* true
+
+If set to true, the Construct will create default EKS nodegroups or node provisioners (based on the autoscaler mechanism used).
+
+There are three types of nodes:
+ * Nodes for critical jobs which use on-demand instances, high speed disks and workload isolation
+ * Nodes for shared workloads which use Spot instances and no isolation to optimize costs
+ * Nodes for notebooks which leverage a cost-optimized configuration for running EMR managed endpoints and Spark drivers/executors.
+
+---
+
+##### `ec2InstanceRole`Optional
+
+```typescript
+public readonly ec2InstanceRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+- *Default:* A role is created with AmazonEKSWorkerNodePolicy, AmazonEC2ContainerRegistryReadOnly, AmazonSSMManagedInstanceCore and AmazonEKS_CNI_Policy AWS managed policies
+
+The role used for the cluster nodes instance profile.
+
+---
+
+##### `eksAdminRole`Optional
+
+```typescript
+public readonly eksAdminRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+Amazon IAM Role to be added to the Amazon EKS master roles that gives access to the Kubernetes cluster from the AWS console UI.
+
+An admin role must be passed if the `eksCluster` property is not set.
+You will use this role to manage the EKS cluster and grant others access to it.
+
+---
+
+##### `eksCluster`Optional
+
+```typescript
+public readonly eksCluster: Cluster;
+```
+
+- *Type:* aws-cdk-lib.aws_eks.Cluster
+- *Default:* An EKS Cluster is created
+
+The EKS cluster to set up EMR on.
+
+The cluster needs to be created in the same CDK Stack.
+If the EKS cluster is provided, the cluster AddOns and all the controllers (ALB Ingress controller, Cluster Autoscaler or Karpenter...) need to be configured.
+When providing an EKS cluster, the methods for adding nodegroups can still be used. They implement the best practices for running Spark on EKS.
+
+---
+
+##### `eksClusterName`Optional
+
+```typescript
+public readonly eksClusterName: string;
+```
+
+- *Type:* string
+- *Default:* The [default cluster name]{@link DEFAULT_CLUSTER_NAME }
+
+Name of the Amazon EKS cluster to be created.
+
+---
+
+##### `eksVpc`Optional
+
+```typescript
+public readonly eksVpc: IVpc;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.IVpc
+
+The VPC to use when creating the EKS cluster.
+
+VPC should have at least two private and public subnets in different Availability Zones.
+All private subnets should have the following tags:
+ * 'for-use-with-amazon-emr-managed-policies'='true'
+ * 'kubernetes.io/role/internal-elb'='1'
+All public subnets should have the following tag:
+ * 'kubernetes.io/role/elb'='1'
+Cannot be combined with `vpcCidr`. If combined, `vpcCidr` takes precedence.
+
+---
+
+##### `karpenterVersion`Optional
+
+```typescript
+public readonly karpenterVersion: KarpenterVersion;
+```
+
+- *Type:* aws-dsf.processing.KarpenterVersion
+- *Default:* The [default Karpenter version]{@link DEFAULT_KARPENTER_VERSION }
+
+The version of Karpenter to pass to Helm.
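+
+A minimal sketch of pinning the Karpenter release explicitly instead of relying on the default; the values are assumptions and `kubectlLayer` comes from the usage example:
+
+```typescript
+import { processing } from 'aws-dsf'
+
+const runtime = processing.SparkEmrContainersRuntime.getOrCreate(this, {
+  publicAccessCIDRs: ['10.0.0.0/32'],
+  kubectlLambdaLayer: kubectlLayer,
+  // Pin the Karpenter version rather than relying on the default
+  karpenterVersion: processing.KarpenterVersion.V0_32_1,
+});
+```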
+
+---
+
+##### `kubernetesVersion`Optional
+
+```typescript
+public readonly kubernetesVersion: KubernetesVersion;
+```
+
+- *Type:* aws-cdk-lib.aws_eks.KubernetesVersion
+- *Default:* Kubernetes version {@link DEFAULT_EKS_VERSION }
+
+Kubernetes version for the Amazon EKS cluster that will be created. The default is changed as new versions of k8s on EKS become available.
+
+---
+
+##### `removalPolicy`Optional
+
+```typescript
+public readonly removalPolicy: RemovalPolicy;
+```
+
+- *Type:* aws-cdk-lib.RemovalPolicy
+- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).
+
+The removal policy when deleting the CDK resource.
+
+Applies to resources like Amazon CloudWatch Logs or Amazon S3 buckets. If DESTROY is selected, the data removal context value needs to be set to true, otherwise the removalPolicy is reverted to RETAIN.
+
+---
+
+##### `vpcCidr`Optional
+
+```typescript
+public readonly vpcCidr: string;
+```
+
+- *Type:* string
+- *Default:* A VPC with CIDR 10.0.0.0/16 will be used
+
+The CIDR of the VPC to use when creating the EKS cluster.
+
+If provided, a VPC with three public subnets and three private subnets is created.
+The size of the private subnets is four times the size of the public subnets.
+
+---
+
 ### SparkEmrEksJobApiProps
 
 Configuration for the EMR on EKS job.
@@ -7333,6 +8191,126 @@ Internal function to convert camel case properties to pascal case as required by
+
+
+### Utils
+
+Utilities class used across the different resources.
+
+#### Initializers
+
+```typescript
+import { utils } from 'aws-dsf'
+
+new utils.Utils()
+```
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+
+---
+
+
+#### Static Functions
+
+| **Name** | **Description** |
+| --- | --- |
+| loadYaml | Take a document stored as a string and load it as YAML. |
+| randomize | Create a random string to be used as a seed for IAM User password. |
+| readYamlDocument | Read a YAML file from the path provided and return it. |
+| stringSanitizer | Sanitize a string by removing upper case characters and replacing special characters, except underscores. |
+| toPascalCase | Convert a string to PascalCase. |
+
+---
+
+##### `loadYaml`
+
+```typescript
+import { utils } from 'aws-dsf'
+
+utils.Utils.loadYaml(document: string)
+```
+
+Take a document stored as a string and load it as YAML.
+
+###### `document`Required
+
+- *Type:* string
+
+the document stored as a string.
+
+---
+
+##### `randomize`
+
+```typescript
+import { utils } from 'aws-dsf'
+
+utils.Utils.randomize(name: string)
+```
+
+Create a random string to be used as a seed for IAM User password.
+
+###### `name`Required
+
+- *Type:* string
+
+the string to which to append a random string.
+
+---
+
+##### `readYamlDocument`
+
+```typescript
+import { utils } from 'aws-dsf'
+
+utils.Utils.readYamlDocument(path: string)
+```
+
+Read a YAML file from the path provided and return it.
+
+###### `path`Required
+
+- *Type:* string
+
+the path to the file.
+
+---
+
+##### `stringSanitizer`
+
+```typescript
+import { utils } from 'aws-dsf'
+
+utils.Utils.stringSanitizer(toSanitize: string)
+```
+
+Sanitize a string by removing upper case characters and replacing special characters, except underscores.
+
+###### `toSanitize`Required
+
+- *Type:* string
+
+the string to sanitize.
+
+---
+
+##### `toPascalCase`
+
+```typescript
+import { utils } from 'aws-dsf'
+
+utils.Utils.toPascalCase(text: string)
+```
+
+Convert a string to PascalCase.
+
+###### `text`Required
+
+- *Type:* string
+
+---
+
+
+
 ## Enums
 
@@ -7483,6 +8461,23 @@ Enum defining the EMR version as defined [here](https://docs.aws.amazon.com/emr/
 
 ---
 
+### KarpenterVersion
+
+Enum defining the Karpenter versions as defined [here](https://github.com/aws/karpenter/releases).
+
+#### Members
+
+| **Name** | **Description** |
+| --- | --- |
+| V0_32_1 | *No description.* |
+
+---
+
+##### `V0_32_1`
+
+---
+
+
 ### SparkImage
 
 The list of supported Spark images to use in the SparkCICDPipeline.
diff --git a/framework/package.json b/framework/package.json
index ce8ad6e81..c298c541b 100644
--- a/framework/package.json
+++ b/framework/package.json
@@ -11,6 +11,7 @@
     "bump": "npx projen bump",
     "compat": "npx projen compat",
     "compile": "npx projen compile",
+    "copy-resources": "npx projen copy-resources",
     "default": "npx projen default",
     "docgen": "npx projen docgen",
     "eslint": "npx projen eslint",
@@ -27,7 +28,6 @@
     "test:watch": "npx projen test:watch",
     "unbump": "npx projen unbump",
     "upgrade": "npx projen upgrade",
-    "validate-examples": "npx projen validate-examples",
     "watch": "npx projen watch",
     "projen": "npx projen"
   },
@@ -72,11 +72,13 @@
   },
   "dependencies": {
     "@aws-cdk/lambda-layer-kubectl-v27": "^2.0.0",
+    "@types/js-yaml": "^4.0.9",
     "js-yaml": "^4.1.0",
     "semver": "^7.5.4",
     "simple-base": "^1.0.0"
   },
   "bundledDependencies": [
+    "@types/js-yaml",
     "js-yaml",
     "semver",
     "simple-base"
diff --git a/framework/src/processing/README.md b/framework/src/processing/README.md
index cf69678be..0a11717ee 100644
--- a/framework/src/processing/README.md
+++ b/framework/src/processing/README.md
@@ -36,6 +36,29 @@ The code snippet below shows a usage example of the `SparkEmrServerlessRuntime`
 
 [example usage](examples/spark-emr-runtime-serverless-default.lit.ts)
 
+[//]: # (processing.spark-emr-runtime-containers)
+# Spark EMR Containers Runtime
+
+A construct to deploy an EKS cluster and enable it for EMR on EKS use.
+
+## Overview
+
+The construct creates an EKS cluster, installs the necessary controllers, and enables it to be used by the EMR on EKS service, as described in this [documentation](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-cluster-access.html). The following components are deployed:
+
+ * An EKS cluster (VPC configuration can be customized)
+ * A tooling nodegroup to run the Kubernetes controllers
+ * Kubernetes controllers: EBS CSI Driver, Karpenter, ALB Ingress Controller, cert-manager
+ * Optionally, default Karpenter NodePools and EC2NodeClasses as listed [here](https://github.com/awslabs/data-solutions-framework-on-aws/tree/main/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config).
+
+The construct uploads to S3 the pod templates required to run EMR jobs on the default Karpenter NodePools and EC2NodeClasses. It also parses and stores the configuration override of EMR on EKS jobs for each default nodegroup in construct properties.
+
+
+## Usage
+
+The code snippet below shows a usage example of the `SparkEmrContainersRuntime` construct. 
+
+[example usage](examples/spark-emr-runtime-containers-default.lit.ts)
+
 [//]: # (processing.spark-job)
 # Spark EMR Serverless job
diff --git a/framework/src/processing/examples/spark-emr-runtime-containers-default.lit.ts b/framework/src/processing/examples/spark-emr-runtime-containers-default.lit.ts
new file mode 100644
index 000000000..bb3c56b47
--- /dev/null
+++ b/framework/src/processing/examples/spark-emr-runtime-containers-default.lit.ts
@@ -0,0 +1,57 @@
+import * as cdk from 'aws-cdk-lib';
+import { ManagedPolicy, PolicyDocument, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam';
+import { Construct } from 'constructs';
+import { SparkEmrContainersRuntime } from '../lib';
+import { KubectlV27Layer } from '@aws-cdk/lambda-layer-kubectl-v27';
+
+/// !show
+class ExampleSparkEmrContainersStack extends cdk.Stack {
+  constructor(scope: Construct, id: string) {
+    super(scope, id);
+
+    // The layer must be changed according to the Kubernetes version used
+    const kubectlLayer = new KubectlV27Layer(this, 'kubectlLayer');
+
+    // Create the Spark EMR Containers runtime
+    const emrEksCluster = SparkEmrContainersRuntime.getOrCreate(this, {
+      eksAdminRole: Role.fromRoleArn(this, 'EksAdminRole' , 'arn:aws:iam::12345678912:role/role-name-with-path'),
+      publicAccessCIDRs: ['10.0.0.0/32'],
+      createEmrOnEksServiceLinkedRole: true,
+      kubectlLambdaLayer: kubectlLayer,
+    });
+
+    const s3Read = new PolicyDocument({
+      statements: [new PolicyStatement({
+        actions: [
+          's3:GetObject',
+        ],
+        resources: ['arn:aws:s3:::aws-data-analytics-workshop'],
+      })],
+    });
+
+    const s3ReadPolicy = new ManagedPolicy(this, 's3ReadPolicy', {
+      document: s3Read,
+    });
+
+    const virtualCluster = emrEksCluster.addEmrVirtualCluster(this, {
+      name: 'e2e',
+      createNamespace: true,
+      eksNamespace: 'e2ens',
+    });
+
+    const execRole = emrEksCluster.createExecutionRole(this, 'ExecRole', s3ReadPolicy, 'e2ens', 's3ReadExecRole');
+
+    new cdk.CfnOutput(this, 'virtualClusterArn', {
+      value: virtualCluster.attrArn,
+    });
+
+    new cdk.CfnOutput(this, 'execRoleArn', {
+      value: execRole.roleArn,
+    });
+
+  }
+}
+/// !hide
+
+const app = new cdk.App();
+new ExampleSparkEmrContainersStack(app, 'ExampleSparkEmrContainersStack');
\ No newline at end of file
diff --git a/framework/src/processing/index.ts b/framework/src/processing/index.ts
index 1dabf9e52..40db2a6af 100644
--- a/framework/src/processing/index.ts
+++ b/framework/src/processing/index.ts
@@ -1,5 +1,4 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: MIT-0
-
 export * from './lib';
\ No newline at end of file
diff --git a/framework/src/processing/lib/index.ts b/framework/src/processing/lib/index.ts
index f5b79ecc3..ba5f3b34b 100644
--- a/framework/src/processing/lib/index.ts
+++ b/framework/src/processing/lib/index.ts
@@ -4,4 +4,5 @@
 export * from './emr-releases';
 export * from './cicd-pipeline';
 export * from './spark-job';
+export * from './karpenter-releases';
 export * from './spark-runtime';
diff --git a/framework/src/processing/lib/karpenter-releases.ts b/framework/src/processing/lib/karpenter-releases.ts
new file mode 100644
index 000000000..b5bae2724
--- /dev/null
+++ b/framework/src/processing/lib/karpenter-releases.ts
@@ -0,0 +1,11 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0 + +/** + * Enum defining the Karpenter versions as defined [here](https://github.com/aws/karpenter/releases) + */ +export enum KarpenterVersion { + V0_32_1 = 'v0.32.1', +} + +export const DEFAULT_KARPENTER_VERSION: KarpenterVersion = KarpenterVersion.V0_32_1; diff --git a/framework/src/processing/lib/spark-job/pyspark-application-package.ts b/framework/src/processing/lib/spark-job/pyspark-application-package.ts index cb4661880..dbd602d71 100644 --- a/framework/src/processing/lib/spark-job/pyspark-application-package.ts +++ b/framework/src/processing/lib/spark-job/pyspark-application-package.ts @@ -188,7 +188,7 @@ export class PySparkApplicationPackage extends TrackedConstruct { const emrAppAsset = new Asset(this, 'EmrAppAsset', { path: entrypointDirectory, bundling: { - image: DockerImage.fromRegistry('public.ecr.aws/docker/library/alpine:latest'), + image: DockerImage.fromRegistry('public.ecr.aws/amazonlinux/amazonlinux:2023-minimal'), outputType: BundlingOutput.NOT_ARCHIVED, command: [ 'sh', diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/eks-cluster-helpers.ts b/framework/src/processing/lib/spark-runtime/emr-containers/eks-cluster-helpers.ts new file mode 100644 index 000000000..f1c183c55 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/eks-cluster-helpers.ts @@ -0,0 +1,210 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 + +import { Duration } from 'aws-cdk-lib'; +import { CfnLaunchTemplate, InstanceType } from 'aws-cdk-lib/aws-ec2'; +import { Cluster, KubernetesManifest, CfnAddon, NodegroupOptions, NodegroupAmiType, KubernetesVersion, ICluster } from 'aws-cdk-lib/aws-eks'; +import { FederatedPrincipal, IRole, ManagedPolicy, Policy, PolicyDocument, Role } from 'aws-cdk-lib/aws-iam'; +import { Construct } from 'constructs'; +import { CERTMANAGER_HELM_CHART_VERSION, EBS_CSI_DRIVER_ADDON_VERSION } from './eks-controllers-version'; +import * as IamPolicyEbsCsiDriver from './resources/k8s/controllers-iam-policies/iam-policy-ebs-csi-driver.json'; +import { Utils } from '../../../../utils'; + + +/** + * @internal + * Configure the EBS CSI driver on an Amazon EKS cluster + * @param {Construct} scope the CDK scope to create resources in + * @param {ICluster} cluster the EKS cluster to install the CSI driver in + * @param {KubernetesVersion} eksClusterK8sVersion the Kubernetes version of the EKS cluster + * @return {IRole} the IAM role used by the CSI driver + */ +export function ebsCsiDriverSetup(scope: Construct, cluster: ICluster, eksClusterK8sVersion: KubernetesVersion): IRole { + + const ebsCsiDriverIrsa = cluster.addServiceAccount('EbsCsiDriverSa', { + name: 'ebs-csi-controller-sa', + namespace: 'kube-system', + }); + + const ebsCsiDriverPolicyDocument = PolicyDocument.fromJson(IamPolicyEbsCsiDriver); + + const ebsCsiDriverPolicy = new Policy( + scope, + 'EbsCsiDriverPolicy', + { document: ebsCsiDriverPolicyDocument }, + ); + + ebsCsiDriverPolicy.attachToRole(ebsCsiDriverIrsa.role); + + const ebsCSIDriver = new CfnAddon(scope, 'EbsCsiDriver', { + addonName: 'aws-ebs-csi-driver', + clusterName: cluster.clusterName, + serviceAccountRoleArn: ebsCsiDriverIrsa.role.roleArn, + addonVersion: EBS_CSI_DRIVER_ADDON_VERSION.get(eksClusterK8sVersion), + resolveConflicts: 'OVERWRITE', + }); + + ebsCSIDriver.node.addDependency(ebsCsiDriverIrsa); + + // Deploy the Helm Chart for the Certificate Manager. Required for EMR Studio ALB. 
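+  // cert-manager is installed through Helm into its own namespace with CRDs,
+  // pinning the chart version mapped to the cluster Kubernetes version
+  // (see CERTMANAGER_HELM_CHART_VERSION).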
+  cluster.addHelmChart('CertManager', {
+    createNamespace: true,
+    namespace: 'cert-manager',
+    chart: 'cert-manager',
+    repository: 'https://charts.jetstack.io',
+    version: CERTMANAGER_HELM_CHART_VERSION.get(eksClusterK8sVersion),
+    timeout: Duration.minutes(14),
+    values: {
+      startupapicheck: {
+        timeout: '5m',
+      },
+      installCRDs: true,
+    },
+  });
+
+  return ebsCsiDriverIrsa.role;
+}
+
+/**
+ * @internal
+ * Configure the IAM role used by the aws-node pod following AWS best practice not to use the EC2 instance role
+ * @param {Construct} scope the CDK scope to create resources in
+ * @param {ICluster} cluster the EKS cluster to configure the aws-node pod in
+ * @return {IRole} the IAM role used by the aws-node pod
+ */
+
+export function awsNodeRoleSetup(scope: Construct, cluster: ICluster): IRole {
+
+  const awsNodeRole: Role = new Role(scope, 'AwsNodeRole', {
+    assumedBy: new FederatedPrincipal(
+      cluster.openIdConnectProvider.openIdConnectProviderArn,
+      { ...[] },
+      'sts:AssumeRoleWithWebIdentity',
+    ),
+    description: `awsNodeRole-${cluster.clusterName}`,
+    managedPolicies: [ManagedPolicy.fromAwsManagedPolicyName('AmazonEKS_CNI_Policy')],
+  });
+
+  // update the aws-node service account with the IAM role created for it
+  new KubernetesManifest(scope, 'AwsNodeSaUpdateManifest', {
+    cluster: cluster,
+    manifest: [
+      {
+        apiVersion: 'v1',
+        kind: 'ServiceAccount',
+        metadata: {
+          name: 'aws-node',
+          namespace: 'kube-system',
+          annotations: {
+            'eks.amazonaws.com/role-arn': awsNodeRole.roleArn,
+          },
+        },
+      },
+    ],
+    overwrite: true,
+  });
+
+  return awsNodeRole;
+}
+
+/**
+ * @internal
+ * Method to set up a managed nodegroup to bootstrap all cluster-vital components like
+ * CoreDNS, Karpenter, and the EBS CSI driver.
+ * @param {Construct} scope the CDK scope to create the nodegroup in
+ * @param {Cluster} cluster the EKS cluster to create the nodegroup in
+ * @param {IRole} nodeRole the IAM role to use for the nodegroup
+ */
+export function toolingManagedNodegroupSetup (scope: Construct, cluster: Cluster, nodeRole: IRole) {
+
+  const toolingLaunchTemplate: CfnLaunchTemplate = new CfnLaunchTemplate(scope, 'toolinglaunchtemplate', {
+    launchTemplateName: 'ToolingNodegroup',
+
+    launchTemplateData: {
+
+      metadataOptions: {
+        httpEndpoint: 'enabled',
+        httpProtocolIpv6: 'disabled',
+        httpPutResponseHopLimit: 2,
+        httpTokens: 'required',
+      },
+    },
+  });
+
+  let toolingManagedNodegroupOptions: NodegroupOptions = {
+    nodegroupName: 'tooling',
+    instanceTypes: [new InstanceType('t3.medium')],
+    amiType: NodegroupAmiType.BOTTLEROCKET_X86_64,
+    minSize: 2,
+    maxSize: 2,
+    labels: { role: 'tooling' },
+    launchTemplateSpec: {
+      id: toolingLaunchTemplate.ref,
+      version: toolingLaunchTemplate.attrLatestVersionNumber,
+    },
+    nodeRole: nodeRole,
+  };
+
+  cluster.addNodegroupCapacity('toolingMNG', toolingManagedNodegroupOptions);
+}
+
+/**
+ * @internal
+ * Create a namespace with a predefined baseline
+ * * Create namespace
+ * * Define a Network Policy
+ * @param {ICluster} cluster the EKS cluster to create the namespace in
+ * @param {string} namespace the namespace to create
+ * @return {KubernetesManifest} the Kubernetes manifest for the namespace
+ */
+export function createNamespace (cluster: ICluster, namespace: string): KubernetesManifest {
+
+  const regex = /^[a-z0-9]([-a-z0-9]*[a-z0-9])?$/;
+
+  const reg = RegExp(regex);
+
+  if (!reg.exec(namespace) || namespace.length > 63) {
+    throw new Error(`Namespace provided violates the constraints of Namespace naming ${namespace}`);
+  }
+
+  //Create the namespace with Pod Security Admission enforcing the baseline Pod Security Standard
+  //To learn more look at https://kubernetes.io/docs/concepts/security/pod-security-standards/
+  const ns = cluster.addManifest(`${namespace}-Namespace`, {
+    apiVersion: 'v1',
+    kind: 'Namespace',
+    metadata: {
+      name: namespace,
+      labels: {
+        'pod-security.kubernetes.io/enforce': 'baseline',
+        'pod-security.kubernetes.io/enforce-version': 'v1.28',
+      },
+    },
+
+  });
+
+  //Create the network policy for the namespace
+  let manifestNetworkPolicy = Utils.readYamlDocument(`${__dirname}/resources/k8s/network-policy-pod2pod-internet.yml`);
+
+  manifestNetworkPolicy = manifestNetworkPolicy.replace(/(\{{NAMESPACE}})/g, namespace);
+
+  let manifestNetworkPolicyManifestYAML: any = manifestNetworkPolicy.split('---').map((e: any) => Utils.loadYaml(e));
+
+  const manifestApplyNetworkPolicy = cluster.addManifest(`${namespace}-network-policy`, ...manifestNetworkPolicyManifestYAML);
+
+  manifestApplyNetworkPolicy.node.addDependency(ns);
+
+
+  //Create the resource quota and limit range for the namespace
+  let manifestResourceManagement = Utils.readYamlDocument(`${__dirname}/resources/k8s/resource-management.yaml`);
+
+  manifestResourceManagement = manifestResourceManagement.replace(/(\{{NAMESPACE}})/g, namespace);
+
+  let manifestResourceManagementYAML: any = manifestResourceManagement.split('---').map((e: any) => Utils.loadYaml(e));
+
+  const manifestApplyResourceManagement = cluster.addManifest(`${namespace}-resource-management`, ...manifestResourceManagementYAML);
+
+  manifestApplyResourceManagement.node.addDependency(ns);
+
+  return ns;
+}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/eks-controllers-version.ts b/framework/src/processing/lib/spark-runtime/emr-containers/eks-controllers-version.ts
new file mode 100644
index 000000000..abab6c36b
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/eks-controllers-version.ts
@@ -0,0 +1,28 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+import { KubernetesVersion } from 'aws-cdk-lib/aws-eks';
+
+/**
+ * @internal
+ * The version mapping can be taken from here
+ * aws eks describe-addon-versions --addon-name aws-ebs-csi-driver
+ */
+export const EBS_CSI_DRIVER_ADDON_VERSION: Map<KubernetesVersion, string> = new Map([
+  [KubernetesVersion.V1_24, 'v1.24.1-eksbuild.1'],
+  [KubernetesVersion.V1_25, 'v1.24.1-eksbuild.1'],
+  [KubernetesVersion.V1_26, 'v1.24.1-eksbuild.1'],
+  [KubernetesVersion.V1_27, 'v1.24.1-eksbuild.1'],
+]);
+
+/**
+ * @internal
+ * The version mapping can be taken from here
+ * https://cert-manager.io/docs/releases/
+ */
+export const CERTMANAGER_HELM_CHART_VERSION: Map<KubernetesVersion, string> = new Map([
+  [KubernetesVersion.V1_24, '1.13.2'],
+  [KubernetesVersion.V1_25, '1.13.2'],
+  [KubernetesVersion.V1_26, '1.13.2'],
+  [KubernetesVersion.V1_27, '1.13.2'],
+]);
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts b/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts
new file mode 100644
index 000000000..81ccd8d9e
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts
@@ -0,0 +1,411 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+import { Duration, RemovalPolicy, Stack, Tags } from 'aws-cdk-lib';
+import { SubnetType, ISubnet, SecurityGroup, Port, ISecurityGroup } from 'aws-cdk-lib/aws-ec2';
+import { HelmChart, ICluster } from 'aws-cdk-lib/aws-eks';
+import { IRule, Rule } from 'aws-cdk-lib/aws-events';
+import { SqsQueue } from 'aws-cdk-lib/aws-events-targets';
+import { CfnInstanceProfile, IRole, PolicyStatement, Effect, ServicePrincipal } from 'aws-cdk-lib/aws-iam';
+import { IQueue, Queue } from 'aws-cdk-lib/aws-sqs';
+import { Construct } from 'constructs';
+import { SparkEmrContainersRuntime } from './spark-emr-containers-runtime';
+import { Context, Utils } from '../../../../utils';
+import { KarpenterVersion } from '../../karpenter-releases';
+
+/**
+ * @internal
+ * Method to add the default Karpenter provisioners for Spark workloads
+ * @param {SparkEmrContainersRuntime} cluster the EMR on EKS cluster to configure the Karpenter provisioners in
+ * @param {KarpenterVersion} karpenterVersion the Karpenter version to use for the provisioners
+ * @param {IRole} nodeRole the IAM role to use for the provisioners
+ */
+export function setDefaultKarpenterProvisioners(cluster: SparkEmrContainersRuntime, karpenterVersion: KarpenterVersion, nodeRole: IRole) {
+
+  const subnets = cluster.eksCluster.vpc.selectSubnets({
+    onePerAz: true,
+    subnetType: SubnetType.PRIVATE_WITH_EGRESS,
+  }).subnets;
+
+  subnets.forEach( (subnet, index) => {
+    let criticalManifestYAML = karpenterManifestSetup(cluster.eksCluster, `${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/critical-provisioner.yml`, subnet, nodeRole);
+    cluster.addKarpenterProvisioner(`karpenterCriticalManifest-${index}`, criticalManifestYAML);
+
+    let sharedDriverManifestYAML = karpenterManifestSetup(cluster.eksCluster, `${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/shared-driver-provisioner.yml`, subnet, nodeRole);
+    cluster.addKarpenterProvisioner(`karpenterSharedDriverManifest-${index}`, sharedDriverManifestYAML);
+
+    let sharedExecutorManifestYAML = karpenterManifestSetup(cluster.eksCluster, `${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/shared-executor-provisioner.yml`, subnet, nodeRole);
+    cluster.addKarpenterProvisioner(`karpenterSharedExecutorManifest-${index}`, sharedExecutorManifestYAML);
+
+    let notebookDriverManifestYAML = karpenterManifestSetup(cluster.eksCluster, `${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/notebook-driver-provisioner.yml`, subnet, nodeRole);
+    cluster.addKarpenterProvisioner(`karpenterNotebookDriverManifest-${index}`, notebookDriverManifestYAML);
+
+    let notebookExecutorManifestYAML = karpenterManifestSetup(cluster.eksCluster, `${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/notebook-executor-provisioner.yml`, subnet, nodeRole);
+    cluster.addKarpenterProvisioner(`karpenterNotebookExecutorManifest-${index}`, notebookExecutorManifestYAML);
+  });
+}
+
+/**
+ * @internal
+ * Method to generate the Karpenter manifests from templates, targeted to the specific EKS cluster
+ * @param {ICluster} cluster the EKS cluster to target the manifests to
+ * @param {string} path the path to the manifest template
+ * @param {ISubnet} subnet the subnet to target the manifests to
+ * @param {IRole} nodeRole the IAM role to use for the manifests
+ * @return {any} the Kubernetes manifests for the Karpenter provisioner
+ */
+export function karpenterManifestSetup(cluster: ICluster,
path: string, subnet: ISubnet, nodeRole: IRole): any {
+
+  let manifest = Utils.readYamlDocument(path);
+
+  manifest = manifest.replace('{{subnet-id}}', subnet.subnetId);
+  manifest = manifest.replace( /(\{{az}})/g, subnet.availabilityZone);
+  manifest = manifest.replace('{{cluster-name}}', cluster.clusterName);
+  manifest = manifest.replace(/(\{{ROLENAME}})/g, nodeRole.roleName);
+
+  let manifestYAML: any = manifest.split('---').map((e: any) => Utils.loadYaml(e));
+
+  return manifestYAML;
+}
+
+/**
+ * @internal
+ * Install all the required Karpenter configuration:
+ * * the SQS queue and EventBridge rules used to handle Spot and unhealthy instance termination
+ * * a security group to be used by the nodes created with Karpenter
+ * * the 'karpenter.sh/discovery' tags on the subnets and the VPC used by Karpenter
+ * * a tooling provisioner that deploys in each of the AZs, one per AZ
+ * @param {ICluster} cluster the EKS cluster to configure the Karpenter provisioners in
+ * @param {string} clusterName the name of the EKS cluster to target the manifests to
+ * @param {Construct} scope the CDK scope to create resources in
+ * @param {CfnInstanceProfile} instanceProfile the IAM instance profile to use for the Karpenter nodes
+ * @param {IRole} nodeRole the IAM role to use for the Karpenter nodes
+ * @param {RemovalPolicy} karpenterRemovalPolicy the removal policy to apply to the Karpenter resources
+ * @param {KarpenterVersion} karpenterVersion the Karpenter version to use for the provisioners
+ * @return {[HelmChart, IRole, IQueue, ISecurityGroup, Array<IRule>]} the Helm chart to install, the IAM role for the service account, the SQS queue, the node security group and the EventBridge rules for Karpenter
+ */
+export function karpenterSetup(cluster: ICluster,
+  clusterName: string,
+  scope: Construct,
+  instanceProfile: CfnInstanceProfile,
+  nodeRole: IRole,
+  karpenterRemovalPolicy: RemovalPolicy,
+  karpenterVersion?: KarpenterVersion,
+): [HelmChart, IRole, IQueue, ISecurityGroup, Array<IRule>] {
+
+  const removalPolicy = Context.revertRemovalPolicy(scope, karpenterRemovalPolicy);
+
+  const karpenterInterruptionQueue: Queue = new Queue(scope, 'KarpenterInterruptionQueue', {
+    retentionPeriod: Duration.seconds(300),
+    enforceSSL: true,
+    removalPolicy,
+  });
+
+  karpenterInterruptionQueue.addToResourcePolicy(
+    new PolicyStatement({
+      effect: Effect.ALLOW,
+      actions: ['sqs:SendMessage'],
+      principals: [new ServicePrincipal('sqs.amazonaws.com'), new ServicePrincipal('events.amazonaws.com')],
+    }),
+  );
+
+  const scheduledChangeRule = new Rule(scope, 'ScheduledChangeRule', {
+    eventPattern: {
+      source: ['aws.health'],
+      detailType: ['AWS Health Event'],
+    },
+    targets: [new SqsQueue(karpenterInterruptionQueue)],
+  });
+
+  const stateChangeRule = new Rule(scope, 'InstanceStateChangeRule', {
+    eventPattern: {
+      source: ['aws.ec2'],
+      detailType: ['EC2 Instance State-change Notification'],
+    },
+    targets: [new SqsQueue(karpenterInterruptionQueue)],
+  });
+
+  const karpenterControllerPolicyStatementSSM: PolicyStatement = new PolicyStatement({
+    effect: Effect.ALLOW,
+    actions: ['ssm:GetParameter', 'pricing:GetProducts'],
+    resources: ['*'],
+  });
+
+  const karpenterControllerPolicyStatementEC2: PolicyStatement = new PolicyStatement({
+    effect: Effect.ALLOW,
+    actions: [
+      'ec2:DescribeAvailabilityZones',
+      'ec2:DescribeImages',
+      'ec2:DescribeInstances',
+      'ec2:DescribeInstanceTypeOfferings',
+      'ec2:DescribeInstanceTypes',
+      'ec2:DescribeLaunchTemplates',
+      'ec2:DescribeSecurityGroups',
+      'ec2:DescribeSpotPriceHistory',
+      'ec2:DescribeSubnets',
+    ],
+    resources: ['*'],
+    conditions: {
+      StringEquals: {
+        'aws:RequestedRegion': Stack.of(scope).region,
+      },
+    },
+  });
+
+  const allowScopedEC2InstanceActions: PolicyStatement = new PolicyStatement({
+    effect: Effect.ALLOW,
+
resources: [ + `arn:aws:ec2:${Stack.of(scope).region}::image/*`, + `arn:aws:ec2:${Stack.of(scope).region}::snapshot/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:security-group/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:subnet/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, + ], + actions: ['ec2:RunInstances', 'ec2:CreateFleet'], + }); + + const allowScopedEC2LaunchTemplateActions: PolicyStatement = new PolicyStatement({ + effect: Effect.ALLOW, + resources: [`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`], + actions: ['ec2:CreateLaunchTemplate'], + conditions: { + StringEquals: { + [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + }, + StringLike: { + 'aws:RequestTag/karpenter.sh/provisioner-name': '*', + }, + }, + }); + + const allowScopedEC2InstanceActionsWithTags: PolicyStatement = new PolicyStatement({ + effect: Effect.ALLOW, + resources: [ + `arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`, + ], + actions: ['ec2:RunInstances', 'ec2:CreateFleet'], + conditions: { + StringEquals: { + [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + }, + StringLike: { + 'aws:RequestTag/karpenter.sh/provisioner-name': '*', + }, + }, + }); + + const allowScopedResourceCreationTagging: PolicyStatement = new PolicyStatement({ + sid: 'AllowScopedResourceCreationTagging', + effect: Effect.ALLOW, + resources: [ + `arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, + ], + actions: ['ec2:CreateTags'], + conditions: { + StringEquals: { + [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + 'ec2:CreateAction': ['RunInstances', 'CreateFleet', 'CreateLaunchTemplate'], + }, + StringLike: { + 'aws:RequestTag/karpenter.sh/provisioner-name': '*', + }, + }, + }); + + const allowMachineMigrationTagging: PolicyStatement = new PolicyStatement({ + sid: 'AllowMachineMigrationTagging', + effect: Effect.ALLOW, + resources: [`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`], + actions: ['ec2:CreateTags'], + conditions: { + 'StringEquals': { + [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + 'aws:RequestTag/karpenter.sh/managed-by': `${clusterName}`, + }, + 'StringLike': { + 'aws:RequestTag/karpenter.sh/provisioner-name': '*', + }, + 'ForAllValues:StringEquals': { + 'aws:TagKeys': ['karpenter.sh/provisioner-name', 'karpenter.sh/managed-by'], + }, + }, + }); + + const allowScopedDeletion: PolicyStatement = new PolicyStatement({ + sid: 'AllowScopedDeletion', + effect: Effect.ALLOW, + resources: [ + `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, + ], + actions: ['ec2:TerminateInstances', 'ec2:DeleteLaunchTemplate'], + conditions: { + StringEquals: { + [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + }, + StringLike: { + 'aws:ResourceTag/karpenter.sh/provisioner-name': '*', + }, + }, + }); + + const karpenterControllerPolicyStatementIAM: PolicyStatement = new PolicyStatement({ + effect: Effect.ALLOW, + actions: ['iam:PassRole'], + resources: 
[nodeRole.roleArn],
+    conditions: {
+      StringEquals: {
+        'iam:PassedToService': 'ec2.amazonaws.com',
+      },
+    },
+  });
+
+  const allowInterruptionQueueActions: PolicyStatement = new PolicyStatement({
+    sid: 'AllowInterruptionQueueActions',
+    effect: Effect.ALLOW,
+    resources: [karpenterInterruptionQueue.queueArn],
+    actions: ['sqs:DeleteMessage', 'sqs:GetQueueAttributes', 'sqs:GetQueueUrl', 'sqs:ReceiveMessage'],
+  });
+
+  const allowAPIServerEndpointDiscovery: PolicyStatement = new PolicyStatement({
+    sid: 'AllowAPIServerEndpointDiscovery',
+    effect: Effect.ALLOW,
+    resources: [`arn:aws:eks:${Stack.of(scope).region}:${Stack.of(scope).account}:cluster/${clusterName}`],
+    actions: ['eks:DescribeCluster'],
+  });
+
+  const allowInstanceProfileReadActions: PolicyStatement = new PolicyStatement({
+    sid: 'AllowInstanceProfileReadActions',
+    effect: Effect.ALLOW,
+    resources: ['*'],
+    actions: ['iam:GetInstanceProfile'],
+  });
+
+
+  const karpenterNS = cluster.addManifest('karpenterNS', {
+    apiVersion: 'v1',
+    kind: 'Namespace',
+    metadata: { name: 'karpenter' },
+  });
+
+  const karpenterAccount = cluster.addServiceAccount('karpenterServiceAccount', {
+    name: 'karpenter',
+    namespace: 'karpenter',
+  });
+
+  karpenterAccount.node.addDependency(karpenterNS);
+
+  karpenterAccount.addToPrincipalPolicy(karpenterControllerPolicyStatementSSM);
+  karpenterAccount.addToPrincipalPolicy(karpenterControllerPolicyStatementEC2);
+  karpenterAccount.addToPrincipalPolicy(karpenterControllerPolicyStatementIAM);
+  karpenterAccount.addToPrincipalPolicy(allowScopedEC2InstanceActions);
+  karpenterAccount.addToPrincipalPolicy(allowScopedEC2InstanceActionsWithTags);
+  karpenterAccount.addToPrincipalPolicy(allowScopedEC2LaunchTemplateActions);
+  karpenterAccount.addToPrincipalPolicy(allowMachineMigrationTagging);
+  karpenterAccount.addToPrincipalPolicy(allowScopedResourceCreationTagging);
+  karpenterAccount.addToPrincipalPolicy(allowScopedDeletion);
+  karpenterAccount.addToPrincipalPolicy(allowInterruptionQueueActions);
+  karpenterAccount.addToPrincipalPolicy(allowAPIServerEndpointDiscovery);
+  karpenterAccount.addToPrincipalPolicy(allowInstanceProfileReadActions);
+
+  //Deploy the Karpenter Helm chart
+  const karpenterChart = cluster.addHelmChart('KarpenterHelmChart', {
+    chart: 'karpenter',
+    release: 'karpenter',
+    repository: 'oci://public.ecr.aws/karpenter/karpenter',
+    namespace: 'karpenter',
+    version: karpenterVersion,
+    timeout: Duration.minutes(14),
+    wait: true,
+    values: {
+      serviceAccount: {
+        name: 'karpenter',
+        create: false,
+        annotations: {
+          'eks.amazonaws.com/role-arn': karpenterAccount.role.roleArn,
+        },
+      },
+      settings: {
+        aws: {
+          defaultInstanceProfile: instanceProfile.instanceProfileName,
+          clusterName: clusterName,
+          clusterEndpoint: cluster.clusterEndpoint,
+          interruptionQueueName: karpenterInterruptionQueue.queueName,
+        },
+      },
+
+    },
+  });
+
+  karpenterChart.node.addDependency(karpenterAccount);
+
+  const karpenterInstancesSg = new SecurityGroup(scope, 'KarpenterSg', {
+    vpc: cluster.vpc,
+    allowAllOutbound: true,
+    description: 'Security group for Karpenter-provisioned instances',
+    disableInlineRules: true,
+  });
+
+  Tags.of(karpenterInstancesSg).add('karpenter.sh/discovery', clusterName);
+
+  cluster.clusterSecurityGroup.addIngressRule(
+    karpenterInstancesSg,
+    Port.allTraffic(),
+  );
+
+  karpenterInstancesSg.addIngressRule(
+    karpenterInstancesSg,
+    Port.allTraffic(),
+  );
+
+  karpenterInstancesSg.addIngressRule(
+    cluster.clusterSecurityGroup,
+    Port.allTraffic(),
+  );
+
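+  // Tag the VPC and its subnets with 'karpenter.sh/discovery' so Karpenter selector terms can discover them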
+  Tags.of(cluster.vpc).add(
+    'karpenter.sh/discovery', clusterName,
+  );
+
+  cluster.vpc.privateSubnets.forEach((subnet) => {
+    Tags.of(subnet).add('karpenter.sh/discovery', clusterName);
+  });
+
+  cluster.vpc.publicSubnets.forEach((subnet) =>
+    Tags.of(subnet).add('karpenter.sh/discovery', clusterName),
+  );
+
+  const privateSubnets = cluster.vpc.selectSubnets({
+    onePerAz: true,
+    subnetType: SubnetType.PRIVATE_WITH_EGRESS,
+  }).subnets;
+
+  let manifest = Utils.readYamlDocument(`${__dirname}/resources/k8s/karpenter-provisioner-config/${karpenterVersion}/tooling-provisioner.yml`);
+
+  manifest = manifest.replace(/(\{{cluster-name}})/g, clusterName);
+
+  manifest = manifest.replace(/(\{{ROLENAME}})/g, nodeRole.roleName);
+
+  // The tooling provisioner template expects two subnet placeholders, one per AZ
+  const subnetIdHolder: string[] = ['subnet-1', 'subnet-2'];
+
+  privateSubnets.forEach((subnet, index) => {
+
+    let subnetHolder = `{{${subnetIdHolder[index]}}}`;
+    let re = new RegExp(subnetHolder, 'g');
+    manifest = manifest.replace(re, subnet.subnetId);
+
+  });
+
+  let manifestYAML: any = manifest.split('---').map((e: any) => Utils.loadYaml(e));
+
+  const manifestApply = cluster.addManifest('provisioner-tooling', ...manifestYAML);
+
+  manifestApply.node.addDependency(karpenterChart);
+
+  return [karpenterChart, karpenterAccount.role, karpenterInterruptionQueue, karpenterInstancesSg, [scheduledChangeRule, stateChangeRule]];
+}
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/emr-virtual-cluster-props.ts b/framework/src/processing/lib/spark-runtime/emr-containers/emr-virtual-cluster-props.ts
new file mode 100644
index 000000000..4374725d4
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/emr-virtual-cluster-props.ts
@@ -0,0 +1,23 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/**
+ * The properties for the EmrVirtualCluster Construct class.
+ */
+export interface EmrVirtualClusterProps {
+  /**
+   * The name of the Amazon EMR virtual cluster to create
+   */
+  readonly name: string;
+  /**
+   * The name of the Amazon EKS namespace to link to the Amazon EMR virtual cluster
+   * @default - Use the default namespace
+   */
+  readonly eksNamespace?: string;
+
+  /**
+   * Whether to create the Amazon EKS namespace
+   * @default - Do not create the namespace
+   */
+  readonly createNamespace?: boolean;
+}
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/index.ts b/framework/src/processing/lib/spark-runtime/emr-containers/index.ts
new file mode 100644
index 000000000..652cd4041
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/index.ts
@@ -0,0 +1,7 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0 + +export * from './spark-emr-containers-runtime'; +export * from './emr-virtual-cluster-props'; +export * from './eks-controllers-version'; +export * from './spark-emr-containers-runtime-props'; diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.5.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.5.json new file mode 100644 index 000000000..7944f2a12 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.5.json @@ -0,0 +1,241 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "iam:CreateServiceLinkedRole" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "iam:AWSServiceName": "elasticloadbalancing.amazonaws.com" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeAccountAttributes", + "ec2:DescribeAddresses", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInternetGateways", + "ec2:DescribeVpcs", + "ec2:DescribeVpcPeeringConnections", + "ec2:DescribeSubnets", + "ec2:DescribeSecurityGroups", + "ec2:DescribeInstances", + "ec2:DescribeNetworkInterfaces", + "ec2:DescribeTags", + "ec2:GetCoipPoolUsage", + "ec2:DescribeCoipPools", + "elasticloadbalancing:DescribeLoadBalancers", + "elasticloadbalancing:DescribeLoadBalancerAttributes", + "elasticloadbalancing:DescribeListeners", + "elasticloadbalancing:DescribeListenerCertificates", + "elasticloadbalancing:DescribeSSLPolicies", + "elasticloadbalancing:DescribeRules", + "elasticloadbalancing:DescribeTargetGroups", + "elasticloadbalancing:DescribeTargetGroupAttributes", + "elasticloadbalancing:DescribeTargetHealth", + "elasticloadbalancing:DescribeTags" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "cognito-idp:DescribeUserPoolClient", + "acm:ListCertificates", + "acm:DescribeCertificate", + "iam:ListServerCertificates", + "iam:GetServerCertificate", + "waf-regional:GetWebACL", + "waf-regional:GetWebACLForResource", + "waf-regional:AssociateWebACL", + "waf-regional:DisassociateWebACL", + "wafv2:GetWebACL", + "wafv2:GetWebACLForResource", + "wafv2:AssociateWebACL", + "wafv2:DisassociateWebACL", + "shield:GetSubscriptionState", + "shield:DescribeProtection", + "shield:CreateProtection", + "shield:DeleteProtection" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSecurityGroup" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "StringEquals": { + "ec2:CreateAction": "CreateSecurityGroup" + }, + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags", + "ec2:DeleteTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress", + "ec2:DeleteSecurityGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": 
"Allow", + "Action": [ + "elasticloadbalancing:CreateLoadBalancer", + "elasticloadbalancing:CreateTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateListener", + "elasticloadbalancing:DeleteListener", + "elasticloadbalancing:CreateRule", + "elasticloadbalancing:DeleteRule" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*" + ], + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:listener/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener/app/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/app/*/*/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:ModifyLoadBalancerAttributes", + "elasticloadbalancing:SetIpAddressType", + "elasticloadbalancing:SetSecurityGroups", + "elasticloadbalancing:SetSubnets", + "elasticloadbalancing:DeleteLoadBalancer", + "elasticloadbalancing:ModifyTargetGroup", + "elasticloadbalancing:ModifyTargetGroupAttributes", + "elasticloadbalancing:DeleteTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*" + ], + "Condition": { + "StringEquals": { + "elasticloadbalancing:CreateAction": [ + "CreateTargetGroup", + "CreateLoadBalancer" + ] + }, + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:RegisterTargets", + "elasticloadbalancing:DeregisterTargets" + ], + "Resource": "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:SetWebAcl", + "elasticloadbalancing:ModifyListener", + "elasticloadbalancing:AddListenerCertificates", + "elasticloadbalancing:RemoveListenerCertificates", + "elasticloadbalancing:ModifyRule" + ], + "Resource": "*" + } + ] +} diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.6.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.6.json new file mode 100644 index 000000000..7944f2a12 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.6.json @@ -0,0 +1,241 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "iam:CreateServiceLinkedRole" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "iam:AWSServiceName": "elasticloadbalancing.amazonaws.com" + } + } + }, + { + "Effect": "Allow", + "Action": [ + 
"ec2:DescribeAccountAttributes", + "ec2:DescribeAddresses", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInternetGateways", + "ec2:DescribeVpcs", + "ec2:DescribeVpcPeeringConnections", + "ec2:DescribeSubnets", + "ec2:DescribeSecurityGroups", + "ec2:DescribeInstances", + "ec2:DescribeNetworkInterfaces", + "ec2:DescribeTags", + "ec2:GetCoipPoolUsage", + "ec2:DescribeCoipPools", + "elasticloadbalancing:DescribeLoadBalancers", + "elasticloadbalancing:DescribeLoadBalancerAttributes", + "elasticloadbalancing:DescribeListeners", + "elasticloadbalancing:DescribeListenerCertificates", + "elasticloadbalancing:DescribeSSLPolicies", + "elasticloadbalancing:DescribeRules", + "elasticloadbalancing:DescribeTargetGroups", + "elasticloadbalancing:DescribeTargetGroupAttributes", + "elasticloadbalancing:DescribeTargetHealth", + "elasticloadbalancing:DescribeTags" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "cognito-idp:DescribeUserPoolClient", + "acm:ListCertificates", + "acm:DescribeCertificate", + "iam:ListServerCertificates", + "iam:GetServerCertificate", + "waf-regional:GetWebACL", + "waf-regional:GetWebACLForResource", + "waf-regional:AssociateWebACL", + "waf-regional:DisassociateWebACL", + "wafv2:GetWebACL", + "wafv2:GetWebACLForResource", + "wafv2:AssociateWebACL", + "wafv2:DisassociateWebACL", + "shield:GetSubscriptionState", + "shield:DescribeProtection", + "shield:CreateProtection", + "shield:DeleteProtection" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSecurityGroup" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "StringEquals": { + "ec2:CreateAction": "CreateSecurityGroup" + }, + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags", + "ec2:DeleteTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress", + "ec2:DeleteSecurityGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateLoadBalancer", + "elasticloadbalancing:CreateTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateListener", + "elasticloadbalancing:DeleteListener", + "elasticloadbalancing:CreateRule", + "elasticloadbalancing:DeleteRule" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*" + ], + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + 
], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:listener/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener/app/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/app/*/*/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:ModifyLoadBalancerAttributes", + "elasticloadbalancing:SetIpAddressType", + "elasticloadbalancing:SetSecurityGroups", + "elasticloadbalancing:SetSubnets", + "elasticloadbalancing:DeleteLoadBalancer", + "elasticloadbalancing:ModifyTargetGroup", + "elasticloadbalancing:ModifyTargetGroupAttributes", + "elasticloadbalancing:DeleteTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*" + ], + "Condition": { + "StringEquals": { + "elasticloadbalancing:CreateAction": [ + "CreateTargetGroup", + "CreateLoadBalancer" + ] + }, + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:RegisterTargets", + "elasticloadbalancing:DeregisterTargets" + ], + "Resource": "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:SetWebAcl", + "elasticloadbalancing:ModifyListener", + "elasticloadbalancing:AddListenerCertificates", + "elasticloadbalancing:RemoveListenerCertificates", + "elasticloadbalancing:ModifyRule" + ], + "Resource": "*" + } + ] +} diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/iam-policy-ebs-csi-driver.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/iam-policy-ebs-csi-driver.json new file mode 100644 index 000000000..f70fcd4b6 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/controllers-iam-policies/iam-policy-ebs-csi-driver.json @@ -0,0 +1,122 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSnapshot", + "ec2:AttachVolume", + "ec2:DetachVolume", + "ec2:ModifyVolume", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInstances", + "ec2:DescribeSnapshots", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DescribeVolumesModifications" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": ["ec2:CreateTags"], + "Resource": [ + "arn:aws:ec2:*:*:volume/*", + "arn:aws:ec2:*:*:snapshot/*" + ], + "Condition": { + "StringEquals": { + "ec2:CreateAction": ["CreateVolume", "CreateSnapshot"] + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:DeleteTags"], + "Resource": [ + "arn:aws:ec2:*:*:volume/*", + "arn:aws:ec2:*:*:snapshot/*" + ] + }, + { + "Effect": "Allow", + "Action": ["ec2:CreateVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/ebs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:CreateVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/CSIVolumeName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:CreateVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/kubernetes.io/cluster/*": "owned" + } + } + }, + { + "Effect": "Allow", + 
"Action": ["ec2:DeleteVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/ebs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:DeleteVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/CSIVolumeName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:DeleteVolume"], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/kubernetes.io/cluster/*": "owned" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:DeleteSnapshot"], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/CSIVolumeSnapshotName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": ["ec2:DeleteSnapshot"], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/ebs.csi.aws.com/cluster": "true" + } + } + } + ] + } \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/critical.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/critical.json new file mode 100644 index 000000000..d53da2858 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/critical.json @@ -0,0 +1,32 @@ +{ + "applicationConfiguration": + [ + { + "classification": "spark-defaults", + "properties": { + "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", + "spark.sql.catalogImplementation": "hive", + "spark.dynamicAllocation.enabled":"true", + "spark.dynamicAllocation.minExecutors": "8", + "spark.dynamicAllocation.maxExecutors": "40", + "spark.kubernetes.allocation.batch.size": "8", + "spark.kubernetes.executor.request.cores": "7.7", + "spark.executor.memory": "20800m", + "spark.kubernetes.driver.request.cores": "2.5", + "spark.driver.memory": "6800m", + "spark.dynamicAllocation.executorAllocationRatio": "1", + "spark.dynamicAllocation.shuffleTracking.enabled": "true", + "spark.dynamicAllocation.shuffleTracking.timeout": "300s", + "spark.kubernetes.driver.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/critical-driver.yaml", + "spark.kubernetes.executor.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/critical-executor.yaml" + } + } + ], + "monitoringConfiguration": { + "persistentAppUI": "ENABLED", + "cloudWatchMonitoringConfiguration": { + "logGroupName": "/aws/emr-containers/critical", + "logStreamNamePrefix": "default" + } + } +} \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/notebook-pod-template-ready.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/notebook-pod-template-ready.json new file mode 100644 index 000000000..3ad6385c7 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/notebook-pod-template-ready.json @@ -0,0 +1,32 @@ +{ + "applicationConfiguration": + [ + { + "classification": "spark-defaults", + "properties": { + "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", + "spark.sql.catalogImplementation": "hive", + "spark.dynamicAllocation.enabled":"true", + "spark.dynamicAllocation.minExecutors": "0", + "spark.dynamicAllocation.maxExecutors": "20", + "spark.kubernetes.allocation.batch.size": "2", + "spark.kubernetes.executor.request.cores": "3.7", + "spark.executor.memory": "10200m", + 
"spark.kubernetes.driver.request.cores": "1.6", + "spark.driver.memory": "4700m", + "spark.dynamicAllocation.executorAllocationRatio": "1", + "spark.dynamicAllocation.shuffleTracking.enabled": "true", + "spark.dynamicAllocation.shuffleTracking.timeout": "300s", + "spark.kubernetes.driver.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/notebook-driver.yaml", + "spark.kubernetes.executor.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/notebook-executor.yaml" + } + } + ], + "monitoringConfiguration": { + "persistentAppUI": "ENABLED", + "cloudWatchMonitoringConfiguration": { + "logGroupName": "/aws/emr-containers/notebook", + "logStreamNamePrefix": "default" + } + } +} \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/shared.json b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/shared.json new file mode 100644 index 000000000..69bc16a5e --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/emr-eks-config/shared.json @@ -0,0 +1,32 @@ +{ + "applicationConfiguration": + [ + { + "classification": "spark-defaults", + "properties": { + "spark.hadoop.hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory", + "spark.sql.catalogImplementation": "hive", + "spark.dynamicAllocation.enabled":"true", + "spark.dynamicAllocation.minExecutors": "8", + "spark.dynamicAllocation.maxExecutors": "40", + "spark.kubernetes.allocation.batch.size": "8", + "spark.kubernetes.executor.request.cores": "3.8", + "spark.executor.memory": "10400m", + "spark.kubernetes.driver.request.cores": "1.7", + "spark.driver.memory": "4900m", + "spark.dynamicAllocation.executorAllocationRatio": "1", + "spark.dynamicAllocation.shuffleTracking.enabled": "true", + "spark.dynamicAllocation.shuffleTracking.timeout": "300s", + "spark.kubernetes.driver.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/shared-driver.yaml", + "spark.kubernetes.executor.podTemplateFile": "{{POD_TEMPLATE_LOCATION}}/shared-executor.yaml" + } + } + ], + "monitoringConfiguration": { + "persistentAppUI": "ENABLED", + "cloudWatchMonitoringConfiguration": { + "logGroupName": "/aws/emr-containers/shared", + "logStreamNamePrefix": "default" + } + } +} \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/critical-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/critical-provisioner.yml new file mode 100644 index 000000000..e6b700fa0 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/critical-provisioner.yml @@ -0,0 +1,94 @@ +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: critical-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: critical + node-lifecycle: on-demand + + spec: + nodeClassRef: + apiVersion: karpenter.k8s.aws/v1beta1 + kind: EC2NodeClass + name: critical-nodes-{{az}} + + taints: + - key: role + value: critical + effect: NoSchedule + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
+      # Operators { In, NotIn } are supported to enable including or excluding values
+      requirements:
+        # Include general purpose instance families
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["m6gd"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["arm64"]
+        # Exclude smaller instance sizes
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: NotIn
+          values: [nano, micro, small, medium, large]
+        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+          operator: In
+          values: ["on-demand"]
+        - key: "topology.kubernetes.io/zone"
+          operator: In
+          values: ["{{az}}"]
+        - key: "karpenter.k8s.aws/instance-hypervisor"
+          operator: In
+          values: ["nitro"]
+
+  # Resource limits constrain the total size of the cluster.
+  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+
+  limits:
+    cpu: "3200"
+    memory: 12800Gi
+
+  disruption:
+
+    consolidationPolicy: WhenEmpty
+
+    # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+    # to become ready, the node may expire before any pods successfully start.
+    expireAfter: 720h # 30 days
+
+    # If omitted, the feature is disabled, nodes will never scale down due to low utilization
+    consolidateAfter: 300s
+
+  # Priority given to the provisioner when the scheduler considers which provisioner
+  # to select. Higher weights indicate higher priority when comparing provisioners.
+  # Specifying no weight is equivalent to specifying a weight of 0.
+  weight: 10
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: critical-nodes-{{az}}
+spec:
+  subnetSelectorTerms:
+    - id: {{subnet-id}}
+  securityGroupSelectorTerms:
+    - tags:
+        kubernetes.io/cluster/{{cluster-name}}: owned
+  tags:
+    KarpenterProvisionerName: "critical"
+
+  role: {{ROLENAME}}
+
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 2
+    httpTokens: required
+  amiFamily: Bottlerocket
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-driver-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-driver-provisioner.yml
new file mode 100644
index 000000000..df63a2ef7
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-driver-provisioner.yml
@@ -0,0 +1,95 @@
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: notebook-driver-{{az}}
+spec:
+  # References cloud provider-specific custom resource, see your cloud provider specific documentation
+  template:
+    metadata:
+      # Labels are arbitrary key-values that are applied to all nodes
+      labels:
+        role: notebook
+        node-lifecycle: on-demand
+        spark-role: driver
+
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: notebook-driver-nodes-{{az}}
+
+      taints:
+        - key: role
+          value: notebook
+          effect: NoSchedule
+
+      # Requirements that constrain the parameters of provisioned nodes.
+      # These requirements are combined with pod.spec.affinity.nodeAffinity rules.
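+      # Notebook drivers run on on-demand amd64 burstable (t3/t3a) nodes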
+      # Operators { In, NotIn } are supported to enable including or excluding values
+      requirements:
+        # Include general purpose instance families
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["t3", "t3a"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["amd64"]
+        # Exclude smaller instance sizes
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: NotIn
+          values: [nano, micro, small]
+        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+          operator: In
+          values: ["on-demand"]
+        - key: "topology.kubernetes.io/zone"
+          operator: In
+          values: ["{{az}}"]
+        - key: "karpenter.k8s.aws/instance-hypervisor"
+          operator: In
+          values: ["nitro"]
+
+  # Resource limits constrain the total size of the cluster.
+  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+  limits:
+    cpu: "40"
+    memory: 160Gi
+
+  disruption:
+
+    consolidationPolicy: WhenEmpty
+
+    # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+    # to become ready, the node may expire before any pods successfully start.
+    expireAfter: 720h
+
+    # If omitted, the feature is disabled, nodes will never scale down due to low utilization
+    consolidateAfter: 300s
+
+  # Priority given to the provisioner when the scheduler considers which provisioner
+  # to select. Higher weights indicate higher priority when comparing provisioners.
+  # Specifying no weight is equivalent to specifying a weight of 0.
+  weight: 20
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: notebook-driver-nodes-{{az}}
+spec:
+  amiFamily: Bottlerocket
+  subnetSelectorTerms:
+    - id: {{subnet-id}}
+  securityGroupSelectorTerms:
+    - tags:
+        kubernetes.io/cluster/{{cluster-name}}: owned
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 2
+    httpTokens: required
+
+  tags:
+    KarpenterProvisionerName: "notebook-driver"
+
+  role: {{ROLENAME}}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-executor-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-executor-provisioner.yml
new file mode 100644
index 000000000..d34baff3d
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/notebook-executor-provisioner.yml
@@ -0,0 +1,100 @@
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: notebook-executor-{{az}}
+spec:
+  # References cloud provider-specific custom resource, see your cloud provider specific documentation
+  template:
+    metadata:
+      # Labels are arbitrary key-values that are applied to all nodes
+      labels:
+        role: notebook
+        node-lifecycle: spot
+        spark-role: executor
+
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: notebook-executor-nodes-{{az}}
+
+      taints:
+        - key: role
+          value: notebook
+          effect: NoSchedule
+        - key: node-lifecycle
+          value: spot
+          effect: NoSchedule
+
+      # Requirements that constrain the parameters of provisioned nodes.
+      # These requirements are combined with pod.spec.affinity.nodeAffinity rules.
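+      # Notebook executors run on Spot amd64 burstable (t3/t3a) nodes to reduce cost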
+      # Operators { In, NotIn } are supported to enable including or excluding values
+      requirements:
+        # Include general purpose instance families
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["t3", "t3a"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["amd64"]
+        # Exclude smaller instance sizes
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: NotIn
+          values: [nano, micro, small, medium, large]
+        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+          operator: In
+          values: ["spot"]
+        - key: "topology.kubernetes.io/zone"
+          operator: In
+          values: ["{{az}}"]
+        - key: "karpenter.k8s.aws/instance-hypervisor"
+          operator: In
+          values: ["nitro"]
+
+  # Resource limits constrain the total size of the cluster.
+  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+  limits:
+    cpu: "800"
+    memory: 3200Gi
+
+
+  disruption:
+
+    consolidationPolicy: WhenEmpty
+
+    # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+    # to become ready, the node may expire before any pods successfully start.
+    expireAfter: 720h # 30 days
+
+    # If omitted, the feature is disabled, nodes will never scale down due to low utilization
+    consolidateAfter: 300s
+
+  # Priority given to the provisioner when the scheduler considers which provisioner
+  # to select. Higher weights indicate higher priority when comparing provisioners.
+  # Specifying no weight is equivalent to specifying a weight of 0.
+  weight: 20
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: notebook-executor-nodes-{{az}}
+spec:
+  amiFamily: Bottlerocket
+  subnetSelectorTerms:
+    - id: {{subnet-id}}
+  securityGroupSelectorTerms:
+    - tags:
+        kubernetes.io/cluster/{{cluster-name}}: owned
+
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 2
+    httpTokens: required
+
+  tags:
+    KarpenterProvisionerName: "notebook-executor"
+
+  role: {{ROLENAME}}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-driver-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-driver-provisioner.yml
new file mode 100644
index 000000000..8894bbd42
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-driver-provisioner.yml
@@ -0,0 +1,91 @@
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: shared-driver-{{az}}
+spec:
+  # References cloud provider-specific custom resource, see your cloud provider specific documentation
+  template:
+    metadata:
+      # Labels are arbitrary key-values that are applied to all nodes
+      labels:
+        role: shared
+        node-lifecycle: on-demand
+        spark-role: driver
+
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: shared-driver-nodes-{{az}}
+
+      # Requirements that constrain the parameters of provisioned nodes.
+      # These requirements are combined with pod.spec.affinity.nodeAffinity rules.
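+      # Shared job drivers run on on-demand arm64 (m6g) nodes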
+      # Operators { In, NotIn } are supported to enable including or excluding values
+      requirements:
+        # Include general purpose instance families
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["m6g"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["arm64"]
+        # Exclude smaller instance sizes
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: NotIn
+          values: [nano, micro, small, medium]
+        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+          operator: In
+          values: ["on-demand"]
+        - key: "topology.kubernetes.io/zone"
+          operator: In
+          values: ["{{az}}"]
+        - key: "karpenter.k8s.aws/instance-hypervisor"
+          operator: In
+          values: ["nitro"]
+
+  # Resource limits constrain the total size of the cluster.
+  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+  limits:
+    cpu: "40"
+    memory: 160Gi
+
+  disruption:
+
+    consolidationPolicy: WhenEmpty
+
+    # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+    # to become ready, the node may expire before any pods successfully start.
+    expireAfter: 720h # 30 days
+
+    # If omitted, the feature is disabled, nodes will never scale down due to low utilization
+    consolidateAfter: 300s
+
+  # Priority given to the provisioner when the scheduler considers which provisioner
+  # to select. Higher weights indicate higher priority when comparing provisioners.
+  # Specifying no weight is equivalent to specifying a weight of 0.
+  weight: 10
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: shared-driver-nodes-{{az}}
+spec:
+  amiFamily: Bottlerocket
+  subnetSelectorTerms:
+    - id: {{subnet-id}}
+  securityGroupSelectorTerms:
+    - tags:
+        kubernetes.io/cluster/{{cluster-name}}: owned
+
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 2
+    httpTokens: required
+
+  tags:
+    KarpenterProvisionerName: "shared-driver"
+
+  role: {{ROLENAME}}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-executor-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-executor-provisioner.yml
new file mode 100644
index 000000000..7a1b712aa
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/shared-executor-provisioner.yml
@@ -0,0 +1,95 @@
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: shared-executor-{{az}}
+spec:
+  # References cloud provider-specific custom resource, see your cloud provider specific documentation
+  template:
+    metadata:
+      # Labels are arbitrary key-values that are applied to all nodes
+      labels:
+        role: shared
+        node-lifecycle: spot
+        spark-role: executor
+
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: shared-executor-nodes-{{az}}
+
+      taints:
+        - key: node-lifecycle
+          value: spot
+          effect: NoSchedule
+
+      # Requirements that constrain the parameters of provisioned nodes.
+      # These requirements are combined with pod.spec.affinity.nodeAffinity rules.
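+      # Shared job executors run on Spot arm64 (m6g/m6gd) nodes to reduce cost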
+      # Operators { In, NotIn } are supported to enable including or excluding values
+      requirements:
+        # Include general purpose instance families
+        - key: "karpenter.k8s.aws/instance-family"
+          operator: In
+          values: ["m6g", "m6gd"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["arm64"]
+        # Exclude smaller instance sizes
+        - key: "karpenter.k8s.aws/instance-size"
+          operator: NotIn
+          values: [nano, micro, small, medium, large]
+        - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+          operator: In
+          values: ["spot"]
+        - key: "topology.kubernetes.io/zone"
+          operator: In
+          values: ["{{az}}"]
+        - key: "karpenter.k8s.aws/instance-hypervisor"
+          operator: In
+          values: ["nitro"]
+
+  # Resource limits constrain the total size of the cluster.
+  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+  limits:
+    cpu: "3200"
+    memory: 12800Gi
+
+  disruption:
+
+    consolidationPolicy: WhenEmpty
+
+    # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+    # to become ready, the node may expire before any pods successfully start.
+    expireAfter: 720h # 30 days
+
+    # If omitted, the feature is disabled, nodes will never scale down due to low utilization
+    consolidateAfter: 300s
+
+  # Priority given to the provisioner when the scheduler considers which provisioner
+  # to select. Higher weights indicate higher priority when comparing provisioners.
+  # Specifying no weight is equivalent to specifying a weight of 0.
+  weight: 10
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+  name: shared-executor-nodes-{{az}}
+spec:
+  amiFamily: Bottlerocket
+  subnetSelectorTerms:
+    - id: {{subnet-id}}
+  securityGroupSelectorTerms:
+    - tags:
+        kubernetes.io/cluster/{{cluster-name}}: owned
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 2
+    httpTokens: required
+
+  tags:
+    KarpenterProvisionerName: "shared-executor"
+
+  role: {{ROLENAME}}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/tooling-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/tooling-provisioner.yml
new file mode 100644
index 000000000..ef8653b10
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/v0.32.1/tooling-provisioner.yml
@@ -0,0 +1,86 @@
+apiVersion: karpenter.sh/v1beta1
+kind: NodePool
+metadata:
+  name: tooling-nodes
+spec:
+  # References cloud provider-specific custom resource, see your cloud provider specific documentation
+  template:
+    metadata:
+      # Labels are arbitrary key-values that are applied to all nodes
+      labels:
+        role: tooling
+
+    spec:
+      nodeClassRef:
+        apiVersion: karpenter.k8s.aws/v1beta1
+        kind: EC2NodeClass
+        name: tooling-nodes
+
+      # Requirements that constrain the parameters of provisioned nodes.
+      # These requirements are combined with pod.spec.affinity.nodeAffinity rules.
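+      # Tooling pods run on on-demand amd64 (t3/t3a) nodes, medium to xlarge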
+ # Operators { In, NotIn } are supported to enable including or excluding values
+ requirements:
+ # Include general purpose instance families
+ - key: "karpenter.k8s.aws/instance-family"
+ operator: In
+ values: ["t3", "t3a"]
+ - key: "kubernetes.io/arch"
+ operator: In
+ values: ["amd64"]
+ # Only allow medium to xlarge instance sizes
+ - key: "karpenter.k8s.aws/instance-size"
+ operator: In
+ values: [medium, large, xlarge]
+ - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
+ operator: In
+ values: ["on-demand"]
+ - key: "karpenter.k8s.aws/instance-hypervisor"
+ operator: In
+ values: ["nitro"]
+
+ # Resource limits constrain the total size of the cluster.
+ # Limits prevent Karpenter from creating new instances once the limit is exceeded.
+
+ limits:
+ cpu: "100"
+ memory: 100Gi
+
+ disruption:
+
+ consolidationPolicy: WhenEmpty
+
+ # If omitted, the feature is disabled and nodes will never expire. If set to less time than it requires for a node
+ # to become ready, the node may expire before any pods successfully start.
+ expireAfter: 720h # 30 days
+
+ # If omitted, the feature is disabled and nodes will never scale down due to low utilization
+ consolidateAfter: 300s
+
+ # Priority given to the provisioner when the scheduler considers which provisioner
+ # to select. Higher weights indicate higher priority when comparing provisioners.
+ # Specifying no weight is equivalent to specifying a weight of 0.
+ weight: 50
+
+---
+apiVersion: karpenter.k8s.aws/v1beta1
+kind: EC2NodeClass
+metadata:
+ name: tooling-nodes
+spec:
+ amiFamily: Bottlerocket
+ subnetSelectorTerms:
+ - id: {{subnet-1}}
+ - id: {{subnet-2}}
+ metadataOptions:
+ httpEndpoint: enabled
+ httpProtocolIPv6: disabled
+ httpPutResponseHopLimit: 2
+ httpTokens: required
+ securityGroupSelectorTerms:
+ - tags:
+ kubernetes.io/cluster/{{cluster-name}}: owned
+ tags:
+ KarpenterProvisionerName: "tooling"
+
+ role: {{ROLENAME}}
+
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/network-policy-pod2pod-internet.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/network-policy-pod2pod-internet.yml
new file mode 100644
index 000000000..d0018a80c
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/network-policy-pod2pod-internet.yml
@@ -0,0 +1,17 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ name: allow-pod-to-pod-and-internet
+ namespace: {{NAMESPACE}}
+spec:
+ podSelector: {}
+ policyTypes:
+ - Ingress
+ - Egress
+ ingress:
+ - from:
+ - podSelector: {}
+ egress:
+ - to:
+ - ipBlock:
+ cidr: 0.0.0.0/0
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-driver.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-driver.yaml
new file mode 100644
index 000000000..8434b58ff
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-driver.yaml
@@ -0,0 +1,15 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
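+# This driver template is paired with the critical Karpenter provisioner: the toleration and
+# nodeSelector below are expected to mirror the taint and labels that provisioner applies to its
+# nodes, in the same way the shared-executor NodePool above taints nodes with node-lifecycle=spot.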
+# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: role + operator: Equal + value: critical + effect: NoSchedule + nodeSelector: + role: critical + node-lifecycle: on-demand diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-executor.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-executor.yaml new file mode 100644 index 000000000..0c7d40e51 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/critical-executor.yaml @@ -0,0 +1,22 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: role + operator: Equal + value: critical + effect: NoSchedule + nodeSelector: + role: critical + node-lifecycle: on-demand + initContainers: + - name: volume-permissions + image: busybox + command: ['/bin/sh', '-c', 'chown -R 999 /pv/tmp'] + volumeMounts: + - mountPath: /pv/tmp + name: spark-local-dir-1 \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-driver.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-driver.yaml new file mode 100644 index 000000000..e7e9d03bf --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-driver.yaml @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: role + operator: Equal + value: notebook + effect: NoSchedule + nodeSelector: + node-lifecycle: on-demand + spark-role: driver + role: notebook \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-executor.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-executor.yaml new file mode 100644 index 000000000..dba19d3cc --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/notebook-executor.yaml @@ -0,0 +1,20 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: node-lifecycle + operator: Equal + value: spot + effect: NoSchedule + - key: role + operator: Equal + value: notebook + effect: NoSchedule + nodeSelector: + node-lifecycle: spot + spark-role: executor + role: notebook \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-driver.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-driver.yaml new file mode 100644 index 000000000..7a5f638b9 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-driver.yaml @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
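+# The construct uploads this template to the asset bucket and points Spark at it through the
+# configuration override, e.g. (the bucket name is resolved at deployment time):
+#   "spark.kubernetes.driver.podTemplateFile": "s3://<asset-bucket>/<cluster-name>/pod-template/shared-driver.yaml"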
+# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: role + operator: Equal + value: shared + effect: NoSchedule + nodeSelector: + node-lifecycle: on-demand + spark-role: driver + role: shared \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-executor.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-executor.yaml new file mode 100644 index 000000000..33adcd50a --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/pod-template/shared-executor.yaml @@ -0,0 +1,20 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +apiVersion: v1 +kind: Pod + +spec: + tolerations: + - key: node-lifecycle + operator: Equal + value: spot + effect: NoSchedule + - key: role + operator: Equal + value: shared + effect: NoSchedule + nodeSelector: + node-lifecycle: spot + spark-role: executor + role: shared \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/rbac/emr-containers-rbac.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/rbac/emr-containers-rbac.yaml new file mode 100644 index 000000000..0f60c476c --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/rbac/emr-containers-rbac.yaml @@ -0,0 +1,44 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: emr-containers + namespace: {{NAMESPACE}} +rules: + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get"] + - apiGroups: [""] + resources: ["serviceaccounts", "services", "configmaps", "events", "pods", "pods/log"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "patch", "delete", "watch"] + - apiGroups: ["apps"] + resources: ["statefulsets", "deployments"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] + - apiGroups: ["extensions", "networking.k8s.io"] + resources: ["ingresses"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] + - apiGroups: ["rbac.authorization.k8s.io"] + resources: ["roles", "rolebindings"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: emr-containers + namespace: {{NAMESPACE}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: emr-containers +subjects: +- apiGroup: rbac.authorization.k8s.io + kind: User + name: emr-containers \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/resource-management.yaml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/resource-management.yaml new file mode 100644 index 000000000..f3de30e2f --- /dev/null +++ 
b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/resource-management.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: ResourceQuota
+metadata:
+ name: compute-resources
+ namespace: {{NAMESPACE}}
+spec:
+ hard:
+ requests.cpu: "1000"
+ requests.memory: 4000Gi
+---
+apiVersion: v1
+kind: LimitRange
+metadata:
+ name: cpu-min-max
+ namespace: {{NAMESPACE}}
+spec:
+ limits:
+ - min:
+ cpu: "100m"
+ defaultRequest:
+ cpu: "100m"
+ type: Container
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime-props.ts b/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime-props.ts
new file mode 100644
index 000000000..d80427685
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime-props.ts
@@ -0,0 +1,106 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+
+import { RemovalPolicy } from 'aws-cdk-lib';
+import { IVpc } from 'aws-cdk-lib/aws-ec2';
+import {
+ Cluster,
+ KubernetesVersion,
+} from 'aws-cdk-lib/aws-eks';
+import { IRole } from 'aws-cdk-lib/aws-iam';
+import { ILayerVersion } from 'aws-cdk-lib/aws-lambda';
+import { KarpenterVersion } from '../../karpenter-releases';
+
+/**
+ * The properties for the SparkEmrContainersRuntime construct.
+ */
+export interface SparkEmrContainersRuntimeProps {
+ /**
+ * Name of the Amazon EKS cluster to be created
+ * @default - The [default cluster name]{@link DEFAULT_CLUSTER_NAME}
+ */
+ readonly eksClusterName?: string;
+ /**
+ * The Amazon IAM Role to be added to the Amazon EKS master roles. It gives access to the Kubernetes cluster from the AWS console UI.
+ * An admin role must be passed if the `eksCluster` property is not set.
+ * You will use this role to manage the EKS cluster and grant other access to it.
+ */
+ readonly eksAdminRole?: IRole;
+ /**
+ * The EKS cluster to set up EMR on. The cluster needs to be created in the same CDK Stack.
+ * If the EKS cluster is provided, the cluster AddOns and all the controllers (ALB Ingress controller, Cluster Autoscaler or Karpenter...) need to be configured.
+ * When providing an EKS cluster, the methods for adding nodegroups can still be used. They implement the best practices for running Spark on EKS.
+ * @default - An EKS Cluster is created
+ */
+ readonly eksCluster?: Cluster;
+ /**
+ * Kubernetes version for the Amazon EKS cluster to be created.
+ * The default is updated as new Kubernetes versions become available on EKS.
+ * @default - Kubernetes version {@link DEFAULT_EKS_VERSION}
+ */
+ readonly kubernetesVersion?: KubernetesVersion;
+ /**
+ * If set to true, the Construct will create default EKS nodegroups or node provisioners (based on the autoscaler mechanism used).
+ * There are three types of nodes:
+ * * Nodes for critical jobs which use on-demand instances, high-speed disks and workload isolation
+ * * Nodes for shared workloads which use spot instances and no isolation to optimize costs
+ * * Nodes for notebooks which leverage a cost-optimized configuration for running EMR managed endpoints and spark drivers/executors.
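+ *
+ * A minimal sketch of opting out (assuming `layer` and `adminRole` are defined by the caller,
+ * and custom provisioners are then registered with `addKarpenterProvisioner`):
+ * @example
+ * const runtime = SparkEmrContainersRuntime.getOrCreate(this, {
+ *   eksAdminRole: adminRole,
+ *   publicAccessCIDRs: ['10.0.0.0/32'],
+ *   kubectlLambdaLayer: layer,
+ *   defaultNodes: false,
+ * });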
+ * @default - true
+ */
+ readonly defaultNodes?: boolean;
+ /**
+ * The version of Karpenter to pass to Helm
+ * @default - The [default Karpenter version]{@link DEFAULT_KARPENTER_VERSION}
+ */
+ readonly karpenterVersion?: KarpenterVersion;
+ /**
+ * Starting with Kubernetes 1.22, CDK no longer bundles the kubectl layer with the code because of its impact on the npm package size.
+ * A layer needs to be passed to the Construct.
+ *
+ * The cdk [documentation](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_eks.KubernetesVersion.html#static-v1_22)
+ * contains the libraries that you should add for the right Kubernetes version
+ */
+ readonly kubectlLambdaLayer: ILayerVersion;
+
+ /**
+ * The CIDR of the VPC to use when creating the EKS cluster. If provided, a VPC with three public subnets and three private subnets is created.
+ * The private subnets are four times larger than the public ones.
+ * @default - A VPC with CIDR 10.0.0.0/16 is used
+ */
+ readonly vpcCidr?: string;
+ /**
+ * The VPC to use when creating the EKS cluster.
+ * VPC should have at least two private and public subnets in different Availability Zones.
+ * All private subnets should have the following tags:
+ * * 'for-use-with-amazon-emr-managed-policies'='true'
+ * * 'kubernetes.io/role/internal-elb'='1'
+ * All public subnets should have the following tag:
+ * * 'kubernetes.io/role/elb'='1'
+ * Cannot be combined with `vpcCidr`. If combined, `vpcCidr` takes precedence.
+ */
+ readonly eksVpc?: IVpc;
+ /**
+ * The CIDR blocks that are allowed access to your cluster’s public Kubernetes API server endpoint.
+ */
+ readonly publicAccessCIDRs: string[];
+ /**
+ * The role used for the cluster nodes instance profile.
+ * @default - A role is created with AmazonEKSWorkerNodePolicy, AmazonEC2ContainerRegistryReadOnly,
+ * AmazonSSMManagedInstanceCore and AmazonEKS_CNI_Policy AWS managed policies
+ */
+ readonly ec2InstanceRole?: IRole;
+ /**
+ * Whether to create the EMR on EKS Service Linked Role
+ * @default - true
+ */
+ readonly createEmrOnEksServiceLinkedRole?: boolean;
+
+ /**
+ * The removal policy when deleting the CDK resource.
+ * It applies to resources that store data, like Amazon CloudWatch log groups or Amazon S3 buckets.
+ * If DESTROY is selected, the context value '@data-solutions-framework-on-aws/removeDataOnDestroy' must also be set to true.
+ * @default - The resources are not deleted (`RemovalPolicy.RETAIN`).
+ */
+ readonly removalPolicy?: RemovalPolicy;
+}
\ No newline at end of file
diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime.ts b/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime.ts
new file mode 100644
index 000000000..8723644c0
--- /dev/null
+++ b/framework/src/processing/lib/spark-runtime/emr-containers/spark-emr-containers-runtime.ts
@@ -0,0 +1,530 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
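+//
+// A minimal usage sketch, mirroring the e2e test shipped with this change (`kubectlLayer` and
+// `adminRole` are assumed to be created by the caller, and `readPolicy` is any IManagedPolicy):
+//
+//   const runtime = SparkEmrContainersRuntime.getOrCreate(stack, {
+//     eksAdminRole: adminRole,
+//     publicAccessCIDRs: ['10.0.0.0/32'],
+//     kubectlLambdaLayer: kubectlLayer,
+//   });
+//   const virtualCluster = runtime.addEmrVirtualCluster(stack, { name: 'dev', createNamespace: true, eksNamespace: 'dev' });
+//   const execRole = runtime.createExecutionRole(stack, 'ExecRole', readPolicy, 'dev', 'devExecRole');
+//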
+// SPDX-License-Identifier: MIT-0
+
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import { Aws, Stack, Tags, CfnJson, RemovalPolicy } from 'aws-cdk-lib';
+import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2';
+import {
+ AlbControllerVersion,
+ Cluster,
+ ClusterLoggingTypes,
+ EndpointAccess,
+ HelmChart,
+ KubernetesVersion,
+} from 'aws-cdk-lib/aws-eks';
+import { CfnVirtualCluster } from 'aws-cdk-lib/aws-emrcontainers';
+import { IRule } from 'aws-cdk-lib/aws-events';
+import {
+ CfnInstanceProfile,
+ CfnServiceLinkedRole,
+ Effect,
+ FederatedPrincipal,
+ IManagedPolicy,
+ IRole,
+ ManagedPolicy,
+ PolicyDocument,
+ PolicyStatement,
+ Role,
+ ServicePrincipal,
+} from 'aws-cdk-lib/aws-iam';
+import { IKey, Key } from 'aws-cdk-lib/aws-kms';
+import { Bucket, BucketEncryption, IBucket, Location } from 'aws-cdk-lib/aws-s3';
+import { BucketDeployment, Source } from 'aws-cdk-lib/aws-s3-deployment';
+import { IQueue } from 'aws-cdk-lib/aws-sqs';
+import { Construct } from 'constructs';
+import * as SimpleBase from 'simple-base';
+import { createNamespace, ebsCsiDriverSetup, awsNodeRoleSetup, toolingManagedNodegroupSetup } from './eks-cluster-helpers';
+import { karpenterSetup, setDefaultKarpenterProvisioners } from './eks-karpenter-helpers';
+import { EmrVirtualClusterProps } from './emr-virtual-cluster-props';
+import * as CriticalDefaultConfig from './resources/k8s/emr-eks-config/critical.json';
+import * as NotebookDefaultConfig from './resources/k8s/emr-eks-config/notebook-pod-template-ready.json';
+import * as SharedDefaultConfig from './resources/k8s/emr-eks-config/shared.json';
+import { SparkEmrContainersRuntimeProps } from './spark-emr-containers-runtime-props';
+import { Context, TrackedConstruct, TrackedConstructProps, Utils, vpcBootstrap } from '../../../../utils';
+import { EMR_DEFAULT_VERSION } from '../../emr-releases';
+import { DEFAULT_KARPENTER_VERSION } from '../../karpenter-releases';
+
+/**
+ * A construct to create an EKS cluster, configure it and enable it with EMR on EKS
+ * @see https://awslabs.github.io/aws-data-solutions-framework/docs/constructs/library/spark-emr-containers-runtime
+*/
+export class SparkEmrContainersRuntime extends TrackedConstruct {
+
+ public static readonly DEFAULT_EMR_EKS_VERSION = EMR_DEFAULT_VERSION;
+ public static readonly DEFAULT_EKS_VERSION = KubernetesVersion.V1_27;
+ public static readonly DEFAULT_CLUSTER_NAME = 'data-platform';
+ public static readonly DEFAULT_VPC_CIDR = '10.0.0.0/16';
+
+ /**
+ * Get an existing SparkEmrContainersRuntime based on the cluster name property, or create a new one.
+ * Only one EKS cluster can exist per stack.
+ * @param {Construct} scope the CDK scope used to search or create the cluster
+ * @param {SparkEmrContainersRuntimeProps} props the {@link SparkEmrContainersRuntimeProps} properties used if the cluster is created
+ */
+ public static getOrCreate(scope: Construct, props: SparkEmrContainersRuntimeProps) {
+
+ const stack = Stack.of(scope);
+ const id = Utils.toPascalCase(props.eksClusterName || SparkEmrContainersRuntime.DEFAULT_CLUSTER_NAME);
+
+ let emrEksCluster: SparkEmrContainersRuntime =
+ stack.node.tryFindChild(id) as SparkEmrContainersRuntime ??
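+ // tryFindChild returns the construct previously created under this scoped ID, so repeated
+ // getOrCreate() calls in the same stack reuse a single cluster instead of creating a new one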
+ new SparkEmrContainersRuntime(stack, id, props);
+
+ return emrEksCluster;
+ }
+
+ /**
+ * The EKS cluster created by the construct if it is not provided
+ */
+ public readonly eksCluster: Cluster;
+ /**
+ * The IAM role used by IRSA for the EBS CSI driver
+ */
+ public readonly csiDriverIrsa?: IRole;
+ /**
+ * IAM Role used by IRSA for the aws-node daemonset
+ */
+ public readonly awsNodeRole?: IRole;
+ /**
+ * IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers
+ * like the EBS CSI driver or CoreDNS
+ */
+ public readonly ec2InstanceNodeGroupRole: IRole;
+ /**
+ * SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes.
+ */
+ public readonly karpenterQueue?: IQueue;
+ /**
+ * The security group used by the EC2NodeClass of the default nodes
+ */
+ public readonly karpenterSecurityGroup?: ISecurityGroup;
+ /**
+ * Rules used by Karpenter to track node health. The rules are defined in the following CloudFormation template:
+ * https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml
+ */
+ public readonly karpenterEventRules?: Array<IRule>;
+
+ /**
+ * The configuration override for the spark application to use with the default nodes dedicated for notebooks
+ */
+ public readonly notebookDefaultConfig?: string;
+ /**
+ * The configuration override for the spark application to use with the default nodes for critical jobs
+ */
+ public readonly criticalDefaultConfig?: string;
+ /**
+ * The configuration override for the spark application to use with the default nodes for non-critical jobs
+ */
+ public readonly sharedDefaultConfig?: string;
+ /**
+ * The bucket holding pod templates referenced in the configuration override for the job
+ */
+ public readonly assetBucket?: IBucket;
+
+ /**
+ * The S3 location holding the driver pod template for critical nodes
+ */
+ public readonly podTemplateS3LocationCriticalDriver?: string;
+ /**
+ * The S3 location holding the executor pod template for critical nodes
+ */
+ public readonly podTemplateS3LocationCriticalExecutor?: string;
+ /**
+ * The S3 location holding the driver pod template for shared nodes
+ */
+ public readonly podTemplateS3LocationDriverShared?: string;
+ /**
+ * The S3 location holding the executor pod template for shared nodes
+ */
+ public readonly podTemplateS3LocationExecutorShared?: string;
+ /**
+ * The S3 location holding the driver pod template for interactive sessions
+ */
+ public readonly podTemplateS3LocationNotebookDriver?: string;
+ /**
+ * The S3 location holding the executor pod template for interactive sessions
+ */
+ public readonly podTemplateS3LocationNotebookExecutor?: string;
+ /**
+ * The IAM Role created for the EBS CSI controller
+ */
+ public readonly csiDriverIrsaRole?: IRole;
+ /**
+ * The IAM role created for the Karpenter controller
+ */
+ public readonly karpenterIrsaRole?: IRole;
+
+
+ private readonly emrServiceRole?: CfnServiceLinkedRole;
+ private readonly assetUploadBucketRole?: IRole;
+ private readonly karpenterChart?: HelmChart;
+ private readonly defaultNodes?: boolean;
+ private readonly createEmrOnEksServiceLinkedRole?: boolean;
+ private readonly logKmsKey?: IKey;
+ private readonly eksSecretKmsKey?: IKey;
+ private readonly podTemplateLocation: Location;
+ private readonly podTemplatePolicy: PolicyDocument;
+ /**
+ * Constructs a new instance of the SparkEmrContainersRuntime construct.
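+ * The constructor is private; use {@link SparkEmrContainersRuntime.getOrCreate} to create the runtime.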
+ * @param {Construct} scope the Scope of the CDK Construct
+ * @param {string} id the ID of the CDK Construct
+ * @param {SparkEmrContainersRuntimeProps} props the {@link SparkEmrContainersRuntimeProps} properties
+ */
+ private constructor(scope: Construct, id: string, props: SparkEmrContainersRuntimeProps) {
+
+ const trackedConstructProps: TrackedConstructProps = {
+ trackingTag: SparkEmrContainersRuntime.name,
+ };
+
+ super(scope, id, trackedConstructProps);
+
+ let removalPolicy = Context.revertRemovalPolicy(scope, props.removalPolicy);
+
+ // Create a role to be used as instance profile for nodegroups
+ this.ec2InstanceNodeGroupRole = props.ec2InstanceRole || new Role(this, 'Ec2InstanceNodeGroupRole', {
+ assumedBy: new ServicePrincipal('ec2.amazonaws.com'),
+ });
+
+ // Attach policies to the role to be used by the nodegroups
+ this.ec2InstanceNodeGroupRole.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('AmazonEKSWorkerNodePolicy'));
+ this.ec2InstanceNodeGroupRole.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'));
+ this.ec2InstanceNodeGroupRole.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'));
+ this.ec2InstanceNodeGroupRole.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('AmazonEKS_CNI_Policy'));
+
+ // Create the instance profile to be used by the managed nodegroup and Karpenter
+ const clusterInstanceProfile = new CfnInstanceProfile(scope, 'KarpenterInstanceProfile', {
+ roles: [this.ec2InstanceNodeGroupRole.roleName],
+ // instanceProfileName: `adsfNodeInstanceProfile-${clusterName ?? 'default'}`,
+ path: '/',
+ });
+
+ const karpenterVersion = props.karpenterVersion ?? DEFAULT_KARPENTER_VERSION;
+
+ let eksCluster: Cluster;
+
+ // Create an Amazon EKS Cluster with default parameters if not provided in the properties
+ if (props.eksCluster == undefined) {
+
+ this.logKmsKey = new Key(scope, 'LogKmsKey', {
+ enableKeyRotation: true,
+ description: 'log-vpc-key',
+ removalPolicy: removalPolicy,
+ });
+
+ this.eksSecretKmsKey = new Key(scope, 'EksSecretKmsKey', {
+ enableKeyRotation: true,
+ description: 'eks-secrets-key',
+ removalPolicy: removalPolicy,
+ });
+
+ const clusterName = props.eksClusterName ?? SparkEmrContainersRuntime.DEFAULT_CLUSTER_NAME;
+
+ // Define EKS cluster logging
+ const eksClusterLogging: ClusterLoggingTypes[] = [
+ ClusterLoggingTypes.API,
+ ClusterLoggingTypes.AUTHENTICATOR,
+ ClusterLoggingTypes.SCHEDULER,
+ ClusterLoggingTypes.CONTROLLER_MANAGER,
+ ClusterLoggingTypes.AUDIT,
+ ];
+
+ // Set the flag for creating the EMR on EKS Service Linked Role
+ this.createEmrOnEksServiceLinkedRole = props.createEmrOnEksServiceLinkedRole ?? true;
+
+ // Set the flag for default Karpenter provisioners for Spark jobs
+ this.defaultNodes = props.defaultNodes ?? true;
+
+ const vpcCidr = props.vpcCidr ? props.vpcCidr : SparkEmrContainersRuntime.DEFAULT_VPC_CIDR;
+
+ let eksVpc: IVpc = props.eksVpc ? props.eksVpc : vpcBootstrap(scope, vpcCidr, this.logKmsKey, removalPolicy, clusterName, undefined).vpc;
+
+ eksCluster = new Cluster(scope, 'EksCluster', {
+ defaultCapacity: 0,
+ clusterName: clusterName,
+ version: props.kubernetesVersion ??
SparkEmrContainersRuntime.DEFAULT_EKS_VERSION,
+ clusterLogging: eksClusterLogging,
+ kubectlLayer: props.kubectlLambdaLayer,
+ vpc: eksVpc,
+ endpointAccess: EndpointAccess.PUBLIC_AND_PRIVATE,
+ secretsEncryptionKey: this.eksSecretKmsKey,
+ albController: {
+ version: AlbControllerVersion.V2_5_1,
+ policy: JSON.parse(readFileSync(join(__dirname, 'resources/k8s/controllers-iam-policies/alb/iam-policy-alb-v2.5.json'), 'utf8')),
+ },
+ placeClusterHandlerInVpc: true,
+ });
+
+ // Add the provided Amazon IAM Role as Amazon EKS Admin
+ if (props.eksAdminRole === undefined) {
+ throw new Error('An IAM role must be passed to create an EKS cluster');
+ } else {
+ eksCluster.awsAuth.addMastersRole(props.eksAdminRole, 'AdminRole');
+ }
+
+ // Configure the EBS CSI controller
+ this.csiDriverIrsaRole = ebsCsiDriverSetup(this, eksCluster, props.kubernetesVersion ?? SparkEmrContainersRuntime.DEFAULT_EKS_VERSION);
+
+ // Configure the AWS Node Role
+ this.awsNodeRole = awsNodeRoleSetup(this, eksCluster);
+
+ // Configure the tooling nodegroup for hosting tooling components
+ toolingManagedNodegroupSetup(this, eksCluster, this.ec2InstanceNodeGroupRole);
+
+ // Deploy Karpenter
+ [this.karpenterChart, this.karpenterIrsaRole, this.karpenterQueue, this.karpenterSecurityGroup, this.karpenterEventRules] = karpenterSetup(
+ eksCluster,
+ clusterName,
+ scope,
+ clusterInstanceProfile,
+ this.ec2InstanceNodeGroupRole,
+ removalPolicy,
+ karpenterVersion,
+ );
+
+ } else {
+ // Initialize with the provided EKS Cluster
+ eksCluster = props.eksCluster;
+ }
+
+ this.eksCluster = eksCluster;
+
+ // Create an Amazon S3 Bucket for podTemplate assets
+ this.assetBucket = new Bucket(this, 'AssetBucket', {
+ encryption: BucketEncryption.KMS_MANAGED,
+ enforceSSL: true,
+ removalPolicy: removalPolicy,
+ });
+
+ // Configure the podTemplate location
+ this.podTemplateLocation = {
+ bucketName: this.assetBucket.bucketName,
+ objectKey: `${this.eksCluster.clusterName}/pod-template`,
+ };
+
+ let s3DeploymentLambdaPolicyStatement: PolicyStatement[] = [];
+
+ s3DeploymentLambdaPolicyStatement.push(new PolicyStatement({
+ actions: ['logs:CreateLogGroup', 'logs:CreateLogStream', 'logs:PutLogEvents'],
+ resources: [`arn:aws:logs:${Aws.REGION}:${Aws.ACCOUNT_ID}:*`],
+ effect: Effect.ALLOW,
+ }));
+
+ // Policy to allow Lambda access to CloudWatch Logs
+ const lambdaExecutionRolePolicy = new ManagedPolicy(this, 'S3BucketDeploymentPolicy', {
+ statements: s3DeploymentLambdaPolicyStatement,
+ description: 'Policy used by S3 deployment cdk construct',
+ });
+
+ // Create an execution role for the Lambda function and attach the policy
+ this.assetUploadBucketRole = new Role(this, 'S3BucketDeploymentRole', {
+ assumedBy: new ServicePrincipal('lambda.amazonaws.com'),
+ description: 'Role used by S3 deployment cdk construct',
+ managedPolicies: [lambdaExecutionRolePolicy],
+ });
+
+ const stack = Stack.of(scope);
+
+ this.podTemplatePolicy = new PolicyDocument({
+ statements: [
+ new PolicyStatement({
+ actions: [
+ 's3:getObject',
+ ],
+ resources: [
+ stack.formatArn({
+ region: '',
+ account: '',
+ service: 's3',
+ resource: this.podTemplateLocation.bucketName,
+ resourceName: `${this.podTemplateLocation.objectKey}/*`,
+ }),
+ ],
+ }),
+ ],
+ });
+
+ if (this.defaultNodes) {
+ setDefaultKarpenterProvisioners(this, karpenterVersion, this.ec2InstanceNodeGroupRole);
+
+ // Upload the default podTemplate to the Amazon S3 asset bucket
+ this.uploadPodTemplate('defaultPodTemplates', join(__dirname,
'resources/k8s/pod-template'), removalPolicy); + + // Replace the pod template location for driver and executor with the correct Amazon S3 path in the notebook default config + NotebookDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-driver.yaml`); + this.podTemplateS3LocationNotebookDriver = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-driver.yaml`); + + NotebookDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-executor.yaml`); + this.podTemplateS3LocationNotebookExecutor = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/notebook-executor.yaml`); + + this.notebookDefaultConfig = JSON.parse(JSON.stringify(NotebookDefaultConfig)); + + // Replace the pod template location for driver and executor with the correct Amazon S3 path in the critical default config + CriticalDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-driver.yaml`); + this.podTemplateS3LocationCriticalDriver = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-driver.yaml`); + + CriticalDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-executor.yaml`); + this.podTemplateS3LocationCriticalExecutor = this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/critical-executor.yaml`); + + this.criticalDefaultConfig = JSON.stringify(CriticalDefaultConfig); + + // Replace the pod template location for driver and executor with the correct Amazon S3 path in the shared default config + SharedDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.driver.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-driver.yaml`); + this.podTemplateS3LocationDriverShared=this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-driver.yaml`); + + SharedDefaultConfig.applicationConfiguration[0].properties['spark.kubernetes.executor.podTemplateFile'] = + this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-executor.yaml`); + this.podTemplateS3LocationExecutorShared=this.assetBucket.s3UrlForObject(`${this.podTemplateLocation.objectKey}/shared-executor.yaml`); + + this.sharedDefaultConfig = JSON.stringify(SharedDefaultConfig); + } + + // Tags the Amazon VPC and Subnets of the Amazon EKS Cluster + Tags.of(eksCluster.vpc).add( + 'for-use-with-amazon-emr-managed-policies', + 'true', + ); + + eksCluster.vpc.privateSubnets.forEach((subnet) => + Tags.of(subnet).add('for-use-with-amazon-emr-managed-policies', 'true'), + ); + + eksCluster.vpc.publicSubnets.forEach((subnet) => + Tags.of(subnet).add('for-use-with-amazon-emr-managed-policies', 'true'), + ); + + // Create Amazon IAM ServiceLinkedRole for Amazon EMR and add to kubernetes configmap + // required to add a dependency on the Amazon EMR virtual cluster + if (this.createEmrOnEksServiceLinkedRole) { + this.emrServiceRole = new CfnServiceLinkedRole(this, 'EmrServiceRole', { + awsServiceName: 'emr-containers.amazonaws.com', + }); + } + + eksCluster.awsAuth.addRoleMapping( + 
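+ // Map the AWSServiceRoleForAmazonEMRContainers role to the 'emr-containers' RBAC user bound in emr-containers-rbac.yaml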
+ Role.fromRoleArn(
+ this,
+ 'ServiceRoleForAmazonEMRContainers',
+ `arn:aws:iam::${Stack.of(this).account}:role/AWSServiceRoleForAmazonEMRContainers`,
+ ),
+ {
+ username: 'emr-containers',
+ groups: [''],
+ },
+ );
+ }
+
+ /**
+ * Add a new Amazon EMR Virtual Cluster linked to the Amazon EKS cluster.
+ * @param {Construct} scope of the stack where the virtual cluster is deployed
+ * @param {EmrVirtualClusterProps} options the EmrVirtualClusterProps [properties]{@link EmrVirtualClusterProps}
+ */
+ public addEmrVirtualCluster(scope: Construct, options: EmrVirtualClusterProps): CfnVirtualCluster {
+ const eksNamespace = options.eksNamespace ?? 'default';
+
+ let ns = undefined;
+
+ if (options.createNamespace) {
+ ns = createNamespace(this.eksCluster, options.eksNamespace!);
+ }
+
+ // Read the RBAC definition from the template and replace the namespace placeholder
+ let manifest = Utils.readYamlDocument(`${__dirname}/resources/k8s/rbac/emr-containers-rbac.yaml`);
+
+ manifest = manifest.replace(/(\{{NAMESPACE}})/g, eksNamespace);
+
+ let manifestYAML: any = manifest.split('---').map((e: any) => Utils.loadYaml(e));
+
+ const manifestApply = this.eksCluster.addManifest(`emr-containers-rbac-${eksNamespace}`, ...manifestYAML);
+
+ if (ns) {manifestApply.node.addDependency(ns);}
+
+ const virtualCluster = new CfnVirtualCluster(scope, `${options.name}VirtualCluster`, {
+ name: options.name,
+ containerProvider: {
+ id: this.eksCluster.clusterName,
+ type: 'EKS',
+ info: { eksInfo: { namespace: options.eksNamespace ?? 'default' } },
+ },
+ });
+
+ virtualCluster.node.addDependency(manifestApply);
+
+ if (this.emrServiceRole) {
+ manifestApply.node.addDependency(this.emrServiceRole);
+ virtualCluster.node.addDependency(this.emrServiceRole);
+ }
+
+ if (ns) {virtualCluster.node.addDependency(ns);}
+
+ return virtualCluster;
+ }
+
+
+ /**
+ * Create and configure a new Amazon IAM Role usable as an execution role.
+ * This method makes the created role assumed by the Amazon EKS cluster Open ID Connect provider.
+ * @param {Construct} scope of the IAM role
+ * @param {string} id of the CDK resource to be created, it should be unique across the stack
+ * @param {IManagedPolicy} policy the execution policy to attach to the role
+ * @param {string} eksNamespace The namespace from which the role is going to be used. MUST be the same as the namespace of the Virtual Cluster from which the job is submitted
+ * @param {string} name Name to use for the role, required, and used to scope the IAM role
+ */
+ public createExecutionRole(scope: Construct, id: string, policy: IManagedPolicy, eksNamespace: string, name: string): Role {
+
+ let irsaConditionkey: CfnJson = new CfnJson(scope, `${id}IrsaConditionkey`, {
+ value: {
+ [`${this.eksCluster.openIdConnectProvider.openIdConnectProviderIssuer}:sub`]: 'system:serviceaccount:' + eksNamespace + ':emr-containers-sa-*-*-' + Aws.ACCOUNT_ID.toString() + '-' + SimpleBase.base36.encode(name),
+ },
+ });
+
+ // Create an execution role assumable by EKS OIDC provider
+ return new Role(scope, `${id}ExecutionRole`, {
+ assumedBy: new FederatedPrincipal(
+ this.eksCluster.openIdConnectProvider.openIdConnectProviderArn,
+ {
+ StringLike: irsaConditionkey,
+ },
+ 'sts:AssumeRoleWithWebIdentity'),
+ roleName: name,
+ managedPolicies: [policy],
+ inlinePolicies: this.podTemplatePolicy ? { podTemplateAccess: this.podTemplatePolicy! } : undefined,
+ });
+ }
+
+ /**
+ * Upload podTemplates to the Amazon S3 location used by the cluster.
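+ * The construct calls this for the default pod templates; it can also be used to upload custom templates to the same location.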
+ * @param {string} id the unique ID of the CDK resource
+ * @param {string} filePath The local path of the YAML podTemplate files to upload
+ * @param {RemovalPolicy} removalPolicy The removal policy for the uploaded objects, which are retained on stack deletion when set to `RemovalPolicy.RETAIN`
+ */
+ public uploadPodTemplate(id: string, filePath: string, removalPolicy: RemovalPolicy) {
+
+ new BucketDeployment(this, `${id}AssetDeployment`, {
+ destinationBucket: this.assetBucket!,
+ destinationKeyPrefix: this.podTemplateLocation!.objectKey,
+ sources: [Source.asset(filePath)],
+ role: this.assetUploadBucketRole,
+ retainOnDelete: removalPolicy === RemovalPolicy.RETAIN,
+ });
+ }
+
+ /**
+ * Apply the provided manifest and add the CDK dependency on the EKS cluster
+ * @param {string} id the unique ID of the CDK resource
+ * @param {any} manifest The manifest to apply.
+ * You can use the Utils class, which offers methods to read a YAML file and load it as a manifest
+ */
+ public addKarpenterProvisioner(id: string, manifest: any): any {
+
+ let manifestApply = this.eksCluster.addManifest(id, ...manifest);
+
+ if (this.karpenterChart) {
+ manifestApply.node.addDependency(this.karpenterChart);
+ }
+
+ return manifestApply;
+ }
+}
+
diff --git a/framework/src/processing/lib/spark-runtime/index.ts b/framework/src/processing/lib/spark-runtime/index.ts
index 9f61f29ec..b3f8d9dbb 100644
--- a/framework/src/processing/lib/spark-runtime/index.ts
+++ b/framework/src/processing/lib/spark-runtime/index.ts
@@ -3,3 +3,4 @@
 export * from './emr-serverless';
+export * from './emr-containers';
diff --git a/framework/src/utils/lib/index.ts b/framework/src/utils/lib/index.ts
index 1e8081ecc..f5ef9acd3 100644
--- a/framework/src/utils/lib/index.ts
+++ b/framework/src/utils/lib/index.ts
@@ -9,6 +9,7 @@ export * from './application-stage';
 export * from './application-stack-factory';
 export * from './bucket-utils';
 export * from './step-function-utils';
+export * from './utils';
 export * from './vpc-helper';
diff --git a/framework/src/utils/lib/utils.ts b/framework/src/utils/lib/utils.ts
new file mode 100644
index 000000000..daf3afe5c
--- /dev/null
+++ b/framework/src/utils/lib/utils.ts
@@ -0,0 +1,72 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
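+//
+// A short sketch of how these helpers pair with SparkEmrContainersRuntime.addKarpenterProvisioner()
+// (the file path is hypothetical):
+//
+//   const doc = Utils.readYamlDocument('./my-provisioner.yaml');
+//   const manifest = doc.split('---').map((item) => Utils.loadYaml(item));
+//   runtime.addKarpenterProvisioner('MyProvisioner', manifest);
+//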
+// SPDX-License-Identifier: MIT-0
+import * as fs from 'fs';
+import * as yaml from 'js-yaml';
+
+/**
+ * Utilities class used across the different resources
+ */
+export class Utils {
+
+ /**
+ * Sanitize a string by lower-casing it and removing special characters (word characters and whitespace are kept)
+ * @param {string} toSanitize the string to sanitize
+ */
+ public static stringSanitizer(toSanitize: string): string {
+ return toSanitize.toLowerCase().replace(/[^\w\s]/gi, '');
+ }
+
+ /**
+ * Append a random string to the provided name, for example to seed an IAM User password
+ * @param {string} name the string to which to append a random string
+ */
+ public static randomize(name: string) {
+ return `${name}-${Math.random().toString(36).substring(2, 9).toUpperCase()}`;
+ }
+
+ /**
+ * Read a YAML file from the path provided and return it
+ * @param {string} path the path to the file
+ */
+ public static readYamlDocument(path: string): string {
+ try {
+ const doc = fs.readFileSync(path, 'utf8');
+ return doc;
+ } catch (e) {
+ console.log(e + ' for path: ' + path);
+ throw e;
+ }
+ }
+
+ /**
+ * Take a document stored as string and load it as YAML
+ * @param {string} document the document stored as string
+ */
+ public static loadYaml(document: string): any {
+ return yaml.load(document);
+ }
+
+ /**
+ * Convert a string to PascalCase
+ * @param {string} text the text to convert
+ * @returns the PascalCase string
+ */
+ public static toPascalCase(text: string): string {
+
+ // Split the text into words
+ const words = text.match(/[a-z]+/gi);
+
+ if (words) {
+ // Capitalize first letter of each word
+ words.forEach((word, index) => {
+ words[index] = word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
+ });
+
+ // Join the words back into a string
+ return words.join('');
+ }
+
+ return text;
+ }
+
+}
diff --git a/framework/src/utils/lib/vpc-helper.ts b/framework/src/utils/lib/vpc-helper.ts
index b4e8f0004..d04b8191f 100644
--- a/framework/src/utils/lib/vpc-helper.ts
+++ b/framework/src/utils/lib/vpc-helper.ts
@@ -57,7 +57,7 @@ export function vpcBootstrap(
 const publicSubnetMask = vpcMask + 4;
 const privateSubnetMask = publicSubnetMask + 2; // twice as large as public subnet
- const vpc = new Vpc(scope, 'AdsfVPC', {
+ const vpc = new Vpc(scope, 'DsfVpc', {
 ipAddresses: IpAddresses.cidr(vpcCidr),
 maxAzs: 3,
 natGateways: 3,
@@ -110,7 +110,7 @@
 );
 //Setup the VPC flow logs
- const iamFlowLogRole = vpcFlowLogRole || new Role(scope, 'iamRoleforFlowLog', {
+ const iamFlowLogRole = vpcFlowLogRole || new Role(scope, 'FlowLogRole', {
 assumedBy: new ServicePrincipal('vpc-flow-logs.amazonaws.com'),
 });
@@ -119,7 +119,7 @@
 // Create a gateway endpoint for S3
- const s3GatewayVpcEndpoint: GatewayVpcEndpoint = vpc.addGatewayEndpoint('AdsfVpcS3Endpoint', {
+ const s3GatewayVpcEndpoint: GatewayVpcEndpoint = vpc.addGatewayEndpoint('DsfS3VpcEndpoint', {
 service: GatewayVpcEndpointAwsService.S3,
 });
diff --git a/framework/test/e2e/spark-containers-runtime.e2e.test.ts b/framework/test/e2e/spark-containers-runtime.e2e.test.ts
new file mode 100644
index 000000000..b476f10af
--- /dev/null
+++ b/framework/test/e2e/spark-containers-runtime.e2e.test.ts
@@ -0,0 +1,87 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
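+//
+// TestStack.deploy() returns the deployed stack outputs keyed by CfnOutput id; the assertions
+// at the bottom of this file read their values from that record.
+//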
+// SPDX-License-Identifier: MIT-0
+
+/**
+ * E2E test for SparkEmrContainersRuntime
+ *
+ * @group e2e/spark-runtime-containers
+ */
+
+import { KubectlV27Layer } from '@aws-cdk/lambda-layer-kubectl-v27';
+import * as cdk from 'aws-cdk-lib';
+import { ManagedPolicy, PolicyDocument, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam';
+import { TestStack } from './test-stack';
+import { SparkEmrContainersRuntime } from '../../src/processing';
+
+
+jest.setTimeout(6000000);
+
+// GIVEN
+const app = new cdk.App();
+const testStack = new TestStack('SparkContainersTestStack', app);
+const { stack } = testStack;
+
+stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true);
+
+const kubectlLayer = new KubectlV27Layer(stack, 'kubectlLayer');
+const eksAdminRole = Role.fromRoleArn(stack, 'EksAdminRole', `arn:aws:iam::${stack.account}:role/role-name-with-path`);
+
+// creation of the construct(s) under test
+const emrEksCluster = SparkEmrContainersRuntime.getOrCreate(stack, {
+ eksAdminRole,
+ publicAccessCIDRs: ['10.0.0.0/32'],
+ createEmrOnEksServiceLinkedRole: false,
+ kubectlLambdaLayer: kubectlLayer,
+ removalPolicy: cdk.RemovalPolicy.DESTROY,
+});
+
+const s3Read = new PolicyDocument({
+ statements: [new PolicyStatement({
+ actions: [
+ 's3:GetObject',
+ ],
+ resources: ['arn:aws:s3:::aws-data-analytics-workshop'],
+ })],
+});
+
+const s3ReadPolicy = new ManagedPolicy(stack, 's3ReadPolicy', {
+ document: s3Read,
+});
+
+const virtualCluster = emrEksCluster.addEmrVirtualCluster(stack, {
+ name: 'e2e',
+ createNamespace: true,
+ eksNamespace: 'e2ens',
+});
+
+const execRole = emrEksCluster.createExecutionRole(stack, 'ExecRole', s3ReadPolicy, 'e2ens', 's3ReadExecRole');
+
+new cdk.CfnOutput(stack, 'virtualClusterArn', {
+ value: virtualCluster.attrArn,
+});
+
+new cdk.CfnOutput(stack, 'execRoleArn', {
+ value: execRole.roleArn,
+});
+
+new cdk.CfnOutput(stack, 'eksClusterName', {
+ value: emrEksCluster.eksCluster.clusterName,
+});
+
+let deployResult: Record<string, string>;
+
+beforeAll(async() => {
+ // WHEN
+ deployResult = await testStack.deploy();
+}, 6000000);
+
+it('Containers runtime created successfully', async () => {
+ // THEN
+ expect(deployResult.virtualClusterArn).toContain('arn');
+ expect(deployResult.execRoleArn).toContain('arn');
+ expect(deployResult.eksClusterName).toBe('data-platform');
+});
+
+afterAll(async () => {
+ await testStack.destroy();
+}, 6000000);
diff --git a/framework/test/unit/nag/processing/nag-spark-runtime-containers.test.ts b/framework/test/unit/nag/processing/nag-spark-runtime-containers.test.ts
new file mode 100644
index 000000000..56db7f136
--- /dev/null
+++ b/framework/test/unit/nag/processing/nag-spark-runtime-containers.test.ts
@@ -0,0 +1,200 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/**
+ * Nag for Spark runtime EMR Containers
+ *
+ * @group unit/best-practice/spark-runtime-containers
+ */
+
+import { KubectlV27Layer } from '@aws-cdk/lambda-layer-kubectl-v27';
+import { App, Aspects, Stack } from 'aws-cdk-lib';
+import { Annotations, Match } from 'aws-cdk-lib/assertions';
+// eslint-disable-next-line import/no-extraneous-dependencies
+import { ManagedPolicy, PolicyDocument, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam';
+import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag';
+import { SparkEmrContainersRuntime } from '../../../../src/processing';
+
+const app = new App();
+const emrEksClusterStack = new Stack(app, 'nagStack');
+
+const kubectlLayer = new KubectlV27Layer(emrEksClusterStack, 'kubectlLayer');
+
+const adminRole = Role.fromRoleArn(emrEksClusterStack, 'AdminRole', 'arn:aws:iam::123445678901:role/eks-admin');
+
+const emrEksCluster = SparkEmrContainersRuntime.getOrCreate(emrEksClusterStack, {
+ eksAdminRole: adminRole,
+ publicAccessCIDRs: ['10.0.0.0/32'],
+ kubectlLambdaLayer: kubectlLayer,
+ vpcCidr: '10.0.0.0/16',
+});
+
+emrEksCluster.addEmrVirtualCluster(emrEksClusterStack, {
+ name: 'test',
+});
+
+emrEksCluster.addEmrVirtualCluster(emrEksClusterStack, {
+ name: 'nons',
+ createNamespace: true,
+ eksNamespace: 'nons',
+});
+
+const policy = new ManagedPolicy(emrEksClusterStack, 'testPolicy', {
+ document: new PolicyDocument({
+ statements: [
+ new PolicyStatement({
+ resources: ['arn:aws:s3:::aws-data-analytics-workshop'],
+ actions: ['s3:GetObject'],
+ }),
+ ],
+ }),
+});
+
+emrEksCluster.createExecutionRole(emrEksClusterStack, 'test', policy, 'nons', 'myExecRole');
+
+Aspects.of(emrEksClusterStack).add(new AwsSolutionsChecks());
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/Ec2InstanceNodeGroupRole/Resource', [
+ { id: 'AwsSolutions-IAM4', reason: 'The use of Managed policies is a must for EKS nodes' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/AwsNodeRole/Resource', [
+ { id: 'AwsSolutions-IAM4', reason: 'The use of Managed policies is a must for EKS nodes' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/EksCluster/KubectlHandlerRole/Resource', [
+ { id: 'AwsSolutions-IAM4', reason: 'the use of a managed policy is inherited from the L2 construct' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/AssetBucket/Resource', [
+ { id: 'AwsSolutions-S1', reason: 'Access logs are not necessary for this bucket, it only holds assets supporting EMR on EKS jobs like pod templates' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/nagStackEksClusterE556AA2A-AlbController/alb-sa/Role/DefaultPolicy/Resource', [
+ { id: 'AwsSolutions-IAM5', reason: 'IAM policy provided by the controller for ALB' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/EbsCsiDriverPolicy/Resource', [
+ { id: 'AwsSolutions-IAM5', reason: 'wildcard used due to resources defined at runtime, TBAC is used when possible' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/testExecutionRole/Resource', [
+ { id: 'AwsSolutions-IAM5', reason: 'wildcard used for the test execution role' },
+]);
+
+NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/KarpenterInterruptionQueue/Resource', [
+ { id: 'AwsSolutions-SQS3', reason: 'DLQ not
needed, data is transient' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/EksCluster/Resource/Resource/Default', [ + { id: 'AwsSolutions-EKS1', reason: 'Public API is limited by a Security group, the CIDR is provided by the user' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/EksCluster/Resource/CreationRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'Role is scoped by TBAC or resources, wild card used with list and get' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/EksCluster/karpenterServiceAccount/Role/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'Role is scoped by TBAC or resources, wild card used with list and get' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/EksCluster/Role/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'the use of a managed policy is inherited from the L2 construct' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/S3BucketDeploymentPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'Wild card is used because resources are created at runtime, they cannot be scoped at synth' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/OnEventHandler/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/IsCompleteHandler/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onEvent/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onEvent/ServiceRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'wild card used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-isComplete/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-isComplete/ServiceRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'wild card used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onTimeout/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onTimeout/ServiceRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'wild card used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/waiter-state-machine/Role/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', 
reason: 'unable to modify the role of the step function' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.KubectlProvider/Handler/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/IsCompleteHandler/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.KubectlProvider/Provider/framework-onEvent/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onTimeout/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-isComplete/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/Provider/framework-onEvent/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/@aws-cdk--aws-eks.ClusterResourceProvider/OnEventHandler/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.KubectlProvider/Provider/framework-onEvent/ServiceRole/Resource', [ + { id: 'AwsSolutions-IAM4', reason: 'managed policy used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/@aws-cdk--aws-eks.KubectlProvider/Provider/framework-onEvent/ServiceRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'wild card used by L2 resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, 'nagStack/DataPlatform/S3BucketDeploymentRole/DefaultPolicy/Resource', [ + { id: 'AwsSolutions-IAM5', reason: 'wild card used by L2 resource to copy data, the policy is scoped to the resource' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(emrEksClusterStack, '/nagStack/Custom::CDKBucketDeployment8693BB64968944B69AAFB0CC9EB8756C/Resource', [ + { id: 'AwsSolutions-L1', reason: 'unable to modify the runtime provided by L2 construct' }, +]); + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(emrEksClusterStack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(emrEksClusterStack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); + diff --git a/framework/test/unit/processing/spark-runtime-containers.test.ts b/framework/test/unit/processing/spark-runtime-containers.test.ts new file mode 100644 index 000000000..01fdeff10 --- /dev/null +++ b/framework/test/unit/processing/spark-runtime-containers.test.ts @@ -0,0 +1,960 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: MIT-0 + + +/** + * Tests Spark runtime EMR Containers construct + * + * @group unit/processing-runtime/containers/emr-containers +*/ + + +import { KubectlV27Layer } from '@aws-cdk/lambda-layer-kubectl-v27'; +import { RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { Cluster, KubernetesVersion } from 'aws-cdk-lib/aws-eks'; +import { ManagedPolicy, PolicyDocument, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam'; +import { SparkEmrContainersRuntime } from '../../../src/processing'; + +describe('With default configuration, the construct ', () => { + + const emrEksClusterStack = new Stack(); + + const kubectlLayer = new KubectlV27Layer(emrEksClusterStack, 'kubectlLayer'); + + const adminRole = Role.fromRoleArn(emrEksClusterStack, 'AdminRole', 'arn:aws:iam::123445678901:role/eks-admin'); + + const emrEksCluster = SparkEmrContainersRuntime.getOrCreate(emrEksClusterStack, { + eksAdminRole: adminRole, + publicAccessCIDRs: ['10.0.0.0/32'], + kubectlLambdaLayer: kubectlLayer, + }); + + emrEksCluster.addEmrVirtualCluster(emrEksClusterStack, { + name: 'test', + }); + + const policy = new ManagedPolicy(emrEksClusterStack, 'testPolicy', { + document: new PolicyDocument({ + statements: [ + new PolicyStatement({ + resources: ['arn:aws:s3:::aws-data-analytics-workshop'], + actions: ['s3:GetObject'], + }), + ], + }), + }); + + emrEksCluster.createExecutionRole(emrEksClusterStack, 'test', policy, 'nons', 'myExecRole'); + + const template = Template.fromStack(emrEksClusterStack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + test('should create an EKS cluster with correct version', () => { + template.hasResourceProperties('Custom::AWSCDK-EKS-Cluster', { + Config: Match.objectLike({ + version: '1.27', + name: 'data-platform', + }), + }); + }); + + test('should create the emr-containers service linked role', () => { + // THEN + template.hasResourceProperties('AWS::IAM::ServiceLinkedRole', { + AWSServiceName: 'emr-containers.amazonaws.com', + }); + }); + + test('should create the AWS node role and update the AWS node service account', () => { + // THEN + template.hasResourceProperties('AWS::IAM::Role', { + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRoleWithWebIdentity', + Effect: 'Allow', + Principal: { + Federated: { + Ref: Match.stringLikeRegexp('.*OpenIdConnectProvider.*'), + }, + }, + }, + ], + }), + Description: { + 'Fn::Join': Match.arrayWith([ + [ + 'awsNodeRole-', + { + Ref: Match.stringLikeRegexp('EksCluster.*'), + }, + ], + ]), + }, + ManagedPolicyArns: [ + { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + ':iam::aws:policy/AmazonEKS_CNI_Policy', + ]), + ]), + }, + ], + }); + + template.hasResourceProperties('Custom::AWSCDK-EKS-KubernetesResource', { + Manifest: { + 'Fn::Join': Match.arrayWith([ + '', + Match.arrayWith([ + '[{"apiVersion":"v1","kind":"ServiceAccount","metadata":{"name":"aws-node","namespace":"kube-system","annotations":{"eks.amazonaws.com/role-arn":"', + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataPlatformAwsNodeRole.*'), + 'Arn', + ], + }, + ]), + ]), + }, + ClusterName: { + Ref: Match.stringLikeRegexp('EksCluster.*'), + }, + }); + }); + + test('should create a VPC with correct CIDR and tags', () => { + // THEN + template.hasResourceProperties('AWS::EC2::VPC', { + CidrBlock: '10.0.0.0/16', + Tags: Match.arrayWith([ + Match.objectLike({ + Key: 'for-use-with-amazon-emr-managed-policies', + Value: 'true', + }), + ]), + }); 
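+ // Match.objectLike() and Match.arrayWith() are partial matchers: extra properties or tags
+ // on the synthesized VPC do not fail the assertion above.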
+  });
+
+  test('should create 2 private subnets with tags', () => {
+    // THEN
+    template.resourcePropertiesCountIs('AWS::EC2::Subnet', {
+      Tags: Match.arrayWith([
+        Match.objectLike({
+          Key: 'aws-cdk:subnet-type',
+          Value: 'Private',
+        }),
+        Match.objectLike({
+          Key: 'for-use-with-amazon-emr-managed-policies',
+          Value: 'true',
+        }),
+      ]),
+    }, 2);
+  });
+
+  test('should deploy cert-manager', () => {
+    template.hasResourceProperties('Custom::AWSCDK-EKS-HelmChart', {
+      Chart: 'cert-manager',
+      Repository: 'https://charts.jetstack.io',
+      Namespace: 'cert-manager',
+    });
+  });
+
+  test('should deploy the AWS load balancer controller', () => {
+    template.hasResourceProperties('Custom::AWSCDK-EKS-HelmChart', {
+      Chart: 'aws-load-balancer-controller',
+      Repository: 'https://aws.github.io/eks-charts',
+      Namespace: 'kube-system',
+    });
+  });
+
+  test('should deploy the EBS CSI controller', () => {
+    template.hasResourceProperties('AWS::IAM::Policy', {
+      PolicyName: Match.stringLikeRegexp('.*EbsCsiDriverPolicy.*'),
+      Roles: [
+        {
+          Ref: Match.stringLikeRegexp('.*EbsCsiDriverSaRole.*'),
+        },
+      ],
+    });
+
+    template.hasResourceProperties('AWS::EKS::Addon', {
+      AddonName: 'aws-ebs-csi-driver',
+      AddonVersion: 'v1.24.1-eksbuild.1',
+      ClusterName: {
+        Ref: Match.stringLikeRegexp('EksCluster.*'),
+      },
+      ResolveConflicts: 'OVERWRITE',
+      ServiceAccountRoleArn: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('.*EbsCsiDriverSaRole.*'),
+          'Arn',
+        ],
+      },
+    });
+  });
+
+  test('should create the tooling nodegroup', () => {
+    template.hasResourceProperties('AWS::EKS::Nodegroup', {
+      AmiType: 'BOTTLEROCKET_x86_64',
+      InstanceTypes: ['t3.medium'],
+      Labels: {
+        role: 'tooling',
+      },
+      ScalingConfig: {
+        DesiredSize: 2,
+        MaxSize: 2,
+        MinSize: 2,
+      },
+      Tags: Match.objectLike({
+        'data-solutions-fwk:owned': 'true',
+      }),
+    });
+  });
+
+  test('should create the EC2 instance node role', () => {
+    template.hasResourceProperties('AWS::IAM::Role', {
+      AssumeRolePolicyDocument: Match.objectLike({
+        Statement: [
+          {
+            Action: 'sts:AssumeRole',
+            Effect: 'Allow',
+            Principal: {
+              Service: 'ec2.amazonaws.com',
+            },
+          },
+        ],
+      }),
+      ManagedPolicyArns: [
+        {
+          'Fn::Join': Match.arrayWith([
+            Match.arrayWith([
+              ':iam::aws:policy/AmazonEKSWorkerNodePolicy',
+            ]),
+          ]),
+        },
+        {
+          'Fn::Join': Match.arrayWith([
+            Match.arrayWith([
+              ':iam::aws:policy/AmazonEC2ContainerRegistryReadOnly',
+            ]),
+          ]),
+        },
+        {
+          'Fn::Join': Match.arrayWith([
+            Match.arrayWith([
+              ':iam::aws:policy/AmazonSSMManagedInstanceCore',
+            ]),
+          ]),
+        },
+        {
+          'Fn::Join': Match.arrayWith([
+            Match.arrayWith([
+              ':iam::aws:policy/AmazonEKS_CNI_Policy',
+            ]),
+          ]),
+        },
+      ],
+    });
+  });
+
+  test('should create an EMR virtual cluster', () => {
+    template.hasResourceProperties('AWS::EMRContainers::VirtualCluster', {
+      ContainerProvider: Match.objectLike({
+        Type: 'EKS',
+        Info: Match.objectLike({
+          EksInfo: {
+            Namespace: 'default',
+          },
+        }),
+      }),
+      Name: 'test',
+    });
+  });
+
+  test('should create a VPC for the EKS cluster', () => {
+    template.hasResourceProperties('AWS::EC2::VPC', {
+      CidrBlock: '10.0.0.0/16',
+      EnableDnsHostnames: true,
+      EnableDnsSupport: true,
+      InstanceTenancy: 'default',
+      Tags: Match.arrayWith([
+        {
+          Key: 'for-use-with-amazon-emr-managed-policies',
+          Value: 'true',
+        },
+        {
+          Key: 'karpenter.sh/discovery',
+          Value: 'data-platform',
+        },
+        {
+          Key: 'Name',
+          Value: 'Default/DsfVpc',
+        },
+      ]),
+    });
+
+    template.resourceCountIs('AWS::EC2::Subnet', 4);
+    template.resourceCountIs('AWS::EC2::RouteTable', 4);
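+    // the default VPC spans 2 AZs: 2 public + 2 private subnets, each with a dedicated route table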
+    template.resourceCountIs('AWS::EC2::SubnetRouteTableAssociation', 4);
+    template.resourceCountIs('AWS::EC2::Route', 4);
+    template.resourceCountIs('AWS::EC2::EIP', 2);
+    template.resourceCountIs('AWS::EC2::NatGateway', 2);
+    template.resourceCountIs('AWS::EC2::InternetGateway', 1);
+    template.resourceCountIs('AWS::EC2::VPCGatewayAttachment', 1);
+    template.resourceCountIs('AWS::EC2::FlowLog', 1);
+    template.resourceCountIs('AWS::EC2::VPCEndpoint', 1);
+    template.resourceCountIs('AWS::Logs::LogGroup', 1);
+    template.resourcePropertiesCountIs('AWS::IAM::Role', {
+      AssumeRolePolicyDocument: Match.objectLike({
+        Statement: [
+          Match.objectLike({
+            Principal: {
+              Service: 'vpc-flow-logs.amazonaws.com',
+            },
+          }),
+        ],
+      }),
+    }, 1);
+    template.resourcePropertiesCountIs('AWS::IAM::Policy', {
+      PolicyName: Match.stringLikeRegexp('FlowLog.*'),
+    }, 1);
+    template.resourcePropertiesCountIs('AWS::EC2::SecurityGroup', {
+      GroupDescription: 'EKS Control Plane Security Group',
+    }, 1);
+  });
+
+  test('should create an S3 bucket for pod templates and attach the pod template policy to the execution role', () => {
+    template.hasResourceProperties('AWS::S3::Bucket', {
+      BucketEncryption: {
+        ServerSideEncryptionConfiguration: [
+          {
+            ServerSideEncryptionByDefault: {
+              SSEAlgorithm: 'aws:kms',
+            },
+          },
+        ],
+      },
+      Tags: Match.arrayWith([
+        {
+          Key: {
+            'Fn::Join': Match.arrayWith([
+              [
+                'aws-cdk:cr-owned:',
+                {
+                  Ref: Match.stringLikeRegexp('EksCluster.*'),
+                },
+                Match.stringLikeRegexp('/pod-template:.*'),
+              ],
+            ]),
+          },
+          Value: 'true',
+        },
+      ]),
+    });
+
+    template.hasResourceProperties('AWS::IAM::Role', {
+      Policies: [
+        {
+          PolicyDocument: Match.objectLike({
+            Statement: [
+              {
+                Action: 's3:getObject',
+                Effect: 'Allow',
+                Resource: {
+                  'Fn::Join': Match.arrayWith([
+                    Match.arrayWith([
+                      {
+                        Ref: Match.stringLikeRegexp('DataPlatformAssetBucket.*'),
+                      },
+                      {
+                        Ref: Match.stringLikeRegexp('EksCluster.*'),
+                      },
+                      '/pod-template/*',
+                    ]),
+                  ]),
+                },
+              },
+            ],
+          }),
+          PolicyName: 'podTemplateAccess',
+        },
+      ],
+      RoleName: 'myExecRole',
+    });
+  });
+
+  test('should upload pod templates into the pod template bucket', () => {
+    template.hasResourceProperties('Custom::CDKBucketDeployment', {
+      SourceBucketNames: [
+        {
+          'Fn::Sub': 'cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}',
+        },
+      ],
+      DestinationBucketName: {
+        Ref: Match.stringLikeRegexp('DataPlatformAssetBucket.*'),
+      },
+      DestinationBucketKeyPrefix: {
+        'Fn::Join': Match.arrayWith([
+          [
+            {
+              Ref: 'EksClusterFAB68BDB',
+            },
+            '/pod-template',
+          ],
+        ]),
+      },
+      Prune: true,
+    });
+  });
+
+  test('should create an instance profile for Karpenter', () => {
+    template.hasResourceProperties('AWS::IAM::InstanceProfile', {
+      Path: '/',
+      Roles: [
+        {
+          Ref: Match.stringLikeRegexp('.*Ec2InstanceNodeGroupRole.*'),
+        },
+      ],
+    });
+  });
+
+  test('should create a KMS key for EKS secrets', () => {
+    template.hasResourceProperties('AWS::KMS::Key', {
+      Description: 'eks-secrets-key',
+      EnableKeyRotation: true,
+    });
+  });
+
+  test('should create a KMS key for encrypting VPC flow logs', () => {
+    template.hasResourceProperties('AWS::KMS::Key', {
+      Description: 'log-vpc-key',
+      EnableKeyRotation: true,
+      KeyPolicy: Match.objectLike({
+        Statement: Match.arrayWith([
+          {
+            Action: [
+              'kms:Encrypt*',
+              'kms:Decrypt*',
+              'kms:ReEncrypt*',
+              'kms:GenerateDataKey*',
+              'kms:Describe*',
+            ],
+            Condition: {
+              ArnLike: {
+                'kms:EncryptionContext:aws:logs:arn': {
+                  'Fn::Join': [
+                    '',
+                    [
+                      'arn:aws:logs:',
+                      {
+                        Ref: 'AWS::Region',
+                      },
+                      ':',
+                      {
+                        Ref: 'AWS::AccountId',
+                      },
+                      ':*',
+                    ],
+                  ],
+                },
+              },
+            },
+            Effect: 'Allow',
+            Principal: {
+              Service: {
+                'Fn::Join': [
+                  '',
+                  [
+                    'logs.',
+                    {
+                      Ref: 'AWS::Region',
+                    },
+                    '.amazonaws.com',
+                  ],
+                ],
+              },
+            },
+            Resource: '*',
+          },
+        ]),
+      }),
+    });
+  });
+
+  test('should create the execution role with the provided policy, the pod template policy and the IRSA setup', () => {
+    template.hasResourceProperties('AWS::IAM::Role', {
+      AssumeRolePolicyDocument: Match.objectLike({
+        Statement: [
+          {
+            Action: 'sts:AssumeRoleWithWebIdentity',
+            Condition: {
+              StringLike: {
+                'Fn::GetAtt': [
+                  Match.stringLikeRegexp('.*IrsaConditionkey.*'),
+                  'Value',
+                ],
+              },
+            },
+            Effect: 'Allow',
+            Principal: {
+              Federated: {
+                Ref: Match.stringLikeRegexp('.*OpenIdConnectProvider.*'),
+              },
+            },
+          },
+        ],
+      }),
+      ManagedPolicyArns: [
+        {
+          Ref: Match.stringLikeRegexp('testPolicy.*'),
+        },
+      ],
+      RoleName: 'myExecRole',
+    });
+
+    // the provided managed policy carries the requested S3 read permissions
+    template.hasResourceProperties('AWS::IAM::ManagedPolicy', {
+      PolicyDocument: Match.objectLike({
+        Statement: Match.arrayWith([
+          Match.objectLike({
+            Action: 's3:GetObject',
+            Effect: 'Allow',
+            Resource: 'arn:aws:s3:::aws-data-analytics-workshop',
+          }),
+        ]),
+      }),
+    });
+
+    // the IRSA condition key maps the role to the EMR Containers service accounts in the 'nons' namespace
+    template.hasResourceProperties('Custom::AWSCDKCfnJson', {
+      Value: {
+        'Fn::Join': Match.arrayWith([
+          Match.arrayWith([
+            {
+              'Fn::Select': [
+                1,
+                {
+                  'Fn::Split': [
+                    ':oidc-provider/',
+                    {
+                      Ref: Match.stringLikeRegexp('.*EksClusterOpenIdConnectProvider.*'),
+                    },
+                  ],
+                },
+              ],
+            },
+            ':sub":"system:serviceaccount:nons:emr-containers-sa-*-*-',
+            {
+              Ref: 'AWS::AccountId',
+            },
+          ]),
+        ]),
+      },
+    });
+  });
+
+  test('should create a queue for managing Karpenter interruptions with proper configuration', () => {
+    template.hasResourceProperties('AWS::SQS::Queue', {
+      MessageRetentionPeriod: 300,
+    });
+
+    template.hasResourceProperties('AWS::Events::Rule', {
+      EventPattern: {
+        source: [
+          'aws.health',
+        ],
+        detail: [
+          'AWS Health Event',
+        ],
+      },
+      State: 'ENABLED',
+      Targets: [
+        {
+          Arn: {
+            'Fn::GetAtt': [
+              Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+              'Arn',
+            ],
+          },
+          Id: 'Target0',
+        },
+      ],
+    });
+
+    template.hasResourceProperties('AWS::Events::Rule', {
+      EventPattern: {
+        source: [
+          'aws.ec2',
+        ],
+        detail: [
+          'EC2 Instance State-change Notification',
+        ],
+      },
+      State: 'ENABLED',
+      Targets: [
+        {
+          Arn: {
+            'Fn::GetAtt': [
+              Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+              'Arn',
+            ],
+          },
+          Id: 'Target0',
+        },
+      ],
+    });
+
+    template.hasResourceProperties('AWS::SQS::QueuePolicy', {
+      PolicyDocument: Match.objectLike({
+        Statement: [
+          {
+            Action: 'sqs:*',
+            Condition: {
+              Bool: {
+                'aws:SecureTransport': 'false',
+              },
+            },
+            Effect: 'Deny',
+            Principal: {
+              AWS: '*',
+            },
+            Resource: {
+              'Fn::GetAtt': [
+                Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+                'Arn',
+              ],
+            },
+          },
+          {
+            Action: 'sqs:SendMessage',
+            Effect: 'Allow',
+            Principal: {
+              Service: [
+                'sqs.amazonaws.com',
+                'events.amazonaws.com',
+              ],
+            },
+          },
+          {
+            Action: [
+              'sqs:SendMessage',
+              'sqs:GetQueueAttributes',
+              'sqs:GetQueueUrl',
+            ],
+            Condition: {
+              ArnEquals: {
+                'aws:SourceArn': {
+                  'Fn::GetAtt': [
+                    Match.stringLikeRegexp('ScheduledChangeRule.*'),
+                    'Arn',
+                  ],
+                },
+              },
+            },
+            Effect: 'Allow',
+            Principal: {
+              Service: 'events.amazonaws.com',
+            },
+            Resource: {
+              'Fn::GetAtt': [
+                Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+                'Arn',
+              ],
+            },
+          },
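+          // the same send/read grant is repeated below, scoped to the EC2 instance state-change rule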
+          {
+            Action: [
+              'sqs:SendMessage',
+              'sqs:GetQueueAttributes',
+              'sqs:GetQueueUrl',
+            ],
+            Condition: {
+              ArnEquals: {
+                'aws:SourceArn': {
+                  'Fn::GetAtt': [
+                    Match.stringLikeRegexp('InstanceStateChangeRule.*'),
+                    'Arn',
+                  ],
+                },
+              },
+            },
+            Effect: 'Allow',
+            Principal: {
+              Service: 'events.amazonaws.com',
+            },
+            Resource: {
+              'Fn::GetAtt': [
+                Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+                'Arn',
+              ],
+            },
+          },
+        ],
+      }),
+      Queues: [
+        {
+          Ref: Match.stringLikeRegexp('KarpenterInterruptionQueue.*'),
+        },
+      ],
+    });
+  });
+
+  test('should configure proper security groups for Karpenter', () => {
+    template.hasResourceProperties('AWS::EC2::SecurityGroup', {
+      GroupDescription: 'security group for a karpenter instances',
+      Tags: Match.arrayWith([
+        {
+          Key: 'karpenter.sh/discovery',
+          Value: 'data-platform',
+        },
+      ]),
+      VpcId: {
+        Ref: Match.stringLikeRegexp('DsfVpc.*'),
+      },
+    });
+
+    template.hasResourceProperties('AWS::EC2::SecurityGroupEgress', {
+      CidrIp: '0.0.0.0/0',
+      Description: 'Allow all outbound traffic by default',
+      GroupId: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('KarpenterSg'),
+          'GroupId',
+        ],
+      },
+      IpProtocol: '-1',
+    });
+
+    template.hasResourceProperties('AWS::EC2::SecurityGroupIngress', {
+      Description: 'from KarpenterSg:ALL TRAFFIC',
+      GroupId: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('KarpenterSg.*'),
+          'GroupId',
+        ],
+      },
+      IpProtocol: '-1',
+      SourceSecurityGroupId: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('KarpenterSg.*'),
+          'GroupId',
+        ],
+      },
+    });
+
+    template.hasResourceProperties('AWS::EC2::SecurityGroupIngress', {
+      Description: 'from EksClusterClusterSecurityGroupD517EF5B:ALL TRAFFIC',
+      GroupId: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('KarpenterSg.*'),
+          'GroupId',
+        ],
+      },
+      IpProtocol: '-1',
+      SourceSecurityGroupId: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('EksCluster.*'),
+          'ClusterSecurityGroupId',
+        ],
+      },
+    });
+  });
+});
+
+describe('With DESTROY removal policy and global data removal set to TRUE, the construct ', () => {
+
+  const emrEksClusterStack = new Stack();
+  // Set context value for global data removal policy
+  emrEksClusterStack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true);
+
+  const kubectlLayer = new KubectlV27Layer(emrEksClusterStack, 'kubectlLayer');
+
+  const adminRole = Role.fromRoleArn(emrEksClusterStack, 'AdminRole', 'arn:aws:iam::123445678901:role/eks-admin');
+
+  SparkEmrContainersRuntime.getOrCreate(emrEksClusterStack, {
+    eksAdminRole: adminRole,
+    publicAccessCIDRs: ['10.0.0.0/32'],
+    kubectlLambdaLayer: kubectlLayer,
+    removalPolicy: RemovalPolicy.DESTROY,
+  });
+
+  const template = Template.fromStack(emrEksClusterStack);
+  // console.log(JSON.stringify(template.toJSON(), null, 2));
+
+  test('should create a Karpenter queue with DELETE removal policy', () => {
+    template.hasResource('AWS::SQS::Queue', {
+      UpdateReplacePolicy: 'Delete',
+      DeletionPolicy: 'Delete',
+    });
+  });
+
+  test('should create a pod template bucket with DELETE removal policy', () => {
+    template.hasResource('AWS::S3::Bucket', {
+      UpdateReplacePolicy: 'Delete',
+      DeletionPolicy: 'Delete',
+    });
+  });
+
+  test('should create a KMS Key for VPC flow logs with DELETE removal policy', () => {
+    template.hasResource('AWS::KMS::Key', {
+      Properties: Match.objectLike({
+        Description: 'log-vpc-key',
+      }),
+      UpdateReplacePolicy: 'Delete',
+      DeletionPolicy: 'Delete',
+    });
+  });
+
+  test('should create a KMS Key for EKS secrets with DELETE removal policy', () => {
+    template.hasResource('AWS::KMS::Key', {
+      Properties: Match.objectLike({
+        Description: 'eks-secrets-key',
+      }),
+      UpdateReplacePolicy: 'Delete',
+      DeletionPolicy: 'Delete',
+    });
+  });
+
+  test('should create a log group for VPC flow logs with DELETE removal policy', () => {
+    template.hasResource('AWS::Logs::LogGroup', {
+      Properties: Match.objectLike({
+        LogGroupName: '/aws/emr-eks-vpc-flow/data-platform',
+      }),
+      UpdateReplacePolicy: 'Delete',
+      DeletionPolicy: 'Delete',
+    });
+  });
+});
+
+describe('With DESTROY removal policy and global data removal unset, the construct ', () => {
+
+  const emrEksClusterStack = new Stack();
+
+  const kubectlLayer = new KubectlV27Layer(emrEksClusterStack, 'kubectlLayer');
+
+  const adminRole = Role.fromRoleArn(emrEksClusterStack, 'AdminRole', 'arn:aws:iam::123445678901:role/eks-admin');
+
+  SparkEmrContainersRuntime.getOrCreate(emrEksClusterStack, {
+    eksAdminRole: adminRole,
+    publicAccessCIDRs: ['10.0.0.0/32'],
+    kubectlLambdaLayer: kubectlLayer,
+    removalPolicy: RemovalPolicy.DESTROY,
+  });
+
+  const template = Template.fromStack(emrEksClusterStack);
+  // console.log(JSON.stringify(template.toJSON(), null, 2));
+
+  test('should create a Karpenter queue with RETAIN removal policy', () => {
+    template.hasResource('AWS::SQS::Queue', {
+      UpdateReplacePolicy: 'Retain',
+      DeletionPolicy: 'Retain',
+    });
+  });
+
+  test('should create a pod template bucket with RETAIN removal policy', () => {
+    template.hasResource('AWS::S3::Bucket', {
+      UpdateReplacePolicy: 'Retain',
+      DeletionPolicy: 'Retain',
+    });
+  });
+
+  test('should create a KMS Key for VPC flow logs with RETAIN removal policy', () => {
+    template.hasResource('AWS::KMS::Key', {
+      Properties: Match.objectLike({
+        Description: 'log-vpc-key',
+      }),
+      UpdateReplacePolicy: 'Retain',
+      DeletionPolicy: 'Retain',
+    });
+  });
+
+  test('should create a KMS Key for EKS secrets with RETAIN removal policy', () => {
+    template.hasResource('AWS::KMS::Key', {
+      Properties: Match.objectLike({
+        Description: 'eks-secrets-key',
+      }),
+      UpdateReplacePolicy: 'Retain',
+      DeletionPolicy: 'Retain',
+    });
+  });
+
+  test('should create a log group for VPC flow logs with RETAIN removal policy', () => {
+    template.hasResource('AWS::Logs::LogGroup', {
+      Properties: Match.objectLike({
+        LogGroupName: '/aws/emr-eks-vpc-flow/data-platform',
+      }),
+      UpdateReplacePolicy: 'Retain',
+      DeletionPolicy: 'Retain',
+    });
+  });
+});
+
+describe('With a provided EKS cluster, the construct ', () => {
+
+  const emrEksClusterStack = new Stack();
+
+  const kubectlLayer = new KubectlV27Layer(emrEksClusterStack, 'kubectlLayer');
+
+  const adminRole = Role.fromRoleArn(emrEksClusterStack, 'AdminRole', 'arn:aws:iam::123445678901:role/eks-admin');
+
+  const cluster = new Cluster(emrEksClusterStack, 'Cluster', {
+    clusterName: 'myName',
+    version: KubernetesVersion.V1_28,
+  });
+
+  SparkEmrContainersRuntime.getOrCreate(emrEksClusterStack, {
+    eksCluster: cluster,
+    eksClusterName: cluster.clusterName,
+    eksAdminRole: adminRole,
+    publicAccessCIDRs: ['10.0.0.0/32'],
+    kubectlLambdaLayer: kubectlLayer,
+    removalPolicy: RemovalPolicy.DESTROY,
+  });
+
+  const template = Template.fromStack(emrEksClusterStack);
+  // console.log(JSON.stringify(template.toJSON(), null, 2));
+
+  test('should not create an additional VPC or EKS cluster beyond the provided one', () => {
+    template.resourceCountIs('Custom::AWSCDK-EKS-Cluster', 1);
+    template.resourceCountIs('AWS::EC2::VPC', 1);
+    template.resourceCountIs('AWS::EC2::Subnet', 4);
+    template.resourceCountIs('AWS::EC2::RouteTable', 4);
+    template.resourceCountIs('AWS::EC2::SubnetRouteTableAssociation', 4);
+    template.resourceCountIs('AWS::EC2::Route', 4);
+    template.resourceCountIs('AWS::EC2::EIP', 2);
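+    // the resources counted here belong to the user-provided Cluster construct; DSF adds no flow log, VPC endpoint or log group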
+    template.resourceCountIs('AWS::EC2::NatGateway', 2);
+    template.resourceCountIs('AWS::EC2::InternetGateway', 1);
+    template.resourceCountIs('AWS::EC2::VPCGatewayAttachment', 1);
+    template.resourceCountIs('AWS::EC2::FlowLog', 0);
+    template.resourceCountIs('AWS::EC2::VPCEndpoint', 0);
+    template.resourceCountIs('AWS::Logs::LogGroup', 0);
+  });
+
+  test('should not configure the cluster with cert-manager, the AWS load balancer controller or the EBS CSI driver', () => {
+    template.resourcePropertiesCountIs('Custom::AWSCDK-EKS-HelmChart', {
+      Chart: 'cert-manager',
+      Repository: 'https://charts.jetstack.io',
+      Namespace: 'cert-manager',
+    }, 0);
+    template.resourcePropertiesCountIs('Custom::AWSCDK-EKS-HelmChart', {
+      Chart: 'aws-load-balancer-controller',
+      Repository: 'https://aws.github.io/eks-charts',
+      Namespace: 'kube-system',
+    }, 0);
+    template.resourcePropertiesCountIs('AWS::EKS::Addon', {
+      AddonName: 'aws-ebs-csi-driver',
+      AddonVersion: 'v1.24.1-eksbuild.1',
+      ClusterName: {
+        Ref: Match.stringLikeRegexp('EksCluster.*'),
+      },
+      ResolveConflicts: 'OVERWRITE',
+      ServiceAccountRoleArn: {
+        'Fn::GetAtt': [
+          Match.stringLikeRegexp('.*EbsCsiDriverSaRole.*'),
+          'Arn',
+        ],
+      },
+    }, 0);
+  });
+
+});
diff --git a/framework/yarn.lock b/framework/yarn.lock
index 593953e96..0471f0abb 100644
--- a/framework/yarn.lock
+++ b/framework/yarn.lock
@@ -975,6 +975,11 @@
     expect "^29.0.0"
     pretty-format "^29.0.0"
 
+"@types/js-yaml@^4.0.9":
+  version "4.0.9"
+  resolved "https://registry.yarnpkg.com/@types/js-yaml/-/js-yaml-4.0.9.tgz#cd82382c4f902fed9691a2ed79ec68c5898af4c2"
+  integrity sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==
+
 "@types/json-schema@^7.0.12":
   version "7.0.13"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.13.tgz#02c24f4363176d2d18fc8b70b9f3c54aba178a85"
diff --git a/package.json b/package.json
index 7e5e267c6..a1b04322d 100644
--- a/package.json
+++ b/package.json
@@ -25,6 +25,7 @@
   },
   "devDependencies": {
     "@types/node": "16.0.0",
+    "glob": "^10.3.6",
     "lerna": "^7.1.5",
     "lerna-projen": "^0.1.200",
     "projen": "^0.72.20",
@@ -34,6 +35,9 @@
   "peerDependencies": {
     "@types/node": "^16"
   },
+  "resolutions": {
+    "wide-align": "1.1.5"
+  },
   "main": "lib/index.js",
   "license": "MIT-0",
   "homepage": "https://awslabs.github.io/data-solutions-framework-on-aws/",
diff --git a/website/docs/constructs/library/03-Processing/01-spark-emr-serverless-runtime.mdx b/website/docs/constructs/library/03-Processing/01-spark-emr-serverless-runtime.mdx
index 82e4e0a10..0860e0ae0 100644
--- a/website/docs/constructs/library/03-Processing/01-spark-emr-serverless-runtime.mdx
+++ b/website/docs/constructs/library/03-Processing/01-spark-emr-serverless-runtime.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 6
+sidebar_position: 1
 sidebar_label: Spark EMR Serverless Runtime
 ---
diff --git a/website/docs/constructs/library/03-Processing/02-spark-emr-containers-runtime.mdx b/website/docs/constructs/library/03-Processing/02-spark-emr-containers-runtime.mdx
new file mode 100644
index 000000000..6f8751f34
--- /dev/null
+++ b/website/docs/constructs/library/03-Processing/02-spark-emr-containers-runtime.mdx
@@ -0,0 +1,9 @@
+---
+sidebar_position: 2
+sidebar_label: Spark EMR Containers Runtime
+---
+
+import GeneratedCode from '../generated/_processing-spark-emr-runtime-containers.mdx'
+
+# Spark EMR Containers Runtime
+<GeneratedCode />
\ No newline at end of file
diff --git a/website/docs/constructs/library/03-Processing/03-spark-emr-serverless-job.mdx b/website/docs/constructs/library/03-Processing/03-spark-emr-serverless-job.mdx
index 941ac1062..e49bcf535 100644
--- a/website/docs/constructs/library/03-Processing/03-spark-emr-serverless-job.mdx
+++ b/website/docs/constructs/library/03-Processing/03-spark-emr-serverless-job.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 7
+sidebar_position: 3
 sidebar_label: Spark Job
 ---
diff --git a/website/docs/constructs/library/03-Processing/04-pyspark-application-package.mdx b/website/docs/constructs/library/03-Processing/04-pyspark-application-package.mdx
index d6f0a2d90..191153297 100644
--- a/website/docs/constructs/library/03-Processing/04-pyspark-application-package.mdx
+++ b/website/docs/constructs/library/03-Processing/04-pyspark-application-package.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 9
+sidebar_position: 4
 sidebar_label: PySpark Application Package
 ---
diff --git a/website/docs/constructs/library/03-Processing/05-spark-cicd-pipeline.mdx b/website/docs/constructs/library/03-Processing/05-spark-cicd-pipeline.mdx
index bc3dd25f7..7912db28d 100644
--- a/website/docs/constructs/library/03-Processing/05-spark-cicd-pipeline.mdx
+++ b/website/docs/constructs/library/03-Processing/05-spark-cicd-pipeline.mdx
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 8
+sidebar_position: 5
 sidebar_label: Spark CICD Pipeline
 ---
diff --git a/website/docs/constructs/library/generated/_processing-spark-emr-runtime-containers.mdx b/website/docs/constructs/library/generated/_processing-spark-emr-runtime-containers.mdx
new file mode 100644
index 000000000..f260068dd
--- /dev/null
+++ b/website/docs/constructs/library/generated/_processing-spark-emr-runtime-containers.mdx
@@ -0,0 +1,127 @@
+[//]: # (This file is generated, do not modify directly, update the README.md in framework/src/processing)
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+A construct to deploy an EKS cluster and enable it for EMR on EKS use.
+
+## Overview
+
+The construct creates an EKS cluster, installs the necessary controllers, and enables it to be used by the EMR on EKS service, as described in this [documentation](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-cluster-access.html). The following components are deployed:
+
+ * An EKS cluster (the VPC configuration can be customized)
+ * A tooling nodegroup to run the Kubernetes controllers
+ * Kubernetes controllers: EBS CSI Driver, Karpenter, ALB Ingress Controller, cert-manager
+ * Optionally, default Karpenter NodePools and EC2NodeClasses as listed [here](https://github.com/awslabs/data-solutions-framework-on-aws/tree/main/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config)
+
+The construct uploads to S3 the pod templates required to run EMR jobs on the default Karpenter NodePools and EC2NodeClasses. It also parses and stores the configuration of EMR on EKS jobs for each default nodegroup in object parameters.
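+
+For illustration only, the sketch below shows how a job could later target the virtual cluster and execution role that the construct creates. This snippet is not produced by the construct; the virtual cluster ID, role ARN, release label and entry point are placeholders to be replaced with your own values.
+
+```typescript
+import { EMRContainersClient, StartJobRunCommand } from '@aws-sdk/client-emr-containers';
+
+const client = new EMRContainersClient({});
+
+// Submit a Spark job to the virtual cluster created by addEmrVirtualCluster(),
+// using the execution role created by createExecutionRole().
+// All identifiers below are illustrative placeholders.
+await client.send(new StartJobRunCommand({
+  name: 'example-job',
+  virtualClusterId: 'abcdefgh12345678',                              // from the virtual cluster output
+  executionRoleArn: 'arn:aws:iam::123456789012:role/s3ReadExecRole', // from the execution role output
+  releaseLabel: 'emr-6.12.0-latest',
+  jobDriver: {
+    sparkSubmitJobDriver: {
+      entryPoint: 's3://my-bucket/app.py',                           // hypothetical application location
+      sparkSubmitParameters: '--conf spark.executor.instances=2',
+    },
+  },
+}));
+```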
+
+## Usage
+
+The code snippet below shows a usage example of the `SparkEmrContainersRuntime` construct.
+
+<Tabs>
+
+  <TabItem value="typescript" label="TypeScript">
+
+  ```typescript
+class ExampleSparkEmrContainersStack extends cdk.Stack {
+  constructor(scope: Construct, id: string) {
+    super(scope, id);
+
+    // Layer must be changed according to the Kubernetes version used
+    const kubectlLayer = new KubectlV27Layer(this, 'kubectlLayer');
+
+    // Creation of the EMR on EKS runtime construct
+    const emrEksCluster = SparkEmrContainersRuntime.getOrCreate(this, {
+      eksAdminRole: Role.fromRoleArn(this, 'EksAdminRole', 'arn:aws:iam::123456789012:role/role-name-with-path'),
+      publicAccessCIDRs: ['10.0.0.0/32'],
+      createEmrOnEksServiceLinkedRole: true,
+      kubectlLambdaLayer: kubectlLayer,
+    });
+
+    const s3Read = new PolicyDocument({
+      statements: [new PolicyStatement({
+        actions: [
+          's3:GetObject',
+        ],
+        resources: ['arn:aws:s3:::aws-data-analytics-workshop'],
+      })],
+    });
+
+    const s3ReadPolicy = new ManagedPolicy(this, 's3ReadPolicy', {
+      document: s3Read,
+    });
+
+    const virtualCluster = emrEksCluster.addEmrVirtualCluster(this, {
+      name: 'e2e',
+      createNamespace: true,
+      eksNamespace: 'e2ens',
+    });
+
+    const execRole = emrEksCluster.createExecutionRole(this, 'ExecRole', s3ReadPolicy, 'e2ens', 's3ReadExecRole');
+
+    new cdk.CfnOutput(this, 'virtualClusterArn', {
+      value: virtualCluster.attrArn,
+    });
+
+    new cdk.CfnOutput(this, 'execRoleArn', {
+      value: execRole.roleArn,
+    });
+  }
+}
+  ```
+
+  ```mdx-code-block
+
+  </TabItem>
+
+  <TabItem value="python" label="Python">
+
+  ```
+
+  ```python
+class ExampleSparkEmrContainersStack(cdk.Stack):
+    def __init__(self, scope, id):
+        super().__init__(scope, id)
+
+        # Layer must be changed according to the Kubernetes version used
+        kubectl_layer = KubectlV27Layer(self, "kubectlLayer")
+
+        # Creation of the EMR on EKS runtime construct
+        emr_eks_cluster = SparkEmrContainersRuntime.get_or_create(self,
+            eks_admin_role=Role.from_role_arn(self, "EksAdminRole", "arn:aws:iam::123456789012:role/role-name-with-path"),
+            public_access_cidrs=["10.0.0.0/32"],
+            create_emr_on_eks_service_linked_role=True,
+            kubectl_lambda_layer=kubectl_layer
+        )
+
+        s3_read = PolicyDocument(
+            statements=[PolicyStatement(
+                actions=["s3:GetObject"],
+                resources=["arn:aws:s3:::aws-data-analytics-workshop"]
+            )]
+        )
+
+        s3_read_policy = ManagedPolicy(self, "s3ReadPolicy",
+            document=s3_read
+        )
+
+        virtual_cluster = emr_eks_cluster.add_emr_virtual_cluster(self,
+            name="e2e",
+            create_namespace=True,
+            eks_namespace="e2ens"
+        )
+
+        exec_role = emr_eks_cluster.create_execution_role(self, "ExecRole", s3_read_policy, "e2ens", "s3ReadExecRole")
+
+        cdk.CfnOutput(self, "virtualClusterArn",
+            value=virtual_cluster.attr_arn
+        )
+
+        cdk.CfnOutput(self, "execRoleArn",
+            value=exec_role.role_arn
+        )
+  ```
+
+  </TabItem>
+
+</Tabs>
diff --git a/yarn.lock b/yarn.lock
index 5feded094..0320cd78a 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1677,6 +1677,17 @@ glob@^10.2.2:
   minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
   path-scurry "^1.10.1"
 
+glob@^10.3.6:
+  version "10.3.10"
+  resolved "https://registry.yarnpkg.com/glob/-/glob-10.3.10.tgz#0351ebb809fd187fe421ab96af83d3a70715df4b"
+  integrity sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==
+  dependencies:
+    foreground-child "^3.1.0"
+    jackspeak "^2.3.5"
+    minimatch "^9.0.1"
+    minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
+    path-scurry "^1.10.1"
+
 glob@^7.0.0, glob@^7.1.3, glob@^7.1.4:
   version "7.2.3"
   resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b"
@@ -2100,6 +2111,15 @@ jackspeak@^2.0.3:
   optionalDependencies:
     "@pkgjs/parseargs" "^0.11.0"
 
+jackspeak@^2.3.5:
+  version "2.3.6"
+  resolved 
"https://registry.yarnpkg.com/jackspeak/-/jackspeak-2.3.6.tgz#647ecc472238aee4b06ac0e461acc21a8c505ca8" + integrity sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ== + dependencies: + "@isaacs/cliui" "^8.0.2" + optionalDependencies: + "@pkgjs/parseargs" "^0.11.0" + jake@^10.8.5: version "10.8.7" resolved "https://registry.yarnpkg.com/jake/-/jake-10.8.7.tgz#63a32821177940c33f356e0ba44ff9d34e1c7d8f" @@ -4022,7 +4042,7 @@ which@^3.0.0: dependencies: isexe "^2.0.0" -wide-align@^1.1.5: +wide-align@1.1.5, wide-align@^1.1.5: version "1.1.5" resolved "https://registry.yarnpkg.com/wide-align/-/wide-align-1.1.5.tgz#df1d4c206854369ecf3c9a4898f1b23fbd9d15d3" integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg== @@ -4035,6 +4055,7 @@ wordwrap@^1.0.0: integrity sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q== "wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: + name wrap-ansi-cjs version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==