Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Support longer runner image build timeouts #543

Merged
merged 2 commits into from
Apr 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/image-builders/aws-image-builder/deprecated/container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { WindowsComponents } from './windows-components';
import { Architecture, Os, RunnerAmi, RunnerImage, RunnerVersion } from '../../../providers';
import { singletonLambda } from '../../../utils';
import { BuildImageFunction } from '../../build-image-function';
import { BuildImageFunctionProperties } from '../../build-image.lambda';
import { uniqueImageBuilderName } from '../../common';
import { ImageBuilderComponent } from '../builder';
import { ContainerRecipe } from '../container';
Expand Down Expand Up @@ -323,7 +324,7 @@ export class ContainerImageBuilder extends ImageBuilderBase {
const cr = new CustomResource(this, 'Deleter', {
serviceToken: crHandler.functionArn,
resourceType: 'Custom::ImageDeleter',
properties: {
properties: <BuildImageFunctionProperties>{
RepoName: this.repository.repositoryName,
ImageBuilderName: recipeName, // we don't use image.name because CloudFormation complains if it was deleted already
DeleteOnly: true,
Expand Down
51 changes: 22 additions & 29 deletions src/image-builders/build-image.lambda.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,48 @@ import { customResourceRespond } from '../lambda-helpers';
const codebuild = new CodeBuildClient();
const ib = new ImagebuilderClient();

/**
* Shape of the custom resource properties that the build-image handler reads
* from `event.ResourceProperties`. Constructs that create the custom resource
* cast their `properties` object to this type so both sides agree on the keys.
*
* @internal
*/
export interface BuildImageFunctionProperties {
/** Custom resource service token. Not read by the handler code visible here. */
ServiceToken: string;
/** When truthy, Create/Update respond with SUCCESS right away and no build is started. */
DeleteOnly?: boolean;
/** Repository name supplied by the calling construct. NOTE(review): its use in the handler is outside the visible code; confirm. */
RepoName: string;
/** Name of the CodeBuild project started on Create/Update. */
ProjectName: string;
/** When set, Delete lists Image Builder images filtered by this name. */
ImageBuilderName?: string;
/** Forwarded to the started build as the `WAIT_HANDLE` environment variable override. */
WaitHandle?: string;
}

export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent, context: AWSLambda.Context) {
try {
console.log(JSON.stringify({ ...event, ResponseURL: '...' }));

const deleteOnly = event.ResourceProperties.DeleteOnly as boolean | undefined;
const projectName = event.ResourceProperties.ProjectName;
const ibName = event.ResourceProperties.ImageBuilderName as string | undefined;

// let physicalResourceId: string;
// let data: { [key: string]: string } = {};
const props = event.ResourceProperties as BuildImageFunctionProperties;

switch (event.RequestType) {
case 'Create':
case 'Update':
if (deleteOnly) {
if (props.DeleteOnly) {
await customResourceRespond(event, 'SUCCESS', 'OK', 'Deleter', {});
break;
}

console.log(`Starting CodeBuild project ${projectName}`);
await codebuild.send(new StartBuildCommand({
projectName,
console.log(`Starting CodeBuild project ${props.ProjectName}`);
const cbRes = await codebuild.send(new StartBuildCommand({
projectName: props.ProjectName,
environmentVariablesOverride: [
{
type: 'PLAINTEXT',
name: 'STACK_ID',
value: event.StackId,
},
{
type: 'PLAINTEXT',
name: 'REQUEST_ID',
value: event.RequestId,
},
{
type: 'PLAINTEXT',
name: 'LOGICAL_RESOURCE_ID',
value: event.LogicalResourceId,
},
{
type: 'PLAINTEXT',
name: 'RESPONSE_URL',
value: event.ResponseURL,
name: 'WAIT_HANDLE',
value: props.WaitHandle!,
},
],
}));
await customResourceRespond(event, 'SUCCESS', 'OK', cbRes.build?.id ?? 'build', {});
break;
case 'Delete':
if (ibName) {
const ibImages = await ib.send(new ListIbImagesCommand({ filters: [{ name: 'name', values: [ibName] }] }));
if (props.ImageBuilderName) {
const ibImages = await ib.send(new ListIbImagesCommand({ filters: [{ name: 'name', values: [props.ImageBuilderName] }] }));
if (ibImages.imageVersionList) {
for (const v of ibImages.imageVersionList) {
if (v.arn) {
Expand Down
65 changes: 42 additions & 23 deletions src/image-builders/codebuild.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import * as crypto from 'node:crypto';
import * as cdk from 'aws-cdk-lib';
import {
Annotations,
aws_cloudformation as cloudformation,
aws_codebuild as codebuild,
aws_ec2 as ec2,
aws_ecr as ecr,
Expand All @@ -20,6 +22,7 @@ import { RetentionDays } from 'aws-cdk-lib/aws-logs';
import { Construct, IConstruct } from 'constructs';
import { defaultBaseDockerImage } from './aws-image-builder';
import { BuildImageFunction } from './build-image-function';
import { BuildImageFunctionProperties } from './build-image.lambda';
import { RunnerImageBuilderBase, RunnerImageBuilderProps } from './common';
import { Architecture, Os, RunnerAmi, RunnerImage, RunnerVersion } from '../providers';
import { singletonLambda } from '../utils';
Expand Down Expand Up @@ -99,6 +102,11 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
'See https://github.com/aws/containers-roadmap/issues/1160');
}

// Check timeout: surface a synth-time error annotation on this construct when
// the configured build timeout is longer than 8 hours.
if (this.timeout.toSeconds() > Duration.hours(8).toSeconds()) {
  Annotations.of(this).addError('CodeBuild runner image builder timeout must be 8 hours or less.');
}

// create service role for CodeBuild
this.role = new iam.Role(this, 'Role', {
assumedBy: new iam.ServicePrincipal('codebuild.amazonaws.com'),
Expand Down Expand Up @@ -146,7 +154,7 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
);

// generate buildSpec
const buildSpec = this.getBuildSpec(this.repository);
const [buildSpec, buildSpecHash] = this.getBuildSpec(this.repository);

// create CodeBuild project that builds Dockerfile and pushes to repository
const project = new codebuild.Project(this, 'CodeBuild', {
Expand All @@ -173,7 +181,7 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
this.repository.grantPullPush(project);

// call CodeBuild during deployment
const cr = this.customResource(project, buildSpec.toBuildSpec());
const completedImage = this.customResource(project, buildSpecHash);

// rebuild image on a schedule
this.rebuildImageOnSchedule(project, this.rebuildInterval);
Expand All @@ -186,7 +194,7 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
os: this.os,
logGroup,
runnerVersion: RunnerVersion.specific('unknown'),
_dependable: cr.getAttString('Random'),
_dependable: completedImage,
};
return this.boundDockerImage;
}
Expand Down Expand Up @@ -253,7 +261,7 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
return commands;
}

private getBuildSpec(repository: ecr.Repository): codebuild.BuildSpec {
private getBuildSpec(repository: ecr.Repository): [codebuild.BuildSpec, string] {
const thisStack = cdk.Stack.of(this);

let archUrl;
Expand All @@ -265,16 +273,19 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
throw new Error(`Unsupported architecture for required CodeBuild: ${this.architecture.name}`);
}

return codebuild.BuildSpec.fromObject({
const commands = this.getDockerfileGenerationCommands();

const buildSpecVersion = 'v1'; // change this every time the build spec changes
const hashedComponents = commands.concat(buildSpecVersion, this.architecture.name, this.baseImage, this.os.name);
const hash = crypto.createHash('md5').update(hashedComponents.join('\n')).digest('hex').slice(0, 10);

const buildSpec = codebuild.BuildSpec.fromObject({
version: '0.2',
env: {
variables: {
REPO_ARN: repository.repositoryArn,
REPO_URI: repository.repositoryUri,
STACK_ID: 'unspecified',
REQUEST_ID: 'unspecified',
LOGICAL_RESOURCE_ID: 'unspecified',
RESPONSE_URL: 'unspecified',
WAIT_HANDLE: 'unspecified',
BASH_ENV: 'codebuild-log.sh',
},
shell: 'bash',
Expand All @@ -287,30 +298,27 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
],
},
build: {
commands: this.getDockerfileGenerationCommands().concat(
commands: commands.concat(
'docker build --progress plain . -t "$REPO_URI"',
'docker push "$REPO_URI"',
),
},
post_build: {
commands: [
'rm -f codebuild-log.sh && STATUS="SUCCESS"',
'if [ $CODEBUILD_BUILD_SUCCEEDING -ne 1 ]; then STATUS="FAILED"; fi',
'if [ $CODEBUILD_BUILD_SUCCEEDING -ne 1 ]; then STATUS="FAILURE"; fi',
'cat <<EOF > /tmp/payload.json\n' +
'{\n' +
' "StackId": "$STACK_ID",\n' +
' "RequestId": "$REQUEST_ID",\n' +
' "LogicalResourceId": "$LOGICAL_RESOURCE_ID",\n' +
' "PhysicalResourceId": "$REPO_ARN",\n' +
' "Status": "$STATUS",\n' +
' "UniqueId": "build",\n' +
// we remove non-printable characters from the log because CloudFormation doesn't like them
// https://github.com/aws-cloudformation/cloudformation-coverage-roadmap/issues/1601
' "Reason": `sed \'s/[^[:print:]]//g\' /tmp/codebuild.log | tail -c 400 | jq -Rsa .`,\n' +
// for lambda always get a new value because there is always a new image hash
' "Data": {"Random": "$RANDOM"}\n' +
' "Data": "$RANDOM"\n' +
'}\n' +
'EOF',
'if [ "$RESPONSE_URL" != "unspecified" ]; then jq . /tmp/payload.json; curl -fsSL -X PUT -H "Content-Type:" -d "@/tmp/payload.json" "$RESPONSE_URL"; fi',
'if [ "$WAIT_HANDLE" != "unspecified" ]; then jq . /tmp/payload.json; curl -fsSL -X PUT -H "Content-Type:" -d "@/tmp/payload.json" "$WAIT_HANDLE"; fi',
// generate and push soci index
// we do this after finishing the build, so we don't have to wait. it's also not required, so it's ok if it fails
'docker rmi "$REPO_URI"', // it downloads the image again to /tmp, so save on space
Expand All @@ -321,9 +329,11 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
},
},
});

return [buildSpec, hash];
}

private customResource(project: codebuild.Project, buildSpec: string) {
private customResource(project: codebuild.Project, buildSpecHash: string) {
const crHandler = singletonLambda(BuildImageFunction, this, 'build-image', {
description: 'Custom resource handler that triggers CodeBuild to build runner images, and cleans-up images on deletion',
timeout: cdk.Duration.minutes(3),
Expand All @@ -340,15 +350,24 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
});
crHandler.role!.attachInlinePolicy(policy);

// Wait handle lets us wait for longer than an hour for the image build to complete.
// We generate a new wait handle for build spec changes to guarantee a new image is built.
// This also helps make sure the changes are good. If they have a bug, the deployment will fail instead of just the scheduled build.
// Finally, it's recommended by CloudFormation docs to not reuse wait handles or old responses may interfere in some cases.
const handle = new cloudformation.CfnWaitConditionHandle(this, `Build Wait Handle ${buildSpecHash}`);
const wait = new cloudformation.CfnWaitCondition(this, `Build Wait ${buildSpecHash}`, {
handle: handle.ref,
timeout: this.timeout.toSeconds().toString(), // don't wait longer than the build timeout
count: 1,
});

const cr = new CustomResource(this, 'Builder', {
serviceToken: crHandler.functionArn,
resourceType: 'Custom::ImageBuilder',
properties: {
properties: <BuildImageFunctionProperties>{
RepoName: this.repository.repositoryName,
ProjectName: project.projectName,
// We include the full buildSpec so the image is built immediately on changes, and we don't have to wait for its scheduled build.
// This also helps make sure the changes are good. If they have a bug, the deployment will fail instead of just the scheduled build.
BuildSpec: buildSpec,
WaitHandle: handle.ref,
},
});

Expand All @@ -359,7 +378,7 @@ export class CodeBuildRunnerImageBuilder extends RunnerImageBuilderBase {
cr.node.addDependency(crHandler.role!);
cr.node.addDependency(crHandler);

return cr;
return wait.ref; // user needs to wait on wait handle which is triggered when the image is built
}

private rebuildImageOnSchedule(project: codebuild.Project, rebuildInterval?: Duration) {
Expand Down
10 changes: 5 additions & 5 deletions test/default.integ.snapshot/github-runners-test.assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@
}
}
},
"2dd30d0564f58d0d86550d44455883dabdababcb520c404817cd9193ac0a5161": {
"5d536d3909907c631aa15996d7eee0e12247f88312d888640b63f34a352c5fa4": {
"source": {
"path": "asset.2dd30d0564f58d0d86550d44455883dabdababcb520c404817cd9193ac0a5161.lambda",
"path": "asset.5d536d3909907c631aa15996d7eee0e12247f88312d888640b63f34a352c5fa4.lambda",
"packaging": "zip"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "2dd30d0564f58d0d86550d44455883dabdababcb520c404817cd9193ac0a5161.zip",
"objectKey": "5d536d3909907c631aa15996d7eee0e12247f88312d888640b63f34a352c5fa4.zip",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down Expand Up @@ -235,15 +235,15 @@
}
}
},
"e0a2b1d424418d59ce339e3fc6ec13297efe609e1c74984a369f871a521ac8ad": {
"9c5b39955d7bbbb61208adf216865db8d7012ef357025a8c6afbcbfcd02d8609": {
"source": {
"path": "github-runners-test.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "e0a2b1d424418d59ce339e3fc6ec13297efe609e1c74984a369f871a521ac8ad.json",
"objectKey": "9c5b39955d7bbbb61208adf216865db8d7012ef357025a8c6afbcbfcd02d8609.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Loading
Loading