From dc78e2af30f6a502e53db2d10519b5d473663f10 Mon Sep 17 00:00:00 2001 From: Zain Rizvi Date: Fri, 18 Oct 2024 10:21:24 -0500 Subject: [PATCH] Enable non-org autoscaler to read scale-config from any repo (#5767) Enable putting scale-config in test-infra, even for non-organization runners. Before, only scale-configs for organization-scoped runners could be put in arbitrary repos Significant change: - `scale_config_repo` no longer defaults to test-infra. You now _have_ to specify a value if you're using org runners. Non-org runners will default to the job's repo. pytorch-gha-infra and ci-infra will need to be updated accordingly. Testing: Deployed these changes to LF canary, terminated all lf-c instances, and verified that creating a new pull request there would still result in instances getting provisioned --- .../runners/src/scale-runners/config.test.ts | 10 +- .../runners/src/scale-runners/config.ts | 5 +- .../src/scale-runners/scale-down.test.ts | 130 +++++++++++++----- .../runners/src/scale-runners/scale-down.ts | 2 +- .../src/scale-runners/scale-up.test.ts | 41 ++++++ .../runners/src/scale-runners/scale-up.ts | 2 +- .../modules/runners/scale-down.tf | 10 ++ .../modules/runners/scale-up.tf | 10 ++ terraform-aws-github-runner/variables.tf | 4 +- 9 files changed, 174 insertions(+), 40 deletions(-) diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.test.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.test.ts index 761ed42bbf..d5f06bb2d4 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.test.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.test.ts @@ -193,10 +193,18 @@ describe('Config', () => { expect(Config.Instance.mustHaveIssuesLabels).toEqual([]); expect(Config.Instance.runnerGroupName).toBeUndefined(); expect(Config.Instance.runnersExtraLabels).toBeUndefined(); - expect(Config.Instance.scaleConfigRepo).toEqual('test-infra'); + expect(Config.Instance.scaleConfigRepo).toEqual(''); expect(Config.Instance.scaleConfigRepoPath).toEqual('.github/scale-config.yml'); expect(Config.Instance.secretsManagerSecretsId).toBeUndefined(); expect(Config.Instance.shuffledAwsRegionInstances).toEqual([]); expect(Config.Instance.enableOrganizationRunners).toBeFalsy(); }); + + it('requires scaleConfigRepo to be set when organization runners are enabled', () => { + Config.resetConfig(); + process.env.ENABLE_ORGANIZATION_RUNNERS = 'True'; + process.env.SCALE_CONFIG_REPO = ''; + + expect(() => Config.Instance).toThrowError('SCALE_CONFIG_REPO is required when ENABLE_ORGANIZATION_RUNNERS is set'); + }); }); diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts index aa98bf706e..68fbfb00ba 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts @@ -97,7 +97,10 @@ export class Config { this.runnerGroupName = process.env.RUNNER_GROUP_NAME; this.runnersExtraLabels = process.env.RUNNER_EXTRA_LABELS; /* istanbul ignore next */ - this.scaleConfigRepo = process.env.SCALE_CONFIG_REPO || 'test-infra'; + this.scaleConfigRepo = process.env.SCALE_CONFIG_REPO || ''; + if (this.enableOrganizationRunners && !this.scaleConfigRepo) { + throw new Error('SCALE_CONFIG_REPO is required when ENABLE_ORGANIZATION_RUNNERS is set'); + } this.scaleConfigRepoPath = process.env.SCALE_CONFIG_REPO_PATH || '.github/scale-config.yml'; this.secretsManagerSecretsId = process.env.SECRETSMANAGER_SECRETS_ID; this.sSMParamCleanupAgeDays = Number(process.env.SSM_PARAM_CLEANUP_AGE_DAYS || '7'); diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index 6e6712bbb1..4025a6929c 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -1257,57 +1257,119 @@ describe('scale-down', () => { describe('repo runners', () => { const runnerType = 'runnerTypeDef'; const owner = 'the-org'; - const repo: Repo = { + const runnerRepo: Repo = { owner: owner, repo: 'a-repo', }; - const repoKey = `${owner}/a-repo`; + const runnerRepoKey = `${owner}/a-repo`; - beforeEach(() => { - jest.spyOn(Config, 'Instance', 'get').mockImplementation( - () => - ({ - ...baseConfig, - enableOrganizationRunners: false, - } as unknown as Config), - ); - }); + describe('When no scaleConfigRepo is set, the runner repo is used as the source for scale config', () => { + beforeEach(() => { + jest.spyOn(Config, 'Instance', 'get').mockImplementation( + () => + ({ + ...baseConfig, + enableOrganizationRunners: false, + } as unknown as Config), + ); + }); - it('is_ephemeral === undefined', async () => { - const mockedGetRunnerTypes = mocked(getRunnerTypes); + it('is_ephemeral === undefined', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); - mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]])); + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]])); - expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual( - false, - ); + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(false); - expect(mockedGetRunnerTypes).toBeCalledTimes(1); - expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics); - }); + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics); + }); - it('is_ephemeral === true', async () => { - const mockedGetRunnerTypes = mocked(getRunnerTypes); + it('is_ephemeral === true', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); - mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]])); + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]])); - expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual(true); + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(true); - expect(mockedGetRunnerTypes).toBeCalledTimes(1); - expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics); + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics); + }); + + it('is_ephemeral === false', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); + + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]])); + + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(false); + + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(runnerRepo, metrics); + }); }); - it('is_ephemeral === false', async () => { - const mockedGetRunnerTypes = mocked(getRunnerTypes); + describe("When a scaleConfigRepo is set, it's used as the source for scale config", () => { + const centralRepoName = 'central-repo'; // to be the test-infra equivalent + const centralRepo: Repo = { + owner: owner, + repo: centralRepoName, + }; - mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]])); + beforeEach(() => { + jest.spyOn(Config, 'Instance', 'get').mockImplementation( + () => + ({ + ...baseConfig, + enableOrganizationRunners: false, + scaleConfigRepo: centralRepoName, + } as unknown as Config), + ); + }); - expect(await isEphemeralRunner({ runnerType: runnerType, repo: repoKey } as RunnerInfo, metrics)).toEqual( - false, - ); + it('is_ephemeral === undefined', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); - expect(mockedGetRunnerTypes).toBeCalledTimes(1); - expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics); + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, {} as RunnerType]])); + + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(false); + + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics); + }); + + it('is_ephemeral === true', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); + + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: true } as RunnerType]])); + + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(true); + + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics); + }); + + it('is_ephemeral === false', async () => { + const mockedGetRunnerTypes = mocked(getRunnerTypes); + + mockedGetRunnerTypes.mockResolvedValueOnce(new Map([[runnerType, { is_ephemeral: false } as RunnerType]])); + + expect( + await isEphemeralRunner({ runnerType: runnerType, repo: runnerRepoKey } as RunnerInfo, metrics), + ).toEqual(false); + + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith(centralRepo, metrics); + }); }); }); }); diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 73d82550ad..2f49ad66fc 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -373,7 +373,7 @@ export async function isEphemeralRunner(ec2runner: RunnerInfo, metrics: ScaleDow } const repo: Repo = (() => { - if (Config.Instance.enableOrganizationRunners) { + if (Config.Instance.scaleConfigRepo) { return { owner: ec2runner.org !== undefined ? (ec2runner.org as string) : getRepo(ec2runner.repo as string).owner, repo: Config.Instance.scaleConfigRepo, diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts index 34f5dfafb7..18f7130f07 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts @@ -87,6 +87,47 @@ describe('scaleUp', () => { expect(mockedListGithubRunners).not.toBeCalled(); }); + it('uses the scaleConfigRepo when provided', async () => { + jest.spyOn(Config, 'Instance', 'get').mockImplementation( + () => + ({ + ...baseCfg, + enableOrganizationRunners: false, + scaleConfigRepo: 'scale-config-repo', + } as unknown as Config), + ); + const payload = { + id: 10, + eventType: 'event', + repositoryName: 'repo', + repositoryOwner: 'owner', + installationId: 2, + runnerLabels: ['label1', 'label2'], + }; + const mockedGetRunnerTypes = mocked(getRunnerTypes).mockResolvedValue( + new Map([ + [ + 'label1-nomatch', + { + instance_type: 'instance_type', + os: 'os', + max_available: 33, + disk_size: 113, + runnerTypeName: 'runnerTypeName', + is_ephemeral: false, + }, + ], + ]), + ); + const mockedListGithubRunners = mocked(listGithubRunnersRepo); + + await scaleUp('aws:sqs', payload, metrics); + + expect(mockedGetRunnerTypes).toBeCalledTimes(1); + expect(mockedGetRunnerTypes).toBeCalledWith({ repo: 'scale-config-repo', owner: 'owner' }, metrics); + expect(mockedListGithubRunners).not.toBeCalled(); + }); + it('have available runners', async () => { jest.spyOn(Config, 'Instance', 'get').mockImplementation( () => diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts index 132c46bbf4..ae91c8ba5b 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts @@ -60,7 +60,7 @@ export async function scaleUp( const runnerTypes = await getRunnerTypes( { owner: repo.owner, - repo: Config.Instance.enableOrganizationRunners ? Config.Instance.scaleConfigRepo : repo.repo, + repo: Config.Instance.scaleConfigRepo || repo.repo, }, metrics, ); diff --git a/terraform-aws-github-runner/modules/runners/scale-down.tf b/terraform-aws-github-runner/modules/runners/scale-down.tf index 76d4bad47c..9daa764b2e 100644 --- a/terraform-aws-github-runner/modules/runners/scale-down.tf +++ b/terraform-aws-github-runner/modules/runners/scale-down.tf @@ -26,6 +26,16 @@ resource "aws_lambda_function" "scale_down" { tags = local.tags memory_size = 2048 + lifecycle { + precondition { + # Enforce that a value for scale_config_repo is set when enable_organization_runners is set to true. + # Setting the value is optional when not using organization runners since we'll default to the + # job's repository. + condition = var.enable_organization_runners == true ? var.scale_config_repo != "" : true + error_message = "scale_config_repo is required when enable_organization_runners is set to true" + } + } + environment { variables = { AWS_REGION_INSTANCES = join(",", var.aws_region_instances) diff --git a/terraform-aws-github-runner/modules/runners/scale-up.tf b/terraform-aws-github-runner/modules/runners/scale-up.tf index 4deb756295..8a47122534 100644 --- a/terraform-aws-github-runner/modules/runners/scale-up.tf +++ b/terraform-aws-github-runner/modules/runners/scale-up.tf @@ -28,6 +28,16 @@ resource "aws_lambda_function" "scale_up" { memory_size = 2048 publish = true + lifecycle { + precondition { + # Enforce that a value for scale_config_repo is set when enable_organization_runners is set to true. + # Setting the value is optional when not using organization runners since we'll default to the + # job's repository. + condition = var.enable_organization_runners == true ? var.scale_config_repo != "" : true + error_message = "scale_config_repo is required when enable_organization_runners is set to true" + } + } + environment { variables = { CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels) diff --git a/terraform-aws-github-runner/variables.tf b/terraform-aws-github-runner/variables.tf index 80f9c64d39..b99af920cf 100644 --- a/terraform-aws-github-runner/variables.tf +++ b/terraform-aws-github-runner/variables.tf @@ -346,8 +346,8 @@ variable "cant_have_issues_labels" { } variable "scale_config_repo" { - description = "Repository to fetch scale config from if `enable_organization_runners` is set to true. Otherwise the job's repo will be used" - default = "" # Internally defaults to 'test-infra' + description = "Repository to fetch scale config from. Optional if `enable_organization_runners` is set to false, in which case the job's repo will be used" + default = "" type = string }