diff --git a/terraform-aws-github-runner/main.tf b/terraform-aws-github-runner/main.tf index 094389bb06..b32cc3ffb7 100644 --- a/terraform-aws-github-runner/main.tf +++ b/terraform-aws-github-runner/main.tf @@ -104,6 +104,7 @@ module "runners" { environment = var.environment tags = local.tags + scale_config_org = var.scale_config_org scale_config_repo = var.scale_config_repo scale_config_repo_path = var.scale_config_repo_path diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts index 68fbfb00ba..0172dff19b 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/config.ts @@ -36,8 +36,10 @@ export class Config { readonly retryScaleUpRecordQueueUrl: string | undefined; readonly runnerGroupName: string | undefined; readonly runnersExtraLabels: undefined | string; + readonly scaleConfigOrg: string; readonly scaleConfigRepo: string; readonly scaleConfigRepoPath: string; + readonly scaleUpRecordQueueUrl: string | undefined; readonly secretsManagerSecretsId: string | undefined; readonly sSMParamCleanupAgeDays: number; readonly sSMParamMaxCleanupAllowance: number; @@ -94,8 +96,10 @@ export class Config { /* istanbul ignore next */ this.retryScaleUpRecordJitterPct = Number(process.env.RETRY_SCALE_UP_RECORD_JITTER_PCT || '0'); this.retryScaleUpRecordQueueUrl = process.env.RETRY_SCALE_UP_RECORD_QUEUE_URL; + this.scaleUpRecordQueueUrl = process.env.SCALE_UP_RECORD_QUEUE_URL; this.runnerGroupName = process.env.RUNNER_GROUP_NAME; this.runnersExtraLabels = process.env.RUNNER_EXTRA_LABELS; + this.scaleConfigOrg = process.env.SCALE_CONFIG_ORG || ''; /* istanbul ignore next */ this.scaleConfigRepo = process.env.SCALE_CONFIG_REPO || ''; if (this.enableOrganizationRunners && !this.scaleConfigRepo) { diff --git 
a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts index 64c5998919..4c6ea88ea8 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts @@ -1424,6 +1424,12 @@ export class ScaleDownMetrics extends Metrics { } } +export class ScaleUpChronMetrics extends Metrics { + constructor() { + super('scaleUpChron'); + } +} + export interface sendMetricsTimeoutVars { metrics?: Metrics; setTimeout?: ReturnType; diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up-chron.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up-chron.ts index 0733503e2f..9a5a834a5e 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up-chron.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up-chron.ts @@ -1,6 +1,12 @@ import axios from 'axios'; import { Config } from './config'; +import { getRepo } from './utils'; +import { ScaleUpChronMetrics } from './metrics'; +import { getRunnerTypes } from './gh-runners'; +import { sqsSendMessages } from './sqs'; +import { ActionRequestMessage } from './scale-up'; +import { randomUUID } from 'crypto'; export async function scaleUpChron(): Promise { // This function does the following: @@ -8,8 +14,46 @@ export async function scaleUpChron(): Promise { // 2. Polls scale-config to filter the list to ones that are self-hosted by this fleet and // are ephemeral // 3. 
Sends a SQS request to the scale-up lambda to provision more of those instances + let queuedJobs = await getQueuedJobs(); + const scaleConfigRepo = getRepo(Config.Instance.scaleConfigOrg, Config.Instance.scaleConfigRepo); + + const metrics = new ScaleUpChronMetrics(); + const validRunnerTypes = await getRunnerTypes(scaleConfigRepo, metrics); + + const minAutoScaleupDelayMinutes = 30; + // Only proactively scale up the jobs that have been queued for longer than normal + queuedJobs = queuedJobs.filter((runner) => { + return runner.min_queue_time_min >= minAutoScaleupDelayMinutes && + runner.org === Config.Instance.scaleConfigOrg; + }); + + // Filter out the queued jobs that do not correspond to a valid runner type + queuedJobs = queuedJobs.filter((requested_runner) => { + return Array.from(validRunnerTypes.keys()).some((available_runner_label) => { + return available_runner_label === requested_runner.runner_label; + }); + }); + + // Build one scale-up request per remaining queued job + const scaleUpRequests: Array<ActionRequestMessage> = queuedJobs.map((runner) => { + return { + "id": Math.floor(Math.random() * 100000000000000), + "eventType": "workflow_job", + "repositoryName": runner.full_repo.split('/')[1], + "repositoryOwner": runner.org, + } + }); + + // SCALE_UP_RECORD_QUEUE_URL is optional in Config, so fail with a clear error instead of sending to an undefined queue + const scaleUpRecordQueueUrl = Config.Instance.scaleUpRecordQueueUrl; + if (!scaleUpRecordQueueUrl) { + throw new Error('SCALE_UP_RECORD_QUEUE_URL is not set; cannot send scale-up requests'); + } + + // Send the scale-up requests (not the raw queued-job rows) to the SQS queue and wait for the send to finish + await sqsSendMessages(metrics, scaleUpRequests, scaleUpRecordQueueUrl); } class QueuedJobsForRunner { diff --git a/terraform-aws-github-runner/modules/runners/scale-down.tf b/terraform-aws-github-runner/modules/runners/scale-down.tf index 9daa764b2e..6398555085 100644 --- a/terraform-aws-github-runner/modules/runners/scale-down.tf +++ b/terraform-aws-github-runner/modules/runners/scale-down.tf @@ -53,6 +53,7 @@ resource "aws_lambda_function" "scale_down" { MINIMUM_RUNNING_TIME_IN_MINUTES = var.minimum_running_time_in_minutes REDIS_ENDPOINT = var.redis_endpoint REDIS_LOGIN = var.redis_login + SCALE_CONFIG_ORG = var.scale_config_org SCALE_CONFIG_REPO = var.scale_config_repo SCALE_CONFIG_REPO_PATH = var.scale_config_repo_path 
SCALE_DOWN_CONFIG = jsonencode(var.idle_config) diff --git a/terraform-aws-github-runner/modules/runners/scale-up-chron.tf b/terraform-aws-github-runner/modules/runners/scale-up-chron.tf index a7222ce9b6..4a21499145 100644 --- a/terraform-aws-github-runner/modules/runners/scale-up-chron.tf +++ b/terraform-aws-github-runner/modules/runners/scale-up-chron.tf @@ -43,8 +43,10 @@ resource "aws_lambda_function" "scale_up_chron" { MINIMUM_RUNNING_TIME_IN_MINUTES = var.minimum_running_time_in_minutes REDIS_ENDPOINT = var.redis_endpoint REDIS_LOGIN = var.redis_login + SCALE_CONFIG_ORG = var.scale_config_org SCALE_CONFIG_REPO = var.scale_config_repo SCALE_CONFIG_REPO_PATH = var.scale_config_repo_path + SCALE_UP_RECORD_QUEUE_URL = var.sqs_build_queue.url scale_up_chron_CONFIG = jsonencode(var.idle_config) SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id AWS_REGIONS_TO_VPC_IDS = join( diff --git a/terraform-aws-github-runner/modules/runners/scale-up.tf b/terraform-aws-github-runner/modules/runners/scale-up.tf index 8a47122534..36d9cd821a 100644 --- a/terraform-aws-github-runner/modules/runners/scale-up.tf +++ b/terraform-aws-github-runner/modules/runners/scale-up.tf @@ -67,6 +67,7 @@ resource "aws_lambda_function" "scale_up" { RETRY_SCALE_UP_RECORD_JITTER_PCT = "0.5" RETRY_SCALE_UP_RECORD_QUEUE_URL = var.sqs_build_queue_retry.url RUNNER_EXTRA_LABELS = var.runner_extra_labels + SCALE_CONFIG_ORG = var.scale_config_org SCALE_CONFIG_REPO = var.scale_config_repo SCALE_CONFIG_REPO_PATH = var.scale_config_repo_path SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id diff --git a/terraform-aws-github-runner/modules/runners/variables.tf b/terraform-aws-github-runner/modules/runners/variables.tf index d9a665e345..dfeaeab168 100644 --- a/terraform-aws-github-runner/modules/runners/variables.tf +++ b/terraform-aws-github-runner/modules/runners/variables.tf @@ -297,6 +297,11 @@ variable "role_runner_arn" { type = string } +variable "scale_config_org" { + description = 
"Organization to fetch scale config from." + type = string +} + variable "scale_config_repo" { description = "Repository to fetch scale config from." default = "" diff --git a/terraform-aws-github-runner/variables.tf b/terraform-aws-github-runner/variables.tf index b99af920cf..603a02e9ca 100644 --- a/terraform-aws-github-runner/variables.tf +++ b/terraform-aws-github-runner/variables.tf @@ -345,6 +345,11 @@ variable "cant_have_issues_labels" { default = [] } +variable "scale_config_org" { + description = "Organization to fetch scale config from." + type = string +} + variable "scale_config_repo" { description = "Repository to fetch scale config from. Optional if `enable_organization_runners` is set to false, in which case the job's repo will be used" default = ""