From 2791b5a1d5f21e0bea97a7ce2e98e7d2094f5a73 Mon Sep 17 00:00:00 2001
From: Thanh Ha
Date: Tue, 24 Sep 2024 09:37:40 -0700
Subject: [PATCH] Add metric for GitHub API Rate Limit (#5654)

This captures the rate limit values from GitHub for the ALI account
user/process. We can use this to graph and track how much of the API
rate limit is used by the CI infrastructure and flag when we are
getting too close to the overall limit.

Closes: pytorch/ci-infra#273

Signed-off-by: Thanh Ha
Co-authored-by: Jean Schmidt
---
Review notes (commentary placed after the three-dash line; it is not
part of the commit message or of the diff):

  * getGitHubRateLimit() fetches the rate-limit response through
    locallyCached(), reads the x-ratelimit-limit / -remaining / -used
    response headers and forwards them to
    metrics.gitHubRateLimitStats(). It logs and rethrows any error.
  * getGitHubRateLimit() accepts `installationId` but never reads it.
  * The header values are converted with Number(); a missing header
    yields NaN, which is forwarded to the metrics call unchanged.
  * scaleUp() invokes getGitHubRateLimit() as a bare statement, with no
    `await` and no `.catch()`. Because the function rethrows, a failure
    surfaces as an unhandled promise rejection rather than through
    scaleUp()'s own control flow, and the stats may be recorded after
    scaleUp() has moved on.
  * The third argument to locallyCached() is the literal 10; presumably
    a cache lifetime, to be confirmed against locallyCached().

 .../runners/src/scale-runners/gh-runners.ts   | 30 +++++++++++++++++++
 .../runners/src/scale-runners/metrics.ts      | 23 ++++++++++++++
 .../runners/src/scale-runners/scale-up.ts     |  3 ++
 3 files changed, 56 insertions(+)

diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts
index f41235b6ab..07e529903b 100644
--- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts
+++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts
@@ -514,3 +514,33 @@ export async function createRegistrationTokenOrg(
     throw e;
   }
 }
+
+export async function getGitHubRateLimit(repo: Repo, installationId: number, metrics: Metrics): Promise<void> {
+  try {
+    const { used, limit, remaining } = await locallyCached('ghRunners', 'getGitHubRateLimit', 10, async () => {
+      try {
+        const client = await createGitHubClientForRunnerRepo(repo, metrics);
+
+        const rateLimit = await expBackOff(() => {
+          return metrics.trackRequest(metrics.getGitHubRateLimitSuccess, metrics.getGitHubRateLimitFailure, () => {
+            return client.rateLimit.get();
+          });
+        });
+
+        const limit = Number(rateLimit.headers['x-ratelimit-limit']);
+        const remaining = Number(rateLimit.headers['x-ratelimit-remaining']);
+        const used = Number(rateLimit.headers['x-ratelimit-used']);
+
+        return { used, limit, remaining };
+      } catch (e) {
+        console.error(`[getGitHubRateLimit]: ${e}`);
+        throw e;
+      }
+    });
+
+    metrics.gitHubRateLimitStats(limit, remaining, used);
+  } catch (e) {
+    console.error(`[getGitHubRateLimit]: ${e}`);
+    throw e;
+  }
+}
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts
index d156f896ce..64c5998919 100644
--- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts
+++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts
@@ -241,6 +241,29 @@ export class Metrics {
   }
 
   // GitHub API CALLS
+  /* istanbul ignore next */
+  getGitHubRateLimitSuccess(ms: number) {
+    this.countEntry(`gh.calls.total`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.count`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.success`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.wallclock`, ms);
+  }
+
+  /* istanbul ignore next */
+  getGitHubRateLimitFailure(ms: number) {
+    this.countEntry(`gh.calls.total`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.count`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.failure`, 1);
+    this.countEntry(`gh.calls.getGitHubRateLimit.wallclock`, ms);
+  }
+
+  /* istanbul ignore next */
+  gitHubRateLimitStats(limit: number, remaining: number, used: number) {
+    this.addEntry(`gh.calls.ratelimit.limit`, limit);
+    this.addEntry(`gh.calls.ratelimit.remaining`, remaining);
+    this.addEntry(`gh.calls.ratelimit.used`, used);
+  }
+
   /* istanbul ignore next */
   createAppAuthGHCallSuccess(ms: number) {
     this.countEntry(`gh.calls.total`, 1);
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
index 1964c0c032..132c46bbf4 100644
--- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
+++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
@@ -7,6 +7,7 @@ import {
   getRunnerTypes,
   listGithubRunnersOrg,
   listGithubRunnersRepo,
+  getGitHubRateLimit,
 } from './gh-runners';
 
 import { Config } from './config';
@@ -54,6 +55,8 @@ export async function scaleUp(
   metrics.runRepo(repo);
   metrics.run();
 
+  getGitHubRateLimit(repo, Number(payload.installationId), metrics);
+
   const runnerTypes = await getRunnerTypes(
     {
       owner: repo.owner,