From c67d7065752010ffa640ad676e95036c67a7ba91 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 24 Sep 2024 13:39:13 -0700 Subject: [PATCH] use release and tag APIs to enhance imposter commit verification The old verification process assumed a commit was always in the default branch, or came from a select number of hardcoded release branches. This was error prone whenever new releases branches were used by some of the actions we check. The commit verification workflow now uses dynamic data to determine which branches to check. The steps are now: 1. Check if the commit is one of the latest 100 tags, which should be the most common case. 2. Check the default branch (unchanged from before). 3. Check branches associated with the most recent releases. This removes the hardcoded checks and should require fewer updates in the future. Signed-off-by: Spencer Schrock --- app/server/github_verifier.go | 223 +++++++++++++++--- app/server/github_verifier_test.go | 16 +- app/server/post_results.go | 5 +- app/server/post_results_e2e_test.go | 16 +- .../testdata/api/github/codeqlV3Tags.json | 12 + go.mod | 1 + go.sum | 2 + 7 files changed, 227 insertions(+), 48 deletions(-) create mode 100644 app/server/testdata/api/github/codeqlV3Tags.json diff --git a/app/server/github_verifier.go b/app/server/github_verifier.go index af5a7370..e857c720 100644 --- a/app/server/github_verifier.go +++ b/app/server/github_verifier.go @@ -16,26 +16,62 @@ package server import ( "context" + "errors" "fmt" "net/http" + "strconv" + "strings" "github.com/google/go-github/v65/github" + "golang.org/x/mod/semver" ) +var errInvalidCodeQLVersion = errors.New("codeql version invalid") + type githubVerifier struct { - ctx context.Context - client *github.Client + ctx context.Context + client *github.Client + cachedCommits map[commit]bool + codeqlActionMajor string } -// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch -// if the repo is "github/codeql-action", also check releases/v1 before failing. +// returns a new githubVerifier, with an instantiated map. +// most uses should use this constructor. +func newGitHubVerifier(ctx context.Context, client *github.Client) *githubVerifier { + verifier := githubVerifier{ + ctx: ctx, + client: client, + } + verifier.cachedCommits = map[commit]bool{} + return &verifier +} + +// contains may make several "core" API calls: +// - one to get repository tags (we expect most cases to only need this call) +// - two to get the default branch name and check it +// - up to 10 requests when checking previous release branches func (g *githubVerifier) contains(c commit) (bool, error) { - owner, repo, hash := c.owner, c.repo, c.hash - defaultBranch, err := g.defaultBranch(owner, repo) + // return the cached answer if we've seen this commit before + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // fetch 100 most recent tags first, as this should handle the most common scenario + if err := g.getTags(c.owner, c.repo); err != nil { + return false, err + } + + // check cache again now that it's populated with tags + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // check default branch + defaultBranch, err := g.defaultBranch(c.owner, c.repo) if err != nil { return false, err } - contains, err := g.branchContains(defaultBranch, owner, repo, hash) + contains, err := g.branchContains(defaultBranch, c.owner, c.repo, c.hash) if err != nil { return false, err } @@ -43,27 +79,9 @@ func (g *githubVerifier) contains(c commit) (bool, error) { return true, nil } - switch { - // github/codeql-action has commits from their release branches that don't show up in the default branch - // this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call - case owner == "github" && repo == "codeql-action": - releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"} - for _, branch := range releaseBranches { - contains, err = g.branchContains(branch, owner, repo, hash) - if err != nil { - return false, err - } - if contains { - return true, nil - } - } - - // add fallback lookup for actions/upload-artifact v3/node20 branch - // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 - case owner == "actions" && repo == "upload-artifact": - contains, err = g.branchContains("v3/node20", owner, repo, hash) - } - return contains, err + // finally, check the most recent 10 release branches. This limit is arbitrary and can be adjusted in the future. + const lookback = 10 + return g.checkReleaseBranches(c.owner, c.repo, c.hash, defaultBranch, lookback) } func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) { @@ -89,5 +107,152 @@ func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool, } // Target should be behind or at the base ref if it is considered contained. - return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil + contains := diff.GetStatus() == "behind" || diff.GetStatus() == "identical" + if contains { + g.markContains(owner, repo, hash) + } + return contains, nil +} + +func (g *githubVerifier) getTags(owner, repo string) error { + // 100 releases is the maximum supported by /repos/{owner}/{repo}/tags endpoint + opts := github.ListOptions{PerPage: 100} + tags, _, err := g.client.Repositories.ListTags(g.ctx, owner, repo, &opts) + if err != nil { + return fmt.Errorf("fetch tags: %w", err) + } + isCodeQL := owner == "github" && repo == "codeql-action" + for _, t := range tags { + if t.Commit != nil && t.Commit.SHA != nil { + g.markContains(owner, repo, *t.Commit.SHA) + } + // store the highest major version for github/codeql-action + // this helps check release branches later, due to their release process. + if isCodeQL { + version := t.GetName() + if semver.IsValid(version) && semver.Compare(version, g.codeqlActionMajor) == 1 { + g.codeqlActionMajor = semver.Major(version) + } + } + } + return nil +} + +func (g *githubVerifier) markContains(owner, repo, sha string) { + commit := commit{ + owner: owner, + repo: repo, + hash: strings.ToLower(sha), + } + g.cachedCommits[commit] = true +} + +// check the most recent release branches, ignoring the default branch which was already checked. +func (g *githubVerifier) checkReleaseBranches(owner, repo, hash, defaultBranch string, limit int) (bool, error) { + var ( + analyzedBranches int + branches []string + err error + ) + + switch { + // special case: github/codeql-action releases all come from "main", even though the tags are on different branches + case owner == "github" && repo == "codeql-action": + branches, err = g.getCodeQLBranches() + if err != nil { + return false, err + } + default: + branches, err = g.getBranches(owner, repo) + if err != nil { + return false, err + } + // we may have discovered more commit SHAs from the release process + c := commit{ + owner: owner, + repo: repo, + hash: hash, + } + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + } + + for _, branch := range branches { + if analyzedBranches >= limit { + break + } + if branch == defaultBranch { + continue + } + analyzedBranches++ + contains, err := g.branchContains(branch, owner, repo, hash) + if err != nil { + return false, err + } + if contains { + return true, nil + } + } + + return false, nil +} + +// returns the integer version from the expected format: "v1", "v2", "v3" .. +func parseCodeQLVersion(version string) (int, error) { + if !strings.HasPrefix(version, "v") { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + major, err := strconv.Atoi(version[1:]) + if major < 1 || err != nil { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + return major, nil +} + +// these branches follow the releases/v3 pattern, so we can make assumptions about what they're called. +// this should be called after g.getTags(), because it requires g.codeqlActionMajor to be set. +func (g *githubVerifier) getCodeQLBranches() ([]string, error) { + if g.codeqlActionMajor == "" { + return nil, nil + } + version, err := parseCodeQLVersion(g.codeqlActionMajor) + if err != nil { + return nil, err + } + branches := make([]string, version) + // descending order (e..g releases/v5, releases/v4, ... releases/v1) + for i := 0; i < version; i++ { + branches[i] = "releases/v" + strconv.Itoa(version-i) + } + return branches, nil +} + +func (g *githubVerifier) getBranches(owner, repo string) ([]string, error) { + var branches []string + seen := map[string]struct{}{} + + // 100 releases is the maximum supported by /repos/{owner}/{repo}/releases endpoint + opts := github.ListOptions{PerPage: 100} + releases, _, err := g.client.Repositories.ListReleases(g.ctx, owner, repo, &opts) + if err != nil { + return nil, fmt.Errorf("fetch releases: %w", err) + } + + for _, r := range releases { + if r.TargetCommitish != nil { + if isCommitHash(*r.TargetCommitish) { + // if a commit, we know it's in the repo + g.markContains(owner, repo, *r.TargetCommitish) + } else { + // otherwise we have a release branch to check + if _, ok := seen[*r.TargetCommitish]; !ok { + seen[*r.TargetCommitish] = struct{}{} + branches = append(branches, *r.TargetCommitish) + } + } + } + } + + return branches, nil } diff --git a/app/server/github_verifier_test.go b/app/server/github_verifier_test.go index e013b43f..d4bb765a 100644 --- a/app/server/github_verifier_test.go +++ b/app/server/github_verifier_test.go @@ -27,6 +27,7 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch @@ -36,11 +37,8 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -54,6 +52,7 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either @@ -62,11 +61,8 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/app/server/post_results.go b/app/server/post_results.go index d5bf22ee..7bbcac99 100644 --- a/app/server/post_results.go +++ b/app/server/post_results.go @@ -226,10 +226,7 @@ func getAndVerifyWorkflowContent(ctx context.Context, return fmt.Errorf("error decoding workflow contents: %w", err) } - verifier := &githubVerifier{ - ctx: ctx, - client: client, - } + verifier := newGitHubVerifier(ctx, client) // Verify scorecard workflow. return verifyScorecardWorkflow(workflowContent, verifier) } diff --git a/app/server/post_results_e2e_test.go b/app/server/post_results_e2e_test.go index f8c0dda7..e9862c64 100644 --- a/app/server/post_results_e2e_test.go +++ b/app/server/post_results_e2e_test.go @@ -116,7 +116,7 @@ var _ = Describe("E2E Test: getAndVerifyWorkflowContent", func() { }) // helper function to setup a github verifier with an appropriately set token. -func getGithubVerifier() githubVerifier { +func getGithubVerifier() *githubVerifier { httpClient := http.DefaultClient token, _ := readGitHubTokens() if token != "" { @@ -124,14 +124,12 @@ func getGithubVerifier() githubVerifier { token: token, } } - return githubVerifier{ - ctx: context.Background(), - client: github.NewClient(httpClient), - } + return newGitHubVerifier(context.Background(), github.NewClient(httpClient)) } var _ = Describe("E2E Test: githubVerifier_contains", func() { Context("E2E Test: Validate known good commits", func() { + // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 It("can detect actions/upload-artifact v3-node20 commits", func() { gv := getGithubVerifier() c, err := gv.contains(commit{"actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db"}) @@ -145,5 +143,13 @@ var _ = Describe("E2E Test: githubVerifier_contains", func() { Expect(err).Should(BeNil()) Expect(c).To(BeTrue()) }) + + // https://github.com/ossf/scorecard-action/issues/1367#issuecomment-2333175627 + It("can detect release branch commits", func() { + gv := getGithubVerifier() + c, err := gv.contains(commit{"actions", "upload-artifact", "ff15f0306b3f739f7b6fd43fb5d26cd321bd4de5"}) + Expect(err).Should(BeNil()) + Expect(c).To(BeTrue()) + }) }) }) diff --git a/app/server/testdata/api/github/codeqlV3Tags.json b/app/server/testdata/api/github/codeqlV3Tags.json new file mode 100644 index 00000000..c46fffec --- /dev/null +++ b/app/server/testdata/api/github/codeqlV3Tags.json @@ -0,0 +1,12 @@ +[ + { + "name": "v3.26.9", + "zipball_url": "https://api.github.com/repos/github/codeql-action/zipball/refs/tags/v3.26.9", + "tarball_url": "https://api.github.com/repos/github/codeql-action/tarball/refs/tags/v3.26.9", + "commit": { + "sha": "461ef6c76dfe95d5c364de2f431ddbd31a417628", + "url": "https://api.github.com/repos/github/codeql-action/commits/461ef6c76dfe95d5c364de2f431ddbd31a417628" + }, + "node_id": "MDM6UmVmMjU5NDQ1ODc4OnJlZnMvdGFncy92My4yNi45" + } +] diff --git a/go.mod b/go.mod index 5e315f38..dfe98dc9 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/transparency-dev/merkle v0.0.2 gocloud.dev v0.37.0 + golang.org/x/mod v0.20.0 golang.org/x/net v0.29.0 ) diff --git a/go.sum b/go.sum index fd2ea9da..48c235f4 100644 --- a/go.sum +++ b/go.sum @@ -251,6 +251,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=