diff --git a/app/server/github_verifier.go b/app/server/github_verifier.go index af5a7370..e857c720 100644 --- a/app/server/github_verifier.go +++ b/app/server/github_verifier.go @@ -16,26 +16,62 @@ package server import ( "context" + "errors" "fmt" "net/http" + "strconv" + "strings" "github.com/google/go-github/v65/github" + "golang.org/x/mod/semver" ) +var errInvalidCodeQLVersion = errors.New("codeql version invalid") + type githubVerifier struct { - ctx context.Context - client *github.Client + ctx context.Context + client *github.Client + cachedCommits map[commit]bool + codeqlActionMajor string } -// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch -// if the repo is "github/codeql-action", also check releases/v1 before failing. +// returns a new githubVerifier, with an instantiated map. +// most uses should use this constructor. +func newGitHubVerifier(ctx context.Context, client *github.Client) *githubVerifier { + verifier := githubVerifier{ + ctx: ctx, + client: client, + } + verifier.cachedCommits = map[commit]bool{} + return &verifier +} + +// contains may make several "core" API calls: +// - one to get repository tags (we expect most cases to only need this call) +// - two to get the default branch name and check it +// - up to 10 requests when checking previous release branches func (g *githubVerifier) contains(c commit) (bool, error) { - owner, repo, hash := c.owner, c.repo, c.hash - defaultBranch, err := g.defaultBranch(owner, repo) + // return the cached answer if we've seen this commit before + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // fetch 100 most recent tags first, as this should handle the most common scenario + if err := g.getTags(c.owner, c.repo); err != nil { + return false, err + } + + // check cache again now that it's populated with tags + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + + // check default branch + defaultBranch, err := g.defaultBranch(c.owner, c.repo) if err != nil { return false, err } - contains, err := g.branchContains(defaultBranch, owner, repo, hash) + contains, err := g.branchContains(defaultBranch, c.owner, c.repo, c.hash) if err != nil { return false, err } @@ -43,27 +79,9 @@ func (g *githubVerifier) contains(c commit) (bool, error) { return true, nil } - switch { - // github/codeql-action has commits from their release branches that don't show up in the default branch - // this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call - case owner == "github" && repo == "codeql-action": - releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"} - for _, branch := range releaseBranches { - contains, err = g.branchContains(branch, owner, repo, hash) - if err != nil { - return false, err - } - if contains { - return true, nil - } - } - - // add fallback lookup for actions/upload-artifact v3/node20 branch - // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 - case owner == "actions" && repo == "upload-artifact": - contains, err = g.branchContains("v3/node20", owner, repo, hash) - } - return contains, err + // finally, check the most recent 10 release branches. This limit is arbitrary and can be adjusted in the future. + const lookback = 10 + return g.checkReleaseBranches(c.owner, c.repo, c.hash, defaultBranch, lookback) } func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) { @@ -89,5 +107,152 @@ func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool, } // Target should be behind or at the base ref if it is considered contained. - return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil + contains := diff.GetStatus() == "behind" || diff.GetStatus() == "identical" + if contains { + g.markContains(owner, repo, hash) + } + return contains, nil +} + +func (g *githubVerifier) getTags(owner, repo string) error { + // 100 releases is the maximum supported by /repos/{owner}/{repo}/tags endpoint + opts := github.ListOptions{PerPage: 100} + tags, _, err := g.client.Repositories.ListTags(g.ctx, owner, repo, &opts) + if err != nil { + return fmt.Errorf("fetch tags: %w", err) + } + isCodeQL := owner == "github" && repo == "codeql-action" + for _, t := range tags { + if t.Commit != nil && t.Commit.SHA != nil { + g.markContains(owner, repo, *t.Commit.SHA) + } + // store the highest major version for github/codeql-action + // this helps check release branches later, due to their release process. + if isCodeQL { + version := t.GetName() + if semver.IsValid(version) && semver.Compare(version, g.codeqlActionMajor) == 1 { + g.codeqlActionMajor = semver.Major(version) + } + } + } + return nil +} + +func (g *githubVerifier) markContains(owner, repo, sha string) { + commit := commit{ + owner: owner, + repo: repo, + hash: strings.ToLower(sha), + } + g.cachedCommits[commit] = true +} + +// check the most recent release branches, ignoring the default branch which was already checked. +func (g *githubVerifier) checkReleaseBranches(owner, repo, hash, defaultBranch string, limit int) (bool, error) { + var ( + analyzedBranches int + branches []string + err error + ) + + switch { + // special case: github/codeql-action releases all come from "main", even though the tags are on different branches + case owner == "github" && repo == "codeql-action": + branches, err = g.getCodeQLBranches() + if err != nil { + return false, err + } + default: + branches, err = g.getBranches(owner, repo) + if err != nil { + return false, err + } + // we may have discovered more commit SHAs from the release process + c := commit{ + owner: owner, + repo: repo, + hash: hash, + } + if contains, ok := g.cachedCommits[c]; ok { + return contains, nil + } + } + + for _, branch := range branches { + if analyzedBranches >= limit { + break + } + if branch == defaultBranch { + continue + } + analyzedBranches++ + contains, err := g.branchContains(branch, owner, repo, hash) + if err != nil { + return false, err + } + if contains { + return true, nil + } + } + + return false, nil +} + +// returns the integer version from the expected format: "v1", "v2", "v3" .. +func parseCodeQLVersion(version string) (int, error) { + if !strings.HasPrefix(version, "v") { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + major, err := strconv.Atoi(version[1:]) + if major < 1 || err != nil { + return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version) + } + return major, nil +} + +// these branches follow the releases/v3 pattern, so we can make assumptions about what they're called. +// this should be called after g.getTags(), because it requires g.codeqlActionMajor to be set. +func (g *githubVerifier) getCodeQLBranches() ([]string, error) { + if g.codeqlActionMajor == "" { + return nil, nil + } + version, err := parseCodeQLVersion(g.codeqlActionMajor) + if err != nil { + return nil, err + } + branches := make([]string, version) + // descending order (e..g releases/v5, releases/v4, ... releases/v1) + for i := 0; i < version; i++ { + branches[i] = "releases/v" + strconv.Itoa(version-i) + } + return branches, nil +} + +func (g *githubVerifier) getBranches(owner, repo string) ([]string, error) { + var branches []string + seen := map[string]struct{}{} + + // 100 releases is the maximum supported by /repos/{owner}/{repo}/releases endpoint + opts := github.ListOptions{PerPage: 100} + releases, _, err := g.client.Repositories.ListReleases(g.ctx, owner, repo, &opts) + if err != nil { + return nil, fmt.Errorf("fetch releases: %w", err) + } + + for _, r := range releases { + if r.TargetCommitish != nil { + if isCommitHash(*r.TargetCommitish) { + // if a commit, we know it's in the repo + g.markContains(owner, repo, *r.TargetCommitish) + } else { + // otherwise we have a release branch to check + if _, ok := seen[*r.TargetCommitish]; !ok { + seen[*r.TargetCommitish] = struct{}{} + branches = append(branches, *r.TargetCommitish) + } + } + } + } + + return branches, nil } diff --git a/app/server/github_verifier_test.go b/app/server/github_verifier_test.go index e013b43f..d4bb765a 100644 --- a/app/server/github_verifier_test.go +++ b/app/server/github_verifier_test.go @@ -27,6 +27,7 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch @@ -36,11 +37,8 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -54,6 +52,7 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { httpClient := http.Client{ Transport: suffixStubTripper{ responsePaths: map[string]string{ + "tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3 "codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch "main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch "v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either @@ -62,11 +61,8 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) { }, } client := github.NewClient(&httpClient) - gv := githubVerifier{ - ctx: context.Background(), - client: client, - } - got, err := gv.contains(commit{"github", "codeql-action", "somehash"}) + gv := newGitHubVerifier(context.Background(), client) + got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"}) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/app/server/post_results.go b/app/server/post_results.go index d5bf22ee..7bbcac99 100644 --- a/app/server/post_results.go +++ b/app/server/post_results.go @@ -226,10 +226,7 @@ func getAndVerifyWorkflowContent(ctx context.Context, return fmt.Errorf("error decoding workflow contents: %w", err) } - verifier := &githubVerifier{ - ctx: ctx, - client: client, - } + verifier := newGitHubVerifier(ctx, client) // Verify scorecard workflow. return verifyScorecardWorkflow(workflowContent, verifier) } diff --git a/app/server/post_results_e2e_test.go b/app/server/post_results_e2e_test.go index f8c0dda7..e9862c64 100644 --- a/app/server/post_results_e2e_test.go +++ b/app/server/post_results_e2e_test.go @@ -116,7 +116,7 @@ var _ = Describe("E2E Test: getAndVerifyWorkflowContent", func() { }) // helper function to setup a github verifier with an appropriately set token. -func getGithubVerifier() githubVerifier { +func getGithubVerifier() *githubVerifier { httpClient := http.DefaultClient token, _ := readGitHubTokens() if token != "" { @@ -124,14 +124,12 @@ func getGithubVerifier() githubVerifier { token: token, } } - return githubVerifier{ - ctx: context.Background(), - client: github.NewClient(httpClient), - } + return newGitHubVerifier(context.Background(), github.NewClient(httpClient)) } var _ = Describe("E2E Test: githubVerifier_contains", func() { Context("E2E Test: Validate known good commits", func() { + // https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344 It("can detect actions/upload-artifact v3-node20 commits", func() { gv := getGithubVerifier() c, err := gv.contains(commit{"actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db"}) @@ -145,5 +143,13 @@ var _ = Describe("E2E Test: githubVerifier_contains", func() { Expect(err).Should(BeNil()) Expect(c).To(BeTrue()) }) + + // https://github.com/ossf/scorecard-action/issues/1367#issuecomment-2333175627 + It("can detect release branch commits", func() { + gv := getGithubVerifier() + c, err := gv.contains(commit{"actions", "upload-artifact", "ff15f0306b3f739f7b6fd43fb5d26cd321bd4de5"}) + Expect(err).Should(BeNil()) + Expect(c).To(BeTrue()) + }) }) }) diff --git a/app/server/testdata/api/github/codeqlV3Tags.json b/app/server/testdata/api/github/codeqlV3Tags.json new file mode 100644 index 00000000..c46fffec --- /dev/null +++ b/app/server/testdata/api/github/codeqlV3Tags.json @@ -0,0 +1,12 @@ +[ + { + "name": "v3.26.9", + "zipball_url": "https://api.github.com/repos/github/codeql-action/zipball/refs/tags/v3.26.9", + "tarball_url": "https://api.github.com/repos/github/codeql-action/tarball/refs/tags/v3.26.9", + "commit": { + "sha": "461ef6c76dfe95d5c364de2f431ddbd31a417628", + "url": "https://api.github.com/repos/github/codeql-action/commits/461ef6c76dfe95d5c364de2f431ddbd31a417628" + }, + "node_id": "MDM6UmVmMjU5NDQ1ODc4OnJlZnMvdGFncy92My4yNi45" + } +] diff --git a/go.mod b/go.mod index 5e315f38..dfe98dc9 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/transparency-dev/merkle v0.0.2 gocloud.dev v0.37.0 + golang.org/x/mod v0.20.0 golang.org/x/net v0.29.0 ) diff --git a/go.sum b/go.sum index fd2ea9da..48c235f4 100644 --- a/go.sum +++ b/go.sum @@ -251,6 +251,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=