From d9318c3c0354b836a704048d237e0e2c8427b07b Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 31 Dec 2024 10:56:37 -0700 Subject: [PATCH 01/10] add git handler for GitHub repositories This is primarily aimed at helping in cases where a repository's .gitattributes file causes files to not be analyzed. Signed-off-by: Spencer Schrock --- clients/githubrepo/client.go | 26 +++++- clients/githubrepo/git.go | 158 +++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 clients/githubrepo/git.go diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index 58e1a3dbeb4..5c81a17ea11 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -57,9 +57,11 @@ type Client struct { webhook *webhookHandler languages *languagesHandler licenses *licensesHandler + git *gitHandler ctx context.Context tarball tarballHandler commitDepth int + gitMode bool } const defaultGhHost = "github.com" @@ -88,8 +90,12 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD commitSHA: commitSHA, } - // Init tarballHandler. - client.tarball.init(client.ctx, client.repo, commitSHA) + if client.gitMode { + client.git.init(client.ctx, client.repo, commitSHA) + } else { + // Init tarballHandler. + client.tarball.init(client.ctx, client.repo, commitSHA) + } // Setup GraphQL. client.graphClient.init(client.ctx, client.repourl, client.commitDepth) @@ -141,16 +147,25 @@ func (client *Client) URI() string { // LocalPath implements RepoClient.LocalPath. func (client *Client) LocalPath() (string, error) { + if client.gitMode { + return client.git.getLocalPath() + } return client.tarball.getLocalPath() } // ListFiles implements RepoClient.ListFiles. func (client *Client) ListFiles(predicate func(string) (bool, error)) ([]string, error) { + if client.gitMode { + return client.git.listFiles(predicate) + } return client.tarball.listFiles(predicate) } // GetFileReader implements RepoClient.GetFileReader. func (client *Client) GetFileReader(filename string) (io.ReadCloser, error) { + if client.gitMode { + return client.git.getFile(filename) + } return client.tarball.getFile(filename) } @@ -260,6 +275,9 @@ func (client *Client) SearchCommits(request clients.SearchCommitsOptions) ([]cli // Close implements RepoClient.Close. func (client *Client) Close() error { + if client.gitMode { + return client.git.cleanup() + } return client.tarball.cleanup() } @@ -289,6 +307,8 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp graphClient = githubv4.NewClient(httpClient) } + _, gitMode := os.LookupEnv("SCORECARD_GH_GIT_MODE") + return &Client{ ctx: ctx, repoClient: client, @@ -333,6 +353,8 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp tarball: tarballHandler{ httpClient: httpClient, }, + gitMode: gitMode, + git: &gitHandler{}, } } diff --git a/clients/githubrepo/git.go b/clients/githubrepo/git.go new file mode 100644 index 00000000000..1ad423bf6da --- /dev/null +++ b/clients/githubrepo/git.go @@ -0,0 +1,158 @@ +// Copyright 2024 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package githubrepo + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/google/go-github/v53/github" + + "github.com/ossf/scorecard/v5/clients" +) + +var errPathTraversal = errors.New("requested file outside repo directory") + +type gitHandler struct { + errSetup error + ctx context.Context + once *sync.Once + cloneURL string + gitRepo *git.Repository + tempDir string + commitSHA string +} + +func (g *gitHandler) init(ctx context.Context, repo *github.Repository, commitSHA string) { + g.errSetup = nil + g.once = new(sync.Once) + g.ctx = ctx + g.cloneURL = repo.GetCloneURL() + g.commitSHA = commitSHA +} + +func (g *gitHandler) setup() error { + g.once.Do(func() { + tempDir, err := os.MkdirTemp("", repoDir) + if err != nil { + g.errSetup = err + return + } + g.tempDir = tempDir + g.gitRepo, err = git.PlainClone(g.tempDir, false, &git.CloneOptions{ + URL: g.cloneURL, + // TODO: auth may be required for private repos + Depth: 1, // currently only use the git repo for files, dont need history + SingleBranch: true, + }) + if err != nil { + g.errSetup = err + return + } + + // assume the commit SHA is reachable from the default branch + // this isn't as flexible as the tarball handler, but good enough for now + if g.commitSHA != clients.HeadSHA { + wt, err := g.gitRepo.Worktree() + if err != nil { + g.errSetup = err + return + } + if err := wt.Checkout(&git.CheckoutOptions{Hash: plumbing.NewHash(g.commitSHA)}); err != nil { + g.errSetup = fmt.Errorf("checkout specified commit: %w", err) + return + } + } + }) + return g.errSetup +} + +func (g *gitHandler) getLocalPath() (string, error) { + if err := g.setup(); err != nil { + return "", fmt.Errorf("setup: %w", err) + } + return g.tempDir, nil +} + +func (g *gitHandler) listFiles(predicate func(string) (bool, error)) ([]string, error) { + if err := g.setup(); err != nil { + return nil, fmt.Errorf("setup: %w", err) + } + ref, err := g.gitRepo.Head() + if err != nil { + return nil, fmt.Errorf("git.Head: %w", err) + } + + commit, err := g.gitRepo.CommitObject(ref.Hash()) + if err != nil { + return nil, fmt.Errorf("git.CommitObject: %w", err) + } + + tree, err := commit.Tree() + if err != nil { + return nil, fmt.Errorf("git.Commit.Tree: %w", err) + } + + var files []string + err = tree.Files().ForEach(func(f *object.File) error { + shouldInclude, err := predicate(f.Name) + if err != nil { + return fmt.Errorf("error applying predicate to file %s: %w", f.Name, err) + } + + if shouldInclude { + files = append(files, f.Name) + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("git.Tree.Files: %w", err) + } + + return files, nil +} + +func (g *gitHandler) getFile(filename string) (*os.File, error) { + if err := g.setup(); err != nil { + return nil, fmt.Errorf("setup: %w", err) + } + + // check for path traversal + path := filepath.Join(g.tempDir, filename) + if !strings.HasPrefix(path, filepath.Clean(g.tempDir)+string(os.PathSeparator)) { + return nil, errPathTraversal + } + + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("open file: %w", err) + } + return f, nil +} + +func (g *gitHandler) cleanup() error { + if err := os.RemoveAll(g.tempDir); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("os.Remove: %w", err) + } + return nil +} From 480510aa62b8fa3bc3884b6ad97aadd420aba023 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 31 Dec 2024 14:52:18 -0700 Subject: [PATCH 02/10] use variadic options to configure GitHub repoclient This will let us use the new entrypoint in a backwards compatible way, similar to the scorecard.Run change made in the v5 release. Signed-off-by: Spencer Schrock --- clients/githubrepo/client.go | 75 +++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index 5c81a17ea11..fee13b63a7e 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -40,6 +40,8 @@ var ( errDefaultBranchEmpty = errors.New("default branch name is empty") ) +type Option func(*repoClientConfig) error + // Client is GitHub-specific implementation of RepoClient. type Client struct { repourl *Repo @@ -64,6 +66,27 @@ type Client struct { gitMode bool } +// WithGitMode configures the repo client to fetch files using git. +func WithGitMode() Option { + return func(c *repoClientConfig) error { + c.gitMode = true + return nil + } +} + +// WithRoundTripper configures the repo client to use the specified http.RoundTripper. +func WithRoundTripper(rt http.RoundTripper) Option { + return func(c *repoClientConfig) error { + c.rt = rt + return nil + } +} + +type repoClientConfig struct { + rt http.RoundTripper + gitMode bool +} + const defaultGhHost = "github.com" // InitRepo sets up the GitHub repo in local storage for improving performance and GitHub token usage efficiency. @@ -225,7 +248,14 @@ func (client *Client) GetOrgRepoClient(ctx context.Context) (clients.RepoClient, return nil, fmt.Errorf("error during MakeGithubRepo: %w", err) } - c := CreateGithubRepoClientWithTransport(ctx, client.repoClient.Client().Transport) + options := []Option{WithRoundTripper(client.repoClient.Client().Transport)} + if client.gitMode { + options = append(options, WithGitMode()) + } + c, err := NewRepoClient(ctx, options...) + if err != nil { + return nil, fmt.Errorf("create org repoclient: %w", err) + } if err := c.InitRepo(dotGithubRepo, clients.HeadSHA, 0); err != nil { return nil, fmt.Errorf("error during InitRepo: %w", err) } @@ -283,8 +313,36 @@ func (client *Client) Close() error { // CreateGithubRepoClientWithTransport returns a Client which implements RepoClient interface. func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripper) clients.RepoClient { + //nolint:errcheck // need to suppress because this method doesn't return an error + rc, _ := NewRepoClient(ctx, WithRoundTripper(rt)) + return rc +} + +// CreateGithubRepoClient returns a Client which implements RepoClient interface. +func CreateGithubRepoClient(ctx context.Context, logger *log.Logger) clients.RepoClient { + // Use our custom roundtripper + rt := roundtripper.NewTransport(ctx, logger) + return CreateGithubRepoClientWithTransport(ctx, rt) +} + +// NewRepoClient returns a Client which implements RepoClient interface. +// It can be configured with various [Option]s. +func NewRepoClient(ctx context.Context, opts ...Option) (clients.RepoClient, error) { + var config repoClientConfig + + for _, option := range opts { + if err := option(&config); err != nil { + return nil, err + } + } + + if config.rt == nil { + logger := log.NewLogger(log.DefaultLevel) + config.rt = roundtripper.NewTransport(ctx, logger) + } + httpClient := &http.Client{ - Transport: rt, + Transport: config.rt, } var client *github.Client @@ -307,8 +365,6 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp graphClient = githubv4.NewClient(httpClient) } - _, gitMode := os.LookupEnv("SCORECARD_GH_GIT_MODE") - return &Client{ ctx: ctx, repoClient: client, @@ -353,16 +409,9 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp tarball: tarballHandler{ httpClient: httpClient, }, - gitMode: gitMode, + gitMode: config.gitMode, git: &gitHandler{}, - } -} - -// CreateGithubRepoClient returns a Client which implements RepoClient interface. -func CreateGithubRepoClient(ctx context.Context, logger *log.Logger) clients.RepoClient { - // Use our custom roundtripper - rt := roundtripper.NewTransport(ctx, logger) - return CreateGithubRepoClientWithTransport(ctx, rt) + }, nil } // CreateOssFuzzRepoClient returns a RepoClient implementation From 8c7953e124b27145119095c721df977199066dff Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 31 Dec 2024 15:09:32 -0700 Subject: [PATCH 03/10] add flag to enable github git mode Signed-off-by: Spencer Schrock --- cmd/root.go | 12 ++++++++++-- options/flags.go | 10 ++++++++++ options/options.go | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 23626a634bb..e7e75d80865 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -147,13 +147,21 @@ func rootCmd(o *options.Options) error { } } - repoResult, err = scorecard.Run(ctx, repo, + opts := []scorecard.Option{ scorecard.WithLogLevel(sclog.ParseLevel(o.LogLevel)), scorecard.WithCommitSHA(o.Commit), scorecard.WithCommitDepth(o.CommitDepth), scorecard.WithProbes(enabledProbes), scorecard.WithChecks(checks), - ) + } + if o.GithubGitMode { + rc, err := githubrepo.NewRepoClient(ctx, githubrepo.WithGitMode()) + if err != nil { + return fmt.Errorf("enabling github git mode: %w", err) + } + opts = append(opts, scorecard.WithRepoClient(rc)) + } + repoResult, err = scorecard.Run(ctx, repo, opts...) if err != nil { return fmt.Errorf("scorecard.Run: %w", err) } diff --git a/options/flags.go b/options/flags.go index 0545b0293fc..7a486f4f1c8 100644 --- a/options/flags.go +++ b/options/flags.go @@ -54,6 +54,9 @@ const ( // FlagShowDetails is the flag name for outputting additional check info. FlagShowDetails = "show-details" + // Flag FlagGithubGitMode is the flag name for enabling git compatibility mode. + FlagGithubGitMode = "gh-git-mode" + // FlagShowAnnotations is the flag name for outputting annotations on checks. FlagShowAnnotations = "show-annotations" @@ -222,4 +225,11 @@ func (o *Options) AddFlags(cmd *cobra.Command) { o.ResultsFile, "output file", ) + + cmd.Flags().BoolVar( + &o.GithubGitMode, + FlagGithubGitMode, + o.GithubGitMode, + "fetch GitHub files using git for maximum compatibility", + ) } diff --git a/options/options.go b/options/options.go index ef3077a9d95..aaaef737d94 100644 --- a/options/options.go +++ b/options/options.go @@ -47,6 +47,7 @@ type Options struct { CommitDepth int ShowDetails bool ShowAnnotations bool + GithubGitMode bool // Feature flags. EnableSarif bool `env:"ENABLE_SARIF"` EnableScorecardV6 bool `env:"SCORECARD_V6"` From 9e8451535b1bf69d3a9a9cfbb7857b0f023db423 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Thu, 2 Jan 2025 09:21:26 -0700 Subject: [PATCH 04/10] rename flag to be forge agnostic export-ignore is not a github specific feature, and other forges, like gitlab, suffer from the same bug. Signed-off-by: Spencer Schrock --- cmd/root.go | 2 +- options/flags.go | 12 ++++++------ options/options.go | 2 +- pkg/scorecard/scorecard.go | 23 ++++++++++++++++++++++- 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index e7e75d80865..cb08906f2fa 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -154,7 +154,7 @@ func rootCmd(o *options.Options) error { scorecard.WithProbes(enabledProbes), scorecard.WithChecks(checks), } - if o.GithubGitMode { + if o.GitMode { rc, err := githubrepo.NewRepoClient(ctx, githubrepo.WithGitMode()) if err != nil { return fmt.Errorf("enabling github git mode: %w", err) diff --git a/options/flags.go b/options/flags.go index 7a486f4f1c8..7f35e6e49c5 100644 --- a/options/flags.go +++ b/options/flags.go @@ -54,8 +54,8 @@ const ( // FlagShowDetails is the flag name for outputting additional check info. FlagShowDetails = "show-details" - // Flag FlagGithubGitMode is the flag name for enabling git compatibility mode. - FlagGithubGitMode = "gh-git-mode" + // Flag FlagGitMode is the flag name for enabling git compatibility mode. + FlagGitMode = "git-mode" // FlagShowAnnotations is the flag name for outputting annotations on checks. FlagShowAnnotations = "show-annotations" @@ -227,9 +227,9 @@ func (o *Options) AddFlags(cmd *cobra.Command) { ) cmd.Flags().BoolVar( - &o.GithubGitMode, - FlagGithubGitMode, - o.GithubGitMode, - "fetch GitHub files using git for maximum compatibility", + &o.GitMode, + FlagGitMode, + o.GitMode, + "fetch repository files using git for maximum compatibility", ) } diff --git a/options/options.go b/options/options.go index aaaef737d94..086257fcd69 100644 --- a/options/options.go +++ b/options/options.go @@ -47,7 +47,7 @@ type Options struct { CommitDepth int ShowDetails bool ShowAnnotations bool - GithubGitMode bool + GitMode bool // Feature flags. EnableSarif bool `env:"ENABLE_SARIF"` EnableScorecardV6 bool `env:"SCORECARD_V6"` diff --git a/pkg/scorecard/scorecard.go b/pkg/scorecard/scorecard.go index 98ea3e210a9..a3ae568be4c 100644 --- a/pkg/scorecard/scorecard.go +++ b/pkg/scorecard/scorecard.go @@ -257,6 +257,7 @@ type runConfig struct { checks []string probes []string commitDepth int + gitMode bool } type Option func(*runConfig) error @@ -340,6 +341,18 @@ func WithOpenSSFBestPraticesClient(client clients.CIIBestPracticesClient) Option } } +// WithGitMode will configure supporting repository clients to download files +// using git, instead of the archive tarball. This is useful for repositories +// which "export-ignore" files in a .gitattributes file. +// +// Repository analysis may be slower. +func WithGitMode() Option { + return func(c *runConfig) error { + c.gitMode = true + return nil + } +} + // Run analyzes a given repository and returns the result. You can modify the // run behavior by passing in [Option] arguments. In the absence of a particular // option a default is used. Refer to the various Options for details. @@ -377,7 +390,15 @@ func Run(ctx context.Context, repo clients.Repo, opts ...Option) (Result, error) } case *githubrepo.Repo: if c.client == nil { - c.client = githubrepo.CreateGithubRepoClient(ctx, logger) + var opts []githubrepo.Option + if c.gitMode { + opts = append(opts, githubrepo.WithGitMode()) + } + client, err := githubrepo.NewRepoClient(ctx, opts...) + if err != nil { + return Result{}, fmt.Errorf("creating github client: %w", err) + } + c.client = client } case *gitlabrepo.Repo: if c.client == nil { From b0f91b3e530967b100310b9840e580e4d5ea8da2 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Thu, 2 Jan 2025 09:46:18 -0700 Subject: [PATCH 05/10] move git file handler to internal package This will allow sharing with GitLab in a followup PR Signed-off-by: Spencer Schrock --- clients/githubrepo/client.go | 30 ++++++-- .../git.go => internal/gitfile/gitfile.go | 76 ++++++++++--------- 2 files changed, 62 insertions(+), 44 deletions(-) rename clients/githubrepo/git.go => internal/gitfile/gitfile.go (63%) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index fee13b63a7e..91941141747 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -31,6 +31,7 @@ import ( "github.com/ossf/scorecard/v5/clients" "github.com/ossf/scorecard/v5/clients/githubrepo/roundtripper" sce "github.com/ossf/scorecard/v5/errors" + "github.com/ossf/scorecard/v5/internal/gitfile" "github.com/ossf/scorecard/v5/log" ) @@ -59,7 +60,7 @@ type Client struct { webhook *webhookHandler languages *languagesHandler licenses *licensesHandler - git *gitHandler + git *gitfile.Handler ctx context.Context tarball tarballHandler commitDepth int @@ -114,7 +115,7 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD } if client.gitMode { - client.git.init(client.ctx, client.repo, commitSHA) + client.git.Init(client.ctx, client.repo.GetCloneURL(), commitSHA) } else { // Init tarballHandler. client.tarball.init(client.ctx, client.repo, commitSHA) @@ -171,7 +172,11 @@ func (client *Client) URI() string { // LocalPath implements RepoClient.LocalPath. func (client *Client) LocalPath() (string, error) { if client.gitMode { - return client.git.getLocalPath() + path, err := client.git.GetLocalPath() + if err != nil { + return "", fmt.Errorf("git local path: %w", err) + } + return path, nil } return client.tarball.getLocalPath() } @@ -179,7 +184,11 @@ func (client *Client) LocalPath() (string, error) { // ListFiles implements RepoClient.ListFiles. func (client *Client) ListFiles(predicate func(string) (bool, error)) ([]string, error) { if client.gitMode { - return client.git.listFiles(predicate) + files, err := client.git.ListFiles(predicate) + if err != nil { + return nil, fmt.Errorf("git listfiles: %w", err) + } + return files, nil } return client.tarball.listFiles(predicate) } @@ -187,7 +196,11 @@ func (client *Client) ListFiles(predicate func(string) (bool, error)) ([]string, // GetFileReader implements RepoClient.GetFileReader. func (client *Client) GetFileReader(filename string) (io.ReadCloser, error) { if client.gitMode { - return client.git.getFile(filename) + f, err := client.git.GetFile(filename) + if err != nil { + return nil, fmt.Errorf("git getfile: %w", err) + } + return f, nil } return client.tarball.getFile(filename) } @@ -306,7 +319,10 @@ func (client *Client) SearchCommits(request clients.SearchCommitsOptions) ([]cli // Close implements RepoClient.Close. func (client *Client) Close() error { if client.gitMode { - return client.git.cleanup() + if err := client.git.Cleanup(); err != nil { + return fmt.Errorf("git cleanup: %w", err) + } + return nil } return client.tarball.cleanup() } @@ -410,7 +426,7 @@ func NewRepoClient(ctx context.Context, opts ...Option) (clients.RepoClient, err httpClient: httpClient, }, gitMode: config.gitMode, - git: &gitHandler{}, + git: &gitfile.Handler{}, }, nil } diff --git a/clients/githubrepo/git.go b/internal/gitfile/gitfile.go similarity index 63% rename from clients/githubrepo/git.go rename to internal/gitfile/gitfile.go index 1ad423bf6da..2581bbeae61 100644 --- a/clients/githubrepo/git.go +++ b/internal/gitfile/gitfile.go @@ -1,4 +1,4 @@ -// Copyright 2024 OpenSSF Scorecard Authors +// Copyright 2025 OpenSSF Scorecard Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -package githubrepo +// Package gitfile defines functionality to list and fetch files after temporarily cloning a git repo. +package gitfile import ( "context" @@ -26,14 +27,15 @@ import ( "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" - "github.com/google/go-github/v53/github" "github.com/ossf/scorecard/v5/clients" ) -var errPathTraversal = errors.New("requested file outside repo directory") +var errPathTraversal = errors.New("requested file outside repo") -type gitHandler struct { +const repoDir = "repo*" + +type Handler struct { errSetup error ctx context.Context once *sync.Once @@ -43,67 +45,67 @@ type gitHandler struct { commitSHA string } -func (g *gitHandler) init(ctx context.Context, repo *github.Repository, commitSHA string) { - g.errSetup = nil - g.once = new(sync.Once) - g.ctx = ctx - g.cloneURL = repo.GetCloneURL() - g.commitSHA = commitSHA +func (h *Handler) Init(ctx context.Context, cloneURL, commitSHA string) { + h.errSetup = nil + h.once = new(sync.Once) + h.ctx = ctx + h.cloneURL = cloneURL + h.commitSHA = commitSHA } -func (g *gitHandler) setup() error { - g.once.Do(func() { +func (h *Handler) setup() error { + h.once.Do(func() { tempDir, err := os.MkdirTemp("", repoDir) if err != nil { - g.errSetup = err + h.errSetup = err return } - g.tempDir = tempDir - g.gitRepo, err = git.PlainClone(g.tempDir, false, &git.CloneOptions{ - URL: g.cloneURL, + h.tempDir = tempDir + h.gitRepo, err = git.PlainClone(h.tempDir, false, &git.CloneOptions{ + URL: h.cloneURL, // TODO: auth may be required for private repos Depth: 1, // currently only use the git repo for files, dont need history SingleBranch: true, }) if err != nil { - g.errSetup = err + h.errSetup = err return } // assume the commit SHA is reachable from the default branch // this isn't as flexible as the tarball handler, but good enough for now - if g.commitSHA != clients.HeadSHA { - wt, err := g.gitRepo.Worktree() + if h.commitSHA != clients.HeadSHA { + wt, err := h.gitRepo.Worktree() if err != nil { - g.errSetup = err + h.errSetup = err return } - if err := wt.Checkout(&git.CheckoutOptions{Hash: plumbing.NewHash(g.commitSHA)}); err != nil { - g.errSetup = fmt.Errorf("checkout specified commit: %w", err) + if err := wt.Checkout(&git.CheckoutOptions{Hash: plumbing.NewHash(h.commitSHA)}); err != nil { + h.errSetup = fmt.Errorf("checkout specified commit: %w", err) return } } }) - return g.errSetup + return h.errSetup } -func (g *gitHandler) getLocalPath() (string, error) { - if err := g.setup(); err != nil { +func (h *Handler) GetLocalPath() (string, error) { + if err := h.setup(); err != nil { return "", fmt.Errorf("setup: %w", err) } - return g.tempDir, nil + return h.tempDir, nil } -func (g *gitHandler) listFiles(predicate func(string) (bool, error)) ([]string, error) { - if err := g.setup(); err != nil { +func (h *Handler) ListFiles(predicate func(string) (bool, error)) ([]string, error) { + if err := h.setup(); err != nil { return nil, fmt.Errorf("setup: %w", err) } - ref, err := g.gitRepo.Head() + ref, err := h.gitRepo.Head() if err != nil { return nil, fmt.Errorf("git.Head: %w", err) } - commit, err := g.gitRepo.CommitObject(ref.Hash()) + commit, err := h.gitRepo.CommitObject(ref.Hash()) if err != nil { return nil, fmt.Errorf("git.CommitObject: %w", err) } @@ -132,14 +134,14 @@ func (g *gitHandler) listFiles(predicate func(string) (bool, error)) ([]string, return files, nil } -func (g *gitHandler) getFile(filename string) (*os.File, error) { - if err := g.setup(); err != nil { +func (h *Handler) GetFile(filename string) (*os.File, error) { + if err := h.setup(); err != nil { return nil, fmt.Errorf("setup: %w", err) } // check for path traversal - path := filepath.Join(g.tempDir, filename) - if !strings.HasPrefix(path, filepath.Clean(g.tempDir)+string(os.PathSeparator)) { + path := filepath.Join(h.tempDir, filename) + if !strings.HasPrefix(path, filepath.Clean(h.tempDir)+string(os.PathSeparator)) { return nil, errPathTraversal } @@ -150,8 +152,8 @@ func (g *gitHandler) getFile(filename string) (*os.File, error) { return f, nil } -func (g *gitHandler) cleanup() error { - if err := os.RemoveAll(g.tempDir); err != nil && !os.IsNotExist(err) { +func (h *Handler) Cleanup() error { + if err := os.RemoveAll(h.tempDir); err != nil && !os.IsNotExist(err) { return fmt.Errorf("os.Remove: %w", err) } return nil From ff33e0c26036a28102e03a4835aa0750d88a4a0f Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Thu, 2 Jan 2025 10:46:51 -0700 Subject: [PATCH 06/10] add a test Signed-off-by: Spencer Schrock --- internal/gitfile/gitfile_test.go | 110 +++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 internal/gitfile/gitfile_test.go diff --git a/internal/gitfile/gitfile_test.go b/internal/gitfile/gitfile_test.go new file mode 100644 index 00000000000..2592ddb2348 --- /dev/null +++ b/internal/gitfile/gitfile_test.go @@ -0,0 +1,110 @@ +// Copyright 2025 OpenSSF Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gitfile + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/google/go-cmp/cmp" +) + +func TestHandler(t *testing.T) { + t.Parallel() + + var ( + want = []string{"example.txt"} + wantContents = []byte("hello world!") + ) + + dir := setupGitRepo(t) + + var h Handler + h.Init(context.Background(), dir, "HEAD") + + files, err := h.ListFiles(allFiles) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if d := cmp.Diff(want, files); d != "" { + t.Errorf("-got,+want: %s", d) + } + + r, err := h.GetFile("example.txt") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + t.Cleanup(func() { r.Close() }) + + contents, err := io.ReadAll(r) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if d := cmp.Diff(wantContents, contents); d != "" { + t.Errorf("-got,+want: %s", d) + } + + err = h.Cleanup() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func setupGitRepo(t *testing.T) string { + t.Helper() + + dir := t.TempDir() + r, err := git.PlainInitWithOptions(dir, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + w, err := r.Worktree() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + filename := filepath.Join(dir, "example.txt") + + if err = os.WriteFile(filename, []byte("hello world!"), 0o600); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, err = w.Add("example.txt"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + _, err = w.Commit("commit message", &git.CommitOptions{ + Author: &object.Signature{ + Name: "John Doe", + Email: "john@doe.org", + }, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + return dir +} + +func allFiles(path string) (bool, error) { + return true, nil +} From 498e3c5d6cef5809ac76b49b540a99a475cbcc1e Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Thu, 2 Jan 2025 10:59:22 -0700 Subject: [PATCH 07/10] use new toplevel gitmode argument also moves a func around for smaller PR diff. Signed-off-by: Spencer Schrock --- clients/githubrepo/client.go | 14 +++++++------- cmd/root.go | 7 ++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index 91941141747..1c7782bb091 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -334,13 +334,6 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp return rc } -// CreateGithubRepoClient returns a Client which implements RepoClient interface. -func CreateGithubRepoClient(ctx context.Context, logger *log.Logger) clients.RepoClient { - // Use our custom roundtripper - rt := roundtripper.NewTransport(ctx, logger) - return CreateGithubRepoClientWithTransport(ctx, rt) -} - // NewRepoClient returns a Client which implements RepoClient interface. // It can be configured with various [Option]s. func NewRepoClient(ctx context.Context, opts ...Option) (clients.RepoClient, error) { @@ -430,6 +423,13 @@ func NewRepoClient(ctx context.Context, opts ...Option) (clients.RepoClient, err }, nil } +// CreateGithubRepoClient returns a Client which implements RepoClient interface. +func CreateGithubRepoClient(ctx context.Context, logger *log.Logger) clients.RepoClient { + // Use our custom roundtripper + rt := roundtripper.NewTransport(ctx, logger) + return CreateGithubRepoClientWithTransport(ctx, rt) +} + // CreateOssFuzzRepoClient returns a RepoClient implementation // initialized to `google/oss-fuzz` GitHub repository. // diff --git a/cmd/root.go b/cmd/root.go index cb08906f2fa..a4e27cfea29 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -155,12 +155,9 @@ func rootCmd(o *options.Options) error { scorecard.WithChecks(checks), } if o.GitMode { - rc, err := githubrepo.NewRepoClient(ctx, githubrepo.WithGitMode()) - if err != nil { - return fmt.Errorf("enabling github git mode: %w", err) - } - opts = append(opts, scorecard.WithRepoClient(rc)) + opts = append(opts, scorecard.WithGitMode()) } + repoResult, err = scorecard.Run(ctx, repo, opts...) if err != nil { return fmt.Errorf("scorecard.Run: %w", err) From 0db42459ce59786a8559785f090501eaeba3bf15 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Thu, 2 Jan 2025 11:03:25 -0700 Subject: [PATCH 08/10] add path traversal test Signed-off-by: Spencer Schrock --- internal/gitfile/gitfile_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/internal/gitfile/gitfile_test.go b/internal/gitfile/gitfile_test.go index 2592ddb2348..01ecb144e0b 100644 --- a/internal/gitfile/gitfile_test.go +++ b/internal/gitfile/gitfile_test.go @@ -16,6 +16,7 @@ package gitfile import ( "context" + "errors" "io" "os" "path/filepath" @@ -69,6 +70,19 @@ func TestHandler(t *testing.T) { } } +func TestHandlerPathTraversal(t *testing.T) { + t.Parallel() + dir := setupGitRepo(t) + + var h Handler + h.Init(context.Background(), dir, "HEAD") + + _, err := h.GetFile("../example.txt") + if !errors.Is(err, errPathTraversal) { + t.Fatalf("expected path traversal error: got %v", err) + } +} + func setupGitRepo(t *testing.T) string { t.Helper() From 08a0b7ac384f99814e471092d02cd430748d01cd Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Wed, 8 Jan 2025 09:27:24 -0700 Subject: [PATCH 09/10] change flag to file-mode Signed-off-by: Spencer Schrock --- clients/githubrepo/client.go | 6 ++--- cmd/root.go | 4 ++-- options/flags.go | 15 +++++++------ options/options.go | 43 +++++++++++++++++++++++++----------- options/options_test.go | 25 +++++++++++++++++++++ pkg/scorecard/scorecard.go | 10 ++++----- 6 files changed, 73 insertions(+), 30 deletions(-) diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index 1c7782bb091..1298924c088 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -67,8 +67,8 @@ type Client struct { gitMode bool } -// WithGitMode configures the repo client to fetch files using git. -func WithGitMode() Option { +// WithFileModeGit configures the repo client to fetch files using git. +func WithFileModeGit() Option { return func(c *repoClientConfig) error { c.gitMode = true return nil @@ -263,7 +263,7 @@ func (client *Client) GetOrgRepoClient(ctx context.Context) (clients.RepoClient, options := []Option{WithRoundTripper(client.repoClient.Client().Transport)} if client.gitMode { - options = append(options, WithGitMode()) + options = append(options, WithFileModeGit()) } c, err := NewRepoClient(ctx, options...) if err != nil { diff --git a/cmd/root.go b/cmd/root.go index a4e27cfea29..23299e03edb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -154,8 +154,8 @@ func rootCmd(o *options.Options) error { scorecard.WithProbes(enabledProbes), scorecard.WithChecks(checks), } - if o.GitMode { - opts = append(opts, scorecard.WithGitMode()) + if strings.EqualFold(o.FileMode, options.FileModeGit) { + opts = append(opts, scorecard.WithFileModeGit()) } repoResult, err = scorecard.Run(ctx, repo, opts...) diff --git a/options/flags.go b/options/flags.go index 7f35e6e49c5..7ad94116c23 100644 --- a/options/flags.go +++ b/options/flags.go @@ -54,8 +54,8 @@ const ( // FlagShowDetails is the flag name for outputting additional check info. FlagShowDetails = "show-details" - // Flag FlagGitMode is the flag name for enabling git compatibility mode. - FlagGitMode = "git-mode" + // Flag FlagFileMode is the flag name for specifying how files are fetched for a repository. + FlagFileMode = "file-mode" // FlagShowAnnotations is the flag name for outputting annotations on checks. FlagShowAnnotations = "show-annotations" @@ -226,10 +226,11 @@ func (o *Options) AddFlags(cmd *cobra.Command) { "output file", ) - cmd.Flags().BoolVar( - &o.GitMode, - FlagGitMode, - o.GitMode, - "fetch repository files using git for maximum compatibility", + allowedModes := []string{FileModeArchive, FileModeGit} + cmd.Flags().StringVar( + &o.FileMode, + FlagFileMode, + o.FileMode, + fmt.Sprintf("mode to fetch repository files: %s", strings.Join(allowedModes, ", ")), ) } diff --git a/options/options.go b/options/options.go index 086257fcd69..5a646886735 100644 --- a/options/options.go +++ b/options/options.go @@ -41,13 +41,13 @@ type Options struct { Nuget string PolicyFile string ResultsFile string + FileMode string ChecksToRun []string ProbesToRun []string Metadata []string CommitDepth int ShowDetails bool ShowAnnotations bool - GitMode bool // Feature flags. EnableSarif bool `env:"ENABLE_SARIF"` EnableScorecardV6 bool `env:"SCORECARD_V6"` @@ -56,21 +56,15 @@ type Options struct { // New creates a new instance of `Options`. func New() *Options { - opts := &Options{} + opts := &Options{ + Commit: DefaultCommit, + Format: FormatDefault, + LogLevel: DefaultLogLevel, + FileMode: FileModeArchive, + } if err := env.Parse(opts); err != nil { log.Printf("could not parse env vars, using default options: %v", err) } - // Defaulting. - // TODO(options): Consider moving this to a separate function/method. - if opts.Commit == "" { - opts.Commit = DefaultCommit - } - if opts.Format == "" { - opts.Format = FormatDefault - } - if opts.LogLevel == "" { - opts.LogLevel = DefaultLogLevel - } return opts } @@ -90,6 +84,12 @@ const ( // FormatRaw specifies that results should be output in raw format. FormatRaw = "raw" + // File Modes + // FileModeGit specifies that files should be fetched using git. + FileModeGit = "git" + // FileModeArchive specifies that files should be fetched using the export archive (tarball). + FileModeArchive = "archive" + // Environment variables. // EnvVarEnableSarif is the environment variable which controls enabling // SARIF logging. @@ -108,6 +108,7 @@ var ( errCommitIsEmpty = errors.New("commit should be non-empty") errFormatNotSupported = errors.New("unsupported format") + errFileModeNotSupported = errors.New("unsupported file mode") errPolicyFileNotSupported = errors.New("policy file is not supported yet") errRawOptionNotSupported = errors.New("raw option is not supported yet") errRepoOptionMustBeSet = errors.New( @@ -177,6 +178,13 @@ func (o *Options) Validate() error { ) } + if !validateFileMode(o.FileMode) { + errs = append( + errs, + errFileModeNotSupported, + ) + } + if len(errs) != 0 { return fmt.Errorf( "%w: %+v", @@ -253,3 +261,12 @@ func validateFormat(format string) bool { return false } } + +func validateFileMode(mode string) bool { + switch strings.ToLower(mode) { + case FileModeGit, FileModeArchive: + return true + default: + return false + } +} diff --git a/options/options_test.go b/options/options_test.go index 89a91aeb8bd..cf205f1cb8d 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -32,6 +32,7 @@ func TestOptions_Validate(t *testing.T) { Nuget string PolicyFile string ResultsFile string + FileMode string ChecksToRun []string Metadata []string ShowDetails bool @@ -84,9 +85,32 @@ func TestOptions_Validate(t *testing.T) { }, wantErr: true, }, + { + name: "invalid filemode flagged", + fields: fields{ + Repo: "github.com/oss/scorecard", + Commit: "HEAD", + Format: "default", + FileMode: "unsupported mode", + }, + wantErr: true, + }, + { + name: "git filemode is valid", + fields: fields{ + Repo: "github.com/oss/scorecard", + Commit: "HEAD", + Format: "default", + FileMode: FileModeGit, + }, + wantErr: false, + }, } for _, tt := range tests { tt := tt + if tt.fields.FileMode == "" { + tt.fields.FileMode = FileModeArchive + } t.Run(tt.name, func(t *testing.T) { o := &Options{ Repo: tt.fields.Repo, @@ -94,6 +118,7 @@ func TestOptions_Validate(t *testing.T) { Commit: tt.fields.Commit, LogLevel: tt.fields.LogLevel, Format: tt.fields.Format, + FileMode: tt.fields.FileMode, NPM: tt.fields.NPM, PyPI: tt.fields.PyPI, RubyGems: tt.fields.RubyGems, diff --git a/pkg/scorecard/scorecard.go b/pkg/scorecard/scorecard.go index a3ae568be4c..007d0671128 100644 --- a/pkg/scorecard/scorecard.go +++ b/pkg/scorecard/scorecard.go @@ -341,12 +341,12 @@ func WithOpenSSFBestPraticesClient(client clients.CIIBestPracticesClient) Option } } -// WithGitMode will configure supporting repository clients to download files -// using git, instead of the archive tarball. This is useful for repositories -// which "export-ignore" files in a .gitattributes file. +// WithFileModeGit will configure supporting repository clients to download files +// using git. This is useful for repositories which "export-ignore" files in its +// .gitattributes file. // // Repository analysis may be slower. -func WithGitMode() Option { +func WithFileModeGit() Option { return func(c *runConfig) error { c.gitMode = true return nil @@ -392,7 +392,7 @@ func Run(ctx context.Context, repo clients.Repo, opts ...Option) (Result, error) if c.client == nil { var opts []githubrepo.Option if c.gitMode { - opts = append(opts, githubrepo.WithGitMode()) + opts = append(opts, githubrepo.WithFileModeGit()) } client, err := githubrepo.NewRepoClient(ctx, opts...) if err != nil { From 60e05c56605ac4b74d1efc120696e23d8ee3ef16 Mon Sep 17 00:00:00 2001 From: Spencer Schrock Date: Tue, 11 Feb 2025 18:56:46 -0700 Subject: [PATCH 10/10] fix repo typo in options test the value isn't used to connect to anything though. Signed-off-by: Spencer Schrock --- options/options_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/options/options_test.go b/options/options_test.go index cf205f1cb8d..caf54c3ca4f 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -59,7 +59,7 @@ func TestOptions_Validate(t *testing.T) { { name: "format sarif and the enable sarif flag is set", fields: fields{ - Repo: "github.com/oss/scorecard", + Repo: "github.com/ossf/scorecard", Commit: "HEAD", Format: "sarif", EnableSarif: true, @@ -70,7 +70,7 @@ func TestOptions_Validate(t *testing.T) { { name: "format sarif and the disabled but the policy file is set", fields: fields{ - Repo: "github.com/oss/scorecard", + Repo: "github.com/ossf/scorecard", Commit: "HEAD", PolicyFile: "testdata/policy.yaml", }, @@ -79,7 +79,7 @@ func TestOptions_Validate(t *testing.T) { { name: "format raw is not supported when V6 is not enabled", fields: fields{ - Repo: "github.com/oss/scorecard", + Repo: "github.com/ossf/scorecard", Commit: "HEAD", Format: "raw", }, @@ -88,7 +88,7 @@ func TestOptions_Validate(t *testing.T) { { name: "invalid filemode flagged", fields: fields{ - Repo: "github.com/oss/scorecard", + Repo: "github.com/ossf/scorecard", Commit: "HEAD", Format: "default", FileMode: "unsupported mode", @@ -98,7 +98,7 @@ func TestOptions_Validate(t *testing.T) { { name: "git filemode is valid", fields: fields{ - Repo: "github.com/oss/scorecard", + Repo: "github.com/ossf/scorecard", Commit: "HEAD", Format: "default", FileMode: FileModeGit,