From ac3eb582626d623628c1b67d7bfba6d5cc865191 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Mon, 21 Oct 2024 14:19:54 +0100 Subject: [PATCH 1/2] git: export gitutil helper for identifying commit shas Signed-off-by: Justin Chadwell --- source/git/source.go | 19 +++++++------------ util/gitutil/git_commit.go | 9 +++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 util/gitutil/git_commit.go diff --git a/source/git/source.go b/source/git/source.go index be14d3fb55f7..dd45b828c82b 100644 --- a/source/git/source.go +++ b/source/git/source.go @@ -36,7 +36,6 @@ import ( "google.golang.org/grpc/status" ) -var validHex = regexp.MustCompile(`^[a-f0-9]{40}$`) var defaultBranch = regexp.MustCompile(`refs/heads/(\S+)`) type Opt struct { @@ -341,7 +340,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index gs.locker.Lock(remote) defer gs.locker.Unlock(remote) - if ref := gs.src.Ref; ref != "" && isCommitSHA(ref) { + if ref := gs.src.Ref; ref != "" && gitutil.IsCommitSHA(ref) { cacheKey := gs.shaToCacheKey(ref) gs.cacheKey = cacheKey return cacheKey, ref, nil, true, nil @@ -400,7 +399,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index if sha == "" { return "", "", nil, false, errors.Errorf("repository does not contain ref %s, output: %q", ref, string(buf)) } - if !isCommitSHA(sha) { + if !gitutil.IsCommitSHA(sha) { return "", "", nil, false, errors.Errorf("invalid commit sha %q", sha) } @@ -455,7 +454,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out } doFetch := true - if isCommitSHA(ref) { + if gitutil.IsCommitSHA(ref) { // skip fetch if commit already exists if _, err := git.Run(ctx, "cat-file", "-e", ref+"^{commit}"); err == nil { doFetch = false @@ -467,7 +466,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out os.RemoveAll(filepath.Join(gitDir, "shallow.lock")) args := []string{"fetch"} - if !isCommitSHA(ref) { // TODO: find a branch from ls-remote? + if !gitutil.IsCommitSHA(ref) { // TODO: find a branch from ls-remote? args = append(args, "--depth=1", "--no-tags") } else { args = append(args, "--tags") @@ -476,11 +475,11 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out } } args = append(args, "origin") - if !isCommitSHA(ref) { - args = append(args, "--force", ref+":tags/"+ref) + if !gitutil.IsCommitSHA(ref) { // local refs are needed so they would be advertised on next fetches. Force is used // in case the ref is a branch and it now points to a different commit sha // TODO: is there a better way to do this? + args = append(args, "--force", ref+":tags/"+ref) } if _, err := git.Run(ctx, args...); err != nil { return nil, errors.Wrapf(err, "failed to fetch remote %s", urlutil.RedactCredentials(gs.src.Remote)) @@ -549,7 +548,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out pullref := ref if isAnnotatedTag { pullref += ":refs/tags/" + pullref - } else if isCommitSHA(ref) { + } else if gitutil.IsCommitSHA(ref) { pullref = "refs/buildkit/" + identity.NewID() _, err = git.Run(ctx, "update-ref", pullref, ref) if err != nil { @@ -710,10 +709,6 @@ func (gs *gitSourceHandler) gitCli(ctx context.Context, g session.Group, opts .. return gitCLI(opts...), cleanup, err } -func isCommitSHA(str string) bool { - return validHex.MatchString(str) -} - func tokenScope(remote string) string { // generally we can only use the token for fetching main remote but in case of github.com we do best effort // to try reuse same token for all github.com remotes. This is the same behavior actions/checkout uses diff --git a/util/gitutil/git_commit.go b/util/gitutil/git_commit.go new file mode 100644 index 000000000000..e139d2c189ec --- /dev/null +++ b/util/gitutil/git_commit.go @@ -0,0 +1,9 @@ +package gitutil + +import "regexp" + +var validHex = regexp.MustCompile(`^[a-f0-9]{40}$`) + +func IsCommitSHA(str string) bool { + return validHex.MatchString(str) +} From 90d2d8b1c6724651b6b3fa86474352ac142e2a21 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Mon, 21 Oct 2024 14:22:28 +0100 Subject: [PATCH 2/2] git: allow cloning commit shas not referenced by branch/tag Signed-off-by: Justin Chadwell --- source/git/source.go | 4 +++- source/git/source_test.go | 47 ++++++++++++++++++++++++++++++++++---- util/gitutil/git_cli.go | 24 +++++++++++++++++++ util/gitutil/git_commit.go | 18 +++++++++++---- 4 files changed, 83 insertions(+), 10 deletions(-) diff --git a/source/git/source.go b/source/git/source.go index dd45b828c82b..cb56eeed5986 100644 --- a/source/git/source.go +++ b/source/git/source.go @@ -475,7 +475,9 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out } } args = append(args, "origin") - if !gitutil.IsCommitSHA(ref) { + if gitutil.IsCommitSHA(ref) { + args = append(args, ref) + } else { // local refs are needed so they would be advertised on next fetches. Force is used // in case the ref is a branch and it now points to a different commit sha // TODO: is there a better way to do this? diff --git a/source/git/source_test.go b/source/git/source_test.go index 59a0fdd83e4f..5955e697848c 100644 --- a/source/git/source_test.go +++ b/source/git/source_test.go @@ -218,15 +218,31 @@ func testFetchBySHA(t *testing.T, keepGitDir bool) { } func TestFetchUnreferencedTagSha(t *testing.T) { - testFetchUnreferencedTagSha(t, false) + testFetchUnreferencedRefSha(t, "v1.2.3-special", false) } func TestFetchUnreferencedTagShaKeepGitDir(t *testing.T) { - testFetchUnreferencedTagSha(t, true) + testFetchUnreferencedRefSha(t, "v1.2.3-special", true) } -// testFetchUnreferencedTagSha tests fetching a SHA that points to a tag that is not reachable from any branch. -func testFetchUnreferencedTagSha(t *testing.T, keepGitDir bool) { +func TestFetchUnreferencedRefSha(t *testing.T) { + testFetchUnreferencedRefSha(t, "refs/special", false) +} + +func TestFetchUnreferencedRefShaKeepGitDir(t *testing.T) { + testFetchUnreferencedRefSha(t, "refs/special", true) +} + +func TestFetchUnadvertisedRefSha(t *testing.T) { + testFetchUnreferencedRefSha(t, "refs/special~", false) +} + +func TestFetchUnadvertisedRefShaKeepGitDir(t *testing.T) { + testFetchUnreferencedRefSha(t, "refs/special~", true) +} + +// testFetchUnreferencedRefSha tests fetching a SHA that points to a ref that is not reachable from any branch. +func testFetchUnreferencedRefSha(t *testing.T, ref string, keepGitDir bool) { if runtime.GOOS == "windows" { t.Skip("Depends on unimplemented containerd bind-mount support on Windows") } @@ -239,7 +255,7 @@ func testFetchUnreferencedTagSha(t *testing.T, keepGitDir bool) { repo := setupGitRepo(t) - cmd := exec.Command("git", "rev-parse", "v1.2.3-special") + cmd := exec.Command("git", "rev-parse", ref) cmd.Dir = repo.mainPath out, err := cmd.Output() @@ -691,6 +707,8 @@ func setupGitRepo(t *testing.T) gitRepoFixture { // * (refs/heads/feature) withsub // * feature // * (HEAD -> refs/heads/master, tag: refs/tags/lightweight-tag) third + // | * ref only + // | * commit only // | * (tag: refs/tags/v1.2.3-special) tagonly-leaf // |/ // * (tag: refs/tags/v1.2.3) second @@ -699,35 +717,53 @@ func setupGitRepo(t *testing.T) gitRepoFixture { "git -c init.defaultBranch=master init", "git config --local user.email test", "git config --local user.name test", + "echo foo > abc", "git add abc", "git commit -m initial", "git tag --no-sign a/v1.2.3", + "echo bar > def", "mkdir subdir", "echo subcontents > subdir/subfile", "git add def subdir", "git commit -m second", "git tag -a -m \"this is an annotated tag\" v1.2.3", + "echo foo > bar", "git add bar", "git commit -m tagonly-leaf", "git tag --no-sign v1.2.3-special", + + "echo foo2 > bar2", + "git add bar2", + "git commit -m \"commit only\"", + "echo foo3 > bar3", + "git add bar3", + "git commit -m \"ref only\"", + "git update-ref refs/special $(git rev-parse HEAD)", + // switch master back to v1.2.3 "git checkout -B master v1.2.3", + "echo sbb > foo13", "git add foo13", "git commit -m third", "git tag --no-sign lightweight-tag", + "git checkout -B feature", + "echo baz > ghi", "git add ghi", "git commit -m feature", "git update-ref refs/test $(git rev-parse HEAD)", + "git submodule add "+fixture.subURL+" sub", "git add -A", "git commit -m withsub", + "git checkout master", + // "git log --oneline --graph --decorate=full --all", ) return fixture @@ -785,6 +821,7 @@ func runShell(t *testing.T, dir string, cmds ...string) { cmd = exec.Command("sh", "-c", args) } cmd.Dir = dir + // cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr require.NoErrorf(t, cmd.Run(), "error running %v", args) } diff --git a/util/gitutil/git_cli.go b/util/gitutil/git_cli.go index 5c35f9365b73..1e497d79994c 100644 --- a/util/gitutil/git_cli.go +++ b/util/gitutil/git_cli.go @@ -218,6 +218,14 @@ func (cli *GitCLI) Run(ctx context.Context, args ...string) (_ []byte, err error continue } } + if strings.Contains(errbuf.String(), "not our ref") || strings.Contains(errbuf.String(), "unadvertised object") { + // server-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/upload-pack.c#L811-L812 + // client-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/fetch-pack.c#L2250-L2253 + if newArgs := argsNoCommitRefspec(args); len(args) > len(newArgs) { + args = newArgs + continue + } + } return buf.Bytes(), errors.Wrapf(err, "git stderr:\n%s", errbuf.String()) } @@ -244,3 +252,19 @@ func argsNoDepth(args []string) []string { } return out } + +func argsNoCommitRefspec(args []string) []string { + if len(args) <= 2 { + return args + } + if args[0] != "fetch" { + return args + } + + // assume the refspec is the last arg + if IsCommitSHA(args[len(args)-1]) { + return args[:len(args)-1] + } + + return args +} diff --git a/util/gitutil/git_commit.go b/util/gitutil/git_commit.go index e139d2c189ec..8049e7e2d9b2 100644 --- a/util/gitutil/git_commit.go +++ b/util/gitutil/git_commit.go @@ -1,9 +1,19 @@ package gitutil -import "regexp" +func IsCommitSHA(str string) bool { + if len(str) != 40 { + return false + } -var validHex = regexp.MustCompile(`^[a-f0-9]{40}$`) + for _, ch := range str { + if ch >= '0' && ch <= '9' { + continue + } + if ch >= 'a' && ch <= 'f' { + continue + } + return false + } -func IsCommitSHA(str string) bool { - return validHex.MatchString(str) + return true }