From 4c848197946b0d67c82747075e6c81cd79cd1c71 Mon Sep 17 00:00:00 2001 From: Joseph Ferguson Date: Fri, 21 Jun 2024 17:16:59 -0700 Subject: [PATCH] Backport patch for buildkit to fetch by commit --- buildkit/Dockerfile.0.13 | 1 + buildkit/Dockerfile.0.16 | 1 + buildkit/Dockerfile.template | 1 + .../backport-5441-fetch-by-commit-0.13.patch | 152 ++++++++++++++++++ buildkit/backport-5441-fetch-by-commit.patch | 152 ++++++++++++++++++ 5 files changed, 307 insertions(+) create mode 100644 buildkit/backport-5441-fetch-by-commit-0.13.patch create mode 100644 buildkit/backport-5441-fetch-by-commit.patch diff --git a/buildkit/Dockerfile.0.13 b/buildkit/Dockerfile.0.13 index 0c47fa43030..7f7be5845c3 100644 --- a/buildkit/Dockerfile.0.13 +++ b/buildkit/Dockerfile.0.13 @@ -14,6 +14,7 @@ COPY \ backport-5072-fetch-tags.patch \ backport-5096-fix-umask.patch \ backport-5372-sbom-args.patch \ + backport-5441-fetch-by-commit-0.13.patch \ backport-moby-48455-fix-riscv64-seccomp.patch \ containerd-arm64-v8-pre-0.15.patch \ git-no-submodules.patch \ diff --git a/buildkit/Dockerfile.0.16 b/buildkit/Dockerfile.0.16 index 7a7b1c2890b..8ac5e6afbe0 100644 --- a/buildkit/Dockerfile.0.16 +++ b/buildkit/Dockerfile.0.16 @@ -11,6 +11,7 @@ ENV BUILDKIT_VERSION 0.16.0 COPY \ backport-5372-sbom-args.patch \ + backport-5441-fetch-by-commit.patch \ backport-moby-48455-fix-riscv64-seccomp.patch \ containerd-arm64-v8.patch \ git-no-submodules.patch \ diff --git a/buildkit/Dockerfile.template b/buildkit/Dockerfile.template index 39090997938..8293676462d 100644 --- a/buildkit/Dockerfile.template +++ b/buildkit/Dockerfile.template @@ -28,6 +28,7 @@ COPY \ "backport-5072-fetch-tags.patch": { until: "0.15" }, "backport-5096-fix-umask.patch": { until: "0.15" }, "backport-5372-sbom-args.patch": { until: "0.17" }, + "backport-5441-fetch-by-commit.patch": { until: "0.17", older: { "0.16": "backport-5441-fetch-by-commit-0.13.patch" } }, "backport-moby-48455-fix-riscv64-seccomp.patch": { until: "0.17" }, "containerd-arm64-v8.patch": { older: { "0.15": "containerd-arm64-v8-pre-0.15.patch" } }, "git-no-submodules.patch": { }, diff --git a/buildkit/backport-5441-fetch-by-commit-0.13.patch b/buildkit/backport-5441-fetch-by-commit-0.13.patch new file mode 100644 index 00000000000..29db6a80b90 --- /dev/null +++ b/buildkit/backport-5441-fetch-by-commit-0.13.patch @@ -0,0 +1,152 @@ +Description: git: allow cloning commit shas not referenced by branch/tag +Author: Justin Chadwell (+ backporting by Tianon Gravi ) +Applied-Upstream: https://github.com/moby/buildkit/pull/5441; 0.17+ + +diff --git a/source/git/source.go b/source/git/source.go +index 63dfa66a8..005907bf3 100644 +--- a/source/git/source.go ++++ b/source/git/source.go +@@ -36,7 +36,6 @@ import ( + "google.golang.org/grpc/status" + ) + +-var validHex = regexp.MustCompile(`^[a-f0-9]{40}$`) + var defaultBranch = regexp.MustCompile(`refs/heads/(\S+)`) + + type Opt struct { +@@ -341,7 +340,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index + gs.locker.Lock(remote) + defer gs.locker.Unlock(remote) + +- if ref := gs.src.Ref; ref != "" && isCommitSHA(ref) { ++ if ref := gs.src.Ref; ref != "" && gitutil.IsCommitSHA(ref) { + cacheKey := gs.shaToCacheKey(ref) + gs.cacheKey = cacheKey + return cacheKey, ref, nil, true, nil +@@ -400,7 +399,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index + if sha == "" { + return "", "", nil, false, errors.Errorf("repository does not contain ref %s, output: %q", ref, string(buf)) + } +- if !isCommitSHA(sha) { ++ if !gitutil.IsCommitSHA(sha) { + return "", "", nil, false, errors.Errorf("invalid commit sha %q", sha) + } + +@@ -458,7 +457,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + } + + doFetch := true +- if isCommitSHA(ref) { ++ if gitutil.IsCommitSHA(ref) { + // skip fetch if commit already exists + if _, err := git.Run(ctx, "cat-file", "-e", ref+"^{commit}"); err == nil { + doFetch = false +@@ -470,7 +469,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + os.RemoveAll(filepath.Join(gitDir, "shallow.lock")) + + args := []string{"fetch"} +- if !isCommitSHA(ref) { // TODO: find a branch from ls-remote? ++ if !gitutil.IsCommitSHA(ref) { // TODO: find a branch from ls-remote? + args = append(args, "--depth=1", "--no-tags") + } else { + args = append(args, "--tags") +@@ -479,11 +478,13 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + } + } + args = append(args, "origin") +- if !isCommitSHA(ref) { +- args = append(args, "--force", ref+":tags/"+ref) ++ if gitutil.IsCommitSHA(ref) { ++ args = append(args, ref) ++ } else { + // local refs are needed so they would be advertised on next fetches. Force is used + // in case the ref is a branch and it now points to a different commit sha + // TODO: is there a better way to do this? ++ args = append(args, "--force", ref+":tags/"+ref) + } + if _, err := git.Run(ctx, args...); err != nil { + return nil, errors.Wrapf(err, "failed to fetch remote %s", urlutil.RedactCredentials(gs.src.Remote)) +@@ -552,7 +553,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + pullref := ref + if isAnnotatedTag { + pullref += ":refs/tags/" + pullref +- } else if isCommitSHA(ref) { ++ } else if gitutil.IsCommitSHA(ref) { + pullref = "refs/buildkit/" + identity.NewID() + _, err = git.Run(ctx, "update-ref", pullref, ref) + if err != nil { +@@ -713,10 +714,6 @@ func (gs *gitSourceHandler) gitCli(ctx context.Context, g session.Group, opts .. + return gitCLI(opts...), cleanup, err + } + +-func isCommitSHA(str string) bool { +- return validHex.MatchString(str) +-} +- + func tokenScope(remote string) string { + // generally we can only use the token for fetching main remote but in case of github.com we do best effort + // to try reuse same token for all github.com remotes. This is the same behavior actions/checkout uses +diff --git a/util/gitutil/git_cli.go b/util/gitutil/git_cli.go +index 67c651458..d212952ba 100644 +--- a/util/gitutil/git_cli.go ++++ b/util/gitutil/git_cli.go +@@ -209,6 +209,14 @@ func (cli *GitCLI) Run(ctx context.Context, args ...string) (_ []byte, err error + continue + } + } ++ if strings.Contains(errbuf.String(), "not our ref") || strings.Contains(errbuf.String(), "unadvertised object") { ++ // server-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/upload-pack.c#L811-L812 ++ // client-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/fetch-pack.c#L2250-L2253 ++ if newArgs := argsNoCommitRefspec(args); len(args) > len(newArgs) { ++ args = newArgs ++ continue ++ } ++ } + return buf.Bytes(), errors.Errorf("git error: %s\nstderr:\n%s", err, errbuf.String()) + } + return buf.Bytes(), nil +@@ -234,3 +242,19 @@ func argsNoDepth(args []string) []string { + } + return out + } ++ ++func argsNoCommitRefspec(args []string) []string { ++ if len(args) <= 2 { ++ return args ++ } ++ if args[0] != "fetch" { ++ return args ++ } ++ ++ // assume the refspec is the last arg ++ if IsCommitSHA(args[len(args)-1]) { ++ return args[:len(args)-1] ++ } ++ ++ return args ++} +diff --git a/util/gitutil/git_commit.go b/util/gitutil/git_commit.go +new file mode 100644 +index 000000000..8049e7e2d +--- /dev/null ++++ b/util/gitutil/git_commit.go +@@ -0,0 +1,19 @@ ++package gitutil ++ ++func IsCommitSHA(str string) bool { ++ if len(str) != 40 { ++ return false ++ } ++ ++ for _, ch := range str { ++ if ch >= '0' && ch <= '9' { ++ continue ++ } ++ if ch >= 'a' && ch <= 'f' { ++ continue ++ } ++ return false ++ } ++ ++ return true ++} diff --git a/buildkit/backport-5441-fetch-by-commit.patch b/buildkit/backport-5441-fetch-by-commit.patch new file mode 100644 index 00000000000..e5c04d5d456 --- /dev/null +++ b/buildkit/backport-5441-fetch-by-commit.patch @@ -0,0 +1,152 @@ +Description: git: allow cloning commit shas not referenced by branch/tag +Author: Justin Chadwell +Applied-Upstream: https://github.com/moby/buildkit/pull/5441; 0.17+ + +diff --git a/source/git/source.go b/source/git/source.go +index be14d3fb55f7..cb56eeed5986 100644 +--- a/source/git/source.go ++++ b/source/git/source.go +@@ -36,7 +36,6 @@ import ( + "google.golang.org/grpc/status" + ) + +-var validHex = regexp.MustCompile(`^[a-f0-9]{40}$`) + var defaultBranch = regexp.MustCompile(`refs/heads/(\S+)`) + + type Opt struct { +@@ -341,7 +340,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index + gs.locker.Lock(remote) + defer gs.locker.Unlock(remote) + +- if ref := gs.src.Ref; ref != "" && isCommitSHA(ref) { ++ if ref := gs.src.Ref; ref != "" && gitutil.IsCommitSHA(ref) { + cacheKey := gs.shaToCacheKey(ref) + gs.cacheKey = cacheKey + return cacheKey, ref, nil, true, nil +@@ -400,7 +399,7 @@ func (gs *gitSourceHandler) CacheKey(ctx context.Context, g session.Group, index + if sha == "" { + return "", "", nil, false, errors.Errorf("repository does not contain ref %s, output: %q", ref, string(buf)) + } +- if !isCommitSHA(sha) { ++ if !gitutil.IsCommitSHA(sha) { + return "", "", nil, false, errors.Errorf("invalid commit sha %q", sha) + } + +@@ -455,7 +454,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + } + + doFetch := true +- if isCommitSHA(ref) { ++ if gitutil.IsCommitSHA(ref) { + // skip fetch if commit already exists + if _, err := git.Run(ctx, "cat-file", "-e", ref+"^{commit}"); err == nil { + doFetch = false +@@ -467,7 +466,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + os.RemoveAll(filepath.Join(gitDir, "shallow.lock")) + + args := []string{"fetch"} +- if !isCommitSHA(ref) { // TODO: find a branch from ls-remote? ++ if !gitutil.IsCommitSHA(ref) { // TODO: find a branch from ls-remote? + args = append(args, "--depth=1", "--no-tags") + } else { + args = append(args, "--tags") +@@ -476,11 +475,13 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + } + } + args = append(args, "origin") +- if !isCommitSHA(ref) { +- args = append(args, "--force", ref+":tags/"+ref) ++ if gitutil.IsCommitSHA(ref) { ++ args = append(args, ref) ++ } else { + // local refs are needed so they would be advertised on next fetches. Force is used + // in case the ref is a branch and it now points to a different commit sha + // TODO: is there a better way to do this? ++ args = append(args, "--force", ref+":tags/"+ref) + } + if _, err := git.Run(ctx, args...); err != nil { + return nil, errors.Wrapf(err, "failed to fetch remote %s", urlutil.RedactCredentials(gs.src.Remote)) +@@ -549,7 +550,7 @@ func (gs *gitSourceHandler) Snapshot(ctx context.Context, g session.Group) (out + pullref := ref + if isAnnotatedTag { + pullref += ":refs/tags/" + pullref +- } else if isCommitSHA(ref) { ++ } else if gitutil.IsCommitSHA(ref) { + pullref = "refs/buildkit/" + identity.NewID() + _, err = git.Run(ctx, "update-ref", pullref, ref) + if err != nil { +@@ -710,10 +711,6 @@ func (gs *gitSourceHandler) gitCli(ctx context.Context, g session.Group, opts .. + return gitCLI(opts...), cleanup, err + } + +-func isCommitSHA(str string) bool { +- return validHex.MatchString(str) +-} +- + func tokenScope(remote string) string { + // generally we can only use the token for fetching main remote but in case of github.com we do best effort + // to try reuse same token for all github.com remotes. This is the same behavior actions/checkout uses +diff --git a/util/gitutil/git_cli.go b/util/gitutil/git_cli.go +index 5c35f9365b73..1e497d79994c 100644 +--- a/util/gitutil/git_cli.go ++++ b/util/gitutil/git_cli.go +@@ -218,6 +218,14 @@ func (cli *GitCLI) Run(ctx context.Context, args ...string) (_ []byte, err error + continue + } + } ++ if strings.Contains(errbuf.String(), "not our ref") || strings.Contains(errbuf.String(), "unadvertised object") { ++ // server-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/upload-pack.c#L811-L812 ++ // client-side error: https://github.com/git/git/blob/34b6ce9b30747131b6e781ff718a45328aa887d0/fetch-pack.c#L2250-L2253 ++ if newArgs := argsNoCommitRefspec(args); len(args) > len(newArgs) { ++ args = newArgs ++ continue ++ } ++ } + + return buf.Bytes(), errors.Wrapf(err, "git stderr:\n%s", errbuf.String()) + } +@@ -244,3 +252,19 @@ func argsNoDepth(args []string) []string { + } + return out + } ++ ++func argsNoCommitRefspec(args []string) []string { ++ if len(args) <= 2 { ++ return args ++ } ++ if args[0] != "fetch" { ++ return args ++ } ++ ++ // assume the refspec is the last arg ++ if IsCommitSHA(args[len(args)-1]) { ++ return args[:len(args)-1] ++ } ++ ++ return args ++} +diff --git a/util/gitutil/git_commit.go b/util/gitutil/git_commit.go +new file mode 100644 +index 000000000000..8049e7e2d9b2 +--- /dev/null ++++ b/util/gitutil/git_commit.go +@@ -0,0 +1,19 @@ ++package gitutil ++ ++func IsCommitSHA(str string) bool { ++ if len(str) != 40 { ++ return false ++ } ++ ++ for _, ch := range str { ++ if ch >= '0' && ch <= '9' { ++ continue ++ } ++ if ch >= 'a' && ch <= 'f' { ++ continue ++ } ++ return false ++ } ++ ++ return true ++}