diff --git a/imagebuildah/stage_executor.go b/imagebuildah/stage_executor.go index 7b089d3ab11..7cc84eda2d4 100644 --- a/imagebuildah/stage_executor.go +++ b/imagebuildah/stage_executor.go @@ -1280,7 +1280,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, // No base image means there's nothing to put in a // layer, so don't create one. emptyLayer := (s.builder.FromImageID == "") - if imgID, ref, err = s.commit(ctx, s.getCreatedBy(nil, ""), emptyLayer, s.output, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage); err != nil { + createdBy, err := s.getCreatedBy(nil, "") + if err != nil { + return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } + if imgID, ref, err = s.commit(ctx, createdBy, emptyLayer, s.output, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage); err != nil { return "", nil, false, fmt.Errorf("committing base container: %w", err) } } else { @@ -1427,7 +1431,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, if s.executor.timestamp != nil { timestamp = *s.executor.timestamp } - s.builder.AddPrependedEmptyLayer(×tamp, s.getCreatedBy(node, addedContentSummary), "", "") + createdBy, err := s.getCreatedBy(node, addedContentSummary) + if err != nil { + return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } + s.builder.AddPrependedEmptyLayer(×tamp, createdBy, "", "") continue } // This is the last instruction for this stage, @@ -1437,7 +1445,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, // stage. if lastStage || imageIsUsedLater { logCommit(s.output, i) - imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), false, s.output, s.executor.squash, lastStage && lastInstruction) + createdBy, err := s.getCreatedBy(node, addedContentSummary) + if err != nil { + return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } + imgID, ref, err = s.commit(ctx, createdBy, false, s.output, s.executor.squash, lastStage && lastInstruction) if err != nil { return "", nil, false, fmt.Errorf("committing container for step %+v: %w", *step, err) } @@ -1658,6 +1670,10 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, // We're not going to find any more cache hits, so we // can stop looking for them. checkForLayers = false + createdBy, err := s.getCreatedBy(node, addedContentSummary) + if err != nil { + return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } // Create a new image, maybe with a new layer, with the // name for this stage if it's the last instruction. logCommit(s.output, i) @@ -1665,7 +1681,7 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, // because at this point we want to save history for // layers even if its a squashed build so that they // can be part of the build cache. - imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), !s.stepRequiresLayer(step), commitName, false, lastStage && lastInstruction) + imgID, ref, err = s.commit(ctx, createdBy, !s.stepRequiresLayer(step), commitName, false, lastStage && lastInstruction) if err != nil { return "", nil, false, fmt.Errorf("committing container for step %+v: %w", *step, err) } @@ -1696,12 +1712,16 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string, if lastInstruction && lastStage { if s.executor.squash || s.executor.confidentialWorkload.Convert || len(s.executor.sbomScanOptions) != 0 { + createdBy, err := s.getCreatedBy(node, addedContentSummary) + if err != nil { + return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } // If this is the last instruction of the last stage, // create a squashed or confidential workload // version of the image if that's what we're after, // or a normal one if we need to scan the image while // committing it. - imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), !s.stepRequiresLayer(step), commitName, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage && lastInstruction) + imgID, ref, err = s.commit(ctx, createdBy, !s.stepRequiresLayer(step), commitName, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage && lastInstruction) if err != nil { return "", nil, false, fmt.Errorf("committing final squash step %+v: %w", *step, err) } @@ -1793,54 +1813,58 @@ func historyEntriesEqual(base, derived v1.History) bool { // that we're comparing. // Used to verify whether a cache of the intermediate image exists and whether // to run the build again. -func (s *StageExecutor) historyAndDiffIDsMatch(baseHistory []v1.History, baseDiffIDs []digest.Digest, child *parser.Node, history []v1.History, diffIDs []digest.Digest, addedContentSummary string, buildAddsLayer bool) bool { +func (s *StageExecutor) historyAndDiffIDsMatch(baseHistory []v1.History, baseDiffIDs []digest.Digest, child *parser.Node, history []v1.History, diffIDs []digest.Digest, addedContentSummary string, buildAddsLayer bool) (bool, error) { // our history should be as long as the base's, plus one entry for what // we're doing if len(history) != len(baseHistory)+1 { - return false + return false, nil } // check that each entry in the base history corresponds to an entry in // our history, and count how many of them add a layer diff expectedDiffIDs := 0 for i := range baseHistory { if !historyEntriesEqual(baseHistory[i], history[i]) { - return false + return false, nil } if !baseHistory[i].EmptyLayer { expectedDiffIDs++ } } if len(baseDiffIDs) != expectedDiffIDs { - return false + return false, nil } if buildAddsLayer { // we're adding a layer, so we should have exactly one more // layer than the base image if len(diffIDs) != expectedDiffIDs+1 { - return false + return false, nil } } else { // we're not adding a layer, so we should have exactly the same // layers as the base image if len(diffIDs) != expectedDiffIDs { - return false + return false, nil } } // compare the diffs for the layers that we should have in common for i := range baseDiffIDs { if diffIDs[i] != baseDiffIDs[i] { - return false + return false, nil } } - return history[len(baseHistory)].CreatedBy == s.getCreatedBy(child, addedContentSummary) + createdBy, err := s.getCreatedBy(child, addedContentSummary) + if err != nil { + return false, fmt.Errorf("unable to get createdBy for the node: %w", err) + } + return history[len(baseHistory)].CreatedBy == createdBy, nil } // getCreatedBy returns the command the image at node will be created by. If // the passed-in CompositeDigester is not nil, it is assumed to have the digest // information for the content if the node is ADD or COPY. -func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary string) string { +func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary string) (string, error) { if node == nil { - return "/bin/sh" + return "/bin/sh", nil } switch strings.ToUpper(node.Value) { case "ARG": @@ -1850,15 +1874,72 @@ func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary stri } } buildArgs := s.getBuildArgsKey() - return "/bin/sh -c #(nop) ARG " + buildArgs + return "/bin/sh -c #(nop) ARG " + buildArgs, nil case "RUN": shArg := "" buildArgs := s.getBuildArgsResolvedForRun() + mountOptionSource := "" + mountOptionFrom := "" + appendCheckSum := "" + mountCheckSum := "" + var err error + for _, flag := range node.Flags { + if strings.HasPrefix(flag, "--mount") { + mountInfo := GetFromAndSourceKeysFromMountFlag(flag) + if mountInfo.Type != "bind" && mountInfo.Type != "cache" { + continue + } + mountOptionSource = mountInfo.Source + mountOptionFrom = mountInfo.From + // If source is not specified then default is '.' + if mountOptionSource == "" { + mountOptionSource = "." + } + } + // Source specificed is part of stage, image or additional-build-context. + if mountOptionFrom != "" { + // If this is not a stage then get digest of image or additional build context + if _, ok := s.executor.stages[mountOptionFrom]; !ok { + if builder, ok := s.executor.containerMap[mountOptionFrom]; ok { + // Found valid image, get image digest. + mountCheckSum = builder.FromImageDigest + } else { + if s.executor.additionalBuildContexts[mountOptionFrom].IsImage { + if builder, ok := s.executor.containerMap[s.executor.additionalBuildContexts[mountOptionFrom].Value]; ok { + // Found valid image, get image digest. + mountCheckSum = builder.FromImageDigest + } + } else { + // Found additional build context, get directory sha. + basePath := s.executor.additionalBuildContexts[mountOptionFrom].Value + if s.executor.additionalBuildContexts[mountOptionFrom].IsURL { + basePath = s.executor.additionalBuildContexts[mountOptionFrom].DownloadedCache + } + mountCheckSum, err = GeneratePathChecksum(filepath.Join(basePath, mountOptionSource)) + if err != nil { + return "", fmt.Errorf("generating checksum for directory %q in %q: %w", mountOptionSource, basePath, err) + } + } + } + } + } else { + if mountOptionSource != "" { + mountCheckSum, err = GeneratePathChecksum(filepath.Join(s.executor.contextDir, mountOptionSource)) + if err != nil { + return "", fmt.Errorf("generating checksum for directory %q in %q: %w", mountOptionSource, s.executor.contextDir, err) + } + } + } + if mountCheckSum != "" { + // add a seperator to appendCheckSum + appendCheckSum += ":" + mountCheckSum + } + } if len(node.Original) > 4 { shArg = node.Original[4:] } if buildArgs != "" { - return "|" + strconv.Itoa(len(strings.Split(buildArgs, " "))) + " " + buildArgs + " /bin/sh -c " + shArg + return "|" + strconv.Itoa(len(strings.Split(buildArgs, " "))) + " " + buildArgs + " /bin/sh -c " + shArg + appendCheckSum, nil } result := "/bin/sh -c " + shArg if len(node.Heredocs) > 0 { @@ -1867,15 +1948,15 @@ func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary stri result = result + "\n" + heredocContent } } - return result + return result + appendCheckSum, nil case "ADD", "COPY": destination := node for destination.Next != nil { destination = destination.Next } - return "/bin/sh -c #(nop) " + strings.ToUpper(node.Value) + " " + addedContentSummary + " in " + destination.Value + " " + return "/bin/sh -c #(nop) " + strings.ToUpper(node.Value) + " " + addedContentSummary + " in " + destination.Value + " ", nil default: - return "/bin/sh -c #(nop) " + node.Original + return "/bin/sh -c #(nop) " + node.Original, nil } } @@ -2024,7 +2105,10 @@ func (s *StageExecutor) generateCacheKey(ctx context.Context, currNode *parser.N fmt.Fprintln(hash, diffIDs[i].String()) } } - createdBy := s.getCreatedBy(currNode, addedContentDigest) + createdBy, err := s.getCreatedBy(currNode, addedContentDigest) + if err != nil { + return "", err + } fmt.Fprintf(hash, "%t", buildAddsLayer) fmt.Fprintln(hash, createdBy) fmt.Fprintln(hash, manifestType) @@ -2204,7 +2288,11 @@ func (s *StageExecutor) intermediateImageExists(ctx context.Context, currNode *p continue } // children + currNode is the point of the Dockerfile we are currently at. - if s.historyAndDiffIDsMatch(baseHistory, baseDiffIDs, currNode, history, diffIDs, addedContentDigest, buildAddsLayer) { + diff, err := s.historyAndDiffIDsMatch(baseHistory, baseDiffIDs, currNode, history, diffIDs, addedContentDigest, buildAddsLayer) + if err != nil { + return "", err + } + if diff { return image.ID, nil } } diff --git a/imagebuildah/util.go b/imagebuildah/util.go index 90c018fa497..ce30175cd0f 100644 --- a/imagebuildah/util.go +++ b/imagebuildah/util.go @@ -1,9 +1,92 @@ package imagebuildah import ( + "archive/tar" + "io" + "os" + "path/filepath" + "strings" + "github.com/containers/buildah" + digest "github.com/opencontainers/go-digest" ) +type MountInfo struct { + Type string + Source string + From string +} + +// Consumes mount flag in format of `--mount=type=bind,src=/path,from=image` and +// return MountInfo with values, otherwise values are empty if keys are not present in the option. +func GetFromAndSourceKeysFromMountFlag(mount string) MountInfo { + tokens := strings.Split(mount, ",") + source := "" + from := "" + mountType := "" + for _, option := range tokens { + if strings.Contains(option, "=") { + optionSplit := strings.Split(option, "=") + if optionSplit[0] == "src" || optionSplit[0] == "source" { + source = optionSplit[1] + } + if optionSplit[0] == "from" { + from = optionSplit[1] + } + if optionSplit[1] == "type" { + mountType = optionSplit[2] + } + } + } + return MountInfo{Source: source, From: from, Type: mountType} +} + +// GeneratePathChecksum generates the SHA-256 checksum for a file or a directory. +func GeneratePathChecksum(sourcePath string) (string, error) { + digester := digest.SHA256.Digester() + tarWriter := tar.NewWriter(digester.Hash()) + + err := filepath.Walk(sourcePath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + header, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + + relPath, err := filepath.Rel(sourcePath, path) + if err != nil { + return err + } + header.Name = filepath.ToSlash(relPath) + + if err := tarWriter.WriteHeader(header); err != nil { + return err + } + + if !info.Mode().IsRegular() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(tarWriter, file) + return err + }) + tarWriter.Close() + if err != nil { + tarWriter.Close() + return "", err + } + return digester.Digest().String(), nil +} + // InitReexec is a wrapper for buildah.InitReexec(). It should be called at // the start of main(), and if it returns true, main() should return // successfully immediately. diff --git a/imagebuildah/util_test.go b/imagebuildah/util_test.go new file mode 100644 index 00000000000..f952b0ebfdd --- /dev/null +++ b/imagebuildah/util_test.go @@ -0,0 +1,88 @@ +package imagebuildah + +import ( + "archive/tar" + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/containers/buildah/imagebuildah" + "github.com/opencontainers/go-digest" +) + +func TestGeneratePathChecksum(t *testing.T) { + tempDir, err := ioutil.TempDir("", "testdir") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) // Clean up after the test + + tempFile, err := ioutil.TempFile(tempDir, "testfile") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer tempFile.Close() + + // Write some data to the file + data := []byte("Hello, world!") + if _, err := tempFile.Write(data); err != nil { + t.Fatalf("Failed to write data to temp file: %v", err) + } + + // Generate the checksum for the directory + checksum, err := imagebuildah.GeneratePathChecksum(tempDir) + if err != nil { + t.Fatalf("Failed to generate checksum: %v", err) + } + + digester := digest.SHA256.Digester() + tarWriter := tar.NewWriter(digester.Hash()) + + err = filepath.Walk(tempDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + header, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + + relPath, err := filepath.Rel(tempDir, path) + if err != nil { + return err + } + header.Name = filepath.ToSlash(relPath) + + if err := tarWriter.WriteHeader(header); err != nil { + return err + } + + if !info.Mode().IsRegular() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(tarWriter, file) + return err + }) + if err != nil { + tarWriter.Close() + t.Fatalf("Failed to manually generate checksum: %v", err) + } + + tarWriter.Close() + expectedChecksum := digester.Digest().String() + + // Compare the generated checksum to the expected checksum + if checksum != expectedChecksum { + t.Errorf("Checksum mismatch: got %s, expected %s", checksum, expectedChecksum) + } +} diff --git a/tests/bud.bats b/tests/bud.bats index 478918c8243..5efe766a5fa 100644 --- a/tests/bud.bats +++ b/tests/bud.bats @@ -354,6 +354,148 @@ _EOF run_buildah 1 run myctr ls -l subdir/ } +@test "bud --layers with --mount type bind should burst cache if content is changed" { + _prefetch alpine + local contextdir=${TEST_SCRATCH_DIR}/bud/platform + mkdir -p $contextdir + + cat > $contextdir/samplefile << _EOF +samplefile +_EOF + + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=bind,target=/test,Z ls /test +_EOF + + # on first run since there is no cache so `samplefile` must be printed + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "samplefile" + + # on second run since there is cache so `samplefile` should not be printed + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + # output should not contain `samplefile` + assert "$output" !~ "samplefile" + + cat > $contextdir/anotherfile << _EOF +anotherfile +_EOF + + # on third run since we have added new file `anotherfile` so cache must burst. + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "samplefile" + expect_output --substring "anotherfile" +} + +@test "bud --layers with --mount type bind should burst and multiple mounts cache if content is changed" { + _prefetch alpine + local contextdir=${TEST_SCRATCH_DIR}/bud/platform + mkdir -p $contextdir + + cat > $contextdir/samplefile << _EOF +samplefile +_EOF + + cat > $contextdir/testfile << _EOF +Helloworld +_EOF + + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=bind,target=/test,Z --mount=type=bind,source=testfile,target=testfile,Z ls /test && cat testfile +_EOF + + # on first run since there is no cache so `samplefile` must be printed + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "samplefile" + expect_output --substring "Helloworld" + + # on second run since there is cache so `samplefile` should not be printed + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + # output should not contain `samplefile` + assert "$output" !~ "samplefile" + + # Modify sample file 2 + cat > $contextdir/testfile << _EOF +Helloworld2 +_EOF + + # on third run since we have added new file `anotherfile` so cache must burst. + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "samplefile" + expect_output --substring "Helloworld2" +} + +@test "bud --layers with --mount type bind should burst cache if content is changed - source is additional build context" { + _prefetch alpine + local contextdir=${TEST_SCRATCH_DIR}/bud/platform + mkdir -p $contextdir + + cat > $contextdir/samplefile << _EOF +samplefile2 +_EOF + + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=bind,from=one,target=/test,Z ls /test +_EOF + + # on first run since there is no cache so `samplefile` must be printed + run_buildah build $WITH_POLICY_JSON --build-context one=$contextdir --layers -t source -f $contextdir/Containerfile + expect_output --substring "samplefile" + + # on second run since there is cache so `samplefile` should not be printed + run_buildah build $WITH_POLICY_JSON --build-context one=$contextdir --layers -t source -f $contextdir/Containerfile + # output should not `samplefile` since cache is being used + assert "$output" !~ "samplefile" + + cat > $contextdir/anotherfile << _EOF +anotherfile2 +_EOF + + # on third run since we have added new file `anotherfile` so cache must burst. + run_buildah build $WITH_POLICY_JSON --build-context one=$contextdir --layers -t source -f $contextdir/Containerfile + expect_output --substring "samplefile" + expect_output --substring "anotherfile" +} + +@test "bud --layers with --mount=cache should burst cache if content is changed" { + _prefetch alpine + local contextdir=${TEST_SCRATCH_DIR}/bud/platform + mkdir -p $contextdir + + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=cache,id=YfHI60aApFM-target,target=target echo world > /target/hello +_EOF + # First populate cache + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + + # on first run since there is no cache so `samplefile` must be printed + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=cache,id=YfHI60aApFM-target,target=target echo second > /target/second +RUN --mount=type=cache,id=YfHI60aApFM-target,target=target ls /target/ +_EOF + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "hello" + + # on second run since there is cache so `samplefile` should not be printed + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + # output should not contain `hello` + assert "$output" !~ "hello" + + cat > $contextdir/Containerfile << _EOF +FROM alpine +RUN --mount=type=cache,id=YfHI60aApFM-target,target=target echo third > /target/third +RUN --mount=type=cache,id=YfHI60aApFM-target,target=target ls /target/ +_EOF + # Burst cache, now it should show `hello` and `second` + run_buildah build $WITH_POLICY_JSON --layers -t source -f $contextdir/Containerfile $contextdir + expect_output --substring "hello" + expect_output --substring "second" +} + @test "bud --layers should not hit cache if heredoc is changed" { _prefetch alpine local contextdir=${TEST_SCRATCH_DIR}/bud/platform