Skip to content

Commit

Permalink
build, run: record hash or digest in image history
Browse files Browse the repository at this point in the history
When using `--mount=type=bind` or `--mount=type=cache` the hash or
digest of source in these flags should be added to image history so
buildah can burst cache if files on host or image which is being used as
source is changed.

Signed-off-by: flouthoc <[email protected]>
  • Loading branch information
flouthoc committed Jan 17, 2025
1 parent cda4282 commit 261b57f
Show file tree
Hide file tree
Showing 4 changed files with 423 additions and 22 deletions.
132 changes: 110 additions & 22 deletions imagebuildah/stage_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
// No base image means there's nothing to put in a
// layer, so don't create one.
emptyLayer := (s.builder.FromImageID == "")
if imgID, ref, err = s.commit(ctx, s.getCreatedBy(nil, ""), emptyLayer, s.output, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage); err != nil {
createdBy, err := s.getCreatedBy(nil, "")
if err != nil {
return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
if imgID, ref, err = s.commit(ctx, createdBy, emptyLayer, s.output, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage); err != nil {
return "", nil, false, fmt.Errorf("committing base container: %w", err)
}
} else {
Expand Down Expand Up @@ -1427,7 +1431,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
if s.executor.timestamp != nil {
timestamp = *s.executor.timestamp
}
s.builder.AddPrependedEmptyLayer(&timestamp, s.getCreatedBy(node, addedContentSummary), "", "")
createdBy, err := s.getCreatedBy(node, addedContentSummary)
if err != nil {
return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
s.builder.AddPrependedEmptyLayer(&timestamp, createdBy, "", "")
continue
}
// This is the last instruction for this stage,
Expand All @@ -1437,7 +1445,11 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
// stage.
if lastStage || imageIsUsedLater {
logCommit(s.output, i)
imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), false, s.output, s.executor.squash, lastStage && lastInstruction)
createdBy, err := s.getCreatedBy(node, addedContentSummary)
if err != nil {
return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
imgID, ref, err = s.commit(ctx, createdBy, false, s.output, s.executor.squash, lastStage && lastInstruction)
if err != nil {
return "", nil, false, fmt.Errorf("committing container for step %+v: %w", *step, err)
}
Expand Down Expand Up @@ -1658,14 +1670,18 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,
// We're not going to find any more cache hits, so we
// can stop looking for them.
checkForLayers = false
createdBy, err := s.getCreatedBy(node, addedContentSummary)
if err != nil {
return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
// Create a new image, maybe with a new layer, with the
// name for this stage if it's the last instruction.
logCommit(s.output, i)
// While committing we always set squash to false here
// because at this point we want to save history for
// layers even if its a squashed build so that they
// can be part of the build cache.
imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), !s.stepRequiresLayer(step), commitName, false, lastStage && lastInstruction)
imgID, ref, err = s.commit(ctx, createdBy, !s.stepRequiresLayer(step), commitName, false, lastStage && lastInstruction)
if err != nil {
return "", nil, false, fmt.Errorf("committing container for step %+v: %w", *step, err)
}
Expand Down Expand Up @@ -1696,12 +1712,16 @@ func (s *StageExecutor) Execute(ctx context.Context, base string) (imgID string,

if lastInstruction && lastStage {
if s.executor.squash || s.executor.confidentialWorkload.Convert || len(s.executor.sbomScanOptions) != 0 {
createdBy, err := s.getCreatedBy(node, addedContentSummary)
if err != nil {
return "", nil, false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
// If this is the last instruction of the last stage,
// create a squashed or confidential workload
// version of the image if that's what we're after,
// or a normal one if we need to scan the image while
// committing it.
imgID, ref, err = s.commit(ctx, s.getCreatedBy(node, addedContentSummary), !s.stepRequiresLayer(step), commitName, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage && lastInstruction)
imgID, ref, err = s.commit(ctx, createdBy, !s.stepRequiresLayer(step), commitName, s.executor.squash || s.executor.confidentialWorkload.Convert, lastStage && lastInstruction)
if err != nil {
return "", nil, false, fmt.Errorf("committing final squash step %+v: %w", *step, err)
}
Expand Down Expand Up @@ -1793,54 +1813,58 @@ func historyEntriesEqual(base, derived v1.History) bool {
// that we're comparing.
// Used to verify whether a cache of the intermediate image exists and whether
// to run the build again.
func (s *StageExecutor) historyAndDiffIDsMatch(baseHistory []v1.History, baseDiffIDs []digest.Digest, child *parser.Node, history []v1.History, diffIDs []digest.Digest, addedContentSummary string, buildAddsLayer bool) bool {
func (s *StageExecutor) historyAndDiffIDsMatch(baseHistory []v1.History, baseDiffIDs []digest.Digest, child *parser.Node, history []v1.History, diffIDs []digest.Digest, addedContentSummary string, buildAddsLayer bool) (bool, error) {
// our history should be as long as the base's, plus one entry for what
// we're doing
if len(history) != len(baseHistory)+1 {
return false
return false, nil
}
// check that each entry in the base history corresponds to an entry in
// our history, and count how many of them add a layer diff
expectedDiffIDs := 0
for i := range baseHistory {
if !historyEntriesEqual(baseHistory[i], history[i]) {
return false
return false, nil
}
if !baseHistory[i].EmptyLayer {
expectedDiffIDs++
}
}
if len(baseDiffIDs) != expectedDiffIDs {
return false
return false, nil
}
if buildAddsLayer {
// we're adding a layer, so we should have exactly one more
// layer than the base image
if len(diffIDs) != expectedDiffIDs+1 {
return false
return false, nil
}
} else {
// we're not adding a layer, so we should have exactly the same
// layers as the base image
if len(diffIDs) != expectedDiffIDs {
return false
return false, nil
}
}
// compare the diffs for the layers that we should have in common
for i := range baseDiffIDs {
if diffIDs[i] != baseDiffIDs[i] {
return false
return false, nil
}
}
return history[len(baseHistory)].CreatedBy == s.getCreatedBy(child, addedContentSummary)
createdBy, err := s.getCreatedBy(child, addedContentSummary)
if err != nil {
return false, fmt.Errorf("unable to get createdBy for the node: %w", err)
}
return history[len(baseHistory)].CreatedBy == createdBy, nil
}

// getCreatedBy returns the command the image at node will be created by. If
// the passed-in CompositeDigester is not nil, it is assumed to have the digest
// information for the content if the node is ADD or COPY.
func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary string) string {
func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary string) (string, error) {
if node == nil {
return "/bin/sh"
return "/bin/sh", nil
}
switch strings.ToUpper(node.Value) {
case "ARG":
Expand All @@ -1850,15 +1874,72 @@ func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary stri
}
}
buildArgs := s.getBuildArgsKey()
return "/bin/sh -c #(nop) ARG " + buildArgs
return "/bin/sh -c #(nop) ARG " + buildArgs, nil
case "RUN":
shArg := ""
buildArgs := s.getBuildArgsResolvedForRun()
mountOptionSource := ""
mountOptionFrom := ""
appendCheckSum := ""
mountCheckSum := ""
var err error
for _, flag := range node.Flags {
if strings.HasPrefix(flag, "--mount") {
mountInfo := GetFromAndSourceKeysFromMountFlag(flag)
if mountInfo.Type != "bind" && mountInfo.Type != "cache" {
continue
}
mountOptionSource = mountInfo.Source
mountOptionFrom = mountInfo.From
// If source is not specified then default is '.'
if mountOptionSource == "" {
mountOptionSource = "."
}
}
// Source specificed is part of stage, image or additional-build-context.
if mountOptionFrom != "" {
// If this is not a stage then get digest of image or additional build context
if _, ok := s.executor.stages[mountOptionFrom]; !ok {
if builder, ok := s.executor.containerMap[mountOptionFrom]; ok {
// Found valid image, get image digest.
mountCheckSum = builder.FromImageDigest
} else {
if s.executor.additionalBuildContexts[mountOptionFrom].IsImage {
if builder, ok := s.executor.containerMap[s.executor.additionalBuildContexts[mountOptionFrom].Value]; ok {
// Found valid image, get image digest.
mountCheckSum = builder.FromImageDigest
}
} else {
// Found additional build context, get directory sha.
basePath := s.executor.additionalBuildContexts[mountOptionFrom].Value
if s.executor.additionalBuildContexts[mountOptionFrom].IsURL {
basePath = s.executor.additionalBuildContexts[mountOptionFrom].DownloadedCache
}
mountCheckSum, err = GeneratePathChecksum(filepath.Join(basePath, mountOptionSource))
if err != nil {
return "", fmt.Errorf("generating checksum for directory %q in %q: %w", mountOptionSource, basePath, err)
}
}
}
}
} else {
if mountOptionSource != "" {
mountCheckSum, err = GeneratePathChecksum(filepath.Join(s.executor.contextDir, mountOptionSource))
if err != nil {
return "", fmt.Errorf("generating checksum for directory %q in %q: %w", mountOptionSource, s.executor.contextDir, err)
}
}
}
if mountCheckSum != "" {
// add a seperator to appendCheckSum
appendCheckSum += ":" + mountCheckSum
}
}
if len(node.Original) > 4 {
shArg = node.Original[4:]
}
if buildArgs != "" {
return "|" + strconv.Itoa(len(strings.Split(buildArgs, " "))) + " " + buildArgs + " /bin/sh -c " + shArg
return "|" + strconv.Itoa(len(strings.Split(buildArgs, " "))) + " " + buildArgs + " /bin/sh -c " + shArg + appendCheckSum, nil
}
result := "/bin/sh -c " + shArg
if len(node.Heredocs) > 0 {
Expand All @@ -1867,15 +1948,15 @@ func (s *StageExecutor) getCreatedBy(node *parser.Node, addedContentSummary stri
result = result + "\n" + heredocContent
}
}
return result
return result + appendCheckSum, nil
case "ADD", "COPY":
destination := node
for destination.Next != nil {
destination = destination.Next
}
return "/bin/sh -c #(nop) " + strings.ToUpper(node.Value) + " " + addedContentSummary + " in " + destination.Value + " "
return "/bin/sh -c #(nop) " + strings.ToUpper(node.Value) + " " + addedContentSummary + " in " + destination.Value + " ", nil
default:
return "/bin/sh -c #(nop) " + node.Original
return "/bin/sh -c #(nop) " + node.Original, nil
}
}

Expand Down Expand Up @@ -2024,7 +2105,10 @@ func (s *StageExecutor) generateCacheKey(ctx context.Context, currNode *parser.N
fmt.Fprintln(hash, diffIDs[i].String())
}
}
createdBy := s.getCreatedBy(currNode, addedContentDigest)
createdBy, err := s.getCreatedBy(currNode, addedContentDigest)
if err != nil {
return "", err
}
fmt.Fprintf(hash, "%t", buildAddsLayer)
fmt.Fprintln(hash, createdBy)
fmt.Fprintln(hash, manifestType)
Expand Down Expand Up @@ -2204,7 +2288,11 @@ func (s *StageExecutor) intermediateImageExists(ctx context.Context, currNode *p
continue
}
// children + currNode is the point of the Dockerfile we are currently at.
if s.historyAndDiffIDsMatch(baseHistory, baseDiffIDs, currNode, history, diffIDs, addedContentDigest, buildAddsLayer) {
diff, err := s.historyAndDiffIDsMatch(baseHistory, baseDiffIDs, currNode, history, diffIDs, addedContentDigest, buildAddsLayer)
if err != nil {
return "", err
}
if diff {
return image.ID, nil
}
}
Expand Down
83 changes: 83 additions & 0 deletions imagebuildah/util.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,92 @@
package imagebuildah

import (
"archive/tar"
"io"
"os"
"path/filepath"
"strings"

"github.com/containers/buildah"
digest "github.com/opencontainers/go-digest"
)

type MountInfo struct {
Type string
Source string
From string
}

// Consumes mount flag in format of `--mount=type=bind,src=/path,from=image` and
// return MountInfo with values, otherwise values are empty if keys are not present in the option.
func GetFromAndSourceKeysFromMountFlag(mount string) MountInfo {
tokens := strings.Split(mount, ",")
source := ""
from := ""
mountType := ""
for _, option := range tokens {
if strings.Contains(option, "=") {
optionSplit := strings.Split(option, "=")
if optionSplit[0] == "src" || optionSplit[0] == "source" {
source = optionSplit[1]
}
if optionSplit[0] == "from" {
from = optionSplit[1]
}
if optionSplit[1] == "type" {
mountType = optionSplit[2]
}
}
}
return MountInfo{Source: source, From: from, Type: mountType}
}

// GeneratePathChecksum generates the SHA-256 checksum for a file or a directory.
func GeneratePathChecksum(sourcePath string) (string, error) {
digester := digest.SHA256.Digester()
tarWriter := tar.NewWriter(digester.Hash())

err := filepath.Walk(sourcePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

header, err := tar.FileInfoHeader(info, "")
if err != nil {
return err
}

relPath, err := filepath.Rel(sourcePath, path)
if err != nil {
return err
}
header.Name = filepath.ToSlash(relPath)

if err := tarWriter.WriteHeader(header); err != nil {
return err
}

if !info.Mode().IsRegular() {
return nil
}

file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()

_, err = io.Copy(tarWriter, file)
return err
})
tarWriter.Close()
if err != nil {
tarWriter.Close()
return "", err
}
return digester.Digest().String(), nil
}

// InitReexec is a wrapper for buildah.InitReexec(). It should be called at
// the start of main(), and if it returns true, main() should return
// successfully immediately.
Expand Down
Loading

0 comments on commit 261b57f

Please sign in to comment.