From fcdde9b8965a1aba9e970ff86eb358bcdf3bd970 Mon Sep 17 00:00:00 2001 From: David Trudgian Date: Tue, 9 Jul 2024 17:03:51 +0100 Subject: [PATCH] feat: mount OCI-SIF overlays and support --writable When an OCI-SIF contains an ext3 overlay layer, created with `singularity overlay create`, it is now mounted when the container is run. By default, an embedded overlay is mounted read-only. In OCI-Mode we have a `--writable-tmpfs` in place by default, so changes can still be made to the container at runtime, but they are made in the ephemeral tmpfs, and do not write into the overlay. To write changes into the embedded overlay, the container must be started with the `--writable` flag, which is now supported in OCI-Mode. Embedded overlay layers are handled by extending the code that previously dealt with user specified `--overlay`s. Note that fuse2fs >=1.46.6 is required to use an embedded overlay, as older versions do not support the `-o` (offset) flag. Closes #2868 Closes #2869 --- CHANGELOG.md | 4 + e2e/overlay/overlay.go | 96 ++++++++++++++++-- internal/app/singularity/overlay_create.go | 2 +- internal/pkg/ocisif/overlay.go | 37 +++++-- internal/pkg/ocisif/overlay_test.go | 4 +- .../runtime/launcher/oci/launcher_linux.go | 30 +++--- .../pkg/runtime/launcher/oci/oci_overlay.go | 78 ++++++++++++-- .../pkg/util/fs/overlay/overlay_item_linux.go | 8 ++ pkg/ocibundle/ocisif/bundle_linux.go | 4 + test/images/extfs-for-overlay.img | Bin 2097152 -> 2097152 bytes 10 files changed, 213 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b363c45e15..44bf7ef115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ image to be pushed to `library://` and `docker://` registries in `squashfs` (default) or `tar` format. Images pushed with `--layer-format tar` can be pulled and run by other OCI runtimes. +- A writable overlay can be added to an OCI-SIF file with the `singularity + overlay create` command. 
The overlay will be applied read-only, by default, + when executing the OCI-SIF. To write changes to the container into the overlay, + use the `--writable` flag. ### Bug Fixes diff --git a/e2e/overlay/overlay.go b/e2e/overlay/overlay.go index f509868ba4..43e36211cd 100644 --- a/e2e/overlay/overlay.go +++ b/e2e/overlay/overlay.go @@ -189,8 +189,10 @@ func (c ctx) testOverlayCreate(t *testing.T) { } } -func (c ctx) testOverlayCreateOCI(t *testing.T) { - require.Filesystem(t, "overlay") +func (c ctx) testOverlayOCI(t *testing.T) { + require.Command(t, "fuse2fs") + require.Command(t, "fuse-overlayfs") + require.Command(t, "fusermount") require.MkfsExt3(t) e2e.EnsureOCISIF(t, c.env) @@ -240,6 +242,13 @@ func (c ctx) testOverlayCreateOCI(t *testing.T) { // native SIF tests above. Same code path for OCI-SIF. We don't need to // repeat them here. tests := []test{ + { + name: "create fail signed", + profile: e2e.UserProfile, + command: "overlay", + args: []string{"create", ocisifSigned}, + exit: 255, + }, { name: "create", profile: e2e.UserProfile, @@ -254,12 +263,81 @@ func (c ctx) testOverlayCreateOCI(t *testing.T) { args: []string{"create", ocisif}, exit: 255, }, + // Add a file without `--writable` - should go into ephemeral tmpfs, not the overlay. { - name: "create fail signed", - profile: e2e.UserProfile, - command: "overlay", - args: []string{"create", ocisifSigned}, - exit: 255, + name: "tmpfs touch", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "touch", "/in-overlay"}, + exit: 0, + }, + { + name: "tmpfs check", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "ls", "/in-overlay"}, + exit: 1, + }, + + // Add a file to the overlay with `--writable` and check that it exists on re-run. 
+ { + name: "writable touch", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{"--writable", ocisif, "touch", "/in-overlay"}, + exit: 0, + }, + { + name: "writable touch check", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "ls", "/in-overlay"}, + exit: 0, + }, + // Remove file without `--writable` - should be an ephemeral change, file still in overlay. + { + name: "tmpfs rm", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "rm", "/in-overlay"}, + exit: 0, + }, + { + name: "tmpfs rm check", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "ls", "/in-overlay"}, + exit: 0, + }, + // Remove file with `--writable` - file gone from overlay. + { + name: "writable rm", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{"--writable", ocisif, "rm", "/in-overlay"}, + exit: 0, + }, + { + name: "writable rm check", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "ls", "/in-overlay"}, + exit: 1, + }, + // Touch file without `--writable` and no tmpfs (via --no-compat)... 
should fail + { + name: "readonly touch", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{"--no-compat", ocisif, "touch", "/in-overlay"}, + exit: 1, + }, + { + name: "readonly touch check", + profile: e2e.OCIUserProfile, + command: "exec", + args: []string{ocisif, "ls", "/in-overlay"}, + exit: 1, }, } @@ -282,7 +360,7 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests { } return testhelper.Tests{ - "create": c.testOverlayCreate, - "createOCI": c.testOverlayCreateOCI, + "create": c.testOverlayCreate, + "OCI": c.testOverlayOCI, } } diff --git a/internal/app/singularity/overlay_create.go b/internal/app/singularity/overlay_create.go index 339f206892..dbbeecc3cb 100644 --- a/internal/app/singularity/overlay_create.go +++ b/internal/app/singularity/overlay_create.go @@ -92,7 +92,7 @@ func canAddOverlay(img *image.Image) (bool, error) { return false, errOverlaySigned } - hasOverlay, err := ocisif.HasOverlay(img.Path) + hasOverlay, _, err := ocisif.HasOverlay(img.Path) if err != nil { return false, fmt.Errorf("while checking for overlays: %s", err) } else if hasOverlay { diff --git a/internal/pkg/ocisif/overlay.go b/internal/pkg/ocisif/overlay.go index c4ffafc51b..b820e9f93f 100644 --- a/internal/pkg/ocisif/overlay.go +++ b/internal/pkg/ocisif/overlay.go @@ -22,47 +22,62 @@ import ( var Ext3LayerMediaType types.MediaType = "application/vnd.sylabs.image.layer.v1.ext3" // HasOverlay returns whether the OCI-SIF at imgPath has an ext3 writable final -// layer - an 'overlay'. -func HasOverlay(imagePath string) (bool, error) { +// layer - an 'overlay'. If present, the offset of the overlay data in the +// OCI-SIF file is also returned. 
+func HasOverlay(imagePath string) (bool, int64, error) { fi, err := sif.LoadContainerFromPath(imagePath, sif.OptLoadWithFlag(os.O_RDONLY), ) if err != nil { - return false, err + return false, 0, err } defer fi.UnloadContainer() ii, err := ocitsif.ImageIndexFromFileImage(fi) if err != nil { - return false, fmt.Errorf("while obtaining image index: %w", err) + return false, 0, fmt.Errorf("while obtaining image index: %w", err) } ix, err := ii.IndexManifest() if err != nil { - return false, fmt.Errorf("while obtaining index manifest: %w", err) + return false, 0, fmt.Errorf("while obtaining index manifest: %w", err) } // One image only. if len(ix.Manifests) != 1 { - return false, fmt.Errorf("only single image data containers are supported, found %d images", len(ix.Manifests)) + return false, 0, fmt.Errorf("only single image data containers are supported, found %d images", len(ix.Manifests)) } imageDigest := ix.Manifests[0].Digest img, err := ii.Image(imageDigest) if err != nil { - return false, fmt.Errorf("while initializing image: %w", err) + return false, 0, fmt.Errorf("while initializing image: %w", err) } layers, err := img.Layers() if err != nil { - return false, fmt.Errorf("while getting image layers: %w", err) + return false, 0, fmt.Errorf("while getting image layers: %w", err) } if len(layers) < 1 { - return false, fmt.Errorf("image has no layers") + return false, 0, fmt.Errorf("image has no layers") } mt, err := layers[len(layers)-1].MediaType() if err != nil { - return false, fmt.Errorf("while getting layer mediatype: %w", err) + return false, 0, fmt.Errorf("while getting layer mediatype: %w", err) } - return mt == Ext3LayerMediaType, nil + // Not an overlay as last layer + if mt != Ext3LayerMediaType { + return false, 0, nil + } + + // Overlay as last layer, get offset + ld, err := layers[len(layers)-1].Digest() + if err != nil { + return false, 0, fmt.Errorf("while getting layer digest: %w", err) + } + desc, err := 
fi.GetDescriptor(sif.WithOCIBlobDigest(ld)) + if err != nil { + return false, 0, fmt.Errorf("while getting layer descriptor: %w", err) + } + return true, desc.Offset(), nil } // AddOverlay adds the provided ext3 overlay file at overlayPath to the OCI-SIF diff --git a/internal/pkg/ocisif/overlay_test.go b/internal/pkg/ocisif/overlay_test.go index 984b73fc92..5f78054dcf 100644 --- a/internal/pkg/ocisif/overlay_test.go +++ b/internal/pkg/ocisif/overlay_test.go @@ -65,7 +65,7 @@ func TestHasOverlay(t *testing.T) { t.Fatal(err) } - got, err := HasOverlay(imgFile) + got, _, err := HasOverlay(imgFile) if got != tt.want { t.Errorf("Expected %v, got %v", tt.want, got) @@ -132,7 +132,7 @@ func TestAddOverlay(t *testing.T) { t.Error("Expected error, but no error returned.") } - hasOverlay, err := HasOverlay(imgFile) + hasOverlay, _, err := HasOverlay(imgFile) if err != nil { t.Fatal(err) } diff --git a/internal/pkg/runtime/launcher/oci/launcher_linux.go b/internal/pkg/runtime/launcher/oci/launcher_linux.go index 9fdf7b12e6..f35a5489d6 100644 --- a/internal/pkg/runtime/launcher/oci/launcher_linux.go +++ b/internal/pkg/runtime/launcher/oci/launcher_linux.go @@ -38,7 +38,7 @@ import ( imgutil "github.com/sylabs/singularity/v4/pkg/image" "github.com/sylabs/singularity/v4/pkg/ocibundle" "github.com/sylabs/singularity/v4/pkg/ocibundle/native" - "github.com/sylabs/singularity/v4/pkg/ocibundle/ocisif" + ocisifbundle "github.com/sylabs/singularity/v4/pkg/ocibundle/ocisif" sifbundle "github.com/sylabs/singularity/v4/pkg/ocibundle/sif" "github.com/sylabs/singularity/v4/pkg/ocibundle/tools" "github.com/sylabs/singularity/v4/pkg/sylog" @@ -117,6 +117,10 @@ func NewLauncher(opts ...launcher.Option) (*Launcher, error) { if !lo.NoCompat || lo.WritableTmpfs { lo.WritableTmpfs = true } + // Explicit writable (overlay) request means no WritableTmpfs + if lo.Writable { + lo.WritableTmpfs = false + } return &Launcher{ cfg: lo, @@ -133,10 +137,6 @@ func NewLauncher(opts ...launcher.Option) 
(*Launcher, error) { func checkOpts(lo launcher.Options) error { badOpt := []string{} - if lo.Writable { - badOpt = append(badOpt, "Writable") - } - if len(lo.FuseMount) > 0 { badOpt = append(badOpt, "FuseMount") } @@ -252,10 +252,10 @@ func (l *Launcher) createSpec() (spec *specs.Spec, err error) { ms := minimalSpec() spec = &ms - // The OCI mode always wraps the rootfs in a tmpfs. - // Whether we make it writable inside the container depends on a request for `--writable-tmpfs`. - // Note that --writable-tmpfs is inferred by default in OCI mode. See NewLauncher(). - spec.Root.Readonly = !l.cfg.WritableTmpfs + // Rootfs is writable if there is a writable tmpfs in place, or --writable + // is requested with an overlay in the image. Note that --writable-tmpfs is + // inferred by default in OCI mode. See NewLauncher(). + spec.Root.Readonly = !l.cfg.WritableTmpfs && !l.cfg.Writable err = addNamespaces(spec, l.cfg.Namespaces) if err != nil { @@ -722,9 +722,9 @@ func (l *Launcher) Exec(ctx context.Context, ep launcher.ExecParams) error { var b ocibundle.Bundle switch { case strings.HasPrefix(image, "oci-sif:"): - b, err = ocisif.New( - ocisif.OptBundlePath(bundleDir), - ocisif.OptImageRef(image), + b, err = ocisifbundle.New( + ocisifbundle.OptBundlePath(bundleDir), + ocisifbundle.OptImageRef(image), ) case strings.HasPrefix(image, "sif:"): sylog.Infof("Running a non-OCI SIF in OCI mode. See user guide for compatibility information.") @@ -832,11 +832,7 @@ func (l *Launcher) RunWrapped(ctx context.Context, containerID, bundlePath, pidF return err } - if len(l.cfg.OverlayPaths) > 0 { - return WrapWithOverlays(ctx, runFunc, absBundle, l.cfg.OverlayPaths, l.cfg.AllowSUID) - } - - return WrapWithWritableTmpFs(ctx, runFunc, absBundle, l.cfg.AllowSUID) + return l.WrapWithOverlays(ctx, runFunc, absBundle) } // getCgroup will return a cgroup path and resources for the runtime to create. 
diff --git a/internal/pkg/runtime/launcher/oci/oci_overlay.go b/internal/pkg/runtime/launcher/oci/oci_overlay.go index 9041db9808..859efdaef9 100644 --- a/internal/pkg/runtime/launcher/oci/oci_overlay.go +++ b/internal/pkg/runtime/launcher/oci/oci_overlay.go @@ -8,7 +8,9 @@ package oci import ( "context" "fmt" + "strings" + "github.com/sylabs/singularity/v4/internal/pkg/ocisif" "github.com/sylabs/singularity/v4/internal/pkg/util/fs/overlay" "github.com/sylabs/singularity/v4/pkg/image" "github.com/sylabs/singularity/v4/pkg/ocibundle/tools" @@ -52,14 +54,70 @@ func cleanupWritableTmpfs(ctx context.Context, bundleDir, overlayDir string) err return tools.DeleteOverlayTmpfs(ctx, bundleDir, overlayDir) } +// imageOverlaySet returns an overlay.Set that includes the correct r/o or +// writable overlay item for an ext3 overlay layer in an OCI-SIF image file, if +// applicable. +func (l *Launcher) imageOverlaySet(bundleDir string) (*overlay.Set, error) { + if !strings.HasPrefix(l.image, "oci-sif:") { + return nil, nil + } + + sifOverlay, sifOffset, err := ocisif.HasOverlay(strings.TrimPrefix(l.image, "oci-sif:")) + if err != nil { + return nil, err + } + + if !sifOverlay { + return nil, nil + } + + item := &overlay.Item{ + Type: image.EXT3, + Readonly: !l.cfg.Writable, + SourcePath: strings.TrimPrefix(l.image, "oci-sif:"), + SourceOffset: sifOffset, + } + item.SetParentDir(bundleDir) + + if l.cfg.Writable { + return &overlay.Set{ + WritableOverlay: item, + }, nil + } + + return &overlay.Set{ + ReadonlyOverlays: []*overlay.Item{item}, + }, nil +} + // WrapWithOverlays runs a function wrapped with prep / cleanup steps for the -// overlays specified in overlayPaths. If there is no user-provided writable -// overlay, it adds an ephemeral overlay which is always writable so that the -// launcher and runtime are able to add content to the container. Whether it is -// writable from inside the container is controlled by the runtime config. 
-func WrapWithOverlays(ctx context.Context, f func() error, bundleDir string, overlayPaths []string, allowSetuid bool) error { - s := overlay.Set{} - for _, p := range overlayPaths { +// overlays in the image, and/or specified in l.cfg.OverlayPaths. If there is no +// writable overlay, it adds an ephemeral overlay which is always writable so +// that the launcher and runtime are able to add content to the container. +// Whether an ephemeral overlay is writable from inside the container is +// controlled by the runtime config. +func (l *Launcher) WrapWithOverlays(ctx context.Context, f func() error, bundleDir string) error { + s, err := l.imageOverlaySet(bundleDir) + if err != nil { + return err + } + + hasSifOverlay := s != nil + hasUserOverlay := len(l.cfg.OverlayPaths) > 0 + if l.cfg.Writable && !hasSifOverlay { + return fmt.Errorf("image %s does not contain a writable overlay", l.image) + } + + // Neither an image-embedded overlay nor a user-requested --overlay - just wrap with a writable tmpfs. + if !hasSifOverlay && !hasUserOverlay { + return WrapWithWritableTmpFs(ctx, f, bundleDir, l.cfg.AllowSUID) + } + + if s == nil { + s = &overlay.Set{} + } + + for _, p := range l.cfg.OverlayPaths { item, err := overlay.NewItemFromString(p) if err != nil { return err @@ -67,7 +125,7 @@ func WrapWithOverlays(ctx context.Context, f func() error, bundleDir string, ove item.SetParentDir(bundleDir) - if allowSetuid { + if l.cfg.AllowSUID { item.SetAllowSetuid(true) } @@ -83,7 +141,7 @@ func WrapWithOverlays(ctx context.Context, f func() error, bundleDir string, ove systemOverlay := "" if s.WritableOverlay == nil { - i, err := prepareSystemOverlay(bundleDir, allowSetuid) + i, err := prepareSystemOverlay(bundleDir, l.cfg.AllowSUID) if err != nil { return err } @@ -92,7 +150,7 @@ func WrapWithOverlays(ctx context.Context, f func() error, bundleDir string, ove } rootFsDir := tools.RootFs(bundleDir).Path() - err := s.Mount(ctx, rootFsDir) + err = s.Mount(ctx, rootFsDir) if err != nil { return err 
} diff --git a/internal/pkg/util/fs/overlay/overlay_item_linux.go b/internal/pkg/util/fs/overlay/overlay_item_linux.go index a234547efe..d076291724 100644 --- a/internal/pkg/util/fs/overlay/overlay_item_linux.go +++ b/internal/pkg/util/fs/overlay/overlay_item_linux.go @@ -33,6 +33,10 @@ type Item struct { // colon-prefixed options (like ":ro") SourcePath string + // SourceOffset is the (optional) offset of the overlay filesystem within + // SourcePath, in bytes. + SourceOffset int64 + // StagingDir is the directory on which this overlay item is staged, to be // used as a source for an overlayfs mount as part of an overlay.Set StagingDir string @@ -252,6 +256,10 @@ func (i *Item) mountWithFuse(ctx context.Context) error { AllowDev: i.allowDev, } + if i.SourceOffset != 0 { + im.ExtraOpts = []string{fmt.Sprintf("offset=%d", i.SourceOffset)} + } + if err := im.Mount(ctx); err != nil { return err } diff --git a/pkg/ocibundle/ocisif/bundle_linux.go b/pkg/ocibundle/ocisif/bundle_linux.go index 02042cb6a3..308fea886f 100644 --- a/pkg/ocibundle/ocisif/bundle_linux.go +++ b/pkg/ocibundle/ocisif/bundle_linux.go @@ -204,6 +204,10 @@ func (b *Bundle) mountLayers(ctx context.Context, img v1.Image, imgFile string) if err != nil { return fmt.Errorf("while checking layer: %w", err) } + // An ext3 final layer is an overlay, and handled separately from the rootfs assembly. + if mt == ocisif.Ext3LayerMediaType && i == len(layers)-1 { + continue + } if mt != ocisif.SquashfsLayerMediaType { return fmt.Errorf("unsupported layer mediaType %q", mt) } diff --git a/test/images/extfs-for-overlay.img b/test/images/extfs-for-overlay.img index 82003e2b72d64e263c6008a9ced05da6f1791dfd..4c3affb84cc77fb66ad1e69117c3c650dd5c796c 100644 GIT binary patch delta 6448 zcmeHLZ){Ul6o2=<+pc4;TMJtpFh}{bIR0!DGnI8UfMB8mf*KKVaa-4REvYN4tGY;_ zMAGKl0_!BW|Lgoz7G(#dNd?Cm@=e>8=*N$y1Z5BH5=b?;WUQmdWi@@

@+T(xO)!}RDstpFcPKj7; zCoTqk%f4JG^;e$Q)ziFo&B!VkNI4E=-FEMFBPpy&4GhrHtY~1j7_93&Paws+?Lo3| zWkNvq^aUgYeslrFz}|}N^z_a7EfOh@gqG2VDhhXdkv>Zd9K%Ubq%tu+`QZY6o%~v* zb_QHNZ+q}nFxmQGd#k_Q?QM5Cn}RmKW2>vRv#G-tZ1=S|Tt4XRv<0llb!0av2jv!s zXN2zwfnX&UULq^#pSF+A%X-;$4;DZDW~oZZgKai;VznO6TrEZ5%(lI5MoN#W*1?q? zvYI-#Ms%vNN{^g&juNuqyF@&TuBf}0ZhnqS?jD-JA)EiX@(nMl5>1W$AurOczIYr@ zoa4Fd=zc9PIg8`R% zGTnh%|F)LxL6=18s=8p*$=wPc^9QVz9+g`$&-2OYOQRqAy^L@;w!u7ePtMIq)E14X z?oF@?l~5G6<7V*PxWzCu;iFzWZY8sqn~lnxqfc^hiH9sD%VOk+(BmNmMl>OsvlNWo z8Y92Uy@j|q>@AYWJv~-Jm*Y!`+Br?BsY9)s)qci=-j3tZ)%whP%)NjvyChNx=rSbC zK<*+S5vW(V#_2d3BUgm|cCm60jyMv0fs#BlC@ksnZw=PhBBi)IV`c1Gm&X~EGk0T) z_zrVEOC9XE2r#1sI45>#yVrnSWBSGhjm<^Z7JvPy_=zdOA!(Ta}taY(PwSZ`>nhLXi;U5e0g|64&Is<}xUV zq)R(`;;*0-;0$BJ@CG7%2rr7%SGuoDL#N-q7alGO+W(#9)nF zSRlL^f<}nVq{{*?|b;1eiaPabaD9{9|$Ey0Ab+$VnI1R{zNU+!vQM zSWK#yvq(6XyIvWAMT}4$L$(f`77Q;t?KSHMD+*Ga7V~4~v=GoWP77aTr(Kh&(>g#D zPNog$@J^ec51sfxmQH*VjxCb@PEKsqIx!mw6G2Hju^!%%nZW4^(T9}c#EcbdWKMTt zB@>AJv|~QoHzo#akm5~pR2K;##@U_2z=xOL{STIy2<3IZW%vb#H*VhaZCd*K)9zu?C`@j5U83zBYU=1#2+6GptEX5a5f9 zHTF!gMw=j@9g;r}yiPK}ljv1{VyB6>DTp$C$Af z4D6l)YvlURUod2cG2`V4*`bcH#@`OCpGv(fie)C% z%kuiBMY-#h5%xj;BH+KyJPz)RvV*>X#_5Oe;@77l{ntnH*8?{j2x@IS|Hb)Jncb;O zeB+XzuVpjBQC&Ey4@V2aQA0Rd7>-U0M~lKyF&s69qs8H9$?>SEVrcnz04{PefOBSm zC2YI>er`In`vuB-{4cp;c`L}H^gT4!b YL16Fyp8d(sF8?oOvir+?Td1t^ANnP@3;+NC delta 1058 zcmZo@Xkcsr;tf13(|LLr6*z0RHKZ&6V&=)FEWr~6+_sl+zu z5{jM%g76W3h4#r4Yzn5;F!Hd50TugA7o5&0%*F^*od8j7qrf5Z!}b(VfMIiwO)2AM z1C9&w1POoPZ3c|e7L!#RM5h~MFlxgzT;*RjgOO#kGRIxQW)NWw?Ua0GZJPr`Y;AMc z+vaez&EagD!__v2yKN3n+Z^7uIecw%__xgwka-~Tf?0quEi)%oH!Z(Nw