From c3dff62e1cc0692d033465c784b561f36a722969 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Sat, 2 Dec 2023 15:48:12 -0500 Subject: [PATCH] commit: Try reflinks for local commits by default I think we originally used to do this, but at some point in a code refactoring, this optimization got lost. It's a quite important optimization for the case of writing content generated by an external system into an ostree repository. --- src/libostree/ostree-repo-commit.c | 74 ++++++++++--------- .../destructive/itest-label-selinux.sh | 13 ++++ 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c index d9fbb25104..5a22ad7311 100644 --- a/src/libostree/ostree-repo-commit.c +++ b/src/libostree/ostree-repo-commit.c @@ -586,11 +586,11 @@ _ostree_repo_bare_content_cleanup (OstreeRepoBareContent *regwrite) } /* Allocate an O_TMPFILE, write everything from @input to it, but - * not exceeding @length. Used for every object in archive repos, - * and content objects in all bare-type repos. + * not exceeding @length. */ static gboolean -create_regular_tmpfile_linkable_with_content (OstreeRepo *self, guint64 length, GInputStream *input, +create_regular_tmpfile_linkable_with_content (OstreeRepo *self, guint64 length, + GInputStream *original_input, GInputStream *input, GLnxTmpfile *out_tmpf, GCancellable *cancellable, GError **error) { @@ -601,40 +601,44 @@ create_regular_tmpfile_linkable_with_content (OstreeRepo *self, guint64 length, error)) return FALSE; - if (!glnx_try_fallocate (tmpf.fd, 0, length, error)) - return FALSE; - - if (G_IS_FILE_DESCRIPTOR_BASED (input)) + // Try to do a reflink if possible; if we hit this case we're operating on trusted local input. + gboolean did_clone = FALSE; + if (G_IS_FILE_DESCRIPTOR_BASED (original_input)) { - int infd = g_file_descriptor_based_get_fd ((GFileDescriptorBased *)input); - if (glnx_regfile_copy_bytes (infd, tmpf.fd, (off_t)length) < 0) - return glnx_throw_errno_prefix (error, "regfile copy"); + int infd = g_file_descriptor_based_get_fd ((GFileDescriptorBased *)original_input); + if (ioctl (tmpf.fd, FICLONE, infd) == 0) + { + did_clone = TRUE; + } } else { - /* We used to do a g_output_stream_splice(), but there are two issues with that: - * - We want to honor the size provided, to avoid malicious content that says it's - * e.g. 10 bytes but is actually gigabytes. - * - Due to GLib bugs that pointlessly calls `poll()` on the output fd for every write - */ - gsize buf_size = MIN (length, 1048576); - g_autofree gchar *buf = g_malloc (buf_size); - guint64 remaining = length; - while (remaining > 0) - { - const gssize bytes_read - = g_input_stream_read (input, buf, MIN (remaining, buf_size), cancellable, error); - if (bytes_read < 0) - return FALSE; - else if (bytes_read == 0) - return glnx_throw (error, - "Unexpected EOF with %" G_GUINT64_FORMAT "/%" G_GUINT64_FORMAT - " bytes remaining", - remaining, length); - if (glnx_loop_write (tmpf.fd, buf, bytes_read) < 0) - return glnx_throw_errno_prefix (error, "write"); - remaining -= bytes_read; - } + if (!glnx_try_fallocate (tmpf.fd, 0, length, error)) + return FALSE; + } + + /* We used to do a g_output_stream_splice(), but there are two issues with that: + * - We want to honor the size provided, to avoid malicious content that says it's + * e.g. 10 bytes but is actually gigabytes. + * - Due to GLib bugs that pointlessly calls `poll()` on the output fd for every write + */ + gsize buf_size = MIN (length, 1048576); + g_autofree gchar *buf = g_malloc (buf_size); + guint64 remaining = length; + while (remaining > 0) + { + const gssize bytes_read + = g_input_stream_read (input, buf, MIN (remaining, buf_size), cancellable, error); + if (bytes_read < 0) + return FALSE; + else if (bytes_read == 0) + return glnx_throw (error, + "Unexpected EOF with %" G_GUINT64_FORMAT "/%" G_GUINT64_FORMAT + " bytes remaining", + remaining, length); + if (!did_clone && glnx_loop_write (tmpf.fd, buf, bytes_read) < 0) + return glnx_throw_errno_prefix (error, "write"); + remaining -= bytes_read; } if (!glnx_fchmod (tmpf.fd, 0644, error)) @@ -989,8 +993,8 @@ write_content_object (OstreeRepo *self, const char *expected_checksum, GInputStr } else if (repo_mode != OSTREE_REPO_MODE_ARCHIVE) { - if (!create_regular_tmpfile_linkable_with_content (self, size, file_input, &tmpf, cancellable, - error)) + if (!create_regular_tmpfile_linkable_with_content (self, size, input, file_input, &tmpf, + cancellable, error)) return FALSE; } else diff --git a/tests/kolainst/destructive/itest-label-selinux.sh b/tests/kolainst/destructive/itest-label-selinux.sh index 29444fbc8d..f166b611c2 100755 --- a/tests/kolainst/destructive/itest-label-selinux.sh +++ b/tests/kolainst/destructive/itest-label-selinux.sh @@ -17,16 +17,29 @@ assert_not_has_file "${testbin}" # Make a test binary that we label as shell_exec_t on disk, but should be # reset by --selinux-policy back to bin_t echo 'test foo' > ${testbin} +# Extend it with some real data to help test reflinks +cat < /usr/bin/bash >> "${testbin}" +# And set its label chcon --reference co/usr/bin/true ${testbin} +# Now at this point, it should not have shared extents +filefrag -v "${testbin}" > out.txt +assert_not_file_has_content out.txt shared +rm -f out.txt oldcon=$(getfattr --only-values -m security.selinux ${testbin}) chcon --reference co/usr/bin/bash ${testbin} newcon=$(getfattr --only-values -m security.selinux ${testbin}) assert_not_streq "${oldcon}" "${newcon}" +# Ensure the tmpdir isn't pruned +touch co ostree commit -b testbranch --link-checkout-speedup \ --selinux-policy co --tree=dir=co ostree ls -X testbranch /usr/bin/foo-a-generic-binary > ls.txt assert_file_has_content ls.txt ${oldcon} ostree fsck +# Additionally, the commit process should have reflinked our input +filefrag -v "${testbin}" > out.txt +assert_file_has_content out.txt shared +rm -f out.txt ostree refs --delete testbranch rm co -rf