From 0e11b21ab6b973951edd096d2aa5ffc3186e593b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Jun 2024 16:21:38 +0100 Subject: [PATCH 01/18] btrfs-progs: subvolume delete: add new option for recursive deletion Add new option --recursive 'btrfs subvol delete', causing it to pass the BTRFS_UTIL_DELETE_SUBVOLUME_RECURSIVE flag through to libbtrfsutil. This can work in two modes, depending on the user: - regular user - this will skip subvolumes that are not accessible - root (CAP_SYS_ADMIN) - no limitations Pull-request: #861 Signed-off-by: Mark Harmstone Co-authored-by: Omar Sandoval Reviewed-by: Qu Wenruo [ Add details to man page, fix indent in the doc. ] Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- Documentation/btrfs-subvolume.rst | 9 +++++++++ cmds/subvolume.c | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Documentation/btrfs-subvolume.rst b/Documentation/btrfs-subvolume.rst index d1e89f15e1..eed602f9bf 100644 --- a/Documentation/btrfs-subvolume.rst +++ b/Documentation/btrfs-subvolume.rst @@ -112,6 +112,15 @@ delete [options] [ [...]], delete -i|--subvolid -i|--subvolid subvolume id to be removed instead of the that should point to the filesystem with the subvolume + + -R|--recursive + delete subvolumes beneath each subvolume recursively + + This requires either `CAP_SYS_ADMIN` or the filesystem must be + mounted with `user_subvol_rm_allowed` mount option. + In the unprivileged case, subvolumes which cannot be accessed + are skipped. The deletion is not atomic. 
+ -v|--verbose (deprecated) alias for global *-v* option diff --git a/cmds/subvolume.c b/cmds/subvolume.c index 52bc88500e..56108269a3 100644 --- a/cmds/subvolume.c +++ b/cmds/subvolume.c @@ -347,6 +347,8 @@ static const char * const cmd_subvolume_delete_usage[] = { OPTLINE("-c|--commit-after", "wait for transaction commit at the end of the operation"), OPTLINE("-C|--commit-each", "wait for transaction commit after deleting each subvolume"), OPTLINE("-i|--subvolid", "subvolume id of the to be removed subvolume"), + OPTLINE("-R|--recursive", "delete accessible subvolumes beneath each subvolume recursively, " + "this is not atomic, may need root to delete subvolumes not accessible by the user"), OPTLINE("-v|--verbose", "deprecated, alias for global -v option"), HELPINFO_INSERT_GLOBALS, HELPINFO_INSERT_VERBOSE, @@ -367,6 +369,7 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a char *path = NULL; int commit_mode = 0; bool subvol_path_not_found = false; + int flags = 0; u8 fsid[BTRFS_FSID_SIZE]; u64 subvolid = 0; char uuidbuf[BTRFS_UUID_UNPARSED_SIZE]; @@ -383,11 +386,12 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a {"commit-after", no_argument, NULL, 'c'}, {"commit-each", no_argument, NULL, 'C'}, {"subvolid", required_argument, NULL, 'i'}, + {"recursive", no_argument, NULL, 'R'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "cCi:v", long_options, NULL); + c = getopt_long(argc, argv, "cCi:Rv", long_options, NULL); if (c < 0) break; @@ -401,6 +405,9 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a case 'i': subvolid = arg_strtou64(optarg); break; + case 'R': + flags |= BTRFS_UTIL_DELETE_SUBVOLUME_RECURSIVE; + break; case 'v': bconf_be_verbose(); break; @@ -416,6 +423,11 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a if (subvolid > 0 && check_argc_exact(argc - optind, 1)) return 1; + if 
(subvolid > 0 && flags & BTRFS_UTIL_DELETE_SUBVOLUME_RECURSIVE) { + error("option --recursive is not supported with --subvolid"); + return 1; + } + pr_verbose(LOG_INFO, "Transaction commit: %s\n", !commit_mode ? "none (default)" : commit_mode == COMMIT_AFTER ? "at the end" : "after each"); @@ -528,7 +540,7 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a /* Start deleting. */ if (subvolid == 0) - err = btrfs_util_delete_subvolume_fd(fd, vname, 0); + err = btrfs_util_delete_subvolume_fd(fd, vname, flags); else err = btrfs_util_delete_subvolume_by_id_fd(fd, subvolid); if (err) { From 082ce756b6d47a7ceb2442c939c34dbd383cfe3e Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 7 Aug 2024 15:55:52 +0100 Subject: [PATCH 02/18] btrfs-progs: mkfs: add new option --subvol Add a new option --subvol, which tells mkfs.btrfs to create the specified directories as subvolumes when used with --rootdir. Given a populated directory dir, the command $ mkfs.btrfs --rootdir dir --subvol usr --subvol home --subvol home/username img will create subvolumes 'usr' and 'home' within the toplevel subvolume, and subvolume 'username' within the 'home' subvolume. It will fail if any of the directories do not yet exist. Pull-request: #868 Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- Documentation/mkfs.btrfs.rst | 5 + mkfs/main.c | 162 ++++++++++++++++++-- mkfs/rootdir.c | 143 +++++++++++++---- mkfs/rootdir.h | 9 +- tests/mkfs-tests/036-rootdir-subvol/test.sh | 33 ++++ 5 files changed, 310 insertions(+), 42 deletions(-) create mode 100755 tests/mkfs-tests/036-rootdir-subvol/test.sh diff --git a/Documentation/mkfs.btrfs.rst b/Documentation/mkfs.btrfs.rst index 3253ebf1ec..0e9e84adff 100644 --- a/Documentation/mkfs.btrfs.rst +++ b/Documentation/mkfs.btrfs.rst @@ -155,6 +155,11 @@ OPTIONS contain the files from *rootdir*. Since version 4.14.1 the filesystem size is not minimized. 
Please see option *--shrink* if you need that functionality. +-u|--subvol + Specify that *subdir* is to be created as a subvolume rather than a regular + directory. The option *--rootdir* must also be specified, and *subdir* must be an + existing subdirectory within it. This option can be specified multiple times. + --shrink Shrink the filesystem to its minimal size, only works with *--rootdir* option. diff --git a/mkfs/main.c b/mkfs/main.c index b24b148dfe..88e0f8f84f 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -440,6 +440,7 @@ static const char * const mkfs_usage[] = { "Creation:", OPTLINE("-b|--byte-count SIZE", "set size of each device to SIZE (filesystem size is sum of all device sizes)"), OPTLINE("-r|--rootdir DIR", "copy files from DIR to the image root directory"), + OPTLINE("-u|--subvol SUBDIR", "create SUBDIR as subvolume rather than normal directory, can be specified multiple times"), OPTLINE("--shrink", "(with --rootdir) shrink the filled filesystem to minimal size"), OPTLINE("-K|--nodiscard", "do not perform whole device TRIM"), OPTLINE("-f|--force", "force overwrite of existing filesystem"), @@ -1055,6 +1056,9 @@ int BOX_MAIN(mkfs)(int argc, char **argv) char *label = NULL; int nr_global_roots = sysconf(_SC_NPROCESSORS_ONLN); char *source_dir = NULL; + size_t source_dir_len = 0; + struct rootdir_subvol *rds; + LIST_HEAD(subvols); cpu_detect_flags(); hash_init_accel(); @@ -1085,6 +1089,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv) { "data", required_argument, NULL, 'd' }, { "version", no_argument, NULL, 'V' }, { "rootdir", required_argument, NULL, 'r' }, + { "subvol", required_argument, NULL, 'u' }, { "nodiscard", no_argument, NULL, 'K' }, { "features", required_argument, NULL, 'O' }, { "runtime-features", required_argument, NULL, 'R' }, @@ -1102,7 +1107,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv) { NULL, 0, NULL, 0} }; - c = getopt_long(argc, argv, "A:b:fl:n:s:m:d:L:R:O:r:U:VvMKq", + c = getopt_long(argc, argv, 
"A:b:fl:n:s:m:d:L:R:O:r:U:VvMKqu:", long_options, NULL); if (c < 0) break; @@ -1208,6 +1213,22 @@ int BOX_MAIN(mkfs)(int argc, char **argv) free(source_dir); source_dir = strdup(optarg); break; + case 'u': { + struct rootdir_subvol *subvol; + + subvol = malloc(sizeof(struct rootdir_subvol)); + if (!subvol) { + error_msg(ERROR_MSG_MEMORY, NULL); + ret = 1; + goto error; + } + + subvol->dir = strdup(optarg); + subvol->full_path = NULL; + + list_add_tail(&subvol->list, &subvols); + break; + } case 'U': strncpy_null(fs_uuid, optarg, BTRFS_UUID_UNPARSED_SIZE); break; @@ -1272,6 +1293,89 @@ int BOX_MAIN(mkfs)(int argc, char **argv) ret = 1; goto error; } + if (!list_empty(&subvols) && source_dir == NULL) { + error("option --subvol must be used with --rootdir"); + ret = 1; + goto error; + } + + if (source_dir) { + char *canonical = realpath(source_dir, NULL); + + if (!canonical) { + error("could not get canonical path to %s", source_dir); + ret = 1; + goto error; + } + + free(source_dir); + source_dir = canonical; + source_dir_len = strlen(source_dir); + } + + list_for_each_entry(rds, &subvols, list) { + char *path, *canonical; + struct rootdir_subvol *rds2; + size_t dir_len; + + dir_len = strlen(rds->dir); + + path = malloc(source_dir_len + 1 + dir_len + 1); + if (!path) { + error_msg(ERROR_MSG_MEMORY, NULL); + ret = 1; + goto error; + } + + memcpy(path, source_dir, source_dir_len); + path[source_dir_len] = '/'; + memcpy(path + source_dir_len + 1, rds->dir, dir_len + 1); + + canonical = realpath(path, NULL); + if (!canonical) { + error("could not get canonical path to %s", rds->dir); + free(path); + ret = 1; + goto error; + } + + free(path); + path = canonical; + + if (!path_exists(path)) { + error("subvolume %s does not exist", rds->dir); + free(path); + ret = 1; + goto error; + } + + if (!path_is_dir(path)) { + error("subvolume %s is not a directory", rds->dir); + free(path); + ret = 1; + goto error; + } + + rds->full_path = path; + + if (strlen(path) < source_dir_len 
+ 1 || + memcmp(path, source_dir, source_dir_len) != 0 || + path[source_dir_len] != '/') { + error("subvolume %s is not a child of %s", rds->dir, source_dir); + ret = 1; + goto error; + } + + for (rds2 = list_first_entry(&subvols, struct rootdir_subvol, list); + rds2 != rds; + rds2 = list_next_entry(rds2, list)) { + if (strcmp(rds2->full_path, path) == 0) { + error("subvolume %s specified more than once", rds->dir); + ret = 1; + goto error; + } + } + } if (*fs_uuid) { uuid_t dummy_uuid; @@ -1821,24 +1925,37 @@ int BOX_MAIN(mkfs)(int argc, char **argv) error_msg(ERROR_MSG_START_TRANS, "%m"); goto out; } - ret = btrfs_rebuild_uuid_tree(fs_info); - if (ret < 0) - goto out; - - ret = cleanup_temp_chunks(fs_info, &allocation, data_profile, - metadata_profile, metadata_profile); - if (ret < 0) { - error("failed to cleanup temporary chunks: %d", ret); - goto out; - } if (source_dir) { pr_verbose(LOG_DEFAULT, "Rootdir from: %s\n", source_dir); - ret = btrfs_mkfs_fill_dir(source_dir, root); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + errno = -PTR_ERR(trans); + error_msg(ERROR_MSG_START_TRANS, "%m"); + goto out; + } + + ret = btrfs_mkfs_fill_dir(trans, source_dir, root, + &subvols); if (ret) { error("error while filling filesystem: %d", ret); + btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = btrfs_commit_transaction(trans, root); + if (ret) { + errno = -ret; + error_msg(ERROR_MSG_COMMIT_TRANS, "%m"); goto out; } + + list_for_each_entry(rds, &subvols, list) { + pr_verbose(LOG_DEFAULT, " Subvolume: %s\n", + rds->full_path); + } + if (shrink_rootdir) { pr_verbose(LOG_DEFAULT, " Shrink: yes\n"); ret = btrfs_mkfs_shrink_fs(fs_info, &shrink_size, @@ -1853,6 +1970,17 @@ int BOX_MAIN(mkfs)(int argc, char **argv) } } + ret = btrfs_rebuild_uuid_tree(fs_info); + if (ret < 0) + goto out; + + ret = cleanup_temp_chunks(fs_info, &allocation, data_profile, + metadata_profile, metadata_profile); + if (ret < 0) { + error("failed to cleanup temporary 
chunks: %d", ret); + goto out; + } + if (features.runtime_flags & BTRFS_FEATURE_RUNTIME_QUOTA || features.incompat_flags & BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA) { ret = setup_quota_root(fs_info); @@ -1946,6 +2074,16 @@ int BOX_MAIN(mkfs)(int argc, char **argv) free(label); free(source_dir); + while (!list_empty(&subvols)) { + struct rootdir_subvol *head; + + head = list_entry(subvols.next, struct rootdir_subvol, list); + free(head->dir); + free(head->full_path); + list_del(&head->list); + free(head); + } + return !!ret; success: diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index 41bc7f2570..3cc94316be 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -40,6 +40,8 @@ #include "common/messages.h" #include "common/utils.h" #include "common/extent-tree-utils.h" +#include "common/root-tree-utils.h" +#include "common/path-utils.h" #include "mkfs/rootdir.h" static u32 fs_block_size; @@ -68,6 +70,7 @@ static u64 ftw_data_size; struct inode_entry { /* The inode number inside btrfs. */ u64 ino; + struct btrfs_root *root; struct list_head list; }; @@ -94,6 +97,8 @@ static struct rootdir_path current_path = { static bool g_hardlink_warning; static u64 g_hardlink_count; static struct btrfs_trans_handle *g_trans = NULL; +static struct list_head *g_subvols; +static u64 next_subvol_id = BTRFS_FIRST_FREE_OBJECTID; static inline struct inode_entry *rootdir_path_last(struct rootdir_path *path) { @@ -114,13 +119,14 @@ static void rootdir_path_pop(struct rootdir_path *path) free(last); } -static int rootdir_path_push(struct rootdir_path *path, u64 ino) +static int rootdir_path_push(struct rootdir_path *path, struct btrfs_root *root, u64 ino) { struct inode_entry *new; new = malloc(sizeof(*new)); if (!new) return -ENOMEM; + new->root = root; new->ino = ino; list_add_tail(&new->list, &path->inode_list); path->level++; @@ -410,13 +416,88 @@ static u8 ftype_to_btrfs_type(mode_t ftype) return BTRFS_FT_UNKNOWN; } +static int ftw_add_subvol(const char *full_path, const struct stat *st, + int 
typeflag, struct FTW *ftwbuf, + struct rootdir_subvol *subvol) +{ + int ret; + struct btrfs_key key; + struct btrfs_root *new_root; + struct inode_entry *parent; + struct btrfs_inode_item inode_item = { 0 }; + u64 subvol_id, ino; + + subvol_id = next_subvol_id++; + + ret = btrfs_make_subvolume(g_trans, subvol_id); + if (ret < 0) { + errno = -ret; + error("failed to create subvolume: %m"); + return ret; + } + + key.objectid = subvol_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + new_root = btrfs_read_fs_root(g_trans->fs_info, &key); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + errno = -ret; + error("unable to read fs root id %llu: %m", subvol_id); + return ret; + } + + parent = rootdir_path_last(&current_path); + + ret = btrfs_link_subvolume(g_trans, parent->root, parent->ino, + path_basename(subvol->full_path), + strlen(path_basename(subvol->full_path)), + new_root); + if (ret) { + errno = -ret; + error("unable to link subvolume %s: %m", path_basename(subvol->full_path)); + return ret; + } + + ino = btrfs_root_dirid(&new_root->root_item); + + ret = add_xattr_item(g_trans, new_root, ino, full_path); + if (ret < 0) { + errno = -ret; + error("failed to add xattr item for the top level inode in subvol %llu: %m", + subvol_id); + return ret; + } + stat_to_inode_item(&inode_item, st); + + btrfs_set_stack_inode_nlink(&inode_item, 1); + ret = update_inode_item(g_trans, new_root, &inode_item, ino); + if (ret < 0) { + errno = -ret; + error("failed to update root dir for root %llu: %m", subvol_id); + return ret; + } + + ret = rootdir_path_push(&current_path, new_root, ino); + if (ret < 0) { + errno = -ret; + error("failed to allocate new entry for subvolume %llu ('%s'): %m", + subvol_id, full_path); + return ret; + } + + return 0; +} + static int ftw_add_inode(const char *full_path, const struct stat *st, int typeflag, struct FTW *ftwbuf) { struct btrfs_fs_info *fs_info = g_trans->fs_info; - struct btrfs_root *root = fs_info->fs_root; + struct btrfs_root 
*root; struct btrfs_inode_item inode_item = { 0 }; struct inode_entry *parent; + struct rootdir_subvol *rds; u64 ino; int ret; @@ -442,7 +523,10 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, /* The rootdir itself. */ if (unlikely(ftwbuf->level == 0)) { - u64 root_ino = btrfs_root_dirid(&root->root_item); + u64 root_ino; + + root = fs_info->fs_root; + root_ino = btrfs_root_dirid(&root->root_item); UASSERT(S_ISDIR(st->st_mode)); UASSERT(current_path.level == 0); @@ -468,7 +552,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, } /* Push (and initialize) the rootdir directory into the stack. */ - ret = rootdir_path_push(&current_path, btrfs_root_dirid(&root->root_item)); + ret = rootdir_path_push(&current_path, root, btrfs_root_dirid(&root->root_item)); if (ret < 0) { errno = -ret; error_msg(ERROR_MSG_MEMORY, "push path for rootdir: %m"); @@ -516,6 +600,26 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, while (current_path.level > ftwbuf->level) rootdir_path_pop(&current_path); + if (S_ISDIR(st->st_mode)) { + list_for_each_entry(rds, g_subvols, list) { + if (!strcmp(full_path, rds->full_path)) { + ret = ftw_add_subvol(full_path, st, typeflag, + ftwbuf, rds); + + free(rds->dir); + free(rds->full_path); + + list_del(&rds->list); + free(rds); + + return ret; + } + } + } + + parent = rootdir_path_last(&current_path); + root = parent->root; + ret = btrfs_find_free_objectid(g_trans, root, BTRFS_FIRST_FREE_OBJECTID, &ino); if (ret < 0) { @@ -532,7 +636,6 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return ret; } - parent = rootdir_path_last(&current_path); ret = btrfs_add_link(g_trans, root, ino, parent->ino, full_path + ftwbuf->base, strlen(full_path) - ftwbuf->base, @@ -557,7 +660,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return ret; } if (S_ISDIR(st->st_mode)) { - ret = rootdir_path_push(&current_path, ino); + ret = rootdir_path_push(&current_path, root, ino); if (ret < 0) 
{ errno = -ret; error("failed to allocate new entry for inode %llu ('%s'): %m", @@ -598,42 +701,28 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return 0; }; -int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root) +int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, + struct btrfs_root *root, struct list_head *subvols) { int ret; - struct btrfs_trans_handle *trans; struct stat root_st; ret = lstat(source_dir, &root_st); if (ret) { error("unable to lstat %s: %m", source_dir); - ret = -errno; - goto out; - } - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - errno = -ret; - error_msg(ERROR_MSG_START_TRANS, "%m"); - goto fail; + return -errno; } g_trans = trans; g_hardlink_warning = false; g_hardlink_count = 0; + g_subvols = subvols; INIT_LIST_HEAD(&current_path.inode_list); ret = nftw(source_dir, ftw_add_inode, 32, FTW_PHYS); if (ret) { error("unable to traverse directory %s: %d", source_dir, ret); - goto fail; - } - ret = btrfs_commit_transaction(trans, root); - if (ret) { - errno = -ret; - error_msg(ERROR_MSG_COMMIT_TRANS, "%m"); - goto out; + return ret; } if (g_hardlink_warning) @@ -644,10 +733,6 @@ int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root) rootdir_path_pop(&current_path); return 0; -fail: - btrfs_abort_transaction(trans, ret); -out: - return ret; } static int ftw_add_entry_size(const char *fpath, const struct stat *st, diff --git a/mkfs/rootdir.h b/mkfs/rootdir.h index 4233431a9a..128e9e0999 100644 --- a/mkfs/rootdir.h +++ b/mkfs/rootdir.h @@ -28,7 +28,14 @@ struct btrfs_fs_info; struct btrfs_root; -int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root); +struct rootdir_subvol { + struct list_head list; + char *dir; + char *full_path; +}; + +int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, + struct btrfs_root *root, struct list_head *subvols); u64 btrfs_mkfs_size_dir(const 
char *dir_name, u32 sectorsize, u64 min_dev_size, u64 meta_profile, u64 data_profile); int btrfs_mkfs_shrink_fs(struct btrfs_fs_info *fs_info, u64 *new_size_ret, diff --git a/tests/mkfs-tests/036-rootdir-subvol/test.sh b/tests/mkfs-tests/036-rootdir-subvol/test.sh new file mode 100755 index 0000000000..63ba928f34 --- /dev/null +++ b/tests/mkfs-tests/036-rootdir-subvol/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Basic test for mkfs.btrfs --subvol option + +source "$TEST_TOP/common" || exit + +check_prereq mkfs.btrfs +check_prereq btrfs + +setup_root_helper +prepare_test_dev + +tmp=$(_mktemp_dir mkfs-rootdir) + +run_check touch "$tmp/foo" +run_check mkdir "$tmp/dir" +run_check mkdir "$tmp/dir/subvol" +run_check touch "$tmp/dir/subvol/bar" + +run_check_mkfs_test_dev --rootdir "$tmp" --subvol dir/subvol +run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" + +run_check_mount_test_dev +run_check_stdout $SUDO_HELPER "$TOP/btrfs" subvolume list "$TEST_MNT" | \ + cut -d\ -f9 > "$tmp/output" +run_check_umount_test_dev + +result=$(cat "$tmp/output") + +if [ "$result" != "dir/subvol" ]; then + _fail "dir/subvol not in subvolume list" +fi + +rm -rf -- "$tmp" From 3ca473e607d8f214dfc81f761ed7a4d44e1d1164 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Aug 2024 21:34:28 +0200 Subject: [PATCH 03/18] btrfs-progs: ci: update cleanup scripts Add limit parameter so workflows are not skipped if they don't fit the default limit 10. Add more workflows to clean up after recent updates. 
Signed-off-by: David Sterba --- ci/actions/delete-all-failed-devel-runs | 2 +- ci/actions/delete-all-runs-of-branch | 10 ++++++---- ci/actions/keep-last-ci-image-tests | 2 +- ci/actions/keep-last-coverage | 2 +- ci/actions/keep-last-devel-runs | 2 +- ci/actions/keep-last-static-binaries | 2 +- ci/actions/keep-last-week | 5 ++++- ci/actions/update-artifacts | 4 ++-- 8 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ci/actions/delete-all-failed-devel-runs b/ci/actions/delete-all-failed-devel-runs index 79d33b3982..983cb17843 100755 --- a/ci/actions/delete-all-failed-devel-runs +++ b/ci/actions/delete-all-failed-devel-runs @@ -8,7 +8,7 @@ repo="kdave/btrfs-progs" type -p gh > /dev/null || { echo "ERROR: gh tool not found"; exit 1; } type -p jq > /dev/null || { echo "ERROR: jq tool not found"; exit 1; } -for id in $(gh run -R "$repo" list --workflow "$workflow" --status failure --json databaseId | jq '.[].databaseId'); do +for id in $(gh run -R "$repo" list --limit 100 --workflow "$workflow" --status failure --json databaseId | jq '.[].databaseId'); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/delete-all-runs-of-branch b/ci/actions/delete-all-runs-of-branch index 9e1e194896..e7596f79dc 100755 --- a/ci/actions/delete-all-runs-of-branch +++ b/ci/actions/delete-all-runs-of-branch @@ -16,11 +16,13 @@ notthatone() { exit 1 } +areyousure() { + echo "WARNING: protected branch, make sure you want to remove it: $1" +} + case "$branch" in master) notthatone "$branch";; - devel) notthatone "$branch";; - coverage-test) notthatone "$branch";; - release-test) notthatone "$branch";; + devel) areyousure "$branch";; esac echo "Delete all runs of branch $branch, are you sure? [y/N]" @@ -31,7 +33,7 @@ if ! 
[ "$answer" = 'y' ]; then fi echo -for id in $(gh run -R "$repo" list --json databaseId --branch "$branch" | jq '.[].databaseId'); do +for id in $(gh run -R "$repo" list --limit 100 --json databaseId --branch "$branch" | jq '.[].databaseId'); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/keep-last-ci-image-tests b/ci/actions/keep-last-ci-image-tests index ceff31a6bd..40186edc1f 100755 --- a/ci/actions/keep-last-ci-image-tests +++ b/ci/actions/keep-last-ci-image-tests @@ -9,7 +9,7 @@ type -p gh > /dev/null || { echo "ERROR: gh tool not found"; exit 1; } type -p jq > /dev/null || { echo "ERROR: jq tool not found"; exit 1; } for branch in master release-test; do - for id in $(gh run -R "$repo" list --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do + for id in $(gh run -R "$repo" list --limit 100 --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/keep-last-coverage b/ci/actions/keep-last-coverage index 76eb7033ee..f2543f7282 100755 --- a/ci/actions/keep-last-coverage +++ b/ci/actions/keep-last-coverage @@ -9,7 +9,7 @@ type -p gh > /dev/null || { echo "ERROR: gh tool not found"; exit 1; } type -p jq > /dev/null || { echo "ERROR: jq tool not found"; exit 1; } for branch in master coverage-test; do - for id in $(gh run -R "$repo" list --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do + for id in $(gh run -R "$repo" list --limit 100 --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/keep-last-devel-runs b/ci/actions/keep-last-devel-runs index 5a65aaaeb3..96f5f08fd7 100755 --- a/ci/actions/keep-last-devel-runs +++ b/ci/actions/keep-last-devel-runs @@ -7,7 +7,7 @@ repo="kdave/btrfs-progs" from=11 -for 
id in $(gh run -R "$repo" list -w 'Devel build and tests' --json databaseId | jq '.[].databaseId' | tail -n +${from}); do +for id in $(gh run -R "$repo" list --limit 100 --workflow 'Devel build and tests' --json databaseId | jq '.[].databaseId' | tail -n +${from}); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/keep-last-static-binaries b/ci/actions/keep-last-static-binaries index 421a68679a..ebd769ddb8 100755 --- a/ci/actions/keep-last-static-binaries +++ b/ci/actions/keep-last-static-binaries @@ -9,7 +9,7 @@ type -p gh > /dev/null || { echo "ERROR: gh tool not found"; exit 1; } type -p jq > /dev/null || { echo "ERROR: jq tool not found"; exit 1; } for branch in master release-test; do - for id in $(gh run -R "$repo" list --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do + for id in $(gh run -R "$repo" list --limit 100 --workflow "$workflow" --branch "$branch" --json databaseId | jq '.[1:] | .[].databaseId'); do echo "Delete run $id" gh run -R "$repo" delete "$id" done diff --git a/ci/actions/keep-last-week b/ci/actions/keep-last-week index e985380d0b..08834027c9 100755 --- a/ci/actions/keep-last-week +++ b/ci/actions/keep-last-week @@ -11,7 +11,7 @@ daysmax=8 clean_workflow() { local wf="$1" - local json=$(gh run -R "$repo" list --workflow "$wf" --json databaseId,startedAt) + local json=$(gh run -R "$repo" list --limit 100 --workflow "$wf" --json databaseId,startedAt) echo "Cleaning workflow $wf" i=0 @@ -40,3 +40,6 @@ clean_workflow() { clean_workflow "Testing CI build" clean_workflow "Devel build and tests" clean_workflow "Pull request build and tests" +clean_workflow "Codespell" +clean_workflow "CI image tests" +clean_workflow "Sanitizer checks" diff --git a/ci/actions/update-artifacts b/ci/actions/update-artifacts index 4c6c349e9d..6bf4e8e576 100755 --- a/ci/actions/update-artifacts +++ b/ci/actions/update-artifacts @@ -16,8 +16,8 @@ repo="kdave/btrfs-progs" tag="$1" # TODO: 
verify that tag exists -# Read last workflow id -id=$(gh run -R "$repo" list -w 'Static binaries' -L 1 --json databaseId | jq '.[].databaseId') +# Read last workflow id for master branch +id=$(gh run -R "$repo" list --limit 1 --workflow 'Static binaries' --branch 'master' --json databaseId | jq '.[].databaseId') for asset in btrfs.box.static btrfs.static; do gh run -R "$repo" download "$id" -n "$asset" From 2b204e1dd44456dc96b230efbc83650c02a63f7e Mon Sep 17 00:00:00 2001 From: Matt Langford Date: Thu, 8 Aug 2024 11:38:35 -0400 Subject: [PATCH 04/18] btrfs-progs: fi show: remove stray newline in filesystem show Remove last newline in the output of 'btrfs filesystem show', keep the line between two filesystems so the devices are visually grouped together. Pull-request: #866 Author: Matt Langford Signed-off-by: David Sterba --- cmds/filesystem.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/cmds/filesystem.c b/cmds/filesystem.c index 8e2697de6a..41ad2f8d3e 100644 --- a/cmds/filesystem.c +++ b/cmds/filesystem.c @@ -365,7 +365,6 @@ static void print_one_uuid(struct btrfs_fs_devices *fs_devices, if (devs_found < total) { pr_verbose(LOG_DEFAULT, "\t*** Some devices missing\n"); } - pr_verbose(LOG_DEFAULT, "\n"); } /* adds up all the used spaces as reported by the space info ioctl @@ -431,7 +430,6 @@ static int print_one_fs(struct btrfs_ioctl_fs_info_args *fs_info, free(canonical_path); } - pr_verbose(LOG_DEFAULT, "\n"); return 0; } @@ -481,6 +479,10 @@ static int btrfs_scan_kernel(void *search, unsigned unit_mode) fd = open(mnt->mnt_dir, O_RDONLY); if ((fd != -1) && !get_df(fd, &space_info_arg)) { + /* Put space between filesystem entries for readability. 
*/ + if (found != 0) + pr_verbose(LOG_DEFAULT, "\n"); + print_one_fs(&fs_info_arg, dev_info_arg, space_info_arg, label, unit_mode); free(space_info_arg); @@ -757,6 +759,7 @@ static int cmd_filesystem_show(const struct cmd_struct *cmd, char uuid_buf[BTRFS_UUID_UNPARSED_SIZE]; unsigned unit_mode; int found = 0; + bool needs_newline = false; unit_mode = get_unit_mode_from_arg(&argc, argv, 0); @@ -845,6 +848,12 @@ static int cmd_filesystem_show(const struct cmd_struct *cmd, goto out; } + /* + * The above call will return 0 if it found anything, in those cases we + * need an extra newline below. + */ + needs_newline = !ret; + /* shows mounted only */ if (where == BTRFS_SCAN_MOUNTED) goto out; @@ -882,8 +891,14 @@ static int cmd_filesystem_show(const struct cmd_struct *cmd, goto out; } - list_for_each_entry(fs_devices, &all_uuids, fs_list) + list_for_each_entry(fs_devices, &all_uuids, fs_list) { + /* Put space between filesystem entries for readability. */ + if (needs_newline) + pr_verbose(LOG_DEFAULT, "\n"); + print_one_uuid(fs_devices, unit_mode); + needs_newline = true; + } if (search && !found) { error("not a valid btrfs filesystem: %s", search); From 2688073bfe466cadf458f49825e72462fabb080c Mon Sep 17 00:00:00 2001 From: Han Yuwei Date: Thu, 29 Aug 2024 20:06:39 +0800 Subject: [PATCH 05/18] btrfs-progs: docs: clarify number representation in on-disk-format tables Added 0x prefix to HEX numbers and transformed some tables to the new format. 
Pull-request: #881 Signed-off-by: Yuwei Han [ Fix RST grammar errors ] Signed-off-by: Qu Wenruo --- Documentation/dev/On-disk-format.rst | 439 ++++++++++++++------------- 1 file changed, 221 insertions(+), 218 deletions(-) diff --git a/Documentation/dev/On-disk-format.rst b/Documentation/dev/On-disk-format.rst index 6d62d03ab3..c3d5a0ff59 100644 --- a/Documentation/dev/On-disk-format.rst +++ b/Documentation/dev/On-disk-format.rst @@ -40,14 +40,14 @@ Note that the fields are unsigned, so object ID −1 will be treated as little‐endian, a simple byte‐by‐byte comparison of KEYs will not work. - === ==== ==== =================================================== - Off Size Type Description - === ==== ==== =================================================== - 0 8 UINT Object ID. Each tree has its own set of Object IDs. - 8 1 UINT `Item type <#Item_Types>`__. - 9 8 UINT Offset. The meaning depends on the item type. - 11 - === ==== ==== =================================================== + ==== ==== ==== =================================================== + Off Size Type Description + ==== ==== ==== =================================================== + 0x0 0x8 UINT Object ID. Each tree has its own set of Object IDs. + 0x8 0x1 UINT `Item type <#Item_Types>`__. + 0x9 0x8 UINT Offset. The meaning depends on the item type. + 0x11 + ==== ==== ==== =================================================== Btrfs uses `Unix time `__. @@ -55,9 +55,9 @@ Btrfs uses `Unix time `__. === ==== ==== ======================================================== Off Size Type Description === ==== ==== ======================================================== - 0 8 SINT Number of seconds since 1970-01-01T00:00:00Z. - 8 4 UINT Number of nanoseconds since the beginning of the second. - c + 0x0 0x8 SINT Number of seconds since 1970-01-01T00:00:00Z. + 0x8 0x4 UINT Number of nanoseconds since the beginning of the second. 
+ 0xc === ==== ==== ======================================================== Superblock @@ -75,87 +75,87 @@ otherwise, there would be confusion with other filesystems. TODO - +------+------+-------+-------------------------------------------------------------------------+ - | Off | Size | Type | Description | - +======+======+=======+=========================================================================+ - | 0 | 20 | CSUM | Checksum of everything past this field (from 20 to 1000) | - +------+------+-------+-------------------------------------------------------------------------+ - | 20 | 10 | UUID | FS UUID | - +------+------+-------+-------------------------------------------------------------------------+ - | 30 | 8 | UINT | physical address of this block (different for mirrors) | - +------+------+-------+-------------------------------------------------------------------------+ - | 38 | 8 | | flags | - +------+------+-------+-------------------------------------------------------------------------+ - | 40 | 8 | ASCII | magic ("_BHRfS_M") | - +------+------+-------+-------------------------------------------------------------------------+ - | 48 | 8 | | generation | - +------+------+-------+-------------------------------------------------------------------------+ - | 50 | 8 | | logical address of the root tree root | - +------+------+-------+-------------------------------------------------------------------------+ - | 58 | 8 | | logical address of the `chunk tree <#Chunk_tree_.283.29>`__ root | - +------+------+-------+-------------------------------------------------------------------------+ - | 60 | 8 | | logical address of the log tree root | - +------+------+-------+-------------------------------------------------------------------------+ - | 68 | 8 | | log_root_transid | - +------+------+-------+-------------------------------------------------------------------------+ - | 70 | 8 | | total_bytes | - 
+------+------+-------+-------------------------------------------------------------------------+ - | 78 | 8 | | bytes_used | - +------+------+-------+-------------------------------------------------------------------------+ - | 80 | 8 | | root_dir_objectid (usually 6) | - +------+------+-------+-------------------------------------------------------------------------+ - | 88 | 8 | | num_devices | - +------+------+-------+-------------------------------------------------------------------------+ - | 90 | 4 | | sectorsize | - +------+------+-------+-------------------------------------------------------------------------+ - | 94 | 4 | | nodesize | - +------+------+-------+-------------------------------------------------------------------------+ - | 98 | 4 | | leafsize | - +------+------+-------+-------------------------------------------------------------------------+ - | 9c | 4 | | stripesize | - +------+------+-------+-------------------------------------------------------------------------+ - | a0 | 4 | | sys_chunk_array_size | - +------+------+-------+-------------------------------------------------------------------------+ - | a4 | 8 | | chunk_root_generation | - +------+------+-------+-------------------------------------------------------------------------+ - | ac | 8 | | compat_flags | - +------+------+-------+-------------------------------------------------------------------------+ - | b4 | 8 | | compat_ro_flags - only implementations that support the flags can write | - | | | | to the filesystem | - +------+------+-------+-------------------------------------------------------------------------+ - | bc | 8 | | incompat_flags - only implementations that support the flags can use | - | | | | the filesystem | - +------+------+-------+-------------------------------------------------------------------------+ - | c4 | 2 | | csum_type - Btrfs currently uses the CRC32c little-endian hash function | - | | | | with seed -1. 
| - +------+------+-------+-------------------------------------------------------------------------+ - | c6 | 1 | | root_level | - +------+------+-------+-------------------------------------------------------------------------+ - | c7 | 1 | | chunk_root_level | - +------+------+-------+-------------------------------------------------------------------------+ - | c8 | 1 | | log_root_level | - +------+------+-------+-------------------------------------------------------------------------+ - | c9 | 62 | | `DEV_ITEM <#DEV_ITEM_.28d8.29>`__ data for this device | - +------+------+-------+-------------------------------------------------------------------------+ - | 12b | 100 | | label (may not contain '/' or '\\\\') | - +------+------+-------+-------------------------------------------------------------------------+ - | 22b | 8 | | cache_generation | - +------+------+-------+-------------------------------------------------------------------------+ - | 233 | 8 | | uuid_tree_generation | - +------+------+-------+-------------------------------------------------------------------------+ - | 23b | f0 | | reserved /\* future expansion \*/ | - +------+------+-------+-------------------------------------------------------------------------+ - | 32b | 800 | | sys_chunk_array:(*n* bytes valid) Contains (KEY, | - | | | | `CHUNK_ITEM <#CHUNK_ITEM_.28e4.29>`__) pairs for all SYSTEM chunks. | - | | | | This is needed to bootstrap the mapping from logical addresses to | - | | | | physical. 
| - +------+------+-------+-------------------------------------------------------------------------+ - | b2b | 2a0 | | Contain super_roots (4 btrfs_root_backup) | - +------+------+-------+-------------------------------------------------------------------------+ - | dcb | 235 | | current unused | - +------+------+-------+-------------------------------------------------------------------------+ - | 1000 | | | | - +------+------+-------+-------------------------------------------------------------------------+ + +--------+-------+-------+-------------------------------------------------------------------------+ + | Off | Size | Type | Description | + +========+=======+=======+=========================================================================+ + | 0x0 | 0x20 | CSUM | Checksum of everything past this field (from 20 to 1000) | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x20 | 0x10 | UUID | FS UUID | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x30 | 0x8 | UINT | physical address of this block (different for mirrors) | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x38 | 0x8 | | flags | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x40 | 0x8 | ASCII | magic ("_BHRfS_M") | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x48 | 0x8 | | generation | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x50 | 0x8 | | logical address of the root tree root | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x58 | 0x8 | | logical address of the `chunk tree <#Chunk_tree_.283.29>`__ root | + 
+--------+-------+-------+-------------------------------------------------------------------------+ + | 0x60 | 0x8 | | logical address of the log tree root | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x68 | 0x8 | | log_root_transid | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x70 | 0x8 | | total_bytes | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x78 | 0x8 | | bytes_used | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x80 | 0x8 | | root_dir_objectid (usually 6) | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x88 | 0x8 | | num_devices | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x90 | 0x4 | | sectorsize | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x94 | 0x4 | | nodesize | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x98 | 0x4 | | leafsize | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x9c | 0x4 | | stripesize | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xa0 | 0x4 | | sys_chunk_array_size | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xa4 | 0x8 | | chunk_root_generation | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xac | 0x8 | | compat_flags | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xb4 | 0x8 | | compat_ro_flags - only implementations that support 
the flags can write | + | | | | to the filesystem | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xbc | 0x8 | | incompat_flags - only implementations that support the flags can use | + | | | | the filesystem | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xc4 | 0x2 | | csum_type - Btrfs currently uses the CRC32c little-endian hash function | + | | | | with seed -1. | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xc6 | 0x1 | | root_level | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xc7 | 0x1 | | chunk_root_level | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xc8 | 0x1 | | log_root_level | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xc9 | 0x62 | | `DEV_ITEM <#DEV_ITEM_.28d8.29>`__ data for this device | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x12b | 0x100 | | label (may not contain '/' or '\\\\') | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x22b | 0x8 | | cache_generation | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x233 | 0x8 | | uuid_tree_generation | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x23b | 0xf0 | | reserved /\* future expansion \*/ | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x32b | 0x800 | | sys_chunk_array:(*n* bytes valid) Contains (KEY, | + | | | | `CHUNK_ITEM <#CHUNK_ITEM_.28e4.29>`__) pairs for all SYSTEM chunks.
| + | | | | This is needed to bootstrap the mapping from logical addresses to | + | | | | physical. | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xb2b | 0x2a0 | | Contain super_roots (4 btrfs_root_backup) | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0xdcb | 0x235 | | current unused | + +--------+-------+-------+-------------------------------------------------------------------------+ + | 0x1000 | | | | + +--------+-------+-------+-------------------------------------------------------------------------+ Header ^^^^^^ @@ -165,32 +165,32 @@ depends on whether it is an internal or leaf node, both of which are described below. - +-----+------+-------+--------------------------------------------------------------------------+ - | Off | Size | Type | Description | - +=====+======+=======+==========================================================================+ - | 0 | 20 | CSUM | Checksum of everything after this field (from 20 to the end of the node) | - +-----+------+-------+--------------------------------------------------------------------------+ - | 20 | 10 | UUID | FS UUID | - +-----+------+-------+--------------------------------------------------------------------------+ - | 30 | 8 | UINT | Logical address of this node | - +-----+------+-------+--------------------------------------------------------------------------+ - | 38 | 7 | FIELD | Flags | - +-----+------+-------+--------------------------------------------------------------------------+ - | 3f | 1 | UINT | Backref. Rev.: always 1 (MIXED) for new filesystems; 0 (OLD) indicates | - | | | | an old filesystem. 
| - +-----+------+-------+--------------------------------------------------------------------------+ - | 40 | 10 | UUID | Chunk tree UUID | - +-----+------+-------+--------------------------------------------------------------------------+ - | 50 | 8 | UINT | Generation | - +-----+------+-------+--------------------------------------------------------------------------+ - | 58 | 8 | UINT | The ID of the tree that contains this node | - +-----+------+-------+--------------------------------------------------------------------------+ - | 60 | 4 | UINT | Number of items | - +-----+------+-------+--------------------------------------------------------------------------+ - | 64 | 1 | UINT | Level (0 for leaf nodes) | - +-----+------+-------+--------------------------------------------------------------------------+ - | 65 | | | | - +-----+------+-------+--------------------------------------------------------------------------+ + +-------+------+-------+--------------------------------------------------------------------------+ + | Off | Size | Type | Description | + +=======+======+=======+==========================================================================+ + | 0x0 | 0x20 | CSUM | Checksum of everything after this field (from 20 to the end of the node) | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x20 | 0x10 | UUID | FS UUID | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x30 | 0x8 | UINT | Logical address of this node | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x38 | 0x7 | FIELD | Flags | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x3f | 0x1 | UINT | Backref. Rev.: always 1 (MIXED) for new filesystems; 0 (OLD) indicates | + | | | | an old filesystem. 
| + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x40 | 0x10 | UUID | Chunk tree UUID | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x50 | 0x8 | UINT | Generation | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x58 | 0x8 | UINT | The ID of the tree that contains this node | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x60 | 0x4 | UINT | Number of items | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x64 | 0x1 | UINT | Level (0 for leaf nodes) | + +-------+------+-------+--------------------------------------------------------------------------+ + | 0x65 | | | | + +-------+------+-------+--------------------------------------------------------------------------+ Internal Node @@ -199,14 +199,14 @@ Internal Node In internal nodes, the node header is followed by a number of key pointers. - === ==== ==== ============ - Off Size Type Description - === ==== ==== ============ - 0 11 KEY key - 11 8 UINT block number - 19 8 UINT generation - 21 - === ==== ==== ============ + ===== ==== ==== ============ + Off Size Type Description + ===== ==== ==== ============ + 0x0 0x11 KEY key + 0x11 0x8 UINT block number + 0x19 0x8 UINT generation + 0x21 + ===== ==== ==== ============ ====== ======= ======= ======= === ========== @@ -222,14 +222,14 @@ data is stored at the end of the node, and the contents of the item data depends on the item type stored in the key. 
- === ==== ==== ========================================== - Off Size Type Description - === ==== ==== ========================================== - 0 11 KEY key - 11 4 UINT data offset relative to end of header (65) - 15 4 UINT data size - 19 - === ==== ==== ========================================== + ===== ==== ==== ========================================== + Off Size Type Description + ===== ==== ==== ========================================== + 0x0 0x11 KEY key + 0x11 0x4 UINT data offset relative to end of header (65) + 0x15 0x4 UINT data size + 0x19 + ===== ==== ==== ========================================== ====== ====== ====== === ====== ========== ====== === ====== ====== @@ -546,10 +546,10 @@ From an inode to a name in a directory. ======= ==== ===== ====================== Off Size Type Description ======= ==== ===== ====================== -0 8 UINT index in the directory -8 2 UINT (*n*) +0x0 0x8 UINT index in the directory +0x8 0x2 UINT (*n*) a *n* ASCII name in the directory -a+\ *n* +a+\ *n* ======= ==== ===== ====================== This structure can be repeated...? @@ -563,15 +563,15 @@ INODE_EXTREF (0d) From an inode to a name in a directory. Used if the regarding INODE_REF array ran out of space. *This item requires the EXTENDED_IREF feature.* -======== ==== ===== ====================== -Off Size Type Description -======== ==== ===== ====================== -0 8 UINT directory object ID -8 8 UINT index in the directory -10 2 UINT (*n*) -12 *n* ASCII name in the directory -12+\ *n* -======== ==== ===== ====================== +========== ==== ===== ====================== +Off Size Type Description +========== ==== ===== ====================== +0x0 0x8 UINT directory object ID +0x8 0x8 UINT index in the directory +0x10 0x2 UINT (*n*) +0x12 *n* ASCII name in the directory +0x12+\ *n* +========== ==== ===== ====================== This structure can be repeated...? @@ -739,37 +739,37 @@ EXTENT_DATA (6c) The contents of a file. 
-=== ==== ==== ====================================== -Off Size Type Description -=== ==== ==== ====================================== -0 8 UINT generation -8 8 UINT (*n*) size of decoded extent -10 1 UINT compression (0=none, 1=zlib, 2=LZO) -11 1 UINT encryption (0=none) -12 2 UINT other encoding (0=none) -14 1 UINT type (0=inline, 1=regular, 2=prealloc) -15 -=== ==== ==== ====================================== +===== ==== ==== ====================================== +Off Size Type Description +===== ==== ==== ====================================== +0x0 0x8 UINT generation +0x8 0x8 UINT (*n*) size of decoded extent +0x10 0x1 UINT compression (0=none, 1=zlib, 2=LZO) +0x11 0x1 UINT encryption (0=none) +0x12 0x2 UINT other encoding (0=none) +0x14 0x1 UINT type (0=inline, 1=regular, 2=prealloc) +0x15 +===== ==== ==== ====================================== If the extent is inline, the remaining item bytes are the data bytes (*n* bytes in case no compression/encryption/other encoding is used). Otherwise, the structure continues: -+-----+------+------+---------------------------------------------------------------------------+ -| Off | Size | Type | Description | -+=====+======+======+===========================================================================+ -| 15 | 8 | UINT | (*ea*) logical address of extent. If this is zero, the extent is sparse | -| | | | and consists of all zeroes. 
| -+-----+------+------+---------------------------------------------------------------------------+ -| 1d | 8 | UINT | (*es*) size of extent | -+-----+------+------+---------------------------------------------------------------------------+ -| 25 | 8 | UINT | (*o*) offset within the extent | -+-----+------+------+---------------------------------------------------------------------------+ -| 2d | 8 | UINT | (*s*) logical number of bytes in file | -+-----+------+------+---------------------------------------------------------------------------+ -| 35 | | | | -+-----+------+------+---------------------------------------------------------------------------+ ++-------+------+------+---------------------------------------------------------------------------+ +| Off | Size | Type | Description | ++=======+======+======+===========================================================================+ +| 0x15 | 0x8 | UINT | (*ea*) logical address of extent. If this is zero, the extent is sparse | +| | | | and consists of all zeroes. | ++-------+------+------+---------------------------------------------------------------------------+ +| 0x1d | 0x8 | UINT | (*es*) size of extent | ++-------+------+------+---------------------------------------------------------------------------+ +| 0x25 | 0x8 | UINT | (*o*) offset within the extent | ++-------+------+------+---------------------------------------------------------------------------+ +| 0x2d | 0x8 | UINT | (*s*) logical number of bytes in file | ++-------+------+------+---------------------------------------------------------------------------+ +| 0x35 | | | | ++-------+------+------+---------------------------------------------------------------------------+ *ea* and *es* must exactly match an EXTENT_ITEM. If the *es* bytes of data at logical address *ea* are decoded, *n* bytes will result. 
The file's data @@ -941,11 +941,14 @@ EXTENT_DATA_REF (b2) (logical address, b2, hash of first three fields) TODO -| ``    0   8 UINT   root objectid (id of tree contained in)`` -| ``    8   8 UINT   object id (owner)`` -| ``   10   8 UINT   offset (in the file data)`` -| ``   18   4 UINT   count (always 1?)`` - +===== ==== ==== ======================================= +Off Size Type Description +===== ==== ==== ======================================= +0x0 0x8 UINT root objectid (id of tree contained in) +0x8 0x8 UINT object id (owner) +0x10  0x8  UINT offset (in the file data) +0x18  0x4  UINT count (always 1?) +===== ==== ==== ======================================= EXTENT_REF_V0 (b4) ^^^^^^^^^^^^^^^^^^ @@ -958,12 +961,12 @@ SHARED_BLOCK_REF (b6) (logical address, b6, parent) TODO -=== ==== ==== =========== -Off Size Type Description -=== ==== ==== =========== -0 8 UINT offset -8 -=== ==== ==== =========== +===== ==== ==== =========== +Off Size Type Description +===== ==== ==== =========== +0x0 0x8 UINT offset +0x8 +===== ==== ==== =========== SHARED_DATA_REF (b8) @@ -971,13 +974,13 @@ SHARED_DATA_REF (b8) (logical address, b8, parent) TODO -=== ==== ==== ================= -Off Size Type Description -=== ==== ==== ================= -0 8 UINT offset -8 4 UINT count (always 1?) -c -=== ==== ==== ================= +===== ==== ==== ================= +Off Size Type Description +===== ==== ==== ================= +0x0 0x8 UINT offset +0x8 0x4 UINT count (always 1?) +0xc +===== ==== ==== ================= BLOCK_GROUP_ITEM (c0) @@ -1031,16 +1034,16 @@ DEV_EXTENT (cc) Maps from physical address to logical. -=== ==== ===== ======================= -Off Size Type Description -=== ==== ===== ======================= -0 8 UINT chunk tree (always 3) -8 8 OBJID chunk oid (always 256?) 
-10 8 UINT logical address -18 8 UINT size in bytes -20 10 UUID chunk tree UUID -30 -=== ==== ===== ======================= +===== ==== ===== ======================= +Off Size Type Description +===== ==== ===== ======================= +0x0 0x8 UINT chunk tree (always 3) +0x8 0x8 OBJID chunk oid (always 256?) +0x10 0x8 UINT logical address +0x18 0x8 UINT size in bytes +0x20 0x10 UUID chunk tree UUID +0x30 +===== ==== ===== ======================= DEV_ITEM (d8) @@ -1050,25 +1053,25 @@ DEV_ITEM (d8) Contains information about one device. -=== ==== ==== ============================== -Off Size Type Description -=== ==== ==== ============================== -0 8 UINT device id -8 8 UINT number of bytes -10 8 UINT number of bytes used -18 4 UINT optimal I/O align -1c 4 UINT optimal I/O width -20 4 UINT minimal I/O size (sector size) -24 8 UINT type -2c 8 UINT generation -34 8 UINT start offset -3c 4 UINT dev group -40 1 UINT seek speed -41 1 UINT bandwidth -42 10 UUID device UUID -52 10 UUID FS UUID -62 -=== ==== ==== ============================== +===== ==== ==== ============================== +Off Size Type Description +===== ==== ==== ============================== +0x0 0x8 UINT device id +0x8 0x8 UINT number of bytes +0x10 0x8 UINT number of bytes used +0x18 0x4 UINT optimal I/O align +0x1c 0x4 UINT optimal I/O width +0x20 0x4 UINT minimal I/O size (sector size) +0x24 0x8 UINT type +0x2c 0x8 UINT generation +0x34 0x8 UINT start offset +0x3c 0x4 UINT dev group +0x40 0x1 UINT seek speed +0x41 0x1 UINT bandwidth +0x42 0x10 UUID device UUID +0x52 0x10 UUID FS UUID +0x62 +===== ==== ==== ============================== CHUNK_ITEM (e4) From 91a4311521df15d2054af417cb315aa4b070b2f2 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Thu, 15 Aug 2024 15:27:01 +0100 Subject: [PATCH 06/18] btrfs-progs: mkfs: add default flag to --subvol Change --subvol that it can accept flags, and add a "default" flag that allows you to mark a subvolume as the default. 
Signed-off-by: Mark Harmstone --- Documentation/mkfs.btrfs.rst | 6 ++- mkfs/main.c | 82 ++++++++++++++++++++++++++++++++++-- mkfs/rootdir.c | 53 +++++++++++++++++++++++ mkfs/rootdir.h | 1 + 4 files changed, 137 insertions(+), 5 deletions(-) diff --git a/Documentation/mkfs.btrfs.rst b/Documentation/mkfs.btrfs.rst index 0e9e84adff..629231a2c0 100644 --- a/Documentation/mkfs.btrfs.rst +++ b/Documentation/mkfs.btrfs.rst @@ -155,11 +155,15 @@ OPTIONS contain the files from *rootdir*. Since version 4.14.1 the filesystem size is not minimized. Please see option *--shrink* if you need that functionality. --u|--subvol <subdir> +-u|--subvol <subdir>:<flags> Specify that *subdir* is to be created as a subvolume rather than a regular directory. The option *--rootdir* must also be specified, and *subdir* must be an existing subdirectory within it. This option can be specified multiple times. + *flags* is an optional comma-separated list of modifiers. Valid choices are: + + * *default*: create as default subvolume (this can only be specified once) + --shrink Shrink the filesystem to its minimal size, only works with *--rootdir* option.
diff --git a/mkfs/main.c b/mkfs/main.c index 88e0f8f84f..d26cd7d93c 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -440,7 +440,7 @@ static const char * const mkfs_usage[] = { "Creation:", OPTLINE("-b|--byte-count SIZE", "set size of each device to SIZE (filesystem size is sum of all device sizes)"), OPTLINE("-r|--rootdir DIR", "copy files from DIR to the image root directory"), - OPTLINE("-u|--subvol SUBDIR", "create SUBDIR as subvolume rather than normal directory, can be specified multiple times"), + OPTLINE("-u|--subvol SUBDIR:FLAGS", "create SUBDIR as subvolume rather than normal directory, can be specified multiple times"), OPTLINE("--shrink", "(with --rootdir) shrink the filled filesystem to minimal size"), OPTLINE("-K|--nodiscard", "do not perform whole device TRIM"), OPTLINE("-f|--force", "force overwrite of existing filesystem"), @@ -1015,6 +1015,46 @@ static void *prepare_one_device(void *ctx) return NULL; } +static int parse_subvol_flags(struct rootdir_subvol *subvol, const char *flags) +{ + char *buf, *orig_buf; + int ret; + + buf = orig_buf = strdup(flags); + + if (!buf) { + error_msg(ERROR_MSG_MEMORY, NULL); + ret = -ENOMEM; + goto out; + } + + while (true) { + char *comma = strstr(buf, ","); + + if (comma) + *comma = 0; + + if (!strcmp(buf, "default")) { + subvol->is_default = true; + } else if (buf[0] != 0) { + error("unrecognized subvol flag \"%s\"", buf); + ret = 1; + goto out; + } + + if (comma) + buf = comma + 1; + else + break; + } + + ret = 0; + +out: + free(orig_buf); + return ret; +} + int BOX_MAIN(mkfs)(int argc, char **argv) { char *file; @@ -1058,6 +1098,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv) char *source_dir = NULL; size_t source_dir_len = 0; struct rootdir_subvol *rds; + bool has_default_subvol = false; LIST_HEAD(subvols); cpu_detect_flags(); @@ -1215,16 +1256,49 @@ int BOX_MAIN(mkfs)(int argc, char **argv) break; case 'u': { struct rootdir_subvol *subvol; + char *colon; - subvol = malloc(sizeof(struct rootdir_subvol)); + subvol 
= calloc(1, sizeof(struct rootdir_subvol)); if (!subvol) { error_msg(ERROR_MSG_MEMORY, NULL); ret = 1; goto error; } - subvol->dir = strdup(optarg); - subvol->full_path = NULL; + colon = strstr(optarg, ":"); + + if (colon) { + /* Make sure we choose the last colon in + * optarg, in case the subvol name + * itself contains a colon. */ + do { + char *colon2; + + colon2 = strstr(colon + 1, ":"); + + if (colon2) + colon = colon2; + else + break; + } while (true); + + subvol->dir = strndup(optarg, colon - optarg); + if (parse_subvol_flags(subvol, colon + 1)) { + ret = 1; + goto error; + } + } else { + subvol->dir = strdup(optarg); + } + + if (subvol->is_default) { + if (has_default_subvol) { + error("subvol default flag can only be specified once"); + ret = 1; + goto error; + } + has_default_subvol = true; + } list_add_tail(&subvol->list, &subvols); break; diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index 3cc94316be..c31651d0cb 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -99,6 +99,7 @@ static u64 g_hardlink_count; static struct btrfs_trans_handle *g_trans = NULL; static struct list_head *g_subvols; static u64 next_subvol_id = BTRFS_FIRST_FREE_OBJECTID; +static u64 default_subvol_id; static inline struct inode_entry *rootdir_path_last(struct rootdir_path *path) { @@ -436,6 +437,9 @@ static int ftw_add_subvol(const char *full_path, const struct stat *st, return ret; } + if (subvol->is_default) + default_subvol_id = subvol_id; + key.objectid = subvol_id; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; @@ -701,6 +705,47 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return 0; }; +static int set_default_subvolume(struct btrfs_trans_handle *trans) +{ + struct btrfs_path path = { 0 }; + struct btrfs_dir_item *di; + struct btrfs_key location; + struct extent_buffer *leaf; + struct btrfs_disk_key disk_key; + u64 features; + + di = btrfs_lookup_dir_item(trans, trans->fs_info->tree_root, &path, + 
btrfs_super_root_dir(trans->fs_info->super_copy), + "default", 7, 1); + if (IS_ERR_OR_NULL(di)) { + btrfs_release_path(&path); + + if (di) + return PTR_ERR(di); + else + return -ENOENT; + } + + leaf = path.nodes[0]; + + location.objectid = default_subvol_id; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = 0; + + btrfs_cpu_key_to_disk(&disk_key, &location); + btrfs_set_dir_item_key(leaf, di, &disk_key); + + btrfs_mark_buffer_dirty(leaf); + + btrfs_release_path(&path); + + features = btrfs_super_incompat_flags(trans->fs_info->super_copy); + features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; + btrfs_set_super_incompat_flags(trans->fs_info->super_copy, features); + + return 0; +} + int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, struct btrfs_root *root, struct list_head *subvols) { @@ -732,6 +777,14 @@ int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir while (current_path.level > 0) rootdir_path_pop(&current_path); + if (default_subvol_id != 0) { + ret = set_default_subvolume(trans); + if (ret < 0) { + error("error setting default subvolume: %d", ret); + return ret; + } + } + return 0; } diff --git a/mkfs/rootdir.h b/mkfs/rootdir.h index 128e9e0999..871889d92f 100644 --- a/mkfs/rootdir.h +++ b/mkfs/rootdir.h @@ -32,6 +32,7 @@ struct rootdir_subvol { struct list_head list; char *dir; char *full_path; + bool is_default; }; int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, From 2c8dc943ec124f69546bd7cb5a715f696a38f005 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 27 Aug 2024 11:49:45 +0100 Subject: [PATCH 07/18] btrfs-progs: mkfs: add ro flag to --subvol Adds a flag to mkfs.btrfs --subvol to allow subvolumes to be created readonly.
Signed-off-by: Mark Harmstone --- Documentation/mkfs.btrfs.rst | 1 + common/root-tree-utils.c | 10 +++++++++- common/root-tree-utils.h | 3 ++- convert/main.c | 5 +++-- mkfs/main.c | 5 ++++- mkfs/rootdir.c | 2 +- mkfs/rootdir.h | 1 + 7 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Documentation/mkfs.btrfs.rst b/Documentation/mkfs.btrfs.rst index 629231a2c0..a4817e77f1 100644 --- a/Documentation/mkfs.btrfs.rst +++ b/Documentation/mkfs.btrfs.rst @@ -163,6 +163,7 @@ OPTIONS *flags* is an optional comma-separated list of modifiers. Valid choices are: * *default*: create as default subvolume (this can only be specified once) + * *ro*: create as readonly subvolume --shrink Shrink the filesystem to its minimal size, only works with *--rootdir* option. diff --git a/common/root-tree-utils.c b/common/root-tree-utils.c index 09b9ada01d..a416fcebbe 100644 --- a/common/root-tree-utils.c +++ b/common/root-tree-utils.c @@ -66,7 +66,8 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans, * The created tree root would have its root_ref as 1. * Thus for subvolumes caller needs to properly add ROOT_BACKREF items. 
*/ -int btrfs_make_subvolume(struct btrfs_trans_handle *trans, u64 objectid) +int btrfs_make_subvolume(struct btrfs_trans_handle *trans, u64 objectid, + bool readonly) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root; @@ -98,6 +99,13 @@ int btrfs_make_subvolume(struct btrfs_trans_handle *trans, u64 objectid) ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID); if (ret < 0) goto error; + + btrfs_set_stack_inode_flags(&root->root_item.inode, + BTRFS_INODE_ROOT_ITEM_INIT); + + if (readonly) + btrfs_set_root_flags(&root->root_item, BTRFS_ROOT_SUBVOL_RDONLY); + ret = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); if (ret < 0) diff --git a/common/root-tree-utils.h b/common/root-tree-utils.h index 3cb508022e..c4964a3624 100644 --- a/common/root-tree-utils.h +++ b/common/root-tree-utils.h @@ -21,7 +21,8 @@ int btrfs_make_root_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid); -int btrfs_make_subvolume(struct btrfs_trans_handle *trans, u64 objectid); +int btrfs_make_subvolume(struct btrfs_trans_handle *trans, u64 objectid, + bool readonly); int btrfs_link_subvolume(struct btrfs_trans_handle *trans, struct btrfs_root *parent_root, u64 parent_dir, const char *name, diff --git a/convert/main.c b/convert/main.c index aa253781ee..1af47260cd 100644 --- a/convert/main.c +++ b/convert/main.c @@ -1022,13 +1022,14 @@ static int init_btrfs(struct btrfs_mkfs_config *cfg, struct btrfs_root *root, BTRFS_FIRST_FREE_OBJECTID); /* subvol for fs image file */ - ret = btrfs_make_subvolume(trans, CONV_IMAGE_SUBVOL_OBJECTID); + ret = btrfs_make_subvolume(trans, CONV_IMAGE_SUBVOL_OBJECTID, false); if (ret < 0) { error("failed to create subvolume image root: %d", ret); goto err; } /* subvol for data relocation tree */ - ret = btrfs_make_subvolume(trans, BTRFS_DATA_RELOC_TREE_OBJECTID); + ret = btrfs_make_subvolume(trans, BTRFS_DATA_RELOC_TREE_OBJECTID, + false); if (ret < 0) { error("failed to 
create DATA_RELOC root: %d", ret); goto err; diff --git a/mkfs/main.c b/mkfs/main.c index d26cd7d93c..45c25df339 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -1036,6 +1036,8 @@ static int parse_subvol_flags(struct rootdir_subvol *subvol, const char *flags) if (!strcmp(buf, "default")) { subvol->is_default = true; + } else if (!strcmp(buf, "ro")) { + subvol->readonly = true; } else if (buf[0] != 0) { error("unrecognized subvol flag \"%s\"", buf); ret = 1; @@ -1987,7 +1989,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv) goto out; } - ret = btrfs_make_subvolume(trans, BTRFS_DATA_RELOC_TREE_OBJECTID); + ret = btrfs_make_subvolume(trans, BTRFS_DATA_RELOC_TREE_OBJECTID, + false); if (ret) { error("unable to create data reloc tree: %d", ret); goto out; diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index c31651d0cb..5e80b871c1 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -430,7 +430,7 @@ static int ftw_add_subvol(const char *full_path, const struct stat *st, subvol_id = next_subvol_id++; - ret = btrfs_make_subvolume(g_trans, subvol_id); + ret = btrfs_make_subvolume(g_trans, subvol_id, subvol->readonly); if (ret < 0) { errno = -ret; error("failed to create subvolume: %m"); diff --git a/mkfs/rootdir.h b/mkfs/rootdir.h index 871889d92f..44817374ed 100644 --- a/mkfs/rootdir.h +++ b/mkfs/rootdir.h @@ -33,6 +33,7 @@ struct rootdir_subvol { char *dir; char *full_path; bool is_default; + bool readonly; }; int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, From 50f408ddf55d44868bc297f7d557b3a3838136e3 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 28 Jun 2024 12:21:08 +0100 Subject: [PATCH 08/18] btrfs-progs: use libbtrfsutil for btrfs subvolume create Call btrfs_util_subvolume_create in create_one_subvolume rather than calling the ioctl directly. 
Signed-off-by: Mark Harmstone Co-authored-by: Omar Sandoval --- cmds/subvolume.c | 97 ++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/cmds/subvolume.c b/cmds/subvolume.c index 56108269a3..baf27e5b3b 100644 --- a/cmds/subvolume.c +++ b/cmds/subvolume.c @@ -46,6 +46,7 @@ #include "common/units.h" #include "common/format-output.h" #include "common/tree-search.h" +#include "common/parse-utils.h" #include "cmds/commands.h" #include "cmds/qgroup.h" @@ -140,28 +141,15 @@ static const char * const cmd_subvolume_create_usage[] = { NULL }; -static int create_one_subvolume(const char *dst, struct btrfs_qgroup_inherit *inherit, +static int create_one_subvolume(const char *dst, struct btrfs_util_qgroup_inherit *inherit, bool create_parents) { int ret; - int len; - int fddst = -1; char *dupname = NULL; char *dupdir = NULL; const char *newname; char *dstdir; - - ret = path_is_dir(dst); - if (ret < 0 && ret != -ENOENT) { - errno = -ret; - error("cannot access %s: %m", dst); - goto out; - } - if (ret >= 0) { - error("target path already exists: %s", dst); - ret = -EEXIST; - goto out; - } + enum btrfs_util_error err; dupname = strdup(dst); if (!dupname) { @@ -179,19 +167,6 @@ static int create_one_subvolume(const char *dst, struct btrfs_qgroup_inherit *in } dstdir = path_dirname(dupdir); - if (!test_issubvolname(newname)) { - error("invalid subvolume name: %s", newname); - ret = -EINVAL; - goto out; - } - - len = strlen(newname); - if (len > BTRFS_VOL_NAME_MAX) { - error("subvolume name too long: %s", newname); - ret = -EINVAL; - goto out; - } - if (create_parents) { char p[PATH_MAX] = { 0 }; char dstdir_dup[PATH_MAX]; @@ -223,47 +198,57 @@ static int create_one_subvolume(const char *dst, struct btrfs_qgroup_inherit *in } } - fddst = btrfs_open_dir(dstdir); - if (fddst < 0) { - ret = fddst; + err = btrfs_util_subvolume_create(dst, 0, NULL, inherit); + if (err) { + error_btrfs_util(err); + ret = -errno; goto out; } - if 
(inherit) { - struct btrfs_ioctl_vol_args_v2 args; + pr_verbose(LOG_DEFAULT, "Create subvolume '%s/%s'\n", dstdir, newname); - memset(&args, 0, sizeof(args)); - strncpy_null(args.name, newname, sizeof(args.name)); - args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT; - args.size = btrfs_qgroup_inherit_size(inherit); - args.qgroup_inherit = inherit; + ret = 0; - ret = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE_V2, &args); - } else { - struct btrfs_ioctl_vol_args args; +out: + free(dupname); + free(dupdir); - memset(&args, 0, sizeof(args)); - strncpy_null(args.name, newname, sizeof(args.name)); - ret = ioctl(fddst, BTRFS_IOC_SUBVOL_CREATE, &args); + return ret; +} + +static int qgroup_inherit_add_group(struct btrfs_util_qgroup_inherit **inherit, + const char *arg) +{ + enum btrfs_util_error err; + u64 qgroupid; + + if (!*inherit) { + err = btrfs_util_qgroup_inherit_create(0, inherit); + if (err) { + error_btrfs_util(err); + return -errno; + } } - if (ret < 0) { - error("cannot create subvolume: %m"); - goto out; + qgroupid = parse_qgroupid_or_path(optarg); + if (qgroupid == 0) { + error("invalid qgroup specification, qgroupid must not be 0"); + return -EINVAL; } - pr_verbose(LOG_DEFAULT, "Create subvolume '%s/%s'\n", dstdir, newname); -out: - close(fddst); - free(dupname); - free(dupdir); + err = btrfs_util_qgroup_inherit_add_group(inherit, qgroupid); + if (err) { + error_btrfs_util(err); + return -errno; + } - return ret; + return 0; } + static int cmd_subvolume_create(const struct cmd_struct *cmd, int argc, char **argv) { int retval, ret; - struct btrfs_qgroup_inherit *inherit = NULL; + struct btrfs_util_qgroup_inherit *inherit = NULL; bool has_error = false; bool create_parents = false; @@ -281,7 +266,7 @@ static int cmd_subvolume_create(const struct cmd_struct *cmd, int argc, char **a switch (c) { case 'i': - ret = btrfs_qgroup_inherit_add_group(&inherit, optarg); + ret = qgroup_inherit_add_group(&inherit, optarg); if (ret) { retval = ret; goto out; @@ -310,7 +295,7 @@ static 
int cmd_subvolume_create(const struct cmd_struct *cmd, int argc, char **a if (!has_error) retval = 0; out: - free(inherit); + btrfs_util_qgroup_inherit_destroy(inherit); return retval; } From a9416ed72ae7171a2480101932ea721d3b8fcc36 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 28 Jun 2024 15:04:53 +0100 Subject: [PATCH 09/18] btrfs-progs: use libbtrfsutil for btrfs subvolume snapshot Call btrfs_util_subvolume_snapshot in cmd_subvolume_snapshot rather than calling the ioctl directly. Signed-off-by: Mark Harmstone Co-authored-by: Omar Sandoval --- cmds/subvolume.c | 92 ++++++++++++++++-------------------------------- 1 file changed, 31 insertions(+), 61 deletions(-) diff --git a/cmds/subvolume.c b/cmds/subvolume.c index baf27e5b3b..f34d940097 100644 --- a/cmds/subvolume.c +++ b/cmds/subvolume.c @@ -635,18 +635,11 @@ static int cmd_subvolume_snapshot(const struct cmd_struct *cmd, int argc, char * { char *subvol, *dst; int res, retval; - int fd = -1, fddst = -1; - int len; - bool readonly = false; - char *dupname = NULL; - char *dupdir = NULL; - const char *newname; - char *dstdir; + char *dstdir = NULL; enum btrfs_util_error err; - struct btrfs_ioctl_vol_args_v2 args; - struct btrfs_qgroup_inherit *inherit = NULL; + struct btrfs_util_qgroup_inherit *inherit = NULL; + int flags = 0; - memset(&args, 0, sizeof(args)); optind = 0; while (1) { int c = getopt(argc, argv, "i:r"); @@ -655,14 +648,14 @@ static int cmd_subvolume_snapshot(const struct cmd_struct *cmd, int argc, char * switch (c) { case 'i': - res = btrfs_qgroup_inherit_add_group(&inherit, optarg); + res = qgroup_inherit_add_group(&inherit, optarg); if (res) { retval = res; goto out; } break; case 'r': - readonly = true; + flags |= BTRFS_UTIL_CREATE_SNAPSHOT_READ_ONLY; break; default: usage_unknown_option(cmd, argv); @@ -696,72 +689,49 @@ static int cmd_subvolume_snapshot(const struct cmd_struct *cmd, int argc, char * } if (res > 0) { + char *dupname; + const char *newname; + dupname = strdup(subvol); 
newname = path_basename(dupname); - dstdir = dst; - } else { - dupname = strdup(dst); - newname = path_basename(dupname); - dupdir = strdup(dst); - dstdir = path_dirname(dupdir); - } - - if (!test_issubvolname(newname)) { - error("invalid snapshot name '%s'", newname); - goto out; - } - - len = strlen(newname); - if (len > BTRFS_VOL_NAME_MAX) { - error("snapshot name too long '%s'", newname); - goto out; - } - fddst = btrfs_open_dir(dstdir); - if (fddst < 0) - goto out; - - fd = btrfs_open_dir(subvol); - if (fd < 0) - goto out; + dstdir = malloc(strlen(dst) + 1 + strlen(newname) + 1); + if (!dstdir) { + error_msg(ERROR_MSG_MEMORY, NULL); + free(dupname); + goto out; + } - if (readonly) - args.flags |= BTRFS_SUBVOL_RDONLY; + dstdir[0] = 0; + strcpy(dstdir, dst); + strcat(dstdir, "/"); + strcat(dstdir, newname); - args.fd = fd; - if (inherit) { - args.flags |= BTRFS_SUBVOL_QGROUP_INHERIT; - args.size = btrfs_qgroup_inherit_size(inherit); - args.qgroup_inherit = inherit; + free(dupname); + } else { + dstdir = strdup(dst); } - strncpy_null(args.name, newname, sizeof(args.name)); - res = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args); - if (res < 0) { - if (errno == ETXTBSY) - error("cannot snapshot '%s': source subvolume contains an active swapfile (%m)", subvol); - else - error("cannot snapshot '%s': %m", subvol); + err = btrfs_util_subvolume_snapshot(subvol, dstdir, flags, NULL, inherit); + if (err) { + error_btrfs_util(err); goto out; } retval = 0; /* success */ - if (readonly) + if (flags & BTRFS_UTIL_CREATE_SNAPSHOT_READ_ONLY) pr_verbose(LOG_DEFAULT, - "Create readonly snapshot of '%s' in '%s/%s'\n", - subvol, dstdir, newname); + "Create readonly snapshot of '%s' in '%s'\n", + subvol, dstdir); else pr_verbose(LOG_DEFAULT, - "Create snapshot of '%s' in '%s/%s'\n", - subvol, dstdir, newname); + "Create snapshot of '%s' in '%s'\n", + subvol, dstdir); out: - close(fddst); - close(fd); - free(inherit); - free(dupname); - free(dupdir); + free(dstdir); + 
btrfs_util_qgroup_inherit_destroy(inherit); return retval; } From 261f25baa069c2d0b99a5172a4b8361c650896c3 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Fri, 28 Jun 2024 15:07:11 +0100 Subject: [PATCH 10/18] btrfs-progs: remove unused qgroup functions Remove functions that after the previous two patches are no longer referenced. Signed-off-by: Mark Harmstone Co-authored-by: Omar Sandoval --- cmds/qgroup.c | 64 --------------------------------------------------- cmds/qgroup.h | 2 -- 2 files changed, 66 deletions(-) diff --git a/cmds/qgroup.c b/cmds/qgroup.c index 20b97f7ae5..5705286120 100644 --- a/cmds/qgroup.c +++ b/cmds/qgroup.c @@ -1688,70 +1688,6 @@ static int qgroup_parse_sort_string(const char *opt_arg, return ret; } -int btrfs_qgroup_inherit_size(struct btrfs_qgroup_inherit *p) -{ - return sizeof(*p) + sizeof(p->qgroups[0]) * - (p->num_qgroups + 2 * p->num_ref_copies + - 2 * p->num_excl_copies); -} - -static int qgroup_inherit_realloc(struct btrfs_qgroup_inherit **inherit, int n, - int pos) -{ - struct btrfs_qgroup_inherit *out; - int nitems = 0; - - if (*inherit) { - nitems = (*inherit)->num_qgroups + - (*inherit)->num_ref_copies + - (*inherit)->num_excl_copies; - } - - out = calloc(1, sizeof(*out) + sizeof(out->qgroups[0]) * (nitems + n)); - if (out == NULL) { - error_msg(ERROR_MSG_MEMORY, NULL); - return -ENOMEM; - } - - if (*inherit) { - struct btrfs_qgroup_inherit *i = *inherit; - int s = sizeof(out->qgroups[0]); - - out->num_qgroups = i->num_qgroups; - out->num_ref_copies = i->num_ref_copies; - out->num_excl_copies = i->num_excl_copies; - memcpy(out->qgroups, i->qgroups, pos * s); - memcpy(out->qgroups + pos + n, i->qgroups + pos, - (nitems - pos) * s); - } - free(*inherit); - *inherit = out; - - return 0; -} - -int btrfs_qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg) -{ - int ret; - u64 qgroupid = parse_qgroupid_or_path(arg); - int pos = 0; - - if (qgroupid == 0) { - error("invalid qgroup specification, qgroupid must 
not 0"); - return -EINVAL; - } - - if (*inherit) - pos = (*inherit)->num_qgroups; - ret = qgroup_inherit_realloc(inherit, 1, pos); - if (ret) - return ret; - - (*inherit)->qgroups[(*inherit)->num_qgroups++] = qgroupid; - - return 0; -} - static const char * const qgroup_cmd_group_usage[] = { "btrfs qgroup [options] ", NULL diff --git a/cmds/qgroup.h b/cmds/qgroup.h index 1fc107221c..32309ce4ae 100644 --- a/cmds/qgroup.h +++ b/cmds/qgroup.h @@ -36,8 +36,6 @@ struct btrfs_qgroup_stats { struct btrfs_qgroup_limit limit; }; -int btrfs_qgroup_inherit_size(struct btrfs_qgroup_inherit *p); -int btrfs_qgroup_inherit_add_group(struct btrfs_qgroup_inherit **inherit, char *arg); int btrfs_qgroup_query(int fd, u64 qgroupid, struct btrfs_qgroup_stats *stats); #endif From f396c03456dc73381a8e1d7e98e4cc3c6430ccfb Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 3 Jul 2024 16:28:28 +0100 Subject: [PATCH 11/18] btrfs-progs: add rudimentary log checking Currently the transaction log is more or less ignored by btrfs check, meaning that it's possible for a FS with a corrupt log to pass btrfs check, but be immediately corrupted by the kernel when it's mounted. This patch adds a check that if there's an inode in the log, any pending non-inlined csumed writes also have corresponding csum entries. Signed-off-by: Mark Harmstone [ Small commit message update. 
] Signed-off-by: Qu Wenruo --- check/main.c | 296 +++++++++++++++++- .../063-log-missing-csum/default.img.xz | Bin 0 -> 1288 bytes tests/fsck-tests/063-log-missing-csum/test.sh | 14 + 3 files changed, 298 insertions(+), 12 deletions(-) create mode 100644 tests/fsck-tests/063-log-missing-csum/default.img.xz create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh diff --git a/check/main.c b/check/main.c index 205bbb4a3c..599f22ec36 100644 --- a/check/main.c +++ b/check/main.c @@ -9670,6 +9670,266 @@ static int zero_log_tree(struct btrfs_root *root) return ret; } +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key; + struct extent_buffer *leaf; + u16 csum_size = gfs_info->csum_size; + u16 num_entries; + u64 data_len; + int ret; + + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = addr; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return ret; + + if (ret > 0 && path.slots[0]) + path.slots[0]--; + + ret = 0; + + while (1) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret) { + if (ret > 0) + ret = 0; + + break; + } + leaf = path.nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + + if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID) + break; + + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) + goto next; + + if (key.offset >= addr + length) + break; + + num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size; + data_len = num_entries * gfs_info->sectorsize; + + if (addr >= key.offset && addr <= key.offset + data_len) { + u64 end = min(addr + length, key.offset + data_len); + + length = addr + length - end; + addr = end; + + if (length == 0) + break; + } + +next: + path.slots[0]++; + } + + btrfs_release_path(&path); + + if (ret >= 0) + ret = length == 0 ? 
0 : 1; + + return ret; +} + +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret, err = 0; + u64 last_csum_inode = 0; + + key.objectid = BTRFS_FIRST_FREE_OBJECTID; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + return 1; + + while (1) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, &path); + if (ret) { + if (ret < 0) + err = 1; + + break; + } + leaf = path.nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + + if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID) + break; + + if (key.type == BTRFS_INODE_ITEM_KEY) { + struct btrfs_inode_item *item; + + item = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_inode_item); + + if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM)) + last_csum_inode = key.objectid; + } else if (key.type == BTRFS_EXTENT_DATA_KEY && + key.objectid == last_csum_inode) { + struct btrfs_file_extent_item *fi; + u64 addr, length; + + fi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) + goto next; + + addr = btrfs_file_extent_disk_bytenr(leaf, fi) + + btrfs_file_extent_offset(leaf, fi); + length = btrfs_file_extent_num_bytes(leaf, fi); + + ret = check_log_csum(root, addr, length); + if (ret < 0) { + err = 1; + break; + } + + if (ret) { + error("csum missing in log (root %llu, inode %llu, " + "offset %llu, address 0x%llx, length %llu)", + root->objectid, last_csum_inode, key.offset, + addr, length); + err = 1; + } + } + +next: + path.slots[0]++; + } + + btrfs_release_path(&path); + + return err; +} + +static int load_log_root(u64 root_id, struct btrfs_path *path, + struct btrfs_root *tmp_root) +{ + struct extent_buffer *l; + struct btrfs_tree_parent_check check = { 0 }; 
+ int ret; + + btrfs_setup_root(tmp_root, gfs_info, root_id); + + l = path->nodes[0]; + read_extent_buffer(l, &tmp_root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(tmp_root->root_item)); + + tmp_root->root_key.objectid = root_id; + tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_root->root_key.offset = 0; + + check.owner_root = btrfs_root_id(tmp_root); + check.transid = btrfs_root_generation(&tmp_root->root_item); + check.level = btrfs_root_level(&tmp_root->root_item); + + tmp_root->node = read_tree_block(gfs_info, + btrfs_root_bytenr(&tmp_root->root_item), + &check); + if (IS_ERR(tmp_root->node)) { + ret = PTR_ERR(tmp_root->node); + tmp_root->node = NULL; + return ret; + } + + if (btrfs_header_level(tmp_root->node) != btrfs_root_level(&tmp_root->root_item)) { + error("root [%llu %llu] level %d does not match %d", + tmp_root->root_key.objectid, + tmp_root->root_key.offset, + btrfs_header_level(tmp_root->node), + btrfs_root_level(&tmp_root->root_item)); + return -EIO; + } + + return 0; +} + +static int check_log(struct cache_tree *root_cache) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_root *log_root = gfs_info->log_root_tree; + int ret; + int err = 0; + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0); + if (ret < 0) { + err = 1; + goto out; + } + + while (1) { + leaf = path.nodes[0]; + if (path.slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(log_root, &path); + if (ret) { + if (ret < 0) + err = 1; + break; + } + leaf = path.nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + + if (key.objectid > BTRFS_TREE_LOG_OBJECTID || + key.type > BTRFS_ROOT_ITEM_KEY) + break; + + if (key.objectid == BTRFS_TREE_LOG_OBJECTID && + key.type == BTRFS_ROOT_ITEM_KEY && + fs_root_objectid(key.offset)) { + struct btrfs_root tmp_root; + + memset(&tmp_root, 0, 
sizeof(tmp_root)); + + ret = load_log_root(key.offset, &path, &tmp_root); + if (ret) { + err = 1; + goto next; + } + + ret = check_log_root(&tmp_root, root_cache); + if (ret) + err = 1; + +next: + if (tmp_root.node) + free_extent_buffer(tmp_root.node); + } + + path.slots[0]++; + } +out: + btrfs_release_path(&path); + + return err; +} + static void free_roots_info_cache(void) { if (!roots_info_cache) @@ -10468,9 +10728,21 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) goto close_out; } + if (gfs_info->log_root_tree) { + fprintf(stderr, "[1/8] checking log\n"); + ret = check_log(&root_cache); + + if (ret) + error("errors found in log"); + err |= !!ret; + } else { + fprintf(stderr, + "[1/8] checking log skipped (none written)\n"); + } + if (!init_extent_tree) { if (!g_task_ctx.progress_enabled) { - fprintf(stderr, "[1/7] checking root items\n"); + fprintf(stderr, "[2/8] checking root items\n"); } else { g_task_ctx.tp = TASK_ROOT_ITEMS; task_start(g_task_ctx.info, &g_task_ctx.start_time, @@ -10505,11 +10777,11 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) } } } else { - fprintf(stderr, "[1/7] checking root items... skipped\n"); + fprintf(stderr, "[2/8] checking root items... 
skipped\n"); } if (!g_task_ctx.progress_enabled) { - fprintf(stderr, "[2/7] checking extents\n"); + fprintf(stderr, "[3/8] checking extents\n"); } else { g_task_ctx.tp = TASK_EXTENTS; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10527,9 +10799,9 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) if (!g_task_ctx.progress_enabled) { if (is_free_space_tree) - fprintf(stderr, "[3/7] checking free space tree\n"); + fprintf(stderr, "[4/8] checking free space tree\n"); else - fprintf(stderr, "[3/7] checking free space cache\n"); + fprintf(stderr, "[4/8] checking free space cache\n"); } else { g_task_ctx.tp = TASK_FREE_SPACE; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10547,7 +10819,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) */ no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES); if (!g_task_ctx.progress_enabled) { - fprintf(stderr, "[4/7] checking fs roots\n"); + fprintf(stderr, "[5/8] checking fs roots\n"); } else { g_task_ctx.tp = TASK_FS_ROOTS; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10563,10 +10835,10 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) if (!g_task_ctx.progress_enabled) { if (check_data_csum) - fprintf(stderr, "[5/7] checking csums against data\n"); + fprintf(stderr, "[6/8] checking csums against data\n"); else fprintf(stderr, - "[5/7] checking only csums items (without verifying data)\n"); + "[6/8] checking only csums items (without verifying data)\n"); } else { g_task_ctx.tp = TASK_CSUMS; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10585,7 +10857,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) /* For low memory mode, check_fs_roots_v2 handles root refs */ if (check_mode != CHECK_MODE_LOWMEM) { if (!g_task_ctx.progress_enabled) { - fprintf(stderr, "[6/7] checking root refs\n"); + 
fprintf(stderr, "[7/8] checking root refs\n"); } else { g_task_ctx.tp = TASK_ROOT_REFS; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10600,7 +10872,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) } } else { fprintf(stderr, - "[6/7] checking root refs done with fs roots in lowmem mode, skipping\n"); + "[7/8] checking root refs done with fs roots in lowmem mode, skipping\n"); } while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) { @@ -10632,7 +10904,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) if (gfs_info->quota_enabled) { if (!g_task_ctx.progress_enabled) { - fprintf(stderr, "[7/7] checking quota groups\n"); + fprintf(stderr, "[8/8] checking quota groups\n"); } else { g_task_ctx.tp = TASK_QGROUPS; task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count); @@ -10655,7 +10927,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv) ret = 0; } else { fprintf(stderr, - "[7/7] checking quota groups skipped (not enabled on this FS)\n"); + "[8/8] checking quota groups skipped (not enabled on this FS)\n"); } if (!list_empty(&gfs_info->recow_ebs)) { diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz b/tests/fsck-tests/063-log-missing-csum/default.img.xz new file mode 100644 index 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d GIT binary patch literal 1288 zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3 zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a* zIr#q#U>$ptr zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i zLKCQYs5y!To5G>tIYzSNigXu|wY= z*;zh8-nLPC+GYq6HU^AQ7;56uclyq=uCMb1GmZ zuq5@iwv1^??e4XDB-M)l0stJd3pfs;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY 
z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@1-?kl9WGes{ulrRWq zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r= zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0 zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E z Date: Thu, 15 Aug 2024 13:39:28 +0930 Subject: [PATCH 12/18] btrfs-progs: mkfs/rootdir: add hard link support The new hard link detection and creation support is done by maintaining an rb tree with the following members: - st_ino, st_dev This is to record the stat() report from the host fs. With this two, we can detect if it's really a hard link (st_dev determines one filesystem/subvolume, and st_ino determines the inode number inside the fs). - root This is btrfs root pointer. This a special requirement for the recent introduced "--subvol" option. As we can have the following corner case: rootdir/ |- foobar_hardlink1 |- foobar_hardlink2 |- subv/ <- To be a subvolume inside btrfs |- foobar_hardlink3 In above case, on the host fs, `subv/` directory is just a regular directory, but in the new btrfs it will be a subvolume. In that case, `foobar_hardlink3` cannot be created as a hard link, but a new inode. - st_nlink and found_nlink Records the original reported number of links, and the nlinks we created inside btrfs. This is recorded in case we created all hard links and can remove the entry early. - btrfs_ino This is the inode number inside btrfs. And since we can handle hard links safely, remove all the related warnings, and add a new note for `--subvol` option, warning about the case where we need to split hard links due to subvolume boundary. 
Signed-off-by: Qu Wenruo --- Documentation/mkfs.btrfs.rst | 13 +++ mkfs/rootdir.c | 202 +++++++++++++++++++++++++++++------ 2 files changed, 185 insertions(+), 30 deletions(-) diff --git a/Documentation/mkfs.btrfs.rst b/Documentation/mkfs.btrfs.rst index a4817e77f1..a6251afd2d 100644 --- a/Documentation/mkfs.btrfs.rst +++ b/Documentation/mkfs.btrfs.rst @@ -165,6 +165,19 @@ OPTIONS * *default*: create as default subvolume (this can only be specified once) * *ro*: create as readonly subvolume + If there are hard links inside *rootdir* and *subdir* will split the + subvolumes, like the following case:: + + rootdir/ + |- hardlink1 + |- hardlink2 + |- subdir/ <- will be a subvolume + |- hardlink3 + + In that case we cannot create `hardlink3` as hardlinks of + `hardlink1` and `hardlink2` because hardlink3 will be inside a new + subvolume. + --shrink Shrink the filesystem to its minimal size, only works with *--rootdir* option. diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index 5e80b871c1..0a2154b68a 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -42,6 +42,7 @@ #include "common/extent-tree-utils.h" #include "common/root-tree-utils.h" #include "common/path-utils.h" +#include "common/rbtree-utils.h" #include "mkfs/rootdir.h" static u32 fs_block_size; @@ -74,6 +75,52 @@ struct inode_entry { struct list_head list; }; +/* + * Record all the hard links we found for a specific file inside + * rootdir. + * + * The search is based on (root, st_dev, st_ino). + * The reason for @root as a search index is, for hard links separated by + * subvolume boundaries: + * + * rootdir/ + * |- foobar_hardlink1 + * |- foobar_hardlink2 + * |- subv/ <- Will be created as a subvolume + * |- foobar_hardlink3. + * + * Since all the 3 hard links are inside the same rootdir and the same + * filesystem, on the host fs they are all hard links to the same inode. + * + * But for the btrfs we are building, only hardlink1 and hardlink2 can be + * created as hardlinks. 
Since we cannot create hardlink across subvolume. + * So we need @root as a search index to handle such case. + */ +struct hardlink_entry { + struct rb_node node; + /* + * The following three members are reported from the stat() of the + * host filesystem. + * + * For st_nlink we cannot trust it unconditionally, as + * some hard links may be out of rootdir. + * If @found_nlink reached @st_nlink, we know we have created all + * the hard links and can remove the entry. + */ + dev_t st_dev; + ino_t st_ino; + nlink_t st_nlink; + + /* The following two are inside the new btrfs. */ + struct btrfs_root *root; + u64 btrfs_ino; + + /* How many hard links we have created. */ + nlink_t found_nlink; +}; + +static struct rb_root hardlink_root = RB_ROOT; + /* * The path towards the rootdir. * @@ -93,9 +140,6 @@ static struct rootdir_path current_path = { .level = 0, }; -/* Track if a hardlink was found and a warning was printed. */ -static bool g_hardlink_warning; -static u64 g_hardlink_count; static struct btrfs_trans_handle *g_trans = NULL; static struct list_head *g_subvols; static u64 next_subvol_id = BTRFS_FIRST_FREE_OBJECTID; @@ -134,6 +178,82 @@ static int rootdir_path_push(struct rootdir_path *path, struct btrfs_root *root, return 0; } +static int hardlink_compare_nodes(const struct rb_node *node1, + const struct rb_node *node2) +{ + const struct hardlink_entry *entry1; + const struct hardlink_entry *entry2; + + entry1 = rb_entry(node1, struct hardlink_entry, node); + entry2 = rb_entry(node2, struct hardlink_entry, node); + UASSERT(entry1->root); + UASSERT(entry2->root); + + if (entry1->st_dev < entry2->st_dev) + return -1; + if (entry1->st_dev > entry2->st_dev) + return 1; + if (entry1->st_ino < entry2->st_ino) + return -1; + if (entry1->st_ino > entry2->st_ino) + return 1; + if (entry1->root < entry2->root) + return -1; + if (entry1->root > entry2->root) + return 1; + return 0; +} + +static struct hardlink_entry *find_hard_link(struct btrfs_root *root, + const struct 
stat *st) +{ + struct rb_node *node; + const struct hardlink_entry tmp = { + .st_dev = st->st_dev, + .st_ino = st->st_ino, + .root = root, + }; + + node = rb_search(&hardlink_root, &tmp, + (rb_compare_keys)hardlink_compare_nodes, NULL); + if (node) + return rb_entry(node, struct hardlink_entry, node); + return NULL; +} + +static int add_hard_link(struct btrfs_root *root, u64 btrfs_ino, + const struct stat *st) +{ + struct hardlink_entry *new; + int ret; + + UASSERT(st->st_nlink > 1); + + new = calloc(1, sizeof(*new)); + if (!new) + return -ENOMEM; + + new->root = root; + new->btrfs_ino = btrfs_ino; + new->found_nlink = 1; + new->st_dev = st->st_dev; + new->st_ino = st->st_ino; + new->st_nlink = st->st_nlink; + ret = rb_insert(&hardlink_root, &new->node, hardlink_compare_nodes); + if (ret) { + free(new); + return -EEXIST; + } + return 0; +} + +static void free_one_hardlink(struct rb_node *node) +{ + struct hardlink_entry *entry = rb_entry(node, struct hardlink_entry, node); + + free(entry); +} + static void stat_to_inode_item(struct btrfs_inode_item *dst, const struct stat *st) { /* @@ -502,29 +622,10 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, struct btrfs_inode_item inode_item = { 0 }; struct inode_entry *parent; struct rootdir_subvol *rds; + const bool have_hard_links = (!S_ISDIR(st->st_mode) && st->st_nlink > 1); u64 ino; int ret; - /* - * Hard link needs extra detection code, not supported for now, but - * it's not to break anything but splitting the hard links into new - * inodes. And we do not even know if the hard links are inside the - * rootdir. - * - * So here we only need to do extra warning. - * - * On most filesystems st_nlink of a directory is the number of - * subdirs, including "." and "..", so skip directory inodes. 
- */ - if (unlikely(!S_ISDIR(st->st_mode) && st->st_nlink > 1)) { - if (!g_hardlink_warning) { - warning("'%s' has extra hardlinks, they will be converted into new inodes", - full_path); - g_hardlink_warning = true; - } - g_hardlink_count++; - } - /* The rootdir itself. */ if (unlikely(ftwbuf->level == 0)) { u64 root_ino; @@ -624,6 +725,37 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, parent = rootdir_path_last(&current_path); root = parent->root; + /* For non-directory inode, check if there is already any hard link. */ + if (have_hard_links) { + struct hardlink_entry *found; + + found = find_hard_link(root, st); + /* + * Can only add the hard link if it doesn't cross subvolume + * boundary. + */ + if (found && found->root == root) { + ret = btrfs_add_link(g_trans, root, found->btrfs_ino, + parent->ino, full_path + ftwbuf->base, + strlen(full_path) - ftwbuf->base, + ftype_to_btrfs_type(st->st_mode), + NULL, 1, 0); + if (ret < 0) { + errno = -ret; + error( + "failed to add link for hard link ('%s'): %m", full_path); + return ret; + } + found->found_nlink++; + /* We found all hard links for it. Can remove the entry. */ + if (found->found_nlink >= found->st_nlink) { + rb_erase(&found->node, &hardlink_root); + free(found); + } + return 0; + } + } + ret = btrfs_find_free_objectid(g_trans, root, BTRFS_FIRST_FREE_OBJECTID, &ino); if (ret < 0) { @@ -639,7 +771,6 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, error("failed to insert inode item %llu for '%s': %m", ino, full_path); return ret; } - ret = btrfs_add_link(g_trans, root, ino, parent->ino, full_path + ftwbuf->base, strlen(full_path) - ftwbuf->base, @@ -650,6 +781,22 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, error("failed to add link for inode %llu ('%s'): %m", ino, full_path); return ret; } + + /* + * Found a possible hard link, add it into the hard link rb tree for + * future detection.
+ */ + if (have_hard_links) { + ret = add_hard_link(root, ino, st); + if (ret < 0) { + errno = -ret; + error("failed to add hard link record for '%s': %m", + full_path); + return ret; + } + ret = 0; + } + /* * btrfs_add_link() has increased the nlink to 1 in the metadata. * Also update the value in case we need to update the inode item @@ -759,8 +906,6 @@ int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir } g_trans = trans; - g_hardlink_warning = false; - g_hardlink_count = 0; g_subvols = subvols; INIT_LIST_HEAD(&current_path.inode_list); @@ -770,10 +915,6 @@ int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir return ret; } - if (g_hardlink_warning) - warning("%llu hardlinks were detected in %s, all converted to new inodes", - g_hardlink_count, source_dir); - while (current_path.level > 0) rootdir_path_pop(&current_path); @@ -785,6 +926,7 @@ int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir } } + rb_free_nodes(&hardlink_root, free_one_hardlink); return 0; } From c72719b6bed42a171c46c037d0edfc6a9c51a5a5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 15 Aug 2024 13:57:12 +0930 Subject: [PATCH 13/18] btrfs-progs: mkfs-tests: add hardlink related tests for --subvol This introduces two new cases: - 3 hardlinks without any subvolume This should result in 3 hard links inside the btrfs. - 3 hardlinks, but a subvolume will split 2 of them Then the 2 inside the same subvolume should still report 2 nlinks, but the lone one inside the new subvolume can only report 1 nlink. 
Signed-off-by: Qu Wenruo --- mkfs/rootdir.c | 8 +-- tests/mkfs-tests/036-rootdir-subvol/test.sh | 78 +++++++++++++++++---- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index 0a2154b68a..70cf0f84de 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -725,7 +725,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, parent = rootdir_path_last(&current_path); root = parent->root; - /* For non-directory inode, check if there is already any hard link. */ + /* Check if there is already a hard link record for this. */ if (have_hard_links) { struct hardlink_entry *found; @@ -771,6 +771,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, error("failed to insert inode item %llu for '%s': %m", ino, full_path); return ret; } + ret = btrfs_add_link(g_trans, root, ino, parent->ino, full_path + ftwbuf->base, strlen(full_path) - ftwbuf->base, @@ -782,10 +783,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return ret; } - /* - * Found a possible hard link, add it into the hard link rb tree for - * future detection. - */ + /* Record this new hard link. 
*/ if (have_hard_links) { ret = add_hard_link(root, ino, st); if (ret < 0) { diff --git a/tests/mkfs-tests/036-rootdir-subvol/test.sh b/tests/mkfs-tests/036-rootdir-subvol/test.sh index 63ba928f34..e4ae604ed0 100755 --- a/tests/mkfs-tests/036-rootdir-subvol/test.sh +++ b/tests/mkfs-tests/036-rootdir-subvol/test.sh @@ -11,23 +11,75 @@ prepare_test_dev tmp=$(_mktemp_dir mkfs-rootdir) -run_check touch "$tmp/foo" -run_check mkdir "$tmp/dir" -run_check mkdir "$tmp/dir/subvol" -run_check touch "$tmp/dir/subvol/bar" +basic() +{ + run_check touch "$tmp/foo" + run_check mkdir "$tmp/dir" + run_check mkdir "$tmp/dir/subvol" + run_check touch "$tmp/dir/subvol/bar" -run_check_mkfs_test_dev --rootdir "$tmp" --subvol dir/subvol -run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" + run_check_mkfs_test_dev --rootdir "$tmp" --subvol dir/subvol + run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" -run_check_mount_test_dev -run_check_stdout $SUDO_HELPER "$TOP/btrfs" subvolume list "$TEST_MNT" | \ + run_check_mount_test_dev + run_check_stdout $SUDO_HELPER "$TOP/btrfs" subvolume list "$TEST_MNT" | \ cut -d\ -f9 > "$tmp/output" -run_check_umount_test_dev + run_check_umount_test_dev -result=$(cat "$tmp/output") + result=$(cat "$tmp/output") -if [ "$result" != "dir/subvol" ]; then - _fail "dir/subvol not in subvolume list" -fi + if [ "$result" != "dir/subvol" ]; then + _fail "dir/subvol not in subvolume list" + fi + rm -rf -- "$tmp/foo" "$tmp/dir" +} +basic_hardlinks() +{ + run_check touch "$tmp/hl1" + run_check ln "$tmp/hl1" "$tmp/hl2" + run_check mkdir "$tmp/dir" + run_check ln "$tmp/hl1" "$tmp/dir/hl3" + + run_check_mkfs_test_dev --rootdir "$tmp" + run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" + + run_check_mount_test_dev + nr_hardlink=$(run_check_stdout $SUDO_HELPER stat -c "%h" "$TEST_MNT/hl1") + + if [ "$nr_hardlink" -ne 3 ]; then + _fail "hard link number incorrect, has ${nr_hardlink} expect 3" + fi + run_check_umount_test_dev + rm -rf -- "$tmp/hl1" "$tmp/hl2" 
"$tmp/dir" +} + +split_by_subvolume_hardlinks() +{ + run_check touch "$tmp/hl1" + run_check ln "$tmp/hl1" "$tmp/hl2" + run_check mkdir "$tmp/subv" + run_check ln "$tmp/hl1" "$tmp/subv/hl3" + + run_check_mkfs_test_dev --rootdir "$tmp" --subvol subv + run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" + + run_check_mount_test_dev + nr_hardlink=$(run_check_stdout $SUDO_HELPER stat -c "%h" "$TEST_MNT/hl1") + + if [ "$nr_hardlink" -ne 2 ]; then + _fail "hard link number incorrect for hl1, has ${nr_hardlink} expect 2" + fi + + nr_hardlink=$(run_check_stdout $SUDO_HELPER stat -c "%h" "$TEST_MNT/subv/hl3") + if [ "$nr_hardlink" -ne 1 ]; then + _fail "hard link number incorrect for subv/hl3, has ${nr_hardlink} expect 1" + fi + run_check_umount_test_dev + rm -rf -- "$tmp/hl1" "$tmp/hl2" "$tmp/dir" +} + +basic +basic_hardlinks +split_by_subvolume_hardlinks rm -rf -- "$tmp" From 184eddc4f7bcfe8e72b15e231556aadf69e92088 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Sep 2024 11:29:34 +0930 Subject: [PATCH 14/18] btrfs-progs: convert: fix inline extent size for symbol link [BUG] Sometimes test case btrfs/012 fails randomly, with the failure to read a softlink: QA output created by 012 Checking converted btrfs against the original one: -OK +readlink: Structure needs cleaning Checking saved ext2 image against the original one: OK Furthermore, this will trigger a kernel error message: BTRFS critical (device dm-2): regular/prealloc extent found for non-regular inode 133081 [CAUSE] For that specific inode 133081, the tree dump looks like this: item 127 key (133081 INODE_ITEM 0) itemoff 40984 itemsize 160 generation 1 transid 1 size 4095 nbytes 4096 block group 0 mode 120777 links 1 uid 0 gid 0 rdev 0 sequence 0 flags 0x0(none) item 128 key (133081 INODE_REF 133080) itemoff 40972 itemsize 12 index 2 namelen 2 name: l3 item 129 key (133081 EXTENT_DATA 0) itemoff 40919 itemsize 53 generation 4 type 1 (regular) extent data disk byte 2147483648 nr 38080512 extent data offset 
37974016 nr 4096 ram 38080512 extent compression 0 (none) Note that, the soft link inode size is 4095 at the max size (PATH_MAX, removing the terminating NUL). But the nbytes is 4096, exactly matching the sector size of the btrfs. Thus it results the creation of a regular extent, but for btrfs we do not accept a soft link with a regular/preallocated extent, thus kernel rejects such read and failed the readlink call. The root cause is in the convert code, where for soft links we always create a data extent with its size + 1, causing the above problem. I guess the original code is to handle the terminating NUL, but in btrfs we never need to store the terminating NUL for inline extents nor file names. Thus this pitfall in btrfs-convert leads to the above invalid data extent and fail the test case. [FIX] - Fix the ext2 and reiserfs symbolic link creation code To remove the terminating NUL. - Add extra checks for the size of a symbolic link Btrfs has extra limits on the size of a symbolic link, as btrfs must store symbolic link targets as inlined extents. This means for 4K node sized btrfs, the size limit is smaller than the usual PATH_MAX - 1 (only around 4000 bytes instead of 4095). So for certain nodesize, some filesystems can not be converted to btrfs. (this should be rare, because the default nodesize is 16K already) - Split the symbolic link and inline data extent size checks For symbolic links the real limit is PATH_MAX - 1 (removing the terminating NUL), but for inline data extents the limit is sectorsize - 1, which can be different from 4096 - 1 (e.g. 64K sector size). 
Signed-off-by: Qu Wenruo --- convert/source-ext2.c | 29 +++++++++++++++++++++++------ convert/source-reiserfs.c | 10 ++++++++-- kernel-shared/file-item.c | 6 ++++++ kernel-shared/file-item.h | 18 ++++++++++++++++++ 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/convert/source-ext2.c b/convert/source-ext2.c index acba5db7ee..d06f90a98e 100644 --- a/convert/source-ext2.c +++ b/convert/source-ext2.c @@ -390,6 +390,7 @@ static int ext2_create_file_extents(struct btrfs_trans_handle *trans, ext2_filsys ext2_fs, ext2_ino_t ext2_ino, u32 convert_flags) { + struct btrfs_fs_info *fs_info = trans->fs_info; int ret; char *buffer = NULL; errcode_t err; @@ -397,8 +398,20 @@ static int ext2_create_file_extents(struct btrfs_trans_handle *trans, u32 last_block; u32 sectorsize = root->fs_info->sectorsize; u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + bool meet_inline_size_limit; struct blk_iterate_data data; + if (S_ISLNK(btrfs_stack_inode_mode(btrfs_inode))) { + meet_inline_size_limit = inode_size <= btrfs_symlink_max_size(fs_info); + if (!meet_inline_size_limit) { + error("symlink too large for ext2 inode %u, has %llu max %u", + ext2_ino, inode_size, btrfs_symlink_max_size(fs_info)); + return -ENAMETOOLONG; + } + } else { + meet_inline_size_limit = inode_size <= btrfs_data_inline_max_size(fs_info); + } + init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid, convert_flags & CONVERT_FLAG_DATACSUM); @@ -430,8 +443,7 @@ static int ext2_create_file_extents(struct btrfs_trans_handle *trans, if (ret) goto fail; if ((convert_flags & CONVERT_FLAG_INLINE_DATA) && data.first_block == 0 - && data.num_blocks > 0 && inode_size < sectorsize - && inode_size <= BTRFS_MAX_INLINE_DATA_SIZE(root->fs_info)) { + && data.num_blocks > 0 && meet_inline_size_limit) { u64 num_bytes = data.num_blocks * sectorsize; u64 disk_bytenr = data.disk_block * sectorsize; u64 nbytes; @@ -476,21 +488,26 @@ static int ext2_create_symlink(struct btrfs_trans_handle *trans, int ret; char 
*pathname; u64 inode_size = btrfs_stack_inode_size(btrfs_inode); + if (ext2fs_inode_data_blocks2(ext2_fs, ext2_inode)) { - btrfs_set_stack_inode_size(btrfs_inode, inode_size + 1); + if (inode_size > btrfs_symlink_max_size(trans->fs_info)) { + error("symlink too large for ext2 inode %u, has %llu max %u", + ext2_ino, inode_size, + btrfs_symlink_max_size(trans->fs_info)); + return -ENAMETOOLONG; + } ret = ext2_create_file_extents(trans, root, objectid, btrfs_inode, ext2_fs, ext2_ino, CONVERT_FLAG_DATACSUM | CONVERT_FLAG_INLINE_DATA); - btrfs_set_stack_inode_size(btrfs_inode, inode_size); return ret; } pathname = (char *)&(ext2_inode->i_block[0]); BUG_ON(pathname[inode_size] != 0); ret = btrfs_insert_inline_extent(trans, root, objectid, 0, - pathname, inode_size + 1); - btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size + 1); + pathname, inode_size); + btrfs_set_stack_inode_nbytes(btrfs_inode, inode_size); return ret; } diff --git a/convert/source-reiserfs.c b/convert/source-reiserfs.c index 3edc72ed08..3475b15277 100644 --- a/convert/source-reiserfs.c +++ b/convert/source-reiserfs.c @@ -537,9 +537,15 @@ static int reiserfs_copy_symlink(struct btrfs_trans_handle *trans, symlink = tp_item_body(&path); len = get_ih_item_len(tp_item_head(&path)); + if (len > btrfs_symlink_max_size(trans->fs_info)) { + error("symlink too large, has %u max %u", + len, btrfs_symlink_max_size(trans->fs_info)); + ret = -ENAMETOOLONG; + goto fail; + } ret = btrfs_insert_inline_extent(trans, root, objectid, 0, - symlink, len + 1); - btrfs_set_stack_inode_nbytes(btrfs_inode, len + 1); + symlink, len); + btrfs_set_stack_inode_nbytes(btrfs_inode, len); fail: pathrelse(&path); return ret; diff --git a/kernel-shared/file-item.c b/kernel-shared/file-item.c index d2da56e1f5..eb9024022d 100644 --- a/kernel-shared/file-item.c +++ b/kernel-shared/file-item.c @@ -26,6 +26,7 @@ #include "kernel-shared/extent_io.h" #include "kernel-shared/uapi/btrfs.h" #include "common/internal.h" +#include 
"common/messages.h" #define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r->fs_info) - \ sizeof(struct btrfs_item) * 2) / \ @@ -88,6 +89,7 @@ int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, const char *buffer, size_t size) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_key key; struct btrfs_path *path; struct extent_buffer *leaf; @@ -97,6 +99,10 @@ int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, int err = 0; int ret; + if (size > max(btrfs_symlink_max_size(fs_info), + btrfs_data_inline_max_size(fs_info))) + return -EUCLEAN; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; diff --git a/kernel-shared/file-item.h b/kernel-shared/file-item.h index 0df8f4dfea..2c1e17c990 100644 --- a/kernel-shared/file-item.h +++ b/kernel-shared/file-item.h @@ -92,5 +92,23 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical, int btrfs_insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, const char *buffer, size_t size); +/* + * For symlink we allow up to PATH_MAX - 1 (PATH_MAX includes the terminating NUL, + * but fs doesn't store that terminating NUL). + * + * But for inlined data extents, the up limit is sectorsize - 1 (inclusive), or a + * regular extent should be created instead. 
+ */ +static inline u32 btrfs_symlink_max_size(struct btrfs_fs_info *fs_info) +{ + return min_t(u32, BTRFS_MAX_INLINE_DATA_SIZE(fs_info), + PATH_MAX - 1); +} + +static inline u32 btrfs_data_inline_max_size(struct btrfs_fs_info *fs_info) +{ + return min_t(u32, BTRFS_MAX_INLINE_DATA_SIZE(fs_info), + fs_info->sectorsize - 1); +} #endif From 2afdbd760709216d5717fd5305b46324a8deef37 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Sep 2024 16:13:38 +0930 Subject: [PATCH 15/18] btrfs-progs: check/original: detect invalid file extent items for symbolic links [BUG] There is a recent bug that btrfs/012 fails and the kernel refuses to read a symbolic link which is backed by a regular extent. Furthermore, in that case, "btrfs check" doesn't detect such problem at all. [CAUSE] For symbolic links, we only allow inline file extents, and this means we should only have a symbolic link target which is smaller than 4K. But btrfs check doesn't handle symbolic link inodes any differently, thus it doesn't check if the file extents are inlined or not, nor does it report this problem as an error. [FIX] When processing data extents, if we find the owning inode is a symbolic link, and the file extent is regular/preallocated, mark the inode with I_ERR_FILE_EXTENT_TOO_LARGE error. Signed-off-by: Qu Wenruo --- check/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/check/main.c b/check/main.c index 599f22ec36..d4108b7315 100644 --- a/check/main.c +++ b/check/main.c @@ -1745,6 +1745,13 @@ static int process_file_extent(struct btrfs_root *root, rec->errors |= I_ERR_BAD_FILE_EXTENT; if (disk_bytenr > 0) rec->found_size += num_bytes; + /* + * Symbolic links should only have inlined extents. + * A regular extent means it's already too large to + * be inlined. 
+ */ + if (S_ISLNK(rec->imode)) + rec->errors |= I_ERR_FILE_EXTENT_TOO_LARGE; } else { rec->errors |= I_ERR_BAD_FILE_EXTENT; } From b1ccd69fd30c6ac9c38044d7203283df8b7c37cd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Sep 2024 16:32:58 +0930 Subject: [PATCH 16/18] btrfs-progs: check/lowmem: detect invalid file extents for symbolic links [BUG] There is a recent bug that btrfs/012 fails and the kernel refuses to read a symbolic link which is backed by a regular extent. Furthermore, in that case, "btrfs check --mode=lowmem" doesn't detect such problem at all. [CAUSE] For symbolic links, we only allow inline extents, and this means we should only have a symbolic link target which is smaller than 4K. But lowmem mode btrfs check doesn't handle symbolic link inodes any differently, thus it doesn't check if the file extents are inlined or not, nor does it report this problem as an error. [FIX] When processing data extents, if we find the owning inode is a symbolic link, and the file extent is regular/preallocated, report an error for the bad file extent item. 
Signed-off-by: Qu Wenruo --- check/mode-lowmem.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/check/mode-lowmem.c b/check/mode-lowmem.c index a9908eaf62..4b6faccacb 100644 --- a/check/mode-lowmem.c +++ b/check/mode-lowmem.c @@ -3351,6 +3351,31 @@ static int repair_extent_data_item(struct btrfs_root *root, return err; } +static int read_inode_item(struct btrfs_root *root, + u64 ino, struct btrfs_inode_item *ret_ii) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key = { + .objectid = ino, + .type = BTRFS_INODE_ITEM_KEY, + .offset = 0 + }; + int ret; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; + + read_extent_buffer(path.nodes[0], ret_ii, + btrfs_item_ptr_offset(path.nodes[0], path.slots[0]), + sizeof(*ret_ii)); +out: + btrfs_release_path(&path); + return ret; +} + /* * Check EXTENT_DATA item, mainly for its dbackref in extent tree * @@ -3371,6 +3396,7 @@ static int check_extent_data_item(struct btrfs_root *root, struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref; struct btrfs_extent_data_ref *dref; + struct btrfs_inode_item inode_item; u64 owner; u64 disk_bytenr; u64 disk_num_bytes; @@ -3400,6 +3426,24 @@ static int check_extent_data_item(struct btrfs_root *root, extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); offset = btrfs_file_extent_offset(eb, fi); + /* + * There is a regular/preallocated data extent. Make sure the owning + * inode is not a symbolic link. + * As symbolic links can only have inline data extents. 
+ */ + ret = read_inode_item(root, fi_key.objectid, &inode_item); + if (ret < 0) { + errno = -ret; + error("failed to grab the inode item for inode %llu: %m", + fi_key.objectid); + err |= INODE_ITEM_MISSING; + } + if (S_ISLNK(inode_item.mode)) { + error("symbolic link at root %lld ino %llu has regular/preallocated extents", + root->root_key.objectid, fi_key.objectid); + err |= FILE_EXTENT_ERROR; + } + /* Check unaligned disk_bytenr, disk_num_bytes and num_bytes */ if (!IS_ALIGNED(disk_bytenr, gfs_info->sectorsize)) { error( From c75b2f2c77c9fdace08a57fe4515b45a4616fa21 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 3 Sep 2024 16:45:46 +0930 Subject: [PATCH 17/18] btrfs-progs: convert-tests: add a test case to verify large symbolic link handling The new test case will: - Create a symbolic link which contains a 4095-byte target on ext4 - Convert the ext4 to btrfs - Make sure we can still read the symbolic link For unpatched btrfs-convert, the resulting symbolic link will be rejected by the kernel and the test will fail. Signed-off-by: Qu Wenruo --- .../027-large-symbol-link/test.sh | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 tests/convert-tests/027-large-symbol-link/test.sh diff --git a/tests/convert-tests/027-large-symbol-link/test.sh b/tests/convert-tests/027-large-symbol-link/test.sh new file mode 100755 index 0000000000..2a001424df --- /dev/null +++ b/tests/convert-tests/027-large-symbol-link/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Make sure btrfs-convert can handle a symbol link which is 4095 bytes large + +source "$TEST_TOP/common" || exit +source "$TEST_TOP/common.convert" || exit + +setup_root_helper +prepare_test_dev 1G +check_global_prereq mkfs.ext4 + +# This is at the symbolic link size limit (PATH_MAX includes the terminating NUL). 
+link_target=$(printf "%0.sb" {1..4095}) + +convert_test_prep_fs ext4 mke2fs -t ext4 -b 4096 +run_check $SUDO_HELPER ln -s "$link_target" "$TEST_MNT/symbol_link" +run_check_umount_test_dev + +# For unpatched btrfs-convert, it will always append one byte to the +# link target, causing above 4095 target to be 4096, exactly one sector, +# resulting a regular file extent. +convert_test_do_convert + +run_check_mount_test_dev +# If the unpatched btrfs-convert created a regular extent, and the kernel is +# newer enough, such readlink will be rejected by kernel. +run_check $SUDO_HELPER readlink "$TEST_MNT/symbol_link" +run_check_umount_test_dev From 180d79fbedf677388e0bcd4f01d4493854a7c54b Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 4 Sep 2024 11:18:29 +0100 Subject: [PATCH 18/18] btrfs-progs: add option for recursive subvol snapshots Adds an option -R to btrfs subvolume snapshot, corresponding to the flag BTRFS_UTIL_CREATE_SNAPSHOT_RECURSIVE. This is another resubmission of a missed patch of Omar's from 2018: https://lore.kernel.org/all/e42cdc5d5287269faf4d09e8c9786d0b3adeb658.1516991902.git.osandov@fb.com/ Signed-off-by: Mark Harmstone Co-authored-by: Omar Sandoval --- Documentation/btrfs-subvolume.rst | 8 +++++++- cmds/subvolume.c | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Documentation/btrfs-subvolume.rst b/Documentation/btrfs-subvolume.rst index eed602f9bf..4729bcd3d8 100644 --- a/Documentation/btrfs-subvolume.rst +++ b/Documentation/btrfs-subvolume.rst @@ -252,17 +252,23 @@ show [options] -u|--uuid UUID show details about subvolume with the given *UUID*, looked up in *path* -snapshot [-r] [-i ] |[/] +snapshot [-r|-R|--recursive] [-i ] |[/] Create a snapshot of the subvolume *source* with the name *name* in the *dest* directory. If only *dest* is given, the subvolume will be named the basename of *source*. If *source* is not a subvolume, btrfs returns an error. 
+ If you wish to recursively create a readonly snapshot, you can run + :command:`btrfs property set ro true` on each subvolume after this command completes. + ``Options`` -r Make the new snapshot read only. + -R|--recursive + Recursively snapshot subvolumes beneath the source. This option cannot be + combined with -r. -i Add the newly created subvolume to a qgroup. This option can be given multiple times. diff --git a/cmds/subvolume.c b/cmds/subvolume.c index f34d940097..8102a0abef 100644 --- a/cmds/subvolume.c +++ b/cmds/subvolume.c @@ -616,7 +616,7 @@ static int cmd_subvolume_delete(const struct cmd_struct *cmd, int argc, char **a static DEFINE_COMMAND_WITH_FLAGS(subvolume_delete, "delete", CMD_DRY_RUN); static const char * const cmd_subvolume_snapshot_usage[] = { - "btrfs subvolume snapshot [-r] [-i ] { / | }", + "btrfs subvolume snapshot [-r|-R|--recursive] [-i ] { / | }", "", "Create a snapshot of a . Call it and place it in the .", "( will look like a new sub-directory, but is actually a btrfs subvolume", @@ -625,6 +625,7 @@ static const char * const cmd_subvolume_snapshot_usage[] = { "When only is given, the subvolume will be named the basename of .", "", OPTLINE("-r", "make the new snapshot readonly"), + OPTLINE("-R|--recursive", "recursively snapshot subvolumes beneath the source; this option cannot be combined with -r"), OPTLINE("-i ", "Add the new snapshot to a qgroup (a quota group). 
This option can be given multiple times."), HELPINFO_INSERT_GLOBALS, HELPINFO_INSERT_QUIET, @@ -642,7 +643,7 @@ static int cmd_subvolume_snapshot(const struct cmd_struct *cmd, int argc, char * optind = 0; while (1) { - int c = getopt(argc, argv, "i:r"); + int c = getopt(argc, argv, "i:rR"); if (c < 0) break; @@ -657,11 +658,21 @@ static int cmd_subvolume_snapshot(const struct cmd_struct *cmd, int argc, char * case 'r': flags |= BTRFS_UTIL_CREATE_SNAPSHOT_READ_ONLY; break; + case 'R': + flags |= BTRFS_UTIL_CREATE_SNAPSHOT_RECURSIVE; + break; default: usage_unknown_option(cmd, argv); } } + if ((flags & BTRFS_UTIL_CREATE_SNAPSHOT_READ_ONLY) && + (flags & BTRFS_UTIL_CREATE_SNAPSHOT_RECURSIVE)) { + error("-r and -R cannot be combined"); + retval = 1; + goto out; + } + if (check_argc_exact(argc - optind, 2)) { retval = 1; goto out;