diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 2c136bacb1a8..0abf89d449a4 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2011, 2024 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. @@ -6011,6 +6011,17 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, if (!do_claim) return; + + /* + * Theoretically, we could try to track leaks here, but it would + * require also importing the shared log pool and processing the + * chain map and space maps for it. ZDB currently doesn't have + * much facility to support multiple pools at once, so we leave this + * for future work. + */ + if (zilog && zilog->zl_spa != zilog->zl_io_spa) + return; + VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa, spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL, ZIO_FLAG_CANFAIL))); @@ -6960,6 +6971,48 @@ zdb_brt_entry_compare(const void *zcn1, const void *zcn2) return (cmp); } +static int +chain_map_count_blk_cb(spa_t *spa, const blkptr_t *bp, void *arg) +{ + (void) spa; + zdb_cb_t *zbc = arg; + zdb_count_block(zbc, NULL, bp, ZDB_OT_OTHER); + return (0); +} + +static int +chain_map_count_lr_cb(spa_t *spa, const lr_t *lrc, void *arg) +{ + (void) spa; + zdb_cb_t *zbc = arg; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + if (lrc->lrc_txtype != TX_WRITE || BP_IS_HOLE(bp)) + return (0); + zdb_count_block(zbc, NULL, bp, ZDB_OT_OTHER); + return (0); +} + +/* + * Count the blocks in the chain maps. 
+ */ +static void +chain_map_count_blocks(spa_t *spa, zdb_cb_t *zbc) +{ + avl_tree_t *pool_t = &spa->spa_chain_map; + + for (spa_chain_map_pool_t *pool_node = avl_first(pool_t); + pool_node != NULL; pool_node = AVL_NEXT(pool_t, pool_node)) { + avl_tree_t *os_t = &pool_node->scmp_os_tree; + for (spa_chain_map_os_t *os_node = avl_first(os_t); + os_node != NULL; os_node = AVL_NEXT(os_t, os_node)) { + (void) zil_parse_raw(spa, &os_node->scmo_chain_head, + chain_map_count_blk_cb, chain_map_count_lr_cb, + zbc); + } + } +} + static int dump_block_stats(spa_t *spa) { @@ -7025,6 +7078,10 @@ dump_block_stats(spa_t *spa) deleted_livelists_count_blocks(spa, zcb); + if (spa_is_shared_log(spa)) { + chain_map_count_blocks(spa, zcb); + } + if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; @@ -7378,7 +7435,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zdb_ddt_entry_t *zdde, zdde_search; if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || - BP_IS_EMBEDDED(bp)) + BP_IS_EMBEDDED(bp) || (zilog && zilog->zl_spa != zilog->zl_io_spa)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { @@ -8148,6 +8205,8 @@ dump_mos_leaks(spa_t *spa) scip_next_mapping_object); mos_obj_refd(spa->spa_condensing_indirect_phys. 
scip_prev_obsolete_sm_object); + if (spa_is_shared_log(spa)) + mos_obj_refd(spa->spa_dsl_pool->dp_chain_map_obj); if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) { vdev_indirect_mapping_t *vim = vdev_indirect_mapping_open(mos, @@ -8322,6 +8381,60 @@ dump_log_spacemap_obsolete_stats(spa_t *spa) (u_longlong_t)lsos.lsos_total_entries); } +static void +print_blkptr(const blkptr_t *bp) +{ + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD) + snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp); + (void) printf("%s\n", blkbuf); +} + +static int +chain_map_dump_blk_cb(spa_t *spa, const blkptr_t *bp, void *arg) +{ + (void) spa, (void) arg; + printf("\t\t\tBP: "); + print_blkptr(bp); + return (0); +} + +static int +chain_map_dump_lr_cb(spa_t *spa, const lr_t *lrc, void *arg) +{ + (void) spa, (void) arg; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + /* Only TX_WRITE records are laid out as lr_write_t. */ + if (lrc->lrc_txtype != TX_WRITE) + return (0); + printf("\t\t\tLR BP: "); + print_blkptr(bp); + return (0); +} + +static void +dump_chain_map(spa_t *spa) +{ + (void) printf("Chain map contents:\n"); + avl_tree_t *pool_t = &spa->spa_chain_map; + + for (spa_chain_map_pool_t *pool_node = avl_first(pool_t); + pool_node != NULL; pool_node = AVL_NEXT(pool_t, pool_node)) { + avl_tree_t *os_t = &pool_node->scmp_os_tree; + (void) printf("\tPool entry: %s\n", pool_node->scmp_name); + for (spa_chain_map_os_t *os_node = avl_first(os_t); + os_node != NULL; os_node = AVL_NEXT(os_t, os_node)) { + (void) printf("\t\tObjset entry: %"PRIu64"\n\t\t\t", + os_node->scmo_id); + print_blkptr(&os_node->scmo_chain_head); + (void) zil_parse_raw(spa, &os_node->scmo_chain_head, + chain_map_dump_blk_cb, chain_map_dump_lr_cb, NULL); + } + } +} + static void dump_zpool(spa_t *spa) { @@ -8403,6 +8516,9 @@ dump_zpool(spa_t *spa) (void) dmu_objset_find(spa_name(spa), dump_one_objset, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); + if (spa_is_shared_log(spa)) + 
dump_chain_map(spa); + if (rc == 0 && !dump_opt['L']) rc = dump_mos_leaks(spa); diff --git a/cmd/zhack.c b/cmd/zhack.c index f297afb65d47..e06e15a98918 100644 --- a/cmd/zhack.c +++ b/cmd/zhack.c @@ -104,7 +104,7 @@ fatal(spa_t *spa, const void *tag, const char *fmt, ...) if (spa != NULL) { spa_close(spa, tag); - (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE); + (void) spa_export(g_pool, NULL, B_TRUE, B_FALSE, NULL); } va_start(ap, fmt); @@ -1016,7 +1016,8 @@ main(int argc, char **argv) usage(); } - if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE) != 0) { + if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_FALSE, + NULL) != 0) { fatal(NULL, FTAG, "pool export failed; " "changes may not be committed to disk\n"); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index bc9f90cae08b..b8d7cd960190 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -112,6 +112,7 @@ static int zpool_do_split(int, char **); static int zpool_do_initialize(int, char **); static int zpool_do_scrub(int, char **); static int zpool_do_resilver(int, char **); +static int zpool_do_recycle(int, char **); static int zpool_do_trim(int, char **); static int zpool_do_import(int, char **); @@ -189,6 +190,7 @@ typedef enum { HELP_REMOVE, HELP_INITIALIZE, HELP_SCRUB, + HELP_RECYCLE, HELP_RESILVER, HELP_TRIM, HELP_STATUS, @@ -413,6 +415,7 @@ static zpool_command_t command_table[] = { { "split", zpool_do_split, HELP_SPLIT }, { NULL }, { "initialize", zpool_do_initialize, HELP_INITIALIZE }, + { "recycle", zpool_do_recycle, HELP_RECYCLE }, { "resilver", zpool_do_resilver, HELP_RESILVER }, { "scrub", zpool_do_scrub, HELP_SCRUB }, { "trim", zpool_do_trim, HELP_TRIM }, @@ -459,7 +462,8 @@ get_usage(zpool_help_t idx) case HELP_CLEAR: return (gettext("\tclear [[--power]|[-nF]] [device]\n")); case HELP_CREATE: - return (gettext("\tcreate [-fnd] [-o property=value] ... \n" + return (gettext("\tcreate [-fndL] [-l pool] ... \n" + "\t [-o property=value] ... 
\n" "\t [-O file-system-property=value] ... \n" "\t [-m mountpoint] [-R root] ...\n")); case HELP_CHECKPOINT: @@ -478,9 +482,10 @@ get_usage(zpool_help_t idx) "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " - "[-R root] [-F [-n]]\n" - "\t [--rewind-to-checkpoint] [newpool]\n")); + "\t [-d dir | -c cachefile] [-D] [-l] [-f] " + "[-m [-L pool]] [-N] [-R root]\n" + "\t [-F [-n]] [--rewind-to-checkpoint] " + "[newpool]\n")); case HELP_IOSTAT: return (gettext("\tiostat [[[-c [script1,script2,...]" "[-lq]]|[-rw]] [-T d | u] [-ghHLpPvy]\n" @@ -513,6 +518,8 @@ get_usage(zpool_help_t idx) "[ ...]\n")); case HELP_SCRUB: return (gettext("\tscrub [-s | -p] [-w] [-e] ...\n")); + case HELP_RECYCLE: + return (gettext("\trecycle [-nv] ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: @@ -1524,10 +1531,13 @@ zpool_do_add(int argc, char **argv) &props, B_TRUE) == 0); } } + uint64_t shared_log; + boolean_t has_shared_log = nvlist_lookup_uint64(config, + ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log) == 0; /* pass off to make_root_vdev for processing */ nvroot = make_root_vdev(zhp, props, !check_inuse, - check_replication, B_FALSE, dryrun, argc, argv); + check_replication, B_FALSE, dryrun, has_shared_log, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); @@ -1987,9 +1997,11 @@ zpool_do_create(int argc, char **argv) nvlist_t *fsprops = NULL; nvlist_t *props = NULL; char *propval; + zpool_handle_t *shared_log_pool = NULL; + boolean_t is_shared_log = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":fndR:m:o:O:t:")) != -1) { + while ((c = getopt(argc, argv, ":fndR:m:o:O:t:l:L")) != -1) { switch (c) { case 'f': force = B_TRUE; @@ -2088,6 +2100,17 @@ zpool_do_create(int argc, char **argv) goto errout; tname = optarg; break; + case 'l': + shared_log_pool = zpool_open(g_zfs, optarg); + if 
(shared_log_pool == NULL) { + (void) fprintf(stderr, gettext("could not open " + "shared log pool '%s'\n"), optarg); + goto errout; + } + break; + case 'L': + is_shared_log = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2128,9 +2151,17 @@ zpool_do_create(int argc, char **argv) /* pass off to make_root_vdev for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, - argc - 1, argv + 1); + shared_log_pool != NULL, argc - 1, argv + 1); if (nvroot == NULL) goto errout; + if (shared_log_pool) { + fnvlist_add_uint64(nvroot, ZPOOL_CONFIG_SHARED_LOG_POOL, + fnvlist_lookup_uint64(zpool_get_config(shared_log_pool, + NULL), ZPOOL_CONFIG_POOL_GUID)); + } + + if (is_shared_log) + fnvlist_add_boolean(nvroot, ZPOOL_CONFIG_IS_SHARED_LOG); /* make_root_vdev() allows 0 toplevel children if there are spares */ if (!zfs_allocatable_devs(nvroot)) { @@ -2874,7 +2905,8 @@ vdev_health_check_cb(void *hdl_data, nvlist_t *nv, void *data) */ static void print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, - nvlist_t *nv, int depth, boolean_t isspare, vdev_rebuild_stat_t *vrs) + nvlist_t *nv, int depth, boolean_t isspare, boolean_t recurse, + vdev_rebuild_stat_t *vrs) { nvlist_t **child, *root; uint_t c, i, vsc, children; @@ -3165,7 +3197,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, (void) printf("\n"); - for (c = 0; c < children; c++) { + for (c = 0; c < children && recurse; c++) { uint64_t islog = B_FALSE, ishole = B_FALSE; /* Don't print logs or holes here */ @@ -3189,7 +3221,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); print_status_config(zhp, cb, vname, child[c], depth + 2, - isspare, vrs); + isspare, B_TRUE, vrs); free(vname); } } @@ -3200,7 +3232,7 @@ print_status_config(zpool_handle_t *zhp, 
status_cbdata_t *cb, const char *name, */ static void print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv, - int depth) + int depth, boolean_t recurse) { nvlist_t **child; uint_t c, children; @@ -3266,7 +3298,7 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv, } (void) printf("\n"); - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + if (!recurse || nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; @@ -3282,7 +3314,7 @@ print_import_config(status_cbdata_t *cb, const char *name, nvlist_t *nv, vname = zpool_vdev_name(g_zfs, NULL, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); - print_import_config(cb, vname, child[c], depth + 2); + print_import_config(cb, vname, child[c], depth + 2, B_TRUE); free(vname); } @@ -3363,13 +3395,70 @@ print_class_vdevs(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv, cb->cb_name_flags | VDEV_NAME_TYPE_ID); if (cb->cb_print_status) print_status_config(zhp, cb, name, child[c], 2, - B_FALSE, NULL); + B_FALSE, B_TRUE, NULL); else - print_import_config(cb, name, child[c], 2); + print_import_config(cb, name, child[c], 2, B_TRUE); free(name); } } +/* + * Find a pool with a matching GUID. + */ +typedef struct find_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} find_cbdata_t; + +static int +find_pool(zpool_handle_t *zhp, void *data) +{ + find_cbdata_t *cbp = data; + + if (cbp->cb_guid == + zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + +/* + * Given a pool GUID, find the matching pool. 
+ */ +static zpool_handle_t * +find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid) +{ + find_cbdata_t cb; + cb.cb_guid = pool_guid; + if (zpool_iter(zhdl, find_pool, &cb) != 1) + return (NULL); + + return (cb.cb_zhp); +} + +static void +print_shared_log(zpool_handle_t *zhp, status_cbdata_t *cb, + uint64_t shared_log_guid) +{ + (void) printf(gettext("\tshared log\n")); + zpool_handle_t *shared_log = find_by_guid(g_zfs, shared_log_guid); + VERIFY(shared_log); + nvlist_t *shared_log_config = zpool_get_config(shared_log, NULL); + nvlist_t *nvroot; + VERIFY0(nvlist_lookup_nvlist(shared_log_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot)); + const char *name = zpool_get_name(shared_log); + if (cb->cb_print_status) + print_status_config(zhp, cb, name, nvroot, 2, + B_FALSE, B_FALSE, NULL); + else + print_import_config(cb, name, nvroot, 2, B_FALSE); + zpool_close(shared_log); +} + /* * Display the status for the given pool. */ @@ -3746,7 +3835,7 @@ show_import(nvlist_t *config, boolean_t report_error) if (cb.cb_namewidth < 10) cb.cb_namewidth = 10; - print_import_config(&cb, name, nvroot, 0); + print_import_config(&cb, name, nvroot, 0, B_TRUE); print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_DEDUP); print_class_vdevs(NULL, &cb, nvroot, VDEV_ALLOC_BIAS_SPECIAL); @@ -3948,10 +4037,31 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, uint_t npools = 0; + int err = 0; + nvpair_t *elem = NULL, *next = NULL; + boolean_t first = B_TRUE; tpool_t *tp = NULL; if (import->do_all) { tp = tpool_create(1, 5 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); + + elem = nvlist_next_nvpair(pools, NULL); + next = nvlist_next_nvpair(pools, elem); + + while (elem != NULL) { + verify(nvpair_value_nvlist(elem, &config) == 0); + if (fnvlist_lookup_boolean(config, + ZPOOL_CONFIG_IS_SHARED_LOG)) { + err = do_import(config, NULL, mntopts, props, + flags, mount_tp_nthr); + first = B_FALSE; + fnvlist_remove_nvpair(pools, elem); + } + elem = next; + next = 
nvlist_next_nvpair(pools, elem); + } + if (err != 0) + return (err); } /* @@ -3960,9 +4070,6 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, * post-process the list to deal with pool state and possible * duplicate names. */ - int err = 0; - nvpair_t *elem = NULL; - boolean_t first = B_TRUE; if (!pool_specified && import->do_all) { while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) npools++; @@ -4064,6 +4171,11 @@ import_pools(nvlist_t *pools, nvlist_t *props, char *mntopts, int flags, "no such pool available\n"), orig_name); err = B_TRUE; } else { + if (import->shared_log_guid) { + fnvlist_add_uint64(found_config, + ZPOOL_CONFIG_SHARED_LOG_POOL, + import->shared_log_guid); + } err |= do_import(found_config, new_name, mntopts, props, flags, mount_tp_nthr); } @@ -4352,6 +4464,7 @@ zpool_do_import(int argc, char **argv) char *cachefile = NULL; importargs_t idata = { 0 }; char *endptr; + zpool_handle_t *shared_log_pool = NULL; struct option long_options[] = { {"rewind-to-checkpoint", no_argument, NULL, CHECKPOINT_OPT}, @@ -4359,7 +4472,7 @@ zpool_do_import(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX", + while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlL:mnNo:R:stT:VX", long_options, NULL)) != -1) { switch (c) { case 'a': @@ -4385,6 +4498,14 @@ zpool_do_import(int argc, char **argv) case 'l': flags |= ZFS_IMPORT_LOAD_KEYS; break; + case 'L': + shared_log_pool = zpool_open(g_zfs, optarg); + if (shared_log_pool == NULL) { + (void) fprintf(stderr, gettext("could not open " + "shared log pool '%s'\n"), optarg); + goto error; + } + break; case 'm': flags |= ZFS_IMPORT_MISSING_LOG; break; @@ -4457,6 +4578,16 @@ zpool_do_import(int argc, char **argv) argc -= optind; argv += optind; + if (shared_log_pool != NULL && ! 
(flags & ZFS_IMPORT_MISSING_LOG)) { + (void) fprintf(stderr, gettext("-L requires -m\n")); + usage(B_FALSE); + } + + if (shared_log_pool != NULL && do_all) { + (void) fprintf(stderr, gettext("-L is incompatible with -a\n")); + usage(B_FALSE); + } + if (cachefile && nsearch != 0) { (void) fprintf(stderr, gettext("-c is incompatible with -d\n")); usage(B_FALSE); @@ -4580,6 +4711,10 @@ zpool_do_import(int argc, char **argv) idata.policy = policy; idata.do_destroyed = do_destroyed; idata.do_all = do_all; + if (shared_log_pool) { + idata.shared_log_guid = fnvlist_lookup_uint64(zpool_get_config( + shared_log_pool, NULL), ZPOOL_CONFIG_POOL_GUID); + } libpc_handle_t lpch = { .lpc_lib_handle = g_zfs, @@ -6991,7 +7126,8 @@ collect_vdev_prop(zpool_prop_t prop, uint64_t value, const char *str, */ static void collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, - list_cbdata_t *cb, int depth, boolean_t isspare, nvlist_t *item) + list_cbdata_t *cb, int depth, boolean_t isspare, boolean_t recurse, + nvlist_t *item) { nvlist_t **child; vdev_stat_t *vs; @@ -7001,7 +7137,7 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, uint64_t islog = B_FALSE; nvlist_t *props, *ent, *ch, *obj, *l2c, *sp; props = ent = ch = obj = sp = l2c = NULL; - const char *dashes = "%-*s - - - - " + const char *dashes = "%*s%-*s - - - - " "- - - - -\n"; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, @@ -7093,7 +7229,7 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) fputc('\n', stdout); } - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + if (!recurse || nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { if (cb->cb_json) { fnvlist_add_nvlist(item, name, ent); @@ -7126,10 +7262,10 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, if (name == NULL || cb->cb_json != B_TRUE) collect_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE, item); + 
B_FALSE, B_TRUE, item); else if (cb->cb_json) { collect_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE, ch); + B_FALSE, B_TRUE, ch); } free(vname); } @@ -7165,14 +7301,14 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, if (!printed && !cb->cb_json) { /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, - class_name[n]); + (void) printf(dashes, depth + 2, "", + cb->cb_namewidth, class_name[n]); printed = B_TRUE; } vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags | VDEV_NAME_TYPE_ID); collect_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE, obj); + B_FALSE, B_TRUE, obj); free(vname); } if (cb->cb_json) { @@ -7182,19 +7318,49 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, } } + uint64_t shared_log_guid; + if (name == NULL && nvlist_lookup_uint64(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log_guid) == 0) { + nvlist_t *sl = NULL; + if (cb->cb_json) { + sl = fnvlist_alloc(); + } else { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, depth + 2, "", cb->cb_namewidth, + "shared log"); + } + zpool_handle_t *shared_log = find_by_guid(g_zfs, + shared_log_guid); + VERIFY(shared_log); + nvlist_t *shared_log_config = zpool_get_config(shared_log, + NULL); + nvlist_t *nvroot; + VERIFY0(nvlist_lookup_nvlist(shared_log_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot)); + collect_list_stats(shared_log, zpool_get_name(shared_log), + nvroot, cb, depth + 4, B_FALSE, B_FALSE, sl); + zpool_close(shared_log); + if (cb->cb_json) { + if (!nvlist_empty(sl)) + fnvlist_add_nvlist(item, "shared log", sl); + fnvlist_free(sl); + } + } + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0 && children > 0) { if (cb->cb_json) { l2c = fnvlist_alloc(); } else { /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "cache"); + (void) printf(dashes, depth + 2, "", cb->cb_namewidth, + "cache"); } for (c = 0; c < 
children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); collect_list_stats(zhp, vname, child[c], cb, depth + 2, - B_FALSE, l2c); + B_FALSE, B_TRUE, l2c); free(vname); } if (cb->cb_json) { @@ -7210,13 +7376,14 @@ collect_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, sp = fnvlist_alloc(); } else { /* LINTED E_SEC_PRINTF_VAR_FMT */ - (void) printf(dashes, cb->cb_namewidth, "spare"); + (void) printf(dashes, depth + 2, "", cb->cb_namewidth, + "spare"); } for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], cb->cb_name_flags); collect_list_stats(zhp, vname, child[c], cb, depth + 2, - B_TRUE, sp); + B_TRUE, B_TRUE, sp); free(vname); } if (cb->cb_json) { @@ -7266,7 +7433,8 @@ list_callback(zpool_handle_t *zhp, void *data) } nvdevs = fnvlist_alloc(); } - collect_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE, nvdevs); + collect_list_stats(zhp, NULL, nvroot, cbp, 0, B_FALSE, B_TRUE, + nvdevs); if (cbp->cb_json) { fnvlist_add_nvlist(p, "vdevs", nvdevs); if (cbp->cb_json_pool_key_guid) @@ -7492,7 +7660,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) boolean_t rebuild = B_FALSE; boolean_t wait = B_FALSE; int c; - nvlist_t *nvroot; + nvlist_t *nvroot, *config; char *poolname, *old_disk, *new_disk; zpool_handle_t *zhp; nvlist_t *props = NULL; @@ -7575,7 +7743,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) return (1); } - if (zpool_get_config(zhp, NULL) == NULL) { + if ((config = zpool_get_config(zhp, NULL)) == NULL) { (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), poolname); zpool_close(zhp); @@ -7597,8 +7765,12 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) } } + uint64_t shared_log; + boolean_t has_shared_log = nvlist_lookup_uint64(config, + ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log) == 0; + nvroot = make_root_vdev(zhp, props, force, B_FALSE, replacing, B_FALSE, - argc, argv); + has_shared_log, argc, argv); if (nvroot == NULL) { 
zpool_close(zhp); nvlist_free(props); @@ -8535,6 +8707,116 @@ zpool_do_resilver(int argc, char **argv) B_FALSE, scrub_callback, &cb)); } +struct recycle_data { + boolean_t dryrun; + boolean_t verbose; +}; + +static void +print_recycle_info(nvlist_t *nvl, boolean_t dryrun) +{ + printf("Cleaned up%s: [", dryrun ? " (dry run)" : ""); + nvpair_t *elem = NULL; + boolean_t first = B_TRUE; + while ((elem = nvlist_next_nvpair(nvl, elem))) { + printf("%s%s", first ? "" : ",\n\t", nvpair_name(elem)); + first = B_FALSE; + } + printf("]\n"); +} + +static int +recycle_callback(zpool_handle_t *zhp, void *data) +{ + struct recycle_data *rd = data; + nvlist_t *nvl; + + int err = lzc_recycle(zpool_get_name(zhp), NULL, rd->dryrun, &nvl); + if (err) + return (err); + if (rd->verbose) + print_recycle_info(nvl, rd->dryrun); + nvlist_free(nvl); + return (0); +} + +/* + * zpool recycle [-a] [-n] [-v] [pool]... + * + * Cleans up chain maps for non-attached client pools + */ +int +zpool_do_recycle(int argc, char **argv) +{ + int c; + struct recycle_data rd = {0}; + boolean_t doall = B_FALSE; + + /* check options */ + while ((c = getopt(argc, argv, "nva")) != -1) { + switch (c) { + case 'n': + rd.dryrun = B_TRUE; + zfs_fallthrough; + case 'v': + rd.verbose = B_TRUE; + break; + case 'a': + doall = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } else if (argc == 1 && !doall) { + (void) fprintf(stderr, gettext("missing client pools\n")); + usage(B_FALSE); + } else if (argc > 1 && doall) { + (void) fprintf(stderr, gettext("specific client pools and " + "do_all\n")); + usage(B_FALSE); + } + + if (doall) { + return (for_each_pool(argc, argv, B_TRUE, NULL, ZFS_TYPE_POOL, + B_FALSE, recycle_callback, &rd)); + } + + const char *pool = argv[0]; + argc--; + argv++; + + nvlist_t 
*clients = NULL; + if (argc > 0) + clients = fnvlist_alloc(); + while (argc > 0) { + fnvlist_add_boolean(clients, argv[0]); + argc--; + argv++; + } + + nvlist_t *nvl; + int err = lzc_recycle(pool, clients, rd.dryrun, &nvl); + if (clients) + nvlist_free(clients); + if (err) + return (err); + if (rd.verbose) + print_recycle_info(nvl, rd.dryrun); + nvlist_free(nvl); + + return (0); +} + /* * zpool trim [-d] [-r ] [-c | -s] [ ...] * @@ -10236,7 +10518,8 @@ print_spares(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **spares, for (i = 0; i < nspares; i++) { name = zpool_vdev_name(g_zfs, zhp, spares[i], cb->cb_name_flags); - print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, NULL); + print_status_config(zhp, cb, name, spares[i], 2, B_TRUE, B_TRUE, + NULL); free(name); } } @@ -10257,7 +10540,7 @@ print_l2cache(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t **l2cache, name = zpool_vdev_name(g_zfs, zhp, l2cache[i], cb->cb_name_flags); print_status_config(zhp, cb, name, l2cache[i], 2, - B_FALSE, NULL); + B_FALSE, B_TRUE, NULL); free(name); } } @@ -10895,12 +11178,18 @@ status_callback(zpool_handle_t *zhp, void *data) printf("\n"); print_status_config(zhp, cbp, zpool_get_name(zhp), nvroot, 0, - B_FALSE, NULL); + B_FALSE, B_TRUE, NULL); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_DEDUP); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_BIAS_SPECIAL); print_class_vdevs(zhp, cbp, nvroot, VDEV_ALLOC_CLASS_LOGS); + uint64_t shared_log_guid; + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL, + &shared_log_guid) == 0) { + print_shared_log(zhp, cbp, shared_log_guid); + } + if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) print_l2cache(zhp, cbp, l2cache, nl2cache); diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index 7f5406f063e1..ac9963fc69f5 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2023 by Delphix. All rights reserved. */ #ifndef ZPOOL_UTIL_H @@ -57,8 +58,8 @@ char *zpool_get_cmd_search_path(void); */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, - int check_rep, boolean_t replacing, boolean_t dryrun, int argc, - char **argv); + int check_rep, boolean_t replacing, boolean_t dryrun, + boolean_t have_shlog, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index fbd4b81dfacc..590b8974a260 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2018 by Delphix. All rights reserved. + * Copyright (c) 2013, 2023 by Delphix. All rights reserved. * Copyright (c) 2016, 2017 Intel Corporation. * Copyright 2016 Igor Kozhukhov . */ @@ -1488,7 +1488,7 @@ draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children) * because the program is just going to exit anyway. 
*/ static nvlist_t * -construct_spec(nvlist_t *props, int argc, char **argv) +construct_spec(nvlist_t *props, boolean_t have_shlog, int argc, char **argv) { nvlist_t *nvroot, *nv, **top, **spares, **l2cache; int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache; @@ -1735,6 +1735,12 @@ construct_spec(nvlist_t *props, int argc, char **argv) goto spec_out; } + if (seen_logs && have_shlog) { + (void) fprintf(stderr, gettext("invalid vdev specification: " + "cannot mix shared log and log devices\n")); + goto spec_out; + } + if (seen_logs && nlogs == 0) { (void) fprintf(stderr, gettext("invalid vdev specification: " "log requires at least 1 device\n")); @@ -1779,7 +1785,8 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, uint_t c, children; if (argc > 0) { - if ((newroot = construct_spec(props, argc, argv)) == NULL) { + if ((newroot = construct_spec(props, B_FALSE, argc, argv)) == + NULL) { (void) fprintf(stderr, gettext("Unable to build a " "pool from the specified devices\n")); return (NULL); @@ -1853,7 +1860,8 @@ num_normal_vdevs(nvlist_t *nvroot) */ nvlist_t * make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, int argc, char **argv) + boolean_t replacing, boolean_t dryrun, boolean_t have_shlog, int argc, + char **argv) { nvlist_t *newroot; nvlist_t *poolconfig = NULL; @@ -1864,7 +1872,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, * that we have a valid specification, and that all devices can be * opened. */ - if ((newroot = construct_spec(props, argc, argv)) == NULL) + if ((newroot = construct_spec(props, have_shlog, argc, argv)) == NULL) return (NULL); if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) { diff --git a/cmd/ztest.c b/cmd/ztest.c index 523f280aae1a..d3842a11fa80 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -3070,7 +3070,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * an export concurrently. 
*/ VERIFY0(spa_open(zo->zo_pool, &spa, FTAG)); - int error = spa_destroy(zo->zo_pool); + int error = spa_destroy(zo->zo_pool, NULL); if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) { fatal(B_FALSE, "spa_destroy(%s) returned unexpected value %d", spa->spa_name, error); @@ -3172,7 +3172,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) /* * Clean up from previous runs. */ - (void) spa_destroy(name); + (void) spa_destroy(name, NULL); raidz_children = ztest_get_raidz_children(ztest_spa); @@ -3626,7 +3626,7 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) } /* clean up the old pool, if any */ - (void) spa_destroy("splitp"); + (void) spa_destroy("splitp", NULL); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); @@ -7432,7 +7432,7 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Clean up from previous runs. */ - (void) spa_destroy(newname); + (void) spa_destroy(newname, NULL); /* * Get the pool's configuration and guid. @@ -7453,7 +7453,7 @@ ztest_spa_import_export(char *oldname, char *newname) /* * Export it. */ - VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE)); + VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE, NULL)); ztest_walk_pool_directory("pools after export"); @@ -8611,7 +8611,7 @@ ztest_init(ztest_shared_t *zs) /* * Create the storage pool. 
*/ - (void) spa_destroy(ztest_opts.zo_pool); + (void) spa_destroy(ztest_opts.zo_pool, NULL); ztest_shared->zs_vdev_next_leaf = 0; zs->zs_splits = 0; zs->zs_mirrors = ztest_opts.zo_mirrors; diff --git a/include/libzfs.h b/include/libzfs.h index 01d51999f4eb..4526a292dd96 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -159,6 +159,8 @@ typedef enum zfs_error { EZFS_SHAREFAILED, /* filesystem share failed */ EZFS_RAIDZ_EXPAND_IN_PROGRESS, /* a raidz is currently expanding */ EZFS_ASHIFT_MISMATCH, /* can't add vdevs with different ashifts */ + /* Operation cannot be performed on a shared log pool */ + EZFS_SHAREDLOG, EZFS_UNKNOWN } zfs_error_t; @@ -350,6 +352,7 @@ _LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); _LIBZFS_H const char *zpool_get_state_str(zpool_handle_t *); +_LIBZFS_H zpool_handle_t *zpool_get_shared_log(zpool_handle_t *); /* * Functions to manage pool properties diff --git a/include/libzfs_core.h b/include/libzfs_core.h index b1d74fbbc8f5..3e07913eef72 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. 
@@ -156,6 +156,9 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *); _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **); +_LIBZFS_CORE_H int lzc_recycle(const char *, nvlist_t *, boolean_t, + nvlist_t **); + _LIBZFS_CORE_H int lzc_get_vdev_prop(const char *, nvlist_t *, nvlist_t **); _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **); @@ -164,6 +167,10 @@ _LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **); _LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t, uint64_t); +_LIBZFS_CORE_H int lzc_pool_destroy(const char *, const char *, nvlist_t **); +_LIBZFS_CORE_H int lzc_pool_export(const char *, const char *, boolean_t, + boolean_t, nvlist_t **); + #ifdef __cplusplus } #endif diff --git a/include/libzutil.h b/include/libzutil.h index e2108ceeaa44..b233dc5ce559 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -81,6 +81,7 @@ typedef struct importargs { nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ boolean_t do_destroyed; boolean_t do_all; + uint64_t shared_log_guid; } importargs_t; typedef struct libpc_handle { diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 22cbd7fc73b6..2a07ac904159 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. 
@@ -392,6 +392,7 @@ typedef struct dmu_buf { #define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint" #define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap" #define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones" +#define DMU_POOL_CHAIN_MAP_OBJ "com.delphix:chain_map_obj" /* * Allocate an object from this objset. The range of object numbers diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h index aa55da626149..6846588bde76 100644 --- a/include/sys/dmu_tx.h +++ b/include/sys/dmu_tx.h @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_TX_H @@ -154,6 +154,7 @@ void dmu_tx_wait(dmu_tx_t *tx); */ extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg); +dmu_tx_t *dmu_tx_create_mos(struct dsl_pool *dp); /* * These routines are only called by the DMU. */ diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h index abcdc77a4b96..bc3648d97c03 100644 --- a/include/sys/dsl_pool.h +++ b/include/sys/dsl_pool.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2018 by Delphix. All rights reserved. + * Copyright (c) 2013, 2023 by Delphix. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
*/ @@ -106,6 +106,7 @@ typedef struct dsl_pool { uint64_t dp_bptree_obj; uint64_t dp_empty_bpobj; bpobj_t dp_obsolete_bpobj; + uint64_t dp_chain_map_obj; struct dsl_scan *dp_scan; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 1676020d04d3..38e37708fd9c 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -864,6 +864,8 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */ #define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats" #define ZPOOL_CONFIG_COMPATIBILITY "compatibility" +#define ZPOOL_CONFIG_SHARED_LOG_POOL "com.delphix:shared_log_pool" +#define ZPOOL_CONFIG_IS_SHARED_LOG "com.delphix:is_shared_log" /* * The persistent vdev state is stored as separate values rather than a single @@ -1534,6 +1536,9 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_SCRUB, /* 0x5a57 */ ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */ ZFS_IOC_DDT_PRUNE, /* 0x5a59 */ + ZFS_IOC_POOL_RECYCLE, /* 0x5a5a */ + ZFS_IOC_POOL_DESTROY_NEW, /* 0x5a5b */ + ZFS_IOC_POOL_EXPORT_NEW, /* 0x5a5c */ /* * Per-platform (Optional) - 8/128 numbers reserved. @@ -1781,6 +1786,23 @@ typedef enum { #define DDT_PRUNE_UNIT "ddt_prune_unit" #define DDT_PRUNE_AMOUNT "ddt_prune_amount" +/* + * The following names are used when invoking ZFS_IOC_POOL_RECYCLE. + */ +#define ZPOOL_RECYCLE_DRYRUN "dryrun" +#define ZPOOL_RECYCLE_CLIENTS "clients" + +/* + * The following are names used when invoking ZFS_IOC_POOL_EXPORT_NEW. + */ +#define ZPOOL_EXPORT_FORCE "force" +#define ZPOOL_EXPORT_HARDFORCE "hardforce" + +/* + * Name that is used to convey client information for shared log pools. + */ +#define ZPOOL_SHARED_LOG_CLIENTS "clients" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 815b5d0c9cf1..f3af7ca080ae 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -37,14 +37,20 @@ extern "C" { #endif +typedef enum metaslab_type { + METASLAB_TYPE_NORMAL, + METASLAB_TYPE_VIRTUAL, +} metaslab_type_t; typedef struct metaslab_ops { const char *msop_name; uint64_t (*msop_alloc)(metaslab_t *, uint64_t); + metaslab_type_t msop_type; } metaslab_ops_t; extern const metaslab_ops_t zfs_metaslab_ops; +extern const metaslab_ops_t zfs_virtual_ops; int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t, metaslab_t **); diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 4f434291ddbf..8bc96d481d4c 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -179,7 +179,15 @@ typedef struct metaslab_class_allocator { struct metaslab_class { kmutex_t mc_lock; spa_t *mc_spa; - const metaslab_ops_t *mc_ops; + const metaslab_ops_t *mc_ops; + /* + * If this field is set, this is a "virtual" metaslab class. In + * actuality, the allocations will be done by the spa this is pointing + * to, using another pool for our storage. This enables the shared + * SLOG architecture. If this field is set, most of the other fields + * in this metaslab class are not used, and should be unset. 
+ */ + struct spa *mc_virtual; /* * Track the number of metaslab groups that have been initialized diff --git a/include/sys/spa.h b/include/sys/spa.h index ca30b60c0af7..24097203be51 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -754,11 +754,11 @@ extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); -extern int spa_destroy(const char *pool); +extern int spa_destroy(const char *pool, nvlist_t *ounvl); extern int spa_checkpoint(const char *pool); extern int spa_checkpoint_discard(const char *pool); extern int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce); + boolean_t hardforce, nvlist_t *outnvl); extern int spa_reset(const char *pool); extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); @@ -841,6 +841,8 @@ void spa_select_allocator(zio_t *zio); extern kmutex_t spa_namespace_lock; extern avl_tree_t spa_namespace_avl; extern kcondvar_t spa_namespace_cv; +extern avl_tree_t spa_shared_log_avl; +extern kmutex_t spa_shared_log_lock; /* * SPA configuration functions in spa_config.c @@ -1045,7 +1047,8 @@ extern void spa_altroot(spa_t *, char *, size_t); extern uint32_t spa_sync_pass(spa_t *spa); extern char *spa_name(spa_t *spa); extern uint64_t spa_guid(spa_t *spa); -extern uint64_t spa_load_guid(spa_t *spa); +extern uint64_t spa_const_guid(const spa_t *spa); +extern uint64_t spa_load_guid(const spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); @@ -1140,7 +1143,8 @@ extern boolean_t spa_multihost(spa_t *spa); extern uint32_t spa_get_hostid(spa_t *spa); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); extern boolean_t spa_livelist_delete_check(spa_t *spa); - +extern boolean_t 
spa_is_shared_log(const spa_t *spa); +extern boolean_t spa_uses_shared_log(const spa_t *spa); extern boolean_t spa_mmp_remote_host_activity(spa_t *spa); extern spa_mode_t spa_mode(spa_t *spa); @@ -1238,6 +1242,17 @@ extern void spa_export_os(spa_t *spa); extern void spa_activate_os(spa_t *spa); extern void spa_deactivate_os(spa_t *spa); +extern void spa_zil_map_insert(spa_t *spa, objset_t *os, + const blkptr_t *prev_bp, blkptr_t *bp); +extern void spa_zil_map_set_final(spa_t *spa, objset_t *os, blkptr_t *bp); +extern void spa_zil_delete(spa_t *spa, objset_t *os); +extern void spa_zil_header_convert(spa_t *spa, objset_t *os, blkptr_t *bp); +extern void spa_zil_header_mask(spa_t *spa, blkptr_t *bp); +extern spa_t *spa_get_shared_log_pool(spa_t *spa); +extern int spa_recycle_all(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl); +extern int spa_recycle_clients(spa_t *spa, nvlist_t *clients, + boolean_t dryrun, nvlist_t *outnvl); + /* module param call functions */ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS); int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 7811abbb9ce3..8437d26c7845 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -218,6 +218,44 @@ typedef enum spa_config_source { SPA_CONFIG_SRC_MOS /* MOS, but not always from right txg */ } spa_config_source_t; +typedef enum spa_pool_type { + SPA_TYPE_NORMAL = 0, + SPA_TYPE_SHARED_LOG, +} spa_pool_type_t; + +typedef struct spa_zil_update_head { + avl_node_t szuh_avl; + list_t szuh_list; + uint64_t szuh_id; + blkptr_t szuh_chain_head; + boolean_t szuh_set; + /* Only used for the special once-per-pool entry */ + boolean_t szuh_force; +} spa_zil_update_head_t; + +typedef struct spa_zil_update { + list_node_t szu_list; + blkptr_t szu_chain_head; +} spa_zil_update_t; + +typedef struct spa_zil_chain_map_value { + char szcmv_pool_name[ZFS_MAX_DATASET_NAME_LEN]; + blkptr_t szcmv_bp; +} spa_zil_chain_map_value_t; + +typedef struct 
spa_chain_map_os { + avl_node_t scmo_avl; + uint64_t scmo_id; + blkptr_t scmo_chain_head; +} spa_chain_map_os_t; + +typedef struct spa_chain_map_pool { + avl_node_t scmp_avl; + uint64_t scmp_guid; + char scmp_name[ZFS_MAX_DATASET_NAME_LEN]; + avl_tree_t scmp_os_tree; +} spa_chain_map_pool_t; + struct spa { /* * Fields protected by spa_namespace_lock. @@ -225,6 +263,9 @@ struct spa { char spa_name[ZFS_MAX_DATASET_NAME_LEN]; /* pool name */ char *spa_comment; /* comment */ avl_node_t spa_avl; /* node in spa_namespace_avl */ + avl_node_t spa_log_avl; /* node in spa_shared_log_avl */ + /* node in spa_registered_clients */ + list_node_t spa_client_node; nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ nvlist_t *spa_config_splitting; /* config for splitting */ @@ -245,6 +286,13 @@ struct spa { boolean_t spa_is_initializing; /* true while opening pool */ boolean_t spa_is_exporting; /* true while exporting pool */ kthread_t *spa_export_thread; /* valid during pool export */ + /* true if pool's log device is shared log */ + boolean_t spa_uses_shared_log; + /* + * true if pool was imported with MISSING_LOGS and couldn't find + * its shared log pool + */ + boolean_t spa_discarding_shared_log; kthread_t *spa_load_thread; /* loading, no namespace lock */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ @@ -304,6 +352,7 @@ struct spa { boolean_t spa_extreme_rewind; /* rewind past deferred frees */ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ uint64_t spa_scrub_inflight; /* in-flight scrub bytes */ + spa_pool_type_t spa_pool_type; /* in-flight verification bytes */ uint64_t spa_load_verify_bytes; @@ -479,6 +528,17 @@ struct spa { */ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ zfs_refcount_t spa_refcount; /* number of opens */ + + /* Only used if type is shared log */ + kmutex_t spa_chain_map_lock; + avl_tree_t 
spa_chain_map; + list_t spa_registered_clients; + + /* Only used during syncing context if using shared log */ + kmutex_t spa_zil_map_lock; + avl_tree_t spa_zil_map; + list_t spa_zil_deletes; + taskq_t *spa_chain_map_taskq; }; extern char *spa_config_path; @@ -497,6 +557,7 @@ extern void spa_set_deadman_ziotime(hrtime_t ns); extern const char *spa_history_zone(void); extern const char *zfs_active_allocator; extern int param_set_active_allocator_common(const char *val); +extern void spa_set_pool_type(spa_t *); #ifdef __cplusplus } diff --git a/include/sys/zil.h b/include/sys/zil.h index 259f2d03fc05..5398e4373e53 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -583,10 +583,15 @@ typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg, typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap); typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf, struct lwb *lwb, zio_t *zio); +typedef int zil_parse_raw_blk_func_t(spa_t *spa, const blkptr_t *bp, void *arg); +typedef int zil_parse_raw_lr_func_t(spa_t *spa, const lr_t *lr, void *arg); extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, boolean_t decrypt); +extern int zil_parse_raw(spa_t *spa, const blkptr_t *bp, + zil_parse_raw_blk_func_t *parse_func, + zil_parse_raw_lr_func_t *parse_lr_func, void *arg); extern void zil_init(void); extern void zil_fini(void); @@ -614,6 +619,8 @@ extern void zil_commit_impl(zilog_t *zilog, uint64_t oid); extern void zil_remove_async(zilog_t *zilog, uint64_t oid); extern int zil_reset(const char *osname, void *txarg); +extern int zil_clear(struct dsl_pool *dp, + 
struct dsl_dataset *ds, void *txarg); extern int zil_claim(struct dsl_pool *dp, struct dsl_dataset *ds, void *txarg); extern int zil_check_log_chain(struct dsl_pool *dp, @@ -640,6 +647,8 @@ extern void zil_sums_fini(zil_sums_t *zs); extern void zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums); +extern boolean_t zil_shared_log(zilog_t *zl); + extern int zil_replay_disable; #ifdef __cplusplus diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h index 9a34bafc1c77..9908f273d2a5 100644 --- a/include/sys/zil_impl.h +++ b/include/sys/zil_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -190,6 +190,7 @@ struct zilog { kmutex_t zl_lock; /* protects most zilog_t fields */ struct dsl_pool *zl_dmu_pool; /* DSL pool */ spa_t *zl_spa; /* handle for read/write log */ + spa_t *zl_io_spa; /* spa used for log block I/O */ const zil_header_t *zl_header; /* log header buffer */ objset_t *zl_os; /* object set we're logging */ zil_get_data_t *zl_get_data; /* callback to get object content */ diff --git a/include/zfeature_common.h b/include/zfeature_common.h index ac42b5c0cd6b..e7caf770577d 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. 
@@ -86,6 +86,7 @@ typedef enum spa_feature { SPA_FEATURE_FAST_DEDUP, SPA_FEATURE_LONGNAME, SPA_FEATURE_LARGE_MICROZAP, + SPA_FEATURE_SHARED_LOG, SPA_FEATURES } spa_feature_t; diff --git a/lib/libnvpair/libnvpair.abi b/lib/libnvpair/libnvpair.abi index e3eacb195463..cd139b53f0b4 100644 --- a/lib/libnvpair/libnvpair.abi +++ b/lib/libnvpair/libnvpair.abi @@ -1,7 +1,5 @@ - - @@ -405,16 +403,6 @@ - - - - - - - - - - @@ -857,27 +845,16 @@ - - - - - - - - - - - @@ -933,259 +910,27 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1876,98 +1621,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2048,289 +1701,23 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + @@ -2679,83 +2066,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - + - + @@ -2764,12 +2132,12 @@ - - + + @@ -2780,111 +2148,22 @@ - - + + - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + @@ -3124,6 +2403,11 @@ + + + + + @@ -3133,6 +2417,18 @@ + + + + + + + + + + + + @@ -3331,6 +2627,148 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3378,57 +2816,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index 7cb92ac9f3f8..6e3c62918b7b 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -1,6 +1,6 @@ - + @@ -246,28 +246,21 @@ - - - + + - - - - - - - - + + + + + - - - - + @@ -338,6 +331,11 @@ + + + + + @@ -358,6 +356,11 @@ + + + + + @@ -594,211 +597,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - @@ -911,116 +720,31 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - + + + + + + + + @@ -1055,26 +779,6 @@ - - - - - - - - - - - - - - - - - - - - @@ -1127,28 +831,102 @@ + + - + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + + + + - + - + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + - - @@ -1157,55 +935,22 @@ - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + @@ -1222,21 +967,11 @@ - - - - - - - - - - @@ -1249,80 +984,17 @@ + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1342,37 +1014,14 @@ - - - - - - - - - - - - - - - - - - - - - - - @@ -1408,9 +1057,6 @@ - - - @@ -1422,6 +1068,7 @@ + @@ -1512,8 +1159,6 @@ - - @@ -1541,15 +1186,6 @@ - - - - - - - - - @@ -1604,14 +1240,8 @@ - - - - - - @@ -1620,96 +1250,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1838,10 +1378,11 @@ - - - - + + + + + @@ -2090,186 +1631,10 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - @@ -2277,13 +1642,11 @@ - - - - - + + + - + @@ -2293,22 +1656,6 @@ - - - - - - - - - - - - - - - - @@ -2326,12 +1673,40 @@ - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2346,6 +1721,11 @@ + + + + + @@ -2366,9 +1746,29 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index ac9ae233c72d..c8da6db474f1 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -2,14 +2,16 @@ - + - + + + @@ -513,6 +515,7 @@ + @@ -629,7 +632,7 @@ - + @@ -637,61 +640,192 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + - - + + + - - - - - + + + + + - - - - - - + + + + + - - - - - - - - - + + + - - - - + + + + - - - + + + + + + + + + + + + + + + + + + + + + @@ -736,35 +870,11 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - @@ -776,11 +886,6 @@ - - - - - @@ -791,41 +896,40 @@ - - - - - - - - - + + - - - - - + + + + + - - - - + + + + + + + + + + @@ -878,6 +982,11 @@ + + + + + @@ -898,6 +1007,11 @@ + + + + + @@ -1100,6 +1214,11 @@ + + + + + @@ -1123,194 +1242,23 @@ + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - @@ -1421,407 +1369,493 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - - - - - - - - - - - - - - - - - - + + + - - - + + + - - + + - - + + - - - - - - - - - - - - - - - - - - - + + + + - + - - - - - + + - - + + - - - - - + + - - + + - - + + - - - + + - + - + - + - + + + + - + - + - + - - - - + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + - + - + - + - - - - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - + + + - - + + + + - - - + + + + + - - - - + + + + - - - - - + + + + + + + - - - + + + + + + + - - + + + + + - - + + + + - - - - - - + + + + - - - - - + + + + + + + + + + + + - - - - - - + + + + - - + + - - + 
+ + + + - - + + - - + + - - + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + - - + + - - + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1838,7 +1872,6 @@ - @@ -1846,28 +1879,16 @@ - - - - - - - - - - - - - + @@ -1944,124 +1965,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2080,23 +1983,15 @@ - - - - - - - - @@ -2335,6072 +2230,2026 @@ + - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - - + + + - - - + + + + - - - + + + - - - - - - - - - + + + + - - - - + + + + + - - - - + + - - + + - - - - - - - + + + - - - - - - - - - - - - - - + + + + + + + + + + - - - - + + + + + + + + - - - - - - - - - + + + + + - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - + + - - - - - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - + + - - + + - - + + - - + + - - + + + + + - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - + + + - - + + - - - - - + + + - - - - - + + + - - - + + - - - - - - - - - - - - + + + + + - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - + - + - - - + - - - - + + + + - - + + + + - - - - + + + + + - - - + + + - - - - + + + - - - + + + + + + + + + + - - - - + + + + + - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + + + - - - - - - + + + + + - - - - 
+ + + + + + - - - - - - - - - - - - - - + + - - - - - - - + + + + + + + + + + - - - + + + - - - - - + + + + + + + - - - - + + + + - - - + + + + + + - - - - + + + + - - - - + + + + + + - - - - - + + + - - - - - - - - + + + - - - - + + + - - - - - - - + + + - - - + + + + - - - + + + + + - - + + - - - + + - + + - - - + + + - - - + + + + - - + + + + - - - + + + + - - - + + - + - - - - - - - - - - - - - - - - - - + + - - - - - - - - + - - - - - - - - + + + - - - - - - + + + + - - - + + + + - - - - - + + + - - - - - - - + + + - - - - - - - + + + - - - + + + + + - - - - - - + + + + - - - + + + + + - - - + + + + + - - - - - - - - - - - - - - - + + + + - - - - - - - - - + + + + + + - - - - + + + + + - - - - - + + + + + + - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + - + - + + - - - - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + - - + + - - + + - - - - - - - - - - - - + + - + - - + + - - - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - + + + + + - - - - - - - - - + + + + - - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - + + + - - - - - - + + + - - - - - - - - - - + + + + + + + - - - + + + + + + - - - - - - - - - - - - - - + + + + - - - + + + + + - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - - - - + + + + - - - + + + + - - + + - - - + + + - - - + + - - - - - - - - - - - - + + + - - - + + + + + + - - - - + + + - - + + - - - - - + + - - - - - - - + + + - - - - + + + + - - - - - + + + + - - - - + + + - - - - - - + + + + + + + + - + - - + + + + - + + + + + + + - - - + + + + - - - - - + + + + - - - - - - - - - + + + + - - - - - - + + + + + - - - - - - - + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - + + + - - + + + + + - + + + + - - + + + + + - - - + + + + + + + + - - - + + + + - - + + + + + + + - - + + + + - - + + + - - + + + + + - - + + + + + - - + + + + - - + + + + - - + + + - - - - - - + + + - - - + + + - - - - - + + + + + + + + + + + + + + + - - - - + + + + - - - - - + + + + + + - - - - - + + + + - - - - + + + + + - - + + + + + + + - - + + + - - - - + + + + - - + + + + + + - - - + + + + + + + + + + + - - + + + + + - - - - + + + + - - - - + + + + - - - + + + + + + - - - - + + + + + + + + + - - - - + + + + + + + + + + - - - - + + + + - - - + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + - - - + + + + + - - - + + + + + - - - - + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + - - - - - + + + + + + + - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -8413,6 +4262,12 @@ + + + + + + @@ -8445,6 +4300,10 @@ + + + + @@ -8452,6 +4311,18 @@ + + + + + + + + + + + + @@ -8493,6 +4364,12 @@ + + + + + + @@ -8524,6 +4401,13 @@ + + + + + + + @@ -8561,25 +4445,14 @@ + + + + + - - - - - - - - - - - - - - - - @@ -8599,379 +4472,330 @@ + + + + + + + + + - - + + + + + + + + + + - - + + - - + + 
- - + + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - + + - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - + + - - + + + + - - - + + + + + + - - - + + + - - + + - - - - - + + + - - - + + + - - + + + - - - + + + + + + + + + + + + + + + + + + @@ -8984,6 +4808,11 @@ + + + + + @@ -8994,50 +4823,50 @@ - - - - - - - - - - - + + - - - - - - - - - + + + + + + + - - - + + + + + + + + + + + + + + + + + - - - - + @@ -9068,6 +4897,9 @@ + + + @@ -9094,88 +4926,19 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + @@ -9188,19 +4951,8 @@ - - - - - - - - - - - @@ -9209,15 +4961,6 @@ - - - - - - - - - @@ -9229,6 +4972,12 @@ + + + + + + @@ -9241,6 +4990,12 @@ + + + + + + @@ -9284,18 +5039,20 @@ - - - - - - - + + + + + + + + + @@ -9307,6 +5064,19 @@ + + + + + + + + + + + + + @@ -9317,6 +5087,18 @@ + + + + + + + + + + + + @@ -9324,6 +5106,16 @@ + + + + + + + + + + @@ -9344,10 +5136,30 @@ + + + + + + + + + + + + + + + + + + + + @@ -9374,9 +5186,60 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + @@ -9419,24 +5282,6 @@ - - - - - - - - - - - - - - - - - - @@ -9449,35 +5294,26 @@ - - - - + + - - - - - - - - - + + + - - - - - + + + - - - - + + + + - + - + + @@ -9490,23 +5326,32 @@ + + + - - - - - - + + + + + + + + + + + + + + + + + - - - - - @@ -9516,6 +5361,11 @@ + + + + + @@ -9530,9 +5380,6 @@ - - - @@ -9598,16 +5445,6 @@ - - - - - - - - - - @@ -9625,6 +5462,9 @@ + + + @@ -9640,6 +5480,12 @@ + + + + + + @@ -9720,21 +5566,12 @@ - - + - - - - - - - - @@ -9776,6 +5613,12 @@ + + + + + + @@ -9783,6 +5626,24 @@ + + + + + + + + + + + + + + + + + + @@ -9868,6 +5729,24 @@ + + + + + + + + + + + + + + + + + + @@ -9879,6 +5758,22 @@ + + + + + + + + + + + + + + + + @@ -9897,100 +5792,124 @@ + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - + - - - - - - - - - - - + + + - - - - - - - - - - + + + - - - - - - + + + - - - - - + + + - - - + + + @@ -9998,15 +5917,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -10069,12 +6037,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -10095,12 +6111,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -10120,8 +6177,122 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 231bbbd92dbf..44161f279129 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
* Copyright 2019 Joyent, Inc. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright (c) 2013 Martin Matuska. All rights reserved. @@ -3869,6 +3869,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property value(s) specified")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + case EINVAL: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot create filesystem in shared log pool")); + return (zfs_error(hdl, EZFS_SHAREDLOG, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 14410b153130..bb6d5b0d14dd 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -246,6 +246,39 @@ zpool_pool_state_to_name(pool_state_t state) return (gettext("UNKNOWN")); } +struct shared_log_cbdata { + uint64_t guid; + zpool_handle_t *shared_log_pool; +}; + +static int +shared_log_cb(zpool_handle_t *hdl, void *arg) +{ + struct shared_log_cbdata *data = arg; + if (fnvlist_lookup_uint64(hdl->zpool_config, ZPOOL_CONFIG_POOL_GUID) == + data->guid) { + data->shared_log_pool = hdl; + } + return (0); +} + +zpool_handle_t * +zpool_get_shared_log(zpool_handle_t *zhp) +{ + uint64_t guid; + if (nvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_SHARED_LOG_POOL, &guid) != 0) { + return (NULL); + } + struct shared_log_cbdata data; + data.guid = guid; + int err = zpool_iter(zhp->zpool_hdl, shared_log_cb, &data); + if (err != 0) { + return (NULL); + } + return (data.shared_log_pool); +} + /* * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED", * "SUSPENDED", etc). 
@@ -272,6 +305,10 @@ zpool_get_state_str(zpool_handle_t *zhp) vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array( nvroot, ZPOOL_CONFIG_VDEV_STATS, &vsc); str = zpool_state_to_name(vs->vs_state, vs->vs_aux); + zpool_handle_t *shared_log = zpool_get_shared_log(zhp); + if (vs->vs_state == VDEV_STATE_HEALTHY && shared_log != NULL) { + str = zpool_get_state_str(shared_log); + } } return (str); } @@ -1688,23 +1725,48 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str) libzfs_handle_t *hdl = zhp->zpool_hdl; char errbuf[ERRBUFLEN]; - if (zhp->zpool_state == POOL_STATE_ACTIVE && - (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) - return (-1); + nvlist_t *outnvl; + int err = lzc_pool_destroy(zhp->zpool_name, log_str, &outnvl); + if (err == ZFS_ERR_IOC_CMD_UNAVAIL) { + if (zhp->zpool_state == POOL_STATE_ACTIVE && + (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) + == NULL) + return (-1); - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_history = (uint64_t)(uintptr_t)log_str; + (void) strlcpy(zc.zc_name, zhp->zpool_name, + sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; + if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) + err = errno; + else + err = 0; + } - if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { + if (err != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zpool_name); - if (errno == EROFS) { + if (err == EROFS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is read only")); (void) zfs_error(hdl, EZFS_BADDEV, errbuf); + } else if (err == EBUSY && outnvl != NULL) { + nvlist_t *clients = fnvlist_lookup_nvlist(outnvl, + ZPOOL_SHARED_LOG_CLIENTS); + nvpair_t *elem = nvlist_next_nvpair(clients, NULL); + char buf[ERRBUFLEN]; + int idx = snprintf(buf, ERRBUFLEN, "%s", + nvpair_name(elem)); + while ((elem = nvlist_next_nvpair(clients, elem)) + != NULL && idx < ERRBUFLEN) { + idx += snprintf(buf + idx, 
ERRBUFLEN - idx, + ", %s", nvpair_name(elem)); + } + zfs_error_aux(hdl, "pool has active clients: %s", buf); + (void) zfs_error(hdl, EZFS_BUSY, errbuf); + fnvlist_free(outnvl); } else { - (void) zpool_standard_error(hdl, errno, errbuf); + (void) zpool_standard_error(hdl, err, errbuf); } if (zfp) @@ -1904,27 +1966,52 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, { zfs_cmd_t zc = {"\0"}; - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = force; - zc.zc_guid = hardforce; - zc.zc_history = (uint64_t)(uintptr_t)log_str; + nvlist_t *outnvl; + int err = lzc_pool_export(zhp->zpool_name, log_str, force, hardforce, + &outnvl); + if (err == ZFS_ERR_IOC_CMD_UNAVAIL) { - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { - switch (errno) { - case EXDEV: - zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, - "use '-f' to override the following errors:\n" - "'%s' has an active shared spare which could be" - " used by other pools once '%s' is exported."), - zhp->zpool_name, zhp->zpool_name); - return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, - dgettext(TEXT_DOMAIN, "cannot export '%s'"), - zhp->zpool_name)); - default: - return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, - dgettext(TEXT_DOMAIN, "cannot export '%s'"), - zhp->zpool_name)); + (void) strlcpy(zc.zc_name, zhp->zpool_name, + sizeof (zc.zc_name)); + zc.zc_cookie = force; + zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; + + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) + err = errno; + else + err = 0; + } + + if (err == EXDEV) { + zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, + "use '-f' to override the following errors:\n" + "'%s' has an active shared spare which could be" + " used by other pools once '%s' is exported."), + zhp->zpool_name, zhp->zpool_name); + return (zfs_error_fmt(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, + dgettext(TEXT_DOMAIN, "cannot export '%s'"), + 
zhp->zpool_name)); + } else if (err == EBUSY && outnvl != NULL) { + libzfs_handle_t *hdl = zhp->zpool_hdl; + nvlist_t *clients = fnvlist_lookup_nvlist(outnvl, + ZPOOL_SHARED_LOG_CLIENTS); + nvpair_t *elem = nvlist_next_nvpair(clients, NULL); + char buf[ERRBUFLEN]; + int idx = snprintf(buf, ERRBUFLEN, "%s", nvpair_name(elem)); + while ((elem = nvlist_next_nvpair(clients, elem)) != NULL && + idx < ERRBUFLEN) { + idx += snprintf(buf + idx, ERRBUFLEN - idx, ", %s", + nvpair_name(elem)); } + fnvlist_free(outnvl); + zfs_error_aux(hdl, "pool has active clients: %s", buf); + return (zfs_error_fmt(hdl, EZFS_BUSY, dgettext(TEXT_DOMAIN, + "cannot export '%s'"), zhp->zpool_name)); + } else if (err != 0) { + return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot export '%s'"), + zhp->zpool_name)); } return (0); @@ -2365,6 +2452,11 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, "the maximum allowable length")); (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc); break; + case ESRCH: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "shared log pool no longer contains this client")); + (void) zfs_error(hdl, EZFS_NOENT, desc); + break; default: (void) zpool_standard_error(hdl, error, desc); memset(buf, 0, 2048); diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index a2259eee91ca..802c488ee3e3 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -510,6 +510,10 @@ zpool_get_status(zpool_handle_t *zhp, const char **msgid, zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata, compatibility); + if (ret == ZPOOL_STATUS_OK && zpool_get_shared_log(zhp)) { + ret = check_status(zpool_get_shared_log(zhp)->zpool_config, + B_FALSE, errata, compatibility); + } if (msgid != NULL) { if (ret >= NMSGID) diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 6a9c20a2bb88..fd05c9237e07 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ 
b/lib/libzfs_core/libzfs_core.abi @@ -1,7 +1,7 @@ - + @@ -180,6 +180,8 @@ + + @@ -188,6 +190,7 @@ + @@ -224,58 +227,34 @@ - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - - - - - - + + + + + - - - - + + + + + + + + @@ -338,6 +317,11 @@ + + + + + @@ -358,6 +342,11 @@ + + + + + @@ -595,208 +584,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - @@ -817,6 +615,7 @@ + @@ -907,88 +706,32 @@ + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - - + + + + + @@ -1023,26 +766,6 @@ - - - - - - - - - - - - - - - - - - - - @@ -1090,77 +813,127 @@ + + + + + + + - + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + + + + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1181,21 +954,11 @@ - - - - - - - - - - @@ -1208,79 +971,17 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - - - - - - - - - - + + + + @@ -1299,44 +1000,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1365,8 +1033,6 @@ - - @@ -1469,33 +1135,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1618,6 +1257,9 @@ + + + @@ -1665,60 +1307,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2111,707 +1699,35 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + @@ -3202,21 +2118,267 @@ + + + + + + + + + + + + + + + + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index d07fca6cebad..332f01ead5ea 100644 --- 
a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. @@ -1949,3 +1949,39 @@ lzc_ddt_prune(const char *pool, zpool_ddt_prune_unit_t unit, uint64_t amount) return (error); } + +int +lzc_recycle(const char *pool, nvlist_t *clients, boolean_t dryrun, + nvlist_t **outnvl) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_boolean_value(args, ZPOOL_RECYCLE_DRYRUN, dryrun); + if (clients != NULL) + fnvlist_add_nvlist(args, ZPOOL_RECYCLE_CLIENTS, clients); + int err = lzc_ioctl(ZFS_IOC_POOL_RECYCLE, pool, args, outnvl); + fnvlist_free(args); + return (err); +} + +int +lzc_pool_destroy(const char *pool, const char *log_str, nvlist_t **outnvl) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, ZPOOL_HIST_CMD, log_str); + int err = lzc_ioctl(ZFS_IOC_POOL_DESTROY_NEW, pool, args, outnvl); + fnvlist_free(args); + return (err); +} + +int +lzc_pool_export(const char *pool, const char *log_str, boolean_t force, + boolean_t hardforce, nvlist_t **outnvl) +{ + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, ZPOOL_HIST_CMD, log_str); + fnvlist_add_boolean_value(args, ZPOOL_EXPORT_FORCE, force); + fnvlist_add_boolean_value(args, ZPOOL_EXPORT_HARDFORCE, hardforce); + int err = lzc_ioctl(ZFS_IOC_POOL_EXPORT_NEW, pool, args, outnvl); + fnvlist_free(args); + return (err); +} diff --git a/lib/libzfsbootenv/libzfsbootenv.abi b/lib/libzfsbootenv/libzfsbootenv.abi index 5903d5dcbe21..458486a2c571 100644 --- a/lib/libzfsbootenv/libzfsbootenv.abi +++ b/lib/libzfsbootenv/libzfsbootenv.abi @@ -1,6 +1,6 @@ - + @@ -16,6 +16,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -25,55 +86,9 
@@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -167,413 +182,15 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 77fa0ce38b2a..225ca973a6e6 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -21,7 +21,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2024 by Delphix. All rights reserved. * Copyright 2015 RackTop Systems. * Copyright (c) 2016, Intel Corporation. 
* Copyright (c) 2021, Colm Buckley @@ -635,6 +635,18 @@ get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok, ZPOOL_CONFIG_HOSTNAME, hostname); } + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_SHARED_LOG_POOL, &guid) == 0) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_SHARED_LOG_POOL, guid); + } + + if (fnvlist_lookup_boolean(tmp, + ZPOOL_CONFIG_IS_SHARED_LOG)) { + fnvlist_add_boolean(config, + ZPOOL_CONFIG_IS_SHARED_LOG); + } + config_seen = B_TRUE; } @@ -1526,6 +1538,11 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg, iarg->guid == this_guid; } if (matched) { + if (iarg->shared_log_guid) { + fnvlist_add_uint64(config, + ZPOOL_CONFIG_SHARED_LOG_POOL, + iarg->shared_log_guid); + } /* * Verify all remaining entries can be opened * exclusively. This will prune all underlying diff --git a/man/man7/zpoolconcepts.7 b/man/man7/zpoolconcepts.7 index 18dfca6dc8ac..9007be8d0798 100644 --- a/man/man7/zpoolconcepts.7 +++ b/man/man7/zpoolconcepts.7 @@ -19,7 +19,7 @@ .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. @@ -178,6 +178,13 @@ However, raidz vdev types are not supported for the intent log. For more information, see the .Sx Intent Log section. +.It Sy shared log +A separate ZFS storage pool used as a shared intent log device. +Only one shared log can be specified at pool creation or import, and a normal +log device cannot also be specified. +For more information, see the +.Sx Intent Log +section. .It Sy dedup A device solely dedicated for deduplication tables. 
The redundancy of this device should match the redundancy of the other normal @@ -395,6 +402,9 @@ In addition, log devices are imported and exported as part of the pool that contains them. Mirrored devices can be removed by specifying the top-level mirror vdev. . +A shared log pool can be used as a log device; this pool can be used by +several different "client" pools to provide easier management of space +to be used for the intent log. .Ss Cache Devices Devices can be added to a storage pool as .Qq cache devices . diff --git a/man/man8/zpool-create.8 b/man/man8/zpool-create.8 index 8449520944fb..e2cd6653c4be 100644 --- a/man/man8/zpool-create.8 +++ b/man/man8/zpool-create.8 @@ -19,7 +19,7 @@ .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. @@ -37,8 +37,9 @@ .Sh SYNOPSIS .Nm zpool .Cm create -.Op Fl dfn +.Op Fl dfnL .Op Fl m Ar mountpoint +.Op Fl l Ar pool .Oo Fl o Ar property Ns = Ns Ar value Oc Ns … .Oo Fl o Sy feature@ Ns Ar feature Ns = Ns Ar value Oc .Op Fl o Ar compatibility Ns = Ns Sy off Ns | Ns Sy legacy Ns | Ns Ar file Ns Oo , Ns Ar file Oc Ns … @@ -167,6 +168,18 @@ Displays the configuration that would be used without actually creating the pool. The actual pool creation can still fail due to insufficient privileges or device sharing. +.It Fl L +Create the pool as a shared log pool. +Shared log pools cannot have filesystems or ZVOLs created in them, but they +can be used as a virtual log device by several other pools, allowing more +efficient use of physical log devices. +Only one shared log pool can be imported on the system at a given +time. +.It Fl l Ar pool +Create the pool using the provided pool as the log device. 
+The provided pool must be a shared log pool (created using the +.Fl L +flag), and no other log devices can be specified as part of the vdev tree. .It Fl o Ar property Ns = Ns Ar value Sets the given pool properties. See diff --git a/man/man8/zpool-import.8 b/man/man8/zpool-import.8 index dab6e1f55771..a794edfbc47b 100644 --- a/man/man8/zpool-import.8 +++ b/man/man8/zpool-import.8 @@ -19,7 +19,7 @@ .\" CDDL HEADER END .\" .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. -.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved. +.\" Copyright (c) 2012, 2023 by Delphix. All rights reserved. .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved. .\" Copyright (c) 2017 Datto Inc. .\" Copyright (c) 2018 George Melikov. All Rights Reserved. @@ -50,8 +50,9 @@ .Op Fl R Ar root .Nm zpool .Cm import -.Op Fl Dflmt +.Op Fl Dflt .Op Fl F Op Fl nTX +.Op Fl m Op Fl L Ar pool .Op Fl -rewind-to-checkpoint .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device .Op Fl o Ar mntopts @@ -256,8 +257,9 @@ health of your pool and should only be used as a last resort. .It Xo .Nm zpool .Cm import -.Op Fl Dflmt +.Op Fl Dflt .Op Fl F Op Fl nTX +.Op Fl m Op Fl L Ar pool .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir Ns | Ns Ar device .Op Fl o Ar mntopts .Oo Fl o Ar property Ns = Ns Ar value Oc Ns … @@ -330,6 +332,12 @@ encrypted datasets will be left unavailable until the keys are loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. +.It Fl L +Causes the pool to switch to using the specified shared log pool when +imported. +Requires the +.Fl m +flag. 
.It Fl n Used with the .Fl F diff --git a/man/man8/zpool-recycle.8 b/man/man8/zpool-recycle.8 new file mode 100644 index 000000000000..33fcefc16753 --- /dev/null +++ b/man/man8/zpool-recycle.8 @@ -0,0 +1,70 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or https://opensource.org/licenses/CDDL-1.0. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2023 by Delphix. All rights reserved. +.\" +.Dd February 13, 2023 +.Dt ZPOOL-RECYCLE 8 +.Os +. +.Sh NAME +.Nm zpool-recycle +.Nd recycle space used by shared log ZIL chains +.Sh SYNOPSIS +.Nm zpool +.Cm recycle +.Op Fl n +.Op Ar pool +. +.Sh DESCRIPTION +Client pools (from +.Nm zpool +.Cm create +.Fl l ) +store their ZIL logs on the shared log pool (from +.Nm zpool +.Cm create +.Fl L ) . +When a client pool is deleted with +.Nm zpool +.Cm destroy , +any space used in the shared log pool +is reclaimed. +However, if a client pool is exported and will not be imported again +(e.g. because the disks were damaged or removed), any space associated with +it in the shared log pool remains allocated. +.Nm zpool +.Cm recycle +will reclaim space in the shared log pool that is referenced by any +non-imported client pools.
+These non-imported client pools will have their logs deleted; if they are +subsequently imported, +.Nm zpool +.Cm import +.Fl m +will need to be used to ignore the missing logs, discarding any recent +transactions. +. +.Sh OPTIONS +.Bl -tag -width "-n" +.It Fl n +Do not actually perform any deletions, just print out the list of pool +GUIDs that would be affected. +.El diff --git a/module/os/freebsd/zfs/spa_os.c b/module/os/freebsd/zfs/spa_os.c index 1b9f1a4ec9dc..3bc2f271f32e 100644 --- a/module/os/freebsd/zfs/spa_os.c +++ b/module/os/freebsd/zfs/spa_os.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. */ @@ -246,6 +246,8 @@ spa_import_rootpool(const char *name, bool checkpointrewind) spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); + if (error == 0) + spa_set_pool_type(spa); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 96f0086d7858..75d5b32371ff 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved. 
@@ -760,6 +760,11 @@ zpool_feature_init(void) ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + zfeature_register(SPA_FEATURE_SHARED_LOG, + "com.delphix:shared_log", "shared_log", + "Support for shared log pools.", 0, ZFEATURE_TYPE_BOOLEAN, NULL, + sfeatures); + { static const spa_feature_t longname_deps[] = { SPA_FEATURE_EXTENSIBLE_DATASET, diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 8f00e6577bc5..8e08e4e19f58 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2020 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. @@ -664,6 +664,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (ds == NULL || !ds->ds_is_snapshot) os->os_zil_header = os->os_phys->os_zil_header; + if (spa_uses_shared_log(spa)) + spa_zil_header_convert(spa, os, &os->os_zil_header.zh_log); os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { @@ -1198,6 +1200,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) const char *tail; int error; + if (spa_is_shared_log(dp->dp_spa)) + return (SET_ERROR(EINVAL)); + if (strchr(doca->doca_name, '@') != NULL) return (SET_ERROR(EINVAL)); @@ -1726,6 +1731,10 @@ sync_meta_dnode_task(void *arg) */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; + if (os->os_spa->spa_uses_shared_log) { + spa_zil_header_mask(os->os_spa, + &os->os_phys->os_zil_header.zh_log); + } zio_nowait(soa->soa_zio); mutex_destroy(&soa->soa_mutex); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 3fdcebdff918..01c8228f010f 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -21,7 +21,7 @@ 
/* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2023 by Delphix. All rights reserved. * Copyright (c) 2024, Klara, Inc. */ @@ -76,6 +76,14 @@ dmu_tx_create_dd(dsl_dir_t *dd) return (tx); } +dmu_tx_t * +dmu_tx_create_mos(dsl_pool_t *dp) +{ + dmu_tx_t *tx = dmu_tx_create_dd(dp->dp_mos_dir); + tx->tx_objset = dp->dp_meta_objset; + return (tx); +} + dmu_tx_t * dmu_tx_create(objset_t *os) { diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index b2b925b135f7..18fd6911b9cb 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -726,7 +726,8 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, * It's a block in the intent log. It has no * accounting, so just free it. */ - dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); + if (!zil_shared_log(zilog)) + dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >, diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 1b60fa620b8d..b7b4fbe86eb4 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2022 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2014 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. @@ -1304,7 +1304,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, * refreservation values. Also, if checkrefquota is set, test if * allocating this space would exceed the dataset's refquota. 
*/ - if (first && tx->tx_objset) { + if (first && tx->tx_objset && tx->tx_objset->os_dsl_dataset) { int error; dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 5ae96882935c..b6d48ac5dbec 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. @@ -170,6 +170,9 @@ static int zfs_zil_clean_taskq_nthr_pct = 100; static int zfs_zil_clean_taskq_minalloc = 1024; static int zfs_zil_clean_taskq_maxalloc = 1024 * 1024; +static unsigned int chain_map_zap_default_bs = 17; +static unsigned int chain_map_zap_default_ibs = 15; + int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { @@ -360,6 +363,14 @@ dsl_pool_open(dsl_pool_t *dp) if (err) goto out; + if (spa_is_shared_log(dp->dp_spa)) { + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHAIN_MAP_OBJ, sizeof (uint64_t), 1, + &dp->dp_chain_map_obj); + if (err != 0) + goto out; + } + err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: @@ -548,6 +559,17 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops __attribute__((unused)), #endif dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); + if (spa_is_shared_log(spa)) { + dp->dp_chain_map_obj = zap_create_flags(dp->dp_meta_objset, 0, + ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY | + ZAP_FLAG_PRE_HASHED_KEY, DMU_OTN_ZAP_METADATA, + chain_map_zap_default_bs, chain_map_zap_default_ibs, + DMU_OT_NONE, 0, tx); + VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_CHAIN_MAP_OBJ, sizeof (uint64_t), 1, + &dp->dp_chain_map_obj, tx)); + } + 
dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3bd6e93e93a4..c446183c7896 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. @@ -1660,6 +1660,12 @@ static metaslab_ops_t metaslab_allocators[] = { { "new-dynamic", metaslab_ndf_alloc }, }; +const metaslab_ops_t zfs_virtual_ops = { + "virtual", + NULL, + METASLAB_TYPE_VIRTUAL +}; + static int spa_find_allocator_byname(const char *val) { @@ -2866,6 +2872,10 @@ metaslab_fini(metaslab_t *msp) range_tree_destroy(msp->ms_unflushed_frees); for (int t = 0; t < TXG_SIZE; t++) { + if (spa_is_shared_log(spa) && + spa_load_state(spa) != SPA_LOAD_NONE) { + range_tree_vacate(msp->ms_allocating[t], NULL, NULL); + } range_tree_destroy(msp->ms_allocating[t]); } for (int t = 0; t < TXG_DEFER_SIZE; t++) { @@ -5845,6 +5855,19 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, int ndvas, uint64_t txg, blkptr_t *hintbp, int flags, zio_alloc_list_t *zal, zio_t *zio, int allocator) { + if (mc->mc_ops->msop_type == METASLAB_TYPE_VIRTUAL) { + ASSERT3P(mc->mc_virtual, !=, NULL); + spa_t *target_spa = mc->mc_virtual; + dmu_tx_t *tx = dmu_tx_create_mos(target_spa->spa_dsl_pool); + VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE)); + uint64_t target_txg = dmu_tx_get_txg(tx); + int ret = metaslab_alloc(target_spa, + spa_normal_class(target_spa), psize, bp, ndvas, target_txg, + hintbp, flags, zal, zio, allocator); + dmu_tx_commit(tx); + return (ret); + } + dva_t *dva = bp->blk_dva; dva_t *hintdva = (hintbp != NULL) ? 
hintbp->blk_dva : NULL; int error = 0; @@ -5861,7 +5884,7 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, } ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); - ASSERT(BP_GET_NDVAS(bp) == 0); + ASSERT0(BP_GET_NDVAS(bp)); ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); ASSERT3P(zal, !=, NULL); @@ -5887,8 +5910,8 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, DVA_GET_VDEV(&dva[d]), zio, flags, allocator); } } - ASSERT(error == 0); - ASSERT(BP_GET_NDVAS(bp) == ndvas); + ASSERT0(error); + ASSERT3U(BP_GET_NDVAS(bp), ==, ndvas); spa_config_exit(spa, SCL_ALLOC, FTAG); diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c index 5174e2c46633..ce91cb00af9a 100644 --- a/module/zfs/range_tree.c +++ b/module/zfs/range_tree.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2013, 2019 by Delphix. All rights reserved. + * Copyright (c) 2013, 2023 by Delphix. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. 
*/ @@ -618,7 +618,7 @@ range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size) { range_seg_t *rs = range_tree_find(rt, off, size); if (rs != NULL) - panic("segment already in tree; rs=%p", (void *)rs); + panic("segment already in tree; rt=%px rs=%px", rt, (void *)rs); } boolean_t diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 6b8c7ee93daa..163a9c8ebdea 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -152,6 +152,11 @@ typedef struct zio_taskq_info { uint_t zti_count; } zio_taskq_info_t; +typedef struct zil_delete_entry { + list_node_t zde_node; + uint64_t zde_guid; +} zil_delete_entry_t; + static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { "iss", "iss_h", "int", "int_h" }; @@ -196,6 +201,9 @@ static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport); static void spa_vdev_resilver_done(spa_t *spa); +static void spa_chain_map_update(spa_t *spa); +static void spa_cleanup_pool(spa_t *client); +void spa_zil_delete_impl(spa_t *spa, uint64_t id); /* * Percentage of all CPUs that can be used by the metaslab preload taskq. 
@@ -1039,6 +1047,9 @@ spa_change_guid(spa_t *spa, const uint64_t *guidp) uint64_t guid; int error; + if (spa_is_shared_log(spa) || spa_uses_shared_log(spa)) + return (ENOTSUP); + mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); @@ -1087,6 +1098,33 @@ spa_change_guid(spa_t *spa, const uint64_t *guidp) * ========================================================================== */ +static int +spa_chain_map_os_compare(const void *a, const void *b) +{ + const spa_chain_map_os_t *ca = (const spa_chain_map_os_t *)a; + const spa_chain_map_os_t *cb = (const spa_chain_map_os_t *)b; + + return (TREE_CMP(ca->scmo_id, cb->scmo_id)); +} + +static int +spa_chain_map_pool_compare(const void *a, const void *b) +{ + const spa_chain_map_pool_t *ca = (const spa_chain_map_pool_t *)a; + const spa_chain_map_pool_t *cb = (const spa_chain_map_pool_t *)b; + + return (TREE_CMP(ca->scmp_guid, cb->scmp_guid)); +} + +static int +spa_zil_update_head_compare(const void *a, const void *b) +{ + const spa_zil_update_head_t *ca = (const spa_zil_update_head_t *)a; + const spa_zil_update_head_t *cb = (const spa_zil_update_head_t *)b; + + return (TREE_CMP(ca->szuh_id, cb->szuh_id)); +} + static int spa_error_entry_compare(const void *a, const void *b) { @@ -1669,23 +1707,95 @@ spa_thread(void *arg) } #endif -extern metaslab_ops_t *metaslab_allocator(spa_t *spa); +/* + * Returns with the spa_chain_map_lock held. This prevents the shared log + * pool from being exported or deleted while a pool is being activated that + * depends on it. 
+ */ +static int +get_shared_log_pool(nvlist_t *config, spa_t **out) +{ + if (config == 0) + return (0); + uint64_t guid; + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL, &guid)) + return (0); + spa_t *search = kmem_zalloc(sizeof (spa_t), KM_SLEEP); + search->spa_config_guid = guid; + mutex_enter(&spa_shared_log_lock); + spa_t *result = avl_find(&spa_shared_log_avl, search, NULL); + kmem_free(search, sizeof (*search)); + if (!result) { + mutex_exit(&spa_shared_log_lock); + return (ENOENT); + } + mutex_enter(&result->spa_chain_map_lock); + mutex_exit(&spa_shared_log_lock); + *out = result; + + avl_tree_t *t = &result->spa_chain_map; + spa_chain_map_pool_t *search_scmp = kmem_zalloc(sizeof (*search_scmp), + KM_SLEEP); + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid)) + return (0); + + search_scmp->scmp_guid = guid; + spa_chain_map_pool_t *result_scmp = avl_find(t, search_scmp, NULL); + kmem_free(search_scmp, sizeof (*search_scmp)); + if (!result_scmp) { + return (ESRCH); + } + return (0); +} + +extern metaslab_ops_t *metaslab_allocator(spa_t *shared_log); /* * Activate an uninitialized pool. 
*/ -static void -spa_activate(spa_t *spa, spa_mode_t mode) +static int +spa_activate(spa_t *spa, nvlist_t *config, spa_mode_t mode, boolean_t creating) { metaslab_ops_t *msp = metaslab_allocator(spa); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); + boolean_t missing_logs = spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG; + + int error = 0; + spa_t *shared_log = NULL; + if (strcmp(spa->spa_name, TRYIMPORT_NAME) != 0 && + (error = get_shared_log_pool(config, &shared_log)) != 0) { + // We handle the ENOENT case in spa_check_for_missing_logs + if (missing_logs && (error == ENOENT || error == ESRCH)) { + spa->spa_discarding_shared_log = B_TRUE; + error = 0; + } + if (error == ESRCH) { + if (creating) + error = 0; + else + mutex_exit(&shared_log->spa_chain_map_lock); + } + if (error) + return (error); + } spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_read_spacemaps = spa_mode_readable_spacemaps; spa->spa_normal_class = metaslab_class_create(spa, msp); - spa->spa_log_class = metaslab_class_create(spa, msp); + if (shared_log != NULL) { + list_insert_tail(&shared_log->spa_registered_clients, spa); + mutex_exit(&shared_log->spa_chain_map_lock); + + spa->spa_log_class = metaslab_class_create(spa, + &zfs_virtual_ops); + spa->spa_uses_shared_log = B_TRUE; + spa->spa_log_class->mc_virtual = shared_log; + } else { + spa->spa_log_class = metaslab_class_create(spa, + msp); + } spa->spa_embedded_log_class = metaslab_class_create(spa, msp); spa->spa_special_class = metaslab_class_create(spa, msp); spa->spa_dedup_class = metaslab_class_create(spa, msp); @@ -1736,6 +1846,8 @@ spa_activate(spa_t *spa, spa_mode_t mode) offsetof(objset_t, os_evicting_node)); list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_state_dirty_node)); + list_create(&spa->spa_zil_deletes, sizeof (zil_delete_entry_t), + offsetof(zil_delete_entry_t, zde_node)); txg_list_create(&spa->spa_vdev_txg_list, spa, offsetof(struct vdev, vdev_txg_node)); @@ -1749,6 
+1861,18 @@ spa_activate(spa_t *spa, spa_mode_t mode) avl_create(&spa->spa_errlist_healed, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + avl_create(&spa->spa_chain_map, + spa_chain_map_pool_compare, sizeof (spa_chain_map_pool_t), + offsetof(spa_chain_map_pool_t, scmp_avl)); + avl_create(&spa->spa_zil_map, + spa_zil_update_head_compare, sizeof (spa_zil_update_head_t), + offsetof(spa_zil_update_head_t, szuh_avl)); + if (spa->spa_uses_shared_log) { + spa_zil_update_head_t *entry = kmem_zalloc(sizeof (*entry), + KM_SLEEP); + entry->szuh_force = B_TRUE; + avl_add(&spa->spa_zil_map, entry); + } spa_activate_os(spa); @@ -1793,6 +1917,15 @@ spa_activate(spa_t *spa, spa_mode_t mode) */ spa->spa_upgrade_taskq = taskq_create("z_upgrade", 100, defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT); + + if (shared_log != NULL || fnvlist_lookup_boolean(config, + ZPOOL_CONFIG_IS_SHARED_LOG)) { + spa->spa_chain_map_taskq = taskq_create("z_chain_map", 100, + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | + TASKQ_THREADS_CPU_PCT); + } + + return (0); } /* @@ -1829,6 +1962,11 @@ spa_deactivate(spa_t *spa) spa->spa_upgrade_taskq = NULL; } + if (spa->spa_chain_map_taskq) { + taskq_destroy(spa->spa_chain_map_taskq); + spa->spa_chain_map_taskq = NULL; + } + txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_config_dirty_list); @@ -1852,6 +1990,12 @@ spa_deactivate(spa_t *spa) metaslab_class_destroy(spa->spa_normal_class); spa->spa_normal_class = NULL; + spa_t *shared_log; + if ((shared_log = spa_get_shared_log_pool(spa)) != NULL) { + mutex_enter(&shared_log->spa_chain_map_lock); + list_remove(&shared_log->spa_registered_clients, spa); + mutex_exit(&shared_log->spa_chain_map_lock); + } metaslab_class_destroy(spa->spa_log_class); spa->spa_log_class = NULL; @@ -2590,6 +2734,10 @@ spa_check_for_missing_logs(spa_t *spa) vdev_dbgmsg_print_tree(rvd, 2); return (SET_ERROR(ENXIO)); } + } else if (spa->spa_discarding_shared_log) { + 
spa_set_log_state(spa, SPA_LOG_CLEAR); + spa_load_note(spa, "shared log pool is " + "missing, ZIL is dropped."); } else { for (uint64_t c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; @@ -2719,6 +2867,7 @@ typedef struct spa_load_error { boolean_t sle_verify_data; uint64_t sle_meta_count; uint64_t sle_data_count; + spa_t *sle_spa; } spa_load_error_t; static void @@ -2728,7 +2877,7 @@ spa_load_verify_done(zio_t *zio) spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; - spa_t *spa = zio->io_spa; + spa_t *spa = sle->sle_spa; abd_free(zio->io_abd); if (error) { @@ -2759,8 +2908,9 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, { zio_t *rio = arg; spa_load_error_t *sle = rio->io_private; + spa_t *io_spa = spa; - (void) zilog, (void) dnp; + (void) dnp; /* * Note: normally this routine will not be called if @@ -2769,6 +2919,8 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ if (!spa_load_verify_metadata) return (0); + if (zilog && zil_shared_log(zilog)) + io_spa = spa_get_shared_log_pool(spa); /* * Sanity check the block pointer in order to detect obvious damage @@ -2776,7 +2928,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, * When damaged consider it to be a metadata error since we cannot * trust the BP_GET_TYPE and BP_GET_LEVEL values. 
*/ - if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { + if (!zfs_blkptr_verify(io_spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) { atomic_inc_64(&sle->sle_meta_count); return (0); } @@ -2799,8 +2951,8 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, spa->spa_load_verify_bytes += size; mutex_exit(&spa->spa_scrub_lock); - zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, - spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + zio_nowait(zio_read(rio, io_spa, bp, abd_alloc_for_io(size, B_FALSE), + size, spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); @@ -2846,6 +2998,7 @@ spa_load_verify(spa_t *spa) */ sle.sle_verify_data = (policy.zlp_rewind & ZPOOL_REWIND_MASK) || (policy.zlp_maxdata < UINT64_MAX); + sle.sle_spa = spa; rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); @@ -3983,6 +4136,7 @@ spa_ld_parse_config(spa_t *spa, spa_import_type_t type) parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); + spa_set_pool_type(spa); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { @@ -4361,6 +4515,7 @@ spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); } + spa_set_pool_type(spa); /* * Vdev paths in the MOS may be obsolete. 
If the untrusted config was @@ -4650,6 +4805,145 @@ spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep) return (0); } +struct load_chain_map_arg { + blkptr_t *bp; + spa_t *spa; + uint64_t *error; +}; + +static int +load_chain_map_claim_blk_cb(spa_t *spa, const blkptr_t *bp, void *arg) +{ + (void) arg; + int error = metaslab_claim(spa, bp, + spa_get_dsl(spa)->dp_tx.tx_open_txg); + if (error == ENOENT) + error = 0; + return (error); +} + +static int +load_chain_map_claim_lr_cb(spa_t *spa, const lr_t *lrc, void *arg) +{ + (void) arg; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + if (lrc->lrc_txtype != TX_WRITE || BP_IS_HOLE(bp)) + return (0); + int error = metaslab_claim(spa, bp, + spa_get_dsl(spa)->dp_tx.tx_open_txg); + if (error == ENOENT) + error = 0; + return (error); +} + +static void +load_chain_map_cb(void *arg) +{ + struct load_chain_map_arg *lcmca = arg; + blkptr_t *bp = lcmca->bp; + spa_t *spa = lcmca->spa; + int error = zil_parse_raw(spa, bp, load_chain_map_claim_blk_cb, + load_chain_map_claim_lr_cb, NULL); + if (error != 0 && error != ECKSUM) + atomic_store_64((volatile uint64_t *)lcmca->error, error); + kmem_free(lcmca, sizeof (*lcmca)); +} + +noinline static int +spa_load_chain_map(spa_t *spa) +{ + int error = 0; + uint64_t dispatch_error = 0; + uint64_t chain_map_zap = spa->spa_dsl_pool->dp_chain_map_obj; + if (!spa_is_shared_log(spa)) + return (error); + ASSERT3U(chain_map_zap, !=, 0); + + zap_cursor_t zc; + zap_attribute_t *attr = zap_attribute_alloc(); + objset_t *os = spa->spa_dsl_pool->dp_meta_objset; + spa_zil_chain_map_value_t *szcmv = kmem_alloc(sizeof (*szcmv), + KM_SLEEP); + for (zap_cursor_init(&zc, os, chain_map_zap); + zap_cursor_retrieve(&zc, attr) == 0; zap_cursor_advance(&zc)) { + uint64_t pool_guid = ((uint64_t *)&attr->za_name)[0]; + uint64_t os_guid = ((uint64_t *)&attr->za_name)[1]; + error = zap_lookup_uint64(os, chain_map_zap, + (uint64_t *)&attr->za_name, 2, sizeof (uint64_t), + sizeof 
(*szcmv) / sizeof (uint64_t), + szcmv); + if (error != 0) { + break; + } + avl_index_t where; + spa_chain_map_pool_t search; + search.scmp_guid = pool_guid; + spa_chain_map_pool_t *pool_entry = + avl_find(&spa->spa_chain_map, &search, &where); + if (pool_entry == NULL) { + pool_entry = kmem_alloc(sizeof (*pool_entry), + KM_SLEEP); + pool_entry->scmp_guid = pool_guid; + avl_create(&pool_entry->scmp_os_tree, + spa_chain_map_os_compare, + sizeof (spa_chain_map_os_t), + offsetof(spa_chain_map_os_t, scmo_avl)); + strlcpy(pool_entry->scmp_name, + szcmv->szcmv_pool_name, ZFS_MAX_DATASET_NAME_LEN); + avl_insert(&spa->spa_chain_map, pool_entry, where); + } + + if (os_guid == 0) { + /* + * This is the dummy marker to make sure we know about + * the pool; no need to add an os-specific entry + */ + continue; + } + + spa_chain_map_os_t *os_entry = kmem_alloc(sizeof (*os_entry), + KM_SLEEP); + os_entry->scmo_id = os_guid; + os_entry->scmo_chain_head = szcmv->szcmv_bp; + avl_add(&pool_entry->scmp_os_tree, os_entry); + struct load_chain_map_arg *arg = kmem_alloc(sizeof (*arg), + KM_SLEEP); + arg->bp = &os_entry->scmo_chain_head; + arg->spa = spa; + arg->error = &dispatch_error; + (void) taskq_dispatch(spa->spa_chain_map_taskq, + load_chain_map_cb, arg, TQ_SLEEP); + } + kmem_free(szcmv, sizeof (*szcmv)); + zap_attribute_free(attr); + attr = NULL; + + if (error != 0) { + void *cookie = NULL; + spa_chain_map_pool_t *node; + + while ((node = avl_destroy_nodes(&spa->spa_chain_map, + &cookie)) != NULL) { + void *cookie2 = NULL; + spa_chain_map_os_t *node2; + while ((node2 = avl_destroy_nodes(&node->scmp_os_tree, + &cookie2)) != NULL) { + kmem_free(node2, sizeof (*node2)); + } + avl_destroy(&node->scmp_os_tree); + kmem_free(node, sizeof (*node)); + } + avl_destroy(&spa->spa_chain_map); + } + zap_cursor_fini(&zc); + taskq_wait(spa->spa_chain_map_taskq); + int dispatch_value = atomic_load_64(&dispatch_error); + if (dispatch_value != 0 && error == 0) + error = dispatch_value; + return 
(error); +} + static int spa_ld_load_special_directories(spa_t *spa) { @@ -4944,6 +5238,21 @@ spa_ld_load_vdev_metadata(spa_t *spa) return (0); } +static int +spa_ld_load_chain_map(spa_t *spa) +{ + int error = 0; + vdev_t *rvd = spa->spa_root_vdev; + + error = spa_load_chain_map(spa); + if (error != 0) { + spa_load_failed(spa, "spa_load_chain_map failed [error=%d]", + error); + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + } + return (0); +} + static int spa_ld_load_dedup_tables(spa_t *spa) { @@ -5035,10 +5344,19 @@ spa_ld_claim_log_blocks(spa_t *spa) */ spa->spa_claiming = B_TRUE; - tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); - (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, - zil_claim, tx, DS_FIND_CHILDREN); - dmu_tx_commit(tx); + if (!spa->spa_uses_shared_log) { + tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); + (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_claim, tx, DS_FIND_CHILDREN); + dmu_tx_commit(tx); + } else if (spa_get_log_state(spa) == SPA_LOG_CLEAR) { + ASSERT(spa->spa_discarding_shared_log); + tx = dmu_tx_create_assigned(dp, spa_first_txg(spa)); + (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj, + zil_clear, tx, DS_FIND_CHILDREN); + dmu_tx_commit(tx); + spa->spa_discarding_shared_log = B_FALSE; + } spa->spa_claiming = B_FALSE; @@ -5085,7 +5403,7 @@ spa_ld_prepare_for_reload(spa_t *spa) spa_unload(spa); spa_deactivate(spa); - spa_activate(spa, mode); + VERIFY0(spa_activate(spa, spa->spa_config, mode, B_FALSE)); /* * We save the value of spa_async_suspended as it gets reset to 0 by @@ -5466,6 +5784,11 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) if (error != 0) goto fail; + spa_import_progress_set_notes(spa, "Loading chain map"); + error = spa_ld_load_chain_map(spa); + if (error != 0) + goto fail; + spa_import_progress_set_notes(spa, "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) @@ -5615,6 +5938,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t 
type, const char **ereport) (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); + spa_cleanup_pool(spa); + /* * Clean up any stale temporary dataset userrefs. */ @@ -5656,7 +5981,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state) spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; - spa_activate(spa, mode); + VERIFY0(spa_activate(spa, spa->spa_config, mode, B_FALSE)); spa_async_suspend(spa); spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", @@ -5823,7 +6148,14 @@ spa_open_common(const char *pool, spa_t **spapp, const void *tag, if (policy.zlp_rewind & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; - spa_activate(spa, spa_mode_global); + error = spa_activate(spa, spa->spa_config, spa_mode_global, + B_FALSE); + if (error != 0) { + spa_remove(spa); + if (locked) + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(error)); + } if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; @@ -6397,6 +6729,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, boolean_t has_features; boolean_t has_encryption; boolean_t has_allocclass; + boolean_t has_shared_log; spa_feature_t feat; const char *feat_name; const char *poolname; @@ -6421,11 +6754,19 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, */ nvl = fnvlist_alloc(); fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); + if (fnvlist_lookup_boolean(nvroot, ZPOOL_CONFIG_IS_SHARED_LOG)) + fnvlist_add_boolean(nvl, ZPOOL_CONFIG_IS_SHARED_LOG); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); spa = spa_add(poolname, nvl, altroot); fnvlist_free(nvl); - spa_activate(spa, spa_mode_global); + error = spa_activate(spa, nvroot, spa_mode_global, B_TRUE); + if (error != 0) { + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + + } if (props && (error = spa_prop_validate(spa, props))) { spa_deactivate(spa); @@ -6440,9 +6781,11 @@ spa_create(const char *pool, nvlist_t 
*nvroot, nvlist_t *props, if (poolname != pool) spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; + has_features = B_FALSE; has_encryption = B_FALSE; has_allocclass = B_FALSE; + has_shared_log = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { if (zpool_prop_feature(nvpair_name(elem))) { @@ -6454,9 +6797,19 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, has_encryption = B_TRUE; if (feat == SPA_FEATURE_ALLOCATION_CLASSES) has_allocclass = B_TRUE; + if (feat == SPA_FEATURE_SHARED_LOG) + has_shared_log = B_TRUE; } } + if (!has_shared_log && (spa_uses_shared_log(spa) || + fnvlist_lookup_boolean(nvroot, ZPOOL_CONFIG_IS_SHARED_LOG))) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(ENOTSUP)); + } + /* verify encryption params, if they were provided */ if (dcp != NULL) { error = spa_create_check_encryption_params(dcp, has_encryption); @@ -6507,6 +6860,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); + if (error == 0) + spa_set_pool_type(spa); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); @@ -6639,7 +6994,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, sizeof (uint64_t), 1, &obj, tx) != 0) { cmn_err(CE_PANIC, "failed to add bpobj"); } - VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, + VERIFY0(bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj)); /* @@ -6667,6 +7022,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, for (int i = 0; i < ndraid; i++) spa_feature_incr(spa, SPA_FEATURE_DRAID, tx); + if (spa_uses_shared_log(spa) || spa_is_shared_log(spa)) + spa_feature_incr(spa, SPA_FEATURE_SHARED_LOG, tx); dmu_tx_commit(tx); @@ -6747,7 +7104,12 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t 
flags) return (0); } - spa_activate(spa, mode); + error = spa_activate(spa, config, mode, B_FALSE); + if (error != 0) { + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } /* * Don't start async tasks until we know everything is healthy. @@ -6904,9 +7266,24 @@ spa_tryimport(nvlist_t *tryconfig) mutex_enter(&spa_namespace_lock); spa = spa_add(name, tryconfig, NULL); - spa_activate(spa, SPA_MODE_READ); kmem_free(name, MAXPATHLEN); + /* + * spa_import() relies on a pool config fetched by spa_try_import() + * for spare/cache devices. Import flags are not passed to + * spa_tryimport(), which makes it return early due to a missing log + * device and missing retrieving the cache device and spare eventually. + * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch + * the correct configuration regardless of the missing log device. + */ + spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG; + error = spa_activate(spa, tryconfig, SPA_MODE_READ, B_FALSE); + if (error != 0) { + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (NULL); + } + /* * Rewind pool if a max txg was provided. */ @@ -6928,21 +7305,12 @@ spa_tryimport(nvlist_t *tryconfig) spa->spa_config_source = SPA_CONFIG_SRC_SCAN; } - /* - * spa_import() relies on a pool config fetched by spa_try_import() - * for spare/cache devices. Import flags are not passed to - * spa_tryimport(), which makes it return early due to a missing log - * device and missing retrieving the cache device and spare eventually. - * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch - * the correct configuration regardless of the missing log device. - */ - spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG; - error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); /* * If 'tryconfig' was at least parsable, return the current config. 
*/ + zfs_dbgmsg("in tryimport"); if (spa->spa_root_vdev != NULL) { config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname); @@ -6954,6 +7322,12 @@ spa_tryimport(nvlist_t *tryconfig) fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata); + uint64_t shared_log_guid; + if (nvlist_lookup_uint64(tryconfig, + ZPOOL_CONFIG_SHARED_LOG_POOL, &shared_log_guid) == 0) { + fnvlist_add_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL, + shared_log_guid); + } /* * If the bootfs property exists on this pool then we * copy it out so that external consumers can tell which @@ -7016,7 +7390,7 @@ spa_tryimport(nvlist_t *tryconfig) */ static int spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, - boolean_t force, boolean_t hardforce) + boolean_t force, boolean_t hardforce, nvlist_t *outnvl) { int error = 0; spa_t *spa; @@ -7034,6 +7408,28 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, return (SET_ERROR(ENOENT)); } + if (spa_is_shared_log(spa)) { + mutex_enter(&spa->spa_chain_map_lock); + if (!list_is_empty(&spa->spa_registered_clients)) { + if (outnvl != NULL) { + spa_t *client; + list_t *l = &spa->spa_registered_clients; + nvlist_t *clients = fnvlist_alloc(); + for (client = list_head(l); client != NULL; + client = list_next(l, client)) { + fnvlist_add_boolean(clients, + spa_name(client)); + } + fnvlist_add_nvlist(outnvl, + ZPOOL_SHARED_LOG_CLIENTS, clients); + } + mutex_exit(&spa->spa_chain_map_lock); + mutex_exit(&spa_namespace_lock); + return (SET_ERROR(EBUSY)); + } + mutex_exit(&spa->spa_chain_map_lock); + } + if (spa->spa_is_exporting) { /* the pool is being exported by another thread */ mutex_exit(&spa_namespace_lock); @@ -7210,10 +7606,10 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig, * Destroy a storage pool. 
*/ int -spa_destroy(const char *pool) +spa_destroy(const char *pool, nvlist_t *outnvl) { return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, - B_FALSE, B_FALSE)); + B_FALSE, B_FALSE, outnvl)); } /* @@ -7221,10 +7617,10 @@ spa_destroy(const char *pool) */ int spa_export(const char *pool, nvlist_t **oldconfig, boolean_t force, - boolean_t hardforce) + boolean_t hardforce, nvlist_t *outnvl) { return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, - force, hardforce)); + force, hardforce, outnvl)); } /* @@ -7235,7 +7631,7 @@ int spa_reset(const char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, - B_FALSE, B_FALSE)); + B_FALSE, B_FALSE, NULL)); } /* @@ -8331,6 +8727,9 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, return (spa_vdev_exit(spa, NULL, txg, error)); } + if (spa_is_shared_log(spa)) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); + /* clear the log and flush everything up to now */ activate_slog = spa_passivate_log(spa); (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); @@ -8510,7 +8909,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 1); - spa_activate(newspa, spa_mode_global); + VERIFY0(spa_activate(newspa, config, spa_mode_global, B_TRUE)); spa_async_suspend(newspa); /* @@ -8629,7 +9028,7 @@ spa_vdev_split_mirror(spa_t *spa, const char *newname, nvlist_t *config, /* if we're not going to mount the filesystems in userland, export */ if (exp) error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, - B_FALSE, B_FALSE); + B_FALSE, B_FALSE, NULL); return (error); @@ -10282,6 +10681,7 @@ spa_sync(spa_t *spa, uint64_t txg) spa_config_exit(spa, SCL_CONFIG, FTAG); spa_handle_ignored_writes(spa); + spa_chain_map_update(spa); /* * If any async tasks have been requested, kick them off. 
@@ -10933,6 +11333,569 @@ spa_event_notify(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name) spa_event_post(spa_event_create(spa, vd, hist_nvl, name)); } +typedef struct spa_chain_map_free_cb_arg { + blkptr_t *smcfca_end; + uint64_t smcfca_txg; + uint64_t smcfca_guid; +} spa_chain_map_free_cb_arg_t; + +static int +spa_chain_map_free_blk_cb(spa_t *spa, const blkptr_t *bp, void *private) +{ + spa_chain_map_free_cb_arg_t *arg = private; + blkptr_t *end = arg->smcfca_end; + if (end != NULL && BP_EQUAL(bp, end)) + return (EFRAGS); + zio_free(spa, arg->smcfca_txg, bp); + return (0); +} + +static int +spa_chain_map_free_lr_cb(spa_t *spa, const lr_t *lrc, void *private) +{ + spa_chain_map_free_cb_arg_t *arg = private; + lr_write_t *lr = (lr_write_t *)lrc; + blkptr_t *bp = &lr->lr_blkptr; + if (lrc->lrc_txtype != TX_WRITE || BP_IS_HOLE(bp)) + return (0); + zio_free(spa, arg->smcfca_txg, bp); + return (0); +} + +static void +spa_chain_map_gc(spa_t *spa, blkptr_t *start, blkptr_t *end, dmu_tx_t *tx, + uint64_t guid) +{ + spa_chain_map_free_cb_arg_t arg; + arg.smcfca_end = end; + arg.smcfca_txg = dmu_tx_get_txg(tx); + arg.smcfca_guid = guid; + int err = zil_parse_raw(spa, start, spa_chain_map_free_blk_cb, + spa_chain_map_free_lr_cb, &arg); + ASSERT(err == 0 || err == EFRAGS); +} + +void +spa_zil_map_insert(spa_t *spa, objset_t *os, const blkptr_t *prev_bp, + blkptr_t *bp) +{ + if (!spa_uses_shared_log(spa)) + return; + + avl_tree_t *t = &spa->spa_zil_map; + spa_zil_update_t *node = kmem_alloc(sizeof (*node), KM_SLEEP); + node->szu_chain_head = *bp; + spa_zil_update_head_t search = {.szuh_id = dmu_objset_id(os)}; + spa_zil_update_head_t *entry; + avl_index_t where; + + mutex_enter(&spa->spa_zil_map_lock); + if ((entry = avl_find(t, &search, &where)) == NULL) { + entry = kmem_zalloc(sizeof (*entry), KM_SLEEP); + entry->szuh_id = dmu_objset_id(os); + list_create(&entry->szuh_list, sizeof (*entry), + offsetof(spa_zil_update_t, szu_list)); + avl_insert(t, entry, where); 
+ } + if (list_head(&entry->szuh_list) == NULL && prev_bp) { + /* + * We're populating the list for the first time this import, + * but there are already blocks in the chain. Note the last + * block in the chain so that we can stop freeing when we reach + * it. + */ + spa_zil_update_t *prev_node = kmem_alloc(sizeof (*prev_node), + KM_SLEEP); + prev_node->szu_chain_head = *prev_bp; + list_insert_head(&entry->szuh_list, prev_node); + } else if (list_tail(&entry->szuh_list) != NULL) { + ASSERT(prev_bp); + ASSERT(BP_EQUAL(&((spa_zil_update_t *) + list_tail(&entry->szuh_list))->szu_chain_head, prev_bp)); + } + list_insert_tail(&entry->szuh_list, node); + mutex_exit(&spa->spa_zil_map_lock); +} + + +void +spa_zil_map_set_final(spa_t *spa, objset_t *os, blkptr_t *bp) +{ + if (!spa_uses_shared_log(spa)) + return; + + avl_tree_t *t = &spa->spa_zil_map; + spa_zil_update_head_t search = {.szuh_id = dmu_objset_id(os)}; + spa_zil_update_head_t *entry; + avl_index_t where; + + mutex_enter(&spa->spa_zil_map_lock); + if ((entry = avl_find(t, &search, &where)) == NULL) { + entry = kmem_zalloc(sizeof (*entry), KM_SLEEP); + entry->szuh_id = dmu_objset_id(os); + list_create(&entry->szuh_list, sizeof (*entry), + offsetof(spa_zil_update_t, szu_list)); + avl_insert(t, entry, where); + } + entry->szuh_chain_head = *bp; + entry->szuh_set = B_TRUE; + mutex_exit(&spa->spa_zil_map_lock); +} + +void +spa_zil_delete_impl(spa_t *spa, uint64_t id) +{ + if (!spa_uses_shared_log(spa)) + return; + + list_t *l = &spa->spa_zil_deletes; + zil_delete_entry_t *entry = kmem_alloc(sizeof (*entry), KM_SLEEP); + entry->zde_guid = id; + avl_tree_t *t = &spa->spa_zil_map; + spa_zil_update_head_t search = {.szuh_id = id}; + spa_zil_update_head_t *uentry; + + mutex_enter(&spa->spa_zil_map_lock); + list_insert_tail(l, entry); + if ((uentry = avl_find(t, &search, NULL)) != NULL) { + avl_remove(t, uentry); + mutex_exit(&spa->spa_zil_map_lock); + spa_zil_update_t *node = NULL; + while ((node = 
list_remove_head(&uentry->szuh_list)) != NULL) { + kmem_free(node, sizeof (*node)); + } + list_destroy(&uentry->szuh_list); + kmem_free(uentry, sizeof (*uentry)); + return; + } + mutex_exit(&spa->spa_zil_map_lock); +} + +void +spa_zil_delete(spa_t *spa, objset_t *os) +{ + spa_zil_delete_impl(spa, dmu_objset_id(os)); +} + +struct spa_chain_map_update_cb_arg { + spa_t *scmuca_spa; + blkptr_t scmuca_end_bp; + blkptr_t scmuca_start_bp; + dmu_tx_t *scmuca_tx; + uint64_t scmuca_guid; +}; + +static void +spa_chain_map_update_cb(void *arg) +{ + struct spa_chain_map_update_cb_arg *scmuca = arg; + blkptr_t *end_bp = &scmuca->scmuca_end_bp; + spa_chain_map_gc(scmuca->scmuca_spa, &scmuca->scmuca_start_bp, + end_bp, scmuca->scmuca_tx, scmuca->scmuca_guid); + kmem_free(scmuca, sizeof (*scmuca)); +} + +/* + * This function takes the chain map updates from spa and applies them to the + * shared log pool's chain map in memory and on disk. This also involves + * freeing old ZIL blocks in the chain. If the pool doesn't use a shared log, + * we return immediately. + */ +static void +spa_chain_map_update(spa_t *spa) +{ + if (!spa_uses_shared_log(spa)) + return; + + avl_tree_t *t = &spa->spa_zil_map; + spa_t *target = spa->spa_log_class->mc_virtual; + ASSERT(target); + uint64_t chain_map_zap = target->spa_dsl_pool->dp_chain_map_obj; + objset_t *target_mos = target->spa_dsl_pool->dp_meta_objset; + + dmu_tx_t *tx = dmu_tx_create_mos(target->spa_dsl_pool); + dmu_tx_hold_zap(tx, chain_map_zap, B_TRUE, NULL); + dmu_tx_assign(tx, TXG_WAIT); + mutex_enter(&spa->spa_zil_map_lock); + mutex_enter(&target->spa_chain_map_lock); + + /* + * Get the pool's tree in the target in memory chain map, creating + * it if needed. 
+ */ + avl_index_t where; + spa_chain_map_pool_t search; + search.scmp_guid = spa_guid(spa); + spa_chain_map_pool_t *pool_entry = avl_find(&target->spa_chain_map, + &search, &where); + if (pool_entry == NULL) { + pool_entry = kmem_alloc(sizeof (*pool_entry), KM_SLEEP); + pool_entry->scmp_guid = spa_guid(spa); + avl_create(&pool_entry->scmp_os_tree, + spa_chain_map_os_compare, + sizeof (spa_chain_map_os_t), + offsetof(spa_chain_map_os_t, scmo_avl)); + strcpy(pool_entry->scmp_name, spa_name(spa)); + avl_insert(&target->spa_chain_map, pool_entry, where); + } + avl_tree_t *target_tree = &pool_entry->scmp_os_tree; + + /* + * Iterate over the list of chain updates from this TXG. + * If an update is a new ZIL being created, we insert it into the + * tree and ZAP. If it's an update to an existing chain, we update + * the data structures and also iterate along the chain from the old + * head to the new one, issuing frees for all the blocks. + * + * For performance reasons, we actually issue the frees by appending + * them to a list and then processing that list once we've released + * the spa_chain_map_lock. 
+ */ + + list_t local_frees; + list_create(&local_frees, sizeof (spa_zil_update_t), + offsetof(spa_zil_update_t, szu_list)); + spa_zil_update_head_t *node; + uint64_t keybuf[2]; + keybuf[0] = spa_guid(spa); + spa_zil_chain_map_value_t szcmv = {0}; + strcpy(szcmv.szcmv_pool_name, spa_name(spa)); + for (node = avl_first(t); node; ) { + uint64_t guid = node->szuh_id; + + if (node->szuh_force) { + ASSERT0(node->szuh_id); + ASSERT(BP_IS_HOLE(&node->szuh_chain_head)); + keybuf[1] = 0; + int res = zap_add_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx); + IMPLY(res != 0, res == EEXIST); + spa_zil_update_head_t *next = AVL_NEXT(t, node); + avl_remove(t, node); + kmem_free(node, sizeof (*node)); + node = next; + continue; + } + + list_t *l = &node->szuh_list; + spa_zil_update_t *szu = list_head(l); + if (!node->szuh_set || szu == NULL || + BP_IS_HOLE(&node->szuh_chain_head)) { + node = AVL_NEXT(t, node); + continue; + } + keybuf[1] = guid; + spa_chain_map_os_t osearch; + osearch.scmo_id = guid; + spa_chain_map_os_t *os_entry = avl_find(target_tree, + &osearch, &where); + if (os_entry != NULL) { + struct spa_chain_map_update_cb_arg *scmuca = + kmem_zalloc(sizeof (*scmuca), KM_SLEEP); + scmuca->scmuca_spa = target; + scmuca->scmuca_start_bp = os_entry->scmo_chain_head; + scmuca->scmuca_end_bp = szu->szu_chain_head; + scmuca->scmuca_tx = tx; + scmuca->scmuca_guid = guid; + taskq_dispatch(spa->spa_chain_map_taskq, + spa_chain_map_update_cb, scmuca, TQ_SLEEP); + } + + while (szu != NULL && !BP_EQUAL(&szu->szu_chain_head, + &node->szuh_chain_head)) { + list_remove(l, szu); + list_insert_tail(&local_frees, szu); + szu = list_head(l); + } + ASSERT(szu); + + if (os_entry == NULL) { + os_entry = kmem_alloc(sizeof (*os_entry), KM_SLEEP); + os_entry->scmo_id = guid; + os_entry->scmo_chain_head = node->szuh_chain_head; + avl_insert(&pool_entry->scmp_os_tree, os_entry, 
where); + szcmv.szcmv_bp = os_entry->scmo_chain_head; + + VERIFY0(zap_add_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx)); + } else { + os_entry->scmo_chain_head = node->szuh_chain_head; + szcmv.szcmv_bp = os_entry->scmo_chain_head; + VERIFY0(zap_update_uint64(target_mos, chain_map_zap, + keybuf, sizeof (keybuf) / sizeof (uint64_t), + sizeof (uint64_t), sizeof (szcmv) / + sizeof (uint64_t), (uint64_t *)&szcmv, tx)); + } + node = AVL_NEXT(t, node); + } + + /* + * Iterate over the list of deletes. For each one, update the data + * structures and issue frees for all the blocks in the chain. + */ + list_t *l = &spa->spa_zil_deletes; + zil_delete_entry_t *entry; + while ((entry = list_remove_head(l)) != NULL) { + spa_chain_map_os_t osearch; + osearch.scmo_id = entry->zde_guid; + spa_chain_map_os_t *tree_entry = avl_find(target_tree, + &osearch, &where); + ASSERT(tree_entry); + struct spa_chain_map_update_cb_arg *scmuca = + kmem_alloc(sizeof (*scmuca), KM_SLEEP); + scmuca->scmuca_spa = target; + scmuca->scmuca_start_bp = tree_entry->scmo_chain_head; + BP_ZERO(&scmuca->scmuca_end_bp); + scmuca->scmuca_tx = tx; + taskq_dispatch(spa->spa_chain_map_taskq, + spa_chain_map_update_cb, scmuca, TQ_SLEEP); + avl_remove(target_tree, tree_entry); + kmem_free(tree_entry, sizeof (*tree_entry)); + + keybuf[1] = entry->zde_guid; + kmem_free(entry, sizeof (*entry)); + VERIFY0(zap_remove_uint64(target_mos, chain_map_zap, keybuf, 2, + tx)); + } + + mutex_exit(&target->spa_chain_map_lock); + mutex_exit(&spa->spa_zil_map_lock); + spa_zil_update_t *szu; + while ((szu = list_remove_head(&local_frees))) { + if (!BP_IS_HOLE(&szu->szu_chain_head)) { + zio_free(target, dmu_tx_get_txg(tx), + &szu->szu_chain_head); + } + kmem_free(szu, sizeof (*szu)); + } + list_destroy(&local_frees); + taskq_wait(spa->spa_chain_map_taskq); + dmu_tx_commit(tx); +} + +/* + * Convert the ZIL header's 
blkptr to a blkptr for the shared log pool. + */ +void +spa_zil_header_convert(spa_t *spa, objset_t *os, blkptr_t *bp) +{ + /* + * First we check our zil map in case we've updated the + * mapping in this TXG. + */ + mutex_enter(&spa->spa_zil_map_lock); + spa_zil_update_head_t *update_head; + avl_index_t where; + spa_zil_update_head_t hsearch = {.szuh_id = dmu_objset_id(os)}; + if ((update_head = avl_find(&spa->spa_zil_map, &hsearch, &where)) && + list_head(&update_head->szuh_list) != NULL) { + spa_zil_update_t *update = list_tail(&update_head->szuh_list); + *bp = update->szu_chain_head; + mutex_exit(&spa->spa_zil_map_lock); + return; + } + mutex_exit(&spa->spa_zil_map_lock); + + spa_t *target = spa->spa_log_class->mc_virtual; + ASSERT(target); + mutex_enter(&target->spa_chain_map_lock); + spa_chain_map_pool_t search; + search.scmp_guid = spa_guid(spa); + spa_chain_map_pool_t *pool_entry = avl_find(&target->spa_chain_map, + &search, &where); + + /* + * If there's no matching entry, we've probably switched to a new shared + * log; to avoid issues with old BPs, zero out the BP. + */ + if (!pool_entry) { + memset(bp, 0, sizeof (*bp)); + mutex_exit(&target->spa_chain_map_lock); + return; + } + spa_chain_map_os_t osearch = {.scmo_id = dmu_objset_id(os)}; + spa_chain_map_os_t *os_entry = avl_find(&pool_entry->scmp_os_tree, + &osearch, &where); + if (!os_entry) { + memset(bp, 0, sizeof (*bp)); + mutex_exit(&target->spa_chain_map_lock); + return; + } + *bp = os_entry->scmo_chain_head; + mutex_exit(&target->spa_chain_map_lock); +} + +/* + * Convert the ZIL header's blkptr to a form suitable for storing on-disk. + * Because blkptrs into the shared SLOG pool are not meaningful outside of it, + * and we can change from using the shared SLOG to not using it, we use these + * masked blkptrs as a sign that we should look in the chain map for the real + * blkptr. 
+ */ +void +spa_zil_header_mask(spa_t *spa, blkptr_t *bp) +{ + (void) spa; + blkptr_t masked = {{{{0}}}}; + BP_SET_BIRTH(&masked, BP_GET_LOGICAL_BIRTH(bp), + BP_GET_PHYSICAL_BIRTH(bp)); + BP_SET_PSIZE(&masked, BP_GET_PSIZE(bp)); + BP_SET_LSIZE(&masked, BP_GET_LSIZE(bp)); + BP_SET_BYTEORDER(&masked, BP_GET_BYTEORDER(bp)); + *bp = masked; +} + +static int +spa_recycle_one(spa_t *spa, spa_chain_map_pool_t *entry, boolean_t dryrun, + nvlist_t *outnvl) +{ + int err = 0; + uint64_t guid = entry->scmp_guid; + spa_t *search = kmem_zalloc(sizeof (spa_t), KM_SLEEP); + search->spa_config_guid = guid; + + spa_t *client; + list_t *l = &spa->spa_registered_clients; + for (client = list_head(l); client != NULL; + client = list_next(l, client)) { + if (spa_const_guid(client) == entry->scmp_guid) + break; + } + if (!client) { + fnvlist_add_uint64(outnvl, entry->scmp_name, guid); + } + if (dryrun || client) { + return (err); + } + + uint64_t chain_map_zap = spa->spa_dsl_pool->dp_chain_map_obj; + dmu_tx_t *tx = dmu_tx_create_mos(spa->spa_dsl_pool); + dmu_tx_hold_zap(tx, chain_map_zap, B_TRUE, NULL); + dmu_tx_assign(tx, TXG_WAIT); + uint64_t keybuf[2]; + keybuf[0] = entry->scmp_guid; + + avl_tree_t *os_tree = &entry->scmp_os_tree; + spa_chain_map_os_t *os = NULL; + void *cookie = NULL; + while ((os = avl_destroy_nodes(os_tree, &cookie))) { + struct spa_chain_map_free_cb_arg arg; + arg.smcfca_end = NULL; + arg.smcfca_guid = os->scmo_id; + arg.smcfca_txg = spa->spa_syncing_txg; + (void) zil_parse_raw(spa, &os->scmo_chain_head, + spa_chain_map_free_blk_cb, spa_chain_map_free_lr_cb, &arg); + + keybuf[1] = os->scmo_id; + zap_remove_uint64(spa->spa_dsl_pool->dp_meta_objset, + chain_map_zap, keybuf, sizeof (keybuf) / sizeof (uint64_t), + tx); + kmem_free(os, sizeof (*os)); + } + dmu_tx_commit(tx); + avl_destroy(&entry->scmp_os_tree); + kmem_free(search, sizeof (*search)); + + avl_remove(&spa->spa_chain_map, entry); + kmem_free(entry, sizeof (*entry)); + return (err); +} + +int 
+spa_recycle_all(spa_t *spa, boolean_t dryrun, nvlist_t *outnvl) +{ + int err = 0; + if (!spa_is_shared_log(spa)) { + return (SET_ERROR(ENOTSUP)); + } + mutex_enter(&spa->spa_chain_map_lock); + avl_tree_t *t = &spa->spa_chain_map; + spa_chain_map_pool_t *entry = avl_first(t); + while (entry != NULL) { + spa_chain_map_pool_t *next = AVL_NEXT(t, entry); + + int this_err = spa_recycle_one(spa, entry, dryrun, outnvl); + if (this_err != 0 && err == 0) + err = this_err; + + entry = next; + } + mutex_exit(&spa->spa_chain_map_lock); + return (err); +} + +int +spa_recycle_clients(spa_t *spa, nvlist_t *clients, boolean_t dryrun, + nvlist_t *outnvl) +{ + int err = 0; + if (!spa_is_shared_log(spa)) { + return (SET_ERROR(ENOTSUP)); + } + mutex_enter(&spa->spa_chain_map_lock); + for (nvpair_t *pair = nvlist_next_nvpair(clients, NULL); + pair != NULL; pair = nvlist_next_nvpair(clients, pair)) { + avl_tree_t *t = &spa->spa_chain_map; + spa_chain_map_pool_t *entry = avl_first(t); + while (entry != NULL) { + spa_chain_map_pool_t *next = AVL_NEXT(t, entry); + + if (strcmp(entry->scmp_name, nvpair_name(pair)) != 0) { + entry = next; + continue; + } + + err = spa_recycle_one(spa, entry, dryrun, outnvl); + break; + } + } + mutex_exit(&spa->spa_chain_map_lock); + return (err); +} + +static void +spa_cleanup_pool(spa_t *client) +{ + if (!spa_uses_shared_log(client)) + return; + dsl_pool_t *dp = client->spa_dsl_pool; + dsl_pool_config_enter(dp, FTAG); + spa_t *shared_log = spa_get_shared_log_pool(client); + spa_chain_map_pool_t search; + mutex_enter(&shared_log->spa_chain_map_lock); + + avl_tree_t *t = &shared_log->spa_chain_map; + search.scmp_guid = client->spa_config_guid; + spa_chain_map_pool_t *pool_entry = avl_find(t, &search, NULL); + ASSERT(pool_entry); + + t = &pool_entry->scmp_os_tree; + for (spa_chain_map_os_t *entry = avl_first(t); entry; + entry = AVL_NEXT(t, entry)) { + dsl_dataset_t *ds; + int res = dsl_dataset_hold_obj(dp, + entry->scmo_id, FTAG, &ds); + if (res == 0) { + 
dsl_dataset_rele(ds, FTAG); + continue; + } + ASSERT3U(res, ==, ENOENT); + spa_zil_delete_impl(client, entry->scmo_id); + } + mutex_exit(&shared_log->spa_chain_map_lock); + dsl_pool_config_exit(dp, FTAG); +} + +spa_t * +spa_get_shared_log_pool(spa_t *spa) +{ + return (spa->spa_log_class->mc_virtual); +} + /* state manipulation functions */ EXPORT_SYMBOL(spa_open); EXPORT_SYMBOL(spa_open_rewind); diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index 1efff47f87a0..ef63a98521e0 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2017 by Delphix. All rights reserved. + * Copyright (c) 2017, 2023 by Delphix. All rights reserved. */ /* @@ -544,6 +544,10 @@ spa_checkpoint(const char *pool) error = spa_open(pool, &spa, FTAG); if (error != 0) return (error); + if (spa_uses_shared_log(spa) || spa_is_shared_log(spa)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } mutex_enter(&spa->spa_vdev_top_lock); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index a77874ea0dd3..b86ce7f234dc 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright 2017 Joyent, Inc. 
* Copyright (c) 2021, Colm Buckley */ @@ -466,6 +466,11 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) if (spa->spa_compatibility != NULL) fnvlist_add_string(config, ZPOOL_CONFIG_COMPATIBILITY, spa->spa_compatibility); + if (spa->spa_uses_shared_log) + fnvlist_add_uint64(config, ZPOOL_CONFIG_SHARED_LOG_POOL, + spa_guid(spa_get_shared_log_pool(spa))); + if (spa_is_shared_log(spa)) + fnvlist_add_boolean(config, ZPOOL_CONFIG_IS_SHARED_LOG); hostid = spa_get_hostid(spa); if (hostid != 0) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index f486513fcaf9..11618bd8b895 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -238,6 +238,8 @@ avl_tree_t spa_namespace_avl; kmutex_t spa_namespace_lock; +avl_tree_t spa_shared_log_avl; +kmutex_t spa_shared_log_lock; kcondvar_t spa_namespace_cv; static const int spa_max_replication_override = SPA_DVAS_PER_BP; @@ -445,6 +447,42 @@ static int zfs_user_indirect_is_special = B_TRUE; */ static uint_t zfs_special_class_metadata_reserve_pct = 25; +void +spa_set_pool_type(spa_t *spa) +{ + ASSERT3P(spa->spa_root_vdev, !=, NULL); + + /* + * Must hold all of spa_config locks. 
+ */ + ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL); + + if (fnvlist_lookup_boolean(spa->spa_config, + ZPOOL_CONFIG_IS_SHARED_LOG)) { + spa->spa_pool_type = SPA_TYPE_SHARED_LOG; + avl_index_t where; + mutex_enter(&spa_shared_log_lock); + if (avl_find(&spa_shared_log_avl, spa, &where) == NULL) + avl_insert(&spa_shared_log_avl, spa, where); + mutex_exit(&spa_shared_log_lock); + } else { + spa->spa_pool_type = SPA_TYPE_NORMAL; + } +} + +boolean_t +spa_is_shared_log(const spa_t *spa) +{ + return (spa->spa_pool_type == SPA_TYPE_SHARED_LOG); +} + +boolean_t +spa_uses_shared_log(const spa_t *spa) +{ + return (spa->spa_uses_shared_log); +} + + /* * ========================================================================== * SPA config locking @@ -685,6 +723,15 @@ spa_log_sm_sort_by_txg(const void *va, const void *vb) return (TREE_CMP(a->sls_txg, b->sls_txg)); } +static int +spa_guid_compare(const void *a1, const void *a2) +{ + const spa_t *s1 = a1; + const spa_t *s2 = a2; + + return (TREE_CMP(spa_const_guid(s1), spa_const_guid(s2))); +} + /* * Create an uninitialized spa_t with the given name. Requires * spa_namespace_lock. 
The caller must ensure that the spa_t doesn't already @@ -714,6 +761,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_chain_map_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_zil_map_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL); @@ -777,6 +826,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node)); avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg, sizeof (spa_log_sm_t), offsetof(spa_log_sm_t, sls_node)); + list_create(&spa->spa_registered_clients, sizeof (spa_t), + offsetof(spa_t, spa_client_node)); list_create(&spa->spa_log_summary, sizeof (log_summary_entry_t), offsetof(log_summary_entry_t, lse_node)); @@ -852,6 +903,12 @@ spa_remove(spa_t *spa) avl_remove(&spa_namespace_avl, spa); + if (spa_is_shared_log(spa)) { + mutex_enter(&spa_shared_log_lock); + avl_remove(&spa_shared_log_avl, spa); + mutex_exit(&spa_shared_log_lock); + } + if (spa->spa_root) spa_strfree(spa->spa_root); @@ -875,6 +932,7 @@ spa_remove(spa_t *spa) avl_destroy(&spa->spa_metaslabs_by_flushed); avl_destroy(&spa->spa_sm_logs_by_txg); + list_destroy(&spa->spa_registered_clients); list_destroy(&spa->spa_log_summary); list_destroy(&spa->spa_config_list); list_destroy(&spa->spa_leaf_list); @@ -916,6 +974,7 @@ spa_remove(spa_t *spa) mutex_destroy(&spa->spa_vdev_top_lock); mutex_destroy(&spa->spa_feat_stats_lock); mutex_destroy(&spa->spa_activities_lock); + mutex_destroy(&spa->spa_chain_map_lock); kmem_free(spa, sizeof (spa_t)); } @@ -1747,6 +1806,20 @@ spa_name(spa_t *spa) return (spa->spa_name); } +uint64_t +spa_const_guid(const spa_t *spa) +{ + /* + * If we fail to 
parse the config during spa_load(), we can go through + * the error path (which posts an ereport) and end up here with no root + * vdev. We stash the original pool guid in 'spa_config_guid' to handle + * this case. + */ + if (spa->spa_root_vdev == NULL) + return (spa->spa_config_guid); + return (spa->spa_root_vdev->vdev_guid); +} + uint64_t spa_guid(spa_t *spa) { @@ -1776,7 +1849,7 @@ spa_guid(spa_t *spa) } uint64_t -spa_load_guid(spa_t *spa) +spa_load_guid(const spa_t *spa) { /* * This is a GUID that exists solely as a reference for the @@ -2535,6 +2608,7 @@ void spa_init(spa_mode_t mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa_shared_log_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); @@ -2548,6 +2622,9 @@ spa_init(spa_mode_t mode) avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), offsetof(spa_aux_t, aux_avl)); + avl_create(&spa_shared_log_avl, spa_guid_compare, sizeof (spa_t), + offsetof(spa_t, spa_log_avl)); + spa_mode_global = mode; #ifndef _KERNEL @@ -2625,6 +2702,7 @@ spa_fini(void) cv_destroy(&spa_namespace_cv); mutex_destroy(&spa_namespace_lock); + mutex_destroy(&spa_shared_log_lock); mutex_destroy(&spa_spare_lock); mutex_destroy(&spa_l2cache_lock); } @@ -2637,7 +2715,8 @@ spa_fini(void) boolean_t spa_has_slogs(spa_t *spa) { - return (spa->spa_log_class->mc_groups != 0); + return (spa->spa_log_class->mc_groups != 0 || + spa->spa_log_class->mc_virtual != NULL); } spa_log_state_t diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index 032aca92695e..a5f15cf0a2ee 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -332,7 +332,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { uint64_t *thiskey = kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP); - 
ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); + ASSERT3S(zn->zn_key_intlen, ==, sizeof (*thiskey)); zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, sizeof (*thiskey), array_numints, thiskey); @@ -342,7 +342,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, return (match); } - ASSERT(zn->zn_key_intlen == 1); + ASSERT3S(zn->zn_key_intlen, ==, 1); if (zn->zn_matchtype & MT_NORMALIZE) { char *thisname = kmem_alloc(array_numints, KM_SLEEP); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 8188a9e46865..c0cddbc2d685 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -352,19 +352,24 @@ zpl_earlier_version(const char *name, int version) } static void -zfs_log_history(zfs_cmd_t *zc) +zfs_log_history_string(const char *pool, const char *buf) { spa_t *spa; - char *buf; - - if ((buf = history_str_get(zc)) == NULL) - return; - - if (spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (spa_open(pool, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } +} + +static void +zfs_log_history(zfs_cmd_t *zc) +{ + char *buf; + + if ((buf = history_str_get(zc)) == NULL) + return; + zfs_log_history_string(zc->zc_name, buf); history_str_free(buf); } @@ -1501,7 +1506,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) */ if (!error && (error = zfs_set_prop_nvlist(spa_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) { - (void) spa_destroy(spa_name); + (void) spa_destroy(spa_name, NULL); unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */ } @@ -1520,7 +1525,23 @@ zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); - error = spa_destroy(zc->zc_name); + error = spa_destroy(zc->zc_name, NULL); + + return (error); +} + +static const zfs_ioc_key_t zfs_keys_pool_destroy_new[] = { + {ZPOOL_HIST_CMD, DATA_TYPE_STRING, 0}, +}; + +static int +zfs_ioc_pool_destroy_new(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) +{ + int error; + + 
zfs_log_history_string(pool, fnvlist_lookup_string(innvl,
+	    ZPOOL_HIST_CMD));
+	error = spa_destroy(pool, outnvl);
 
 	return (error);
 }
@@ -1570,7 +1591,28 @@ zfs_ioc_pool_export(zfs_cmd_t *zc)
 	boolean_t hardforce = (boolean_t)zc->zc_guid;
 
 	zfs_log_history(zc);
-	error = spa_export(zc->zc_name, NULL, force, hardforce);
+	error = spa_export(zc->zc_name, NULL, force, hardforce, NULL);
+
+	return (error);
+}
+
+static const zfs_ioc_key_t zfs_keys_pool_export_new[] = {
+	{ZPOOL_HIST_CMD, DATA_TYPE_STRING, 0},
+	{ZPOOL_EXPORT_FORCE, DATA_TYPE_BOOLEAN_VALUE, 0},
+	{ZPOOL_EXPORT_HARDFORCE, DATA_TYPE_BOOLEAN_VALUE, 0},
+};
+
+static int
+zfs_ioc_pool_export_new(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int error;
+
+	zfs_log_history_string(pool,
+	    fnvlist_lookup_string(innvl, ZPOOL_HIST_CMD));
+	error = spa_export(pool, NULL,
+	    fnvlist_lookup_boolean_value(innvl, ZPOOL_EXPORT_FORCE),
+	    fnvlist_lookup_boolean_value(innvl, ZPOOL_EXPORT_HARDFORCE),
+	    outnvl);
 
 	return (error);
 }
@@ -7208,6 +7250,47 @@ zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 	return (ret);
 }
 
+static const zfs_ioc_key_t zfs_keys_pool_recycle[] = {
+	{ZPOOL_RECYCLE_DRYRUN, DATA_TYPE_BOOLEAN_VALUE, 0},
+	{ZPOOL_RECYCLE_CLIENTS, DATA_TYPE_NVLIST, ZK_OPTIONAL},
+};
+
+/*
+ * ZFS_IOC_POOL_RECYCLE handler.  With ZPOOL_RECYCLE_CLIENTS in innvl the
+ * request goes to spa_recycle_clients(), otherwise to spa_recycle_all();
+ * ZPOOL_RECYCLE_DRYRUN and outnvl are passed through.  Returns the error
+ * from spa_open() or from the recycle call.
+ */
+static int
+zfs_ioc_pool_recycle(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int err;
+	boolean_t rc, dryrun = B_FALSE;
+	spa_t *spa;
+	nvlist_t *clients = NULL;
+
+	if ((err = spa_open(pool, &spa, FTAG)) != 0)
+		return (err);
+
+	if (innvl) {
+		err = nvlist_lookup_boolean_value(innvl, ZPOOL_RECYCLE_DRYRUN,
+		    &rc);
+		if (err == 0)
+			dryrun = rc;
+		nvlist_lookup_nvlist(innvl, ZPOOL_RECYCLE_CLIENTS,
+		    &clients);
+	}
+	if (clients) {
+		err = spa_recycle_clients(spa, clients, dryrun, outnvl);
+	} else {
+		err = spa_recycle_all(spa, dryrun, outnvl);
+	}
+
+	spa_close(spa, FTAG);
+
+	return (err);
+}
+
 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 
 static void
@@ -7514,6 +7597,21 @@ 
zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune)); + zfs_ioctl_register("zpool_recycle", ZFS_IOC_POOL_RECYCLE, + zfs_ioc_pool_recycle, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, + zfs_keys_pool_recycle, ARRAY_SIZE(zfs_keys_pool_recycle)); + + zfs_ioctl_register("zpool_destroy_new", ZFS_IOC_POOL_DESTROY_NEW, + zfs_ioc_pool_destroy_new, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, + zfs_keys_pool_destroy_new, ARRAY_SIZE(zfs_keys_pool_destroy_new)); + + zfs_ioctl_register("zpool_export_new", ZFS_IOC_POOL_EXPORT_NEW, + zfs_ioc_pool_export_new, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, + zfs_keys_pool_export_new, ARRAY_SIZE(zfs_keys_pool_export_new)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 3983da6aa424..395bce8dc85d 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2018 Datto Inc. */ @@ -238,23 +238,16 @@ zil_kstats_global_update(kstat_t *ksp, int rw) return (0); } -/* - * Read a log block and make sure it's valid. 
- */ static int -zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, - blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf) +zil_read_log_block(spa_t *spa, boolean_t decrypt, zio_flag_t zio_flags, + const blkptr_t *bp, blkptr_t *nbp, char **begin, char **end, + arc_buf_t **abuf) { - zio_flag_t zio_flags = ZIO_FLAG_CANFAIL; + arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; int error; - - if (zilog->zl_header->zh_claim_txg == 0) - zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; - - if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) - zio_flags |= ZIO_FLAG_SPECULATIVE; + zio_flags |= ZIO_FLAG_CANFAIL; if (!decrypt) zio_flags |= ZIO_FLAG_RAW; @@ -262,7 +255,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, + error = arc_read(NULL, spa, bp, arc_getbuf_func, abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { @@ -346,7 +339,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, + error = arc_read(NULL, zilog->zl_io_spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { @@ -454,80 +447,48 @@ zil_kstat_values_update(zil_kstat_values_t *zs, zil_sums_t *zil_sums) } /* - * Parse the intent log, and call parse_func for each valid record within. + * Parse the intent log, and call parse_blk_func for each valid block within + * and parse_lr_func for each valid record within. 
*/ -int -zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, +static int +zil_parse_raw_impl(spa_t *spa, const blkptr_t *bp, + zil_parse_raw_blk_func_t *parse_blk_func, + zil_parse_raw_lr_func_t *parse_lr_func, void *arg, zio_flag_t zio_flags, boolean_t decrypt) { - const zil_header_t *zh = zilog->zl_header; - boolean_t claimed = !!zh->zh_claim_txg; - uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; - uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; - uint64_t max_blk_seq = 0; - uint64_t max_lr_seq = 0; - uint64_t blk_count = 0; - uint64_t lr_count = 0; - blkptr_t blk, next_blk = {{{{0}}}}; + (void) parse_lr_func; + blkptr_t next_blk = {{{{0}}}}; int error = 0; - /* - * Old logs didn't record the maximum zh_claim_lr_seq. - */ - if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) - claim_lr_seq = UINT64_MAX; - - /* - * Starting at the block pointed to by zh_log we read the log chain. - * For each block in the chain we strongly check that block to - * ensure its validity. We stop when an invalid block is found. - * For each block pointer in the chain we call parse_blk_func(). - * For each record in each valid block we call parse_lr_func(). - * If the log has been claimed, stop if we encounter a sequence - * number greater than the highest claimed sequence number. 
- */ - zil_bp_tree_init(zilog); - - for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { - uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; - int reclen; + for (blkptr_t blk = *bp; !BP_IS_HOLE(&blk); blk = next_blk) { char *lrp, *end; arc_buf_t *abuf = NULL; - if (blk_seq > claim_blk_seq) - break; - - error = parse_blk_func(zilog, &blk, arg, txg); - if (error != 0) - break; - ASSERT3U(max_blk_seq, <, blk_seq); - max_blk_seq = blk_seq; - blk_count++; - - if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) - break; + /* + * We do the read before the parse function so that if the + * parse function frees the block, we still have next_blk so we + * can continue the chain. + */ + int read_error = zil_read_log_block(spa, decrypt, zio_flags, + &blk, &next_blk, &lrp, &end, &abuf); - error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, - &lrp, &end, &abuf); + error = parse_blk_func(spa, &blk, arg); if (error != 0) { if (abuf) arc_buf_destroy(abuf, &abuf); - if (claimed) { - char name[ZFS_MAX_DATASET_NAME_LEN]; - - dmu_objset_name(zilog->zl_os, name); + break; + } - cmn_err(CE_WARN, "ZFS read log block error %d, " - "dataset %s, seq 0x%llx\n", error, name, - (u_longlong_t)blk_seq); - } + if (read_error != 0) { + if (abuf) + arc_buf_destroy(abuf, &abuf); + error = read_error; break; } + int reclen; for (; lrp < end; lrp += reclen) { lr_t *lr = (lr_t *)lrp; - /* * Are the remaining bytes large enough to hold an * log record? 
@@ -535,40 +496,169 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
 			if ((char *)(lr + 1) > end) {
 				cmn_err(CE_WARN, "zil_parse: lr_t overrun");
 				error = SET_ERROR(ECKSUM);
-				arc_buf_destroy(abuf, &abuf);
-				goto done;
+				break;
 			}
 			reclen = lr->lrc_reclen;
+			ASSERT3U(reclen, >=, sizeof (lr_t));
+			ASSERT3U(reclen, <=, end - lrp);
 			if (reclen < sizeof (lr_t) || reclen > end - lrp) {
 				cmn_err(CE_WARN,
 				    "zil_parse: lr_t has an invalid reclen");
 				error = SET_ERROR(ECKSUM);
-				arc_buf_destroy(abuf, &abuf);
-				goto done;
-			}
-
-			if (lr->lrc_seq > claim_lr_seq) {
-				arc_buf_destroy(abuf, &abuf);
-				goto done;
+				break;
 			}
 
-			error = parse_lr_func(zilog, lr, arg, txg);
-			if (error != 0) {
-				arc_buf_destroy(abuf, &abuf);
-				goto done;
-			}
-			ASSERT3U(max_lr_seq, <, lr->lrc_seq);
-			max_lr_seq = lr->lrc_seq;
-			lr_count++;
+			error = parse_lr_func(spa, lr, arg);
+			if (error != 0)
+				break;
 		}
 		arc_buf_destroy(abuf, &abuf);
+		/*
+		 * A break above only leaves the record loop; end the
+		 * chain walk as well so the error is not overwritten.
+		 */
+		if (error != 0)
+			break;
 	}
-done:
+
+	return (error);
+}
+
+/*
+ * Because we don't have access to the zilog_t, we cannot know when the chain
+ * is supposed to end. As a result, all IOs need to be marked as speculative.
+ */ +int +zil_parse_raw(spa_t *spa, const blkptr_t *bp, + zil_parse_raw_blk_func_t *parse_blk_func, + zil_parse_raw_lr_func_t *parse_lr_func, void *arg) +{ + return (zil_parse_raw_impl(spa, bp, parse_blk_func, parse_lr_func, arg, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, B_FALSE)); +} + +struct parse_arg { + zilog_t *zilog; + zil_parse_blk_func_t *parse_blk_func; + zil_parse_lr_func_t *parse_lr_func; + void *arg; + uint64_t txg; + uint64_t blk_seq; + uint64_t claim_blk_seq; + uint64_t claim_lr_seq; + uint64_t max_blk_seq; + uint64_t max_lr_seq; + uint64_t blk_count; + uint64_t lr_count; + int error; +}; + +static int +parse_blk_wrapper(spa_t *spa, const blkptr_t *bp, void *arg) +{ + (void) spa; + struct parse_arg *pa = arg; + pa->blk_seq = bp->blk_cksum.zc_word[ZIL_ZC_SEQ]; + + if (pa->blk_seq > pa->claim_blk_seq) + return (EINTR); + int error = pa->parse_blk_func(pa->zilog, bp, pa->arg, pa->txg); + if (error) { + pa->error = error; + return (EINTR); + } + + ASSERT3U(pa->max_blk_seq, <, pa->blk_seq); + pa->max_blk_seq = pa->blk_seq; + pa->blk_count++; + + if (pa->max_lr_seq == pa->claim_lr_seq && + pa->max_blk_seq == pa->claim_blk_seq) { + return (EINTR); + } + return (0); + +} +static int +parse_lr_wrapper(spa_t *spa, const lr_t *lr, void *arg) +{ + (void) spa; + struct parse_arg *pa = arg; + if (lr->lrc_seq > pa->claim_lr_seq) + return (EINTR); + + int error = pa->parse_lr_func(pa->zilog, lr, pa->arg, pa->txg); + if (error != 0) { + pa->error = error; + return (EINTR); + } + ASSERT3U(pa->max_lr_seq, <, lr->lrc_seq); + pa->max_lr_seq = lr->lrc_seq; + pa->lr_count++; + return (0); +} + +/* + * Parse the intent log, and call parse_func for each valid record within. + */ +int +zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt) +{ + const zil_header_t *zh = zilog->zl_header; + boolean_t claimed = !!zh->zh_claim_txg; + struct parse_arg arg2; + arg2.claim_blk_seq = claimed ? 
zh->zh_claim_blk_seq : UINT64_MAX; + arg2.claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; + arg2.max_blk_seq = 0; + arg2.max_lr_seq = 0; + arg2.blk_count = 0; + arg2.lr_count = 0; + arg2.arg = arg; + arg2.parse_blk_func = parse_blk_func; + arg2.parse_lr_func = parse_lr_func; + arg2.txg = txg; + arg2.zilog = zilog; + arg2.error = 0; + arg2.blk_seq = 0; + + zio_flag_t zio_flags = 0; + if (!claimed) + zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + + if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) + zio_flags |= ZIO_FLAG_SPECULATIVE; + + /* + * Old logs didn't record the maximum zh_claim_lr_seq. + */ + if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) + arg2.claim_lr_seq = UINT64_MAX; + + zil_bp_tree_init(zilog); + + int error = zil_parse_raw_impl(zilog->zl_io_spa, &zh->zh_log, + parse_blk_wrapper, parse_lr_wrapper, &arg2, zio_flags, decrypt); + + // Non-EINTR errors come from zil_read_log_block or a bad lr_t + if (error != 0 && error != EINTR && claimed) { + char name[ZFS_MAX_DATASET_NAME_LEN]; + + dmu_objset_name(zilog->zl_os, name); + + cmn_err(CE_WARN, "ZFS read log block error %d, " + "dataset %s, seq 0x%llx\n", error, name, + (u_longlong_t)arg2.blk_seq); + } + + if (error == EINTR) + error = arg2.error; zilog->zl_parse_error = error; - zilog->zl_parse_blk_seq = max_blk_seq; - zilog->zl_parse_lr_seq = max_lr_seq; - zilog->zl_parse_blk_count = blk_count; - zilog->zl_parse_lr_count = lr_count; + zilog->zl_parse_blk_seq = arg2.max_blk_seq; + zilog->zl_parse_lr_seq = arg2.max_lr_seq; + zilog->zl_parse_blk_count = arg2.blk_count; + zilog->zl_parse_lr_count = arg2.lr_count; zil_bp_tree_fini(zilog); @@ -582,6 +666,8 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, (void) tx; ASSERT(!BP_IS_HOLE(bp)); + // We do not support checkpoints of shared log client pools. 
+ ASSERT(!zilog->zl_spa->spa_uses_shared_log); /* * As we call this function from the context of a rewind to a * checkpoint, each ZIL block whose txg is later than the txg @@ -594,7 +680,7 @@ zil_clear_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, if (zil_bp_tree_add(zilog, bp) != 0) return (0); - zio_free(zilog->zl_spa, first_txg, bp); + zio_free(zilog->zl_io_spa, first_txg, bp); return (0); } @@ -615,7 +701,8 @@ zil_claim_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, * If tx == NULL, just verify that the block is claimable. */ if (BP_IS_HOLE(bp) || BP_GET_LOGICAL_BIRTH(bp) < first_txg || - zil_bp_tree_add(zilog, bp) != 0) + zil_bp_tree_add(zilog, bp) != 0 || + zilog->zl_spa != zilog->zl_io_spa) return (0); return (zio_wait(zio_claim(NULL, zilog->zl_spa, @@ -725,7 +812,8 @@ zil_free_log_block(zilog_t *zilog, const blkptr_t *bp, void *tx, { (void) claim_txg; - zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); + if (!zilog->zl_spa->spa_uses_shared_log) + zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); return (0); } @@ -742,7 +830,8 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg) * If we previously claimed it, we need to free it. */ if (BP_GET_LOGICAL_BIRTH(bp) >= claim_txg && - zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp)) { + zil_bp_tree_add(zilog, bp) == 0 && !BP_IS_HOLE(bp) && + !zilog->zl_spa->spa_uses_shared_log) { zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); } @@ -983,7 +1072,7 @@ zil_create(zilog_t *zilog) int error = 0; boolean_t slog = FALSE; dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); - + spa_t *spa = zilog->zl_spa; /* * Wait for any previous destroy to complete. 
@@ -1007,14 +1096,23 @@ zil_create(zilog_t *zilog) txg = dmu_tx_get_txg(tx); if (!BP_IS_HOLE(&blk)) { - zio_free(zilog->zl_spa, txg, &blk); + if (spa_uses_shared_log(spa)) { + spa_zil_delete(spa, zilog->zl_os); + } else { + zio_free(spa, txg, &blk); + } BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, + error = zio_alloc_zil(spa, zilog->zl_os, txg, &blk, ZIL_MIN_BLKSZ, &slog); if (error == 0) zil_init_log_chain(zilog, &blk); + spa_zil_map_insert(spa, zilog->zl_os, NULL, &blk); + if (spa_uses_shared_log(spa)) { + spa_t *shared_log = spa_get_shared_log_pool(spa); + txg_wait_synced(shared_log->spa_dsl_pool, 0); + } } /* @@ -1035,9 +1133,8 @@ zil_create(zilog_t *zilog) * this until we write the first xattr log record because we * need to wait for the feature activation to sync out. */ - if (spa_feature_is_enabled(zilog->zl_spa, - SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) != - DMU_OST_ZVOL) { + if (spa_feature_is_enabled(spa, SPA_FEATURE_ZILSAXATTR) && + dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL) { mutex_enter(&ds->ds_lock); ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] = (void *)B_TRUE; @@ -1053,7 +1150,7 @@ zil_create(zilog_t *zilog) */ zil_commit_activate_saxattr_feature(zilog); } - IMPLY(spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) && + IMPLY(spa_feature_is_enabled(spa, SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL, dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)); @@ -1105,11 +1202,14 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); VERIFY(!keep_first); + spa_zil_delete(zilog->zl_spa, zilog->zl_os); while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) { if (lwb->lwb_buf != NULL) zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); - if (!BP_IS_HOLE(&lwb->lwb_blk)) + if (!BP_IS_HOLE(&lwb->lwb_blk) && + !spa_uses_shared_log(zilog->zl_spa)) { zio_free(zilog->zl_spa, txg, &lwb->lwb_blk); 
+ } zil_free_lwb(zilog, lwb); } } else if (!keep_first) { @@ -1130,6 +1230,49 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } +/* + * This function's only job is to clear the zil chain for the given dataset. + * It is called when we're using a shared log pool and we import discarding + * logs. + */ +int +zil_clear(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) +{ + dmu_tx_t *tx = txarg; + zilog_t *zilog; + zil_header_t *zh; + objset_t *os; + int error; + + ASSERT3U(spa_get_log_state(dp->dp_spa), ==, SPA_LOG_CLEAR); + + error = dmu_objset_own_obj(dp, ds->ds_object, + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); + if (error != 0) { + /* + * EBUSY indicates that the objset is inconsistent, in which + * case it can not have a ZIL. + */ + if (error != EBUSY) { + cmn_err(CE_WARN, "can't open objset for %llu, error %u", + (unsigned long long)ds->ds_object, error); + } + + return (0); + } + + zilog = dmu_objset_zil(os); + zh = zil_header_in_syncing_context(zilog); + ASSERT3U(tx->tx_txg, ==, spa_first_txg(zilog->zl_spa)); + + BP_ZERO(&zh->zh_log); + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; + dsl_dataset_dirty(dmu_objset_ds(os), tx); + dmu_objset_disown(os, B_FALSE, FTAG); + return (0); +} + int zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) { @@ -1457,7 +1600,7 @@ zil_lwb_flush_vdevs_done(zio_t *zio) zil_commit_waiter_t *zcw; itx_t *itx; - spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); + spa_config_exit(zilog->zl_io_spa, SCL_STATE, lwb); hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; @@ -1895,6 +2038,7 @@ static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) { spa_t *spa = zilog->zl_spa; + spa_t *io_spa = zilog->zl_io_spa; zil_chain_t *zilc; boolean_t slog; zbookmark_phys_t zb; @@ -1910,7 +2054,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) lwb->lwb_nused = lwb->lwb_nfilled; ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax); - 
lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb, + lwb->lwb_root_zio = zio_root(io_spa, zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL); /* @@ -1943,7 +2087,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); - lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0, + lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, io_spa, 0, &lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb); zil_lwb_add_block(lwb, &lwb->lwb_blk); @@ -1992,11 +2136,14 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) &slog); } if (error == 0) { - ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), ==, txg); + IMPLY(spa == io_spa, BP_GET_LOGICAL_BIRTH(bp) == txg); BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); + VERIFY(zfs_blkptr_verify(io_spa, bp, BLK_CONFIG_NEEDED, + BLK_VERIFY_HALT)); bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; + spa_zil_map_insert(spa, zilog->zl_os, &lwb->lwb_blk, bp); } /* @@ -2010,7 +2157,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) mutex_exit(&zilog->zl_lwb_io_lock); dmu_tx_commit(tx); - spa_config_enter(spa, SCL_STATE, lwb, RW_READER); + spa_config_enter(io_spa, SCL_STATE, lwb, RW_READER); /* * We've completed all potentially blocking operations. 
Update the @@ -3743,6 +3890,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) */ zil_init_log_chain(zilog, &blk); zh->zh_log = blk; + spa_zil_map_set_final(spa, zilog->zl_os, &blk); } else { /* * A destroyed ZIL chain can't contain any TX_SETSAXATTR @@ -3753,7 +3901,11 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) SPA_FEATURE_ZILSAXATTR)) dsl_dataset_deactivate_feature(ds, SPA_FEATURE_ZILSAXATTR, tx); + spa_zil_delete(spa, zilog->zl_os); } + + mutex_exit(&zilog->zl_lock); + return; } while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { @@ -3762,7 +3914,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) lwb->lwb_alloc_txg > txg || lwb->lwb_max_txg > txg) break; list_remove(&zilog->zl_lwb_list, lwb); - if (!BP_IS_HOLE(&lwb->lwb_blk)) + if (!BP_IS_HOLE(&lwb->lwb_blk) && !spa->spa_uses_shared_log) zio_free(spa, txg, &lwb->lwb_blk); zil_free_lwb(zilog, lwb); @@ -3774,6 +3926,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) */ if (list_is_empty(&zilog->zl_lwb_list)) BP_ZERO(&zh->zh_log); + spa_zil_map_set_final(spa, zilog->zl_os, &zh->zh_log); } mutex_exit(&zilog->zl_lock); @@ -3862,6 +4015,13 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) zilog->zl_header = zh_phys; zilog->zl_os = os; zilog->zl_spa = dmu_objset_spa(os); + zilog->zl_io_spa = spa_get_shared_log_pool(zilog->zl_spa); + if (zilog->zl_io_spa == NULL) { + zilog->zl_io_spa = zilog->zl_spa; + } else { + IMPLY(BP_IS_HOLE(&(zh_phys->zh_log)), + BP_GET_LOGICAL_BIRTH(&zh_phys->zh_log) == 0); + } zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; zilog->zl_logbias = dmu_objset_logbias(os); @@ -3949,6 +4109,8 @@ zil_open(objset_t *os, zil_get_data_t *get_data, zil_sums_t *zil_sums) ASSERT3P(zilog->zl_get_data, ==, NULL); ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL); ASSERT(list_is_empty(&zilog->zl_lwb_list)); + IMPLY(BP_IS_HOLE(&zilog->zl_header->zh_log), + BP_GET_LOGICAL_BIRTH(&zilog->zl_header->zh_log) == 0); zilog->zl_get_data = get_data; zilog->zl_sums = zil_sums; @@ -4369,6 +4531,12 @@ 
zil_reset(const char *osname, void *arg) return (0); } +boolean_t +zil_shared_log(zilog_t *zilog) +{ + return (zilog->zl_spa != zilog->zl_io_spa); +} + EXPORT_SYMBOL(zil_alloc); EXPORT_SYMBOL(zil_free); EXPORT_SYMBOL(zil_open); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index b26f5e80abfb..1164c726f76b 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2022 by Delphix. All rights reserved. + * Copyright (c) 2011, 2023 by Delphix. All rights reserved. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, 2023, 2024, Klara Inc. @@ -4208,15 +4208,16 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, int flags = METASLAB_ZIL; int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object) % spa->spa_alloc_count; + boolean_t must_slog = spa_uses_shared_log(spa); error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); - *slog = (error == 0); - if (error != 0) { + *slog = (error == 0 || must_slog); + if (error != 0 && !must_slog) { error = metaslab_alloc(spa, spa_embedded_log_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); } - if (error != 0) { + if (error != 0 && !must_slog) { error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, NULL, flags, &io_alloc_list, NULL, allocator); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index f89a4b3e0aae..7aa1473462bc 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -947,6 +947,11 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', 'scrub_mirror_003_pos', 'scrub_mirror_004_pos'] tags = ['functional', 'scrub_mirror'] +[tests/functional/shared_log] +tests = ['shared_log_001_pos', 'shared_log_002_pos', 'shared_log_003_pos', 'shared_log_004_pos', 
+ 'shared_log_005_pos', 'shared_log_006_neg', 'shared_log_007_pos', 'shared_log_008_pos'] +tags = ['functional', 'shared_log'] + [tests/functional/slog] tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', diff --git a/tests/runfiles/perf-regression.run b/tests/runfiles/perf-regression.run index ec081040d54d..03f492d97fd2 100644 --- a/tests/runfiles/perf-regression.run +++ b/tests/runfiles/perf-regression.run @@ -29,5 +29,4 @@ tests = ['sequential_writes', 'sequential_reads', 'sequential_reads_arc_cached', 'sequential_reads_arc_cached_clone', 'sequential_reads_dbuf_cached', 'random_reads', 'random_writes', 'random_readwrite', 'random_writes_zil', 'random_readwrite_fixed'] -post = tags = ['perf', 'regression'] diff --git a/tests/zfs-tests/cmd/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check.c index 7d9ce4fada1b..7d94aff0c576 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check.c @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2018 by Delphix. All rights reserved. + * Copyright (c) 2018, 2023 by Delphix. All rights reserved. 
*/ #include @@ -790,6 +790,18 @@ test_set_bootenv(const char *pool) nvlist_free(required); } +static void +test_pool_recycle(const char *pool) +{ + nvlist_t *required = fnvlist_alloc(); + + fnvlist_add_boolean_value(required, "dryrun", B_FALSE); + + IOC_INPUT_TEST_WILD(ZFS_IOC_POOL_RECYCLE, pool, required, NULL, 0); + + nvlist_free(required); +} + static void zfs_ioc_input_tests(const char *pool) { @@ -884,6 +896,8 @@ zfs_ioc_input_tests(const char *pool) test_scrub(pool); + test_pool_recycle(pool); + /* * cleanup */ @@ -1039,6 +1053,7 @@ validate_ioc_values(void) CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT); CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS); CHECK(ZFS_IOC_BASE + 87 == ZFS_IOC_POOL_SCRUB); + CHECK(ZFS_IOC_BASE + 90 == ZFS_IOC_POOL_RECYCLE); CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT); CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK); diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 9cf919c3dd0f..dabe62207f56 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -21,7 +21,7 @@ # # Copyright (c) 2009, Sun Microsystems Inc. All rights reserved. -# Copyright (c) 2012, 2020, Delphix. All rights reserved. +# Copyright (c) 2012, 2024, Delphix. All rights reserved. # Copyright (c) 2017, Tim Chase. All rights reserved. # Copyright (c) 2017, Nexenta Systems Inc. All rights reserved. # Copyright (c) 2017, Lawrence Livermore National Security LLC. @@ -1586,10 +1586,10 @@ function create_pool #pool devs_list if is_global_zone ; then [[ -d /$pool ]] && rm -rf /$pool - log_must zpool create -f $pool $@ + zpool create -f $pool $@ fi - return 0 + return $? } # Return 0 if destroy successfully or the pool exists; $? 
otherwise diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 206ee8ac1542..1118887ba067 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -29,6 +29,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS = \ perf/regression/sequential_reads.ksh \ perf/regression/sequential_writes.ksh \ perf/regression/setup.ksh \ + perf/regression/cleanup.ksh \ \ perf/scripts/prefetch_io.sh @@ -365,6 +366,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/rsend/rsend.kshlib \ functional/scrub_mirror/default.cfg \ functional/scrub_mirror/scrub_mirror_common.kshlib \ + functional/shared_log/shared_log.cfg \ + functional/shared_log/shared_log.kshlib \ functional/slog/slog.cfg \ functional/slog/slog.kshlib \ functional/snapshot/snapshot.cfg \ @@ -1986,6 +1989,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/scrub_mirror/scrub_mirror_003_pos.ksh \ functional/scrub_mirror/scrub_mirror_004_pos.ksh \ functional/scrub_mirror/setup.ksh \ + functional/shared_log/cleanup.ksh \ + functional/shared_log/setup.ksh \ + functional/shared_log/shared_log_001_pos.ksh \ + functional/shared_log/shared_log_002_pos.ksh \ + functional/shared_log/shared_log_003_pos.ksh \ + functional/shared_log/shared_log_004_pos.ksh \ + functional/shared_log/shared_log_005_pos.ksh \ + functional/shared_log/shared_log_006_neg.ksh \ + functional/shared_log/shared_log_007_pos.ksh \ + functional/shared_log/shared_log_008_pos.ksh \ functional/slog/cleanup.ksh \ functional/slog/setup.ksh \ functional/slog/slog_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index e1fe865b1d3b..475c4e4cf277 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -112,5 +112,6 @@ if is_linux || is_freebsd; then "feature@fast_dedup" 
"feature@longname" "feature@large_microzap" + "feature@shared_log" ) fi diff --git a/tests/zfs-tests/tests/functional/shared_log/cleanup.ksh b/tests/zfs-tests/tests/functional/shared_log/cleanup.ksh new file mode 100755 index 000000000000..ea1df9f99dc7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/cleanup.ksh @@ -0,0 +1,49 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/shared_log/shared_log.cfg + +verify_runnable "global" + +zpool import $LOGPOOL +zpool import ${LOGPOOL}2 +zpool import $TESTPOOL +zpool import $TESTPOOL2 +if datasetexists $TESTPOOL ; then + log_must zpool destroy -f $TESTPOOL +fi +if datasetexists $TESTPOOL2 ; then + log_must zpool destroy -f $TESTPOOL2 +fi +if datasetexists $LOGPOOL ; then + log_must zpool destroy -f $LOGPOOL +fi +if datasetexists ${LOGPOOL}2 ; then + log_must zpool destroy -f ${LOGPOOL}2 +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/shared_log/setup.ksh b/tests/zfs-tests/tests/functional/shared_log/setup.ksh new file mode 100755 index 000000000000..5db5d9b85f1c --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/shared_log/shared_log.cfg + +verify_runnable "global" + +log_pass diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log.cfg b/tests/zfs-tests/tests/functional/shared_log/shared_log.cfg new file mode 100644 index 000000000000..029063411093 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log.cfg @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +export LOGPOOL="${TESTPOOL}_log" +read -r DISK0 DISK1 DISK2 _ <<<"$DISKS" +export DISK0 DISK1 DISK2 diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib b/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib new file mode 100644 index 000000000000..45973420b0f8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log.kshlib @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/shared_log/shared_log.cfg + +function cleanup +{ + zpool import $LOGPOOL + zpool import ${LOGPOOL}2 + zpool import -m $TESTPOOL + zpool import -m $TESTPOOL2 + poolexists $TESTPOOL && destroy_pool $TESTPOOL + poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 + poolexists $LOGPOOL && destroy_pool $LOGPOOL + poolexists ${LOGPOOL}2 && destroy_pool ${LOGPOOL}2 +} + +function verify_shared_log +{ + local target="$1" + local log_pool_name="$2" + + zpool list -v $target | grep -A 1 "shared log" | tail -n 1 | grep "^ *$log_pool_name" >/dev/null + return $? +} diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh new file mode 100755 index 000000000000..7c54dba5adda --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_001_pos.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Creating a pool with a shared log succeeds. +# +# STRATEGY: +# 1. Create shared log pool +# 2. Create client pool with shared log +# 3. Display pool status +# + +verify_runnable "global" + +log_assert "Creating a pool with a shared log succeeds." +log_onexit cleanup + +log_must zpool create -L -f $LOGPOOL "$DISK0" +log_must zpool create -l $LOGPOOL -f $TESTPOOL "$DISK1" +log_must verify_shared_log $TESTPOOL $LOGPOOL +verify_pool $LOGPOOL +verify_pool $TESTPOOL + +log_pass "Creating a pool with a shared log succeeds." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh new file mode 100755 index 000000000000..35f880b0178b --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_002_pos.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Using a pool with a shared log device succeeds at basic operations. +# +# STRATEGY: +# 1. Create shared log pool & client +# 2. Create sync=always fs on client +# 3. Write data to fs +# 4. Export & import client +# 5. Write data to fs again +# + +verify_runnable "global" + +log_assert "Using a pool with a shared log device succeeds at basic operations." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_must verify_shared_log $TESTPOOL $LOGPOOL +log_must zfs create -o sync=always -o recordsize=8k $FS + +mntpnt=$(get_prop mountpoint $FS) +log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool $TESTPOOL + +log_pass "Using a pool with a shared log device succeeds at basic operations." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh new file mode 100755 index 000000000000..6643ccf2654a --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_003_pos.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Shared log pool can be exported and imported. +# +# STRATEGY: +# 1. Create shared log pool & client +# 2. Write some data to the client pool +# 3. Export client +# 4. Export & import provider +# 5. Import client +# 6. Write data to client +# + +verify_runnable "global" + +log_assert "Shared log pool can be exported and imported." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_must verify_shared_log $TESTPOOL $LOGPOOL +log_must zfs create -o sync=always -o recordsize=8k $FS +mntpnt=$(get_prop mountpoint $FS) + +log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +log_must zpool export $TESTPOOL +log_must zpool export $LOGPOOL +log_must zpool import $LOGPOOL +log_must zpool import $TESTPOOL +log_must dd if=/dev/urandom of="$mntpnt/f2" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool $TESTPOOL + +log_pass "Shared log pool can be exported and imported." 
diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh new file mode 100755 index 000000000000..3d9d35f2ef4a --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_004_pos.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Client pools can be reimported without provider, with flag. +# +# STRATEGY: +# 1. Create shared log pools & client +# 2. Write data to client +# 3. Export client and provider +# 4. Import client with -m +# 5. Export client +# 6. Import client with -m and new provider +# + +verify_runnable "global" + +log_assert "Client pools can be reimported without provider, with flag." 
+log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -L ${LOGPOOL}2 "$DISK1" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK2" +log_must verify_shared_log $TESTPOOL $LOGPOOL +log_must zfs create -o sync=always -o recordsize=8k $FS +mntpnt=$(get_prop mountpoint $FS) + +log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +log_must zpool export $TESTPOOL +log_must zpool export $LOGPOOL +log_must zpool import -m $TESTPOOL +log_must dd if=/dev/urandom of="$mntpnt/f2" bs=8k count=128 +log_must zpool export $TESTPOOL +log_must zpool import $LOGPOOL +log_must zpool import -m -L ${LOGPOOL}2 $TESTPOOL +log_must verify_shared_log $TESTPOOL ${LOGPOOL}2 +log_must dd if=/dev/urandom of="$mntpnt/f3" bs=8k count=128 +verify_pool $LOGPOOL +verify_pool ${LOGPOOL}2 +verify_pool $TESTPOOL + +log_pass "Client pools can be reimported without provider, with flag." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_005_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_005_pos.ksh new file mode 100755 index 000000000000..bcba87033602 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_005_pos.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Test scrub with client and provider pools. +# +# STRATEGY: +# 1. Create shared log pool & client +# 2. Write some data to the client pool +# 3. Scrub client and provider pools +# + +verify_runnable "global" + +log_assert "Test scrub with client and provider pools." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_must verify_shared_log $TESTPOOL $LOGPOOL +log_must zfs create -o sync=always -o recordsize=8k $FS +mntpnt=$(get_prop mountpoint $FS) + +log_must dd if=/dev/urandom of="$mntpnt/f1" bs=8k count=128 +log_must zpool scrub $LOGPOOL +log_must zpool scrub $TESTPOOL +log_must zpool wait -t scrub $LOGPOOL +log_must zpool wait -t scrub $TESTPOOL + +log_pass "Test scrub with client and provider pools." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh new file mode 100755 index 000000000000..02969126fad6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_006_neg.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Negative shared log testing. +# +# STRATEGY: +# 1. Attempt to create a client pool with a missing shared log pool +# 2. Attempt to create a client pool with mis-named shared log pool +# 3. Attempt to create a client pool with a shared log and a log device +# 4. Attempt to use a client pool after the shared log has been destroyed +# 5. Attempt to create a client pool when the feature is disabled +# 6. Attempt to export/destroy an active shared log +# 7. Attempt to reguid a client/log pool +# 8. Attempt to checkpoint a client/log pool +# + +verify_runnable "global" + +log_assert "Negative shared log testing." 
+log_onexit cleanup + +log_mustnot zpool create -f -l $LOGPOOL $TESTPOOL "$DISK0" + +log_must zpool create -f $TESTPOOL2 "$DISK2" +log_mustnot zpool create -l $TESTPOOL2 -f $TESTPOOL "$DISK0" +log_must zpool destroy $TESTPOOL2 + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_mustnot zpool create -f -l "${LOGPOOL}2" $TESTPOOL "$DISK1" +log_mustnot zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" log "$DISK2" + +log_must zpool create -f -L ${LOGPOOL}2 "$DISK1" +log_must zpool destroy ${LOGPOOL}2 + +typeset FS="$LOGPOOL/fs" +log_mustnot zfs create -o sync=always -o recordsize=8k $FS + +log_mustnot zpool create -f -l $LOGPOOL -o feature@shared_log=disabled $TESTPOOL "$DISK1" +log_mustnot zpool create -f -L -o feature@shared_log=disabled ${LOGPOOL}2 "$DISK1" + +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_mustnot zpool export $LOGPOOL +log_mustnot zpool destroy $LOGPOOL + +log_mustnot zpool reguid $LOGPOOL +log_mustnot zpool reguid $TESTPOOL + +log_mustnot zpool checkpoint $TESTPOOL +log_mustnot zpool checkpoint $LOGPOOL + +log_pass "Negative shared log testing." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh new file mode 100755 index 000000000000..cc5ad748a5fc --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_007_pos.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Test fault behavior of shared log pool +# +# STRATEGY: +# 1. Create shared log pool & client +# 2. Fault the provider pool +# 3. Verify the client pool also faults +# + +verify_runnable "global" + +log_assert "Test fault behavior of shared log pools." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_must zinject -d "$DISK0" -A degrade $LOGPOOL +log_must eval "zpool status -e $TESTPOOL | grep DEGRADED" + +log_pass "Test fault behavior of shared log pools." diff --git a/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh b/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh new file mode 100755 index 000000000000..fe2445e4dc9a --- /dev/null +++ b/tests/zfs-tests/tests/functional/shared_log/shared_log_008_pos.ksh @@ -0,0 +1,81 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/shared_log/shared_log.kshlib + +# +# DESCRIPTION: +# Test zpool recycle +# +# STRATEGY: +# 1. Create shared log pool & clients +# 2. Verify zpool recycle -a doesn't recycle anything +# 3. Export clients +# 4. Verify zpool recycle -a recycles everything +# 5. Re-add clients and export both +# 6. Verify zpool recycle of a single client works as expected +# 7. Re-add client and export it +# 8. Verify zpool recycle of multiple clients works as expected +# + +verify_runnable "global" + +log_assert "Test zpool recycle." +log_onexit cleanup + +typeset FS="$TESTPOOL/fs" + +log_must zpool create -f -L $LOGPOOL "$DISK0" +log_must zpool create -f -l $LOGPOOL $TESTPOOL "$DISK1" +log_must zpool create -f -l $LOGPOOL ${TESTPOOL}2 "$DISK2" +log_must zfs create -o sync=always ${TESTPOOL}/fs +log_must zfs create -o sync=always ${TESTPOOL}2/fs +log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128 +log_must dd if=/dev/urandom of=/${TESTPOOL}2/fs/f1 bs=128k count=128 +log_must eval "zpool recycle -a -v $LOGPOOL | grep '\\[\\]' >/dev/null" + +log_must zpool export $TESTPOOL +log_must zpool export ${TESTPOOL}2 +log_must zpool recycle -a -v $LOGPOOL +log_mustnot zpool import $TESTPOOL +log_mustnot zpool import ${TESTPOOL}2 + +log_must zpool import -m -L $LOGPOOL $TESTPOOL +log_must zpool import -m -L $LOGPOOL ${TESTPOOL}2 +log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128 +log_must zpool export $TESTPOOL +log_must zpool export ${TESTPOOL}2 +log_must zpool recycle $LOGPOOL $TESTPOOL +log_mustnot 
zpool import $TESTPOOL + +log_must zpool import -m -L $LOGPOOL $TESTPOOL +log_must dd if=/dev/urandom of=/${TESTPOOL}/fs/f1 bs=128k count=128 +log_must zpool export $TESTPOOL +log_must zpool recycle $LOGPOOL $TESTPOOL ${TESTPOOL2} +log_mustnot zpool import $TESTPOOL +log_mustnot zpool import ${TESTPOOL}2 + +log_pass "Test zpool recycle." diff --git a/tests/zfs-tests/tests/perf/perf.shlib b/tests/zfs-tests/tests/perf/perf.shlib index 5555e910d722..2402a93d52b8 100644 --- a/tests/zfs-tests/tests/perf/perf.shlib +++ b/tests/zfs-tests/tests/perf/perf.shlib @@ -10,7 +10,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # Copyright (c) 2016, Intel Corporation. # @@ -21,6 +21,7 @@ export PERF_RUNTIME=${PERF_RUNTIME:-'180'} export PERF_RANDSEED=${PERF_RANDSEED:-'1234'} export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'} export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'} +export PERF_LOG_TYPES=${PERF_LOG_TYPES:-"none slog shared"} # Default to JSON for fio output export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'} @@ -44,12 +45,13 @@ function get_suffix typeset threads=$1 typeset sync=$2 typeset iosize=$3 + typeset log_type=$4 typeset sync_str=$(get_sync_str $sync) typeset filesystems=$(get_nfilesystems) typeset suffix="$sync_str.$iosize-ios" - suffix="$suffix.$threads-threads.$filesystems-filesystems" + suffix="$suffix.$threads-threads.$filesystems-filesystems.$log_type-log" echo $suffix } @@ -63,9 +65,10 @@ function do_fio_run_impl typeset threads_per_fs=$5 typeset sync=$6 typeset iosize=$7 + typeset log_type=$8 typeset sync_str=$(get_sync_str $sync) - log_note "Running with $threads $sync_str threads, $iosize ios" + log_note "Running with $threads $sync_str threads, $iosize ios with log $log_type" if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then log_must test $do_recreate @@ -73,7 +76,7 @@ function do_fio_run_impl fi if $do_recreate; then - recreate_perf_pool + recreate_perf_pool 
$log_type # # A value of zero for "threads_per_fs" is "special", and @@ -127,7 +130,7 @@ function do_fio_run_impl fi # This will be part of the output filename. - typeset suffix=$(get_suffix $threads $sync $iosize) + typeset suffix=$(get_suffix $threads $sync $iosize $log_type) # Start the data collection do_collect_scripts $suffix @@ -168,20 +171,27 @@ function do_fio_run typeset script=$1 typeset do_recreate=$2 typeset clear_cache=$3 - typeset threads threads_per_fs sync iosize + typeset log_types=$4 + typeset threads threads_per_fs sync iosize logtype for threads in $PERF_NTHREADS; do for threads_per_fs in $PERF_NTHREADS_PER_FS; do for sync in $PERF_SYNC_TYPES; do for iosize in $PERF_IOSIZES; do - do_fio_run_impl \ - $script \ - $do_recreate \ - $clear_cache \ - $threads \ - $threads_per_fs \ - $sync \ - $iosize + for logtype in $log_types; do + if [[ $sync == "0" && $logtype != "none" ]]; then + continue + fi + do_fio_run_impl \ + $script \ + $do_recreate \ + $clear_cache \ + $threads \ + $threads_per_fs \ + $sync \ + $iosize \ + $logtype + done done done done @@ -286,8 +296,10 @@ function clear_zinject_delays # function recreate_perf_pool { + typeset logtype=$1 [[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set." 
+ log_note "recreating $PERFPOOL with $logtype" # # In case there's been some "leaked" zinject delays, or if the # performance test injected some delays itself, we clear all @@ -297,6 +309,23 @@ function recreate_perf_pool # clear_zinject_delays + if [[ $logtype == "none" ]]; then + destroy_pool $PERFPOOL + destroy_pool "${PERFPOOL}_log" + create_pool $PERFPOOL $DISKS + else + typeset disks="${DISKS% *}" + typeset log_disk="${DISKS##* }" + if [[ $logtype == "slog" ]]; then + destroy_pool $PERFPOOL + destroy_pool "${PERFPOOL}_log" + create_pool $PERFPOOL $disks log $log_disk + else + destroy_pool $PERFPOOL + create_pool ${PERFPOOL}_log -L $log_disk + create_pool $PERFPOOL -l ${PERFPOOL}_log $disks + fi + fi # # This function handles the case where the pool already exists, # and will destroy the previous pool and recreate a new pool. diff --git a/tests/zfs-tests/tests/perf/regression/cleanup.ksh b/tests/zfs-tests/tests/perf/regression/cleanup.ksh new file mode 100755 index 000000000000..b179f32c5bd2 --- /dev/null +++ b/tests/zfs-tests/tests/perf/regression/cleanup.ksh @@ -0,0 +1,38 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Delphix. All rights reserved. 
+# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +if datasetexists $PERFPOOL ; then + log_must destroy_pool $PERFPOOL +fi +if datasetexists ${PERFPOOL}_log ; then + log_must destroy_pool ${PERFPOOL}_log +fi + +log_pass diff --git a/tests/zfs-tests/tests/perf/regression/random_reads.ksh b/tests/zfs-tests/tests/perf/regression/random_reads.ksh index 0c73df67935c..5a064c5e762c 100755 --- a/tests/zfs-tests/tests/perf/regression/random_reads.ksh +++ b/tests/zfs-tests/tests/perf/regression/random_reads.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # # @@ -45,55 +45,56 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems -# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. -export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. + export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'8k'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the read tests. Create as many files as the + # largest number of threads. An fio run with fewer threads will use a subset + # of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio -# Variables specific to this test for use by fio. 
-export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'8k'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi -# Set up the scripts and output files that will log performance data. 
-lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Random reads with settings: $(print_perf_settings)" -do_fio_run random_reads.fio false true + log_note "Random reads with settings: $(print_perf_settings)" + do_fio_run random_reads.fio false true $logtype +done log_pass "Measure IO stats during random read load" diff --git a/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh b/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh index e0626c0b42f3..539f2ba96194 100755 --- a/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh +++ b/tests/zfs-tests/tests/perf/regression/random_readwrite.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2022 by Delphix. All rights reserved. # # @@ -45,55 +45,56 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems -# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. -export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. + export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + + # Variables specific to this test for use by fio. 
+ export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES='bssplit' # bssplit used instead of fixed sizes + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the readwrite tests. Create as many files + # as the largest number of threads. An fio run with fewer threads will use + # a subset of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" + + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'32 64'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES='bssplit' # bssplit used instead of fixed sizes -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - -# Layout the files to be used by the readwrite tests. Create as many files -# as the largest number of threads. An fio run with fewer threads will use -# a subset of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. 
-lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Random reads and writes with settings: $(print_perf_settings)" -do_fio_run random_readwrite.fio false true + log_note "Random reads and writes with settings: $(print_perf_settings)" + do_fio_run random_readwrite.fio false true $logtype +done log_pass "Measure IO stats during random read and write load" diff --git a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh index afaffb9e648f..80010f2b6b3e 100755 --- a/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh +++ b/tests/zfs-tests/tests/perf/regression/random_readwrite_fixed.ksh @@ -1,4 +1,4 @@ -#!/bin/ksh +#!/bin/ksh # This file and its contents are supplied under the terms of the # Common Development and Distribution License ("CDDL"), version 1.0. @@ -11,7 +11,7 @@ # # -# Copyright (c) 2017, 2021 by Delphix. All rights reserved. +# Copyright (c) 2017, 2023 by Delphix. All rights reserved. 
# # @@ -35,57 +35,58 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read write load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems + + # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. + export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'8k'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'} + + # Layout the files to be used by the readwrite tests. Create as many files + # as the largest number of threads. An fio run with fewer threads will use + # a subset of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" + + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "kstat zfs:0 1" "kstat" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + "dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "dtrace -s $PERF_SCRIPTS/profile.d" "profile" + ) + fi -# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. 
-export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) - -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'8k'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'} - -# Layout the files to be used by the readwrite tests. Create as many files -# as the largest number of threads. An fio run with fewer threads will use -# a subset of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "kstat zfs:0 1" "kstat" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - "dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" - ) -fi - -log_note "Random reads and writes with settings: $(print_perf_settings)" -do_fio_run random_readwrite_fixed.fio false true + log_note "Random reads and writes with settings: $(print_perf_settings)" + do_fio_run random_readwrite_fixed.fio false true $logtype +done log_pass "Measure IO stats during random read and write load" diff --git a/tests/zfs-tests/tests/perf/regression/random_writes.ksh b/tests/zfs-tests/tests/perf/regression/random_writes.ksh index 06061a9b7462..977a67a091fa 100755 --- a/tests/zfs-tests/tests/perf/regression/random_writes.ksh +++ 
b/tests/zfs-tests/tests/perf/regression/random_writes.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # # @@ -44,14 +44,12 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +recreate_perf_pool none # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) @@ -86,5 +84,5 @@ else fi log_note "Random writes with settings: $(print_perf_settings)" -do_fio_run random_writes.fio true false +do_fio_run random_writes.fio true false "$PERF_LOG_TYPES" log_pass "Measure IO stats during random write load" diff --git a/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh b/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh index 7e5a741137d6..b88225bb7f14 100755 --- a/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh +++ b/tests/zfs-tests/tests/perf/regression/random_writes_zil.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib @@ -26,21 +26,12 @@ function cleanup pkill fio pkill iostat - # - # We're using many filesystems depending on the number of - # threads for each test, and there's no good way to get a list - # of all the filesystems that should be destroyed on cleanup - # (i.e. the list of filesystems used for the last test ran). - # Thus, we simply recreate the pool as a way to destroy all - # filesystems and leave a fresh pool behind. 
- # - recreate_perf_pool } trap "log_fail \"Measure IO stats during random write load\"" SIGTERM log_onexit cleanup -recreate_perf_pool +recreate_perf_pool none # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) @@ -82,5 +73,5 @@ else fi log_note \ "ZIL specific random write workload with settings: $(print_perf_settings)" -do_fio_run random_writes.fio true false +do_fio_run random_writes.fio true false "$PERF_LOG_TYPES" log_pass "Measure IO stats during ZIL specific random write workload" diff --git a/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh b/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh index cc6d17245239..c259be590c12 100755 --- a/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh +++ b/tests/zfs-tests/tests/perf/regression/sequential_reads.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # # @@ -43,57 +43,59 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems + + # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. + export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the read tests. Create as many files as the + # largest number of threads. An fio run with fewer threads will use a subset + # of the available files. 
+ export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + -# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. -export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'8 16'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. 
-lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Sequential reads with settings: $(print_perf_settings)" -do_fio_run sequential_reads.fio false true + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi + + log_note "Sequential reads with settings: $(print_perf_settings)" + do_fio_run sequential_reads.fio false true $logtype +done log_pass "Measure IO stats during sequential read load" diff --git a/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh b/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh index ee14f2ce7807..f014c2f12e1d 100755 --- a/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh +++ b/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. 
# # @@ -35,57 +35,58 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems + + # Make sure the working set can be cached in the arc. Aim for 1/2 of arc. + export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'128k'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the read tests. Create as many files as the + # largest number of threads. An fio run with fewer threads will use a subset + # of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" -# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. -export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) - -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'128k'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. 
-export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Sequential cached reads with settings: $(print_perf_settings)" -do_fio_run sequential_reads.fio false false + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi + + log_note "Sequential cached reads with settings: $(print_perf_settings)" + do_fio_run sequential_reads.fio false false $logtype +done log_pass "Measure IO stats during sequential cached read load" diff --git a/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh b/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh index 2cc81d5cd341..2e18d7d5b160 100755 --- 
a/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh +++ b/tests/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # # @@ -41,78 +41,79 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems - -# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. -export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) - -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'128k'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# -# Only a single filesystem is used by this test. To be defensive, we -# double check that TESTFS only contains a single filesystem. We -# wouldn't want to assume this was the case, and have it actually -# contain multiple filesystem (causing cascading failures later). -# -log_must test $(get_nfilesystems) -eq 1 - -log_note "Creating snapshot, $TESTSNAP, of $TESTFS" -create_snapshot $TESTFS $TESTSNAP -log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP" -create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE - -# -# We want to run FIO against the clone we created above, and not the -# clone's originating filesystem. Thus, we override the default behavior -# and explicitly set TESTFS to the clone. 
-# -export TESTFS=$PERFPOOL/$TESTCLONE - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Sequential cached reads from $DIRECTORY with " \ - "ettings: $(print_perf_settings)" -do_fio_run sequential_reads.fio false false +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems + + # Make sure the working set can be cached in the arc. Aim for 1/2 of arc. + export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'128k'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the read tests. Create as many files as the + # largest number of threads. An fio run with fewer threads will use a subset + # of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + + # + # Only a single filesystem is used by this test. To be defensive, we + # double check that TESTFS only contains a single filesystem. 
We + # wouldn't want to assume this was the case, and have it actually + # contain multiple filesystem (causing cascading failures later). + # + log_must test $(get_nfilesystems) -eq 1 + + log_note "Creating snapshot, $TESTSNAP, of $TESTFS" + create_snapshot $TESTFS $TESTSNAP + log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP" + create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE + + # + # We want to run FIO against the clone we created above, and not the + # clone's originating filesystem. Thus, we override the default behavior + # and explicitly set TESTFS to the clone. + # + export TESTFS=$PERFPOOL/$TESTCLONE + + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" + + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi + + log_note "Sequential cached reads from $DIRECTORY with " \ + "settings: $(print_perf_settings)" + do_fio_run sequential_reads.fio false false $logtype +done log_pass "Measure IO stats during sequential cached read load" diff --git a/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh b/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh index 9a244324a751..cb71a4cb09ea 100755 --- a/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh +++ 
b/tests/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2016, 2021 by Delphix. All rights reserved. +# Copyright (c) 2016, 2023 by Delphix. All rights reserved. # # @@ -39,59 +39,60 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during sequential read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems + + # Ensure the working set can be cached in the dbuf cache. + export TOTAL_SIZE=$(($(get_dbuf_cache_size) * 3 / 4)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'64'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'64k'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + + # Layout the files to be used by the read tests. Create as many files as the + # largest number of threads. An fio run with fewer threads will use a subset + # of the available files. + export NUMJOBS=$(get_max $PERF_NTHREADS) + export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) + export DIRECTORY=$(get_directory) + log_must fio $FIO_SCRIPTS/mkfiles.fio + + # Set up the scripts and output files that will log performance data. + lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" -# Ensure the working set can be cached in the dbuf cache. -export TOTAL_SIZE=$(($(get_dbuf_cache_size) * 3 / 4)) - -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'64'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'64k'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - -# Layout the files to be used by the read tests. 
Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -export DIRECTORY=$(get_directory) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "kstat zfs:0 1" "kstat" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - "dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" - ) -fi - -log_note "Sequential cached reads with settings: $(print_perf_settings)" -do_fio_run sequential_reads.fio false false + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "$PERF_SCRIPTS/prefetch_io.sh $PERFPOOL 1" "prefetch" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "kstat zfs:0 1" "kstat" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + "dtrace -Cs $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" + "dtrace -s $PERF_SCRIPTS/profile.d" "profile" + ) + fi + + log_note "Sequential cached reads with settings: $(print_perf_settings)" + do_fio_run sequential_reads.fio false false 
$logtype +done log_pass "Measure IO stats during sequential cached read load" diff --git a/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh b/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh index a51655cc3719..6ed345e5cddb 100755 --- a/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh +++ b/tests/zfs-tests/tests/perf/regression/sequential_writes.ksh @@ -12,7 +12,7 @@ # # -# Copyright (c) 2015, 2021 by Delphix. All rights reserved. +# Copyright (c) 2015, 2023 by Delphix. All rights reserved. # # @@ -44,47 +44,48 @@ function cleanup # kill fio and iostat pkill fio pkill iostat - recreate_perf_pool } trap "log_fail \"Measure IO stats during random read load\"" SIGTERM log_onexit cleanup -recreate_perf_pool -populate_perf_filesystems +for logtype in $PERF_LOG_TYPES; do + recreate_perf_pool $logtype + populate_perf_filesystems -# Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. -export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + # Aim to fill the pool to 50% capacity while accounting for a 3x compressratio. + export TOTAL_SIZE=$(($(get_prop avail $PERFPOOL) * 3 / 2)) + + # Variables specific to this test for use by fio. + export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'} + export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} + export PERF_IOSIZES=${PERF_IOSIZES:-'8k 1m'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'} + + # Set up the scripts and output files that will log performance data. 
+ lun_list=$(pool_to_lun_list $PERFPOOL) + log_note "Collecting backend IO stats with lun list $lun_list" + if is_linux; then + typeset perf_record_cmd="perf record -F 99 -a -g -q \ + -o /dev/stdout -- sleep ${PERF_RUNTIME}" + + export collect_scripts=( + "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" + "vmstat -t 1" "vmstat" + "mpstat -P ALL 1" "mpstat" + "iostat -tdxyz 1" "iostat" + "$perf_record_cmd" "perf" + ) + else + export collect_scripts=( + "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat -T d 1" "vmstat" + "mpstat -T d 1" "mpstat" + "iostat -T d -xcnz 1" "iostat" + ) + fi -# Variables specific to this test for use by fio. -export PERF_NTHREADS=${PERF_NTHREADS:-'16 32'} -export PERF_NTHREADS_PER_FS=${PERF_NTHREADS_PER_FS:-'0'} -export PERF_IOSIZES=${PERF_IOSIZES:-'8k 1m'} -export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'0 1'} - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -if is_linux; then - typeset perf_record_cmd="perf record -F 99 -a -g -q \ - -o /dev/stdout -- sleep ${PERF_RUNTIME}" - - export collect_scripts=( - "zpool iostat -lpvyL $PERFPOOL 1" "zpool.iostat" - "vmstat -t 1" "vmstat" - "mpstat -P ALL 1" "mpstat" - "iostat -tdxyz 1" "iostat" - "$perf_record_cmd" "perf" - ) -else - export collect_scripts=( - "$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" - "vmstat -T d 1" "vmstat" - "mpstat -T d 1" "mpstat" - "iostat -T d -xcnz 1" "iostat" - ) -fi - -log_note "Sequential writes with settings: $(print_perf_settings)" -do_fio_run sequential_writes.fio true false + log_note "Sequential writes with settings: $(print_perf_settings)" + do_fio_run sequential_writes.fio true false $logtype +done log_pass "Measure IO stats during sequential write load"