Skip to content

Commit

Permalink
Selectable block allocators
Browse files Browse the repository at this point in the history
ZFS historically has had several space allocators that were
dynamically selectable.  While these have been retained in 
OpenZFS, only a single allocator has been statically compiled 
in. This patch compiles all allocators for OpenZFS and provides 
a module parameter to allow for manual selection between them.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Ameer Hamza <[email protected]>
Reviewed-by: Alexander Motin <[email protected]>
Signed-off-by: Edmund Nadolski <[email protected]>
Closes openzfs#15218
  • Loading branch information
ednadolski-ix authored and lundman committed Dec 11, 2023
1 parent 5550826 commit bf0dcfa
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 31 deletions.
3 changes: 3 additions & 0 deletions include/os/freebsd/spl/sys/mod_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
/*
 * Per-handler argument lists for string/integer tunables that are set
 * through a custom handler function.  Each macro ignores its `var`
 * argument and expands to five comma-separated items: a CTLTYPE_* type
 * constant, NULL, 0, the handler function, and a format string.
 * NOTE(review): presumably these are spliced into the FreeBSD sysctl
 * registration done by ZFS_MODULE_PARAM_CALL -- confirm against that macro.
 */
#define param_set_deadman_failmode_args(var) \
CTLTYPE_STRING, NULL, 0, param_set_deadman_failmode, "A"

/* String-typed tunable handled by param_set_active_allocator(). */
#define param_set_active_allocator_args(var) \
CTLTYPE_STRING, NULL, 0, param_set_active_allocator, "A"

#define param_set_deadman_synctime_args(var) \
CTLTYPE_U64, NULL, 0, param_set_deadman_synctime, "QU"

Expand Down
1 change: 1 addition & 0 deletions include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ extern "C" {


/*
 * Describes one selectable block allocator.
 *
 * msop_name  - the allocator's name; spa_find_allocator_byname() matches
 *              user-supplied strings against it.
 * msop_alloc - the allocation routine, taking a metaslab and a uint64_t
 *              (named `size` in the allocator prototypes) and returning a
 *              uint64_t.  NOTE(review): the allocators appear to return
 *              -1ULL when nothing suitable is found -- confirm.
 */
typedef struct metaslab_ops {
const char *msop_name;
uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
} metaslab_ops_t;

Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,8 @@ extern uint64_t spa_deadman_synctime(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);
extern uint64_t spa_dirty_data(spa_t *spa);
extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
/* Returns the allocator index stored in spa->spa_active_allocator. */
extern int spa_get_allocator(spa_t *spa);
/*
 * Selects the pool's allocator by name; a name that is not recognized
 * results in index 0 being stored.
 */
extern void spa_set_allocator(spa_t *spa, const char *allocator);

/* Miscellaneous support routines */
extern void spa_load_failed(spa_t *spa, const char *fmt, ...)
Expand Down Expand Up @@ -1207,6 +1209,7 @@ int param_set_deadman_ziotime(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);
int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
int param_set_active_allocator(ZFS_MODULE_PARAM_ARGS);

#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ struct spa {
*/
spa_alloc_t *spa_allocs;
int spa_alloc_count;
int spa_active_allocator; /* selectable allocator */

spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
Expand Down Expand Up @@ -467,6 +468,8 @@ extern int param_set_deadman_failmode_common(const char *val);
extern void spa_set_deadman_synctime(hrtime_t ns);
extern void spa_set_deadman_ziotime(hrtime_t ns);
extern const char *spa_history_zone(void);
/* Name of the allocator applied to pools by spa_add(); see spa_misc.c. */
extern const char *zfs_active_allocator;
/*
 * Platform-independent part of the active_allocator setter: validates the
 * name and updates zfs_active_allocator.  Returns 0 or EINVAL.
 */
extern int param_set_active_allocator_common(const char *val);

#ifdef __cplusplus
}
Expand Down
18 changes: 18 additions & 0 deletions module/os/freebsd/zfs/sysctl_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,24 @@ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,

/* metaslab.c */

/*
 * Sysctl handler for the zfs_active_allocator tunable.
 *
 * The current allocator name is staged in a local buffer and handed to
 * sysctl_handle_string() to service the request.  If the request wrote a
 * new value that differs from the current one, that value is passed to
 * param_set_active_allocator_common() for validation and activation.
 *
 * Returns 0 on success (including a read-only request or a write of the
 * value that is already active), otherwise the error reported by
 * sysctl_handle_string() or param_set_active_allocator_common().
 */
int
param_set_active_allocator(SYSCTL_HANDLER_ARGS)
{
	char buf[16];
	int rc;

	/*
	 * Always seed the buffer with the current value.  Previously this
	 * was done only for pure reads, which left buf uninitialized when a
	 * new value was supplied; sysctl_handle_string() may still copy the
	 * buffer's old contents out to the caller on such a request, so it
	 * must never see uninitialized stack memory.
	 */
	strlcpy(buf, zfs_active_allocator, sizeof (buf));

	rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (rc || req->newptr == NULL)
		return (rc);

	/* Nothing to do when the requested allocator is already active. */
	if (strcmp(buf, zfs_active_allocator) == 0)
		return (0);

	return (param_set_active_allocator_common(buf));
}

/*
* In pools where the log space map feature is not enabled we touch
* multiple metaslabs (and their respective space maps) with each
Expand Down
12 changes: 12 additions & 0 deletions module/os/linux/zfs/spa_misc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ param_set_slop_shift(const char *buf, zfs_kernel_param_t *kp)
return (0);
}

/*
 * Linux module-parameter setter for zfs_active_allocator.
 *
 * The value is first validated by param_set_active_allocator_common();
 * its result is negated before being returned.  Only when validation
 * succeeds is the value handed to param_set_charp() to be stored.
 * NOTE(review): the negation presumably converts the positive errno from
 * the common helper into the negative form the kernel expects -- confirm.
 */
int
param_set_active_allocator(const char *val, zfs_kernel_param_t *kp)
{
	int rc = -param_set_active_allocator_common(val);

	if (rc != 0)
		return (rc);

	return (param_set_charp(val, kp));
}

const char *
spa_history_zone(void)
{
Expand Down
98 changes: 73 additions & 25 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
#include <sys/zap.h>
#include <sys/btree.h>

#define WITH_DF_BLOCK_ALLOCATOR

#define GANG_ALLOCATION(flags) \
((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER))

Expand Down Expand Up @@ -1622,9 +1620,6 @@ metaslab_block_find(zfs_btree_t *t, range_tree_t *rt, uint64_t start,
return (rs);
}

#if defined(WITH_DF_BLOCK_ALLOCATOR) || \
defined(WITH_CF_BLOCK_ALLOCATOR)

/*
* This is a helper function that can be used by the allocator to find a
* suitable block to allocate. This will search the specified B-tree looking
Expand Down Expand Up @@ -1659,9 +1654,74 @@ metaslab_block_picker(range_tree_t *rt, uint64_t *cursor, uint64_t size,
*cursor = 0;
return (-1ULL);
}
#endif /* WITH_DF/CF_BLOCK_ALLOCATOR */

#if defined(WITH_DF_BLOCK_ALLOCATOR)
static uint64_t metaslab_df_alloc(metaslab_t *msp, uint64_t size);
static uint64_t metaslab_cf_alloc(metaslab_t *msp, uint64_t size);
static uint64_t metaslab_ndf_alloc(metaslab_t *msp, uint64_t size);
metaslab_ops_t *metaslab_allocator(spa_t *spa);

static metaslab_ops_t metaslab_allocators[] = {
{ "dynamic", metaslab_df_alloc },
{ "cursor", metaslab_cf_alloc },
{ "new-dynamic", metaslab_ndf_alloc },
};

/*
 * Look up an allocator by name.
 *
 * Returns the index of the matching entry in metaslab_allocators[], or -1
 * if no entry has that name.  "new-dynamic" is deliberately reported as
 * not found even though it is present in the table.
 */
static int
spa_find_allocator_byname(const char *val)
{
	int idx;

	/* remove when ndf is working */
	if (strcmp("new-dynamic", val) == 0)
		return (-1);

	/*
	 * Scan from the last entry down to the first.  If nothing matches,
	 * the countdown ends with idx == -1, which is the "not found" value.
	 */
	idx = ARRAY_SIZE(metaslab_allocators);
	while (--idx >= 0) {
		if (strcmp(val, metaslab_allocators[idx].msop_name) == 0)
			break;
	}

	return (idx);
}

/*
 * Set the pool's active allocator from its name.
 *
 * The name is resolved with spa_find_allocator_byname(); if it is not
 * found, the first table entry (index 0) is used instead.  The chosen
 * allocator's name is recorded via zfs_dbgmsg().
 */
void
spa_set_allocator(spa_t *spa, const char *allocator)
{
	int found = spa_find_allocator_byname(allocator);
	int chosen = (found < 0) ? 0 : found;

	spa->spa_active_allocator = chosen;
	zfs_dbgmsg("spa allocator: %s\n",
	    metaslab_allocators[chosen].msop_name);
}

/*
 * Return the pool's active allocator, expressed as the index that
 * spa_set_allocator() stored in spa_active_allocator.
 */
int
spa_get_allocator(spa_t *spa)
{
	int active = spa->spa_active_allocator;

	return (active);
}

#if defined(_KERNEL)
/*
 * Platform-independent setter for the zfs_active_allocator tunable.
 *
 * Validates `val` against the allocator table and, on success, points
 * zfs_active_allocator at the matching table entry's name.
 *
 * Returns 0 on success, or EINVAL if `val` is NULL or does not name a
 * selectable allocator.
 */
int
param_set_active_allocator_common(const char *val)
{
	char *newline;
	int idx;

	if (val == NULL)
		return (SET_ERROR(EINVAL));

	/*
	 * Cut the string at the first newline.  This writes into the
	 * caller's buffer, so the caller sees the truncated string too
	 * (the Linux handler passes the same buffer on to
	 * param_set_charp() afterwards).
	 */
	newline = strchr(val, '\n');
	if (newline != NULL)
		*newline = '\0';

	idx = spa_find_allocator_byname(val);
	if (idx < 0)
		return (SET_ERROR(EINVAL));

	zfs_active_allocator = metaslab_allocators[idx].msop_name;
	return (0);
}
#endif

/*
 * Return the metaslab_ops_t entry for the pool's active allocator, i.e.
 * the metaslab_allocators[] element selected by spa_get_allocator().
 */
metaslab_ops_t *
metaslab_allocator(spa_t *spa)
{
	return (metaslab_allocators + spa_get_allocator(spa));
}

/*
* ==========================================================================
* Dynamic Fit (df) block allocator
Expand Down Expand Up @@ -1736,12 +1796,6 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size)
return (offset);
}

const metaslab_ops_t zfs_metaslab_ops = {
metaslab_df_alloc
};
#endif /* WITH_DF_BLOCK_ALLOCATOR */

#if defined(WITH_CF_BLOCK_ALLOCATOR)
/*
* ==========================================================================
* Cursor fit block allocator -
Expand Down Expand Up @@ -1784,12 +1838,6 @@ metaslab_cf_alloc(metaslab_t *msp, uint64_t size)
return (offset);
}

const metaslab_ops_t zfs_metaslab_ops = {
metaslab_cf_alloc
};
#endif /* WITH_CF_BLOCK_ALLOCATOR */

#if defined(WITH_NDF_BLOCK_ALLOCATOR)
/*
* ==========================================================================
* New dynamic fit allocator -
Expand Down Expand Up @@ -1846,12 +1894,6 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size)
return (-1ULL);
}

const metaslab_ops_t zfs_metaslab_ops = {
metaslab_ndf_alloc
};
#endif /* WITH_NDF_BLOCK_ALLOCATOR */


/*
* ==========================================================================
* Metaslabs
Expand Down Expand Up @@ -6232,3 +6274,9 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,

ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,
"Normally only consider this many of the best metaslabs in each vdev");

/*
 * Register the active_allocator tunable: read/write (ZMOD_RW), set through
 * param_set_active_allocator and read through param_get_charp.
 * NOTE(review): the resulting name is presumably zfs_active_allocator,
 * built from the "zfs_" prefix -- confirm against ZFS_MODULE_PARAM_CALL.
 */
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator,
param_set_active_allocator, param_get_charp, ZMOD_RW,
"SPA active allocator");
/* END CSTYLED */
14 changes: 8 additions & 6 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -1297,24 +1297,26 @@ spa_thread(void *arg)
}
#endif

extern metaslab_ops_t *metaslab_allocator(spa_t *spa);

/*
* Activate an uninitialized pool.
*/
static void
spa_activate(spa_t *spa, spa_mode_t mode)
{
metaslab_ops_t *msp = metaslab_allocator(spa);
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

spa->spa_state = POOL_STATE_ACTIVE;
spa->spa_mode = mode;
spa->spa_read_spacemaps = spa_mode_readable_spacemaps;

spa->spa_normal_class = metaslab_class_create(spa, &zfs_metaslab_ops);
spa->spa_log_class = metaslab_class_create(spa, &zfs_metaslab_ops);
spa->spa_embedded_log_class =
metaslab_class_create(spa, &zfs_metaslab_ops);
spa->spa_special_class = metaslab_class_create(spa, &zfs_metaslab_ops);
spa->spa_dedup_class = metaslab_class_create(spa, &zfs_metaslab_ops);
spa->spa_normal_class = metaslab_class_create(spa, msp);
spa->spa_log_class = metaslab_class_create(spa, msp);
spa->spa_embedded_log_class = metaslab_class_create(spa, msp);
spa->spa_special_class = metaslab_class_create(spa, msp);
spa->spa_dedup_class = metaslab_class_create(spa, msp);

/* Try to create a covering process */
mutex_enter(&spa->spa_proc_lock);
Expand Down
6 changes: 6 additions & 0 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,11 @@ static const uint64_t spa_min_slop = 128ULL * 1024 * 1024;
static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
static const int spa_allocators = 4;

/*
 * Name of the block allocator applied to pools as they are added
 * (spa_add() passes it to spa_set_allocator()).  Defaults to "dynamic".
 *
 * Recognized names are zfs_active_allocator=<dynamic|cursor|new-dynamic>.
 * Note that "new-dynamic" is listed in the allocator table but is
 * currently rejected by spa_find_allocator_byname(), so in practice only
 * "dynamic" and "cursor" can be selected.
 */
const char *zfs_active_allocator = "dynamic";

void
spa_load_failed(spa_t *spa, const char *fmt, ...)
Expand Down Expand Up @@ -709,6 +714,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms);
spa_set_deadman_failmode(spa, zfs_deadman_failmode);
spa_set_allocator(spa, zfs_active_allocator);

zfs_refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
Expand Down

0 comments on commit bf0dcfa

Please sign in to comment.