Skip to content

Commit

Permalink
Allocate zfs_locked_range_t memory externally from zfs_rangelock_{try…
Browse files Browse the repository at this point in the history
…,}enter()

Typically, the memory allocated by kmem_alloc() can be trivially
allocated from either the stack or as part of another structure. The
only case where it cannot is in vdev_raidz_io_start(), although some
further refactoring should be able to eliminate that case too.
Allocating from the stack or as part of another data structure is faster
as it gives us this memory for free, so there is little reason not to do
it.

This eliminates a non-neligible amount of CPU time that I have seen in
flame graphs going back to the early days of OpenZFS when the tree was
the ZFSOnLinux tree. This should make our VFS and zvol operations
slightly faster. Some RAID-Z operations will also become slightly
faster.

Signed-off-by: Richard Yao <[email protected]>
  • Loading branch information
ryao committed Dec 23, 2024
1 parent 1acd246 commit 88cddea
Show file tree
Hide file tree
Showing 13 changed files with 188 additions and 190 deletions.
40 changes: 17 additions & 23 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1745,23 +1745,19 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
ztest_rll_unlock(rll);
}

static rl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
static void
ztest_range_lock(ztest_ds_t *zd, rl_t *rl, uint64_t object, uint64_t offset,
uint64_t size, rl_type_t type)
{
uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
rl_t *rl;

rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
rl->rl_object = object;
rl->rl_offset = offset;
rl->rl_size = size;
rl->rl_lock = rll;

ztest_rll_lock(rll, type);

return (rl);
}

static void
Expand All @@ -1770,8 +1766,6 @@ ztest_range_unlock(rl_t *rl)
rll_t *rll = rl->rl_lock;

ztest_rll_unlock(rll);

umem_free(rl, sizeof (*rl));
}

static void
Expand Down Expand Up @@ -2200,7 +2194,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
dmu_tx_t *tx;
dmu_buf_t *db;
arc_buf_t *abuf = NULL;
rl_t *rl;
rl_t rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
Expand All @@ -2224,7 +2218,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
bt = NULL;

ztest_object_lock(zd, lr->lr_foid, ZTRL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, offset, length, ZTRL_WRITER);
ztest_range_lock(zd, &rl, lr->lr_foid, offset, length, ZTRL_WRITER);

VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));

Expand All @@ -2249,7 +2243,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
if (abuf != NULL)
dmu_return_arcbuf(abuf);
dmu_buf_rele(db, FTAG);
ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand Down Expand Up @@ -2315,7 +2309,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand All @@ -2329,13 +2323,13 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
rl_t rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

ztest_object_lock(zd, lr->lr_foid, ZTRL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
ztest_range_lock(zd, &rl, lr->lr_foid, lr->lr_offset, lr->lr_length,
ZTRL_WRITER);

tx = dmu_tx_create(os);
Expand All @@ -2344,7 +2338,7 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)

txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand All @@ -2356,7 +2350,7 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand Down Expand Up @@ -2472,12 +2466,12 @@ ztest_get_done(zgd_t *zgd, int error)
{
(void) error;
ztest_ds_t *zd = zgd->zgd_private;
uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object;
uint64_t object = ((rl_t *)&zgd->zgd_lr)->rl_object;

if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);

ztest_range_unlock((rl_t *)zgd->zgd_lr);
ztest_range_unlock((rl_t *)&zgd->zgd_lr);
ztest_object_unlock(zd, object);

umem_free(zgd, sizeof (*zgd));
Expand Down Expand Up @@ -2527,7 +2521,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
zgd->zgd_private = zd;

if (buf != NULL) { /* immediate write */
zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd,
ztest_range_lock(zd, (rl_t *)&zgd->zgd_lr,
object, offset, size, ZTRL_READER);

error = dmu_read(os, object, offset, size, buf,
Expand All @@ -2543,7 +2537,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
offset = 0;
}

zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd,
ztest_range_lock(zd, (rl_t *)&zgd->zgd_lr,
object, offset, size, ZTRL_READER);

error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
Expand Down Expand Up @@ -2790,12 +2784,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
rl_t rl;

txg_wait_synced(dmu_objset_pool(os), 0);

ztest_object_lock(zd, object, ZTRL_READER);
rl = ztest_range_lock(zd, object, offset, size, ZTRL_WRITER);
ztest_range_lock(zd, &rl, object, offset, size, ZTRL_WRITER);

tx = dmu_tx_create(os);

Expand All @@ -2811,7 +2805,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
(void) dmu_free_long_range(os, object, offset, size);
}

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, object);
}

Expand Down
3 changes: 2 additions & 1 deletion include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <sys/fs/zfs.h>
#include <sys/zio_compress.h>
#include <sys/zio_priority.h>
#include <sys/zfs_rlock.h>
#include <sys/uio.h>
#include <sys/zfs_file.h>

Expand Down Expand Up @@ -1083,10 +1084,10 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
* {zfs,zvol,ztest}_get_done() args
*/
typedef struct zgd {
zfs_locked_range_t zgd_lr;
struct lwb *zgd_lwb;
struct blkptr *zgd_bp;
dmu_buf_t *zgd_db;
struct zfs_locked_range *zgd_lr;
void *zgd_private;
} zgd_t;

Expand Down
4 changes: 2 additions & 2 deletions include/sys/zfs_rlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ typedef struct zfs_locked_range {
void zfs_rangelock_init(zfs_rangelock_t *, zfs_rangelock_cb_t *, void *);
void zfs_rangelock_fini(zfs_rangelock_t *);

zfs_locked_range_t *zfs_rangelock_enter(zfs_rangelock_t *,
void zfs_rangelock_enter(zfs_rangelock_t *, zfs_locked_range_t *,
uint64_t, uint64_t, zfs_rangelock_type_t);
zfs_locked_range_t *zfs_rangelock_tryenter(zfs_rangelock_t *,
boolean_t zfs_rangelock_tryenter(zfs_rangelock_t *, zfs_locked_range_t *,
uint64_t, uint64_t, zfs_rangelock_type_t);
void zfs_rangelock_exit(zfs_locked_range_t *);
void zfs_rangelock_reduce(zfs_locked_range_t *, uint64_t, uint64_t);
Expand Down
21 changes: 11 additions & 10 deletions module/os/freebsd/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -3924,7 +3924,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_locked_range_t *lr;
zfs_locked_range_t lr;
boolean_t res;
vm_object_t object;
off_t start, end, obj_size;
uint_t blksz;
Expand All @@ -3948,9 +3949,9 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
blksz = zp->z_blksz;
len = roundup(end, blksz) - rounddown(start, blksz);

lr = zfs_rangelock_tryenter(&zp->z_rangelock,
res = zfs_rangelock_tryenter(&zp->z_rangelock, &lr,
rounddown(start, blksz), len, RL_READER);
if (lr == NULL) {
if (res == B_FALSE) {
/*
* Avoid a deadlock with update_pages(). We need to
* hold the range lock when copying from the DMU, so
Expand All @@ -3963,7 +3964,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
for (int i = 0; i < count; i++)
vm_page_xunbusy(ma[i]);

lr = zfs_rangelock_enter(&zp->z_rangelock,
zfs_rangelock_enter(&zp->z_rangelock, &lr,
rounddown(start, blksz), len, RL_READER);

zfs_vmobject_wlock(object);
Expand All @@ -3974,14 +3975,14 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
}
if (blksz == zp->z_blksz)
break;
zfs_rangelock_exit(lr);
zfs_rangelock_exit(&lr);
}

zfs_vmobject_wlock(object);
obj_size = object->un_pager.vnp.vnp_size;
zfs_vmobject_wunlock(object);
if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
zfs_rangelock_exit(lr);
zfs_rangelock_exit(&lr);
zfs_exit(zfsvfs, FTAG);
return (zfs_vm_pagerret_bad);
}
Expand Down Expand Up @@ -4032,7 +4033,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
i += count1 - 1;
}

zfs_rangelock_exit(lr);
zfs_rangelock_exit(&lr);
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
Expand Down Expand Up @@ -4075,7 +4076,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zfs_locked_range_t *lr;
zfs_locked_range_t lr;
dmu_tx_t *tx;
struct sf_buf *sf;
vm_object_t object;
Expand Down Expand Up @@ -4107,7 +4108,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
blksz = zp->z_blksz;
lo_off = rounddown(off, blksz);
lo_len = roundup(len + (off - lo_off), blksz);
lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
zfs_rangelock_enter(&zp->z_rangelock, &lr, lo_off, lo_len, RL_WRITER);

zfs_vmobject_wlock(object);
if (len + off > object->un_pager.vnp.vnp_size) {
Expand Down Expand Up @@ -4213,7 +4214,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
dmu_tx_commit(tx);

out:
zfs_rangelock_exit(lr);
zfs_rangelock_exit(&lr);
if (commit)
zil_commit(zfsvfs->z_log, zp->z_id);

Expand Down
Loading

0 comments on commit 88cddea

Please sign in to comment.