Skip to content

Commit

Permalink
Remove fastwrite mechanism.
Browse files Browse the repository at this point in the history
Fastwrite was introduced many years ago to improve ZIL writes spread
between multiple top-level vdevs by tracking number of allocated but
not written blocks and choosing vdev with smaller count.  It suposed
to reduce ZIL knowledge about allocation, but actually made ZIL to
even more actively report allocation code about the allocations,
complicating both ZIL and metaslabs code.

On top of that, it seems ZIO_FLAG_FASTWRITE setting in dmu_sync()
was lost many years ago, that was one of the declared benefits. Plus
introduction of embedded log metaslab class solved another problem
with allocation rotor accounting both normal and log allocations,
since in most cases those are now in different metaslab classes.

After all that, I'd prefer to simplify already too complicated ZIL,
ZIO and metaslab code if the benefit of complexity is not obvious.

Signed-off-by:	Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
  • Loading branch information
amotin committed Jul 25, 2023
1 parent 2848de1 commit 43b694c
Show file tree
Hide file tree
Showing 8 changed files with 8 additions and 123 deletions.
3 changes: 0 additions & 3 deletions include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
#define METASLAB_ASYNC_ALLOC 0x8
#define METASLAB_DONT_THROTTLE 0x10
#define METASLAB_MUST_RESERVE 0x20
#define METASLAB_FASTWRITE 0x40
#define METASLAB_ZIL 0x80

int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
Expand All @@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);

void metaslab_stat_init(void);
void metaslab_stat_fini(void);
Expand Down
1 change: 0 additions & 1 deletion include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ struct vdev {
metaslab_group_t *vdev_mg; /* metaslab group */
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
metaslab_t **vdev_ms; /* metaslab array */
uint64_t vdev_pending_fastwrite; /* allocated fastwrites */
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
Expand Down
1 change: 0 additions & 1 deletion include/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ typedef enum {
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
int lwb_nused; /* # used bytes in buffer */
Expand Down
1 change: 0 additions & 1 deletion include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
#define ZIO_FLAG_DELEGATED (1ULL << 30)
#define ZIO_FLAG_FASTWRITE (1ULL << 31)

#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
Expand Down
67 changes: 2 additions & 65 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
zio_alloc_list_t *zal, int allocator)
{
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
metaslab_group_t *mg, *fast_mg, *rotor;
metaslab_group_t *mg, *rotor;
vdev_t *vd;
boolean_t try_hard = B_FALSE;

Expand Down Expand Up @@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
} else if (d != 0) {
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
mg = vd->vdev_mg->mg_next;
} else if (flags & METASLAB_FASTWRITE) {
mg = fast_mg = mca->mca_rotor;

do {
if (fast_mg->mg_vd->vdev_pending_fastwrite <
mg->mg_vd->vdev_pending_fastwrite)
mg = fast_mg;
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);

} else {
ASSERT(mca->mca_rotor != NULL);
mg = mca->mca_rotor;
Expand Down Expand Up @@ -5297,7 +5288,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
mg->mg_bias = 0;
}

if ((flags & METASLAB_FASTWRITE) ||
if ((flags & METASLAB_ZIL) ||
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
mg->mg_aliquot + mg->mg_bias) {
mca->mca_rotor = mg->mg_next;
Expand All @@ -5310,11 +5301,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
DVA_SET_ASIZE(&dva[d], asize);

if (flags & METASLAB_FASTWRITE) {
atomic_add_64(&vd->vdev_pending_fastwrite,
psize);
}

return (0);
}
next:
Expand Down Expand Up @@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
return (error);
}

void
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
{
const dva_t *dva = bp->blk_dva;
int ndvas = BP_GET_NDVAS(bp);
uint64_t psize = BP_GET_PSIZE(bp);
int d;
vdev_t *vd;

ASSERT(!BP_IS_HOLE(bp));
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(psize > 0);

spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

for (d = 0; d < ndvas; d++) {
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
continue;
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
}

spa_config_exit(spa, SCL_VDEV, FTAG);
}

void
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
{
const dva_t *dva = bp->blk_dva;
int ndvas = BP_GET_NDVAS(bp);
uint64_t psize = BP_GET_PSIZE(bp);
int d;
vdev_t *vd;

ASSERT(!BP_IS_HOLE(bp));
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(psize > 0);

spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

for (d = 0; d < ndvas; d++) {
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
continue;
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
}

spa_config_exit(spa, SCL_VDEV, FTAG);
}

static void
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
uint64_t size, void *arg)
Expand Down
2 changes: 0 additions & 2 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)

ASSERT(tvd == tvd->vdev_top);

tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
tvd->vdev_ms_array = svd->vdev_ms_array;
tvd->vdev_ms_shift = svd->vdev_ms_shift;
tvd->vdev_ms_count = svd->vdev_ms_count;
Expand Down Expand Up @@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
}
}
ASSERT0(vd->vdev_ms_count);
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
}

typedef struct vdev_probe_stats {
Expand Down
42 changes: 5 additions & 37 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2)
}

static lwb_t *
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
boolean_t fastwrite)
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg)
{
lwb_t *lwb;

lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
lwb->lwb_zilog = zilog;
lwb->lwb_blk = *bp;
lwb->lwb_fastwrite = fastwrite;
lwb->lwb_slog = slog;
lwb->lwb_indirect = B_FALSE;
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
Expand Down Expand Up @@ -916,7 +914,6 @@ zil_create(zilog_t *zilog)
dmu_tx_t *tx = NULL;
blkptr_t blk;
int error = 0;
boolean_t fastwrite = FALSE;
boolean_t slog = FALSE;
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);

Expand Down Expand Up @@ -949,8 +946,6 @@ zil_create(zilog_t *zilog)

error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
ZIL_MIN_BLKSZ, &slog);
fastwrite = TRUE;

if (error == 0)
zil_init_log_chain(zilog, &blk);
}
Expand All @@ -959,7 +954,7 @@ zil_create(zilog_t *zilog)
* Allocate a log write block (lwb) for the first log block.
*/
if (error == 0)
lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
lwb = zil_alloc_lwb(zilog, &blk, slog, txg);

/*
* If we just allocated the first log block, commit our transaction
Expand Down Expand Up @@ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
ASSERT(zh->zh_claim_txg == 0);
VERIFY(!keep_first);
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
if (lwb->lwb_fastwrite)
metaslab_fastwrite_unmark(zilog->zl_spa,
&lwb->lwb_blk);
if (lwb->lwb_buf != NULL)
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
Expand Down Expand Up @@ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio)
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
lwb->lwb_state = LWB_STATE_WRITE_DONE;
lwb->lwb_write_zio = NULL;
lwb->lwb_fastwrite = FALSE;
nlwb = list_next(&zilog->zl_lwb_list, lwb);
mutex_exit(&zilog->zl_lock);

Expand Down Expand Up @@ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);

/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
mutex_enter(&zilog->zl_lock);
if (!lwb->lwb_fastwrite) {
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 1;
}

lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
zil_lwb_write_done, lwb, prio,
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb);

mutex_enter(&zilog->zl_lock);
lwb->lwb_state = LWB_STATE_OPENED;

zil_lwb_set_zio_dependency(zilog, lwb);
zilog->zl_last_lwb_opened = lwb;
mutex_exit(&zilog->zl_lock);
Expand Down Expand Up @@ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
/*
* Allocate a new log write block (lwb).
*/
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
nlwb = zil_alloc_lwb(zilog, bp, slog, txg);
}

lwb->lwb_state = LWB_STATE_ISSUED;
Expand Down Expand Up @@ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
BP_ZERO(&zh->zh_log);
}

/*
* Remove fastwrite on any blocks that have been pre-allocated for
* the next commit. This prevents fastwrite counter pollution by
* unused, long-lived LWBs.
*/
for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) {
if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) {
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 0;
}
}

mutex_exit(&zilog->zl_lock);
}

Expand Down Expand Up @@ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog)
ASSERT(list_is_empty(&zilog->zl_lwb_list));
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);

if (lwb->lwb_fastwrite)
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);

zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
zil_free_lwb(zilog, lwb);
}
Expand Down
14 changes: 1 addition & 13 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
*/
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

/*
* We didn't allocate this bp, so make sure it doesn't get unmarked.
*/
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;

zio_nowait(zio);

return (pio);
Expand Down Expand Up @@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));

flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
if (zio->io_flags & ZIO_FLAG_NODATA)
flags |= METASLAB_DONT_THROTTLE;
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
Expand Down Expand Up @@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
* of, so we just hash the objset ID to pick the allocator to get
* some parallelism.
*/
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
int flags = METASLAB_ZIL;
int allocator = (uint_t)cityhash4(0, 0, 0,
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
Expand Down Expand Up @@ -4931,12 +4925,6 @@ zio_done(zio_t *zio)
zfs_ereport_free_checksum(zcr);
}

if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
}

/*
* It is the responsibility of the done callback to ensure that this
* particular zio is no longer discoverable for adoption, and as
Expand Down

0 comments on commit 43b694c

Please sign in to comment.