Skip to content

Commit

Permalink
ZAP: Reduce leaf array and free chunks fragmentation
Browse files Browse the repository at this point in the history
The previous implementation of zap_leaf_array_free() put chunks on the
free list in reverse order.  Also zap_leaf_transfer_entry() and
zap_entry_remove() were freeing name and value arrays in reverse
order.  Together this created a mess in the free list, making
subsequent allocations much more fragmented than necessary.

This patch re-implements zap_leaf_array_free() to keep the existing
chunk order, and implements a non-destructive zap_leaf_array_copy()
to be used in zap_leaf_transfer_entry(), allowing properly ordered
freeing of the name and value arrays there and in zap_entry_remove().

With this change, a test of some writes and deletes shows the
percentage of non-contiguous chunks in DDT reduced from 61% and 47%
to 0% and 17% for arrays and free chunks, respectively.  Granted,
explicit sorting could do even better, especially for ZAPs with
variable-size arrays, but it would also cost much more, while this
approach should be very cheap.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by:	iXsystems, Inc.
Closes openzfs#16766
  • Loading branch information
amotin authored and behlendorf committed Nov 23, 2024
1 parent 3ed1d60 commit 434ecad
Showing 1 changed file with 62 additions and 44 deletions.
106 changes: 62 additions & 44 deletions module/zfs/zap_leaf.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,20 +248,63 @@ zap_leaf_array_create(zap_leaf_t *l, const char *buf,
return (chunk_head);
}

/*
 * Non-destructively copy an array chain between leaves.
 *
 * Walks the chain of ZAP_CHUNK_ARRAY chunks starting at 'chunk' in leaf
 * 'l', allocating a fresh chunk in leaf 'nl' for each one and copying the
 * chunk contents verbatim.  The source chain is left untouched so the
 * caller can free it afterwards in whatever order best preserves free
 * list locality (see zap_leaf_array_free()).
 *
 * Returns the head chunk index of the newly built chain in 'nl';
 * the new chain is terminated with CHAIN_END.
 */
static uint16_t
zap_leaf_array_copy(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	/* Points at the link to patch with the next copied chunk's index. */
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);

		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		ASSERT3U(la->la_type, ==, ZAP_CHUNK_ARRAY);

		*nla = *la;	/* structure assignment */

		/* Advance the source, then splice the copy into the chain. */
		chunk = la->la_next;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}

/*
 * Free an array chain.  Unlike a trivial loop of zap_leaf_chunk_free()
 * this does not reverse the order of chunks in the free list, reducing
 * fragmentation of subsequent allocations.
 *
 * The whole chain is appended, in its existing order, to the head of the
 * leaf's free list: the chain's chunks are linked first and the previous
 * free list is chained after them.
 */
static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t chunk)
{
	struct zap_leaf_header *hdr = &zap_leaf_phys(l)->l_hdr;
	uint16_t *tailp = &hdr->lh_freelist;
	uint16_t oldfree = *tailp;	/* previous free list head */

	while (chunk != CHAIN_END) {
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		zap_leaf_chunk_t *c = &ZAP_LEAF_CHUNK(l, chunk);
		ASSERT3U(c->l_array.la_type, ==, ZAP_CHUNK_ARRAY);

		/* Link this chunk at the current tail of the new list. */
		*tailp = chunk;
		/* Grab the successor before the chunk is overwritten. */
		chunk = c->l_array.la_next;

		c->l_free.lf_type = ZAP_CHUNK_FREE;
		memset(c->l_free.lf_pad, 0, sizeof (c->l_free.lf_pad));
		tailp = &c->l_free.lf_next;

		ASSERT3U(hdr->lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
		hdr->lh_nfree++;
	}

	/* Reattach the previous free list after the freed chain. */
	*tailp = oldfree;
}

/* array_len and buf_len are in integers, not bytes */
Expand Down Expand Up @@ -515,7 +558,7 @@ zap_entry_update(zap_entry_handle_t *zeh,
if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
return (SET_ERROR(EAGAIN));

zap_leaf_array_free(l, &le->le_value_chunk);
zap_leaf_array_free(l, le->le_value_chunk);
le->le_value_chunk =
zap_leaf_array_create(l, buf, integer_size, num_integers);
le->le_value_numints = num_integers;
Expand All @@ -534,10 +577,11 @@ zap_entry_remove(zap_entry_handle_t *zeh)
struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

zap_leaf_array_free(l, &le->le_name_chunk);
zap_leaf_array_free(l, &le->le_value_chunk);

*zeh->zeh_chunkp = le->le_next;

/* Free in opposite order to reduce fragmentation. */
zap_leaf_array_free(l, le->le_value_chunk);
zap_leaf_array_free(l, le->le_name_chunk);
zap_leaf_chunk_free(l, entry_chunk);

zap_leaf_phys(l)->l_hdr.lh_nentries--;
Expand Down Expand Up @@ -701,34 +745,6 @@ zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
return (chunkp);
}

/*
 * Destructively transfer an array chain from leaf 'l' to leaf 'nl':
 * each source chunk is copied into a freshly allocated chunk in 'nl'
 * and then immediately freed in 'l'.  Returns the head chunk index of
 * the new chain in 'nl', terminated with CHAIN_END.
 *
 * NOTE(review): per the commit message, freeing the source chunks one
 * at a time here puts them on the free list in reverse order, which
 * fragments subsequent allocations.
 */
static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
uint16_t new_chunk;
/* Points at the link to patch with the next copied chunk's index. */
uint16_t *nchunkp = &new_chunk;

while (chunk != CHAIN_END) {
uint16_t nchunk = zap_leaf_chunk_alloc(nl);
struct zap_leaf_array *nla =
&ZAP_LEAF_CHUNK(nl, nchunk).l_array;
struct zap_leaf_array *la =
&ZAP_LEAF_CHUNK(l, chunk).l_array;
/* Save the successor before the source chunk is freed. */
uint_t nextchunk = la->la_next;

ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

*nla = *la; /* structure assignment */

zap_leaf_chunk_free(l, chunk);
chunk = nextchunk;
/* Splice the copy into the new chain. */
*nchunkp = nchunk;
nchunkp = &nla->la_next;
}
*nchunkp = CHAIN_END;
return (new_chunk);
}

static void
zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
{
Expand All @@ -741,10 +757,12 @@ zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)

(void) zap_leaf_rehash_entry(nl, nle, chunk);

nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
nle->le_value_chunk =
zap_leaf_transfer_array(l, le->le_value_chunk, nl);
nle->le_name_chunk = zap_leaf_array_copy(l, le->le_name_chunk, nl);
nle->le_value_chunk = zap_leaf_array_copy(l, le->le_value_chunk, nl);

/* Free in opposite order to reduce fragmentation. */
zap_leaf_array_free(l, le->le_value_chunk);
zap_leaf_array_free(l, le->le_name_chunk);
zap_leaf_chunk_free(l, entry);

zap_leaf_phys(l)->l_hdr.lh_nentries--;
Expand Down

0 comments on commit 434ecad

Please sign in to comment.