Skip to content

Commit 9ddf0d5

Browse files
committed
Updating based on PR Feedback(3)
1. Unified the block cloning and Direct I/O code paths further. As part of this unification, it is important to note that Direct I/O writes transition the db_state to DB_UNCACHED. This is used so that dbuf_unoverride() is called when dbuf_undirty() is called. This is needed to clean up space accounting in a TXG. When a dbuf is redirtied through dbuf_redirty(), then dbuf_unoverride() is also called to clean up space accounting. This is a bit of a different approach than block cloning, which always calls dbuf_undirty(). 2. As part of unifying the two, Direct I/O also performs the same check in dmu_buf_will_fill() so that on failure the previous contents of the dbuf are set correctly. 3. General code cleanup, removing checks that are no longer necessary. Signed-off-by: Brian Atkinson <[email protected]>
1 parent eece40e commit 9ddf0d5

File tree

4 files changed

+55
-94
lines changed

4 files changed

+55
-94
lines changed

include/sys/dbuf.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ typedef struct dbuf_dirty_record {
176176
uint8_t dr_copies;
177177
boolean_t dr_nopwrite;
178178
boolean_t dr_brtwrite;
179+
boolean_t dr_diowrite;
179180
boolean_t dr_has_raw_params;
180181

181182
/*
@@ -467,20 +468,6 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
467468
return (NULL);
468469
}
469470

470-
static inline boolean_t
471-
dbuf_dirty_is_direct_write(dmu_buf_impl_t *db, dbuf_dirty_record_t *dr)
472-
{
473-
boolean_t ret = B_FALSE;
474-
ASSERT(MUTEX_HELD(&db->db_mtx));
475-
476-
if (dr != NULL && db->db_level == 0 && !dr->dt.dl.dr_brtwrite &&
477-
dr->dt.dl.dr_override_state == DR_OVERRIDDEN &&
478-
dr->dt.dl.dr_data == NULL) {
479-
ret = B_TRUE;
480-
}
481-
return (ret);
482-
}
483-
484471
#define DBUF_GET_BUFC_TYPE(_db) \
485472
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
486473

module/zfs/dbuf.c

Lines changed: 44 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,16 +1255,6 @@ dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
12551255
ASSERT(buf != NULL);
12561256

12571257
db->db_buf = buf;
1258-
1259-
/*
1260-
* If there is a Direct I/O, set its data too. Then its state
1261-
* will be the same as if we did a ZIL dmu_sync().
1262-
*/
1263-
dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
1264-
if (dbuf_dirty_is_direct_write(db, dr)) {
1265-
dr->dt.dl.dr_data = db->db_buf;
1266-
}
1267-
12681258
ASSERT(buf->b_data != NULL);
12691259
db->db.db_data = buf->b_data;
12701260
}
@@ -1843,7 +1833,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags)
18431833
/*
18441834
* If a block clone or Direct I/O write has occurred we will
18451835
* get the dirty records overridden BP so we get the most
1846-
* recent data..
1836+
* recent data.
18471837
*/
18481838
err = dmu_buf_get_bp_from_dbuf(db, &bp);
18491839

@@ -1948,13 +1938,14 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
19481938
if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
19491939
zio_free(db->db_objset->os_spa, txg, bp);
19501940

1951-
if (dr->dt.dl.dr_brtwrite) {
1941+
if (dr->dt.dl.dr_brtwrite || dr->dt.dl.dr_diowrite) {
19521942
ASSERT0P(dr->dt.dl.dr_data);
19531943
dr->dt.dl.dr_data = db->db_buf;
19541944
}
19551945
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
19561946
dr->dt.dl.dr_nopwrite = B_FALSE;
19571947
dr->dt.dl.dr_brtwrite = B_FALSE;
1948+
dr->dt.dl.dr_diowrite = B_FALSE;
19581949
dr->dt.dl.dr_has_raw_params = B_FALSE;
19591950

19601951
/*
@@ -2161,26 +2152,11 @@ dbuf_redirty(dbuf_dirty_record_t *dr)
21612152
*/
21622153
dbuf_unoverride(dr);
21632154
if (db->db.db_object != DMU_META_DNODE_OBJECT &&
2164-
db->db_state != DB_NOFILL && db->db_buf != NULL) {
2165-
/*
2166-
* Already released on initial dirty,
2167-
* so just thaw.
2168-
*/
2155+
db->db_state != DB_NOFILL) {
2156+
/* Already released on initial dirty, so just thaw. */
21692157
ASSERT(arc_released(db->db_buf));
21702158
arc_buf_thaw(db->db_buf);
21712159
}
2172-
/*
2173-
* If initial dirty was via Direct I/O, may not have a dr_data.
2174-
*
2175-
* If the dirty record was associated with cloned block then
2176-
* the call above to dbuf_unoverride() will have reset
2177-
* dr->dt.dl.dr_data and it will not be NULL here.
2178-
*/
2179-
if (dr->dt.dl.dr_data == NULL) {
2180-
ASSERT3B(dbuf_dirty_is_direct_write(db, dr), ==,
2181-
B_TRUE);
2182-
dr->dt.dl.dr_data = db->db_buf;
2183-
}
21842160
}
21852161
}
21862162

@@ -2564,6 +2540,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
25642540
{
25652541
uint64_t txg = tx->tx_txg;
25662542
boolean_t brtwrite;
2543+
boolean_t diowrite;
25672544

25682545
ASSERT(txg != 0);
25692546

@@ -2589,7 +2566,9 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
25892566
ASSERT(dr->dr_dbuf == db);
25902567

25912568
brtwrite = dr->dt.dl.dr_brtwrite;
2569+
diowrite = dr->dt.dl.dr_diowrite;
25922570
if (brtwrite) {
2571+
ASSERT3B(diowrite, ==, B_FALSE);
25932572
/*
25942573
* We are freeing a block that we cloned in the same
25952574
* transaction group.
@@ -2630,11 +2609,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
26302609
if (db->db_state != DB_NOFILL && !brtwrite) {
26312610
dbuf_unoverride(dr);
26322611

2633-
/*
2634-
* In the Direct I/O case, the buffer is still dirty, but it
2635-
* may be UNCACHED, so we do not need to destroy an ARC buffer.
2636-
*/
2637-
if (dr->dt.dl.dr_data && dr->dt.dl.dr_data != db->db_buf) {
2612+
if (dr->dt.dl.dr_data != db->db_buf) {
26382613
ASSERT(db->db_buf != NULL);
26392614
ASSERT(dr->dt.dl.dr_data != NULL);
26402615
arc_buf_destroy(dr->dt.dl.dr_data, db);
@@ -2647,12 +2622,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
26472622
db->db_dirtycnt -= 1;
26482623

26492624
if (zfs_refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
2650-
/*
2651-
* In the Direct I/O case our db_buf will be NULL as we are not
2652-
* caching in the ARC.
2653-
*/
2654-
ASSERT(db->db_state == DB_NOFILL || brtwrite ||
2655-
db->db_buf == NULL || arc_released(db->db_buf));
2625+
ASSERT(db->db_state == DB_NOFILL || brtwrite || diowrite ||
2626+
arc_released(db->db_buf));
26562627
dbuf_destroy(db);
26572628
return (B_TRUE);
26582629
}
@@ -2711,8 +2682,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
27112682
* Block cloning: Do the dbuf_read() before undirtying the dbuf, as we
27122683
* want to make sure dbuf_read() will read the pending cloned block and
27132684
* not the underlying block that is being replaced. dbuf_undirty() will
2714-
* do dbuf_unoverride(), so we will end up with cloned block content,
2715-
* without overridden BP.
2685+
* do brt_pending_remove() before removing the dirty record.
27162686
*/
27172687
(void) dbuf_read(db, NULL, flags);
27182688
if (undirty) {
@@ -2761,19 +2731,16 @@ dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp)
27612731

27622732
*bp = db->db_blkptr;
27632733
dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records);
2764-
if (dr) {
2765-
if (db->db_state == DB_NOFILL) {
2766-
/* Block clone */
2767-
if (!dr->dt.dl.dr_brtwrite)
2768-
error = EIO;
2769-
else
2770-
*bp = &dr->dt.dl.dr_overridden_by;
2771-
} else if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN &&
2772-
dr->dt.dl.dr_data == NULL) {
2773-
ASSERT(db->db_state == DB_UNCACHED);
2774-
/* Direct I/O write */
2734+
if (dr && db->db_state == DB_NOFILL) {
2735+
/* Block clone */
2736+
if (!dr->dt.dl.dr_brtwrite)
2737+
error = EIO;
2738+
else
2739+
*bp = &dr->dt.dl.dr_overridden_by;
2740+
} else if (dr && db->db_state == DB_UNCACHED) {
2741+
/* Direct I/O write */
2742+
if (dr->dt.dl.dr_diowrite)
27752743
*bp = &dr->dt.dl.dr_overridden_by;
2776-
}
27772744
}
27782745

27792746
return (error);
@@ -2929,21 +2896,28 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
29292896
dmu_tx_private_ok(tx));
29302897

29312898
mutex_enter(&db->db_mtx);
2932-
if (db->db_state == DB_NOFILL) {
2899+
dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg);
2900+
if (db->db_state == DB_NOFILL ||
2901+
(db->db_state == DB_UNCACHED && dr && dr->dt.dl.dr_diowrite)) {
29332902
/*
2934-
* Block cloning: We will be completely overwriting a block
2935-
* cloned in this transaction group, so let's undirty the
2936-
* pending clone and mark the block as uncached. This will be
2937-
* as if the clone was never done. But if the fill can fail
2938-
* we should have a way to return back to the cloned data.
2903+
* If the fill can fail we should have a way to return back to
2904+
* the cloned or Direct I/O write data.
29392905
*/
2940-
if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
2906+
if (canfail && dr) {
29412907
mutex_exit(&db->db_mtx);
29422908
dmu_buf_will_dirty(db_fake, tx);
29432909
return;
29442910
}
2945-
VERIFY(!dbuf_undirty(db, tx));
2946-
db->db_state = DB_UNCACHED;
2911+
/*
2912+
* Block cloning: We will be completely overwriting a block
2913+
* cloned in this transaction group, so let's undirty the
2914+
* pending clone and mark the block as uncached. This will be
2915+
* as if the clone was never done.
2916+
*/
2917+
if (dr && dr->dt.dl.dr_brtwrite) {
2918+
VERIFY(!dbuf_undirty(db, tx));
2919+
db->db_state = DB_UNCACHED;
2920+
}
29472921
}
29482922
mutex_exit(&db->db_mtx);
29492923

@@ -5085,6 +5059,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
50855059
if (dr->dt.dl.dr_data != NULL &&
50865060
dr->dt.dl.dr_data != db->db_buf) {
50875061
ASSERT3B(dr->dt.dl.dr_brtwrite, ==, B_FALSE);
5062+
ASSERT3B(dr->dt.dl.dr_diowrite, ==, B_FALSE);
50885063
arc_buf_destroy(dr->dt.dl.dr_data, db);
50895064
}
50905065
} else {
@@ -5146,9 +5121,7 @@ dbuf_write_override_done(zio_t *zio)
51465121
if (!BP_EQUAL(zio->io_bp, obp)) {
51475122
if (!BP_IS_HOLE(obp))
51485123
dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp);
5149-
5150-
if (dr->dt.dl.dr_data && dr->dt.dl.dr_data != db->db_buf)
5151-
arc_release(dr->dt.dl.dr_data, db);
5124+
arc_release(dr->dt.dl.dr_data, db);
51525125
}
51535126
mutex_exit(&db->db_mtx);
51545127

@@ -5355,14 +5328,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
53555328
* (by dmu_sync(), dmu_write_direct(),
53565329
* or dmu_buf_write_embedded()).
53575330
*/
5358-
blkptr_t *bp = &dr->dt.dl.dr_overridden_by;
5359-
abd_t *contents = NULL;
5360-
if (data) {
5361-
ASSERT(BP_IS_HOLE(bp) ||
5362-
arc_buf_lsize(data) == BP_GET_LSIZE(bp));
5363-
contents = abd_get_from_buf(data->b_data,
5364-
arc_buf_size(data));
5365-
}
5331+
abd_t *contents = (data != NULL) ?
5332+
abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL;
53665333

53675334
dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy,
53685335
contents, db->db.db_size, db->db.db_size, &zp,
@@ -5371,8 +5338,9 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
53715338
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
53725339
mutex_enter(&db->db_mtx);
53735340
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
5374-
zio_write_override(dr->dr_zio, bp, dr->dt.dl.dr_copies,
5375-
dr->dt.dl.dr_nopwrite, dr->dt.dl.dr_brtwrite);
5341+
zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by,
5342+
dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite,
5343+
dr->dt.dl.dr_brtwrite);
53765344
mutex_exit(&db->db_mtx);
53775345
} else if (data == NULL) {
53785346
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||

module/zfs/dmu_direct.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,12 @@ dmu_write_direct_done(zio_t *zio)
107107
ASSERT3U(zio->io_error, ==, EAGAIN);
108108

109109
/*
110-
* In the event of an I/O error the metaslab cleanup is taken
111-
* care of in zio_done().
110+
* In the event of an I/O error this block has been freed in
111+
* zio_done() through zio_dva_unallocate(). Calling
112+
* dmu_sync_done() above set dr_override_state to
113+
* DR_NOT_OVERRIDDEN. In this case when dbuf_undirty() calls
114+
* dbuf_unoverride(), it will skip doing zio_free() to free
115+
* this block as that was already taken care of.
112116
*
113117
* Since we are undirtying the record in open-context, we must
114118
* have a hold on the db, so it should never be evicted after
@@ -154,6 +158,7 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
154158

155159
dr_head = list_head(&db->db_dirty_records);
156160
ASSERT3U(dr_head->dr_txg, ==, txg);
161+
dr_head->dt.dl.dr_diowrite = B_TRUE;
157162
dr_head->dr_accounted = db->db.db_size;
158163

159164
blkptr_t *bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP);

module/zfs/zfs_vnops.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,11 +1154,12 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
11541154
if (error == 0) {
11551155
zgd->zgd_db = dbp;
11561156
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp;
1157+
boolean_t direct_write = B_FALSE;
11571158
mutex_enter(&db->db_mtx);
11581159
dbuf_dirty_record_t *dr =
11591160
dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg);
1160-
boolean_t direct_write =
1161-
dbuf_dirty_is_direct_write(db, dr);
1161+
if (dr != NULL && dr->dt.dl.dr_diowrite)
1162+
direct_write = B_TRUE;
11621163
mutex_exit(&db->db_mtx);
11631164

11641165
/*

0 commit comments

Comments
 (0)