diff --git a/Makefile.am b/Makefile.am
index 5b195c10b..5b043b3dc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,10 +4,6 @@ AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
 AM_CFLAGS += $(SQLITE_CFLAGS) $(UV_CFLAGS) $(PTHREAD_CFLAGS)
 AM_LDFLAGS = $(UV_LIBS) $(PTHREAD_LIBS)
 
-if DQLITE_NEXT_ENABLED
-AM_CFLAGS += -DDQLITE_NEXT
-endif
-
 if !BUILD_RAFT_ENABLED
 AM_CFLAGS += $(RAFT_CFLAGS) -DUSE_SYSTEM_RAFT
 AM_LDFLAGS += $(RAFT_LIBS)
diff --git a/configure.ac b/configure.ac
index 5a85b3cc7..93e167d29 100644
--- a/configure.ac
+++ b/configure.ac
@@ -39,10 +39,6 @@ AM_CONDITIONAL(BUILD_SQLITE_ENABLED, test "x$enable_build_sqlite" = "xyes")
 AC_ARG_ENABLE(build-raft, AS_HELP_STRING([--enable-build-raft[=ARG]], [use the bundled raft sources instead of linking to libraft [default=no]]))
 AM_CONDITIONAL(BUILD_RAFT_ENABLED, test "x$enable_build_raft" = "xyes")
 
-AC_ARG_ENABLE(dqlite-next, AS_HELP_STRING([--enable-dqlite-next[=ARG]], [build with the experimental dqlite backend [default=no]]))
-AM_CONDITIONAL(DQLITE_NEXT_ENABLED, test "x$enable_dqlite_next" = "xyes")
-AS_IF([test "x$enable_build_raft" != "xyes" -a "x$enable_dqlite_next" = "xyes"], [AC_MSG_ERROR([dqlite-next requires bundled raft])], [])
-
 # Whether to enable code coverage.
 AX_CODE_COVERAGE
 
diff --git a/src/fsm.c b/src/fsm.c
index 50fcbdc83..61f75ff94 100644
--- a/src/fsm.c
+++ b/src/fsm.c
@@ -1,11 +1,13 @@
 #include "lib/assert.h"
 #include "lib/serialize.h"
+#include "lib/threadpool.h"
 
 #include "command.h"
 #include "fsm.h"
 #include "raft.h"
 #include "tracing.h"
 #include "vfs.h"
+#include "vfs2.h"
 
 #include <sys/mman.h>
 
@@ -132,10 +134,7 @@ static void maybeCheckpoint(struct db *db)
 		goto err_after_db_open;
 	}
 
-	/* Get the database file associated with this db->follower connection */
-	rv = sqlite3_file_control(db->follower, "main",
-				  SQLITE_FCNTL_FILE_POINTER, &main_f);
-	assert(rv == SQLITE_OK); /* Should never fail */
+	main_f = main_file(db->follower);
 
 	/* Get the first SHM region, which contains the WAL header. */
 	rv = main_f->pMethods->xShmMap(main_f, 0, 0, 0, &region);
@@ -744,420 +743,189 @@ void fsm__close(struct raft_fsm *fsm)
 	raft_free(f);
 }
 
-/******************************************************************************
- Disk-based FSM
- *****************************************************************************/
-
-/* The synchronous part of the database encoding */
-static int encodeDiskDatabaseSync(struct db *db, struct raft_buffer *r_buf)
-{
-	sqlite3_vfs *vfs;
-	struct dqlite_buffer *buf = (struct dqlite_buffer *)r_buf;
-	int rv;
-
-	vfs = sqlite3_vfs_find(db->config->name);
-	rv = VfsDiskSnapshotWal(vfs, db->path, buf);
-	if (rv != 0) {
-		goto err;
-	}
-
-	return 0;
-
-err:
-	assert(rv != 0);
-	return rv;
-}
-
-/* The asynchronous part of the database encoding */
-static int encodeDiskDatabaseAsync(struct db *db,
-				   struct raft_buffer r_bufs[],
-				   uint32_t n)
+void fsm_post_receive_disk(struct raft_fsm *fsm, const struct raft_buffer *buf, struct raft_entry_local_data *ld)
 {
-	struct snapshotDatabase header;
-	sqlite3_vfs *vfs;
-	char *cursor;
-	struct dqlite_buffer *bufs = (struct dqlite_buffer *)r_bufs;
+	struct fsm *f = fsm->data;
+	void *cmd;
 	int rv;
 
-	assert(n == 3);
-
-	vfs = sqlite3_vfs_find(db->config->name);
-	rv = VfsDiskSnapshotDb(vfs, db->path, &bufs[1]);
-	if (rv != 0) {
-		goto err;
-	}
-
-	/* Database header. */
-	header.filename = db->filename;
-	header.main_size = bufs[1].len;
-	header.wal_size = bufs[2].len;
-	bufs[0].len = snapshotDatabase__sizeof(&header);
-	bufs[0].base = sqlite3_malloc64(bufs[0].len);
-	if (bufs[0].base == NULL) {
-		rv = RAFT_NOMEM;
-		goto err;
+	int type;
+	rv = command__decode(buf, &type, &cmd);
+	UNHANDLED(rv != 0);
+	if (type != COMMAND_FRAMES) {
+		goto done_after_command_decode;
 	}
+	struct command_frames *cf = cmd;
+	assert(cf->is_commit);
 
-	cursor = bufs[0].base;
-	snapshotDatabase__encode(&header, &cursor);
-	return 0;
-
-	/* Cleanup is performed by call to snapshot_finalize */
-err:
-	assert(rv != 0);
-	return rv;
-}
-
-/* Determine the total number of raft buffers needed
- * for a snapshot in disk-mode */
-static unsigned snapshotNumBufsDisk(struct fsm *f)
-{
-	queue *head;
-	unsigned n = 1; /* snapshot header */
-
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		n += 3; /* database header, database file and wal */
+	unsigned long *page_numbers;
+	rv = command_frames__page_numbers(cf, &page_numbers);
+	UNHANDLED(rv != 0);
+	void *pages;
+	command_frames__pages(cf, &pages);
+	/* TODO maybe vfs2 should just accept the pages and page numbers
+	 * in the layout that we receive them over the wire? */
+	dqlite_vfs_frame *frames = sqlite3_malloc((int)sizeof(*frames) * (int)cf->frames.n_pages);
+	UNHANDLED(frames == NULL);
+	for (uint32_t i = 0; i < cf->frames.n_pages; i++) {
+		frames[i].page_number = page_numbers[i];
+		frames[i].data = pages + cf->frames.page_size * i;
 	}
 
-	return n;
-}
+	struct db *db;
+	rv = registry__db_get(f->registry, cf->filename, &db);
+	UNHANDLED(rv != 0);
+	PRE(db->follower == NULL);
+	rv = db__open_follower(db);
+	UNHANDLED(rv != SQLITE_OK);
+	POST(db->follower != NULL);
 
-/* An example array of snapshot buffers looks like this:
- *
- * bufs:  SH DH1 DBMMAP1 WAL1 DH2 DMMAP2 WAL2
- * index:  0   1       2    3   4      5    6
- *
- * SH:     Snapshot Header
- * DHx:    Database Header
- * DBMMAP: Pointer to mmap'ed database file
- * WALx:   a WAL
- * */
-static void freeSnapshotBufsDisk(struct fsm *f,
-				 struct raft_buffer bufs[],
-				 unsigned n_bufs)
-{
-	queue *head;
-	unsigned i;
+	sqlite3_file *fp = main_file(db->follower);
+	struct vfs2_wal_slice sl;
+	rv = vfs2_apply_uncommitted(fp, cf->frames.page_size, frames, cf->frames.n_pages, &sl);
+	UNHANDLED(rv != 0);
 
-	if (bufs == NULL || n_bufs == 0) {
-		return;
-	}
+	memcpy(ld, &sl, sizeof(sl));
 
-	/* Free snapshot header */
-	sqlite3_free(bufs[0].base);
+	sqlite3_close(db->follower);
+	db->follower = NULL;
 
-	i = 1;
-	/* Free all database headers & WAL buffers. Unmap the DB file. */
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		if (i == n_bufs) {
-			break;
-		}
-		/* i is the index of the database header */
-		sqlite3_free(bufs[i].base);
-		if (bufs[i + 1].base != NULL) {
-			munmap(bufs[i + 1].base, bufs[i + 1].len);
-		}
-		sqlite3_free(bufs[i + 2].base);
-		/* i is now the index of the next database header (if any) */
-		i += 3;
-	}
+	sqlite3_free(frames);
+	sqlite3_free(page_numbers);
+done_after_command_decode:
+	raft_free(cmd);
 }
 
-static int fsm__snapshot_disk(struct raft_fsm *fsm,
-			      struct raft_buffer *bufs[],
-			      unsigned *n_bufs)
+static int fsm_apply2_disk(struct raft_fsm *fsm,
+		const struct raft_buffer *buf, struct raft_entry_local_data ld,
+		bool is_mine, void **result)
 {
 	struct fsm *f = fsm->data;
-	queue *head;
-	struct db *db = NULL;
-	unsigned n_db = 0;
-	unsigned i;
 	int rv;
 
-	/* First count how many databases we have and check that no transaction
-	 * nor checkpoint nor other snapshot is in progress. */
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		db = QUEUE_DATA(head, struct db, queue);
-		if (db->tx_id != 0 || db->read_lock) {
-			return RAFT_BUSY;
-		}
-		n_db++;
-	}
-
-	/* Lock all databases, preventing the checkpoint from running. This
-	 * ensures the database is not written while it is mmap'ed and copied by
-	 * raft. */
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		db = QUEUE_DATA(head, struct db, queue);
-		rv = databaseReadLock(db);
-		assert(rv == 0);
-	}
-
-	*n_bufs = snapshotNumBufsDisk(f);
-	*bufs = sqlite3_malloc64(*n_bufs * sizeof **bufs);
-	if (*bufs == NULL) {
-		rv = RAFT_NOMEM;
-		goto err;
-	}
-
-	/* zero-init buffers, helps with cleanup */
-	for (unsigned j = 0; j < *n_bufs; j++) {
-		(*bufs)[j].base = NULL;
-		(*bufs)[j].len = 0;
-	}
-
-	rv = encodeSnapshotHeader(n_db, &(*bufs)[0]);
+	int type;
+	void *cmd;
+	rv = command__decode(buf, &type, &cmd);
 	if (rv != 0) {
-		goto err_after_bufs_alloc;
+		goto done;
 	}
 
-	/* Copy WAL of all databases. */
-	i = 1;
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		db = QUEUE_DATA(head, struct db, queue);
-		/* database_header + db + WAL */
-		unsigned n = 3;
-		/* pass pointer to buffer that will contain WAL. */
-		rv = encodeDiskDatabaseSync(db, &(*bufs)[i + n - 1]);
-		if (rv != 0) {
-			goto err_after_encode_sync;
-		}
-		i += n;
+	switch (type) {
+		case COMMAND_FRAMES:
+			break;
+		case COMMAND_CHECKPOINT:
+		case COMMAND_OPEN:
+		case COMMAND_UNDO:
+			rv = 0;
+			goto done_after_command_decode;
+		default:
+			rv = RAFT_MALFORMED;
+			goto done_after_command_decode;
 	}
 
-	assert(i == *n_bufs);
-	return 0;
-
-err_after_encode_sync:
-	freeSnapshotBufsDisk(f, *bufs, i);
-err_after_bufs_alloc:
-	sqlite3_free(*bufs);
-err:
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		db = QUEUE_DATA(head, struct db, queue);
-		databaseReadUnlock(db);
-	}
-	assert(rv != 0);
-	return rv;
-}
+	struct command_frames *cf = cmd;
 
-static int fsm__snapshot_async_disk(struct raft_fsm *fsm,
-				    struct raft_buffer *bufs[],
-				    unsigned *n_bufs)
-{
-	struct fsm *f = fsm->data;
-	queue *head;
-	struct snapshotHeader header;
-	struct db *db = NULL;
-	unsigned i;
-	int rv;
+	struct db *db;
+	rv = registry__db_get(f->registry, cf->filename, &db);
+	UNHANDLED(rv != 0);
+	PRE(db->follower == NULL);
+	rv = db__open_follower(db);
+	UNHANDLED(rv != SQLITE_OK);
+	POST(db->follower != NULL);
 
-	/* Decode the header to determine the number of databases. */
-	struct cursor cursor = {(*bufs)[0].base, (*bufs)[0].len};
-	rv = snapshotHeader__decode(&cursor, &header);
-	if (rv != 0) {
-		tracef("decode failed %d", rv);
-		return -1;
-	}
-	if (header.format != SNAPSHOT_FORMAT) {
-		tracef("bad format");
-		return -1;
+	sqlite3_file *fp = main_file(db->follower);
+	if (is_mine) {
+		rv = vfs2_unhide(fp);
+	} else {
+		struct vfs2_wal_slice sl;
+		memcpy(&sl, &ld, sizeof(ld));
+		rv = vfs2_commit(fp, sl);
 	}
+	UNHANDLED(rv != 0);
 
-	/* Encode individual databases. */
-	i = 1;
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		if (i == *n_bufs) {
-			/* In case a db was added in meanwhile */
-			break;
-		}
-		db = QUEUE_DATA(head, struct db, queue);
-		/* database_header + database file + wal */
-		unsigned n = 3;
-		rv = encodeDiskDatabaseAsync(db, &(*bufs)[i], n);
-		if (rv != 0) {
-			goto err;
-		}
-		i += n;
-	}
+	rv = 0;
 
-	return 0;
+	sqlite3_close(db->follower);
+	db->follower = NULL;
 
-err:
-	assert(rv != 0);
+done_after_command_decode:
+	raft_free(cmd);
+done:
+	*result = NULL;
 	return rv;
 }
 
-static int fsm__snapshot_finalize_disk(struct raft_fsm *fsm,
-				       struct raft_buffer *bufs[],
-				       unsigned *n_bufs)
+static void fsm_post_receive_undo_disk(struct raft_fsm *fsm, const struct raft_buffer *buf, struct raft_entry_local_data ld)
 {
 	struct fsm *f = fsm->data;
-	queue *head;
-	struct db *db;
-	unsigned n_db;
-	struct snapshotHeader header;
+	void *cmd;
 	int rv;
 
-	if (bufs == NULL) {
-		return 0;
-	}
-
-	/* Decode the header to determine the number of databases. */
-	struct cursor cursor = {(*bufs)[0].base, (*bufs)[0].len};
-	rv = snapshotHeader__decode(&cursor, &header);
-	if (rv != 0) {
-		tracef("decode failed %d", rv);
-		return -1;
-	}
-	if (header.format != SNAPSHOT_FORMAT) {
-		tracef("bad format");
-		return -1;
-	}
-
-	/* Free allocated buffers */
-	freeSnapshotBufsDisk(f, *bufs, *n_bufs);
-	sqlite3_free(*bufs);
-	*bufs = NULL;
-	*n_bufs = 0;
-
-	/* Unlock all databases that were locked for the snapshot, this is safe
-	 * because DB's are only ever added at the back of the queue. */
-	n_db = 0;
-	QUEUE_FOREACH(head, &f->registry->dbs)
-	{
-		if (n_db == header.n) {
-			break;
-		}
-		db = QUEUE_DATA(head, struct db, queue);
-		databaseReadUnlock(db);
-		n_db++;
+	int type;
+	rv = command__decode(buf, &type, &cmd);
+	UNHANDLED(rv != 0);
+	if (type != COMMAND_FRAMES) {
+		return;
 	}
+	struct command_frames *cf = cmd;
+	assert(cf->is_commit);
 
-	return 0;
+	struct db *db;
+	rv = registry__db_get(f->registry, cf->filename, &db);
+	UNHANDLED(rv != 0);
+	PRE(db->follower == NULL);
+	rv = db__open_follower(db);
+	UNHANDLED(rv != SQLITE_OK);
+	POST(db->follower != NULL);
+
+	sqlite3_file *fp = main_file(db->follower);
+	struct vfs2_wal_slice sl;
+	memcpy(&sl, &ld, sizeof(ld));
+	rv = vfs2_unapply(fp, sl);
+	UNHANDLED(rv != 0);
+	sqlite3_close(db->follower);
+	db->follower = NULL;
 }
 
-/* Decode the disk database contained in a snapshot. */
-static int decodeDiskDatabase(struct fsm *f, struct cursor *cursor)
+static int fsm_snapshot_disk_stub(struct raft_fsm *fsm,
+		struct raft_buffer *bufs[],
+		unsigned *n_bufs)
 {
-	struct snapshotDatabase header;
-	struct db *db;
-	sqlite3_vfs *vfs;
-	int exists;
-	int rv;
-
-	rv = snapshotDatabase__decode(cursor, &header);
-	if (rv != 0) {
-		return rv;
-	}
-	rv = registry__db_get(f->registry, header.filename, &db);
-	if (rv != 0) {
-		return rv;
-	}
-
-	vfs = sqlite3_vfs_find(db->config->name);
-
-	/* Check if the database file exists, and create it by opening a
-	 * connection if it doesn't. */
-	rv = vfs->xAccess(vfs, db->path, 0, &exists);
-	assert(rv == 0);
-
-	if (!exists) {
-		rv = db__open_follower(db);
-		if (rv != 0) {
-			return rv;
-		}
-		sqlite3_close(db->follower);
-		db->follower = NULL;
-	}
-
-	/* The last check can overflow, but we would already be lost anyway, as
-	 * the raft snapshot restore API only supplies one buffer and the data
-	 * has to fit in size_t bytes anyway. */
-	if (header.main_size > SIZE_MAX || header.wal_size > SIZE_MAX ||
-	    header.main_size + header.wal_size > SIZE_MAX) {
-		tracef("main_size:%" PRIu64 "B wal_size:%" PRIu64
-		       "B would overflow max DB size (%zuB)",
-		       header.main_size, header.wal_size, SIZE_MAX);
-		return -1;
-	}
-
-	/* Due to the check above, these casts are safe. */
-	rv = VfsDiskRestore(vfs, db->path, cursor->p, (size_t)header.main_size,
-			    (size_t)header.wal_size);
-	if (rv != 0) {
-		tracef("VfsDiskRestore %d", rv);
-		return rv;
-	}
-
-	cursor->p += header.main_size + header.wal_size;
-	return 0;
+	(void)fsm;
+	(void)bufs;
+	(void)n_bufs;
+	assert(0);
 }
 
-static int fsm__restore_disk(struct raft_fsm *fsm, struct raft_buffer *buf)
+static int fsm_restore_disk_stub(struct raft_fsm *fsm,
+		struct raft_buffer *buf)
 {
-	tracef("fsm restore disk");
-	struct fsm *f = fsm->data;
-	struct cursor cursor = {buf->base, buf->len};
-	struct snapshotHeader header;
-	unsigned i;
-	int rv;
-
-	rv = snapshotHeader__decode(&cursor, &header);
-	if (rv != 0) {
-		tracef("decode failed %d", rv);
-		return rv;
-	}
-	if (header.format != SNAPSHOT_FORMAT) {
-		tracef("bad format");
-		return RAFT_MALFORMED;
-	}
-
-	for (i = 0; i < header.n; i++) {
-		rv = decodeDiskDatabase(f, &cursor);
-		if (rv != 0) {
-			tracef("decode failed");
-			return rv;
-		}
-	}
-
-	/* Don't use sqlite3_free as this buffer is allocated by raft. */
-	raft_free(buf->base);
-
-	return 0;
+	(void)fsm;
+	(void)buf;
+	assert(0);
 }
 
 int fsm__init_disk(struct raft_fsm *fsm,
 		   struct config *config,
 		   struct registry *registry)
 {
-	tracef("fsm init");
-	struct fsm *f = raft_malloc(sizeof *f);
-
+	(void)config;
+	struct fsm *f = raft_malloc(sizeof(*f));
 	if (f == NULL) {
 		return DQLITE_NOMEM;
 	}
-
-	f->logger = &config->logger;
 	f->registry = registry;
-	f->pending.n_pages = 0;
-	f->pending.page_numbers = NULL;
-	f->pending.pages = NULL;
 
-	fsm->version = 3;
+	fsm->version = 4;
 	fsm->data = f;
-	fsm->apply = fsm__apply;
-	fsm->snapshot = fsm__snapshot_disk;
-	fsm->snapshot_async = fsm__snapshot_async_disk;
-	fsm->snapshot_finalize = fsm__snapshot_finalize_disk;
-	fsm->restore = fsm__restore_disk;
+	fsm->apply = NULL;
+	fsm->snapshot = fsm_snapshot_disk_stub;
+	fsm->restore = fsm_restore_disk_stub;
+	fsm->snapshot_finalize = NULL;
+	fsm->snapshot_async = NULL;
+	fsm->apply2 = fsm_apply2_disk;
+	fsm->post_receive = fsm_post_receive_disk;
+	fsm->post_receive_undo = fsm_post_receive_undo_disk;
 
 	return 0;
 }
diff --git a/src/gateway.c b/src/gateway.c
index 0ba1f60f4..b4f24f9a5 100644
--- a/src/gateway.c
+++ b/src/gateway.c
@@ -157,12 +157,15 @@ void gateway__close(struct gateway *g)
 	}
 
 #define FAIL_IF_CHECKPOINTING                                                  \
-	{                                                                      \
+	do {                                                                   \
+		/* TODO(cole) extra consideration needed here? */              \
+		if (g->config->disk) {                                         \
+			break;                                                 \
+		}                                                              \
+                                                                               \
 		struct sqlite3_file *_file;                                    \
 		int _rv;                                                       \
-		_rv = sqlite3_file_control(g->leader->conn, "main",            \
-					   SQLITE_FCNTL_FILE_POINTER, &_file); \
-		assert(_rv == SQLITE_OK); /* Should never fail */              \
+		_file = main_file(g->leader->conn);                            \
                                                                                \
 		_rv = _file->pMethods->xShmLock(                               \
 		    _file, 1 /* checkpoint lock */, 1,                         \
@@ -175,7 +178,7 @@ void gateway__close(struct gateway *g)
 		_file->pMethods->xShmLock(                                     \
 		    _file, 1 /* checkpoint lock */, 1,                         \
 		    SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);                 \
-	}
+	} while (0)                                                            \
 
 /* Encode fa failure response and invoke the request callback */
 static void failure(struct handle *req, int code, const char *message)
@@ -554,22 +557,6 @@ static void query_batch_async(struct handle *req, enum pool_half half)
 	}
 }
 
-#ifdef DQLITE_NEXT
-
-static void qb_top(pool_work_t *w)
-{
-	struct handle *req = CONTAINER_OF(w, struct handle, work);
-	query_batch_async(req, POOL_TOP_HALF);
-}
-
-static void qb_bottom(pool_work_t *w)
-{
-	struct handle *req = CONTAINER_OF(w, struct handle, work);
-	query_batch_async(req, POOL_BOTTOM_HALF);
-}
-
-#endif
-
 static void query_batch(struct gateway *g)
 {
 	struct handle *req = g->req;
@@ -577,16 +564,8 @@ static void query_batch(struct gateway *g)
 	g->req = NULL;
 	req->gw = g;
 
-#ifdef DQLITE_NEXT
-	struct dqlite_node *node = g->raft->data;
-	pool_t *pool = !!(pool_ut_fallback()->flags & POOL_FOR_UT)
-		? pool_ut_fallback() : &node->pool;
-	pool_queue_work(pool, &req->work, g->leader->db->cookie,
-			WT_UNORD, qb_top, qb_bottom);
-#else
 	query_batch_async(req, POOL_TOP_HALF);
 	query_batch_async(req, POOL_BOTTOM_HALF);
-#endif
 }
 
 static void query_barrier_cb(struct barrier *barrier, int status)
diff --git a/src/leader.c b/src/leader.c
index 61ef91639..86f4c965e 100644
--- a/src/leader.c
+++ b/src/leader.c
@@ -15,6 +15,7 @@
 #include "tracing.h"
 #include "utils.h"
 #include "vfs.h"
+#include "vfs2.h"
 
 /* Called when a leader exec request terminates and the associated callback can
  * be invoked. */
@@ -258,6 +259,7 @@ static void leaderApplyFramesCb(struct raft_apply *req,
 	if (status != 0) {
 		tracef("apply frames cb failed status %d", status);
 		sqlite3_vfs *vfs = sqlite3_vfs_find(l->db->config->name);
+		sqlite3_file *f = main_file(l->conn);
 		switch (status) {
 			case RAFT_LEADERSHIPLOST:
 				l->exec->status = SQLITE_IOERR_LEADERSHIP_LOST;
@@ -283,7 +285,11 @@ static void leaderApplyFramesCb(struct raft_apply *req,
 				l->exec->status = SQLITE_IOERR;
 				break;
 		}
-		VfsAbort(vfs, l->db->path);
+		if (l->db->config->disk) {
+			vfs2_abort(f);
+		} else {
+			VfsAbort(vfs, l->db->path);
+		}
 	}
 
 	raft_free(apply);
@@ -300,6 +306,7 @@ static void leaderApplyFramesCb(struct raft_apply *req,
 
 static int leaderApplyFrames(struct exec *req,
 			     dqlite_vfs_frame *frames,
+			     struct vfs2_wal_slice sl,
 			     unsigned n)
 {
 	tracef("leader apply frames id:%" PRIu64, req->id);
@@ -339,9 +346,10 @@ static int leaderApplyFrames(struct exec *req,
 #ifdef USE_SYSTEM_RAFT
 	rv = raft_apply(l->raft, &apply->req, &buf, 1, leaderApplyFramesCb);
 #else
-	/* TODO actual WAL slice goes here */
-	struct raft_entry_local_data local_data = {};
-	rv = raft_apply(l->raft, &apply->req, &buf, &local_data, 1, leaderApplyFramesCb);
+	struct raft_entry_local_data ld;
+	static_assert(sizeof(sl) == sizeof(ld), "local data size mismatch");
+	memcpy(&ld, &sl, sizeof(sl));
+	rv = raft_apply(l->raft, &apply->req, &buf, &ld, 1, leaderApplyFramesCb);
 #endif
 	if (rv != 0) {
 		tracef("raft apply failed %d", rv);
@@ -368,31 +376,41 @@ static void leaderExecV2(struct exec *req, enum pool_half half)
 	struct leader *l = req->leader;
 	struct db *db = l->db;
 	sqlite3_vfs *vfs = sqlite3_vfs_find(db->config->name);
+	sqlite3_file *f;
 	dqlite_vfs_frame *frames;
+	struct vfs2_wal_slice sl = {};
 	uint64_t size;
 	unsigned n;
 	unsigned i;
 	int rv;
 
+	f = main_file(l->conn);
+
 	if (half == POOL_TOP_HALF) {
 		req->status = sqlite3_step(req->stmt);
 		return;
 	} /* else POOL_BOTTOM_HALF => */
 
-	rv = VfsPoll(vfs, db->path, &frames, &n);
+	if (db->config->disk) {
+		rv = vfs2_poll(f, &frames, &n, &sl);
+	} else {
+		rv = VfsPoll(vfs, db->path, &frames, &n);
+	}
 	if (rv != 0 || n == 0) {
 		tracef("vfs poll");
 		goto finish;
 	}
 
 	/* Check if the new frames would create an overfull database */
-	size = VfsDatabaseSize(vfs, db->path, n, db->config->page_size);
-	if (size > VfsDatabaseSizeLimit(vfs)) {
-		rv = SQLITE_FULL;
-		goto abort;
+	if (!db->config->disk) {
+		size = VfsDatabaseSize(vfs, db->path, n, db->config->page_size);
+		if (size > VfsDatabaseSizeLimit(vfs)) {
+			rv = SQLITE_FULL;
+			goto abort;
+		}
 	}
 
-	rv = leaderApplyFrames(req, frames, n);
+	rv = leaderApplyFrames(req, frames, sl, n);
 	if (rv != 0) {
 		goto abort;
 	}
@@ -408,7 +426,11 @@ static void leaderExecV2(struct exec *req, enum pool_half half)
 		sqlite3_free(frames[i].data);
 	}
 	sqlite3_free(frames);
-	VfsAbort(vfs, l->db->path);
+	if (db->config->disk) {
+		vfs2_abort(f);
+	} else {
+		VfsAbort(vfs, l->db->path);
+	}
 finish:
 	if (rv != 0) {
 		tracef("exec v2 failed %d", rv);
@@ -417,22 +439,6 @@ static void leaderExecV2(struct exec *req, enum pool_half half)
 	leaderExecDone(l->exec);
 }
 
-#ifdef DQLITE_NEXT
-
-static void exec_top(pool_work_t *w)
-{
-	struct exec *req = CONTAINER_OF(w, struct exec, work);
-	leaderExecV2(req, POOL_TOP_HALF);
-}
-
-static void exec_bottom(pool_work_t *w)
-{
-	struct exec *req = CONTAINER_OF(w, struct exec, work);
-	leaderExecV2(req, POOL_BOTTOM_HALF);
-}
-
-#endif
-
 static void execBarrierCb(struct barrier *barrier, int status)
 {
 	tracef("exec barrier cb status %d", status);
@@ -445,16 +451,8 @@ static void execBarrierCb(struct barrier *barrier, int status)
 		return;
 	}
 
-#ifdef DQLITE_NEXT
-	struct dqlite_node *node = l->raft->data;
-	pool_t *pool = !!(pool_ut_fallback()->flags & POOL_FOR_UT)
-		? pool_ut_fallback() : &node->pool;
-	pool_queue_work(pool, &req->work, l->db->cookie,
-			WT_UNORD, exec_top, exec_bottom);
-#else
 	leaderExecV2(req, POOL_TOP_HALF);
 	leaderExecV2(req, POOL_BOTTOM_HALF);
-#endif
 }
 
 int leader__exec(struct leader *l,
diff --git a/src/raft.h b/src/raft.h
index adef854a8..31c6e4b1d 100644
--- a/src/raft.h
+++ b/src/raft.h
@@ -632,7 +632,7 @@ struct raft_io
 
 struct raft_fsm
 {
-	int version; /* 1, 2 or 3 */
+	int version;
 	void *data;
 	int (*apply)(struct raft_fsm *fsm,
 		     const struct raft_buffer *buf,
@@ -649,6 +649,18 @@ struct raft_fsm
 	int (*snapshot_async)(struct raft_fsm *fsm,
 			      struct raft_buffer *bufs[],
 			      unsigned *n_bufs);
+	/* Fields below added since version 4. */
+	int (*apply2)(struct raft_fsm *fsm,
+			const struct raft_buffer *buf,
+			struct raft_entry_local_data ld,
+			bool is_mine,
+			void **result);
+	void (*post_receive)(struct raft_fsm *fsm,
+			const struct raft_buffer *buf,
+			struct raft_entry_local_data *ld);
+	void (*post_receive_undo)(struct raft_fsm *fsm,
+			const struct raft_buffer *buf,
+			struct raft_entry_local_data ld);
 };
 
 struct raft; /* Forward declaration. */
@@ -667,6 +679,10 @@ typedef void (*raft_state_cb)(struct raft *raft,
 
 struct raft_progress;
 
+/**
+ */
+typedef void (*raft_initial_barrier_cb)(struct raft *raft);
+
 /**
  * Close callback.
  *
@@ -919,12 +935,16 @@ RAFT_API int raft_init(struct raft *r,
 
 RAFT_API void raft_close(struct raft *r, raft_close_cb cb);
 
+RAFT_API void raft_fini(struct raft *r);
+
 /**
  * This function MUST be called after raft_init and before raft_start.
  * @cb will be called every time the raft state changes.
  */
 RAFT_API void raft_register_state_cb(struct raft *r, raft_state_cb cb);
 
+RAFT_API void raft_register_initial_barrier_cb(struct raft *r, raft_initial_barrier_cb cb);
+
 /**
  * Bootstrap this raft instance using the given configuration. The instance must
  * not have been started yet and must be completely pristine, otherwise
@@ -1398,6 +1418,8 @@ RAFT_API void raft_uv_set_tracer(struct raft_io *io,
  */
 RAFT_API void raft_uv_set_auto_recovery(struct raft_io *io, bool flag);
 
+RAFT_API void raft_uv_set_format_version(struct raft_io *io, int version);
+
 /**
  * Callback invoked by the transport implementation when a new incoming
  * connection has been established.
diff --git a/src/raft/callbacks.h b/src/raft/callbacks.h
index e756b3070..1ed65194f 100644
--- a/src/raft/callbacks.h
+++ b/src/raft/callbacks.h
@@ -6,6 +6,7 @@
 struct raft_callbacks
 {
 	raft_state_cb state_cb;
+	raft_initial_barrier_cb ib_cb;
 };
 
 int raftInitCallbacks(struct raft *r);
diff --git a/src/raft/convert.c b/src/raft/convert.c
index e29adc5ee..f821aa0c0 100644
--- a/src/raft/convert.c
+++ b/src/raft/convert.c
@@ -192,7 +192,12 @@ int convertToCandidate(struct raft *r, bool disrupt_leader)
 
 void convertInitialBarrierCb(struct raft_barrier *req, int status)
 {
-	(void)status;
+	struct raft *r = req->data;
+	raft_initial_barrier_cb ib_cb = raftGetCallbacks(r)->ib_cb;
+	if (ib_cb != NULL) {
+		UNHANDLED(status != 0);
+		ib_cb(r);
+	}
 	raft_free(req);
 }
 
@@ -233,6 +238,10 @@ int convertToLeader(struct raft *r)
 		       r->last_stored, r->commit_index);
 		r->commit_index = r->last_stored;
 		rv = replicationApply(r);
+		raft_initial_barrier_cb ib_cb = raftGetCallbacks(r)->ib_cb;
+		if (ib_cb != NULL) {
+			ib_cb(r);
+		}
 	} else if (n_voters > 1) {
 		/* Raft Dissertation, paragraph 6.4:
 		 * The Leader Completeness Property guarantees that a leader has
@@ -245,6 +254,7 @@ int convertToLeader(struct raft *r)
 		if (req == NULL) {
 			return RAFT_NOMEM;
 		}
+		req->data = r;
 		rv = raft_barrier(r, req, convertInitialBarrierCb);
 		if (rv != 0) {
 			tracef(
diff --git a/src/raft/entry.c b/src/raft/entry.c
index 15ac56725..f3862cbe3 100644
--- a/src/raft/entry.c
+++ b/src/raft/entry.c
@@ -33,6 +33,7 @@ int entryCopy(const struct raft_entry *src, struct raft_entry *dst)
 		return RAFT_NOMEM;
 	}
 	memcpy(dst->buf.base, src->buf.base, dst->buf.len);
+	dst->local_data = src->local_data;
 	dst->batch = NULL;
 	return 0;
 }
diff --git a/src/raft/fixture.c b/src/raft/fixture.c
index fe1772373..652f89fec 100644
--- a/src/raft/fixture.c
+++ b/src/raft/fixture.c
@@ -984,6 +984,7 @@ static void serverClose(struct raft_fixture_server *s)
 {
 	raft_close(&s->raft, NULL);
 	ioClose(&s->io);
+	raft_fini(&s->raft);
 	raft_free(s);
 }
 
diff --git a/src/raft/raft.c b/src/raft/raft.c
index e1ff0c41b..8417356da 100644
--- a/src/raft/raft.c
+++ b/src/raft/raft.c
@@ -117,11 +117,6 @@ static void ioCloseCb(struct raft_io *io)
 {
 	struct raft *r = io->data;
 	tracef("io close cb");
-	raftDestroyCallbacks(r);
-	raft_free(r->address);
-	logClose(r->log);
-	raft_configuration_close(&r->configuration);
-	raft_configuration_close(&r->configuration_last_snapshot);
 	if (r->close_cb != NULL) {
 		r->close_cb(r);
 	}
@@ -144,6 +139,13 @@ void raft_register_state_cb(struct raft *r, raft_state_cb cb)
 	cbs->state_cb = cb;
 }
 
+void raft_register_initial_barrier_cb(struct raft *r, raft_initial_barrier_cb cb)
+{
+	struct raft_callbacks *cbs = raftGetCallbacks(r);
+	assert(cbs != NULL);
+	cbs->ib_cb = cb;
+}
+
 void raft_set_election_timeout(struct raft *r, const unsigned msecs)
 {
 	r->election_timeout = msecs;
@@ -302,3 +304,12 @@ static int ioFsmVersionCheck(struct raft *r,
 
 	return 0;
 }
+
+void raft_fini(struct raft *r)
+{
+	raftDestroyCallbacks(r);
+	raft_free(r->address);
+	logClose(r->log);
+	raft_configuration_close(&r->configuration);
+	raft_configuration_close(&r->configuration_last_snapshot);
+}
diff --git a/src/raft/recv_append_entries.c b/src/raft/recv_append_entries.c
index 7d4adbcc0..4cfdce45b 100644
--- a/src/raft/recv_append_entries.c
+++ b/src/raft/recv_append_entries.c
@@ -19,7 +19,7 @@ static void recvSendAppendEntriesResultCb(struct raft_io_send *req, int status)
 int recvAppendEntries(struct raft *r,
 		      raft_id id,
 		      const char *address,
-		      const struct raft_append_entries *args)
+		      struct raft_append_entries *args)
 {
 	struct raft_io_send *req;
 	struct raft_message message;
diff --git a/src/raft/recv_append_entries.h b/src/raft/recv_append_entries.h
index 5b674860f..0c35c70e4 100644
--- a/src/raft/recv_append_entries.h
+++ b/src/raft/recv_append_entries.h
@@ -9,6 +9,6 @@
 int recvAppendEntries(struct raft *r,
 		      raft_id id,
 		      const char *address,
-		      const struct raft_append_entries *args);
+		      struct raft_append_entries *args);
 
 #endif /* RECV_APPEND_ENTRIES_H_ */
diff --git a/src/raft/replication.c b/src/raft/replication.c
index 20f92d566..dd6776108 100644
--- a/src/raft/replication.c
+++ b/src/raft/replication.c
@@ -16,6 +16,7 @@
 #include "membership.h"
 #include "progress.h"
 #include "../lib/queue.h"
+#include "../lib/threadpool.h"
 #include "replication.h"
 #include "request.h"
 #include "snapshot.h"
@@ -476,6 +477,23 @@ static struct request *getRequest(struct raft *r,
 	return NULL;
 }
 
+static void post_receive_undo(struct raft *r, const struct raft_entry *es, size_t n)
+{
+	if (r->fsm->version < 4 || r->fsm->post_receive_undo == NULL) {
+		return;
+	}
+
+	size_t i = n;
+	while (i > 0) {
+		i--;
+		const struct raft_entry *e = &es[i];
+		if (e->type != RAFT_COMMAND) {
+			continue;
+		}
+		r->fsm->post_receive_undo(r->fsm, &e->buf, e->local_data);
+	}
+}
+
 /* Invoked once a disk write request for new entries has been completed. */
 static void appendLeaderCb(struct raft_io_append *append, int status)
 {
@@ -983,6 +1001,9 @@ static void appendFollowerCb(struct raft_io_append *req, int status)
 	sendAppendEntriesResult(r, &result);
 
 out:
+	if (status != 0) {
+		post_receive_undo(r, request->args.entries, request->args.n_entries);
+	}
 	logRelease(r->log, request->index, request->args.entries,
 		   request->args.n_entries);
 
@@ -1091,6 +1112,13 @@ static int deleteConflictingEntries(struct raft *r,
 				}
 			}
 
+			struct raft_entry *entries;
+			unsigned n;
+			rv = logAcquire(r->log, entry_index, &entries, &n);
+			UNHANDLED(rv != 0);
+			post_receive_undo(r, entries, n);
+			logRelease(r->log, entry_index, entries, n);
+
 			/* Delete all entries from this index on because they
 			 * don't match. */
 			rv = r->io->truncate(r->io, entry_index);
@@ -1121,7 +1149,7 @@ static int deleteConflictingEntries(struct raft *r,
 }
 
 int replicationAppend(struct raft *r,
-		      const struct raft_append_entries *args,
+		      struct raft_append_entries *args,
 		      raft_index *rejected,
 		      bool *async)
 {
@@ -1130,6 +1158,7 @@ int replicationAppend(struct raft *r,
 	size_t n;
 	size_t i;
 	size_t j;
+	size_t k;
 	bool reinstated;
 	int rv;
 
@@ -1202,9 +1231,16 @@ int replicationAppend(struct raft *r,
 	/* Update our in-memory log to reflect that we received these entries.
 	 * We'll notify the leader of a successful append once the write entries
 	 * request that we issue below actually completes.  */
+	k = 0;
 	for (j = 0; j < n; j++) {
 		struct raft_entry *entry = &args->entries[i + j];
 
+		tracef("MAYBE POST RECEIVE");
+		if (r->fsm->version >= 4 && r->fsm->post_receive != NULL && entry->type == RAFT_COMMAND) {
+			r->fsm->post_receive(r->fsm, &entry->buf, &entry->local_data);
+		}
+		k++;
+
 		/* We are trying to append an entry at index X with term T to
 		 * our in-memory log. If we've gotten this far, we know that the
 		 * log *logically* has no entry at this index. However, it's
@@ -1235,7 +1271,7 @@ int replicationAppend(struct raft *r,
 			goto err_after_request_alloc;
 		}
 
-		rv = logAppend(r->log, copy.term, copy.type, copy.buf, (struct raft_entry_local_data){}, false, NULL);
+		rv = logAppend(r->log, copy.term, copy.type, copy.buf, copy.local_data, false, NULL);
 		if (rv != 0) {
 			goto err_after_request_alloc;
 		}
@@ -1275,6 +1311,7 @@ int replicationAppend(struct raft *r,
 		   request->args.n_entries);
 
 err_after_request_alloc:
+	assert(k <= n);
 	/* Release all entries added to the in-memory log, making
 	 * sure the in-memory log and disk don't diverge, leading
 	 * to future log entries not being persisted to disk.
@@ -1282,6 +1319,7 @@ int replicationAppend(struct raft *r,
 	if (j != 0) {
 		logTruncate(r->log, request->index);
 	}
+	post_receive_undo(r, args->entries + i, k);
 	raft_free(request);
 
 err:
@@ -1468,12 +1506,21 @@ int replicationInstallSnapshot(struct raft *r,
 /* Apply a RAFT_COMMAND entry that has been committed. */
 static int applyCommand(struct raft *r,
 			const raft_index index,
-			const struct raft_buffer *buf)
+			const raft_term term,
+			const struct raft_buffer *buf,
+			struct raft_entry_local_data ld,
+			bool is_local)
 {
 	struct raft_apply *req;
 	void *result;
 	int rv;
-	rv = r->fsm->apply(r->fsm, buf, &result);
+
+	if (r->fsm->version >= 4 && r->fsm->apply2 != NULL) {
+		bool is_mine = is_local && term == r->current_term;
+		rv = r->fsm->apply2(r->fsm, buf, ld, is_mine, &result);
+	} else {
+		rv = r->fsm->apply(r->fsm, buf, &result);
+	}
 	if (rv != 0) {
 		return rv;
 	}
@@ -1756,7 +1803,7 @@ int replicationApply(struct raft *r)
 
 		switch (entry->type) {
 			case RAFT_COMMAND:
-				rv = applyCommand(r, index, &entry->buf);
+				rv = applyCommand(r, index, entry->term, &entry->buf, entry->local_data, entry->is_local);
 				break;
 			case RAFT_BARRIER:
 				applyBarrier(r, index);
diff --git a/src/raft/replication.h b/src/raft/replication.h
index 5bfe07dbe..b73b93bf0 100644
--- a/src/raft/replication.h
+++ b/src/raft/replication.h
@@ -67,7 +67,7 @@ int replicationUpdate(struct raft *r,
  *
  * It must be called only by followers. */
 int replicationAppend(struct raft *r,
-		      const struct raft_append_entries *args,
+		      struct raft_append_entries *args,
 		      raft_index *rejected,
 		      bool *async);
 
diff --git a/src/raft/uv.c b/src/raft/uv.c
index f1450e742..4f5d68ca7 100644
--- a/src/raft/uv.c
+++ b/src/raft/uv.c
@@ -79,12 +79,11 @@ static int uvMaintenance(const char *dir, char *errmsg)
 	return rv;
 }
 
-/* Implementation of raft_io->config. */
+/* Implementation of raft_io->init. */
 static int uvInit(struct raft_io *io, raft_id id, const char *address)
 {
 	struct uv *uv;
 	size_t direct_io;
-	struct uvMetadata metadata;
 	int rv;
 	uv = io->impl;
 	uv->id = id;
@@ -108,12 +107,6 @@ static int uvInit(struct raft_io *io, raft_id id, const char *address)
 		return rv;
 	}
 
-	rv = uvMetadataLoad(uv->dir, &metadata, io->errmsg);
-	if (rv != 0) {
-		return rv;
-	}
-	uv->metadata = metadata;
-
 	rv = uv->transport->init(uv->transport, id, address);
 	if (rv != 0) {
 		ErrMsgTransfer(uv->transport->errmsg, io->errmsg, "transport");
@@ -121,10 +114,6 @@ static int uvInit(struct raft_io *io, raft_id id, const char *address)
 	}
 	uv->transport->data = uv;
 
-	rv = uv_timer_init(uv->loop, &uv->timer);
-	assert(rv == 0); /* This should never fail */
-	uv->timer.data = uv;
-
 	return 0;
 }
 
@@ -147,6 +136,11 @@ static int uvStart(struct raft_io *io,
 	struct uv *uv;
 	int rv;
 	uv = io->impl;
+
+	rv = uv_timer_init(uv->loop, &uv->timer);
+	assert(rv == 0); /* This should never fail */
+	uv->timer.data = uv;
+
 	uv->state = UV__ACTIVE;
 	uv->tick_cb = tick_cb;
 	uv->recv_cb = recv_cb;
@@ -482,9 +476,15 @@ static int uvLoad(struct raft_io *io,
 		  struct raft_entry **entries,
 		  size_t *n_entries)
 {
-	struct uv *uv;
+	struct uv *uv = io->impl;
+	struct uvMetadata metadata;
 	int rv;
-	uv = io->impl;
+
+	rv = uvMetadataLoad(uv->dir, &metadata, uv->format_version, io->errmsg);
+	if (rv != 0) {
+		return rv;
+	}
+	uv->metadata = metadata;
 
 	*term = uv->metadata.term;
 	*voted_for = uv->metadata.voted_for;
@@ -512,6 +512,7 @@ static int uvSetTerm(struct raft_io *io, const raft_term term)
 	struct uv *uv;
 	int rv;
 	uv = io->impl;
+	uv->metadata.format_version = uv->format_version;
 	uv->metadata.version++;
 	uv->metadata.term = term;
 	uv->metadata.voted_for = 0;
@@ -528,6 +529,7 @@ static int uvSetVote(struct raft_io *io, const raft_id server_id)
 	struct uv *uv;
 	int rv;
 	uv = io->impl;
+	uv->metadata.format_version = uv->format_version;
 	uv->metadata.version++;
 	uv->metadata.voted_for = server_id;
 	rv = uvMetadataStore(uv, &uv->metadata);
@@ -541,9 +543,15 @@ static int uvSetVote(struct raft_io *io, const raft_id server_id)
 static int uvBootstrap(struct raft_io *io,
 		       const struct raft_configuration *configuration)
 {
-	struct uv *uv;
+	struct uv *uv = io->impl;
+	struct uvMetadata metadata;
 	int rv;
-	uv = io->impl;
+
+	rv = uvMetadataLoad(uv->dir, &metadata, uv->format_version, io->errmsg);
+	if (rv != 0) {
+		return rv;
+	}
+	uv->metadata = metadata;
 
 	/* We shouldn't have written anything else yet. */
 	if (uv->metadata.term != 0) {
@@ -723,6 +731,7 @@ int raft_uv_init(struct raft_io *io,
 	uv->closing = false;
 	uv->close_cb = NULL;
 	uv->auto_recovery = true;
+	uv->format_version = 1;
 
 	uvSeedRand(uv);
 
@@ -812,4 +821,11 @@ void raft_uv_set_auto_recovery(struct raft_io *io, bool flag)
 	uv->auto_recovery = flag;
 }
 
+void raft_uv_set_format_version(struct raft_io *io, int version)
+{
+	PRE(1 <= version && version < 3);
+	struct uv *uv = io->impl;
+	uv->format_version = version;
+}
+
 #undef tracef
diff --git a/src/raft/uv.h b/src/raft/uv.h
index 3d0fd342b..553836763 100644
--- a/src/raft/uv.h
+++ b/src/raft/uv.h
@@ -47,6 +47,7 @@ typedef unsigned long long uvCounter;
 /* Information persisted in a single metadata file. */
 struct uvMetadata
 {
+	int format_version;
 	unsigned long long version; /* Monotonically increasing version */
 	raft_term term;             /* Current term */
 	raft_id voted_for;          /* Server ID of last vote, or 0 */
@@ -95,6 +96,7 @@ struct uv
 	bool closing;              /* True if we are closing */
 	raft_io_close_cb close_cb; /* Invoked when finishing closing */
 	bool auto_recovery;        /* Try to recover from corrupt segments */
+	int format_version;        /* 1 (original recipe) or 2 (with local data) */
 };
 
 /* Implementation of raft_io->truncate. */
@@ -102,7 +104,7 @@ int UvTruncate(struct raft_io *io, raft_index index);
 
 /* Load Raft metadata from disk, choosing the most recent version (either the
  * metadata1 or metadata2 file). */
-int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, char *errmsg);
+int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, int expected_version, char *errmsg);
 
 /* Store the given metadata to disk, writing the appropriate metadata file
  * according to the metadata version (if the version is odd, write metadata1,
@@ -188,7 +190,7 @@ void uvSegmentBufferClose(struct uvSegmentBuffer *b);
 
 /* Encode the format version at the very beginning of the buffer. This function
  * must be called when the buffer is empty. */
-int uvSegmentBufferFormat(struct uvSegmentBuffer *b);
+int uvSegmentBufferFormat(struct uvSegmentBuffer *b, int format_version);
 
 /* Extend the segment's buffer by encoding the given entries.
  *
@@ -196,7 +198,8 @@ int uvSegmentBufferFormat(struct uvSegmentBuffer *b);
  * will be appended. */
 int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
 			  const struct raft_entry entries[],
-			  unsigned n_entries);
+			  unsigned n_entries,
+			  int format_version);
 
 /* After all entries to write have been encoded, finalize the buffer by zeroing
  * the unused memory of the last block. The out parameter will point to the
diff --git a/src/raft/uv_append.c b/src/raft/uv_append.c
index 1544c274e..49b3885a6 100644
--- a/src/raft/uv_append.c
+++ b/src/raft/uv_append.c
@@ -169,14 +169,14 @@ static int uvAliveSegmentEncodeEntriesToWriteBuf(struct uvAliveSegment *segment,
 	/* If this is the very first write to the segment, we need to include
 	 * the format version */
 	if (segment->pending.n == 0 && segment->next_block == 0) {
-		rv = uvSegmentBufferFormat(&segment->pending);
+		rv = uvSegmentBufferFormat(&segment->pending, segment->uv->format_version);
 		if (rv != 0) {
 			return rv;
 		}
 	}
 
 	rv = uvSegmentBufferAppend(&segment->pending, append->entries,
-				   append->n);
+				   append->n, segment->uv->format_version);
 	if (rv != 0) {
 		return rv;
 	}
@@ -593,11 +593,11 @@ static void uvAliveSegmentReserveSegmentCapacity(struct uvAliveSegment *s,
 
 /* Return the number of bytes needed to store the batch of entries of this
  * append request on disk. */
-static size_t uvAppendSize(struct uvAppend *a)
+static size_t uvAppendSize(struct uvAppend *a, int format_version)
 {
 	size_t size = sizeof(uint32_t) * 2; /* CRC checksums */
 	unsigned i;
-	size += uvSizeofBatchHeader(a->n, true); /* Batch header */
+	size += uvSizeofBatchHeader(a->n, format_version); /* Batch header */
 	for (i = 0; i < a->n; i++) {       /* Entries data */
 		size += bytePad64(a->entries[i].buf.len);
 	}
@@ -618,7 +618,7 @@ static int uvAppendEnqueueRequest(struct uv *uv, struct uvAppend *append)
 	assert(uv->append_next_index > 0);
 	tracef("enqueue %u entries", append->n);
 
-	size = uvAppendSize(append);
+	size = uvAppendSize(append, uv->format_version);
 
 	/* If we have no segments yet, it means this is the very first append,
 	 * and we need to add a new segment. Otherwise we check if the last
diff --git a/src/raft/uv_encoding.c b/src/raft/uv_encoding.c
index 2714f643a..74b86d59c 100644
--- a/src/raft/uv_encoding.c
+++ b/src/raft/uv_encoding.c
@@ -4,6 +4,8 @@
 #include <string.h>
 
 #include "../raft.h"
+#include "../tracing.h"
+#include "../utils.h"
 #include "assert.h"
 #include "byte.h"
 #include "configuration.h"
@@ -86,14 +88,12 @@ static size_t sizeofTimeoutNow(void)
 	       sizeof(uint64_t) /* Last log term. */;
 }
 
-size_t uvSizeofBatchHeader(size_t n, bool with_local_data)
+size_t uvSizeofBatchHeader(size_t n, int format_version)
 {
 	size_t res = 8 + /* Number of entries in the batch, little endian */
 		16 * n; /* One header per entry */;
-	if (with_local_data) {
-#ifdef DQLITE_NEXT
+	if (format_version > 1) {
 		res += 8; /* Local data length, applies to all entries */
-#endif
 	}
 	return res;
 }
@@ -143,7 +143,7 @@ static void encodeAppendEntries(const struct raft_append_entries *p, void *buf)
 	bytePut64(&cursor, p->prev_log_term);  /* Previous term. */
 	bytePut64(&cursor, p->leader_commit);  /* Commit index. */
 
-	uvEncodeBatchHeader(p->entries, p->n_entries, cursor, false /* no local data */);
+	uvEncodeBatchHeader(p->entries, p->n_entries, cursor, 1 /* no local data ever */);
 }
 
 static void encodeAppendEntriesResult(
@@ -302,7 +302,7 @@ int uvEncodeMessage(const struct raft_message *message,
 void uvEncodeBatchHeader(const struct raft_entry *entries,
 			 unsigned n,
 			 void *buf,
-			 bool with_local_data)
+			 int format_version)
 {
 	unsigned i;
 	void *cursor = buf;
@@ -310,11 +310,9 @@ void uvEncodeBatchHeader(const struct raft_entry *entries,
 	/* Number of entries in the batch, little endian */
 	bytePut64(&cursor, n);
 
-	if (with_local_data) {
-#ifdef DQLITE_NEXT
+	if (format_version > 1) {
 		/* Local data size per entry, little endian */
 		bytePut64(&cursor, (uint64_t)sizeof(struct raft_entry_local_data));
-#endif
 	}
 
 	for (i = 0; i < n; i++) {
@@ -378,7 +376,8 @@ static void decodeRequestVoteResult(const uv_buf_t *buf,
 int uvDecodeBatchHeader(const void *batch,
 			struct raft_entry **entries,
 			unsigned *n,
-			uint64_t *local_data_size)
+			uint64_t *local_data_size,
+			int format_version)
 {
 	const void *cursor = batch;
 	size_t i;
@@ -391,15 +390,13 @@ int uvDecodeBatchHeader(const void *batch,
 		return 0;
 	}
 
-	if (local_data_size != NULL) {
-#ifdef DQLITE_NEXT
+	if (format_version > 1) {
 		uint64_t z = byteGet64(&cursor);
 		if (z == 0 || z > sizeof(struct raft_entry_local_data) || z % sizeof(uint64_t) != 0) {
 			rv = RAFT_MALFORMED;
 			goto err;
 		}
 		*local_data_size = z;
-#endif
 	}
 
 	*entries = raft_malloc(*n * sizeof **entries);
@@ -456,7 +453,7 @@ static int decodeAppendEntries(const uv_buf_t *buf,
 	args->prev_log_term = byteGet64(&cursor);
 	args->leader_commit = byteGet64(&cursor);
 
-	rv = uvDecodeBatchHeader(cursor, &args->entries, &args->n_entries, false);
+	rv = uvDecodeBatchHeader(cursor, &args->entries, &args->n_entries, NULL, 1 /* no local data ever */);
 	if (rv != 0) {
 		return rv;
 	}
@@ -579,10 +576,13 @@ int uvDecodeEntriesBatch(uint8_t *batch,
 			 size_t offset,
 			 struct raft_entry *entries,
 			 unsigned n,
-			 uint64_t local_data_size)
+			 uint64_t local_data_size,
+			 int format_version)
 {
 	uint8_t *cursor;
 
+	PRE(ERGO(format_version == 1, local_data_size == 0));
+
 	assert(batch != NULL);
 
 	cursor = batch + offset;
@@ -603,10 +603,10 @@ int uvDecodeEntriesBatch(uint8_t *batch,
 		entry->local_data = (struct raft_entry_local_data){};
 		assert(local_data_size <= sizeof(entry->local_data.buf));
 		assert(local_data_size % 8 == 0);
-#ifdef DQLITE_NEXT
-		memcpy(entry->local_data.buf, cursor, local_data_size);
-		cursor += local_data_size;
-#endif
+		if (format_version > 1) {
+			memcpy(entry->local_data.buf, cursor, local_data_size);
+			cursor += local_data_size;
+		}
 	}
 	return 0;
 }
diff --git a/src/raft/uv_encoding.h b/src/raft/uv_encoding.h
index 851966fb7..0a548a84a 100644
--- a/src/raft/uv_encoding.h
+++ b/src/raft/uv_encoding.h
@@ -7,13 +7,6 @@
 
 #include "../raft.h"
 
-/* Current disk format version. */
-#ifdef DQLITE_NEXT
-#define UV__DISK_FORMAT 2
-#else
-#define UV__DISK_FORMAT 1
-#endif
-
 int uvEncodeMessage(const struct raft_message *message,
 		    uv_buf_t **bufs,
 		    unsigned *n_bufs);
@@ -26,13 +19,15 @@ int uvDecodeMessage(uint16_t type,
 int uvDecodeBatchHeader(const void *batch,
 			struct raft_entry **entries,
 			unsigned *n,
-			uint64_t *local_data_size);
+			uint64_t *local_data_size,
+			int format_version);
 
 int uvDecodeEntriesBatch(uint8_t *batch,
 			 size_t offset,
 			 struct raft_entry *entries,
 			 unsigned n,
-			 uint64_t local_data_size);
+			 uint64_t local_data_size,
+			 int format_version);
 
 /**
  * The layout of the memory pointed at by a @batch pointer is the following:
@@ -57,11 +52,11 @@ int uvDecodeEntriesBatch(uint8_t *batch,
  * arbitrary lengths, possibly padded with extra bytes to reach 8-byte boundary
  * (which means that all entry data pointers are 8-byte aligned).
  */
-size_t uvSizeofBatchHeader(size_t n, bool with_local_data);
+size_t uvSizeofBatchHeader(size_t n, int format_version);
 
 void uvEncodeBatchHeader(const struct raft_entry *entries,
 			 unsigned n,
 			 void *buf,
-			 bool with_local_data);
+			 int format_version);
 
 #endif /* UV_ENCODING_H_ */
diff --git a/src/raft/uv_metadata.c b/src/raft/uv_metadata.c
index aee87323f..a45fba2d4 100644
--- a/src/raft/uv_metadata.c
+++ b/src/raft/uv_metadata.c
@@ -14,7 +14,7 @@
 static void uvMetadataEncode(const struct uvMetadata *metadata, void *buf)
 {
 	void *cursor = buf;
-	bytePut64(&cursor, UV__DISK_FORMAT);
+	bytePut64(&cursor, (uint64_t)metadata->format_version);
 	bytePut64(&cursor, metadata->version);
 	bytePut64(&cursor, metadata->term);
 	bytePut64(&cursor, metadata->voted_for);
@@ -23,15 +23,17 @@ static void uvMetadataEncode(const struct uvMetadata *metadata, void *buf)
 /* Decode the content of a metadata file. */
 static int uvMetadataDecode(const void *buf,
 			    struct uvMetadata *metadata,
+			    int expected_version,
 			    char *errmsg)
 {
 	const void *cursor = buf;
 	uint64_t format;
 	format = byteGet64(&cursor);
-	if (format != UV__DISK_FORMAT) {
+	if (format != (uint64_t)expected_version) {
 		ErrMsgPrintf(errmsg, "bad format version %ju", format);
 		return RAFT_MALFORMED;
 	}
+	metadata->format_version = expected_version;
 	metadata->version = byteGet64(&cursor);
 	metadata->term = byteGet64(&cursor);
 	metadata->voted_for = byteGet64(&cursor);
@@ -56,6 +58,7 @@ static void uvMetadataFilename(const unsigned short n, char *filename)
 static int uvMetadataLoadN(const char *dir,
 			   const unsigned short n,
 			   struct uvMetadata *metadata,
+			   int expected_version,
 			   char *errmsg)
 {
 	char filename[METADATA_FILENAME_SIZE];  /* Filename of the metadata file
@@ -118,7 +121,7 @@ static int uvMetadataLoadN(const char *dir,
 	};
 
 	/* Decode the content of the metadata file. */
-	rv = uvMetadataDecode(content, metadata, errmsg);
+	rv = uvMetadataDecode(content, metadata, expected_version, errmsg);
 	if (rv != 0) {
 		ErrMsgWrapf(errmsg, "decode content of %s", filename);
 		return rv;
@@ -127,18 +130,18 @@ static int uvMetadataLoadN(const char *dir,
 	return 0;
 }
 
-int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, char *errmsg)
+int uvMetadataLoad(const char *dir, struct uvMetadata *metadata, int expected_version, char *errmsg)
 {
 	struct uvMetadata metadata1;
 	struct uvMetadata metadata2;
 	int rv;
 
 	/* Read the two metadata files (if available). */
-	rv = uvMetadataLoadN(dir, 1, &metadata1, errmsg);
+	rv = uvMetadataLoadN(dir, 1, &metadata1, expected_version, errmsg);
 	if (rv != 0) {
 		return rv;
 	}
-	rv = uvMetadataLoadN(dir, 2, &metadata2, errmsg);
+	rv = uvMetadataLoadN(dir, 2, &metadata2, expected_version, errmsg);
 	if (rv != 0) {
 		return rv;
 	}
@@ -182,7 +185,7 @@ int uvMetadataStore(struct uv *uv, const struct uvMetadata *metadata)
 	unsigned short n;
 	int rv;
 
-	assert(metadata->version > 0);
+	PRE(1 <= metadata->format_version && metadata->format_version < 3);
 
 	/* Encode the given metadata. */
 	uvMetadataEncode(metadata, content);
diff --git a/src/raft/uv_recv.c b/src/raft/uv_recv.c
index 01e4ffc73..456d983b9 100644
--- a/src/raft/uv_recv.c
+++ b/src/raft/uv_recv.c
@@ -295,7 +295,7 @@ static void uvServerReadCb(uv_stream_t *stream,
 					    s->message.append_entries.entries,
 					    s->message.append_entries
 						.n_entries,
-					    false);
+					    0, 1 /* no local data ever */);
 					break;
 				case RAFT_IO_INSTALL_SNAPSHOT:
 					s->message.install_snapshot.data.base =
diff --git a/src/raft/uv_segment.c b/src/raft/uv_segment.c
index d54841bb2..92d90dcab 100644
--- a/src/raft/uv_segment.c
+++ b/src/raft/uv_segment.c
@@ -266,11 +266,11 @@ static int uvLoadEntriesBatch(struct uv *uv,
 
 	/* Consume the batch header, excluding the first 8 bytes containing the
 	 * number of entries, which we have already read. */
-	header.len = uvSizeofBatchHeader(n, true);
+	header.len = uvSizeofBatchHeader(n, uv->format_version);
 	header.base = batch;
 
 	rv = uvConsumeContent(content, offset,
-			      uvSizeofBatchHeader(n, true) - sizeof(uint64_t), NULL,
+			      uvSizeofBatchHeader(n, uv->format_version) - sizeof(uint64_t), NULL,
 			      errmsg);
 	if (rv != 0) {
 		ErrMsgTransfer(errmsg, uv->io->errmsg, "read header");
@@ -289,7 +289,7 @@ static int uvLoadEntriesBatch(struct uv *uv,
 
 	/* Decode the batch header, allocating the entries array. */
 	uint64_t local_data_size = 0;
-	rv = uvDecodeBatchHeader(header.base, entries, n_entries, &local_data_size);
+	rv = uvDecodeBatchHeader(header.base, entries, n_entries, &local_data_size, uv->format_version);
 	if (rv != 0) {
 		goto err;
 	}
@@ -299,9 +299,9 @@ static int uvLoadEntriesBatch(struct uv *uv,
 	data.len = 0;
 	for (i = 0; i < n; i++) {
 		data.len += (*entries)[i].buf.len;
-#ifdef DQLITE_NEXT
-		data.len += sizeof((*entries)[i].local_data);
-#endif
+		if (uv->format_version > 1) {
+			data.len += sizeof((*entries)[i].local_data);
+		}
 	}
 	data.base = (uint8_t *)content->base + *offset;
 
@@ -324,7 +324,7 @@ static int uvLoadEntriesBatch(struct uv *uv,
 	}
 
 	rv = uvDecodeEntriesBatch(content->base, *offset - data.len, *entries,
-			     *n_entries, local_data_size);
+			     *n_entries, local_data_size, uv->format_version);
 	if (rv != 0) {
 		goto err_after_header_decode;
 	}
@@ -405,7 +405,7 @@ int uvSegmentLoadClosed(struct uv *uv,
 	if (rv != 0) {
 		goto err;
 	}
-	if (format != UV__DISK_FORMAT) {
+	if (format != (uint64_t)uv->format_version) {
 		ErrMsgPrintf(uv->io->errmsg, "unexpected format version %ju",
 			     format);
 		rv = RAFT_CORRUPT;
@@ -527,7 +527,7 @@ static int uvSegmentLoadOpen(struct uv *uv,
 	/* Check that the format is the expected one, or perhaps 0, indicating
 	 * that the segment was allocated but never written. */
 	offset = sizeof format;
-	if (format != UV__DISK_FORMAT) {
+	if (format != (uint64_t)uv->format_version) {
 		if (format == 0) {
 			all_zeros = uvContentHasOnlyTrailingZeros(&buf, offset);
 			if (all_zeros) {
@@ -716,7 +716,7 @@ void uvSegmentBufferClose(struct uvSegmentBuffer *b)
 	}
 }
 
-int uvSegmentBufferFormat(struct uvSegmentBuffer *b)
+int uvSegmentBufferFormat(struct uvSegmentBuffer *b, int format_version)
 {
 	int rv;
 	void *cursor;
@@ -729,13 +729,14 @@ int uvSegmentBufferFormat(struct uvSegmentBuffer *b)
 	}
 	b->n = n;
 	cursor = b->arena.base;
-	bytePut64(&cursor, UV__DISK_FORMAT);
+	bytePut64(&cursor, (uint64_t)format_version);
 	return 0;
 }
 
 int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
 			  const struct raft_entry entries[],
-			  unsigned n_entries)
+			  unsigned n_entries,
+			  int format_version)
 {
 	size_t size;   /* Total size of the batch */
 	uint32_t crc1; /* Header checksum */
@@ -748,12 +749,12 @@ int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
 	int rv;
 
 	size = sizeof(uint32_t) * 2;            /* CRC checksums */
-	size += uvSizeofBatchHeader(n_entries, true); /* Batch header */
+	size += uvSizeofBatchHeader(n_entries, format_version); /* Batch header */
 	for (i = 0; i < n_entries; i++) {       /* Entries data */
 		size += bytePad64(entries[i].buf.len);
-#ifdef DQLITE_NEXT
-		size += sizeof(struct raft_entry_local_data);
-#endif
+		if (format_version > 1) {
+			size += sizeof(struct raft_entry_local_data);
+		}
 	}
 
 	rv = uvEnsureSegmentBufferIsLargeEnough(b, b->n + size);
@@ -770,9 +771,9 @@ int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
 
 	/* Batch header */
 	header = cursor;
-	uvEncodeBatchHeader(entries, n_entries, cursor, true /* encode local data */);
-	crc1 = byteCrc32(header, uvSizeofBatchHeader(n_entries, true), 0);
-	cursor = (uint8_t *)cursor + uvSizeofBatchHeader(n_entries, true);
+	uvEncodeBatchHeader(entries, n_entries, cursor, format_version);
+	crc1 = byteCrc32(header, uvSizeofBatchHeader(n_entries, format_version), 0);
+	cursor = (uint8_t *)cursor + uvSizeofBatchHeader(n_entries, format_version);
 
 	/* Batch data */
 	crc2 = 0;
@@ -784,12 +785,12 @@ int uvSegmentBufferAppend(struct uvSegmentBuffer *b,
 		cursor = (uint8_t *)cursor + entry->buf.len;
 		static_assert(sizeof(entry->local_data.buf) % sizeof(uint64_t) == 0,
 			      "bad size for entry local data");
-#ifdef DQLITE_NEXT
-		size_t local_data_size = sizeof(entry->local_data.buf);
-		memcpy(cursor, entry->local_data.buf, local_data_size);
-		crc2 = byteCrc32(cursor, local_data_size, crc2);
-		cursor = (uint8_t *)cursor + local_data_size;
-#endif
+		if (format_version > 1) {
+			size_t local_data_size = sizeof(entry->local_data.buf);
+			memcpy(cursor, entry->local_data.buf, local_data_size);
+			crc2 = byteCrc32(cursor, local_data_size, crc2);
+			cursor = (uint8_t *)cursor + local_data_size;
+		}
 	}
 
 	bytePut32(&crc1_p, crc1);
@@ -1030,14 +1031,14 @@ static int uvWriteClosedSegment(struct uv *uv,
 	 * block */
 	cap = uv->block_size -
 	      (sizeof(uint64_t) /* Format version */ +
-	       sizeof(uint64_t) /* Checksums */ + uvSizeofBatchHeader(1, true /* include local bufs */));
+	       sizeof(uint64_t) /* Checksums */ + uvSizeofBatchHeader(1, uv->format_version));
 	if (conf->len > cap) {
 		return RAFT_TOOBIG;
 	}
 
 	uvSegmentBufferInit(&buf, uv->block_size);
 
-	rv = uvSegmentBufferFormat(&buf);
+	rv = uvSegmentBufferFormat(&buf, uv->format_version);
 	if (rv != 0) {
 		return rv;
 	}
@@ -1046,7 +1047,7 @@ static int uvWriteClosedSegment(struct uv *uv,
 	entry.type = RAFT_CHANGE;
 	entry.buf = *conf;
 
-	rv = uvSegmentBufferAppend(&buf, &entry, 1);
+	rv = uvSegmentBufferAppend(&buf, &entry, 1, uv->format_version);
 	if (rv != 0) {
 		uvSegmentBufferClose(&buf);
 		return rv;
@@ -1141,12 +1142,12 @@ int uvSegmentTruncate(struct uv *uv,
 
 	uvSegmentBufferInit(&buf, uv->block_size);
 
-	rv = uvSegmentBufferFormat(&buf);
+	rv = uvSegmentBufferFormat(&buf, uv->format_version);
 	if (rv != 0) {
 		goto out_after_buffer_init;
 	}
 
-	rv = uvSegmentBufferAppend(&buf, entries, m);
+	rv = uvSegmentBufferAppend(&buf, entries, m, uv->format_version);
 	if (rv != 0) {
 		goto out_after_buffer_init;
 	}
diff --git a/src/raft/uv_snapshot.c b/src/raft/uv_snapshot.c
index 343d1ce07..da7762f2f 100644
--- a/src/raft/uv_snapshot.c
+++ b/src/raft/uv_snapshot.c
@@ -249,7 +249,7 @@ static int uvSnapshotLoadMeta(struct uv *uv,
 	}
 
 	format = byteFlip64(header[0]);
-	if (format != UV__DISK_FORMAT) {
+	if (format != (uint64_t)uv->format_version) {
 		tracef("load %s: unsupported format %ju", info->filename,
 		       format);
 		rv = RAFT_MALFORMED;
@@ -676,7 +676,7 @@ int UvSnapshotPut(struct raft_io *io,
 	}
 
 	cursor = put->meta.header;
-	bytePut64(&cursor, UV__DISK_FORMAT);
+	bytePut64(&cursor, (uint64_t)uv->format_version);
 	bytePut64(&cursor, 0);
 	bytePut64(&cursor, snapshot->configuration_index);
 	bytePut64(&cursor, put->meta.bufs[1].len);
diff --git a/src/server.c b/src/server.c
index 1f1d69045..99220e45e 100644
--- a/src/server.c
+++ b/src/server.c
@@ -24,6 +24,7 @@
 #include "transport.h"
 #include "utils.h"
 #include "vfs.h"
+#include "vfs2.h"
 
 /* Special ID for the bootstrap node. Equals to raft_digest("1", 0). */
 #define BOOTSTRAP_ID 0x2dc171858c3155be
@@ -52,7 +53,34 @@ static void state_cb(struct raft *r,
 	}
 }
 
-int dqlite__init(struct dqlite_node *d,
+#ifndef USE_SYSTEM_RAFT
+
+static void initial_barrier_cb(struct raft *r)
+{
+	struct dqlite_node *d = r->data;
+	int rv;
+
+	if (!d->config.disk) {
+		return;
+	}
+
+	queue *head;
+	QUEUE_FOREACH(head, &d->registry.dbs) {
+		struct db *db = QUEUE_DATA(head, struct db, queue);
+		sqlite3 *conn;
+		int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE;
+		rv = sqlite3_open_v2(db->path, &conn, flags, db->config->name);
+		UNHANDLED(rv != SQLITE_OK);
+		sqlite3_file *fp = main_file(conn);
+		rv = vfs2_commit_barrier(fp);
+		UNHANDLED(rv != 0);
+		sqlite3_close(conn);
+	}
+}
+
+#endif
+
+static int node_init(struct dqlite_node *d,
 		 dqlite_node_id id,
 		 const char *address,
 		 const char *dir)
@@ -62,7 +90,6 @@ int dqlite__init(struct dqlite_node *d,
 	int urandom;
 	ssize_t count;
 
-	d->initialized = false;
 	memset(d->errmsg, 0, sizeof d->errmsg);
 
 	rv = snprintf(db_dir_path, sizeof db_dir_path, DATABASE_DIR_FMT, dir);
@@ -78,11 +105,16 @@ int dqlite__init(struct dqlite_node *d,
 			 "config__init(rv:%d)", rv);
 		goto err;
 	}
-	rv = VfsInit(&d->vfs, d->config.name);
-	sqlite3_vfs_register(&d->vfs, 0);
-	if (rv != 0) {
+
+	d->vfs = sqlite3_malloc64(sizeof(*d->vfs));
+	if (d->vfs == NULL) {
 		goto err_after_config_init;
 	}
+
+	rv = VfsInit(d->vfs, d->config.name);
+	if (rv != 0) {
+		goto err_after_alloc_vfs;
+	}
 	registry__init(&d->registry, &d->config);
 
 	rv = uv_loop_init(&d->loop);
@@ -92,41 +124,32 @@ int dqlite__init(struct dqlite_node *d,
 		rv = DQLITE_ERROR;
 		goto err_after_vfs_init;
 	}
-#ifdef DQLITE_NEXT
-	rv = pool_init(&d->pool, &d->loop, d->config.pool_thread_count,
-		       POOL_QOS_PRIO_FAIR);
-	if (rv != 0) {
-		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "pool_init(): %s",
-			 uv_strerror(rv));
-		rv = DQLITE_ERROR;
-		goto err_after_loop_init;
-	}
-#endif
+
 	rv = raftProxyInit(&d->raft_transport, &d->loop);
 	if (rv != 0) {
-		goto err_after_pool_init;
+		goto err_after_loop_init;
 	}
+
 	rv = raft_uv_init(&d->raft_io, &d->loop, dir, &d->raft_transport);
 	if (rv != 0) {
 		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE,
 			 "raft_uv_init(): %s", d->raft_io.errmsg);
 		rv = DQLITE_ERROR;
-		goto err_after_raft_transport_init;
+		goto err_after_raft_proxy_init;
 	}
 	rv = fsm__init(&d->raft_fsm, &d->config, &d->registry);
 	if (rv != 0) {
-		goto err_after_raft_io_init;
+		goto err_after_raft_uv_init;
 	}
 
-	/* TODO: properly handle closing the dqlite server without running it */
 	rv = raft_init(&d->raft, &d->raft_io, &d->raft_fsm, d->config.id,
 		       d->config.address);
 	if (rv != 0) {
 		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "raft_init(): %s",
 			 raft_errmsg(&d->raft));
-		rv = DQLITE_ERROR;
-		goto err;
+		goto err_after_fsm_init;
 	}
+
 	/* TODO: expose these values through some API */
 	raft_set_election_timeout(&d->raft, 3000);
 	raft_set_heartbeat_timeout(&d->raft, 500);
@@ -136,27 +159,16 @@ int dqlite__init(struct dqlite_node *d,
 	raft_set_max_catch_up_rounds(&d->raft, 100);
 	raft_set_max_catch_up_round_duration(&d->raft, 50 * 1000); /* 50 secs */
 	raft_register_state_cb(&d->raft, state_cb);
+#ifndef USE_SYSTEM_RAFT
+	raft_register_initial_barrier_cb(&d->raft, initial_barrier_cb);
+#endif
+
 	rv = sem_init(&d->ready, 0, 0);
-	if (rv != 0) {
-		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "sem_init(): %s",
-			 strerror(errno));
-		rv = DQLITE_ERROR;
-		goto err_after_raft_fsm_init;
-	}
+	assert(rv == 0);
 	rv = sem_init(&d->stopped, 0, 0);
-	if (rv != 0) {
-		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "sem_init(): %s",
-			 strerror(errno));
-		rv = DQLITE_ERROR;
-		goto err_after_ready_init;
-	}
+	assert(rv == 0);
 	rv = sem_init(&d->handover_done, 0, 0);
-	if (rv != 0) {
-		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "sem_init(): %s",
-			 strerror(errno));
-		rv = DQLITE_ERROR;
-		goto err_after_stopped_init;
-	}
+	assert(rv == 0);
 
 	queue_init(&d->queue);
 	queue_init(&d->conns);
@@ -168,70 +180,63 @@ int dqlite__init(struct dqlite_node *d,
 	d->role_management = false;
 	d->connect_func = transportDefaultConnect;
 	d->connect_func_arg = NULL;
+	d->sock = -1;
 
 	urandom = open("/dev/urandom", O_RDONLY);
 	assert(urandom != -1);
 	count = read(urandom, d->random_state.data, sizeof(uint64_t[4]));
 	(void)count;
 	close(urandom);
-	d->initialized = true;
+
 	return 0;
 
-err_after_stopped_init:
-	sem_destroy(&d->stopped);
-err_after_ready_init:
-	sem_destroy(&d->ready);
-err_after_raft_fsm_init:
+err_after_fsm_init:
 	fsm__close(&d->raft_fsm);
-err_after_raft_io_init:
+err_after_raft_uv_init:
 	raft_uv_close(&d->raft_io);
-err_after_raft_transport_init:
+err_after_raft_proxy_init:
 	raftProxyClose(&d->raft_transport);
-err_after_pool_init:
-#ifdef DQLITE_NEXT
-	pool_close(&d->pool);
-	pool_fini(&d->pool);
 err_after_loop_init:
-#endif
 	uv_loop_close(&d->loop);
 err_after_vfs_init:
-	VfsClose(&d->vfs);
+	VfsClose(d->vfs);
+	registry__close(&d->registry);
+err_after_alloc_vfs:
+	sqlite3_free(d->vfs);
 err_after_config_init:
 	config__close(&d->config);
 err:
-	return rv;
+	return DQLITE_ERROR;
 }
 
-void dqlite__close(struct dqlite_node *d)
+static void node_fini(struct dqlite_node *d)
 {
+	PRE(d != NULL);
 	int rv;
-	if (!d->initialized) {
-		return;
-	}
-	raft_free(d->listener);
-	rv = sem_destroy(&d->stopped);
-	assert(rv == 0); /* Fails only if sem object is not valid */
-	rv = sem_destroy(&d->ready);
-	assert(rv == 0); /* Fails only if sem object is not valid */
-	rv = sem_destroy(&d->handover_done);
-	assert(rv == 0);
-	fsm__close(&d->raft_fsm);
-	// TODO assert rv of uv_loop_close after fixing cleanup logic related to
-	// the TODO above referencing the cleanup logic without running the
-	// node. See https://github.com/canonical/dqlite/issues/504.
 
-#ifdef DQLITE_NEXT
-	pool_fini(&d->pool);
+	bool disk = d->config.disk;
+
+	sem_destroy(&d->handover_done);
+	sem_destroy(&d->stopped);
+	sem_destroy(&d->ready);
+#ifndef USE_SYSTEM_RAFT
+	raft_fini(&d->raft);
 #endif
-	uv_loop_close(&d->loop);
+	raft_uv_close(&d->raft_io);
 	raftProxyClose(&d->raft_transport);
+	rv = uv_loop_close(&d->loop);
+	assert(rv == 0);
+	fsm__close(&d->raft_fsm);
 	registry__close(&d->registry);
-	sqlite3_vfs_unregister(&d->vfs);
-	VfsClose(&d->vfs);
-	config__close(&d->config);
-	if (d->bind_address != NULL) {
-		sqlite3_free(d->bind_address);
+	if (disk) {
+		vfs2_destroy(d->vfs);
+	} else {
+		VfsClose(d->vfs);
+		sqlite3_free(d->vfs);
 	}
+	config__close(&d->config);
+	sqlite3_free(d->bind_address);
+	raft_free(d->listener);
 }
 
 int dqlite_node_create(dqlite_node_id id,
@@ -239,95 +244,82 @@ int dqlite_node_create(dqlite_node_id id,
 		       const char *data_dir,
 		       dqlite_node **t)
 {
+	PRE(t != NULL);
 	*t = sqlite3_malloc(sizeof **t);
 	if (*t == NULL) {
 		return DQLITE_NOMEM;
 	}
 
-	return dqlite__init(*t, id, address, data_dir);
+	return node_init(*t, id, address, data_dir);
 }
 
 int dqlite_node_set_bind_address(dqlite_node *t, const char *address)
 {
-	/* sockaddr_un is large enough for our purposes */
-	struct sockaddr_un addr_un;
-	struct sockaddr *addr = (struct sockaddr *)&addr_un;
-	socklen_t addr_len = sizeof(addr_un);
-	sa_family_t domain;
-	size_t path_len;
-	int fd;
 	int rv;
+
 	if (t->running) {
 		return DQLITE_MISUSE;
 	}
 
-	rv =
-	    AddrParse(address, addr, &addr_len, "8080", DQLITE_ADDR_PARSE_UNIX);
+	/* sockaddr_un is large enough for our purposes */
+	struct sockaddr_un addr_un;
+	struct sockaddr *addr = (struct sockaddr *)&addr_un;
+	socklen_t addr_len = sizeof(addr_un);
+	rv = AddrParse(address, addr, &addr_len, "8080", DQLITE_ADDR_PARSE_UNIX);
 	if (rv != 0) {
-		return rv;
+		goto err;
 	}
-	domain = addr->sa_family;
+	sa_family_t domain = addr->sa_family;
 
-	fd = socket(domain, SOCK_STREAM, 0);
-	if (fd == -1) {
-		return DQLITE_ERROR;
+	int sock = socket(domain, SOCK_STREAM, 0);
+	if (sock == -1) {
+		goto err;
 	}
-	rv = fcntl(fd, FD_CLOEXEC);
+	rv = fcntl(sock, FD_CLOEXEC);
 	if (rv != 0) {
-		close(fd);
-		return DQLITE_ERROR;
+		goto err_after_socket;
 	}
 
 	if (domain == AF_INET || domain == AF_INET6) {
 		int reuse = 1;
-		rv = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+		rv = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
 				(const char *)&reuse, sizeof(reuse));
 		if (rv != 0) {
-			close(fd);
-			return DQLITE_ERROR;
+			goto err_after_socket;
 		}
 	}
 
-	rv = bind(fd, addr, addr_len);
-	if (rv != 0) {
-		close(fd);
-		return DQLITE_ERROR;
-	}
-
-	rv = transport__stream(&t->loop, fd, &t->listener);
+	rv = bind(sock, addr, addr_len);
 	if (rv != 0) {
-		close(fd);
-		return DQLITE_ERROR;
+		goto err_after_socket;
 	}
 
-	if (domain == AF_INET || domain == AF_INET6) {
-		int sz = ((int)strlen(address)) + 1; /* Room for '\0' */
-		t->bind_address = sqlite3_malloc(sz);
-		if (t->bind_address == NULL) {
-			close(fd);
-			return DQLITE_NOMEM;
-		}
-		strcpy(t->bind_address, address);
-	} else {
-		path_len = sizeof addr_un.sun_path;
-		t->bind_address = sqlite3_malloc((int)path_len);
-		if (t->bind_address == NULL) {
-			close(fd);
-			return DQLITE_NOMEM;
-		}
-		memset(t->bind_address, 0, path_len);
-		rv = uv_pipe_getsockname((struct uv_pipe_s *)t->listener,
-					 t->bind_address, &path_len);
+	size_t sz;
+	if (domain == AF_UNIX) {
+		rv = getsockname(sock, addr, &addr_len);
 		if (rv != 0) {
-			close(fd);
-			sqlite3_free(t->bind_address);
-			t->bind_address = NULL;
-			return DQLITE_ERROR;
+			goto err_after_socket;
 		}
+		sz = sizeof(addr_un.sun_path);
+	} else {
+		sz = strlen(address) + 1;
+	}
+	t->bind_address = sqlite3_malloc((int)sz);
+	if (t->bind_address == NULL) {
+		goto err_after_socket;
+	}
+	memcpy(t->bind_address, domain == AF_UNIX ? addr_un.sun_path : address, sz);
+	if (domain == AF_UNIX) {
 		t->bind_address[0] = '@';
 	}
 
+	t->sock = sock;
 	return 0;
+
+err_after_socket:
+	close(sock);
+err:
+	return DQLITE_ERROR;
 }
 
 const char *dqlite_node_get_bind_address(dqlite_node *t)
@@ -441,6 +433,7 @@ int dqlite_node_set_block_size(dqlite_node *n, size_t size)
 	raft_uv_set_block_size(&n->raft_io, size);
 	return 0;
 }
+
 int dqlite_node_enable_disk_mode(dqlite_node *n)
 {
 	int rv;
@@ -449,13 +442,24 @@ int dqlite_node_enable_disk_mode(dqlite_node *n)
 		return DQLITE_MISUSE;
 	}
 
-	rv = dqlite_vfs_enable_disk(&n->vfs);
-	if (rv != 0) {
-		return rv;
+	if (n->registry.config->disk) {
+		return 0;
+	}
+
+	sqlite3_vfs *vfs = vfs2_make(sqlite3_vfs_find("unix"), n->config.name);
+	if (vfs == NULL) {
+		return DQLITE_NOMEM;
 	}
+	VfsClose(n->vfs);
+	sqlite3_free(n->vfs);
+	n->vfs = vfs;
 
 	n->registry.config->disk = true;
 
+#ifndef USE_SYSTEM_RAFT
+	raft_uv_set_format_version(&n->raft_io, 2);
+#endif
+
 	/* Close the default fsm and initialize the disk one. */
 	fsm__close(&n->raft_fsm);
 	rv = fsm__init_disk(&n->raft_fsm, &n->config, &n->registry);
@@ -512,6 +516,9 @@ static void raftCloseCb(struct raft *raft)
 	uv_close((struct uv_handle_s *)&s->startup, NULL);
 	uv_close((struct uv_handle_s *)s->listener, NULL);
 	uv_close((struct uv_handle_s *)&s->timer, NULL);
+	if (s->config.disk) {
+		pool_close(&s->pool);
+	}
 }
 
 static void destroy_conn(struct conn *conn)
@@ -557,9 +564,6 @@ static void stopCb(uv_async_t *stop)
 		tracef("not running or already stopped");
 		return;
 	}
-#ifdef DQLITE_NEXT
-	pool_close(&d->pool);
-#endif
 	if (d->role_management) {
 		rv = uv_timer_stop(&d->timer);
 		assert(rv == 0);
@@ -697,13 +701,14 @@ static int taskRun(struct dqlite_node *d)
 {
 	int rv;
 
-	/* TODO: implement proper cleanup upon error by spinning the loop a few
-	 * times. */
-	assert(d->listener != NULL);
+	rv = transport__stream(&d->loop, d->sock, &d->listener);
+	if (rv != 0) {
+		goto err;
+	}
 
 	rv = uv_listen(d->listener, 128, listenCb);
 	if (rv != 0) {
-		return rv;
+		goto err;
 	}
 	d->listener->data = d;
 
@@ -739,9 +744,7 @@ static int taskRun(struct dqlite_node *d)
 	if (rv != 0) {
 		snprintf(d->errmsg, DQLITE_ERRMSG_BUF_SIZE, "raft_start(): %s",
 			 raft_errmsg(&d->raft));
-		/* Unblock any client of taskReady */
-		sem_post(&d->ready);
-		return rv;
+		goto err;
 	}
 
 	rv = uv_run(&d->loop, UV_RUN_DEFAULT);
@@ -752,6 +755,11 @@ static int taskRun(struct dqlite_node *d)
 	assert(rv == 0); /* no reason for which posting should fail */
 
 	return 0;
+
+err:
+	/* Unblock any client of taskReady */
+	sem_post(&d->ready);
+	return DQLITE_ERROR;
 }
 
 int dqlite_node_set_target_voters(dqlite_node *n, int voters)
@@ -811,7 +819,7 @@ static void *taskStart(void *arg)
 
 void dqlite_node_destroy(dqlite_node *d)
 {
-	dqlite__close(d);
+	node_fini(d);
 	sqlite3_free(d);
 }
 
@@ -844,22 +852,26 @@ static int dqliteDatabaseDirSetup(dqlite_node *t)
 		return rv;
 	}
 
-	rv = FsRemoveDirFiles(t->config.dir);
-	if (rv != 0) {
-		snprintf(t->errmsg, DQLITE_ERRMSG_BUF_SIZE,
-			 "Error removing files in database dir: %d", rv);
-		return rv;
-	}
-
-	return rv;
+	return 0;
 }
 
 int dqlite_node_start(dqlite_node *t)
 {
 	int rv;
-	tracef("dqlite node start");
+
+	PRE(!t->running);
 
 	dqliteTracingMaybeEnable(true);
+	tracef("dqlite node start");
+
+	sqlite3_vfs_register(t->vfs, 0 /* not default */);
+
+	if (t->config.disk) {
+		rv = pool_init(&t->pool, &t->loop, t->config.pool_thread_count, POOL_QOS_PRIO_FAIR);
+		if (rv != 0) {
+			goto err;
+		}
+	}
 
 	rv = dqliteDatabaseDirSetup(t);
 	if (rv != 0) {
@@ -916,6 +928,10 @@ int dqlite_node_stop(dqlite_node *d)
 	rv = pthread_join(d->thread, &result);
 	assert(rv == 0);
 
+	if (d->config.disk) {
+		pool_fini(&d->pool);
+	}
+
 	return (int)((uintptr_t)result);
 }
 
@@ -1804,6 +1820,8 @@ int dqlite_server_stop(dqlite_server *server)
 	if (rv != 0) {
 		return 1;
 	}
+	dqlite_node_destroy(server->local);
+	server->local = NULL;
 	return 0;
 }
 
@@ -1815,9 +1833,6 @@ void dqlite_server_destroy(dqlite_server *server)
 	emptyCache(&server->cache);
 
 	free(server->dir_path);
-	if (server->local != NULL) {
-		dqlite_node_destroy(server->local);
-	}
 	free(server->local_addr);
 	free(server->bind_addr);
 	close(server->dir_fd);
diff --git a/src/server.h b/src/server.h
index 31102424f..9aed274f0 100644
--- a/src/server.h
+++ b/src/server.h
@@ -24,7 +24,7 @@ struct dqlite_node {
 
 	pthread_t thread;                        /* Main run loop thread. */
 	struct config config;                    /* Config values */
-	struct sqlite3_vfs vfs;                  /* In-memory VFS */
+	sqlite3_vfs *vfs;
 	struct registry registry;                /* Databases */
 	struct uv_loop_s loop;                   /* UV loop */
 	struct pool_s pool;                      /* Thread pool */
@@ -39,6 +39,7 @@ struct dqlite_node {
 	queue roles_changes;
 	bool running;                 /* Loop is running */
 	struct raft raft;             /* Raft instance */
+	int sock;
 	struct uv_stream_s *listener; /* Listening socket */
 	struct uv_async_s handover;
 	int handover_status;
diff --git a/src/utils.h b/src/utils.h
index 2509f47b8..554b76e8e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -5,6 +5,8 @@
 #include <stdbool.h>
 #include <stdint.h>
 
+#include <sqlite3.h>
+
 /* Various utility functions and macros */
 
 #define PTR_TO_UINT64(p) ((uint64_t)((uintptr_t)(p)))
@@ -22,8 +24,20 @@
 #define POST(cond) assert((cond))
 #define ERGO(a, b) (!(a) || (b))
 
+#define UNHANDLED(expr) if (expr) assert(0)
+
 static inline bool is_po2(unsigned long n) {
 	return n > 0 && (n & (n - 1)) == 0;
 }
 
+static inline sqlite3_file *main_file(sqlite3 *conn)
+{
+	PRE(conn != NULL);
+	sqlite3_file *fp;
+	int rv = sqlite3_file_control(conn, "main", SQLITE_FCNTL_FILE_POINTER, &fp);
+	assert(rv == SQLITE_OK);
+	POST(fp != NULL);
+	return fp;
+}
+
 #endif /* DQLITE_UTILS_H_ */
diff --git a/src/vfs2.c b/src/vfs2.c
index 876302c71..952c39347 100644
--- a/src/vfs2.c
+++ b/src/vfs2.c
@@ -50,6 +50,7 @@ enum {
 	WTX_FOLLOWING,
 	/* Non-leader, all transactions in WAL-cur are committed (but at least one is not checkpointed). */
 	WTX_FLUSH,
+	WTX_INVALIDATED,
 	/* Leader, all transactions in WAL-cur are committed (but at least one is not checkpointed). */
 	WTX_BASE,
 	/* Leader, transaction in progress. */
@@ -116,12 +117,17 @@ static const struct sm_conf wtx_states[SM_STATES_MAX] = {
 	[WTX_FOLLOWING] = {
 		.flags = 0,
 		.name = "following",
-		.allowed = BITS(WTX_FOLLOWING)|BITS(WTX_FLUSH)|BITS(WTX_CLOSED),
+		.allowed = BITS(WTX_FOLLOWING)|BITS(WTX_FLUSH)|BITS(WTX_INVALIDATED)|BITS(WTX_CLOSED),
 	},
 	[WTX_FLUSH] = {
 		.flags = 0,
 		.name = "flush",
-		.allowed = BITS(WTX_FOLLOWING)|BITS(WTX_FLUSH)|BITS(WTX_ACTIVE)|BITS(WTX_CLOSED),
+		.allowed = BITS(WTX_FOLLOWING)|BITS(WTX_FLUSH)|BITS(WTX_INVALIDATED)|BITS(WTX_ACTIVE)|BITS(WTX_CLOSED),
+	},
+	[WTX_INVALIDATED] = {
+		.flags = 0,
+		.name = "invalidated",
+		.allowed = BITS(WTX_ACTIVE)|BITS(WTX_CLOSED),
 	},
 	[WTX_BASE] = {
 		.flags = 0,
@@ -294,7 +300,7 @@ struct entry {
 
 	/* For ACTIVE, HIDDEN: the pending txn. start and len
 	 * are in units of frames. */
-	struct vfs2_wal_frame *pending_txn_frames;
+	dqlite_vfs_frame *pending_txn_frames;
 	uint32_t pending_txn_start;
 	uint32_t pending_txn_len;
 	uint32_t pending_txn_last_frame_commit;
@@ -331,7 +337,7 @@ static void free_pending_txn(struct entry *e)
 {
 	if (e->pending_txn_frames != NULL) {
 		for (uint32_t i = 0; i < e->pending_txn_len; i++) {
-			sqlite3_free(e->pending_txn_frames[i].page);
+			sqlite3_free(e->pending_txn_frames[i].data);
 		}
 		sqlite3_free(e->pending_txn_frames);
 	}
@@ -375,6 +381,8 @@ static bool write_lock_held(const struct entry *e)
 static bool wal_index_basic_hdr_equal(struct wal_index_basic_hdr a,
 				      struct wal_index_basic_hdr b)
 {
+	/* memcmp is okay here since struct wal_index_basic_hdr has no
+	 * padding. */
 	return memcmp(&a, &b, sizeof(struct wal_index_basic_hdr)) == 0;
 }
 
@@ -433,8 +441,11 @@ static bool is_open(const struct entry *e)
 
 static bool basic_hdr_valid(struct wal_index_basic_hdr bhdr)
 {
+
 	struct cksums sums = {};
-	update_cksums(bhdr.bigEndCksum ? BE_MAGIC : LE_MAGIC, (uint8_t *)&bhdr, offsetof(struct wal_index_basic_hdr, cksums), &sums);
+	uint32_t magic = bhdr.bigEndCksum ? BE_MAGIC : LE_MAGIC;
+	size_t n = offsetof(struct wal_index_basic_hdr, cksums);
+	update_cksums(magic, (uint8_t *)&bhdr, n, &sums);
 	return bhdr.iVersion == 3007000
 		&& bhdr.isInit == 1
 		&& cksums_equal(sums, bhdr.cksums);
@@ -442,7 +453,8 @@ static bool basic_hdr_valid(struct wal_index_basic_hdr bhdr)
 
 static bool full_hdr_valid(const struct wal_index_full_hdr *ihdr)
 {
-	return basic_hdr_valid(ihdr->basic[0]) && wal_index_basic_hdr_equal(ihdr->basic[0], ihdr->basic[1]);
+	return basic_hdr_valid(ihdr->basic[0]) &&
+		wal_index_basic_hdr_equal(ihdr->basic[0], ihdr->basic[1]);
 }
 
 static bool wtx_invariant(const struct sm *sm, int prev)
@@ -463,7 +475,15 @@ static bool wtx_invariant(const struct sm *sm, int prev)
 	if (!CHECK(is_open(e))) {
 		return false;
 	}
+
 	struct wal_index_full_hdr *ihdr = get_full_hdr(e);
+	/* TODO(cole) fill this out */
+	if (sm_state(sm) == WTX_INVALIDATED) {
+		if (!CHECK(!full_hdr_valid(ihdr))) {
+			return false;
+		}
+		return true;
+	}
 	if (!CHECK(full_hdr_valid(ihdr))) {
 		return false;
 	}
@@ -529,7 +549,7 @@ static bool wtx_invariant(const struct sm *sm, int prev)
 			return false;
 		}
 		for (uint32_t i = 0; i < e->pending_txn_len; i++) {
-			res &= CHECK(e->pending_txn_frames[i].page != NULL);
+			res &= CHECK(e->pending_txn_frames[i].data != NULL);
 		}
 		return res;
 	}
@@ -558,34 +578,6 @@ static sqlite3_file *get_orig(struct file *f)
 	return (f->flags & SQLITE_OPEN_WAL) ? f->entry->wal_cur : f->orig;
 }
 
-static void maybe_close_entry(struct entry *e)
-{
-	if (e->refcount_main_db > 0 || e->refcount_wal > 0) {
-		return;
-	}
-
-	sqlite3_free(e->main_db_name);
-	sqlite3_free(e->wal_moving_name);
-	sqlite3_free(e->wal_cur_fixed_name);
-	if (e->wal_cur->pMethods != NULL) {
-		e->wal_cur->pMethods->xClose(e->wal_cur);
-	}
-	sqlite3_free(e->wal_cur);
-	sqlite3_free(e->wal_prev_fixed_name);
-	if (e->wal_prev->pMethods != NULL) {
-		e->wal_prev->pMethods->xClose(e->wal_prev);
-	}
-	sqlite3_free(e->wal_prev);
-
-	free_pending_txn(e);
-
-	pthread_rwlock_wrlock(&e->common->rwlock);
-	queue_remove(&e->link);
-	pthread_rwlock_unlock(&e->common->rwlock);
-	sqlite3_free(e);
-
-}
-
 static int vfs2_close(sqlite3_file *file)
 {
 	struct file *xfile = (struct file *)file;
@@ -597,11 +589,14 @@ static int vfs2_close(sqlite3_file *file)
 			rv = xfile->orig->pMethods->xClose(xfile->orig);
 		}
 		sqlite3_free(xfile->orig);
-		xfile->entry->refcount_main_db -= 1;
-		maybe_close_entry(xfile->entry);
+		if (xfile->entry != NULL) {
+			PRE(xfile->entry->refcount_main_db > 0);
+			xfile->entry->refcount_main_db -= 1;
+		}
 	} else if (xfile->flags & SQLITE_OPEN_WAL) {
+		PRE(xfile->entry != NULL);
+		PRE(xfile->entry->refcount_wal > 0);
 		xfile->entry->refcount_wal -= 1;
-		maybe_close_entry(xfile->entry);
 	} else if (xfile->orig->pMethods != NULL) {
 		rv = xfile->orig->pMethods->xClose(xfile->orig);
 		sqlite3_free(xfile->orig);
@@ -676,7 +671,7 @@ static int vfs2_wal_write_frame_hdr(struct entry *e,
 				    const struct wal_frame_hdr *fhdr,
 				    uint32_t x)
 {
-	struct vfs2_wal_frame *frames = e->pending_txn_frames;
+	dqlite_vfs_frame *frames = e->pending_txn_frames;
 	if (no_pending_txn(e)) {
 		assert(x == e->wal_cursor);
 		e->pending_txn_start = x;
@@ -699,21 +694,19 @@ static int vfs2_wal_write_frame_hdr(struct entry *e,
 		if (e->pending_txn_frames == NULL) {
 			return SQLITE_NOMEM;
 		}
-		struct vfs2_wal_frame *frame = &e->pending_txn_frames[n];
+		dqlite_vfs_frame *frame = &e->pending_txn_frames[n];
 		uint32_t commit = ByteGetBe32(fhdr->commit);
 		frame->page_number = ByteGetBe32(fhdr->page_number);
-		frame->commit = commit;
-		frame->page = NULL;
+		frame->data = NULL;
 		e->pending_txn_last_frame_commit = commit;
 		e->pending_txn_len++;
 	} else {
 		/* Overwriting a previously-written frame in the current
 		 * transaction. */
-		struct vfs2_wal_frame *frame = &e->pending_txn_frames[x];
+		dqlite_vfs_frame *frame = &e->pending_txn_frames[x];
 		frame->page_number = ByteGetBe32(fhdr->page_number);
-		frame->commit = ByteGetBe32(fhdr->commit);
-		sqlite3_free(frame->page);
-		frame->page = NULL;
+		sqlite3_free(frame->data);
+		frame->data = NULL;
 	}
 	sm_move(&e->wtx_sm, WTX_ACTIVE);
 	return SQLITE_OK;
@@ -737,13 +730,13 @@ static int vfs2_wal_post_write(struct entry *e,
 		x /= frame_size;
 		x -= e->pending_txn_start;
 		assert(0 <= x && x < e->pending_txn_len);
-		struct vfs2_wal_frame *frame = &e->pending_txn_frames[x];
-		assert(frame->page == NULL);
-		frame->page = sqlite3_malloc(amt);
-		if (frame->page == NULL) {
+		dqlite_vfs_frame *frame = &e->pending_txn_frames[x];
+		assert(frame->data == NULL);
+		frame->data = sqlite3_malloc(amt);
+		if (frame->data == NULL) {
 			return SQLITE_NOMEM;
 		}
-		memcpy(frame->page, buf, (size_t)amt);
+		memcpy(frame->data, buf, (size_t)amt);
 		sm_move(&e->wtx_sm, WTX_ACTIVE);
 		return SQLITE_OK;
 	} else {
@@ -763,7 +756,7 @@ static int vfs2_write(sqlite3_file *file,
 		assert(amt == sizeof(struct wal_hdr));
 		const struct wal_hdr *hdr = buf;
 		struct entry *e = xfile->entry;
-		tracef("about to wal swap");
+		tracef("WAL SWAP LEADER");
 		rv = wal_swap(e, hdr);
 		if (rv != SQLITE_OK) {
 			return rv;
@@ -985,6 +978,13 @@ static int vfs2_shm_lock(sqlite3_file *file, int ofst, int n, int flags)
 
 	assert(xfile->flags & SQLITE_OPEN_MAIN_DB);
 
+	if (flags == (SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) &&
+		ofst <= WAL_RECOVER_LOCK && WAL_RECOVER_LOCK < ofst + n)
+	{
+		struct wal_index_basic_hdr bh = get_full_hdr(e)->basic[0];
+		tracef("RECOVERY isInit=%u mxFrame=%u", bh.isInit, bh.mxFrame);
+	}
+
 	if (flags == (SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)) {
 		for (int i = ofst; i < ofst + n; i++) {
 			if (e->shm_locks[i] == VFS2_EXCLUSIVE) {
@@ -1024,6 +1024,8 @@ static int vfs2_shm_lock(sqlite3_file *file, int ofst, int n, int flags)
 
 		if (ofst <= WAL_RECOVER_LOCK && WAL_RECOVER_LOCK < ofst + n) {
 			tracef("unlocking the recovery lock!");
+			struct wal_index_full_hdr *fh = get_full_hdr(e);
+			tracef("mxFrame after recovery is %u, cursor is %u, isInit is %u", fh->basic[0].mxFrame, e->wal_cursor, fh->basic[0].isInit);
 		}
 
 		if (ofst == WAL_WRITE_LOCK) {
@@ -1031,8 +1033,8 @@ static int vfs2_shm_lock(sqlite3_file *file, int ofst, int n, int flags)
 			 * transaction. */
 			assert(n == 1);
 			tracef("unlocking write lock");
-			/* TODO make sure this is correct */
-			if (e->pending_txn_last_frame_commit == 0) {
+			/* FIXME(cole) iron this out */
+			if (sm_state(&e->wtx_sm) >= WTX_BASE && e->pending_txn_last_frame_commit == 0) {
 				free_pending_txn(e);
 				sm_move(&e->wtx_sm, WTX_BASE);
 			}
@@ -1061,11 +1063,11 @@ static void vfs2_shm_barrier(sqlite3_file *file)
 
 static int vfs2_shm_unmap(sqlite3_file *file, int delete)
 {
-	(void)delete;
 	struct file *xfile = (struct file *)file;
 	struct entry *e = xfile->entry;
+	tracef("UNMAP %p", e);
 	e->shm_refcount--;
-	if (e->shm_refcount == 0) {
+	if (e->shm_refcount == 0 && delete) {
 		for (int i = 0; i < e->shm_regions_len; i++) {
 			void *region = e->shm_regions[i];
 			assert(region != NULL);
@@ -1224,17 +1226,14 @@ static int open_entry(struct common *common, const char *name, struct entry *e)
 
 	strcpy(e->main_db_name, name);
 
-	strcpy(e->wal_moving_name, name);
-	strcat(e->wal_moving_name, "-wal");
-
-	strcpy(e->wal_cur_fixed_name, name);
-	strcat(e->wal_cur_fixed_name, "-xwal1");
-
-	strcpy(e->wal_prev_fixed_name, name);
-	strcat(e->wal_prev_fixed_name, "-xwal2");
+	size_t pc = (size_t)path_cap;
+	snprintf(e->wal_moving_name, pc, "%s-wal", name);
+	snprintf(e->wal_cur_fixed_name, pc, "%s-xwal1", name);
+	snprintf(e->wal_prev_fixed_name, pc, "%s-xwal2", name);
 
 	/* TODO EXRESCODE? */
-	int phys_wal_flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL|SQLITE_OPEN_NOFOLLOW;
+	int phys_wal_flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
+		SQLITE_OPEN_WAL|SQLITE_OPEN_NOFOLLOW;
 
 	e->wal_cur = sqlite3_malloc(file_cap);
 	if (e->wal_cur == NULL) {
@@ -1316,15 +1315,19 @@ static int open_entry(struct common *common, const char *name, struct entry *e)
 	}
 	memset(e->shm_regions[0], 0, VFS2_WAL_INDEX_REGION_SIZE);
 	e->shm_regions_len = 1;
+	tracef("REGIONS %p", e);
 
 	*get_full_hdr(e) = initial_full_hdr(hdr_cur);
 
-	e->wal_cursor = wal_cursor_from_size(e->page_size, size_cur);
-
+	/* TODO(cole) implement deferred initialization of page size when
+	 * WAL-cur is empty at startup. */
 	int next = WTX_EMPTY;
+	/* TODO verify the header here */
 	if (size_cur >= wal_offset_from_cursor(0 /* this doesn't matter */, 0)) {
-		/* TODO verify the header here */
+		/* This is guaranteed to be correct since we don't
+		 * support changing the page size dynamically. */
 		e->page_size = ByteGetBe32(hdr_cur.page_size);
+		e->wal_cursor = wal_cursor_from_size(e->page_size, size_cur);
 		next = WTX_FLUSH;
 	}
 	if (size_cur >= wal_offset_from_cursor(e->page_size, 1)) {
@@ -1610,24 +1613,29 @@ int vfs2_commit_barrier(sqlite3_file *file)
 	struct file *xfile = (struct file *)file;
 	PRE(xfile->flags & SQLITE_OPEN_MAIN_DB);
 	struct entry *e = xfile->entry;
+	tracef("BARRIER %p", e);
+	PRE(e->shm_regions_len > 0);
 	if (e->wal_cursor > 0) {
 		sqlite3_file *wal_cur = e->wal_cur;
 		struct wal_frame_hdr fhdr;
-		int rv = wal_cur->pMethods->xRead(wal_cur, &fhdr, sizeof(fhdr), wal_offset_from_cursor(e->page_size, e->wal_cursor - 1));
-		if (rv == SQLITE_OK) {
+		/* FIXME(cole) */
+		PRE(e->wal_cursor > 0);
+		sqlite3_int64 off = wal_offset_from_cursor(e->page_size, e->wal_cursor - 1);
+		int rv = wal_cur->pMethods->xRead(wal_cur, &fhdr, sizeof(fhdr), off);
+		if (rv != SQLITE_OK) {
 			return rv;
 		}
 		set_mx_frame(get_full_hdr(e), e->wal_cursor, fhdr);
-		/* It's okay if the write lock isn't held */
+		/* It's okay if the write lock isn't held. */
 		e->shm_locks[WAL_WRITE_LOCK] = 0;
 		get_full_hdr(e)->basic[0].isInit = 0;
-		/* The next transaction will cause SQLite to run recovery which will complete the transition to BASE */
-		sm_move(&e->wtx_sm, WTX_FLUSH);
+		/* The next transaction will cause SQLite to run recovery. */
+		sm_move(&e->wtx_sm, WTX_INVALIDATED);
 	}
 	return 0;
 }
 
-int vfs2_poll(sqlite3_file *file, struct vfs2_wal_frame **frames, unsigned *n, struct vfs2_wal_slice *sl)
+int vfs2_poll(sqlite3_file *file, dqlite_vfs_frame **frames, unsigned *n, struct vfs2_wal_slice *sl)
 {
 	struct file *xfile = (struct file *)file;
 	PRE(xfile->flags & SQLITE_OPEN_MAIN_DB);
@@ -1650,7 +1658,7 @@ int vfs2_poll(sqlite3_file *file, struct vfs2_wal_frame **frames, unsigned *n, s
 		*frames = e->pending_txn_frames;
 	} else {
 		for (uint32_t i = 0; i < e->pending_txn_len; i++) {
-			sqlite3_free(e->pending_txn_frames[i].page);
+			sqlite3_free(e->pending_txn_frames[i].data);
 		}
 		sqlite3_free(e->pending_txn_frames);
 	}
@@ -1663,7 +1671,11 @@ int vfs2_poll(sqlite3_file *file, struct vfs2_wal_frame **frames, unsigned *n, s
 		sl->len = len;
 	}
 
-	sm_move(&xfile->entry->wtx_sm, WTX_POLLED);
+	/* FIXME(cole) need some additional tracking for ongoing
+	 * txn after standalone BEGIN? */
+	if (len > 0) {
+		sm_move(&xfile->entry->wtx_sm, WTX_POLLED);
+	}
 
 	return 0;
 }
@@ -1671,6 +1683,38 @@ int vfs2_poll(sqlite3_file *file, struct vfs2_wal_frame **frames, unsigned *n, s
 void vfs2_destroy(sqlite3_vfs *vfs)
 {
 	struct common *data = vfs->pAppData;
+	queue *head = &data->queue;
+	queue *cur = queue_next(head);
+	while (cur != head) {
+		queue *next = queue_next(cur);
+		struct entry *e = QUEUE_DATA(cur, struct entry, link);
+		PRE(e->refcount_main_db == 0);
+		PRE(e->refcount_wal == 0);
+
+		sqlite3_free(e->main_db_name);
+		sqlite3_free(e->wal_moving_name);
+		sqlite3_free(e->wal_cur_fixed_name);
+		if (e->wal_cur->pMethods != NULL) {
+			e->wal_cur->pMethods->xClose(e->wal_cur);
+		}
+		sqlite3_free(e->wal_cur);
+		sqlite3_free(e->wal_prev_fixed_name);
+		if (e->wal_prev->pMethods != NULL) {
+			e->wal_prev->pMethods->xClose(e->wal_prev);
+		}
+		sqlite3_free(e->wal_prev);
+
+		free_pending_txn(e);
+
+		PRE(e->shm_refcount == 0);
+		for (int i = 0; i < e->shm_regions_len; i++) {
+			sqlite3_free(e->shm_regions[i]);
+		}
+		sqlite3_free(e->shm_regions);
+
+		sqlite3_free(e);
+		cur = next;
+	}
 	pthread_rwlock_destroy(&data->rwlock);
 	sqlite3_free(data);
 	sqlite3_free(vfs);
@@ -1678,7 +1722,6 @@ void vfs2_destroy(sqlite3_vfs *vfs)
 
 int vfs2_abort(sqlite3_file *file)
 {
-	/* TODO maybe can "followerize" this and get rid of vfs2_unapply_after? */
 	struct file *xfile = (struct file *)file;
 	PRE(xfile->flags & SQLITE_OPEN_MAIN_DB);
 	struct entry *e = xfile->entry;
@@ -1711,7 +1754,7 @@ int vfs2_read_wal(sqlite3_file *file,
 
 	int page_size = (int)e->page_size;
 	for (size_t i = 0; i < txns_len; i++) {
-		struct vfs2_wal_frame *f = sqlite3_malloc64(txns[i].meta.len * sizeof(*f));
+		dqlite_vfs_frame *f = sqlite3_malloc64(txns[i].meta.len * sizeof(*f));
 		if (f == NULL) {
 			goto oom;
 		}
@@ -1721,7 +1764,7 @@ int vfs2_read_wal(sqlite3_file *file,
 			if (p == NULL) {
 				goto oom;
 			}
-			txns[i].frames[j].page = p;
+			txns[i].frames[j].data = p;
 		}
 	}
 
@@ -1751,12 +1794,11 @@ int vfs2_read_wal(sqlite3_file *file,
 				return 1;
 			}
 			off += (sqlite3_int64)sizeof(fhdr);
-			rv = wal->pMethods->xRead(wal, txns[i].frames[j].page, page_size, off);
+			rv = wal->pMethods->xRead(wal, txns[i].frames[j].data, page_size, off);
 			if (rv != SQLITE_OK) {
 				return 1;
 			}
 			txns[i].frames[j].page_number = ByteGetBe32(fhdr.page_number);
-			txns[i].frames[j].commit = ByteGetBe32(fhdr.commit);
 		}
 		if (from_wal_cur) {
 			vfs2_pseudo_read_end(file, read_lock);
@@ -1768,7 +1810,7 @@ int vfs2_read_wal(sqlite3_file *file,
 oom:
 	for (uint32_t i = 0; i < txns_len; i++) {
 		for (uint32_t j = 0; j < txns[i].meta.len; j++) {
-			sqlite3_free(txns[i].frames[j].page);
+			sqlite3_free(txns[i].frames[j].data);
 		}
 		sqlite3_free(txns[i].frames);
 		txns[i].frames = NULL;
@@ -1791,6 +1833,7 @@ static int write_one_frame(struct entry *e, struct wal_frame_hdr hdr, void *data
 		return rv;
 	}
 	e->wal_cursor += 1;
+	tracef("wal_cursor=%u", e->wal_cursor);
 	return SQLITE_OK;
 }
 
@@ -1804,41 +1847,52 @@ static struct wal_hdr next_wal_hdr(const struct entry *e)
 	uint32_t ckpoint_seqno = ByteGetBe32(old.ckpoint_seqno);
 	BytePutBe32(ckpoint_seqno + 1, ret.ckpoint_seqno);
 	uint32_t salt1;
+	/* TODO(cole) explain sqlite3_randomness here */
        	if (ckpoint_seqno == 0) {
 		salt1 = get_salt1(old.salts) + 1;
 	} else {
-		e->common->orig->xRandomness(e->common->orig, sizeof(salt1), (void *)&salt1);
+		sqlite3_randomness(sizeof(salt1), (void *)&salt1);
 	}
 	BytePutBe32(salt1, ret.salts.salt1);
-	e->common->orig->xRandomness(e->common->orig, sizeof(ret.salts.salt2), (void *)&ret.salts.salt2);
+	sqlite3_randomness(sizeof(ret.salts.salt1), (void *)&ret.salts.salt2);
+	struct cksums sums = {};
+	update_cksums(native_magic(), (void *)&ret, 24, &sums);
+	BytePutBe32(sums.cksum1, ret.cksum1);
+	BytePutBe32(sums.cksum2, ret.cksum2);
 	return ret;
 }
 
-static struct wal_frame_hdr txn_frame_hdr(struct entry *e, struct cksums sums, struct vfs2_wal_frame frame)
+static struct wal_frame_hdr txn_frame_hdr(struct entry *e, struct cksums sums, uint32_t commit, dqlite_vfs_frame frame)
 {
 	struct wal_frame_hdr fhdr;
 
-	BytePutBe32(frame.page_number, fhdr.page_number);
-	BytePutBe32(frame.commit, fhdr.commit);
+	PRE(frame.page_number < (unsigned long)UINT32_MAX);
+	BytePutBe32((uint32_t)frame.page_number, fhdr.page_number);
+	BytePutBe32(commit, fhdr.commit);
 	update_cksums(ByteGetBe32(e->wal_cur_hdr.magic), (const void *)(&fhdr), 8, &sums);
-	update_cksums(ByteGetBe32(e->wal_cur_hdr.magic), frame.page, e->page_size, &sums);
+	update_cksums(ByteGetBe32(e->wal_cur_hdr.magic), frame.data, e->page_size, &sums);
 	fhdr.salts = e->wal_cur_hdr.salts;
 	BytePutBe32(sums.cksum1, fhdr.cksum1);
 	BytePutBe32(sums.cksum2, fhdr.cksum2);
 	return fhdr;
 }
 
-int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct vfs2_wal_frame *frames, unsigned len, struct vfs2_wal_slice *out)
+int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size,
+		const dqlite_vfs_frame *frames, unsigned len,
+		struct vfs2_wal_slice *out)
 {
 	PRE(len > 0);
 	PRE(is_valid_page_size(page_size));
-	for (unsigned i = 0; i < len - 1; i++) {
-		PRE(frames[i].commit == 0);
+	for (unsigned i = 0; i < len; i++) {
+		PRE(frames[i].page_number < UINT32_MAX);
 	}
-	PRE(frames[len - 1].commit > 0);
 	struct file *xfile = (struct file *)file;
 	PRE(xfile->flags & SQLITE_OPEN_MAIN_DB);
 	struct entry *e = xfile->entry;
+	/* XXX(cole) */
+	if (e->page_size == 0) {
+		e->page_size = page_size;
+	}
 	PRE(page_size == e->page_size);
 	int rv;
 
@@ -1851,15 +1905,17 @@ int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct
 	 *
 	 * The write lock will be released when a call to vfs2_commit
 	 * or vfs2_unapply causes the number of committed frames in
-	 * WAL-cur (mxFrame) to equal the number of applies frames
+	 * WAL-cur (mxFrame) to equal the number of applied frames
 	 * (wal_cursor). */
 	e->shm_locks[WAL_WRITE_LOCK] = VFS2_EXCLUSIVE;
 	
 	struct wal_index_full_hdr *ihdr = get_full_hdr(e);
 	uint32_t mx = ihdr->basic[0].mxFrame;
-	if (mx > 0 && ihdr->nBackfill == mx) {
+	/* TODO(cole) understand the behavior when mx == 0 */
+	if (/* mx > 0 && */ ihdr->nBackfill == mx && mx == e->wal_cursor) {
 		struct wal_hdr new_whdr = next_wal_hdr(e);
 		restart_full_hdr(ihdr, new_whdr);
+		tracef("WAL SWAP mx=%u cksums=%u %u", mx, ByteGetBe32(new_whdr.cksum1), ByteGetBe32(new_whdr.cksum2));
 		rv = wal_swap(e, &new_whdr);
 		if (rv != SQLITE_OK) {
 			return 1;
@@ -1870,6 +1926,7 @@ int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct
 	uint32_t start = e->wal_cursor;
 
 	struct cksums sums;
+	uint32_t db_size;
 	if (start > 0) {
 		/* TODO cache this in the entry? */
 		struct wal_frame_hdr prev_fhdr;
@@ -1878,15 +1935,28 @@ int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct
 		if (rv != SQLITE_OK) {
 			return 1;
 		}
+
 		sums.cksum1 = ByteGetBe32(prev_fhdr.cksum1);
 		sums.cksum2 = ByteGetBe32(prev_fhdr.cksum2);
+		db_size = ByteGetBe32(prev_fhdr.commit);
+		assert(db_size > 0);
 	} else {
+
+		/* TODO cache this in the entry? */
+		uint8_t b[100];
+		rv = xfile->orig->pMethods->xRead(xfile->orig, &b, sizeof(b), 0);
+		if (rv != SQLITE_OK) {
+			return 1;
+		}
+
 		sums.cksum1 = ByteGetBe32(e->wal_cur_hdr.cksum1);
 		sums.cksum2 = ByteGetBe32(e->wal_cur_hdr.cksum2);
+		db_size = ByteGetBe32(b + 28);
+		/* TODO cross-check with xFileSize */
 	}
 
-	struct wal_frame_hdr fhdr = txn_frame_hdr(e, sums, frames[0]);
-	rv = write_one_frame(e, fhdr, frames[0].page);
+	struct wal_frame_hdr fhdr = txn_frame_hdr(e, sums, 0 == len - 1 ? db_size : 0, frames[0]);
+	rv = write_one_frame(e, fhdr, frames[0].data);
 	if (rv != SQLITE_OK) {
 		return 1;
 	}
@@ -1894,8 +1964,10 @@ int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct
 	for (unsigned i = 1; i < len; i++) {
 		sums.cksum1 = ByteGetBe32(fhdr.cksum1);
 		sums.cksum2 = ByteGetBe32(fhdr.cksum2);
-		fhdr = txn_frame_hdr(e, sums, frames[i]);
-		rv = write_one_frame(e, fhdr, frames[i].page);
+		uint32_t pn = (uint32_t)frames[i].page_number;
+		db_size = pn > db_size ? pn : db_size;
+		fhdr = txn_frame_hdr(e, sums, i == len - 1 ? db_size : 0, frames[i]);
+		rv = write_one_frame(e, fhdr, frames[i].data);
 		if (rv != SQLITE_OK) {
 			return 1;
 		}
diff --git a/src/vfs2.h b/src/vfs2.h
index f2181e760..1dbea1ed5 100644
--- a/src/vfs2.h
+++ b/src/vfs2.h
@@ -1,6 +1,8 @@
 #ifndef DQLITE_VFS2_H
 #define DQLITE_VFS2_H
 
+#include "../include/dqlite.h"
+
 #include <sqlite3.h>
 
 #include <stddef.h>
@@ -33,12 +35,6 @@ struct vfs2_wal_slice {
 	uint32_t len;
 };
 
-struct vfs2_wal_frame {
-	uint32_t page_number;
-	uint32_t commit;
-	void *page;
-};
-
 /**
  * Retrieve frames that were appended to the WAL by the last write transaction,
  * and reacquire the write lock.
@@ -47,7 +43,8 @@ struct vfs2_wal_frame {
  *
  * Polling the same transaction more than once is an error.
  */
-int vfs2_poll(sqlite3_file *file, struct vfs2_wal_frame **frames, unsigned *n, struct vfs2_wal_slice *sl);
+int vfs2_poll(sqlite3_file *file, dqlite_vfs_frame **frames, unsigned *n,
+		struct vfs2_wal_slice *sl);
 
 int vfs2_unhide(sqlite3_file *file);
 
@@ -55,13 +52,14 @@ int vfs2_commit(sqlite3_file *file, struct vfs2_wal_slice stop);
 
 int vfs2_commit_barrier(sqlite3_file *file);
 
-int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size, const struct vfs2_wal_frame *frames, unsigned n, struct vfs2_wal_slice *out);
+int vfs2_apply_uncommitted(sqlite3_file *file, uint32_t page_size,
+		const dqlite_vfs_frame *frames, unsigned n, struct vfs2_wal_slice *out);
 
 int vfs2_unapply(sqlite3_file *file, struct vfs2_wal_slice stop);
 
 struct vfs2_wal_txn {
 	struct vfs2_wal_slice meta;
-	struct vfs2_wal_frame *frames;
+	dqlite_vfs_frame *frames;
 };
 
 /**
@@ -99,7 +97,4 @@ int vfs2_pseudo_read_end(sqlite3_file *file, unsigned i);
  */
 void vfs2_destroy(sqlite3_vfs *vfs);
 
-// TODO access read marks and shm_locks
-// TODO access information about checkpoints
-
 #endif
diff --git a/test/integration/test_cluster.c b/test/integration/test_cluster.c
index 57f605cff..9b81d024a 100644
--- a/test/integration/test_cluster.c
+++ b/test/integration/test_cluster.c
@@ -21,6 +21,7 @@
 
 #define SETUP                                                 \
 	unsigned i_;                                          \
+	f->disk_mode = atoi(munit_parameters_get(params, "disk_mode")); \
 	test_heap_setup(params, user_data);                   \
 	test_sqlite_setup(params);                            \
 	for (i_ = 0; i_ < N_SERVERS; i_++) {                  \
@@ -45,6 +46,9 @@
 /* Use the client connected to the server with the given ID. */
 #define SELECT(ID) f->client = test_server_client(&f->servers[ID - 1])
 
+#define TODO_VFS2(x) x
+#define DISK_MODE_MISSING_SNAPSHOT(x) x
+
 /******************************************************************************
  *
  * cluster
@@ -54,6 +58,7 @@
 SUITE(cluster)
 
 struct fixture {
+	bool disk_mode;
 	FIXTURE;
 };
 
@@ -99,6 +104,10 @@ TEST(cluster, restart, setUp, tearDown, 0, cluster_params)
 	    strtol(munit_parameters_get(params, "num_records"), NULL, 0);
 	char sql[128];
 
+	if (n_records >= 2200 && f->disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	HANDSHAKE;
 	OPEN;
 	PREPARE("CREATE TABLE test (n INT)", &stmt_id);
@@ -137,6 +146,10 @@ TEST(cluster, dataOnNewNode, setUp, tearDown, 0, cluster_params)
 	unsigned id = 2;
 	const char *address = "@2";
 
+	if (n_records >= 2200 && f->disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	HANDSHAKE;
 	OPEN;
 	PREPARE("CREATE TABLE test (n INT)", &stmt_id);
@@ -156,7 +169,8 @@ TEST(cluster, dataOnNewNode, setUp, tearDown, 0, cluster_params)
 	/* Remove original server so second server becomes leader after election
 	 * timeout */
 	REMOVE(1);
-	sleep(1);
+	/* TODO(cole) investigate why this now takes so much longer */
+	sleep(10);
 
 	/* The full table is visible from the new node */
 	SELECT(2);
@@ -172,7 +186,7 @@ TEST(cluster, dataOnNewNode, setUp, tearDown, 0, cluster_params)
 /* Insert a huge row, causing SQLite to allocate overflow pages. Then
  * insert the same row again. (Reproducer for
  * https://github.com/canonical/raft/issues/432.) */
-TEST(cluster, hugeRow, setUp, tearDown, 0, NULL)
+TEST(cluster, hugeRow, setUp, tearDown, 0, cluster_params)
 {
 	struct fixture *f = data;
 	uint32_t stmt_id;
@@ -181,7 +195,10 @@ TEST(cluster, hugeRow, setUp, tearDown, 0, NULL)
 	char *sql;
 	ssize_t n;
 	size_t huge = 20000000;
-	(void)params;
+
+	if (f->disk_mode) {
+		return TODO_VFS2(MUNIT_SKIP);
+	}
 
 	HANDSHAKE;
 	OPEN;
@@ -217,6 +234,10 @@ TEST(cluster, modifyingQuery, setUp, tearDown, 0, cluster_params)
 	unsigned id = 2;
 	const char *address = "@2";
 
+	if (n_records >= 2200 && f->disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	HANDSHAKE;
 	OPEN;
 	PREPARE("CREATE TABLE test (n INT)", &stmt_id);
@@ -235,7 +256,8 @@ TEST(cluster, modifyingQuery, setUp, tearDown, 0, cluster_params)
 	ASSIGN(id, DQLITE_VOTER);
 
 	REMOVE(1);
-	sleep(1);
+	/* FIXME(cole) why so long in disk mode? */
+	sleep(10);
 
 	SELECT(2);
 	HANDSHAKE;
@@ -260,6 +282,10 @@ TEST(cluster, modifyingQuerySql, setUp, tearDown, 0, cluster_params)
 	unsigned id = 2;
 	const char *address = "@2";
 
+	if (n_records >= 2200 && f->disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	HANDSHAKE;
 	OPEN;
 	PREPARE("CREATE TABLE test (n INT)", &stmt_id);
@@ -277,7 +303,8 @@ TEST(cluster, modifyingQuerySql, setUp, tearDown, 0, cluster_params)
 	ASSIGN(id, DQLITE_VOTER);
 
 	REMOVE(1);
-	sleep(1);
+	/* FIXME(cole) why so long in disk mode? */
+	sleep(10);
 
 	SELECT(2);
 	HANDSHAKE;
diff --git a/test/integration/test_fsm.c b/test/integration/test_fsm.c
index d735d1c9b..6110a0674 100644
--- a/test/integration/test_fsm.c
+++ b/test/integration/test_fsm.c
@@ -50,6 +50,8 @@
 /* Use the client connected to the server with the given ID. */
 #define SELECT(ID) f->client = test_server_client(&f->servers[ID - 1])
 
+#define DISK_MODE_MISSING_SNAPSHOT(x) x
+
 static char *bools[] = {"0", "1", NULL};
 
 /* Make sure the snapshots scheduled by raft don't interfere with the snapshots
@@ -103,6 +105,10 @@ TEST(fsm, snapshotFreshDb, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	rv = fsm->snapshot(fsm, &bufs, &n_bufs);
 	munit_assert_int(rv, ==, 0);
 	munit_assert_uint(n_bufs, ==, 1); /* Snapshot header */
@@ -137,6 +143,10 @@ TEST(fsm, snapshotWrittenDb, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN;
@@ -179,6 +189,10 @@ TEST(fsm, snapshotHeapFaultSingleDB, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN;
@@ -239,6 +253,10 @@ TEST(fsm,
 		return MUNIT_SKIP;
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN;
@@ -282,6 +300,10 @@ TEST(fsm, snapshotHeapFaultTwoDB, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Open 2 databases and add data to them */
 	HANDSHAKE;
 	OPEN_NAME("test");
@@ -354,6 +376,10 @@ TEST(fsm, snapshotHeapFaultTwoDBAsync, setUp, tearDown, 0, snapshot_params)
 		return MUNIT_SKIP;
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Open 2 databases and add data to them */
 	HANDSHAKE;
 	OPEN_NAME("test");
@@ -420,6 +446,10 @@ TEST(fsm, snapshotNewDbAddedBeforeFinalize, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN_NAME("test");
@@ -474,6 +504,10 @@ TEST(fsm, snapshotWritesBeforeFinalize, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN;
@@ -529,6 +563,10 @@ TEST(fsm, concurrentSnapshots, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Add some data to database */
 	HANDSHAKE;
 	OPEN;
@@ -624,6 +662,10 @@ TEST(fsm, snapshotRestore, setUp, tearDown, 0, restore_params)
 	if (disk_mode_param != NULL) {
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
+	
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
 
 	/* Add some data to database */
 	HANDSHAKE;
@@ -695,6 +737,10 @@ TEST(fsm, snapshotRestoreMultipleDBs, setUp, tearDown, 0, snapshot_params)
 		disk_mode = (bool)atoi(disk_mode_param);
 	}
 
+	if (disk_mode) {
+		return DISK_MODE_MISSING_SNAPSHOT(MUNIT_SKIP);
+	}
+
 	/* Create 2 databases and add data to them. */
 	HANDSHAKE;
 	OPEN_NAME("test");
diff --git a/test/integration/test_node.c b/test/integration/test_node.c
index fb62e1b81..a137dabf8 100644
--- a/test/integration/test_node.c
+++ b/test/integration/test_node.c
@@ -136,6 +136,11 @@ SUITE(node);
  *
  ******************************************************************************/
 
+TEST(node, nothing, setUp, tearDown, 0, node_params)
+{
+	return MUNIT_OK;
+}
+
 TEST(node, start, setUp, tearDown, 0, node_params)
 {
 	struct fixture *f = data;
diff --git a/test/lib/raft.h b/test/lib/raft.h
index b7019e61b..170e14ad3 100644
--- a/test/lib/raft.h
+++ b/test/lib/raft.h
@@ -49,6 +49,7 @@
 		fsm__close(&f->fsm);                \
 		test_uv_tear_down(&f->loop);        \
 		raftProxyClose(&f->raft_transport); \
+		raft_fini(&f->raft); \
 		test_dir_tear_down(f->dir);         \
 	}
 
diff --git a/test/raft/integration/test_uv_init.c b/test/raft/integration/test_uv_init.c
index eae17fb11..4596d08b0 100644
--- a/test/raft/integration/test_uv_init.c
+++ b/test/raft/integration/test_uv_init.c
@@ -1,6 +1,5 @@
 #include "../../../src/raft.h"
 #include "../../../src/raft/byte.h"
-#include "../../../src/raft/uv_encoding.h"
 #include "../lib/runner.h"
 #include "../lib/uv.h"
 
@@ -16,6 +15,13 @@
  *
  *****************************************************************************/
 
+static char *format_versions[] = {"1", "2", NULL};
+
+static MunitParameterEnum format_params[] = {
+    {"format_version", format_versions},
+    {NULL, NULL},
+};
+
 struct fixture
 {
     FIXTURE_UV_DEPS;
@@ -41,6 +47,7 @@ static void closeCb(struct raft_io *io)
         int _rv;                                                  \
         _rv = raft_uv_init(&f->io, &f->loop, DIR, &f->transport); \
         munit_assert_int(_rv, ==, 0);                             \
+        raft_uv_set_format_version(&f->io, f->format_version); \
         _rv = f->io.init(&f->io, 1, "1");                         \
         munit_assert_int(_rv, ==, 0);                             \
     } while (0)
@@ -60,6 +67,7 @@ static void closeCb(struct raft_io *io)
         int _rv;                                                  \
         _rv = raft_uv_init(&f->io, &f->loop, DIR, &f->transport); \
         munit_assert_int(_rv, ==, 0);                             \
+        raft_uv_set_format_version(&f->io, f->format_version); \
         _rv = f->io.init(&f->io, 1, "1");                         \
         munit_assert_int(_rv, ==, RV);                            \
         munit_assert_string_equal(f->io.errmsg, ERRMSG);          \
@@ -102,6 +110,8 @@ static void *setUp(const MunitParameter params[], void *user_data)
     SETUP_UV_DEPS;
     f->io.data = f;
     f->closed = false;
+    const char *format_version = munit_parameters_get(params, "format_version");
+    f->format_version = format_version != NULL ? atoi(format_version) : 1;
     return f;
 }
 
@@ -232,41 +242,41 @@ TEST(init, metadataOneBadFormat, setUp, tearDown, 0, NULL)
                         1, /* Version                              */
                         1, /* Term                                 */
                         0 /* Voted for                            */);
-    INIT_ERROR(f->dir, RAFT_MALFORMED,
-               "decode content of metadata1: bad format version " BAD_FORMAT_STR);
+    INIT(f->dir);
+    CLOSE;
     return MUNIT_OK;
 }
 
 /* The metadata1 file has not a valid version. */
-TEST(init, metadataOneBadVersion, setUp, tearDown, 0, NULL)
+TEST(init, metadataOneBadVersion, setUp, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     WRITE_METADATA_FILE(1, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         0, /* Version                              */
                         1, /* Term                                 */
                         0 /* Voted for                            */);
-    INIT_ERROR(f->dir, RAFT_CORRUPT,
-               "decode content of metadata1: version is set to zero");
+    INIT(f->dir);
+    CLOSE;
     return MUNIT_OK;
 }
 
 /* The data directory has both metadata files, but they have the same
  * version. */
-TEST(init, metadataOneAndTwoSameVersion, setUp, tearDown, 0, NULL)
+TEST(init, metadataOneAndTwoSameVersion, setUp, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     WRITE_METADATA_FILE(1, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         2, /* Version                              */
                         3, /* Term                                 */
                         0 /* Voted for                            */);
     WRITE_METADATA_FILE(2, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         2, /* Version                              */
                         2, /* Term                                 */
                         0 /* Voted for                            */);
-    INIT_ERROR(f->dir, RAFT_CORRUPT,
-               "metadata1 and metadata2 are both at version 2");
+    INIT(f->dir);
+    CLOSE;
     return MUNIT_OK;
 }
diff --git a/test/raft/integration/test_uv_load.c b/test/raft/integration/test_uv_load.c
index 4427e34ac..8aade967b 100644
--- a/test/raft/integration/test_uv_load.c
+++ b/test/raft/integration/test_uv_load.c
@@ -2,7 +2,6 @@
 
 #include "../../../src/raft/byte.h"
 #include "../../../src/raft/uv.h"
-#include "../../../src/raft/uv_encoding.h"
 #include "../lib/runner.h"
 #include "../lib/uv.h"
 
@@ -12,6 +11,13 @@
  *
  *****************************************************************************/
 
+static char *format_versions[] = {"1", "2", NULL};
+
+static MunitParameterEnum format_params[] = {
+    {"format_version", format_versions},
+    {NULL, NULL},
+};
+
 struct fixture
 {
     FIXTURE_UV_DEPS;
@@ -108,6 +114,7 @@ struct snapshot
         munit_assert_int(_rv, ==, 0);                                        \
         raft_uv_set_block_size(&_io, SEGMENT_BLOCK_SIZE);                    \
         raft_uv_set_segment_size(&_io, SEGMENT_SIZE);                        \
+        raft_uv_set_format_version(&_io, f->format_version);                 \
         _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index, \
                        &_entries, &_n);                                      \
         munit_assert_int(_rv, ==, 0);                                        \
@@ -190,6 +197,7 @@ struct snapshot
         munit_assert_int(_rv, ==, 0);                                         \
         raft_uv_set_block_size(&_io, SEGMENT_BLOCK_SIZE);                     \
         raft_uv_set_segment_size(&_io, SEGMENT_SIZE);                         \
+        raft_uv_set_format_version(&_io, f->format_version);                  \
         _rv = _io.load(&_io, &_term, &_voted_for, &_snapshot, &_start_index,  \
                        &_entries, &_n);                                       \
         munit_assert_int(_rv, ==, 0);                                         \
@@ -378,6 +386,8 @@ static void *setUp(const MunitParameter params[], void *user_data)
 {
     struct fixture *f = munit_malloc(sizeof *f);
     SETUP_UV_DEPS;
+    const char *format_version = munit_parameters_get(params, "format_version");
+    f->format_version = format_version != NULL ? atoi(format_version) : 1;
     return f;
 }
 
@@ -575,14 +585,15 @@ TEST(load, openSegmentWithIncompleteBatch, setUp, tearDown, 0, NULL)
 
 /* The data directory has an open segment whose first batch is only
  * partially written. In that case the segment gets removed. */
-TEST(load, openSegmentWithIncompleteFirstBatch, setUp, tearDown, 0, NULL)
+TEST(load, openSegmentWithIncompleteFirstBatch, setUp, tearDown, 0, format_params)
 {
     struct fixture *f = data;
+
     uint8_t buf[5 * WORD_SIZE] = {
-        UV__DISK_FORMAT, 0, 0, 0, 0, 0, 0, 0, /* Format version */
+        (uint8_t)f->format_version, 0, 0, 0, 0, 0, 0, 0, /* Format version */
         0, 0, 0, 0, 0, 0, 0, 0, /* CRC32 checksums */
         0, 0, 0, 0, 0, 0, 0, 0, /* Number of entries */
-	0, 0, 0, 0, 0, 0, 0, 0, /* Local data size */
+        0, 0, 0, 0, 0, 0, 0, 0, /* Local data size */
         0, 0, 0, 0, 0, 0, 0, 0  /* Batch data */
     };
     APPEND(1, 1);
@@ -1621,7 +1632,7 @@ TEST(load, openSegmentWithIncompletePreamble, setUp, tearDown, 0, NULL)
 }
 
 /* The data directory has an open segment which has incomplete batch header. */
-TEST(load, openSegmentWithIncompleteBatchHeader, setUp, tearDown, 0, NULL)
+TEST(load, openSegmentWithIncompleteBatchHeader, setUp, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     size_t offset = WORD_SIZE + /* Format version */
@@ -1632,21 +1643,25 @@ TEST(load, openSegmentWithIncompleteBatchHeader, setUp, tearDown, 0, NULL)
     APPEND(1, 1);
     UNFINALIZE(1, 1, 1);
     DirTruncateFile(f->dir, "open-1", offset);
-#ifdef DQLITE_NEXT
-    const char *msg =
-	    "load open segment open-1: entries batch 1 starting at byte 8: "
-	    "read header: short read: 8 bytes instead of 24";
-#else
-    const char *msg =
-	    "load open segment open-1: entries batch 1 starting at byte 8: "
-	    "read header: short read: 8 bytes instead of 16";
-#endif
+    const char *msg;
+    switch (f->format_version) {
+    case 1:
+        msg = "load open segment open-1: entries batch 1 starting at byte 8: "
+              "read header: short read: 8 bytes instead of 16";
+        break;
+    case 2:
+        msg = "load open segment open-1: entries batch 1 starting at byte 8: "
+              "read header: short read: 8 bytes instead of 24";
+        break;
+    default:
+	munit_error("impossible");
+    }
     LOAD_ERROR(RAFT_IOERR, msg);
     return MUNIT_OK;
 }
 
 /* The data directory has an open segment which has incomplete batch data. */
-TEST(load, openSegmentWithIncompleteBatchData, setUp, tearDown, 0, NULL)
+TEST(load, openSegmentWithIncompleteBatchData, setUp, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     size_t offset = WORD_SIZE + /* Format version */
@@ -1656,23 +1671,27 @@ TEST(load, openSegmentWithIncompleteBatchData, setUp, tearDown, 0, NULL)
                     WORD_SIZE + /* Entry type and data size */
                     WORD_SIZE / 2 /* Partial entry data */;
 
-#ifdef DQLITE_NEXT
-    offset += WORD_SIZE; /* Local data size */
-#endif
+    if (f->format_version > 1) {
+        offset += WORD_SIZE;
+    }
 
     APPEND(1, 1);
     UNFINALIZE(1, 1, 1);
     DirTruncateFile(f->dir, "open-1", offset);
 
-#ifdef DQLITE_NEXT
-    const char *msg =
-	    "load open segment open-1: entries batch 1 starting at byte 8: "
-	    "read data: short read: 4 bytes instead of 24";
-#else
-    const char *msg =
-	    "load open segment open-1: entries batch 1 starting at byte 8: "
-	    "read data: short read: 4 bytes instead of 8";
-#endif
+    const char *msg;
+    switch (f->format_version) {
+    case 1:
+        msg = "load open segment open-1: entries batch 1 starting at byte 8: "
+              "read data: short read: 4 bytes instead of 8";
+        break;
+    case 2:
+        msg = "load open segment open-1: entries batch 1 starting at byte 8: "
+              "read data: short read: 4 bytes instead of 24";
+        break;
+    default:
+        munit_error("impossible");
+    }
     LOAD_ERROR(RAFT_IOERR, msg);
     return MUNIT_OK;
 }
diff --git a/test/raft/integration/test_uv_set_term.c b/test/raft/integration/test_uv_set_term.c
index 63381a011..9c7bb405d 100644
--- a/test/raft/integration/test_uv_set_term.c
+++ b/test/raft/integration/test_uv_set_term.c
@@ -1,8 +1,8 @@
 #include "../../../src/raft.h"
 #include "../../../src/raft/byte.h"
-#include "../../../src/raft/uv_encoding.h"
 #include "../lib/runner.h"
 #include "../lib/uv.h"
+#include "../../../src/raft/uv.h"
 
 /******************************************************************************
  *
@@ -10,6 +10,13 @@
  *
  *****************************************************************************/
 
+static char *format_versions[] = {"1", "2", NULL};
+
+static MunitParameterEnum format_params[] = {
+    {"format_version", format_versions},
+    {NULL, NULL},
+};
+
 struct fixture
 {
     FIXTURE_UV_DEPS;
@@ -35,6 +42,7 @@ static void closeCb(struct raft_io *io)
         int _rv;                                                     \
         _rv = raft_uv_init(&f->io, &f->loop, f->dir, &f->transport); \
         munit_assert_int(_rv, ==, 0);                                \
+        raft_uv_set_format_version(&f->io, f->format_version); \
         _rv = f->io.init(&f->io, 1, "1");                            \
         munit_assert_int(_rv, ==, 0);                                \
     } while (0)
@@ -89,7 +97,7 @@ static void closeCb(struct raft_io *io)
         char filename[strlen("metadataN") + 1];              \
         sprintf(filename, "metadata%d", N);                  \
         DirReadFile(f->dir, filename, buf2, sizeof buf2);    \
-        munit_assert_int(byteGet64(&cursor), ==, UV__DISK_FORMAT);         \
+        munit_assert_int(byteGet64(&cursor), ==, f->format_version); \
         munit_assert_int(byteGet64(&cursor), ==, VERSION);   \
         munit_assert_int(byteGet64(&cursor), ==, TERM);      \
         munit_assert_int(byteGet64(&cursor), ==, VOTED_FOR); \
@@ -107,6 +115,8 @@ static void *setUpDeps(const MunitParameter params[], void *user_data)
     SETUP_UV_DEPS;
     f->io.data = f;
     f->closed = false;
+    const char *format_version = munit_parameters_get(params, "format_version");
+    f->format_version = format_version != NULL ? atoi(format_version) : 1;
     return f;
 }
 
@@ -179,17 +189,27 @@ TEST(set_term, fourth, setUp, tearDown, 0, NULL)
     return MUNIT_OK;
 }
 
+static void metadata_load(struct fixture *f)
+{
+	struct uv *uv = f->io.impl;
+	int rv = uvMetadataLoad(f->dir, &uv->metadata, f->format_version, f->io.errmsg);
+	if (rv != 0) {
+		munit_error("uvMetadataLoad");
+	}
+}
+
 /* If the data directory has a single metadata1 file, the first time set_data()
  * is called, the second metadata file gets created. */
-TEST(set_term, metadataOneExists, setUpDeps, tearDown, 0, NULL)
+TEST(set_term, metadataOneExists, setUpDeps, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     WRITE_METADATA_FILE(1, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         1, /* Version                              */
                         1, /* Term                                 */
                         0 /* Voted for                            */);
     INIT;
+    metadata_load(f);
     SET_TERM(2);
     ASSERT_METADATA_FILE(1, 1, 1, 0);
     ASSERT_METADATA_FILE(2, 2, 2, 0);
@@ -197,20 +217,21 @@ TEST(set_term, metadataOneExists, setUpDeps, tearDown, 0, NULL)
 }
 
 /* The data directory has both metadata files, but metadata1 is greater. */
-TEST(set_term, metadataOneIsGreater, setUpDeps, tearDown, 0, NULL)
+TEST(set_term, metadataOneIsGreater, setUpDeps, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     WRITE_METADATA_FILE(1, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         3, /* Version                              */
                         3, /* Term                                 */
                         0 /* Voted for                            */);
     WRITE_METADATA_FILE(2, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+		        f->format_version,
                         2, /* Version                              */
                         2, /* Term                                 */
                         0 /* Voted for                            */);
     INIT;
+    metadata_load(f);
     SET_TERM(4);
     ASSERT_METADATA_FILE(1 /* n */, 3 /* version */, 3 /* term */,
                          0 /* voted for */);
@@ -220,20 +241,21 @@ TEST(set_term, metadataOneIsGreater, setUpDeps, tearDown, 0, NULL)
 }
 
 /* The data directory has both metadata files, but metadata2 is greater. */
-TEST(set_term, metadataTwoIsGreater, setUpDeps, tearDown, 0, NULL)
+TEST(set_term, metadataTwoIsGreater, setUpDeps, tearDown, 0, format_params)
 {
     struct fixture *f = data;
     WRITE_METADATA_FILE(1, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+                        f->format_version,
                         1, /* Version                              */
                         1, /* Term                                 */
                         0 /* Voted for                            */);
     WRITE_METADATA_FILE(2, /* Metadata file index                  */
-                        UV__DISK_FORMAT, /* Format                               */
+		        f->format_version,
                         2, /* Version                              */
                         2, /* Term                                 */
                         0 /* Voted for                            */);
     INIT;
+    metadata_load(f);
     SET_TERM(2);
     ASSERT_METADATA_FILE(1 /* n */, 3 /* version */, 2 /* term */,
                          0 /* voted for */);
diff --git a/test/raft/lib/runner.h b/test/raft/lib/runner.h
index 13244a33a..69f3444c2 100644
--- a/test/raft/lib/runner.h
+++ b/test/raft/lib/runner.h
@@ -4,6 +4,7 @@
 #define TEST_RUNNER_H_
 
 #include "munit.h"
+#include "../../../src/tracing.h"
 
 /* Top-level suites array declaration.
  *
diff --git a/test/raft/lib/uv.h b/test/raft/lib/uv.h
index 7fdcdd08b..75e4b5b4d 100644
--- a/test/raft/lib/uv.h
+++ b/test/raft/lib/uv.h
@@ -34,13 +34,19 @@
     TEAR_DOWN_HEAP;         \
     TEAR_DOWN_DIR
 
-#define FIXTURE_UV struct raft_io io
+#define FIXTURE_UV \
+    struct raft_io io; \
+    int format_version
 
 #define SETUP_UV                                                     \
     do {                                                             \
         int rv_;                                                     \
         rv_ = raft_uv_init(&f->io, &f->loop, f->dir, &f->transport); \
         munit_assert_int(rv_, ==, 0);                                \
+        \
+        const char *format_version_ = munit_parameters_get(params, "format_version"); \
+        f->format_version = format_version_ != NULL ? atoi(format_version_) : 1; \
+        raft_uv_set_format_version(&f->io, f->format_version); \
         raft_uv_set_auto_recovery(&f->io, false);                    \
         rv_ = f->io.init(&f->io, 1, "127.0.0.1:9001");               \
         munit_assert_int(rv_, ==, 0);                                \
diff --git a/test/unit/test_vfs2.c b/test/unit/test_vfs2.c
index 635d7ce7f..0d6119ffd 100644
--- a/test/unit/test_vfs2.c
+++ b/test/unit/test_vfs2.c
@@ -178,14 +178,14 @@ TEST(vfs2, basic, set_up, tear_down, 0, NULL)
 	rv = sqlite3_step(stmt);
 	munit_assert_int(rv, ==, SQLITE_DONE);
 
-	struct vfs2_wal_frame *frames;
+	dqlite_vfs_frame *frames;
 	unsigned n;
 	rv = vfs2_poll(fp, &frames, &n, &sl);
 	munit_assert_int(rv, ==, 0);
 	munit_assert_uint(n, ==, 1);
+	munit_assert_not_null(frames[0].data);
 	munit_assert_not_null(frames);
-	munit_assert_not_null(frames[0].page);
-	sqlite3_free(frames[0].page);
+	sqlite3_free(frames[0].data);
 	sqlite3_free(frames);
 
 	rv = vfs2_unhide(fp);