diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index 633d8ff9..76a25f1b 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -1302,8 +1302,11 @@ impl Replicator { }, }; - tracing::info!("Restoring from generation {}", generation); - self.restore_from(generation, timestamp).await + let (action, recovered) = self.restore_from(generation, timestamp).await?; + tracing::info!( + "Restoring from generation {generation}: action={action:?}, recovered={recovered}" + ); + Ok((action, recovered)) } pub async fn get_last_consistent_frame(&self, generation: &Uuid) -> Result { diff --git a/sqld/src/namespace/mod.rs b/sqld/src/namespace/mod.rs index d54499b5..4861b7e5 100644 --- a/sqld/src/namespace/mod.rs +++ b/sqld/src/namespace/mod.rs @@ -657,6 +657,18 @@ impl Namespace { tokio::fs::create_dir_all(&db_path).await?; + // FIXME: due to a bug in logger::checkpoint_db we call regular checkpointing code + // instead of our virtual WAL one. It's a bit tangled to fix right now, because + // we need WAL context for checkpointing, and WAL context needs the ReplicationLogger... + // So instead we checkpoint early, *before* bottomless gets initialized. That way + // we're sure bottomless won't try to back up any existing WAL frames and will instead + // treat the existing db file as the source of truth. + if config.bottomless_replication.is_some() { + tracing::debug!("Checkpointing before initializing bottomless"); + crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; + tracing::debug!("Checkpointed before initializing bottomless"); + } + let bottomless_replicator = if let Some(options) = &config.bottomless_replication { let options = make_bottomless_options(options, name.clone()); let (replicator, did_recover) = diff --git a/sqld/src/replication/primary/logger.rs b/sqld/src/replication/primary/logger.rs index 9f69b876..580aa8e2 100644 --- a/sqld/src/replication/primary/logger.rs +++ b/sqld/src/replication/primary/logger.rs @@ -77,6 +77,7 @@ unsafe impl WalHook for ReplicationLoggerHook { assert_eq!(page_size, 4096); let wal_ptr = wal as *mut _; let last_valid_frame = wal.hdr.mxFrame; + tracing::trace!("Last valid frame before applying: {last_valid_frame}"); let ctx = Self::wal_extract_ctx(wal); let mut frame_count = 0; @@ -948,7 +949,9 @@ impl ReplicationLogger { } } -fn checkpoint_db(data_path: &Path) -> anyhow::Result<()> { +// FIXME: calling rusqlite::Connection's checkpoint here is a bug, +// we need to always call our virtual WAL methods. +pub fn checkpoint_db(data_path: &Path) -> anyhow::Result<()> { let wal_path = match data_path.parent() { Some(path) => path.join("data-wal"), None => return Ok(()),