From 6a1984d6d36f733d094001b260d95412206dd3bc Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 11 Dec 2024 02:51:44 +0000
Subject: [PATCH 1/4] pageserver: fix CLog truncate walingest

---
 pageserver/src/walingest.rs | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index 30c8965d517d..b7712cfac71c 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -877,22 +877,24 @@ impl WalIngest {
         // will block waiting for the last valid LSN to advance up to
         // it. So we use the previous record's LSN in the get calls
         // instead.
-        for segno in modification
-            .tline
-            .list_slru_segments(SlruKind::Clog, Version::Modified(modification), ctx)
-            .await?
-        {
-            let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
+        if modification.tline.get_shard_identity().is_shard_zero() {
+            for segno in modification
+                .tline
+                .list_slru_segments(SlruKind::Clog, Version::Modified(modification), ctx)
+                .await?
+            {
+                let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
 
-            let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
-                pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, pageno)
-            });
+                let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
+                    pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, pageno)
+                });
 
-            if may_delete {
-                modification
-                    .drop_slru_segment(SlruKind::Clog, segno, ctx)
-                    .await?;
-                trace!("Drop CLOG segment {:>04X}", segno);
+                if may_delete {
+                    modification
+                        .drop_slru_segment(SlruKind::Clog, segno, ctx)
+                        .await?;
+                    trace!("Drop CLOG segment {:>04X}", segno);
+                }
             }
         }
 

From 1b69b138934d3ddbbaa91125453d35ace8e54bca Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 11 Dec 2024 03:23:08 +0000
Subject: [PATCH 2/4] tests: use sharding in test_clog_truncate

---
 test_runner/regress/test_clog_truncate.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py
index 10027ce6891b..18bab763a754 100644
--- a/test_runner/regress/test_clog_truncate.py
+++ b/test_runner/regress/test_clog_truncate.py
@@ -4,15 +4,17 @@
 import time
 
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv
+from fixtures.neon_fixtures import NeonEnvBuilder
 from fixtures.utils import query_scalar
 
 
 #
 # Test compute node start after clog truncation
 #
-def test_clog_truncate(neon_simple_env: NeonEnv):
-    env = neon_simple_env
+def test_clog_truncate(neon_env_builder: NeonEnvBuilder):
+    # Use a multi-sharded tenant because WAL ingest logic is shard-dependent, and
+    # this test is one of the very few that exercises a CLogTruncate WAL record.
+    env = neon_env_builder.init_start(initial_tenant_shard_count=2)
 
     # set aggressive autovacuum to make sure that truncation will happen
     config = [

From cd89089f13cae478a8069b22f57de40955dcfb45 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 11 Dec 2024 03:37:42 +0000
Subject: [PATCH 3/4] pageserver: make SLRU dir key handling more consistent

---
 libs/pageserver_api/src/key.rs      |  4 ++++
 libs/pageserver_api/src/shard.rs    |  6 +++++-
 pageserver/src/pgdatadir_mapping.rs | 29 +++++++++++++++++------------
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs
index 37dff6fe4647..6722c6873321 100644
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -565,6 +565,10 @@ impl Key {
             && self.field5 == 0
             && self.field6 == u32::MAX
     }
+
+    pub fn is_slru_dir_key(&self) -> bool {
+        slru_dir_kind(self).is_some()
+    }
 }
 
 #[inline(always)]
diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs
index cf0cd3a46b88..4cc0a739e871 100644
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -173,7 +173,11 @@ impl ShardIdentity {
 
     /// Return true if the key should be stored on all shards, not just one.
     pub fn is_key_global(&self, key: &Key) -> bool {
-        if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
+        if key.is_slru_block_key()
+            || key.is_slru_segment_size_key()
+            || key.is_aux_file_key()
+            || key.is_slru_dir_key()
+        {
             // Special keys that are only stored on shard 0
             false
         } else if key.is_rel_block_key() {
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index 255bd01e259e..3eaecd3a08f5 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -1319,18 +1319,23 @@ impl<'a> DatadirModification<'a> {
 
         let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into();
         let empty_dir = Value::Image(buf);
-        self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
-        self.pending_directory_entries
-            .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
-        self.put(
-            slru_dir_to_key(SlruKind::MultiXactMembers),
-            empty_dir.clone(),
-        );
-        self.pending_directory_entries
-            .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
-        self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
-        self.pending_directory_entries
-            .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
+
+        // Initialize SLRUs on shard 0 only: creating these on other shards would be
+        // harmless but they'd just be dropped on later compaction.
+        if self.tline.tenant_shard_id.is_shard_zero() {
+            self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.put(
+                slru_dir_to_key(SlruKind::MultiXactMembers),
+                empty_dir.clone(),
+            );
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
+            self.pending_directory_entries
+                .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
+        }
 
         Ok(())
     }

From 2624f2ea8314e23c4ec1aeff04358d25c429c014 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 11 Dec 2024 03:46:31 +0000
Subject: [PATCH 4/4] tests: use wait_until in test_clog_truncate

---
 test_runner/regress/test_clog_truncate.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py
index 18bab763a754..2ae38e6d8887 100644
--- a/test_runner/regress/test_clog_truncate.py
+++ b/test_runner/regress/test_clog_truncate.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
 import os
-import time
 
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnvBuilder
-from fixtures.utils import query_scalar
+from fixtures.utils import query_scalar, wait_until
 
 
 #
@@ -33,6 +32,7 @@ def test_clog_truncate(neon_env_builder: NeonEnvBuilder):
     endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
 
     # Consume many xids to advance clog
+    log.info("Consuming xids...")
     with endpoint.cursor() as cur:
         cur.execute("select test_consume_xids(1000*1000*10);")
         log.info("xids consumed")
@@ -49,11 +49,17 @@ def test_clog_truncate(neon_env_builder: NeonEnvBuilder):
     pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000")
     log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
 
-    while os.path.isfile(pg_xact_0000_path):
-        log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}")
-        time.sleep(5)
+    def assert_file_removed():
+        exists = os.path.isfile(pg_xact_0000_path)
+        if exists:
+            log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}")
+        assert not exists
+
+    log.info("Waiting for truncation...")
+    wait_until(assert_file_removed)
 
     # checkpoint to advance latest lsn
+    log.info("Checkpointing...")
     with endpoint.cursor() as cur:
         cur.execute("CHECKPOINT;")
         lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()")
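
Note (illustrative, not part of the series): patches 1 and 3 both rest on the routing rule in ShardIdentity::is_key_global, where SLRU and aux-file keys live only on shard 0 while relation blocks are striped across shards. The Rust sketch below models that decision in miniature; KeyKind and owning_shard are hypothetical stand-ins for the real Key and ShardIdentity types, and the modulo striping is a simplification of the actual hash-based placement.

// Hypothetical stand-ins for pageserver_api's Key / ShardIdentity::is_key_global.
#[derive(Debug, Clone, Copy)]
enum KeyKind {
    /// Relation data blocks, striped across shards by a hash of the key.
    RelBlock { key_hash: u32 },
    /// SLRU blocks, SLRU segment sizes, SLRU dir entries, aux files:
    /// after this series, all of these live only on shard 0.
    SlruBlock,
    SlruSegmentSize,
    SlruDir,
    AuxFile,
}

/// Which shard stores this key? (The real code returns a bool "is global"
/// and handles truly global keys, stored on every shard, separately.)
fn owning_shard(key: KeyKind, shard_count: u32) -> u32 {
    match key {
        // Special keys that are only stored on shard 0.
        KeyKind::SlruBlock | KeyKind::SlruSegmentSize | KeyKind::SlruDir | KeyKind::AuxFile => 0,
        // Relation blocks are spread across shards.
        KeyKind::RelBlock { key_hash } => key_hash % shard_count,
    }
}

fn main() {
    // With 2 shards, SLRU dir keys always land on shard 0...
    assert_eq!(owning_shard(KeyKind::SlruDir, 2), 0);
    // ...while rel blocks can land on any shard.
    assert_eq!(owning_shard(KeyKind::RelBlock { key_hash: 7 }, 2), 1);
    println!("SLRU keys pin to shard 0; rel blocks stripe across shards");
}

This is why patch 1 gates the truncation loop on is_shard_zero(): only shard 0 holds SLRU segments to enumerate and drop, and patch 3 extends the same rule to the SLRU directory keys themselves.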
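
Relatedly, the loop in patch 1 drops a CLOG segment only when every page in it precedes the cutoff page (pageno in the hunk) carried by the CLogTruncate record. A minimal sketch of that segment arithmetic, assuming 32 pages per SLRU segment as in pg_constants and ignoring the circular XID-wraparound comparison that slru_may_delete_clogsegment actually performs:

// Illustrative constant and helper; the real ones live in pg_constants and
// pgv::nonrelfile_utils. Assumes 32 pages per SLRU segment.
const SLRU_PAGES_PER_SEGMENT: u32 = 32;

/// A segment is droppable when even its last page precedes the cutoff page.
/// The real slru_may_delete_clogsegment uses wraparound-aware (circular)
/// XID comparison; plain `<` here is a simplification.
fn may_delete_segment(segno: u32, cutoff_page: u32) -> bool {
    let seg_last_page = segno * SLRU_PAGES_PER_SEGMENT + (SLRU_PAGES_PER_SEGMENT - 1);
    seg_last_page < cutoff_page
}

fn main() {
    let cutoff = 100; // pages 0..=99 are no longer needed
    // Segment 2 covers pages 64..=95: wholly before the cutoff, droppable.
    assert!(may_delete_segment(2, cutoff));
    // Segment 3 covers pages 96..=127: still contains live pages, kept.
    assert!(!may_delete_segment(3, cutoff));
    println!("drop segments whose pages all precede the cutoff");
}

The wraparound handling is the reason the real code goes through slru_may_delete_clogsegment rather than a plain comparison: CLOG page numbers are derived from XIDs, which are circular, as in SimpleLruTruncate() in PostgreSQL's slru.c.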