Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable logical replication subscribers #10249

Merged
merged 3 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 89 additions & 15 deletions compute_tools/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ use crate::local_proxy;
use crate::pg_helpers::*;
use crate::spec::*;
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
RunInEachDatabase,
};
use crate::spec_apply::PerDatabasePhase;
use crate::spec_apply::PerDatabasePhase::{
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
HandleAnonExtension,
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
};
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
use crate::sync_sk::{check_if_synced, ping_safekeeper};
Expand Down Expand Up @@ -340,6 +340,15 @@ impl ComputeNode {
self.state.lock().unwrap().status
}

/// Returns the `timeline_id` of the `pspec` currently held in the node state,
/// or `None` when `pspec` is not set.
///
/// # Panics
///
/// Panics if locking `self.state` fails, because the `lock()` result is unwrapped.
pub fn get_timeline_id(&self) -> Option<TimelineId> {
    let state = self.state.lock().unwrap();
    let pspec = state.pspec.as_ref();
    pspec.map(|p| p.timeline_id)
}

// Remove `pgdata` directory and create it again with right permissions.
fn create_pgdata(&self) -> Result<()> {
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
Expand Down Expand Up @@ -929,6 +938,48 @@ impl ComputeNode {
.map(|role| (role.name.clone(), role))
.collect::<HashMap<String, Role>>();

// Check if we need to drop subscriptions before starting the endpoint.
//
// It is important to do this operation exactly once when the endpoint starts on a new branch.
// Otherwise, we may drop subscriptions that were newly created rather than inherited.
//
// We cannot rely only on spec.drop_subscriptions_before_start flag,
// because if for some reason compute restarts inside VM,
// it will start again with the same spec and flag value.
//
// To handle this, we save the fact of the operation in the database
// in the neon.drop_subscriptions_done table.
// If the table does not exist, we assume that the operation was never performed, so we must do it.
// If the table exists, we check if the operation was performed on the current timeline.
//
let mut drop_subscriptions_done = false;

if spec.drop_subscriptions_before_start {
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);

info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);

drop_subscriptions_done = match
client.simple_query(&query).await {
Ok(result) => {
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
},
Err(e) =>
{
match e.code() {
Some(&SqlState::UNDEFINED_TABLE) => false,
_ => {
// We don't expect any other error here, except for the schema/table not existing
error!("Error checking if drop subscription operation was already performed: {}", e);
return Err(e.into());
}
}
}
}
};


let jwks_roles = Arc::new(
spec.as_ref()
.local_proxy_config
Expand Down Expand Up @@ -996,7 +1047,7 @@ impl ComputeNode {
jwks_roles.clone(),
concurrency_token.clone(),
db,
[DropSubscriptionsForDeletedDatabases].to_vec(),
[DropLogicalSubscriptions].to_vec(),
);

Ok(spawn(fut))
Expand Down Expand Up @@ -1024,6 +1075,7 @@ impl ComputeNode {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
lubennikovaav marked this conversation as resolved.
Show resolved Hide resolved
] {
info!("Applying phase {:?}", &phase);
apply_operations(
Expand Down Expand Up @@ -1064,19 +1116,25 @@ impl ComputeNode {
}

let conf = Arc::new(conf);
let mut phases = vec![
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
];

if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
lubennikovaav marked this conversation as resolved.
Show resolved Hide resolved
phases.push(DropLogicalSubscriptions);
}

let fut = Self::apply_spec_sql_db(
spec.clone(),
conf,
ctx.clone(),
jwks_roles.clone(),
concurrency_token.clone(),
db,
[
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
]
.to_vec(),
phases,
);

Ok(spawn(fut))
Expand All @@ -1088,12 +1146,20 @@ impl ComputeNode {
handle.await??;
}

for phase in vec![
let mut phases = vec![
HandleOtherExtensions,
HandleNeonExtension,
HandleNeonExtension, // This step depends on CreateSchemaNeon
CreateAvailabilityCheck,
DropRoles,
] {
];

// This step depends on CreateSchemaNeon
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
lubennikovaav marked this conversation as resolved.
Show resolved Hide resolved
phases.push(FinalizeDropLogicalSubscriptions);
}

for phase in phases {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
Expand Down Expand Up @@ -1463,6 +1529,14 @@ impl ComputeNode {
Ok(())
},
)?;

let postgresql_conf_path = pgdata_path.join("postgresql.conf");
if config::line_in_file(
&postgresql_conf_path,
"neon.disable_logical_replication_subscribers=false",
)? {
info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false");
}
self.pg_reload_conf()?;
}
self.post_apply_config()?;
Expand Down
7 changes: 7 additions & 0 deletions compute_tools/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ pub fn write_postgres_conf(

writeln!(file, "neon.extension_server_port={}", extension_server_port)?;

if spec.drop_subscriptions_before_start {
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
} else {
// be explicit about the default value
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
}

// This is essential to keep this line at the end of the file,
// because it is intended to override any settings above.
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
Expand Down
24 changes: 16 additions & 8 deletions compute_tools/src/spec_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub enum PerDatabasePhase {
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
DropSubscriptionsForDeletedDatabases,
DropLogicalSubscriptions,
skyzh marked this conversation as resolved.
Show resolved Hide resolved
}

#[derive(Clone, Debug)]
Expand All @@ -58,11 +58,13 @@ pub enum ApplySpecPhase {
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
CreateSchemaNeon,
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
FinalizeDropLogicalSubscriptions,
}

pub struct Operation {
Expand Down Expand Up @@ -331,7 +333,7 @@ async fn get_operations<'a>(
// NB: there could be other db states, which prevent us from dropping
// the database. For example, if db is used by any active subscription
// or replication slot.
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
// Such cases are handled in the DropLogicalSubscriptions
// phase. We do all the cleanup before actually dropping the database.
let drop_db_query: String = format!(
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
Expand Down Expand Up @@ -442,13 +444,19 @@ async fn get_operations<'a>(

Ok(Box::new(operations))
}
ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from(
"create schema for neon extension and utils tables",
)),
}))),
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
match subphase {
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
PerDatabasePhase::DropLogicalSubscriptions => {
lubennikovaav marked this conversation as resolved.
Show resolved Hide resolved
match &db {
DB::UserDB(db) => {
let drop_subscription_query: String = format!(
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
include_str!("sql/drop_subscriptions.sql"),
datname_str = escape_literal(&db.name),
);

Expand Down Expand Up @@ -666,10 +674,6 @@ async fn get_operations<'a>(
}
ApplySpecPhase::HandleNeonExtension => {
let operations = vec![
Operation {
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
comment: Some(String::from("init: add schema for extension")),
},
Operation {
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
comment: Some(String::from(
Expand Down Expand Up @@ -712,5 +716,9 @@ async fn get_operations<'a>(

Ok(Box::new(operations))
}
ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {
query: String::from(include_str!("sql/finalize_drop_subscriptions.sql")),
comment: None,
}))),
}
}
21 changes: 21 additions & 0 deletions compute_tools/src/sql/finalize_drop_subscriptions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
DO $$
BEGIN
    -- Record that the drop-subscriptions step has run for the current timeline.
    -- The marker lives in a single row (id = 1) of neon.drop_subscriptions_done.

    -- Create the marker table on first use. IF NOT EXISTS turns this into a
    -- no-op (with a NOTICE) when the table is already present, so no separate
    -- catalog lookup is needed.
    CREATE TABLE IF NOT EXISTS neon.drop_subscriptions_done
        (id serial primary key, timeline_id text);

    -- preserve the timeline_id of the last drop_subscriptions run
    -- to ensure that the cleanup of a timeline is executed only once.
    -- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
    -- The column list is explicit so the statement does not depend on column order;
    -- EXCLUDED.timeline_id is the value proposed for insertion above.
    INSERT INTO neon.drop_subscriptions_done (id, timeline_id)
    VALUES (1, current_setting('neon.timeline_id'))
    ON CONFLICT (id) DO UPDATE
        SET timeline_id = EXCLUDED.timeline_id;
END
$$
1 change: 1 addition & 0 deletions control_plane/src/bin/neon_local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.pg_version,
mode,
!args.update_catalog,
false,
)?;
}
EndpointCmd::Start(args) => {
Expand Down
7 changes: 7 additions & 0 deletions control_plane/src/endpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
features: Vec<ComputeFeature>,
}

Expand Down Expand Up @@ -143,6 +144,7 @@ impl ComputeControlPlane {
pg_version: u32,
mode: ComputeMode,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
Expand All @@ -162,6 +164,7 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
});

Expand All @@ -177,6 +180,7 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates,
drop_subscriptions_before_start,
features: vec![],
})?,
)?;
Expand Down Expand Up @@ -240,6 +244,7 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,

drop_subscriptions_before_start: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
Expand Down Expand Up @@ -291,6 +296,7 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
features: conf.features,
})
}
Expand Down Expand Up @@ -625,6 +631,7 @@ impl Endpoint {
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
reconfigure_concurrency: 1,
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
Expand Down
7 changes: 7 additions & 0 deletions libs/compute_api/src/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ pub struct ComputeSpec {
/// enough spare connections for reconfiguration process to succeed.
#[serde(default = "default_reconfigure_concurrency")]
pub reconfigure_concurrency: usize,

/// If set to true, compute_ctl will drop all subscriptions before starting the
/// compute. This is needed when we start an endpoint on a branch, so that the child
/// does not compete with the parent branch's subscriptions
/// for the same replication content from the publisher.
#[serde(default)] // Default false
pub drop_subscriptions_before_start: bool,
}

/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
Expand Down
10 changes: 10 additions & 0 deletions pgxn/neon/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "access/xlogrecovery.h"
#endif
#include "replication/logical.h"
#include "replication/logicallauncher.h"
#include "replication/slot.h"
#include "replication/walsender.h"
#include "storage/proc.h"
Expand Down Expand Up @@ -434,6 +435,15 @@ _PG_init(void)

restore_running_xacts_callback = RestoreRunningXactsFromClog;

DefineCustomBoolVariable(
"neon.disable_logical_replication_subscribers",
"Disables incomming logical replication",
NULL,
&disable_logical_replication_subscribers,
false,
PGC_SIGHUP,
0,
NULL, NULL, NULL);

DefineCustomBoolVariable(
"neon.allow_replica_misconfig",
Expand Down
Loading
Loading