Skip to content

Commit de4dd9d

Browse files
authored
Proper Configuration of Single Instance DBs after Deletion of Sync Standbys (#577)
* Double check config * Logging * Especially double check when bootstrapping * Check for new value * Revert "Especially double check when bootstrapping" This reverts commit f1ca8db. * Especially double check when bootstrapping * Revert "Check for new value" This reverts commit 4ecb5f5. * Fetch and use the actual postgres cluster name as application_name. This resolves some issues when using sync replication. * Always fetch application_name when needed * make linter happy * Simplify * logging * Fix typos * Logging * Also patch patroni in single instance mode (to ensure proper cleanup after a sync standby has been deleted) * Debug logging * More precise nil * On a related note, do not requeue * Some more debug logging * Additional check * Logging * Back to default nil * Check return code * Additional nil check * Make (updated) linter happy * Revert "Additional nil check" This reverts commit 273e30c. * Fix check and improve func name * Reapply "Additional nil check" This reverts commit 6ff4738.
1 parent 66e2742 commit de4dd9d

File tree

3 files changed

+39
-20
lines changed

3 files changed

+39
-20
lines changed

api/v1/postgres_types.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ func (p *Postgres) ToSharedSvcLB(lbIP string, lbPort int32, enableStandbyLeaderS
384384
"cluster-name": p.ToPeripheralResourceName(),
385385
"team": p.generateTeamID(),
386386
}
387-
if p.IsReplicationPrimary() {
387+
if p.IsReplicationPrimaryOrStandalone() {
388388
lb.Spec.Selector[SpiloRoleLabelName] = SpiloRoleLabelValueMaster
389389
} else {
390390
if enableStandbyLeaderSelector {
@@ -478,7 +478,7 @@ func (p *Postgres) ToDedicatedSvcLB(lbIP string, lbPort int32, standbyClustersSo
478478
"cluster-name": p.ToPeripheralResourceName(),
479479
"team": p.generateTeamID(),
480480
}
481-
if p.IsReplicationPrimary() {
481+
if p.IsReplicationPrimaryOrStandalone() {
482482
lb.Spec.Selector[SpiloRoleLabelName] = SpiloRoleLabelValueMaster
483483
} else {
484484
// select the first pod in the statefulset
@@ -769,7 +769,7 @@ func (p *Postgres) ToUnstructuredZalandoPostgresql(z *zalando.Postgresql, c *cor
769769
}
770770

771771
// Enable replication (using unstructured json)
772-
if p.IsReplicationPrimary() {
772+
if p.IsReplicationPrimaryOrStandalone() {
773773
// delete field
774774
z.Spec.StandbyCluster = nil
775775
} else {
@@ -929,7 +929,7 @@ func setSharedBufferSize(parameters map[string]string, shmSize string) {
929929
}
930930
}
931931

932-
func (p *Postgres) IsReplicationPrimary() bool {
932+
func (p *Postgres) IsReplicationPrimaryOrStandalone() bool {
933933
if p.Spec.PostgresConnection == nil || p.Spec.PostgresConnection.ReplicationPrimary {
934934
// nothing is configured, or we are the leader. nothing to do.
935935
return true
@@ -938,7 +938,7 @@ func (p *Postgres) IsReplicationPrimary() bool {
938938
}
939939

940940
func (p *Postgres) IsReplicationTarget() bool {
941-
if p.Spec.PostgresConnection != nil && p.Spec.PostgresConnection.ReplicationPrimary == false {
941+
if p.Spec.PostgresConnection != nil && !p.Spec.PostgresConnection.ReplicationPrimary {
942942
// sth is configured and we are not the leader
943943
return true
944944
}

controllers/postgres_controller.go

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ func (r *PostgresReconciler) ensurePostgresSecrets(log logr.Logger, ctx context.
886886
}
887887

888888
func (r *PostgresReconciler) ensureStandbySecrets(log logr.Logger, ctx context.Context, instance *pg.Postgres) error {
889-
if instance.IsReplicationPrimary() {
889+
if instance.IsReplicationPrimaryOrStandalone() {
890890
// nothing is configured, or we are the leader. nothing to do.
891891
return nil
892892
}
@@ -1024,11 +1024,6 @@ func (r *PostgresReconciler) checkAndUpdatePatroniReplicationConfig(log logr.Log
10241024
const requeueAfterReconcile = true
10251025
const allDone = false
10261026

1027-
// If there is no connected postgres, no need to tinker with patroni directly
1028-
if instance.Spec.PostgresConnection == nil {
1029-
return allDone, nil
1030-
}
1031-
10321027
log.V(debugLogLevel).Info("Checking replication config from Patroni API")
10331028

10341029
// Get the leader pod
@@ -1045,6 +1040,12 @@ func (r *PostgresReconciler) checkAndUpdatePatroniReplicationConfig(log logr.Log
10451040
}
10461041
leaderIP := leaderPods.Items[0].Status.PodIP
10471042

1043+
// If there is no connected postgres, we still need to possibly clean up a former synchronous primary
1044+
if instance.Spec.PostgresConnection == nil {
1045+
log.V(debugLogLevel).Info("single instance, updating with empty config and requeing")
1046+
return allDone, r.httpPatchPatroni(log, ctx, instance, leaderIP, nil)
1047+
}
1048+
10481049
var resp *PatroniConfig
10491050
resp, err = r.httpGetPatroniConfig(log, ctx, leaderIP)
10501051
if err != nil {
@@ -1056,7 +1057,7 @@ func (r *PostgresReconciler) checkAndUpdatePatroniReplicationConfig(log logr.Log
10561057
return requeueAfterReconcile, nil
10571058
}
10581059

1059-
if instance.IsReplicationPrimary() {
1060+
if instance.IsReplicationPrimaryOrStandalone() {
10601061
if resp.StandbyCluster != nil {
10611062
log.V(debugLogLevel).Info("standby_cluster mismatch, requeing", "response", resp)
10621063
return requeueAfterReconcile, nil
@@ -1076,7 +1077,11 @@ func (r *PostgresReconciler) checkAndUpdatePatroniReplicationConfig(log logr.Log
10761077
} else {
10771078
synchronousStandbyApplicationName = pointer.String(s.ToPeripheralResourceName())
10781079
}
1079-
if resp.SynchronousNodesAdditional == nil || *resp.SynchronousNodesAdditional != *synchronousStandbyApplicationName {
1080+
// compare the actual value with the expected value
1081+
if synchronousStandbyApplicationName == nil {
1082+
log.V(debugLogLevel).Info("could not fetch synchronous_nodes_additional, disabling sync replication and requeing", "response", resp)
1083+
return requeueAfterReconcile, r.httpPatchPatroni(log, ctx, instance, leaderIP, nil)
1084+
} else if resp.SynchronousNodesAdditional == nil || *resp.SynchronousNodesAdditional != *synchronousStandbyApplicationName {
10801085
log.V(debugLogLevel).Info("synchronous_nodes_additional mismatch, updating and requeing", "response", resp)
10811086
return requeueAfterReconcile, r.httpPatchPatroni(log, ctx, instance, leaderIP, synchronousStandbyApplicationName)
10821087
}
@@ -1179,7 +1184,9 @@ func (r *PostgresReconciler) httpPatchPatroni(log logr.Logger, ctx context.Conte
11791184

11801185
log.V(debugLogLevel).Info("Preparing request")
11811186
var request PatroniConfig
1182-
if instance.IsReplicationPrimary() {
1187+
if instance.Spec.PostgresConnection == nil {
1188+
// use empty config
1189+
} else if instance.IsReplicationPrimaryOrStandalone() {
11831190
request = PatroniConfig{
11841191
StandbyCluster: nil,
11851192
}
@@ -1206,7 +1213,6 @@ func (r *PostgresReconciler) httpPatchPatroni(log logr.Logger, ctx context.Conte
12061213
request.SynchronousNodesAdditional = nil
12071214
}
12081215
} else {
1209-
// TODO check values first
12101216
request = PatroniConfig{
12111217
StandbyCluster: &PatroniStandbyCluster{
12121218
CreateReplicaMethods: []string{"basebackup_fast_xlog"},
@@ -1241,8 +1247,21 @@ func (r *PostgresReconciler) httpPatchPatroni(log logr.Logger, ctx context.Conte
12411247
}
12421248
defer resp.Body.Close()
12431249

1250+
if resp.StatusCode/100 != 2 {
1251+
err = fmt.Errorf("received unexpected return code %d", resp.StatusCode)
1252+
log.Error(err, "could not perform PATCH request")
1253+
return err
1254+
}
1255+
1256+
log.V(debugLogLevel).Info("Performed request")
1257+
1258+
// fake error when standbyApplicationName is required but not provided
1259+
if instance.Spec.PostgresConnection != nil && instance.IsReplicationPrimaryOrStandalone() && instance.Spec.PostgresConnection.SynchronousReplication && synchronousStandbyApplicationName == nil {
1260+
return fmt.Errorf("missing application_name of synchronous standby, disable synchronous replication")
1261+
}
1262+
12441263
// fake error when standbyApplicationName is required but not provided
1245-
if instance.IsReplicationPrimary() && instance.Spec.PostgresConnection.SynchronousReplication && synchronousStandbyApplicationName == nil {
1264+
if instance.Spec.PostgresConnection != nil && instance.Spec.PostgresConnection.SynchronousReplication && synchronousStandbyApplicationName == nil {
12461265
return fmt.Errorf("missing application_name of synchronous standby, disable synchronous replication")
12471266
}
12481267

@@ -1487,7 +1506,7 @@ func (r *PostgresReconciler) createOrUpdateExporterSidecarServices(log logr.Logg
14871506
pes.Spec.Ports = []corev1.ServicePort{
14881507
{
14891508
Name: postgresExporterServicePortName,
1490-
Port: int32(exporterServicePort),
1509+
Port: int32(exporterServicePort), //nolint
14911510
Protocol: corev1.ProtocolTCP,
14921511
TargetPort: intstr.FromInt(int(exporterServiceTargetPort)),
14931512
},
@@ -1752,7 +1771,7 @@ func (r *PostgresReconciler) ensureInitDBJob(log logr.Logger, ctx context.Contex
17521771
cm.Data = map[string]string{}
17531772

17541773
// only execute SQL when encountering a **new** database, not for standbies or clones
1755-
if instance.IsReplicationPrimary() && instance.Spec.PostgresRestore == nil {
1774+
if instance.IsReplicationPrimaryOrStandalone() && instance.Spec.PostgresRestore == nil {
17561775
// try to fetch the global initjob configmap
17571776
cns := types.NamespacedName{
17581777
Namespace: r.PostgresletNamespace,

main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,8 @@ func main() {
429429

430430
var lbMgrOpts lbmanager.Options = lbmanager.Options{
431431
LBIP: lbIP,
432-
PortRangeStart: int32(portRangeStart),
433-
PortRangeSize: int32(portRangeSize),
432+
PortRangeStart: int32(portRangeStart), // nolint
433+
PortRangeSize: int32(portRangeSize), // nolint
434434
EnableStandbyLeaderSelector: enableStandbyLeaderSelector,
435435
EnableLegacyStandbySelector: enableLegacyStandbySelector,
436436
StandbyClustersSourceRanges: standbyClusterSourceRanges,

0 commit comments

Comments
 (0)