Skip to content

Commit

Permalink
Update node_is_replica to account for the quorum_standby state
Browse files Browse the repository at this point in the history
  • Loading branch information
blogh committed Dec 24, 2024
1 parent 7f2dc34 commit b27d898
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 32 deletions.
19 changes: 16 additions & 3 deletions check_patroni/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,13 @@ def node_is_leader(ctx: click.Context, check_standby_leader: bool) -> None:
default=False,
help="check if the replica is synchronous",
)
@click.option(
"--sync-type",
type=click.Choice(["any", "sync", "quorum"], case_sensitive=True),
default="any",
show_default=True,
help="Synchronous replication mode.",
)
@click.option(
"--is-async",
"check_is_async",
Expand All @@ -636,7 +643,11 @@ def node_is_leader(ctx: click.Context, check_standby_leader: bool) -> None:
@click.pass_context
@nagiosplugin.guarded
def node_is_replica(
ctx: click.Context, max_lag: str, check_is_sync: bool, check_is_async: bool
ctx: click.Context,
max_lag: str,
check_is_sync: bool,
check_is_async: bool,
sync_type: str,
) -> None:
"""Check if the node is a replica with no noloadbalance tag.
Expand Down Expand Up @@ -675,9 +686,11 @@ def node_is_replica(

check = nagiosplugin.Check()
check.add(
NodeIsReplica(ctx.obj.connection_info, max_lag, check_is_sync, check_is_async),
NodeIsReplica(
ctx.obj.connection_info, max_lag, check_is_sync, check_is_async, sync_type
),
nagiosplugin.ScalarContext("is_replica", None, "@0:0"),
NodeIsReplicaSummary(max_lag, check_is_sync, check_is_async),
NodeIsReplicaSummary(max_lag, check_is_sync, check_is_async, sync_type),
)
check.main(verbose=ctx.obj.verbose, timeout=ctx.obj.timeout)

Expand Down
42 changes: 30 additions & 12 deletions check_patroni/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,35 +65,53 @@ def __init__(
max_lag: str,
check_is_sync: bool,
check_is_async: bool,
sync_type: str,
) -> None:
super().__init__(connection_info)
self.max_lag = max_lag
self.check_is_sync = check_is_sync
self.check_is_async = check_is_async
self.sync_type = sync_type

def probe(self) -> Iterable[nagiosplugin.Metric]:
    """Query the node's ``replica`` endpoint and emit the ``is_replica`` metric.

    The metric value is 1 when the node satisfies the requested check and
    0 otherwise:

    * an ``APIError`` raised by the REST call yields 0;
    * with ``check_is_sync``, the response must contain ``sync_standby``
      (for ``sync_type`` "sync" or "any") or ``quorum_standby`` (for
      ``sync_type`` "quorum" or "any");
    * with ``check_is_async``, the response must contain neither key;
    * otherwise a successful call is sufficient.

    When ``max_lag`` is set it is forwarded as the ``lag`` query parameter.
    """
    # Always ask the generic "replica" endpoint; the sync/quorum/async
    # distinction is made from the response body below, so a single
    # request is enough.
    endpoint = "replica"
    if self.max_lag is not None:
        endpoint = f"replica?lag={self.max_lag}"

    try:
        item_dict = self.rest_api(endpoint)
    except APIError:
        return [nagiosplugin.Metric("is_replica", 0)]

    # The response is only inspected in the branches that need it, so the
    # plain replica check does not depend on the shape of the payload.
    if self.check_is_sync:
        matches = (
            self.sync_type in ("sync", "any") and "sync_standby" in item_dict
        ) or (
            self.sync_type in ("quorum", "any") and "quorum_standby" in item_dict
        )
    elif self.check_is_async:
        matches = (
            "sync_standby" not in item_dict
            and "quorum_standby" not in item_dict
        )
    else:
        matches = True

    return [nagiosplugin.Metric("is_replica", 1 if matches else 0)]


class NodeIsReplicaSummary(nagiosplugin.Summary):
def __init__(self, lag: str, check_is_sync: bool, check_is_async: bool) -> None:
def __init__(
self, lag: str, check_is_sync: bool, check_is_async: bool, sync_type: str
) -> None:
self.lag = lag
if check_is_sync:
self.replica_kind = "synchronous replica"
self.replica_kind = f"synchronous replica of kind '{sync_type}'"
elif check_is_async:
self.replica_kind = "asynchronous replica"
else:
Expand Down
22 changes: 22 additions & 0 deletions tests/json/node_is_replica_ok_quorum.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"state": "running",
"postmaster_start_time": "2024-12-23 10:07:07.660665+00:00",
"role": "replica",
"server_version": 140013,
"xlog": {
"received_location": 251660416,
"replayed_location": 251660416,
"replayed_timestamp": "2024-12-23 15:43:43.152572+00:00",
"paused": false
},
"quorum_standby": true,
"timeline": 9,
"replication_state": "streaming",
"dcs_last_seen": 1734972473,
"database_system_identifier": "7421168130564934130",
"patroni": {
"version": "4.0.2",
"scope": "patroni-demo",
"name": "p2"
}
}
22 changes: 22 additions & 0 deletions tests/json/node_is_replica_ok_sync.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"state": "running",
"postmaster_start_time": "2024-12-23 10:07:07.660665+00:00",
"role": "replica",
"server_version": 140013,
"xlog": {
"received_location": 251660416,
"replayed_location": 251660416,
"replayed_timestamp": "2024-12-23 15:43:43.152572+00:00",
"paused": false
},
"sync_standby": true,
"timeline": 9,
"replication_state": "streaming",
"dcs_last_seen": 1734972473,
"database_system_identifier": "7421168130564934130",
"patroni": {
"version": "4.0.2",
"scope": "patroni-demo",
"name": "p2"
}
}
Loading

0 comments on commit b27d898

Please sign in to comment.