Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding check_streaming_delta #152

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions check_postgres.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,7 @@ package check_postgres;
'replace', ## used by same_schema only
'lsfunc=s', ## used by wal_files and archive_ready
'skipcycled', ## used by sequence only
'netmasklength=i' ## used by streaming_delta only
);

die $USAGE if ! keys %opt and ! @ARGV;
Expand Down Expand Up @@ -2498,6 +2499,9 @@ sub finishup {
## Check the replication delay in hot standby setup
check_hot_standby_delay() if $action eq 'hot_standby_delay';

# Check the delay between two standby servers (useful for cascading replication)
check_streaming_delta() if $action eq 'streaming_delta';

## Check the delay on replication slots. warning and critical are sizes
check_replication_slots() if $action eq 'replication_slots';

Expand Down Expand Up @@ -5483,6 +5487,55 @@ sub check_hot_standby_delay {

} ## end of check_hot_standby_delay


sub check_streaming_delta {

    ## Check on the delay in PITR replication between the WAL received
    ## and the WAL passed on to the cascading replicas
    ## If --netmasklength is given, only replicas whose client_addr is in the
    ## same subnet (of that mask length) as this postgres instance are checked
    ## For every replica row, and for each of the sent/write/flush/replay
    ## positions, the lag against the local WAL position is added to the
    ## perfdata and compared against --warning and --critical
    ## A replica gets an OK message only if none of its positions alerted

    ## Fall back to empty strings so the perfdata line and the length()
    ## tests below are quiet when a threshold was not supplied
    my $critical = defined $opt{critical} ? $opt{critical} : '';
    my $warning  = defined $opt{warning}  ? $opt{warning}  : '';

    ## Convert a textual WAL location ("logid/offset", both hex) to a number
    ## Returns undef when the value is missing or not in that form
    ## NOTE(review): the ff000000 multiplier is kept from the original patch;
    ## confirm it is still the right segment size for newer server versions
    my $location_to_number = sub {
        my $location = shift;
        return undef if ! defined $location or $location !~ m{\A[0-9A-Fa-f]+/[0-9A-Fa-f]+\z};
        my ($logid, $offset) = split m{/}, $location;
        return (hex('ff000000') * hex($logid)) + hex($offset);
    };

    $SQL = q{SELECT application_name, client_addr, pid,
    sent_location, write_location, flush_location, replay_location,
    CASE pg_is_in_recovery() WHEN true THEN pg_last_xlog_receive_location() ELSE pg_current_xlog_location() END AS master_location
    FROM pg_stat_replication WHERE state != 'backup'};

    ## The option is declared as an integer (=i), so interpolating it is safe
    if ($opt{netmasklength}) {
        my $netmask_length = $opt{netmasklength};
        $SQL .= " AND network(set_masklen(client_addr,$netmask_length)) = network(set_masklen(inet_server_addr(),$netmask_length))";
    }

    my $info = run_command($SQL);

    for $db (@{$info->{db}}) {

        for my $row (@{$db->{slurp}}) {

            ## Without a local position there is nothing to compare against
            my $master_location = $location_to_number->($row->{'master_location'});
            next if ! defined $master_location;

            ## Remember if any position of this replica raised an alert
            my $alerted = 0;

            for my $wal_type (qw/sent write flush replay/) {

                ## Skip positions the server did not report for this replica
                my $slave_position = $location_to_number->($row->{$wal_type . '_location'});
                next if ! defined $slave_position;

                my $slave_lag = $master_location - $slave_position;

                $db->{perf} .= "$row->{'client_addr'}_$wal_type=$slave_lag;$warning;$critical ";

                if (length $critical and $slave_lag >= $critical) {
                    add_critical("CRITICAL for : $row->{'client_addr'} - $row->{'application_name'} - $wal_type");
                    $alerted = 1;
                }
                elsif (length $warning and $slave_lag >= $warning) {
                    add_warning("WARNING for : $row->{'client_addr'} - $row->{'application_name'} - $wal_type");
                    $alerted = 1;
                }
            }

            ## Must stay inside the row loop: $row is only in scope here
            add_ok("OK for : $row->{'client_addr'} - $row->{'application_name'}") if ! $alerted;
        }
    }

    return;

} ## end of check_streaming_delta


sub check_replication_slots {

## Check the delay on one or more replication slots
Expand Down