Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read backup's last modification timestamp from metadata #1064

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ENVIRONMENT.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ In case of S3, `wal-e` is used for backups and `wal-g` for restore.
- **WALG_S3_CA_CERT_FILE**: (optional) TLS CA certificate for wal-g (see `wal-g configuration <https://github.com/wal-g/wal-g#configuration>`__)
- **WALG_SSH_PREFIX**: (optional) the ssh prefix to store WAL backups at in the format ssh://host.example.com/path/to/backups/ See `Wal-g <https://github.com/wal-g/wal-g#configuration>`__ documentation for details.
- **WALG_LIBSODIUM_KEY**, **WALG_LIBSODIUM_KEY_PATH**, **WALG_LIBSODIUM_KEY_TRANSFORM**, **WALG_PGP_KEY**, **WALG_PGP_KEY_PATH**, **WALG_PGP_KEY_PASSPHRASE** (optional) wal-g encryption properties (see `wal-g encryption <https://github.com/wal-g/wal-g#encryption>`__)
- **WALG_READ_BACKUP_METADATA**: (optional) Whether ``wal-g`` should read a backup's last modification timestamp from the backup file metadata rather than from the storage info (essentially adding the ``--detail`` flag to the `backup-list command <https://github.com/wal-g/wal-g/tree/master/docs#backup-list>`__). Only takes effect when ``wal-g`` is in use. (Boolean, ``false`` by default)
- **http_proxy**, **https_proxy**, **no_proxy** (optional) HTTP(S) proxy configuration for `wal-g` to access S3. While http_proxy and https_proxy take a proxy URL, no_proxy takes a comma-separated list of exceptions. Both follow a de-facto standard, see the `wget <https://www.gnu.org/software/wget/manual/html_node/Proxies.html>`__ documentation.
- **AWS_ROLE_ARN**, **AWS_WEB_IDENTITY_TOKEN_FILE**, **AWS_STS_REGIONAL_ENDPOINTS** (optional) `AWS EKS IRSA <https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html>`__ auth configuration for `wal-g` to access S3. Usually these variables are automatically set by the AWS EKS. Only `wal-g` supports AWS EKS IRSA feature.

Expand Down
12 changes: 10 additions & 2 deletions postgres-appliance/bootstrap/clone_with_wale.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ def build_wale_command(command, datadir=None, backup=None):
if datadir is None or backup is None:
raise Exception("backup-fetch requires datadir and backup arguments")
cmd.extend([datadir, backup])
elif command != 'backup-list':
elif command == 'backup-list':
if can_read_walg_metadata():
cmd.extend(['--detail'])
else:
raise Exception("invalid {0} command {1}".format(cmd[0], command))
return cmd

Expand All @@ -59,7 +62,8 @@ def fix_output(output):
if not started:
started = re.match(r'^(backup_)?name\s+(last_)?modified\s+', line)
if started:
line = line.replace(' modified ', ' last_modified ')
column = ' finish_time ' if can_read_walg_metadata() else ' modified '
line = line.replace(column, ' last_modified ')
if started:
yield '\t'.join(line.split())

Expand Down Expand Up @@ -139,6 +143,10 @@ def get_wale_environments(env):
yield name, orig_value


def can_read_walg_metadata():
    """Tell whether backup timestamps should be read from wal-g metadata.

    Returns True only when both the ``USE_WALG_RESTORE`` and the
    ``WALG_READ_BACKUP_METADATA`` environment variables are set to the
    exact string ``'true'``; any other value (or an unset variable)
    yields False.
    """
    required_flags = ('USE_WALG_RESTORE', 'WALG_READ_BACKUP_METADATA')
    return all(os.getenv(flag) == 'true' for flag in required_flags)


def find_backup(recovery_target_time, env):
old_value = None
for name, value in get_wale_environments(env):
Expand Down
5 changes: 4 additions & 1 deletion postgres-appliance/scripts/configure_spilo.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ def set_walg_placeholders(placeholders, prefix=''):
for name in ('USE_WALG_BACKUP', 'USE_WALG_RESTORE'):
value = str(placeholders.get(prefix + name, placeholders[prefix + 'USE_WALG'])).lower()
placeholders[prefix + name] = 'true' if value == 'true' and walg_supported else None
placeholders.setdefault(prefix + 'WALG_READ_BACKUP_METADATA', placeholders.get('WALG_READ_BACKUP_METADATA'))


def get_listen_ip():
Expand Down Expand Up @@ -651,6 +652,8 @@ def get_placeholders(provider):
placeholders.setdefault('USE_WALG_BACKUP', 'true')
if all(placeholders.get(n) for n in WALG_SSH_NAMES):
placeholders.setdefault('USE_WALG_BACKUP', 'true')
if os.environ.get('WALG_READ_BACKUP_METADATA') == 'true':
placeholders.setdefault('WALG_READ_BACKUP_METADATA', 'true')
set_walg_placeholders(placeholders)

placeholders['USE_WALE'] = any(placeholders.get(n) for n in AUTO_ENABLE_WALG_RESTORE +
Expand Down Expand Up @@ -826,7 +829,7 @@ def write_wale_environment(placeholders, prefix, overwrite):
'USE_WALG_RESTORE', 'WALG_BACKUP_COMPRESSION_METHOD', 'WALG_BACKUP_FROM_REPLICA',
'WALG_SENTINEL_USER_DATA', 'WALG_PREVENT_WAL_OVERWRITE', 'WALG_S3_CA_CERT_FILE',
'WALG_LIBSODIUM_KEY', 'WALG_LIBSODIUM_KEY_PATH', 'WALG_LIBSODIUM_KEY_TRANSFORM',
'WALG_PGP_KEY', 'WALG_PGP_KEY_PATH', 'WALG_PGP_KEY_PASSPHRASE',
'WALG_PGP_KEY', 'WALG_PGP_KEY_PATH', 'WALG_PGP_KEY_PASSPHRASE', 'WALG_READ_BACKUP_METADATA',
'no_proxy', 'http_proxy', 'https_proxy']

wale = defaultdict(lambda: '')
Expand Down
13 changes: 11 additions & 2 deletions postgres-appliance/scripts/postgres_backup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ function log
echo "$(date "+%Y-%m-%d %H:%M:%S.%3N") - $0 - $*"
}

# Print one "<backup name> <timestamp>" pair per line for every backup listed
# by `$WAL_E backup-list` (everything up to and including the header line is
# dropped).
#
# When both USE_WALG_BACKUP and WALG_READ_BACKUP_METADATA equal "true" the
# listing is requested with --detail and the timestamp is taken from the 6th
# column; otherwise the 2nd column of the plain listing is used.
# NOTE(review): column 6 is presumed to be the finish_time column of the
# detailed listing - confirm against the wal-g version in use.
#
# The output is always reduced to exactly two fields, so a consumer doing
# `read -r name last_modified` never gets the remaining listing columns glued
# onto the timestamp (which would make a later `date -d` call fail).
function get_last_modified_timestamps
{
    local timestamp_column=2
    local -a list_args=()

    if [[ "$USE_WALG_BACKUP" == "true" && "$WALG_READ_BACKUP_METADATA" == "true" ]]; then
        list_args=(--detail)
        timestamp_column=6
    fi

    # $WAL_E is expanded unquoted on purpose: it may hold a command plus arguments.
    $WAL_E backup-list "${list_args[@]}" 2> /dev/null \
        | sed '0,/^\(backup_\)\?name\s*\(last_\)\?modified\s*/d' \
        | awk -v col="$timestamp_column" '{print $1, $col;}'
}

[[ -z $1 ]] && echo "Usage: $0 PGDATA" && exit 1

log "I was called as: $0 $*"
Expand Down Expand Up @@ -44,7 +53,7 @@ LEFT=0

NOW=$(date +%s -u)
readonly NOW
while read -r name last_modified rest; do
while read -r name last_modified; do
last_modified=$(date +%s -ud "$last_modified")
if [ $(((NOW-last_modified)/86400)) -ge $DAYS_TO_RETAIN ]; then
if [ -z "$BEFORE" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then
Expand All @@ -55,7 +64,7 @@ while read -r name last_modified rest; do
# count how many backups will remain after we remove everything up to certain date
((LEFT=LEFT+1))
fi
done < <($WAL_E backup-list 2> /dev/null | sed '0,/^\(backup_\)\?name\s*\(last_\)\?modified\s*/d')
done < <(get_last_modified_timestamps)

# we want keep at least N backups even if the number of days exceeded
if [ -n "$BEFORE" ] && [ $LEFT -ge $DAYS_TO_RETAIN ]; then
Expand Down
19 changes: 17 additions & 2 deletions postgres-appliance/scripts/wale_restore.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
#!/bin/bash

# Print the WAL segment name recorded for the most recent backup.
#
# Lists backups with `$WAL_E backup-list`, drops everything up to and including
# the header line, sorts the rows on the key starting at the timestamp column,
# keeps the last row and prints its 3rd field with everything from the first
# "_" onwards removed.
#
# When both USE_WALG_RESTORE and WALG_READ_BACKUP_METADATA equal "true" the
# listing is requested with --detail and rows are ordered by column 6;
# otherwise the plain listing is ordered by column 2.
# NOTE(review): column 6 is presumed to be the finish_time column of the
# detailed listing - confirm against the wal-g version in use.
function get_wal_segment_backup_start() {
    local timestamp_column=2
    local -a list_args=()

    # This is the restore path, so the switch is gated on the restore-side flag
    # rather than on USE_WALG_BACKUP, which can be configured independently.
    # NOTE(review): assumes $WAL_E in this script is chosen by USE_WALG_RESTORE - confirm.
    if [[ "$USE_WALG_RESTORE" == "true" && "$WALG_READ_BACKUP_METADATA" == "true" ]]; then
        list_args=(--detail)
        timestamp_column=6
    fi

    # $WAL_E is expanded unquoted on purpose: it may hold a command plus arguments.
    $WAL_E backup-list "${list_args[@]}" 2> /dev/null \
        | sed '0,/^\(backup_\)\?name\s*\(last_\)\?modified\s*/d' \
        | sort -bk"$timestamp_column" \
        | tail -n1 \
        | awk '{print $3;}' \
        | sed 's/_.*$//'
}

RETRIES=2
THRESHOLD_PERCENTAGE=30
THRESHOLD_MEGABYTES=10240
Expand Down Expand Up @@ -42,8 +58,7 @@ fi
ATTEMPT=0
server_version="-1"
while true; do
[[ -z $wal_segment_backup_start ]] && wal_segment_backup_start=$($WAL_E backup-list 2> /dev/null \
| sed '0,/^\(backup_\)\?name\s*\(last_\)\?modified\s*/d' | sort -bk2 | tail -n1 | awk '{print $3;}' | sed 's/_.*$//')
[[ -z $wal_segment_backup_start ]] && wal_segment_backup_start=$(get_wal_segment_backup_start)

[[ -n "$CONNSTR" && $server_version == "-1" ]] && server_version=$(psql -d "$CONNSTR" -tAc 'show server_version_num' 2> /dev/null || echo "-1")

Expand Down
42 changes: 42 additions & 0 deletions postgres-appliance/tests/test_spilo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,30 @@ function start_clone_with_hourly_log_rotation() {
-d "spilo3"
}

# Start a detached "spilo3" service container named "${PREFIX}metadata" with
# SCOPE=metadata, PGVERSION=16, wal-g enabled and WALG_READ_BACKUP_METADATA=true.
# Whatever `docker-compose run` prints is left on stdout for the caller.
function start_walg_metadata_container() {
    local -a compose_args=(
        -e SCOPE=metadata
        -e PGVERSION=16
        -e USE_WALG=true
        -e WALG_READ_BACKUP_METADATA=true
        --name "${PREFIX}metadata"
        -d "spilo3"
    )

    docker-compose run "${compose_args[@]}"
}

# Start a detached "spilo3" service container named "${PREFIX}metadata-clone"
# that clones the "metadata" scope via CLONE_WITH_WALE, targeting the time
# returned by next_minute, with wal-g enabled and
# WALG_READ_BACKUP_METADATA=true.
# Whatever `docker-compose run` prints is left on stdout for the caller.
function start_clone_with_walg_metadata_container() {
    local -a compose_args=(
        -e SCOPE=metadata-clone
        -e PGVERSION=16
        -e CLONE_SCOPE=metadata
        -e CLONE_PGVERSION=16
        -e CLONE_METHOD=CLONE_WITH_WALE
        -e "CLONE_TARGET_TIME=$(next_minute)"
        -e USE_WALG=true
        -e WALG_READ_BACKUP_METADATA=true
        --name "${PREFIX}metadata-clone"
        -d "spilo3"
    )

    docker-compose run "${compose_args[@]}"
}

function verify_clone_upgrade() {
local type=$2
local from_version=$3
Expand Down Expand Up @@ -325,6 +349,19 @@ function test_spilo() {
log_info "[TS3] Started $clone17_container for testing point-in-time recovery (clone with wal-e) with unreachable target on 14+"


# TEST SUITE 4
local metadata_container
metadata_container=$(start_walg_metadata_container)
log_info "[TS4] Started $metadata_container for backup with wal-g metadata"
find_leader "$metadata_container"
run_test verify_archive_mode_is_on "$metadata_container"
wait_backup "$metadata_container"

local clone_metadata_container
clone_metadata_container=$(start_clone_with_walg_metadata_container)
log_info "[TS4] Started $clone_metadata_container for testing point-in-time recovery (clone with wal-g) with support for reading metadata"


# TEST SUITE 1
log_info "[TS1] Testing in-place major upgrade 14->15"
run_test test_successful_inplace_upgrade_to_15 "$container"
Expand All @@ -337,6 +374,11 @@ function test_spilo() {
run_test verify_archive_mode_is_on "$clone17_container"


# TEST SUITE 4
find_leader "$clone_metadata_container"
run_test verify_archive_mode_is_on "$clone_metadata_container"


# TEST SUITE 1
wait_backup "$container"

Expand Down