Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CASMTRIAGE-7627 ignore non-csm failed backups #655

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions goss-testing/scripts/velero_backups_check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#

# Due to a velero bug, a backup is created anytime the backup schedule is created or updated.
# Backups should only occur based upon the cron schedule and not when the schedule itself is created.
# Backups should only occur based upon the cron schedule and not when the schedule itself is created.
# This bug can result in a backup being created before the service is fully running and thus the backup ends up PartiallyFailed.
# A cleanup is done for any PartiallyFailed backup that was created within 10 minutes of its schedule being created.

Expand All @@ -45,15 +45,15 @@ cleanup_velero_backups() {
echo "schedule_name: ${schedule_name}"

schedule_creation_date=$(velero schedule get "${schedule_name}" -o json | jq -r '.metadata.creationTimestamp')

echo "schedule_creation_date: ${schedule_creation_date}"

velero_partiallyfailed_backups=$(kubectl get backups -A -o json | jq -r ".items[] | select(.metadata.name | contains(\"${schedule_name}\")) | select(.status.phase == \"PartiallyFailed\") | .metadata.name")

echo "velero_partiallyfailed_backups: "
[[ -z $velero_partiallyfailed_backups ]] && echo "None" || echo "${velero_partiallyfailed_backups}"

# Check if any PartiallyFailed backups exist
# Check if any PartiallyFailed backups exist
if [[ ! -z ${velero_partiallyfailed_backups} ]]
then
for backup_name in ${velero_partiallyfailed_backups}
Expand All @@ -62,7 +62,7 @@ cleanup_velero_backups() {
backup_creation_date=$(kubectl get backups -A -o json | jq -re ".items[] | select (.metadata.name == \"${backup_name}\") | .metadata.creationTimestamp")
schedule_creation_date_sec=$(date -d "${schedule_creation_date}" +%s)
backup_creation_date_sec=$(date -d "${backup_creation_date}" +%s)

# Check if the PartiallyFailed backup occurred within 10 minutes after the schedule was created. If so, delete the backup.

time_from_schedule_creation_to_backup=$(( $backup_creation_date_sec - $schedule_creation_date_sec ))
Expand All @@ -86,11 +86,17 @@ cleanup_velero_backups() {

cleanup_velero_backups

kubectl get backups -A -o json | jq '.items[].status.phase' | grep "Failed"
if [[ $? -eq 0 ]];
then
# save the failed backups to a temp file
failed_backups_path=$(mktemp)
kubectl get backups -A -o json > "$failed_backups_path"
# some backups are not part of the CSM product and are not relevant to this test
# skip those and only check for "vault" entries
number_failed=$(jq '[.items[] | select(.status.phase == "PartiallyFailed") | select(.metadata.name | contains("vault"))] | length' < "$failed_backups_path")
# if the number of failed backups is not 0, print the failed backups and exit with a non-zero status
if [[ $number_failed -ne 0 ]];
then
echo "Investigate remaining Failed or PartiallyFailed backups: $(kubectl get backups -A -o json | jq -e '.items[] | select(.status.phase == "PartiallyFailed") | .metadata.name')"
echo "FAIL"; exit 1;
else
else
echo "PASS"; exit 0;
fi
Loading