From 20d668a58a8c622d56c52d2022be9ef7d4d59781 Mon Sep 17 00:00:00 2001 From: MCatherine Date: Tue, 10 Sep 2024 15:05:06 -0700 Subject: [PATCH] feat: #638 Add cronjob config for database restore (#674) * test(638): trigger dev deployment * fix: try to not stop the pod after cron job * fix(638): added db restore cronjob, refs: #638 * fix(638): fix yml format error, refs: #638 * fix(638): fix yml format error, refs: #638 * fix(638): add placeholder for customize env variables, refs: #638 * fix: add a deployment config for testing * another fix to try * another fix to try * update the yaml file * fix and try * fix(638): fix database backup cron job, refs: #638 * feat(638): add comment to restore yaml, refs: #638 * fix(638): fix db backup schedule back to original time, refs: #638 * fix(638): add more comment to the db restore yaml, refs: #638 * test restore config * fix: switch back to set overwrite to be false * fix: switch back to set overwrite to be false * add restore cron job logic * update restore command * fix: adjust restore command * fix: update variables in restore command * rename variable for restore script * fix(638): add error handling logic in restore script, refs: #638 * fix(638): add error handling when psql command fails in restore script, refs: #638 * fix(638): set backofflimit to 0 for restore cronjob, refs: #638 * Adjust restore procedure. * Fix script.
* fix script * fix space * drop "--set ON_ERROR_STOP=on" --------- Co-authored-by: Ian Liu Co-authored-by: Ian Liu <81595625+ianliuwk1019@users.noreply.github.com> --- .github/workflows/merge-demo.yml | 3 +- .github/workflows/merge-main.yml | 6 +- .github/workflows/pr-open.yml | 1 + db/openshift.deploy.yml | 124 +++++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 3 deletions(-) diff --git a/.github/workflows/merge-demo.yml b/.github/workflows/merge-demo.yml index 380181add..8c45a482a 100644 --- a/.github/workflows/merge-demo.yml +++ b/.github/workflows/merge-demo.yml @@ -55,7 +55,7 @@ jobs: file: api/openshift.deploy.yml overwrite: true parameters: - -p OC_NAMESPACE=a4b31c-test + -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} -p URL=fom-demo.apps.silver.devops.gov.bc.ca -p FOM_EMAIL_NOTIFY=FLNR.AdminServicesCariboo@gov.bc.ca -p DB_TESTDATA=true @@ -69,6 +69,7 @@ jobs: - name: db file: db/openshift.deploy.yml overwrite: false + parameters: -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} - name: init file: libs/openshift.init.yml overwrite: false diff --git a/.github/workflows/merge-main.yml b/.github/workflows/merge-main.yml index 3c73af287..d339ea252 100644 --- a/.github/workflows/merge-main.yml +++ b/.github/workflows/merge-main.yml @@ -32,7 +32,7 @@ jobs: oc_version: "4.13" overwrite: true parameters: - -p OC_NAMESPACE=a4b31c-test + -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} -p URL=fom-test.nrs.gov.bc.ca -p FOM_EMAIL_NOTIFY=FLNR.AdminServicesCariboo@gov.bc.ca -p DB_TESTDATA=true @@ -45,6 +45,7 @@ jobs: - name: db file: db/openshift.deploy.yml overwrite: false + parameters: -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} - name: init file: libs/openshift.init.yml overwrite: false @@ -84,7 +85,7 @@ jobs: oc_version: "4.13" overwrite: true parameters: - -p OC_NAMESPACE=a4b31c-prod + -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} -p URL=fom.nrs.gov.bc.ca -p AWS_USER_POOLS_WEB_CLIENT_ID="4bu2n8at3m32a2fqnvd4t06la1" -p 
LOGOUT_CHAIN_URL="https://logon7.gov.bc.ca/clp-cgi/logoff.cgi?retnow=1&returl=https://loginproxy.gov.bc.ca/auth/realms/standard/protocol/openid-connect/logout?redirect_uri=" @@ -97,6 +98,7 @@ jobs: - name: db file: db/openshift.deploy.yml overwrite: false + parameters: -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} - name: init file: libs/openshift.init.yml overwrite: false diff --git a/.github/workflows/pr-open.yml b/.github/workflows/pr-open.yml index d49b156d1..5b4f90e87 100644 --- a/.github/workflows/pr-open.yml +++ b/.github/workflows/pr-open.yml @@ -107,6 +107,7 @@ jobs: - name: db file: db/openshift.deploy.yml overwrite: false + parameters: -p OC_NAMESPACE=${{ vars.OC_NAMESPACE }} triggers: ('db/' 'libs/' 'api/' 'admin/' 'public/') - name: public file: public/openshift.deploy.yml diff --git a/db/openshift.deploy.yml b/db/openshift.deploy.yml index 6dccd8ca8..d24bc51ae 100644 --- a/db/openshift.deploy.yml +++ b/db/openshift.deploy.yml @@ -94,6 +94,9 @@ parameters: description: Random number, 0-60, for scheduling cronjobs from: "[0-5]{1}[0-9]{1}" generate: expression + - name: OC_NAMESPACE + description: OpenShift namespace containing imported images + required: true objects: - kind: PersistentVolumeClaim apiVersion: v1 @@ -326,3 +329,124 @@ objects: dnsPolicy: "ClusterFirst" serviceAccountName: "${JOB_SERVICE_ACCOUNT}" serviceAccount: "${JOB_SERVICE_ACCOUNT}" + # Manual-only job that restores the fom database from a backup SQL dump on the backup volume; if the restore fails it tries to put the original database back + - kind: CronJob + apiVersion: "batch/v1" + metadata: + name: ${NAME}-${ZONE}-${COMPONENT}-restore + labels: + app: ${NAME}-${ZONE} + cronjob: ${NAME}-${ZONE} + spec: + schedule: "0 0 31 2 *" # February 31 never occurs, so this job is never scheduled automatically; trigger it manually when a restore is needed + concurrencyPolicy: "Replace" + successfulJobsHistoryLimit: ${{SUCCESS_JOBS_HISTORY_LIMIT}} + failedJobsHistoryLimit: ${{FAILED_JOBS_HISTORY_LIMIT}} + jobTemplate: + metadata: + labels: + app: ${NAME}-${ZONE} + cronjob: ${NAME}-${ZONE} + spec: + backoffLimit: 0 + template: + metadata: + labels: + app: ${NAME}-${ZONE} +
cronjob: ${NAME}-${ZONE} + spec: + containers: + - name: ${NAME}-${ZONE}-${COMPONENT}-restore + # use the same image as our database, so we can run the psql command + image: image-registry.apps.silver.devops.gov.bc.ca/${OC_NAMESPACE}/${NAME}-${ZONE}-${COMPONENT}:${ZONE}-db + command: ["/bin/sh", "-c"] + args: + - | + [ -n "${BACKUP_FILE_DIR}" ] || { echo "Error: BACKUP_FILE_DIR is not set"; exit 1; } # fail fast instead of searching the whole backup volume + sql_file=$(find "${BACKUP_DIR}/${BACKUP_FILE_DIR}" -type f -name "*.sql" -print | head -n 1) + if [ -z "$sql_file" ]; then + sql_zip_file=$(find "${BACKUP_DIR}/${BACKUP_FILE_DIR}" -type f -name "*.sql.gz" -print | head -n 1) + if [ -z "$sql_zip_file" ]; then + echo "No zipped backup file found" + else + echo "Found zipped backup file: $sql_zip_file" + gzip -dk "$sql_zip_file" + fi + fi + sql_file=$(find "${BACKUP_DIR}/${BACKUP_FILE_DIR}" -type f -name "*.sql" -print | head -n 1) + if [ -z "$sql_file" ]; then + echo "Error: No backup SQL file is found" + exit 1 + else + echo "Found SQL file, rename existing database." + psql -h ${NAME}-${ZONE}-${COMPONENT} -U ${POSTGRES_USER} -c "DROP DATABASE IF EXISTS ${OLD_FOM_DATABASE_NAME};" -c "ALTER DATABASE fom RENAME TO ${OLD_FOM_DATABASE_NAME};" + if [ $? -ne 0 ]; then + echo "Error: psql failed while trying to rename existing database." + exit 1 + fi + + echo "Create a new empty database" + psql -h ${NAME}-${ZONE}-${COMPONENT} -U ${POSTGRES_USER} -c "CREATE DATABASE fom;" + db_restore_cmd_status=$? + if [ $db_restore_cmd_status -eq 0 ]; then + echo "Running database restore with backup SQL file: $sql_file" + psql -h ${NAME}-${ZONE}-${COMPONENT} -U ${POSTGRES_USER} -d ${POSTGRES_DB} -f "$sql_file" + db_restore_cmd_status=$? + fi + + if [ $db_restore_cmd_status -ne 0 ]; then + echo "DB restore failed, droping newly crated empty db fom (if exists), rename old(original) db back to fom." + psql -h ${NAME}-${ZONE}-${COMPONENT} -U ${POSTGRES_USER} -c "DROP DATABASE IF EXISTS fom;" -c "ALTER DATABASE ${OLD_FOM_DATABASE_NAME} RENAME TO fom;" + db_revert_restore_cmd_status=$?
+ if [ $db_revert_restore_cmd_status -eq 0 ]; then + # The revert succeeded, but the restore itself failed: exit with the status code of the failed restore step. + echo "psql successfully reverted current failed db restore procedure. DB fom is back to original state. However, the restore procedure failed." + exit $db_restore_cmd_status + else + echo "Error: psql failed to attmpt reverting failed db restore procedure. Please manually rename ${OLD_FOM_DATABASE_NAME} back to fom." + exit $db_revert_restore_cmd_status + fi + else + echo "Database restore successfully." + exit 0 + fi + fi + volumeMounts: + - name: ${NAME}-${ZONE}-${COMPONENT}-restore + mountPath: "${BACKUP_DIR}" + env: + - name: BACKUP_DIR + value: "${BACKUP_DIR}" + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-${COMPONENT} + key: database-db + # this environment variable must be named PGPASSWORD so that psql reads the password from it instead of prompting for one + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-${COMPONENT} + key: database-password + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-${COMPONENT} + key: database-user + # BACKUP_FILE_DIR must be set manually before triggering the job; it is declared here (empty) as a reminder, and the restore script exits with an error while it is empty + # its value is the backup subdirectory written by the backup container, for example daily/2024-08-16 + - name: BACKUP_FILE_DIR + value: "" + - name: OLD_FOM_DATABASE_NAME + value: old_fom + volumes: + - name: ${NAME}-${ZONE}-${COMPONENT}-restore + persistentVolumeClaim: + # use the same volume as database backup, so we can get the backup files + claimName: ${NAME}-${ZONE}-${BACKUP_COMPONENT} + restartPolicy: "Never" + terminationGracePeriodSeconds: 30 + activeDeadlineSeconds: 1600 + dnsPolicy: "ClusterFirst" + serviceAccountName: "${JOB_SERVICE_ACCOUNT}" + serviceAccount: "${JOB_SERVICE_ACCOUNT}"