Implement zip backup. (#287)
* Implement zip backup.

* Remove no backup variable from config.

* Create GitHub Action to auto-check backup.

* Fix backup file name.

* Improve messages.

* Remove unused code.

* Check backup file size.

* Fix backup error on json dumps.

* Update slack channel.
everaldorodrigo authored Oct 3, 2024
1 parent 382c3d0 commit 118bc00
Showing 3 changed files with 181 additions and 12 deletions.
96 changes: 96 additions & 0 deletions .github/scripts/check_backup.py
@@ -0,0 +1,96 @@
"""
This script checks if a backup file for the current date exists in a specified S3 bucket.
If the backup file does not exist, a notification is sent to a Slack channel.
Expected file format in the S3 bucket:
- The file should be in the folder 'db_backup/' with the following naming pattern:
'dde_backup_YYYYMMDD.zip', where YYYYMMDD corresponds to the current date.
Required Environment Variables:
- AWS_ACCESS_KEY_ID: The AWS access key ID used to read the S3 bucket.
- AWS_SECRET_ACCESS_KEY: The AWS secret access key used to read the S3 bucket.
- BACKUP_BUCKET_NAME: The name of the AWS S3 bucket where backups are stored.
- S3_FOLDER: The folder path within the S3 bucket where backups are stored (e.g., 'db_backup/').
- AWS_REGION: The AWS region where the S3 bucket is located.
- SLACK_CHANNEL: The Slack channel where notifications should be sent (e.g., '#observability-test').
- SLACK_WEBHOOK_URL: The Slack Webhook URL used to send the notification.
Functionality:
1. The script uses the AWS SDK (boto3) to check for the existence of the backup file in the specified S3 bucket.
2. If the file is found, it logs that no action is needed.
3. If the file is not found, it sends a notification to the configured Slack channel.
Dependencies:
- boto3: For interacting with AWS S3.
- requests: For sending HTTP POST requests to Slack.
"""

import boto3
import botocore
import os
import requests

from datetime import datetime


def send_slack_notification(message):

    print(f" └─ {message}")

    # Create the payload for Slack
    slack_data = {
        "channel": os.getenv("SLACK_CHANNEL"),
        "username": "DDE",
        "icon_emoji": ":thumbsdown:",
        "text": message,
    }

    try:
        print(" └─ Sending Slack notification.")
        response = requests.post(os.getenv("SLACK_WEBHOOK_URL"), json=slack_data, timeout=10)
        if response.status_code == 200:
            print(" └─ Slack notification sent successfully.")
        else:
            print(f" └─ Failed to send message to Slack: {response.status_code}, {response.text}")
    except requests.exceptions.Timeout as e:
        print(" └─ Request timed out to Slack WebHook URL.")
        raise e
    except requests.exceptions.RequestException as e:
        print(f" └─ Failed to send Slack notification. Error: {str(e)}")
        raise e


def check_backup_file():

    # Create the expected file name
    today_date = datetime.today().strftime("%Y%m%d")
    expected_file = f"{os.getenv('S3_FOLDER')}dde_backup_{today_date}.zip"

    # Create the S3 client
    s3_client = boto3.client("s3", region_name=os.getenv("AWS_REGION"))

    # Try to fetch the file metadata
    try:
        response = s3_client.head_object(Bucket=os.getenv("BACKUP_BUCKET_NAME"), Key=expected_file)
        print(f" └─ Backup file {expected_file} exists!")

        # Get the file size in bytes
        file_size = response['ContentLength']

        # Check if the file is larger than 1MB
        if file_size > 1048576:  # 1MB in bytes
            print(f" └─ Backup file is larger than 1MB! Size: {file_size} bytes.")
            print(" └─ Nothing to do!")
        else:
            message = f":alert: The backup file {expected_file} is smaller than 1MB!"
            send_slack_notification(message)

    except botocore.exceptions.ClientError as e:
        print(e)
        message = f":alert: The backup file {expected_file} was NOT created today!"
        send_slack_notification(message)


if __name__ == "__main__":
    check_backup_file()
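
For a quick local run of the new check, the environment contract described in the docstring can be stubbed in before importing the script. A minimal sketch only: every credential value below is a placeholder, and putting .github/scripts on sys.path is an assumption about how you invoke it.

import os
import sys

# Hypothetical local smoke test: the values below are placeholders,
# not real configuration from this repository.
os.environ["AWS_ACCESS_KEY_ID"] = "<access-key-id>"
os.environ["AWS_SECRET_ACCESS_KEY"] = "<secret-access-key>"
os.environ["AWS_REGION"] = "us-east-1"  # example region
os.environ["BACKUP_BUCKET_NAME"] = "<bucket-name>"
os.environ["S3_FOLDER"] = "db_backup/"
os.environ["SLACK_CHANNEL"] = "#observability-test"
os.environ["SLACK_WEBHOOK_URL"] = "<webhook-url>"

sys.path.insert(0, ".github/scripts")
from check_backup import check_backup_file  # noqa: E402

check_backup_file()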
35 changes: 35 additions & 0 deletions .github/workflows/check_backup.yml
@@ -0,0 +1,35 @@
name: Check S3 Backup and Notify Slack

on:
  workflow_dispatch:  # Allows manual trigger from GitHub Actions UI
  schedule:
    - cron: '0 13 * * *'  # 5:00 AM PST (UTC-8)

jobs:
  check-backup:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install boto3 (AWS SDK for Python)
        run: |
          python -m pip install --upgrade pip
          pip install boto3 requests
      - name: Check if backup exists in S3
        run: python .github/scripts/check_backup.py
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
          BACKUP_BUCKET_NAME: "${{ secrets.BACKUP_BUCKET_NAME }}"
          S3_FOLDER: "db_backup/"
          SLACK_CHANNEL: "#cd2h"
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
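
The key this workflow checks for has to match what the backup writer uploads. A small sanity check of that naming contract, assuming S3_FOLDER stays "db_backup/" as pinned above:

from datetime import datetime

# check_backup.py builds its expected key as S3_FOLDER + "dde_backup_YYYYMMDD.zip",
# while save_to_s3 in discovery/utils/backup.py writes "db_backup/" plus the
# default filename -- the two only agree while S3_FOLDER is "db_backup/".
s3_folder = "db_backup/"
expected_key = f"{s3_folder}dde_backup_{datetime.today().strftime('%Y%m%d')}.zip"
print(expected_key)  # e.g. db_backup/dde_backup_20241003.zip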
62 changes: 50 additions & 12 deletions discovery/utils/backup.py
@@ -1,5 +1,7 @@
 import json
 import logging
+import zipfile
+import io
 from datetime import date, datetime
 
 import boto3
@@ -18,15 +20,23 @@ def json_serial(obj):
     raise TypeError("Type %s not serializable" % type(obj))
 
 
-def _default_filename():
-    return "dde_backup_" + datetime.today().strftime("%Y%m%d") + ".json"
+def _default_filename(extension=".json"):
+    return "dde_backup_" + datetime.today().strftime("%Y%m%d") + extension
 
 
-def save_to_s3(data, filename=None, bucket="dde"):
-    filename = filename or _default_filename()
-    s3 = boto3.client("s3")
+def save_to_s3(data, filename=None, bucket="dde", format="zip"):
+    filename = filename or _default_filename(f".{format}")
+    s3 = boto3.resource("s3")
     obj_key = f"db_backup/{filename}"
-    s3.put_object(Bucket=bucket, Key=obj_key, Body=json.dumps(data, indent=2, default=json_serial))
+    if format == "zip":
+        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
+            json_data = json.dumps(data, indent=2, default=json_serial)
+            zfile.writestr(filename.replace(".zip", ".json"), json_data)
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).upload_file(Filename=filename, Key=obj_key)
+    else:
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).put_object(Key=obj_key, Body=json.dumps(data, indent=2, default=json_serial))
     return obj_key
 
 
@@ -76,7 +86,7 @@ def backup_schema_class(outfile=None):
     return backup_es(SchemaClass, outfile=outfile)
 
 
-def daily_backup_routine():
+def daily_backup_routine(format="zip"):
     logger = logging.getLogger("daily_backup")
     data = {}
     try:
@@ -94,10 +104,11 @@ def daily_backup_routine():
         data.update(_d)
 
         logger.info("Saving to S3 bucket...")
-        s3_obj = save_to_s3(data)
+        s3_obj = save_to_s3(data, format=format)
         logger.info("Done. [%s]", s3_obj)
     except Exception as exc:
         logger.error(str(exc))
+        logger.error("Stack trace:", exc_info=True)
 
 
 def backup_from_file(api):
@@ -143,11 +154,38 @@ def restore_from_s3(filename=None, bucket="dde"):
         Key=filename
     )
 
-    ddeapis = json.loads(obj['Body'].read())
+    filename = filename.replace("db_backup/", "")
+
+    if filename.endswith(".zip"):
+        file_content = obj["Body"].read()
+        with zipfile.ZipFile(io.BytesIO(file_content)) as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                ddeapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        ddeapis = json.loads(obj['Body'].read())
+    else:
+        raise Exception("Unsupported backup file type!")
 
     backup_from_file(ddeapis)
 
 
 def restore_from_file(filename=None):
-    with open(filename) as file:
-        ddeapis = json.load(file)
-    backup_from_file(ddeapis)
+    if filename.endswith(".zip"):
+        with zipfile.ZipFile(filename, 'r') as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                ddeapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        with open(filename) as file:
+            ddeapis = json.load(file)
+    else:
+        raise Exception("Unsupported backup file type!")
 
     backup_from_file(ddeapis)
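
Taken together, the new format parameter gives a zip round trip along these lines. A sketch only, assuming default AWS credentials, the default "dde" bucket, and a stand-in payload (the real routine gathers Elasticsearch documents):

from discovery.utils.backup import restore_from_s3, save_to_s3

# Stand-in payload; daily_backup_routine() assembles the real one.
data = {"example": [{"_id": "doc1", "name": "hello"}]}

obj_key = save_to_s3(data, format="zip")  # uploads db_backup/dde_backup_YYYYMMDD.zip
restore_from_s3(filename=obj_key)         # downloads, unzips, replays via backup_from_file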
