Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copy from automated backups #1

Merged
merged 5 commits into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Here is a break down of each parameter for the source template:

* **CodeBucket** - this parameter specifies the bucket where the code for the Lambda functions is located. The Lambda function code is located in the ```lambda``` directory in zip format. These files need to be at the **root** of the bucket or the CloudFormation templates will fail. Please follow the instructions to build the source (earlier in this README file).
* **DeleteOldSnapshots** - Set to TRUE to enable functionality that will delete snapshots after **RetentionDays**. Set to FALSE if you want to disable this functionality completely. (Associated Lambda and State Machine resources will not be created in the account). **WARNING** If you decide to enable this functionality later on, bear in mind it will delete **all snapshots**, older than **RetentionDays**, created by this tool; not just the ones created after **DeleteOldSnapshots** is set to TRUE.
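* **UseAutomatedBackup** - Set to TRUE to create snapshots by copying the latest automated backup of each instance, instead of taking a new snapshot of the live database instance. Set to FALSE to snapshot the live instance directly.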
* **TaggedInstance** - Set to TRUE to enable functionality that will only take snapshots for RDS Instances with tag CopyDBSnapshot set to True. The settings in InstanceNamePattern and TaggedInstance both need to evaluate successfully for a snapshot to be created (logical AND).

### Destination Account
Expand Down
21 changes: 8 additions & 13 deletions cftemplates/snapshots_tool_rds_dest.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,6 @@
"AllowedValues": ["TRUE", "FALSE"],
"Default": "TRUE",
"Description": "Enable copying snapshots across accounts. Set to FALSE if your source snapshots are not on a different account"
},
"LogGroupName": {
"Type": "String",
"Default": "lambdaDeleteOldSnapshotsRDS-dest",
"Description": "Name for RDS snapshot log group."
}
},
"Conditions": {
Expand Down Expand Up @@ -135,7 +130,7 @@
"EvaluationPeriods": "1",
"MetricName": "ExecutionsFailed",
"Namespace": "AWS/States",
"Period": "300",
"Period": "3600",
"Statistic": "Sum",
"Threshold": "1.0",
"AlarmActions": [{
Expand Down Expand Up @@ -541,15 +536,15 @@
}]
}
},
"cwloggroupDeleteOldSnapshotsDestRDS":{
"cwloggrouplambdaDeleteOldDestRDS":{
"Type": "AWS::Logs::LogGroup",
"Description": "Log group for the lambdaCopySnapshotsRDS function's logs",
"Description": "Log group for the lambdaDeleteOldDestRDS function's logs",
"Condition": "DeleteOld",
"DependsOn": "lambdaDeleteOldDestRDS",
"Properties": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "LogGroupName" } } ]
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaDeleteOldDestRDS" } } ]
}
}
},
Expand All @@ -558,8 +553,8 @@
"Description": "Log group for the lambdaCopySnapshotsRDS function's logs",
"DependsOn": "lambdaCopySnapshotsRDS",
"Properties": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaCopySnapshotsRDS" } } ]
}
}
Expand Down
36 changes: 25 additions & 11 deletions cftemplates/snapshots_tool_rds_source.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,22 @@
"Description": "Set to TRUE to enable deletion of snapshot based on RetentionDays. Set to FALSE to disable",
"AllowedValues": ["TRUE", "FALSE"]
},
"UseAutomatedBackup": {
"Type": "String",
"Default": "TRUE",
"Description": "Set to TRUE to create backups from automated backups by copying them first. Else set it to FALSE to create out of running instance",
"AllowedValues": ["TRUE", "FALSE"]
},
"TaggedInstance": {
"Type": "String",
"Default": "FALSE",
"Description": "Set to TRUE to filter instances that have tag CopyDBSnapshot set to True. Set to FALSE to disable",
"AllowedValues": ["TRUE", "FALSE"]
},
"LogGroupName": {
"KmsKeySource": {
"Type": "String",
"Default": "lambdaDeleteOldSnapshotsRDS-source",
"Description": "Name for RDS snapshot log group."
"Default": "None",
"Description": "Set to the ARN for the KMS key in the SOURCE region to re-encrypt encrypted snapshots. Leave None if you are not using encryption"
}
},
"Conditions": {
Expand Down Expand Up @@ -151,7 +157,7 @@
"EvaluationPeriods": "1",
"MetricName": "ExecutionsFailed",
"Namespace": "AWS/States",
"Period": "300",
"Period": "3600",
"Statistic": "Sum",
"Threshold": "1.0",
"AlarmActions": [{
Expand Down Expand Up @@ -253,7 +259,8 @@
"rds:ModifyDBSnapshotAttribute",
"rds:DescribeDBSnapshotAttributes",
"rds:ListTagsForResource",
"rds:AddTagsToResource"
"rds:AddTagsToResource",
"rds:CopyDBSnapshot"
],
"Resource": "*"
}]
Expand Down Expand Up @@ -290,6 +297,12 @@
},
"TAGGEDINSTANCE": {
"Ref": "TaggedInstance"
},
"USE_AUTOMATED_BACKUP": {
"Ref": "UseAutomatedBackup"
},
"KMS_KEY_SOURCE_REGION": {
"Ref": "KmsKeySource"
}
}
},
Expand Down Expand Up @@ -648,8 +661,8 @@
"Description": "Log group for the lambdaTakeSnapshotsRDS function's logs",
"DependsOn": "lambdaTakeSnapshotsRDS",
"Properties": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaTakeSnapshotsRDS" } } ]
}
}
Expand All @@ -660,19 +673,20 @@
"Description": "Log group for the lambdaShareSnapshotsRDS function's logs",
"DependsOn": "lambdaShareSnapshotsRDS",
"Properties": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaShareSnapshotsRDS" } } ]
}
}
},
"cwloggrouplambdaDeleteOldSnapshotsRDS":{
"Type": "AWS::Logs::LogGroup",
"Description": "Log group for the lambdaDeleteOldSnapshotsRDS function's logs",
"DependsOn": "lambdaDeleteOldSnapshotsRDS",
"Properties": {
"RetentionInDays": { "Ref": "LambdaCWLogRetention" },
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "LogGroupName" } } ]
"LogGroupName": {
"Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaDeleteOldSnapshotsRDS" } } ]
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions lambda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# Makefile for generating zip files for lambda functions and then copying them
# to S3 for deployment. This Makefile will NOT WORK unless you fill in the S3DEST
# and AWSARGS variables below. Once those parameters are established, simply type
# 'make' or 'gmake' (depending on your UNIX-like OS) and it will build.
# 'make' or 'gmake' (depending on your UNIX-like OS) and it will build.
#
# Behaviour:
# Behaviour:
# Creates a file named ._foo.whatever based on foo.whatever. Uploads foo.whatever to
# the S3 bucket. The ._ file is a hack to figure out whether the file has
# been modified since the last time we uploaded to s3.
Expand All @@ -20,7 +20,7 @@ S3DEST?=[YOUR BUCKET HERE]

# Set these if, for example, you use profiles on the AWS command line
# or if your 'aws' executable is in a weird place.
AWSARGS=--region [YOUR REGION] --profile [YOUR PROFILE, or 'default', or remove this]
AWSARGS=--region ${AWS_REGION} --profile ${AWS_PROFILE}
AWSCMD=aws
ZIPCMD=zip

Expand Down Expand Up @@ -48,4 +48,4 @@ clean:
# This rule is a BSD make style rule that says "to make foo.zip, call
# 'zip -jqr foo snapshots_tool_utils.py'"
%.zip: %
$(ZIPCMD) -jqr "$@" "$<" snapshots_tool_utils.py
$(ZIPCMD) -jqr "$@" "$<" snapshots_tool_utils.py
108 changes: 104 additions & 4 deletions lambda/snapshots_tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Support module for the Snapshot Tool for RDS

import boto3
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import os
import logging
import re
Expand All @@ -38,6 +38,8 @@

_SUPPORTED_ENGINES = [ 'mariadb', 'sqlserver-se', 'sqlserver-ee', 'sqlserver-ex', 'sqlserver-web', 'mysql', 'oracle-se', 'oracle-se1', 'oracle-se2', 'oracle-ee', 'postgres' ]

_AUTOMATED_BACKUP_LIST = []


logger = logging.getLogger()
logger.setLevel(_LOGLEVEL.upper())
Expand Down Expand Up @@ -203,7 +205,7 @@ def get_own_snapshots_source(pattern, response, backup_interval=None):
filtered = {}

for snapshot in response['DBSnapshots']:

# No need to consider snapshots that are still in progress
if 'SnapshotCreateTime' not in snapshot:
continue
Expand Down Expand Up @@ -339,8 +341,6 @@ def copy_local(snapshot_identifier, snapshot_object):

return response



def copy_remote(snapshot_identifier, snapshot_object):
client = boto3.client('rds', region_name=_DESTINATION_REGION)

Expand All @@ -362,3 +362,103 @@ def copy_remote(snapshot_identifier, snapshot_object):
CopyTags = True)

return response


# time.monotonic() reading taken when _AUTOMATED_BACKUP_LIST was last
# refreshed; None until the first fetch.
_AUTOMATED_BACKUP_LIST_FETCHED_AT = None


def get_all_automated_snapshots(client, max_age_seconds=300):
    """Return the automated DB snapshots visible to *client*, with caching.

    The list is fetched through ``paginate_api_call`` using
    ``describe_db_snapshots`` with ``SnapshotType='automated'`` and stored in
    the module-level ``_AUTOMATED_BACKUP_LIST`` so that one run does not
    repeat the call for every DB instance.

    Module-level state can outlive a single Lambda invocation when the
    execution environment is reused, so the cached list is discarded once it
    is older than *max_age_seconds*. Without that, a reused environment would
    keep serving the first invocation's list and never see newer backups.

    Args:
        client: RDS client passed through to ``paginate_api_call``.
        max_age_seconds: Maximum age of the cached list before it is fetched
            again. An empty cached list is always fetched again.

    Returns:
        The list stored under ``'DBSnapshots'`` in the paginated response.
    """
    import time

    global _AUTOMATED_BACKUP_LIST, _AUTOMATED_BACKUP_LIST_FETCHED_AT

    now = time.monotonic()
    cache_expired = (
        _AUTOMATED_BACKUP_LIST_FETCHED_AT is None
        or now - _AUTOMATED_BACKUP_LIST_FETCHED_AT >= max_age_seconds
    )

    if cache_expired or not _AUTOMATED_BACKUP_LIST:
        response = paginate_api_call(
            client, 'describe_db_snapshots', 'DBSnapshots', SnapshotType='automated'
        )
        _AUTOMATED_BACKUP_LIST = response['DBSnapshots']
        _AUTOMATED_BACKUP_LIST_FETCHED_AT = now

    return _AUTOMATED_BACKUP_LIST


def copy_or_create_db_snapshot(
    client,
    db_instance,
    snapshot_identifier,
    snapshot_tags,
    use_automated_backup=True,
    backup_interval=24,
):
    """Create a manual snapshot for *db_instance*.

    Depending on *use_automated_backup* the snapshot is either taken from the
    running instance (``create_db_snapshot``) or produced by copying the most
    recent automated backup of that instance (``copy_db_snapshot``).

    Args:
        client: RDS client used for the create/copy calls.
        db_instance: Instance description; ``'DBInstanceIdentifier'`` is read.
        snapshot_identifier: Identifier for the snapshot to create.
        snapshot_tags: List of ``{'Key': ..., 'Value': ...}`` tags. The list
            is copied; the caller's list is not modified.
        use_automated_backup: ``False`` or the string ``'FALSE'`` (any case)
            selects a snapshot of the running instance; every other value
            selects copying the latest automated backup.
        backup_interval: Expected backup interval in hours. A latest backup
            at least this old is logged as a warning; at least twice this old
            raises.

    Returns:
        The response of ``create_db_snapshot`` or ``copy_db_snapshot``.

    Raises:
        SnapshotToolException: If the instance has no usable automated
            backup, or the latest one is at least ``2 * backup_interval``
            hours old.
    """
    # The Lambda handler forwards the raw environment value ('TRUE'/'FALSE'),
    # so the flag must be recognised as a string as well as a real boolean.
    if isinstance(use_automated_backup, str):
        snapshot_running_instance = use_automated_backup.strip().upper() == 'FALSE'
    else:
        snapshot_running_instance = use_automated_backup is False

    instance_id = db_instance['DBInstanceIdentifier']

    # Work on a copy so repeated calls with the same list cannot accumulate
    # duplicate tag keys on the caller's side.
    tags = list(snapshot_tags)

    if snapshot_running_instance:
        logger.info(
            'creating snapshot out of a running db instance: %s', instance_id
        )
        tags.append({'Key': 'DBInstanceIdentifier', 'Value': instance_id})
        return client.create_db_snapshot(
            DBSnapshotIdentifier=snapshot_identifier,
            DBInstanceIdentifier=instance_id,
            Tags=tags,
        )

    # Find the latest automated backup and copy a snapshot out of it.
    # Snapshots that are still in progress carry no 'SnapshotCreateTime' and
    # cannot be ordered, so they are left out.
    db_automated_snapshots = [
        snap
        for snap in get_all_automated_snapshots(client)
        if snap['DBInstanceIdentifier'] == instance_id
        and 'SnapshotCreateTime' in snap
    ]

    # Raise exception if no automated backup found
    if not db_automated_snapshots:
        log_message = 'No automated snapshots found for db: %s' % instance_id
        logger.error(log_message)
        raise SnapshotToolException(log_message)

    latest_snapshot = max(
        db_automated_snapshots, key=lambda snap: snap['SnapshotCreateTime']
    )

    # Make sure the automated backup is not more than backup_interval hours old.
    # NOTE(review): assumes 'SnapshotCreateTime' is a timezone-aware datetime,
    # as the subtraction from an aware "now" requires - confirm.
    backup_age = datetime.now(timezone.utc) - latest_snapshot['SnapshotCreateTime']
    age_seconds = backup_age.total_seconds()
    interval_seconds = backup_interval * 60 * 60

    if age_seconds >= interval_seconds:
        log_message = (
            'Last automated backup was %s minutes ago. No latest automated backup available. '
            % int(age_seconds // 60)
        )
        logger.warning(log_message)

        # If last automated backup is over 2*backup_interval, then raise error
        if age_seconds >= 2 * interval_seconds:
            logger.error(log_message)
            raise SnapshotToolException(log_message)

    logger.info(
        'Creating snapshot out of an automated backup: %s',
        latest_snapshot['DBSnapshotIdentifier'],
    )
    tags.append(
        {
            'Key': 'SourceDBSnapshotIdentifier',
            'Value': latest_snapshot['DBSnapshotIdentifier'],
        }
    )

    copy_args = {
        'SourceDBSnapshotIdentifier': latest_snapshot['DBSnapshotArn'],
        'TargetDBSnapshotIdentifier': snapshot_identifier,
        'Tags': tags,
    }

    if latest_snapshot['Encrypted']:
        logger.info('Copying encrypted snapshot %s locally', snapshot_identifier)
        # NOTE(review): the key setting may hold the placeholder 'None' when no
        # key was configured; in that case KmsKeyId is omitted so the copy is
        # not requested with an invalid key id - confirm against the module's
        # definition of _KMS_KEY_SOURCE_REGION.
        if _KMS_KEY_SOURCE_REGION not in (None, '', 'None'):
            copy_args['KmsKeyId'] = _KMS_KEY_SOURCE_REGION
    else:
        logger.info('Copying snapshot %s locally', snapshot_identifier)

    return client.copy_db_snapshot(**copy_args)
19 changes: 14 additions & 5 deletions lambda/take_snapshots_rds/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
BACKUP_INTERVAL = int(os.getenv('INTERVAL', '24'))
PATTERN = os.getenv('PATTERN', 'ALL_INSTANCES')
TAGGEDINSTANCE = os.getenv('TAGGEDINSTANCE', 'FALSE')
USE_AUTOMATED_BACKUP = os.getenv('USE_AUTOMATED_BACKUP', 'TRUE')

if os.getenv('REGION_OVERRIDE', 'NO') != 'NO':
REGION = os.getenv('REGION_OVERRIDE').strip()
Expand Down Expand Up @@ -68,13 +69,21 @@ def lambda_handler(event, context):

snapshot_identifier = '%s-%s' % (
db_instance['DBInstanceIdentifier'], timestamp_format)

snapshot_tags = [
{'Key': 'CreatedBy', 'Value': 'Snapshot Tool for RDS'},
{'Key': 'CreatedOn', 'Value': timestamp_format},
{'Key': 'shareAndCopy', 'Value': 'YES'},
]

try:
response = client.create_db_snapshot(
DBSnapshotIdentifier=snapshot_identifier,
DBInstanceIdentifier=db_instance['DBInstanceIdentifier'],
Tags=[{'Key': 'CreatedBy', 'Value': 'Snapshot Tool for RDS'}, {
'Key': 'CreatedOn', 'Value': timestamp_format}, {'Key': 'shareAndCopy', 'Value': 'YES'}]
response = copy_or_create_db_snapshot(
client,
db_instance,
snapshot_identifier,
snapshot_tags,
use_automated_backup=USE_AUTOMATED_BACKUP,
backup_interval = BACKUP_INTERVAL
)
except Exception as e:
pending_backups += 1
Expand Down