From d4e359043dd13b6e1e4131777dcfdc0633149ac2 Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Tue, 1 Oct 2019 17:10:22 -0400 Subject: [PATCH 1/3] added support to create backup out of automated backups --- README.md | 1 + cftemplates/snapshots_tool_rds_source.json | 12 ++- lambda/snapshots_tool_utils.py | 94 +++++++++++++++++++- lambda/take_snapshots_rds/lambda_function.py | 19 ++-- 4 files changed, 119 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1b056c8..19ec943 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Here is a break down of each parameter for the source template: * **CodeBucket** - this parameter specifies the bucket where the code for the Lambda functions is located. Leave to DEFAULT_BUCKET to download from an AWS-managed bucket. The Lambda function code is located in the ```lambda``` directory. These files need to be on the **root* of the bucket or the CloudFormation templates will fail. * **DeleteOldSnapshots** - Set to TRUE to enable functionality that will delete snapshots after **RetentionDays**. Set to FALSE if you want to disable this functionality completely. (Associated Lambda and State Machine resources will not be created in the account). **WARNING** If you decide to enable this functionality later on, bear in mind it will delete **all snapshots**, older than **RetentionDays**, created by this tool; not just the ones created after **DeleteOldSnapshots** is set to TRUE. +* **UseAutomatedBackup** - Set to TRUE to enable copying from automated backups, instead of from live database instance. * **TaggedInstance** - Set to TRUE to enable functionality that will only take snapshots for RDS Instances with tag CopyDBSnapshot set to True. The settings in InstanceNamePattern and TaggedInstance both need to evaluate successfully for a snapshot to be created (logical AND). 
# Automated snapshots shared by every DB instance handled in one run.
_AUTOMATED_BACKUP_LIST = []
# UTC time at which the list above was last fetched; None means "never".
_AUTOMATED_BACKUP_FETCHED_AT = None
# Maximum age of the cached list. Module globals survive between invocations
# when the Lambda container is reused, so without an expiry later runs would
# keep selecting an outdated "latest" automated backup.
_AUTOMATED_BACKUP_CACHE_SECONDS = 300


def get_all_automated_snapshots(client):
    """Return the automated DB snapshots, fetching them at most once per run.

    The list is obtained through ``paginate_api_call`` (defined elsewhere in
    this module) with ``SnapshotType='automated'`` and cached in module
    globals for ``_AUTOMATED_BACKUP_CACHE_SECONDS`` seconds, so handling many
    instances in one invocation triggers a single describe call.

    Args:
        client: RDS client handed through to ``paginate_api_call``.

    Returns:
        The value stored under ``'DBSnapshots'`` in the paginated response.
    """
    global _AUTOMATED_BACKUP_LIST, _AUTOMATED_BACKUP_FETCHED_AT

    now = datetime.now(timezone.utc)
    cache_is_stale = (
        _AUTOMATED_BACKUP_FETCHED_AT is None
        or (now - _AUTOMATED_BACKUP_FETCHED_AT).total_seconds()
        >= _AUTOMATED_BACKUP_CACHE_SECONDS
    )
    if cache_is_stale:
        response = paginate_api_call(
            client, 'describe_db_snapshots', 'DBSnapshots', SnapshotType='automated'
        )
        _AUTOMATED_BACKUP_LIST = response['DBSnapshots']
        # Record the fetch time even for an empty result so an account
        # without automated snapshots is not re-queried for every instance.
        _AUTOMATED_BACKUP_FETCHED_AT = now

    return _AUTOMATED_BACKUP_LIST


def _automated_backup_enabled(flag):
    """Interpret the ``use_automated_backup`` argument.

    The Lambda handler forwards the raw ``USE_AUTOMATED_BACKUP`` environment
    string, so the text ``'FALSE'`` (any case) must disable the feature just
    like the boolean ``False``. Every other value keeps it enabled.
    """
    if isinstance(flag, str):
        return flag.strip().upper() != 'FALSE'
    return flag is not False


def copy_or_create_db_snapshot(
    client,
    db_instance,
    snapshot_identifier,
    snapshot_tags,
    use_automated_backup=True,
    backup_interval=24,
):
    """Create a manual snapshot, preferably by copying the latest automated one.

    Args:
        client: RDS client exposing ``create_db_snapshot`` and
            ``copy_db_snapshot``.
        db_instance: Instance description; only ``'DBInstanceIdentifier'``
            is read.
        snapshot_identifier: Identifier for the snapshot to create.
        snapshot_tags: List of ``{'Key': ..., 'Value': ...}`` tags. The list
            is copied, never modified.
        use_automated_backup: ``False`` or the string ``'FALSE'`` snapshots
            the running instance; anything else copies the newest automated
            backup of the instance.
        backup_interval: Expected backup interval in hours. A newest
            automated backup at least this old is logged as a warning; one
            at least twice this old raises.

    Returns:
        The response of ``create_db_snapshot`` or ``copy_db_snapshot``.

    Raises:
        SnapshotToolException: If the instance has no usable automated
            backup, or the newest one is at least ``2 * backup_interval``
            hours old.
    """
    instance_id = db_instance['DBInstanceIdentifier']

    if not _automated_backup_enabled(use_automated_backup):
        logger.info(
            'creating snapshot out of a running db instance: %s', instance_id
        )
        return client.create_db_snapshot(
            DBSnapshotIdentifier=snapshot_identifier,
            DBInstanceIdentifier=instance_id,
            Tags=list(snapshot_tags)
            + [{'Key': 'DBInstanceIdentifier', 'Value': instance_id}],
        )

    # Find the latest automated backup and copy the snapshot out of it.
    # Snapshots that are not (yet) available are skipped: they cannot serve
    # as a copy source and may lack a creation time to rank them by.
    # NOTE(review): assumes the API reports finished snapshots with
    # Status == 'available' - confirm against the boto3 response shape.
    candidates = [
        snap
        for snap in get_all_automated_snapshots(client)
        if snap['DBInstanceIdentifier'] == instance_id
        and 'SnapshotCreateTime' in snap
        and snap.get('Status', 'available') == 'available'
    ]
    if not candidates:
        log_message = 'No automated snapshots found for db: %s' % instance_id
        logger.error(log_message)
        raise SnapshotToolException(log_message)

    latest_snapshot = max(candidates, key=lambda snap: snap['SnapshotCreateTime'])

    # Both operands are timezone-aware, so the subtraction is well defined.
    age_seconds = (
        datetime.now(timezone.utc) - latest_snapshot['SnapshotCreateTime']
    ).total_seconds()
    interval_seconds = backup_interval * 60 * 60

    if age_seconds >= interval_seconds:
        log_message = (
            'Last automated backup was %d minutes ago. '
            'No latest automated backup available.' % (age_seconds / 60)
        )
        # Older than two intervals: the backup is too outdated to publish.
        if age_seconds >= 2 * interval_seconds:
            logger.error(log_message)
            raise SnapshotToolException(log_message)
        logger.warning(log_message)

    source_identifier = latest_snapshot['DBSnapshotIdentifier']
    logger.info(
        'Creating snapshot out of an automated backup: %s', source_identifier
    )

    return client.copy_db_snapshot(
        SourceDBSnapshotIdentifier=source_identifier,
        TargetDBSnapshotIdentifier=snapshot_identifier,
        Tags=list(snapshot_tags)
        + [{'Key': 'SourceDBSnapshotIdentifier', 'Value': source_identifier}],
        CopyTags=False,
    )
db_instance, + snapshot_identifier, + snapshot_tags, + use_automated_backup=USE_AUTOMATED_BACKUP, + backup_interval = BACKUP_INTERVAL ) except Exception as e: pending_backups += 1 From fe156eb7df15b483f2149c3bd6fc571766d1a29b Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Wed, 2 Oct 2019 13:38:49 -0400 Subject: [PATCH 2/3] fixed loggroup deletion policy --- cftemplates/snapshots_tool_rds_dest.json | 19 +++++++------------ cftemplates/snapshots_tool_rds_source.json | 18 +++++++----------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/cftemplates/snapshots_tool_rds_dest.json b/cftemplates/snapshots_tool_rds_dest.json index 9d0fd96..7a88f87 100644 --- a/cftemplates/snapshots_tool_rds_dest.json +++ b/cftemplates/snapshots_tool_rds_dest.json @@ -56,11 +56,6 @@ "AllowedValues": ["TRUE", "FALSE"], "Default": "TRUE", "Description": "Enable copying snapshots across accounts. Set to FALSE if your source snapshosts are not on a different account" - }, - "LogGroupName": { - "Type": "String", - "Default": "lambdaDeleteOldSnapshotsRDS-dest", - "Description": "Name for RDS snapshot log group." 
} }, "Conditions": { @@ -598,15 +593,15 @@ }] } }, - "cwloggroupDeleteOldSnapshotsDestRDS":{ + "cwloggrouplambdaDeleteOldDestRDS":{ "Type": "AWS::Logs::LogGroup", - "Description": "Log group for the lambdaCopySnapshotsRDS function's logs", + "Description": "Log group for the lambdaDeleteOldDestRDS function's logs", "Condition": "DeleteOld", "DependsOn": "lambdaDeleteOldDestRDS", "Properties": { - "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, - "LogGroupName": { - "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "LogGroupName" } } ] + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaDeleteOldDestRDS" } } ] } } }, @@ -615,8 +610,8 @@ "Description": "Log group for the lambdaCopySnapshotsRDS function's logs", "DependsOn": "lambdaCopySnapshotsRDS", "Properties": { - "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, - "LogGroupName": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaCopySnapshotsRDS" } } ] } } diff --git a/cftemplates/snapshots_tool_rds_source.json b/cftemplates/snapshots_tool_rds_source.json index 4900ce5..b21990d 100644 --- a/cftemplates/snapshots_tool_rds_source.json +++ b/cftemplates/snapshots_tool_rds_source.json @@ -68,11 +68,6 @@ "Default": "FALSE", "Description": "Set to TRUE to filter instances that have tag CopyDBSnapshot set to True. Set to FALSE to disable", "AllowedValues": ["TRUE", "FALSE"] - }, - "LogGroupName": { - "Type": "String", - "Default": "lambdaDeleteOldSnapshotsRDS-source", - "Description": "Name for RDS snapshot log group." 
} }, "Conditions": { @@ -721,8 +716,8 @@ "Description": "Log group for the lambdaTakeSnapshotsRDS function's logs", "DependsOn": "lambdaTakeSnapshotsRDS", "Properties": { - "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, - "LogGroupName": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaTakeSnapshotsRDS" } } ] } } @@ -733,8 +728,8 @@ "Description": "Log group for the lambdaShareSnapshotsRDS function's logs", "DependsOn": "lambdaShareSnapshotsRDS", "Properties": { - "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, - "LogGroupName": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaShareSnapshotsRDS" } } ] } } @@ -742,10 +737,11 @@ "cwloggrouplambdaDeleteOldSnapshotsRDS":{ "Type": "AWS::Logs::LogGroup", "Description": "Log group for the lambdaDeleteOldSnapshotsRDS function's logs", + "DependsOn": "lambdaDeleteOldSnapshotsRDS", "Properties": { "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, - "LogGroupName": { - "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "LogGroupName" } } ] + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaDeleteOldSnapshotsRDS" } } ] } } } From 9b92a8f387467754d31510ffc309adb2ba138403 Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Wed, 2 Oct 2019 15:42:51 -0400 Subject: [PATCH 3/3] updated alarms period --- cftemplates/snapshots_tool_rds_dest.json | 2 +- cftemplates/snapshots_tool_rds_source.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cftemplates/snapshots_tool_rds_dest.json b/cftemplates/snapshots_tool_rds_dest.json index 7a88f87..5e2ed55 100644 --- a/cftemplates/snapshots_tool_rds_dest.json +++ b/cftemplates/snapshots_tool_rds_dest.json @@ -176,7 +176,7 @@ "EvaluationPeriods": "1", "MetricName": "ExecutionsFailed", "Namespace": "AWS/States", - "Period": "300", + 
"Period": "3600", "Statistic": "Sum", "Threshold": "1.0", "AlarmActions": [{ diff --git a/cftemplates/snapshots_tool_rds_source.json b/cftemplates/snapshots_tool_rds_source.json index b21990d..3b85311 100644 --- a/cftemplates/snapshots_tool_rds_source.json +++ b/cftemplates/snapshots_tool_rds_source.json @@ -198,7 +198,7 @@ "EvaluationPeriods": "1", "MetricName": "ExecutionsFailed", "Namespace": "AWS/States", - "Period": "300", + "Period": "3600", "Statistic": "Sum", "Threshold": "1.0", "AlarmActions": [{