From fcb67bf293379017d156b685cf5dbbe6ee90df6f Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Tue, 1 Oct 2019 17:23:54 -0400 Subject: [PATCH 1/3] added support to create backup out of automated backups --- README.md | 1 + cftemplates/snapshots_tool_aurora_source.json | 15 ++- lambda/snapshots_tool_utils.py | 93 ++++++++++++++++++- .../take_snapshots_aurora/lambda_function.py | 19 +++- 4 files changed, 121 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c021801..a6e47ab 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Here is a break down of each parameter for the source template: * **CodeBucket** - this parameter specifies the bucket where the code for the Lambda functions is located. Leave to DEFAULT_BUCKET to download from an AWS-managed bucket. The Lambda function code is located in the ```lambda``` directory. These files need to be on the **root* of the bucket or the CloudFormation templates will fail. * **DeleteOldSnapshots** - Set to TRUE to enable functionanility that will delete snapshots after **RetentionDays**. Set to FALSE if you want to disable this functionality completely. (Associated Lambda and State Machine resources will not be created in the account). **WARNING** If you decide to enable this functionality later on, bear in mind it will delete **all snapshots**, older than **RetentionDays**, created by this tool; not just the ones created after **DeleteOldSnapshots** is set to TRUE. +* **UseAutomatedBackup** - Set to TRUE (the default) to create each snapshot by copying the cluster's latest automated backup. Set to FALSE to take the snapshot from the live database cluster instead. * **ShareSnapshots** - Set to TRUE to enable functionality that will share snapshots with **DestAccount**. Set to FALSE to completely disable sharing. (Associated Lambda and State Machine resources will not be created in the account.) 
### Destination Account diff --git a/cftemplates/snapshots_tool_aurora_source.json b/cftemplates/snapshots_tool_aurora_source.json index d1a2e99..a05f345 100644 --- a/cftemplates/snapshots_tool_aurora_source.json +++ b/cftemplates/snapshots_tool_aurora_source.json @@ -52,6 +52,15 @@ "Description": "Set to TRUE to enable deletion of snapshot based on RetentionDays. Set to FALSE to disable", "AllowedValues": ["TRUE", "FALSE"] }, + "UseAutomatedBackup": { + "Type": "String", + "Default": "TRUE", + "Description": "Set to TRUE to create backups from automated backups by copying them first. Else set it to FALSE to create out of running instance", + "AllowedValues": [ + "TRUE", + "FALSE" + ] + }, "SNSTopic": { "Type": "String", "Default": "", @@ -347,7 +356,8 @@ "rds:DescribeDBClusterSnapshots", "rds:ModifyDBClusterSnapshotAttribute", "rds:DescribeDBClusterSnapshotAttributes", - "rds:ListTagsForResource" + "rds:ListTagsForResource", + "rds:CopyDBClusterSnapshot" ], "Resource": "*" }] @@ -387,6 +397,9 @@ }, "REGION_OVERRIDE": { "Ref": "SourceRegionOverride" + }, + "USE_AUTOMATED_BACKUP": { + "Ref": "UseAutomatedBackup" } } }, diff --git a/lambda/snapshots_tool_utils.py b/lambda/snapshots_tool_utils.py index df4d587..83c3f19 100644 --- a/lambda/snapshots_tool_utils.py +++ b/lambda/snapshots_tool_utils.py @@ -13,7 +13,7 @@ # Support module for the Snapshots Tool for Aurora import boto3 -from datetime import datetime +from datetime import datetime, timezone import time import os import logging @@ -41,6 +41,8 @@ _SUPPORTED_ENGINES = [ 'aurora', 'aurora-mysql', 'aurora-postgresql'] +_AUTOMATED_BACKUP_LIST = [] + logger = logging.getLogger() logger.setLevel(_LOGLEVEL.upper()) @@ -373,3 +375,92 @@ def search_tag_copied(response): return False + +def get_all_automated_snapshots(client): + global _AUTOMATED_BACKUP_LIST + if len(_AUTOMATED_BACKUP_LIST) == 0: + response = paginate_api_call( + client, + 'describe_db_cluster_snapshots', + 'DBClusterSnapshots', + 
+ SnapshotType='automated', + ) + _AUTOMATED_BACKUP_LIST = response['DBClusterSnapshots'] + + return _AUTOMATED_BACKUP_LIST + + +def copy_or_create_db_snapshot( + client, + db_cluster, + snapshot_identifier, + snapshot_tags, + use_automated_backup=True, + backup_interval=24, +): + """Create the manual snapshot: copy the cluster's newest automated backup, or snapshot the running cluster when use_automated_backup is False/'FALSE'. Raises SnapshotToolException if no automated backup exists or the newest one is older than 2 * backup_interval hours.""" + if str(use_automated_backup).strip().upper() == 'FALSE': # env vars are strings, so compare by value + logger.info( + 'creating snapshot out of a running db cluster: %s' + % db_cluster['DBClusterIdentifier'] + ) + snapshot_tags.append( + {'Key': 'DBClusterIdentifier', 'Value': db_cluster['DBClusterIdentifier']} + ) + return client.create_db_cluster_snapshot( + DBClusterSnapshotIdentifier=snapshot_identifier, + DBClusterIdentifier=db_cluster['DBClusterIdentifier'], + Tags=snapshot_tags, + ) + + # Find the latest automated backup and copy a snapshot out of it + all_automated_snapshots = get_all_automated_snapshots(client) + dbcluster_automated_snapshots = [x for x in all_automated_snapshots + if x['DBClusterIdentifier'] == db_cluster['DBClusterIdentifier']] + + # Raise exception if no automated backup found + if len(dbcluster_automated_snapshots) <= 0: + log_message = ( + 'No automated snapshots found for db: %s' + % db_cluster['DBClusterIdentifier'] + ) + logger.error(log_message) + raise SnapshotToolException(log_message) + + # filter last automated backup + dbcluster_automated_snapshots.sort(key=lambda x: x['SnapshotCreateTime']) + latest_snapshot = dbcluster_automated_snapshots[-1] + + # Make sure automated backup is not more than backup_interval window old + backup_age = datetime.now(timezone.utc) - latest_snapshot['SnapshotCreateTime'] + if backup_age.total_seconds() >= (backup_interval * 60 * 60): + backup_age_minutes = int(backup_age.total_seconds() // 60) + log_message = ( + 'Last automated backup was %s minutes ago. No latest automated backup available. 
' + % backup_age_minutes + ) + logger.warning(log_message) + + # If last automated backup is over 2*backup_interval, then raise error + if backup_age.total_seconds() >= (backup_interval * 2 * 60 * 60): + logger.error(log_message) + raise SnapshotToolException(log_message) + + logger.info( + 'Creating snapshot out of an automated backup: %s' + % latest_snapshot['DBClusterSnapshotIdentifier'] + ) + snapshot_tags.append( + { + 'Key': 'SourceDBClusterSnapshotIdentifier', + 'Value': latest_snapshot['DBClusterSnapshotIdentifier'], + } + ) + return client.copy_db_cluster_snapshot( + SourceDBClusterSnapshotIdentifier=latest_snapshot[ + 'DBClusterSnapshotIdentifier' + ], + TargetDBClusterSnapshotIdentifier=snapshot_identifier, + Tags=snapshot_tags, + CopyTags=False, + ) diff --git a/lambda/take_snapshots_aurora/lambda_function.py b/lambda/take_snapshots_aurora/lambda_function.py index 077e6b6..1ba22de 100644 --- a/lambda/take_snapshots_aurora/lambda_function.py +++ b/lambda/take_snapshots_aurora/lambda_function.py @@ -25,6 +25,7 @@ LOGLEVEL = os.getenv('LOG_LEVEL').strip() BACKUP_INTERVAL = int(os.getenv('INTERVAL', '24')) PATTERN = os.getenv('PATTERN', 'ALL_CLUSTERS') +USE_AUTOMATED_BACKUP = os.getenv('USE_AUTOMATED_BACKUP', 'TRUE') if os.getenv('REGION_OVERRIDE', 'NO') != 'NO': REGION = os.getenv('REGION_OVERRIDE').strip() @@ -66,13 +67,21 @@ def lambda_handler(event, context): snapshot_identifier = '%s-%s' % ( db_cluster['DBClusterIdentifier'], timestamp_format) + + snapshot_tags = [ + {'Key': 'CreatedBy', 'Value': 'Snapshot Tool for Aurora'}, + {'Key': 'CreatedOn', 'Value': timestamp_format}, + {'Key': 'shareAndCopy', 'Value': 
'YES'}] + response = copy_or_create_db_snapshot( + client, + db_cluster, + snapshot_identifier, + snapshot_tags, + use_automated_backup=USE_AUTOMATED_BACKUP, + backup_interval=BACKUP_INTERVAL, ) except Exception as e: logger.error(e) From fb3c7945823b1305f60755033e3cbb61db83ff6a Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Wed, 2 Oct 2019 13:43:36 -0400 Subject: [PATCH 2/3] fixed loggroup deletion policy --- cftemplates/snapshots_tool_aurora_dest.json | 28 +++++++++++++ cftemplates/snapshots_tool_aurora_source.json | 40 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/cftemplates/snapshots_tool_aurora_dest.json b/cftemplates/snapshots_tool_aurora_dest.json index 5235ba6..61f1125 100644 --- a/cftemplates/snapshots_tool_aurora_dest.json +++ b/cftemplates/snapshots_tool_aurora_dest.json @@ -25,6 +25,11 @@ "Default": "ERROR", "Description": "Log level for Lambda functions (DEBUG, INFO, WARN, ERROR, CRITICAL are valid values)." }, + "LambdaCWLogRetention": { + "Type": "Number", + "Default": "7", + "Description": "Number of days to retain logs from the lambda functions in CloudWatch Logs" + }, "SourceRegionOverride": { "Type": "String", "Default": "NO", @@ -643,6 +648,29 @@ } }] } + }, + "cwloggrouplambdaCopySnapshotsAurora":{ + "Type": "AWS::Logs::LogGroup", + "Description": "Log group for the lambdaCopySnapshotsAurora function's logs", + "DependsOn": "lambdaCopySnapshotsAurora", + "Properties": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaCopySnapshotsAurora" } } ] + } + } + }, + "cwloggrouplambdaDeleteOldDestAurora":{ + "Type": "AWS::Logs::LogGroup", + "Description": "Log group for the lambdaDeleteOldDestAurora function's logs", + "Condition": "DeleteOld", + "DependsOn": "lambdaDeleteOldDestAurora", + "Properties": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { 
"Ref" : "lambdaDeleteOldDestAurora" } } ] + } + } } }, "Outputs": { diff --git a/cftemplates/snapshots_tool_aurora_source.json b/cftemplates/snapshots_tool_aurora_source.json index a05f345..a571aef 100644 --- a/cftemplates/snapshots_tool_aurora_source.json +++ b/cftemplates/snapshots_tool_aurora_source.json @@ -41,6 +41,11 @@ "Default": "ERROR", "Description": "Log level for Lambda functions (DEBUG, INFO, WARN, ERROR, CRITICAL are valid values)." }, + "LambdaCWLogRetention": { + "Type": "Number", + "Default": "7", + "Description": "Number of days to retain logs from the lambda functions in CloudWatch Logs" + }, "SourceRegionOverride": { "Type": "String", "Default": "NO", @@ -764,6 +769,41 @@ } }] } + }, + "cwloggrouplambdaTakeSnapshotsAurora":{ + "Type": "AWS::Logs::LogGroup", + "Description": "Log group for the lambdaTakeSnapshotsAurora function's logs", + "DependsOn": "lambdaTakeSnapshotsAurora", + "Properties": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaTakeSnapshotsAurora" } } ] + } + } + }, + "cwloggrouplambdaShareSnapshotsAurora":{ + "Type": "AWS::Logs::LogGroup", + "Description": "Log group for the lambdaShareSnapshotsAurora function's logs", + "Condition": "DeleteOld", + "DependsOn": "lambdaShareSnapshotsAurora", + "Properties": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaShareSnapshotsAurora" } } ] + } + } + }, + "cwloggrouplambdaDeleteOldSnapshotsAurora":{ + "Type": "AWS::Logs::LogGroup", + "Description": "Log group for the lambdaDeleteOldSnapshotsAurora function's logs", + "Condition": "DeleteOld", + "DependsOn": "lambdaDeleteOldSnapshotsAurora", + "Properties": { + "RetentionInDays": { "Ref": "LambdaCWLogRetention" }, + "LogGroupName": { + "Fn::Sub": [ "/aws/lambda/${func}", { "func": { "Ref" : "lambdaDeleteOldSnapshotsAurora" } } ] + } + } } }, "Outputs": { 
From fc340bd60f87639360213bf1b434130dd1623d91 Mon Sep 17 00:00:00 2001 From: Sonu Kumar Meena Date: Wed, 2 Oct 2019 15:43:07 -0400 Subject: [PATCH 3/3] updated alarms period --- cftemplates/snapshots_tool_aurora_dest.json | 2 +- cftemplates/snapshots_tool_aurora_source.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cftemplates/snapshots_tool_aurora_dest.json b/cftemplates/snapshots_tool_aurora_dest.json index 61f1125..060436b 100644 --- a/cftemplates/snapshots_tool_aurora_dest.json +++ b/cftemplates/snapshots_tool_aurora_dest.json @@ -194,7 +194,7 @@ "EvaluationPeriods": "1", "MetricName": "ExecutionsFailed", "Namespace": "AWS/States", - "Period": "300", + "Period": "3600", "Statistic": "Sum", "Threshold": "2.0", "TreatMissingData": "ignore", diff --git a/cftemplates/snapshots_tool_aurora_source.json b/cftemplates/snapshots_tool_aurora_source.json index a571aef..b2533f5 100644 --- a/cftemplates/snapshots_tool_aurora_source.json +++ b/cftemplates/snapshots_tool_aurora_source.json @@ -249,7 +249,7 @@ "Namespace": "AWS/States", "Period": "3600", "Statistic": "Sum", - "Threshold": "6.0", + "Threshold": "3.0", "AlarmActions": [{ "Fn::If": ["SNSTopicIsEmpty", { "Ref": "snsTopicSnapshotsAuroraTool"