|
| 1 | +""" |
| 2 | +Script to drop tables from an RDS MySQL database while handling foreign key dependencies. |
| 3 | +
|
| 4 | +Usage: |
| 5 | + python drop_dop_tables.py --db-host=my-db-host --db-name=my-db |
| 6 | +
|
| 7 | +Arguments: |
| 8 | + --db-host The RDS database host. |
| 9 | + --db-name The database name. |
| 10 | + --dry-run Enable dry run mode (no actual changes). |
| 11 | +
|
| 12 | +Environment Variables: |
| 13 | + DB_USERNAME The RDS database username (set via environment variable). |
| 14 | + DB_PASSWORD The RDS database password (set via environment variable). |
| 15 | +
|
| 16 | +Functionality: |
| 17 | + - Drops specific tables only if they have had no activity in the last 12 months. |
| 18 | + - Handles foreign key constraints before dropping dependent tables. |
| 19 | + - Provides boto3 EC2/RDS wrappers whose calls retry on ClientError with exponential backoff (the drop workflow itself does not call them). |
| 20 | +
|
| 21 | +Example: |
| 22 | + export DB_USERNAME=admin |
| 23 | + export DB_PASSWORD=securepass |
| 24 | + python drop_dop_tables.py --db-host=mydb.amazonaws.com --db-name=mydatabase --dry-run |
| 25 | +""" |
| 26 | + |
| 27 | +import boto3 |
| 28 | +import click |
| 29 | +import backoff |
| 30 | +from botocore.exceptions import ClientError |
| 31 | +import pymysql |
| 32 | +import logging |
| 33 | +from datetime import datetime, timedelta |
| 34 | + |
| 35 | + |
# Maximum number of attempts for the backoff-decorated AWS calls.
MAX_TRIES = 5

# Tables holding a foreign key to oauth2_client, mapped to that referenced
# table. These constraints are removed before any table is dropped.
FK_DEPENDENCIES = dict.fromkeys(
    (
        "third_party_auth_providerapipermissions",
        "oauth2_provider_trustedclient",
    ),
    "oauth2_client",
)

# Tables to drop, in drop order: the two tables with a foreign key reference
# to oauth2_client come before oauth2_client itself.
TABLES_TO_DROP = [
    # FK reference to oauth2_client
    "oauth2_provider_trustedclient",
    "third_party_auth_providerapipermissions",
    # django-oauth2-provider style tables
    "oauth2_client",
    "oauth2_grant",
    "oauth2_accesstoken",
    "oauth2_refreshtoken",
    # oauth_provider tables
    "oauth_provider_consumer",
    "oauth_provider_nonce",
    "oauth_provider_scope",
    "oauth_provider_token",
]

# Configure logging: INFO and above, timestamped, on the root handler.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
LOGGER = logging.getLogger(__name__)
| 58 | + |
| 59 | + |
| 60 | + |
class EC2BotoWrapper:
    """Thin wrapper around a boto3 EC2 client whose calls retry on ClientError."""

    def __init__(self, **kwargs):
        """Create the underlying boto3 EC2 client.

        Args:
            **kwargs: Optional keyword arguments forwarded unchanged to
                ``boto3.client`` (for example ``region_name``). Calling the
                constructor with no arguments behaves exactly as before.
        """
        self.client = boto3.client("ec2", **kwargs)

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_regions(self):
        """Return the raw ``describe_regions`` response from the EC2 client.

        The call is retried with exponential backoff, up to ``MAX_TRIES``
        attempts, whenever botocore raises ``ClientError``.
        """
        return self.client.describe_regions()
| 68 | + |
| 69 | + |
class RDSBotoWrapper:
    """Thin wrapper around a boto3 RDS client whose calls retry on ClientError."""

    def __init__(self, **kwargs):
        """Build the RDS client, forwarding ``kwargs`` to ``boto3.client``."""
        rds_client = boto3.client("rds", **kwargs)
        self.client = rds_client

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_db_instances(self):
        """Return the raw ``describe_db_instances`` response from the RDS client.

        Retried with exponential backoff, up to ``MAX_TRIES`` attempts, when
        botocore raises ``ClientError``.
        """
        response = self.client.describe_db_instances()
        return response
| 77 | + |
| 78 | + |
def connect_to_db(db_host, db_user, db_password, db_name):
    """Establish a connection to the RDS MySQL database.

    Args:
        db_host: Host name of the database server.
        db_user: User name to authenticate with.
        db_password: Password for ``db_user``.
        db_name: Database selected as the connection's default schema.

    Returns:
        The connection object returned by ``pymysql.connect``. It is created
        with ``DictCursor``, so the rest of this script reads result rows by
        column name (e.g. ``row["CONSTRAINT_NAME"]``).
    """
    # Log through the module logger rather than the root logging functions so
    # all script output goes through the logger this module defines.
    LOGGER.info("Connecting to the database...")
    return pymysql.connect(
        host=db_host,
        user=db_user,
        password=db_password,
        database=db_name,
        cursorclass=pymysql.cursors.DictCursor,
    )
| 89 | + |
| 90 | + |
def drop_foreign_key(connection, db_name, table_name, referenced_table, dry_run):
    """Drop the foreign keys on ``table_name`` that reference ``referenced_table``.

    Nothing is changed when ``get_last_activity_date`` reports activity on
    ``table_name`` within the last 365 days.

    Args:
        connection: Open database connection whose cursors return dict rows.
        db_name: Schema that contains ``table_name``.
        table_name: Table that owns the foreign key constraint(s).
        referenced_table: Table the constraint(s) point to.
        dry_run: When true, only log what would be dropped.
    """
    last_activity = get_last_activity_date(connection, table_name)
    if last_activity and last_activity > datetime.now() - timedelta(days=365):
        LOGGER.info(f"Skipping {table_name}: Last activity was on {last_activity}")
        return

    # Values are bound as parameters instead of being interpolated into the
    # SQL text. DISTINCT collapses the one-row-per-column results of a
    # composite key into a single constraint name.
    query = """
        SELECT DISTINCT CONSTRAINT_NAME
        FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE TABLE_SCHEMA = %s
          AND TABLE_NAME = %s
          AND REFERENCED_TABLE_NAME = %s
    """
    # Identifiers cannot be bound as parameters, so quote them with backticks
    # (doubling any embedded backtick).
    quoted_table = "`" + table_name.replace("`", "``") + "`"

    with connection.cursor() as cursor:
        cursor.execute(query, (db_name, table_name, referenced_table))
        # Drop every matching constraint, not just the first row returned.
        constraint_names = [row["CONSTRAINT_NAME"] for row in cursor.fetchall()]
        for constraint_name in constraint_names:
            if dry_run:
                LOGGER.info(f"[Dry Run] Would drop foreign key {constraint_name} from {table_name}.")
                continue
            quoted_constraint = "`" + constraint_name.replace("`", "``") + "`"
            cursor.execute(f"ALTER TABLE {quoted_table} DROP FOREIGN KEY {quoted_constraint}")
            connection.commit()
            LOGGER.info(f"Dropped foreign key {constraint_name} from {table_name}.")
| 115 | + |
| 116 | + |
| 117 | + |
def get_last_activity_date(connection, table_name):
    """Retrieve the last activity date for a table.

    The date is the later of the table's ``UPDATE_TIME`` and ``CREATE_TIME``
    in ``information_schema.tables``; a missing value counts as
    ``1970-01-01 00:00:00``.

    Args:
        connection: Open database connection whose cursors return dict rows.
        table_name: Name of the table to look up in the connection's current
            default database.

    Returns:
        A ``datetime`` with the last activity, or ``None`` when the query
        yields no value (no matching table row).
    """
    # Restrict the lookup to the connection's current database: without the
    # schema filter, MAX() would also consider same-named tables that live in
    # other schemas. The table name is bound as a parameter.
    query = """
        SELECT MAX(GREATEST(
            COALESCE(UPDATE_TIME, '1970-01-01 00:00:00'),
            COALESCE(CREATE_TIME, '1970-01-01 00:00:00')
        )) AS last_activity
        FROM information_schema.tables
        WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = %s
    """
    with connection.cursor() as cursor:
        cursor.execute(query, (table_name,))
        result = cursor.fetchone()

    value = result["last_activity"] if result else None
    if not value:
        return None  # If no activity, return None
    if isinstance(value, datetime):
        # The driver may already have converted the column.
        return value
    if isinstance(value, (bytes, bytearray)):
        value = value.decode()
    text = str(value)
    # Accept an optional fractional-seconds part.
    fmt = "%Y-%m-%d %H:%M:%S.%f" if "." in text else "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(text, fmt)
| 134 | + |
| 135 | + |
def drop_table(connection, table_name, dry_run):
    """Drop ``table_name`` unless it is missing or was active in the last 365 days.

    Args:
        connection: Open database connection.
        table_name: Name of the table to drop.
        dry_run: When true, only log what would be dropped.
    """
    last_activity = get_last_activity_date(connection, table_name)
    if last_activity is None:
        # No activity value means information_schema has no row for the
        # table, so there is nothing to drop; do not report a drop.
        LOGGER.info(f"Skipping {table_name}: table not found.")
        return
    if last_activity > datetime.now() - timedelta(days=365):
        LOGGER.info(f"Skipping {table_name}: Last activity was on {last_activity}")
        return

    if dry_run:
        LOGGER.info(f"[Dry Run] Would drop table {table_name}.")
        return

    LOGGER.info(f"Dropping table {table_name}...")
    # Identifiers cannot be bound as parameters, so quote the name with
    # backticks (doubling any embedded backtick).
    quoted_table = "`" + table_name.replace("`", "``") + "`"
    with connection.cursor() as cursor:
        cursor.execute(f"DROP TABLE IF EXISTS {quoted_table}")
        connection.commit()
    LOGGER.info(f"Table {table_name} dropped.")
| 152 | + |
| 153 | + |
@click.command()
@click.option('--db-host', required=True, help="RDS DB host")
@click.option('--db-user', envvar='DB_USERNAME', required=True, help="RDS DB user (can be set via environment variable DB_USERNAME)")
@click.option('--db-password', envvar='DB_PASSWORD', required=True, help="RDS DB password (can be set via environment variable DB_PASSWORD)")
@click.option('--db-name', required=True, help="RDS DB name")
@click.option('--dry-run', is_flag=True, help="Enable dry run mode (no actual changes)")
def drop_tables(db_host, db_user, db_password, db_name, dry_run):
    """
    A script to drop tables from an RDS database while handling foreign key dependencies.
    The tables to drop and their foreign key dependencies are defined in this script.
    """
    connection = None
    try:
        connection = connect_to_db(db_host, db_user, db_password, db_name)

        # Remove the foreign keys first so the tables can then be dropped.
        for table, referenced_table in FK_DEPENDENCIES.items():
            drop_foreign_key(connection, db_name, table, referenced_table, dry_run)

        for table in TABLES_TO_DROP:
            drop_table(connection, table, dry_run)

        LOGGER.info("Database cleanup completed successfully.")
    except Exception as e:
        # Top-level boundary: log with traceback, then exit non-zero so the
        # caller (shell, CI job) can see that the cleanup failed.
        LOGGER.exception(f"An error occurred: {e}")
        raise SystemExit(1)
    finally:
        # Close the connection on both the success and the failure path.
        if connection is not None:
            connection.close()


if __name__ == '__main__':
    drop_tables()
0 commit comments