Skip to content

Commit afdb2db

Browse files
Merge pull request #170 from edx/ihassan/GSRE-2673_added_script_to_remove_tables
chore: Added script to drop DOP tables from edxapp DB
2 parents 6d9e80b + 9f70cd7 commit afdb2db

File tree

2 files changed

+182
-0
lines changed

2 files changed

+182
-0
lines changed
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
"""
2+
Script to drop tables from an RDS MySQL database while handling foreign key dependencies.
3+
4+
Usage:
5+
python drop_dop_tables.py --db-host=my-db-host --db-name=my-db
6+
7+
Arguments:
8+
--db-host The RDS database host.
9+
--db-name The database name.
10+
--dry-run Enable dry run mode (no actual changes).
11+
12+
Environment Variables:
13+
DB_USERNAME The RDS database username (set via environment variable).
14+
DB_PASSWORD The RDS database password (set via environment variable).
15+
16+
Functionality:
17+
- Drops specific tables only if they have had no activity in the last 12 months.
18+
- Handles foreign key constraints before dropping dependent tables.
19+
- Provides boto3 EC2/RDS client wrappers that retry AWS calls with exponential backoff (the table-drop flow itself does not call them).
20+
21+
Example:
22+
export DB_USERNAME=admin
23+
export DB_PASSWORD=securepass
24+
python drop_dop_tables.py --db-host=mydb.amazonaws.com --db-name=mydatabase --dry-run
25+
"""
26+
27+
import boto3
28+
import click
29+
import backoff
30+
from botocore.exceptions import ClientError
31+
import pymysql
32+
import logging
33+
from datetime import datetime, timedelta
34+
35+
36+
# Upper bound on attempts for the backoff-decorated boto3 calls below.
MAX_TRIES = 5

# Tables are dropped in list order, so the two tables that hold a foreign key
# to oauth2_client come before oauth2_client itself.
TABLES_TO_DROP = [
    # Tables with a foreign key reference to oauth2_client
    "oauth2_provider_trustedclient",
    "third_party_auth_providerapipermissions",
    # django-oauth2-provider tables
    "oauth2_client",
    "oauth2_grant",
    "oauth2_accesstoken",
    "oauth2_refreshtoken",
    # oauth_provider tables
    "oauth_provider_consumer",
    "oauth_provider_nonce",
    "oauth_provider_scope",
    "oauth_provider_token",
]

# Maps each referencing table to the table its foreign key points at.
FK_DEPENDENCIES = {
    "third_party_auth_providerapipermissions": "oauth2_client",
    "oauth2_provider_trustedclient": "oauth2_client",
}

# Configure logging
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
58+
59+
60+
61+
class EC2BotoWrapper:
    """Wrapper around a boto3 EC2 client whose calls are retried on ClientError."""

    def __init__(self):
        # Region and credentials are resolved by boto3's default configuration.
        ec2_client = boto3.client("ec2")
        self.client = ec2_client

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_regions(self):
        """Return the EC2 DescribeRegions response, retrying with exponential backoff."""
        response = self.client.describe_regions()
        return response
68+
69+
70+
class RDSBotoWrapper:
    """Wrapper around a boto3 RDS client whose calls are retried on ClientError."""

    def __init__(self, **kwargs):
        # Any keyword arguments are forwarded unchanged to boto3.client.
        rds_client = boto3.client("rds", **kwargs)
        self.client = rds_client

    @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
    def describe_db_instances(self):
        """Return the RDS DescribeDBInstances response, retrying with exponential backoff."""
        response = self.client.describe_db_instances()
        return response
77+
78+
79+
def connect_to_db(db_host, db_user, db_password, db_name):
    """
    Open and return a PyMySQL connection to the given database.

    The connection uses DictCursor, so fetched rows are dictionaries keyed by
    column name.
    """
    logging.info("Connecting to the database...")
    connection_settings = {
        "host": db_host,
        "user": db_user,
        "password": db_password,
        "database": db_name,
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**connection_settings)
89+
90+
91+
def drop_foreign_key(connection, db_name, table_name, referenced_table, dry_run):
    """
    Drop every foreign key on ``table_name`` that references ``referenced_table``.

    Nothing is changed when ``get_last_activity_date`` reports activity on
    ``table_name`` within the last 365 days.

    Args:
        connection: Open database connection whose cursors return dict rows.
        db_name: Schema that contains ``table_name``.
        table_name: Table that owns the foreign key constraint(s).
        referenced_table: Table the constraint(s) point at.
        dry_run: When true, only log what would be dropped.
    """
    last_activity = get_last_activity_date(connection, table_name)
    if last_activity and last_activity > datetime.now() - timedelta(days=365):
        logging.info(f"Skipping {table_name}: Last activity was on {last_activity}")
        return

    # Values are bound as query parameters instead of being interpolated into
    # the SQL text. DISTINCT collapses repeated names: KEY_COLUMN_USAGE lists
    # key columns, so a multi-column constraint appears more than once.
    query = """
        SELECT DISTINCT CONSTRAINT_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND REFERENCED_TABLE_NAME = %s
    """
    with connection.cursor() as cursor:
        cursor.execute(query, (db_name, table_name, referenced_table))
        # Materialise the names first; the same cursor is reused for the ALTERs.
        constraint_names = [row["CONSTRAINT_NAME"] for row in cursor.fetchall()]

        # Drop all matching constraints, not just the first one, so that none
        # is left behind to block dropping the referenced table later.
        for constraint_name in constraint_names:
            if dry_run:
                logging.info(f"[Dry Run] Would drop foreign key {constraint_name} from {table_name}.")
                continue
            # Identifiers cannot be bound as parameters, so they are backtick-quoted.
            cursor.execute(f"ALTER TABLE `{table_name}` DROP FOREIGN KEY `{constraint_name}`")
            connection.commit()
            logging.info(f"Dropped foreign key {constraint_name} from {table_name}.")
115+
116+
117+
118+
def get_last_activity_date(connection, table_name):
    """
    Retrieve the last activity date for a table in the connection's current schema.

    "Activity" is the later of the table's UPDATE_TIME and CREATE_TIME as
    reported by information_schema.tables; a missing value counts as
    1970-01-01 00:00:00.

    Args:
        connection: Open database connection whose cursors return dict rows.
        table_name: Name of the table to look up.

    Returns:
        A ``datetime``, or ``None`` when the query yields no value (for
        example when the table does not exist in the current schema).
    """
    # NOTE(review): MySQL may report UPDATE_TIME as NULL for InnoDB tables
    # (for instance after a server restart), in which case only CREATE_TIME
    # contributes here -- confirm this is acceptable for the target server.
    #
    # TABLE_SCHEMA = DATABASE() keeps a same-named table in another schema on
    # the same server from influencing the result; the table name is bound as
    # a parameter rather than interpolated into the SQL text.
    query = """
        SELECT MAX(GREATEST(
            COALESCE(UPDATE_TIME, '1970-01-01 00:00:00'),
            COALESCE(CREATE_TIME, '1970-01-01 00:00:00')
        )) AS last_activity
        FROM information_schema.tables
        WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = %s
    """
    with connection.cursor() as cursor:
        cursor.execute(query, (table_name,))
        result = cursor.fetchone()

    last_activity = result["last_activity"] if result else None
    if not last_activity:
        return None  # If no activity, return None

    # The driver may hand the value back as a datetime, bytes, or text.
    if isinstance(last_activity, datetime):
        return last_activity
    if isinstance(last_activity, (bytes, bytearray)):
        last_activity = last_activity.decode("ascii")

    text = str(last_activity)
    # Tolerate an optional fractional-seconds suffix.
    fmt = "%Y-%m-%d %H:%M:%S.%f" if "." in text else "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(text, fmt)
134+
135+
136+
def drop_table(connection, table_name, dry_run):
    """
    Drop ``table_name`` unless it has seen activity in the last 365 days.

    In dry-run mode the drop is only logged; otherwise ``DROP TABLE IF EXISTS``
    is executed and committed on the given connection.
    """
    most_recent = get_last_activity_date(connection, table_name)
    if most_recent and most_recent > datetime.now() - timedelta(days=365):
        logging.info(f"Skipping {table_name}: Last activity was on {most_recent}")
        return

    logging.info(f"Dropping table {table_name}...")
    if dry_run:
        logging.info(f"[Dry Run] Would drop table {table_name}.")
        return

    with connection.cursor() as db_cursor:
        db_cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
        connection.commit()
        logging.info(f"Table {table_name} dropped.")
152+
153+
154+
@click.command()
@click.option('--db-host', required=True, help="RDS DB host")
@click.option('--db-user', envvar='DB_USERNAME', required=True, help="RDS DB user (can be set via environment variable DB_USERNAME)")
@click.option('--db-password', envvar='DB_PASSWORD', required=True, help="RDS DB password (can be set via environment variable DB_PASSWORD)")
@click.option('--db-name', required=True, help="RDS DB name")
@click.option('--dry-run', is_flag=True, help="Enable dry run mode (no actual changes)")
def drop_tables(db_host, db_user, db_password, db_name, dry_run):
    """
    A script to drop tables from an RDS database while handling foreign key dependencies.
    The tables and foreign keys to drop are the ones listed in this script
    (TABLES_TO_DROP and FK_DEPENDENCIES). Exits with status 1 if any step fails.
    """
    try:
        connection = connect_to_db(db_host, db_user, db_password, db_name)
        try:
            # Remove the foreign keys first so the referenced table can be dropped.
            for table, referenced_table in FK_DEPENDENCIES.items():
                drop_foreign_key(connection, db_name, table, referenced_table, dry_run)

            for table in TABLES_TO_DROP:
                drop_table(connection, table, dry_run)
        finally:
            # Close the connection even when a step above raised.
            connection.close()
        logging.info("Database cleanup completed successfully.")
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        # Exit non-zero so a calling job or shell sees the failure instead of
        # a clean exit.
        raise SystemExit(1) from e
178+
179+
180+
# Run the click command only when the file is executed as a script.
if __name__ == "__main__":
    drop_tables()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../jenkins/requirements.txt

0 commit comments

Comments
 (0)