Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"oslo.log",
"iso8601",
"pyyaml",
"kubernetes",
]


Expand All @@ -43,3 +44,4 @@ floating_ip_reaper = "hammers.ip_cleaner:launch_main"
expired_project_reaper = "hammers.expired_project_cleaner:launch_main"
image_deployer = "hammers.image_deployer:launch_main"
set_image_property = "hammers.set_image_property:launch_main"
cleanup_jupyter_volumes = "hammers.cleanup_jupyter_volumes:launch_main"
149 changes: 149 additions & 0 deletions src/hammers/cleanup_jupyter_volumes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Module to clean up resources by expired projects"""

from collections import defaultdict
import logging
import argparse
import re
import os
import sys
from datetime import timedelta as TimeDelta
from kubernetes import client, config

from hammers.utils import get_user_groups, project_is_expired


# Configure the root logger at import time: INFO level, with a
# "YYYY-MM-DD HH:MM:SS LEVEL    message" line format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger; main() raises its level to DEBUG when --debug is passed.
LOG = logging.getLogger(__name__)


def get_volumes_by_username(namespace, kube_config_path: str = None) -> dict[str, list[str]]:
    """Group the PersistentVolumeClaim names of a namespace by owning username.

    Args:
        namespace: Kubernetes namespace to list PVCs from. When falsy, the
            previously hard-coded namespace ``"jupyter"`` is used so existing
            invocations keep working.
        kube_config_path: Path to a kubeconfig file. When falsy, the
            in-cluster configuration is loaded instead.

    Returns:
        A mapping of username to the list of PVC names attributed to that user.
        The username comes from the ``hub.jupyter.org/username`` annotation
        when present, otherwise it is derived from the PVC name.
    """
    if kube_config_path:
        # Use the caller-supplied path (the original passed a placeholder literal).
        config.load_kube_config(config_file=kube_config_path)
    else:
        config.load_incluster_config()

    v1 = client.CoreV1Api()

    volumes_by_username = defaultdict(list)
    pvcs = v1.list_namespaced_persistent_volume_claim(
        namespace=namespace or "jupyter"
    ).items
    for pvc in pvcs:
        # A PVC without any annotations exposes ``annotations`` as None.
        annotations = pvc.metadata.annotations or {}
        username = annotations.get("hub.jupyter.org/username")
        pvc_name = pvc.metadata.name
        # If the PVC was not bound since migrations, it won't have the annotation.
        if not username:
            # Remove both claim- prefix and 6 digit hex suffix. Not 100% accurate, but since
            # most usernames end with .edu, and u is not a hex digit, it mostly works.
            partial_name = re.sub(r'^claim-', '', pvc_name)
            username = re.sub(r'-[a-f0-9]{6,}$', '', partial_name)
        if username:
            volumes_by_username[username].append(pvc_name)
    return volumes_by_username


def parse_args(args: list[str]) -> argparse.Namespace:
    """Handle CLI arguments.

    Several options fall back to ``HAMMERS_*`` environment variables when the
    flag is not given on the command line.

    Args:
        args: Argument strings to parse (typically ``sys.argv[1:]``).

    Returns:
        The parsed arguments as an ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--dry-run",
        action="store_true",
        # NOTE: the environment value is used as-is, so any non-empty string
        # (even "false" or "0") is truthy and enables dry-run mode.
        default=os.environ.get('HAMMERS_DRY_RUN'),
        help="print out which volumes would be deleted, instead of deleting them.",
    )
    parser.add_argument("--debug", action="store_true", help="increase log verbosity.")
    parser.add_argument(
        "--portal-api-token",
        type=str,
        default=os.environ.get('HAMMERS_PORTAL_API_TOKEN'),
        help="API token for portal",
    )
    parser.add_argument(
        "--ignore-pending",
        action="store_true",
        help="Ignore volumes from a project with a pending allocation.",
    )
    parser.add_argument(
        "--keycloak-url",
        type=str,
        default=os.environ.get("HAMMERS_KEYCLOAK_URL", "https://auth.chameleoncloud.org/auth"),
        help="Base URL of the Keycloak server.",
    )
    parser.add_argument(
        "--keycloak-client-id",
        type=str,
        default=os.environ.get("HAMMERS_KEYCLOAK_CLIENT_ID", "portal-admin"),
        help="Keycloak admin client id.",
    )
    parser.add_argument(
        "--keycloak-client-secret",
        type=str,
        default=os.environ.get("HAMMERS_KEYCLOAK_CLIENT_SECRET"),
        help="Keycloak admin client secret.",
    )
    parser.add_argument(
        "--kube-config-path",
        type=str,
        help="Path to kubeconfig file.",
    )
    parser.add_argument(
        "--kube-namespace",
        type=str,
        default=os.environ.get("HAMMERS_KUBE_NAMESPACE"),
        help="Kubernetes namespace to use.",
    )
    return parser.parse_args(args)


def main(arg_list: list[str]) -> None:
    """Report Jupyter volumes whose owner has no active project.

    For every username that owns at least one PVC, the user's Keycloak groups
    are fetched and each one is checked with ``project_is_expired`` using a
    365-day grace period. If none is still active, each of the user's volumes
    is logged as a deletion candidate. Actual deletion is not implemented yet.

    Any error while looking up or checking a user's projects is logged and
    that user is skipped, so a lookup failure never flags volumes for deletion.

    Args:
        arg_list: Command-line arguments, without the program name.
    """
    args = parse_args(arg_list)

    if args.debug:
        LOG.setLevel(logging.DEBUG)

    api_token = args.portal_api_token
    dry_run = args.dry_run
    ignore_pending = args.ignore_pending
    grace_period = TimeDelta(days=365)

    volumes_by_username = get_volumes_by_username(args.kube_namespace, args.kube_config_path)
    for username, volumes in volumes_by_username.items():
        try:
            projects = get_user_groups(
                username,
                args.keycloak_url,
                args.keycloak_client_id,
                args.keycloak_client_secret,
            )
            # Keycloak returns group representations (dicts); the group name is
            # presumably the project charge code -- TODO confirm. Plain strings
            # are passed through unchanged.
            charge_codes = [
                group["name"] if isinstance(group, dict) else group
                for group in projects
            ]
            # NOTE Some projects may be "admin" projects, but in that case project_is_expired returns True.
            # any() stops at the first active project, avoiding needless portal calls.
            has_active_project = any(
                not project_is_expired(
                    charge_code, grace_period, api_token=api_token, ignore_pending=ignore_pending, log=LOG
                )
                for charge_code in charge_codes
            )
        except Exception:
            # Best effort per user: log with traceback and move on.
            LOG.exception("Could not get projects for user %s", username)
            continue

        if has_active_project:
            LOG.info("User %s has an active project, skipping", username)
            continue

        for volume in volumes:
            LOG.info("Should delete volume '%s' for user '%s'", volume, username)
            if not dry_run:
                # TODO add delete code once we are confident this is working.
                pass


def launch_main():
    """Console-script entry point: run main() on the process's CLI arguments."""
    cli_arguments = sys.argv[1:]  # drop the program name
    main(cli_arguments)


# Allow running this module directly as a script, in addition to the
# console-script entry point that calls launch_main().
if __name__ == "__main__":
    launch_main()
38 changes: 38 additions & 0 deletions src/hammers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,41 @@ def project_is_expired(charge_code, grace_period, ignore_pending, api_token, log
return False
log.debug("Project %s has expired", charge_code)
return True


def _get_access_token(keycloak_url, keycloak_client_id, keycloak_client_secret):
    """Obtain a Keycloak access token via the client-credentials grant.

    The token is requested from the ``master`` realm token endpoint.

    Args:
        keycloak_url: Base URL of the Keycloak server.
        keycloak_client_id: Client id used for the grant.
        keycloak_client_secret: Client secret used for the grant.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        requests.HTTPError: If Keycloak answers with an error status.
        requests.Timeout: If Keycloak does not answer within the timeout.
    """
    url = f"{keycloak_url}/realms/master/protocol/openid-connect/token"
    data = {
        'grant_type': 'client_credentials',
        'client_id': keycloak_client_id,
        'client_secret': keycloak_client_secret
    }
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    # requests has no default timeout; without one an unresponsive server
    # would block this job forever.
    response = requests.post(url, data=data, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()['access_token']


def get_user_id(username, keycloak_url, keycloak_client_id, keycloak_client_secret, access_token=None):
    """Look up the Keycloak id of a user in the ``chameleon`` realm.

    Args:
        username: Username to look up.
        keycloak_url: Base URL of the Keycloak server.
        keycloak_client_id: Client id, used only when a token must be fetched.
        keycloak_client_secret: Client secret, used only when a token must be fetched.
        access_token: Optional existing bearer token; fetched when not given.

    Returns:
        The ``id`` of the user whose username equals ``username``.

    Raises:
        LookupError: If no user with exactly that username exists.
        requests.HTTPError: If Keycloak answers with an error status.
        requests.Timeout: If Keycloak does not answer within the timeout.
    """
    if not access_token:
        access_token = _get_access_token(keycloak_url, keycloak_client_id, keycloak_client_secret)
    url = f"{keycloak_url}/admin/realms/chameleon/users"
    headers = {'Authorization': f'Bearer {access_token}'}
    # Keycloak's username search is a substring match unless "exact" is set.
    params = {"username": username, "exact": "true"}
    # requests has no default timeout; avoid hanging on an unresponsive server.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    # Also filter client-side, so a server that ignores "exact" can never make
    # us return a different user whose name merely contains ``username``.
    wanted = username.casefold()
    for user in response.json():
        if str(user.get('username', '')).casefold() == wanted:
            return user['id']
    raise LookupError(f"User '{username}' not found")


def get_user_groups(username, keycloak_url, keycloak_client_id, keycloak_client_secret, access_token=None):
    """Fetch the Keycloak groups of a user in the ``chameleon`` realm.

    Args:
        username: Username whose groups are requested.
        keycloak_url: Base URL of the Keycloak server.
        keycloak_client_id: Client id, used only when a token must be fetched.
        keycloak_client_secret: Client secret, used only when a token must be fetched.
        access_token: Optional existing bearer token; fetched when not given
            and then reused for both the user lookup and the groups request.

    Returns:
        The decoded JSON body of the groups response, unmodified.

    Raises:
        requests.HTTPError: If Keycloak answers with an error status.
        requests.Timeout: If Keycloak does not answer within the timeout.
        Exception: Whatever ``get_user_id`` raises when the user is not found.
    """
    if not access_token:
        access_token = _get_access_token(keycloak_url, keycloak_client_id, keycloak_client_secret)
    user_id = get_user_id(username, keycloak_url, keycloak_client_id, keycloak_client_secret, access_token)
    url = f"{keycloak_url}/admin/realms/chameleon/users/{user_id}/groups"
    headers = {'Authorization': f'Bearer {access_token}'}
    # requests has no default timeout; avoid hanging on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()