Skip to content

Add option to specify rebuild image via a file #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ If you don't specify the image, it will default to doing a rebuild with
the existing image. If you don't have "rebuild" at the start of your
reason, openstack nodes will do a regular reboot.

If you need to trigger a rebuild from a job, and therefore cannot
specify a custom reason for the reboot, you can instead add a file
containing an image UUID at the following location on the node
you want to rebuild: ``/var/spool/slurm/REBUILD_IMAGE_UUID``
In addition, when this file is present, we no longer call out
to sinfo. This can be very useful if you have sinfo installed
in a non-standard location.

slurm-stats
^^^^^^^^^^^

Expand Down
22 changes: 19 additions & 3 deletions slurm_openstack_tools/reboot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
Expand Down Expand Up @@ -33,6 +33,7 @@
logger.addHandler(handler)

INSTANCE_UUID_FILE = "/var/lib/cloud/data/instance-id"
REBUILD_IMAGE_UUID_FILE = "/var/spool/slurm/REBUILD_IMAGE_UUID"


def get_openstack_server_id():
Expand All @@ -43,6 +44,15 @@ def get_openstack_server_id():
return f.readline().strip()


def get_rebuild_image_from_file():
    """Return the image UUID requested via the rebuild spool file, if any.

    Reads the first line of REBUILD_IMAGE_UUID_FILE and strips the
    surrounding whitespace.

    Returns:
        The stripped first line of the file, or None when the file does
        not exist or its first line is blank.
    """
    # Open the file directly instead of testing for existence first, so a
    # file removed between the check and the open cannot raise.
    try:
        with open(REBUILD_IMAGE_UUID_FILE) as f:
            image = f.readline().strip()
    except (FileNotFoundError, NotADirectoryError):
        return None

    if not image:
        # A blank first line carries no request; report it the same way
        # as a missing file rather than logging an empty image id.
        return None

    logger.info(f"spool file requested image:{image}")
    return image


def get_sinfo_path():
# TODO(johngarbutt): get this from environment or config file?
sinfo_alt_path = "/usr/local/software/slurm/current/bin/sinfo"
Expand All @@ -52,6 +62,12 @@ def get_sinfo_path():


def get_reboot_reason():
image_uuid = get_rebuild_image_from_file()
if image_uuid:
# don't need to check sinfo
# TODO(johngarbutt) need a cleaner interface
return f"rebuild image:{image_uuid}"

# find our short hostname (without fqdn):
hostname = socket.gethostname().split(".")[0]
sinfo_path = get_sinfo_path()
Expand All @@ -78,7 +94,7 @@ def get_image_from_reason(reason):
if len(image_tokens) == 2 and image_tokens[0] == "image":
if image_tokens[1]:
image = image_tokens[1]
logger.info(f"user requested image:%{image}")
logger.info(f"requested image:{image}")
return image


Expand All @@ -90,7 +106,7 @@ def rebuild_openstack_server(server_id, reason):
image_uuid = get_image_from_reason(reason)
if not image_uuid:
image_uuid = server.image.id
logger.info(f"fallback to existing image:%{image_uuid}")
logger.info(f"fallback to existing image:{image_uuid}")

# Note that OpenStack will power down the server as part of the rebuild
logger.info(f"rebuilding server %{server_id} with image %{image_uuid}")
Expand Down
7 changes: 7 additions & 0 deletions slurm_openstack_tools/tests/test_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,10 @@ def test_rebuild_or_reboot_non_openstack(
reboot.rebuild_or_reboot()
mock_exec.assert_called_once_with("reboot", ["reboot"])
mock_id.assert_called_once_with()

@mock.patch.object(reboot, "get_rebuild_image_from_file",
                   return_value="uuid")
def test_get_reboot_reason(self, mock_get_image):
    """The reboot reason is built from the image the spool file supplies."""
    # The file lookup is patched to return "uuid", so the reason should
    # be the rebuild prefix followed by that value.
    self.assertEqual("rebuild image:uuid", reboot.get_reboot_reason())
    mock_get_image.assert_called_once_with()