Skip to content

Commit a63dac9

Browse files
slurm: add support for parents to both account and user children and update readme
Signed-off-by: Cecilia Lau <[email protected]>
1 parent dc840ea commit a63dac9

File tree

3 files changed

+140
-120
lines changed

3 files changed

+140
-120
lines changed

coldfront/plugins/slurm/README.md

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
ColdFront django plugin providing Slurm integration for ColdFront.
44
Allocations in ColdFront are marshalled out to Slurm associations in the
55
Slurm flat file format and can be loaded with sacctmgr. For more information on
6-
the Slurm flat file format see [here](https://slurm.schedmd.com/sacctmgr.html).
6+
the Slurm flat file format, see [here](https://slurm.schedmd.com/sacctmgr.html).
77

88
A command line tool is also provided with this app that allows an administrator
99
to check the consistency between ColdFront and Slurm and optionally remove any
@@ -13,23 +13,31 @@ associations that should not be in Slurm according to ColdFront.
1313

1414
Resources in ColdFront map to Clusters (or partitions within a cluster) in
1515
Slurm. The name of the Slurm cluster is taken from a resource attribute in
16-
ColdFront named "slurm\_cluster". You can optionally provide Slurm
17-
specifications for a cluster using a resource attribute named "slurm\_specs".
16+
ColdFront named `slurm_cluster`. You can optionally provide Slurm
17+
specifications for a cluster using a resource attribute named `slurm_specs`.
1818
The value of this attribute must conform to the Slurm specification format and
1919
is colon-separated.
2020

2121
Allocations in ColdFront map to Accounts in Slurm. The name of the Slurm
2222
account is taken from an allocation attribute in ColdFront named
23-
"slurm\_account\_name" . You can optionally provide Slurm specifications for
24-
the account using a allocation attribute named "slurm\_specs". The value of
25-
this attribute must conform to the Slurm specification format and are colon
26-
separated.
23+
`slurm_account_name`. You can optionally provide Slurm specifications for
24+
the account using an allocation attribute named `slurm_specs`. The value of
25+
this attribute must conform to the Slurm specification format. This attribute
26+
can either be colon-separated or multiple instances of this attribute can be
27+
specified. For example, specifying both
28+
`slurm_specs`: `QOS='+qos_interactive,-free'`
29+
and `slurm_specs`: `DefaultAccount='my-account'` is equivalent to specifying
30+
only `slurm_specs`: `QOS='+qos_interactive,-free':DefaultAccount='my-account'`.
31+
You can also optionally provide child accounts with the `slurm_children`
32+
allocation attribute; set its value to the child allocation's Slurm account name.
2733

2834
Allocation users in ColdFront map to Users in Slurm. You can optionally
2935
provide Slurm specifications for each user in an allocation using an
30-
allocation attribute named "slurm\_user\_specs". The value of this attribute
31-
must conform to the Slurm specification format and are colon separated. Setting
32-
specifications on an individual user basis is not currently supported.
36+
allocation attribute named `slurm_user_specs`. The value of this attribute
37+
must conform to the Slurm specification format. This attribute can either be
38+
colon-separated or multiple instances of this attribute can be specified.
39+
Setting specifications on an individual user basis is not currently supported.
40+
3341

3442
## Usage
3543

coldfront/plugins/slurm/associations.py

Lines changed: 82 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
# SPDX-FileCopyrightText: (C) ColdFront Authors
22
#
33
# SPDX-License-Identifier: AGPL-3.0-or-later
4+
from __future__ import annotations
45

56
import datetime
67
import logging
78
import os
89
import re
910
import sys
10-
from typing import Self
11+
from typing import Optional, Self
1112

1213
from django.core.exceptions import ObjectDoesNotExist
14+
from django.db.models.query import QuerySet
1315

1416
from coldfront.core.allocation.models import Allocation, AllocationAttribute, AllocationAttributeType
1517
from coldfront.core.resource.models import Resource
@@ -97,7 +99,7 @@ def new_from_stream(stream):
9799
if parent == "root":
98100
cluster.accounts[account.name] = account
99101
elif parent_account:
100-
parent_account.add_child(account)
102+
parent_account.add_account(account)
101103
elif re.match("^Parent - '[^']+'", line):
102104
if not cluster or not cluster.name:
103105
raise no_cluster_error
@@ -113,7 +115,7 @@ def new_from_stream(stream):
113115
user = SlurmUser.new_from_sacctmgr(line)
114116
if not parent or not parent_account:
115117
raise SlurmParserError(f"Found user record without Parent for line: {line}")
116-
parent_account.add_child(user)
118+
parent_account.add_user(user)
117119

118120
if not cluster or not cluster.name:
119121
raise no_cluster_error
@@ -135,11 +137,10 @@ def new_from_resource(resource):
135137
allocations = resource.allocation_set.filter(status__name__in=["Active", "Renewal Requested"])
136138
for allocation in allocations:
137139
cluster.add_allocation(allocation, allocations, user_specs=user_specs)
138-
# remove child accounts cluster accounts
140+
# remove child accounts from cluster accounts
139141
child_accounts = set()
140142
for account in cluster.accounts.values():
141-
if account.child_type == SlurmAccount:
142-
child_accounts.update(account.children.keys())
143+
child_accounts.update(account.accounts.keys())
143144
for account_name in child_accounts:
144145
del cluster.accounts[account_name]
145146

@@ -154,8 +155,7 @@ def new_from_resource(resource):
154155
# remove child accounts from cluster accounts
155156
child_accounts = set()
156157
for account in cluster.accounts.values():
157-
if account.child_type == SlurmAccount:
158-
child_accounts.update(account.children.keys())
158+
child_accounts.update(account.accounts.keys())
159159
for account_name in child_accounts:
160160
del cluster.accounts[account_name]
161161

@@ -221,8 +221,8 @@ def get_objects_to_remove(self, expected: Self) -> dict[str, list[dict]]:
221221
class SlurmAccount(SlurmBase):
222222
def __init__(self, name, specs=None):
223223
super().__init__(name, specs=specs)
224-
self.child_type = None
225-
self.children: dict[str, SlurmAccount | SlurmUser] = {}
224+
self.users: dict[str, SlurmUser] = {}
225+
self.accounts: dict[str, SlurmAccount] = {}
226226

227227
@staticmethod
228228
def new_from_sacctmgr(line):
@@ -238,7 +238,7 @@ def new_from_sacctmgr(line):
238238

239239
return SlurmAccount(name, specs=parts[1:])
240240

241-
def add_allocation(self, allocation: Allocation, res_allocations, user_specs=None):
241+
def add_allocation(self, allocation: Allocation, res_allocations: QuerySet[Allocation], user_specs=None):
242242
"""Add users from a ColdFront Allocation model to SlurmAccount"""
243243
if user_specs is None:
244244
user_specs = []
@@ -253,61 +253,48 @@ def add_allocation(self, allocation: Allocation, res_allocations, user_specs=Non
253253
)
254254

255255
child_accounts = set(allocation.get_attribute_list(SLURM_CHILDREN_ATTRIBUTE_NAME))
256-
if len(child_accounts) > 0 and allocation.allocationuser_set.count() > 0:
257-
raise SlurmError(
258-
f"Allocation {allocation} cannot be a parent and have users!"
259-
f" Please remove users or all {SLURM_CHILDREN_ATTRIBUTE_NAME} attributes."
260-
)
261-
262256
self.specs += allocation.get_attribute_list(SLURM_SPECS_ATTRIBUTE_NAME)
263257

264-
if len(child_accounts) > 0:
265-
self.child_type = SlurmAccount
266-
for account_name in child_accounts:
267-
account = self.children.get(account_name, SlurmAccount(account_name))
268-
try:
269-
child_allocation = res_allocations.get(
270-
pk=AllocationAttribute.objects.get(
271-
allocation_attribute_type=SLURM_ACCOUNT_ATTRIBUTE_TYPE, value=account_name
272-
).allocation.pk
273-
)
274-
account.add_allocation(child_allocation, res_allocations, user_specs=user_specs)
275-
except ObjectDoesNotExist:
276-
raise SlurmError(
277-
f"No allocation with {SLURM_ACCOUNT_ATTRIBUTE_TYPE}={account_name} in correct resource" # Don't have an easy way to get the resource here
278-
)
279-
280-
self.add_child(account)
281-
else:
282-
self.child_type = SlurmUser
283-
allocation_user_specs = allocation.get_attribute_list(SLURM_USER_SPECS_ATTRIBUTE_NAME)
284-
for u in allocation.allocationuser_set.filter(status__name="Active"):
285-
user = SlurmUser(u.user.username)
286-
user.specs += allocation_user_specs
287-
user.specs += user_specs
288-
self.add_child(user)
289-
290-
def add_child(self, child):
291-
if not self.child_type:
292-
self.child_type = type(child)
293-
else:
294-
if type(child) is not self.child_type:
258+
for account_name in child_accounts:
259+
account = self.accounts.get(account_name, SlurmAccount(account_name))
260+
try:
261+
child_allocation = res_allocations.get(
262+
pk=AllocationAttribute.objects.get(
263+
allocation_attribute_type=SLURM_ACCOUNT_ATTRIBUTE_TYPE, value=account_name
264+
).allocation.pk
265+
)
266+
account.add_allocation(child_allocation, res_allocations, user_specs=user_specs)
267+
except ObjectDoesNotExist:
295268
raise SlurmError(
296-
f"Cannot assign child of type {type(child)} to parent with child_type {self.child_type}"
269+
f"No allocation with {SLURM_ACCOUNT_ATTRIBUTE_TYPE}={account_name} in correct resource" # Don't have an easy way to get the resource here
297270
)
298-
if child.name not in self.children:
299-
self.children[child.name] = child
300271

301-
ch = self.children[child.name]
302-
ch.specs += child.specs
303-
self.children[child.name] = ch
272+
self.add_account(account)
304273

305-
def get_account(self, account_name):
306-
if self.child_type != SlurmAccount:
307-
return None
308-
if account_name in self.children.keys():
309-
return self.children[account_name]
310-
for account in self.children.values():
274+
allocation_user_specs = allocation.get_attribute_list(SLURM_USER_SPECS_ATTRIBUTE_NAME)
275+
for u in allocation.allocationuser_set.filter(status__name="Active"):
276+
user = SlurmUser(u.user.username)
277+
user.specs += allocation_user_specs
278+
user.specs += user_specs
279+
self.add_user(user)
280+
281+
def add_account(self, account: SlurmAccount) -> None:
282+
if account.name not in self.accounts:
283+
self.accounts[account.name] = account
284+
return
285+
self.accounts[account.name].specs += account.specs
286+
287+
def add_user(self, user: SlurmUser) -> None:
288+
if user.name not in self.users:
289+
self.users[user.name] = user
290+
return
291+
self.users[user.name].specs += user.specs
292+
293+
def get_account(self, account_name: str) -> Optional[SlurmAccount]:
294+
"""Gets an account, traversing through child accounts"""
295+
if account_name in self.accounts.keys():
296+
return self.accounts[account_name]
297+
for account in self.accounts.values():
311298
result = account.get_account(account_name)
312299
if result:
313300
return result
@@ -319,14 +306,14 @@ def write(self, out):
319306

320307
def write_children(self, out):
321308
self._write(out, f"Parent - '{self.name}'\n")
322-
for child in self.children.values():
323-
child.write(out)
324-
if self.child_type == SlurmUser:
325-
return
326-
for child in self.children.values():
327-
child.write_children(out)
309+
for user in self.users.values():
310+
user.write(out)
311+
for account in self.accounts.values():
312+
account.write(out)
313+
for account in self.accounts.values():
314+
account.write_children(out)
328315

329-
def get_objects_to_remove(self, expected: Self | None = None) -> dict[str, list[dict]]:
316+
def get_objects_to_remove(self, expected: Optional[Self] = None) -> dict[str, list[dict]]:
330317
"""Get the objects to remove from this account based on the expected account.
331318
If expected is None, remove the entire account.
332319
"""
@@ -337,44 +324,39 @@ def get_objects_to_remove(self, expected: Self | None = None) -> dict[str, list[
337324
}
338325

339326
if expected is None:
340-
if self.child_type == SlurmAccount:
341-
for account in self.children.values():
342-
child_objects_to_remove = account.get_objects_to_remove()
343-
for key, value in child_objects_to_remove.items():
344-
objects_to_remove[key].extend(value)
345-
elif self.child_type == SlurmUser:
346-
for uid in self.children.keys():
347-
objects_to_remove["users"].append({"user": uid, "account": self.name})
327+
for account in self.accounts.values():
328+
child_objects_to_remove = account.get_objects_to_remove()
329+
for key, value in child_objects_to_remove.items():
330+
objects_to_remove[key].extend(value)
331+
for uid in self.users.keys():
332+
objects_to_remove["users"].append({"user": uid, "account": self.name})
348333
objects_to_remove["accounts"].append({"account": self.name})
349334
return objects_to_remove
350335

351-
if self.child_type != expected.child_type:
352-
# remove this entire account
353-
child_objects_to_remove = self.get_objects_to_remove()
336+
accounts_removed = 0
337+
for account_name, account in self.accounts.items():
338+
if account_name not in expected.accounts:
339+
accounts_removed += 1
340+
child_objects_to_remove = self.get_objects_to_remove(expected.accounts.get(account_name))
354341
for key, value in child_objects_to_remove.items():
355342
objects_to_remove[key].extend(value)
356-
elif self.child_type == expected.child_type:
357-
children_removed = 0
358-
if self.child_type == SlurmAccount:
359-
for account_name, account in self.children.items():
360-
child_objects_to_remove = self.get_objects_to_remove(expected.children.get(account_name))
361-
for key, value in child_objects_to_remove.items():
362-
objects_to_remove[key].extend(value)
363-
elif self.child_type == SlurmUser:
364-
for uid, user in self.children.items():
365-
if uid == "root":
366-
continue
367-
if uid not in expected.children:
368-
objects_to_remove["users"].append({"user": uid, "account": self.name})
369-
children_removed += 1
370-
else:
371-
qoses_to_remove = user.get_qoses_to_remove(self.name, self.name, expected.children[uid])
372-
if len(qoses_to_remove) > 0:
373-
objects_to_remove["qoses"].append(
374-
{"user": uid, "account": self.name, "qos": "QOS-=" + ",".join(list(qoses_to_remove))}
375-
)
376-
if children_removed == len(self.children):
377-
objects_to_remove["accounts"].append({"account": self.name})
343+
344+
users_removed = 0
345+
for uid, user in self.users.items():
346+
if uid == "root":
347+
continue
348+
if uid not in expected.users:
349+
objects_to_remove["users"].append({"user": uid, "account": self.name})
350+
users_removed += 1
351+
else:
352+
qoses_to_remove = user.get_qoses_to_remove(self.name, self.name, expected.users[uid])
353+
if len(qoses_to_remove) > 0:
354+
objects_to_remove["qoses"].append(
355+
{"user": uid, "account": self.name, "qos": "QOS-=" + ",".join(list(qoses_to_remove))}
356+
)
357+
358+
if accounts_removed == len(self.accounts) and users_removed == len(self.users):
359+
objects_to_remove["accounts"].append({"account": self.name})
378360
return objects_to_remove
379361

380362

0 commit comments

Comments
 (0)