Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove an association (HPC-7692) #86

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions lib/vsc/administration/slurm/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,7 @@ def create_change_user_command(user, current_vo_id, new_vo_id, cluster):
@returns: two lists comprising the commands
"""
add_user_command = create_add_user_command(user, new_vo_id, cluster)
REMOVE_ASSOCIATION_USER_COMMAND = [
SLURM_SACCT_MGR,
"-i", # commit immediately
"delete",
"user",
"name={0}".format(user),
"Account={0}".format(current_vo_id),
"where",
"Cluster={0}".format(cluster),
]
remove_association_command = create_remove_association_command(user, current_vo_id, cluster)
logging.debug(
"Adding commands to change user %s on Cluster=%s from Account=%s to DefaultAccount=%s",
user,
Expand All @@ -221,7 +212,7 @@ def create_change_user_command(user, current_vo_id, new_vo_id, cluster):
new_vo_id
)

return [add_user_command, REMOVE_ASSOCIATION_USER_COMMAND]
return [add_user_command, remove_association_command]


def create_remove_user_command(user, cluster):
Expand All @@ -246,6 +237,25 @@ def create_remove_user_command(user, cluster):
return REMOVE_USER_COMMAND


def create_remove_association_command(user, obsolete_vo_id, cluster):
    """Build the sacctmgr call that deletes one user/account association on a cluster.

    @param user: name of the user whose association is dropped
    @param obsolete_vo_id: account (VO) the user should no longer be associated with
    @param cluster: cluster the deletion is restricted to

    @returns: a list comprising the command.
    """
    # which association to delete: the (user, account) pair ...
    selector = [
        "name={0}".format(user),
        "Account={0}".format(obsolete_vo_id),
    ]
    # ... limited to the given cluster
    restriction = ["where", "Cluster={0}".format(cluster)]

    # "-i" makes the change commit immediately
    return [SLURM_SACCT_MGR, "-i", "delete", "user"] + selector + restriction


def slurm_institute_accounts(slurm_account_info, clusters):
"""Check for the presence of the institutes and their default VOs in the slurm account list.

Expand Down Expand Up @@ -309,7 +319,7 @@ def slurm_user_accounts(vo_members, active_accounts, slurm_user_info, clusters,

for cluster in clusters:
cluster_users_acct = [
(user.User, user.Def_Acct) for user in slurm_user_info if user and user.Cluster == cluster
(user.User, user.Def_Acct, user.Account) for user in slurm_user_info if user and user.Cluster == cluster
]
cluster_users = set([u[0] for u in cluster_users_acct])

Expand All @@ -320,6 +330,7 @@ def slurm_user_accounts(vo_members, active_accounts, slurm_user_info, clusters,
new_users = set()
changed_users = set()
moved_users = set()
obsolete_users = set()

for (vo_id, (members, vo)) in vo_members.items():

Expand All @@ -330,7 +341,13 @@ def slurm_user_accounts(vo_members, active_accounts, slurm_user_info, clusters,
])

# these are the current Slurm users per Account, i.e., the VO currently being processed
slurm_acct_users = set([user for (user, acct) in cluster_users_acct if acct == vo_id])
slurm_acct_users = set([user for (user, def_acct, _) in cluster_users_acct if def_acct == vo_id])

# these are the users for which an earlier delete failed because they still had live job in their (at that
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

says who? these users are part of this set, but there might be others too, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we grab them all. AFAICT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I misunderstood. What others and where?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • By default, we only have one account per user.
  • When a user switches VO, he will be added to the new VO and the default account will be set to the new VO. He might not be removed from the old.
  • When we look through the associations, I believe we find all those that need to be removed. If they fail to be removed due to running jobs, they will be picked up again on the next run. New jobs should be submitted under the default account, so the jobs under the old association will eventually be gone.

What am I missing?

# time) current account (VO)
obsolete_users_vo = (set([user for (user, def_acct, acct) in cluster_users_acct
if acct == vo_id and def_acct != acct]) - members) & active_accounts
obsolete_users |= set([(u, vo_id) for u in obsolete_users_vo])

# these are the users that should no longer be in this account, but should not be removed
# we need to look up their new VO
Expand Down Expand Up @@ -372,4 +389,9 @@ def flatten(ls):
cluster=cluster) for (user, current_vo_id, (new_vo_id, _)) in moved_users])
)

commands.extend([create_remove_association_command(
user=user,
obsolete_vo_id=vo_id,
cluster=cluster) for (user, vo_id) in obsolete_users])

return commands
8 changes: 6 additions & 2 deletions test/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,19 @@ def test_slurm_user_accounts(self):
"""Test that the commands to create, change and remove users are correctly generated."""
vo_members = {
"vo1": (set(["user1", "user2", "user3"]), VO(vsc_id="vo1", institute={"name": "gent"})),
"vo2": (set(["user4", "user5", "user6"]), VO(vsc_id="vo2", institute={"name": "gent"})),
"vo2": (set(["user4", "user5", "user6", "user7", "user8"]), VO(vsc_id="vo2", institute={"name": "gent"})),
}

active_accounts = set(["user1", "user3", "user4", "user5", "user6", "user7"])
active_accounts = set(["user1", "user3", "user4", "user5", "user6", "user7", "user8"])
slurm_user_info = [
SlurmUser(User='user1', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user2', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user3', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user4', Def_Acct='vo1', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user5', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user7', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user8', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo2', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
SlurmUser(User='user8', Def_Acct='vo2', Admin='None', Cluster='banette', Account='vo1', Partition='', Share='1', MaxJobs='', MaxNodes='', MaxCPUs='', MaxSubmit='', MaxWall='', MaxCPUMins='', QOS='normal', Def_QOS=''),
]

commands = slurm_user_accounts(vo_members, active_accounts, slurm_user_info, ["banette"])
Expand All @@ -77,6 +80,7 @@ def test_slurm_user_accounts(self):
shlex.split("/usr/bin/sacctmgr -i delete user name=user3 Account=vo2 where Cluster=banette"),
shlex.split("/usr/bin/sacctmgr -i add user user4 Account=vo2 DefaultAccount=vo2 Cluster=banette"),
shlex.split("/usr/bin/sacctmgr -i delete user name=user4 Account=vo1 where Cluster=banette"),
shlex.split("/usr/bin/sacctmgr -i delete user name=user8 Account=vo1 where Cluster=banette"),
]]))


Expand Down