Skip to content

Commit d3db0f4

Browse files
Allenz5himani2411
andauthored
[Subnet Prioritization] Support capacity-optimized-prioritized and prioritized Allocation Strategy (#671)
* [Subnet Prioritization] Add SingleAvailabilityZone parameter to ec2 fleet call Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Add Priority parameter to ec2 fleet call Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Extend test_evaluate_launch_params to test params launching with EnableSingleAvailabilityZone and prioritized|capacity-optimized-prioritized AllocationStrategy Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Update CHANGELOG.md Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Check AllocationStrategy is prioritized|capacity-optimized-prioritized when using EnableSingleAvailabilityZone Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Update CHANGELOG.md Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Reformat the code to make it more clear Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Reformat the code to make it more clear Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Add a valid condition to check that 'prioritized' is used with On-Demand Capacity and 'capacity-optimized-prioritized' is used with Spot Capacity type. 
Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Remove EnableSingleAvailabilityZone parameter Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Update unit tests to test that priority is correctly set with prioritized|capacity-optimized-prioritized AllocationStrategy and all_or_nothing ScalingStrategy Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Update unit tests to test that priority is correctly set with prioritized|capacity-optimized-prioritized AllocationStrategy and all_or_nothing ScalingStrategy Signed-off-by: Hanxuan Zhang <[email protected]> * [Subnet Prioritization] Update CHANGELOG.md Signed-off-by: Hanxuan Zhang <[email protected]> --------- Signed-off-by: Hanxuan Zhang <[email protected]> Co-authored-by: Himani Anil Deshpande <[email protected]>
1 parent 559cf78 commit d3db0f4

File tree

8 files changed

+243
-2
lines changed

8 files changed

+243
-2
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ aws-parallelcluster-node CHANGELOG
33

44
This file is used to list changes made in each version of the aws-parallelcluster-node package.
55

6+
3.14.0
7+
------
8+
9+
**CHANGES**
10+
- Support the `prioritized` and `capacity-optimized-prioritized` Allocation Strategies.
11+
612
3.13.2
713
------
814

src/slurm_plugin/fleet_manager.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,11 +296,18 @@ def _evaluate_template_overrides(self) -> list:
296296
if self._compute_resource_config.get("MaxPrice"):
297297
overrides.update({"MaxPrice": str(self._compute_resource_config["MaxPrice"])})
298298

299+
priority = 0.0
299300
for instance_type in self._compute_resource_config["Instances"]:
300-
subnet_ids = self._compute_resource_config["Networking"]["SubnetIds"]
301+
subnet_ids = self._compute_resource_config.get("Networking", {}).get("SubnetIds", [])
301302
for subnet_id in subnet_ids:
302-
overrides.update({"InstanceType": instance_type["InstanceType"], "SubnetId": subnet_id})
303+
if self._uses_subnet_prioritization():
304+
overrides.update(
305+
{"InstanceType": instance_type["InstanceType"], "SubnetId": subnet_id, "Priority": priority}
306+
)
307+
else:
308+
overrides.update({"InstanceType": instance_type["InstanceType"], "SubnetId": subnet_id})
303309
template_overrides.append(copy.deepcopy(overrides))
310+
priority += 1.0
304311
return template_overrides
305312

306313
def _uses_single_instance_type(self):
@@ -312,6 +319,15 @@ def _uses_single_az(self):
312319
subnet_ids = self._compute_resource_config.get("Networking", {}).get("SubnetIds", [])
313320
return len(subnet_ids) == 1
314321

322+
def _uses_subnet_prioritization(self):
    """
    Tell whether the compute resource is configured for a prioritized allocation strategy.

    Only two pairings of AllocationStrategy and CapacityType qualify:
    "prioritized" with "on-demand", and "capacity-optimized-prioritized" with "spot".

    :return: True when the compute resource config matches one of the two pairings, False otherwise
    """
    allocation_strategy = self._compute_resource_config.get("AllocationStrategy")
    # CapacityType is read with .get(), like AllocationStrategy, so that a config without the key
    # evaluates to False instead of raising KeyError.
    capacity_type = self._compute_resource_config.get("CapacityType")
    return (allocation_strategy, capacity_type) in (
        ("prioritized", "on-demand"),
        ("capacity-optimized-prioritized", "spot"),
    )
330+
315331
def _evaluate_launch_params(self, count):
316332
"""Evaluate parameters to be passed to create_fleet call."""
317333
try:

tests/common.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,56 @@ def client_error(error_code):
110110
"CapacityReservationId": "cr-234567",
111111
},
112112
},
113+
"queue-single-az": {
114+
"c5xlarge": {"Api": "run-instances", "Instances": [{"InstanceType": "c5.xlarge"}]},
115+
"fleet1": {
116+
"Api": "create-fleet",
117+
"Instances": [{"InstanceType": "t2.medium"}, {"InstanceType": "t2.large"}],
118+
"AllocationStrategy": "prioritized",
119+
"CapacityType": "on-demand",
120+
"Networking": MULTIPLE_SUBNETS,
121+
},
122+
},
123+
"queue-prioritized": {
124+
"c5xlarge": {"Api": "run-instances", "Instances": [{"InstanceType": "c5.xlarge"}]},
125+
"fleet1": {
126+
"Api": "create-fleet",
127+
"Instances": [{"InstanceType": "t2.medium"}, {"InstanceType": "t2.large"}],
128+
"AllocationStrategy": "prioritized",
129+
"CapacityType": "on-demand",
130+
"Networking": MULTIPLE_SUBNETS,
131+
},
132+
},
133+
"queue-capacity-optimized-prioritized": {
134+
"c5xlarge": {"Api": "run-instances", "Instances": [{"InstanceType": "c5.xlarge"}]},
135+
"fleet1": {
136+
"Api": "create-fleet",
137+
"Instances": [{"InstanceType": "t2.medium"}, {"InstanceType": "t2.large"}],
138+
"AllocationStrategy": "capacity-optimized-prioritized",
139+
"CapacityType": "spot",
140+
"Networking": MULTIPLE_SUBNETS,
141+
},
142+
},
143+
"queue-prioritized-all-or-nothing": {
144+
"c5xlarge": {"Api": "run-instances", "Instances": [{"InstanceType": "c5.xlarge"}]},
145+
"fleet1": {
146+
"Api": "create-fleet",
147+
"Instances": [{"InstanceType": "t2.medium"}],
148+
"AllocationStrategy": "prioritized",
149+
"CapacityType": "on-demand",
150+
"Networking": MULTIPLE_SUBNETS,
151+
},
152+
},
153+
"queue-capacity-optimized-prioritized-all-or-nothing": {
154+
"c5xlarge": {"Api": "run-instances", "Instances": [{"InstanceType": "c5.xlarge"}]},
155+
"fleet1": {
156+
"Api": "create-fleet",
157+
"Instances": [{"InstanceType": "t2.medium"}],
158+
"AllocationStrategy": "capacity-optimized-prioritized",
159+
"CapacityType": "spot",
160+
"Networking": MULTIPLE_SUBNETS,
161+
},
162+
},
113163
}
114164

115165
LAUNCH_OVERRIDES = {}

tests/slurm_plugin/test_fleet_manager.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,15 @@ class TestEc2CreateFleetManager:
391391
{},
392392
"All-or-Nothing is only available with single instance type compute resources or single subnet queues",
393393
),
394+
# Use "prioritized" Allocation Strategy AND Launch Override with Priority
395+
(5, "queue-prioritized", "fleet1", False, {}, None),
396+
# Use "capacity-optimized-prioritized" Allocation Strategy AND Launch Override with Priority
397+
(5, "queue-capacity-optimized-prioritized", "fleet1", False, {}, None),
398+
# Use "prioritized" Allocation Strategy AND Launch Override with Priority AND all_or_nothing is True
399+
(5, "queue-prioritized-all-or-nothing", "fleet1", True, {}, None),
400+
# Use "capacity-optimized-prioritized" Allocation Strategy
401+
# AND Launch Override with Priority AND all_or_nothing is True
402+
(5, "queue-capacity-optimized-prioritized-all-or-nothing", "fleet1", True, {}, None),
394403
],
395404
ids=[
396405
"fleet_spot",
@@ -402,6 +411,10 @@ class TestEc2CreateFleetManager:
402411
"fleet-multi-az-single-it-all_or_nothing",
403412
"fleet-multi-az-multi-it",
404413
"fleet-multi-az-multi-it-all_or_nothing",
414+
"prioritized",
415+
"capacity_optimized_prioritized",
416+
"prioritized_all_or_nothing",
417+
"capacity_optimized_prioritized_all_or_nothing",
405418
],
406419
)
407420
def test_evaluate_launch_params(
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"LaunchTemplateConfigs": [
3+
{
4+
"LaunchTemplateSpecification": {
5+
"LaunchTemplateName": "hit-queue-capacity-optimized-prioritized-fleet1",
6+
"Version": "$Latest"
7+
},
8+
"Overrides": [
9+
{
10+
"InstanceType": "t2.medium",
11+
"SubnetId": "1234567",
12+
"Priority": 0.0
13+
},
14+
{
15+
"InstanceType": "t2.medium",
16+
"SubnetId": "7654321",
17+
"Priority": 1.0
18+
},
19+
{
20+
"InstanceType": "t2.large",
21+
"SubnetId": "1234567",
22+
"Priority": 2.0
23+
},
24+
{
25+
"InstanceType": "t2.large",
26+
"SubnetId": "7654321",
27+
"Priority": 3.0
28+
}
29+
]
30+
}
31+
],
32+
"SpotOptions": {
33+
"SingleInstanceType": false,
34+
"SingleAvailabilityZone": false,
35+
"AllocationStrategy": "capacity-optimized-prioritized"
36+
},
37+
"TargetCapacitySpecification": {
38+
"TotalTargetCapacity": 5,
39+
"DefaultTargetCapacityType": "spot"
40+
},
41+
"Type": "instant"
42+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"LaunchTemplateConfigs": [
3+
{
4+
"LaunchTemplateSpecification": {
5+
"LaunchTemplateName": "hit-queue-capacity-optimized-prioritized-all-or-nothing-fleet1",
6+
"Version": "$Latest"
7+
},
8+
"Overrides": [
9+
{
10+
"InstanceType": "t2.medium",
11+
"SubnetId": "1234567",
12+
"Priority": 0.0
13+
},
14+
{
15+
"InstanceType": "t2.medium",
16+
"SubnetId": "7654321",
17+
"Priority": 1.0
18+
}
19+
]
20+
}
21+
],
22+
"SpotOptions": {
23+
"SingleInstanceType": true,
24+
"SingleAvailabilityZone": false,
25+
"AllocationStrategy": "capacity-optimized-prioritized",
26+
"MinTargetCapacity": 5
27+
},
28+
"TargetCapacitySpecification": {
29+
"TotalTargetCapacity": 5,
30+
"DefaultTargetCapacityType": "spot"
31+
},
32+
"Type": "instant"
33+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"LaunchTemplateConfigs": [
3+
{
4+
"LaunchTemplateSpecification": {
5+
"LaunchTemplateName": "hit-queue-prioritized-fleet1",
6+
"Version": "$Latest"
7+
},
8+
"Overrides": [
9+
{
10+
"InstanceType": "t2.medium",
11+
"SubnetId": "1234567",
12+
"Priority": 0.0
13+
},
14+
{
15+
"InstanceType": "t2.medium",
16+
"SubnetId": "7654321",
17+
"Priority": 1.0
18+
},
19+
{
20+
"InstanceType": "t2.large",
21+
"SubnetId": "1234567",
22+
"Priority": 2.0
23+
},
24+
{
25+
"InstanceType": "t2.large",
26+
"SubnetId": "7654321",
27+
"Priority": 3.0
28+
}
29+
]
30+
}
31+
],
32+
"OnDemandOptions": {
33+
"AllocationStrategy": "prioritized",
34+
"SingleInstanceType": false,
35+
"SingleAvailabilityZone": false,
36+
"CapacityReservationOptions": {
37+
"UsageStrategy": "use-capacity-reservations-first"
38+
}
39+
},
40+
"TargetCapacitySpecification": {
41+
"TotalTargetCapacity": 5,
42+
"DefaultTargetCapacityType": "on-demand"
43+
},
44+
"Type": "instant"
45+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"LaunchTemplateConfigs": [
3+
{
4+
"LaunchTemplateSpecification": {
5+
"LaunchTemplateName": "hit-queue-prioritized-all-or-nothing-fleet1",
6+
"Version": "$Latest"
7+
},
8+
"Overrides": [
9+
{
10+
"InstanceType": "t2.medium",
11+
"SubnetId": "1234567",
12+
"Priority": 0.0
13+
},
14+
{
15+
"InstanceType": "t2.medium",
16+
"SubnetId": "7654321",
17+
"Priority": 1.0
18+
}
19+
]
20+
}
21+
],
22+
"OnDemandOptions": {
23+
"AllocationStrategy": "prioritized",
24+
"SingleInstanceType": true,
25+
"SingleAvailabilityZone": false,
26+
"CapacityReservationOptions": {
27+
"UsageStrategy": "use-capacity-reservations-first"
28+
},
29+
"MinTargetCapacity": 5
30+
},
31+
"TargetCapacitySpecification": {
32+
"TotalTargetCapacity": 5,
33+
"DefaultTargetCapacityType": "on-demand"
34+
},
35+
"Type": "instant"
36+
}

0 commit comments

Comments
 (0)