Skip to content

Commit 05b38b6

Browse files
committed
feat: expand pool across full region to minimize capacity issues
Signed-off-by: Adrian Riobo <[email protected]>
1 parent ac07129 commit 05b38b6

File tree

22 files changed

+215
-159
lines changed

22 files changed

+215
-159
lines changed

cmd/mapt/cmd/aws/services/mac-pool.go

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,8 @@ func houseKeep() *cobra.Command {
164164
MaxSize: viper.GetInt(paramMaxSize),
165165
},
166166
Machine: &macpool.MachineRequestArgs{
167-
VPCID: viper.GetString(paramVPCID),
168-
AZID: viper.GetString(paramAZID),
169-
SubnetID: viper.GetString(paramSubnetID),
170-
SSHSGID: viper.GetString(paramSSHSGID)},
167+
VPCID: viper.GetString(paramVPCID),
168+
SSHSGID: viper.GetString(paramSSHSGID)},
171169
}); err != nil {
172170
logging.Error(err)
173171
}
@@ -184,8 +182,6 @@ func houseKeep() *cobra.Command {
184182
flagSet.StringP(awsParams.MACOSVersion, "", awsParams.MACOSVersion, awsParams.MACOSVersionDefault)
185183
flagSet.Bool(params.Serverless, false, params.ServerlessDesc)
186184
flagSet.StringP(paramVPCID, "", paramVPCIDDefault, paramVPCIDDesc)
187-
flagSet.StringP(paramAZID, "", paramAZIDDefault, paramAZIDDesc)
188-
flagSet.StringP(paramSubnetID, "", paramSubnetIDDefault, paramSubnetIDDesc)
189185
flagSet.StringP(paramSSHSGID, "", paramSSHSGIDDefault, paramSSHSGIDDesc)
190186
c.PersistentFlags().AddFlagSet(flagSet)
191187
return c
@@ -237,10 +233,8 @@ func request() *cobra.Command {
237233
Architecture: viper.GetString(awsParams.MACArch),
238234
OSVersion: viper.GetString(awsParams.MACOSVersion),
239235
Machine: &macpool.MachineRequestArgs{
240-
VPCID: viper.GetString(paramVPCID),
241-
AZID: viper.GetString(paramAZID),
242-
SubnetID: viper.GetString(paramSubnetID),
243-
SSHSGID: viper.GetString(paramSSHSGID),
236+
VPCID: viper.GetString(paramVPCID),
237+
SSHSGID: viper.GetString(paramSSHSGID),
244238
},
245239
Ticket: viper.GetString(paramTicket),
246240
Timeout: viper.GetString(params.Timeout),
@@ -258,8 +252,6 @@ func request() *cobra.Command {
258252
flagSet.StringP(awsParams.MACOSVersion, "", awsParams.MACOSVersion, awsParams.MACOSVersionDefault)
259253
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
260254
flagSet.StringP(paramVPCID, "", paramVPCIDDefault, paramVPCIDDesc)
261-
flagSet.StringP(paramAZID, "", paramAZIDDefault, paramAZIDDesc)
262-
flagSet.StringP(paramSubnetID, "", paramSubnetIDDefault, paramSubnetIDDesc)
263255
flagSet.StringP(paramSSHSGID, "", paramSSHSGIDDefault, paramSSHSGIDDesc)
264256
flagSet.StringP(paramTicket, "", paramTicketDefault, paramTicketDesc)
265257
flagSet.Bool(params.Serverless, false, params.ServerlessDesc)
@@ -287,10 +279,8 @@ func release() *cobra.Command {
287279
Remote: viper.IsSet(params.Remote),
288280
},
289281
&macpool.MachineRequestArgs{
290-
VPCID: viper.GetString(paramVPCID),
291-
AZID: viper.GetString(paramAZID),
292-
SubnetID: viper.GetString(paramSubnetID),
293-
SSHSGID: viper.GetString(paramSSHSGID),
282+
VPCID: viper.GetString(paramVPCID),
283+
SSHSGID: viper.GetString(paramSSHSGID),
294284
},
295285
viper.GetString(paramTicket)); err != nil {
296286
logging.Error(err)
@@ -300,8 +290,6 @@ func release() *cobra.Command {
300290
}
301291
flagSet := pflag.NewFlagSet(awsParams.MACReleaseCmd, pflag.ExitOnError)
302292
flagSet.StringP(paramVPCID, "", paramVPCIDDefault, paramVPCIDDesc)
303-
flagSet.StringP(paramAZID, "", paramAZIDDefault, paramAZIDDesc)
304-
flagSet.StringP(paramSubnetID, "", paramSubnetIDDefault, paramSubnetIDDesc)
305293
flagSet.StringP(paramSSHSGID, "", paramSSHSGIDDefault, paramSSHSGIDDesc)
306294
flagSet.StringP(paramTicket, "", paramTicketDefault, paramTicketDesc)
307295
flagSet.Bool(params.Serverless, false, params.ServerlessDesc)

pkg/provider/aws/action/mac-pool/housekeeper.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,8 @@ func (r *HouseKeepRequestArgs) fillHostRequest() *macHost.PoolMacDedicatedHostRe
8989
Prefix: r.Pool.Prefix,
9090
Architecture: r.Pool.Architecture,
9191
// FixedLocation: r.FixedLocation,
92-
VPCID: &r.Machine.VPCID,
93-
AZID: &r.Machine.AZID,
94-
SubnetID: &r.Machine.SubnetID,
95-
SSHSGID: &r.Machine.SSHSGID,
92+
VPCID: &r.Machine.VPCID,
93+
SSHSGID: &r.Machine.SSHSGID,
9694
},
9795
PoolID: &macHost.PoolID{
9896
PoolName: r.Pool.Name,

pkg/provider/aws/action/mac-pool/mac-pool.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func Create(ctx *maptContext.ContextArgs, r *PoolRequestArgs) error {
4242
}
4343

4444
func Destroy(ctx *maptContext.ContextArgs) (err error) {
45-
logging.Debug("Run fedora destroy")
45+
logging.Debug("Run mac pool destroy")
4646
// Create mapt Context
4747
if err := maptContext.Init(ctx, aws.Provider()); err != nil {
4848
return err
@@ -97,10 +97,6 @@ func (r *PoolRequestArgs) deploy(ctx *pulumi.Context) error {
9797
MaxSize: r.MaxSize,
9898
})
9999
return err
100-
// if err != nil {
101-
// return err
102-
// }
103-
// return p.RunRemoteHouseKeep(&r.Name, &r.Architecture, &r.OSVersion, &r.OfferedCapacity, &r.MaxSize)
104100
}
105101

106102
func (r *PoolRequestArgs) results(stackResult auto.UpResult) error {

pkg/provider/aws/action/mac-pool/request.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,8 @@ func request(ctx *maptContext.ContextArgs, r *RequestMachineArgs) error {
4242
Version: *hi.OSVersion,
4343
Architecture: *hi.Arch,
4444
VPCID: &r.Machine.VPCID,
45-
// Availability zone is not needed cause it will be picked
46-
// from dedicated host
47-
// AvailabilityZone: &r.Machine.AZID,
48-
SubnetID: &r.Machine.SubnetID,
49-
SSHSGID: &r.Machine.SSHSGID,
50-
Timeout: r.Timeout,
45+
SSHSGID: &r.Machine.SSHSGID,
46+
Timeout: r.Timeout,
5147
}
5248

5349
// TODO here we would change based on the integration-mode requested
@@ -94,8 +90,8 @@ func (r *HouseKeepRequestArgs) fillMacRequest() *macMachine.Request {
9490
Architecture: r.Pool.Architecture,
9591
Version: r.Pool.OSVersion,
9692
// Network and Security
97-
VPCID: &r.Machine.VPCID,
98-
SubnetID: &r.Machine.SubnetID,
99-
SSHSGID: &r.Machine.SSHSGID,
93+
VPCID: &r.Machine.VPCID,
94+
// SubnetID: &r.Machine.SubnetID,
95+
SSHSGID: &r.Machine.SSHSGID,
10096
}
10197
}

pkg/provider/aws/action/mac-pool/types.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ type PoolRequestArgs struct {
2828

2929
// Custom values to setup within machines in the cluster
3030
type MachineRequestArgs struct {
31-
VPCID string
32-
AZID string
33-
SubnetID string
34-
SSHSGID string
31+
VPCID string
32+
// These values are now calculated
33+
// AZID string
34+
// SubnetID string
35+
SSHSGID string
3536
}
3637

3738
type HouseKeepRequestArgs struct {

pkg/provider/aws/aws.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ func (a *AWS) Custom(ctx *pulumi.Context) (*pulumi.ProviderResource, error) {
4444
&awsConfig.ProviderArgs{
4545
SkipCredentialsValidation: pulumi.Bool(true),
4646
SkipRequestingAccountId: pulumi.Bool(true),
47+
MaxRetries: pulumi.Int(1),
4748
})
4849
if err != nil {
4950
return nil, err

pkg/provider/aws/data/azs.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ func GetRandomAvailabilityZone(region string, excludedAZs []string) (*string, er
2626
return azs[util.Random(len(azs)-1, 0)].ZoneName, nil
2727
}
2828

29-
func GetAvailabilityZones() []string {
30-
azs, err := describeAvailabilityZones("")
29+
func GetAvailabilityZones(regionName string) []string {
30+
azs, err := describeAvailabilityZones(regionName)
3131
if err != nil {
3232
logging.Error(err)
3333
return nil

pkg/provider/aws/data/network.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,32 @@ func isPublic(client *ec2.Client, subnetID string) error {
117117
}
118118
return fmt.Errorf("no public subnet setup found")
119119
}
120+
121+
func GetSubnetID(region, vpcID, azID *string) (*string, error) {
122+
cfg, err := getConfig(*region)
123+
if err != nil {
124+
return nil, err
125+
}
126+
ec2Client := ec2.NewFromConfig(cfg)
127+
output, err := ec2Client.DescribeSubnets(
128+
context.TODO(),
129+
&ec2.DescribeSubnetsInput{
130+
Filters: []ec2types.Filter{
131+
{
132+
Name: aws.String("vpc-id"),
133+
Values: []string{*vpcID},
134+
},
135+
{
136+
Name: aws.String("availability-zone-id"),
137+
Values: []string{*azID},
138+
},
139+
},
140+
})
141+
if err != nil {
142+
return nil, err
143+
}
144+
if len(output.Subnets) != 1 {
145+
return nil, fmt.Errorf("expected one subnet, found %d", len(output.Subnets))
146+
}
147+
return output.Subnets[0].SubnetId, nil
148+
}

pkg/provider/aws/modules/mac/host/host.go

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package host
33
import (
44
"fmt"
55
"maps"
6+
"strings"
67

78
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/ec2"
89
"github.com/pulumi/pulumi/sdk/v3/go/auto"
@@ -57,21 +58,11 @@ func createDedicatedHost(args *MacDedicatedHostRequestArgs,
5758
if err != nil {
5859
return nil, err
5960
}
60-
// pick random az from region ensuring machine is offered (sometimes machines are not offered on each az from a region)
61-
dHArgs.availabilityZone, err = getAZ(*dHArgs.region, args.Architecture)
61+
// We will try on each Az in case we do not have capacity
62+
sr, err := retryCreateStack(&dHArgs, &backedURL)
6263
if err != nil {
6364
return nil, err
6465
}
65-
cs := manager.Stack{
66-
StackName: maptContext.StackNameByProject(mac.StackDedicatedHost),
67-
ProjectName: maptContext.ProjectName(),
68-
BackedURL: backedURL,
69-
ProviderCredentials: aws.GetClouProviderCredentials(
70-
map[string]string{
71-
awsConstants.CONFIG_AWS_REGION: *dHArgs.region}),
72-
DeployFunc: dHArgs.deploy,
73-
}
74-
sr, _ := manager.UpStack(cs)
7566
dhID, _, err := manageResultsDedicatedHost(sr, dHArgs.prefix, exportOutputs)
7667
if err != nil {
7768
return nil, err
@@ -86,6 +77,41 @@ func createDedicatedHost(args *MacDedicatedHostRequestArgs,
8677
return
8778
}
8879

80+
func retryCreateStack(dHArgs *dedicatedHostArgs, backedURL *string) (sr auto.UpResult, err error) {
81+
created := false
82+
azs := data.GetAvailabilityZones(*dHArgs.region)
83+
for i := 0; created || i < len(azs); i++ {
84+
// for _, az := range data.GetAvailabilityZones(*dHArgs.region) {
85+
dHArgs.availabilityZone = &azs[i]
86+
cs := manager.Stack{
87+
StackName: maptContext.StackNameByProject(mac.StackDedicatedHost),
88+
ProjectName: maptContext.ProjectName(),
89+
BackedURL: *backedURL,
90+
ProviderCredentials: aws.GetClouProviderCredentials(
91+
map[string]string{
92+
awsConstants.CONFIG_AWS_REGION: *dHArgs.region}),
93+
DeployFunc: dHArgs.deploy,
94+
}
95+
sr, err = manager.UpStack(cs)
96+
if err != nil {
97+
if isCapacityError(err) {
98+
break
99+
}
100+
return
101+
}
102+
created = true
103+
}
104+
if !created {
105+
err = fmt.Errorf("currently no AZ on %s has capacity", *dHArgs.region)
106+
}
107+
return
108+
}
109+
110+
func isCapacityError(err error) bool {
111+
return strings.Contains(err.Error(), "Insufficient") ||
112+
strings.Contains(err.Error(), "capacity")
113+
}
114+
89115
// this function will create the dedicated host resource
90116
func (r *dedicatedHostArgs) deploy(ctx *pulumi.Context) (err error) {
91117
ctx.Export(fmt.Sprintf("%s-%s", r.prefix, outputRegion), pulumi.String(*r.region))

pkg/provider/aws/modules/mac/host/util.go

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ func GetHostInformation(h ec2Types.Host) *mac.HostInformation {
8383
RunID: getTagValue(h.Tags, maptContext.TagKeyRunID),
8484
Region: &region,
8585
Host: &h,
86+
AzId: &az,
8687
PoolName: getTagValue(h.Tags, macConstants.TagKeyPoolName),
8788
}
8889
}
@@ -126,25 +127,3 @@ func getRegion(arch string, fixedLocation bool) (*string, error) {
126127
return data.LokupRegionOfferingInstanceType(
127128
mac.TypesByArch[arch])
128129
}
129-
130-
// Get a random AZ from the requested region, it ensures the az offers the instance type
131-
func getAZ(region, arch string) (az *string, err error) {
132-
isOffered := false
133-
var excludedAZs []string
134-
for !isOffered {
135-
az, err = data.GetRandomAvailabilityZone(region, excludedAZs)
136-
if err != nil {
137-
return nil, err
138-
}
139-
isOffered, err = data.IsInstanceTypeOfferedByAZ(
140-
region,
141-
mac.TypesByArch[arch], *az)
142-
if err != nil {
143-
return nil, err
144-
}
145-
if !isOffered {
146-
excludedAZs = append(excludedAZs, *az)
147-
}
148-
}
149-
return
150-
}

pkg/provider/aws/modules/mac/machine/machine.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ func (r *Request) deployerMachine(ctx *pulumi.Context) error {
198198
if err != nil {
199199
return err
200200
}
201+
} else {
202+
r.subnetID, err = data.GetSubnetID(r.Region, r.VPCID, r.AvailabilityZone)
203+
if err != nil {
204+
return err
205+
}
201206
}
202207
// Create Keypair
203208
machineKeyPair := keypair.KeyPairRequest{
@@ -353,10 +358,7 @@ func (r *Request) instance(ctx *pulumi.Context,
353358
securityGroups pulumi.StringArray,
354359
) (*ec2.Instance, error) {
355360
instanceArgs := ec2.InstanceArgs{
356-
HostId: pulumi.String(*r.dedicatedHost.Host.HostId),
357-
SubnetId: util.If[pulumi.StringPtrInput](r.SubnetID != nil,
358-
pulumi.String(*r.SubnetID),
359-
subnet.ID()),
361+
HostId: pulumi.String(*r.dedicatedHost.Host.HostId),
360362
Ami: pulumi.String(*ami.Image.ImageId),
361363
InstanceType: pulumi.String(mac.TypesByArch[r.Architecture]),
362364
KeyName: keyResources.AWSKeyPair.KeyName,
@@ -367,6 +369,11 @@ func (r *Request) instance(ctx *pulumi.Context,
367369
},
368370
Tags: maptContext.ResourceTags(),
369371
}
372+
if subnet != nil {
373+
instanceArgs.SubnetId = subnet.ID()
374+
} else {
375+
instanceArgs.SubnetId = pulumi.String(*r.subnetID)
376+
}
370377
if r.Airgap {
371378
instanceArgs.AssociatePublicIpAddress = pulumi.Bool(false)
372379
}

pkg/provider/aws/modules/mac/machine/types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ type Request struct {
1818
// If timeout is set a serverless scheduled task will be created to self destroy the resources
1919
Timeout string
2020
// Network and Security
21-
VPCID *string
22-
SubnetID *string
23-
SSHSGID *string
21+
VPCID *string
22+
SSHSGID *string
23+
24+
subnetID *string
2425
// For airgap scenario there is an orchestration of
2526
// a phase with connectivity on the machine (allowing bootstrapping)
2627
// a phase with connectivity off where the subnet for the target lost the nat gateway

0 commit comments

Comments
 (0)