# create-cluster-template.yaml
# AWS ParallelCluster (v3) cluster configuration template.
# AWS Region the cluster is created in.
Region: us-west-2
# Operating system and custom AMI used for the cluster.
Image:
  Os: alinux2
  # Placeholder, presumably replaced with a real AMI ID when the template is
  # rendered. NOTE(review): written as $ami_id while the subnet placeholders
  # use the ${...} form — confirm the substitution tool accepts both.
  CustomAmi: $ami_id
# Head node: instance type, SSH key pair, IAM permissions, subnet and
# root volume.
HeadNode:
  InstanceType: c5.4xlarge
  Ssh:
    KeyName: pcluster-key
  Iam:
    ## (Un)comment S3Access and provide the name of one of your S3 buckets.
    ## https://docs.aws.amazon.com/parallelcluster/latest/ug/HeadNode-v3.html#HeadNode-v3-Iam
    S3Access:
      - BucketName: pcluster-ml-workshop
        # Write access is enabled for the head node on this bucket.
        EnableWriteAccess: true
    AdditionalIamPolicies:
      - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
      - Policy: arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
  Networking:
    # Placeholder for the head node's subnet; presumably filled in when the
    # template is rendered.
    SubnetId: ${public_subnet_id}
  LocalStorage:
    RootVolume:
      # Root volume size (GiB per the ParallelCluster HeadNode documentation).
      Size: 100
# Shared file systems: an EBS volume at /apps and an FSx for Lustre file
# system at /fsx.
SharedStorage:
  # Install your shared applications in /apps. The volume can be archived
  # through a snapshot that you can reuse with other clusters.
  - Name: SharedEBS
    StorageType: Ebs
    MountDir: /apps
    EbsSettings:
      VolumeType: gp3
      Size: 200
      Throughput: 300
      Iops: 6000
  - Name: FsxLustre0
    StorageType: FsxLustre
    MountDir: /fsx
    FsxLustreSettings:
      StorageCapacity: 4800
      DeploymentType: PERSISTENT_1
      PerUnitStorageThroughput: 200
      DataCompressionType: LZ4
## Review the following doc:
## https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html
## If local storage is needed, it can be configured as required.
# Slurm scheduler with a single queue of g4dn.2xlarge compute nodes.
Scheduling:
  Scheduler: slurm
  SlurmQueues:
    - Name: train-g4dn-2xl
      ComputeSettings:
        LocalStorage:
          # NVMe drives will be set in RAID0
          EphemeralVolume:
            MountDir: /local_scratch
          # This is your root volume
          RootVolume:
            Size: 200
      ## MinCount: 2 keeps two nodes provisioned, which avoids scale-down;
      ## the queue can grow up to MaxCount: 6. Set MinCount to 0 first if you
      ## want to confirm that jobs obtain their instances from your ODCR.
      ## NOTE(review): this comment previously described raising MinCount to
      ## 16, which exceeds MaxCount below — confirm the intended values.
      ComputeResources:
        - MinCount: 2
          MaxCount: 6
          InstanceType: g4dn.2xlarge
          Name: train-g4dn-2xl
          Efa:
            Enabled: false
            GdrSupport: false
      Networking:
        PlacementGroup:
          Enabled: true
        SubnetIds:
          # Placeholder for the compute nodes' subnet; presumably filled in
          # when the template is rendered.
          - ${private_subnet_id}
      ## For IAM see this doc:
      ## https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#Scheduling-v3-SlurmQueues-Iam
      Iam:
        S3Access:
          # No EnableWriteAccess here, unlike the head node's entry for the
          # same bucket.
          - BucketName: pcluster-ml-workshop
        AdditionalIamPolicies:
          - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
          - Policy: arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly