# playground: AWS ParallelCluster configuration file (INI syntax).
# Defines the "playground" cluster template together with its VPC, queue,
# compute-resource, scaling, EBS and CloudWatch-log sections.
# CLI-wide settings; cluster_template names the [cluster playground] section.
[global]
cluster_template = playground
sanity_check = true
update_check = true
# AWS region this configuration targets.
[aws]
aws_region_name = us-east-2
# Command aliases. {CFN_USER}, {MASTER_IP} and {ARGS} are placeholders,
# presumably filled in by the CLI at run time (confirm against the tool's docs).
[aliases]
ssh = ssh {CFN_USER}@{MASTER_IP} {ARGS}
# Cluster template selected by cluster_template in [global].
[cluster playground]
key_name = playground-cluster
base_os = ubuntu2004
scheduler = slurm
master_instance_type = c5.large
# Our AMI: ubuntu-pcluster-cuda-enroot-pyxis-0.3.2, built with packer/aws-ubuntu.pkr.hcl
custom_ami = ami-0ee10598e9d5ebad0
# Our AMI requires 40GB
master_root_volume_size = 40
compute_root_volume_size = 40
# Read access to the bucket prefix that holds the post-install script.
s3_read_resource = arn:aws:s3:::aws-parallel-cluster-slurm/playground*
post_install = s3://aws-parallel-cluster-slurm/playground/post_install.bash
# Each *_settings value names a labelled section of this file.
vpc_settings = pc-vpc-pubpriv
queue_settings = cpu-compute-spot, cpu-compute-ondemand, gpu-compute-spot, gpu-compute-ondemand
scaling_settings = custom
ebs_settings = shared
# Wires up [cw_log playground-cw-log]; without this key that section is never referenced.
cw_log_settings = playground-cw-log
# Network layout referenced by vpc_settings = pc-vpc-pubpriv in [cluster playground].
# The master and compute nodes use separate subnets; public IPs are turned off.
[vpc pc-vpc-pubpriv]
vpc_id = vpc-0a2f625228af902a9
use_public_ips = false
master_subnet_id = subnet-055a4a2a3d57187d3
compute_subnet_id = subnet-0624435f202eb2e11
# The first queue listed in queue_settings (see [cluster playground]) is the default scheduler queue.
# https://docs.aws.amazon.com/parallelcluster/latest/ug/cluster-definition.html#queue-settings
# CPU queue on Spot capacity, backed by [compute_resource cpu-spot].
# Hyperthreading is disabled and both EFA options are off.
[queue cpu-compute-spot]
compute_type = spot
compute_resource_settings = cpu-spot
disable_hyperthreading = true
enable_efa = false
enable_efa_gdr = false
# CPU queue on On-Demand capacity, backed by [compute_resource cpu-ondemand].
# Hyperthreading is disabled and both EFA options are off.
[queue cpu-compute-ondemand]
compute_type = ondemand
compute_resource_settings = cpu-ondemand
disable_hyperthreading = true
enable_efa = false
enable_efa_gdr = false
# GPU queue on On-Demand capacity; offers two compute resources
# (gpu-t4x1-ondemand and gpu-t4x4-ondemand).
# Hyperthreading is disabled and both EFA options are off.
[queue gpu-compute-ondemand]
compute_type = ondemand
compute_resource_settings = gpu-t4x1-ondemand, gpu-t4x4-ondemand
disable_hyperthreading = true
enable_efa = false
enable_efa_gdr = false
# Placement group is left unset; uncomment the next line to enable it.
# placement_group = DYNAMIC
# https://docs.aws.amazon.com/parallelcluster/latest/ug/queue-section.html#queue-placement-group
# GPU queue on Spot capacity; offers two compute resources
# (gpu-t4x1-spot and gpu-t4x4-spot).
# Hyperthreading is disabled and both EFA options are off.
[queue gpu-compute-spot]
compute_type = spot
compute_resource_settings = gpu-t4x1-spot, gpu-t4x4-spot
disable_hyperthreading = true
enable_efa = false
enable_efa_gdr = false
# Placement group is left unset; uncomment the next line to enable it.
# placement_group = DYNAMIC
# https://docs.aws.amazon.com/parallelcluster/latest/ug/queue-section.html#queue-placement-group
# Used by [queue cpu-compute-ondemand]: t3.micro nodes, 0 initially, at most 4.
[compute_resource cpu-ondemand]
instance_type = t3.micro
initial_count = 0
max_count = 4
# Used by [queue cpu-compute-spot]: c5.large nodes, 0 initially, at most 4.
[compute_resource cpu-spot]
instance_type = c5.large
initial_count = 0
max_count = 4
# Spot price cap (presumably USD per instance-hour; confirm against the docs).
spot_price = 0.0195
# Used by [queue gpu-compute-ondemand]: g4dn.xlarge nodes, 0 initially, at most 4.
[compute_resource gpu-t4x1-ondemand]
instance_type = g4dn.xlarge
initial_count = 0
max_count = 4
# Used by [queue gpu-compute-ondemand]: g4dn.12xlarge nodes, 0 initially, at most 4.
[compute_resource gpu-t4x4-ondemand]
instance_type = g4dn.12xlarge
initial_count = 0
max_count = 4
# Used by [queue gpu-compute-spot]: g4dn.xlarge nodes, 0 initially, at most 4.
[compute_resource gpu-t4x1-spot]
instance_type = g4dn.xlarge
initial_count = 0
max_count = 4
# Spot price cap (presumably USD per instance-hour; confirm against the docs).
spot_price = 0.16
# Used by [queue gpu-compute-spot]: g4dn.12xlarge nodes, 0 initially, at most 4.
[compute_resource gpu-t4x4-spot]
instance_type = g4dn.12xlarge
initial_count = 0
max_count = 4
# Spot price cap (presumably USD per instance-hour; confirm against the docs).
spot_price = 1.2
# Scaling profile referenced by scaling_settings = custom in [cluster playground].
# NOTE(review): scaledown_idletime is presumably expressed in minutes; confirm against the docs.
[scaling custom]
scaledown_idletime = 10
# Shared EBS volume referenced by ebs_settings = shared in [cluster playground].
[ebs shared]
volume_type = st1
# Size is presumably in GiB; confirm against the docs.
volume_size = 500
shared_dir = /shared
# CloudWatch Logs settings.
# https://docs.aws.amazon.com/parallelcluster/latest/ug/cw-log-section.html
# Only takes effect when a [cluster] section references it through
# cw_log_settings = playground-cw-log.
[cw_log playground-cw-log]
enable = true
retention_days = 14