# config.tpl.yml (from a fork of Azure/az-hop)
---
# Azure location name, as returned by the command: az account list-locations -o table
location: westeurope
# Name of the resource group in which all resources are created
resource_group: azhop
# Set to true to use an existing resource group. Default is false
use_existing_rg: false
# Additional tags to be added on the Resource Group
tags:
env: dev
project: azhop
# Define an ANF account, with a single pool and volume
# If not present, an existing NFS share is assumed for the users' home directory
anf:
  # Size of the ANF pool and of its single volume
  homefs_size_tb: 4
  # Service level of the ANF volume, can be: Standard, Premium, Ultra
  homefs_service_level: Standard
  # Dual protocol
  dual_protocol: false # true to enable SMB support. Defaults to false
mounts:
# mount settings for the user home directory
home:
    mountpoint: /anfhome # /sharedhome for example
    server: '{{anf_home_ip}}' # Specify an existing NFS server name or IP; when using the built-in ANF volume, use '{{anf_home_ip}}'
    export: '{{anf_home_path}}' # Specify an existing NFS export directory; when using the built-in ANF volume, use '{{anf_home_path}}'
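  # For example, to point the home directory at an existing external NFS server
  # instead of the built-in ANF volume (server IP and export path below are hypothetical):
  # home:
  #   mountpoint: /sharedhome
  #   server: 10.1.0.100   # hypothetical existing NFS server
  #   export: /export/home # hypothetical export path on that server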
# name of the admin account
admin_user: hpcadmin
# Object ID to grant key vault read access
key_vault_readers: #<object_id>
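# For example, an object ID can be retrieved with the Azure CLI; note the output
# property is named 'id' on recent CLI versions (it was 'objectId' on older ones):
#   az ad signed-in-user show --query id -o tsv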
# Network
network:
  # Create Network and Application Security Rules. Defaults to true; defaults to false when using an existing VNET and not specified
create_nsg: true
vnet:
    name: hpcvnet # Optional - defaults to hpcvnet
id: # If a vnet id is set then no network will be created and the provided vnet will be used
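    # For reference, an existing VNET id uses the standard Azure resource id format, e.g. (placeholder values):
    # id: /subscriptions/<subscription_id>/resourceGroups/<vnet_resource_group>/providers/Microsoft.Network/virtualNetworks/<vnet_name>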
    address_space: "10.0.0.0/16" # Optional - defaults to "10.0.0.0/16"
    # When using an existing VNET, only the subnet names will be used, not the address_prefixes
    subnets: # all subnets are optional
      # name values can be used to override the default subnet names, and address_prefixes to change the IP ranges to be used
      # All values below are the default values
frontend:
name: frontend
address_prefixes: "10.0.0.0/24"
        create: true # create the subnet if true. Defaults to true when not specified, and to false when using an existing VNET
admin:
name: admin
address_prefixes: "10.0.1.0/24"
create: true
netapp:
name: netapp
address_prefixes: "10.0.2.0/24"
create: true
ad:
name: ad
address_prefixes: "10.0.3.0/28"
create: true
      # The Bastion and Gateway subnets are optional; add them if a Bastion or a VPN gateway needs to be created in the environment
      # bastion: # The Bastion subnet name is always fixed to AzureBastionSubnet
      #   address_prefixes: "10.0.4.0/27" # the CIDR range must be at least /27
      #   create: true
      # gateway: # The Gateway subnet name is always fixed to GatewaySubnet
      #   address_prefixes: "10.0.4.32/27" # a /27 or /28 network is recommended
      #   create: true
compute:
name: compute
address_prefixes: "10.0.16.0/20"
create: true
# peering: # This list is optional, and can be used to create VNet Peerings in the same subscription.
# - vnet_name: #"VNET Name to Peer to"
# vnet_resource_group: #"Resource Group of the VNET to peer to"
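  # A filled-in peering example with hypothetical names:
  # peering:
  #   - vnet_name: "hub-vnet"
  #     vnet_resource_group: "network-rg"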
# Specify DNS forwarders available in the network
# dns:
# forwarders:
# - { name: foo.com, ips: "10.2.0.4, 10.2.0.5" }
# When working in a locked down network, uncomment and fill out this section
locked_down_network:
enforce: false
# grant_access_from: [a.b.c.d] # Array of CIDR to grant access from, see https://docs.microsoft.com/en-us/azure/storage/common/storage-network-security?tabs=azure-portal#grant-access-from-an-internet-ip-range
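  # For example, to grant access from two hypothetical ranges:
  # grant_access_from: ["20.30.40.0/24", "50.60.70.8"]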
public_ip: true # Enable public IP creation for the Jumpbox, the OnDemand portal and image creation. Defaults to true
# Jumpbox VM configuration
jumpbox:
vm_size: Standard_B2ms
  #ssh_port: 2222 # SSH port used on the public IP, defaults to 22
# Active directory VM configuration
ad:
vm_size: Standard_B2ms
# On demand VM configuration
ondemand:
vm_size: Standard_D4s_v5
#fqdn: azhop.foo.com # When provided it will be used for the certificate server name
  generate_certificate: true # Generate an SSL certificate for the OnDemand portal. Defaults to true
# Grafana VM configuration
grafana:
vm_size: Standard_B2ms
# Scheduler VM configuration
scheduler:
vm_size: Standard_B2ms
# CycleCloud VM configuration
cyclecloud:
vm_size: Standard_B2ms
  # Azure Image Reference for CycleCloud. Defaults to 8.2.120211111 if not present
image:
publisher: "azurecyclecloud"
offer: "azure-cyclecloud"
sku: "cyclecloud8"
version: "8.2.120211111"
  # Azure Image Plan for CycleCloud. Defaults to 8 if not present
plan:
name: "cyclecloud8"
publisher: "azurecyclecloud"
product: "azure-cyclecloud"
  # uncomment if updated RPMs need to be applied
  # rpms:
    # optional URL of a fix to apply to the marketplace image deployed on the ccportal
    # cyclecloud:
    # mandatory URL of the jetpack RPM to be installed on the ccportal and the scheduler
    # jetpack:
winviz:
vm_size: Standard_D4s_v3
  create: false # Create an always-running Windows node. Defaults to false
# Lustre cluster configuration
lustre:
rbh_sku: "Standard_D8d_v4"
mds_sku: "Standard_D8d_v4"
oss_sku: "Standard_D32d_v4"
oss_count: 2
hsm_max_requests: 8
mdt_device: "/dev/sdb"
ost_device: "/dev/sdb"
hsm:
    # Optional: use an existing storage account for the archive.
    # If not set, the azhop storage account created at deployment is used.
    storage_account: #existing_storage_account_name
    storage_container: #only_used_with_existing_storage_account
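    # For example, with a hypothetical pre-existing account and container:
    # storage_account: mylustrearchive
    # storage_container: lustre-hsm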
# List of users to be created on this environment
users:
# name: username
# uid: uniqueid
# gid: 5000
  # shell: /bin/bash # defaults to /bin/bash
  # home: /anfhome/<user_name> # defaults to <homedir_mountpoint>/<user_name>
  # admin: false # true grants the user cluster admin privileges - false by default
  # sudo: true # allow sudo access on cluster compute nodes - false by default
- { name: clusteradmin, uid: 10001, gid: 5000, admin: true, sudo: true }
- { name: clusteruser, uid: 10002, gid: 5000 }
groups: # Not used today; reserved for future use
- name: users
gid: 5000
# scheduler to be installed and configured (openpbs, slurm)
queue_manager: openpbs
# Specific SLURM configuration
slurm:
  # Enable SLURM accounting; this creates a SLURM accounting database in a managed MySQL server instance
accounting_enabled: false
# Enable container support for SLURM using Enroot/Pyxis (global switch)
# Each queue with container support must have its own enroot_enabled switch set to true
enroot_enabled: false
# Authentication configuration for accessing the az-hop portal
# Default is basic authentication. For oidc authentication you have to specify the following values
# The OIDC client secret needs to be stored as a secret named <oidc-client-id>-password in the key vault used by az-hop
authentication:
httpd_auth: basic # oidc or basic
# User mapping https://osc.github.io/ood-documentation/latest/reference/files/ood-portal-yml.html#ood-portal-generator-user-map-match
# You can specify either a map_match or a user_map_cmd
# Domain users are mapped to az-hop users with the same name and without the domain name
# user_map_match: '^([^@]+)@mydomain.foo$'
# If using a custom mapping script, update it from the ./playbooks/files directory before running the playbook
# user_map_cmd: /opt/ood/ood_auth_map/bin/custom_mapping.sh
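  # A minimal custom mapping script could look like the sketch below; per the Open OnDemand
  # documentation, user_map_cmd receives the authenticated username as its first argument
  # and must print the mapped system user on stdout (the domain suffix stripped here is hypothetical):
  #   #!/bin/bash
  #   echo "${1%%@*}"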
# ood_auth_openidc:
# OIDCProviderMetadataURL: # for AAD use 'https://sts.windows.net/{{tenant_id}}/.well-known/openid-configuration'
# OIDCClientID: 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX'
# OIDCRemoteUserClaim: # for AAD use 'upn'
# OIDCScope: # for AAD use 'openid profile email groups'
# OIDCPassIDTokenAs: # for AAD use 'serialized'
# OIDCPassRefreshToken: # for AAD use 'On'
# OIDCPassClaimsAs: # for AAD use 'environment'
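  # An example AAD setup assembled from the per-value hints above (the client id is a placeholder):
  # httpd_auth: oidc
  # ood_auth_openidc:
  #   OIDCProviderMetadataURL: 'https://sts.windows.net/{{tenant_id}}/.well-known/openid-configuration'
  #   OIDCClientID: '<client_id>'
  #   OIDCRemoteUserClaim: 'upn'
  #   OIDCScope: 'openid profile email groups'
  #   OIDCPassIDTokenAs: 'serialized'
  #   OIDCPassRefreshToken: 'On'
  #   OIDCPassClaimsAs: 'environment'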
# List of images to be defined
images:
# - name: image_definition_name # Should match the packer configuration file name, one per packer file
# publisher: azhop
# offer: CentOS
# sku: 7_9-gen2
# hyper_v: V2 # V1 or V2 (V1 is the default)
# os_type: Linux # Linux or Windows
# version: 7.9 # Version of the image to create the image definition in SIG
# Pre-defined images
- name: azhop-centos79-v2-rdma-gpgpu
publisher: azhop
offer: CentOS
sku: 7.9-gen2
hyper_v: V2
os_type: Linux
version: 7.9
- name: centos-7.8-desktop-3d
publisher: azhop
offer: CentOS
sku: 7_8
hyper_v: V1
os_type: Linux
version: 7.8
- name: azhop-ubuntu18.04
publisher: azhop
offer: Ubuntu
    sku: 18_04
hyper_v: V2
os_type: Linux
version: 18.04
# List of queues (node arrays in CycleCloud) to be defined
queues:
  - name: execute # name of the CycleCloud node array
    # Azure VM Instance type
    vm_size: Standard_F2s_v2
    # maximum number of cores that can be instantiated
    max_core_count: 1024
# marketplace image name or custom image id
# image: OpenLogic:CentOS-HPC:7_9-gen2:latest
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/azhop-centos79-v2-rdma-gpgpu/latest
    # Set to true to enable Accelerated Networking (AccelNet). Defaults to false
EnableAcceleratedNetworking: false
# spot instance support. Default is false
spot: false
# Set to false to disable creation of placement groups (for SLURM only). Default is true
ColocateNodes: false
# Set to true to enable Enroot for this partition
enroot_enabled: false
- name: hc44rs
vm_size: Standard_HC44rs
max_core_count: 440
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/azhop-centos79-v2-rdma-gpgpu/latest
spot: true
- name: hb120v2
vm_size: Standard_HB120rs_v2
max_core_count: 1200
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/azhop-centos79-v2-rdma-gpgpu/latest
spot: true
- name: hb120v3
vm_size: Standard_HB120rs_v3
max_core_count: 1200
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/azhop-centos79-v2-rdma-gpgpu/latest
spot: true
# Queue dedicated to GPU remote viz nodes. This name is fixed and can't be changed
- name: viz3d
vm_size: Standard_NV6
max_core_count: 24
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/centos-7.8-desktop-3d/latest
ColocateNodes: false
spot: false
  # Queue dedicated to non-GPU remote viz nodes. This name is fixed and can't be changed
- name: viz
vm_size: Standard_D8s_v3
max_core_count: 200
image: /subscriptions/{{subscription_id}}/resourceGroups/{{resource_group}}/providers/Microsoft.Compute/galleries/{{sig_name}}/images/centos-7.8-desktop-3d/latest
ColocateNodes: false
spot: false