Skip to content

Commit 0aaa259

Browse files
authored
[LoginNodes] Add DCV support for login nodes (aws#2780)
* Update platform cookbook to support DCV on login nodes
* Update `dcv.rb` to activate DCV on the login node.
* Update `supervisord_config` to set `dcv_configured` to true when DCV is enabled on a login node and DCV is installed.
* Update `parallelcluster_supervisord.conf.erb` to include `pcluster_dcv_authenticator` when DCV is configured on a login node.
* Modify `_dcv_common` to support DCV on login nodes
* Update the CloudWatch config so that DCV logs include login nodes
* Allow DCV instance metadata access when DCV is enabled on a login node
* Rename the DCV session file to include the instance hostname (`dcv_session_$(hostname)`). This is required to have multiple session files within the shared directory.
* Update `supervisord_config_spec` to test DCV on login nodes
* Update kitchen test and spec tests for login node DCV
* Add the `dcv_enabled` attribute to the login node log rotation suite in `kitchen.platform-config.yml`.
* Update `log_rotation_spec` to include DCV logs for login nodes.
* Update CHANGELOG
1 parent 5907a2f commit 0aaa259

File tree

12 files changed

+80
-26
lines changed

12 files changed

+80
-26
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ This file is used to list changes made in each version of the AWS ParallelCluste
77
------
88

99
**ENHANCEMENTS**
10-
- Allow custom actions on Login Nodes.
10+
- Allow custom actions on login nodes.
11+
- Allow DCV connection on login nodes.
1112

1213
**BUG FIXES**
1314
- Fix EFA kmod installation with RHEL 8.10 or newer.

cookbooks/aws-parallelcluster-environment/attributes/environment.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# IMDS
1717
default['cluster']['head_node_imds_secured'] = 'true'
1818
default['cluster']['head_node_imds_allowed_users'] = ['root', node['cluster']['cluster_admin_user'], node['cluster']['cluster_user'] ]
19-
default['cluster']['head_node_imds_allowed_users'].append('dcv') if node['cluster']['dcv_enabled'] == 'head_node' && dcv_installed?
19+
default['cluster']['head_node_imds_allowed_users'].append('dcv') if (node['cluster']['dcv_enabled'] == 'head_node' || node['cluster']['dcv_enabled'] == 'login_node') && dcv_installed?
2020

2121
# ParallelCluster internal variables to configure active directory service
2222
default['cluster']["directory_service"]["domain_name"] = nil

cookbooks/aws-parallelcluster-environment/files/cloudwatch/cloudwatch_agent_config.json

+21-14
Original file line numberDiff line numberDiff line change
@@ -296,12 +296,13 @@
296296
],
297297
"platforms": {{ default_platforms | tojson}},
298298
"node_roles": [
299-
"HeadNode"
299+
"HeadNode",
300+
"LoginNode"
300301
],
301302
"feature_conditions": [
302303
{
303304
"dna_key": "dcv_enabled",
304-
"satisfying_values": ["head_node"]
305+
"satisfying_values": ["head_node", "login_node"]
305306
}
306307
]
307308
},
@@ -379,12 +380,13 @@
379380
],
380381
"platforms": {{ default_platforms | tojson}},
381382
"node_roles": [
382-
"HeadNode"
383+
"HeadNode",
384+
"LoginNode"
383385
],
384386
"feature_conditions": [
385387
{
386388
"dna_key": "dcv_enabled",
387-
"satisfying_values": ["head_node"]
389+
"satisfying_values": ["head_node", "login_node"]
388390
}
389391
]
390392
},
@@ -398,12 +400,13 @@
398400
],
399401
"platforms": {{ default_platforms | tojson}},
400402
"node_roles": [
401-
"HeadNode"
403+
"HeadNode",
404+
"LoginNode"
402405
],
403406
"feature_conditions": [
404407
{
405408
"dna_key": "dcv_enabled",
406-
"satisfying_values": ["head_node"]
409+
"satisfying_values": ["head_node", "login_node"]
407410
}
408411
]
409412
},
@@ -417,12 +420,13 @@
417420
],
418421
"platforms": {{ default_platforms | tojson}},
419422
"node_roles": [
420-
"HeadNode"
423+
"HeadNode",
424+
"LoginNode"
421425
],
422426
"feature_conditions": [
423427
{
424428
"dna_key": "dcv_enabled",
425-
"satisfying_values": ["head_node"]
429+
"satisfying_values": ["head_node", "login_node"]
426430
}
427431
]
428432
},
@@ -436,12 +440,13 @@
436440
],
437441
"platforms": {{ default_platforms | tojson}},
438442
"node_roles": [
439-
"HeadNode"
443+
"HeadNode",
444+
"LoginNode"
440445
],
441446
"feature_conditions": [
442447
{
443448
"dna_key": "dcv_enabled",
444-
"satisfying_values": ["head_node"]
449+
"satisfying_values": ["head_node", "login_node"]
445450
}
446451
]
447452
},
@@ -455,12 +460,13 @@
455460
],
456461
"platforms": {{ default_platforms | tojson}},
457462
"node_roles": [
458-
"HeadNode"
463+
"HeadNode",
464+
"LoginNode"
459465
],
460466
"feature_conditions": [
461467
{
462468
"dna_key": "dcv_enabled",
463-
"satisfying_values": ["head_node"]
469+
"satisfying_values": ["head_node", "login_node"]
464470
}
465471
]
466472
},
@@ -474,12 +480,13 @@
474480
],
475481
"platforms": {{ default_platforms | tojson}},
476482
"node_roles": [
477-
"HeadNode"
483+
"HeadNode",
484+
"LoginNode"
478485
],
479486
"feature_conditions": [
480487
{
481488
"dna_key": "dcv_enabled",
482-
"satisfying_values": ["head_node"]
489+
"satisfying_values": ["head_node", "login_node"]
483490
}
484491
]
485492
},

cookbooks/aws-parallelcluster-platform/files/dcv/pcluster_dcv_connect.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ main() {
122122

123123
# Create a session with session storage enabled.
124124
mkdir -p "${DCV_SESSION_FOLDER}"
125-
dcv_session_file="${DCV_SESSION_FOLDER}/dcv_session"
125+
dcv_session_file="${DCV_SESSION_FOLDER}/dcv_session_$(hostname)"
126126
if [[ ! -e ${dcv_session_file} ]]; then
127127
sessionid=$(_create_dcv_session "${dcv_session_file}" "${shared_folder_path}")
128128
else

cookbooks/aws-parallelcluster-platform/kitchen.platform-config.yml

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ suites:
181181
cluster:
182182
log_rotation_enabled: 'true'
183183
node_type: 'LoginNode'
184+
dcv_enabled: "login_node"
184185
directory_service:
185186
generate_ssh_keys_for_users: 'true'
186187
scheduler: 'slurm'

cookbooks/aws-parallelcluster-platform/recipes/config/dcv.rb

+7
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,11 @@
2020
action :configure
2121
end
2222
end unless on_docker?
23+
when 'LoginNode'
24+
if node['cluster']['dcv_enabled'] == "login_node"
25+
# Activate DCV on login node
26+
dcv "Configure DCV" do
27+
action :configure
28+
end
29+
end unless on_docker?
2330
end

cookbooks/aws-parallelcluster-platform/recipes/config/log_rotation_login_node.rb

+6
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
parallelcluster_supervisord_log_rotation
2323
)
2424

25+
if node['cluster']['dcv_enabled'] == "login_node" && dcv_installed?
26+
config_files += %w(
27+
parallelcluster_dcv_log_rotation
28+
)
29+
end
30+
2531
if node['cluster']["directory_service"]["generate_ssh_keys_for_users"] == 'true'
2632
config_files += %w(
2733
parallelcluster_pam_ssh_key_generator_log_rotation

cookbooks/aws-parallelcluster-platform/recipes/config/supervisord_config.rb

+3-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
variables(
2525
region: region,
2626
aws_ca_bundle: region.start_with?('us-iso') ? "/etc/pki/#{region}/certs/ca-bundle.pem" : '',
27-
dcv_configured: node['cluster']['dcv_enabled'] == "head_node" && dcv_installed?,
27+
dcv_configured: (node['cluster']['dcv_enabled'] == "head_node" ||
28+
node['cluster']['dcv_enabled'] == "login_node") &&
29+
dcv_installed?,
2830
dcv_auth_virtualenv_path: node['cluster']['dcv']['authenticator']['virtualenv_path'],
2931
dcv_auth_user_home: node['cluster']['dcv']['authenticator']['user_home'],
3032
dcv_port: node['cluster']['dcv_port'],

cookbooks/aws-parallelcluster-platform/resources/dcv/partial/_dcv_common.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def optionally_disable_rnd
183183
end
184184

185185
action :configure do
186-
if dcv_supported? && node['cluster']['node_type'] == "HeadNode"
186+
if dcv_supported? && (node['cluster']['node_type'] == "HeadNode" || node['cluster']['node_type'] == "LoginNode")
187187
if dcv_gpu_accel_supported?
188188
# Enable graphic acceleration in dcv conf file for graphic instances.
189189
allow_gpu_acceleration

cookbooks/aws-parallelcluster-platform/spec/unit/recipes/log_rotation_spec.rb

+4-2
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,15 @@
218218
end
219219
end
220220

221-
context "in the login node when log_rotation enabled and pam ssh key generation is enabled" do
221+
context "in the login node when log_rotation, pam ssh key generation, and dcv are enabled" do
222222
cached(:chef_run) do
223223
runner = runner(platform: platform, version: version) do |node|
224224
node.override['cluster']['node_type'] = "LoginNode"
225225
node.override['cluster']['log_rotation_enabled'] = 'true'
226+
node.override['cluster']['dcv_enabled'] = "login_node"
226227
node.override['cluster']["directory_service"]["generate_ssh_keys_for_users"] = 'true'
227228
node.override['cluster']["scheduler"] = 'slurm'
229+
allow_any_instance_of(Object).to receive(:dcv_installed?).and_return(true)
228230
end
229231
runner.converge(described_recipe)
230232
end
@@ -235,12 +237,12 @@
235237
parallelcluster_supervisord_log_rotation
236238
parallelcluster_cloud_init_output_log_rotation
237239
parallelcluster_pam_ssh_key_generator_log_rotation
240+
parallelcluster_dcv_log_rotation
238241
)
239242
unexpected_config_files = %w(
240243
parallelcluster_bootstrap_error_msg_log_rotation
241244
parallelcluster_cfn_init_log_rotation
242245
parallelcluster_chef_client_log_rotation
243-
parallelcluster_dcv_log_rotation
244246
parallelcluster_clustermgtd_log_rotation
245247
parallelcluster_clusterstatusmgtd_log_rotation
246248
parallelcluster_slurm_fleet_status_manager_log_rotation

cookbooks/aws-parallelcluster-platform/spec/unit/recipes/supervisord_config_spec.rb

+24-5
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,12 @@
7777
.with_content("[program:pcluster_dcv_authenticator]")
7878
end
7979
end
80-
context "when login node" do
80+
context "when login node and dcv configured" do
8181
cached(:chef_run) do
8282
runner = runner(platform: platform, version: version) do |node|
8383
node.override['cluster']['node_type'] = 'LoginNode'
84-
node.override['cluster']['dcv_enabled'] = 'head_node'
85-
allow_any_instance_of(Object).to receive(:dcv_installed?).and_return(false)
84+
node.override['cluster']['dcv_enabled'] = 'login_node'
85+
allow_any_instance_of(Object).to receive(:dcv_installed?).and_return(true)
8686
end
8787
runner.converge(described_recipe)
8888
end
@@ -92,9 +92,28 @@
9292
is_expected.to render_file('/etc/parallelcluster/parallelcluster_supervisord.conf')
9393
.with_content("[program:cfn-hup]")
9494
.with_content("[program:loginmgtd]")
95-
96-
is_expected.not_to render_file('/etc/parallelcluster/parallelcluster_supervisord.conf')
9795
.with_content("[program:pcluster_dcv_authenticator]")
96+
.with_content("--port 8444")
97+
end
98+
99+
context "when login node and dcv not configured" do
100+
cached(:chef_run) do
101+
runner = runner(platform: platform, version: version) do |node|
102+
node.override['cluster']['node_type'] = 'LoginNode'
103+
node.override['cluster']['dcv_enabled'] = 'NONE'
104+
allow_any_instance_of(Object).to receive(:dcv_installed?).and_return(true)
105+
end
106+
runner.converge(described_recipe)
107+
end
108+
cached(:node) { chef_run.node }
109+
110+
it 'has the correct content' do
111+
is_expected.to render_file('/etc/parallelcluster/parallelcluster_supervisord.conf')
112+
.with_content("[program:loginmgtd]")
113+
114+
is_expected.not_to render_file('/etc/parallelcluster/parallelcluster_supervisord.conf')
115+
.with_content("[program:pcluster_dcv_authenticator]")
116+
end
98117
end
99118
end
100119
end

cookbooks/aws-parallelcluster-platform/templates/supervisord/parallelcluster_supervisord.conf.erb

+9
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,13 @@ exitcodes = 0
6868
redirect_stderr = true
6969
stdout_logfile = /var/log/parallelcluster/loginmgtd.log
7070
stdout_logfile_maxbytes = 1MB
71+
<% if @dcv_configured -%>
72+
[program:pcluster_dcv_authenticator]
73+
command = <%= @dcv_auth_virtualenv_path %>/bin/python <%= @dcv_auth_user_home %>/pcluster_dcv_authenticator.py
74+
--port <%= Integer(@dcv_port) + 1 %>
75+
--certificate <%= @dcv_auth_certificate %>
76+
--key <%= @dcv_auth_private_key %>
77+
user = <%= @dcv_auth_user %>
78+
environment = HOME="<%= @dcv_auth_user_home %>",USER="<%= @dcv_auth_user %>"<% if @region.start_with?('us-iso') -%>,AWS_CA_BUNDLE="<%= @aws_ca_bundle %>"<% end -%>
79+
<% end -%>
7180
<% end -%>

0 commit comments

Comments
 (0)