From 40dae8786a0366ea4736ba83dd434d345cd43c4a Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sat, 28 Dec 2024 19:17:13 +0100 Subject: [PATCH 1/7] use mixin class for osu --- eessi/testsuite/eessi_mixin.py | 6 +- eessi/testsuite/tests/apps/osu.py | 235 +++++++++--------------------- 2 files changed, 69 insertions(+), 172 deletions(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index d03d65e2..8753c8e1 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -43,6 +43,7 @@ class EESSI_Mixin(RegressionMixin): scale = parameter(SCALES.keys()) bench_name = None bench_name_ci = None + num_tasks_per_compute_unit = 1 # Create ReFrame variables for logging runtime environment information cvmfs_repo_name = variable(str, value='None') @@ -118,7 +119,7 @@ def run_after_init(self): # Set scales as tags hooks.set_tag_scale(self) - @run_after('init') + @run_before('setup', always_last=True) def measure_mem_usage(self): if self.measure_memory_usage: hooks.measure_memory_usage(self) @@ -163,7 +164,8 @@ def validate_setup(self): @run_after('setup') def assign_tasks_per_compute_unit(self): """Call hooks to assign tasks per compute unit, set OMP_NUM_THREADS, and set compact process binding""" - hooks.assign_tasks_per_compute_unit(test=self, compute_unit=self.compute_unit) + hooks.assign_tasks_per_compute_unit(test=self, compute_unit=self.compute_unit, + num_per=self.num_tasks_per_compute_unit) # Set OMP_NUM_THREADS environment variable hooks.set_omp_num_threads(self) diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 83bbd0f4..23ab1803 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -6,19 +6,20 @@ non-GPU nodes. Otherwise those tests will FAIL. """ import reframe as rfm -from reframe.core.builtins import parameter, run_after # added only to make the linter happy +from reframe.core.builtins import parameter, run_after from reframe.utility import reframe from hpctestlib.microbenchmarks.mpi.osu import osu_benchmark -from eessi.testsuite import hooks, utils -from eessi.testsuite.constants import * +from eessi.testsuite.constants import COMPUTE_UNIT, CPU, DEVICE_TYPES, INVALID_SYSTEM, GPU, NODE, SCALES +from eessi.testsuite.eessi_mixin import EESSI_Mixin from eessi.testsuite.utils import find_modules, log def filter_scales_pt2pt(): """ Filtering function for filtering scales for the pt2pt OSU test + returns all scales with either 2 cores, 1 full node, or 2 full nodes """ return [ k for (k, v) in SCALES.items() @@ -30,7 +31,8 @@ def filter_scales_pt2pt(): def filter_scales_coll(): """ - Filtering function for filtering scales for collective the OSU test + Filtering function for filtering scales for the collective OSU test + returns all scales with at least 2 cores """ return [ k for (k, v) in SCALES.items() @@ -40,17 +42,13 @@ def filter_scales_coll(): @rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_pt2pt(osu_benchmark): - ''' Run-only OSU test ''' - scale = parameter(filter_scales_pt2pt()) - valid_prog_environs = ['default'] - valid_systems = ['*'] +class EESSI_OSU_Base(osu_benchmark, EESSI_Mixin): + """ base class for OSU tests """ time_limit = '30m' module_name = parameter(find_modules('OSU-Micro-Benchmarks')) - # Device type for non-cuda OSU-Micro-Benchmarks should run on hosts of both node types. To do this the default - # device type is set to GPU. device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) - # unset num_tasks_per_node from the hpctestlib. + + # reset num_tasks_per_node from the hpctestlib: we handle it ourselves num_tasks_per_node = None # Set num_warmup_iters to 5 to reduce execution time, especially on slower interconnects @@ -58,6 +56,9 @@ class EESSI_OSU_Micro_Benchmarks_pt2pt(osu_benchmark): # Set num_iters to 10 to reduce execution time, especially on slower interconnects num_iters = 10 + def required_mem_per_node(self): + return 1024 + @run_after('init') def filter_scales_2gpus(self): """Filter out scales with < 2 GPUs if running on GPUs""" @@ -69,26 +70,6 @@ def filter_scales_2gpus(self): self.valid_systems = [INVALID_SYSTEM] log(f'valid_systems set to {self.valid_systems} for scale {self.scale} and device_type {self.device_type}') - @run_after('init') - def filter_benchmark_pt2pt(self): - """ Filter out all non-mpi.pt2pt benchmarks """ - if not self.benchmark_info[0].startswith('mpi.pt2pt'): - self.valid_systems = [INVALID_SYSTEM] - - @run_after('init') - def run_after_init(self): - """hooks to run after init phase""" - - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) - - hooks.set_modules(self) - - # Set scales as tags - hooks.set_tag_scale(self) - @run_after('init') def set_device_buffers(self): """ @@ -98,32 +79,46 @@ def set_device_buffers(self): """ if self.device_type == DEVICE_TYPES[GPU]: self.device_buffers = 'cuda' - else: - # If the device_type is CPU then device_buffers should always be CPU. self.device_buffers = 'cpu' @run_after('init') - def set_tag_ci(self): - """ Setting tests under CI tag. """ - if (self.benchmark_info[0] in ['mpi.pt2pt.osu_latency', 'mpi.pt2pt.osu_bw']): - self.tags.add('CI') - log(f'tags set to {self.tags}') + def set_tags(self): + """ Setting custom tags """ + self.bench_name = self.benchmark_info[0] + self.tags.add(self.bench_name.split('.')[-1]) + + @run_after('setup', always_last=True) + def skip_test_1gpu(self): + if self.device_type == DEVICE_TYPES[GPU]: + num_gpus = self.num_gpus_per_node * self.num_nodes + self.skip_if(num_gpus < 2, "Skipping GPU test : only 1 GPU available for this test case") - if (self.benchmark_info[0] == 'mpi.pt2pt.osu_bw'): - self.tags.add('osu_bw') - if (self.benchmark_info[0] == 'mpi.pt2pt.osu_latency'): - self.tags.add('osu_latency') +@rfm.simple_test +class EESSI_OSU_Micro_Benchmarks_pt2pt(EESSI_OSU_Base): + ''' point-to-point OSU test ''' + scale = parameter(filter_scales_pt2pt()) + compute_unit = COMPUTE_UNIT[NODE] @run_after('init') - def set_mem(self): - """ Setting an extra job option of memory. This test has only 4 possibilities: 1_node, 2_nodes, 2_cores and - 1cpn_2nodes. This is implemented for all cases including full node cases. The requested memory may seem large - and the test requires at least 4.5 GB per core at the minimum for the full test when run with validation (-c - option for osu_bw or osu_latency). We run till message size 8 (-m 8) which significantly reduces memory - requirement.""" - self.extra_resources = {'memory': {'size': '12GB'}} + def filter_benchmark_pt2pt(self): + """ Filter out all non-mpi.pt2pt benchmarks """ + if not self.benchmark_info[0].startswith('mpi.pt2pt'): + self.valid_systems = [INVALID_SYSTEM] + + @run_after('init') + def select_ci(self): + " Select the CI variants " + if (self.bench_name in ['mpi.pt2pt.osu_latency', 'mpi.pt2pt.osu_bw']): + self.bench_name_ci = self.bench_name + + @run_after('init') + def set_num_tasks_per_compute_unit(self): + """ Setting number of tasks per compute unit and cpus per task. This sets num_cpus_per_task + for 1 node and 2 node options where the request is for full nodes.""" + if SCALES.get(self.scale).get('num_nodes') == 1: + self.num_tasks_per_compute_unit = 2 @run_after('setup') def adjust_executable_opts(self): @@ -132,134 +127,34 @@ def adjust_executable_opts(self): Therefore we must override it *after* the 'setup' phase """ if self.device_type == DEVICE_TYPES[CPU]: - self.executable_opts = [ele for ele in self.executable_opts if ele != 'D'] - - @run_after('setup') - def set_num_tasks_per_node(self): - """ Setting number of tasks per node and cpus per task in this function. This function sets num_cpus_per_task - for 1 node and 2 node options where the request is for full nodes.""" - if SCALES.get(self.scale).get('num_nodes') == 1: - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE], 2) - else: - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE]) - - @run_after('setup') - def set_num_gpus_per_node(self): - """ - Set number of GPUs per node for GPU-to-GPU tests - """ - if self.device_type == DEVICE_TYPES[GPU]: - # Skip single-node tests with less than 2 GPU devices in the node - self.skip_if( - SCALES[self.scale]['num_nodes'] == 1 and self.default_num_gpus_per_node < 2, - "There are < 2 GPU devices present in the node." - f" Skipping tests with device_type={DEVICE_TYPES[GPU]} involving < 2 GPUs and 1 node." - ) - if not self.num_gpus_per_node: - self.num_gpus_per_node = self.default_num_gpus_per_node - log(f'num_gpus_per_node set to {self.num_gpus_per_node} for partition {self.current_partition.name}') + self.executable_opts = [x for x in self.executable_opts if x != 'D'] @rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_coll(osu_benchmark): - ''' Run-only OSU test ''' +class EESSI_OSU_Micro_Benchmarks_coll(EESSI_OSU_Base): + ''' collective OSU test ''' scale = parameter(filter_scales_coll()) - valid_prog_environs = ['default'] - valid_systems = ['*'] - time_limit = '30m' - module_name = parameter(utils.find_modules('OSU-Micro-Benchmarks')) - # Device type for non-cuda OSU-Micro-Benchmarks should run on hosts of both node types. To do this the default - # device type is set to GPU. - device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) - # Unset num_tasks_per_node from hpctestlib - num_tasks_per_node = None - - # Set num_warmup_iters to 5 to reduce execution time, especially on slower interconnects - num_warmup_iters = 5 - # Set num_iters to 10 to reduce execution time, especially on slower interconnects - num_iters = 10 @run_after('init') - def run_after_init(self): - """hooks to run after init phase""" - # Note: device_buffers variable is inherited from the hpctestlib class and adds options to the launcher - # commands based on what device is set. - self.device_buffers = 'cpu' - # Filter on which scales are supported by the partitions defined in the ReFrame configuration - hooks.filter_supported_scales(self) - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) - is_cuda_module = utils.is_cuda_required_module(self.module_name) - if is_cuda_module and self.device_type == DEVICE_TYPES[GPU]: - self.device_buffers = 'cuda' - - # If the device_type is CPU then device buffer should always be CPU. - if self.device_type == DEVICE_TYPES[CPU]: - self.device_buffers = 'cpu' - # This part of the code removes the collective communication calls out of the run list since this test is only - # meant for collective. + def filter_benchmark_coll(self): + """ Filter out all non-mpi.collective benchmarks """ if not self.benchmark_info[0].startswith('mpi.collective'): - self.valid_systems = [] - hooks.set_modules(self) - - @run_after('init') - def set_tag_ci(self): - if (self.benchmark_info[0] == 'mpi.collective.osu_allreduce' - or self.benchmark_info[0] == 'mpi.collective.osu_alltoall'): - self.tags.add('CI') - if (self.benchmark_info[0] == 'mpi.collective.osu_allreduce'): - self.tags.add('osu_allreduce') - if (self.benchmark_info[0] == 'mpi.collective.osu_alltoall'): - self.tags.add('osu_alltoall') + self.valid_systems = [INVALID_SYSTEM] @run_after('init') - def set_mem(self): - """ Setting an extra job option of memory. The alltoall operation takes maximum memory of 0.1 GB per core for a - message size of 8 and almost 0.5 GB per core for the maximum message size the test allows. But we limit the - message sizes to 8 and for a safety net we take 64 GB assuming dense nodes works for all the tests and node - types.""" - self.extra_resources = {'memory': {'size': '64GB'}} + def select_ci(self): + " Select the CI variants " + if (self.bench_name in ['mpi.collective.osu_allreduce', 'mpi.collective.osu_alltoall']): + self.bench_name_ci = self.bench_name @run_after('init') - def set_num_tasks(self): - hooks.set_tag_scale(self) - - @run_after('setup') - def set_num_tasks_per_node(self): - """ Setting number of tasks per node, cpus per task and gpus per node in this function. This function sets - num_cpus_per_task for 1 node and 2 node options where the request is for full nodes.""" - max_avail_cpus_per_node = self.current_partition.processor.num_cpus - if self.device_buffers == 'cpu': - # Setting num_tasks and num_tasks_per_node for the CPU tests - if SCALES.get(self.scale).get('num_cpus_per_node', 0): - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE], - self.default_num_cpus_per_node) - elif SCALES.get(self.scale).get('node_part', 0): - pass_num_per = int(max_avail_cpus_per_node / SCALES.get(self.scale).get('node_part', 0)) - if pass_num_per > 1: - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE], pass_num_per) - else: - self.skip(msg="Too few cores available for a collective operation.") - - if FEATURES[GPU] in self.current_partition.features: - max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self) - # Setting number of GPU for a cpu test on a GPU node. - if SCALES.get(self.scale).get('num_nodes') == 1: - self.num_gpus_per_node = 1 - else: - self.num_gpus_per_node = max_avail_gpus_per_node - elif self.device_buffers == 'cuda': - max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self) - # Setting num_tasks and num_tasks_per_node for the GPU tests - if max_avail_gpus_per_node == 1 and SCALES.get(self.scale).get('num_nodes') == 1: - self.skip(msg="There is only 1 device in the node. Skipping collective tests involving only 1 node.") - else: - if SCALES.get(self.scale).get('num_gpus_per_node', 0) * SCALES.get(self.scale).get('num_nodes', 0) > 1: - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT.get(GPU, FEATURES[GPU])) - elif SCALES.get(self.scale).get('node_part', 0): - pass_num_per = int(max_avail_gpus_per_node / SCALES.get(self.scale).get('node_part', 0)) - if pass_num_per > 1: - hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT.get(GPU, FEATURES[GPU])) - else: - self.skip(msg="Total GPUs (max_avail_gpus_per_node / node_part) is 1 less.") - else: - self.skip(msg="Total GPUs (num_nodes * num_gpus_per_node) = 1") + def set_compute_unit(self): + """ + Set the compute unit to which tasks will be assigned: + one task per core for CPU runs, and one task per GPU for GPU runs. + """ + device_to_compute_unit = { + DEVICE_TYPES[CPU]: COMPUTE_UNIT[CPU], + DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], + } + self.compute_unit = device_to_compute_unit.get(self.device_type) From 29051545a95577855cb13adf5a5cfdad97fcca8f Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Mon, 30 Dec 2024 21:01:48 +0100 Subject: [PATCH 2/7] import run_before --- eessi/testsuite/eessi_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index 8753c8e1..cde5fbf0 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -1,4 +1,4 @@ -from reframe.core.builtins import parameter, run_after, variable +from reframe.core.builtins import parameter, run_after, run_before, variable from reframe.core.exceptions import ReframeFatalError from reframe.core.pipeline import RegressionMixin from reframe.utility.sanity import make_performance_function From 21de9d96279683cceefb6e7434b1992d0e3d5d6f Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Mon, 30 Dec 2024 22:03:26 +0100 Subject: [PATCH 3/7] initialize compute_unit in base class --- eessi/testsuite/tests/apps/osu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 23ab1803..5dbcaa9c 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -47,6 +47,7 @@ class EESSI_OSU_Base(osu_benchmark, EESSI_Mixin): time_limit = '30m' module_name = parameter(find_modules('OSU-Micro-Benchmarks')) device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) + compute_unit = None # reset num_tasks_per_node from the hpctestlib: we handle it ourselves num_tasks_per_node = None From 06e808af5f9b20a68a50e41b84af5f3271c2c337 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Mon, 30 Dec 2024 22:14:46 +0100 Subject: [PATCH 4/7] don't inherit from mixin in base class --- eessi/testsuite/tests/apps/osu.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 5dbcaa9c..ef9fd121 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -41,13 +41,11 @@ def filter_scales_coll(): ] -@rfm.simple_test -class EESSI_OSU_Base(osu_benchmark, EESSI_Mixin): +class EESSI_OSU_Base(osu_benchmark): """ base class for OSU tests """ time_limit = '30m' module_name = parameter(find_modules('OSU-Micro-Benchmarks')) device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) - compute_unit = None # reset num_tasks_per_node from the hpctestlib: we handle it ourselves num_tasks_per_node = None @@ -97,7 +95,7 @@ def skip_test_1gpu(self): @rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_pt2pt(EESSI_OSU_Base): +class EESSI_OSU_Micro_Benchmarks_pt2pt(EESSI_OSU_Base, EESSI_Mixin): ''' point-to-point OSU test ''' scale = parameter(filter_scales_pt2pt()) compute_unit = COMPUTE_UNIT[NODE] @@ -132,7 +130,7 @@ def adjust_executable_opts(self): @rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_coll(EESSI_OSU_Base): +class EESSI_OSU_Micro_Benchmarks_coll(EESSI_OSU_Base, EESSI_Mixin): ''' collective OSU test ''' scale = parameter(filter_scales_coll()) From e905e28de412987736740db1c7d4824dc4eb2424 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Thu, 9 Jan 2025 17:22:26 +0100 Subject: [PATCH 5/7] use 1GB memory per task --- eessi/testsuite/tests/apps/osu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index ef9fd121..8266fed2 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -56,7 +56,7 @@ class EESSI_OSU_Base(osu_benchmark): num_iters = 10 def required_mem_per_node(self): - return 1024 + return self.num_tasks_per_node * 1024 @run_after('init') def filter_scales_2gpus(self): From f27ba00b707d815dc33f714dff916abb9e202865 Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sun, 12 Jan 2025 13:11:04 +0100 Subject: [PATCH 6/7] add partial node scales to the pt2pt gpu tests --- eessi/testsuite/tests/apps/osu.py | 61 ++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 8266fed2..3ea452f1 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -16,9 +16,9 @@ from eessi.testsuite.utils import find_modules, log -def filter_scales_pt2pt(): +def filter_scales_pt2pt_cpu(): """ - Filtering function for filtering scales for the pt2pt OSU test + Filtering function for filtering scales for the pt2pt OSU test on CPUs returns all scales with either 2 cores, 1 full node, or 2 full nodes """ return [ @@ -29,6 +29,19 @@ def filter_scales_pt2pt(): ] +def filter_scales_pt2pt_gpu(): + """ + Filtering function for filtering scales for the pt2pt OSU test on GPUs + returns all scales with either a partial node, 1 full node, or 2 full nodes + """ + return [ + k for (k, v) in SCALES.items() + if (v['num_nodes'] == 1 and v.get('node_part', 0) > 1) + or (v['num_nodes'] == 2 and v.get('node_part', 0) == 1) + or (v['num_nodes'] == 1 and v.get('node_part', 0) == 1) + ] + + def filter_scales_coll(): """ Filtering function for filtering scales for the collective OSU test @@ -45,7 +58,6 @@ class EESSI_OSU_Base(osu_benchmark): """ base class for OSU tests """ time_limit = '30m' module_name = parameter(find_modules('OSU-Micro-Benchmarks')) - device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) # reset num_tasks_per_node from the hpctestlib: we handle it ourselves num_tasks_per_node = None @@ -87,17 +99,9 @@ def set_tags(self): self.bench_name = self.benchmark_info[0] self.tags.add(self.bench_name.split('.')[-1]) - @run_after('setup', always_last=True) - def skip_test_1gpu(self): - if self.device_type == DEVICE_TYPES[GPU]: - num_gpus = self.num_gpus_per_node * self.num_nodes - self.skip_if(num_gpus < 2, "Skipping GPU test : only 1 GPU available for this test case") - -@rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_pt2pt(EESSI_OSU_Base, EESSI_Mixin): - ''' point-to-point OSU test ''' - scale = parameter(filter_scales_pt2pt()) +class EESSI_OSU_pt2pt_Base(EESSI_OSU_Base): + ''' point-to-point OSU test base class ''' compute_unit = COMPUTE_UNIT[NODE] @run_after('init') @@ -130,9 +134,32 @@ def adjust_executable_opts(self): @rfm.simple_test -class EESSI_OSU_Micro_Benchmarks_coll(EESSI_OSU_Base, EESSI_Mixin): +class EESSI_OSU_pt2pt_CPU(EESSI_OSU_pt2pt_Base, EESSI_Mixin): + ''' point-to-point OSU test on CPUs''' + scale = parameter(filter_scales_pt2pt_cpu()) + device_type = DEVICE_TYPES[CPU] + + +@rfm.simple_test +class EESSI_OSU_pt2pt_GPU(EESSI_OSU_pt2pt_Base, EESSI_Mixin): + ''' point-to-point OSU test on GPUs''' + scale = parameter(filter_scales_pt2pt_gpu()) + device_type = DEVICE_TYPES[GPU] + + @run_after('setup') + def skip_test_1gpu(self): + num_gpus = self.num_gpus_per_node * self.num_nodes + self.skip_if( + num_gpus != 2 and self.scale not in ['1_node', '2_nodes'], + f"Skipping test : {num_gpus} GPU(s) available for this test case, need exactly 2" + ) + + +@rfm.simple_test +class EESSI_OSU_coll(EESSI_OSU_Base, EESSI_Mixin): ''' collective OSU test ''' scale = parameter(filter_scales_coll()) + device_type = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]]) @run_after('init') def filter_benchmark_coll(self): @@ -157,3 +184,9 @@ def set_compute_unit(self): DEVICE_TYPES[GPU]: COMPUTE_UNIT[GPU], } self.compute_unit = device_to_compute_unit.get(self.device_type) + + @run_after('setup') + def skip_test_1gpu(self): + if self.device_type == DEVICE_TYPES[GPU]: + num_gpus = self.num_gpus_per_node * self.num_nodes + self.skip_if(num_gpus < 2, "Skipping GPU test : only 1 GPU available for this test case") From c33114e88b962a602ef747cdc108c64de642fd5f Mon Sep 17 00:00:00 2001 From: Samuel Moors Date: Sun, 19 Jan 2025 15:57:53 +0100 Subject: [PATCH 7/7] always request gpus for the gpu-only pt2pt test --- eessi/testsuite/eessi_mixin.py | 1 + eessi/testsuite/hooks.py | 7 ++++++- eessi/testsuite/tests/apps/osu.py | 16 +++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index 3dd5b224..feacfb96 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -45,6 +45,7 @@ class EESSI_Mixin(RegressionMixin): bench_name = None bench_name_ci = None num_tasks_per_compute_unit = 1 + always_request_gpus = None # Create ReFrame variables for logging runtime environment information cvmfs_repo_name = variable(str, value='None') diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py index caa81fa7..8e4eb459 100644 --- a/eessi/testsuite/hooks.py +++ b/eessi/testsuite/hooks.py @@ -57,6 +57,8 @@ def _assign_default_num_gpus_per_node(test: rfm.RegressionTest): # no default set yet, so setting one test.default_num_gpus_per_node = math.ceil(test.max_avail_gpus_per_node / test.node_part) + log(f'default_num_gpus_per_node set to {test.default_num_gpus_per_node}') + def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1): """ @@ -83,6 +85,8 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n - assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 64 threads per task """ + log(f'assign_tasks_per_compute_unit called with compute_unit: {compute_unit} and num_per: {num_per}') + if num_per != 1 and compute_unit not in [COMPUTE_UNIT[NODE]]: raise NotImplementedError( f'Non-default num_per {num_per} is not implemented for compute_unit {compute_unit}.') @@ -713,7 +717,8 @@ def _check_always_request_gpus(test: rfm.RegressionTest): """ Make sure we always request enough GPUs if required for the current GPU partition (cluster-specific policy) """ - if FEATURES[ALWAYS_REQUEST_GPUS] in test.current_partition.features and not test.num_gpus_per_node: + always_request_gpus = FEATURES[ALWAYS_REQUEST_GPUS] in test.current_partition.features or test.always_request_gpus + if always_request_gpus and not test.num_gpus_per_node: test.num_gpus_per_node = test.default_num_gpus_per_node log(f'num_gpus_per_node set to {test.num_gpus_per_node} for partition {test.current_partition.name}') diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 3ea452f1..8e3ba85e 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ b/eessi/testsuite/tests/apps/osu.py @@ -145,14 +145,20 @@ class EESSI_OSU_pt2pt_GPU(EESSI_OSU_pt2pt_Base, EESSI_Mixin): ''' point-to-point OSU test on GPUs''' scale = parameter(filter_scales_pt2pt_gpu()) device_type = DEVICE_TYPES[GPU] + always_request_gpus = True @run_after('setup') - def skip_test_1gpu(self): + def skip_test_gpus(self): num_gpus = self.num_gpus_per_node * self.num_nodes - self.skip_if( - num_gpus != 2 and self.scale not in ['1_node', '2_nodes'], - f"Skipping test : {num_gpus} GPU(s) available for this test case, need exactly 2" - ) + if self.scale not in ['1_node', '2_nodes']: + # On a partial node allocation, run this test only if exactly 2 GPUs are allocated + self.skip_if( + num_gpus != 2, + f"Skipping test : {num_gpus} GPU(s) available for this test case, need exactly 2" + ) + elif self.scale == '1_node': + # Make sure there are at least 2 GPUs + self.skip_if(num_gpus < 2, "Skipping GPU test : only 1 GPU available for this test case") @rfm.simple_test