EESSI · casparvl · Nov 9, 2020 · Nov 9, 2020 · Mar 29, 2021 · Mar 29, 2021
diff --git a/tests/reframe/config/settings.py b/tests/reframe/config/settings.py
@@ -0,0 +1,117 @@
+site_configuration = {  # ReFrame site settings: systems, environments, logging
+    'systems': [
+        {
+            'name': 'Example_system',
+            'descr': 'This is just an example system',
+            'modules_system': 'tmod',
+            'hostnames': ['login', 'int'],
+            'partitions': [
+                {
+                    'name': 'short',
+                    'scheduler': 'slurm',
+                    'launcher': 'srun',
+                    'access': ['-p short'],
+                    'environs': ['foss', 'container'],
+                    'container_platforms': [
+                        {
+                            'type': 'Singularity',
+                            'modules': [],
+                            'variables': [['SLURM_MPI_TYPE', 'pmix']]
+                        }
+                    ],
+                    'processor': {
+                        'num_cpus': 24,
+                    },
+                    'devices': [  # NOTE(review): GPUs on the 'normal partition' — confirm
+                        {
+                            'type': 'gpu',
+                            'num_devices': 2,
+                        },
+                    ],
+                    'descr': 'normal partition'
+                },
+                {
+                    'name': 'gpu_short',
+                    'scheduler': 'slurm',
+                    'launcher': 'srun',
+                    'access': ['-p gpu_short'],
+                    'environs': ['fosscuda', 'container'],
+                    'container_platforms': [
+                        {
+                            'type': 'Singularity',
+                            'modules': [],
+                            'variables': [['SLURM_MPI_TYPE', 'pmix']]
+                        }
+                    ],
+                    'processor': {
+                        'num_cpus': 16,
+                    },
+                    'devices': [
+                        {
+                            'type': 'gpu',
+                            'num_devices': 2,
+                        },
+                    ],
+                    'descr': 'gpu partition'
+                },
+            ]
+        },
+    ],
+    'environments': [
+        {
+            'name': 'foss',
+            'modules': ['foss/2020a'],
+            'cc': 'mpicc',
+            'cxx': 'mpicxx',
+            'ftn': 'mpifort',
+        },
+        {
+            'name': 'fosscuda',
+            'modules': ['fosscuda/2020a'],
+            'cc': 'mpicc',
+            'cxx': 'mpicxx',
+            'ftn': 'mpifort',
+        },
+        {
+            'name': 'container',
+            'modules': [],
+        },
+    ],
+    'logging': [
+        {
+            'level': 'debug',
+            'handlers': [
+                {
+                    'type': 'stream',
+                    'name': 'stdout',
+                    'level': 'info',
+                    'format': '%(message)s'
+                },
+                {
+                    'type': 'file',
+                    'name': 'reframe.log',
+                    'level': 'debug',
+                    'format': '[%(asctime)s] %(levelname)s: %(check_info)s: %(message)s',  # noqa: E501
+                    'append': False
+                }
+            ],
+            'handlers_perflog': [
+                {
+                    'type': 'filelog',
+                    'prefix': '%(check_system)s/%(check_partition)s',
+                    'level': 'info',
+                    'format': (
+                        '%(check_job_completion_time)s|reframe %(version)s|'
+                        '%(check_info)s|jobid=%(check_jobid)s|'
+                        '%(check_perf_var)s=%(check_perf_value)s|'
+                        'ref=%(check_perf_ref)s '
+                        '(l=%(check_perf_lower_thres)s, '
+                        'u=%(check_perf_upper_thres)s)|'
+                        '%(check_perf_unit)s'
+                    ),
+                    'append': True
+                }
+            ]
+        }
+    ],
+}
diff --git a/tests/reframe/config/system_properties.py b/tests/reframe/config/system_properties.py
@@ -0,0 +1,2 @@
+
+ncorespernode = 16  # presumably CPU cores per node — TODO confirm against the target system
diff --git a/tests/reframe/eessi-checks/applications/src/tensorflow2_synthetic_benchmark.py b/tests/reframe/eessi-checks/applications/src/tensorflow2_synthetic_benchmark.py
@@ -0,0 +1,158 @@
+# Copyright 2019 Uber Technologies, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import, division, print_function
+
+import argparse
+import os
+import numpy as np
+import timeit
+
+import tensorflow as tf
+from tensorflow.keras import applications
+from tensorflow.keras import mixed_precision
+
+def log(s, nl=True):
+    """Print s (newline-terminated unless nl is false); under Horovod only rank 0 prints."""
+    muted_rank = args.use_horovod and hvd.rank() != 0
+    if not muted_rank:
+        print(s, end=('\n' if nl else ''))
+
+# Benchmark settings: command-line options, parsed from sys.argv when the script runs.
+parser = argparse.ArgumentParser(description='TensorFlow Synthetic Benchmark',
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--fp16-allreduce', action='store_true', default=False,
+                    help='use fp16 compression during allreduce')
+parser.add_argument('--mixed-prec', action='store_true', default=False,
+                    help='Use mixed precision for training')
+# Model selection and per-step input size.
+parser.add_argument('--model', type=str, default='ResNet50',
+                    help='model to benchmark')
+parser.add_argument('--batch-size', type=int, default=32,
+                    help='input batch size')
+# Length of the run: untimed warm-up batches, then num-iters timed iterations.
+parser.add_argument('--num-warmup-batches', type=int, default=2,
+                    help='number of warm-up batches that don\'t count towards benchmark')
+parser.add_argument('--num-batches-per-iter', type=int, default=10,
+                    help='number of batches per benchmark iteration')
+parser.add_argument('--num-iters', type=int, default=10,
+                    help='number of benchmark iterations')
+# Device choice and optional Horovod usage.
+parser.add_argument('--no-cuda', action='store_true', default=False,
+                    help='disables CUDA training')
+parser.add_argument('--use-horovod', action='store_true', default=False)
+
+args = parser.parse_args()
+args.cuda = not args.no_cuda  # convenience flag: CUDA is on unless --no-cuda was given
+
+# Horovod: initialize Horovod (imported only when --use-horovod is given).
+if args.use_horovod:
+    import horovod.tensorflow as hvd
+    hvd.init()
+
+# Horovod: pin GPU to be used to process local rank (one GPU per process)
+if args.cuda:
+    gpus = tf.config.experimental.list_physical_devices('GPU')
+    for gpu in gpus:
+        tf.config.experimental.set_memory_growth(gpu, True)
+    if gpus and args.use_horovod:
+        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')
+else:
+    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+# Inter-op parallelism is pinned to one thread regardless of device type.
+# Intra-op threads follow OMP_NUM_THREADS; when the variable is unset, 0 is
+# passed so TensorFlow picks a value itself instead of the script failing
+# with a KeyError before the benchmark starts.
+tf.config.threading.set_inter_op_parallelism_threads(1)
+tf.config.threading.set_intra_op_parallelism_threads(int(os.environ.get('OMP_NUM_THREADS', 0)))
+
+if args.mixed_prec:
+    log('Running with mixed_float16 as global policy for the precision')
+    mixed_precision.set_global_policy('mixed_float16')
+
+# Set up standard model: the Keras application class named by --model, built without pretrained weights.
+model = getattr(applications, args.model)(weights=None)
+opt = tf.optimizers.SGD(0.01)
+# Synthetic inputs: one random batch of 224x224x3 images and int64 labels, reused by every step.
+data = tf.random.uniform([args.batch_size, 224, 224, 3])
+target = tf.random.uniform([args.batch_size, 1], minval=0, maxval=999, dtype=tf.int64)
+
+
+@tf.function
+def benchmark_step(first_batch):
+    """Run one SGD training step on the synthetic batch (returns nothing).
+
+    When first_batch is true under Horovod, rank 0's variables are broadcast.
+    """
+    with tf.GradientTape() as tape:
+        predictions = model(data, training=True)
+        loss = tf.losses.sparse_categorical_crossentropy(target, predictions)
+
+    if args.use_horovod:
+        # Horovod: wrap the tape in a DistributedGradientTape, with optional
+        # fp16 compression selected by --fp16-allreduce.
+        if args.fp16_allreduce:
+            compression = hvd.Compression.fp16
+        else:
+            compression = hvd.Compression.none
+        tape = hvd.DistributedGradientTape(tape, compression=compression)
+
+    grads = tape.gradient(loss, model.trainable_variables)
+    opt.apply_gradients(zip(grads, model.trainable_variables))
+
+    # Horovod: broadcast from rank 0 only after the first gradient step, so
+    # the optimizer is initialized and all workers start from the same state.
+    if args.use_horovod and first_batch:
+        hvd.broadcast_variables(model.variables, root_rank=0)
+        hvd.broadcast_variables(opt.variables(), root_rank=0)
+
+# Report the run configuration through log().
+device = 'GPU' if args.cuda else 'CPU'
+log('Model: %s' % args.model)
+log('Batch size: %d' % args.batch_size)
+# Without Horovod the benchmark counts as a single device.
+worker_count = hvd.size() if args.use_horovod else 1
+log('Number of %ss: %d' % (device, worker_count))
+
+
+with tf.device(device):
+    # Warm-up: the first step runs with first_batch=True (Horovod broadcast),
+    # then num_warmup_batches further steps run whose timing is discarded.
+    log('Running warmup...')
+    benchmark_step(first_batch=True)
+
+    timeit.timeit(lambda: benchmark_step(first_batch=False),
+                  number=args.num_warmup_batches)
+
+    # Benchmark: num_iters timed iterations of num_batches_per_iter steps.
+    log('Running benchmark...')
+    images_per_iter = args.batch_size * args.num_batches_per_iter
+    img_secs = []
+    for iteration in range(args.num_iters):
+        elapsed = timeit.timeit(lambda: benchmark_step(first_batch=False),
+                                number=args.num_batches_per_iter)
+        img_sec = images_per_iter / elapsed
+        img_secs.append(img_sec)
+        log('Iter #%d: %.1f img/sec per %s' % (iteration, img_sec, device))
+
+    # Results: mean throughput +- 1.96 standard deviations, per device and
+    # scaled by the number of Horovod workers (1 without Horovod).
+    img_sec_mean = np.mean(img_secs)
+    img_sec_conf = 1.96 * np.std(img_secs)
+    log('Img/sec per %s: %.1f +-%.1f' % (device, img_sec_mean, img_sec_conf))
+    ndevices = hvd.size() if args.use_horovod else 1
+    log('Total img/sec on %d %s(s): %.1f +-%.1f' %
+        (ndevices, device, ndevices * img_sec_mean, ndevices * img_sec_conf))
+    log('Benchmark completed')