Skip to content

Commit

Permalink
Unify environment information gathering and printing
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Nov 21, 2023
1 parent cfb57a0 commit eb98430
Show file tree
Hide file tree
Showing 13 changed files with 193 additions and 143 deletions.
2 changes: 1 addition & 1 deletion examples/alpaka/daxpy/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (NOT TARGET llama::llama)
find_package(llama REQUIRED)
endif()
find_package(alpaka 1.0 REQUIRED)
alpaka_add_executable(${PROJECT_NAME} daxpy.cpp ../../common/Stopwatch.hpp ../../common/hostname.hpp)
alpaka_add_executable(${PROJECT_NAME} daxpy.cpp ../../common/Stopwatch.hpp ../../common/env.hpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama OpenMP::OpenMP_CXX alpaka::alpaka)

Expand Down
24 changes: 8 additions & 16 deletions examples/alpaka/daxpy/daxpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: LGPL-3.0-or-later

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
Expand Down Expand Up @@ -156,26 +156,20 @@ void daxpyAlpakaLlama(std::string mappingName, std::ofstream& plotFile, Mapping
auto main() -> int
try
{
const auto numThreads = static_cast<std::size_t>(omp_get_max_threads());
const char* affinity = std::getenv("GOMP_CPU_AFFINITY"); // NOLINT(concurrency-mt-unsafe)
affinity = affinity == nullptr ? "NONE - PLEASE PIN YOUR THREADS!" : affinity;
const auto env = common::captureEnv();

fmt::print(
R"({}Mi doubles ({}MiB data)
Threads: {}
Affinity: {}
)",
"{}Mi doubles ({}MiB data)\n{}\n",
problemSize / 1024 / 1024,
problemSize * sizeof(double) / 1024 / 1024,
numThreads,
affinity);
env);

std::ofstream plotFile{"daxpy.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
# threads: {} affinity: {}
set title "daxpy CPU {}Mi doubles on {}"
# {}
set title "daxpy CPU {}Mi doubles"
set style data histograms
set style fill solid
set xtics rotate by 45 right
Expand All @@ -184,10 +178,8 @@ set yrange [0:*]
set ylabel "runtime [s]"
$data << EOD
)",
numThreads,
affinity,
problemSize / 1024 / 1024,
common::hostname());
env,
problemSize / 1024 / 1024);

daxpy(plotFile);

Expand Down
16 changes: 8 additions & 8 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: LGPL-3.0-or-later

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
Expand Down Expand Up @@ -356,21 +356,21 @@ try
using Size = int;
using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;

const auto env = common::captureEnv<Acc>();
std::cout << problemSize / 1000 << "k particles (" << problemSize * llama::sizeOf<Particle> / 1024 << "kiB)\n"
<< "Caching " << threadsPerBlock << " particles (" << threadsPerBlock * llama::sizeOf<Particle> / 1024
<< " kiB) in shared memory\n"
<< "Reducing on " << elementsPerThread << " particles per thread\n"
<< "Using " << threadsPerBlock << " threads per block\n";
const auto dev = alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0);
const auto props = alpaka::getAccDevProps<Acc>(dev);
std::cout << "Running on " << alpaka::getName(dev) << ", " << props.m_sharedMemSizeBytes / 1024 << "kiB SM\n";
<< "Using " << threadsPerBlock << " threads per block\n"
<< env << '\n';
std::cout << std::fixed;

std::ofstream plotFile{"nbody_alpaka.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
set title "nbody alpaka {}ki particles on {} on {}"
# {}
set title "nbody alpaka {}ki particles on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
Expand All @@ -382,9 +382,9 @@ set y2label "move runtime [s]"
set y2tics auto
$data << EOD
)",
env,
problemSize / 1024,
alpaka::getAccName<Acc>(),
common::hostname());
alpaka::getAccName<Acc>());
plotFile << "\"\"\t\"update\"\t\"move\"\n";

run<Acc, AoS, AoS>(plotFile);
Expand Down
30 changes: 8 additions & 22 deletions examples/alpaka/pic/pic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_CUDA_ONLY_MODE)
# define ALPAKA_ACC_GPU_CUDA_ONLY_MODE
#endif
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
Expand Down Expand Up @@ -877,29 +877,18 @@ void run(std::ostream& plotFile)
auto main() -> int
try
{
const auto numThreads = static_cast<std::size_t>(omp_get_max_threads());
const char* affinity = std::getenv("GOMP_CPU_AFFINITY"); // NOLINT(concurrency-mt-unsafe)
affinity = affinity == nullptr ? "NONE - PLEASE PIN YOUR THREADS!" : affinity;

using Acc = alpaka::ExampleDefaultAcc<Dim, Size>;
auto accName = alpaka::getName(alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0u));
while(static_cast<bool>(std::isspace(accName.back())))
accName.pop_back();
fmt::print(
"Running {} steps with grid {}x{} and {}k particles on {}\n",
nsteps,
gridX,
gridY,
numpart / 1000,
accName);
const auto env = common::captureEnv<Acc>();
const auto accName = common::trim(alpaka::getName(alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0u)));
fmt::print("Running {} steps with grid {}x{} and {}k particles\n{}\n", nsteps, gridX, gridY, numpart / 1000, env);

std::ofstream plotFile{"pic.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
fmt::print(
plotFile,
R"aa(#!/usr/bin/gnuplot -p
# threads: {} affinity: {}
set title "PIC grid {}x{} {}k particles on {} ({})"
# {}
set title "PIC grid {}x{} {}k particles on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
Expand All @@ -909,14 +898,11 @@ set ylabel "runtime [s]"
$data << EOD
"" "clr J" "integr" " dep J" " bnd J" "adv B1" "bnd B1" " adv E" " bnd E" "adv B2" "bnd B2" "total"
)aa",
numThreads,
affinity,
env,
gridX,
gridY,
numpart / 1000,
accName,
common::hostname());

accName);

// FieldMapping: AoS RM, AoS CM, AoS Mo,
// SoA RM, SoA CM, SoA Mo,
Expand Down
14 changes: 9 additions & 5 deletions examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: CC0-1.0

#include "../../common/Stopwatch.hpp"
#include "../../common/hostname.hpp"
#include "../../common/env.hpp"

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
Expand Down Expand Up @@ -189,24 +189,28 @@ catch(const std::exception& e)

auto main() -> int
{
using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Size>;
const auto env = common::captureEnv<Acc>();
std::cout << problemSize / 1000 / 1000 << "M values "
<< "(" << problemSize * sizeof(float) / 1024 << "kiB)\n";
<< "(" << problemSize * sizeof(float) / 1024 << "kiB)\n"
<< env << '\n';

std::ofstream plotFile{"vectoradd_alpaka.sh"};
plotFile.exceptions(std::ios::badbit | std::ios::failbit);
plotFile << fmt::format(
R"(#!/usr/bin/gnuplot -p
set title "vectoradd alpaka {}Mi elements on {} on {}"
# {}
set title "vectoradd alpaka {}Mi elements on {}"
set style data histograms
set style fill solid
set xtics rotate by 45 right
set yrange [0:*]
set ylabel "runtime [s]"
$data << EOD
)",
env,
problemSize / 1024 / 1024,
alpaka::getAccName<alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Size>>(),
common::hostname());
alpaka::getAccName<Acc>());

boost::mp11::mp_for_each<boost::mp11::mp_iota_c<6>>([&](auto ic) { run<decltype(ic)::value>(plotFile); });

Expand Down
125 changes: 125 additions & 0 deletions examples/common/env.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2021 Bernhard Manfred Gruber
// SPDX-License-Identifier: LGPL-3.0-or-later

#pragma once

#include <fmt/core.h>
#if __has_include(<omp.h>)
# include <omp.h>
#endif
#ifdef ALPAKA_DEBUG // defined when the cmake target links to alpaka
# include <alpaka/alpaka.hpp>
#endif
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <string>
#include <type_traits>
#ifdef _WIN32
# define NOMINMAX
# define WIN32_LEAN_AND_MEAN
# include <winsock2.h>
# pragma comment(lib, "ws2_32")
#else
# include <unistd.h>
#endif

namespace common
{
// Returns the host name reported by gethostname(), or "unknown" if the call fails.
// We used boost::asio::ip::host_name() originally, but it complicated the disassembly and required asio as an
// additional dependency.
inline auto hostname() -> std::string
{
    constexpr int capacity = 256;
    // Zero-initialized so the buffer is a valid C string no matter what gethostname() writes into it.
    char name[capacity] = {};
    // Offer one byte less than the buffer holds: a truncated name is not guaranteed to be null-terminated, so the
    // last (zeroed) byte is reserved as terminator.
    if(::gethostname(name, capacity - 1) != 0)
        return "unknown";
    return name;
}

// Returns a copy of s with all leading and trailing whitespace (as classified by std::isspace) removed.
// A string consisting only of whitespace yields an empty string.
inline auto trim(std::string s) -> std::string
{
    // std::isspace has undefined behavior for negative arguments (other than EOF), which plain char can produce
    // for non-ASCII bytes. Taking the character as unsigned char keeps the argument in the valid range.
    const auto notSpace = [](unsigned char c) { return std::isspace(c) == 0; };
    // Strip the tail first; .base() converts the reverse iterator to the position just past the last kept char.
    s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end());
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace));
    return s;
}

// Gathers a description of the execution environment into a single string. Sections are separated by "; " and
// appear in this order:
//   * host name (always)
//   * OpenMP max thread count and the OMP_PROC_BIND / OMP_PLACES environment variables (when _OPENMP is defined)
//   * SIMD instruction set selected at compile time, derived from predefined compiler macros (always)
//   * alpaka accelerator name, name of device 0 and its shared memory size (when ALPAKA_DEBUG is defined and
//     AlpakaAcc is not void)
//   * CUDA device name, global memory and shared memory per block (when compiled by nvcc or clang in CUDA mode)
//
// AlpakaAcc: the alpaka accelerator type to describe. The default, void, skips the alpaka section.
// Returns the assembled description. It contains no trailing newline.
template<typename AlpakaAcc = void>
inline auto captureEnv() -> std::string
{
    std::string env;

    // hostname
    env += fmt::format("Host: {}", hostname());

    // OpenMP
#ifdef _OPENMP
    const auto maxThreads = static_cast<std::size_t>(omp_get_max_threads());
    const char* ompProcBind = std::getenv("OMP_PROC_BIND"); // NOLINT(concurrency-mt-unsafe)
    const char* ompPlaces = std::getenv("OMP_PLACES"); // NOLINT(concurrency-mt-unsafe)
    // Unset variables are reported with a reminder, since unpinned threads distort benchmark results.
    ompProcBind = ompProcBind == nullptr ? "no - PLEASE DEFINE ENV.VAR. OMP_PROC_BIND!" : ompProcBind;
    ompPlaces = ompPlaces == nullptr ? "nothing - PLEASE DEFINE ENV.VAR. OMP_PLACES!" : ompPlaces;
    env += fmt::format("; OpenMP: max {} threads, bound {}, to {}", maxThreads, ompProcBind, ompPlaces);
#endif

    // SIMD: checked from the most to the least capable extension, so the first match is the best one available.
    std::string simdArch =
#if defined(__AVX512F__)
        "AVX512F";
#elif defined(__AVX2__)
        "AVX2";
#elif defined(__AVX__)
        "AVX";
#elif defined(__SSE4_2__)
        "SSE4.2";
#elif defined(__SSE4_1__)
        "SSE4.1";
#elif defined(__SSE3__)
        "SSE3";
#elif defined(__SSE2__)
        "SSE2";
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) // ACLE spelling and the legacy spelling
        "NEON";
#elif defined(__ALTIVEC__)
        "ALTIVEC";
#else
        "unknown";
#endif

#ifdef __FMA__
    simdArch += "+FMA";
#endif
    env += fmt::format("; SIMD: {}", simdArch);

    // alpaka
#ifdef ALPAKA_DEBUG // defined when the cmake target links to alpaka
    if constexpr(!std::is_void_v<AlpakaAcc>)
    {
        using Acc = AlpakaAcc;
        auto accName = alpaka::getAccName<Acc>();
        // Drop template arguments. Guarded because a name without '<' yields npos, which must not be used as an
        // offset.
        if(const auto templateArgsPos = accName.find('<'); templateArgsPos != std::string::npos)
            accName.erase(templateArgsPos);
        const auto dev = alpaka::getDevByIdx(alpaka::Platform<Acc>{}, 0u);
        const auto devName = trim(alpaka::getName(dev)); // TODO(bgruber): drop trim after fix lands in alpaka
        const auto devProps = alpaka::getAccDevProps<Acc>(dev);
        env += fmt::format(
            "; alpaka acc: {}, dev[0]: {}, SMem: {}KiB",
            accName,
            devName,
            devProps.m_sharedMemSizeBytes / 1024);
    }
#endif

    // CUDA
#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDACC__))
    {
        int device = 0;
        cudaDeviceProp prop{};
        // Only report properties when both queries succeeded; otherwise prop holds no meaningful data.
        if(cudaGetDevice(&device) == cudaSuccess && cudaGetDeviceProperties(&prop, device) == cudaSuccess)
            env += fmt::format(
                "; CUDA dev: {}, {}MiB GM, {}KiB SM",
                prop.name,
                prop.totalGlobalMem / 1024 / 1024,
                prop.sharedMemPerBlock / 1024);
        else
            env += "; CUDA dev: unknown";
    }
#endif

    return env;
}
} // namespace common
26 changes: 0 additions & 26 deletions examples/common/hostname.hpp

This file was deleted.

Loading

0 comments on commit eb98430

Please sign in to comment.