diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a96a88a..27736b6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,8 +31,8 @@ endmacro() # the final executable name set(EXE_NAME babelstream) -# for chrono, some basic CXX features, and generic lambdas; models can overwrite this if required -set(CMAKE_CXX_STANDARD 14) +# for chrono and some basic CXX features, models can overwrite this if required +set(CMAKE_CXX_STANDARD 11) if (NOT CMAKE_BUILD_TYPE) message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'") diff --git a/src/StreamModels.h b/src/StreamModels.h index 541ded37..c2ea84df 100644 --- a/src/StreamModels.h +++ b/src/StreamModels.h @@ -36,7 +36,7 @@ #endif template -std::unique_ptr> construct_stream(int ARRAY_SIZE, unsigned int deviceIndex) { +std::unique_ptr> make_stream(int ARRAY_SIZE, unsigned int deviceIndex) { #if defined(CUDA) // Use the CUDA implementation return std::make_unique>(ARRAY_SIZE, deviceIndex); diff --git a/src/Unit.h b/src/Unit.h index a411f13c..5a7b916a 100644 --- a/src/Unit.h +++ b/src/Unit.h @@ -1,51 +1,31 @@ #pragma once #include -enum { MegaByte, GigaByte, MibiByte, GibiByte }; - // Units for output: struct Unit { - int value; - Unit(int v) : value(v) {} - double fmt(double bytes) { - switch(value) { - case MibiByte: return pow(2.0, -20.0) * bytes; - case MegaByte: return 1.0E-6 * bytes; - case GibiByte: return pow(2.0, -30.0) * bytes; - case GigaByte: return 1.0E-9 * bytes; - default: std::cerr << "Unimplemented!" << std::endl; abort(); - } - } - char const* str() { - switch(value) { - case MibiByte: return "MiB"; - case MegaByte: return "MB"; - case GibiByte: return "GiB"; - case GigaByte: return "GB"; - default: std::cerr << "Unimplemented!" << std::endl; abort(); - } - } - Unit kibi() { - switch(value) { - case MegaByte: return Unit(MibiByte); - case GigaByte: return Unit(GibiByte); - default: return *this; - } - } - Unit byte() { + enum class Kind { MegaByte, GigaByte, TeraByte, MibiByte, GibiByte, TebiByte }; + Kind value; + explicit Unit(Kind v) : value(v) {} + double fmt(double bytes) const { switch(value) { - case MibiByte: return Unit(MegaByte); - case GibiByte: return Unit(GigaByte); - default: return *this; + case Kind::MibiByte: return std::pow(2.0, -20.0) * bytes; + case Kind::MegaByte: return 1.0E-6 * bytes; + case Kind::GibiByte: return std::pow(2.0, -30.0) * bytes; + case Kind::GigaByte: return 1.0E-9 * bytes; + case Kind::TebiByte: return std::pow(2.0, -40.0) * bytes; + case Kind::TeraByte: return 1.0E-12 * bytes; + default: std::cerr << "Unimplemented!" << std::endl; std::abort(); } } - char const* lower() { + char const* str() const { switch(value) { - case MibiByte: return "mibytes"; - case MegaByte: return "mbytes"; - case GibiByte: return "gibytes"; - case GigaByte: return "gbytes"; - default: std::cerr << "Unimplemented!" << std::endl; abort(); + case Kind::MibiByte: return "MiB"; + case Kind::MegaByte: return "MB"; + case Kind::GibiByte: return "GiB"; + case Kind::GigaByte: return "GB"; + case Kind::TebiByte: return "TiB"; + case Kind::TeraByte: return "TB"; + default: std::cerr << "Unimplemented!" << std::endl; std::abort(); } } }; diff --git a/src/main.cpp b/src/main.cpp index b388abc9..ed1244e8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,16 +27,16 @@ unsigned int num_times = 100; unsigned int deviceIndex = 0; bool use_float = false; bool output_as_csv = false; -Unit unit = MegaByte; +Unit unit{Unit::Kind::MegaByte}; bool silence_errors = false; std::string csv_separator = ","; // Benchmarks: constexpr size_t num_benchmarks = 6; -array labels = {"Copy", "Add", "Mul", "Triad", "Dot", "Nstream"}; +std::array labels = {"Copy", "Add", "Mul", "Triad", "Dot", "Nstream"}; // Weights data moved by benchmark & therefore achieved BW: // bytes = weight * sizeof(T) * ARRAY_SIZE -> bw = bytes / dur -array weight = {/*Copy:*/ 2, /*Add:*/ 2, /*Mul:*/ 3, /*Triad:*/ 3, /*Dot:*/ 2, /*Nstream:*/ 4}; +std::array weight = {/*Copy:*/ 2, /*Add:*/ 2, /*Mul:*/ 3, /*Triad:*/ 3, /*Dot:*/ 2, /*Nstream:*/ 4}; // Options for running the benchmark: // - Classic 5 kernels (Copy, Add, Mul, Triad, Dot). @@ -63,19 +63,17 @@ bool run_benchmark(int id) { // Prints all available benchmark labels: template void print_labels(OStream& os) { - for (int i = 0; i < num_benchmarks; ++i) { + for (size_t i = 0; i < num_benchmarks; ++i) { os << labels[i]; if (i != (num_benchmarks - 1)) os << ","; } } -// Clock and duration types: -using clk_t = chrono::high_resolution_clock; -using dur_t = chrono::duration; - // Returns duration of executing function f: template double time(F&& f) { + using clk_t = std::chrono::high_resolution_clock; + using dur_t = std::chrono::duration; auto start = clk_t::now(); f(); return dur_t(clk_t::now() - start).count(); @@ -107,7 +105,7 @@ int main(int argc, char *argv[]) else run(); - return 0; + return EXIT_SUCCESS; } // Run specified kernels @@ -163,7 +161,7 @@ void run() << "num_times" << csv_separator << "n_elements" << csv_separator << "sizeof" << csv_separator - << "max_" << unit.lower() << "_per_sec" << csv_separator + << "max_" << unit.str() << "_per_sec" << csv_separator << "min_runtime" << csv_separator << "max_runtime" << csv_separator << "avg_runtime" << std::endl; @@ -182,10 +180,10 @@ void run() auto fmt_cli = [](char const* function, double bandwidth, double dt_min, double dt_max, double dt_avg) { std::cout << std::left << std::setw(12) << function - << std::left << std::setw(12) << setprecision(3) << bandwidth - << std::left << std::setw(12) << setprecision(5) << dt_min - << std::left << std::setw(12) << setprecision(5) << dt_max - << std::left << std::setw(12) << setprecision(5) << dt_avg + << std::left << std::setw(12) << std::setprecision(3) << bandwidth + << std::left << std::setw(12) << std::setprecision(5) << dt_min + << std::left << std::setw(12) << std::setprecision(5) << dt_max + << std::left << std::setw(12) << std::setprecision(5) << dt_avg << std::endl; }; auto fmt_result = [&](char const* function, size_t num_times, size_t num_elements, @@ -213,15 +211,15 @@ void run() std::cout.precision(ss); } - auto stream = construct_stream(ARRAY_SIZE, deviceIndex); + auto stream = make_stream(ARRAY_SIZE, deviceIndex); auto initElapsedS = time([&] { stream->init_arrays(startA, startB, startC); }); // Result of the Dot kernel, if used. T sum{}; - vector> timings = run_all(stream, sum); + std::vector> timings = run_all(stream, sum); // Create & read host vectors: - vector a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE); + std::vector a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE); auto readElapsedS = time([&] { stream->read_arrays(a, b, c); }); check_solution(num_times, a, b, c, sum); @@ -250,7 +248,7 @@ void run() << std::left << std::setw(12) << "Max" << std::left << std::setw(12) << "Average" << std::endl - << fixed; + << std::fixed; } for (int i = 0; i < num_benchmarks; ++i) @@ -280,13 +278,13 @@ void check_solution(const unsigned int ntimes, std::vector& a, std::vector const T scalar = startScalar; - for (int b = 0; b < num_benchmarks; ++b) + for (size_t b = 0; b < num_benchmarks; ++b) { if (!run_benchmark(b)) continue; for (unsigned int i = 0; i < ntimes; i++) { - switch((Benchmark)b) { + switch(static_cast(b)) { case Benchmark::Copy: goldC = goldA; break; case Benchmark::Mul: goldB = scalar * goldC; break; case Benchmark::Add: goldC = goldA + goldB; break; @@ -307,7 +305,7 @@ void check_solution(const unsigned int ntimes, std::vector& a, std::vector errC /= c.size(); long double errSum = std::fabs((sum - goldSum)/goldSum); - long double epsi = std::numeric_limits::epsilon() * 100.0; + long double epsi = std::numeric_limits::epsilon() * 1000.0; bool failed = false; if (errA > epsi) { @@ -425,7 +423,7 @@ void parseArguments(int argc, char *argv[]) } else { - auto p = find_if(labels.begin(), labels.end(), [&](char const* label) { + auto p = std::find_if(labels.begin(), labels.end(), [&](char const* label) { return std::string(label) == key; }); if (p == labels.end()) { @@ -435,7 +433,7 @@ void parseArguments(int argc, char *argv[]) std::cerr << std::endl; std::exit(EXIT_FAILURE); } - selection = (Benchmark)(distance(labels.begin(), p)); + selection = (Benchmark)(std::distance(labels.begin(), p)); } } else if (!std::string("--csv").compare(argv[i])) @@ -444,19 +442,27 @@ void parseArguments(int argc, char *argv[]) } else if (!std::string("--mibibytes").compare(argv[i])) { - unit = Unit(MibiByte); + unit = Unit(Unit::Kind::MibiByte); } else if (!std::string("--megabytes").compare(argv[i])) { - unit = Unit(MegaByte); + unit = Unit(Unit::Kind::MegaByte); } else if (!std::string("--gibibytes").compare(argv[i])) { - unit = Unit(GibiByte); + unit = Unit(Unit::Kind::GibiByte); } else if (!std::string("--gigabytes").compare(argv[i])) { - unit = Unit(GigaByte); + unit = Unit(Unit::Kind::GigaByte); + } + else if (!std::string("--tebibytes").compare(argv[i])) + { + unit = Unit(Unit::Kind::TebiByte); + } + else if (!std::string("--terabytes").compare(argv[i])) + { + unit = Unit(Unit::Kind::TeraByte); } else if (!std::string("--silence-errors").compare(argv[i])) { @@ -481,6 +487,8 @@ void parseArguments(int argc, char *argv[]) std::cout << " --mibibytes Use MiB=2^20 for bandwidth calculation (default MB=10^6)" << std::endl; std::cout << " --gigibytes Use GiB=2^30 for bandwidth calculation (default MB=10^6)" << std::endl; std::cout << " --gigabytes Use GB=10^9 for bandwidth calculation (default MB=10^6)" << std::endl; + std::cout << " --tebibytes Use TiB=2^40 for bandwidth calculation (default MB=10^6)" << std::endl; + std::cout << " --terabytes Use TB=10^12 for bandwidth calculation (default MB=10^6)" << std::endl; std::cout << " --silence-errors Ignores validation errors." << std::endl; std::cout << std::endl; std::exit(EXIT_SUCCESS);