-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from scylla-zpp-blas/benchmark
- Adding identifier type (`scylla_blas::id_t`), as requested by @jbhayven - Renamed index_type to index_t - BLOCK_SIZE is now not global, but saved per matrix - Refactored matrix and vector classes (field/method order, driver usage, added nonstatic resize, etc.) - Fixed bug in tests (they were using abs instead of std::abs, abs works on integers) - Worker concurrency is now not global, but saved per scheduler. Same with scheduler sleep time. - Worker sleep time and max retries are now settable with command line arguments. - Sleeps now have microsecond accuracy. - Implement simple benchmark program - Performance optimizations - Many other changes introduced during benchmarking
- Loading branch information
Showing
57 changed files
with
2,595 additions
and
875 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Source files that make up the scylla_blas_benchmark executable.
set(BENCHMARK_SRC main.cc benchmark.hh benchmark.cc const.hh)

# List variables are expanded UNQUOTED on purpose: a quoted "${VAR}" is passed
# as one single argument, which hides keywords such as optimized/debug inside
# Boost_LIBRARIES and turns an unset variable into an empty-string argument.
add_executable(scylla_blas_benchmark ${BENCHMARK_SRC})
target_include_directories(scylla_blas_benchmark PUBLIC ${Boost_INCLUDE_DIRS})
target_link_libraries(scylla_blas_benchmark PUBLIC scylla_blas ${Boost_LIBRARIES})
# A single path stays quoted so that a source directory containing spaces works.
target_include_directories(scylla_blas_benchmark PRIVATE "${CMAKE_SOURCE_DIR}/matrix_generators")

# Second executable, built from insert_benchmark.cc and linked against scylla_blas only.
add_executable(insert_benchmark insert_benchmark.cc)
target_link_libraries(insert_benchmark PUBLIC scylla_blas)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
#include <chrono> | ||
|
||
#include <scylla_blas/matrix.hh> | ||
#include <scylla_blas/vector.hh> | ||
|
||
#include "const.hh" | ||
#include "benchmark.hh" | ||
|
||
// Matrix * Matrix | ||
|
||
void benchmark_mm::init() { | ||
scylla_blas::matrix<float>::init(session, l_matrix_id, 0, 0, true); | ||
scylla_blas::matrix<float>::init(session, r_matrix_id, 0, 0, true); | ||
scylla_blas::matrix<float>::init(session, w_matrix_id, 0, 0, true); | ||
} | ||
|
||
void benchmark_mm::setup(int64_t block_size, int64_t length) { | ||
left_matrix = std::make_unique<scylla_blas::matrix<float>>(session, l_matrix_id); | ||
right_matrix = std::make_unique<scylla_blas::matrix<float>>(session, r_matrix_id); | ||
result_matrix = std::make_unique<scylla_blas::matrix<float>>(session, w_matrix_id); | ||
|
||
left_matrix->resize(length, length); | ||
right_matrix->resize(length, length); | ||
result_matrix->resize(length, length); | ||
|
||
left_matrix->set_block_size(block_size); | ||
right_matrix->set_block_size(block_size); | ||
result_matrix->set_block_size(block_size); | ||
|
||
scheduler.srmgen(matrix_load, *right_matrix); | ||
scheduler.srmgen(matrix_load, *left_matrix); | ||
} | ||
|
||
// The measured operation: a single sgemm call on the matrices prepared by setup().
// NOTE(review): presumably result = 1.0 * left * right + 0.0 * result, as in
// BLAS SGEMM - confirm against routine_scheduler::sgemm.
void benchmark_mm::proc() {
    auto &lhs = *left_matrix;
    auto &rhs = *right_matrix;
    auto &out = *result_matrix;

    scheduler.sgemm(scylla_blas::NoTrans, scylla_blas::NoTrans, 1.0, lhs, rhs, 0.0, out);
}
|
||
// Calls clear_all() on the left, right and result matrices, in that order.
// Requires setup() to have run, since the pointers are dereferenced unchecked.
void benchmark_mm::teardown() {
    auto &lhs = *left_matrix;
    auto &rhs = *right_matrix;
    auto &out = *result_matrix;

    lhs.clear_all();
    rhs.clear_all();
    out.clear_all();
}
|
||
void benchmark_mm::destroy() { | ||
scylla_blas::matrix<float>::drop(session, l_matrix_id); | ||
scylla_blas::matrix<float>::drop(session, r_matrix_id); | ||
scylla_blas::matrix<float>::drop(session, w_matrix_id); | ||
} | ||
|
||
// Matrix * Vector | ||
|
||
void benchmark_mv::init() { | ||
scylla_blas::matrix<float>::init(session, l_matrix_id, 0, 0, true); | ||
scylla_blas::vector<float>::init(session, r_vector_id, 0, true); | ||
scylla_blas::vector<float>::init(session, w_vector_id, 0, true); | ||
} | ||
|
||
void benchmark_mv::setup(int64_t block_size, int64_t length) { | ||
left_matrix = std::make_unique<scylla_blas::matrix<float>>(session, l_matrix_id); | ||
right_vector = std::make_unique<scylla_blas::vector<float>>(session, r_vector_id); | ||
result_vector = std::make_unique<scylla_blas::vector<float>>(session, w_vector_id); | ||
|
||
left_matrix->resize(length, length); | ||
right_vector->resize(length); | ||
result_vector->resize(length); | ||
|
||
left_matrix->set_block_size(block_size); | ||
right_vector->set_block_size(block_size); | ||
result_vector->set_block_size(block_size); | ||
|
||
scheduler.srmgen(matrix_load, *left_matrix); | ||
fill_vector(*right_vector, length); | ||
} | ||
|
||
// The measured operation: a single sgemv call on the operands prepared by setup().
// NOTE(review): presumably result = 1.0 * matrix * vector + 0.0 * result, as in
// BLAS SGEMV - confirm against routine_scheduler::sgemv.
void benchmark_mv::proc() {
    auto &lhs = *left_matrix;
    auto &rhs = *right_vector;
    auto &out = *result_vector;

    scheduler.sgemv(scylla_blas::NoTrans, 1.0, lhs, rhs, 0.0, out);
}
|
||
// Calls clear_all() on the matrix, the right vector and the result vector, in that order.
// Requires setup() to have run, since the pointers are dereferenced unchecked.
void benchmark_mv::teardown() {
    auto &lhs = *left_matrix;
    auto &rhs = *right_vector;
    auto &out = *result_vector;

    lhs.clear_all();
    rhs.clear_all();
    out.clear_all();
}
|
||
void benchmark_mv::destroy() { | ||
scylla_blas::matrix<float>::drop(session, l_matrix_id); | ||
scylla_blas::vector<float>::drop(session, r_vector_id); | ||
scylla_blas::vector<float>::drop(session, w_vector_id); | ||
} | ||
|
||
// Vector * Vector | ||
|
||
void benchmark_vv::init() { | ||
scylla_blas::vector<float>::init(session, l_vector_id, 0, true); | ||
scylla_blas::vector<float>::init(session, r_vector_id, 0, true); | ||
} | ||
|
||
void benchmark_vv::setup(int64_t block_size, int64_t length) { | ||
left_vector = std::make_unique<scylla_blas::vector<float>>(session, l_vector_id); | ||
right_vector = std::make_unique<scylla_blas::vector<float>>(session, r_vector_id); | ||
|
||
left_vector->resize(length); | ||
right_vector->resize(length); | ||
|
||
left_vector->set_block_size(block_size); | ||
right_vector->set_block_size(block_size); | ||
|
||
fill_vector(*left_vector, length); | ||
fill_vector(*right_vector, length); | ||
} | ||
|
||
// The measured operation: a single sdot call on the two vectors prepared by setup().
// Whatever sdot returns is intentionally discarded; only the elapsed time matters here.
void benchmark_vv::proc() {
    auto &lhs = *left_vector;
    auto &rhs = *right_vector;

    static_cast<void>(scheduler.sdot(lhs, rhs));
}
|
||
// Calls clear_all() on the left and then the right vector.
// Requires setup() to have run, since the pointers are dereferenced unchecked.
void benchmark_vv::teardown() {
    auto &lhs = *left_vector;
    auto &rhs = *right_vector;

    lhs.clear_all();
    rhs.clear_all();
}
|
||
void benchmark_vv::destroy() { | ||
scylla_blas::vector<float>::drop(session, l_vector_id); | ||
scylla_blas::vector<float>::drop(session, r_vector_id); | ||
} | ||
|
||
// Invokes callable(args...) exactly once and returns the elapsed time in
// milliseconds (fractional, as double).
//
// Timing uses std::chrono::steady_clock: it is monotonic, so the measured
// interval cannot be distorted (or become negative) when the system clock is
// adjusted. std::chrono::high_resolution_clock gives no such guarantee because
// implementations may alias it to system_clock.
//
// Any exception thrown by callable propagates to the caller; no time is
// returned in that case.
template<typename F, typename... Args>
double measure_time(F callable, Args... args) {
    using clock = std::chrono::steady_clock;

    const auto started = clock::now();
    callable(args...);
    const auto finished = clock::now();

    const std::chrono::duration<double, std::milli> elapsed = finished - started;
    return elapsed.count();
}
|
||
// Drives a full benchmark session on `tester` and returns the collected timings
// (all in milliseconds, as produced by measure_time).
//
// Sequence: init() once; then for every block size (outer loop) and every
// problem size (inner loop): setup(), proc() and - only when autoclean is set -
// teardown(); finally destroy(), again only when autoclean is set. Skipped
// phases are recorded with a time of 0.
benchmark_result perform_benchmark(std::unique_ptr<base_benchmark> tester,
                                   const std::vector<int64_t> &block_sizes,
                                   const std::vector<int64_t> &problem_sizes,
                                   bool autoclean) {
    benchmark_result summary{};

    LogInfo("Starting initialization... ");
    summary.init_time = measure_time([&tester] { tester->init(); });
    LogInfo("Initialization took {}ms", summary.init_time);

    for (const int64_t block_size : block_sizes) {
        for (const int64_t problem_size : problem_sizes) {
            benchmark_result::result_t timings{};
            LogInfo("Block size: {}, problem size: {}", block_size, problem_size);

            LogInfo("\tStarting setup");
            timings.setup_time = measure_time([&] { tester->setup(block_size, problem_size); });
            LogInfo("\tSetup took {}ms", timings.setup_time);

            LogInfo("\tStarting procedure");
            timings.proc_time = measure_time([&tester] { tester->proc(); });
            LogInfo("\tProcedure took {}ms", timings.proc_time);

            if (!autoclean) {
                timings.teardown_time = 0;
                LogDebug("\tAutoclean off: skipping teardown");
            } else {
                LogInfo("\tStarting teardown");
                timings.teardown_time = measure_time([&tester] { tester->teardown(); });
                LogInfo("\tTeardown took {}ms", timings.teardown_time);
            }

            summary.tests.emplace_back(block_size, problem_size, timings);
        }
    }

    if (!autoclean) {
        summary.destroy_time = 0;
        LogDebug("\tAutoclean off: skipping destroy");
    } else {
        LogInfo("Starting destroy");
        summary.destroy_time = measure_time([&tester] { tester->destroy(); });
        LogInfo("Destroy took {}ms\n", summary.destroy_time);
    }

    return summary;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
#pragma once | ||
#include <map> | ||
|
||
#include <scmd.hh> | ||
|
||
#include "scylla_blas/matrix.hh" | ||
#include "scylla_blas/vector.hh" | ||
#include "scylla_blas/routines.hh" | ||
|
||
#include "sparse_matrix_value_generator.hh" | ||
#include "random_value_factory.hh" | ||
|
||
#include "const.hh" | ||
|
||
template<class T> | ||
void load_vector_from_generator(value_factory<T> &gen, scylla_blas::vector<T> &vector) { | ||
scylla_blas::vector_segment<T> next_segment; | ||
scylla_blas::index_t in_segment_index = 1; | ||
scylla_blas::index_t segment_number = 1; | ||
scylla_blas::index_t segment_offset = 0; | ||
|
||
LogDebug("Filling vector with length {} and block size {}", vector.get_length(), vector.get_block_size()); | ||
for(size_t i = 0; i < vector.get_length(); i++) { | ||
if (in_segment_index > vector.get_block_size()) { | ||
vector.insert_segment(segment_number, next_segment); | ||
next_segment.clear(); | ||
|
||
in_segment_index = 1; | ||
segment_number++; | ||
segment_offset += vector.get_block_size(); | ||
} | ||
T next_val = gen.next(); | ||
next_segment.emplace_back(in_segment_index, next_val); | ||
in_segment_index++; | ||
} | ||
|
||
if (in_segment_index != 1) { | ||
vector.insert_segment(segment_number, next_segment); | ||
next_segment.clear(); | ||
} | ||
|
||
LogInfo("Loaded a vector {} from a generator", vector.get_id()); | ||
} | ||
|
||
template<typename T> | ||
void fill_vector(scylla_blas::vector<T> &v, scylla_blas::index_t length) { | ||
std::shared_ptr<value_factory<T>> f = std::make_shared<random_value_factory<T>>(0, 9, RANDOM_SEED); | ||
load_vector_from_generator(*f, v); | ||
} | ||
|
||
// Timings collected by one benchmark session. All durations are in
// milliseconds. Every field defaults to 0 so that a default-constructed
// instance never exposes indeterminate values.
struct benchmark_result {
    // Timings of a single (block size, problem size) run.
    struct result_t {
        double setup_time = 0.0;
        double proc_time = 0.0;
        double teardown_time = 0.0;  // 0 when teardown was skipped
    };

    double init_time = 0.0;     // one-off initialization
    double destroy_time = 0.0;  // one-off destroy; 0 when it was skipped

    // One entry per run: (block size, problem size, timings of that run).
    std::vector<std::tuple<int64_t, int64_t, result_t>> tests;
};
|
||
class base_benchmark { | ||
protected: | ||
std::shared_ptr <scmd::session> session; | ||
scylla_blas::routine_scheduler scheduler; | ||
double matrix_load; | ||
public: | ||
explicit base_benchmark(const std::shared_ptr<scmd::session> &session) : session(session), scheduler(session) {} | ||
virtual void init() = 0; | ||
virtual void setup(int64_t block_size, int64_t length) = 0; | ||
virtual void proc() = 0; | ||
virtual void teardown() = 0; | ||
virtual void destroy() = 0; | ||
void set_max_workers(int64_t new_max_workers) { | ||
scheduler.set_max_used_workers(new_max_workers); | ||
} | ||
void set_matrix_load(double load) { | ||
this->matrix_load = load; | ||
} | ||
}; | ||
|
||
class benchmark_mm : public base_benchmark { | ||
std::unique_ptr<scylla_blas::matrix<float>> left_matrix; | ||
std::unique_ptr<scylla_blas::matrix<float>> right_matrix; | ||
std::unique_ptr<scylla_blas::matrix<float>> result_matrix; | ||
public: | ||
explicit benchmark_mm(const std::shared_ptr<scmd::session> &session) : base_benchmark(session) {} | ||
void init() override; | ||
void setup(int64_t block_size, int64_t length) override; | ||
void proc() override; | ||
void teardown() override; | ||
void destroy() override; | ||
}; | ||
|
||
class benchmark_mv : public base_benchmark { | ||
std::unique_ptr<scylla_blas::matrix<float>> left_matrix; | ||
std::unique_ptr<scylla_blas::vector<float>> right_vector; | ||
std::unique_ptr<scylla_blas::vector<float>> result_vector; | ||
public: | ||
explicit benchmark_mv(const std::shared_ptr<scmd::session> &session) : base_benchmark(session) {} | ||
void init() override; | ||
void setup(int64_t block_size, int64_t length) override; | ||
void proc() override; | ||
void teardown() override; | ||
void destroy() override; | ||
}; | ||
|
||
class benchmark_vv : public base_benchmark { | ||
std::unique_ptr<scylla_blas::vector<float>> left_vector; | ||
std::unique_ptr<scylla_blas::vector<float>> right_vector; | ||
public: | ||
explicit benchmark_vv(const std::shared_ptr<scmd::session> &session) : base_benchmark(session) {} | ||
void init() override; | ||
void setup(int64_t block_size, int64_t length) override; | ||
void proc() override; | ||
void teardown() override; | ||
void destroy() override; | ||
}; | ||
|
||
// Runs `tester` over every combination of block size and problem size and
// returns the measured timings in milliseconds.
//   tester        - scenario to run; ownership is taken by the function
//   block_sizes   - block sizes to test (outer loop)
//   problem_sizes - problem sizes to test (inner loop)
//   autoclean     - when false, the teardown and destroy phases are skipped
//                   and recorded with a time of 0
benchmark_result perform_benchmark(
        std::unique_ptr<base_benchmark> tester,
        const std::vector<int64_t> &block_sizes,
        const std::vector<int64_t> &problem_sizes,
        bool autoclean);
Oops, something went wrong.