Add interesting benchmark by @samuelpmish
greg7mdp committed Oct 9, 2023
1 parent c2fabc9 commit 623f4c0
Showing 2 changed files with 190 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -198,6 +198,7 @@ if (PHMAP_BUILD_EXAMPLES)
    add_executable(ex_hash_bench examples/hash_bench.cc phmap.natvis)
    add_executable(ex_matt examples/matt.cc phmap.natvis)
    add_executable(ex_mt_word_counter examples/mt_word_counter.cc phmap.natvis)
    add_executable(ex_p_bench examples/p_bench.cc phmap.natvis)

    target_link_libraries(ex_knucleotide Threads::Threads)
    target_link_libraries(ex_bench Threads::Threads)
189 changes: 189 additions & 0 deletions examples/p_bench.cc
@@ -0,0 +1,189 @@
// example graciously provided by @samuelpmish
// ----------------------------------------
//
// Getting rid of the mutexes for read access
//
// This example demonstrates how to populate a parallel_flat_hash_map from multiple
// concurrent threads (the map is protected by internal mutexes), and then how to
// swap the populated map into a mutex-free map, getting rid of all locking when the
// same hash map is subsequently accessed in `read only` mode, again concurrently
// from multiple threads.
// --------------------------------------------------------------------------------
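//
// The core of the idiom, as a minimal sketch (K/V and the elided template arguments
// stand in for whatever the real map uses; the aliases actually used in this
// benchmark are `pmap` and `pmap_nullmutex`, defined further down):
//
//     phmap::parallel_flat_hash_map<K, V, ..., std::mutex>       locked;    // populated from many threads
//     phmap::parallel_flat_hash_map<K, V, ..., phmap::NullMutex> lock_free;
//     lock_free.swap(locked);  // swaps the underlying tables across mutex policies, no element is copied
//     // from here on, `lock_free` can be read concurrently with no locking at all
// --------------------------------------------------------------------------------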
#include <array>
#include <atomic>
#include <cstdint>
#include <mutex>
#include <random>
#include <type_traits>
#include <vector>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include "parallel_hashmap/phmap.h"

///////////////////////////////////////////////////////////////////////////////

#include <chrono>

class timer {
    typedef std::chrono::high_resolution_clock::time_point time_point;
    typedef std::chrono::duration<double> duration_type;

public:
    void start() { then = std::chrono::high_resolution_clock::now(); }
    void stop()  { now  = std::chrono::high_resolution_clock::now(); }
    double elapsed() { return std::chrono::duration_cast<duration_type>(now - then).count(); }

private:
    time_point then, now;
};

///////////////////////////////////////////////////////////////////////////////

#include <thread>

struct threadpool {

    // split [0, n) into num_threads contiguous blocks whose sizes differ by at most one
    std::vector< uint64_t > partition(uint64_t n) {
        uint64_t quotient  = n / num_threads;
        uint64_t remainder = n % num_threads;

        std::vector< uint64_t > blocks(num_threads + 1);
        blocks[0] = 0;
        for (int i = 1; i < num_threads + 1; i++) {
            if (remainder > 0) {
                blocks[i] = blocks[i-1] + quotient + 1;
                remainder--;
            } else {
                blocks[i] = blocks[i-1] + quotient;
            }
        }
        return blocks;
    }

    threadpool(int n) : num_threads(n) {}

    // run f(i) for every i in [0, n), spreading the blocks over num_threads threads, then join
    template < typename lambda >
    void parallel_for(uint64_t n, const lambda & f) {

        std::vector< uint64_t > blocks = partition(n);

        for (int tid = 0; tid < num_threads; tid++) {
            threads.push_back(std::thread([&](uint64_t i0) {
                for (uint64_t i = blocks[i0]; i < blocks[i0+1]; i++) {
                    f(i);
                }
            }, tid));
        }

        for (int i = 0; i < num_threads; i++) {
            threads[i].join();
        }

        threads.clear();
    }

    int num_threads;
    std::vector< std::thread > threads;
};

///////////////////////////////////////////////////////////////////////////////

template < int n >
using pmap = phmap::parallel_flat_hash_map<
    uint64_t,
    uint64_t,
    std::hash<uint64_t>,
    std::equal_to<uint64_t>,
    std::allocator<std::pair<const uint64_t, uint64_t>>,
    n,              // log2 of the number of submaps
    std::mutex >;   // one mutex per submap => safe concurrent insertion

template < int n >
using pmap_nullmutex = phmap::parallel_flat_hash_map<
    uint64_t,
    uint64_t,
    std::hash<uint64_t>,
    std::equal_to<uint64_t>,
    std::allocator<std::pair<const uint64_t, uint64_t>>,
    n,                    // log2 of the number of submaps
    phmap::NullMutex >;   // no-op mutex => no locking, used here for read-only access



// Assign a dense new id to every (sparse) vertex id, then remap the four vertex ids
// of each element to the new numbering. `elements` is taken by value so the caller's
// copy is left untouched.
template < typename Map, typename Map_nomutex >
void renumber(const std::vector< uint64_t > & vertex_ids,
              std::vector< std::array< uint64_t, 4 > > elements,
              int num_threads) {
    bool supports_parallel_insertion =
        !std::is_same< Map, std::unordered_map<uint64_t, uint64_t> >::value;

    Map new_ids;
    std::atomic< uint64_t > new_id{ 0 };

    timer stopwatch;
    threadpool pool((supports_parallel_insertion) ? num_threads : 1);

    // phase 1: populate the map, concurrently when the map has internal mutexes
    stopwatch.start();
    pool.parallel_for(vertex_ids.size(), [&](uint64_t i){
        auto id = new_id++;
        new_ids[vertex_ids[i]] = id;
    });
    stopwatch.stop();
    std::cout << stopwatch.elapsed() * 1000 << "ms ";

    // phase 2: swap into the mutex-free map, then do read-only lookups from all threads
    pool.num_threads = num_threads;
    stopwatch.start();
    Map_nomutex new_ids_nc;
    new_ids_nc.swap(new_ids);
    pool.parallel_for(elements.size(), [&](uint64_t i) {
        auto & elem = elements[i];
        elem = { new_ids_nc.at(elem[0]),
                 new_ids_nc.at(elem[1]),
                 new_ids_nc.at(elem[2]),
                 new_ids_nc.at(elem[3]) };
    });
    stopwatch.stop();
    std::cout << stopwatch.elapsed() * 1000 << "ms" << std::endl;
}

int main() {
    uint64_t nvertices = 5000000;
    uint64_t nelements = 25000000;

    std::random_device rd;  // a seed source for the random number engine
    std::mt19937 gen(rd()); // mersenne_twister_engine seeded with rd()
    std::uniform_int_distribution<uint64_t> vertex_id_dist(0, uint64_t(1) << 35);
    std::uniform_int_distribution<uint64_t> elem_id_dist(0, nvertices-1);

    std::cout << "generating dataset ." << std::flush;
    std::vector< uint64_t > vertex_ids(nvertices);
    for (uint64_t i = 0; i < nvertices; i++) {
        vertex_ids[i] = vertex_id_dist(gen);
    }

    std::cout << "." << std::flush;

    std::vector< std::array<uint64_t, 4> > elements(nelements);
    for (uint64_t i = 0; i < nelements; i++) {
        elements[i] = {
            vertex_ids[elem_id_dist(gen)],
            vertex_ids[elem_id_dist(gen)],
            vertex_ids[elem_id_dist(gen)],
            vertex_ids[elem_id_dist(gen)]
        };
    }
    std::cout << " done" << std::endl;

    using stdmap = std::unordered_map<uint64_t, uint64_t>;
    std::cout << "std::unordered_map, 1 thread: ";
    renumber< stdmap, stdmap >(vertex_ids, elements, 1);

    std::cout << "std::unordered_map, 32 threads (single-threaded insertion): ";
    renumber< stdmap, stdmap >(vertex_ids, elements, 32);

    std::cout << "pmap4, 1 thread: ";
    renumber< pmap<4>, pmap_nullmutex<4> >(vertex_ids, elements, 1);

    std::cout << "pmap4, 32 threads: ";
    renumber< pmap<4>, pmap_nullmutex<4> >(vertex_ids, elements, 32);

    std::cout << "pmap6, 1 thread: ";
    renumber< pmap<6>, pmap_nullmutex<6> >(vertex_ids, elements, 1);

    std::cout << "pmap6, 32 threads: ";
    renumber< pmap<6>, pmap_nullmutex<6> >(vertex_ids, elements, 32);
}
