Skip to content

Commit

Permalink
Ports prefixScan, OneToManyAssoc and HistoContainer from CUDAUtilities.
Browse files Browse the repository at this point in the history
This version allows the use of runtime-sized arrays.
  • Loading branch information
ericcano committed Oct 19, 2023
1 parent e4bfeac commit 7998391
Show file tree
Hide file tree
Showing 16 changed files with 2,683 additions and 0 deletions.
68 changes: 68 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h
#define HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h

#include <cstdint>

#include <alpaka/alpaka.hpp>

namespace cms::alpakatools {

// Pair of 32-bit counters stored in a single 64-bit word, so that both
// counters can be advanced together by one 64-bit atomic addition.
class AtomicPairCounter {
public:
  using DoubleWord = uint64_t;

  // The two 32-bit counters viewed individually.
  struct Counters {
    uint32_t first;   // in a "One to Many" association is the number of "One"
    uint32_t second;  // in a "One to Many" association is the total number of associations
  };

  // Start with both counters at zero.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter() : counter_{0} {}
  // Start from two separate counter values.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(uint32_t first, uint32_t second) : counter_{pack(first, second)} {}
  // Start from an already packed 64-bit value.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(DoubleWord values) : counter_{values} {}

  // Overwrite both counters with an already packed 64-bit value (plain, non-atomic store).
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter& operator=(DoubleWord values) {
    counter_.as_doubleword = values;
    return *this;
  }

  // Plain (non-atomic) read of the two counters.
  ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; }

  // Atomically add `c` to the packed counters and return the value they held before the addition.
  // The addition is a single 64-bit atomicAdd on the packed word, issued with the Blocks hierarchy.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters add(const TAcc& acc, Counters c) {
    const DoubleWord increment = pack(c.first, c.second);
    const Packer previous{
        alpaka::atomicAdd(acc, &counter_.as_doubleword, increment, alpaka::hierarchy::Blocks{})};
    return previous.as_counters;
  }

  // Atomically increment `first` by one and add `i` to `second`; returns the previous value.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters inc_add(const TAcc& acc, uint32_t i) {
    return add(acc, Counters{1u, i});
  }

private:
  // Overlays the 64-bit word and the pair of 32-bit counters on the same storage.
  union Packer {
    DoubleWord as_doubleword;
    Counters as_counters;
    constexpr Packer(DoubleWord word) : as_doubleword(word) {}
    constexpr Packer(Counters pair) : as_counters(pair) {}
  };

  // Pack two uint32_t values in a DoubleWord (aka uint64_t) by storing them
  // through the Counters view of the union and reading back the 64-bit view.
  static constexpr DoubleWord pack(uint32_t first, uint32_t second) {
    const Packer packed{Counters{first, second}};
    return packed.as_doubleword;
  }

  Packer counter_;
};

} // namespace cms::alpakatools

#endif // HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h
50 changes: 50 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/FlexiStorage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

#ifndef HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h
#define HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h

#include <cstdint>

namespace cms {
namespace alpakatools {

// Storage for elements of type I with a compile-time capacity S:
// the elements live in an array embedded in the object.
template <typename I, int S>
class FlexiStorage {
public:
  // Pointer to the first element of the embedded array.
  constexpr I* data() { return m_storage; }
  constexpr I const* data() const { return m_storage; }

  // Unchecked element access.
  constexpr I& operator[](int i) { return m_storage[i]; }
  constexpr const I& operator[](int i) const { return m_storage[i]; }

  // Number of elements the storage holds (always S).
  constexpr int capacity() const { return S; }

private:
  I m_storage[S];
};

// Specialization selected with S == -1: the object does not own any element,
// it only refers to an external buffer supplied at runtime through init().
// Both members are left uninitialised until init() is called.
template <typename I>
class FlexiStorage<I, -1> {
public:
  // Attach the external buffer `v` holding `s` elements.
  constexpr void init(I* v, int s) {
    m_buffer = v;
    m_size = s;
  }

  // Pointer to the first element of the attached buffer.
  constexpr I* data() { return m_buffer; }
  constexpr I const* data() const { return m_buffer; }

  // Unchecked element access into the attached buffer.
  constexpr I& operator[](int i) { return m_buffer[i]; }
  constexpr const I& operator[](int i) const { return m_buffer[i]; }

  // Number of elements passed to init().
  constexpr int capacity() const { return m_size; }

private:
  I* m_buffer;
  int m_size;
};

} // namespace alpakatools

} // namespace cms

#endif // HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h
203 changes: 203 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
#define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h

#include <alpaka/alpaka.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"
#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"

#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
namespace cms {
namespace alpakatools {

// Kernel functor: for every input value v[i], find which of the `nh` histograms
// it belongs to (from the `offsets` boundaries) and call h->count() for it.
// `offsets` must hold nh + 1 entries; offsets[nh] is the total number of values.
struct countFromVector {
  template <typename TAcc, typename Histo, typename T>
  ALPAKA_FN_ACC void operator()(const TAcc &acc,
                                Histo *__restrict__ h,
                                uint32_t nh,
                                T const *__restrict__ v,
                                uint32_t const *__restrict__ offsets) const {
    const uint32_t total = offsets[nh];
    for_each_element_in_grid_strided(acc, total, [&](uint32_t i) {
      // presumably std::upper_bound semantics: first boundary strictly greater than i,
      // so the histogram containing i is the one just before it
      const auto upper = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
      ALPAKA_ASSERT_OFFLOAD((*upper) > 0);
      const int32_t histo = upper - offsets - 1;
      ALPAKA_ASSERT_OFFLOAD(histo >= 0);
      ALPAKA_ASSERT_OFFLOAD(histo < int(nh));
      h->count(acc, v[i], histo);
    });
  }
};

// Kernel functor: for every input value v[i], find which of the `nh` histograms
// it belongs to (from the `offsets` boundaries) and call h->fill() with the
// value and its index i.
// `offsets` must hold nh + 1 entries; offsets[nh] is the total number of values.
struct fillFromVector {
  template <typename TAcc, typename Histo, typename T>
  ALPAKA_FN_ACC void operator()(const TAcc &acc,
                                Histo *__restrict__ h,
                                uint32_t nh,
                                T const *__restrict__ v,
                                uint32_t const *__restrict__ offsets) const {
    const uint32_t total = offsets[nh];
    for_each_element_in_grid_strided(acc, total, [&](uint32_t i) {
      // presumably std::upper_bound semantics: first boundary strictly greater than i,
      // so the histogram containing i is the one just before it
      const auto upper = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
      ALPAKA_ASSERT_OFFLOAD((*upper) > 0);
      const int32_t histo = upper - offsets - 1;
      ALPAKA_ASSERT_OFFLOAD(histo >= 0);
      ALPAKA_ASSERT_OFFLOAD(histo < int(nh));
      h->fill(acc, v[i], i, histo);
    });
  }
};

// Enqueue on `queue` the full sequence that fills `nh` histograms stored in `h`
// from the values in `v`: zero, count, finalize, fill.
//   offsets  - nh + 1 boundaries delimiting the values of each histogram in `v`
//   totSize  - number of values, used to size the grid
//   nthreads - threads per block (or elements per thread, depending on the accelerator)
template <typename TAcc, typename Histo, typename T, typename TQueue>
inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h,
                                                              uint32_t nh,
                                                              T const *v,
                                                              uint32_t const *offsets,
                                                              uint32_t totSize,
                                                              uint32_t nthreads,
                                                              TQueue &queue) {
  launchZero<TAcc>(h, queue);

  // the same work division is shared by the counting and the filling kernels
  const auto groupSize = nthreads;
  const auto groupCount = divide_up_by(totSize, nthreads);
  const auto workDiv = make_workdiv<TAcc>(groupCount, groupSize);

  // first pass: count the entries falling in each bin
  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
  launchFinalize<TAcc>(h, queue);

  // second pass: store the index of each value in its bin
  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
}

// Overload taking a view `hv` of the histogram storage in addition to `h`.
// Enqueues on `queue` the same sequence as the pointer-only overload:
// zero, count, finalize, fill.
//   offsets  - nh + 1 boundaries delimiting the values of each histogram in `v`
//   totSize  - number of values, used to size the grid
//   nthreads - threads per block (or elements per thread, depending on the accelerator)
// NOTE(review): launchZero receives the view `hv` while launchFinalize receives `h`;
// confirm against OneToManyAssoc.h that this asymmetry is intended.
template <typename TAcc, typename Histo, typename T, typename TQueue>
inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h,
                                                              OneToManyAssocView<typename Histo::Base> hv,
                                                              uint32_t nh,
                                                              T const *v,
                                                              uint32_t const *offsets,
                                                              uint32_t totSize,
                                                              uint32_t nthreads,
                                                              TQueue &queue) {
  launchZero<TAcc>(hv, queue);

  // the same work division is shared by the counting and the filling kernels
  const auto groupSize = nthreads;
  const auto groupCount = divide_up_by(totSize, nthreads);
  const auto workDiv = make_workdiv<TAcc>(groupCount, groupSize);

  // first pass: count the entries falling in each bin
  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
  launchFinalize<TAcc>(h, queue);

  // second pass: store the index of each value in its bin
  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
}

// Iterate over the bin containing "value" and up to n bins on each side of it,
// clamped to the range [0, nbins() - 1], calling func on every stored entry.
template <typename Hist, typename V, typename Func>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) {
  const int centre = Hist::bin(value);
  const int firstBin = std::max(0, centre - n);
  const int lastBin = std::min(int(Hist::nbins() - 1), centre + n);
  ALPAKA_ASSERT_OFFLOAD(lastBin >= firstBin);
  for (auto entry = hist.begin(firstBin); entry < hist.end(lastBin); ++entry) {
    func(*entry);
  }
}

// Iterate over every bin from the one containing wmin to the one containing wmax
// (both included), calling func on every stored entry.
template <typename Hist, typename V, typename Func>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
  const auto firstBin = Hist::bin(wmin);
  const auto lastBin = Hist::bin(wmax);
  ALPAKA_ASSERT_OFFLOAD(lastBin >= firstBin);
  for (auto entry = hist.begin(firstBin); entry < hist.end(lastBin); ++entry) {
    func(*entry);
  }
}

// Set of NHISTS histograms of NBINS bins each, built on top of OneToManyAssoc:
// a value of type T is mapped to a bin from its most significant bits, and the
// container stores, per bin, the entries of type I passed to fill().
//
// Precondition: nbits() <= S, because bin() computes the shift S - nbits() in
// unsigned arithmetic.
template <typename T,                  // the type of the discretized input values
          uint32_t NBINS,              // number of bins
          int32_t SIZE,                // max number of element. If -1 is initialized at runtime using external storage
          uint32_t S = sizeof(T) * 8,  // number of significant bits in T
          typename I = uint32_t,  // type stored in the container (usually an index in a vector of the input values)
          uint32_t NHISTS = 1     // number of histos stored
          >
class HistoContainer : public OneToManyAssoc<I, NHISTS * NBINS + 1, SIZE> {
public:
  using Base = OneToManyAssoc<I, NHISTS * NBINS + 1, SIZE>;
  using View = typename Base::View;
  using Counter = typename Base::Counter;
  using index_type = typename Base::index_type;
  using UT = typename std::make_unsigned<T>::type;

  // Integer base-2 logarithm of v (index of the highest set bit); returns 0 for v == 0 and v == 1.
  static constexpr uint32_t ilog2(uint32_t v) {
    // b[i] selects the upper part of the bits still under consideration, s[i] is the matching shift
    constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
    constexpr uint32_t s[] = {1, 2, 4, 8, 16};

    uint32_t r = 0;  // result of log2(v) will go here
    for (auto i = 4; i >= 0; i--)
      if (v & b[i]) {
        v >>= s[i];
        r |= s[i];
      }
    return r;
  }

  // number of significant bits of the input values
  static constexpr uint32_t sizeT() { return S; }
  // number of histograms stored
  static constexpr int32_t nhists() { return NHISTS; }
  // number of bins of each histogram
  static constexpr uint32_t nbins() { return NBINS; }
  // total number of bins over all histograms, plus one
  static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
  // number of bits needed to represent a bin index in [0, NBINS - 1]
  static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }

  // index of the first bin of histogram nh
  static constexpr auto histOff(uint32_t nh) { return NBINS * nh; }

  // Bin of value t: the nbits() most significant bits among the S significant bits of t.
  static constexpr UT bin(T t) {
    constexpr uint32_t shift = sizeT() - nbits();
    // build the mask in unsigned arithmetic: a signed `1 << nbits()` overflows int when nbits() == 31
    constexpr uint32_t mask = (uint32_t{1} << nbits()) - 1;
    return (t >> shift) & mask;
  }

  // Counting pass, single histogram: atomically increment the counter of the bin of t.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
    uint32_t b = bin(t);
    ALPAKA_ASSERT_OFFLOAD(b < nbins());
    Base::atomicIncrement(acc, this->off[b]);
  }

  // Filling pass, single histogram: atomically decrement the counter of the bin of t
  // and store j in the content slot just below the value returned by the decrement.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
    uint32_t b = bin(t);
    ALPAKA_ASSERT_OFFLOAD(b < nbins());
    auto w = Base::atomicDecrement(acc, this->off[b]);
    ALPAKA_ASSERT_OFFLOAD(w > 0);
    this->content[w - 1] = j;
  }

  // Counting pass for histogram nh: same as count(acc, t), on the bin shifted by histOff(nh).
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
    uint32_t b = bin(t);
    ALPAKA_ASSERT_OFFLOAD(b < nbins());
    b += histOff(nh);
    ALPAKA_ASSERT_OFFLOAD(b < totbins());
    Base::atomicIncrement(acc, this->off[b]);
  }

  // Filling pass for histogram nh: same as fill(acc, t, j), on the bin shifted by histOff(nh).
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
    uint32_t b = bin(t);
    ALPAKA_ASSERT_OFFLOAD(b < nbins());
    b += histOff(nh);
    ALPAKA_ASSERT_OFFLOAD(b < totbins());
    auto w = Base::atomicDecrement(acc, this->off[b]);
    ALPAKA_ASSERT_OFFLOAD(w > 0);
    this->content[w - 1] = j;
  }
};

} // namespace alpakatools
} // namespace cms
#endif // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
Loading

0 comments on commit 7998391

Please sign in to comment.