-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ports prefixScan, OneToManyAssoc and HistoContainer from CUDAUtilities.
This version allows the use of runtime-sized arrays.
- Loading branch information
Showing
16 changed files
with
2,683 additions
and
0 deletions.
There are no files selected for viewing
68 changes: 68 additions & 0 deletions
68
HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h | ||
|
||
#include <cstdint> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
namespace cms::alpakatools { | ||
|
||
class AtomicPairCounter { | ||
public: | ||
using DoubleWord = uint64_t; | ||
|
||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter() : counter_{0} {} | ||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(uint32_t first, uint32_t second) : counter_{pack(first, second)} {} | ||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(DoubleWord values) : counter_{values} {} | ||
|
||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter& operator=(DoubleWord values) { | ||
counter_.as_doubleword = values; | ||
return *this; | ||
} | ||
|
||
struct Counters { | ||
uint32_t first; // in a "One to Many" association is the number of "One" | ||
uint32_t second; // in a "One to Many" association is the total number of associations | ||
}; | ||
|
||
ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; } | ||
|
||
// atomically add as_counters, and return the previous value | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters add(const TAcc& acc, Counters c) { | ||
Packer value{pack(c.first, c.second)}; | ||
Packer ret{0}; | ||
ret.as_doubleword = | ||
alpaka::atomicAdd(acc, &counter_.as_doubleword, value.as_doubleword, alpaka::hierarchy::Blocks{}); | ||
return ret.as_counters; | ||
} | ||
|
||
// atomically increment first and add i to second, and return the previous value | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE Counters constexpr inc_add(const TAcc& acc, uint32_t i) { | ||
return add(acc, {1u, i}); | ||
} | ||
|
||
private: | ||
union Packer { | ||
DoubleWord as_doubleword; | ||
Counters as_counters; | ||
constexpr Packer(DoubleWord _as_doubleword) : as_doubleword(_as_doubleword) { ; }; | ||
constexpr Packer(Counters _as_counters) : as_counters(_as_counters) { ; }; | ||
}; | ||
|
||
// pack two uint32_t values in a DoubleWord (aka uint64_t) | ||
// this is needed because in c++17 a union can only be aggregate-initialised to its first type | ||
// it can be probably removed with c++20, and replace with a designated initialiser | ||
static constexpr DoubleWord pack(uint32_t first, uint32_t second) { | ||
Packer ret{0}; | ||
ret.as_counters = {first, second}; | ||
return ret.as_doubleword; | ||
} | ||
|
||
Packer counter_; | ||
}; | ||
|
||
} // namespace cms::alpakatools | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h |
50 changes: 50 additions & 0 deletions
50
HeterogeneousCore/AlpakaInterface/interface/FlexiStorage.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
|
||
#ifndef HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h | ||
|
||
#include <cstdint> | ||
|
||
namespace cms { | ||
namespace alpakatools { | ||
|
||
// Storage whose capacity is fixed at compile time: owns an array of S elements of type I.
template <typename I, int S>
class FlexiStorage {
  I m_v[S];  // the elements themselves; left uninitialised by default construction

public:
  // number of elements that can be stored (the template parameter S)
  constexpr int capacity() const { return S; }

  // unchecked element access
  constexpr I& operator[](int i) { return *(m_v + i); }
  constexpr const I& operator[](int i) const { return *(m_v + i); }

  // pointer to the first element
  constexpr I* data() { return &m_v[0]; }
  constexpr I const* data() const { return &m_v[0]; }
};

// Storage whose capacity is chosen at run time (S == -1): a non-owning view over an external
// buffer, which must be attached with init() before any other member is used.
template <typename I>
class FlexiStorage<I, -1> {
  I* m_v;          // external buffer; not set until init() is called
  int m_capacity;  // number of elements of the external buffer; not set until init() is called

public:
  // attach the external buffer "v" holding "s" elements
  constexpr void init(I* v, int s) {
    m_capacity = s;
    m_v = v;
  }

  // number of elements of the attached buffer
  constexpr int capacity() const { return m_capacity; }

  // unchecked element access
  constexpr I& operator[](int i) { return *(m_v + i); }
  constexpr const I& operator[](int i) const { return *(m_v + i); }

  // pointer to the first element of the attached buffer
  constexpr I* data() { return m_v; }
  constexpr I const* data() const { return m_v; }
};
|
||
} // namespace alpakatools | ||
|
||
} // namespace cms | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h |
203 changes: 203 additions & 0 deletions
203
HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h | ||
|
||
#include <alpaka/alpaka.hpp> | ||
#include <algorithm> | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <type_traits> | ||
|
||
#include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" | ||
|
||
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" | ||
namespace cms { | ||
namespace alpakatools { | ||
|
||
struct countFromVector { | ||
template <typename TAcc, typename Histo, typename T> | ||
ALPAKA_FN_ACC void operator()(const TAcc &acc, | ||
Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets) const { | ||
const uint32_t nt = offsets[nh]; | ||
for_each_element_in_grid_strided(acc, nt, [&](uint32_t i) { | ||
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); | ||
ALPAKA_ASSERT_OFFLOAD((*off) > 0); | ||
int32_t ih = off - offsets - 1; | ||
ALPAKA_ASSERT_OFFLOAD(ih >= 0); | ||
ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); | ||
h->count(acc, v[i], ih); | ||
}); | ||
} | ||
}; | ||
|
||
struct fillFromVector { | ||
template <typename TAcc, typename Histo, typename T> | ||
ALPAKA_FN_ACC void operator()(const TAcc &acc, | ||
Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets) const { | ||
const uint32_t nt = offsets[nh]; | ||
for_each_element_in_grid_strided(acc, nt, [&](uint32_t i) { | ||
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); | ||
ALPAKA_ASSERT_OFFLOAD((*off) > 0); | ||
int32_t ih = off - offsets - 1; | ||
ALPAKA_ASSERT_OFFLOAD(ih >= 0); | ||
ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); | ||
h->fill(acc, v[i], i, ih); | ||
}); | ||
} | ||
}; | ||
|
||
template <typename TAcc, typename Histo, typename T, typename TQueue> | ||
inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *v, | ||
uint32_t const *offsets, | ||
uint32_t totSize, | ||
uint32_t nthreads, | ||
TQueue &queue) { | ||
launchZero<TAcc>(h, queue); | ||
|
||
const auto threadsPerBlockOrElementsPerThread = nthreads; | ||
const auto blocksPerGrid = divide_up_by(totSize, nthreads); | ||
const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets); | ||
launchFinalize<TAcc>(h, queue); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets); | ||
} | ||
|
||
template <typename TAcc, typename Histo, typename T, typename TQueue> | ||
inline __attribute__((always_inline)) void fillManyFromVector(Histo *__restrict__ h, | ||
OneToManyAssocView<typename Histo::Base> hv, | ||
uint32_t nh, | ||
T const *v, | ||
uint32_t const *offsets, | ||
uint32_t totSize, | ||
uint32_t nthreads, | ||
TQueue &queue) { | ||
launchZero<TAcc>(hv, queue); | ||
|
||
const auto threadsPerBlockOrElementsPerThread = nthreads; | ||
const auto blocksPerGrid = divide_up_by(totSize, nthreads); | ||
const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets); | ||
launchFinalize<TAcc>(h, queue); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets); | ||
} | ||
|
||
// iteratate over N bins left and right of the one containing "v" | ||
template <typename Hist, typename V, typename Func> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) { | ||
int bs = Hist::bin(value); | ||
int be = std::min(int(Hist::nbins() - 1), bs + n); | ||
bs = std::max(0, bs - n); | ||
ALPAKA_ASSERT_OFFLOAD(be >= bs); | ||
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { | ||
func(*pj); | ||
} | ||
} | ||
|
||
// iteratate over bins containing all values in window wmin, wmax | ||
template <typename Hist, typename V, typename Func> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) { | ||
auto bs = Hist::bin(wmin); | ||
auto be = Hist::bin(wmax); | ||
ALPAKA_ASSERT_OFFLOAD(be >= bs); | ||
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { | ||
func(*pj); | ||
} | ||
} | ||
|
||
template <typename T, // the type of the discretized input values | ||
uint32_t NBINS, // number of bins | ||
int32_t SIZE, // max number of element. If -1 is initialized at runtime using external storage | ||
uint32_t S = sizeof(T) * 8, // number of significant bits in T | ||
typename I = uint32_t, // type stored in the container (usually an index in a vector of the input values) | ||
uint32_t NHISTS = 1 // number of histos stored | ||
> | ||
class HistoContainer : public OneToManyAssoc<I, NHISTS * NBINS + 1, SIZE> { | ||
public: | ||
using Base = OneToManyAssoc<I, NHISTS * NBINS + 1, SIZE>; | ||
using View = typename Base::View; | ||
using Counter = typename Base::Counter; | ||
using index_type = typename Base::index_type; | ||
using UT = typename std::make_unsigned<T>::type; | ||
|
||
static constexpr uint32_t ilog2(uint32_t v) { | ||
constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; | ||
constexpr uint32_t s[] = {1, 2, 4, 8, 16}; | ||
|
||
uint32_t r = 0; // result of log2(v) will go here | ||
for (auto i = 4; i >= 0; i--) | ||
if (v & b[i]) { | ||
v >>= s[i]; | ||
r |= s[i]; | ||
} | ||
return r; | ||
} | ||
|
||
static constexpr uint32_t sizeT() { return S; } | ||
static constexpr int32_t nhists() { return NHISTS; } | ||
static constexpr uint32_t nbins() { return NBINS; } | ||
static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; } | ||
static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; } | ||
|
||
static constexpr auto histOff(uint32_t nh) { return NBINS * nh; } | ||
|
||
static constexpr UT bin(T t) { | ||
constexpr uint32_t shift = sizeT() - nbits(); | ||
constexpr uint32_t mask = (1 << nbits()) - 1; | ||
return (t >> shift) & mask; | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
Base::atomicIncrement(acc, this->off[b]); | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
auto w = Base::atomicDecrement(acc, this->off[b]); | ||
ALPAKA_ASSERT_OFFLOAD(w > 0); | ||
this->content[w - 1] = j; | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
b += histOff(nh); | ||
ALPAKA_ASSERT_OFFLOAD(b < totbins()); | ||
Base::atomicIncrement(acc, this->off[b]); | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
b += histOff(nh); | ||
ALPAKA_ASSERT_OFFLOAD(b < totbins()); | ||
auto w = Base::atomicDecrement(acc, this->off[b]); | ||
ALPAKA_ASSERT_OFFLOAD(w > 0); | ||
this->content[w - 1] = j; | ||
} | ||
}; | ||
|
||
} // namespace alpakatools | ||
} // namespace cms | ||
#endif // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h |
Oops, something went wrong.