-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #43064 from ericcano/OneToManyFromCUDAToAlpaka
Ports prefixScan, OneToManyAssoc and HistoContainer from CUDAUtilities.
- Loading branch information
Showing
21 changed files
with
3,555 additions
and
2 deletions.
There are no files selected for viewing
68 changes: 68 additions & 0 deletions
68
HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h | ||
|
||
#include <cstdint> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
namespace cms::alpakatools { | ||
|
||
class AtomicPairCounter { | ||
public: | ||
using DoubleWord = uint64_t; | ||
|
||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter() : counter_{0} {} | ||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(uint32_t first, uint32_t second) : counter_{pack(first, second)} {} | ||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(DoubleWord values) : counter_{values} {} | ||
|
||
ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter& operator=(DoubleWord values) { | ||
counter_.as_doubleword = values; | ||
return *this; | ||
} | ||
|
||
struct Counters { | ||
uint32_t first; // in a "One to Many" association is the number of "One" | ||
uint32_t second; // in a "One to Many" association is the total number of associations | ||
}; | ||
|
||
ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; } | ||
|
||
// atomically add as_counters, and return the previous value | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters add(const TAcc& acc, Counters c) { | ||
Packer value{pack(c.first, c.second)}; | ||
Packer ret{0}; | ||
ret.as_doubleword = | ||
alpaka::atomicAdd(acc, &counter_.as_doubleword, value.as_doubleword, alpaka::hierarchy::Blocks{}); | ||
return ret.as_counters; | ||
} | ||
|
||
// atomically increment first and add i to second, and return the previous value | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE Counters constexpr inc_add(const TAcc& acc, uint32_t i) { | ||
return add(acc, {1u, i}); | ||
} | ||
|
||
private: | ||
union Packer { | ||
DoubleWord as_doubleword; | ||
Counters as_counters; | ||
constexpr Packer(DoubleWord _as_doubleword) : as_doubleword(_as_doubleword) { ; }; | ||
constexpr Packer(Counters _as_counters) : as_counters(_as_counters) { ; }; | ||
}; | ||
|
||
// pack two uint32_t values in a DoubleWord (aka uint64_t) | ||
// this is needed because in c++17 a union can only be aggregate-initialised to its first type | ||
// it can be probably removed with c++20, and replace with a designated initialiser | ||
static constexpr DoubleWord pack(uint32_t first, uint32_t second) { | ||
Packer ret{0}; | ||
ret.as_counters = {first, second}; | ||
return ret.as_doubleword; | ||
} | ||
|
||
Packer counter_; | ||
}; | ||
|
||
} // namespace cms::alpakatools | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h |
47 changes: 47 additions & 0 deletions
47
HeterogeneousCore/AlpakaInterface/interface/FlexiStorage.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
|
||
#ifndef HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h | ||
|
||
#include <cstdint> | ||
|
||
namespace cms::alpakatools { | ||
|
||
// Storage with a capacity fixed at compile time: holds S elements of type I
// in an array that is a member of the object itself.
template <typename I, int S>
class FlexiStorage {
public:
  // Number of elements in the storage, i.e. the template parameter S.
  constexpr int capacity() const { return S; }

  // Pointer to the first element.
  constexpr I* data() { return m_v; }
  constexpr I const* data() const { return m_v; }

  // Unchecked element access.
  constexpr I& operator[](int i) { return m_v[i]; }
  constexpr const I& operator[](int i) const { return m_v[i]; }

private:
  I m_v[S];
};

// Specialization for S == -1: the storage is external, and the object only
// keeps the pointer and the capacity passed to init().
template <typename I>
class FlexiStorage<I, -1> {
public:
  // Attach the external buffer `v`, declared to hold `s` elements.
  // The pointer and capacity members are not set until init() is called.
  constexpr void init(I* v, int s) {
    m_v = v;
    m_capacity = s;
  }

  // Capacity given in the last call to init().
  constexpr int capacity() const { return m_capacity; }

  // Pointer given in the last call to init().
  constexpr I* data() { return m_v; }
  constexpr I const* data() const { return m_v; }

  // Unchecked element access into the external buffer.
  constexpr I& operator[](int i) { return m_v[i]; }
  constexpr const I& operator[](int i) const { return m_v[i]; }

private:
  I* m_v;
  int m_capacity;
};
|
||
} // namespace cms::alpakatools | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h |
201 changes: 201 additions & 0 deletions
201
HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h | ||
|
||
#include <algorithm> | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <type_traits> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" | ||
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" | ||
|
||
namespace cms::alpakatools { | ||
|
||
struct countFromVector { | ||
template <typename TAcc, typename Histo, typename T> | ||
ALPAKA_FN_ACC void operator()(const TAcc &acc, | ||
Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets) const { | ||
const uint32_t nt = offsets[nh]; | ||
for (uint32_t i : elements_with_stride(acc, nt)) { | ||
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); | ||
ALPAKA_ASSERT_OFFLOAD((*off) > 0); | ||
int32_t ih = off - offsets - 1; | ||
ALPAKA_ASSERT_OFFLOAD(ih >= 0); | ||
ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); | ||
h->count(acc, v[i], ih); | ||
} | ||
} | ||
}; | ||
|
||
struct fillFromVector { | ||
template <typename TAcc, typename Histo, typename T> | ||
ALPAKA_FN_ACC void operator()(const TAcc &acc, | ||
Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets) const { | ||
const uint32_t nt = offsets[nh]; | ||
for (uint32_t i : elements_with_stride(acc, nt)) { | ||
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); | ||
ALPAKA_ASSERT_OFFLOAD((*off) > 0); | ||
int32_t ih = off - offsets - 1; | ||
ALPAKA_ASSERT_OFFLOAD(ih >= 0); | ||
ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); | ||
h->fill(acc, v[i], i, ih); | ||
} | ||
} | ||
}; | ||
|
||
template <typename TAcc, typename Histo, typename T, typename TQueue> | ||
ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets, | ||
uint32_t totSize, | ||
uint32_t nthreads, | ||
TQueue &queue) { | ||
Histo::template launchZero<TAcc>(h, queue); | ||
|
||
const auto threadsPerBlockOrElementsPerThread = nthreads; | ||
const auto blocksPerGrid = divide_up_by(totSize, nthreads); | ||
const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets); | ||
Histo::template launchFinalize<TAcc>(h, queue); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets); | ||
} | ||
|
||
template <typename TAcc, typename Histo, typename T, typename TQueue> | ||
ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h, | ||
typename Histo::View hv, | ||
uint32_t nh, | ||
T const *__restrict__ v, | ||
uint32_t const *__restrict__ offsets, | ||
uint32_t totSize, | ||
uint32_t nthreads, | ||
TQueue &queue) { | ||
Histo::template launchZero<TAcc>(hv, queue); | ||
|
||
const auto threadsPerBlockOrElementsPerThread = nthreads; | ||
const auto blocksPerGrid = divide_up_by(totSize, nthreads); | ||
const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets); | ||
Histo::template launchFinalize<TAcc>(h, queue); | ||
|
||
alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets); | ||
} | ||
|
||
// iteratate over N bins left and right of the one containing "v" | ||
template <typename Hist, typename V, typename Func> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) { | ||
int bs = Hist::bin(value); | ||
int be = std::min(int(Hist::nbins() - 1), bs + n); | ||
bs = std::max(0, bs - n); | ||
ALPAKA_ASSERT_OFFLOAD(be >= bs); | ||
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { | ||
func(*pj); | ||
} | ||
} | ||
|
||
// iteratate over bins containing all values in window wmin, wmax | ||
template <typename Hist, typename V, typename Func> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) { | ||
auto bs = Hist::bin(wmin); | ||
auto be = Hist::bin(wmax); | ||
ALPAKA_ASSERT_OFFLOAD(be >= bs); | ||
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { | ||
func(*pj); | ||
} | ||
} | ||
|
||
template <typename T, // the type of the discretized input values | ||
uint32_t NBINS, // number of bins | ||
int32_t SIZE, // max number of element. If -1 is initialized at runtime using external storage | ||
uint32_t S = sizeof(T) * 8, // number of significant bits in T | ||
typename I = uint32_t, // type stored in the container (usually an index in a vector of the input values) | ||
uint32_t NHISTS = 1 // number of histos stored | ||
> | ||
class HistoContainer : public OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE> { | ||
public: | ||
using Base = OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE>; | ||
using View = typename Base::View; | ||
using Counter = typename Base::Counter; | ||
using index_type = typename Base::index_type; | ||
using UT = typename std::make_unsigned<T>::type; | ||
|
||
static constexpr uint32_t ilog2(uint32_t v) { | ||
constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; | ||
constexpr uint32_t s[] = {1, 2, 4, 8, 16}; | ||
|
||
uint32_t r = 0; // result of log2(v) will go here | ||
for (auto i = 4; i >= 0; i--) | ||
if (v & b[i]) { | ||
v >>= s[i]; | ||
r |= s[i]; | ||
} | ||
return r; | ||
} | ||
|
||
static constexpr uint32_t sizeT() { return S; } | ||
static constexpr int32_t nhists() { return NHISTS; } | ||
static constexpr uint32_t nbins() { return NBINS; } | ||
static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; } | ||
static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; } | ||
|
||
static constexpr auto histOff(uint32_t nh) { return NBINS * nh; } | ||
|
||
static constexpr UT bin(T t) { | ||
constexpr uint32_t shift = sizeT() - nbits(); | ||
constexpr uint32_t mask = (1 << nbits()) - 1; | ||
return (t >> shift) & mask; | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
Base::atomicIncrement(acc, this->off[b]); | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
auto w = Base::atomicDecrement(acc, this->off[b]); | ||
ALPAKA_ASSERT_OFFLOAD(w > 0); | ||
this->content[w - 1] = j; | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
b += histOff(nh); | ||
ALPAKA_ASSERT_OFFLOAD(b < totbins()); | ||
Base::atomicIncrement(acc, this->off[b]); | ||
} | ||
|
||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) { | ||
uint32_t b = bin(t); | ||
ALPAKA_ASSERT_OFFLOAD(b < nbins()); | ||
b += histOff(nh); | ||
ALPAKA_ASSERT_OFFLOAD(b < totbins()); | ||
auto w = Base::atomicDecrement(acc, this->off[b]); | ||
ALPAKA_ASSERT_OFFLOAD(w > 0); | ||
this->content[w - 1] = j; | ||
} | ||
}; | ||
} // namespace cms::alpakatools | ||
#endif // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h |
Oops, something went wrong.