Skip to content

Commit

Permalink
Merge pull request #43064 from ericcano/OneToManyFromCUDAToAlpaka
Browse files Browse the repository at this point in the history
Ports prefixScan, OneToManyAssoc and HistoContainer from CUDAUtilities.
  • Loading branch information
cmsbuild authored Jan 23, 2024
2 parents 089c6d8 + e03dde3 commit 762c965
Show file tree
Hide file tree
Showing 21 changed files with 3,555 additions and 2 deletions.
68 changes: 68 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h
#define HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h

#include <cstdint>

#include <alpaka/alpaka.hpp>

namespace cms::alpakatools {

// Two 32-bit counters stored together in one 64-bit word, so that both can be
// updated by a single 64-bit atomic addition (see add() and inc_add()).
class AtomicPairCounter {
public:
  using DoubleWord = uint64_t;

  // The pair of counters, viewed as two separate 32-bit values.
  struct Counters {
    uint32_t first;   // in a "One to Many" association is the number of "One"
    uint32_t second;  // in a "One to Many" association is the total number of associations
  };

  // Build a counter whose 64-bit word is zero.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter() : counter_{0} {}

  // Build a counter from the two individual 32-bit values.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(uint32_t first, uint32_t second) : counter_{pack(first, second)} {}

  // Build a counter from an already packed 64-bit word.
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter(DoubleWord values) : counter_{values} {}

  // Overwrite the whole 64-bit word (plain, non-atomic store).
  ALPAKA_FN_HOST_ACC constexpr AtomicPairCounter& operator=(DoubleWord values) {
    counter_.as_doubleword = values;
    return *this;
  }

  // Read the current pair of counters (plain, non-atomic read).
  ALPAKA_FN_ACC constexpr Counters get() const { return counter_.as_counters; }

  // Atomically add the packed form of `c` to the 64-bit word, and return the
  // pair of counters as it was before the addition.
  // The atomic operation is issued with alpaka::hierarchy::Blocks.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters add(const TAcc& acc, Counters c) {
    const DoubleWord increment = pack(c.first, c.second);
    const Packer previous(
        alpaka::atomicAdd(acc, &counter_.as_doubleword, increment, alpaka::hierarchy::Blocks{}));
    return previous.as_counters;
  }

  // Atomically increment `first` by one and add `i` to `second`, and return
  // the pair of counters as it was before the update.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Counters inc_add(const TAcc& acc, uint32_t i) {
    return add(acc, {1u, i});
  }

private:
  // Overlays the 64-bit word and the pair of 32-bit counters on the same storage.
  union Packer {
    DoubleWord as_doubleword;
    Counters as_counters;
    constexpr Packer(DoubleWord _as_doubleword) : as_doubleword(_as_doubleword) {}
    constexpr Packer(Counters _as_counters) : as_counters(_as_counters) {}
  };

  // Pack two uint32_t values in a DoubleWord (aka uint64_t), using the same
  // memory layout as the Packer union.
  static constexpr DoubleWord pack(uint32_t first, uint32_t second) {
    const Packer packed(Counters{first, second});
    return packed.as_doubleword;
  }

  Packer counter_;
};

} // namespace cms::alpakatools

#endif // HeterogeneousCore_AlpakaInterface_interface_AtomicPairCounter_h
47 changes: 47 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/FlexiStorage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

#ifndef HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h
#define HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h

#include <cstdint>

namespace cms::alpakatools {

// Storage with a compile-time capacity: holds an in-class array of S elements
// of type I.
template <typename I, int S>
class FlexiStorage {
public:
  // Number of elements that fit in the storage (always S).
  constexpr auto capacity() const -> int { return S; }

  // Unchecked element access.
  constexpr auto operator[](int i) -> I& { return m_v[i]; }
  constexpr auto operator[](int i) const -> const I& { return m_v[i]; }

  // Pointer to the first element of the in-class array.
  constexpr auto data() -> I* { return &m_v[0]; }
  constexpr auto data() const -> I const* { return &m_v[0]; }

private:
  I m_v[S];
};

// Storage with a run-time capacity (selected with S == -1): a non-owning view
// over an external buffer that must be provided through init() before use.
template <typename I>
class FlexiStorage<I, -1> {
public:
  // Attach the storage to the external buffer `v` holding `s` elements.
  constexpr void init(I* v, int s) {
    m_capacity = s;
    m_v = v;
  }

  // Number of elements declared in the last call to init().
  constexpr auto capacity() const -> int { return m_capacity; }

  // Unchecked element access into the external buffer.
  constexpr auto operator[](int i) -> I& { return m_v[i]; }
  constexpr auto operator[](int i) const -> const I& { return m_v[i]; }

  // Pointer to the external buffer passed to init().
  constexpr auto data() -> I* { return m_v; }
  constexpr auto data() const -> I const* { return m_v; }

private:
  I* m_v;          // external buffer, not owned
  int m_capacity;  // number of elements in the external buffer
};

} // namespace cms::alpakatools

#endif // HeterogeneousCore_AlpakaInterface_interface_FlexiStorage_h
201 changes: 201 additions & 0 deletions HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
#define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include <alpaka/alpaka.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
#include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

namespace cms::alpakatools {

// Kernel functor: for every element of `v`, look up in `offsets` which of the
// `nh` histograms it belongs to, and count it in that histogram of `h`.
struct countFromVector {
  template <typename TAcc, typename Histo, typename T>
  ALPAKA_FN_ACC void operator()(const TAcc &acc,
                                Histo *__restrict__ h,
                                uint32_t nh,
                                T const *__restrict__ v,
                                uint32_t const *__restrict__ offsets) const {
    // the last entry of offsets is used as the total number of elements
    const uint32_t totalElements = offsets[nh];
    for (uint32_t i : elements_with_stride(acc, totalElements)) {
      // first entry of offsets that is strictly greater than i
      auto upper = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
      ALPAKA_ASSERT_OFFLOAD((*upper) > 0);
      // the element belongs to the histogram just before that entry
      int32_t histoIndex = upper - offsets - 1;
      ALPAKA_ASSERT_OFFLOAD(histoIndex >= 0);
      ALPAKA_ASSERT_OFFLOAD(histoIndex < int(nh));
      h->count(acc, v[i], histoIndex);
    }
  }
};

// Kernel functor: for every element of `v`, look up in `offsets` which of the
// `nh` histograms it belongs to, and store its index `i` in that histogram of `h`.
struct fillFromVector {
  template <typename TAcc, typename Histo, typename T>
  ALPAKA_FN_ACC void operator()(const TAcc &acc,
                                Histo *__restrict__ h,
                                uint32_t nh,
                                T const *__restrict__ v,
                                uint32_t const *__restrict__ offsets) const {
    // the last entry of offsets is used as the total number of elements
    const uint32_t totalElements = offsets[nh];
    for (uint32_t i : elements_with_stride(acc, totalElements)) {
      // first entry of offsets that is strictly greater than i
      auto upper = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
      ALPAKA_ASSERT_OFFLOAD((*upper) > 0);
      // the element belongs to the histogram just before that entry
      int32_t histoIndex = upper - offsets - 1;
      ALPAKA_ASSERT_OFFLOAD(histoIndex >= 0);
      ALPAKA_ASSERT_OFFLOAD(histoIndex < int(nh));
      h->fill(acc, v[i], i, histoIndex);
    }
  }
};

// Fill the `nh` histograms of `h` from the values in `v`, enqueuing all the
// work on `queue`:
//   1. zero the histograms (Histo::launchZero),
//   2. count the elements per bin (countFromVector),
//   3. finalise the histograms (Histo::launchFinalize),
//   4. fill the histograms with the element indices (fillFromVector).
//
// Parameters:
//   h        histogram container to fill
//   nh       number of histograms
//   v        input values
//   offsets  nh + 1 entries; offsets[nh] is used by the kernels as the total
//            number of elements
//   totSize  number of elements used to size the grid
//   nthreads threads per block (or elements per thread, depending on the
//            back-end), must be greater than 0
//   queue    alpaka queue on which the kernels are enqueued
template <typename TAcc, typename Histo, typename T, typename TQueue>
ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
                                         uint32_t nh,
                                         T const *__restrict__ v,
                                         uint32_t const *__restrict__ offsets,
                                         uint32_t totSize,
                                         uint32_t nthreads,
                                         TQueue &queue) {
  Histo::template launchZero<TAcc>(h, queue);

  const auto threadsPerBlockOrElementsPerThread = nthreads;
  // Always launch at least one block: with totSize == 0 the grid would be
  // empty, which is not a valid launch configuration on GPU back-ends.
  // The kernels bound their loops with offsets[nh], so the extra block simply
  // has nothing to do.
  const auto blocksPerGrid = std::max<uint32_t>(1u, divide_up_by(totSize, nthreads));
  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);

  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
  Histo::template launchFinalize<TAcc>(h, queue);

  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
}

// Fill the `nh` histograms of `h` from the values in `v`, enqueuing all the
// work on `queue`. Same sequence as the overload without a view, except that
// the histograms are zeroed through the view `hv`:
//   1. zero the histograms (Histo::launchZero on `hv`),
//   2. count the elements per bin (countFromVector),
//   3. finalise the histograms (Histo::launchFinalize),
//   4. fill the histograms with the element indices (fillFromVector).
//
// Parameters:
//   h        histogram container to fill
//   hv       view passed to Histo::launchZero
//   nh       number of histograms
//   v        input values
//   offsets  nh + 1 entries; offsets[nh] is used by the kernels as the total
//            number of elements
//   totSize  number of elements used to size the grid
//   nthreads threads per block (or elements per thread, depending on the
//            back-end), must be greater than 0
//   queue    alpaka queue on which the kernels are enqueued
template <typename TAcc, typename Histo, typename T, typename TQueue>
ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
                                         typename Histo::View hv,
                                         uint32_t nh,
                                         T const *__restrict__ v,
                                         uint32_t const *__restrict__ offsets,
                                         uint32_t totSize,
                                         uint32_t nthreads,
                                         TQueue &queue) {
  Histo::template launchZero<TAcc>(hv, queue);

  const auto threadsPerBlockOrElementsPerThread = nthreads;
  // Always launch at least one block: with totSize == 0 the grid would be
  // empty, which is not a valid launch configuration on GPU back-ends.
  // The kernels bound their loops with offsets[nh], so the extra block simply
  // has nothing to do.
  const auto blocksPerGrid = std::max<uint32_t>(1u, divide_up_by(totSize, nthreads));
  const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);

  alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
  Histo::template launchFinalize<TAcc>(h, queue);

  alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
}

// Iterate over the bin containing "value" and up to n bins on each side of it,
// calling func on every entry stored in those bins.
// The range of bins is clamped to [0, Hist::nbins() - 1].
template <typename Hist, typename V, typename Func>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) {
  const int centralBin = Hist::bin(value);
  const int lastBin = std::min(int(Hist::nbins() - 1), centralBin + n);
  const int firstBin = std::max(0, centralBin - n);
  ALPAKA_ASSERT_OFFLOAD(lastBin >= firstBin);
  auto entry = hist.begin(firstBin);
  while (entry < hist.end(lastBin)) {
    func(*entry);
    ++entry;
  }
}

// Iterate over all the bins from the one containing wmin to the one containing
// wmax (both included), calling func on every entry stored in those bins.
template <typename Hist, typename V, typename Func>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
  auto firstBin = Hist::bin(wmin);
  auto lastBin = Hist::bin(wmax);
  ALPAKA_ASSERT_OFFLOAD(lastBin >= firstBin);
  auto entry = hist.begin(firstBin);
  while (entry < hist.end(lastBin)) {
    func(*entry);
    ++entry;
  }
}

// A set of NHISTS histograms with NBINS bins each, implemented on top of
// OneToManyAssocRandomAccess with NHISTS * NBINS + 1 offsets.
// The bin of a value is given by the nbits() most significant bits among its
// S low-order bits.
template <typename T,                  // the type of the discretized input values
          uint32_t NBINS,              // number of bins
          int32_t SIZE,                // max number of element. If -1 is initialized at runtime using external storage
          uint32_t S = sizeof(T) * 8,  // number of significant bits in T
          typename I = uint32_t,  // type stored in the container (usually an index in a vector of the input values)
          uint32_t NHISTS = 1     // number of histos stored
          >
class HistoContainer : public OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE> {
public:
  using Base = OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE>;
  using View = typename Base::View;
  using Counter = typename Base::Counter;
  using index_type = typename Base::index_type;
  using UT = std::make_unsigned_t<T>;

  // Integer base-2 logarithm of v, i.e. the position of its highest set bit.
  // Returns 0 for both v == 0 and v == 1.
  static constexpr uint32_t ilog2(uint32_t v) {
    constexpr uint32_t masks[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
    constexpr uint32_t shifts[] = {1, 2, 4, 8, 16};

    uint32_t result = 0;
    // binary search on the position of the highest set bit, halving the width at each step
    for (int level = 4; level >= 0; --level) {
      if ((v & masks[level]) != 0) {
        v >>= shifts[level];
        result |= shifts[level];
      }
    }
    return result;
  }

  // number of significant bits in T
  static constexpr uint32_t sizeT() { return S; }
  // number of histograms
  static constexpr int32_t nhists() { return NHISTS; }
  // number of bins of each histogram
  static constexpr uint32_t nbins() { return NBINS; }
  // total number of offsets: all the bins of all the histograms, plus one
  static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
  // number of bits needed to represent a bin index in [0, NBINS)
  static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }

  // index of the first bin of histogram nh
  static constexpr auto histOff(uint32_t nh) { return nh * NBINS; }

  // Bin of the value t: drop the sizeT() - nbits() low-order bits, and keep
  // the next nbits() bits.
  static constexpr UT bin(T t) {
    constexpr uint32_t droppedBits = sizeT() - nbits();
    constexpr uint32_t keepMask = (1 << nbits()) - 1;
    return (t >> droppedBits) & keepMask;
  }

  // Count one entry in the bin of t (first histogram): atomically increment
  // the counter of that bin.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
    const uint32_t binIndex = bin(t);
    ALPAKA_ASSERT_OFFLOAD(binIndex < nbins());
    Base::atomicIncrement(acc, this->off[binIndex]);
  }

  // Store j in the bin of t (first histogram): atomically decrement the
  // counter of that bin, and write j in the slot just before the value
  // returned by Base::atomicDecrement.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
    const uint32_t binIndex = bin(t);
    ALPAKA_ASSERT_OFFLOAD(binIndex < nbins());
    auto slot = Base::atomicDecrement(acc, this->off[binIndex]);
    ALPAKA_ASSERT_OFFLOAD(slot > 0);
    this->content[slot - 1] = j;
  }

  // Count one entry in the bin of t of histogram nh: atomically increment the
  // counter of that bin.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
    const uint32_t localBin = bin(t);
    ALPAKA_ASSERT_OFFLOAD(localBin < nbins());
    const uint32_t globalBin = localBin + histOff(nh);
    ALPAKA_ASSERT_OFFLOAD(globalBin < totbins());
    Base::atomicIncrement(acc, this->off[globalBin]);
  }

  // Store j in the bin of t of histogram nh: atomically decrement the counter
  // of that bin, and write j in the slot just before the value returned by
  // Base::atomicDecrement.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
    const uint32_t localBin = bin(t);
    ALPAKA_ASSERT_OFFLOAD(localBin < nbins());
    const uint32_t globalBin = localBin + histOff(nh);
    ALPAKA_ASSERT_OFFLOAD(globalBin < totbins());
    auto slot = Base::atomicDecrement(acc, this->off[globalBin]);
    ALPAKA_ASSERT_OFFLOAD(slot > 0);
    this->content[slot - 1] = j;
  }
};
} // namespace cms::alpakatools
#endif // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
Loading

0 comments on commit 762c965

Please sign in to comment.