Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[RF] Make RooBatchCompute thread safe #14877

Merged
merged 4 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 13 additions & 16 deletions roofit/batchcompute/res/RooBatchCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@

#include <DllImport.h> //for R__EXTERN, needed for windows

#include <cassert>
#include <functional>
#include <initializer_list>
#include <string>
#include <vector>

/**
* Namespace for dispatching RooFit computations to various backends.
Expand All @@ -42,11 +40,13 @@
*/
namespace RooBatchCompute {

/// Vector of read-only spans over double data; used as the type of the
/// `vars` parameter in the vector-based compute() signatures.
typedef std::vector<std::span<const double>> VarVector;
/// Vector of doubles; used as the type of the `extraArgs` parameter in the
/// vector-based compute() signatures.
typedef std::vector<double> ArgVector;
/// Non-owning view over a contiguous sequence of read-only double spans; used
/// as the type of the `vars` parameter in the span-based compute() signatures.
typedef std::span<const std::span<const double>> VarSpan;
/// Non-owning view over a contiguous sequence of mutable doubles; used as the
/// type of the `extraArgs` parameter in the span-based compute() signatures.
typedef std::span<double> ArgSpan;
/// Pointer to mutable doubles, qualified with `__restrict`.
typedef double *__restrict RestrictArr;
/// Pointer to read-only doubles, qualified with `__restrict`.
typedef const double *__restrict InputArr;

/// Compile-time constant with value 64.
/// NOTE(review): presumably the number of events handled per chunk by the
/// compute kernels -- confirm against the code that uses bufferSize.
constexpr std::size_t bufferSize = 64;

/// Declared here and called at the start of the free compute() function,
/// before a dispatcher is selected.
void init();

/// Minimal configuration struct to steer the evaluation of a single node with
Expand Down Expand Up @@ -140,12 +140,7 @@ struct ReduceNLLOutput {
class RooBatchComputeInterface {
public:
virtual ~RooBatchComputeInterface() = default;
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &) = 0;
inline void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
{
ArgVector extraArgs{};
compute(cfg, comp, output, size, vars, extraArgs);
}
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, VarSpan, ArgSpan) = 0;

virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas, std::span<const double> weights,
Expand Down Expand Up @@ -182,18 +177,20 @@ inline bool hasCuda()
return dispatchCUDA;
}

/// Run the computation `comp` on the backend selected by `cfg`.
///
/// Calls init() first, then selects `dispatchCUDA` when `cfg.useCuda()` is
/// true and `dispatchCPU` otherwise, and forwards every argument unchanged to
/// the compute() method of the selected dispatcher.
///
/// \param cfg Configuration; only `cfg.useCuda()` is read here.
/// \param comp Identifier of the computation, forwarded to the dispatcher.
/// \param output Forwarded to the dispatcher (presumably the result buffer -- confirm).
/// \param size Forwarded to the dispatcher (presumably the number of output entries -- confirm).
/// \param vars Input data, forwarded to the dispatcher.
/// \param extraArgs Additional arguments, forwarded to the dispatcher; the
///        span-based signature defaults this to an empty span.
inline void
compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, VarSpan vars, ArgSpan extraArgs = {})
{
init();
// Pick the backend once, based on the configuration of this call.
auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
dispatch->compute(cfg, comp, output, size, vars, extraArgs);
}

inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
/// Convenience overload that accepts the inputs as a braced initializer list
/// of spans and forwards to the span-based compute() overload.
///
/// It is not possible to construct a std::span directly from an initializer
/// list (probably it will be with C++26). That's why we need an explicit
/// overload for this.
///
/// \param vars Initializer list of read-only spans; it is wrapped in a VarSpan
///        that is only used for the duration of the forwarded call.
/// \param extraArgs Additional arguments, forwarded unchanged; defaults to an
///        empty span.
inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size,
std::initializer_list<std::span<const double>> vars, ArgSpan extraArgs = {})
{
ArgVector extraArgs{};
compute(cfg, comp, output, size, vars, extraArgs);
// Build a VarSpan from the initializer list's begin/end pointers so that the
// span-based overload can be reused without copying the spans.
compute(cfg, comp, output, size, VarSpan{vars.begin(), vars.end()}, extraArgs);
}

inline double reduceSum(Config cfg, InputArr input, size_t n)
Expand Down
49 changes: 7 additions & 42 deletions roofit/batchcompute/src/Batches.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,67 +23,32 @@ so that they can contain data for every kind of compute function.
#ifndef ROOFIT_BATCHCOMPUTE_BATCHES_H
#define ROOFIT_BATCHCOMPUTE_BATCHES_H

#include <RooBatchComputeTypes.h>

#include <cstdint>

namespace RooBatchCompute {

/// Compile-time constant with value 64.
/// NOTE(review): presumably the number of events handled per chunk by the
/// compute kernels -- confirm against the code that uses bufferSize.
constexpr std::size_t bufferSize = 64;

namespace RF_ARCH {

/// Lightweight handle on one input of a compute function: a raw pointer to
/// double data plus a flag describing how that data is indexed.
/// The class itself never allocates or frees the pointed-to memory.
class Batch {
public:
/// Pointer to the first element of the input data; nullptr by default.
const double *__restrict _array = nullptr;
/// When true, advance() moves `_array` and the CUDA operator[] reads
/// `_array[i]`; when false, advance() is a no-op and the CUDA operator[]
/// always reads `_array[0]`.
bool _isVector = false;

Batch() = default;
/// Construct from a data pointer and the vector flag.
inline Batch(InputArr array, bool isVector) : _array{array}, _isVector{isVector} {}

/// Return the value of the `_isVector` flag.
__roodevice__ constexpr bool isItVector() const { return _isVector; }
/// Re-point this batch to `array` and overwrite the vector flag.
inline void set(InputArr array, bool isVector)
{
_array = array;
_isVector = isVector;
}
/// Move the data pointer forward by `_nEvents` elements. The bool flag is
/// used as a 0/1 multiplier, so the pointer stays put when `_isVector` is false.
inline void advance(std::size_t _nEvents) { _array += _isVector * _nEvents; }
// Element access. Under CUDA the flag is checked on every access and element 0
// is returned when `_isVector` is false; the non-CUDA version indexes `_array`
// unconditionally and does not look at `_isVector`.
// NOTE(review): presumably the non-CUDA callers guarantee that `_array[i]` is
// valid for every index they use -- confirm.
#ifdef __CUDACC__
__roodevice__ constexpr double operator[](std::size_t i) const noexcept { return _isVector ? _array[i] : _array[0]; }
__device__ constexpr double operator[](std::size_t i) const noexcept { return _isVector ? _array[i] : _array[0]; }
#else
constexpr double operator[](std::size_t i) const noexcept { return _array[i]; }
#endif // #ifdef __CUDACC__
};

/////////////////////////////////////////////////////////////////////////////////////////////////////////

/// Bundle of the data handed to a compute function: the input batches, the
/// extra arguments, the event count and the output pointer.
///
/// All data members carry a default initializer, so a default-constructed
/// object holds only null pointers and zero counts. The class never allocates
/// or frees the memory its pointers refer to.
class Batches {
public:
   /// Array of input batches; indexed by operator[] and walked by advance().
   Batch *_arrays = nullptr;
   /// Array of extra arguments; read by extraArg() and written by setExtraArg().
   double *_extraArgs = nullptr;
   /// Value returned by getNEvents() and set by setNEvents().
   std::size_t _nEvents = 0;
   /// Number of entries of `_arrays` that advance() iterates over.
   std::size_t _nBatches = 0;
   /// Value returned by getNExtraArgs().
   std::size_t _nExtraArgs = 0;
   /// Output pointer; moved forward by advance().
   RestrictArr _output = nullptr;

   /// Return the stored event count.
   __roodevice__ std::size_t getNEvents() const { return _nEvents; }
   /// Return the stored number of extra arguments.
   __roodevice__ std::size_t getNExtraArgs() const { return _nExtraArgs; }
   /// Return extra argument `i`. No bounds check is performed.
   __roodevice__ double extraArg(std::size_t i) const { return _extraArgs[i]; }
   /// Overwrite extra argument `i` with `val`. No bounds check is performed.
   __roodevice__ void setExtraArg(std::size_t i, double val) { _extraArgs[i] = val; }
   /// Return a copy of the batch at index `batchIdx`. No bounds check is performed.
   __roodevice__ Batch operator[](int batchIdx) const { return _arrays[batchIdx]; }
   /// Overwrite the stored event count.
   inline void setNEvents(std::size_t n) { _nEvents = n; }
   /// Advance each of the first `_nBatches` input batches by `nEvents` (see
   /// Batch::advance) and move the output pointer forward by the same amount.
   inline void advance(std::size_t nEvents)
   {
      for (std::size_t i = 0; i < _nBatches; i++)
         _arrays[i].advance(nEvents);
      _output += nEvents;
   }

   // Plain data members without accessors.
   // NOTE(review): these look like the counterparts of the underscore-prefixed
   // members above -- confirm which set is current.
   Batch *args = nullptr;
   /// Initialized to nullptr like every other pointer member, so that a
   /// default-constructed object never carries an indeterminate pointer.
   double *extra = nullptr;
   std::size_t nEvents = 0;
   std::size_t nBatches = 0;
   std::size_t nExtra = 0;
   RestrictArr output = nullptr;
};

// Defines the actual argument type of the compute function.
// The alias is an lvalue reference to Batches, so a function taking a
// BatchesHandle works on the caller's Batches object rather than on a copy.
using BatchesHandle = Batches &;

} // End namespace RF_ARCH
} // end namespace RooBatchCompute

#endif // #ifdef ROOFIT_BATCHCOMPUTE_BATCHES_H
Loading
Loading