Skip to content

Commit

Permalink
[RF] Avoid std::vector allocations when calling RooBatchCompute
Browse files Browse the repository at this point in the history
These std::vector allocations can add significant overhead when the dataset is small.
  • Loading branch information
guitargeek committed Mar 4, 2024
1 parent 2a6bdc7 commit 329c264
Show file tree
Hide file tree
Showing 17 changed files with 88 additions and 78 deletions.
28 changes: 12 additions & 16 deletions roofit/batchcompute/res/RooBatchCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@

#include <DllImport.h> //for R__EXTERN, needed for windows

#include <cassert>
#include <functional>
#include <initializer_list>
#include <span>
#include <string>
#include <vector>

/**
* Namespace for dispatching RooFit computations to various backends.
Expand All @@ -42,8 +41,8 @@
*/
namespace RooBatchCompute {

typedef std::vector<std::span<const double>> VarVector;
typedef std::vector<double> ArgVector;
typedef std::span<const std::span<const double>> VarSpan;
typedef std::span<double> ArgSpan;
typedef double *__restrict RestrictArr;
typedef const double *__restrict InputArr;

Expand Down Expand Up @@ -140,12 +139,7 @@ struct ReduceNLLOutput {
class RooBatchComputeInterface {
public:
virtual ~RooBatchComputeInterface() = default;
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &) = 0;
inline void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
{
ArgVector extraArgs{};
compute(cfg, comp, output, size, vars, extraArgs);
}
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, VarSpan, ArgSpan) = 0;

virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas, std::span<const double> weights,
Expand Down Expand Up @@ -182,18 +176,20 @@ inline bool hasCuda()
return dispatchCUDA;
}

inline void
compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, VarSpan vars, ArgSpan extraArgs = {})
{
init();
auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
dispatch->compute(cfg, comp, output, size, vars, extraArgs);
}

inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
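/// A std::span cannot be constructed directly from a braced initializer list
/// (this is expected to become possible with C++26), so an explicit overload
/// taking a std::initializer_list is needed. The list's backing array lives
/// until the end of the full-expression containing the call, so the VarSpan
/// built from it stays valid for the duration of the call.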
inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size,
std::initializer_list<std::span<const double>> vars, ArgSpan extraArgs = {})
{
ArgVector extraArgs{};
compute(cfg, comp, output, size, vars, extraArgs);
compute(cfg, comp, output, size, VarSpan{vars.begin(), vars.end()}, extraArgs);
}

inline double reduceSum(Config cfg, InputArr input, size_t n)
Expand Down
10 changes: 5 additions & 5 deletions roofit/batchcompute/src/RooBatchCompute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size
batches._output = output;
}

void fillArrays(Batch *arrays, const VarVector &vars, double *buffer, double *bufferDevice, std::size_t nEvents)
void fillArrays(Batch *arrays, VarSpan vars, double *buffer, double *bufferDevice, std::size_t nEvents)
{
for (int i = 0; i < vars.size(); i++) {
const std::span<const double> &span = vars[i];
Expand Down Expand Up @@ -114,10 +114,10 @@ public:
\param computer An enum specifying the compute function to be used.
\param output The array where the computation results are stored.
\param nEvents The number of events to be processed.
\param vars A std::vector containing pointers to the variables involved in the computation.
\param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
void compute(RooBatchCompute::Config const &cfg, Computer computer, RestrictArr output, size_t nEvents,
const VarVector &vars, ArgVector &extraArgs) override
\param vars A std::span of std::span<const double>, one inner span per variable involved in the computation.
\param extraArgs An optional std::span containing extra double values that may participate in the computation. **/
void compute(RooBatchCompute::Config const &cfg, Computer computer, RestrictArr output, size_t nEvents, VarSpan vars,
ArgSpan extraArgs) override
{
using namespace RooFit::Detail::CudaInterface;

Expand Down
12 changes: 6 additions & 6 deletions roofit/batchcompute/src/RooBatchCompute.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace RF_ARCH {

namespace {

void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size_t nBatches, ArgVector &extraArgs)
void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size_t nBatches, ArgSpan extraArgs)
{
batches._extraArgs = extraArgs.data();
batches._nEvents = nEvents;
Expand All @@ -50,7 +50,7 @@ void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size
batches._output = output;
}

void fillArrays(std::vector<Batch> &arrays, const VarVector &vars, double *buffer, std::size_t nEvents)
void fillArrays(std::vector<Batch> &arrays, VarSpan vars, double *buffer, std::size_t nEvents)
{

arrays.resize(vars.size());
Expand Down Expand Up @@ -106,10 +106,10 @@ class RooBatchComputeClass : public RooBatchComputeInterface {
\param computer An enum specifying the compute function to be used.
\param output The array where the computation results are stored.
\param nEvents The number of events to be processed.
\param vars A std::vector containing pointers to the variables involved in the computation.
\param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars,
ArgVector &extraArgs) override
\param vars A std::span of std::span<const double>, one inner span per variable involved in the computation.
\param extraArgs An optional std::span containing extra double values that may participate in the computation. **/
void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, VarSpan vars,
ArgSpan extraArgs) override
{
static std::vector<double> buffer;
buffer.resize(vars.size() * bufferSize);
Expand Down
17 changes: 10 additions & 7 deletions roofit/roofit/src/RooChebychev.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,17 @@ void RooChebychev::translate(RooFit::Detail::CodeSquashContext &ctx) const

////////////////////////////////////////////////////////////////////////////////
/// Compute multiple values of Chebychev.
void RooChebychev::computeBatch(double* output, size_t nEvents, RooFit::Detail::DataMap const& dataMap) const
void RooChebychev::computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::ArgVector extraArgs;
for (auto* coef:_coefList)
extraArgs.push_back( static_cast<const RooAbsReal*>(coef)->getVal() );
extraArgs.push_back( _x.min(_refRangeName?_refRangeName->GetName() : nullptr) );
extraArgs.push_back( _x.max(_refRangeName?_refRangeName->GetName() : nullptr) );
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::Chebychev, output, nEvents, {dataMap.at(_x)}, extraArgs);
std::vector<double> extraArgs;
extraArgs.reserve(_coefList.size() + 2);
for (auto *coef : _coefList) {
extraArgs.push_back(static_cast<const RooAbsReal *>(coef)->getVal());
}
extraArgs.push_back(_x.min(_refRangeName ? _refRangeName->GetName() : nullptr));
extraArgs.push_back(_x.max(_refRangeName ? _refRangeName->GetName() : nullptr));
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::Chebychev, output, nEvents, {dataMap.at(_x)},
extraArgs);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
14 changes: 8 additions & 6 deletions roofit/roofit/src/RooChiSquarePdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Here we also implement the analytic integral.

#include "TMath.h"

#include <array>
#include <cmath>
using namespace std;

ClassImp(RooChiSquarePdf);

Expand Down Expand Up @@ -55,17 +55,19 @@ RooChiSquarePdf::RooChiSquarePdf(const RooChiSquarePdf& other, const char* name)

double RooChiSquarePdf::evaluate() const
{
if(_x <= 0) return 0;
if (_x <= 0)
return 0;

return pow(_x,(_ndof/2.)-1.) * exp(-_x/2.) / TMath::Gamma(_ndof/2.) / pow(2.,_ndof/2.);
return pow(_x, (_ndof / 2.) - 1.) * std::exp(-_x / 2.) / TMath::Gamma(_ndof / 2.) / std::pow(2., _ndof / 2.);
}

////////////////////////////////////////////////////////////////////////////////
/// Compute multiple values of ChiSquare distribution.
void RooChiSquarePdf::computeBatch(double* output, size_t nEvents, RooFit::Detail::DataMap const& dataMap) const
void RooChiSquarePdf::computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::ArgVector extraArgs{_ndof};
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::ChiSquare, output, nEvents, {dataMap.at(_x)}, extraArgs);
std::array<double, 1> extraArgs{_ndof};
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::ChiSquare, output, nEvents, {dataMap.at(_x)},
extraArgs);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
11 changes: 5 additions & 6 deletions roofit/roofit/src/RooExpPoly.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ RooExpPoly::RooExpPoly(const char*, const char*, RooAbsReal&, const RooArgList&,
#include <TMath.h>
#include <TError.h>

#include <cmath>
#include <sstream>
#include <array>
#include <cassert>
#include <cmath>
#include <complex>
#include <sstream>

ClassImp(RooExpPoly);

Expand Down Expand Up @@ -128,7 +129,7 @@ double RooExpPoly::evaluateLog() const
/// Compute multiple values of ExpPoly distribution.
void RooExpPoly::computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::VarVector vars;
std::vector<std::span<const double>> vars;
vars.reserve(_coefList.size() + 1);
vars.push_back(dataMap.at(_x));

Expand All @@ -137,9 +138,7 @@ void RooExpPoly::computeBatch(double *output, size_t nEvents, RooFit::Detail::Da
vars.push_back(dataMap.at(coef));
}

RooBatchCompute::ArgVector args;
args.push_back(_lowestOrder);
args.push_back(_coefList.size());
std::array<double, 2> args{static_cast<double>(_lowestOrder), static_cast<double>(_coefList.size())};

RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::ExpPoly, output, nEvents, vars, args);
}
Expand Down
4 changes: 3 additions & 1 deletion roofit/roofit/src/RooGaussModel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ for analytical convolutions with classes inheriting from RooAbsAnaConvPdf

#include <RooHeterogeneousMath.h>

#include <array>

namespace {

enum RooGaussBasis {
Expand Down Expand Up @@ -190,7 +192,7 @@ void RooGaussModel::computeBatch(double *output, size_t size,
// arises, they can be implemented following this example. Remember to also
// adapt RooGaussModel::canComputeBatchWithCuda().
if (basisType == expBasis) {
RooBatchCompute::ArgVector extraArgs{basisSign};
std::array<double, 1> extraArgs{basisSign};
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::GaussModelExpBasis, output, size,
{xVals, meanVals, meanSfVals, sigmaVals, sigmaSfVals, param1Vals}, extraArgs);
return;
Expand Down
6 changes: 4 additions & 2 deletions roofit/roofit/src/RooJohnson.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ Johnson, N. L. (1949). *Systems of Frequency Curves Generated by Methods of Tran
#include "RooHelpers.h"
#include "RooBatchCompute.h"

#include <cmath>
#include "TMath.h"

#include <array>
#include <cmath>

ClassImp(RooJohnson);

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -114,7 +116,7 @@ double RooJohnson::evaluate() const
/// Compute multiple values of the Johnson distribution.
void RooJohnson::computeBatch(double* output, size_t nEvents, RooFit::Detail::DataMap const& dataMap) const
{
RooBatchCompute::ArgVector extraArgs{_massThreshold};
std::array<double, 1> extraArgs{_massThreshold};
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::Johnson, output, nEvents,
{dataMap.at(_mass), dataMap.at(_mu), dataMap.at(_lambda), dataMap.at(_gamma), dataMap.at(_delta)},
extraArgs);
Expand Down
4 changes: 3 additions & 1 deletion roofit/roofit/src/RooPoisson.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Poisson pdf
#include "RooFit/Detail/EvaluateFuncs.h"
#include "Math/ProbFuncMathCore.h"

#include <array>

ClassImp(RooPoisson);

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -78,7 +80,7 @@ void RooPoisson::translate(RooFit::Detail::CodeSquashContext &ctx) const
void RooPoisson::computeBatch(double *output, size_t nEvents,
RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::ArgVector extraArgs{static_cast<double>(_protectNegative), static_cast<double>(_noRounding)};
std::array<double, 2> extraArgs{static_cast<double>(_protectNegative), static_cast<double>(_noRounding)};
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::Poisson, output, nEvents,
{dataMap.at(x), dataMap.at(mean)}, extraArgs);
}
Expand Down
8 changes: 4 additions & 4 deletions roofit/roofit/src/RooPower.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ RooPower implements a power law PDF of the form

#include <TError.h>

#include <cmath>
#include <array>
#include <cassert>
#include <cmath>
#include <sstream>

ClassImp(RooPower);
Expand Down Expand Up @@ -84,7 +85,7 @@ RooPower::RooPower(const RooPower &other, const char *name)
/// Compute multiple values of Power distribution.
void RooPower::computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::VarVector vars;
std::vector<std::span<const double>> vars;
vars.reserve(2 * _coefList.size() + 1);
vars.push_back(dataMap.at(_x));

Expand All @@ -95,8 +96,7 @@ void RooPower::computeBatch(double *output, size_t nEvents, RooFit::Detail::Data
vars.push_back(dataMap.at(&_expList[i]));
}

RooBatchCompute::ArgVector args;
args.push_back(_coefList.size());
std::array<double, 1> args{static_cast<double>(_coefList.size())};

RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::Power, output, nEvents, vars, args);
}
Expand Down
4 changes: 2 additions & 2 deletions roofit/roofitcore/src/RooAddModel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ void RooAddModel::computeBatch(double *output, size_t nEvents, RooFit::Detail::D
_coefCache[i] = coefVals[0];
}

RooBatchCompute::VarVector pdfs;
RooBatchCompute::ArgVector coefs;
std::vector<std::span<const double>> pdfs;
std::vector<double> coefs;
AddCacheElem *cache = getProjCache(nullptr);
updateCoefficients(*cache, nullptr);

Expand Down
4 changes: 2 additions & 2 deletions roofit/roofitcore/src/RooAddPdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -574,8 +574,8 @@ void RooAddPdf::computeBatch(double* output, size_t nEvents, RooFit::Detail::Dat
_coefCache[i] = coefVals[0];
}

RooBatchCompute::VarVector pdfs;
RooBatchCompute::ArgVector coefs;
std::vector<std::span<const double>> pdfs;
std::vector<double> coefs;
AddCacheElem* cache = getProjCache(nullptr);
// We don't sync the coefficient values from the _coefList to the _coefCache
// because we have already done it using the dataMap.
Expand Down
21 changes: 10 additions & 11 deletions roofit/roofitcore/src/RooAddition.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -143,18 +143,17 @@ double RooAddition::evaluate() const

////////////////////////////////////////////////////////////////////////////////
/// Compute addition of PDFs in batches.
void RooAddition::computeBatch(double* output, size_t nEvents, RooFit::Detail::DataMap const& dataMap) const
void RooAddition::computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const
{
RooBatchCompute::VarVector pdfs;
RooBatchCompute::ArgVector coefs;
pdfs.reserve(_set.size());
coefs.reserve(_set.size());
for (const auto arg : _set)
{
pdfs.push_back(dataMap.at(arg));
coefs.push_back(1.0);
}
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::AddPdf, output, nEvents, pdfs, coefs);
std::vector<std::span<const double>> pdfs;
std::vector<double> coefs;
pdfs.reserve(_set.size());
coefs.reserve(_set.size());
for (const auto arg : _set) {
pdfs.push_back(dataMap.at(arg));
coefs.push_back(1.0);
}
RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::AddPdf, output, nEvents, pdfs, coefs);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
4 changes: 3 additions & 1 deletion roofit/roofitcore/src/RooNormalizedPdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "RooNormalizedPdf.h"
#include "RooBatchCompute.h"

#include <array>

/**
* \class RooNormalizedPdf
*
Expand All @@ -26,7 +28,7 @@ void RooNormalizedPdf::computeBatch(double *output, size_t nEvents, RooFit::Deta
auto integralSpan = dataMap.at(_normIntegral);

// We use the extraArgs as an output parameter to count evaluation errors.
RooBatchCompute::ArgVector extraArgs{0.0, 0.0, 0.0};
std::array<double, 3> extraArgs{0.0, 0.0, 0.0};

RooBatchCompute::compute(dataMap.config(this), RooBatchCompute::NormalizedPdf, output, nEvents, {nums, integralSpan},
extraArgs);
Expand Down
Loading

0 comments on commit 329c264

Please sign in to comment.