From 025c24de71788f351b36a725ff4efc40be1b1430 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Thu, 20 Feb 2025 13:22:33 +0100 Subject: [PATCH] [df] Expose contiguousness from TTreeReaderArray and use it TTreeReaderArray now exposes information on whether the underlying array type uses contiguous memory. This info is in turn used by RDataFrame to optimise reading collections as RVec. Fixes https://its.cern.ch/jira/browse/ROOT-10823 --- .../inc/ROOT/RDF/RTreeColumnReader.hxx | 24 +------------- tree/treeplayer/inc/TTreeReaderArray.h | 2 ++ tree/treeplayer/inc/TTreeReaderUtils.h | 1 + tree/treeplayer/src/TTreeReaderArray.cxx | 32 +++++++++++++++++++ 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/RTreeColumnReader.hxx b/tree/dataframe/inc/ROOT/RDF/RTreeColumnReader.hxx index 88eab65342c93..9b0d521a11919 100644 --- a/tree/dataframe/inc/ROOT/RDF/RTreeColumnReader.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RTreeColumnReader.hxx @@ -61,15 +61,9 @@ template class R__CLING_PTRCHECK(off) RTreeColumnReader> final : public ROOT::Detail::RDF::RColumnReaderBase { std::unique_ptr> fTreeArray; - /// Enumerator for the memory layout of the branch - enum class EStorageType : char { kContiguous, kUnknown, kSparse }; - /// We return a reference to this RVec to clients, to guarantee a stable address and contiguous memory layout. RVec fRVec; - /// Signal whether we ever checked that the branch we are reading with a TTreeReaderArray stores array elements - /// in contiguous memory. - EStorageType fStorageType = EStorageType::kUnknown; Long64_t fLastEntry = -1; /// Whether we already printed a warning about performing a copy of the TTreeReaderArray contents @@ -81,29 +75,13 @@ class R__CLING_PTRCHECK(off) RTreeColumnReader> final : public ROOT::Det return &fRVec; // we already pointed our fRVec to the right address auto &readerArray = *fTreeArray; - // We only use TTreeReaderArrays to read columns that users flagged as type `RVec`, so we need to check - // that the branch stores the array as contiguous memory that we can actually wrap in an `RVec`. - // Currently we need the first entry to have been loaded to perform the check - // TODO Move check to constructor once ROOT-10823 is fixed and TTreeReaderArray itself exposes this information const auto readerArraySize = readerArray.GetSize(); // The reader could not read an array, signal this back to the node requesting the value if (R__unlikely(readerArray.GetReadStatus() == ROOT::Internal::TTreeReaderValueBase::EReadStatus::kReadError)) return nullptr; - if (EStorageType::kUnknown == fStorageType && readerArraySize > 1) { - // We can decide since the array is long enough - fStorageType = EStorageType::kContiguous; - for (auto i = 0u; i < readerArraySize - 1; ++i) { - if ((char *)&readerArray[i + 1] - (char *)&readerArray[i] != sizeof(T)) { - fStorageType = EStorageType::kSparse; - break; - } - } - } - - if (EStorageType::kContiguous == fStorageType || - (EStorageType::kUnknown == fStorageType && readerArray.GetSize() < 2)) { + if (readerArray.IsContiguous()) { if (readerArraySize > 0) { // trigger loading of the contents of the TTreeReaderArray // the address of the first element in the reader array is not necessarily equal to diff --git a/tree/treeplayer/inc/TTreeReaderArray.h b/tree/treeplayer/inc/TTreeReaderArray.h index 93fc00c1fed71..76a7e46b567df 100644 --- a/tree/treeplayer/inc/TTreeReaderArray.h +++ b/tree/treeplayer/inc/TTreeReaderArray.h @@ -35,6 +35,8 @@ class TTreeReaderArrayBase : public TTreeReaderValueBase { EReadStatus GetReadStatus() const override { return fImpl ? fImpl->fReadStatus : kReadError; } + bool IsContiguous() const { return fImpl->IsContiguous(GetProxy()); } + protected: void *UntypedAt(std::size_t idx) const { return fImpl->At(GetProxy(), idx); } void CreateProxy() override; diff --git a/tree/treeplayer/inc/TTreeReaderUtils.h b/tree/treeplayer/inc/TTreeReaderUtils.h index 48f22a4b396e5..3c90bb9c6a5af 100644 --- a/tree/treeplayer/inc/TTreeReaderUtils.h +++ b/tree/treeplayer/inc/TTreeReaderUtils.h @@ -88,6 +88,7 @@ namespace Internal { virtual ~TVirtualCollectionReader(); virtual size_t GetSize(Detail::TBranchProxy*) = 0; virtual void* At(Detail::TBranchProxy*, size_t /*idx*/) = 0; + virtual bool IsContiguous(Detail::TBranchProxy *) = 0; }; } diff --git a/tree/treeplayer/src/TTreeReaderArray.cxx b/tree/treeplayer/src/TTreeReaderArray.cxx index 009839adedefb..5ae74ae188267 100644 --- a/tree/treeplayer/src/TTreeReaderArray.cxx +++ b/tree/treeplayer/src/TTreeReaderArray.cxx @@ -70,8 +70,22 @@ class TClonesReader : public TVirtualCollectionReader { } else return nullptr; } + + bool IsContiguous(ROOT::Detail::TBranchProxy *) override { return false; } }; +bool IsCPContiguous(const TVirtualCollectionProxy &cp) +{ + if (cp.GetProperties() & TVirtualCollectionProxy::kIsEmulated) + return true; + + switch (cp.GetCollectionType()) { + case ROOT::kSTLvector: + case ROOT::kROOTRVec: return true; + default: return false; + } +} + // Reader interface for STL class TSTLReader final : public TVirtualCollectionReader { public: @@ -111,6 +125,12 @@ class TSTLReader final : public TVirtualCollectionReader { return myCollectionProxy->At(idx); } } + + bool IsContiguous(ROOT::Detail::TBranchProxy *proxy) override + { + auto cp = GetCP(proxy); + return IsCPContiguous(*cp); + } }; class TCollectionLessSTLReader final : public TVirtualCollectionReader { @@ -164,6 +184,12 @@ class TCollectionLessSTLReader final : public TVirtualCollectionReader { return myCollectionProxy->At(idx); } } + + bool IsContiguous(ROOT::Detail::TBranchProxy *proxy) override + { + auto cp = GetCP(proxy); + return IsCPContiguous(*cp); + } }; // Reader interface for leaf list @@ -215,6 +241,8 @@ class TObjectArrayReader : public TVirtualCollectionReader { } void SetBasicTypeSize(Int_t size) { fBasicTypeSize = size; } + + bool IsContiguous(ROOT::Detail::TBranchProxy *) override { return true; } }; template @@ -357,6 +385,8 @@ class TBasicTypeArrayReader final : public TVirtualCollectionReader { return nullptr; return (Byte_t *)myCollectionProxy->At(idx) + proxy->GetOffset(); } + + bool IsContiguous(ROOT::Detail::TBranchProxy *) override { return false; } }; class TBasicTypeClonesReader final : public TClonesReader { @@ -402,6 +432,8 @@ class TLeafReader : public TVirtualCollectionReader { return (Byte_t *)address + (fElementSize * idx); } + bool IsContiguous(ROOT::Detail::TBranchProxy *) override { return true; } + protected: void ProxyRead() { fValueReader->ProxyRead(); } };