From ad0dc10e318ff7be70ee8e4db2a6ed5f4e90f47a Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 8 Jul 2021 19:51:11 +0200 Subject: [PATCH] WIP: trying to load rntuple into a jagged array data structure --- examples/common/ttjet_13tev_june2019.hpp | 5 +- examples/hep_rntuple/hep_rntuple.cpp | 82 +++++++++++++++++++++--- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/examples/common/ttjet_13tev_june2019.hpp b/examples/common/ttjet_13tev_june2019.hpp index ebaa024480..86e12b660d 100644 --- a/examples/common/ttjet_13tev_june2019.hpp +++ b/examples/common/ttjet_13tev_june2019.hpp @@ -7,7 +7,7 @@ using bit = bool; using byte = unsigned char; -using Index = std::uint64_t; +using Index = std::uint32_t; // clang-format off struct run {}; @@ -1538,7 +1538,7 @@ using Electron = llama::Record< llama::Field, llama::Field, llama::Field, - llama::Field, + //llama::Field, llama::Field, llama::Field, llama::Field, @@ -1947,6 +1947,7 @@ using Event = llama::Record< llama::Field, //llama::Field, //llama::Field, + llama::Field, llama::Field, //llama::Field, //llama::Field, diff --git a/examples/hep_rntuple/hep_rntuple.cpp b/examples/hep_rntuple/hep_rntuple.cpp index 7354dfb999..0ecb569d57 100644 --- a/examples/hep_rntuple/hep_rntuple.cpp +++ b/examples/hep_rntuple/hep_rntuple.cpp @@ -1,5 +1,5 @@ // This example uses a non-public CMS NanoAOD file called: ttjet_13tev_june2019_lzma. -// Please ask contact us if you need it. +// Please contact us if you need it. #include "../common/ttjet_13tev_june2019.hpp" @@ -14,6 +14,8 @@ #include #include +using SmallEvent = boost::mp11::mp_take_c; + int main(int argc, const char* argv[]) { if (argc != 2) @@ -25,11 +27,28 @@ int main(int argc, const char* argv[]) using namespace std::chrono; using namespace ROOT::Experimental; + // auto ntuple + // = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", "/mnt/c/dev/llama/ttjet_13tev_june2019_lzma.root"); auto ntuple = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", argv[1]); - const auto n = ntuple->GetNEntries(); + // try + //{ + // ntuple->PrintInfo(ROOT::Experimental::ENTupleInfo::kStorageDetails); + //} + // catch (const std::exception& e) + //{ + // fmt::print("PrintInfo error: {}", e.what()); + //} + const auto eventCount = ntuple->GetNEntries(); + const auto& d = ntuple->GetDescriptor(); + const auto electronCount + = d.GetNElements(d.FindColumnId(d.FindFieldId("nElectron.nElectron.Electron_deltaEtaSC"), 0)); + fmt::print("File contains {} events with {} electrons\n", eventCount, electronCount); auto start = steady_clock::now(); - auto view = llama::allocView(llama::mapping::SoA, Event, true>{llama::ArrayDims{n}}); + auto mapping = llama::mapping::OffsetTable, SmallEvent>{ + llama::ArrayDims{eventCount}, + llama::ArrayDims{electronCount}}; + auto view = llama::allocView(mapping); fmt::print("Alloc LLAMA view: {}ms\n", duration_cast(steady_clock::now() - start).count()); std::size_t totalSize = 0; @@ -37,15 +56,60 @@ int main(int argc, const char* argv[]) totalSize += view.mapping.blobSize(i); fmt::print("Total LLAMA view memory: {}MiB in {} blobs\n", totalSize / 1024 / 1024, view.mapping.blobCount); + // fill offset table start = steady_clock::now(); - llama::forEachLeaf( + std::size_t offset = 0; + auto electronViewCollection = ntuple->GetViewCollection("nElectron"); + for (std::size_t i = 0; i < eventCount; i++) + { + offset += electronViewCollection(i); + view(i)(llama::EndOffset{}) = offset; + assert(offset <= electronCount); + } + fmt::print("Fill offset table: {}ms\n", duration_cast(steady_clock::now() - start).count()); + + using AugmentedSmallEvent = typename decltype(mapping)::RecordDim; + start = steady_clock::now(); + llama::forEachLeaf( [&](auto coord) { - using Name = llama::GetTag; - using Type = llama::GetType; - auto column = ntuple->GetView(llama::structName()); - for (std::size_t i = 0; i < n; i++) - view(i)(coord) = column(i); + using Coord = decltype(coord); + using LeafTag = llama::GetTag; + using Type = llama::GetType; + + fmt::print("Copying {}\n", llama::structName()); + if constexpr ( + !llama::mapping::internal::isEndOffsetField && !llama::mapping::internal::isSizeField) + { + if constexpr (boost::mp11::mp_contains>:: + value) + { + using Before = llama::mapping::internal::BeforeDynamic; + using BeforeBefore = llama::RecordCoordFromList>; + using After = llama::mapping::internal::AfterDynamic; + using SubCollectionTag = llama::GetTag; + + auto collectionColumn = ntuple->GetViewCollection(llama::structName()); + auto column = collectionColumn.template GetView( + llama::structName() + "." + llama::structName()); + for (std::size_t i = 0; i < eventCount; i++) + { + const auto subCollectionCount = view(i)(BeforeBefore{})(llama::Size{}); + for (std::size_t j = 0; j < subCollectionCount; j++) + { + const auto value = column(j); + auto& dst = view(i)(Before{})(j) (After{}); + dst = value; + } + } + } + else + { + auto column = ntuple->GetView(llama::structName()); + for (std::size_t i = 0; i < eventCount; i++) + view(i)(coord) = column(i); + } + } }); fmt::print("Copy RNTuple -> LLAMA view: {}ms\n", duration_cast(steady_clock::now() - start).count());