From 2aaa0fba3092f16f09cfc6c6120b319ab89ffe1f Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 20 Nov 2024 20:14:40 +0100 Subject: [PATCH 1/8] Make tuple a proper struct --- include/podio/utilities/RootHelpers.h | 8 +++++++- src/root_selection.xml | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index 360ef6674..4247f23d9 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -18,7 +18,13 @@ namespace root_utils { // A collection of additional information that describes the collection: the // collectionID, the collection (data) type, whether it is a subset // collection, and its schema version - using CollectionWriteInfoT = std::tuple; + struct CollectionWriteInfoT { + uint32_t collectionID{static_cast(-1)}; + std::string dataType{}; + bool isSubset{false}; + unsigned int schemaVersion{0}; + }; + // for backwards compatibility using CollectionInfoWithoutSchemaT = std::tuple; diff --git a/src/root_selection.xml b/src/root_selection.xml index 38db949c0..3a886bfc7 100644 --- a/src/root_selection.xml +++ b/src/root_selection.xml @@ -5,5 +5,8 @@ + + + From d01ee5ccbd51f4b5671919ff0ebed69d46fbef65 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 20 Nov 2024 20:33:06 +0100 Subject: [PATCH 2/8] Rename struct to better match naming convention --- include/podio/ROOTLegacyReader.h | 2 +- include/podio/ROOTWriter.h | 2 +- include/podio/utilities/RootHelpers.h | 4 +++- src/ROOTLegacyReader.cc | 9 +++++++-- src/ROOTReader.cc | 10 +++++----- src/rootUtils.h | 2 +- src/root_selection.xml | 4 ++-- 7 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index 9693bfdb0..c98d52713 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -116,7 +116,7 @@ class ROOTLegacyReader { private: std::pair 
getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector& collInfo); + void createCollectionBranches(const std::vector& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTWriter.h b/include/podio/ROOTWriter.h index 5ee23835f..0df3e6d54 100644 --- a/include/podio/ROOTWriter.h +++ b/include/podio/ROOTWriter.h @@ -105,7 +105,7 @@ class ROOTWriter { struct CategoryInfo { TTree* tree{nullptr}; ///< The TTree to which this category is written std::vector branches{}; ///< The branches for this category - std::vector collInfo{}; ///< Collection info for this category + std::vector collInfo{}; ///< Collection info for this category podio::CollectionIDTable idTable{}; ///< The collection id table for this category std::vector collsToWrite{}; ///< The collections to write for this category diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index 4247f23d9..7516b4df1 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -18,12 +18,14 @@ namespace root_utils { // A collection of additional information that describes the collection: the // collectionID, the collection (data) type, whether it is a subset // collection, and its schema version - struct CollectionWriteInfoT { + struct CollectionWriteInfo { uint32_t collectionID{static_cast(-1)}; std::string dataType{}; bool isSubset{false}; unsigned int schemaVersion{0}; }; + // The format used until version 1.2 + using CollectionWriteInfoT = std::tuple; // for backwards compatibility using CollectionInfoWithoutSchemaT = std::tuple; diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index d94303aa8..abffa9d79 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -152,7 +152,12 @@ void ROOTLegacyReader::openFiles(const std::vector& filenames) { collInfoBranch->SetAddress(&collectionInfo); collInfoBranch->GetEntry(0); } - 
createCollectionBranches(*collectionInfo); + std::vector collInfo; + collInfo.reserve(collectionInfo->size()); + for (auto& [id, typeName, isSubsetColl, schemaVersion] : *collectionInfo) { + collInfo.emplace_back(id, std::move(typeName), isSubsetColl, schemaVersion); + } + createCollectionBranches(collInfo); delete collectionInfo; } else { std::cout << "PODIO: Reconstructing CollectionTypeInfo branch from other sources in file: \'" @@ -170,7 +175,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { return m_chain->GetEntries(); } -void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { +void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 1164321b0..908d661ca 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -20,11 +20,11 @@ namespace podio { std::tuple, std::vector> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); std::tuple, std::vector> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); template void ROOTReader::readParams(ROOTReader::CategoryInfo& catInfo, podio::GenericParameters& params, bool reloadBranches, @@ -185,7 +185,7 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); - auto collInfo = new std::vector(); + auto collInfo = new std::vector(); if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto oldCollInfo = new std::vector(); collInfoBranch->SetAddress(&oldCollInfo); @@ -325,7 +325,7 @@ std::vector ROOTReader::getAvailableCategories() const { std::tuple, std::vector> 
createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; @@ -377,7 +377,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable std::tuple, std::vector> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; diff --git a/src/rootUtils.h b/src/rootUtils.h index b58fc6aea..f304d4d2d 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -259,7 +259,7 @@ inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) * collections */ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable const& idTable) { - std::vector collInfo; + std::vector collInfo; for (size_t iColl = 0; iColl < idTable.names().size(); ++iColl) { const auto collID = idTable.ids()[iColl]; diff --git a/src/root_selection.xml b/src/root_selection.xml index 3a886bfc7..dc4dbb9db 100644 --- a/src/root_selection.xml +++ b/src/root_selection.xml @@ -6,7 +6,7 @@ - - + + From 2dbf2e5cf025aefc2bb13f5d26ecbbf5d55e6b93 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 20 Nov 2024 20:47:58 +0100 Subject: [PATCH 3/8] Keep the ROOTReader functional for older versions --- src/ROOTReader.cc | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 908d661ca..b4bddc77e 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -186,19 +186,32 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); auto collInfo = new std::vector(); - if (m_fileVersion < podio::version::Version{0, 16, 4}) { - auto oldCollInfo = new std::vector(); - 
collInfoBranch->SetAddress(&oldCollInfo); - collInfoBranch->GetEntry(0); - collInfo->reserve(oldCollInfo->size()); - for (auto&& [collID, collType, isSubsetColl] : *oldCollInfo) { - // Manually set the schema version to 1 - collInfo->emplace_back(collID, std::move(collType), isSubsetColl, 1u); - } - delete oldCollInfo; - } else { + + if (m_fileVersion >= podio::version::Version{1, 1, 0}) { collInfoBranch->SetAddress(&collInfo); collInfoBranch->GetEntry(0); + } else { + auto collInfoOld = new std::vector(); + if (m_fileVersion < podio::version::Version{0, 16, 4}) { + auto collInfoReallyOld = new std::vector(); + collInfoBranch->SetAddress(&collInfoReallyOld); + collInfoBranch->GetEntry(0); + collInfoOld->reserve(collInfoReallyOld->size()); + for (auto& [collID, collType, isSubsetColl] : *collInfoReallyOld) { + // Manually set the schema version to 1 + collInfo->emplace_back(collID, std::move(collType), isSubsetColl, 1u); + } + delete collInfoReallyOld; + } else { + collInfoBranch->SetAddress(&collInfoOld); + collInfoBranch->GetEntry(0); + } + // "Convert" to new style + collInfo->reserve(collInfoOld->size()); + for (auto& [id, typeName, isSubsetColl, schemaVersion] : *collInfoOld) { + collInfo->emplace_back(id, std::move(typeName), isSubsetColl, schemaVersion); + } + delete collInfoOld; } // For backwards compatibility make it possible to read the index based files From df75dfa35c40b212deba4360420d7b40c8a1a502 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 5 Mar 2025 14:14:44 +0100 Subject: [PATCH 4/8] Add name to CollectionWriteInfo Makes it possible to drop the collection id table storage --- include/podio/ROOTWriter.h | 7 +++-- include/podio/utilities/RootHelpers.h | 9 ++++--- src/ROOTLegacyReader.cc | 2 +- src/ROOTReader.cc | 38 +++++++++++++++++++-------- src/ROOTWriter.cc | 7 ++--- 5 files changed, 38 insertions(+), 25 deletions(-) diff --git a/include/podio/ROOTWriter.h b/include/podio/ROOTWriter.h index 0df3e6d54..d3cb3e7c8 100644 --- 
a/include/podio/ROOTWriter.h +++ b/include/podio/ROOTWriter.h @@ -103,11 +103,10 @@ class ROOTWriter { /// Helper struct to group together all necessary state to write / process a /// given category. Created during the first writing of a category struct CategoryInfo { - TTree* tree{nullptr}; ///< The TTree to which this category is written - std::vector branches{}; ///< The branches for this category + TTree* tree{nullptr}; ///< The TTree to which this category is written + std::vector branches{}; ///< The branches for this category std::vector collInfo{}; ///< Collection info for this category - podio::CollectionIDTable idTable{}; ///< The collection id table for this category - std::vector collsToWrite{}; ///< The collections to write for this category + std::vector collsToWrite{}; ///< The collections to write for this category // Storage for the keys & values of all the parameters of this category // (resp. at least the current entry) diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index 7516b4df1..dc8b8a4f6 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -19,10 +19,11 @@ namespace root_utils { // collectionID, the collection (data) type, whether it is a subset // collection, and its schema version struct CollectionWriteInfo { - uint32_t collectionID{static_cast(-1)}; - std::string dataType{}; - bool isSubset{false}; - unsigned int schemaVersion{0}; + uint32_t collectionID{static_cast(-1)}; ///< collection id + std::string dataType{}; ///< The fully qualified data type + bool isSubset{false}; ///< Whether this collection is a subset collection or not + unsigned int schemaVersion{0}; ///< The schema version of the collection type + std::string name{}; ///< The name of the collection }; // The format used until version 1.2 using CollectionWriteInfoT = std::tuple; diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index abffa9d79..dc5c7ca70 100644 --- 
a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -178,7 +178,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; - for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = m_table->name(collID).value(); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index b4bddc77e..3789c6314 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -5,6 +5,7 @@ #include "podio/CollectionIDTable.h" #include "podio/DatamodelRegistry.h" #include "podio/GenericParameters.h" +#include "podio/podioVersion.h" #include "podio/utilities/RootHelpers.h" #include "rootUtils.h" @@ -13,6 +14,7 @@ #include "TClass.h" #include +#include #include #include @@ -163,7 +165,7 @@ ROOTReader::CategoryInfo& ROOTReader::getCategoryInfo(const std::string& categor if (auto it = m_categories.find(category); it != m_categories.end()) { // Use the id table as proxy to check whether this category has been // initialized already - if (it->second.table == nullptr) { + if (it->second.branches.empty()) { initCategory(it->second, category); } return it->second; @@ -177,17 +179,11 @@ ROOTReader::CategoryInfo& ROOTReader::getCategoryInfo(const std::string& categor } void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category) { - catInfo.table = std::make_shared(); - auto* table = catInfo.table.get(); - auto* tableBranch = root_utils::getBranch(m_metaChain.get(), root_utils::idTableName(category)); - tableBranch->SetAddress(&table); - tableBranch->GetEntry(0); auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); auto collInfo = new std::vector(); - - if (m_fileVersion >= 
podio::version::Version{1, 1, 0}) { + if (m_fileVersion >= podio::version::Version{1, 2, 99}) { collInfoBranch->SetAddress(&collInfo); collInfoBranch->GetEntry(0); } else { @@ -214,6 +210,26 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category delete collInfoOld; } + // Recreate the idTable form the collection info if necessary, otherwise read + // it directly + if (m_fileVersion >= podio::version::Version{1, 2, 99}) { + std::vector ids; + ids.reserve(collInfo->size()); + std::vector names; + names.reserve(collInfo->size()); + for (const auto& [id, _1, _2, _3, name] : *collInfo) { + ids.emplace_back(id); + names.emplace_back(name); + } + catInfo.table = std::make_shared(std::move(ids), std::move(names)); + } else { + catInfo.table = std::make_shared(); + auto* table = catInfo.table.get(); + auto* tableBranch = root_utils::getBranch(m_metaChain.get(), root_utils::idTableName(category)); + tableBranch->SetAddress(&table); + tableBranch->GetEntry(0); + } + // For backwards compatibility make it possible to read the index based files // from older versions if (m_fileVersion < podio::version::Version{0, 16, 99}) { @@ -251,7 +267,7 @@ std::vector getAvailableCategories(TChain* metaChain) { for (int i = 0; i < branches->GetEntries(); ++i) { const std::string name = branches->At(i)->GetName(); - const auto fUnder = name.find(root_utils::idTableName("")); + const auto fUnder = name.find(root_utils::collInfoName("")); if (fUnder != std::string::npos) { brNames.emplace_back(name.substr(0, fUnder)); } @@ -346,7 +362,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable std::vector storedClasses; storedClasses.reserve(collInfo.size()); - for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = 
idTable.name(collID).value(); @@ -398,7 +414,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, std::vector storedClasses; storedClasses.reserve(collInfo.size()); - for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = idTable.name(collID).value(); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index c4a3bc30f..ce99ede47 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -32,7 +32,6 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego // Use the TTree as proxy here to decide whether this category has already // been initialized if (catInfo.tree == nullptr) { - catInfo.idTable = frame.getCollectionIDTableForWrite(); catInfo.collsToWrite = podio::utils::sortAlphabeticaly(collsToWrite); catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str()); catInfo.tree->SetDirectory(m_file.get()); @@ -54,7 +53,6 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego // collections if (catInfo.branches.empty()) { initBranches(catInfo, collections, const_cast(frame.getParameters())); - } else { // Make sure that the category contents are consistent with the initial // frame in the category @@ -120,8 +118,8 @@ void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetTypeName()), - coll->isSubsetCollection(), coll->getSchemaVersion()); + catInfo.collInfo.emplace_back(coll->getID(), std::string(coll->getTypeName()), coll->isSubsetCollection(), + coll->getSchemaVersion(), name); } fillParams(catInfo, parameters); @@ -175,7 +173,6 @@ void ROOTWriter::finish() { // Store the collection id table and collection info for reading in the meta tree for (/*const*/ auto& [category, info] : m_categories) { - 
metaTree->Branch(root_utils::idTableName(category).c_str(), &info.idTable); metaTree->Branch(root_utils::collInfoName(category).c_str(), &info.collInfo); } From e2db949a4bbaf1259ff4fc4fec549102dc7d8ada Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 5 Mar 2025 21:54:09 +0100 Subject: [PATCH 5/8] Make RNTuple output the same as TTree based one --- include/podio/RNTupleReader.h | 17 +++++----- include/podio/RNTupleWriter.h | 9 ++--- src/RNTupleReader.cc | 64 +++++++++++++++++++---------------- src/RNTupleWriter.cc | 23 +++++-------- src/ROOTReader.cc | 10 +----- src/ROOTWriter.cc | 2 +- src/rootUtils.h | 31 +++++++++++------ 7 files changed, 77 insertions(+), 79 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index d4d26299b..821b909fa 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -5,6 +5,7 @@ #include "podio/SchemaEvolution.h" #include "podio/podioVersion.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include #include @@ -171,15 +172,15 @@ class RNTupleReader { std::unordered_map> m_readerEntries{}; std::unordered_map m_totalEntries{}; - struct CollectionInfo { - std::vector id{}; - std::vector name{}; - std::vector type{}; - std::vector isSubsetCollection{}; - std::vector schemaVersion{}; - }; + // struct CollectionInfo { + // std::vector id{}; + // std::vector name{}; + // std::vector type{}; + // std::vector isSubsetCollection{}; + // std::vector schemaVersion{}; + // }; - std::unordered_map m_collectionInfo{}; + std::unordered_map> m_collectionInfo{}; std::vector m_availableCategories{}; diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index dd3aebf96..42a1f7a3e 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -117,12 +117,9 @@ class RNTupleWriter { struct CategoryInfo { std::unique_ptr writer{nullptr}; ///< The RNTupleWriter for this category - // The following are 
assumed to run in parallel! - std::vector ids{}; ///< The ids of all collections - std::vector names{}; ///< The names of all collections - std::vector types{}; ///< The types of all collections - std::vector subsetCollections{}; ///< The flags identifying the subcollections - std::vector schemaVersions{}; ///< The schema versions of all collections + /// Collection info for this category + std::vector collInfo{}; + std::vector names{}; ///< The names of all collections to write // Storage for the keys & values of all the parameters of this category // (resp. at least the current entry) diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index 35cc59f8e..4c7456a7a 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -4,11 +4,13 @@ #include "podio/CollectionIDTable.h" #include "podio/DatamodelRegistry.h" #include "podio/GenericParameters.h" +#include "podio/utilities/RootHelpers.h" #include "rootUtils.h" #include #include +#include #include // Adjust for the move of this out of ROOT v7 in @@ -48,27 +50,32 @@ bool RNTupleReader::initCategory(const std::string& category) { // Assume that the metadata is the same in all files auto filename = m_filenames[0]; - auto& collInfo = m_collectionInfo[category]; + // auto& collInfo = m_collectionInfo[category]; - auto id = m_metadata_readers[filename]->GetView>(root_utils::idTableName(category)); - collInfo.id = id(0); + auto collInfo = m_metadata_readers[filename]->GetView>( + {root_utils::collInfoName(category)}); - auto collectionName = - m_metadata_readers[filename]->GetView>(root_utils::collectionName(category)); - collInfo.name = collectionName(0); + m_collectionInfo[category] = collInfo(0); - auto collectionType = - m_metadata_readers[filename]->GetView>(root_utils::collInfoName(category)); - collInfo.type = collectionType(0); + // auto id = m_metadata_readers[filename]->GetView>(root_utils::idTableName(category)); + // collInfo.collectionID = id(0); - auto subsetCollection = - 
m_metadata_readers[filename]->GetView>(root_utils::subsetCollection(category)); - collInfo.isSubsetCollection = subsetCollection(0); + // auto collectionName = + // m_metadata_readers[filename]->GetView>(root_utils::collectionName(category)); + // collInfo.name = collectionName(0); - auto schemaVersion = m_metadata_readers[filename]->GetView>("schemaVersion_" + category); - collInfo.schemaVersion = schemaVersion(0); + // auto collectionType = + // m_metadata_readers[filename]->GetView>(root_utils::collInfoName(category)); + // collInfo.type = collectionType(0); - m_idTables[category] = std::make_shared(collInfo.id, collInfo.name); + // auto subsetCollection = + // m_metadata_readers[filename]->GetView>(root_utils::subsetCollection(category)); + // collInfo.isSubset = subsetCollection(0); + + // auto schemaVersion = m_metadata_readers[filename]->GetView>("schemaVersion_" + + // category); collInfo.schemaVersion = schemaVersion(0); + + m_idTables[category] = root_utils::makeCollIdTable(collInfo(0)); return true; } @@ -162,7 +169,7 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ // Make sure to not silently ignore non-existant but requested collections if (!collsToRead.empty()) { for (const auto& name : collsToRead) { - if (std::ranges::find(collInfo.name, name) == collInfo.name.end()) { + if (std::ranges::find(collInfo, name, &root_utils::CollectionWriteInfo::name) == collInfo.end()) { throw std::invalid_argument(name + " is not available from Frame"); } } @@ -184,47 +191,46 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ // we set all the fields there in any case. 
auto dentry = m_readers[category][readerIndex]->GetModel().CreateEntry(); - for (size_t i = 0; i < collInfo.id.size(); ++i) { - if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo.name[i]) == collsToRead.end()) { + for (size_t i = 0; i < collInfo.size(); ++i) { + if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo[i].name) == collsToRead.end()) { continue; } - const auto& collType = collInfo.type[i]; + const auto& collType = collInfo[i].dataType; const auto& bufferFactory = podio::CollectionBufferFactory::instance(); - auto maybeBuffers = - bufferFactory.createBuffers(collType, collInfo.schemaVersion[i], collInfo.isSubsetCollection[i]); + auto maybeBuffers = bufferFactory.createBuffers(collType, collInfo[i].schemaVersion, collInfo[i].isSubset); auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (!maybeBuffers) { - std::cout << "WARNING: Buffers couldn't be created for collection " << collInfo.name[i] << " of type " - << collInfo.type[i] << " and schema version " << collInfo.schemaVersion[i] << std::endl; + std::cout << "WARNING: Buffers couldn't be created for collection " << collInfo[i].name << " of type " + << collInfo[i].dataType << " and schema version " << collInfo[i].schemaVersion << std::endl; return nullptr; } - if (collInfo.isSubsetCollection[i]) { - auto brName = root_utils::subsetBranch(collInfo.name[i]); + if (collInfo[i].isSubset) { + auto brName = root_utils::subsetBranch(collInfo[i].name); auto vec = new std::vector; dentry->BindRawPtr(brName, vec); collBuffers.references->at(0) = std::unique_ptr>(vec); } else { - dentry->BindRawPtr(collInfo.name[i], collBuffers.data); + dentry->BindRawPtr(collInfo[i].name, collBuffers.data); const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType); for (size_t j = 0; j < relVecNames.relations.size(); ++j) { const auto relName = relVecNames.relations[j]; auto vec = new std::vector; - const auto brName = 
root_utils::refBranch(collInfo.name[i], relName); + const auto brName = root_utils::refBranch(collInfo[i].name, relName); dentry->BindRawPtr(brName, vec); collBuffers.references->at(j) = std::unique_ptr>(vec); } for (size_t j = 0; j < relVecNames.vectorMembers.size(); ++j) { const auto vecName = relVecNames.vectorMembers[j]; - const auto brName = root_utils::vecBranch(collInfo.name[i], vecName); + const auto brName = root_utils::vecBranch(collInfo[i].name, vecName); dentry->BindRawPtr(brName, collBuffers.vectorMembers->at(j).second); } } - buffers.emplace(collInfo.name[i], std::move(collBuffers)); + buffers.emplace(collInfo[i].name, std::move(collBuffers)); } m_readers[category][readerIndex]->LoadEntry(localEntry, *dentry); diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index ce7ce0bda..a0a346e03 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -2,6 +2,7 @@ #include "podio/DatamodelRegistry.h" #include "podio/SchemaEvolution.h" #include "podio/podioVersion.h" +#include "podio/utilities/RootHelpers.h" #include "rootUtils.h" #include "TFile.h" @@ -92,14 +93,13 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat auto model = createModels(collections); catInfo.writer = root_compat::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); + catInfo.collInfo.reserve(collections.size()); for (const auto& [name, coll] : collections) { - catInfo.ids.emplace_back(coll->getID()); - catInfo.types.emplace_back(coll->getTypeName()); - catInfo.subsetCollections.emplace_back(coll->isSubsetCollection()); - catInfo.schemaVersions.emplace_back(coll->getSchemaVersion()); + catInfo.collInfo.emplace_back(coll->getID(), std::string(coll->getTypeName()), coll->isSubsetCollection(), + coll->getSchemaVersion(), name); } } else { - if (!root_utils::checkConsistentColls(catInfo.names, collsToWrite)) { + if (!root_utils::checkConsistentColls(catInfo.collInfo, collsToWrite)) { throw std::runtime_error("Trying to write 
category '" + category + "' with inconsistent collection content. " + root_utils::getInconsistentCollsMsg(catInfo.names, collsToWrite)); } @@ -260,16 +260,9 @@ void RNTupleWriter::finish() { } for (auto& [category, collInfo] : m_categories) { - auto idField = metadata->MakeField>({root_utils::idTableName(category)}); - *idField = collInfo.ids; - auto collectionNameField = metadata->MakeField>({root_utils::collectionName(category)}); - *collectionNameField = collInfo.names; - auto collectionTypeField = metadata->MakeField>({root_utils::collInfoName(category)}); - *collectionTypeField = collInfo.types; - auto subsetCollectionField = metadata->MakeField>({root_utils::subsetCollection(category)}); - *subsetCollectionField = collInfo.subsetCollections; - auto schemaVersionField = metadata->MakeField>({"schemaVersion_" + category}); - *schemaVersionField = collInfo.schemaVersions; + auto collInfoField = + metadata->MakeField>({root_utils::collInfoName(category)}); + *collInfoField = collInfo.collInfo; } metadata->Freeze(); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 3789c6314..ca4ee5626 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -213,15 +213,7 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category // Recreate the idTable form the collection info if necessary, otherwise read // it directly if (m_fileVersion >= podio::version::Version{1, 2, 99}) { - std::vector ids; - ids.reserve(collInfo->size()); - std::vector names; - names.reserve(collInfo->size()); - for (const auto& [id, _1, _2, _3, name] : *collInfo) { - ids.emplace_back(id); - names.emplace_back(name); - } - catInfo.table = std::make_shared(std::move(ids), std::move(names)); + catInfo.table = root_utils::makeCollIdTable(*collInfo); } else { catInfo.table = std::make_shared(); auto* table = catInfo.table.get(); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index ce99ede47..f78269eb1 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -56,7 +56,7 @@ 
void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego } else { // Make sure that the category contents are consistent with the initial // frame in the category - if (!root_utils::checkConsistentColls(catInfo.collsToWrite, collsToWrite)) { + if (!root_utils::checkConsistentColls(catInfo.collInfo, collsToWrite)) { throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + root_utils::getInconsistentCollsMsg(catInfo.collsToWrite, collsToWrite)); } diff --git a/src/rootUtils.h b/src/rootUtils.h index f304d4d2d..69a392fe3 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -287,26 +287,22 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable * can have random order wrt each other, but the assumption is that each vector * only contains unique names. */ -inline bool checkConsistentColls(const std::vector& existingColls, +inline bool checkConsistentColls(const std::vector& collInfo, const std::vector& candidateColls) { - if (existingColls.size() != candidateColls.size()) { + if (collInfo.size() != candidateColls.size()) { return false; } - // Since we are guaranteed to have unique names here, we can just look for - // collisions brute force, which seems to be quickest approach for vector - // sizes we typically have (few hundred). 
We can take advantage of the fact - // that the existingColls are ordered (alphabetically and case-insensitive), - // so we can do a binary_search for (const auto& id : candidateColls) { - if (!std::binary_search(existingColls.begin(), existingColls.end(), id, [](const auto& lhs, const auto& rhs) { + std::ranges::binary_search( + collInfo, id, + [](const auto& lhs, const auto& rhs) { return lhs.size() == rhs.size() && std::lexicographical_compare( lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), [](const auto cl, const auto cr) { return std::tolower(cl) < std::tolower(cr); }); - })) { - return false; - } + }, + &root_utils::CollectionWriteInfo::name); } return true; @@ -362,6 +358,19 @@ inline std::string getInconsistentCollsMsg(const std::vector& exist return sstr.str(); } +inline std::shared_ptr makeCollIdTable(const std::vector& collInfo) { + std::vector ids{}; + ids.reserve(collInfo.size()); + std::vector names{}; + names.reserve(collInfo.size()); + for (const auto& [id, _1, _2, _3, name] : collInfo) { + ids.emplace_back(id); + names.emplace_back(name); + } + + return std::make_shared(std::move(ids), std::move(names)); +} + } // namespace podio::root_utils #endif From 3ea8b0aad5e0c0b5448778b550d407741916bf05 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 6 Mar 2025 10:49:06 +0100 Subject: [PATCH 6/8] Cleanup and minor refactoring --- include/podio/RNTupleReader.h | 9 +------ src/RNTupleReader.cc | 45 ++++++++++------------------------- src/rootUtils.h | 2 ++ 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 821b909fa..489d1c79d 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -172,14 +172,7 @@ class RNTupleReader { std::unordered_map> m_readerEntries{}; std::unordered_map m_totalEntries{}; - // struct CollectionInfo { - // std::vector id{}; - // std::vector name{}; - // std::vector type{}; - // std::vector isSubsetCollection{}; - // 
std::vector schemaVersion{}; - // }; - + /// Map each category to the collections that have been written and are available std::unordered_map> m_collectionInfo{}; std::vector m_availableCategories{}; diff --git a/src/RNTupleReader.cc b/src/RNTupleReader.cc index 4c7456a7a..268631fab 100644 --- a/src/RNTupleReader.cc +++ b/src/RNTupleReader.cc @@ -50,31 +50,10 @@ bool RNTupleReader::initCategory(const std::string& category) { // Assume that the metadata is the same in all files auto filename = m_filenames[0]; - // auto& collInfo = m_collectionInfo[category]; - auto collInfo = m_metadata_readers[filename]->GetView>( {root_utils::collInfoName(category)}); m_collectionInfo[category] = collInfo(0); - - // auto id = m_metadata_readers[filename]->GetView>(root_utils::idTableName(category)); - // collInfo.collectionID = id(0); - - // auto collectionName = - // m_metadata_readers[filename]->GetView>(root_utils::collectionName(category)); - // collInfo.name = collectionName(0); - - // auto collectionType = - // m_metadata_readers[filename]->GetView>(root_utils::collInfoName(category)); - // collInfo.type = collectionType(0); - - // auto subsetCollection = - // m_metadata_readers[filename]->GetView>(root_utils::subsetCollection(category)); - // collInfo.isSubset = subsetCollection(0); - - // auto schemaVersion = m_metadata_readers[filename]->GetView>("schemaVersion_" + - // category); collInfo.schemaVersion = schemaVersion(0); - m_idTables[category] = root_utils::makeCollIdTable(collInfo(0)); return true; @@ -191,46 +170,46 @@ std::unique_ptr RNTupleReader::readEntry(const std::string& categ // we set all the fields there in any case. 
auto dentry = m_readers[category][readerIndex]->GetModel().CreateEntry(); - for (size_t i = 0; i < collInfo.size(); ++i) { - if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo[i].name) == collsToRead.end()) { + for (const auto& coll : collInfo) { + if (!collsToRead.empty() && std::ranges::find(collsToRead, coll.name) == collsToRead.end()) { continue; } - const auto& collType = collInfo[i].dataType; + const auto& collType = coll.dataType; const auto& bufferFactory = podio::CollectionBufferFactory::instance(); - auto maybeBuffers = bufferFactory.createBuffers(collType, collInfo[i].schemaVersion, collInfo[i].isSubset); + auto maybeBuffers = bufferFactory.createBuffers(collType, coll.schemaVersion, coll.isSubset); auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (!maybeBuffers) { - std::cout << "WARNING: Buffers couldn't be created for collection " << collInfo[i].name << " of type " - << collInfo[i].dataType << " and schema version " << collInfo[i].schemaVersion << std::endl; + std::cout << "WARNING: Buffers couldn't be created for collection " << coll.name << " of type " << coll.dataType + << " and schema version " << coll.schemaVersion << std::endl; return nullptr; } - if (collInfo[i].isSubset) { - auto brName = root_utils::subsetBranch(collInfo[i].name); + if (coll.isSubset) { + auto brName = root_utils::subsetBranch(coll.name); auto vec = new std::vector; dentry->BindRawPtr(brName, vec); collBuffers.references->at(0) = std::unique_ptr>(vec); } else { - dentry->BindRawPtr(collInfo[i].name, collBuffers.data); + dentry->BindRawPtr(coll.name, collBuffers.data); const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType); for (size_t j = 0; j < relVecNames.relations.size(); ++j) { const auto relName = relVecNames.relations[j]; auto vec = new std::vector; - const auto brName = root_utils::refBranch(collInfo[i].name, relName); + const auto brName = root_utils::refBranch(coll.name, relName); 
dentry->BindRawPtr(brName, vec); collBuffers.references->at(j) = std::unique_ptr>(vec); } for (size_t j = 0; j < relVecNames.vectorMembers.size(); ++j) { const auto vecName = relVecNames.vectorMembers[j]; - const auto brName = root_utils::vecBranch(collInfo[i].name, vecName); + const auto brName = root_utils::vecBranch(coll.name, vecName); dentry->BindRawPtr(brName, collBuffers.vectorMembers->at(j).second); } } - buffers.emplace(collInfo[i].name, std::move(collBuffers)); + buffers.emplace(coll.name, std::move(collBuffers)); } m_readers[category][readerIndex]->LoadEntry(localEntry, *dentry); diff --git a/src/rootUtils.h b/src/rootUtils.h index 69a392fe3..5ec8c5946 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -358,6 +358,8 @@ inline std::string getInconsistentCollsMsg(const std::vector& exist return sstr.str(); } +/// Create a collection id table from the information in the +/// CollectionWriteInfos inline std::shared_ptr makeCollIdTable(const std::vector& collInfo) { std::vector ids{}; ids.reserve(collInfo.size()); From c26c18f5c40d0f56423eaff24ae21aa0030a71a3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 10 Apr 2025 16:53:23 +0200 Subject: [PATCH 7/8] Add storage type to meta info --- include/podio/utilities/RootHelpers.h | 3 ++- src/RNTupleWriter.cc | 2 +- src/ROOTLegacyReader.cc | 2 +- src/ROOTReader.cc | 4 ++-- src/ROOTWriter.cc | 2 +- src/rootUtils.h | 7 ++++++- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index dc8b8a4f6..41d17f927 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -20,10 +20,11 @@ namespace root_utils { // collection, and its schema version struct CollectionWriteInfo { uint32_t collectionID{static_cast(-1)}; ///< collection id - std::string dataType{}; ///< The fully qualified data type + std::string dataType{}; ///< The fully qualified data type of the collection bool 
isSubset{false}; ///< Whether this collection is a subset collection or not unsigned int schemaVersion{0}; ///< The schema version of the collection type std::string name{}; ///< The name of the collection + std::string storageType{}; ///< The type in which the data is actually stored }; // The format used until version 1.2 using CollectionWriteInfoT = std::tuple; diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index a0a346e03..f4038eda6 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -96,7 +96,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat catInfo.collInfo.reserve(collections.size()); for (const auto& [name, coll] : collections) { catInfo.collInfo.emplace_back(coll->getID(), std::string(coll->getTypeName()), coll->isSubsetCollection(), - coll->getSchemaVersion(), name); + coll->getSchemaVersion(), name, root_utils::getStorageTypeName(coll)); } } else { if (!root_utils::checkConsistentColls(catInfo.collInfo, collsToWrite)) { diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index dc5c7ca70..d8329c7f2 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -178,7 +178,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; - for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _, __] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = m_table->name(collID).value(); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index ca4ee5626..184c4806b 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -354,7 +354,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable std::vector storedClasses; storedClasses.reserve(collInfo.size()); - for (const auto& 
[collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _, __] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = idTable.name(collID).value(); @@ -406,7 +406,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, std::vector storedClasses; storedClasses.reserve(collInfo.size()); - for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _, __] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = idTable.name(collID).value(); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index f78269eb1..42d4042ff 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -119,7 +119,7 @@ void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetID(), std::string(coll->getTypeName()), coll->isSubsetCollection(), - coll->getSchemaVersion(), name); + coll->getSchemaVersion(), name, root_utils::getStorageTypeName(coll)); } fillParams(catInfo, parameters); diff --git a/src/rootUtils.h b/src/rootUtils.h index 5ec8c5946..fdb6b0c96 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -1,6 +1,7 @@ #ifndef PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy +#include "podio/CollectionBase.h" #include "podio/CollectionIDTable.h" #include "podio/utilities/MiscHelpers.h" #include "podio/utilities/RootHelpers.h" @@ -199,6 +200,10 @@ inline std::string subsetBranch(const std::string& name) { return name + "_objIdx"; } +inline std::string getStorageTypeName(const podio::CollectionBase* coll) { + return "std::vector<" + std::string(coll->getDataTypeName()) + ">"; +} + /** * Reset all the branches that 
by getting them from the TTree again */ @@ -365,7 +370,7 @@ inline std::shared_ptr makeCollIdTable(const std::vect ids.reserve(collInfo.size()); std::vector names{}; names.reserve(collInfo.size()); - for (const auto& [id, _1, _2, _3, name] : collInfo) { + for (const auto& [id, _1, _2, _3, name, _5] : collInfo) { ids.emplace_back(id); names.emplace_back(name); } From 1a7aa232e3949b9f90d6182898b7ba34e34c8de1 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 15 Apr 2025 14:05:58 +0200 Subject: [PATCH 8/8] Update documentation --- doc/storage_details.md | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/doc/storage_details.md b/doc/storage_details.md index 9aabce7f5..e0d497df2 100644 --- a/doc/storage_details.md +++ b/doc/storage_details.md @@ -62,23 +62,29 @@ there will be the following branches per supported type The podio related metadata, stored in the `podio_metadata` `TTree` (or `RNTupleModel`) contains the following general information once per file - - The version of podio that has been used to write this file - The complete datamodel definitions for each datamodel that was encountered when writing the file -And the following information once per category -- The mapping of collection names to collection IDs -- The types of all the stored collections -- The schema version of all stored collections -- Which collections are stored as subset collections - -Here the `TTree` based and `RNTuple` based backends differ slightly in the way -these data are stored exactly. The `TTree` based backend stores the data in a -slightly more structured way, taking advantage of ROOTs capabilities to stream -out more complex object, e.g. the `podio::CollectionIDTable` is streamed as a -whole. The `RNTuple` based backend on the other hand, destructures the -information into separate fields that run in parallel. 
+And the following information once per category for each collection in that category +- The collection ID +- The collection name +- The collection type +- Whether the collection is a subset collection +- The collection schema version +- The collection storage type (which is different from the collection type and + describes the format in which the data is actually stored rather than how it + can be accessed in memory) + +From a technical point of view this information is stored as a +`std::vector<podio::root_utils::CollectionWriteInfo>`. + +```{note} +The exact details of how this information is stored in podio files have changed +several times. The readers provided by podio handle these changes transparently, +but other readers might have to adapt to these changes. **Notable changes +happened before v01-00, and v01-03.** +``` ## SIO