Skip to content

Commit 26d51c9

Browse files
authored
Add python bindings for the RNTuple backend (#488)
* Add RNTupleReader to python bindings and file dispatch * Complete ROOTNTupleReader interface and expose it in dictionaries * Add python bindings for RNTuple writer
1 parent 7a85b3a commit 26d51c9

12 files changed

+145
-12
lines changed

include/podio/ROOTNTupleReader.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
1010

1111
#include <string>
12+
#include <string_view>
1213
#include <unordered_map>
1314
#include <vector>
1415

@@ -47,6 +48,9 @@ class ROOTNTupleReader {
4748
*/
4849
std::unique_ptr<podio::ROOTFrameData> readEntry(const std::string& name, const unsigned entry);
4950

51+
/// Get the names of all the available Frame categories in the current file(s)
52+
std::vector<std::string_view> getAvailableCategories() const;
53+
5054
/// Returns number of entries for the given name
5155
unsigned getEntries(const std::string& name);
5256

@@ -55,6 +59,16 @@ class ROOTNTupleReader {
5559
return m_fileVersion;
5660
}
5761

62+
/// Get the datamodel definition for the given name
63+
const std::string_view getDatamodelDefinition(const std::string& name) const {
64+
return m_datamodelHolder.getDatamodelDefinition(name);
65+
}
66+
67+
/// Get all names of the datamodels that are available from this reader
68+
std::vector<std::string> getAvailableDatamodels() const {
69+
return m_datamodelHolder.getAvailableDatamodels();
70+
}
71+
5872
void closeFile();
5973

6074
private:

python/podio/reading.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,26 @@ def _is_frame_sio_file(filename):
2323
'or there is a version mismatch')
2424

2525

26-
def _is_frame_root_file(filename):
27-
"""Peek into the root file to determine whether this is a legacy file or not."""
26+
class RootFileFormat:
27+
"""Enum to specify the ROOT file format"""
28+
TTREE = 0 # Non-legacy TTree based file
29+
RNTUPLE = 1 # RNTuple based file
30+
LEGACY = 2 # Legacy TTree based file
31+
32+
33+
def _determine_root_format(filename):
34+
"""Peek into the root file to determine which flavor we have at hand."""
2835
file = TFile.Open(filename)
29-
# The ROOT Frame writer puts a podio_metadata TTree into the file
30-
return bool(file.Get('podio_metadata'))
36+
37+
metadata = file.Get("podio_metadata")
38+
if not metadata:
39+
return RootFileFormat.LEGACY
40+
41+
md_class = metadata.IsA().GetName()
42+
if "TTree" in md_class:
43+
return RootFileFormat.TTREE
44+
45+
return RootFileFormat.RNTUPLE
3146

3247

3348
def get_reader(filename):
@@ -50,8 +65,12 @@ def get_reader(filename):
5065
return sio_io.LegacyReader(filename)
5166

5267
if filename.endswith('.root'):
53-
if _is_frame_root_file(filename):
68+
root_flavor = _determine_root_format(filename)
69+
if root_flavor == RootFileFormat.TTREE:
5470
return root_io.Reader(filename)
55-
return root_io.LegacyReader(filename)
71+
if root_flavor == RootFileFormat.RNTUPLE:
72+
return root_io.RNTupleReader(filename)
73+
if root_flavor == RootFileFormat.LEGACY:
74+
return root_io.LegacyReader(filename)
5675

5776
raise ValueError('file must end on .root or .sio')

python/podio/root_io.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,24 @@ def __init__(self, filenames):
2727
super().__init__()
2828

2929

30+
class RNTupleReader(BaseReaderMixin):
31+
"""Reader class for reading podio RNTuple root files."""
32+
33+
def __init__(self, filenames):
34+
"""Create an RNTuple reader that reads from the passed file(s).
35+
36+
Args:
37+
filenames (str or list[str]): file(s) to open and read data from
38+
"""
39+
if isinstance(filenames, str):
40+
filenames = (filenames,)
41+
42+
self._reader = podio.ROOTNTupleReader()
43+
self._reader.openFiles(filenames)
44+
45+
super().__init__()
46+
47+
3048
class LegacyReader(BaseReaderMixin):
3149
"""Reader class for reading legacy podio root files.
3250
@@ -59,3 +77,14 @@ def __init__(self, filename):
5977
filename (str): The name of the output file
6078
"""
6179
self._writer = podio.ROOTFrameWriter(filename)
80+
81+
82+
class RNTupleWriter(BaseWriterMixin):
83+
"""Writer class for writing podio RNTuple root files"""
84+
def __init__(self, filename):
85+
"""Create a writer for writing files
86+
87+
Args:
88+
filename (str): The name of the output file
89+
"""
90+
self._writer = podio.ROOTNTupleWriter(filename)

src/ROOTNTupleReader.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ void ROOTNTupleReader::openFiles(const std::vector<std::string>& filenames) {
8787

8888
auto edmView = m_metadata->GetView<std::vector<std::tuple<std::string, std::string>>>(root_utils::edmDefBranchName);
8989
auto edm = edmView(0);
90+
m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm));
9091

9192
auto availableCategoriesField = m_metadata->GetView<std::vector<std::string>>(root_utils::availableCategories);
9293
m_availableCategories = availableCategoriesField(0);
@@ -107,6 +108,15 @@ unsigned ROOTNTupleReader::getEntries(const std::string& name) {
107108
return m_totalEntries[name];
108109
}
109110

111+
std::vector<std::string_view> ROOTNTupleReader::getAvailableCategories() const {
112+
std::vector<std::string_view> cats;
113+
cats.reserve(m_availableCategories.size());
114+
for (const auto& cat : m_availableCategories) {
115+
cats.emplace_back(cat);
116+
}
117+
return cats;
118+
}
119+
110120
std::unique_ptr<ROOTFrameData> ROOTNTupleReader::readNextEntry(const std::string& name) {
111121
return readEntry(name, m_entries[name]);
112122
}

src/ROOTNTupleWriter.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ std::unique_ptr<ROOT::Experimental::RNTupleModel>
164164
ROOTNTupleWriter::createModels(const std::vector<StoreCollection>& collections) {
165165
auto model = ROOT::Experimental::RNTupleModel::CreateBare();
166166
for (auto& [name, coll] : collections) {
167+
// For the first entry in each category we also record the datamodel
168+
// definition
169+
m_datamodelCollector.registerDatamodelDefinition(coll, name);
170+
167171
const auto collBuffers = coll->getBuffers();
168172

169173
if (collBuffers.vecPtr) {
@@ -252,7 +256,7 @@ void ROOTNTupleWriter::finish() {
252256
auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite();
253257
auto edmField =
254258
m_metadata->MakeField<std::vector<std::tuple<std::string, std::string>>>(root_utils::edmDefBranchName);
255-
*edmField = edmDefinitions;
259+
*edmField = std::move(edmDefinitions);
256260

257261
auto availableCategoriesField = m_metadata->MakeField<std::vector<std::string>>(root_utils::availableCategories);
258262
for (auto& [c, _] : m_categories) {

src/root_selection.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@
33
<class name="podio::ROOTFrameReader"/>
44
<class name="podio::ROOTLegacyReader"/>
55
<class name="podio::ROOTFrameWriter"/>
6+
<class name="podio::ROOTNTupleReader"/>
7+
<class name="podio::ROOTNTupleWriter"/>
68
</selection>
79
</lcgdict>

tests/dumpmodel/CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,43 @@ if (ENABLE_SIO)
5757
)
5858
endif()
5959

60+
set(rntuple_roundtrip_tests "")
61+
if (ENABLE_RNTUPLE)
62+
add_test(NAME datamodel_def_store_roundtrip_rntuple COMMAND
63+
${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh
64+
${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root
65+
datamodel
66+
${PROJECT_SOURCE_DIR}/tests
67+
)
68+
PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_rntuple)
69+
70+
add_test(NAME datamodel_def_store_roundtrip_rntuple_extension COMMAND
71+
${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh
72+
${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root
73+
extension_model
74+
${PROJECT_SOURCE_DIR}/tests/extension_model
75+
--upstream-edm=datamodel:${PROJECT_SOURCE_DIR}/tests/datalayout.yaml
76+
)
77+
PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_rntuple_extension)
78+
79+
set(rntuple_roundtrip_tests
80+
datamodel_def_store_roundtrip_rntuple
81+
datamodel_def_store_roundtrip_rntuple_extension
82+
)
83+
84+
set_tests_properties(
85+
${rntuple_roundtrip_tests}
86+
PROPERTIES
87+
DEPENDS write_rntuple
88+
)
89+
90+
endif()
91+
6092
set_tests_properties(
6193
datamodel_def_store_roundtrip_root
6294
datamodel_def_store_roundtrip_root_extension
6395
${sio_roundtrip_tests}
96+
${rntuple_roundtrip_tests}
6497
PROPERTIES
6598
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
6699
)

tests/root_io/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ if(ENABLE_RNTUPLE)
1919
${root_dependent_tests}
2020
write_rntuple.cpp
2121
read_rntuple.cpp
22+
read_python_frame_rntuple.cpp
2223
)
2324
endif()
2425
set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO)
@@ -80,6 +81,13 @@ endforeach()
8081

8182
#--- Write via python and the ROOT backend and see if we can read it back in in
8283
#--- c++
83-
add_test(NAME write_python_frame_root COMMAND python3 ${PROJECT_SOURCE_DIR}/tests/write_frame.py example_frame_with_py.root)
84+
add_test(NAME write_python_frame_root COMMAND python3 ${PROJECT_SOURCE_DIR}/tests/write_frame.py example_frame_with_py.root root_io.Writer)
8485
PODIO_SET_TEST_ENV(write_python_frame_root)
8586
set_property(TEST read_python_frame_root PROPERTY DEPENDS write_python_frame_root)
87+
88+
if (ENABLE_RNTUPLE)
89+
add_test(NAME write_python_frame_rntuple COMMAND python3 ${PROJECT_SOURCE_DIR}/tests/write_frame.py example_frame_with_py_rntuple.root root_io.RNTupleWriter)
90+
PODIO_SET_TEST_ENV(write_python_frame_rntuple)
91+
92+
set_property(TEST read_python_frame_rntuple PROPERTY DEPENDS write_python_frame_rntuple)
93+
endif()
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include "read_python_frame.h"
2+
3+
#include "podio/ROOTNTupleReader.h"
4+
5+
int main() {
6+
return read_frame<podio::ROOTNTupleReader>("example_frame_with_py_rntuple.root");
7+
}

tests/sio_io/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@ set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio wr
4040

4141
#--- Write via python and the SIO backend and see if we can read it back in in
4242
#--- c++
43-
add_test(NAME write_python_frame_sio COMMAND python3 ${PROJECT_SOURCE_DIR}/tests/write_frame.py example_frame_with_py.sio)
43+
add_test(NAME write_python_frame_sio COMMAND python3 ${PROJECT_SOURCE_DIR}/tests/write_frame.py example_frame_with_py.sio sio_io.Writer)
4444
PODIO_SET_TEST_ENV(write_python_frame_sio)
4545
set_property(TEST read_python_frame_sio PROPERTY DEPENDS write_python_frame_sio)

tests/write_frame.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,14 @@ def create_frame():
5454
return frame
5555

5656

57-
def write_file(io_backend, filename):
57+
def write_file(writer_type, filename):
5858
"""Write a file using the given Writer type and put one Frame into it under
5959
the events category
6060
"""
61+
io_backend, writer_name = writer_type.split(".")
6162
io_module = importlib.import_module(f"podio.{io_backend}")
6263

63-
writer = io_module.Writer(filename)
64+
writer = getattr(io_module, writer_name)(filename)
6465
event = create_frame()
6566
writer.write_frame(event, "events")
6667

@@ -70,9 +71,10 @@ def write_file(io_backend, filename):
7071

7172
parser = argparse.ArgumentParser()
7273
parser.add_argument("outputfile", help="Output file name")
74+
parser.add_argument("writer", help="The writer type to use")
7375

7476
args = parser.parse_args()
7577

7678
io_format = args.outputfile.split(".")[-1]
7779

78-
write_file(f"{io_format}_io", args.outputfile)
80+
write_file(args.writer, args.outputfile)

tools/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,9 @@ if(BUILD_TESTING)
3737
CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${PROJECT_BINARY_DIR}/tests/sio_io/example.sio)
3838
endif()
3939

40+
if (ENABLE_RNTUPLE)
41+
CREATE_DUMP_TEST(podio-dump-rntuple "write_rntuple" ${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root)
42+
CREATE_DUMP_TEST(podio-dump-rntuple-detailed "write_rntuple" --detailed --category events --entries 1:3 ${PROJECT_BINARY_DIR}/tests/root_io/example_rntuple.root)
43+
endif()
44+
4045
endif()

0 commit comments

Comments
 (0)