Skip to content

Commit f357cdc

Browse files
authored
Merge pull request #1245 from prayagyadav/edm4hep-schema
feat: EDM4HEPSchema and Newstyle FCCSchema
2 parents 55cad6e + 748208e commit f357cdc

27 files changed

+8187
-365
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ classifiers = [
3838
]
3939
dependencies = [
4040
"awkward>=2.6.7",
41-
"uproot>=5.3.11",
41+
"uproot>=5.6.0",
4242
"dask[array]>=2024.3.0",
4343
"dask-awkward>=2025.2.0",
4444
"dask-histogram>=2025.2.0",

src/coffea/dataset_tools/preprocess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def get_steps(
8383
if save_form:
8484
form_str = uproot.dask(
8585
tree,
86-
ak_add_doc=True,
86+
ak_add_doc={"__doc__": "title", "typename": "typename"},
8787
filter_name=no_filter,
8888
filter_typename=no_filter,
8989
filter_branch=partial(_remove_not_interpretable, emit_warning=False),

src/coffea/nanoevents/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
FCC,
66
BaseSchema,
77
DelphesSchema,
8+
EDM4HEPSchema,
89
FCCSchema,
10+
FCCSchema_edm4hep1,
911
NanoAODSchema,
1012
PDUNESchema,
1113
PFNanoAODSchema,
@@ -26,4 +28,6 @@
2628
"ScoutingNanoAODSchema",
2729
"FCC",
2830
"FCCSchema",
31+
"FCCSchema_edm4hep1",
32+
"EDM4HEPSchema",
2933
]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Lazy loaders for the versioned EDM4HEP YAML files stored in this package."""

import importlib

# ``import importlib`` alone does not guarantee that the ``resources``
# submodule is loaded; import it explicitly so ``importlib.resources.files``
# cannot fail with AttributeError depending on import order.
import importlib.resources
import os
from functools import partial

import yaml

# Location of the ``coffea.nanoevents.assets`` package, where the
# ``edm4hep_v<version>.yaml`` files are looked up.
root_dir = importlib.resources.files("coffea.nanoevents.assets")

# Version tags for which an ``edm4hep_v<version>.yaml`` file name is built.
versions = [
    "00-10-01",
    "00-10-02",
    "00-10-03",
    "00-10-04",
    "00-10-05",
    "00-99-00",
    "00-99-01",
]


def _load_edm4hep_version(yamlfile):
    """Parse a YAML file and return the loaded object.

    Parameters
    ----------
        yamlfile : str or path-like
            Path of the YAML file to read.

    Returns
    -------
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so the file parses the same way on every system.
    with open(yamlfile, encoding="utf-8") as f:
        return yaml.safe_load(f)


# Maps each version tag to a zero-argument callable; the YAML file is only
# read and parsed when that callable is invoked.
edm4hep_ver = {
    version: partial(
        _load_edm4hep_version,
        yamlfile=os.path.join(root_dir, f"edm4hep_v{version}.yaml"),
    )
    for version in versions
}

src/coffea/nanoevents/assets/edm4hep_v00-10-01.yaml

Lines changed: 578 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-10-02.yaml

Lines changed: 578 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-10-03.yaml

Lines changed: 593 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-10-04.yaml

Lines changed: 596 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-10-05.yaml

Lines changed: 593 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-99-00.yaml

Lines changed: 882 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/assets/edm4hep_v00-99-01.yaml

Lines changed: 877 additions & 0 deletions
Large diffs are not rendered by default.

src/coffea/nanoevents/factory.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ def __call__(self, form):
117117
for ifield, field in enumerate(form.fields):
118118
iform = form.contents[ifield].to_dict()
119119
branch_forms[field] = _lazify_form(
120-
iform, f"{field},!load", docstr=iform["parameters"]["__doc__"]
120+
iform,
121+
f"{field},!load",
122+
docstr=iform["parameters"]["__doc__"],
123+
typestr=iform["parameters"]["typename"],
121124
)
122125
lform = {
123126
"class": "RecordArray",
@@ -129,6 +132,7 @@ def __call__(self, form):
129132
},
130133
"form_key": None,
131134
}
135+
132136
return (
133137
awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form),
134138
self,
@@ -340,13 +344,12 @@ def from_root(
340344
to_open = file
341345
if isinstance(file, uproot.reading.ReadOnlyDirectory):
342346
to_open = file[treepath]
343-
344347
opener = partial(
345348
uproot.dask,
346349
to_open,
347350
full_paths=True,
348351
open_files=False,
349-
ak_add_doc=True,
352+
ak_add_doc={"__doc__": "title", "typename": "typename"},
350353
filter_branch=_remove_not_interpretable,
351354
steps_per_file=steps_per_file,
352355
known_base_form=known_base_form,
@@ -372,6 +375,9 @@ def from_root(
372375
else:
373376
tree = uproot.open(file, **uproot_options)
374377

378+
# Get the typenames
379+
typenames = tree.typenames()
380+
375381
if entry_start is None or entry_start < 0:
376382
entry_start = 0
377383
if entry_stop is None or entry_stop > tree.num_entries:
@@ -396,6 +402,7 @@ def from_root(
396402
base_form = mapping._extract_base_form(
397403
tree, iteritems_options=iteritems_options
398404
)
405+
base_form["typenames"] = typenames
399406

400407
return cls._from_mapping(
401408
mapping,

src/coffea/nanoevents/mapping/uproot.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,26 @@ class CannotBeNanoEvents(Exception):
2828
pass
2929

3030

31-
def _lazify_form(form, prefix, docstr=None):
31+
def _lazify_form(form, prefix, docstr=None, typestr=None):
3232
if not isinstance(form, dict) or "class" not in form:
3333
raise RuntimeError("form should have been normalized by now")
3434

35-
parameters = _lazify_parameters(form.get("parameters", {}), docstr=docstr)
35+
parameters = _lazify_parameters(
36+
form.get("parameters", {}), docstr=docstr, typestr=typestr
37+
)
3638
if form["class"].startswith("ListOffset"):
3739
# awkward will add !offsets
3840
form["form_key"] = quote(prefix)
3941
form["content"] = _lazify_form(
40-
form["content"], prefix + ",!content", docstr=docstr
42+
form["content"], prefix + ",!content", docstr=docstr, typestr=typestr
4143
)
4244
elif form["class"] == "NumpyArray":
4345
form["form_key"] = quote(prefix)
4446
if parameters:
4547
form["parameters"] = parameters
4648
elif form["class"] == "RegularArray":
4749
form["content"] = _lazify_form(
48-
form["content"], prefix + ",!content", docstr=docstr
50+
form["content"], prefix + ",!content", docstr=docstr, typestr=typestr
4951
)
5052
if parameters:
5153
form["parameters"] = parameters
@@ -61,7 +63,10 @@ def _lazify_form(form, prefix, docstr=None):
6163
assert prefix.endswith("!load")
6264
form["form_key"] = quote(prefix + "allowmissing,!index")
6365
form["content"] = _lazify_form(
64-
form["content"], prefix + "allowmissing,!content", docstr=docstr
66+
form["content"],
67+
prefix + "allowmissing,!content",
68+
docstr=docstr,
69+
typestr=typestr,
6570
)
6671
if parameters:
6772
form["parameters"] = parameters
@@ -88,12 +93,16 @@ def _lazify_form(form, prefix, docstr=None):
8893
return form
8994

9095

91-
def _lazify_parameters(form_parameters, docstr=None):
96+
def _lazify_parameters(form_parameters, docstr=None, typestr=None):
9297
parameters = {}
9398
if "__array__" in form_parameters:
9499
parameters["__array__"] = form_parameters["__array__"]
95100
if docstr is not None:
96101
parameters["__doc__"] = docstr
102+
if typestr is not None:
103+
parameters["typename"] = typestr
104+
if "typename" in form_parameters: # eager mode
105+
parameters["typename"] = form_parameters["typename"]
97106
return parameters
98107

99108

@@ -159,13 +168,16 @@ def _extract_base_form(cls, tree, iteritems_options={}):
159168
form.to_json()
160169
) # normalizes form (expand NumpyArray classes)
161170
try:
162-
form = _lazify_form(form, f"{key},!load", docstr=branch.title)
171+
form = _lazify_form(
172+
form, f"{key},!load", docstr=branch.title, typestr=branch.typename
173+
)
163174
except CannotBeNanoEvents as ex:
164175
warnings.warn(
165176
f"Skipping {key} as it is not interpretable by NanoEvents\nDetails: {ex}"
166177
)
167178
continue
168179
branch_forms[key] = form
180+
169181
return {
170182
"class": "RecordArray",
171183
"contents": [item for item in branch_forms.values()],

0 commit comments

Comments
 (0)