Skip to content

Commit

Permalink
Merge branch 'main' into feat-add-copy-ttree
Browse files Browse the repository at this point in the history
  • Loading branch information
zbilodea authored Aug 13, 2024
2 parents c487bb4 + 80e7803 commit c7825df
Show file tree
Hide file tree
Showing 10 changed files with 572 additions and 16 deletions.
9 changes: 9 additions & 0 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,15 @@
"contributions": [
"code"
]
},
{
"login": "Pepesob",
"name": "Piotr Sobczyński",
"avatar_url": "https://avatars.githubusercontent.com/u/113636251?v=4",
"profile": "https://github.com/Pepesob",
"contributions": [
"code"
]
}
],
"contributorsPerLine": 7,
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ Thanks especially to the gracious help of Uproot contributors (including the [or
<td align="center" valign="top" width="14.28%"><a href="https://github.com/bojohnson5"><img src="https://avatars.githubusercontent.com/u/20647190?v=4?s=100" width="100px;" alt="Bo Johnson"/><br /><sub><b>Bo Johnson</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=bojohnson5" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/milesgranger"><img src="https://avatars.githubusercontent.com/u/13764397?v=4?s=100" width="100px;" alt="Miles"/><br /><sub><b>Miles</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=milesgranger" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/djw9497"><img src="https://avatars.githubusercontent.com/u/51672890?v=4?s=100" width="100px;" alt="djw9497"/><br /><sub><b>djw9497</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=djw9497" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Pepesob"><img src="https://avatars.githubusercontent.com/u/113636251?v=4?s=100" width="100px;" alt="Piotr Sobczyński"/><br /><sub><b>Piotr Sobczyński</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=Pepesob" title="Code">💻</a></td>
</tr>
</tbody>
</table>
Expand Down
2 changes: 2 additions & 0 deletions src/uproot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
from uproot.writing import to_writable
from uproot.writing import dask_write

from uproot.writing.interpret import as_TGraph

import uproot.models.TObject
import uproot.models.TString
import uproot.models.TArray
Expand Down
58 changes: 43 additions & 15 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
_rntuple_cluster_summary_format = struct.Struct("<QQ")
_rntuple_checksum_format = struct.Struct("<Q")
_rntuple_envlink_size_format = struct.Struct("<Q")
_rntuple_page_num_elements_format = struct.Struct("<I")
_rntuple_page_num_elements_format = struct.Struct("<i")
_rntuple_column_group_id_format = struct.Struct("<I")
_rntuple_first_ele_index_format = struct.Struct("<I")

Expand Down Expand Up @@ -338,15 +338,26 @@ def field_form(self, this_id, seen):
structural_role == uproot.const.rntuple_role_leaf
and this_record.repetition == 0
):
# deal with std::atomic
# they have no associated column, but exactly one subfield containing the underlying data
tmp_id = self._alias_columns_dict.get(this_id, this_id)
if (
tmp_id not in self._column_records_dict
and len(self._related_ids[tmp_id]) == 1
):
this_id = self._related_ids[tmp_id][0]
seen.add(this_id)
# base case of recursion
# n.b. the split may happen in column
return self.col_form(this_id)
elif structural_role == uproot.const.rntuple_role_leaf:
# std::array it only has one child
if this_id in self._related_ids:
# std::array has only one subfield
child_id = self._related_ids[this_id][0]

inner = self.field_form(child_id, seen)
inner = self.field_form(child_id, seen)
else:
# std::bitset has no subfields, so we use it directly
inner = self.col_form(this_id)
keyname = f"RegularForm-{this_id}"
return ak.forms.RegularForm(inner, this_record.repetition, form_key=keyname)
elif structural_role == uproot.const.rntuple_role_vector:
Expand Down Expand Up @@ -387,7 +398,10 @@ def field_form(self, this_id, seen):
if this_id in self._related_ids:
newids = self._related_ids[this_id]
recordlist = [self.field_form(i, seen) for i in newids]
return ak.forms.UnionForm("i8", "i64", recordlist, form_key=keyname)
inner = ak.forms.UnionForm(
"i8", "i64", recordlist, form_key=keyname + "-union"
)
return ak.forms.IndexedOptionForm("i64", inner, form_key=keyname)
else:
# everything should recurse above this branch
raise AssertionError("this should be unreachable")
Expand Down Expand Up @@ -538,13 +552,15 @@ def arrays(
[c.num_entries for c in clusters[start_cluster_idx:stop_cluster_idx]]
)

form = self.to_akform().select_columns(filter_names)
form = self.to_akform().select_columns(
filter_names, prune_unions_and_records=False
)
# only read columns mentioned in the awkward form
target_cols = []
container_dict = {}
_recursive_find(form, target_cols)
for key in target_cols:
if "column" in key:
if "column" in key and "union" not in key:
key_nr = int(key.split("-")[1])
dtype_byte = self.column_records[key_nr].type
content = self.read_col_pages(
Expand All @@ -556,18 +572,30 @@ def arrays(
content = numpy.diff(content)
if dtype_byte == uproot.const.rntuple_col_type_to_num_dict["switch"]:
kindex, tags = _split_switch_bits(content)
container_dict[f"{key}-index"] = kindex
container_dict[f"{key}-tags"] = tags
# Find invalid variants and adjust buffers accordingly
invalid = numpy.flatnonzero(tags == -1)
if len(invalid) > 0:
kindex = numpy.delete(kindex, invalid)
tags = numpy.delete(tags, invalid)
invalid -= numpy.arange(len(invalid))
optional_index = numpy.insert(
numpy.arange(len(kindex), dtype=numpy.int64), invalid, -1
)
else:
optional_index = numpy.arange(len(kindex), dtype=numpy.int64)
container_dict[f"{key}-index"] = optional_index
container_dict[f"{key}-union-index"] = kindex
container_dict[f"{key}-union-tags"] = tags
else:
# don't distinguish data and offsets
container_dict[f"{key}-data"] = content
container_dict[f"{key}-offsets"] = content
cluster_offset = cluster_starts[start_cluster_idx]
entry_start -= cluster_offset
entry_stop -= cluster_offset
return ak.from_buffers(form, cluster_num_entries, container_dict)[
entry_start:entry_stop
]
return ak.from_buffers(
form, cluster_num_entries, container_dict, allow_noncanonical_form=True
)[entry_start:entry_stop]


# Supporting function and classes
Expand All @@ -592,9 +620,9 @@ def _recursive_find(form, res):
class PageDescription:
    """Reader for one page description record."""

    def read(self, chunk, cursor, context):
        """Decode a page description at ``cursor`` and return it as ``MetaData``.

        The element-count field is read exactly once.  Its sign is stored as
        ``has_checksum`` (a negative value sets the flag) and its magnitude
        as ``num_elements``; the locator is then read by ``LocatorReader``.

        Args:
            chunk: Data chunk handed through to ``cursor.field`` and to
                ``LocatorReader.read``.
            cursor: Cursor used to read the count field and the locator.
            context: Passed through unchanged to both reads.

        Returns:
            A ``MetaData`` object named after this class, carrying
            ``has_checksum``, ``num_elements`` and ``locator``.
        """
        out = MetaData(type(self).__name__)
        # Read the count field a single time.  The earlier form also kept the
        # pre-change read of the same field, so it was consumed twice before
        # the locator (presumably shifting the cursor -- confirm against
        # Cursor.field).
        num_elements = cursor.field(chunk, _rntuple_page_num_elements_format, context)
        out.has_checksum = num_elements < 0
        out.num_elements = abs(num_elements)
        out.locator = LocatorReader().read(chunk, cursor, context)
        return out

Expand Down
13 changes: 12 additions & 1 deletion src/uproot/models/TGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,18 @@ def _serialize(self, out, header, name, tobject_flags):
where = len(out)
for x in self._bases:
x._serialize(out, True, name, tobject_flags)
raise NotImplementedError("FIXME")
out.extend(
[
struct.pack(">i", self._members["fNpoints"]),
b"\x01",
self._members["fX"].astype(">f8").tobytes(),
b"\x01",
self._members["fY"].astype(">f8").tobytes(),
b"@\x00\x00\x1f\xff\xff\xff\xffTList\x00@\x00\x00\x11\x00\x05\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
struct.pack(">d", self._members["fMinimum"]),
struct.pack(">d", self._members["fMaximum"]),
]
)
if header:
num_bytes = sum(len(x) for x in out[where:])
version = 4
Expand Down
2 changes: 2 additions & 0 deletions src/uproot/source/futures.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ def __init__(self, max_workers: int | None = None):
import multiprocessing

self._max_workers = multiprocessing.cpu_count()
else:
self._max_workers = max_workers

self._work_queue = queue.Queue()
self._workers = []
Expand Down
Loading

0 comments on commit c7825df

Please sign in to comment.