diff --git a/src/uproot/interpretation/identify.py b/src/uproot/interpretation/identify.py index 26daafbd9..8d6884d8d 100644 --- a/src/uproot/interpretation/identify.py +++ b/src/uproot/interpretation/identify.py @@ -124,7 +124,6 @@ def _from_leaves_one(leaf, title): for x in re.findall(_item_any_pattern, title) ): is_jagged = True - return dims, is_jagged diff --git a/src/uproot/model.py b/src/uproot/model.py index 36dde4270..b9159c1bd 100644 --- a/src/uproot/model.py +++ b/src/uproot/model.py @@ -787,7 +787,6 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): self._is_memberwise = False old_breadcrumbs = context.get("breadcrumbs", ()) context["breadcrumbs"] = (*old_breadcrumbs, self) - self.hook_before_read(chunk=chunk, cursor=cursor, context=context, file=file) forth_obj = uproot._awkwardforth.get_forth_obj(context) if forth_obj is not None: @@ -798,6 +797,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): if context.get("reading", True): temp_index = cursor._index self.read_numbytes_version(chunk, cursor, context) + length = cursor._index - temp_index if length != 0 and forth_obj is not None: forth_stash.pre_code.append(f"{length} stream skip\n") @@ -843,6 +843,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): cursor.skip(4) if context.get("reading", True): + self.hook_before_read_members( chunk=chunk, cursor=cursor, context=context, file=file ) @@ -868,7 +869,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): out = self.postprocess(chunk, cursor, context, file) context["breadcrumbs"] = old_breadcrumbs - + # print(out) return out def read_numbytes_version(self, chunk, cursor, context): diff --git a/src/uproot/models/TBranch.py b/src/uproot/models/TBranch.py index 9f7466ebf..41243ad96 100644 --- a/src/uproot/models/TBranch.py +++ b/src/uproot/models/TBranch.py @@ -14,6 +14,7 @@ import uproot import uproot.models.TH +import 
uproot.models.TObjArray _tbranch10_format1 = struct.Struct(">iiiiqiIiqqq") _tbranch10_dtype1 = numpy.dtype(">i4") diff --git a/src/uproot/models/TNamed.py b/src/uproot/models/TNamed.py index 96b303ef9..6c7ba557f 100644 --- a/src/uproot/models/TNamed.py +++ b/src/uproot/models/TNamed.py @@ -34,7 +34,7 @@ def read_members(self, chunk, cursor, context, file): concrete=self.concrete, ) ) - + # print("tnamed", context, chunk.raw_data.tobytes()) self._members["fName"] = cursor.string(chunk, context) self._members["fTitle"] = cursor.string(chunk, context) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index fc37e4a45..62334a080 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -30,6 +30,9 @@ import uproot.compression import uproot.const +import uproot.deserialization +import uproot.models.TBranch +import uproot.models.TLeaf import uproot.models.TList import uproot.reading import uproot.serialization @@ -100,7 +103,6 @@ def write(self, sink): + repr(self) ) tmp = self.serialize() - # print(f"writing {self._location}:{self._location + len(tmp)} ({len(tmp)}) {type(self).__name__} {self.name if hasattr(self, 'name') else ''} {self.title if hasattr(self, 'title') else ''}") sink.write(self._location, tmp) self._file_dirty = False @@ -580,6 +582,460 @@ def deserialize(cls, raw_bytes, location, num_bytes, num_slices, in_path): return out +class OldBranches(CascadeLeaf): + """ + A :doc:`uproot.writing._cascade.CascadeLeaf` for copying an old TBranch to a new TTree. ? 
+ """ + + def __init__(self, branches): + self._branches = branches + self._branch_data = {} + + @property + def allocation(self): + if self._allocation is None: + self._allocation = self.num_bytes + return self._allocation + + @allocation.setter + def allocation(self, value): + if self._allocation != value: + self._allocation = value + + @property + def num_bytes(self): + total = 0 + for _, stop in self._slices: + if stop - 1 >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + + if self._end is None: + if total + _free_format_small.size >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + elif self._end >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + + return total + + def serialize(self, out, branch): + self.read_members(branch) + any_tbranch_index = len(out) + out.append(None) + if "fClonesName" in branch.all_members.keys(): + out.append(b"TBranchElement\x00") + tbranchelement_index = len(out) + out.append(None) + else: + out.append(b"TBranch\x00") + + tbranch_index = len(out) + out.append(None) + + datum = self._branch_data[branch.member("fName")] + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = branch.tree.name.encode(errors="surrogateescape") + title_asbytes = branch.tree.title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) + + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() + tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() + tbranch_tnamed._bases.append(tbranch_tobject) + tbranch_tnamed._members["fTitle"] = datum["fTitle"] + tbranch_tnamed._serialize(out, True, datum["fName"], numpy.uint32(0x00400000)) + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + tattfill = 
uproot.models.TAtt.Model_TAttFill_v2.empty() + tattfill._members["fFillColor"] = datum["fFillColor"] + tattfill._members["fFillStyle"] = datum["fFillStyle"] + + out.append(tattfill.serialize(out)) + datum["metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # Lie about the compression level so that ROOT checks and does the right thing. + # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 + # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. + # (I don't know where the "no, really, this is uncompressed" bit is.) + + out.append( + uproot.models.TBranch._tbranch13_format1.pack( + datum["fCompress"], + datum["fBasketSize"], + datum["fEntryOffsetLen"], + datum["fWriteBasket"], + datum["fEntryNumber"], + ) + ) + # TODO Check this? + # fIOFeatures (TIOFeatures) + out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + # print(self._branch_data["fIOFeatures"].serialize()) + # 0 to bytestring?? + + out.append( + uproot.models.TBranch._tbranch13_format2.pack( + datum["fOffset"], + datum["fMaxBaskets"], # fMaxBaskets + datum["fSplitLevel"], + datum["fEntries"], # fEntries + datum["fFirstEntry"], + datum["fTotBytes"], + datum["fZipBytes"], + ) + ) + + # empty TObjArray of TBranches + + # TODO Test this! 
Later make sure TBranchElements are handled + + # if len(datum["fBranches"]) == 0: + # empty TObjArray of TBranches + + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + + # else: + # out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") + # out.append( + # uproot.models.TObjArray._tobjarray_format1.pack( + # len(self._branch_data["fBranches"]), # TObjArray fSize + # 0, # TObjArray fLowerBound + # ) + # ) + # for branch in self._branch_data["fBranches"]: + # out.append( + # uproot.models.TBranch._tbranch13_format1.pack( + # datum["fCompress"], + # datum["fBasketSize"], + # datum["fEntryOffsetLen"], + # datum["fWriteBasket"], + # datum["fEntryNumber"], + # ) + # ) + + # # TODO Check this? + # # fIOFeatures (TIOFeatures) + # out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + # # print(self._branch_data["fIOFeatures"].serialize()) + # # 0 to bytestring?? + + # out.append( + # uproot.models.TBranch._tbranch13_format2.pack( + # datum["fOffset"], + # datum["fMaxBaskets"], # fMaxBaskets + # datum["fSplitLevel"], + # datum["fEntries"], # fEntries + # datum["fFirstEntry"], + # datum["fTotBytes"], + # datum["fZipBytes"], + # ) + # ) + + subtobjarray_of_leaves_index = len(out) + out.append(None) + + # TObjArray header with fName: "", fSize: 1, fLowerBound: 0 + out.append( + b"\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00" + ) + + absolute_location = key_num_bytes + sum(len(x) for x in out if x is not None) + absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) + datum["tleaf_reference_number"] = absolute_location + 2 + subany_tleaf_index = len(out) + out.append(None) + for leaf in datum["fLeaves"]: + # Make and serialize each leaf?? + # if isinstance(leaf, model....) 
+ if isinstance(leaf, uproot.models.TLeaf.Model_TLeafO_v1): + letter_upper = "O" + special_struct = uproot.models.TLeaf._tleafO1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafB_v1): + letter_upper = "B" + special_struct = uproot.models.TLeaf._tleafb1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafS_v1): + letter_upper = "S" + special_struct = uproot.models.TLeaf._tleafs1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafI_v1): + letter_upper = "I" + special_struct = uproot.models.TLeaf._tleafi1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafL_v1): + letter_upper = "L" + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafF_v1): + letter_upper = "F" + special_struct = uproot.models.TLeaf._tleaff1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafD_v1): + letter_upper = "D" + special_struct = uproot.models.TLeaf._tleafd1_format1 + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafC_v1): + letter_upper = "C" + special_struct = uproot.models.TLeaf._tleafc1_format1 + # else: # This will never be reached? What to do about G + # letter_upper = "G" + # special_struct = uproot.models.TLeaf._tleafl1_format0 + if isinstance( + leaf, uproot.models.TLeaf.Model_TLeafElement_v1 + ): # TLeafElement... 
+ special_struct = uproot.models.TLeaf._tleafelement1_format1 + out.append((b"TLeafElement") + b"\x00") + else: + out.append(("TLeaf" + letter_upper).encode() + b"\x00") + # single TLeaf + leaf_name = datum["fName"].encode(errors="surrogateescape") + leaf_title = ( + datum["fLeaves"][0].member("fTitle").encode(errors="surrogateescape") + ) + leaf_name_length = (1 if len(leaf_name) < 255 else 5) + len(leaf_name) + leaf_title_length = (1 if len(leaf_title) < 255 else 5) + len(leaf_title) + + leaf_header = numpy.array( + [ + 64, + 0, + 0, + 76, + 0, + 1, + 64, + 0, + 0, + 54, + 0, + 2, + 64, + 0, + 0, + 30, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + ], + numpy.uint8, + ) + tmp = leaf_header[0:4].view(">u4") + tmp[:] = ( + numpy.uint32( + 42 + leaf_name_length + leaf_title_length + special_struct.size + ) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[6:10].view(">u4") + tmp[:] = ( + numpy.uint32(36 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[12:16].view(">u4") + tmp[:] = ( + numpy.uint32(12 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + + out.append(uproot._util.tobytes(leaf_header)) + + if len(leaf_name) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_name), len(leaf_name), leaf_name) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_name), 255, len(leaf_name), leaf_name + ) + ) + if len(leaf_title) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_title), len(leaf_title), leaf_title) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_title), 255, len(leaf_title), leaf_title + ) + ) + + # generic TLeaf members + out.append( + uproot.models.TLeaf._tleaf2_format0.pack( + leaf.member("fLen"), + leaf.member("fLenType"), + leaf.member("fOffset"), # fOffset + leaf.member("fIsRange"), # fIsRange + leaf.member("fIsUnsigned"), + ) + ) + if leaf.member("fLeafCount") is not None: + out.append( + 
uproot.deserialization._read_object_any_format1.pack( + self._branch_data[ + branch.member("fLeaves")[0] + .member("fLeafCount") + .member("fName") + ]["tleaf_reference_number"] + ) + ) + else: + out.append(b"\x00\x00\x00\x00") + + if not isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + # specialized TLeaf* members (fMinimum, fMaximum) + datum["tleaf_special_struct"] = special_struct + + out.append( + special_struct.pack( + int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + ) + ) + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + out.append( + uproot.models.TLeaf._tleafelement1_format1.pack( + leaf.member("fID"), # fIsRange + leaf.member("fType"), + ) + ) + out[subany_tleaf_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[subany_tleaf_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + + out[subtobjarray_of_leaves_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[subtobjarray_of_leaves_index + 1 :]), + 3, # TObjArray + ) + + # empty TObjArray of fBaskets (embedded) + if len(datum["fBaskets"]) >= 1: + msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." 
+ raise NotImplementedError(msg) + + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + + assert sum(1 if x is None else 0 for x in out) == 4 + datum["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # speedbump and fBasketBytes + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketBytes"])) + # speedbump and fBasketEntry + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketEntry"])) + # speedbump and fBasketSeek + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketSeek"])) + # out.append(datum["fFileName"].serialize()) # name = None? + out.append(b"\x00") + + if "fClonesName" in branch.all_members.keys(): + out[tbranchelement_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranchelement_index + 1 :] if x is not None), + 10, # TBranchElement (?) + ) + # out[tbranch_index] = uproot.serialization.numbytes_version( + # sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + # ) + else: + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + ) + out[any_tbranch_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + if ( + "fClonesName" in branch.all_members.keys() + ): # TBranchElement - find a more robust way to check....or make sure this is only is True if branch is a TBranchElement + out.append( + branch.member("fClassName").serialize() + ) # These three are TStrings + out.append(branch.member("fParentName").serialize()) + out.append(branch.member("fClonesName").serialize()) + out.append( + uproot.models.TBranch._tbranchelement10_format1.pack( + branch.member("fCheckSum"), + branch.member("fClassVersion"), + branch.member("fID"), + 
branch.member("fType"), + branch.member("fStreamerType"), + branch.member("fMaximum"), + ) + ) + out.append( + uproot.serialization.serialize_object_any(branch.member("fBranchCount")) + ) + out.append( + uproot.serialization.serialize_object_any( + branch.member("fBranchCount2") + ) + ) + + return out, datum["tleaf_reference_number"] + + def read_members(self, branch): + name = branch.member("fName") + self._branch_data[name] = {} + self._branch_data[name]["fTitle"] = branch.member("fTitle") + self._branch_data[name]["fName"] = branch.member("fName") + self._branch_data[name]["fFillColor"] = branch.member("fFillColor") + self._branch_data[name]["fFillStyle"] = branch.member("fFillStyle") + try: + self._branch_data[name]["fIOFeatures"] = branch.member("fIOFeatures") + except KeyError: + self._branch_data[name]["fIOFeatures"] = 0 # ? branch_member("fIOFeatures") + self._branch_data[name]["fCompress"] = branch.member("fCompress") + self._branch_data[name]["fBasketSize"] = branch.member("fBasketSize") + self._branch_data[name]["fEntryOffsetLen"] = branch.member("fEntryOffsetLen") + self._branch_data[name]["fWriteBasket"] = branch.member("fWriteBasket") + self._branch_data[name]["fEntryNumber"] = branch.member("fEntryNumber") + self._branch_data[name]["fOffset"] = branch.member("fOffset") + self._branch_data[name]["fMaxBaskets"] = branch.member("fMaxBaskets") + self._branch_data[name]["fSplitLevel"] = branch.member("fSplitLevel") + self._branch_data[name]["fEntries"] = branch.member("fEntries") + try: + self._branch_data[name]["fFirstEntry"] = branch.member("fFirstEntry") + except KeyError: + self._branch_data[name]["fFirstEntry"] = 0 + self._branch_data[name]["fTotBytes"] = branch.member("fTotBytes") + self._branch_data[name]["fZipBytes"] = branch.member("fZipBytes") + self._branch_data[name]["fLeaves"] = branch.member("fLeaves") + self._branch_data[name]["fBaskets"] = branch.member("fBaskets") + self._branch_data[name]["fBranches"] = branch.member("fBranches") + 
self._branch_data[name]["fBasketBytes"] = branch.member("fBasketBytes") + self._branch_data[name]["fBasketEntry"] = branch.member("fBasketEntry") + self._branch_data[name]["fBasketSeek"] = branch.member("fBasketSeek") + self._branch_data[name]["fFileName"] = branch.member("fFileName") + + def serialize_leaf_elements(self, out, special_struct): + # specialized TLeaf* members (fMinimum, fMaximum) + out.append(special_struct.pack(0, 0)) + + class FreeSegments(CascadeNode): """ A :doc:`uproot.writing._cascade.CascadeNode` for writing a ROOT FreeSegments record. @@ -1727,6 +2183,51 @@ def add_tree( tree.write_anew(sink) return tree + def add_branches( + self, + sink, + name, + title, + branch_types, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, + existing_ttree, + existing_branches, + new_branches, + directory, + ): + import uproot.writing._cascadetree + + tree = uproot.writing._cascadetree.Tree( + self, + name, + title, + branch_types, + self._freesegments, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, + existing_branches, + existing_ttree, + ) + updated_streamers = tree.add_branches( + sink, directory, new_branches + ) # need new_branches for extend... 
+ # start = key.seek_location + # stop = start + key.num_bytes + key.compressed_bytes + # directory._cascading.freesegments.release(start, stop) + + # directory._cascading._data.remove_key(key) + # self._cascading.header.modified_on = datetime.datetime.now() + + # directory._cascading.write(self._file.sink) + # directory._file.sink.set_file_length(self._cascading.freesegments.fileheader.end) + # directory._file.sink.flush() + return tree, updated_streamers + def add_rntuple(self, sink, name, title, akform): import uproot.writing._cascadentuple @@ -2249,7 +2750,6 @@ def create_empty( filename = "dynamic.root" if filename is None else os.path.split(filename)[-1] if len(filename) >= 256: raise ValueError("ROOT file names must be less than 256 bytes") - fileheader = FileHeader( None, None, @@ -2261,7 +2761,6 @@ def create_empty( None, uuid_function(), ) - freesegments_key = Key( None, None, @@ -2319,6 +2818,7 @@ def create_empty( fileheader.begin, None, ) + directory_data = DirectoryData(None, initial_directory_bytes, []) rootdirectory = RootDirectory( directory_key, @@ -2350,7 +2850,6 @@ def create_empty( ) fileheader.info_location = streamers_key.location fileheader.info_num_bytes = streamers_key.allocation + streamers.allocation - rootdirectory.write(sink) streamers.write(sink) diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 9ecd2e87f..d26a7308c 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -85,6 +85,8 @@ def __init__( field_name, initial_basket_capacity, resize_factor, + existing_branches=None, + existing_ttree=None, ): self._directory = directory self._name = name @@ -94,6 +96,8 @@ def __init__( self._field_name = field_name self._basket_capacity = initial_basket_capacity self._resize_factor = resize_factor + self._existing_branches = existing_branches + self._existing_ttree = existing_ttree if isinstance(branch_types, dict): branch_types_items = branch_types.items() @@ -831,7 
+835,6 @@ def extend(self, file, sink, data): self._num_baskets += 1 self._metadata["fTotBytes"] += uncompressed_bytes self._metadata["fZipBytes"] += compressed_bytes - self.write_updates(sink) def write_anew(self, sink): @@ -1177,7 +1180,6 @@ def write_anew(self, sink): uproot.const.kNewClassTag, ) ) - out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) @@ -1583,6 +1585,528 @@ def write_string_basket(self, sink, branch_name, compression, array, offsets): return fKeylen + fObjlen, fNbytes, location + def add_branches(self, sink, directory, new_branches): + # Get readonlykey for old tree + if ";" in self._name: + at = self._name.rindex(";") + item, cycle = self._name[:at], self._name[at + 1 :] + key = self._directory.data.get_key(item, cycle) + else: + key = self._directory.data.get_key(self._name, None) + + streamers = self._write_with_new_branches(sink, key) + self.extend(directory.file, sink, new_branches) + return streamers + + def _write_with_new_branches(self, sink, old_key): + models_for_streamers = [] + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = self._name.encode(errors="surrogateescape") + title_asbytes = self._title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) + + out = [None] + ttree_header_index = 0 + + tobject = uproot.models.TObject.Model_TObject.empty() + tnamed = uproot.models.TNamed.Model_TNamed.empty() + tnamed._bases.append(tobject) + tnamed._members["fTitle"] = self._title + tnamed._serialize(out, True, self._name, uproot.const.kMustCleanup) + + # TAttLine v2, fLineColor: 602 fLineStyle: 1 fLineWidth: 1 + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + # TAttMarker v2, fMarkerColor: 1, fMarkerStyle: 1, fMarkerSize: 1.0 + out.append( + 
b"@\x00\x00\x08\x00\x02\x02Z\x00\x01\x00\x01" + b"@\x00\x00\x06\x00\x02\x00\x00\x03\xe9" + b"@\x00\x00\n\x00\x02\x00\x01\x00\x01?\x80\x00\x00" + ) + + metadata_out_index = len(out) + out.append( + uproot.models.TTree._ttree20_format1.pack( + self._num_entries, + self._metadata["fTotBytes"], + self._metadata["fZipBytes"], + self._metadata["fSavedBytes"], + self._metadata["fFlushedBytes"], + self._metadata["fWeight"], + self._metadata["fTimerInterval"], + self._metadata["fScanField"], + self._metadata["fUpdate"], + self._metadata["fDefaultEntryOffsetLen"], + self._metadata["fNClusterRange"], + self._metadata["fMaxEntries"], + self._metadata["fMaxEntryLoop"], + self._metadata["fMaxVirtualSize"], + self._metadata["fAutoSave"], + self._metadata["fAutoFlush"], + self._metadata["fEstimate"], + ) + ) + # speedbump (0), fClusterRangeEnd (empty array), + # speedbump (0), fClusterSize (empty array) + # fIOFeatures (TIOFeatures) + out.append(b"\x00\x00@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + + tleaf_reference_numbers = [] + + tobjarray_of_branches_index = len(out) + out.append(None) + + num_branches = sum( + 0 if datum["kind"] == "record" else 1 for datum in self._branch_data + ) + + # Include original branches in num_branches + num_branches += len(self._existing_branches) + + # TObjArray header with fName: "" + out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") + out.append( + uproot.models.TObjArray._tobjarray_format1.pack( + num_branches, # TObjArray fSize + 0, # TObjArray fLowerBound + ) + ) + # Write old branches + if self._existing_branches: + for branch in self._existing_branches: + cursor = branch.cursor.copy() + + # cursor before TObjArray of TBranches + first_indx = cursor.index + cursor.skip_after(branch) + second_indx = cursor.index + + f_indx = branch.member("fLeaves").cursor.index + + branch_start = ( + len( + uproot.writing.identify.to_TString(branch.classname).serialize() + ) + + 2 + ) + + if len(branch.branches) == 0: + # No subbranches + # Write 
remainder of branch + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx - branch_start : f_indx + 25 + ] + ) + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) + + tleaf_reference_numbers.append(absolute_location) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + f_indx + 25 : second_indx + ] + ) + else: + # With subbranches + subbranch = branch.branches[0] + cursor = subbranch.cursor.copy() + # cursor before TObjArray of TBranches + first_indx1 = cursor.index + cursor.skip_after(subbranch) + second_indx1 = cursor.index + + f_indx1 = subbranch.member("fLeaves").cursor.index + + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx - branch_start : first_indx1 - 8 + ] + ) + for ( + subbranch + ) in branch.branches: # how to get it to not copy all subbranches? + cursor = subbranch.cursor.copy() + # cursor before TObjArray of TBranches + first_indx1 = cursor.index + cursor.skip_after(subbranch) + second_indx1 = cursor.index + + f_indx1 = subbranch.member("fLeaves").cursor.index + + branch_start = ( + len( + uproot.writing.identify.to_TString( + subbranch.classname + ).serialize() + ) + + 2 + ) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx1 - 8 : f_indx1 + 25 + ] + ) + # Write TLeaf Reference + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) + + tleaf_reference_numbers.append(absolute_location) + + # Write remainder of branch + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + f_indx1 + 25 : second_indx1 + ] + ) + # Write TLeaf Reference + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) + + 
tleaf_reference_numbers.append(absolute_location) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + second_indx1:second_indx + ] + ) + for datum in self._branch_data: + if datum["kind"] == "record": + continue + + any_tbranch_index = len(out) + out.append(None) + out.append(b"TBranch\x00") + + tbranch_index = len(out) + out.append(None) + + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() + tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() + tbranch_tnamed._bases.append(tbranch_tobject) + tbranch_tnamed._members["fTitle"] = datum["fTitle"] + tbranch_tnamed._serialize( + out, True, datum["fName"], numpy.uint32(0x00400000) + ) + + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + out.append(b"@\x00\x00\x06\x00\x02\x00\x00\x03\xe9") + + assert sum(1 if x is None else 0 for x in out) == 4 + datum["metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # Lie about the compression level so that ROOT checks and does the right thing. + # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 + # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. + # (I don't know where the "no, really, this is uncompressed" bit is.) 
+ fCompress = 0 + + out.append( + uproot.models.TBranch._tbranch13_format1.pack( + fCompress, + datum["fBasketSize"], + datum["fEntryOffsetLen"], + self._num_baskets, # fWriteBasket + self._num_entries, # fEntryNumber + ) + ) + # fIOFeatures (TIOFeatures) + out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + + out.append( + uproot.models.TBranch._tbranch13_format2.pack( + datum["fOffset"], + self._basket_capacity, # fMaxBaskets + datum["fSplitLevel"], + self._num_entries, # fEntries + datum["fFirstEntry"], + datum["fTotBytes"], + datum["fZipBytes"], + ) + ) + if uproot.models.TBranch.Model_TBranch_v13 not in models_for_streamers: + models_for_streamers.append(uproot.models.TBranch.Model_TBranch_v13) + # empty TObjArray of TBranches + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + + subtobjarray_of_leaves_index = len(out) + out.append(None) + + # TObjArray header with fName: "", fSize: 1, fLowerBound: 0 + out.append( + b"\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00" + ) + + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) + datum["tleaf_reference_number"] = absolute_location + 2 + tleaf_reference_numbers.append(datum["tleaf_reference_number"]) + subany_tleaf_index = len(out) + out.append(None) + + letter = _dtype_to_char[datum["dtype"]] + letter_upper = letter.upper() + out.append(("TLeaf" + letter_upper).encode() + b"\x00") + if letter_upper == "O": + special_struct = uproot.models.TLeaf._tleafO1_format1 + model = uproot.models.TLeaf.Model_TLeafO_v1 + elif letter_upper == "B": + special_struct = uproot.models.TLeaf._tleafb1_format1 + model = uproot.models.TLeaf.Model_TLeafB_v1 + elif letter_upper == "S": + special_struct = uproot.models.TLeaf._tleafs1_format1 + model = uproot.models.TLeaf.Model_TLeafS_v1 + elif letter_upper == "I": + 
special_struct = uproot.models.TLeaf._tleafi1_format1 + model = uproot.models.TLeaf.Model_TLeafI_v1 + elif letter_upper == "G": + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif letter_upper == "L": + special_struct = uproot.models.TLeaf._tleafl1_format0 + model = uproot.models.TLeaf.Model_TLeafL_v1 + elif letter_upper == "F": + special_struct = uproot.models.TLeaf._tleaff1_format1 + model = uproot.models.TLeaf.Model_TLeafF_v1 + elif letter_upper == "D": + special_struct = uproot.models.TLeaf._tleafd1_format1 + model = uproot.models.TLeaf.Model_TLeafD_v1 + elif letter_upper == "C": + special_struct = uproot.models.TLeaf._tleafc1_format1 + model = uproot.models.TLeaf.Model_TLeafC_v1 + if model not in models_for_streamers: + models_for_streamers.append(model) + fLenType = datum["dtype"].itemsize + fIsUnsigned = letter != letter_upper + + if datum["shape"] == (): + dims = "" + else: + dims = "".join("[" + str(x) + "]" for x in datum["shape"]) + + if datum["counter"] is not None: + dims = "[" + datum["counter"]["fName"] + "]" + dims + # single TLeaf + leaf_name = datum["fName"].encode(errors="surrogateescape") + leaf_title = (datum["fName"] + dims).encode(errors="surrogateescape") + leaf_name_length = (1 if len(leaf_name) < 255 else 5) + len(leaf_name) + leaf_title_length = (1 if len(leaf_title) < 255 else 5) + len(leaf_title) + + leaf_header = numpy.array( + [ + 64, + 0, + 0, + 76, + 0, + 1, + 64, + 0, + 0, + 54, + 0, + 2, + 64, + 0, + 0, + 30, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + ], + numpy.uint8, + ) + tmp = leaf_header[0:4].view(">u4") + tmp[:] = ( + numpy.uint32( + 42 + leaf_name_length + leaf_title_length + special_struct.size + ) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[6:10].view(">u4") + tmp[:] = ( + numpy.uint32(36 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[12:16].view(">u4") + tmp[:] = ( + numpy.uint32(12 + leaf_name_length + leaf_title_length) + | 
uproot.const.kByteCountMask + ) + + out.append(uproot._util.tobytes(leaf_header)) + if len(leaf_name) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_name), len(leaf_name), leaf_name) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_name), 255, len(leaf_name), leaf_name + ) + ) + if len(leaf_title) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_title), len(leaf_title), leaf_title) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_title), 255, len(leaf_title), leaf_title + ) + ) + + fLen = 1 + for item in datum["shape"]: + fLen *= item + + # generic TLeaf members + out.append( + uproot.models.TLeaf._tleaf2_format0.pack( + fLen, + fLenType, + 0, # fOffset + datum["kind"] == "counter", # fIsRange + fIsUnsigned, + ) + ) + if datum["counter"] is None: + # null fLeafCount + out.append(b"\x00\x00\x00\x00") + else: + # reference to fLeafCount + out.append( + uproot.deserialization._read_object_any_format1.pack( + datum["counter"]["tleaf_reference_number"] + ) + ) + # specialized TLeaf* members (fMinimum, fMaximum) + out.append(special_struct.pack(0, 0)) + + datum["tleaf_special_struct"] = special_struct + + out[subany_tleaf_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[subany_tleaf_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + out[subtobjarray_of_leaves_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[subtobjarray_of_leaves_index + 1 :]), + 3, # TObjArray + ) + + # empty TObjArray of fBaskets (embedded) + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + + assert sum(1 if x is None else 0 for x in out) == 4 + datum["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # speedbump and fBasketBytes + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketBytes"])) + # speedbump 
and fBasketEntry + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketEntry"])) + # speedbump and fBasketSeek + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketSeek"])) + # empty fFileName + out.append(b"\x00") + + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + ) + + out[any_tbranch_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + + out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray + ) + # TODO find tleaf reference numbers and append them ?? or update and then append + + # TObjArray of TLeaf references + tleaf_reference_bytes = uproot._util.tobytes( + numpy.array(tleaf_reference_numbers, ">u4") + ) + + out.append( # This is still fine + struct.pack( + ">I13sI4s", + (21 + len(tleaf_reference_bytes)) | uproot.const.kByteCountMask, + b"\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00", + len(tleaf_reference_numbers), + b"\x00\x00\x00\x00", + ) + ) + + out.append(tleaf_reference_bytes) + # null fAliases (b"\x00\x00\x00\x00") + # empty fIndexValues array (4-byte length is zero) + # empty fIndex array (4-byte length is zero) + # null fTreeIndex (b"\x00\x00\x00\x00") + # null fFriends (b"\x00\x00\x00\x00") + # null fUserInfo (b"\x00\x00\x00\x00") + # null fBranchRef (b"\x00\x00\x00\x00") + out.append(b"\x00" * 28) + + out[ttree_header_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[ttree_header_index + 1 :]), 20 # TTree + ) + + self._metadata_start = sum(len(x) for x in out[:metadata_out_index]) + + raw_data = b"".join(out) + + self._key = self._directory.add_object( + sink, + "TTree", + self._name, + self._title, + raw_data, + len(raw_data), + replaces=old_key, + 
def add_branches(  # variation of mktree for copying ttree
    self,
    source,
    branches,
    *,
    counter_name=lambda counted: "n" + counted,
    field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner,
    initial_basket_capacity=10,
    resize_factor=10.0,
):
    """
    Args:
        source (str): Name of the existing TTree to copy/replace. The TTree must
            be version 20.
        branches (dict of str -> array): Names and data of the branches to be
            added to the TTree. Values may be NumPy arrays, Awkward Arrays,
            mappings of field name to array, or anything convertible to one
            of those.
        counter_name (callable of str -> str): Function to generate counter-TBranch
            names for Awkward Arrays of variable-length lists.
        field_name (callable of str -> str): Function to generate TBranch
            names for columns of an Awkward record array or a Pandas DataFrame.
        initial_basket_capacity (int): Number of TBaskets that can be written to the
            TTree without rewriting the TTree metadata to make room.
        resize_factor (float): When the TTree metadata needs to be rewritten,
            this specifies how many more TBasket slots to allocate as a multiplicative
            factor.

    Adds new branches to an existing TTree by rewriting the whole TTree with the
    new data. This function can only copy TTrees version 20, TBranches version 13,
    and TBranchElements version 10 (only the first branch and its first leaf are
    inspected). To maintain custom ``counter_name``, ``field_name``,
    ``initial_basket_capacity`` or ``resize_factor`` values for the new branches,
    pass the custom values to the parameters. Currently, writing new branches in
    batches is not possible; data in new ``branches`` must fit in memory.

    Returns the new :doc:`uproot.writing.writable.WritableTree`.

    Raises:
        ValueError: if the file is closed, the TTree has no branches, or a new
            branch does not have exactly as many entries as the existing TTree.
        TypeError: if ``source`` is not a TTree, if the TTree/TBranch/TLeaf model
            versions are unsupported, or if a branch's data cannot be interpreted
            as an array.

    .. code-block:: python

        my_directory.add_branches("tree", {"branch1": np.array(...), "branch2": ak.Array(...)})

    """
    import numpy

    if self._file.sink.closed:
        raise ValueError("cannot modify a TTree in a closed file")

    file = None
    try:
        # NOTE(review): a missing key may surface as a KeyError subclass rather
        # than ValueError, in which case it propagates unchanged -- confirm.
        try:
            file = uproot.open(self.file_path, minimal_ttree_metadata=False)
            old_ttree = file[source]
        except ValueError:
            msg = f"TTree {source} not found in file {self.file}"
            raise ValueError(msg) from None

        if not isinstance(old_ttree, uproot.TTree):
            raise TypeError("'source' must be the name of a TTree")
        if not isinstance(old_ttree, uproot.models.TTree.Model_TTree_v20):
            if uproot.model.classname_version(old_ttree.encoded_classname) < 20:
                raise TypeError(
                    f"Cannot update TTree models older than v20 in place. This TTree is {old_ttree.encoded_classname} from before 2017."
                )  # TODO rewrite!
            raise TypeError(
                f"Can only update Model_TTree_v20 in place, not {old_ttree.encoded_classname}."
            )  # TODO rewrite?

        # Must come before any ``branches[0]`` access below, otherwise an empty
        # TTree fails with an IndexError instead of this message.
        if len(old_ttree.keys()) == 0:
            raise ValueError(
                f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty."""
            )

        first_branch = old_ttree.branches[0]
        branch_classname = first_branch.encoded_classname
        branch_class, branch_version = uproot.model.classname_decode(
            branch_classname
        )
        if branch_class == "TBranch" and branch_version != 13:
            if branch_version < 13:
                raise TypeError(
                    f"Cannot update TBranch models older than v13 in place. This TBranch is {branch_classname} from before 2017."
                )  # TODO rewrite!
            raise TypeError(
                f"Can only update Model_TBranch_v13 in place, not {branch_classname}."
            )  # TODO rewrite?
        if branch_class == "TBranchElement" and branch_version != 10:
            if branch_version < 10:
                raise TypeError(
                    f"Cannot update TBranchElement models older than v10 in place. This TBranchElement is {branch_classname} from before 2017."
                )  # TODO rewrite!
            raise TypeError(
                "Can only update TBranchElement models v10 in place."
            )  # TODO rewrite?

        leaf_classname = first_branch.member("fLeaves")[0].encoded_classname
        leaf_class, leaf_version = uproot.model.classname_decode(leaf_classname)
        if leaf_class.startswith("TLeaf") and leaf_version != 1:
            if leaf_version < 1:
                raise TypeError(
                    f"Can only update version 1 TLeaf* and TLeafElements. This TLeaf* is a {leaf_classname} from before 2017."
                )
            raise TypeError(
                f"Can only update version 1 TLeaf* and TLeafElements, not {leaf_classname}."
            )

        # The new TTree is always registered in this directory; only the last
        # "/"-separated component of the old name is used for the path.
        directory = self
        treename = old_ttree.name.rsplit("/", 1)[-1]
        path = (*directory._path, treename)

        awkward = uproot.extras.awkward()

        if uproot._util.from_module(branches, "awkward") and isinstance(
            branches, awkward.Array
        ):
            branches = {"": branches}

        if isinstance(branches, numpy.ndarray) and branches.dtype.fields is not None:
            branches = uproot.writing._cascadetree.recarray_to_dict(branches)

        entries = old_ttree.member("fEntries")

        def require_entries(name, array):
            # Every new branch must line up one-to-one with the existing entries.
            if len(array) != entries:
                raise ValueError(
                    f"'add_branches' must fill every branch with the same number of entries; new branches should have {entries} entries, but {name!r} has {len(array)} entries"
                )

        def dtype_with_shape(array):
            # Fold the per-entry shape into the dtype so that fixed-size inner
            # dimensions are described by the dtype alone.
            inner_shape = array.shape[1:]
            if inner_shape != ():
                return numpy.dtype((array.dtype, inner_shape))
            return array.dtype

        # Only type metadata is derived here; the cascade layer below receives
        # the caller's ``branches`` object itself.
        metadata = {}
        for branch_name, branch_array in branches.items():
            if (
                isinstance(branch_array, numpy.ndarray)
                and branch_array.dtype.fields is not None
            ):
                branch_array = uproot.writing._cascadetree.recarray_to_dict(  # noqa: PLW2901 (overwriting branch_array)
                    branch_array
                )

            if isinstance(branch_array, Mapping) and all(
                isinstance(x, str) for x in branch_array
            ):
                # Record-like branch: the entry count is a property of each
                # field's array, not of the mapping (whose len() is the number
                # of fields).
                metadatum = {}
                for kk, vv in branch_array.items():
                    try:
                        vv = (  # noqa: PLW2901 (overwriting vv)
                            uproot._util.ensure_numpy(vv)
                        )
                    except TypeError:
                        raise TypeError(
                            f"unrecognizable array type {type(branch_array)} associated with {branch_name!r}"
                        ) from None
                    require_entries(branch_name, vv)
                    metadatum[kk] = dtype_with_shape(vv)
                metadata[branch_name] = metadatum
                continue

            require_entries(branch_name, branch_array)

            if uproot._util.from_module(branch_array, "awkward"):
                metadata[branch_name] = branch_array.type
                continue

            try:
                branch_array = uproot._util.ensure_numpy(  # noqa: PLW2901 (overwriting branch_array)
                    branch_array
                )
            except TypeError:
                # Not NumPy-convertible: fall back to building an Awkward Array.
                try:
                    branch_array = awkward.from_iter(  # noqa: PLW2901 (overwriting branch_array)
                        branch_array
                    )
                except Exception:
                    raise TypeError(
                        f"unrecognizable array type {type(branch_array)} associated with {branch_name!r}"
                    ) from None
                metadata[branch_name] = awkward.type(branch_array)
            else:
                metadata[branch_name] = dtype_with_shape(branch_array)
    finally:
        # Close the read-only handle on success *and* on every error path; it
        # is closed before the cascade layer rewrites the TTree.
        if file is not None:
            file.close()

    obj, update_streamers = directory._cascading.add_branches(
        directory._file.sink,
        old_ttree.name,
        old_ttree.title,
        metadata,
        counter_name,
        field_name,
        initial_basket_capacity,
        resize_factor,
        old_ttree,
        old_ttree.branches,
        branches,
        directory,
    )
    tree = WritableTree(path, directory._file, obj)
    update_streamers.append(
        uproot.models.TTree.Model_TTree_v20,
    )

    # Collect each (classname, version) raw streamer once, preserving order.
    seen = set()
    streamers = []
    for model in update_streamers:
        for rawstreamer in model.class_rawstreamers:
            classname_version = rawstreamer[-2], rawstreamer[-1]
            if classname_version not in seen:
                seen.add(classname_version)
                streamers.append(
                    uproot.writing._cascade.RawStreamerInfo(*rawstreamer)
                )
    directory._file._cascading.streamers.update_streamers(
        directory._file.sink,
        streamers,
    )
    return tree
"""Tests for ``WritableDirectory.add_branches`` (adding TBranches to an existing TTree).

All tests require PyROOT; the module is skipped when ROOT cannot be imported.
"""

import uproot
import os
import pytest

ROOT = pytest.importorskip("ROOT")

import numpy as np

import awkward as ak
from skhep_testdata import data_path


def test_vector(tmp_path):
    # NOTE(review): nothing in this test creates the two .root files under
    # tmp_path before opening them -- confirm how the fixtures get there.
    data = [1, 2, 3, 4, 5]
    with uproot.open(
        os.path.join(tmp_path, "uproot-vectorVectorDouble.root"),
        minimal_ttree_metadata=False,
    ) as read:
        with pytest.raises(TypeError):
            with uproot.update(
                os.path.join(tmp_path, "cp-vectorVectorDouble.root"),
            ) as write:
                write.add_branches("t", {"branch": data})

        with uproot.open(
            os.path.join(tmp_path, "cp-vectorVectorDouble.root"),
            minimal_ttree_metadata=False,
        ) as new:
            for i in read["t"].keys():
                assert ak.all(read["t"][i].array() == new["t"][i].array())
            assert ak.all(new["t"]["branch"].array() == data)

        inFile = ROOT.TFile.Open(
            os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ"
        )
        tree = inFile.Get("t;1")
        indx = 0

        for x in tree:
            indx2 = 0
            for i in getattr(x, "x"):
                assert ak.all(list(i) == read["t"]["x"].array()[indx][indx2])
                indx2 += 1
            assert getattr(x, "branch") == data[indx]
            indx += 1


# NOTE(review): the name lacks the ``test_`` prefix, so pytest's default
# collection will not run this function -- confirm whether that is intended.
def simple_test(tmp_path):
    data = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32)

    with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f:
        f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1}

    with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f:
        f["whatever"] = {"b1": data, "b2": data1}

    with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f:
        f.add_branches("whatever", {"b3": data, "b4": data1})

    with uproot.open(
        os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False
    ) as check:
        with uproot.open(
            os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False
        ) as new:
            print(new["whatever"].arrays())
            for key in new["whatever"].keys():
                assert ak.all(
                    new["whatever"].arrays()[key] == check["whatever"].arrays()[key]
                )
            assert ak.all(new["whatever"]["b1"].array() == data)
            assert ak.all(new["whatever"]["b2"].array() == data1)
            assert ak.all(new["whatever"]["b3"].array() == data)
            assert ak.all(new["whatever"]["b4"].array() == data1)
        inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ")
        tree = inFile.Get("whatever;1")
        indx = 0
        for x in tree:
            assert getattr(x, "b1") == data[indx]
            assert getattr(x, "b2") == data1[indx]
            indx += 1


def test_multiple_trees(tmp_path):
    data = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32)

    with uproot.recreate(os.path.join(tmp_path, "mult_trees.root")) as f:
        f["whatever"] = {"b1": data, "b2": data1}
        f["whatever1"] = {"b1": data, "b2": data1, "b3": data}

    with uproot.update(os.path.join(tmp_path, "mult_trees.root")) as f:
        f.add_branches("whatever", {"b3": data, "b4": data1})
        f.add_branches("whatever1", {"b4": data1})

    with uproot.open(
        os.path.join(tmp_path, "mult_trees.root"), minimal_ttree_metadata=False
    ) as new:
        assert ak.all(new["whatever"]["b1"].array() == data)
        assert ak.all(new["whatever1"]["b4"].array() == data1)
        assert ak.all(new["whatever1"]["b2"].array() == data1)
        assert ak.all(new["whatever1"]["b4"].array() == data1)
        inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees.root"), "READ")
        tree = inFile.Get("whatever;1")
        indx = 0
        for x in tree:
            assert getattr(x, "b1") == data[indx]
            assert getattr(x, "b2") == data1[indx]
            indx += 1


def test_different_fEntries(tmp_path):
    # The new "b4" has six entries while the existing TTree has five.
    data = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32)

    with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f:
        with pytest.raises(ValueError):
            f["whatever"] = {"b1": data, "b2": data1}
            f.add_branches(
                "whatever",
                {
                    "b3": data,
                    "b4": np.array([2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.int32),
                },
            )


def test_dtypes(tmp_path):  # tleaf types?
    data = [
        np.array([1, 2, 3, 4], dtype=np.int64),
        np.array([1, 2, 3, 4], dtype=np.int32),
        np.array([1, 2, 3, 4], dtype=np.int8),
        np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
        np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float64),
        np.array([1, 2, 3, 4], dtype=np.double),
        np.array([True, False, True, False], dtype=bool),
    ]

    with uproot.recreate(os.path.join(tmp_path, "all_dtypes.root")) as f:
        f["whatever"] = {
            "b1": data[0],
            "b2": data[1],
            "b3": data[2],
            "b4": data[3],
            "b5": data[4],
            "b6": data[5],
            "b7": data[6],
        }

    with uproot.update(os.path.join(tmp_path, "all_dtypes.root")) as write:
        write.add_branches(
            "whatever",
            {
                "b8": data[0],
                "b9": data[1],
                "b10": data[2],
                "b12": data[3],
                "b13": data[4],
                "b14": data[5],
                "b15": data[6],
            },
        )

    # Only checks that the rewritten TTree can still be looked up.
    with uproot.open(os.path.join(tmp_path, "all_dtypes.root")) as read:
        read["whatever"]


def test_ak_arrays(tmp_path):
    data = np.array([1, 2], dtype=np.int64)
    data1 = np.array([2, 3, 4, 5], dtype=np.int64)
    data2 = np.array([3, 4, 5], dtype=np.int64)

    with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file:
        file["whatever"] = {
            "b1": ak.Array([data, data1, data2]),
            "b2": ak.Array([data1, data2, data]),
        }

    with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write:
        write.add_branches(
            "whatever",
            {
                "b3": ak.Array([data2, data, data1]),
            },
        )

    with uproot.open(
        os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False
    ) as new:
        new["whatever"].arrays()
        inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ")
        tree = inFile.Get("whatever")
        for x in tree:
            getattr(x, "b1")
        inFile.Close()
        df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root"))
        npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True)
        assert ak.all(npy3["b1"] == [data, data1, data2])
        assert ak.all(npy3["b2"] == [data1, data2, data])
        assert ak.all(npy3["b3"] == [data2, data, data1])


def test_streamers_same_dtypes(tmp_path):
    # Make an example file with ROOT
    inFile = ROOT.TFile(os.path.join(tmp_path, "root_same_dtypes.root"), "RECREATE")
    tree = ROOT.TTree("tree1", "tree")
    npa = np.zeros(4, dtype=np.float32)
    tree.Branch("b1", npa, "b1/F")
    for i in range(4):
        npa[0] = i**0
        tree.Fill()
    inFile.Write()
    inFile.Close()

    inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_same_dtypes.root"), "OPEN")
    tree = inFile.Get("tree1")
    data = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32)

    with uproot.update(os.path.join(tmp_path, "root_same_dtypes.root")) as file:
        file.add_branches("tree1", {"b2": data})

    with uproot.open(
        os.path.join(tmp_path, "root_same_dtypes.root"), minimal_ttree_metadata=False
    ) as file:
        inFile = ROOT.TFile.Open(
            os.path.join(tmp_path, "root_same_dtypes.root"), "READ"
        )
        tree = inFile.Get("tree1;1")
        indx = 0
        for x in tree:
            assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx]
            assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx]
            indx += 1

        # Every streamer in the file must be one of these classes.
        check = [
            "TBranch",
            "TAttLine",
            "TCollection",
            "TLeafF",
            "listOfRules",
            "TString",
            "TObjArray",
            "TAttFill",
            "TBranchRef",
            "TList",
            "ROOT::TIOFeatures",
            "TSeqCollection",
            "TAttMarker",
            "TTree",
            "TNamed",
            "TObject",
            "TAttLine",
            "TLeaf",
            "TRefTable",
        ]
        for i in set(file.file.streamers):
            assert i in check
        inFile.Close()


def test_streamers_diff_dtypes(tmp_path):
    # Make an example file with ROOT. It is written under tmp_path because
    # every later step of this test opens tmp_path/root_diff_dtypes.root.
    inFile = ROOT.TFile(os.path.join(tmp_path, "root_diff_dtypes.root"), "RECREATE")
    tree = ROOT.TTree("tree1", "tree")
    npa = np.zeros(4, dtype=float)
    # NOTE(review): the leaf list is "b1F" here but "b1/F" in
    # test_streamers_same_dtypes -- confirm which one is intended.
    tree.Branch("b1", npa, "b1F")
    for i in range(4):
        npa[0] = i**0
        tree.Fill()
    inFile.Write()
    inFile.Close()

    inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_diff_dtypes.root"), "OPEN")
    tree = inFile.Get("tree1")
    data = np.array([5, 6, 7, 8], dtype=np.int64)
    data1 = np.array([5.2, 6.3, 7.4, 8.5], dtype=np.float64)
    with uproot.update(os.path.join(tmp_path, "root_diff_dtypes.root")) as file:
        file.add_branches("tree1", {"b2": data, "b3": data1})

    with uproot.open(
        os.path.join(tmp_path, "root_diff_dtypes.root"), minimal_ttree_metadata=False
    ) as file:
        file["tree1"]["b2"].member("fLeaves")[0].all_members
        inFile = ROOT.TFile.Open(
            os.path.join(tmp_path, "root_diff_dtypes.root"), "READ"
        )
        tree = inFile.Get("tree1;1")
        indx = 0
        for x in tree:
            assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx]
            assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx]
            indx += 1

        # Every streamer in the file must be one of these classes.
        check = [
            "TBranch",
            "TAttLine",
            "TCollection",
            "TLeafF",
            "listOfRules",
            "TString",
            "TObjArray",
            "TAttFill",
            "TBranchRef",
            "TList",
            "ROOT::TIOFeatures",
            "TSeqCollection",
            "TAttMarker",
            "TTree",
            "TNamed",
            "TObject",
            "TAttLine",
            "TLeaf",
            "TRefTable",
            "TLeafL",
            "TLeafD",
        ]
        for i in set(file.file.streamers):
            assert i in check
        inFile.Close()


def test_old_versions(tmp_path):
    with pytest.raises(TypeError):
        with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root")) as file:
            file.add_branches("events", {"b2": [1, 2, 3]})


def test_TreeEventSimple0(tmp_path):
    with uproot.update(os.path.join(tmp_path, "cp/TreeEventTreeSimple0.root")) as file:
        file.add_branches(
            "TreeEventTreeSimple0", {"b1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
        )
    with uproot.open(
        os.path.join(tmp_path, "cp/TreeEventTreeSimple0.root")
    ) as new:  # Okay can't read with arrays()
        print(new.file.chunk(0, 20000).raw_data.tobytes())


def test_TreeEventSimple1(tmp_path):
    with uproot.update(os.path.join(tmp_path, "cp/TreeEventTreeSimple1.root")) as file:
        file.add_branches(
            "TreeEventTreeSimple1",
            {"new_v": np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], np.float32)},
        )
    with uproot.open(
        os.path.join(tmp_path, "TreeEventTreeSimple1.root")
    ) as file:  # can't read with arrays()
        with uproot.open(
            os.path.join(tmp_path, "cp/TreeEventTreeSimple1.root")
        ) as copy:
            print(file["TreeEventTreeSimple1"])


def test_TreeEventSimple3(tmp_path):
    with uproot.update(
        os.path.join(tmp_path, "TreeEventTreeSimple3.root")
    ) as file:  # can't read with arrays()
        file["tree"] = {"b1": [1, 2, 3, 4, 5], "b2": [3, 4, 5, 6, 7]}
    with uproot.open(os.path.join(tmp_path, "TreeEventTreeSimple3.root")) as copy:
        # NOTE(review): this reads from ``file`` (whose ``with`` block has
        # already exited) rather than ``copy`` -- confirm which was meant.
        print(file["TreeEventTreeSimple1"].chunk.raw_data.tobytes())


def test_TreeEventSimple2(tmp_path):
    with uproot.open(
        os.path.join(tmp_path, "TreeEventTreeSimple2.root")
    ) as file:  # Okay can't read with arrays()
        print(file["TreeEventTreeSimple2"])
    with uproot.open(
        os.path.join(tmp_path, "cp/TreeEventTreeSimple2.root")
    ) as new:  # Okay can't read with arrays()
        # NOTE(review): ``asdfa`` looks like a debugging leftover -- confirm.
        print(new["TreeEventTreeSimple2"].asdfa)


def test_TreeClass0(tmp_path):

    with uproot.update(os.path.join(tmp_path, "cp/TreeClass0.root")) as file:
        file.add_branches(
            "TreeClass0",
            {"b1": np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.float64)},
        )
    with uproot.open(
        os.path.join(tmp_path, "cp/TreeClass0.root")
    ) as file:  # Okay can't read with arrays()
        print(file["TreeClass0"]["ClassC_branch"])
    inFile = ROOT.TFile.Open(os.path.join(tmp_path, "cp/TreeClass0.root"), "READ")
    tree = inFile.Get("TreeClass0;1")
    indx = 0
    for x in tree:
        assert getattr(x, "ClassC_branch")
        print(getattr(x, "ClassC_branch"))
        indx += 1


# NOTE(review): manual inspection helper with a machine-specific absolute
# path; nothing in this module calls it -- confirm whether it should stay.
def look():
    with uproot.open(
        "/Users/zobil/Documents/trees/modified/TreeEventTreeSimple1.root"
    ) as file:
        print(file["TreeEventTreeSimple1"]["Event_branch"].member("fLeaves"))