From ed6f4aa182c447bb4620243458103f7d5fe56d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 10 Apr 2024 10:49:20 +0200 Subject: [PATCH 01/20] Copies, details to work on though and haven't filled branches yet --- src/uproot/writing/_cascade.py | 340 +++++++++++++++++++++++++++++ src/uproot/writing/_cascadetree.py | 19 +- src/uproot/writing/writable.py | 96 ++++++++ 3 files changed, 453 insertions(+), 2 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index d790788b7..4258d57d4 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -580,6 +580,344 @@ def deserialize(cls, raw_bytes, location, num_bytes, num_slices, in_path): return out +class OldBranch(CascadeLeaf): # Branch or branches? + """ + A :doc:`uproot.writing._cascade.CascadeLeaf` for copying an old TBranch to a new TTree. ? + """ + + class_version = 1 + + def __init__(self, branch_data): + self._branch_data = branch_data + + @property + def allocation(self): + if self._allocation is None: + self._allocation = self.num_bytes + return self._allocation + + @allocation.setter + def allocation(self, value): + if self._allocation != value: + self._file_dirty = True + self._allocation = value + + @property + def num_bytes(self): + total = 0 + for _, stop in self._slices: + if stop - 1 >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + + if self._end is None: + if total + _free_format_small.size >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + elif self._end >= uproot.const.kStartBigFile: + total += _free_format_big.size + else: + total += _free_format_small.size + + return total + + def serialize(self, out): + # superclass TNamed (Model_TNamed(uproot.model.Model)) + # superclass TAttFill + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = self._branch_data.tree.name.encode(errors="surrogateescape") + title_asbytes = self._branch_data.tree.title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) + + any_tbranch_index = len(out) + out.append(None) + # if isinstance(self._branch_data, uproot.models.TBranchElement): + + out.append(b"TBranch\x00") + + tbranch_index = len(out) + out.append(None) + + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() # ? + # tbranch_tnamed = self._branch_data['TNamed'].serialize() # ? + tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() + tbranch_tnamed._bases.append(tbranch_tobject) + tbranch_tnamed._members["fTitle"] = self._branch_data.title + tbranch_tnamed._serialize( + out, True, self._branch_data.name, numpy.uint32(0x00400000) + ) + + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + # make model TAttFill v2 with fFillColor and fFillStyle + tattfill = uproot.models.TAtt.Model_TAttFill_v2.empty() + # tattfill._deeply_writable = True # ? + tattfill._members["fFillColor"] = self._branch_data.member("fFillColor") + tattfill._members["fFillStyle"] = self._branch_data.member("fFillStyle") + + out.append(tattfill.serialize(out)) + + self._branch_data.members["metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # Lie about the compression level so that ROOT checks and does the right thing. + # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 + # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. + # (I don't know where the "no, really, this is uncompressed" bit is.) + + # Have to actually make something for if there's a TBranchElement!! + + out.append( + uproot.models.TBranch._tbranch13_format1.pack( + self._branch_data.member("fCompress"), + self._branch_data.member("fBasketSize"), + self._branch_data.member("fEntryOffsetLen"), + self._branch_data.member("fWriteBasket"), # fWriteBasket + self._branch_data.member("fEntryNumber"), # fEntryNumber + ) + ) + + # fIOFeatures (TIOFeatures) + out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + # 0 to bytestring?? + out.append( + uproot.models.TBranch._tbranch13_format2.pack( + self._branch_data.member("fOffset"), + self._branch_data.member("fMaxBaskets"), # fMaxBaskets + self._branch_data.member("fSplitLevel"), + self._branch_data.member("fEntries"), # fEntries + self._branch_data.member("fFirstEntry"), + self._branch_data.member("fTotBytes"), + self._branch_data.member("fZipBytes"), + ) + ) + + # empty TObjArray of TBranches + out.append( + self._branch_data.member("fBranches").serialize( + out, + ) + ) + + subtobjarray_of_leaves_index = len(out) + out.append(None) + + # TObjArray header with fName: "", fSize: 1, fLowerBound: 0 + out.append( + b"\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00" + ) + _dtype_to_char = { + numpy.dtype("bool"): "O", + numpy.dtype(">i1"): "B", + numpy.dtype(">u1"): "b", + numpy.dtype(">i2"): "S", + numpy.dtype(">u2"): "s", + numpy.dtype(">i4"): "I", + numpy.dtype(">u4"): "i", + numpy.dtype(">i8"): "L", + numpy.dtype(">u8"): "l", + numpy.dtype(">f4"): "F", + numpy.dtype(">f8"): "D", + numpy.dtype(">U"): "C", + } + + absolute_location = key_num_bytes + sum(len(x) for x in out if x is not None) + absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) + tleaf_reference_number = absolute_location + 2 + + subany_tleaf_index = len(out) + out.append(None) + for leaf in self._branch_data.member("fLeaves"): + # Make and serialize each leaf?? + letter_upper = _dtype_to_char[numpy.dtype(">i8")] + out.append(("TLeaf" + letter_upper).encode() + b"\x00") + if letter_upper == "O": + special_struct = uproot.models.TLeaf._tleafO1_format1 + elif letter_upper == "B": + special_struct = uproot.models.TLeaf._tleafb1_format1 + elif letter_upper == "S": + special_struct = uproot.models.TLeaf._tleafs1_format1 + elif letter_upper == "I": + special_struct = uproot.models.TLeaf._tleafi1_format1 + elif letter_upper == "G": + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif letter_upper == "L": + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif letter_upper == "F": + special_struct = uproot.models.TLeaf._tleaff1_format1 + elif letter_upper == "D": + special_struct = uproot.models.TLeaf._tleafd1_format1 + elif letter_upper == "C": + special_struct = uproot.models.TLeaf._tleafc1_format1 + # single TLeaf + + leaf_name = self._branch_data.member("fName").encode( + errors="surrogateescape" + ) + leaf_title = ( + self._branch_data.member("fLeaves")[0] + .member("fTitle") + .encode(errors="surrogateescape") + ) + leaf_name_length = (1 if len(leaf_name) < 255 else 5) + len(leaf_name) + leaf_title_length = (1 if len(leaf_title) < 255 else 5) + len(leaf_title) + + leaf_header = numpy.array( + [ + 64, + 0, + 0, + 76, + 0, + 1, + 64, + 0, + 0, + 54, + 0, + 2, + 64, + 0, + 0, + 30, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + ], + numpy.uint8, + ) + tmp = leaf_header[0:4].view(">u4") + tmp[:] = ( + numpy.uint32( + 42 + leaf_name_length + leaf_title_length + special_struct.size + ) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[6:10].view(">u4") + tmp[:] = ( + numpy.uint32(36 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[12:16].view(">u4") + tmp[:] = ( + numpy.uint32(12 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + + out.append(uproot._util.tobytes(leaf_header)) + if len(leaf_name) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_name), len(leaf_name), leaf_name) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_name), 255, len(leaf_name), leaf_name + ) + ) + if len(leaf_title) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_title), len(leaf_title), leaf_title) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_title), 255, len(leaf_title), leaf_title + ) + ) + + out.append( + uproot.models.TLeaf._tleaf2_format0.pack( + leaf.member("fLen"), + leaf.member("fLenType"), + leaf.member("fOffset"), # fOffset + leaf.member("fIsRange"), # fIsRange + leaf.member("fIsUnsigned"), + ) + ) + out.append( + uproot.serialization.serialize_object_any( + leaf.member("fLeafCount") # fLeafCount + ) + ) + + # specialized TLeaf* members (fMinimum, fMaximum) + # datum["tleaf_special_struct"] = special_struct + out.append( + special_struct.pack( + int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + ) + ) + + out[subany_tleaf_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[subany_tleaf_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + + out[subtobjarray_of_leaves_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[subtobjarray_of_leaves_index + 1 :]), + 3, # TObjArray + ) + + # empty TObjArray of fBaskets (embedded) + # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" + + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + + # out.append(self._branch_data.member("")) + + # assert sum(1 if x is None else 0 for x in out) == 4 + self._branch_data.members["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) # ? + + # speedbump and fBasketBytes + out.append(b"\x01") + out.append(uproot._util.tobytes(self._branch_data.member("fBasketBytes"))) + + # speedbump and fBasketEntry + out.append(b"\x01") + out.append(uproot._util.tobytes(self._branch_data.member("fBasketEntry"))) + + # speedbump and fBasketSeek + out.append(b"\x01") + out.append(uproot._util.tobytes(self._branch_data.member("fBasketSeek"))) + + self._branch_data.member("fFileName").serialize() # name = None? + + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + ) + + out[any_tbranch_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) + return out, tleaf_reference_number + + # def write(self, ) + + class FreeSegments(CascadeNode): """ A :doc:`uproot.writing._cascade.CascadeNode` for writing a ROOT FreeSegments record. @@ -1710,6 +2048,7 @@ def add_tree( field_name, initial_basket_capacity, resize_factor, + existing_branches=None, ): import uproot.writing._cascadetree @@ -1723,6 +2062,7 @@ def add_tree( field_name, initial_basket_capacity, resize_factor, + existing_branches, ) tree.write_anew(sink) return tree diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index f88174df2..7ba982e13 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -85,6 +85,7 @@ def __init__( field_name, initial_basket_capacity, resize_factor, + existing_branches=None, ): self._directory = directory self._name = name @@ -94,7 +95,7 @@ def __init__( self._field_name = field_name self._basket_capacity = initial_basket_capacity self._resize_factor = resize_factor - + self._existing_branches = existing_branches if isinstance(branch_types, dict): branch_types_items = branch_types.items() else: @@ -123,7 +124,7 @@ def __init__( raise TypeError branch_dtype = numpy.dtype(branch_type) - except (TypeError, ValueError) as err: + except TypeError as err: try: awkward = uproot.extras.awkward() except ModuleNotFoundError as err: @@ -894,6 +895,8 @@ def write_anew(self, sink): num_branches = sum( 0 if datum["kind"] == "record" else 1 for datum in self._branch_data ) + if self._existing_branches: + num_branches += len(self._existing_branches) # TObjArray header with fName: "" out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") @@ -904,6 +907,17 @@ def write_anew(self, sink): ) ) + # Write old branches? + if self._existing_branches: + for branch in self._existing_branches: + # create OldTBranch object + # members = uproot.branch.read_members() + old_branch = uproot.writing._cascade.OldBranch(branch) + out, temp = old_branch.serialize( + out + ) # should call uproot.models.TBranch._tbranch13_format...pack or something + tleaf_reference_numbers.append(temp) # and don't forget the tleaves + for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1213,6 +1227,7 @@ def write_anew(self, sink): self._metadata_start = sum(len(x) for x in out[:metadata_out_index]) raw_data = b"".join(out) + self._key = self._directory.add_object( sink, "TTree", diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 5c0c8aec6..49fe19a40 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1344,6 +1344,102 @@ def mktree( return tree + def add_branches( # my own variation of mktree + self, + name, + branch_types, + source, + title="", + *, + counter_name=lambda counted: "n" + counted, + field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, + initial_basket_capacity=10, + resize_factor=10.0, + # new_branch, + ): + """ + Args: + source (TTree): existing TTree to copy/replace + Creates an empty TTree in this directory. + + Note that TTrees can be created by assigning TTree-like data to a directory + (see :doc:`uproot.writing.writable.WritableTree` for recognized TTree-like types): + + .. code-block:: python + + my_directory["tree"] = {"branch1": np.array(...), "branch2": ak.Array(...)} + + but TTrees created this way will never be empty. Use this method + to make an empty TTree or to control its parameters. + """ + if self._file.sink.closed: + raise ValueError("cannot create a TTree in a closed file") + if not isinstance(source, uproot.TTree): + raise TypeError("'source' must be a TTree") # ? + names = source.keys() + if len(names) == 0: + raise ValueError( + f"""TTree {source.name} in file {source.file_path} is empty.""" + ) + + # names.append(new_branch.name) # May need the TKey? (uproot.reading.ReadOnlyKey) + + try: # Will this throw an error? proabably? + at = source.name.rindex("/") + except ValueError: + treename = source.name + directory = self + else: + dirpath, treename = source.name[:at], source.name[at + 1 :] + directory = self.mkdir(dirpath) + + path = (*directory._path, treename) + + tree = WritableTree( + path, + directory._file, + directory._cascading.add_tree( + directory._file.sink, + name, + title, + branch_types, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, + source.branches, + ), + ) + directory._file._new_tree(tree) + + seen = set() + streamers = [] + for model in ( + uproot.models.TLeaf.Model_TLeafB_v1, + uproot.models.TLeaf.Model_TLeafS_v1, + uproot.models.TLeaf.Model_TLeafI_v1, + uproot.models.TLeaf.Model_TLeafL_v1, + uproot.models.TLeaf.Model_TLeafF_v1, + uproot.models.TLeaf.Model_TLeafD_v1, + uproot.models.TLeaf.Model_TLeafC_v1, + uproot.models.TLeaf.Model_TLeafO_v1, + uproot.models.TBranch.Model_TBranch_v13, + uproot.models.TTree.Model_TTree_v20, + ): + for rawstreamer in model.class_rawstreamers: + classname_version = rawstreamer[-2], rawstreamer[-1] + if classname_version not in seen: + seen.add(classname_version) + streamers.append( + uproot.writing._cascade.RawStreamerInfo(*rawstreamer) + ) + + directory._file._cascading.streamers.update_streamers( + directory._file.sink, streamers + ) + + return tree + def mkrntuple( self, name, From f1c2d5702a60eed9048f25161c57f7a9a22e6eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 12 Apr 2024 19:26:22 +0200 Subject: [PATCH 02/20] Progress on adding data --- src/uproot/writing/_cascade.py | 221 ++++++++++++------- src/uproot/writing/_cascadetree.py | 281 +++++++++++++++++++++++++ src/uproot/writing/writable.py | 41 ++-- tests/test_1155_feat_add_copy_ttree.py | 42 ++++ 4 files changed, 495 insertions(+), 90 deletions(-) create mode 100644 tests/test_1155_feat_add_copy_ttree.py diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 4258d57d4..2d8beb965 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -30,6 +30,8 @@ import uproot.compression import uproot.const +import uproot.models.TBranch +import uproot.models.TLeaf import uproot.models.TList import uproot.reading import uproot.serialization @@ -585,10 +587,8 @@ class OldBranch(CascadeLeaf): # Branch or branches? A :doc:`uproot.writing._cascade.CascadeLeaf` for copying an old TBranch to a new TTree. ? """ - class_version = 1 - - def __init__(self, branch_data): - self._branch_data = branch_data + def __init__(self, branch): + self._branch = branch @property def allocation(self): @@ -599,7 +599,6 @@ def allocation(self): @allocation.setter def allocation(self, value): if self._allocation != value: - self._file_dirty = True self._allocation = value @property @@ -626,16 +625,18 @@ def num_bytes(self): def serialize(self, out): # superclass TNamed (Model_TNamed(uproot.model.Model)) # superclass TAttFill + self.read_members() key_num_bytes = uproot.reading._key_format_big.size + 6 - name_asbytes = self._branch_data.tree.name.encode(errors="surrogateescape") - title_asbytes = self._branch_data.tree.title.encode(errors="surrogateescape") + name_asbytes = self._branch_data["fName"].encode(errors="surrogateescape") + title_asbytes = self._branch_data["fTitle"].encode(errors="surrogateescape") key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) any_tbranch_index = len(out) out.append(None) - # if isinstance(self._branch_data, uproot.models.TBranchElement): - + # if 'fClonesName' in self._branch.all_members.keys(): + # out.append(b"TBranchElement\x00") + # else: out.append(b"TBranch\x00") tbranch_index = len(out) @@ -645,21 +646,21 @@ def serialize(self, out): # tbranch_tnamed = self._branch_data['TNamed'].serialize() # ? tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() tbranch_tnamed._bases.append(tbranch_tobject) - tbranch_tnamed._members["fTitle"] = self._branch_data.title + tbranch_tnamed._members["fTitle"] = self._branch_data["fTitle"] tbranch_tnamed._serialize( - out, True, self._branch_data.name, numpy.uint32(0x00400000) + out, True, self._branch_data["fName"], numpy.uint32(0x00400000) ) # TAttFill v2, fFillColor: 0, fFillStyle: 1001 # make model TAttFill v2 with fFillColor and fFillStyle tattfill = uproot.models.TAtt.Model_TAttFill_v2.empty() # tattfill._deeply_writable = True # ? - tattfill._members["fFillColor"] = self._branch_data.member("fFillColor") - tattfill._members["fFillStyle"] = self._branch_data.member("fFillStyle") + tattfill._members["fFillColor"] = self._branch_data["fFillColor"] + tattfill._members["fFillStyle"] = self._branch_data["fFillStyle"] out.append(tattfill.serialize(out)) - self._branch_data.members["metadata_start"] = (6 + 6 + 8 + 6) + sum( + self._branch_data["metadata_start"] = (6 + 6 + 8 + 6) + sum( len(x) for x in out if x is not None ) @@ -672,32 +673,48 @@ def serialize(self, out): out.append( uproot.models.TBranch._tbranch13_format1.pack( - self._branch_data.member("fCompress"), - self._branch_data.member("fBasketSize"), - self._branch_data.member("fEntryOffsetLen"), - self._branch_data.member("fWriteBasket"), # fWriteBasket - self._branch_data.member("fEntryNumber"), # fEntryNumber + self._branch_data["fCompress"], + self._branch_data["fBasketSize"], + self._branch_data["fEntryOffsetLen"], + self._branch_data["fWriteBasket"], # fWriteBasket + self._branch_data["fEntryNumber"], # fEntryNumber ) ) # fIOFeatures (TIOFeatures) out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + # out.append(self._branch_data["fIOFeatures"].serialize()) # 0 to bytestring?? out.append( uproot.models.TBranch._tbranch13_format2.pack( - self._branch_data.member("fOffset"), - self._branch_data.member("fMaxBaskets"), # fMaxBaskets - self._branch_data.member("fSplitLevel"), - self._branch_data.member("fEntries"), # fEntries - self._branch_data.member("fFirstEntry"), - self._branch_data.member("fTotBytes"), - self._branch_data.member("fZipBytes"), + self._branch_data["fOffset"], + self._branch_data["fMaxBaskets"], # fMaxBaskets + self._branch_data["fSplitLevel"], + self._branch_data["fEntries"], # fEntries + self._branch_data["fFirstEntry"], + self._branch_data["fTotBytes"], + self._branch_data["fZipBytes"], ) ) - + # if 'fClonesName' in self._branch.all_members.keys(): # TBranchElement? + # out.append(self._branch.member("fClassName").serialize()) # These three are TStrings + # out.append(self._branch.member("fParentName").serialize()) + # out.append(self._branch.member("fClonesName").serialize()) + # out.append( + # uproot.models.TBranch._tbranchelement10_format1.pack( + # self._branch.member("fCheckSum"), + # self._branch.member("fClassVersion"), + # self._branch.member("fID"), + # self._branch.member("fType"), + # self._branch.member("fStreamerType"), + # self._branch.member("fMaximum"), + # ) + # ) + # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount"))) + # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount2"))) # empty TObjArray of TBranches - out.append( - self._branch_data.member("fBranches").serialize( + out.append( # TODO how to handle this? Make sure to be TBranchElements will be handled too + self._branch_data["fBranches"].serialize( out, ) ) @@ -709,20 +726,6 @@ def serialize(self, out): out.append( b"\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00" ) - _dtype_to_char = { - numpy.dtype("bool"): "O", - numpy.dtype(">i1"): "B", - numpy.dtype(">u1"): "b", - numpy.dtype(">i2"): "S", - numpy.dtype(">u2"): "s", - numpy.dtype(">i4"): "I", - numpy.dtype(">u4"): "i", - numpy.dtype(">i8"): "L", - numpy.dtype(">u8"): "l", - numpy.dtype(">f4"): "F", - numpy.dtype(">f8"): "D", - numpy.dtype(">U"): "C", - } absolute_location = key_num_bytes + sum(len(x) for x in out if x is not None) absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) @@ -730,35 +733,45 @@ def serialize(self, out): subany_tleaf_index = len(out) out.append(None) - for leaf in self._branch_data.member("fLeaves"): + for leaf in self._branch_data["fLeaves"]: # Make and serialize each leaf?? - letter_upper = _dtype_to_char[numpy.dtype(">i8")] - out.append(("TLeaf" + letter_upper).encode() + b"\x00") - if letter_upper == "O": + # if isinstance(leaf, model....) + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafO_v1): + letter_upper = "O" special_struct = uproot.models.TLeaf._tleafO1_format1 - elif letter_upper == "B": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafB_v1): + letter_upper = "B" special_struct = uproot.models.TLeaf._tleafb1_format1 - elif letter_upper == "S": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafS_v1): + letter_upper = "S" special_struct = uproot.models.TLeaf._tleafs1_format1 - elif letter_upper == "I": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafI_v1): + letter_upper = "I" special_struct = uproot.models.TLeaf._tleafi1_format1 - elif letter_upper == "G": - special_struct = uproot.models.TLeaf._tleafl1_format0 - elif letter_upper == "L": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafL_v1): + letter_upper = "L" special_struct = uproot.models.TLeaf._tleafl1_format0 - elif letter_upper == "F": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafF_v1): + letter_upper = "F" special_struct = uproot.models.TLeaf._tleaff1_format1 - elif letter_upper == "D": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafD_v1): + letter_upper = "D" special_struct = uproot.models.TLeaf._tleafd1_format1 - elif letter_upper == "C": + elif isinstance(leaf, uproot.models.TLeaf.Model_TLeafC_v1): + letter_upper = "C" special_struct = uproot.models.TLeaf._tleafc1_format1 + # else: # This will never be reached? What to do about G + # letter_upper = "G" + # special_struct = uproot.models.TLeaf._tleafl1_format0 + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + special_struct = uproot.models.TLeaf._tleafelement1_format1 + out.append((b"TLeafElement") + b"\x00") + else: + out.append(("TLeaf" + letter_upper).encode() + b"\x00") # single TLeaf - - leaf_name = self._branch_data.member("fName").encode( - errors="surrogateescape" - ) + leaf_name = self._branch_data["fName"].encode(errors="surrogateescape") leaf_title = ( - self._branch_data.member("fLeaves")[0] + self._branch_data["fLeaves"][0] .member("fTitle") .encode(errors="surrogateescape") ) @@ -852,14 +865,21 @@ def serialize(self, out): leaf.member("fLeafCount") # fLeafCount ) ) - - # specialized TLeaf* members (fMinimum, fMaximum) - # datum["tleaf_special_struct"] = special_struct - out.append( - special_struct.pack( - int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + out.append( + uproot.models.TLeaf._tleafelement1_format1.pack( + leaf.member("fID"), # fIsRange + leaf.member("fType"), + ) + ) + else: + # specialized TLeaf* members (fMinimum, fMaximum) + # datum["tleaf_special_struct"] = special_struct + out.append( + special_struct.pack( + int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + ) ) - ) out[subany_tleaf_index] = ( uproot.serialization._serialize_object_any_format1.pack( @@ -877,30 +897,38 @@ def serialize(self, out): # empty TObjArray of fBaskets (embedded) # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" + if len(self._branch_data["fBaskets"]) != 1: + # print(len(self._branch_data["fBaskets"])) + raise NotImplementedError + + # out.append( + # self._branch_data["fBaskets"].serialize( + # out, + # ) + # ) + out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) - # out.append(self._branch_data.member("")) - - # assert sum(1 if x is None else 0 for x in out) == 4 - self._branch_data.members["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + assert sum(1 if x is None else 0 for x in out) == 4 + self._branch_data["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( len(x) for x in out if x is not None - ) # ? + ) # speedbump and fBasketBytes out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data.member("fBasketBytes"))) + out.append(uproot._util.tobytes(self._branch_data["fBasketBytes"])) # speedbump and fBasketEntry out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data.member("fBasketEntry"))) + out.append(uproot._util.tobytes(self._branch_data["fBasketEntry"])) # speedbump and fBasketSeek out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data.member("fBasketSeek"))) + out.append(uproot._util.tobytes(self._branch_data["fBasketSeek"])) - self._branch_data.member("fFileName").serialize() # name = None? + out.append(self._branch_data["fFileName"].serialize()) # name = None? out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch @@ -915,7 +943,42 @@ def serialize(self, out): ) return out, tleaf_reference_number - # def write(self, ) + def read_members(self): + self._branch_data = {} + self._branch_data["fTitle"] = self._branch.member("fTitle") + self._branch_data["fName"] = self._branch.member("fName") + self._branch_data["fFillColor"] = self._branch.member("fFillColor") + self._branch_data["fFillStyle"] = self._branch.member("fFillStyle") + try: + self._branch_data["fIOFeatures"] = self._branch.member("fIOFeatures") + except KeyError: + self._branch_data["fIOFeatures"] = 0 # ? self._branch_member("fIOFeatures") + self._branch_data["fCompress"] = self._branch.member("fCompress") + self._branch_data["fBasketSize"] = self._branch.member("fBasketSize") + self._branch_data["fEntryOffsetLen"] = self._branch.member("fEntryOffsetLen") + self._branch_data["fWriteBasket"] = self._branch.member("fWriteBasket") + self._branch_data["fEntryNumber"] = self._branch.member("fEntryNumber") + self._branch_data["fOffset"] = self._branch.member("fOffset") + self._branch_data["fMaxBaskets"] = self._branch.member("fMaxBaskets") + self._branch_data["fSplitLevel"] = self._branch.member("fSplitLevel") + self._branch_data["fEntries"] = self._branch.member("fEntries") + try: + self._branch_data["fFirstEntry"] = self._branch.member("fFirstEntry") + except KeyError: + self._branch_data["fFirstEntry"] = 0 + self._branch_data["fTotBytes"] = self._branch.member("fTotBytes") + self._branch_data["fZipBytes"] = self._branch.member("fZipBytes") + self._branch_data["fLeaves"] = self._branch.member("fLeaves") + self._branch_data["fBaskets"] = self._branch.member("fBaskets") + self._branch_data["fBranches"] = self._branch.member("fBranches") + self._branch_data["fBasketBytes"] = self._branch.member("fBasketBytes") + self._branch_data["fBasketEntry"] = self._branch.member("fBasketEntry") + self._branch_data["fBasketSeek"] = self._branch.member("fBasketSeek") + self._branch_data["fFileName"] = self._branch.member("fFileName") + + def serialize_leaf_elements(self, out, special_struct): + # specialized TLeaf* members (fMinimum, fMaximum) + out.append(special_struct.pack(0, 0)) class FreeSegments(CascadeNode): @@ -2049,6 +2112,7 @@ def add_tree( initial_basket_capacity, resize_factor, existing_branches=None, + new_branches=None, ): import uproot.writing._cascadetree @@ -2065,6 +2129,7 @@ def add_tree( existing_branches, ) tree.write_anew(sink) + tree.add_data(sink, new_branches) return tree def add_rntuple(self, sink, name, title, akform): diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 7ba982e13..ec57ac88a 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -1597,6 +1597,287 @@ def write_string_basket(self, sink, branch_name, compression, array, offsets): return fKeylen + fObjlen, fNbytes, location + def add_data(self, file, sink, data): + # do checks before getting here...easier + # add to a single branch? + # remember not to alter data! + if self._num_baskets >= self._basket_capacity - 1: + self._basket_capacity = max( + self._basket_capacity + 1, + int(math.ceil(self._basket_capacity * self._resize_factor)), + ) + + for datum in self._branch_data: + if datum["kind"] == "record": + continue + + fBasketBytes = datum["fBasketBytes"] + fBasketEntry = datum["fBasketEntry"] + fBasketSeek = datum["fBasketSeek"] + datum["fBasketBytes"] = numpy.zeros( + self._basket_capacity, uproot.models.TBranch._tbranch13_dtype1 + ) + datum["fBasketEntry"] = numpy.zeros( + self._basket_capacity, uproot.models.TBranch._tbranch13_dtype2 + ) + datum["fBasketSeek"] = numpy.zeros( + self._basket_capacity, uproot.models.TBranch._tbranch13_dtype3 + ) + datum["fBasketBytes"][: len(fBasketBytes)] = fBasketBytes + datum["fBasketEntry"][: len(fBasketEntry)] = fBasketEntry + datum["fBasketSeek"][: len(fBasketSeek)] = fBasketSeek + datum["fBasketEntry"][len(fBasketEntry)] = self._num_entries + + oldloc = start = self._key.location + stop = start + self._key.num_bytes + self._key.compressed_bytes + + self.write_anew(sink) + + newloc = self._key.seek_location + file._move_tree(oldloc, newloc) + + self._freesegments.release(start, stop) + sink.set_file_length(self._freesegments.fileheader.end) + sink.flush() + + provided = None + + if isinstance(data, numpy.ndarray) and data.dtype.fields is not None: + provided = recarray_to_dict(data) + + if ( + provided is None + and not isinstance(data, Mapping) + or not all(isinstance(x, str) for x in data) + ): + raise TypeError("'add' requires a mapping from branch name (str) to arrays") + + # Awkward may be impossible + if uproot._util.from_module(data, "awkward"): + try: + awkward = uproot.extras.awkward() + except ModuleNotFoundError as err: + raise TypeError( + f"an Awkward Array was provided, but 'awkward' cannot be imported: {data!r}" + ) from err + + if isinstance(data, awkward.Array): + if data.ndim > 1 and not data.layout.purelist_isregular: + provided = { + self._counter_name(""): numpy.asarray( + awkward.num(data, axis=1), dtype=">u4" + ) + } + else: + provided = {} + for k, v in zip(awkward.fields(data), awkward.unzip(data)): + provided[k] = v + actual_branches = {} + + for name in provided: + if datum["fName"] in provided: + actual_branches[datum["fName"]] = provided.pop(name) + else: + raise ValueError( + "'extend' must be given an array for every branch; missing {}".format( + repr(datum["fName"]) + ) + ) + + if len(provided) != 0: + raise ValueError( + "'extend' was given data that do not correspond to any branch: {}".format( + ", ".join(repr(x) for x in provided) + ) + ) + + tofill = [] + num_entries = None + for branch_name, branch_array in actual_branches.items(): + # if num_entries is None: + # num_entries = len(branch_array) + # elif num_entries != len(branch_array): + # raise ValueError( + # f"'extend' must fill every branch with the same number of entries; {branch_name!r} has {len(branch_array)} entries" + # ) + + # datum = self._branch_data[self._branch_lookup[branch_name]] + # if datum["kind"] == "record": + # continue + + if datum["counter"] is None: + if datum["dtype"] == ">U0": + lengths = numpy.asarray(awkward.num(branch_array.layout)) + which_big = lengths >= 255 + + lengths_extension_offsets = numpy.empty( + len(branch_array.layout) + 1, numpy.int64 + ) + lengths_extension_offsets[0] = 0 + numpy.cumsum(which_big * 4, out=lengths_extension_offsets[1:]) + + lengths_extension = awkward.contents.ListOffsetArray( + awkward.index.Index64(lengths_extension_offsets), + awkward.contents.NumpyArray( + lengths[which_big].astype(">u4").view("u1") + ), + ) + + lengths[which_big] = 255 + + leafc_data_awkward = awkward.concatenate( + [ + lengths.reshape(-1, 1).astype("u1"), + lengths_extension, + awkward.without_parameters(branch_array.layout), + ], + axis=1, + ) + + big_endian = numpy.asarray(awkward.flatten(leafc_data_awkward)) + big_endian_offsets = ( + lengths_extension_offsets + + numpy.asarray(branch_array.layout.offsets) + + numpy.arange(len(branch_array.layout.offsets)) + ).astype(">i4", copy=True) + tofill.append( + ( + branch_name, + datum["compression"], + big_endian, + big_endian_offsets, + ) + ) + else: + big_endian = uproot._util.ensure_numpy(branch_array).astype( + datum["dtype"] + ) + if big_endian.shape != (len(branch_array),) + datum["shape"]: + raise ValueError( + "'extend' must fill branches with a consistent shape: has {}, trying to fill with {}".format( + datum["shape"], + big_endian.shape[1:], + ) + ) + tofill.append((branch_name, datum["compression"], big_endian, None)) + if datum["kind"] == "counter": + datum["tleaf_maximum_value"] = max( + big_endian.max(), datum["tleaf_maximum_value"] + ) + + else: + try: + awkward = uproot.extras.awkward() + except ModuleNotFoundError as err: + raise TypeError( + f"a jagged array was provided (possibly as an iterable), but 'awkward' cannot be imported: {branch_name}: {branch_array!r}" + ) from err + layout = branch_array.layout + while not isinstance(layout, awkward.contents.ListOffsetArray): + if isinstance(layout, awkward.contents.IndexedArray): + layout = layout.project() + + elif isinstance(layout, awkward.contents.ListArray): + layout = layout.to_ListOffsetArray64(False) + + else: + raise AssertionError( + "how did this pass the type check?\n\n" + repr(layout) + ) + + content = layout.content + offsets = numpy.asarray(layout.offsets) + + if offsets[0] != 0: + content = content[offsets[0] :] + offsets = offsets - offsets[0] + if len(content) > offsets[-1]: + content = content[: offsets[-1]] + + shape = [len(content)] + while not isinstance(content, awkward.contents.NumpyArray): + if isinstance(content, awkward.contents.IndexedArray): + content = content.project() + + elif isinstance(content, awkward.contents.EmptyArray): + content = content.to_NumpyArray(dtype=numpy.float64) + + elif isinstance(content, awkward.contents.RegularArray): + shape.append(content.size) + content = content.content + + else: + raise AssertionError( + "how did this pass the type check?\n\n" + repr(content) + ) + + big_endian = numpy.asarray(content.data, dtype=datum["dtype"]) + shape = tuple(shape) + big_endian.shape[1:] + + if shape[1:] != datum["shape"]: + raise ValueError( + "'extend' must fill branches with a consistent shape: has {}, trying to fill with {}".format( + datum["shape"], + shape[1:], + ) + ) + big_endian_offsets = offsets.astype(">i4", copy=True) + + tofill.append( + ( + branch_name, + datum["compression"], + big_endian.reshape(-1), + big_endian_offsets, + ) + ) + + # actually write baskets into the file + uncompressed_bytes = 0 + compressed_bytes = 0 + for branch_name, compression, big_endian, big_endian_offsets in tofill: + datum = self._branch_data[self._branch_lookup[branch_name]] + + if datum["dtype"] == ">U0": + totbytes, zipbytes, location = self.write_string_basket( + sink, branch_name, compression, big_endian, big_endian_offsets + ) + datum["fEntryOffsetLen"] = 4 * (len(big_endian_offsets) - 1) + + elif big_endian_offsets is None: + totbytes, zipbytes, location = self.write_np_basket( + sink, branch_name, compression, big_endian + ) + else: + totbytes, zipbytes, location = self.write_jagged_basket( + sink, branch_name, compression, big_endian, big_endian_offsets + ) + datum["fEntryOffsetLen"] = 4 * (len(big_endian_offsets) - 1) + uncompressed_bytes += totbytes + compressed_bytes += zipbytes + + datum["fTotBytes"] += totbytes + datum["fZipBytes"] += zipbytes + + datum["fBasketBytes"][self._num_baskets] = zipbytes + + if self._num_baskets + 1 < self._basket_capacity: + fBasketEntry = datum["fBasketEntry"] + i = self._num_baskets + fBasketEntry[i + 1] = num_entries + fBasketEntry[i] + + datum["fBasketSeek"][self._num_baskets] = location + + datum["arrays_write_stop"] = self._num_baskets + 1 + + # update TTree metadata in file + self._num_entries += num_entries + self._num_baskets += 1 + self._metadata["fTotBytes"] += uncompressed_bytes + self._metadata["fZipBytes"] += compressed_bytes + + self.write_updates(sink) + _tbasket_offsets_length = struct.Struct(">I") diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 49fe19a40..dca7088bd 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1344,10 +1344,10 @@ def mktree( return tree - def add_branches( # my own variation of mktree + def add( # my own variation of mktree self, name, - branch_types, + branches, source, title="", *, @@ -1355,7 +1355,6 @@ def add_branches( # my own variation of mktree field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, initial_basket_capacity=10, resize_factor=10.0, - # new_branch, ): """ Args: @@ -1374,27 +1373,35 @@ def add_branches( # my own variation of mktree """ if self._file.sink.closed: raise ValueError("cannot create a TTree in a closed file") - if not isinstance(source, uproot.TTree): + + try: + file = uproot.open(self.file_path, minimal_ttree_metadata=False) + old_ttree = file[source] + except ValueError: + msg = f"TTree {source} not found in file {self.file}" + raise ValueError(msg) from None + if not isinstance(old_ttree, uproot.TTree): raise TypeError("'source' must be a TTree") # ? - names = source.keys() + names = old_ttree.keys() if len(names) == 0: raise ValueError( - f"""TTree {source.name} in file {source.file_path} is empty.""" + f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" ) - # names.append(new_branch.name) # May need the TKey? (uproot.reading.ReadOnlyKey) - try: # Will this throw an error? proabably? - at = source.name.rindex("/") + at = old_ttree.name.rindex("/") except ValueError: - treename = source.name + treename = old_ttree.name directory = self else: - dirpath, treename = source.name[:at], source.name[at + 1 :] + dirpath, treename = old_ttree.name[:at], old_ttree.name[at + 1 :] directory = self.mkdir(dirpath) path = (*directory._path, treename) + # Make branch types? + branch_types = {key: type(data) for key, data in branches.items} + tree = WritableTree( path, directory._file, @@ -1407,7 +1414,8 @@ def add_branches( # my own variation of mktree field_name, initial_basket_capacity, resize_factor, - source.branches, + old_ttree.branches, + branches, ), ) directory._file._new_tree(tree) @@ -1651,6 +1659,9 @@ def update(self, pairs=None, **more_pairs): self._file._cascading.streamers.update_streamers(self._file.sink, streamers) +# class UpdatableTree: + + class WritableTree: """ Args: @@ -1956,6 +1967,12 @@ def show( stream=stream, ) + def add_data( + self, data, **more_data + ): # Eventually... def add(self, as_dict=None, **as_kwds): + # data must be a dict, + self._cascading.add_data(self._file, self._file.sink, data) + class WritableBranch: """ diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py new file mode 100644 index 000000000..7feb58720 --- /dev/null +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -0,0 +1,42 @@ +import uproot +from skhep_testdata import data_path +import uproot.writing.writable +# import ROOT +import numpy as np + +import awkward as ak + +def test_vector(): + with uproot.update("/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root") as write: + write.add_branches("tree1", {"branch": int}, source='t') + + with uproot.open("/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root") as read: + print(read["t"]["x"].arrays()) + + with uproot.open("/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root") as read: + print(read["tree1"]) + # print(read["tree1"].all_members) + # print(read["tree1"]["x"].all_members) + # print(read["tree1"]["x"].member("fLeaves")[0]) + + +def simple_test(): + with uproot.recreate("arrays.root") as file: + file['tree'] = {"b1": [1,2,3], "b2": [2,3,4]} + + with uproot.recreate("arrays_check.root") as file: + file['tree'] = {"b1": [1,2,3], "b2": [2,3,4]} + + with uproot.open("arrays.root", minimal_ttree_metadata=False) as read: + print(read['tree']['b1'].all_members) + + with uproot.update("arrays.root") as write: + write.add("tree", {"b3": [5,6,7]}, source='tree') + + with uproot.open("arrays.root") as new: + print(new['tree'].keys()) + print(new['tree'].member("fBranches")) + + # for key in + +simple_test() \ No newline at end of file From 16544e07ae47cae6cac2802513b566f9d0939155 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 17:27:07 +0000 Subject: [PATCH 03/20] style: pre-commit fixes --- tests/test_1155_feat_add_copy_ttree.py | 37 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 7feb58720..670d96de8 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -1,19 +1,27 @@ import uproot from skhep_testdata import data_path import uproot.writing.writable + # import ROOT import numpy as np import awkward as ak -def test_vector(): - with uproot.update("/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root") as write: - write.add_branches("tree1", {"branch": int}, source='t') - with uproot.open("/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root") as read: +def test_vector(): + with uproot.update( + "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root" + ) as write: + write.add_branches("tree1", {"branch": int}, source="t") + + with uproot.open( + "/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root" + ) as read: print(read["t"]["x"].arrays()) - with uproot.open("/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root") as read: + with uproot.open( + "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root" + ) as read: print(read["tree1"]) # print(read["tree1"].all_members) # print(read["tree1"]["x"].all_members) @@ -22,21 +30,22 @@ def test_vector(): def simple_test(): with uproot.recreate("arrays.root") as file: - file['tree'] = {"b1": [1,2,3], "b2": [2,3,4]} + file["tree"] = {"b1": [1, 2, 3], "b2": [2, 3, 4]} with uproot.recreate("arrays_check.root") as file: - file['tree'] = {"b1": [1,2,3], "b2": [2,3,4]} - + file["tree"] = {"b1": [1, 2, 3], "b2": [2, 3, 4]} + with uproot.open("arrays.root", minimal_ttree_metadata=False) as read: - print(read['tree']['b1'].all_members) + print(read["tree"]["b1"].all_members) with uproot.update("arrays.root") as write: - write.add("tree", {"b3": [5,6,7]}, source='tree') + write.add("tree", {"b3": [5, 6, 7]}, source="tree") with uproot.open("arrays.root") as new: - print(new['tree'].keys()) - print(new['tree'].member("fBranches")) + print(new["tree"].keys()) + print(new["tree"].member("fBranches")) + + # for key in - # for key in -simple_test() \ No newline at end of file +simple_test() From 0d91275c7eaadf46dab13bfd2984635846cda5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 18 Apr 2024 11:13:08 +0200 Subject: [PATCH 04/20] Progress adding branches filled with data, have to fix some problems with copying jagged branches --- src/uproot/models/TBranch.py | 1 + src/uproot/writing/_cascade.py | 219 ++++++++++++++----------- src/uproot/writing/_cascadetree.py | 122 +++++++++----- src/uproot/writing/writable.py | 93 ++++++++++- tests/test_1155_feat_add_copy_ttree.py | 77 +++++++-- 5 files changed, 365 insertions(+), 147 deletions(-) diff --git a/src/uproot/models/TBranch.py b/src/uproot/models/TBranch.py index 9f7466ebf..41243ad96 100644 --- a/src/uproot/models/TBranch.py +++ b/src/uproot/models/TBranch.py @@ -14,6 +14,7 @@ import uproot import uproot.models.TH +import uproot.models.TObjArray _tbranch10_format1 = struct.Struct(">iiiiqiIiqqq") _tbranch10_dtype1 = numpy.dtype(">i4") diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 2d8beb965..1daf82d8e 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -30,6 +30,7 @@ import uproot.compression import uproot.const +import uproot.deserialization import uproot.models.TBranch import uproot.models.TLeaf import uproot.models.TList @@ -102,7 +103,6 @@ def write(self, sink): + repr(self) ) tmp = self.serialize() - # print(f"writing {self._location}:{self._location + len(tmp)} ({len(tmp)}) {type(self).__name__} {self.name if hasattr(self, 'name') else ''} {self.title if hasattr(self, 'title') else ''}") sink.write(self._location, tmp) self._file_dirty = False @@ -582,13 +582,14 @@ def deserialize(cls, raw_bytes, location, num_bytes, num_slices, in_path): return out -class OldBranch(CascadeLeaf): # Branch or branches? +class OldBranches(CascadeLeaf): """ A :doc:`uproot.writing._cascade.CascadeLeaf` for copying an old TBranch to a new TTree. ? """ - def __init__(self, branch): - self._branch = branch + def __init__(self, branches): + self._branches = branches + self._branch_data = {} @property def allocation(self): @@ -622,13 +623,14 @@ def num_bytes(self): return total - def serialize(self, out): + def serialize(self, out, branch): # superclass TNamed (Model_TNamed(uproot.model.Model)) # superclass TAttFill - self.read_members() + self.read_members(branch) + datum = self._branch_data[branch.member("fName")] key_num_bytes = uproot.reading._key_format_big.size + 6 - name_asbytes = self._branch_data["fName"].encode(errors="surrogateescape") - title_asbytes = self._branch_data["fTitle"].encode(errors="surrogateescape") + name_asbytes = datum["fName"].encode(errors="surrogateescape") + title_asbytes = datum["fTitle"].encode(errors="surrogateescape") key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) @@ -642,25 +644,22 @@ def serialize(self, out): tbranch_index = len(out) out.append(None) - tbranch_tobject = uproot.models.TObject.Model_TObject.empty() # ? - # tbranch_tnamed = self._branch_data['TNamed'].serialize() # ? + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() tbranch_tnamed._bases.append(tbranch_tobject) - tbranch_tnamed._members["fTitle"] = self._branch_data["fTitle"] - tbranch_tnamed._serialize( - out, True, self._branch_data["fName"], numpy.uint32(0x00400000) - ) + tbranch_tnamed._members["fTitle"] = datum["fTitle"] + tbranch_tnamed._serialize(out, True, datum["fName"], numpy.uint32(0x00400000)) # TAttFill v2, fFillColor: 0, fFillStyle: 1001 # make model TAttFill v2 with fFillColor and fFillStyle tattfill = uproot.models.TAtt.Model_TAttFill_v2.empty() # tattfill._deeply_writable = True # ? - tattfill._members["fFillColor"] = self._branch_data["fFillColor"] - tattfill._members["fFillStyle"] = self._branch_data["fFillStyle"] + tattfill._members["fFillColor"] = datum["fFillColor"] + tattfill._members["fFillStyle"] = datum["fFillStyle"] out.append(tattfill.serialize(out)) - self._branch_data["metadata_start"] = (6 + 6 + 8 + 6) + sum( + datum["metadata_start"] = (6 + 6 + 8 + 6) + sum( len(x) for x in out if x is not None ) @@ -669,15 +668,13 @@ def serialize(self, out): # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. # (I don't know where the "no, really, this is uncompressed" bit is.) - # Have to actually make something for if there's a TBranchElement!! - out.append( uproot.models.TBranch._tbranch13_format1.pack( - self._branch_data["fCompress"], - self._branch_data["fBasketSize"], - self._branch_data["fEntryOffsetLen"], - self._branch_data["fWriteBasket"], # fWriteBasket - self._branch_data["fEntryNumber"], # fEntryNumber + datum["fCompress"], + datum["fBasketSize"], + datum["fEntryOffsetLen"], + datum["fWriteBasket"], # fWriteBasket + datum["fEntryNumber"], # fEntryNumber ) ) @@ -685,18 +682,19 @@ def serialize(self, out): out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") # out.append(self._branch_data["fIOFeatures"].serialize()) # 0 to bytestring?? + out.append( uproot.models.TBranch._tbranch13_format2.pack( - self._branch_data["fOffset"], - self._branch_data["fMaxBaskets"], # fMaxBaskets - self._branch_data["fSplitLevel"], - self._branch_data["fEntries"], # fEntries - self._branch_data["fFirstEntry"], - self._branch_data["fTotBytes"], - self._branch_data["fZipBytes"], + datum["fOffset"], + datum["fMaxBaskets"], # fMaxBaskets + datum["fSplitLevel"], + datum["fEntries"], # fEntries + datum["fFirstEntry"], + datum["fTotBytes"], + datum["fZipBytes"], ) ) - # if 'fClonesName' in self._branch.all_members.keys(): # TBranchElement? + # if 'fClonesName' in self._branch.all_members.keys(): # TBranchElement - find a more robust way to check....or make sure this can't be misleading # out.append(self._branch.member("fClassName").serialize()) # These three are TStrings # out.append(self._branch.member("fParentName").serialize()) # out.append(self._branch.member("fClonesName").serialize()) @@ -713,8 +711,9 @@ def serialize(self, out): # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount"))) # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount2"))) # empty TObjArray of TBranches + out.append( # TODO how to handle this? Make sure to be TBranchElements will be handled too - self._branch_data["fBranches"].serialize( + datum["fBranches"].serialize( out, ) ) @@ -729,11 +728,11 @@ def serialize(self, out): absolute_location = key_num_bytes + sum(len(x) for x in out if x is not None) absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) - tleaf_reference_number = absolute_location + 2 + datum["tleaf_reference_number"] = absolute_location + 2 subany_tleaf_index = len(out) out.append(None) - for leaf in self._branch_data["fLeaves"]: + for leaf in datum["fLeaves"]: # Make and serialize each leaf?? # if isinstance(leaf, model....) if isinstance(leaf, uproot.models.TLeaf.Model_TLeafO_v1): @@ -769,11 +768,9 @@ def serialize(self, out): else: out.append(("TLeaf" + letter_upper).encode() + b"\x00") # single TLeaf - leaf_name = self._branch_data["fName"].encode(errors="surrogateescape") + leaf_name = datum["fName"].encode(errors="surrogateescape") leaf_title = ( - self._branch_data["fLeaves"][0] - .member("fTitle") - .encode(errors="surrogateescape") + datum["fLeaves"][0].member("fTitle").encode(errors="surrogateescape") ) leaf_name_length = (1 if len(leaf_name) < 255 else 5) + len(leaf_name) leaf_title_length = (1 if len(leaf_title) < 255 else 5) + len(leaf_title) @@ -851,6 +848,7 @@ def serialize(self, out): ) ) + # generic TLeaf members out.append( uproot.models.TLeaf._tleaf2_format0.pack( leaf.member("fLen"), @@ -860,26 +858,34 @@ def serialize(self, out): leaf.member("fIsUnsigned"), ) ) - out.append( - uproot.serialization.serialize_object_any( - leaf.member("fLeafCount") # fLeafCount - ) - ) - if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + if leaf.member("fLeafCount") is not None: out.append( - uproot.models.TLeaf._tleafelement1_format1.pack( - leaf.member("fID"), # fIsRange - leaf.member("fType"), + uproot.deserialization._read_object_any_format1.pack( + self._branch_data[ + branch.member("fLeaves")[0] + .member("fLeafCount") + .member("fName") + ]["tleaf_reference_number"] ) ) else: - # specialized TLeaf* members (fMinimum, fMaximum) - # datum["tleaf_special_struct"] = special_struct - out.append( - special_struct.pack( - int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) - ) + out.append(b"\x00\x00\x00\x00") + + # if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + # out.append( + # uproot.models.TLeaf._tleafelement1_format1.pack( + # leaf.member("fID"), # fIsRange + # leaf.member("fType"), + # ) + # ) + # else: + # specialized TLeaf* members (fMinimum, fMaximum) + # datum["tleaf_special_struct"] = special_struct + out.append( + special_struct.pack( + int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) ) + ) out[subany_tleaf_index] = ( uproot.serialization._serialize_object_any_format1.pack( @@ -897,9 +903,8 @@ def serialize(self, out): # empty TObjArray of fBaskets (embedded) # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" - if len(self._branch_data["fBaskets"]) != 1: - # print(len(self._branch_data["fBaskets"])) - raise NotImplementedError + # if len(self._branch_data["fBaskets"]) != 1: + # raise NotImplementedError # out.append( # self._branch_data["fBaskets"].serialize( @@ -912,23 +917,25 @@ def serialize(self, out): ) assert sum(1 if x is None else 0 for x in out) == 4 - self._branch_data["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + datum["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( len(x) for x in out if x is not None ) # speedbump and fBasketBytes out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data["fBasketBytes"])) + out.append(uproot._util.tobytes(datum["fBasketBytes"])) # speedbump and fBasketEntry out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data["fBasketEntry"])) + out.append(uproot._util.tobytes(datum["fBasketEntry"])) # speedbump and fBasketSeek out.append(b"\x01") - out.append(uproot._util.tobytes(self._branch_data["fBasketSeek"])) + out.append(uproot._util.tobytes(datum["fBasketSeek"])) + + # out.append(datum["fFileName"].serialize()) # name = None? - out.append(self._branch_data["fFileName"].serialize()) # name = None? + out.append(b"\x00") out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch @@ -941,40 +948,41 @@ def serialize(self, out): uproot.const.kNewClassTag, ) ) - return out, tleaf_reference_number + return out - def read_members(self): - self._branch_data = {} - self._branch_data["fTitle"] = self._branch.member("fTitle") - self._branch_data["fName"] = self._branch.member("fName") - self._branch_data["fFillColor"] = self._branch.member("fFillColor") - self._branch_data["fFillStyle"] = self._branch.member("fFillStyle") + def read_members(self, branch): + name = branch.member("fName") + self._branch_data[name] = {} + self._branch_data[name]["fTitle"] = branch.member("fTitle") + self._branch_data[name]["fName"] = branch.member("fName") + self._branch_data[name]["fFillColor"] = branch.member("fFillColor") + self._branch_data[name]["fFillStyle"] = branch.member("fFillStyle") try: - self._branch_data["fIOFeatures"] = self._branch.member("fIOFeatures") + self._branch_data[name]["fIOFeatures"] = branch.member("fIOFeatures") except KeyError: - self._branch_data["fIOFeatures"] = 0 # ? self._branch_member("fIOFeatures") - self._branch_data["fCompress"] = self._branch.member("fCompress") - self._branch_data["fBasketSize"] = self._branch.member("fBasketSize") - self._branch_data["fEntryOffsetLen"] = self._branch.member("fEntryOffsetLen") - self._branch_data["fWriteBasket"] = self._branch.member("fWriteBasket") - self._branch_data["fEntryNumber"] = self._branch.member("fEntryNumber") - self._branch_data["fOffset"] = self._branch.member("fOffset") - self._branch_data["fMaxBaskets"] = self._branch.member("fMaxBaskets") - self._branch_data["fSplitLevel"] = self._branch.member("fSplitLevel") - self._branch_data["fEntries"] = self._branch.member("fEntries") + self._branch_data[name]["fIOFeatures"] = 0 # ? branch_member("fIOFeatures") + self._branch_data[name]["fCompress"] = branch.member("fCompress") + self._branch_data[name]["fBasketSize"] = branch.member("fBasketSize") + self._branch_data[name]["fEntryOffsetLen"] = branch.member("fEntryOffsetLen") + self._branch_data[name]["fWriteBasket"] = branch.member("fWriteBasket") + self._branch_data[name]["fEntryNumber"] = branch.member("fEntryNumber") + self._branch_data[name]["fOffset"] = branch.member("fOffset") + self._branch_data[name]["fMaxBaskets"] = branch.member("fMaxBaskets") + self._branch_data[name]["fSplitLevel"] = branch.member("fSplitLevel") + self._branch_data[name]["fEntries"] = branch.member("fEntries") try: - self._branch_data["fFirstEntry"] = self._branch.member("fFirstEntry") + self._branch_data[name]["fFirstEntry"] = branch.member("fFirstEntry") except KeyError: - self._branch_data["fFirstEntry"] = 0 - self._branch_data["fTotBytes"] = self._branch.member("fTotBytes") - self._branch_data["fZipBytes"] = self._branch.member("fZipBytes") - self._branch_data["fLeaves"] = self._branch.member("fLeaves") - self._branch_data["fBaskets"] = self._branch.member("fBaskets") - self._branch_data["fBranches"] = self._branch.member("fBranches") - self._branch_data["fBasketBytes"] = self._branch.member("fBasketBytes") - self._branch_data["fBasketEntry"] = self._branch.member("fBasketEntry") - self._branch_data["fBasketSeek"] = self._branch.member("fBasketSeek") - self._branch_data["fFileName"] = self._branch.member("fFileName") + self._branch_data[name]["fFirstEntry"] = 0 + self._branch_data[name]["fTotBytes"] = branch.member("fTotBytes") + self._branch_data[name]["fZipBytes"] = branch.member("fZipBytes") + self._branch_data[name]["fLeaves"] = branch.member("fLeaves") + self._branch_data[name]["fBaskets"] = branch.member("fBaskets") + self._branch_data[name]["fBranches"] = branch.member("fBranches") + self._branch_data[name]["fBasketBytes"] = branch.member("fBasketBytes") + self._branch_data[name]["fBasketEntry"] = branch.member("fBasketEntry") + self._branch_data[name]["fBasketSeek"] = branch.member("fBasketSeek") + self._branch_data[name]["fFileName"] = branch.member("fFileName") def serialize_leaf_elements(self, out, special_struct): # specialized TLeaf* members (fMinimum, fMaximum) @@ -2111,6 +2119,33 @@ def add_tree( field_name, initial_basket_capacity, resize_factor, + ): + import uproot.writing._cascadetree + + tree = uproot.writing._cascadetree.Tree( + self, + name, + title, + branch_types, + self._freesegments, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, + ) + tree.write_anew(sink) + return tree + + def copy_tree( + self, + sink, + name, + title, + branch_types, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, existing_branches=None, new_branches=None, ): @@ -2129,7 +2164,7 @@ def add_tree( existing_branches, ) tree.write_anew(sink) - tree.add_data(sink, new_branches) + tree.add_data(sink._file, sink, new_branches) return tree def add_rntuple(self, sink, name, title, akform): diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index ec57ac88a..afbe76abb 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -100,12 +100,12 @@ def __init__( branch_types_items = branch_types.items() else: branch_types_items = branch_types - if len(branch_types) == 0: raise ValueError("TTree must have at least one branch") self._branch_data = [] self._branch_lookup = {} + for branch_name, branch_type in branch_types_items: branch_dict = None branch_dtype = None @@ -123,7 +123,6 @@ def __init__( if isinstance(branch_type, str) and branch_type.strip() == "bytes": raise TypeError branch_dtype = numpy.dtype(branch_type) - except TypeError as err: try: awkward = uproot.extras.awkward() @@ -149,7 +148,6 @@ def __init__( branch_datashape = branch_datashape.content branch_dtype = self._branch_ak_to_np(branch_datashape) - if branch_dict is not None: if branch_name not in self._branch_lookup: self._branch_lookup[branch_name] = len(self._branch_data) @@ -173,7 +171,6 @@ def __init__( self._branch_data.append( self._branch_np(subname, content, dtype) ) - elif branch_dtype is not None: if branch_name not in self._branch_lookup: self._branch_lookup[branch_name] = len(self._branch_data) @@ -205,6 +202,7 @@ def __init__( counter = self._branch_np( counter_name, counter_dtype, counter_dtype, kind="counter" ) + if counter_name in self._branch_lookup: # counters always replace non-counters del self._branch_data[self._branch_lookup[counter_name]] @@ -338,6 +336,7 @@ def _branch_ak_to_np(self, branch_datashape): def _branch_np( self, branch_name, branch_type, branch_dtype, counter=None, kind="normal" ): + branch_dtype = branch_dtype.newbyteorder(">") if branch_dtype.subdtype is None: @@ -346,6 +345,7 @@ def _branch_np( branch_dtype, branch_shape = branch_dtype.subdtype letter = _dtype_to_char.get(branch_dtype) + if letter is None: raise TypeError(f"cannot write NumPy dtype {branch_dtype} in TTree") @@ -468,7 +468,6 @@ def extend(self, file, sink, data): for datum in self._branch_data: if datum["kind"] == "record": continue - fBasketBytes = datum["fBasketBytes"] fBasketEntry = datum["fBasketEntry"] fBasketSeek = datum["fBasketSeek"] @@ -596,7 +595,6 @@ def extend(self, file, sink, data): f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" ) provided[k] = v - actual_branches = {} for datum in self._branch_data: if datum["kind"] == "record": @@ -897,7 +895,6 @@ def write_anew(self, sink): ) if self._existing_branches: num_branches += len(self._existing_branches) - # TObjArray header with fName: "" out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") out.append( @@ -909,14 +906,14 @@ def write_anew(self, sink): # Write old branches? if self._existing_branches: + old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) for branch in self._existing_branches: # create OldTBranch object # members = uproot.branch.read_members() - old_branch = uproot.writing._cascade.OldBranch(branch) - out, temp = old_branch.serialize( - out + out = old_branches.serialize( + out, branch ) # should call uproot.models.TBranch._tbranch13_format...pack or something - tleaf_reference_numbers.append(temp) # and don't forget the tleaves + # tleaf_reference_numbers.append(temp) # and don't forget the tleaves for datum in self._branch_data: if datum["kind"] == "record": @@ -995,7 +992,6 @@ def write_anew(self, sink): absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) datum["tleaf_reference_number"] = absolute_location + 2 tleaf_reference_numbers.append(datum["tleaf_reference_number"]) - subany_tleaf_index = len(out) out.append(None) @@ -1020,7 +1016,6 @@ def write_anew(self, sink): special_struct = uproot.models.TLeaf._tleafd1_format1 elif letter_upper == "C": special_struct = uproot.models.TLeaf._tleafc1_format1 - fLenType = datum["dtype"].itemsize fIsUnsigned = letter != letter_upper @@ -1031,7 +1026,6 @@ def write_anew(self, sink): if datum["counter"] is not None: dims = "[" + datum["counter"]["fName"] + "]" + dims - # single TLeaf leaf_name = datum["fName"].encode(errors="surrogateescape") leaf_title = (datum["fName"] + dims).encode(errors="surrogateescape") @@ -1424,7 +1418,6 @@ def write_np_basket(self, sink, branch_name, compression, array): out.append(b"\x00") # part of the Key (included in fKeylen, at least) out.append(compressed_data) - sink.write(location, b"".join(out)) self._freesegments.write(sink) sink.set_file_length(self._freesegments.fileheader.end) @@ -1642,17 +1635,6 @@ def add_data(self, file, sink, data): provided = None - if isinstance(data, numpy.ndarray) and data.dtype.fields is not None: - provided = recarray_to_dict(data) - - if ( - provided is None - and not isinstance(data, Mapping) - or not all(isinstance(x, str) for x in data) - ): - raise TypeError("'add' requires a mapping from branch name (str) to arrays") - - # Awkward may be impossible if uproot._util.from_module(data, "awkward"): try: awkward = uproot.extras.awkward() @@ -1672,11 +1654,77 @@ def add_data(self, file, sink, data): provided = {} for k, v in zip(awkward.fields(data), awkward.unzip(data)): provided[k] = v - actual_branches = {} - for name in provided: + if isinstance(data, numpy.ndarray) and data.dtype.fields is not None: + provided = recarray_to_dict(data) + + if provided is None: + if not isinstance(data, Mapping) or not all( + isinstance(x, str) for x in data + ): + raise TypeError( + "'extend' requires a mapping from branch name (str) to arrays" + ) + + provided = {} + for k, v in data.items(): + if not uproot._util.from_module(v, "awkward"): + if not hasattr(v, "dtype") and not isinstance(v, Mapping): + try: + with warnings.catch_warnings(): + warnings.simplefilter( + "error", category=numpy.VisibleDeprecationWarning + ) + v = numpy.array(v) # noqa: PLW2901 (overwriting v) + if v.dtype == numpy.dtype("O"): + raise Exception + except (numpy.VisibleDeprecationWarning, Exception): + try: + awkward = uproot.extras.awkward() + except ModuleNotFoundError as err: + raise TypeError( + f"NumPy dtype would be dtype('O'), so we won't use NumPy, but 'awkward' cannot be imported: {k}: {type(v)}" + ) from err + v = awkward.from_iter(v) # noqa: PLW2901 (overwriting v) + + if getattr(v, "dtype", None) == numpy.dtype("O"): + try: + awkward = uproot.extras.awkward() + except ModuleNotFoundError as err: + raise TypeError( + f"NumPy dtype is dtype('O'), so we won't use NumPy, but 'awkward' cannot be imported: {k}: {type(v)}" + ) from err + v = awkward.from_iter(v) # noqa: PLW2901 (overwriting v) + + if uproot._util.from_module(v, "awkward"): + try: + awkward = uproot.extras.awkward() + except ModuleNotFoundError as err: + raise TypeError( + f"an Awkward Array was provided, but 'awkward' cannot be imported: {k}: {type(v)}" + ) from err + if ( + isinstance(v, awkward.Array) + and v.ndim > 1 + and not v.layout.purelist_isregular + ): + kk = self._counter_name(k) + vv = numpy.asarray(awkward.num(v, axis=1), dtype=">u4") + if kk in provided and not numpy.array_equal(vv, provided[kk]): + raise ValueError( + f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" + ) + provided[kk] = vv + + if k in provided and not numpy.array_equal(v, provided[k]): + raise ValueError( + f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" + ) + provided[k] = v + actual_branches = {} + for datum in self._branch_data: if datum["fName"] in provided: - actual_branches[datum["fName"]] = provided.pop(name) + actual_branches[datum["fName"]] = provided.pop(datum["fName"]) else: raise ValueError( "'extend' must be given an array for every branch; missing {}".format( @@ -1694,14 +1742,14 @@ def add_data(self, file, sink, data): tofill = [] num_entries = None for branch_name, branch_array in actual_branches.items(): - # if num_entries is None: - # num_entries = len(branch_array) - # elif num_entries != len(branch_array): - # raise ValueError( - # f"'extend' must fill every branch with the same number of entries; {branch_name!r} has {len(branch_array)} entries" - # ) - - # datum = self._branch_data[self._branch_lookup[branch_name]] + if num_entries is None: + num_entries = len(branch_array) + elif num_entries != len(branch_array): + raise ValueError( + f"'extend' must fill every branch with the same number of entries; {branch_name!r} has {len(branch_array)} entries" + ) + + datum = self._branch_data[self._branch_lookup[branch_name]] # if datum["kind"] == "record": # continue diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index dca7088bd..cc0b0ad30 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1344,7 +1344,7 @@ def mktree( return tree - def add( # my own variation of mktree + def add( # variation of mktree for copying ttree self, name, branches, @@ -1399,17 +1399,100 @@ def add( # my own variation of mktree path = (*directory._path, treename) - # Make branch types? - branch_types = {key: type(data) for key, data in branches.items} + # # if awkward: + # if uproot._util.from_module(branches, "awkward"): + + # # Go through all fields? Check lengths and get dtypes? + # data[branch_name] = branch_array + # metadata[branch_name] = branch_array.type + + awkward = uproot.extras.awkward() + # branch_types = {name: array.type for name, array in zip(awkward.fields(branches), awkward.unzip(branches))} + import numpy + + if uproot._util.from_module(branches, "awkward"): + import awkward + + if isinstance(branches, awkward.Array): + branches = {"": branches} + + if isinstance(branches, numpy.ndarray) and branches.dtype.fields is not None: + branches = uproot.writing._cascadetree.recarray_to_dict(branches) + data = {} + metadata = {} + for branch_name, branch_array in branches.items(): + if ( + isinstance(branch_array, numpy.ndarray) + and branch_array.dtype.fields is not None + ): + branch_array = uproot.writing._cascadetree.recarray_to_dict( # noqa: PLW2901 (overwriting branch_array) + branch_array + ) + + if isinstance(branch_array, Mapping) and all( + isinstance(x, str) for x in branch_array + ): + datum = {} + metadatum = {} + for kk, vv in branch_array.items(): + try: + vv = ( # noqa: PLW2901 (overwriting vv) + uproot._util.ensure_numpy(vv) + ) + except TypeError: + raise TypeError( + f"unrecognizable array type {type(branch_array)} associated with {branch_name!r}" + ) from None + datum[kk] = vv + branch_dtype = vv.dtype + branch_shape = vv.shape[1:] + if branch_shape != (): + branch_dtype = numpy.dtype((branch_dtype, branch_shape)) + metadatum[kk] = branch_dtype + + data[branch_name] = datum + metadata[branch_name] = metadatum + + else: + if uproot._util.from_module(branch_array, "awkward"): + data[branch_name] = branch_array + metadata[branch_name] = branch_array.type + + else: + try: + branch_array = uproot._util.ensure_numpy( # noqa: PLW2901 (overwriting branch_array) + branch_array + ) + except TypeError: + awkward = uproot.extras.awkward() + try: + branch_array = awkward.from_iter( # noqa: PLW2901 (overwriting branch_array) + branch_array + ) + except Exception: + raise TypeError( + f"unrecognizable array type {type(branch_array)} associated with {branch_name!r}" + ) from None + else: + data[branch_name] = branch_array + metadata[branch_name] = awkward.type(branch_array) + + else: + data[branch_name] = branch_array + branch_dtype = branch_array.dtype + branch_shape = branch_array.shape[1:] + if branch_shape != (): + branch_dtype = numpy.dtype((branch_dtype, branch_shape)) + metadata[branch_name] = branch_dtype tree = WritableTree( path, directory._file, - directory._cascading.add_tree( + directory._cascading.copy_tree( directory._file.sink, name, title, - branch_types, + metadata, counter_name, field_name, initial_basket_capacity, diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 670d96de8..448b0aedf 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -15,12 +15,14 @@ def test_vector(): write.add_branches("tree1", {"branch": int}, source="t") with uproot.open( - "/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root" + "/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root", + minimal_ttree_metadata=False, ) as read: print(read["t"]["x"].arrays()) with uproot.open( - "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root" + "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root", + minimal_ttree_metadata=False, ) as read: print(read["tree1"]) # print(read["tree1"].all_members) @@ -32,20 +34,69 @@ def simple_test(): with uproot.recreate("arrays.root") as file: file["tree"] = {"b1": [1, 2, 3], "b2": [2, 3, 4]} - with uproot.recreate("arrays_check.root") as file: - file["tree"] = {"b1": [1, 2, 3], "b2": [2, 3, 4]} + with uproot.update("arrays.root") as write: + write.add("tree", {"b3": [5, 6, 7], "b4": [7, 8, 9]}, source="tree") - with uproot.open("arrays.root", minimal_ttree_metadata=False) as read: - print(read["tree"]["b1"].all_members) + with uproot.open("arrays.root", minimal_ttree_metadata=False) as new: + print(new["tree"].all_members) + print(new["tree"]["b4"].all_members) + assert new["tree"].keys() == ["b1", "b2", "b3", "b4"] + assert ak.all(new["tree"].arrays()["b1"] == [1, 2, 3]) + assert ak.all(new["tree"].arrays()["b2"] == [2, 3, 4]) + assert ak.all(new["tree"].arrays()["b3"] == [5, 6, 7]) + assert ak.all(new["tree"].arrays()["b4"] == [7, 8, 9]) + + +def ak_test(): + with uproot.recreate("ak_arrays.root") as file: + file["tree"] = { + "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), + "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), + } + # file.mktree("tree", ) + + with uproot.open("ak_arrays.root") as check: + print( + "counter", + check["tree"]["b1"].member("fLeaves")[0].member("fLeafCount").all_members, + ) + + with uproot.update("ak_arrays.root") as write: + write.add( + "tree", + {"b3": ak.Array([[5, 4, 5], [6], [7]]), "b4": ak.Array([[7], [8], [9]])}, + source="tree", + ) + + with uproot.open("ak_arrays.root", minimal_ttree_metadata=False) as new: + assert new["tree"].keys() == [ + "nb1", + "b1", + "nb2", + "b2", + "nb3", + "b3", + "nb4", + "b4", + ] + assert ak.all(new["tree"]["b1"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7]])) + assert ak.all(new["tree"]["b2"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]])) + assert ak.all(new["tree"]["b3"].array() == ak.Array([[5, 4, 5], [6], [7]])) + assert ak.all(new["tree"]["b4"].array() == ak.Array([[7], [8], [9]])) + +with uproot.open("/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False) as test: + # print(test['events']["Jet_Px"].all_members) + print(test['events']) + # print(test['events'].all_members) - with uproot.update("arrays.root") as write: - write.add("tree", {"b3": [5, 6, 7]}, source="tree") - with uproot.open("arrays.root") as new: - print(new["tree"].keys()) - print(new["tree"].member("fBranches")) +# with uproot.update("/Users/zobil/Documents/samples/uproot-HZZ2.root") as test: +# data = np.arange(0, 2421, 1) +# test.add("events", {"data": data}, source="events") - # for key in +with uproot.open("/Users/zobil/Documents/samples/uproot-HZZ2.root") as check: + print(check['events'].arrays()) +# simple_test() +ak_test() -simple_test() From 2fc5e1c2126bd21d6b6b702dc1fd288e854d45cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:13:36 +0000 Subject: [PATCH 05/20] style: pre-commit fixes --- tests/test_1155_feat_add_copy_ttree.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 448b0aedf..b0be06986 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -79,14 +79,21 @@ def ak_test(): "nb4", "b4", ] - assert ak.all(new["tree"]["b1"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7]])) - assert ak.all(new["tree"]["b2"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]])) + assert ak.all( + new["tree"]["b1"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7]]) + ) + assert ak.all( + new["tree"]["b2"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]) + ) assert ak.all(new["tree"]["b3"].array() == ak.Array([[5, 4, 5], [6], [7]])) assert ak.all(new["tree"]["b4"].array() == ak.Array([[7], [8], [9]])) -with uproot.open("/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False) as test: + +with uproot.open( + "/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False +) as test: # print(test['events']["Jet_Px"].all_members) - print(test['events']) + print(test["events"]) # print(test['events'].all_members) @@ -95,8 +102,7 @@ def ak_test(): # test.add("events", {"data": data}, source="events") with uproot.open("/Users/zobil/Documents/samples/uproot-HZZ2.root") as check: - print(check['events'].arrays()) + print(check["events"].arrays()) # simple_test() ak_test() - From 11503d6f54f4cfcee537763df59f610eee2a6511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 18 Apr 2024 15:44:19 +0200 Subject: [PATCH 06/20] fixed tLeaf bug, everything seems good on the surface --- src/uproot/interpretation/identify.py | 1 - src/uproot/writing/_cascade.py | 31 +++---- src/uproot/writing/_cascadetree.py | 5 +- tests/test_1155_feat_add_copy_ttree.py | 109 +++++++++++++------------ 4 files changed, 75 insertions(+), 71 deletions(-) diff --git a/src/uproot/interpretation/identify.py b/src/uproot/interpretation/identify.py index 26daafbd9..8d6884d8d 100644 --- a/src/uproot/interpretation/identify.py +++ b/src/uproot/interpretation/identify.py @@ -124,7 +124,6 @@ def _from_leaves_one(leaf, title): for x in re.findall(_item_any_pattern, title) ): is_jagged = True - return dims, is_jagged diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 1daf82d8e..162ee23e8 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -627,13 +627,6 @@ def serialize(self, out, branch): # superclass TNamed (Model_TNamed(uproot.model.Model)) # superclass TAttFill self.read_members(branch) - datum = self._branch_data[branch.member("fName")] - key_num_bytes = uproot.reading._key_format_big.size + 6 - name_asbytes = datum["fName"].encode(errors="surrogateescape") - title_asbytes = datum["fTitle"].encode(errors="surrogateescape") - key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) - key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) - any_tbranch_index = len(out) out.append(None) # if 'fClonesName' in self._branch.all_members.keys(): @@ -644,6 +637,13 @@ def serialize(self, out, branch): tbranch_index = len(out) out.append(None) + datum = self._branch_data[branch.member("fName")] + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = branch.tree.name.encode(errors="surrogateescape") + title_asbytes = branch.tree.title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() tbranch_tnamed._bases.append(tbranch_tobject) @@ -729,7 +729,6 @@ def serialize(self, out, branch): absolute_location = key_num_bytes + sum(len(x) for x in out if x is not None) absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) datum["tleaf_reference_number"] = absolute_location + 2 - subany_tleaf_index = len(out) out.append(None) for leaf in datum["fLeaves"]: @@ -762,12 +761,12 @@ def serialize(self, out, branch): # else: # This will never be reached? What to do about G # letter_upper = "G" # special_struct = uproot.models.TLeaf._tleafl1_format0 - if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): - special_struct = uproot.models.TLeaf._tleafelement1_format1 - out.append((b"TLeafElement") + b"\x00") - else: - out.append(("TLeaf" + letter_upper).encode() + b"\x00") - # single TLeaf + # if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): # TLeafElement... + # special_struct = uproot.models.TLeaf._tleafelement1_format1 + # out.append((b"TLeafElement") + b"\x00") + # else: + out.append(("TLeaf" + letter_upper).encode() + b"\x00") + # single TLeaf leaf_name = datum["fName"].encode(errors="surrogateescape") leaf_title = ( datum["fLeaves"][0].member("fTitle").encode(errors="surrogateescape") @@ -858,6 +857,7 @@ def serialize(self, out, branch): leaf.member("fIsUnsigned"), ) ) + if leaf.member("fLeafCount") is not None: out.append( uproot.deserialization._read_object_any_format1.pack( @@ -881,6 +881,7 @@ def serialize(self, out, branch): # else: # specialized TLeaf* members (fMinimum, fMaximum) # datum["tleaf_special_struct"] = special_struct + out.append( special_struct.pack( int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) @@ -948,7 +949,7 @@ def serialize(self, out, branch): uproot.const.kNewClassTag, ) ) - return out + return out, datum["tleaf_reference_number"] def read_members(self, branch): name = branch.member("fName") diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index afbe76abb..00913d045 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -910,11 +910,10 @@ def write_anew(self, sink): for branch in self._existing_branches: # create OldTBranch object # members = uproot.branch.read_members() - out = old_branches.serialize( + out, temp = old_branches.serialize( out, branch ) # should call uproot.models.TBranch._tbranch13_format...pack or something - # tleaf_reference_numbers.append(temp) # and don't forget the tleaves - + tleaf_reference_numbers.append(temp) # and don't forget the tleaves for datum in self._branch_data: if datum["kind"] == "record": continue diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index b0be06986..82126bed4 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -1,6 +1,7 @@ import uproot from skhep_testdata import data_path import uproot.writing.writable +from pathlib import Path # import ROOT import numpy as np @@ -47,62 +48,66 @@ def simple_test(): assert ak.all(new["tree"].arrays()["b4"] == [7, 8, 9]) -def ak_test(): +def test_ak_arrays(): with uproot.recreate("ak_arrays.root") as file: file["tree"] = { "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), } - # file.mktree("tree", ) - - with uproot.open("ak_arrays.root") as check: - print( - "counter", - check["tree"]["b1"].member("fLeaves")[0].member("fLeafCount").all_members, - ) - - with uproot.update("ak_arrays.root") as write: - write.add( - "tree", - {"b3": ak.Array([[5, 4, 5], [6], [7]]), "b4": ak.Array([[7], [8], [9]])}, - source="tree", - ) - - with uproot.open("ak_arrays.root", minimal_ttree_metadata=False) as new: - assert new["tree"].keys() == [ - "nb1", - "b1", - "nb2", - "b2", - "nb3", - "b3", - "nb4", - "b4", - ] - assert ak.all( - new["tree"]["b1"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7]]) - ) - assert ak.all( - new["tree"]["b2"].array() == ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]) - ) - assert ak.all(new["tree"]["b3"].array() == ak.Array([[5, 4, 5], [6], [7]])) - assert ak.all(new["tree"]["b4"].array() == ak.Array([[7], [8], [9]])) - - -with uproot.open( - "/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False -) as test: - # print(test['events']["Jet_Px"].all_members) - print(test["events"]) - # print(test['events'].all_members) - - -# with uproot.update("/Users/zobil/Documents/samples/uproot-HZZ2.root") as test: -# data = np.arange(0, 2421, 1) -# test.add("events", {"data": data}, source="events") - -with uproot.open("/Users/zobil/Documents/samples/uproot-HZZ2.root") as check: - print(check["events"].arrays()) + with uproot.recreate("ak_test.root") as file: + file["tree"] = { + "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), + "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), + "b3": ak.Array([[5, 4, 5], [6], [7]]), + "b4": ak.Array([[7], [8], [9]]), + } + with uproot.open("ak_test.root", minimal_ttree_metadata=False) as correct: + with uproot.update("ak_arrays.root") as write: + write.add( + "tree", + { + "b3": ak.Array([[5, 4, 5], [6], [7]]), + "b4": ak.Array([[7], [8], [9]]), + }, + source="tree", + ) + + with uproot.open("ak_arrays.root", minimal_ttree_metadata=False) as new: + print(new["tree"].member("fLeaves")[1]) + print(new["tree"]["b1"].member("fLeaves")[0]) + print(correct["tree"].member("fLeaves")[1]) + print(correct["tree"]["b1"].member("fLeaves")[0]) + + assert new["tree"].keys() == correct["tree"].keys() + assert ak.all(new["tree"]["b1"].array() == correct["tree"]["b1"].array()) + assert ak.all(new["tree"]["b2"].array() == correct["tree"]["b2"].array()) + assert ak.all(new["tree"]["b3"].array() == correct["tree"]["b3"].array()) + assert ak.all(new["tree"]["b4"].array() == correct["tree"]["b4"].array()) + + +def HZZ_test(): + with uproot.open( + "/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False + ) as test: + + # print(test["events"]["NMuon"].typename) + # print(test["events"]) + # print(test['events'].all_members) + + # with uproot.update("/Users/zobil/Documents/samples/uproot-HZZ.root copy") as new: + # data = np.arange(0, 2421, 1) + # new.add("events", {"data": data}, source="events") + + with uproot.open( + "/Users/zobil/Documents/samples/uproot-HZZ.root copy", + minimal_ttree_metadata=False, + ) as check: + print(check["events"].arrays()) + print(test["events"].arrays()) + + # print(check["events"]["Photon_Px"].member("fLeaves")[0].member("fLeafCount")) + # simple_test() -ak_test() +HZZ_test() +test_ak_arrays() From efac6a70ab30fc090562032796ce09acf0b8ec40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Tue, 30 Apr 2024 10:19:41 +0200 Subject: [PATCH 07/20] ROOT can read both branch names/types now, but not the data --- src/uproot/writing/_cascade.py | 32 ++--- src/uproot/writing/_cascadetree.py | 27 +--- src/uproot/writing/writable.py | 19 +-- tests/test_1155_feat_add_copy_ttree.py | 189 ++++++++++++++++++------- 4 files changed, 168 insertions(+), 99 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 162ee23e8..442001a9b 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -649,7 +649,6 @@ def serialize(self, out, branch): tbranch_tnamed._bases.append(tbranch_tobject) tbranch_tnamed._members["fTitle"] = datum["fTitle"] tbranch_tnamed._serialize(out, True, datum["fName"], numpy.uint32(0x00400000)) - # TAttFill v2, fFillColor: 0, fFillStyle: 1001 # make model TAttFill v2 with fFillColor and fFillStyle tattfill = uproot.models.TAtt.Model_TAttFill_v2.empty() @@ -712,12 +711,11 @@ def serialize(self, out, branch): # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount2"))) # empty TObjArray of TBranches - out.append( # TODO how to handle this? Make sure to be TBranchElements will be handled too - datum["fBranches"].serialize( - out, - ) + # TODO how to handle this? Make sure to be TBranchElements will be handled too + # empty TObjArray of TBranches + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) - subtobjarray_of_leaves_index = len(out) out.append(None) @@ -904,8 +902,8 @@ def serialize(self, out, branch): # empty TObjArray of fBaskets (embedded) # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" - # if len(self._branch_data["fBaskets"]) != 1: - # raise NotImplementedError + if len(datum["fBaskets"]) >= 1: + raise NotImplementedError # out.append( # self._branch_data["fBaskets"].serialize( @@ -933,15 +931,11 @@ def serialize(self, out, branch): # speedbump and fBasketSeek out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketSeek"])) - # out.append(datum["fFileName"].serialize()) # name = None? - out.append(b"\x00") - out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch ) - out[any_tbranch_index] = ( uproot.serialization._serialize_object_any_format1.pack( numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) @@ -949,6 +943,7 @@ def serialize(self, out, branch): uproot.const.kNewClassTag, ) ) + return out, datum["tleaf_reference_number"] def read_members(self, branch): @@ -2147,8 +2142,9 @@ def copy_tree( field_name, initial_basket_capacity, resize_factor, - existing_branches=None, - new_branches=None, + existing_ttree, + existing_branches, + new_branches, ): import uproot.writing._cascadetree @@ -2163,9 +2159,11 @@ def copy_tree( initial_basket_capacity, resize_factor, existing_branches, + existing_ttree, ) tree.write_anew(sink) - tree.add_data(sink._file, sink, new_branches) + tree.extend(sink._file, sink, new_branches) + return tree def add_rntuple(self, sink, name, title, akform): @@ -2688,7 +2686,6 @@ def create_empty( filename = "dynamic.root" if filename is None else os.path.split(filename)[-1] if len(filename) >= 256: raise ValueError("ROOT file names must be less than 256 bytes") - fileheader = FileHeader( None, None, @@ -2700,7 +2697,6 @@ def create_empty( None, uuid_function(), ) - freesegments_key = Key( None, None, @@ -2758,6 +2754,7 @@ def create_empty( fileheader.begin, None, ) + directory_data = DirectoryData(None, initial_directory_bytes, []) rootdirectory = RootDirectory( directory_key, @@ -2789,7 +2786,6 @@ def create_empty( ) fileheader.info_location = streamers_key.location fileheader.info_num_bytes = streamers_key.allocation + streamers.allocation - rootdirectory.write(sink) streamers.write(sink) diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 00913d045..015e4bdc1 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -86,6 +86,7 @@ def __init__( initial_basket_capacity, resize_factor, existing_branches=None, + existing_ttree=None, ): self._directory = directory self._name = name @@ -312,6 +313,10 @@ def __init__( "fAutoFlush": -30000000, "fEstimate": 1000000, } + if existing_ttree: + self._metadata["fTotBytes"] = existing_ttree.member("fTotBytes") + self._metadata["fZipBytes"] = existing_ttree.member("fZipBytes") + self._key = None def _branch_ak_to_np(self, branch_datashape): @@ -821,9 +826,7 @@ def extend(self, file, sink, data): fBasketEntry[i + 1] = num_entries + fBasketEntry[i] datum["fBasketSeek"][self._num_baskets] = location - datum["arrays_write_stop"] = self._num_baskets + 1 - # update TTree metadata in file self._num_entries += num_entries self._num_baskets += 1 @@ -956,7 +959,6 @@ def write_anew(self, sink): self._num_entries, # fEntryNumber ) ) - # fIOFeatures (TIOFeatures) out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") @@ -971,7 +973,6 @@ def write_anew(self, sink): datum["fZipBytes"], ) ) - # empty TObjArray of TBranches out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -1118,7 +1119,6 @@ def write_anew(self, sink): fIsUnsigned, ) ) - if datum["counter"] is None: # null fLeafCount out.append(b"\x00\x00\x00\x00") @@ -1132,6 +1132,7 @@ def write_anew(self, sink): # specialized TLeaf* members (fMinimum, fMaximum) out.append(special_struct.pack(0, 0)) + datum["tleaf_special_struct"] = special_struct out[subany_tleaf_index] = ( @@ -1160,15 +1161,12 @@ def write_anew(self, sink): # speedbump and fBasketBytes out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketBytes"])) - # speedbump and fBasketEntry out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketEntry"])) - # speedbump and fBasketSeek out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketSeek"])) - # empty fFileName out.append(b"\x00") @@ -1220,7 +1218,6 @@ def write_anew(self, sink): self._metadata_start = sum(len(x) for x in out[:metadata_out_index]) raw_data = b"".join(out) - self._key = self._directory.add_object( sink, "TTree", @@ -1257,7 +1254,6 @@ def write_updates(self, sink): self._metadata["fEstimate"], ), ) - for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1302,27 +1298,21 @@ def write_updates(self, sink): datum["fBasketEntry"][start : stop + 1] ) fBasketSeek_part = uproot._util.tobytes(datum["fBasketSeek"][start:stop]) - position = base + datum["basket_metadata_start"] + 1 position += datum["fBasketBytes"][:start].nbytes sink.write(position, fBasketBytes_part) position += len(fBasketBytes_part) position += datum["fBasketBytes"][stop:].nbytes - position += 1 position += datum["fBasketEntry"][:start].nbytes sink.write(position, fBasketEntry_part) position += len(fBasketEntry_part) position += datum["fBasketEntry"][stop + 1 :].nbytes - position += 1 position += datum["fBasketSeek"][:start].nbytes sink.write(position, fBasketSeek_part) position += len(fBasketSeek_part) position += datum["fBasketSeek"][stop:].nbytes - - datum["arrays_write_start"] = datum["arrays_write_stop"] - if datum["dtype"] == ">U0": position = ( base @@ -1388,7 +1378,6 @@ def write_np_basket(self, sink, branch_name, compression, array): parent_location = self._directory.key.location # FIXME: is this correct? location = self._freesegments.allocate(fNbytes, dry_run=False) - out = [] out.append( uproot.reading._key_format_big.pack( @@ -1593,6 +1582,7 @@ def add_data(self, file, sink, data): # do checks before getting here...easier # add to a single branch? # remember not to alter data! + if self._num_baskets >= self._basket_capacity - 1: self._basket_capacity = max( self._basket_capacity + 1, @@ -1619,7 +1609,6 @@ def add_data(self, file, sink, data): datum["fBasketEntry"][: len(fBasketEntry)] = fBasketEntry datum["fBasketSeek"][: len(fBasketSeek)] = fBasketSeek datum["fBasketEntry"][len(fBasketEntry)] = self._num_entries - oldloc = start = self._key.location stop = start + self._key.num_bytes + self._key.compressed_bytes @@ -1914,9 +1903,7 @@ def add_data(self, file, sink, data): fBasketEntry[i + 1] = num_entries + fBasketEntry[i] datum["fBasketSeek"][self._num_baskets] = location - datum["arrays_write_stop"] = self._num_baskets + 1 - # update TTree metadata in file self._num_entries += num_entries self._num_baskets += 1 diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index cc0b0ad30..428047eb4 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -122,7 +122,6 @@ def recreate(file_path: str | Path | IO, **options): "unrecognized options for uproot.create or uproot.recreate: " + ", ".join(repr(x) for x in options) ) - cascading = uproot.writing._cascade.create_empty( sink, compression, @@ -173,7 +172,6 @@ def update(file_path: str | Path | IO, **options): "unrecognized options for uproot.update: " + ", ".join(repr(x) for x in options) ) - cascading = uproot.writing._cascade.update_existing( sink, initial_directory_bytes, @@ -232,7 +230,6 @@ def __repr__(self): @property def sink(self) -> uproot.sink.file.FileSink: """ - Returns a :doc:`uproot.sink.file.FileSink`, the physical layer for writing (and sometimes reading) data. """ return self._sink @@ -1346,9 +1343,8 @@ def mktree( def add( # variation of mktree for copying ttree self, - name, - branches, source, + branches, title="", *, counter_name=lambda counted: "n" + counted, @@ -1387,7 +1383,6 @@ def add( # variation of mktree for copying ttree raise ValueError( f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" ) - try: # Will this throw an error? proabably? at = old_ttree.name.rindex("/") except ValueError: @@ -1396,6 +1391,11 @@ def add( # variation of mktree for copying ttree else: dirpath, treename = old_ttree.name[:at], old_ttree.name[at + 1 :] directory = self.mkdir(dirpath) + import copy + + ot = copy.deepcopy(old_ttree) + + del self[old_ttree.name] path = (*directory._path, treename) @@ -1490,14 +1490,15 @@ def add( # variation of mktree for copying ttree directory._file, directory._cascading.copy_tree( directory._file.sink, - name, + ot.name, title, metadata, counter_name, field_name, initial_basket_capacity, resize_factor, - old_ttree.branches, + ot, + ot.branches, branches, ), ) @@ -2054,7 +2055,7 @@ def add_data( self, data, **more_data ): # Eventually... def add(self, as_dict=None, **as_kwds): # data must be a dict, - self._cascading.add_data(self._file, self._file.sink, data) + self._cascading.extend(self._file, self._file.sink, data) class WritableBranch: diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 82126bed4..947157252 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -1,7 +1,8 @@ import uproot from skhep_testdata import data_path +import uproot.serialization import uproot.writing.writable -from pathlib import Path +import os # import ROOT import numpy as np @@ -13,7 +14,7 @@ def test_vector(): with uproot.update( "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root" ) as write: - write.add_branches("tree1", {"branch": int}, source="t") + write.add_branches("t", {"branch": int}) with uproot.open( "/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root", @@ -31,83 +32,167 @@ def test_vector(): # print(read["tree1"]["x"].member("fLeaves")[0]) -def simple_test(): - with uproot.recreate("arrays.root") as file: - file["tree"] = {"b1": [1, 2, 3], "b2": [2, 3, 4]} +def simple_test(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int8) + data1 = np.array([2, 3, 4, 5, 6], dtype=np.int8) - with uproot.update("arrays.root") as write: - write.add("tree", {"b3": [5, 6, 7], "b4": [7, 8, 9]}, source="tree") + with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: + f["tree"] = {"b1": data, "b2": data1} - with uproot.open("arrays.root", minimal_ttree_metadata=False) as new: - print(new["tree"].all_members) - print(new["tree"]["b4"].all_members) - assert new["tree"].keys() == ["b1", "b2", "b3", "b4"] - assert ak.all(new["tree"].arrays()["b1"] == [1, 2, 3]) - assert ak.all(new["tree"].arrays()["b2"] == [2, 3, 4]) - assert ak.all(new["tree"].arrays()["b3"] == [5, 6, 7]) - assert ak.all(new["tree"].arrays()["b4"] == [7, 8, 9]) + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + f["tree"] = {"b1": data} + with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: + f.add("tree", {"b2": data1}) -def test_ak_arrays(): - with uproot.recreate("ak_arrays.root") as file: + with uproot.open( + os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False + ) as check: + # check["tree"].show() + with uproot.open( + os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False + ) as new: + new_chunk, new_cursor = new.key("tree").get_uncompressed_chunk_cursor() + check_chenk, check_cursor = check.key( + "tree" + ).get_uncompressed_chunk_cursor() + print("begin", new.file.chunk(22002, new.file.fEND).raw_data.tobytes()) + print(check.file.fEND) + print(new.file.fEND) + # print(check['tree'].chunk.raw_data.tobytes(), "\n") + # print(new['tree'].chunk.raw_data.tobytes()) + # cursor = uproot.source.cursor.Cursor(0) + # print(check.cursor) + # print(check.file.source.chunk( + # 0, 160 + # ).raw_data.tobytes()) + # print(new.file.chunk(0, 100).raw_data.tobytes()) + # print("?",len('root\x00\x00\xf3\xc0\x00\x00\x00d\x00\x00V\x06\x00\x00U\xc4\x00\x00\x00B\x00\x00\x00\x02\x00\x00\x00<\x04\x00\x00\x00e\x00\x00\tn\x00\x00LV\x00\x01\xcd)\xb8|\x06\x0f\x11\xef\x82\xae\xfe\xb1\xc7\x122b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x'), "\n") + + # print(new['tree'].cursor) + # print(check['tree'].chunk.start) + + # key = new.key("tree") + # chunk, cursor = key.get_uncompressed_chunk_cursor() + + # new.file.chunk(new.cursor.index, new.cursor.index+500) + # new.cursor.debug(new.file.chunk(new.cursor.index, new.cursor.index+500), limit_bytes=1000) + + # print(new["tree"]["b4"].member("fLeaves")[0].all_members) + assert new["tree"].keys() == ["b1", "b2"] + + print(new.keys()) + print(check["tree"]["b1"].all_members) + assert ak.all(new["tree"].arrays()["b1"] == [1, 2, 3, 4, 5]) + + assert ak.all(new["tree"].arrays()["b2"] == [2, 3, 4, 5, 6]) + # assert ak.all(new["tree"].arrays()["b3"] == [5, 6, 7]) + # assert ak.all(new["tree"].arrays()["b4"] == [7, 8, 9]) + + import ROOT + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") + tree = inFile.Get("tree") + # print(tree.GetBranch("b2")) + # for x in tree: + # print(getattr(x, 'b2')) + + +def test_ak_arrays(tmp_path): + with uproot.recreate(os.path.join(tmp_path, "control.root")) as file: file["tree"] = { "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), + "b3": ak.Array([[5, 4, 5], [6], [7]]), + "b4": ak.Array([[7], [8], [9]]), } - with uproot.recreate("ak_test.root") as file: + + with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: file["tree"] = { "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), - "b3": ak.Array([[5, 4, 5], [6], [7]]), - "b4": ak.Array([[7], [8], [9]]), } - with uproot.open("ak_test.root", minimal_ttree_metadata=False) as correct: - with uproot.update("ak_arrays.root") as write: - write.add( - "tree", - { - "b3": ak.Array([[5, 4, 5], [6], [7]]), - "b4": ak.Array([[7], [8], [9]]), - }, - source="tree", - ) - - with uproot.open("ak_arrays.root", minimal_ttree_metadata=False) as new: - print(new["tree"].member("fLeaves")[1]) - print(new["tree"]["b1"].member("fLeaves")[0]) - print(correct["tree"].member("fLeaves")[1]) - print(correct["tree"]["b1"].member("fLeaves")[0]) - assert new["tree"].keys() == correct["tree"].keys() - assert ak.all(new["tree"]["b1"].array() == correct["tree"]["b1"].array()) - assert ak.all(new["tree"]["b2"].array() == correct["tree"]["b2"].array()) - assert ak.all(new["tree"]["b3"].array() == correct["tree"]["b3"].array()) - assert ak.all(new["tree"]["b4"].array() == correct["tree"]["b4"].array()) + with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: + write.add( + "tree", + { + "b3": ak.Array([[5, 4, 5], [6], [7]]), + "b4": ak.Array([[7], [8], [9]]), + }, + ) - -def HZZ_test(): with uproot.open( - "/Users/zobil/Documents/samples/uproot-HZZ.root", minimal_ttree_metadata=False + os.path.join(tmp_path, "control.root"), minimal_ttree_metadata=False + ) as correct: + with uproot.open( + os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False + ) as new: + print(new.file.show_streamers("TLeafL")) + # print(new['tree']['b1'].member("fLeaves")[0].member("fName")) + # print(correct["tree"].show()) + # print(new['tree'].chunk.raw_data.tobytes()) + # print(correct["tree"]["b1"].member("fLeaves")[0]) + # print(correct.file.chunk(correct.file.fSeekInfo, correct.file.fEND).raw_data.tobytes()) + # correct.file.show_streamers() + + # key = new.key("tree") + # chunk, cursor = key.get_uncompressed_chunk_cursor() + # cursor.debug(chunk, limit_bytes=1000) + # print("...") + # key = correct.key("tree") + # chunk, cursor = key.get_uncompressed_chunk_cursor() + # cursor.debug(chunk, limit_bytes=1000) + + # assert new["tree"].keys() == correct["tree"].keys() + # assert ak.all(new["tree"]["b1"].array() == correct["tree"]["b1"].array()) + # assert ak.all(new["tree"]["b2"].array() == correct["tree"]["b2"].array()) + # assert ak.all(new["tree"]["b3"].array() == correct["tree"]["b3"].array()) + # assert ak.all(new["tree"]["b4"].array() == correct["tree"]["b4"].array()) + import ROOT + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") + tree = inFile.Get("tree") + + +def HZZ_test(tmp_path): + with uproot.open( + data_path("uproot-HZZ.root"), minimal_ttree_metadata=False ) as test: # print(test["events"]["NMuon"].typename) # print(test["events"]) # print(test['events'].all_members) - # with uproot.update("/Users/zobil/Documents/samples/uproot-HZZ.root copy") as new: - # data = np.arange(0, 2421, 1) - # new.add("events", {"data": data}, source="events") + with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: + # data = np.arange(0, 2420, 1) + data = [] + for i in range(2421): + data.append(np.arange(0, 3, 1)) + data = ak.Array(data) + new.add("events", {"data": data}) with uproot.open( - "/Users/zobil/Documents/samples/uproot-HZZ.root copy", + os.path.join(tmp_path, "uproot-HZZ.root copy"), minimal_ttree_metadata=False, ) as check: + print(check.keys(cycle=False)) + print(check["events"]["data"].array()) print(check["events"].arrays()) print(test["events"].arrays()) - # print(check["events"]["Photon_Px"].member("fLeaves")[0].member("fLeafCount")) + for key in test["events"].keys(): + assert key in test["events"].keys() + assert ak.all( + check["events"][key].array() == test["events"][key].array() + ) + + # print(check['events'].chunk.start, check['events'].chunk.stop) + # print(check['events'].chunk.get(1000, 2000, check['events'].cursor, context=None).tobytes()) + import ROOT -# simple_test() -HZZ_test() -test_ak_arrays() + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" + ) + tree = inFile.Get("events") From f93437f7a8f17a9cb14c46a490d945979b76ef71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 10 May 2024 11:59:10 +0200 Subject: [PATCH 08/20] Changed method of switching and deleting ttrees --- src/uproot/writing/_cascade.py | 26 +- src/uproot/writing/_cascadetree.py | 650 ++++++++++++++----------- src/uproot/writing/writable.py | 47 +- tests/test_1155_feat_add_copy_ttree.py | 125 ++--- 4 files changed, 438 insertions(+), 410 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 442001a9b..dea7e9d4f 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -824,6 +824,7 @@ def serialize(self, out, branch): ) out.append(uproot._util.tobytes(leaf_header)) + if len(leaf_name) < 255: out.append( struct.pack(">B%ds" % len(leaf_name), len(leaf_name), leaf_name) @@ -905,12 +906,6 @@ def serialize(self, out, branch): if len(datum["fBaskets"]) >= 1: raise NotImplementedError - # out.append( - # self._branch_data["fBaskets"].serialize( - # out, - # ) - # ) - out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) @@ -2132,7 +2127,7 @@ def add_tree( tree.write_anew(sink) return tree - def copy_tree( + def add_branches( self, sink, name, @@ -2145,6 +2140,7 @@ def copy_tree( existing_ttree, existing_branches, new_branches, + directory, ): import uproot.writing._cascadetree @@ -2161,9 +2157,19 @@ def copy_tree( existing_branches, existing_ttree, ) - tree.write_anew(sink) - tree.extend(sink._file, sink, new_branches) - + tree.add_branches( + sink, directory.file, new_branches + ) # need new_branches for extend... + # start = key.seek_location + # stop = start + key.num_bytes + key.compressed_bytes + # directory._cascading.freesegments.release(start, stop) + + # directory._cascading._data.remove_key(key) + # self._cascading.header.modified_on = datetime.datetime.now() + + # directory._cascading.write(self._file.sink) + # directory._file.sink.set_file_length(self._cascading.freesegments.fileheader.end) + # directory._file.sink.flush() return tree def add_rntuple(self, sink, name, title, akform): diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 015e4bdc1..255b7f631 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -1578,339 +1578,419 @@ def write_string_basket(self, sink, branch_name, compression, array, offsets): return fKeylen + fObjlen, fNbytes, location - def add_data(self, file, sink, data): - # do checks before getting here...easier - # add to a single branch? - # remember not to alter data! + def get_tree_key(self): + if ";" in self._name: + at = self._name.rindex(";") + item, cycle = self._name[:at], self._name[at + 1 :] + key = self._directory.data.get_key(item, cycle) + else: + key = self._directory.data.get_key(self._name, None) + return key + + def add_branches(self, sink, file, new_branches): + old_key = self.get_tree_key() + self.write_with_new_branches(sink, old_key) + start = old_key.location + stop = start + old_key.num_bytes + old_key.compressed_bytes + self._freesegments.release(start, stop) + sink.set_file_length(self._freesegments.fileheader.end) + sink.flush() - if self._num_baskets >= self._basket_capacity - 1: - self._basket_capacity = max( - self._basket_capacity + 1, - int(math.ceil(self._basket_capacity * self._resize_factor)), - ) + self.extend(file, sink, new_branches) - for datum in self._branch_data: - if datum["kind"] == "record": - continue + def write_with_new_branches(self, sink, old_key): + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = self._name.encode(errors="surrogateescape") + title_asbytes = self._title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len(name_asbytes) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len(title_asbytes) - fBasketBytes = datum["fBasketBytes"] - fBasketEntry = datum["fBasketEntry"] - fBasketSeek = datum["fBasketSeek"] - datum["fBasketBytes"] = numpy.zeros( - self._basket_capacity, uproot.models.TBranch._tbranch13_dtype1 - ) - datum["fBasketEntry"] = numpy.zeros( - self._basket_capacity, uproot.models.TBranch._tbranch13_dtype2 - ) - datum["fBasketSeek"] = numpy.zeros( - self._basket_capacity, uproot.models.TBranch._tbranch13_dtype3 - ) - datum["fBasketBytes"][: len(fBasketBytes)] = fBasketBytes - datum["fBasketEntry"][: len(fBasketEntry)] = fBasketEntry - datum["fBasketSeek"][: len(fBasketSeek)] = fBasketSeek - datum["fBasketEntry"][len(fBasketEntry)] = self._num_entries - oldloc = start = self._key.location - stop = start + self._key.num_bytes + self._key.compressed_bytes + out = [None] + ttree_header_index = 0 - self.write_anew(sink) + tobject = uproot.models.TObject.Model_TObject.empty() + tnamed = uproot.models.TNamed.Model_TNamed.empty() + tnamed._bases.append(tobject) + tnamed._members["fTitle"] = self._title + tnamed._serialize(out, True, self._name, uproot.const.kMustCleanup) - newloc = self._key.seek_location - file._move_tree(oldloc, newloc) + # TAttLine v2, fLineColor: 602 fLineStyle: 1 fLineWidth: 1 + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + # TAttMarker v2, fMarkerColor: 1, fMarkerStyle: 1, fMarkerSize: 1.0 + out.append( + b"@\x00\x00\x08\x00\x02\x02Z\x00\x01\x00\x01" + b"@\x00\x00\x06\x00\x02\x00\x00\x03\xe9" + b"@\x00\x00\n\x00\x02\x00\x01\x00\x01?\x80\x00\x00" + ) - self._freesegments.release(start, stop) - sink.set_file_length(self._freesegments.fileheader.end) - sink.flush() + metadata_out_index = len(out) + out.append( + uproot.models.TTree._ttree20_format1.pack( + self._num_entries, + self._metadata["fTotBytes"], + self._metadata["fZipBytes"], + self._metadata["fSavedBytes"], + self._metadata["fFlushedBytes"], + self._metadata["fWeight"], + self._metadata["fTimerInterval"], + self._metadata["fScanField"], + self._metadata["fUpdate"], + self._metadata["fDefaultEntryOffsetLen"], + self._metadata["fNClusterRange"], + self._metadata["fMaxEntries"], + self._metadata["fMaxEntryLoop"], + self._metadata["fMaxVirtualSize"], + self._metadata["fAutoSave"], + self._metadata["fAutoFlush"], + self._metadata["fEstimate"], + ) + ) - provided = None + # speedbump (0), fClusterRangeEnd (empty array), + # speedbump (0), fClusterSize (empty array) + # fIOFeatures (TIOFeatures) + out.append(b"\x00\x00@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") - if uproot._util.from_module(data, "awkward"): - try: - awkward = uproot.extras.awkward() - except ModuleNotFoundError as err: - raise TypeError( - f"an Awkward Array was provided, but 'awkward' cannot be imported: {data!r}" - ) from err + tleaf_reference_numbers = [] - if isinstance(data, awkward.Array): - if data.ndim > 1 and not data.layout.purelist_isregular: - provided = { - self._counter_name(""): numpy.asarray( - awkward.num(data, axis=1), dtype=">u4" - ) - } - else: - provided = {} - for k, v in zip(awkward.fields(data), awkward.unzip(data)): - provided[k] = v + tobjarray_of_branches_index = len(out) + out.append(None) - if isinstance(data, numpy.ndarray) and data.dtype.fields is not None: - provided = recarray_to_dict(data) + num_branches = sum( + 0 if datum["kind"] == "record" else 1 for datum in self._branch_data + ) + if self._existing_branches: + num_branches += len(self._existing_branches) + # TObjArray header with fName: "" + out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") + out.append( + uproot.models.TObjArray._tobjarray_format1.pack( + num_branches, # TObjArray fSize + 0, # TObjArray fLowerBound + ) + ) - if provided is None: - if not isinstance(data, Mapping) or not all( - isinstance(x, str) for x in data - ): - raise TypeError( - "'extend' requires a mapping from branch name (str) to arrays" - ) + # Write old branches? + if self._existing_branches: + old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) + for branch in self._existing_branches: + # create OldTBranch object + # members = uproot.branch.read_members() + out, temp = old_branches.serialize( + out, branch + ) # should call uproot.models.TBranch._tbranch13_format...pack or something + tleaf_reference_numbers.append(temp) # and don't forget the tleaves + for datum in self._branch_data: + if datum["kind"] == "record": + continue - provided = {} - for k, v in data.items(): - if not uproot._util.from_module(v, "awkward"): - if not hasattr(v, "dtype") and not isinstance(v, Mapping): - try: - with warnings.catch_warnings(): - warnings.simplefilter( - "error", category=numpy.VisibleDeprecationWarning - ) - v = numpy.array(v) # noqa: PLW2901 (overwriting v) - if v.dtype == numpy.dtype("O"): - raise Exception - except (numpy.VisibleDeprecationWarning, Exception): - try: - awkward = uproot.extras.awkward() - except ModuleNotFoundError as err: - raise TypeError( - f"NumPy dtype would be dtype('O'), so we won't use NumPy, but 'awkward' cannot be imported: {k}: {type(v)}" - ) from err - v = awkward.from_iter(v) # noqa: PLW2901 (overwriting v) + any_tbranch_index = len(out) + out.append(None) + out.append(b"TBranch\x00") - if getattr(v, "dtype", None) == numpy.dtype("O"): - try: - awkward = uproot.extras.awkward() - except ModuleNotFoundError as err: - raise TypeError( - f"NumPy dtype is dtype('O'), so we won't use NumPy, but 'awkward' cannot be imported: {k}: {type(v)}" - ) from err - v = awkward.from_iter(v) # noqa: PLW2901 (overwriting v) + tbranch_index = len(out) + out.append(None) - if uproot._util.from_module(v, "awkward"): - try: - awkward = uproot.extras.awkward() - except ModuleNotFoundError as err: - raise TypeError( - f"an Awkward Array was provided, but 'awkward' cannot be imported: {k}: {type(v)}" - ) from err - if ( - isinstance(v, awkward.Array) - and v.ndim > 1 - and not v.layout.purelist_isregular - ): - kk = self._counter_name(k) - vv = numpy.asarray(awkward.num(v, axis=1), dtype=">u4") - if kk in provided and not numpy.array_equal(vv, provided[kk]): - raise ValueError( - f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" - ) - provided[kk] = vv + tbranch_tobject = uproot.models.TObject.Model_TObject.empty() + tbranch_tnamed = uproot.models.TNamed.Model_TNamed.empty() + tbranch_tnamed._bases.append(tbranch_tobject) + tbranch_tnamed._members["fTitle"] = datum["fTitle"] + tbranch_tnamed._serialize( + out, True, datum["fName"], numpy.uint32(0x00400000) + ) - if k in provided and not numpy.array_equal(v, provided[k]): - raise ValueError( - f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" - ) - provided[k] = v - actual_branches = {} - for datum in self._branch_data: - if datum["fName"] in provided: - actual_branches[datum["fName"]] = provided.pop(datum["fName"]) - else: - raise ValueError( - "'extend' must be given an array for every branch; missing {}".format( - repr(datum["fName"]) - ) - ) + # TAttFill v2, fFillColor: 0, fFillStyle: 1001 + out.append(b"@\x00\x00\x06\x00\x02\x00\x00\x03\xe9") - if len(provided) != 0: - raise ValueError( - "'extend' was given data that do not correspond to any branch: {}".format( - ", ".join(repr(x) for x in provided) + assert sum(1 if x is None else 0 for x in out) == 4 + datum["metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) + + # Lie about the compression level so that ROOT checks and does the right thing. + # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 + # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. + # (I don't know where the "no, really, this is uncompressed" bit is.) + fCompress = 0 + + out.append( + uproot.models.TBranch._tbranch13_format1.pack( + fCompress, + datum["fBasketSize"], + datum["fEntryOffsetLen"], + self._num_baskets, # fWriteBasket + self._num_entries, # fEntryNumber ) ) + # fIOFeatures (TIOFeatures) + out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") - tofill = [] - num_entries = None - for branch_name, branch_array in actual_branches.items(): - if num_entries is None: - num_entries = len(branch_array) - elif num_entries != len(branch_array): - raise ValueError( - f"'extend' must fill every branch with the same number of entries; {branch_name!r} has {len(branch_array)} entries" + out.append( + uproot.models.TBranch._tbranch13_format2.pack( + datum["fOffset"], + self._basket_capacity, # fMaxBaskets + datum["fSplitLevel"], + self._num_entries, # fEntries + datum["fFirstEntry"], + datum["fTotBytes"], + datum["fZipBytes"], ) + ) + # empty TObjArray of TBranches + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) - datum = self._branch_data[self._branch_lookup[branch_name]] - # if datum["kind"] == "record": - # continue + subtobjarray_of_leaves_index = len(out) + out.append(None) - if datum["counter"] is None: - if datum["dtype"] == ">U0": - lengths = numpy.asarray(awkward.num(branch_array.layout)) - which_big = lengths >= 255 + # TObjArray header with fName: "", fSize: 1, fLowerBound: 0 + out.append( + b"\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00" + ) - lengths_extension_offsets = numpy.empty( - len(branch_array.layout) + 1, numpy.int64 - ) - lengths_extension_offsets[0] = 0 - numpy.cumsum(which_big * 4, out=lengths_extension_offsets[1:]) + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) + datum["tleaf_reference_number"] = absolute_location + 2 + tleaf_reference_numbers.append(datum["tleaf_reference_number"]) + subany_tleaf_index = len(out) + out.append(None) - lengths_extension = awkward.contents.ListOffsetArray( - awkward.index.Index64(lengths_extension_offsets), - awkward.contents.NumpyArray( - lengths[which_big].astype(">u4").view("u1") - ), - ) + letter = _dtype_to_char[datum["dtype"]] + letter_upper = letter.upper() + out.append(("TLeaf" + letter_upper).encode() + b"\x00") + if letter_upper == "O": + special_struct = uproot.models.TLeaf._tleafO1_format1 + elif letter_upper == "B": + special_struct = uproot.models.TLeaf._tleafb1_format1 + elif letter_upper == "S": + special_struct = uproot.models.TLeaf._tleafs1_format1 + elif letter_upper == "I": + special_struct = uproot.models.TLeaf._tleafi1_format1 + elif letter_upper == "G": + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif letter_upper == "L": + special_struct = uproot.models.TLeaf._tleafl1_format0 + elif letter_upper == "F": + special_struct = uproot.models.TLeaf._tleaff1_format1 + elif letter_upper == "D": + special_struct = uproot.models.TLeaf._tleafd1_format1 + elif letter_upper == "C": + special_struct = uproot.models.TLeaf._tleafc1_format1 + fLenType = datum["dtype"].itemsize + fIsUnsigned = letter != letter_upper - lengths[which_big] = 255 + if datum["shape"] == (): + dims = "" + else: + dims = "".join("[" + str(x) + "]" for x in datum["shape"]) - leafc_data_awkward = awkward.concatenate( - [ - lengths.reshape(-1, 1).astype("u1"), - lengths_extension, - awkward.without_parameters(branch_array.layout), - ], - axis=1, - ) + if datum["counter"] is not None: + dims = "[" + datum["counter"]["fName"] + "]" + dims + # single TLeaf + leaf_name = datum["fName"].encode(errors="surrogateescape") + leaf_title = (datum["fName"] + dims).encode(errors="surrogateescape") + leaf_name_length = (1 if len(leaf_name) < 255 else 5) + len(leaf_name) + leaf_title_length = (1 if len(leaf_title) < 255 else 5) + len(leaf_title) + + leaf_header = numpy.array( + [ + 64, + 0, + 0, + 76, + 0, + 1, + 64, + 0, + 0, + 54, + 0, + 2, + 64, + 0, + 0, + 30, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + ], + numpy.uint8, + ) + tmp = leaf_header[0:4].view(">u4") + tmp[:] = ( + numpy.uint32( + 42 + leaf_name_length + leaf_title_length + special_struct.size + ) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[6:10].view(">u4") + tmp[:] = ( + numpy.uint32(36 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) + tmp = leaf_header[12:16].view(">u4") + tmp[:] = ( + numpy.uint32(12 + leaf_name_length + leaf_title_length) + | uproot.const.kByteCountMask + ) - big_endian = numpy.asarray(awkward.flatten(leafc_data_awkward)) - big_endian_offsets = ( - lengths_extension_offsets - + numpy.asarray(branch_array.layout.offsets) - + numpy.arange(len(branch_array.layout.offsets)) - ).astype(">i4", copy=True) - tofill.append( - ( - branch_name, - datum["compression"], - big_endian, - big_endian_offsets, - ) - ) - else: - big_endian = uproot._util.ensure_numpy(branch_array).astype( - datum["dtype"] + out.append(uproot._util.tobytes(leaf_header)) + if len(leaf_name) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_name), len(leaf_name), leaf_name) + ) + else: + out.append( + struct.pack( + ">BI%ds" % len(leaf_name), 255, len(leaf_name), leaf_name ) - if big_endian.shape != (len(branch_array),) + datum["shape"]: - raise ValueError( - "'extend' must fill branches with a consistent shape: has {}, trying to fill with {}".format( - datum["shape"], - big_endian.shape[1:], - ) - ) - tofill.append((branch_name, datum["compression"], big_endian, None)) - if datum["kind"] == "counter": - datum["tleaf_maximum_value"] = max( - big_endian.max(), datum["tleaf_maximum_value"] - ) - + ) + if len(leaf_title) < 255: + out.append( + struct.pack(">B%ds" % len(leaf_title), len(leaf_title), leaf_title) + ) else: - try: - awkward = uproot.extras.awkward() - except ModuleNotFoundError as err: - raise TypeError( - f"a jagged array was provided (possibly as an iterable), but 'awkward' cannot be imported: {branch_name}: {branch_array!r}" - ) from err - layout = branch_array.layout - while not isinstance(layout, awkward.contents.ListOffsetArray): - if isinstance(layout, awkward.contents.IndexedArray): - layout = layout.project() + out.append( + struct.pack( + ">BI%ds" % len(leaf_title), 255, len(leaf_title), leaf_title + ) + ) - elif isinstance(layout, awkward.contents.ListArray): - layout = layout.to_ListOffsetArray64(False) + fLen = 1 + for item in datum["shape"]: + fLen *= item - else: - raise AssertionError( - "how did this pass the type check?\n\n" + repr(layout) - ) + # generic TLeaf members + out.append( + uproot.models.TLeaf._tleaf2_format0.pack( + fLen, + fLenType, + 0, # fOffset + datum["kind"] == "counter", # fIsRange + fIsUnsigned, + ) + ) + if datum["counter"] is None: + # null fLeafCount + out.append(b"\x00\x00\x00\x00") + else: + # reference to fLeafCount + out.append( + uproot.deserialization._read_object_any_format1.pack( + datum["counter"]["tleaf_reference_number"] + ) + ) - content = layout.content - offsets = numpy.asarray(layout.offsets) + # specialized TLeaf* members (fMinimum, fMaximum) + out.append(special_struct.pack(0, 0)) - if offsets[0] != 0: - content = content[offsets[0] :] - offsets = offsets - offsets[0] - if len(content) > offsets[-1]: - content = content[: offsets[-1]] + datum["tleaf_special_struct"] = special_struct - shape = [len(content)] - while not isinstance(content, awkward.contents.NumpyArray): - if isinstance(content, awkward.contents.IndexedArray): - content = content.project() + out[subany_tleaf_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[subany_tleaf_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, + ) + ) - elif isinstance(content, awkward.contents.EmptyArray): - content = content.to_NumpyArray(dtype=numpy.float64) + out[subtobjarray_of_leaves_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[subtobjarray_of_leaves_index + 1 :]), + 3, # TObjArray + ) - elif isinstance(content, awkward.contents.RegularArray): - shape.append(content.size) - content = content.content + # empty TObjArray of fBaskets (embedded) + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) - else: - raise AssertionError( - "how did this pass the type check?\n\n" + repr(content) - ) + assert sum(1 if x is None else 0 for x in out) == 4 + datum["basket_metadata_start"] = (6 + 6 + 8 + 6) + sum( + len(x) for x in out if x is not None + ) - big_endian = numpy.asarray(content.data, dtype=datum["dtype"]) - shape = tuple(shape) + big_endian.shape[1:] + # speedbump and fBasketBytes + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketBytes"])) + # speedbump and fBasketEntry + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketEntry"])) + # speedbump and fBasketSeek + out.append(b"\x01") + out.append(uproot._util.tobytes(datum["fBasketSeek"])) + # empty fFileName + out.append(b"\x00") - if shape[1:] != datum["shape"]: - raise ValueError( - "'extend' must fill branches with a consistent shape: has {}, trying to fill with {}".format( - datum["shape"], - shape[1:], - ) - ) - big_endian_offsets = offsets.astype(">i4", copy=True) + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + ) - tofill.append( - ( - branch_name, - datum["compression"], - big_endian.reshape(-1), - big_endian_offsets, - ) + out[any_tbranch_index] = ( + uproot.serialization._serialize_object_any_format1.pack( + numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) + | uproot.const.kByteCountMask, + uproot.const.kNewClassTag, ) + ) - # actually write baskets into the file - uncompressed_bytes = 0 - compressed_bytes = 0 - for branch_name, compression, big_endian, big_endian_offsets in tofill: - datum = self._branch_data[self._branch_lookup[branch_name]] - - if datum["dtype"] == ">U0": - totbytes, zipbytes, location = self.write_string_basket( - sink, branch_name, compression, big_endian, big_endian_offsets - ) - datum["fEntryOffsetLen"] = 4 * (len(big_endian_offsets) - 1) + out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray + ) - elif big_endian_offsets is None: - totbytes, zipbytes, location = self.write_np_basket( - sink, branch_name, compression, big_endian - ) - else: - totbytes, zipbytes, location = self.write_jagged_basket( - sink, branch_name, compression, big_endian, big_endian_offsets - ) - datum["fEntryOffsetLen"] = 4 * (len(big_endian_offsets) - 1) - uncompressed_bytes += totbytes - compressed_bytes += zipbytes + # TObjArray of TLeaf references + tleaf_reference_bytes = uproot._util.tobytes( + numpy.array(tleaf_reference_numbers, ">u4") + ) + out.append( + struct.pack( + ">I13sI4s", + (21 + len(tleaf_reference_bytes)) | uproot.const.kByteCountMask, + b"\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00", + len(tleaf_reference_numbers), + b"\x00\x00\x00\x00", + ) + ) - datum["fTotBytes"] += totbytes - datum["fZipBytes"] += zipbytes + out.append(tleaf_reference_bytes) - datum["fBasketBytes"][self._num_baskets] = zipbytes + # null fAliases (b"\x00\x00\x00\x00") + # empty fIndexValues array (4-byte length is zero) + # empty fIndex array (4-byte length is zero) + # null fTreeIndex (b"\x00\x00\x00\x00") + # null fFriends (b"\x00\x00\x00\x00") + # null fUserInfo (b"\x00\x00\x00\x00") + # null fBranchRef (b"\x00\x00\x00\x00") + out.append(b"\x00" * 28) - if self._num_baskets + 1 < self._basket_capacity: - fBasketEntry = datum["fBasketEntry"] - i = self._num_baskets - fBasketEntry[i + 1] = num_entries + fBasketEntry[i] + out[ttree_header_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[ttree_header_index + 1 :]), 20 # TTree + ) - datum["fBasketSeek"][self._num_baskets] = location - datum["arrays_write_stop"] = self._num_baskets + 1 - # update TTree metadata in file - self._num_entries += num_entries - self._num_baskets += 1 - self._metadata["fTotBytes"] += uncompressed_bytes - self._metadata["fZipBytes"] += compressed_bytes + self._metadata_start = sum(len(x) for x in out[:metadata_out_index]) - self.write_updates(sink) + raw_data = b"".join(out) + self._key = self._directory.add_object( + sink, + "TTree", + self._name, + self._title, + raw_data, + len(raw_data), + replaces=old_key, + big=True, + ) _tbasket_offsets_length = struct.Struct(">I") diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 428047eb4..11194ffb5 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -951,7 +951,6 @@ def _get_del_search(self, where, isget): keys=last._cascading.data.key_names, file_path=self.file_path, ) - return step else: @@ -1341,11 +1340,10 @@ def mktree( return tree - def add( # variation of mktree for copying ttree + def add_branches( # variation of mktree for copying ttree self, source, branches, - title="", *, counter_name=lambda counted: "n" + counted, field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, @@ -1368,7 +1366,7 @@ def add( # variation of mktree for copying ttree to make an empty TTree or to control its parameters. """ if self._file.sink.closed: - raise ValueError("cannot create a TTree in a closed file") + raise ValueError("cannot modify a TTree in a closed file") try: file = uproot.open(self.file_path, minimal_ttree_metadata=False) @@ -1383,31 +1381,16 @@ def add( # variation of mktree for copying ttree raise ValueError( f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" ) + at = -1 try: # Will this throw an error? proabably? at = old_ttree.name.rindex("/") except ValueError: treename = old_ttree.name directory = self - else: - dirpath, treename = old_ttree.name[:at], old_ttree.name[at + 1 :] - directory = self.mkdir(dirpath) - import copy - - ot = copy.deepcopy(old_ttree) - - del self[old_ttree.name] - + treename = old_ttree.name[at + 1 :] path = (*directory._path, treename) - # # if awkward: - # if uproot._util.from_module(branches, "awkward"): - - # # Go through all fields? Check lengths and get dtypes? - # data[branch_name] = branch_array - # metadata[branch_name] = branch_array.type - awkward = uproot.extras.awkward() - # branch_types = {name: array.type for name, array in zip(awkward.fields(branches), awkward.unzip(branches))} import numpy if uproot._util.from_module(branches, "awkward"): @@ -1484,25 +1467,26 @@ def add( # variation of mktree for copying ttree if branch_shape != (): branch_dtype = numpy.dtype((branch_dtype, branch_shape)) metadata[branch_name] = branch_dtype - + file.close() tree = WritableTree( path, directory._file, - directory._cascading.copy_tree( + directory._cascading.add_branches( directory._file.sink, - ot.name, - title, + old_ttree.name, + old_ttree.title, metadata, counter_name, field_name, initial_basket_capacity, resize_factor, - ot, - ot.branches, + old_ttree, + old_ttree.branches, branches, + directory, ), ) - directory._file._new_tree(tree) + # directory._file._new_tree(tree) seen = set() streamers = [] @@ -1529,7 +1513,6 @@ def add( # variation of mktree for copying ttree directory._file._cascading.streamers.update_streamers( directory._file.sink, streamers ) - return tree def mkrntuple( @@ -2051,12 +2034,6 @@ def show( stream=stream, ) - def add_data( - self, data, **more_data - ): # Eventually... def add(self, as_dict=None, **as_kwds): - # data must be a dict, - self._cascading.extend(self._file, self._file.sink, data) - class WritableBranch: """ diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 947157252..3ecdf5606 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -3,6 +3,9 @@ import uproot.serialization import uproot.writing.writable import os +import pytest + +ROOT = pytest.importorskip("ROOT") # import ROOT import numpy as np @@ -33,17 +36,19 @@ def test_vector(): def simple_test(tmp_path): - data = np.array([1, 2, 3, 4, 5], dtype=np.int8) - data1 = np.array([2, 3, 4, 5, 6], dtype=np.int8) + import ROOT + + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: - f["tree"] = {"b1": data, "b2": data1} + f["whatever"] = {"b1": data} with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: - f["tree"] = {"b1": data} + f["whatever"] = {"b1": data, "b2": data1} with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: - f.add("tree", {"b2": data1}) + f.add_branches("whatever", {"b3": data, "b4": data1}) with uproot.open( os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False @@ -52,73 +57,39 @@ def simple_test(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: - new_chunk, new_cursor = new.key("tree").get_uncompressed_chunk_cursor() - check_chenk, check_cursor = check.key( - "tree" - ).get_uncompressed_chunk_cursor() - print("begin", new.file.chunk(22002, new.file.fEND).raw_data.tobytes()) - print(check.file.fEND) - print(new.file.fEND) - # print(check['tree'].chunk.raw_data.tobytes(), "\n") - # print(new['tree'].chunk.raw_data.tobytes()) - # cursor = uproot.source.cursor.Cursor(0) - # print(check.cursor) - # print(check.file.source.chunk( - # 0, 160 - # ).raw_data.tobytes()) - # print(new.file.chunk(0, 100).raw_data.tobytes()) - # print("?",len('root\x00\x00\xf3\xc0\x00\x00\x00d\x00\x00V\x06\x00\x00U\xc4\x00\x00\x00B\x00\x00\x00\x02\x00\x00\x00<\x04\x00\x00\x00e\x00\x00\tn\x00\x00LV\x00\x01\xcd)\xb8|\x06\x0f\x11\xef\x82\xae\xfe\xb1\xc7\x122b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x'), "\n") - - # print(new['tree'].cursor) - # print(check['tree'].chunk.start) - - # key = new.key("tree") - # chunk, cursor = key.get_uncompressed_chunk_cursor() - - # new.file.chunk(new.cursor.index, new.cursor.index+500) - # new.cursor.debug(new.file.chunk(new.cursor.index, new.cursor.index+500), limit_bytes=1000) - - # print(new["tree"]["b4"].member("fLeaves")[0].all_members) - assert new["tree"].keys() == ["b1", "b2"] - - print(new.keys()) - print(check["tree"]["b1"].all_members) - assert ak.all(new["tree"].arrays()["b1"] == [1, 2, 3, 4, 5]) - - assert ak.all(new["tree"].arrays()["b2"] == [2, 3, 4, 5, 6]) - # assert ak.all(new["tree"].arrays()["b3"] == [5, 6, 7]) - # assert ak.all(new["tree"].arrays()["b4"] == [7, 8, 9]) - - import ROOT + print(new.file.chunk(1358, 2677).raw_data.tobytes(), "\n") + print(check.file.chunk(1358, 2677).raw_data.tobytes()) inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") - tree = inFile.Get("tree") - # print(tree.GetBranch("b2")) - # for x in tree: - # print(getattr(x, 'b2')) + tree = inFile.Get("whatever;1") + print(tree) + for x in tree: + print(getattr(x, "b1")) def test_ak_arrays(tmp_path): + + data = np.array([1, 2, 3], dtype=np.int64) + data1 = np.array([2, 3, 4], dtype=np.int64) + data2 = np.array([3, 4, 5], dtype=np.int64) with uproot.recreate(os.path.join(tmp_path, "control.root")) as file: file["tree"] = { - "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), - "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), - "b3": ak.Array([[5, 4, 5], [6], [7]]), - "b4": ak.Array([[7], [8], [9]]), + "b1": ak.Array([data, data1, data2]), + "b2": ak.Array([data1, data2, data]), + "b3": ak.Array([data2, data, data1]), } with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: file["tree"] = { - "b1": ak.Array([[1, 2, 3], [1, 2], [6, 7]]), - "b2": ak.Array([[1, 2, 3], [1, 2], [6, 7, 8]]), + "b1": ak.Array([data, data1, data2]), + "b2": ak.Array([data1, data2, data]), } with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: - write.add( + write.add_branches( "tree", { - "b3": ak.Array([[5, 4, 5], [6], [7]]), - "b4": ak.Array([[7], [8], [9]]), + "b3": ak.Array([data2, data, data1]), }, ) @@ -128,31 +99,18 @@ def test_ak_arrays(tmp_path): with uproot.open( os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False ) as new: - print(new.file.show_streamers("TLeafL")) - # print(new['tree']['b1'].member("fLeaves")[0].member("fName")) - # print(correct["tree"].show()) - # print(new['tree'].chunk.raw_data.tobytes()) - # print(correct["tree"]["b1"].member("fLeaves")[0]) - # print(correct.file.chunk(correct.file.fSeekInfo, correct.file.fEND).raw_data.tobytes()) - # correct.file.show_streamers() - - # key = new.key("tree") - # chunk, cursor = key.get_uncompressed_chunk_cursor() - # cursor.debug(chunk, limit_bytes=1000) - # print("...") - # key = correct.key("tree") - # chunk, cursor = key.get_uncompressed_chunk_cursor() - # cursor.debug(chunk, limit_bytes=1000) - - # assert new["tree"].keys() == correct["tree"].keys() - # assert ak.all(new["tree"]["b1"].array() == correct["tree"]["b1"].array()) - # assert ak.all(new["tree"]["b2"].array() == correct["tree"]["b2"].array()) - # assert ak.all(new["tree"]["b3"].array() == correct["tree"]["b3"].array()) - # assert ak.all(new["tree"]["b4"].array() == correct["tree"]["b4"].array()) + import ROOT inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") tree = inFile.Get("tree") + print(tree.GetBranch("b1")) + for x in tree: + + print(getattr(x, "b1")) + print(tree.GetBranch("b2")) + for x in tree: + print(getattr(x, "b2")) def HZZ_test(tmp_path): @@ -170,7 +128,7 @@ def HZZ_test(tmp_path): for i in range(2421): data.append(np.arange(0, 3, 1)) data = ak.Array(data) - new.add("events", {"data": data}) + new.add_branches("events", {"data": data}) with uproot.open( os.path.join(tmp_path, "uproot-HZZ.root copy"), @@ -187,8 +145,8 @@ def HZZ_test(tmp_path): check["events"][key].array() == test["events"][key].array() ) - # print(check['events'].chunk.start, check['events'].chunk.stop) - # print(check['events'].chunk.get(1000, 2000, check['events'].cursor, context=None).tobytes()) + # print(check.file.chunk.start, check['events'].chunk.stop) + # print(check.file.chunk.get(1000, 2000, check['events'].cursor, context=None).tobytes()) import ROOT @@ -196,3 +154,10 @@ def HZZ_test(tmp_path): os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" ) tree = inFile.Get("events") + + # print(check["events"]["Photon_Px"].member("fLeaves")[0].member("fLeafCount")) + + +simple_test("/Users/zobil/Desktop/directory") +# HZZ_test("/Users/zobil/Desktop/directory") +# test_ak_arrays("/Users/zobil/Desktop/directory") From 49116faafd1eed14a99458eae2d792285b965a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 15 May 2024 15:52:10 +0200 Subject: [PATCH 09/20] Added some streamer handling --- src/uproot/writing/_cascade.py | 28 ++- src/uproot/writing/_cascadetree.py | 53 ++--- src/uproot/writing/writable.py | 70 +++---- tests/test_1155_feat_add_copy_ttree.py | 257 +++++++++++++++++++++---- 4 files changed, 295 insertions(+), 113 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 21dbc333e..19dfda41c 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -713,9 +713,21 @@ def serialize(self, out, branch): # TODO how to handle this? Make sure to be TBranchElements will be handled too # empty TObjArray of TBranches - out.append( - b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - ) + if len(datum["fBranches"]) == 0: + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + else: + # print("serialize branches!!") + # # TObjArray header with fName: "" + out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") + out.append( + uproot.models.TObjArray._tobjarray_format1.pack( + len(self._branch_data["fBranches"]), # TObjArray fSize + 0, # TObjArray fLowerBound + ) + ) + subtobjarray_of_leaves_index = len(out) out.append(None) @@ -904,7 +916,9 @@ def serialize(self, out, branch): # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" if len(datum["fBaskets"]) >= 1: - raise NotImplementedError + # print("NotImplementedError, cannot yet write TObjArray of fBaskets") + msg = "Cannot yet write baskets" + raise NotImplementedError(msg) out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -918,11 +932,9 @@ def serialize(self, out, branch): # speedbump and fBasketBytes out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketBytes"])) - # speedbump and fBasketEntry out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketEntry"])) - # speedbump and fBasketSeek out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketSeek"])) @@ -2157,7 +2169,7 @@ def add_branches( existing_branches, existing_ttree, ) - tree.add_branches( + updated_streamers = tree.add_branches( sink, directory.file, new_branches ) # need new_branches for extend... # start = key.seek_location @@ -2170,7 +2182,7 @@ def add_branches( # directory._cascading.write(self._file.sink) # directory._file.sink.set_file_length(self._cascading.freesegments.fileheader.end) # directory._file.sink.flush() - return tree + return tree, updated_streamers def add_rntuple(self, sink, name, title, akform): import uproot.writing._cascadentuple diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 55ba0cb5c..5eae93c7d 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -897,8 +897,7 @@ def write_anew(self, sink): num_branches = sum( 0 if datum["kind"] == "record" else 1 for datum in self._branch_data ) - if self._existing_branches: - num_branches += len(self._existing_branches) + # TObjArray header with fName: "" out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") out.append( @@ -908,16 +907,6 @@ def write_anew(self, sink): ) ) - # Write old branches? - if self._existing_branches: - old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) - for branch in self._existing_branches: - # create OldTBranch object - # members = uproot.branch.read_members() - out, temp = old_branches.serialize( - out, branch - ) # should call uproot.models.TBranch._tbranch13_format...pack or something - tleaf_reference_numbers.append(temp) # and don't forget the tleaves for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1369,7 +1358,6 @@ def write_np_basket(self, sink, branch_name, compression, array): itemsize = array.dtype.itemsize for item in array.shape[1:]: itemsize *= item - uncompressed_data = uproot._util.tobytes(array) compressed_data = uproot.compression.compress(uncompressed_data, compression) @@ -1590,16 +1578,20 @@ def get_tree_key(self): def add_branches(self, sink, file, new_branches): old_key = self.get_tree_key() - self.write_with_new_branches(sink, old_key) - start = old_key.location - stop = start + old_key.num_bytes + old_key.compressed_bytes - self._freesegments.release(start, stop) - sink.set_file_length(self._freesegments.fileheader.end) - sink.flush() - + # start = old_key.location + # stop = start + old_key.num_bytes + old_key.compressed_bytes + # self._freesegments.release(start, stop) + # sink.set_file_length(self._freesegments.fileheader.end) + # sink.flush() + # streamers = [x for x in file._cascading.tlist_of_streamers] + streamers = self.write_with_new_branches(sink, old_key) + # Reset + # old_key = self.get_tree_key() self.extend(file, sink, new_branches) + return streamers def write_with_new_branches(self, sink, old_key): + models_for_streamers = [] key_num_bytes = uproot.reading._key_format_big.size + 6 name_asbytes = self._name.encode(errors="surrogateescape") title_asbytes = self._title.encode(errors="surrogateescape") @@ -1660,6 +1652,7 @@ def write_with_new_branches(self, sink, old_key): num_branches = sum( 0 if datum["kind"] == "record" else 1 for datum in self._branch_data ) + # Include original branches in num_branches if self._existing_branches: num_branches += len(self._existing_branches) # TObjArray header with fName: "" @@ -1671,16 +1664,13 @@ def write_with_new_branches(self, sink, old_key): ) ) - # Write old branches? + # Write old branches if self._existing_branches: old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) for branch in self._existing_branches: # create OldTBranch object - # members = uproot.branch.read_members() - out, temp = old_branches.serialize( - out, branch - ) # should call uproot.models.TBranch._tbranch13_format...pack or something - tleaf_reference_numbers.append(temp) # and don't forget the tleaves + out, temp = old_branches.serialize(out, branch) + tleaf_reference_numbers.append(temp) for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1764,22 +1754,32 @@ def write_with_new_branches(self, sink, old_key): out.append(("TLeaf" + letter_upper).encode() + b"\x00") if letter_upper == "O": special_struct = uproot.models.TLeaf._tleafO1_format1 + model = uproot.models.TLeaf.Model_TLeafO_v1.class_rawstreamers elif letter_upper == "B": special_struct = uproot.models.TLeaf._tleafb1_format1 + model = uproot.models.TLeaf.Model_TLeafB_v1 elif letter_upper == "S": special_struct = uproot.models.TLeaf._tleafs1_format1 + model = uproot.models.TLeaf.Model_TLeafS_v1 elif letter_upper == "I": special_struct = uproot.models.TLeaf._tleafi1_format1 + model = uproot.models.TLeaf.Model_TLeafI_v1 elif letter_upper == "G": special_struct = uproot.models.TLeaf._tleafl1_format0 elif letter_upper == "L": special_struct = uproot.models.TLeaf._tleafl1_format0 + model = uproot.models.TLeaf.Model_TLeafL_v1 elif letter_upper == "F": special_struct = uproot.models.TLeaf._tleaff1_format1 + model = uproot.models.TLeaf.Model_TLeafF_v1 elif letter_upper == "D": special_struct = uproot.models.TLeaf._tleafd1_format1 + model = uproot.models.TLeaf.Model_TLeafD_v1 elif letter_upper == "C": special_struct = uproot.models.TLeaf._tleafc1_format1 + model = uproot.models.TLeaf.Model_TLeafC_v1 + + models_for_streamers.append(model) fLenType = datum["dtype"].itemsize fIsUnsigned = letter != letter_upper @@ -1992,6 +1992,7 @@ def write_with_new_branches(self, sink, old_key): replaces=old_key, big=True, ) + return models_for_streamers _tbasket_offsets_length = struct.Struct(">I") diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 11194ffb5..bf5000549 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1379,7 +1379,7 @@ def add_branches( # variation of mktree for copying ttree names = old_ttree.keys() if len(names) == 0: raise ValueError( - f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" + f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" # TODO does this check need to be here? ) at = -1 try: # Will this throw an error? proabably? @@ -1411,7 +1411,11 @@ def add_branches( # variation of mktree for copying ttree branch_array = uproot.writing._cascadetree.recarray_to_dict( # noqa: PLW2901 (overwriting branch_array) branch_array ) - + entries = old_ttree.member("fEntries") + if len(branch_array) != old_ttree.member("fEntries"): + raise ValueError( + f"'add_branches' must fill every branch with the same number of entries; new branches should have {entries} entries, but {branch_name!r} has {len(branch_array)} entries" + ) if isinstance(branch_array, Mapping) and all( isinstance(x, str) for x in branch_array ): @@ -1468,40 +1472,31 @@ def add_branches( # variation of mktree for copying ttree branch_dtype = numpy.dtype((branch_dtype, branch_shape)) metadata[branch_name] = branch_dtype file.close() - tree = WritableTree( - path, - directory._file, - directory._cascading.add_branches( - directory._file.sink, - old_ttree.name, - old_ttree.title, - metadata, - counter_name, - field_name, - initial_basket_capacity, - resize_factor, - old_ttree, - old_ttree.branches, - branches, - directory, - ), + update_streamers = [] + obj, update_streamers = directory._cascading.add_branches( + directory._file.sink, + old_ttree.name, + old_ttree.title, + metadata, + counter_name, + field_name, + initial_basket_capacity, + resize_factor, + old_ttree, + old_ttree.branches, + branches, + directory, ) - # directory._file._new_tree(tree) - - seen = set() - streamers = [] - for model in ( - uproot.models.TLeaf.Model_TLeafB_v1, - uproot.models.TLeaf.Model_TLeafS_v1, - uproot.models.TLeaf.Model_TLeafI_v1, - uproot.models.TLeaf.Model_TLeafL_v1, - uproot.models.TLeaf.Model_TLeafF_v1, - uproot.models.TLeaf.Model_TLeafD_v1, - uproot.models.TLeaf.Model_TLeafC_v1, - uproot.models.TLeaf.Model_TLeafO_v1, + tree = WritableTree(path, directory._file, obj) + update_streamers.append( uproot.models.TBranch.Model_TBranch_v13, + ) + update_streamers.append( uproot.models.TTree.Model_TTree_v20, - ): + ) + seen = set() + streamers = [] + for model in update_streamers: for rawstreamer in model.class_rawstreamers: classname_version = rawstreamer[-2], rawstreamer[-1] if classname_version not in seen: @@ -1509,9 +1504,9 @@ def add_branches( # variation of mktree for copying ttree streamers.append( uproot.writing._cascade.RawStreamerInfo(*rawstreamer) ) - directory._file._cascading.streamers.update_streamers( - directory._file.sink, streamers + directory._file.sink, + streamers, ) return tree @@ -1696,7 +1691,6 @@ def update(self, pairs=None, **more_pairs): update. """ streamers = [] - if pairs is not None: if hasattr(pairs, "keys"): all_pairs = itertools.chain( @@ -1722,13 +1716,9 @@ def update(self, pairs=None, **more_pairs): directory = directory[item] uproot.writing.identify.add_to_directory(v, name, directory, streamers) - self._file._cascading.streamers.update_streamers(self._file.sink, streamers) -# class UpdatableTree: - - class WritableTree: """ Args: diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 3ecdf5606..01f7862c4 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -6,8 +6,6 @@ import pytest ROOT = pytest.importorskip("ROOT") - -# import ROOT import numpy as np import awkward as ak @@ -36,10 +34,17 @@ def test_vector(): def simple_test(tmp_path): - import ROOT - data = np.array([1, 2, 3, 4, 5], dtype=np.int64) - data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) + data1 = np.array( + [ + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + ], + dtype=np.int32, + ) with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: f["whatever"] = {"b1": data} @@ -53,13 +58,44 @@ def simple_test(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False ) as check: - # check["tree"].show() with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: - print(new.file.chunk(1358, 2677).raw_data.tobytes(), "\n") - print(check.file.chunk(1358, 2677).raw_data.tobytes()) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") + tree = inFile.Get("whatever;1") + print(tree) + for x in tree: + print(getattr(x, "b1")) + + +def test_subbranches(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array( + [ + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + ], + dtype=np.int32, + ) + + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + f["whatever"] = {"b1": data, "b2": data1} + with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: + f.add_branches("whatever", {"b3": data, "b4": data1}) + + with uproot.open( + os.path.join(tmp_path, "tree_tester.root"), minimal_ttree_metadata=False + ) as check: + # check["tree"].show() + print(check.keys()) + with uproot.open( + os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False + ) as new: + print(new["whatever"].all_members) inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") tree = inFile.Get("whatever;1") print(tree) @@ -67,6 +103,22 @@ def simple_test(tmp_path): print(getattr(x, "b1")) +def test_different_fEntries(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) + + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + with pytest.raises(ValueError): + f["whatever"] = {"b1": data, "b2": data1} + f.add_branches( + "whatever", + { + "b3": data, + "b4": np.array([2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.int32), + }, + ) + + def test_ak_arrays(tmp_path): data = np.array([1, 2, 3], dtype=np.int64) @@ -82,13 +134,13 @@ def test_ak_arrays(tmp_path): with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: file["tree"] = { "b1": ak.Array([data, data1, data2]), - "b2": ak.Array([data1, data2, data]), } with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: write.add_branches( "tree", { + "b2": ak.Array([data1, data2, data]), "b3": ak.Array([data2, data, data1]), }, ) @@ -99,29 +151,173 @@ def test_ak_arrays(tmp_path): with uproot.open( os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False ) as new: - - import ROOT - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") tree = inFile.Get("tree") - print(tree.GetBranch("b1")) for x in tree: - print(getattr(x, "b1")) - print(tree.GetBranch("b2")) - for x in tree: - print(getattr(x, "b2")) + print(tree.Scan()) + # ak.Array() + # for x in tree: + # print(getattr(x, "b2").GetArray()) + + +def test_streamers_same_dtypes(tmp_path): + from ROOT import TTree + from array import array + + N = 4 + data = array("f", N * [0.0]) + data1 = array("f", [2.0, 3.0, 4.0, 5.0]) + + inFile = root.TFile( + "/Users/zobil/Desktop/directory/root_streamers_F.root", "RECREATE" + ) + tree = root.TTree("tree1", "tree") + import numpy as np + + # Basic type branch (float) - use array of length 1 + # n = array('f', [ 1.5 ]) + # tree.Branch('b1', n, 'b1/F') + + # Array branch - use array of length N + N = 4 + # a = array('d', N*[ 0. ]) + # tree.Branch('b1', a, 'b1[' + str(N) + ']/D') + + # # Array branch - use NumPy array of length N + npa = np.zeros(4, dtype=np.float32) + tree.Branch("b1", npa, "b1/F") + for i in range(4): + npa[0] = i**0 + tree.Fill() + inFile.Write() + inFile.Close() + + inFile = root.TFile.Open(os.path.join(tmp_path, "root_streamers_F.root"), "OPEN") + tree = inFile.Get("tree1") + tree.Scan() + data = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32) + + with uproot.update(os.path.join(tmp_path, "root_streamers_F.root")) as file: + file.add_branches("tree1", {"b2": data}) + + with uproot.open( + os.path.join(tmp_path, "root_streamers_F.root"), minimal_ttree_metadata=False + ) as file: + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "root_streamers_F.root"), "READ" + ) + tree = inFile.Get("tree1;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] + assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] + indx += 1 + + tree.Scan() + check = [ + "TBranch", + "TAttLine", + "TCollection", + "TLeafF", + "listOfRules", + "TString", + "TObjArray", + "TAttFill", + "TBranchRef", + "TList", + "ROOT::TIOFeatures", + "TSeqCollection", + "TAttMarker", + "TTree", + "TNamed", + "TObject", + "TAttLine", + "TLeaf", + "TRefTable", + ] + for i in set(file.file.streamers): + assert i in check + + +def test_streamers_diff_dtypes(tmp_path): + + inFile = ROOT.TFile( + "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" + ) + tree = ROOT.TTree("tree1", "tree") + + # Basic type branch (float) - use array of length 1 + # n = array('f', [ 1.5 ]) + # tree.Branch('b1', n, 'b1/F') + + # Array branch - use array of length N + N = 4 + # a = array('d', N*[ 0. ]) + # tree.Branch('b1', a, 'b1[' + str(N) + ']/D') + + # # Array branch - use NumPy array of length N + npa = np.zeros(4, dtype=float) + tree.Branch("b1", npa, "b1F") + for i in range(4): + npa[0] = i**0 + tree.Fill() + inFile.Write() + inFile.Close() + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_diff_dtypes.root"), "OPEN") + tree = inFile.Get("tree1") + tree.Scan() + data = np.array([5, 6, 7, 8], dtype=np.int64) + with uproot.update(os.path.join(tmp_path, "root_diff_dtypes.root")) as file: + file.add_branches("tree1", {"b2": data}) + + with uproot.open( + os.path.join(tmp_path, "root_diff_dtypes.root"), minimal_ttree_metadata=False + ) as file: + file["tree1"]["b2"].member("fLeaves")[0].all_members + + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "root_diff_dtypes.root"), "READ" + ) + tree = inFile.Get("tree1;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] + assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] + indx += 1 + + # tree.Scan() + check = [ + "TBranch", + "TAttLine", + "TCollection", + "TLeafF", + "listOfRules", + "TString", + "TObjArray", + "TAttFill", + "TBranchRef", + "TList", + "ROOT::TIOFeatures", + "TSeqCollection", + "TAttMarker", + "TTree", + "TNamed", + "TObject", + "TAttLine", + "TLeaf", + "TRefTable", + "TLeafL", + ] + for i in set(file.file.streamers): + assert i in check def HZZ_test(tmp_path): with uproot.open( data_path("uproot-HZZ.root"), minimal_ttree_metadata=False ) as test: - - # print(test["events"]["NMuon"].typename) - # print(test["events"]) - # print(test['events'].all_members) - with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: # data = np.arange(0, 2420, 1) data = [] @@ -134,30 +330,13 @@ def HZZ_test(tmp_path): os.path.join(tmp_path, "uproot-HZZ.root copy"), minimal_ttree_metadata=False, ) as check: - print(check.keys(cycle=False)) - print(check["events"]["data"].array()) - print(check["events"].arrays()) - print(test["events"].arrays()) - for key in test["events"].keys(): assert key in test["events"].keys() assert ak.all( check["events"][key].array() == test["events"][key].array() ) - # print(check.file.chunk.start, check['events'].chunk.stop) - # print(check.file.chunk.get(1000, 2000, check['events'].cursor, context=None).tobytes()) - - import ROOT - inFile = ROOT.TFile.Open( os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" ) tree = inFile.Get("events") - - # print(check["events"]["Photon_Px"].member("fLeaves")[0].member("fLeafCount")) - - -simple_test("/Users/zobil/Desktop/directory") -# HZZ_test("/Users/zobil/Desktop/directory") -# test_ak_arrays("/Users/zobil/Desktop/directory") From 404e92d98eaa479a54ade095cee126a0e2d8cc92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 16 May 2024 17:12:09 +0200 Subject: [PATCH 10/20] Small updates to streamers, updated docs, more in-depth tests --- src/uproot/writing/_cascade.py | 75 +++++++---- src/uproot/writing/_cascadetree.py | 6 +- src/uproot/writing/writable.py | 31 +++-- tests/test_1155_feat_add_copy_ttree.py | 167 ++++++++++--------------- 4 files changed, 138 insertions(+), 141 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 19dfda41c..bf49671f5 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -624,15 +624,13 @@ def num_bytes(self): return total def serialize(self, out, branch): - # superclass TNamed (Model_TNamed(uproot.model.Model)) - # superclass TAttFill self.read_members(branch) any_tbranch_index = len(out) out.append(None) - # if 'fClonesName' in self._branch.all_members.keys(): - # out.append(b"TBranchElement\x00") - # else: - out.append(b"TBranch\x00") + if "fClonesName" in self._branch_data.keys(): + out.append(b"TBranchElement\x00") + else: + out.append(b"TBranch\x00") tbranch_index = len(out) out.append(None) @@ -650,9 +648,7 @@ def serialize(self, out, branch): tbranch_tnamed._members["fTitle"] = datum["fTitle"] tbranch_tnamed._serialize(out, True, datum["fName"], numpy.uint32(0x00400000)) # TAttFill v2, fFillColor: 0, fFillStyle: 1001 - # make model TAttFill v2 with fFillColor and fFillStyle tattfill = uproot.models.TAtt.Model_TAttFill_v2.empty() - # tattfill._deeply_writable = True # ? tattfill._members["fFillColor"] = datum["fFillColor"] tattfill._members["fFillStyle"] = datum["fFillStyle"] @@ -672,14 +668,15 @@ def serialize(self, out, branch): datum["fCompress"], datum["fBasketSize"], datum["fEntryOffsetLen"], - datum["fWriteBasket"], # fWriteBasket - datum["fEntryNumber"], # fEntryNumber + datum["fWriteBasket"], + datum["fEntryNumber"], ) ) + # TODO Check this? # fIOFeatures (TIOFeatures) out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") - # out.append(self._branch_data["fIOFeatures"].serialize()) + # print(self._branch_data["fIOFeatures"].serialize()) # 0 to bytestring?? out.append( @@ -711,22 +708,49 @@ def serialize(self, out, branch): # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount2"))) # empty TObjArray of TBranches - # TODO how to handle this? Make sure to be TBranchElements will be handled too - # empty TObjArray of TBranches + # TODO Test this! Later make sure TBranchElements are handled + if len(datum["fBranches"]) == 0: + # empty TObjArray of TBranches out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) - else: - # print("serialize branches!!") - # # TObjArray header with fName: "" - out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") - out.append( - uproot.models.TObjArray._tobjarray_format1.pack( - len(self._branch_data["fBranches"]), # TObjArray fSize - 0, # TObjArray fLowerBound - ) - ) + # else: + # out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") + # out.append( + # uproot.models.TObjArray._tobjarray_format1.pack( + # len(self._branch_data["fBranches"]), # TObjArray fSize + # 0, # TObjArray fLowerBound + # ) + # ) + # for branch in self._branch_data["fBranches"]: + # out.append( + # uproot.models.TBranch._tbranch13_format1.pack( + # datum["fCompress"], + # datum["fBasketSize"], + # datum["fEntryOffsetLen"], + # datum["fWriteBasket"], + # datum["fEntryNumber"], + # ) + # ) + + # # TODO Check this? + # # fIOFeatures (TIOFeatures) + # out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") + # # print(self._branch_data["fIOFeatures"].serialize()) + # # 0 to bytestring?? + + # out.append( + # uproot.models.TBranch._tbranch13_format2.pack( + # datum["fOffset"], + # datum["fMaxBaskets"], # fMaxBaskets + # datum["fSplitLevel"], + # datum["fEntries"], # fEntries + # datum["fFirstEntry"], + # datum["fTotBytes"], + # datum["fZipBytes"], + # ) + # ) subtobjarray_of_leaves_index = len(out) out.append(None) @@ -913,11 +937,8 @@ def serialize(self, out, branch): ) # empty TObjArray of fBaskets (embedded) - # TODO "fBranches, which is a TObjArray of nested TBranch instances (possibly TBranchElement)" - if len(datum["fBaskets"]) >= 1: - # print("NotImplementedError, cannot yet write TObjArray of fBaskets") - msg = "Cannot yet write baskets" + msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." raise NotImplementedError(msg) out.append( diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 5eae93c7d..aac5f3d35 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -1727,6 +1727,8 @@ def write_with_new_branches(self, sink, old_key): datum["fZipBytes"], ) ) + if uproot.models.TBranch.Model_TBranch_v13 not in models_for_streamers: + models_for_streamers.append(uproot.models.TBranch.Model_TBranch_v13) # empty TObjArray of TBranches out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -1778,8 +1780,8 @@ def write_with_new_branches(self, sink, old_key): elif letter_upper == "C": special_struct = uproot.models.TLeaf._tleafc1_format1 model = uproot.models.TLeaf.Model_TLeafC_v1 - - models_for_streamers.append(model) + if model not in models_for_streamers: + models_for_streamers.append(model) fLenType = datum["dtype"].itemsize fIsUnsigned = letter != letter_upper diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index bf5000549..c73620da8 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1352,18 +1352,29 @@ def add_branches( # variation of mktree for copying ttree ): """ Args: - source (TTree): existing TTree to copy/replace - Creates an empty TTree in this directory. - - Note that TTrees can be created by assigning TTree-like data to a directory - (see :doc:`uproot.writing.writable.WritableTree` for recognized TTree-like types): + source (TTree): Name of existing TTree to copy/replace + branch_types (dict or pairs of str \u2192 NumPy dtype/Awkward type): Name + and type specification for the TBranches. + title (str): Title for the new TTree. + counter_name (callable of str \u2192 str): Function to generate counter-TBranch + names for Awkward Arrays of variable-length lists. + field_name (callable of str \u2192 str): Function to generate TBranch + names for columns of an Awkward record array or a Pandas DataFrame. + initial_basket_capacity (int): Number of TBaskets that can be written to the + TTree without rewriting the TTree metadata to make room. + resize_factor (float): When the TTree metadata needs to be rewritten, + this specifies how many more TBasket slots to allocate as a multiplicative + factor. + Adds new branches to existing TTrees by rewriting the whole TTree with the new data. + To maintain custom ``counter_name``, ``field_name``, ``initial_basket_capacity`` or + ``resize_factor`` values for the new branches, pass the custom values to the parameters. + Currently, writing new branches in batches is not possible; data in new ``branches`` + must fit in memory. .. code-block:: python - my_directory["tree"] = {"branch1": np.array(...), "branch2": ak.Array(...)} + my_directory.add_branches("tree", {"branch1": np.array(...), "branch2": ak.Array(...)}) - but TTrees created this way will never be empty. Use this method - to make an empty TTree or to control its parameters. """ if self._file.sink.closed: raise ValueError("cannot modify a TTree in a closed file") @@ -1472,7 +1483,6 @@ def add_branches( # variation of mktree for copying ttree branch_dtype = numpy.dtype((branch_dtype, branch_shape)) metadata[branch_name] = branch_dtype file.close() - update_streamers = [] obj, update_streamers = directory._cascading.add_branches( directory._file.sink, old_ttree.name, @@ -1488,9 +1498,6 @@ def add_branches( # variation of mktree for copying ttree directory, ) tree = WritableTree(path, directory._file, obj) - update_streamers.append( - uproot.models.TBranch.Model_TBranch_v13, - ) update_streamers.append( uproot.models.TTree.Model_TTree_v20, ) diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 01f7862c4..75b1edc75 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -28,9 +28,6 @@ def test_vector(): minimal_ttree_metadata=False, ) as read: print(read["tree1"]) - # print(read["tree1"].all_members) - # print(read["tree1"]["x"].all_members) - # print(read["tree1"]["x"].member("fLeaves")[0]) def simple_test(tmp_path): @@ -63,44 +60,11 @@ def simple_test(tmp_path): ) as new: inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") tree = inFile.Get("whatever;1") - print(tree) + indx = 0 for x in tree: - print(getattr(x, "b1")) - - -def test_subbranches(tmp_path): - data = np.array([1, 2, 3, 4, 5], dtype=np.int64) - data1 = np.array( - [ - 2.0, - 3.0, - 4.0, - 5.0, - 6.0, - ], - dtype=np.int32, - ) - - with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: - f["whatever"] = {"b1": data, "b2": data1} - - with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: - f.add_branches("whatever", {"b3": data, "b4": data1}) - - with uproot.open( - os.path.join(tmp_path, "tree_tester.root"), minimal_ttree_metadata=False - ) as check: - # check["tree"].show() - print(check.keys()) - with uproot.open( - os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False - ) as new: - print(new["whatever"].all_members) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") - tree = inFile.Get("whatever;1") - print(tree) - for x in tree: - print(getattr(x, "b1")) + assert getattr(x, "b1") == data[indx] + assert getattr(x, "b2") == data1[indx] + indx += 1 def test_different_fEntries(tmp_path): @@ -125,20 +89,20 @@ def test_ak_arrays(tmp_path): data1 = np.array([2, 3, 4], dtype=np.int64) data2 = np.array([3, 4, 5], dtype=np.int64) with uproot.recreate(os.path.join(tmp_path, "control.root")) as file: - file["tree"] = { + file["whatever"] = { "b1": ak.Array([data, data1, data2]), "b2": ak.Array([data1, data2, data]), "b3": ak.Array([data2, data, data1]), } with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: - file["tree"] = { + file["whatever"] = { "b1": ak.Array([data, data1, data2]), } with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: write.add_branches( - "tree", + "whatever", { "b2": ak.Array([data1, data2, data]), "b3": ak.Array([data2, data, data1]), @@ -152,39 +116,20 @@ def test_ak_arrays(tmp_path): os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False ) as new: inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") - tree = inFile.Get("tree") + tree = inFile.Get("whatever") for x in tree: - print(getattr(x, "b1")) - print(tree.Scan()) - # ak.Array() - # for x in tree: - # print(getattr(x, "b2").GetArray()) + getattr(x, "b1") + inFile.Close() + df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) + assert ak.all(npy3["b1"] == [data, data1, data2]) + assert ak.all(npy3["b2"] == [data1, data2, data]) + assert ak.all(npy3["b3"] == [data2, data, data1]) def test_streamers_same_dtypes(tmp_path): - from ROOT import TTree - from array import array - - N = 4 - data = array("f", N * [0.0]) - data1 = array("f", [2.0, 3.0, 4.0, 5.0]) - - inFile = root.TFile( - "/Users/zobil/Desktop/directory/root_streamers_F.root", "RECREATE" - ) - tree = root.TTree("tree1", "tree") - import numpy as np - - # Basic type branch (float) - use array of length 1 - # n = array('f', [ 1.5 ]) - # tree.Branch('b1', n, 'b1/F') - - # Array branch - use array of length N - N = 4 - # a = array('d', N*[ 0. ]) - # tree.Branch('b1', a, 'b1[' + str(N) + ']/D') - - # # Array branch - use NumPy array of length N + inFile = ROOT.TFile(os.path.join("root_same_dtypes.root"), "RECREATE") + tree = ROOT.TTree("tree1", "tree") npa = np.zeros(4, dtype=np.float32) tree.Branch("b1", npa, "b1/F") for i in range(4): @@ -193,20 +138,20 @@ def test_streamers_same_dtypes(tmp_path): inFile.Write() inFile.Close() - inFile = root.TFile.Open(os.path.join(tmp_path, "root_streamers_F.root"), "OPEN") + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_same_dtypes.root"), "OPEN") tree = inFile.Get("tree1") - tree.Scan() data = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32) - with uproot.update(os.path.join(tmp_path, "root_streamers_F.root")) as file: + with uproot.update(os.path.join(tmp_path, "root_same_dtypes.root")) as file: file.add_branches("tree1", {"b2": data}) with uproot.open( - os.path.join(tmp_path, "root_streamers_F.root"), minimal_ttree_metadata=False + os.path.join(tmp_path, "root_same_dtypes.root"), minimal_ttree_metadata=False ) as file: inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "root_streamers_F.root"), "READ" + os.path.join(tmp_path, "root_same_dtypes.root"), "READ" ) + inFile.ShowStreamerInfo() tree = inFile.Get("tree1;1") indx = 0 for x in tree: @@ -238,25 +183,14 @@ def test_streamers_same_dtypes(tmp_path): ] for i in set(file.file.streamers): assert i in check + inFile.Close() def test_streamers_diff_dtypes(tmp_path): - inFile = ROOT.TFile( "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" ) tree = ROOT.TTree("tree1", "tree") - - # Basic type branch (float) - use array of length 1 - # n = array('f', [ 1.5 ]) - # tree.Branch('b1', n, 'b1/F') - - # Array branch - use array of length N - N = 4 - # a = array('d', N*[ 0. ]) - # tree.Branch('b1', a, 'b1[' + str(N) + ']/D') - - # # Array branch - use NumPy array of length N npa = np.zeros(4, dtype=float) tree.Branch("b1", npa, "b1F") for i in range(4): @@ -267,16 +201,15 @@ def test_streamers_diff_dtypes(tmp_path): inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_diff_dtypes.root"), "OPEN") tree = inFile.Get("tree1") - tree.Scan() data = np.array([5, 6, 7, 8], dtype=np.int64) + data1 = np.array([5.2, 6.3, 7.4, 8.5], dtype=np.float64) with uproot.update(os.path.join(tmp_path, "root_diff_dtypes.root")) as file: - file.add_branches("tree1", {"b2": data}) + file.add_branches("tree1", {"b2": data, "b3": data1}) with uproot.open( os.path.join(tmp_path, "root_diff_dtypes.root"), minimal_ttree_metadata=False ) as file: file["tree1"]["b2"].member("fLeaves")[0].all_members - inFile = ROOT.TFile.Open( os.path.join(tmp_path, "root_diff_dtypes.root"), "READ" ) @@ -286,7 +219,6 @@ def test_streamers_diff_dtypes(tmp_path): assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] indx += 1 - # tree.Scan() check = [ "TBranch", @@ -309,17 +241,18 @@ def test_streamers_diff_dtypes(tmp_path): "TLeaf", "TRefTable", "TLeafL", + "TLeafD", ] for i in set(file.file.streamers): assert i in check + inFile.Close() def HZZ_test(tmp_path): with uproot.open( data_path("uproot-HZZ.root"), minimal_ttree_metadata=False - ) as test: + ) as control: with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: - # data = np.arange(0, 2420, 1) data = [] for i in range(2421): data.append(np.arange(0, 3, 1)) @@ -329,14 +262,48 @@ def HZZ_test(tmp_path): with uproot.open( os.path.join(tmp_path, "uproot-HZZ.root copy"), minimal_ttree_metadata=False, - ) as check: - for key in test["events"].keys(): - assert key in test["events"].keys() + ) as new: + for key in control["events"].keys(): + assert key in new["events"].keys() assert ak.all( - check["events"][key].array() == test["events"][key].array() + new["events"][key].array() == control["events"][key].array() ) - inFile = ROOT.TFile.Open( os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" ) tree = inFile.Get("events") + indx = 0 + inFile.Close() + df3 = ROOT.RDataFrame( + "events", os.path.join(tmp_path, "uproot-HZZ.root copy") + ) + npy3 = ak.from_rdataframe(df3, columns=("data"), keep_order=True) + # for key in npy3.keys(): + # assert ak.all(npy3[key] == control['events'][key].array()) + assert ak.all(npy3 == data) + inFile.Close() + + +def nested_branches(tmp_path): + # Make example + inFile = ROOT.TFile(os.path.join(tmp_path, "root_nested_branches.root"), "RECREATE") + tree = ROOT.TTree("tree1", "tree") + b1 = np.zeros(4, dtype=float) + b2 = np.zeros(4, dtype=float) + branch1 = tree.Branch("b1", b1, "b1F") + branch2 = tree.Branch("b2", b2, "b1F") + for i in range(4): + b1[0] = i**0 + b2[0] = i**0 + tree.Fill() + branch2.SetObject(branch1) + inFile.Write() + inFile.Close() + + # Test + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "root_nested_branches.root"), "OPEN" + ) + tree = inFile.Get("tree1") + tree.Scan() + inFile.Close() From 2b986c192cf1d1b553387f6485f1139ccf9222bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 30 May 2024 18:41:11 +0200 Subject: [PATCH 11/20] Working on support for TBranchElements. Added some small tests --- src/uproot/models/TBranch.py | 10 +- src/uproot/writing/_cascade.py | 103 ++++++++------- src/uproot/writing/_cascadetree.py | 2 - src/uproot/writing/writable.py | 1 + tests/test_1155_feat_add_copy_ttree.py | 168 ++++++++++++++++++------- 5 files changed, 188 insertions(+), 96 deletions(-) diff --git a/src/uproot/models/TBranch.py b/src/uproot/models/TBranch.py index 41243ad96..ef5ad1538 100644 --- a/src/uproot/models/TBranch.py +++ b/src/uproot/models/TBranch.py @@ -299,10 +299,11 @@ class Model_TBranch_v12(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned """ A :doc:`uproot.model.VersionedModel` for ``TBranch`` version 12. """ - + behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): + # print("reading here in TBranch V12") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -435,6 +436,7 @@ class Model_TBranch_v13(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): + # print("Reading in TBranch 13") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -744,6 +746,7 @@ class Model_TBranchElement_v10( behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): + # print("read here in TBranchElement V10") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -751,8 +754,9 @@ def read_members(self, chunk, cursor, context, file): f"""memberwise serialization of {type(self).__name__} in file {self.file.file_path}""" ) + # print("got here") self._bases.append( - file.class_named("TBranch", 12).read( + file.class_named("TBranch", 13).read( chunk, cursor, context, @@ -787,7 +791,7 @@ def read_members(self, chunk, cursor, context, file): chunk, cursor, context, file, self._file, self.concrete ) - base_names_versions = [("TBranch", 12)] + base_names_versions = [("TBranch", 13)] member_names = [ "fClassName", "fParentName", diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index bf49671f5..717930ee2 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -627,7 +627,7 @@ def serialize(self, out, branch): self.read_members(branch) any_tbranch_index = len(out) out.append(None) - if "fClonesName" in self._branch_data.keys(): + if "fClonesName" in branch.all_members.keys(): out.append(b"TBranchElement\x00") else: out.append(b"TBranch\x00") @@ -662,7 +662,6 @@ def serialize(self, out, branch): # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. # (I don't know where the "no, really, this is uncompressed" bit is.) - out.append( uproot.models.TBranch._tbranch13_format1.pack( datum["fCompress"], @@ -672,7 +671,6 @@ def serialize(self, out, branch): datum["fEntryNumber"], ) ) - # TODO Check this? # fIOFeatures (TIOFeatures) out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") @@ -690,31 +688,18 @@ def serialize(self, out, branch): datum["fZipBytes"], ) ) - # if 'fClonesName' in self._branch.all_members.keys(): # TBranchElement - find a more robust way to check....or make sure this can't be misleading - # out.append(self._branch.member("fClassName").serialize()) # These three are TStrings - # out.append(self._branch.member("fParentName").serialize()) - # out.append(self._branch.member("fClonesName").serialize()) - # out.append( - # uproot.models.TBranch._tbranchelement10_format1.pack( - # self._branch.member("fCheckSum"), - # self._branch.member("fClassVersion"), - # self._branch.member("fID"), - # self._branch.member("fType"), - # self._branch.member("fStreamerType"), - # self._branch.member("fMaximum"), - # ) - # ) - # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount"))) - # out.append(uproot.serialization.serialize_object_any(self._branch.member("fBranchCount2"))) + # empty TObjArray of TBranches # TODO Test this! Later make sure TBranchElements are handled - if len(datum["fBranches"]) == 0: + # if len(datum["fBranches"]) == 0: # empty TObjArray of TBranches - out.append( - b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - ) + + out.append( + b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + ) + # else: # out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") # out.append( @@ -795,11 +780,11 @@ def serialize(self, out, branch): # else: # This will never be reached? What to do about G # letter_upper = "G" # special_struct = uproot.models.TLeaf._tleafl1_format0 - # if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): # TLeafElement... - # special_struct = uproot.models.TLeaf._tleafelement1_format1 - # out.append((b"TLeafElement") + b"\x00") - # else: - out.append(("TLeaf" + letter_upper).encode() + b"\x00") + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): # TLeafElement... + special_struct = uproot.models.TLeaf._tleafelement1_format1 + out.append((b"TLeafElement") + b"\x00") + else: + out.append(("TLeaf" + letter_upper).encode() + b"\x00") # single TLeaf leaf_name = datum["fName"].encode(errors="surrogateescape") leaf_title = ( @@ -892,7 +877,13 @@ def serialize(self, out, branch): leaf.member("fIsUnsigned"), ) ) - + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + out.append( + uproot.models.TLeaf._tleafelement1_format1.pack( + leaf.member("fID"), # fIsRange + leaf.member("fType"), + ) + ) if leaf.member("fLeafCount") is not None: out.append( uproot.deserialization._read_object_any_format1.pack( @@ -906,22 +897,16 @@ def serialize(self, out, branch): else: out.append(b"\x00\x00\x00\x00") - # if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): - # out.append( - # uproot.models.TLeaf._tleafelement1_format1.pack( - # leaf.member("fID"), # fIsRange - # leaf.member("fType"), - # ) - # ) - # else: + + if not isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): # specialized TLeaf* members (fMinimum, fMaximum) - # datum["tleaf_special_struct"] = special_struct + datum["tleaf_special_struct"] = special_struct - out.append( - special_struct.pack( - int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + out.append( + special_struct.pack( + int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) + ) ) - ) out[subany_tleaf_index] = ( uproot.serialization._serialize_object_any_format1.pack( @@ -938,8 +923,9 @@ def serialize(self, out, branch): # empty TObjArray of fBaskets (embedded) if len(datum["fBaskets"]) >= 1: - msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." - raise NotImplementedError(msg) + # msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." + # raise NotImplementedError(msg) + print("fBaskets", datum['fBaskets'][0]) out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -961,9 +947,14 @@ def serialize(self, out, branch): out.append(uproot._util.tobytes(datum["fBasketSeek"])) # out.append(datum["fFileName"].serialize()) # name = None? out.append(b"\x00") - out[tbranch_index] = uproot.serialization.numbytes_version( - sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch - ) + if 'fClonesName' in branch.all_members.keys(): + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 10 # TBranchElement (?) + ) + else: + out[tbranch_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + ) out[any_tbranch_index] = ( uproot.serialization._serialize_object_any_format1.pack( numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) @@ -971,7 +962,23 @@ def serialize(self, out, branch): uproot.const.kNewClassTag, ) ) - + if 'fClonesName' in branch.all_members.keys(): # TBranchElement - find a more robust way to check....or make sure this can't be misleading + out.append(branch.member("fClassName").serialize()) # These three are TStrings + out.append(branch.member("fParentName").serialize()) + out.append(branch.member("fClonesName").serialize()) + out.append( + uproot.models.TBranch._tbranchelement10_format1.pack( + branch.member("fCheckSum"), + branch.member("fClassVersion"), + branch.member("fID"), + branch.member("fType"), + branch.member("fStreamerType"), + branch.member("fMaximum"), + ) + ) + out.append(uproot.serialization.serialize_object_any(branch.member("fBranchCount"))) + out.append(uproot.serialization.serialize_object_any(branch.member("fBranchCount2"))) + return out, datum["tleaf_reference_number"] def read_members(self, branch): diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index aac5f3d35..8145fd468 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -1585,8 +1585,6 @@ def add_branches(self, sink, file, new_branches): # sink.flush() # streamers = [x for x in file._cascading.tlist_of_streamers] streamers = self.write_with_new_branches(sink, old_key) - # Reset - # old_key = self.get_tree_key() self.extend(file, sink, new_branches) return streamers diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index c73620da8..6a22d090f 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1511,6 +1511,7 @@ def add_branches( # variation of mktree for copying ttree streamers.append( uproot.writing._cascade.RawStreamerInfo(*rawstreamer) ) + print(update_streamers) directory._file._cascading.streamers.update_streamers( directory._file.sink, streamers, diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 75b1edc75..b2adaabb0 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -1,5 +1,7 @@ import uproot -from skhep_testdata import data_path +from skhep_testdata import data_path, known_files +import uproot.model +import uproot.models import uproot.serialization import uproot.writing.writable import os @@ -11,26 +13,71 @@ import awkward as ak -def test_vector(): - with uproot.update( - "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root" - ) as write: - write.add_branches("t", {"branch": int}) +def test_vector(tmp_path): + # with uproot.open( + # os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), + # minimal_ttree_metadata=False, + # ) as read: + # print(read['t']['x'].debug(1)) + # print(read.cursor.debug(read.file.chunk(start=0, stop=5852))) + # print(read.file.chunk(start=0, stop=5852).raw_data.tobytes()) + + # with uproot.update( + # os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + # ) as write: + # write.add_branches("t", {"branch": [1,2,3,4,5]}) + + # with uproot.open( + # os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + # minimal_ttree_metadata=False, + # ) as read: + # print(read['t']) + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "vectorVectorDouble.root"), "READ") + tree = inFile.Get("t") + for x in tree: + print(getattr(x, "x")) + ROOT.TClass.TBranchElement + # inFile.Close() - with uproot.open( - "/Users/zobil/Documents/samples/uproot-vectorVectorDouble.root", - minimal_ttree_metadata=False, - ) as read: - print(read["t"]["x"].arrays()) +def simple_test(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array( + [ + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + ], + dtype=np.int32, + ) - with uproot.open( - "/Users/zobil/Documents/samples/uproot-vectorVectorDouble-work.root", - minimal_ttree_metadata=False, - ) as read: - print(read["tree1"]) + with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: + f["whatever"] = {"b1": data, "b2": data1} + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + f["whatever"] = {"b1": data, "b2": data1} -def simple_test(tmp_path): + with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: + f.add_branches("whatever", {"b3": data, "b4": data1}) + + with uproot.open( + os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False + ) as check: + with uproot.open( + os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False + ) as new: + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") + tree = inFile.Get("whatever;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == data[indx] + assert getattr(x, "b2") == data1[indx] + indx += 1 + print(check.file.chunk(start=0, stop=8000).raw_data.tobytes()) + +def test_multiple_trees(tmp_path): data = np.array([1, 2, 3, 4, 5], dtype=np.int64) data1 = np.array( [ @@ -44,13 +91,16 @@ def simple_test(tmp_path): ) with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: - f["whatever"] = {"b1": data} + f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} + f["whatever1"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: f["whatever"] = {"b1": data, "b2": data1} + f["whatever1"] = {"b1": data, "b2": data1, "b3": data} with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: f.add_branches("whatever", {"b3": data, "b4": data1}) + f.add_branches("whatever1", {"b4": data1}) with uproot.open( os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False @@ -58,6 +108,10 @@ def simple_test(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: + assert ak.all(new['whatever']['b1'].array() == data) + assert ak.all(new['whatever1']['b4'].array() == data1) + assert ak.all(new['whatever1']['b2'].array() == data1) + assert ak.all(new['whatever1']['b4'].array() == data1) inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") tree = inFile.Get("whatever;1") indx = 0 @@ -66,7 +120,6 @@ def simple_test(tmp_path): assert getattr(x, "b2") == data1[indx] indx += 1 - def test_different_fEntries(tmp_path): data = np.array([1, 2, 3, 4, 5], dtype=np.int64) data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) @@ -82,6 +135,8 @@ def test_different_fEntries(tmp_path): }, ) +def test_all_dtypes(tmp_path): + print("to-do") def test_ak_arrays(tmp_path): @@ -126,8 +181,8 @@ def test_ak_arrays(tmp_path): assert ak.all(npy3["b2"] == [data1, data2, data]) assert ak.all(npy3["b3"] == [data2, data, data1]) - def test_streamers_same_dtypes(tmp_path): + # Make file with ROOT inFile = ROOT.TFile(os.path.join("root_same_dtypes.root"), "RECREATE") tree = ROOT.TTree("tree1", "tree") npa = np.zeros(4, dtype=np.float32) @@ -185,8 +240,8 @@ def test_streamers_same_dtypes(tmp_path): assert i in check inFile.Close() - def test_streamers_diff_dtypes(tmp_path): + # Make file with ROOT inFile = ROOT.TFile( "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" ) @@ -247,7 +302,46 @@ def test_streamers_diff_dtypes(tmp_path): assert i in check inFile.Close() +def HZZ_test(tmp_path): + with uproot.open( + data_path("uproot-HZZ.root"), minimal_ttree_metadata=False + ) as control: + with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: + data = [] + for i in range(2421): + data.append(np.arange(0, 3, 1)) + data = ak.Array(data) + new.add_branches("events", {"data": data}) + + with uproot.open( + os.path.join(tmp_path, "uproot-HZZ.root copy"), + minimal_ttree_metadata=False, + ) as new: + for key in control["events"].keys(): + assert key in new["events"].keys() + assert ak.all( + new["events"][key].array() == control["events"][key].array() + ) + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" + ) + tree = inFile.Get("events") + indx = 0 + inFile.Close() + df3 = ROOT.RDataFrame( + "events", os.path.join(tmp_path, "uproot-HZZ.root copy") + ) + npy3 = ak.from_rdataframe(df3, columns=("data"), keep_order=True) + # for key in npy3.keys(): + # assert ak.all(npy3[key] == control['events'][key].array()) + assert ak.all(npy3 == data) + inFile.Close() +def test_nested_branches(tmp_path): + # Make example + with uproot.open(data_path("uproot-HZZ-objects.root")): + print("examine this ") + def HZZ_test(tmp_path): with uproot.open( data_path("uproot-HZZ.root"), minimal_ttree_metadata=False @@ -283,27 +377,15 @@ def HZZ_test(tmp_path): assert ak.all(npy3 == data) inFile.Close() +def test_branch_v8(tmp_path): + with uproot.open(os.path.join(tmp_path, 'uproot-issue-250.root')) as control: + with uproot.update(os.path.join(tmp_path, 'uproot-issue-250.root')) as new: + print("hi") + # with uproot.open(os.path.join("uproot-from-geant4.root copy")) as new: -def nested_branches(tmp_path): - # Make example - inFile = ROOT.TFile(os.path.join(tmp_path, "root_nested_branches.root"), "RECREATE") - tree = ROOT.TTree("tree1", "tree") - b1 = np.zeros(4, dtype=float) - b2 = np.zeros(4, dtype=float) - branch1 = tree.Branch("b1", b1, "b1F") - branch2 = tree.Branch("b2", b2, "b1F") - for i in range(4): - b1[0] = i**0 - b2[0] = i**0 - tree.Fill() - branch2.SetObject(branch1) - inFile.Write() - inFile.Close() +# test_vector("/Users/zobil/Desktop/directory/vectorVector") + +files = ['uproot-from-geant4.root'] # Values in fBaskets, can't open with uproot.update() + +# Try uproot unit tests to generate uproot-events maybe? - # Test - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "root_nested_branches.root"), "OPEN" - ) - tree = inFile.Get("tree1") - tree.Scan() - inFile.Close() From 76551f037f70437cdd5c6a1b05dea45c9add5555 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 16:43:20 +0000 Subject: [PATCH 12/20] style: pre-commit fixes --- src/uproot/models/TBranch.py | 2 +- src/uproot/writing/_cascade.py | 35 +++++++++++++++++--------- tests/test_1155_feat_add_copy_ttree.py | 35 ++++++++++++++++++-------- 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/uproot/models/TBranch.py b/src/uproot/models/TBranch.py index ef5ad1538..6db6abe01 100644 --- a/src/uproot/models/TBranch.py +++ b/src/uproot/models/TBranch.py @@ -299,7 +299,7 @@ class Model_TBranch_v12(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned """ A :doc:`uproot.model.VersionedModel` for ``TBranch`` version 12. """ - + behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 717930ee2..95a63f5d1 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -694,8 +694,8 @@ def serialize(self, out, branch): # TODO Test this! Later make sure TBranchElements are handled # if len(datum["fBranches"]) == 0: - # empty TObjArray of TBranches - + # empty TObjArray of TBranches + out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) @@ -780,7 +780,9 @@ def serialize(self, out, branch): # else: # This will never be reached? What to do about G # letter_upper = "G" # special_struct = uproot.models.TLeaf._tleafl1_format0 - if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): # TLeafElement... + if isinstance( + leaf, uproot.models.TLeaf.Model_TLeafElement_v1 + ): # TLeafElement... special_struct = uproot.models.TLeaf._tleafelement1_format1 out.append((b"TLeafElement") + b"\x00") else: @@ -897,9 +899,8 @@ def serialize(self, out, branch): else: out.append(b"\x00\x00\x00\x00") - if not isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): - # specialized TLeaf* members (fMinimum, fMaximum) + # specialized TLeaf* members (fMinimum, fMaximum) datum["tleaf_special_struct"] = special_struct out.append( @@ -925,7 +926,7 @@ def serialize(self, out, branch): if len(datum["fBaskets"]) >= 1: # msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." # raise NotImplementedError(msg) - print("fBaskets", datum['fBaskets'][0]) + print("fBaskets", datum["fBaskets"][0]) out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -947,7 +948,7 @@ def serialize(self, out, branch): out.append(uproot._util.tobytes(datum["fBasketSeek"])) # out.append(datum["fFileName"].serialize()) # name = None? out.append(b"\x00") - if 'fClonesName' in branch.all_members.keys(): + if "fClonesName" in branch.all_members.keys(): out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 10 # TBranchElement (?) ) @@ -962,8 +963,12 @@ def serialize(self, out, branch): uproot.const.kNewClassTag, ) ) - if 'fClonesName' in branch.all_members.keys(): # TBranchElement - find a more robust way to check....or make sure this can't be misleading - out.append(branch.member("fClassName").serialize()) # These three are TStrings + if ( + "fClonesName" in branch.all_members.keys() + ): # TBranchElement - find a more robust way to check....or make sure this can't be misleading + out.append( + branch.member("fClassName").serialize() + ) # These three are TStrings out.append(branch.member("fParentName").serialize()) out.append(branch.member("fClonesName").serialize()) out.append( @@ -976,9 +981,15 @@ def serialize(self, out, branch): branch.member("fMaximum"), ) ) - out.append(uproot.serialization.serialize_object_any(branch.member("fBranchCount"))) - out.append(uproot.serialization.serialize_object_any(branch.member("fBranchCount2"))) - + out.append( + uproot.serialization.serialize_object_any(branch.member("fBranchCount")) + ) + out.append( + uproot.serialization.serialize_object_any( + branch.member("fBranchCount2") + ) + ) + return out, datum["tleaf_reference_number"] def read_members(self, branch): diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index b2adaabb0..495c82076 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -19,8 +19,8 @@ def test_vector(tmp_path): # minimal_ttree_metadata=False, # ) as read: # print(read['t']['x'].debug(1)) - # print(read.cursor.debug(read.file.chunk(start=0, stop=5852))) - # print(read.file.chunk(start=0, stop=5852).raw_data.tobytes()) + # print(read.cursor.debug(read.file.chunk(start=0, stop=5852))) + # print(read.file.chunk(start=0, stop=5852).raw_data.tobytes()) # with uproot.update( # os.path.join(tmp_path, "cp-vectorVectorDouble.root"), @@ -40,6 +40,7 @@ def test_vector(tmp_path): ROOT.TClass.TBranchElement # inFile.Close() + def simple_test(tmp_path): data = np.array([1, 2, 3, 4, 5], dtype=np.int64) data1 = np.array( @@ -77,6 +78,7 @@ def simple_test(tmp_path): indx += 1 print(check.file.chunk(start=0, stop=8000).raw_data.tobytes()) + def test_multiple_trees(tmp_path): data = np.array([1, 2, 3, 4, 5], dtype=np.int64) data1 = np.array( @@ -108,10 +110,10 @@ def test_multiple_trees(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: - assert ak.all(new['whatever']['b1'].array() == data) - assert ak.all(new['whatever1']['b4'].array() == data1) - assert ak.all(new['whatever1']['b2'].array() == data1) - assert ak.all(new['whatever1']['b4'].array() == data1) + assert ak.all(new["whatever"]["b1"].array() == data) + assert ak.all(new["whatever1"]["b4"].array() == data1) + assert ak.all(new["whatever1"]["b2"].array() == data1) + assert ak.all(new["whatever1"]["b4"].array() == data1) inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") tree = inFile.Get("whatever;1") indx = 0 @@ -120,6 +122,7 @@ def test_multiple_trees(tmp_path): assert getattr(x, "b2") == data1[indx] indx += 1 + def test_different_fEntries(tmp_path): data = np.array([1, 2, 3, 4, 5], dtype=np.int64) data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) @@ -135,9 +138,11 @@ def test_different_fEntries(tmp_path): }, ) + def test_all_dtypes(tmp_path): print("to-do") + def test_ak_arrays(tmp_path): data = np.array([1, 2, 3], dtype=np.int64) @@ -181,6 +186,7 @@ def test_ak_arrays(tmp_path): assert ak.all(npy3["b2"] == [data1, data2, data]) assert ak.all(npy3["b3"] == [data2, data, data1]) + def test_streamers_same_dtypes(tmp_path): # Make file with ROOT inFile = ROOT.TFile(os.path.join("root_same_dtypes.root"), "RECREATE") @@ -240,6 +246,7 @@ def test_streamers_same_dtypes(tmp_path): assert i in check inFile.Close() + def test_streamers_diff_dtypes(tmp_path): # Make file with ROOT inFile = ROOT.TFile( @@ -302,6 +309,7 @@ def test_streamers_diff_dtypes(tmp_path): assert i in check inFile.Close() + def HZZ_test(tmp_path): with uproot.open( data_path("uproot-HZZ.root"), minimal_ttree_metadata=False @@ -337,11 +345,13 @@ def HZZ_test(tmp_path): assert ak.all(npy3 == data) inFile.Close() + def test_nested_branches(tmp_path): # Make example with uproot.open(data_path("uproot-HZZ-objects.root")): print("examine this ") - + + def HZZ_test(tmp_path): with uproot.open( data_path("uproot-HZZ.root"), minimal_ttree_metadata=False @@ -377,15 +387,18 @@ def HZZ_test(tmp_path): assert ak.all(npy3 == data) inFile.Close() + def test_branch_v8(tmp_path): - with uproot.open(os.path.join(tmp_path, 'uproot-issue-250.root')) as control: - with uproot.update(os.path.join(tmp_path, 'uproot-issue-250.root')) as new: + with uproot.open(os.path.join(tmp_path, "uproot-issue-250.root")) as control: + with uproot.update(os.path.join(tmp_path, "uproot-issue-250.root")) as new: print("hi") # with uproot.open(os.path.join("uproot-from-geant4.root copy")) as new: + # test_vector("/Users/zobil/Desktop/directory/vectorVector") -files = ['uproot-from-geant4.root'] # Values in fBaskets, can't open with uproot.update() +files = [ + "uproot-from-geant4.root" +] # Values in fBaskets, can't open with uproot.update() # Try uproot unit tests to generate uproot-events maybe? - From f2de07dd1c4a224fa0df88b1fcaf2a84c26aace3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Tue, 11 Jun 2024 14:27:15 +0200 Subject: [PATCH 13/20] Old branches now copied with just reference numbers changed, still have to fix some things --- src/uproot/model.py | 5 +- src/uproot/models/TBranch.py | 8 +- src/uproot/models/TNamed.py | 2 +- src/uproot/reading.py | 8 +- src/uproot/writing/_cascade.py | 41 ++++---- src/uproot/writing/_cascadetree.py | 90 +++++++++++++++--- src/uproot/writing/writable.py | 1 - tests/test_1155_feat_add_copy_ttree.py | 124 +++++++++++++++---------- 8 files changed, 187 insertions(+), 92 deletions(-) diff --git a/src/uproot/model.py b/src/uproot/model.py index 36dde4270..b9159c1bd 100644 --- a/src/uproot/model.py +++ b/src/uproot/model.py @@ -787,7 +787,6 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): self._is_memberwise = False old_breadcrumbs = context.get("breadcrumbs", ()) context["breadcrumbs"] = (*old_breadcrumbs, self) - self.hook_before_read(chunk=chunk, cursor=cursor, context=context, file=file) forth_obj = uproot._awkwardforth.get_forth_obj(context) if forth_obj is not None: @@ -798,6 +797,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): if context.get("reading", True): temp_index = cursor._index self.read_numbytes_version(chunk, cursor, context) + length = cursor._index - temp_index if length != 0 and forth_obj is not None: forth_stash.pre_code.append(f"{length} stream skip\n") @@ -843,6 +843,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): cursor.skip(4) if context.get("reading", True): + self.hook_before_read_members( chunk=chunk, cursor=cursor, context=context, file=file ) @@ -868,7 +869,7 @@ def read(cls, chunk, cursor, context, file, selffile, parent, concrete=None): out = self.postprocess(chunk, cursor, context, file) context["breadcrumbs"] = old_breadcrumbs - + # print(out) return out def read_numbytes_version(self, chunk, cursor, context): diff --git a/src/uproot/models/TBranch.py b/src/uproot/models/TBranch.py index 6db6abe01..41243ad96 100644 --- a/src/uproot/models/TBranch.py +++ b/src/uproot/models/TBranch.py @@ -303,7 +303,6 @@ class Model_TBranch_v12(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): - # print("reading here in TBranch V12") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -436,7 +435,6 @@ class Model_TBranch_v13(uproot.behaviors.TBranch.TBranch, uproot.model.Versioned behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): - # print("Reading in TBranch 13") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -746,7 +744,6 @@ class Model_TBranchElement_v10( behaviors = (uproot.behaviors.TBranch.TBranch,) def read_members(self, chunk, cursor, context, file): - # print("read here in TBranchElement V10") if uproot._awkwardforth.get_forth_obj(context) is not None: raise uproot.interpretation.objects.CannotBeForth() if self.is_memberwise: @@ -754,9 +751,8 @@ def read_members(self, chunk, cursor, context, file): f"""memberwise serialization of {type(self).__name__} in file {self.file.file_path}""" ) - # print("got here") self._bases.append( - file.class_named("TBranch", 13).read( + file.class_named("TBranch", 12).read( chunk, cursor, context, @@ -791,7 +787,7 @@ def read_members(self, chunk, cursor, context, file): chunk, cursor, context, file, self._file, self.concrete ) - base_names_versions = [("TBranch", 13)] + base_names_versions = [("TBranch", 12)] member_names = [ "fClassName", "fParentName", diff --git a/src/uproot/models/TNamed.py b/src/uproot/models/TNamed.py index 96b303ef9..6c7ba557f 100644 --- a/src/uproot/models/TNamed.py +++ b/src/uproot/models/TNamed.py @@ -34,7 +34,7 @@ def read_members(self, chunk, cursor, context, file): concrete=self.concrete, ) ) - + # print("tnamed", context, chunk.raw_data.tobytes()) self._members["fName"] = cursor.string(chunk, context) self._members["fTitle"] = cursor.string(chunk, context) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 0dce3a970..3615b875e 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -2032,7 +2032,6 @@ def key(self, where): Note that this does not read any data from the file. """ where = uproot._util.ensure_str(where) - if "/" in where: step, last_item = self.descent_into_path(where) return step.key(last_item) @@ -2057,7 +2056,8 @@ def key(self, where): # Follow ROOT's behaviour in comparing negative fCycle values elif cycle is None and abs(last.fCycle) < abs(key.fCycle): last = key - + chunk, tmp_cursor = key.get_uncompressed_chunk_cursor() + # print("debug", tmp_cursor.debug(chunk)) if last is not None: return last elif cycle is None: @@ -2070,6 +2070,8 @@ def key(self, where): ) def __getitem__(self, where): + # if where == "x": + # print("getitem readonlydirectory") if "/" in where or ":" in where: items = where.split("/") step = last = self @@ -2338,6 +2340,7 @@ def data_cursor(self): file where the data begins (the object to be read, after its copy of the ``TKey`` and before the object's number of bytes/version header). """ + # print("data_cursor!!", self._fClassName) return uproot.source.cursor.Cursor(self._fSeekKey + self._fKeylen) @property @@ -2479,7 +2482,6 @@ def get(self): del self._file.object_cache[self.cache_key] else: return out - if self._fClassName in must_be_attached: selffile = self._file parent = self diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 95a63f5d1..62334a080 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -629,11 +629,13 @@ def serialize(self, out, branch): out.append(None) if "fClonesName" in branch.all_members.keys(): out.append(b"TBranchElement\x00") + tbranchelement_index = len(out) + out.append(None) else: out.append(b"TBranch\x00") - tbranch_index = len(out) - out.append(None) + tbranch_index = len(out) + out.append(None) datum = self._branch_data[branch.member("fName")] key_num_bytes = uproot.reading._key_format_big.size + 6 @@ -653,7 +655,6 @@ def serialize(self, out, branch): tattfill._members["fFillStyle"] = datum["fFillStyle"] out.append(tattfill.serialize(out)) - datum["metadata_start"] = (6 + 6 + 8 + 6) + sum( len(x) for x in out if x is not None ) @@ -662,6 +663,7 @@ def serialize(self, out, branch): # https://github.com/root-project/root/blob/87a998d48803bc207288d90038e60ff148827664/tree/tree/src/TBasket.cxx#L560-L578 # Without this, when small buffers are left uncompressed, ROOT complains about them not being compressed. # (I don't know where the "no, really, this is uncompressed" bit is.) + out.append( uproot.models.TBranch._tbranch13_format1.pack( datum["fCompress"], @@ -879,13 +881,6 @@ def serialize(self, out, branch): leaf.member("fIsUnsigned"), ) ) - if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): - out.append( - uproot.models.TLeaf._tleafelement1_format1.pack( - leaf.member("fID"), # fIsRange - leaf.member("fType"), - ) - ) if leaf.member("fLeafCount") is not None: out.append( uproot.deserialization._read_object_any_format1.pack( @@ -908,7 +903,13 @@ def serialize(self, out, branch): int(leaf.member("fMinimum")), int(leaf.member("fMaximum")) ) ) - + if isinstance(leaf, uproot.models.TLeaf.Model_TLeafElement_v1): + out.append( + uproot.models.TLeaf._tleafelement1_format1.pack( + leaf.member("fID"), # fIsRange + leaf.member("fType"), + ) + ) out[subany_tleaf_index] = ( uproot.serialization._serialize_object_any_format1.pack( numpy.uint32(sum(len(x) for x in out[subany_tleaf_index + 1 :]) + 4) @@ -924,9 +925,8 @@ def serialize(self, out, branch): # empty TObjArray of fBaskets (embedded) if len(datum["fBaskets"]) >= 1: - # msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." - # raise NotImplementedError(msg) - print("fBaskets", datum["fBaskets"][0]) + msg = f"NotImplementedError, cannot yet write TObjArray of fBaskets. Branch {datum['fName']} has {len(datum['fBaskets'])} fBaskets." + raise NotImplementedError(msg) out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -948,10 +948,15 @@ def serialize(self, out, branch): out.append(uproot._util.tobytes(datum["fBasketSeek"])) # out.append(datum["fFileName"].serialize()) # name = None? out.append(b"\x00") + if "fClonesName" in branch.all_members.keys(): - out[tbranch_index] = uproot.serialization.numbytes_version( - sum(len(x) for x in out[tbranch_index + 1 :]), 10 # TBranchElement (?) + out[tbranchelement_index] = uproot.serialization.numbytes_version( + sum(len(x) for x in out[tbranchelement_index + 1 :] if x is not None), + 10, # TBranchElement (?) ) + # out[tbranch_index] = uproot.serialization.numbytes_version( + # sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch + # ) else: out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch @@ -965,7 +970,7 @@ def serialize(self, out, branch): ) if ( "fClonesName" in branch.all_members.keys() - ): # TBranchElement - find a more robust way to check....or make sure this can't be misleading + ): # TBranchElement - find a more robust way to check....or make sure this is only is True if branch is a TBranchElement out.append( branch.member("fClassName").serialize() ) # These three are TStrings @@ -2209,7 +2214,7 @@ def add_branches( existing_ttree, ) updated_streamers = tree.add_branches( - sink, directory.file, new_branches + sink, directory, new_branches ) # need new_branches for extend... # start = key.seek_location # stop = start + key.num_bytes + key.compressed_bytes diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 8145fd468..8d4c9ab14 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -97,6 +97,7 @@ def __init__( self._basket_capacity = initial_basket_capacity self._resize_factor = resize_factor self._existing_branches = existing_branches + self._existing_ttree = existing_ttree if isinstance(branch_types, dict): branch_types_items = branch_types.items() else: @@ -316,6 +317,23 @@ def __init__( if existing_ttree: self._metadata["fTotBytes"] = existing_ttree.member("fTotBytes") self._metadata["fZipBytes"] = existing_ttree.member("fZipBytes") + self._metadata["fSavedBytes"] = existing_ttree.member("fSavedBytes") + self._metadata["fFlushedBytes"] = existing_ttree.member("fFlushedBytes") + self._metadata["fWeight"] = existing_ttree.member("fWeight") + self._metadata["fTimerInterval"] = existing_ttree.member("fTimerInterval") + self._metadata["fScanField"] = existing_ttree.member("fScanField") + self._metadata["fUpdate"] = existing_ttree.member("fUpdate") + self._metadata["fDefaultEntryOffsetLen"] = existing_ttree.member( + "fDefaultEntryOffsetLen" + ) + if "fNClusterRange" in existing_ttree.all_members.keys(): + self._metadata["fNClusterRange"] = existing_ttree.member( + "fNClusterRange" + ) + self._metadata["fMaxEntries"] = existing_ttree.member("fMaxEntries") + self._metadata["fMaxEntryLoop"] = existing_ttree.member("fMaxEntryLoop") + self._metadata["fAutoSave"] = existing_ttree.member("fAutoSave") + self._metadata["fEstimate"] = existing_ttree.member("fEstimate") self._key = None @@ -1163,7 +1181,6 @@ def write_anew(self, sink): out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch ) - out[any_tbranch_index] = ( uproot.serialization._serialize_object_any_format1.pack( numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) @@ -1171,7 +1188,6 @@ def write_anew(self, sink): uproot.const.kNewClassTag, ) ) - out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) @@ -1189,7 +1205,6 @@ def write_anew(self, sink): b"\x00\x00\x00\x00", ) ) - out.append(tleaf_reference_bytes) # null fAliases (b"\x00\x00\x00\x00") @@ -1576,7 +1591,7 @@ def get_tree_key(self): key = self._directory.data.get_key(self._name, None) return key - def add_branches(self, sink, file, new_branches): + def add_branches(self, sink, directory, new_branches): old_key = self.get_tree_key() # start = old_key.location # stop = start + old_key.num_bytes + old_key.compressed_bytes @@ -1584,11 +1599,13 @@ def add_branches(self, sink, file, new_branches): # sink.set_file_length(self._freesegments.fileheader.end) # sink.flush() # streamers = [x for x in file._cascading.tlist_of_streamers] - streamers = self.write_with_new_branches(sink, old_key) - self.extend(file, sink, new_branches) + + streamers = self.write_with_new_branches(sink, old_key, directory) + # streamers = self.update_ttree_add_branches(directory.file, sink, old_key, new_branches) + self.extend(directory.file, sink, new_branches) return streamers - def write_with_new_branches(self, sink, old_key): + def write_with_new_branches(self, sink, old_key, directory): models_for_streamers = [] key_num_bytes = uproot.reading._key_format_big.size + 6 name_asbytes = self._name.encode(errors="surrogateescape") @@ -1650,6 +1667,7 @@ def write_with_new_branches(self, sink, old_key): num_branches = sum( 0 if datum["kind"] == "record" else 1 for datum in self._branch_data ) + # Include original branches in num_branches if self._existing_branches: num_branches += len(self._existing_branches) @@ -1664,11 +1682,57 @@ def write_with_new_branches(self, sink, old_key): # Write old branches if self._existing_branches: - old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) + # old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) for branch in self._existing_branches: - # create OldTBranch object - out, temp = old_branches.serialize(out, branch) - tleaf_reference_numbers.append(temp) + # # create OldTBranch object + + cursor = ( + branch.cursor.copy() + ) # cursor before TObjArray of TBranches...hopefully + first_indx = cursor.index + cursor.skip_after(branch) + second_indx = cursor.index + cursor1 = branch.member("fLeaves").cursor.copy() + f_indx = cursor1.index + cursor1.skip_after(branch.member("fLeaves")) + # s_indx = cursor1.index + + branch_start = ( + len( + uproot.writing.identify.to_TString(branch.classname).serialize() + ) + + 2 + ) + + key_num_bytes = uproot.reading._key_format_big.size + 6 + name_asbytes = branch.name.encode(errors="surrogateescape") + title_asbytes = branch.title.encode(errors="surrogateescape") + key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len( + name_asbytes + ) + key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len( + title_asbytes + ) + + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx - branch_start : f_indx + 25 + ] + ) # to leaf reference... + + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) + tleaf_reference_numbers.append(absolute_location + 2) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + f_indx + 25 : second_indx + ] + ) + for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1906,7 +1970,6 @@ def write_with_new_branches(self, sink, old_key): uproot.const.kNewClassTag, ) ) - out[subtobjarray_of_leaves_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[subtobjarray_of_leaves_index + 1 :]), 3, # TObjArray @@ -1950,6 +2013,8 @@ def write_with_new_branches(self, sink, old_key): sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) + # TODO find tleaf reference numbers and append them ?? or update and then append + # TObjArray of TLeaf references tleaf_reference_bytes = uproot._util.tobytes( numpy.array(tleaf_reference_numbers, ">u4") @@ -1965,7 +2030,6 @@ def write_with_new_branches(self, sink, old_key): ) out.append(tleaf_reference_bytes) - # null fAliases (b"\x00\x00\x00\x00") # empty fIndexValues array (4-byte length is zero) # empty fIndex array (4-byte length is zero) diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 6a22d090f..c73620da8 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1511,7 +1511,6 @@ def add_branches( # variation of mktree for copying ttree streamers.append( uproot.writing._cascade.RawStreamerInfo(*rawstreamer) ) - print(update_streamers) directory._file._cascading.streamers.update_streamers( directory._file.sink, streamers, diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index 495c82076..dc544059d 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -14,31 +14,49 @@ def test_vector(tmp_path): - # with uproot.open( - # os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), - # minimal_ttree_metadata=False, - # ) as read: - # print(read['t']['x'].debug(1)) - # print(read.cursor.debug(read.file.chunk(start=0, stop=5852))) - # print(read.file.chunk(start=0, stop=5852).raw_data.tobytes()) - - # with uproot.update( - # os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - # ) as write: - # write.add_branches("t", {"branch": [1,2,3,4,5]}) - - # with uproot.open( - # os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - # minimal_ttree_metadata=False, - # ) as read: - # print(read['t']) - - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "vectorVectorDouble.root"), "READ") - tree = inFile.Get("t") - for x in tree: - print(getattr(x, "x")) - ROOT.TClass.TBranchElement - # inFile.Close() + with uproot.open( + os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), + minimal_ttree_metadata=False, + ) as read: + print(read["t"]["x"]) + + # print("break \n \n", uproot.models.TBranch._tbranch13_format1.size) + + with uproot.update( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + ) as write: + write.add_branches("t", {"branch": [1, 2, 3, 4, 5]}) + + with uproot.open( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + minimal_ttree_metadata=False, + ) as read: + # print(read['t']['x'].member('fLeaves')[0].cursor.index) + print("chunk bytes", read["t"]) + # with uproot.open("/Users/zobil/Desktop/directory/arrays1.root") as file: + # print(file.file.show_streamers()) + # inFile = ROOT.TFile.Open(os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ") + # tree = inFile.Get("t") + # for x in tree: + # print(getattr(x, "x")) + + +# with uproot.recreate("score.root") as file: +# data = np.array([1, 2, 3], dtype=np.int64) +# data1 = np.array([2, 3, 4], dtype=np.int64) +# data2 = np.array([3, 4, 5], dtype=np.int64) +# file["whatever"] = { +# "b1": ak.Array([data, data1, data2]), +# "b2": ak.Array([data1, data2, data]), +# "b3": ak.Array([data2, data, data1]), +# } + +# with uproot.update("score.root") as file: +# data = [] +# for i in range(2421): +# data.append(np.arange(0, 3, 1)) +# data = ak.Array(data, np.int64) +# file['whatever'].extend({"b1": data, "b2": data, "b3": data}) def simple_test(tmp_path): @@ -55,7 +73,7 @@ def simple_test(tmp_path): ) with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: - f["whatever"] = {"b1": data, "b2": data1} + f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: f["whatever"] = {"b1": data, "b2": data1} @@ -69,6 +87,18 @@ def simple_test(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: + # print("???", new['whatever'].branches) + # print("???", check['whatever']['b1'].all_members) + # string = uproot.models.TString.Model_TString(check['whatever']['b1'].classname) + # print(type(string)) + # print(new['whatever'].chunk.raw_data.tobytes()) + # print(new["whatever"].chunk.raw_data.tobytes()) + print(check["whatever"].chunk.raw_data.tobytes()) + # print(check["whatever"].chunk.raw_data.tobytes()) + assert ak.all(new["whatever"]["b1"].array() == data) + assert ak.all(new["whatever"]["b2"].array() == data1) + assert ak.all(new["whatever"]["b3"].array() == data) + assert ak.all(new["whatever"]["b4"].array() == data1) inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") tree = inFile.Get("whatever;1") indx = 0 @@ -76,7 +106,6 @@ def simple_test(tmp_path): assert getattr(x, "b1") == data[indx] assert getattr(x, "b2") == data1[indx] indx += 1 - print(check.file.chunk(start=0, stop=8000).raw_data.tobytes()) def test_multiple_trees(tmp_path): @@ -92,29 +121,29 @@ def test_multiple_trees(tmp_path): dtype=np.int32, ) - with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: + with uproot.recreate(os.path.join(tmp_path, "mult_trees1.root")) as f: f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} f["whatever1"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} - with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + with uproot.recreate(os.path.join(tmp_path, "mult_trees2.root")) as f: f["whatever"] = {"b1": data, "b2": data1} f["whatever1"] = {"b1": data, "b2": data1, "b3": data} - with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: + with uproot.update(os.path.join(tmp_path, "mult_trees2.root")) as f: f.add_branches("whatever", {"b3": data, "b4": data1}) f.add_branches("whatever1", {"b4": data1}) with uproot.open( - os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False + os.path.join(tmp_path, "mult_trees1.root"), minimal_ttree_metadata=False ) as check: with uproot.open( - os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False + os.path.join(tmp_path, "mult_trees2.root"), minimal_ttree_metadata=False ) as new: assert ak.all(new["whatever"]["b1"].array() == data) assert ak.all(new["whatever1"]["b4"].array() == data1) assert ak.all(new["whatever1"]["b2"].array() == data1) assert ak.all(new["whatever1"]["b4"].array() == data1) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees2.root"), "READ") tree = inFile.Get("whatever;1") indx = 0 for x in tree: @@ -175,16 +204,18 @@ def test_ak_arrays(tmp_path): with uproot.open( os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False ) as new: - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") - tree = inFile.Get("whatever") - for x in tree: - getattr(x, "b1") - inFile.Close() - df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) - npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) - assert ak.all(npy3["b1"] == [data, data1, data2]) - assert ak.all(npy3["b2"] == [data1, data2, data]) - assert ak.all(npy3["b3"] == [data2, data, data1]) + print(correct["whatever"]["b3"].member("fLeaves")[0].all_members) + print(new["whatever"]["b3"].member("fLeaves")[0].all_members) + # inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") + # tree = inFile.Get("whatever") + # for x in tree: + # getattr(x, "b1") + # inFile.Close() + # df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + # npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) + # assert ak.all(npy3["b1"] == [data, data1, data2]) + # assert ak.all(npy3["b2"] == [data1, data2, data]) + # assert ak.all(npy3["b3"] == [data2, data, data1]) def test_streamers_same_dtypes(tmp_path): @@ -395,10 +426,7 @@ def test_branch_v8(tmp_path): # with uproot.open(os.path.join("uproot-from-geant4.root copy")) as new: -# test_vector("/Users/zobil/Desktop/directory/vectorVector") - -files = [ - "uproot-from-geant4.root" -] # Values in fBaskets, can't open with uproot.update() +simple_test("/Users/zobil/Desktop/directory/") +# test_ak_arrays("/Users/zobil/Desktop/directory/vectorVector") # Try uproot unit tests to generate uproot-events maybe? From 0b42253590d4c65d7e11ce82bdf62da4484c1a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 12 Jun 2024 14:03:24 +0200 Subject: [PATCH 14/20] Fixed writing, all tests are passing so far --- src/uproot/writing/_cascadetree.py | 20 ++---- tests/test_1155_feat_add_copy_ttree.py | 84 ++++++-------------------- 2 files changed, 25 insertions(+), 79 deletions(-) diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 8d4c9ab14..bc4f360c8 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -997,6 +997,7 @@ def write_anew(self, sink): absolute_location = key_num_bytes + sum( len(x) for x in out if x is not None ) + absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) datum["tleaf_reference_number"] = absolute_location + 2 tleaf_reference_numbers.append(datum["tleaf_reference_number"]) @@ -1206,7 +1207,6 @@ def write_anew(self, sink): ) ) out.append(tleaf_reference_bytes) - # null fAliases (b"\x00\x00\x00\x00") # empty fIndexValues array (4-byte length is zero) # empty fIndex array (4-byte length is zero) @@ -1685,7 +1685,6 @@ def write_with_new_branches(self, sink, old_key, directory): # old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) for branch in self._existing_branches: # # create OldTBranch object - cursor = ( branch.cursor.copy() ) # cursor before TObjArray of TBranches...hopefully @@ -1704,16 +1703,6 @@ def write_with_new_branches(self, sink, old_key, directory): + 2 ) - key_num_bytes = uproot.reading._key_format_big.size + 6 - name_asbytes = branch.name.encode(errors="surrogateescape") - title_asbytes = branch.title.encode(errors="surrogateescape") - key_num_bytes += (1 if len(name_asbytes) < 255 else 5) + len( - name_asbytes - ) - key_num_bytes += (1 if len(title_asbytes) < 255 else 5) + len( - title_asbytes - ) - out.append( self._existing_ttree.chunk.raw_data.tobytes()[ first_indx - branch_start : f_indx + 25 @@ -1726,7 +1715,9 @@ def write_with_new_branches(self, sink, old_key, directory): absolute_location += 8 + 6 * ( sum(1 if x is None else 0 for x in out) - 1 ) - tleaf_reference_numbers.append(absolute_location + 2) + + tleaf_reference_numbers.append(absolute_location) + out.append( self._existing_ttree.chunk.raw_data.tobytes()[ f_indx + 25 : second_indx @@ -1957,7 +1948,6 @@ def write_with_new_branches(self, sink, old_key, directory): datum["counter"]["tleaf_reference_number"] ) ) - # specialized TLeaf* members (fMinimum, fMaximum) out.append(special_struct.pack(0, 0)) @@ -2019,7 +2009,7 @@ def write_with_new_branches(self, sink, old_key, directory): tleaf_reference_bytes = uproot._util.tobytes( numpy.array(tleaf_reference_numbers, ">u4") ) - out.append( + out.append( # This is still fine struct.pack( ">I13sI4s", (21 + len(tleaf_reference_bytes)) | uproot.const.kByteCountMask, diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index dc544059d..c13c072af 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -87,14 +87,10 @@ def simple_test(tmp_path): with uproot.open( os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False ) as new: - # print("???", new['whatever'].branches) - # print("???", check['whatever']['b1'].all_members) - # string = uproot.models.TString.Model_TString(check['whatever']['b1'].classname) - # print(type(string)) - # print(new['whatever'].chunk.raw_data.tobytes()) - # print(new["whatever"].chunk.raw_data.tobytes()) - print(check["whatever"].chunk.raw_data.tobytes()) - # print(check["whatever"].chunk.raw_data.tobytes()) + for key in new["whatever"].keys(): + assert ak.all( + new["whatever"].arrays()[key] == check["whatever"].arrays()[key] + ) assert ak.all(new["whatever"]["b1"].array() == data) assert ak.all(new["whatever"]["b2"].array() == data1) assert ak.all(new["whatever"]["b3"].array() == data) @@ -103,6 +99,7 @@ def simple_test(tmp_path): tree = inFile.Get("whatever;1") indx = 0 for x in tree: + print(getattr(x, "b1")) assert getattr(x, "b1") == data[indx] assert getattr(x, "b2") == data1[indx] indx += 1 @@ -173,7 +170,6 @@ def test_all_dtypes(tmp_path): def test_ak_arrays(tmp_path): - data = np.array([1, 2, 3], dtype=np.int64) data1 = np.array([2, 3, 4], dtype=np.int64) data2 = np.array([3, 4, 5], dtype=np.int64) @@ -187,13 +183,13 @@ def test_ak_arrays(tmp_path): with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: file["whatever"] = { "b1": ak.Array([data, data1, data2]), + "b2": ak.Array([data1, data2, data]), } with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: write.add_branches( "whatever", { - "b2": ak.Array([data1, data2, data]), "b3": ak.Array([data2, data, data1]), }, ) @@ -204,18 +200,20 @@ def test_ak_arrays(tmp_path): with uproot.open( os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False ) as new: - print(correct["whatever"]["b3"].member("fLeaves")[0].all_members) - print(new["whatever"]["b3"].member("fLeaves")[0].all_members) - # inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") - # tree = inFile.Get("whatever") - # for x in tree: - # getattr(x, "b1") - # inFile.Close() - # df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) - # npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) - # assert ak.all(npy3["b1"] == [data, data1, data2]) - # assert ak.all(npy3["b2"] == [data1, data2, data]) - # assert ak.all(npy3["b3"] == [data2, data, data1]) + new["whatever"].arrays() + print(correct["whatever"]["b2"].member("fLeaves")[0].all_members) + print(new["whatever"]["b2"].member("fLeaves")[0].all_members) + print(new["whatever"]["b3"].arrays()) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") + tree = inFile.Get("whatever") + for x in tree: + getattr(x, "b1") + inFile.Close() + df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) + assert ak.all(npy3["b1"] == [data, data1, data2]) + assert ak.all(npy3["b2"] == [data1, data2, data]) + assert ak.all(npy3["b3"] == [data2, data, data1]) def test_streamers_same_dtypes(tmp_path): @@ -341,42 +339,6 @@ def test_streamers_diff_dtypes(tmp_path): inFile.Close() -def HZZ_test(tmp_path): - with uproot.open( - data_path("uproot-HZZ.root"), minimal_ttree_metadata=False - ) as control: - with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: - data = [] - for i in range(2421): - data.append(np.arange(0, 3, 1)) - data = ak.Array(data) - new.add_branches("events", {"data": data}) - - with uproot.open( - os.path.join(tmp_path, "uproot-HZZ.root copy"), - minimal_ttree_metadata=False, - ) as new: - for key in control["events"].keys(): - assert key in new["events"].keys() - assert ak.all( - new["events"][key].array() == control["events"][key].array() - ) - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" - ) - tree = inFile.Get("events") - indx = 0 - inFile.Close() - df3 = ROOT.RDataFrame( - "events", os.path.join(tmp_path, "uproot-HZZ.root copy") - ) - npy3 = ak.from_rdataframe(df3, columns=("data"), keep_order=True) - # for key in npy3.keys(): - # assert ak.all(npy3[key] == control['events'][key].array()) - assert ak.all(npy3 == data) - inFile.Close() - - def test_nested_branches(tmp_path): # Make example with uproot.open(data_path("uproot-HZZ-objects.root")): @@ -424,9 +386,3 @@ def test_branch_v8(tmp_path): with uproot.update(os.path.join(tmp_path, "uproot-issue-250.root")) as new: print("hi") # with uproot.open(os.path.join("uproot-from-geant4.root copy")) as new: - - -simple_test("/Users/zobil/Desktop/directory/") -# test_ak_arrays("/Users/zobil/Desktop/directory/vectorVector") - -# Try uproot unit tests to generate uproot-events maybe? From 1236395162cb4db8ce37d89afb3caa6101c5f290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Thu, 4 Jul 2024 16:12:32 +0200 Subject: [PATCH 15/20] Added some version restraints --- src/uproot/writing/_cascadetree.py | 6 +- src/uproot/writing/writable.py | 68 +++++- tests/test_1155_feat_add_copy_ttree.py | 293 +++++++++++++------------ 3 files changed, 226 insertions(+), 141 deletions(-) diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index bc4f360c8..9bf062d2f 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -492,6 +492,7 @@ def extend(self, file, sink, data): for datum in self._branch_data: if datum["kind"] == "record": continue + fBasketBytes = datum["fBasketBytes"] fBasketEntry = datum["fBasketEntry"] fBasketSeek = datum["fBasketSeek"] @@ -619,6 +620,7 @@ def extend(self, file, sink, data): f"branch {kk!r} provided both as an explicit array and generated as a counter, and they disagree" ) provided[k] = v + actual_branches = {} for datum in self._branch_data: if datum["kind"] == "record": @@ -845,7 +847,9 @@ def extend(self, file, sink, data): fBasketEntry[i + 1] = num_entries + fBasketEntry[i] datum["fBasketSeek"][self._num_baskets] = location + datum["arrays_write_stop"] = self._num_baskets + 1 + # update TTree metadata in file self._num_entries += num_entries self._num_baskets += 1 @@ -1809,7 +1813,7 @@ def write_with_new_branches(self, sink, old_key, directory): out.append(("TLeaf" + letter_upper).encode() + b"\x00") if letter_upper == "O": special_struct = uproot.models.TLeaf._tleafO1_format1 - model = uproot.models.TLeaf.Model_TLeafO_v1.class_rawstreamers + model = uproot.models.TLeaf.Model_TLeafO_v1 elif letter_upper == "B": special_struct = uproot.models.TLeaf._tleafb1_format1 model = uproot.models.TLeaf.Model_TLeafB_v1 diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index c73620da8..4b4c56aad 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1386,14 +1386,76 @@ def add_branches( # variation of mktree for copying ttree msg = f"TTree {source} not found in file {self.file}" raise ValueError(msg) from None if not isinstance(old_ttree, uproot.TTree): - raise TypeError("'source' must be a TTree") # ? + raise TypeError("'source' must be the name of a TTree") # ? + if not isinstance(old_ttree, uproot.models.TTree.Model_TTree_v20): + if uproot.model.classname_version(old_ttree.encoded_classname) < 20: + raise TypeError( + f"Cannot update TTree models older than v20 in place. This TTree is {old_ttree.encoded_classname} from before 2017." + ) # TODO rewrite! + raise TypeError( + f"Can only update Model_TTree_v20 in place, not {old_ttree.encoded_classname}." + ) # TODO rewrite? + elif ( + uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[0] + == "TBranch" + and uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[ + 1 + ] + != 13 + ): + if ( + uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[ + 1 + ] + < 13 + ): + raise TypeError( + f"Cannot update TBranch models older than v13 in place. This TBranch is {old_ttree.branches[0].encoded_classname} from before 2017." + ) # TODO rewrite! + raise TypeError( + f"Can only update Model_TBranch_v13 in place, not {old_ttree.branches[0].encoded_classname}." + ) # TODO rewrite? + elif ( + uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[0] + == "TBranchElement" + and uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[ + 1 + ] + != 10 + ): + if ( + uproot.model.classname_decode(old_ttree.branches[0].encoded_classname)[ + 1 + ] + < 10 + ): + raise TypeError( + f"Cannot update TBranchElement models older than v10 in place. This TBranchElement is {old_ttree.branches[0].encoded_classname} from before 2017." + ) # TODO rewrite! + raise TypeError( + "Can only update TBranchElement models v10 in place." + ) # TODO rewrite? + leaf = uproot.model.classname_decode( + old_ttree.branches[0].member("fLeaves")[0].encoded_classname + ) + if leaf[0].startswith("TLeaf") and leaf[1] != 1: + if leaf[1] < 1: + raise TypeError( + f"Cannot only update version 1 TLeaf* and TLeafElements. This TLeaf* is a {old_ttree.branches[0].member('fLeaves')[0].encoded_classname} from before 2017." + ) + else: + raise TypeError( + f"Cannot only update version 1 TLeaf* and TLeafElements, not {old_ttree.branches[0].member('fLeaves')[0].encoded_classname}." + ) + names = old_ttree.keys() if len(names) == 0: raise ValueError( - f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" # TODO does this check need to be here? + f"""TTree {old_ttree.name} in file {old_ttree.file_path} is empty.""" ) + at = -1 - try: # Will this throw an error? proabably? + try: at = old_ttree.name.rindex("/") except ValueError: treename = old_ttree.name diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py index c13c072af..a1da90055 100644 --- a/tests/test_1155_feat_add_copy_ttree.py +++ b/tests/test_1155_feat_add_copy_ttree.py @@ -14,49 +14,37 @@ def test_vector(tmp_path): + data = [1, 2, 3, 4, 5] with uproot.open( os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), minimal_ttree_metadata=False, ) as read: - print(read["t"]["x"]) + with uproot.update( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + ) as write: + write.add_branches("t", {"branch": data}) - # print("break \n \n", uproot.models.TBranch._tbranch13_format1.size) + with uproot.open( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + minimal_ttree_metadata=False, + ) as new: + for i in read["t"].keys(): + assert ak.all(read["t"][i].array() == new["t"][i].array()) + assert ak.all(new["t"]["branch"].array() == data) - with uproot.update( - os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - ) as write: - write.add_branches("t", {"branch": [1, 2, 3, 4, 5]}) + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ" + ) + tree = inFile.Get("t;1") + indx = 0 - with uproot.open( - os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - minimal_ttree_metadata=False, - ) as read: - # print(read['t']['x'].member('fLeaves')[0].cursor.index) - print("chunk bytes", read["t"]) - # with uproot.open("/Users/zobil/Desktop/directory/arrays1.root") as file: - # print(file.file.show_streamers()) - # inFile = ROOT.TFile.Open(os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ") - # tree = inFile.Get("t") - # for x in tree: - # print(getattr(x, "x")) - - -# with uproot.recreate("score.root") as file: -# data = np.array([1, 2, 3], dtype=np.int64) -# data1 = np.array([2, 3, 4], dtype=np.int64) -# data2 = np.array([3, 4, 5], dtype=np.int64) -# file["whatever"] = { -# "b1": ak.Array([data, data1, data2]), -# "b2": ak.Array([data1, data2, data]), -# "b3": ak.Array([data2, data, data1]), -# } - -# with uproot.update("score.root") as file: -# data = [] -# for i in range(2421): -# data.append(np.arange(0, 3, 1)) -# data = ak.Array(data, np.int64) -# file['whatever'].extend({"b1": data, "b2": data, "b3": data}) + for x in tree: + indx2 = 0 + for i in getattr(x, "x"): + assert ak.all(list(i) == read["t"]["x"].array()[indx][indx2]) + indx2 += 1 + assert getattr(x, "branch") == data[indx] + indx += 1 def simple_test(tmp_path): @@ -99,7 +87,6 @@ def simple_test(tmp_path): tree = inFile.Get("whatever;1") indx = 0 for x in tree: - print(getattr(x, "b1")) assert getattr(x, "b1") == data[indx] assert getattr(x, "b2") == data1[indx] indx += 1 @@ -118,35 +105,28 @@ def test_multiple_trees(tmp_path): dtype=np.int32, ) - with uproot.recreate(os.path.join(tmp_path, "mult_trees1.root")) as f: - f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} - f["whatever1"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} - - with uproot.recreate(os.path.join(tmp_path, "mult_trees2.root")) as f: + with uproot.recreate(os.path.join(tmp_path, "mult_trees.root")) as f: f["whatever"] = {"b1": data, "b2": data1} f["whatever1"] = {"b1": data, "b2": data1, "b3": data} - with uproot.update(os.path.join(tmp_path, "mult_trees2.root")) as f: + with uproot.update(os.path.join(tmp_path, "mult_trees.root")) as f: f.add_branches("whatever", {"b3": data, "b4": data1}) f.add_branches("whatever1", {"b4": data1}) with uproot.open( - os.path.join(tmp_path, "mult_trees1.root"), minimal_ttree_metadata=False - ) as check: - with uproot.open( - os.path.join(tmp_path, "mult_trees2.root"), minimal_ttree_metadata=False - ) as new: - assert ak.all(new["whatever"]["b1"].array() == data) - assert ak.all(new["whatever1"]["b4"].array() == data1) - assert ak.all(new["whatever1"]["b2"].array() == data1) - assert ak.all(new["whatever1"]["b4"].array() == data1) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees2.root"), "READ") - tree = inFile.Get("whatever;1") - indx = 0 - for x in tree: - assert getattr(x, "b1") == data[indx] - assert getattr(x, "b2") == data1[indx] - indx += 1 + os.path.join(tmp_path, "mult_trees.root"), minimal_ttree_metadata=False + ) as new: + assert ak.all(new["whatever"]["b1"].array() == data) + assert ak.all(new["whatever1"]["b4"].array() == data1) + assert ak.all(new["whatever1"]["b2"].array() == data1) + assert ak.all(new["whatever1"]["b4"].array() == data1) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees.root"), "READ") + tree = inFile.Get("whatever;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == data[indx] + assert getattr(x, "b2") == data1[indx] + indx += 1 def test_different_fEntries(tmp_path): @@ -165,20 +145,105 @@ def test_different_fEntries(tmp_path): ) -def test_all_dtypes(tmp_path): - print("to-do") +def test_dtypes(tmp_path): # tleaf types? + data = [ + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int64, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int32, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int8, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 4.0, + ], + dtype=np.float32, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 4.0, + ], + dtype=np.float64, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.double, + ), + np.array([True, False, True, False], dtype=bool), + ] + + with uproot.recreate(os.path.join(tmp_path, "all_dtypes.root")) as f: + f["whatever"] = { + "b1": data[0], + "b2": data[1], + "b3": data[2], + "b4": data[3], + "b5": data[4], + "b6": data[5], + "b7": data[6], + } + + with uproot.update(os.path.join(tmp_path, "all_dtypes.root")) as write: + write.add_branches( + "whatever", + { + "b8": data[0], + "b9": data[1], + "b10": data[2], + "b12": data[3], + "b13": data[4], + "b14": data[5], + "b15": data[6], + }, + ) + + with uproot.open(os.path.join(tmp_path, "all_dtypes.root")) as read: + + read["whatever"] def test_ak_arrays(tmp_path): - data = np.array([1, 2, 3], dtype=np.int64) - data1 = np.array([2, 3, 4], dtype=np.int64) + data = np.array( + [ + 1, + 2, + ], + dtype=np.int64, + ) + data1 = np.array([2, 3, 4, 5], dtype=np.int64) data2 = np.array([3, 4, 5], dtype=np.int64) - with uproot.recreate(os.path.join(tmp_path, "control.root")) as file: - file["whatever"] = { - "b1": ak.Array([data, data1, data2]), - "b2": ak.Array([data1, data2, data]), - "b3": ak.Array([data2, data, data1]), - } with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: file["whatever"] = { @@ -195,30 +260,24 @@ def test_ak_arrays(tmp_path): ) with uproot.open( - os.path.join(tmp_path, "control.root"), minimal_ttree_metadata=False - ) as correct: - with uproot.open( - os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False - ) as new: - new["whatever"].arrays() - print(correct["whatever"]["b2"].member("fLeaves")[0].all_members) - print(new["whatever"]["b2"].member("fLeaves")[0].all_members) - print(new["whatever"]["b3"].arrays()) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") - tree = inFile.Get("whatever") - for x in tree: - getattr(x, "b1") - inFile.Close() - df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) - npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) - assert ak.all(npy3["b1"] == [data, data1, data2]) - assert ak.all(npy3["b2"] == [data1, data2, data]) - assert ak.all(npy3["b3"] == [data2, data, data1]) + os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False + ) as new: + new["whatever"].arrays() + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") + tree = inFile.Get("whatever") + for x in tree: + getattr(x, "b1") + inFile.Close() + df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) + assert ak.all(npy3["b1"] == [data, data1, data2]) + assert ak.all(npy3["b2"] == [data1, data2, data]) + assert ak.all(npy3["b3"] == [data2, data, data1]) def test_streamers_same_dtypes(tmp_path): - # Make file with ROOT - inFile = ROOT.TFile(os.path.join("root_same_dtypes.root"), "RECREATE") + # Make an example file with ROOT + inFile = ROOT.TFile(os.path.join(tmp_path, "root_same_dtypes.root"), "RECREATE") tree = ROOT.TTree("tree1", "tree") npa = np.zeros(4, dtype=np.float32) tree.Branch("b1", npa, "b1/F") @@ -249,7 +308,7 @@ def test_streamers_same_dtypes(tmp_path): assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] indx += 1 - tree.Scan() + # tree.Scan() check = [ "TBranch", "TAttLine", @@ -277,7 +336,7 @@ def test_streamers_same_dtypes(tmp_path): def test_streamers_diff_dtypes(tmp_path): - # Make file with ROOT + # Make an example file with ROOT inFile = ROOT.TFile( "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" ) @@ -339,50 +398,10 @@ def test_streamers_diff_dtypes(tmp_path): inFile.Close() -def test_nested_branches(tmp_path): - # Make example - with uproot.open(data_path("uproot-HZZ-objects.root")): - print("examine this ") +def test_old_versions(tmp_path): + with pytest.raises(TypeError): + with uproot.update(os.path.join(tmp_path, "cp-uproot-HZZ.root")) as file: + file.add_branches("events", {"b2": [1, 2, 3]}) -def HZZ_test(tmp_path): - with uproot.open( - data_path("uproot-HZZ.root"), minimal_ttree_metadata=False - ) as control: - with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root copy")) as new: - data = [] - for i in range(2421): - data.append(np.arange(0, 3, 1)) - data = ak.Array(data) - new.add_branches("events", {"data": data}) - - with uproot.open( - os.path.join(tmp_path, "uproot-HZZ.root copy"), - minimal_ttree_metadata=False, - ) as new: - for key in control["events"].keys(): - assert key in new["events"].keys() - assert ak.all( - new["events"][key].array() == control["events"][key].array() - ) - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "uproot-HZZ.root copy"), "READ" - ) - tree = inFile.Get("events") - indx = 0 - inFile.Close() - df3 = ROOT.RDataFrame( - "events", os.path.join(tmp_path, "uproot-HZZ.root copy") - ) - npy3 = ak.from_rdataframe(df3, columns=("data"), keep_order=True) - # for key in npy3.keys(): - # assert ak.all(npy3[key] == control['events'][key].array()) - assert ak.all(npy3 == data) - inFile.Close() - - -def test_branch_v8(tmp_path): - with uproot.open(os.path.join(tmp_path, "uproot-issue-250.root")) as control: - with uproot.update(os.path.join(tmp_path, "uproot-issue-250.root")) as new: - print("hi") - # with uproot.open(os.path.join("uproot-from-geant4.root copy")) as new: +test_old_versions("/Users/zobil/Desktop/directory/uproot-HZZ") From 3f6db0fb11b8b6f95a01ffb42a02f3020225c059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 5 Jul 2024 10:18:39 +0200 Subject: [PATCH 16/20] Pytest issues --- src/uproot/writing/_cascadetree.py | 97 +++--- src/uproot/writing/writable.py | 2 +- tests/test_1155_feat_add_copy_ttree.py | 407 ------------------------- 3 files changed, 44 insertions(+), 462 deletions(-) delete mode 100644 tests/test_1155_feat_add_copy_ttree.py diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 9bf062d2f..4432938c2 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -98,16 +98,17 @@ def __init__( self._resize_factor = resize_factor self._existing_branches = existing_branches self._existing_ttree = existing_ttree + if isinstance(branch_types, dict): branch_types_items = branch_types.items() else: branch_types_items = branch_types + if len(branch_types) == 0: raise ValueError("TTree must have at least one branch") self._branch_data = [] self._branch_lookup = {} - for branch_name, branch_type in branch_types_items: branch_dict = None branch_dtype = None @@ -125,7 +126,8 @@ def __init__( if isinstance(branch_type, str) and branch_type.strip() == "bytes": raise TypeError branch_dtype = numpy.dtype(branch_type) - except TypeError as err: + + except (TypeError, ValueError) as err: try: awkward = uproot.extras.awkward() except ModuleNotFoundError as err: @@ -150,6 +152,7 @@ def __init__( branch_datashape = branch_datashape.content branch_dtype = self._branch_ak_to_np(branch_datashape) + if branch_dict is not None: if branch_name not in self._branch_lookup: self._branch_lookup[branch_name] = len(self._branch_data) @@ -173,6 +176,7 @@ def __init__( self._branch_data.append( self._branch_np(subname, content, dtype) ) + elif branch_dtype is not None: if branch_name not in self._branch_lookup: self._branch_lookup[branch_name] = len(self._branch_data) @@ -204,7 +208,6 @@ def __init__( counter = self._branch_np( counter_name, counter_dtype, counter_dtype, kind="counter" ) - if counter_name in self._branch_lookup: # counters always replace non-counters del self._branch_data[self._branch_lookup[counter_name]] @@ -314,27 +317,6 @@ def __init__( "fAutoFlush": -30000000, "fEstimate": 1000000, } - if existing_ttree: - self._metadata["fTotBytes"] = existing_ttree.member("fTotBytes") - self._metadata["fZipBytes"] = existing_ttree.member("fZipBytes") - self._metadata["fSavedBytes"] = existing_ttree.member("fSavedBytes") - self._metadata["fFlushedBytes"] = existing_ttree.member("fFlushedBytes") - self._metadata["fWeight"] = existing_ttree.member("fWeight") - self._metadata["fTimerInterval"] = existing_ttree.member("fTimerInterval") - self._metadata["fScanField"] = existing_ttree.member("fScanField") - self._metadata["fUpdate"] = existing_ttree.member("fUpdate") - self._metadata["fDefaultEntryOffsetLen"] = existing_ttree.member( - "fDefaultEntryOffsetLen" - ) - if "fNClusterRange" in existing_ttree.all_members.keys(): - self._metadata["fNClusterRange"] = existing_ttree.member( - "fNClusterRange" - ) - self._metadata["fMaxEntries"] = existing_ttree.member("fMaxEntries") - self._metadata["fMaxEntryLoop"] = existing_ttree.member("fMaxEntryLoop") - self._metadata["fAutoSave"] = existing_ttree.member("fAutoSave") - self._metadata["fEstimate"] = existing_ttree.member("fEstimate") - self._key = None def _branch_ak_to_np(self, branch_datashape): @@ -359,7 +341,6 @@ def _branch_ak_to_np(self, branch_datashape): def _branch_np( self, branch_name, branch_type, branch_dtype, counter=None, kind="normal" ): - branch_dtype = branch_dtype.newbyteorder(">") if branch_dtype.subdtype is None: @@ -368,7 +349,6 @@ def _branch_np( branch_dtype, branch_shape = branch_dtype.subdtype letter = _dtype_to_char.get(branch_dtype) - if letter is None: raise TypeError(f"cannot write NumPy dtype {branch_dtype} in TTree") @@ -971,6 +951,7 @@ def write_anew(self, sink): self._num_entries, # fEntryNumber ) ) + # fIOFeatures (TIOFeatures) out.append(b"@\x00\x00\x07\x00\x00\x1a\xa1/\x10\x00") @@ -985,6 +966,7 @@ def write_anew(self, sink): datum["fZipBytes"], ) ) + # empty TObjArray of TBranches out.append( b"@\x00\x00\x15\x00\x03\x00\x01\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -1001,10 +983,10 @@ def write_anew(self, sink): absolute_location = key_num_bytes + sum( len(x) for x in out if x is not None ) - absolute_location += 8 + 6 * (sum(1 if x is None else 0 for x in out) - 1) datum["tleaf_reference_number"] = absolute_location + 2 tleaf_reference_numbers.append(datum["tleaf_reference_number"]) + subany_tleaf_index = len(out) out.append(None) @@ -1029,6 +1011,7 @@ def write_anew(self, sink): special_struct = uproot.models.TLeaf._tleafd1_format1 elif letter_upper == "C": special_struct = uproot.models.TLeaf._tleafc1_format1 + fLenType = datum["dtype"].itemsize fIsUnsigned = letter != letter_upper @@ -1039,6 +1022,7 @@ def write_anew(self, sink): if datum["counter"] is not None: dims = "[" + datum["counter"]["fName"] + "]" + dims + # single TLeaf leaf_name = datum["fName"].encode(errors="surrogateescape") leaf_title = (datum["fName"] + dims).encode(errors="surrogateescape") @@ -1132,6 +1116,7 @@ def write_anew(self, sink): fIsUnsigned, ) ) + if datum["counter"] is None: # null fLeafCount out.append(b"\x00\x00\x00\x00") @@ -1145,7 +1130,6 @@ def write_anew(self, sink): # specialized TLeaf* members (fMinimum, fMaximum) out.append(special_struct.pack(0, 0)) - datum["tleaf_special_struct"] = special_struct out[subany_tleaf_index] = ( @@ -1174,18 +1158,22 @@ def write_anew(self, sink): # speedbump and fBasketBytes out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketBytes"])) + # speedbump and fBasketEntry out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketEntry"])) + # speedbump and fBasketSeek out.append(b"\x01") out.append(uproot._util.tobytes(datum["fBasketSeek"])) + # empty fFileName out.append(b"\x00") out[tbranch_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tbranch_index + 1 :]), 13 # TBranch ) + out[any_tbranch_index] = ( uproot.serialization._serialize_object_any_format1.pack( numpy.uint32(sum(len(x) for x in out[any_tbranch_index + 1 :]) + 4) @@ -1193,6 +1181,7 @@ def write_anew(self, sink): uproot.const.kNewClassTag, ) ) + out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) @@ -1210,7 +1199,9 @@ def write_anew(self, sink): b"\x00\x00\x00\x00", ) ) + out.append(tleaf_reference_bytes) + # null fAliases (b"\x00\x00\x00\x00") # empty fIndexValues array (4-byte length is zero) # empty fIndex array (4-byte length is zero) @@ -1263,6 +1254,7 @@ def write_updates(self, sink): self._metadata["fEstimate"], ), ) + for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1307,21 +1299,27 @@ def write_updates(self, sink): datum["fBasketEntry"][start : stop + 1] ) fBasketSeek_part = uproot._util.tobytes(datum["fBasketSeek"][start:stop]) + position = base + datum["basket_metadata_start"] + 1 position += datum["fBasketBytes"][:start].nbytes sink.write(position, fBasketBytes_part) position += len(fBasketBytes_part) position += datum["fBasketBytes"][stop:].nbytes + position += 1 position += datum["fBasketEntry"][:start].nbytes sink.write(position, fBasketEntry_part) position += len(fBasketEntry_part) position += datum["fBasketEntry"][stop + 1 :].nbytes + position += 1 position += datum["fBasketSeek"][:start].nbytes sink.write(position, fBasketSeek_part) position += len(fBasketSeek_part) position += datum["fBasketSeek"][stop:].nbytes + + datum["arrays_write_start"] = datum["arrays_write_stop"] + if datum["dtype"] == ">U0": position = ( base @@ -1377,6 +1375,7 @@ def write_np_basket(self, sink, branch_name, compression, array): itemsize = array.dtype.itemsize for item in array.shape[1:]: itemsize *= item + uncompressed_data = uproot._util.tobytes(array) compressed_data = uproot.compression.compress(uncompressed_data, compression) @@ -1386,6 +1385,7 @@ def write_np_basket(self, sink, branch_name, compression, array): parent_location = self._directory.key.location # FIXME: is this correct? location = self._freesegments.allocate(fNbytes, dry_run=False) + out = [] out.append( uproot.reading._key_format_big.pack( @@ -1414,6 +1414,7 @@ def write_np_basket(self, sink, branch_name, compression, array): out.append(b"\x00") # part of the Key (included in fKeylen, at least) out.append(compressed_data) + sink.write(location, b"".join(out)) self._freesegments.write(sink) sink.set_file_length(self._freesegments.fileheader.end) @@ -1586,30 +1587,20 @@ def write_string_basket(self, sink, branch_name, compression, array, offsets): return fKeylen + fObjlen, fNbytes, location - def get_tree_key(self): + def add_branches(self, sink, directory, new_branches): + # Get readonlykey for old tree if ";" in self._name: at = self._name.rindex(";") item, cycle = self._name[:at], self._name[at + 1 :] key = self._directory.data.get_key(item, cycle) else: key = self._directory.data.get_key(self._name, None) - return key - def add_branches(self, sink, directory, new_branches): - old_key = self.get_tree_key() - # start = old_key.location - # stop = start + old_key.num_bytes + old_key.compressed_bytes - # self._freesegments.release(start, stop) - # sink.set_file_length(self._freesegments.fileheader.end) - # sink.flush() - # streamers = [x for x in file._cascading.tlist_of_streamers] - - streamers = self.write_with_new_branches(sink, old_key, directory) - # streamers = self.update_ttree_add_branches(directory.file, sink, old_key, new_branches) + streamers = self._write_with_new_branches(sink, key) self.extend(directory.file, sink, new_branches) return streamers - def write_with_new_branches(self, sink, old_key, directory): + def _write_with_new_branches(self, sink, old_key): models_for_streamers = [] key_num_bytes = uproot.reading._key_format_big.size + 6 name_asbytes = self._name.encode(errors="surrogateescape") @@ -1673,8 +1664,8 @@ def write_with_new_branches(self, sink, old_key, directory): ) # Include original branches in num_branches - if self._existing_branches: - num_branches += len(self._existing_branches) + num_branches += len(self._existing_branches) + # TObjArray header with fName: "" out.append(b"\x00\x01\x00\x00\x00\x00\x03\x00@\x00\x00") out.append( @@ -1686,19 +1677,15 @@ def write_with_new_branches(self, sink, old_key, directory): # Write old branches if self._existing_branches: - # old_branches = uproot.writing._cascade.OldBranches(self._existing_branches) for branch in self._existing_branches: - # # create OldTBranch object - cursor = ( - branch.cursor.copy() - ) # cursor before TObjArray of TBranches...hopefully + cursor = branch.cursor.copy() + + # cursor before TObjArray of TBranches first_indx = cursor.index cursor.skip_after(branch) second_indx = cursor.index - cursor1 = branch.member("fLeaves").cursor.copy() - f_indx = cursor1.index - cursor1.skip_after(branch.member("fLeaves")) - # s_indx = cursor1.index + + f_indx = branch.member("fLeaves").cursor.index branch_start = ( len( @@ -1711,8 +1698,9 @@ def write_with_new_branches(self, sink, old_key, directory): self._existing_ttree.chunk.raw_data.tobytes()[ first_indx - branch_start : f_indx + 25 ] - ) # to leaf reference... + ) + # Write TLeaf Reference absolute_location = key_num_bytes + sum( len(x) for x in out if x is not None ) @@ -1722,6 +1710,7 @@ def write_with_new_branches(self, sink, old_key, directory): tleaf_reference_numbers.append(absolute_location) + # Write remainder of branch out.append( self._existing_ttree.chunk.raw_data.tobytes()[ f_indx + 25 : second_indx diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 4b4c56aad..6b5ccffaf 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1386,7 +1386,7 @@ def add_branches( # variation of mktree for copying ttree msg = f"TTree {source} not found in file {self.file}" raise ValueError(msg) from None if not isinstance(old_ttree, uproot.TTree): - raise TypeError("'source' must be the name of a TTree") # ? + raise TypeError("'source' must be the name of a TTree") if not isinstance(old_ttree, uproot.models.TTree.Model_TTree_v20): if uproot.model.classname_version(old_ttree.encoded_classname) < 20: raise TypeError( diff --git a/tests/test_1155_feat_add_copy_ttree.py b/tests/test_1155_feat_add_copy_ttree.py deleted file mode 100644 index a1da90055..000000000 --- a/tests/test_1155_feat_add_copy_ttree.py +++ /dev/null @@ -1,407 +0,0 @@ -import uproot -from skhep_testdata import data_path, known_files -import uproot.model -import uproot.models -import uproot.serialization -import uproot.writing.writable -import os -import pytest - -ROOT = pytest.importorskip("ROOT") -import numpy as np - -import awkward as ak - - -def test_vector(tmp_path): - data = [1, 2, 3, 4, 5] - with uproot.open( - os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), - minimal_ttree_metadata=False, - ) as read: - with uproot.update( - os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - ) as write: - write.add_branches("t", {"branch": data}) - - with uproot.open( - os.path.join(tmp_path, "cp-vectorVectorDouble.root"), - minimal_ttree_metadata=False, - ) as new: - for i in read["t"].keys(): - assert ak.all(read["t"][i].array() == new["t"][i].array()) - assert ak.all(new["t"]["branch"].array() == data) - - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ" - ) - tree = inFile.Get("t;1") - indx = 0 - - for x in tree: - indx2 = 0 - for i in getattr(x, "x"): - assert ak.all(list(i) == read["t"]["x"].array()[indx][indx2]) - indx2 += 1 - assert getattr(x, "branch") == data[indx] - indx += 1 - - -def simple_test(tmp_path): - data = np.array([1, 2, 3, 4, 5], dtype=np.int64) - data1 = np.array( - [ - 2.0, - 3.0, - 4.0, - 5.0, - 6.0, - ], - dtype=np.int32, - ) - - with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: - f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} - - with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: - f["whatever"] = {"b1": data, "b2": data1} - - with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: - f.add_branches("whatever", {"b3": data, "b4": data1}) - - with uproot.open( - os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False - ) as check: - with uproot.open( - os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False - ) as new: - for key in new["whatever"].keys(): - assert ak.all( - new["whatever"].arrays()[key] == check["whatever"].arrays()[key] - ) - assert ak.all(new["whatever"]["b1"].array() == data) - assert ak.all(new["whatever"]["b2"].array() == data1) - assert ak.all(new["whatever"]["b3"].array() == data) - assert ak.all(new["whatever"]["b4"].array() == data1) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") - tree = inFile.Get("whatever;1") - indx = 0 - for x in tree: - assert getattr(x, "b1") == data[indx] - assert getattr(x, "b2") == data1[indx] - indx += 1 - - -def test_multiple_trees(tmp_path): - data = np.array([1, 2, 3, 4, 5], dtype=np.int64) - data1 = np.array( - [ - 2.0, - 3.0, - 4.0, - 5.0, - 6.0, - ], - dtype=np.int32, - ) - - with uproot.recreate(os.path.join(tmp_path, "mult_trees.root")) as f: - f["whatever"] = {"b1": data, "b2": data1} - f["whatever1"] = {"b1": data, "b2": data1, "b3": data} - - with uproot.update(os.path.join(tmp_path, "mult_trees.root")) as f: - f.add_branches("whatever", {"b3": data, "b4": data1}) - f.add_branches("whatever1", {"b4": data1}) - - with uproot.open( - os.path.join(tmp_path, "mult_trees.root"), minimal_ttree_metadata=False - ) as new: - assert ak.all(new["whatever"]["b1"].array() == data) - assert ak.all(new["whatever1"]["b4"].array() == data1) - assert ak.all(new["whatever1"]["b2"].array() == data1) - assert ak.all(new["whatever1"]["b4"].array() == data1) - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees.root"), "READ") - tree = inFile.Get("whatever;1") - indx = 0 - for x in tree: - assert getattr(x, "b1") == data[indx] - assert getattr(x, "b2") == data1[indx] - indx += 1 - - -def test_different_fEntries(tmp_path): - data = np.array([1, 2, 3, 4, 5], dtype=np.int64) - data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) - - with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: - with pytest.raises(ValueError): - f["whatever"] = {"b1": data, "b2": data1} - f.add_branches( - "whatever", - { - "b3": data, - "b4": np.array([2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.int32), - }, - ) - - -def test_dtypes(tmp_path): # tleaf types? - data = [ - np.array( - [ - 1, - 2, - 3, - 4, - ], - dtype=np.int64, - ), - np.array( - [ - 1, - 2, - 3, - 4, - ], - dtype=np.int32, - ), - np.array( - [ - 1, - 2, - 3, - 4, - ], - dtype=np.int8, - ), - np.array( - [ - 1.0, - 2.0, - 3.0, - 4.0, - ], - dtype=np.float32, - ), - np.array( - [ - 1.0, - 2.0, - 3.0, - 4.0, - ], - dtype=np.float64, - ), - np.array( - [ - 1, - 2, - 3, - 4, - ], - dtype=np.double, - ), - np.array([True, False, True, False], dtype=bool), - ] - - with uproot.recreate(os.path.join(tmp_path, "all_dtypes.root")) as f: - f["whatever"] = { - "b1": data[0], - "b2": data[1], - "b3": data[2], - "b4": data[3], - "b5": data[4], - "b6": data[5], - "b7": data[6], - } - - with uproot.update(os.path.join(tmp_path, "all_dtypes.root")) as write: - write.add_branches( - "whatever", - { - "b8": data[0], - "b9": data[1], - "b10": data[2], - "b12": data[3], - "b13": data[4], - "b14": data[5], - "b15": data[6], - }, - ) - - with uproot.open(os.path.join(tmp_path, "all_dtypes.root")) as read: - - read["whatever"] - - -def test_ak_arrays(tmp_path): - data = np.array( - [ - 1, - 2, - ], - dtype=np.int64, - ) - data1 = np.array([2, 3, 4, 5], dtype=np.int64) - data2 = np.array([3, 4, 5], dtype=np.int64) - - with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: - file["whatever"] = { - "b1": ak.Array([data, data1, data2]), - "b2": ak.Array([data1, data2, data]), - } - - with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: - write.add_branches( - "whatever", - { - "b3": ak.Array([data2, data, data1]), - }, - ) - - with uproot.open( - os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False - ) as new: - new["whatever"].arrays() - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") - tree = inFile.Get("whatever") - for x in tree: - getattr(x, "b1") - inFile.Close() - df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) - npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) - assert ak.all(npy3["b1"] == [data, data1, data2]) - assert ak.all(npy3["b2"] == [data1, data2, data]) - assert ak.all(npy3["b3"] == [data2, data, data1]) - - -def test_streamers_same_dtypes(tmp_path): - # Make an example file with ROOT - inFile = ROOT.TFile(os.path.join(tmp_path, "root_same_dtypes.root"), "RECREATE") - tree = ROOT.TTree("tree1", "tree") - npa = np.zeros(4, dtype=np.float32) - tree.Branch("b1", npa, "b1/F") - for i in range(4): - npa[0] = i**0 - tree.Fill() - inFile.Write() - inFile.Close() - - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_same_dtypes.root"), "OPEN") - tree = inFile.Get("tree1") - data = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32) - - with uproot.update(os.path.join(tmp_path, "root_same_dtypes.root")) as file: - file.add_branches("tree1", {"b2": data}) - - with uproot.open( - os.path.join(tmp_path, "root_same_dtypes.root"), minimal_ttree_metadata=False - ) as file: - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "root_same_dtypes.root"), "READ" - ) - inFile.ShowStreamerInfo() - tree = inFile.Get("tree1;1") - indx = 0 - for x in tree: - assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] - assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] - indx += 1 - - # tree.Scan() - check = [ - "TBranch", - "TAttLine", - "TCollection", - "TLeafF", - "listOfRules", - "TString", - "TObjArray", - "TAttFill", - "TBranchRef", - "TList", - "ROOT::TIOFeatures", - "TSeqCollection", - "TAttMarker", - "TTree", - "TNamed", - "TObject", - "TAttLine", - "TLeaf", - "TRefTable", - ] - for i in set(file.file.streamers): - assert i in check - inFile.Close() - - -def test_streamers_diff_dtypes(tmp_path): - # Make an example file with ROOT - inFile = ROOT.TFile( - "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" - ) - tree = ROOT.TTree("tree1", "tree") - npa = np.zeros(4, dtype=float) - tree.Branch("b1", npa, "b1F") - for i in range(4): - npa[0] = i**0 - tree.Fill() - inFile.Write() - inFile.Close() - - inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_diff_dtypes.root"), "OPEN") - tree = inFile.Get("tree1") - data = np.array([5, 6, 7, 8], dtype=np.int64) - data1 = np.array([5.2, 6.3, 7.4, 8.5], dtype=np.float64) - with uproot.update(os.path.join(tmp_path, "root_diff_dtypes.root")) as file: - file.add_branches("tree1", {"b2": data, "b3": data1}) - - with uproot.open( - os.path.join(tmp_path, "root_diff_dtypes.root"), minimal_ttree_metadata=False - ) as file: - file["tree1"]["b2"].member("fLeaves")[0].all_members - inFile = ROOT.TFile.Open( - os.path.join(tmp_path, "root_diff_dtypes.root"), "READ" - ) - tree = inFile.Get("tree1;1") - indx = 0 - for x in tree: - assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] - assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] - indx += 1 - # tree.Scan() - check = [ - "TBranch", - "TAttLine", - "TCollection", - "TLeafF", - "listOfRules", - "TString", - "TObjArray", - "TAttFill", - "TBranchRef", - "TList", - "ROOT::TIOFeatures", - "TSeqCollection", - "TAttMarker", - "TTree", - "TNamed", - "TObject", - "TAttLine", - "TLeaf", - "TRefTable", - "TLeafL", - "TLeafD", - ] - for i in set(file.file.streamers): - assert i in check - inFile.Close() - - -def test_old_versions(tmp_path): - with pytest.raises(TypeError): - with uproot.update(os.path.join(tmp_path, "cp-uproot-HZZ.root")) as file: - file.add_branches("events", {"b2": [1, 2, 3]}) - - -test_old_versions("/Users/zobil/Desktop/directory/uproot-HZZ") From 29d360c6afa81235e8e70c3e7c86e1c85ac46eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 5 Jul 2024 10:29:28 +0200 Subject: [PATCH 17/20] still pytest issues --- src/uproot/reading.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 3615b875e..14b6a9f51 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -466,7 +466,6 @@ def __init__(self, file): _file_header_fields_small = struct.Struct(">4siiiiiiiBiiiH16s") _file_header_fields_big = struct.Struct(">4siiqqiiiBiqiH16s") - class ReadOnlyFile(CommonFileMethods): """ Args: @@ -1264,7 +1263,6 @@ def hook_after_interpret_streamers(self, **kwargs): _directory_format_big = struct.Struct(">hIIiiqqq") _directory_format_num_keys = struct.Struct(">i") - class ReadOnlyDirectory(Mapping): """ Args: @@ -2032,6 +2030,7 @@ def key(self, where): Note that this does not read any data from the file. """ where = uproot._util.ensure_str(where) + if "/" in where: step, last_item = self.descent_into_path(where) return step.key(last_item) @@ -2056,8 +2055,7 @@ def key(self, where): # Follow ROOT's behaviour in comparing negative fCycle values elif cycle is None and abs(last.fCycle) < abs(key.fCycle): last = key - chunk, tmp_cursor = key.get_uncompressed_chunk_cursor() - # print("debug", tmp_cursor.debug(chunk)) + if last is not None: return last elif cycle is None: @@ -2070,8 +2068,6 @@ def key(self, where): ) def __getitem__(self, where): - # if where == "x": - # print("getitem readonlydirectory") if "/" in where or ":" in where: items = where.split("/") step = last = self @@ -2340,7 +2336,6 @@ def data_cursor(self): file where the data begins (the object to be read, after its copy of the ``TKey`` and before the object's number of bytes/version header). """ - # print("data_cursor!!", self._fClassName) return uproot.source.cursor.Cursor(self._fSeekKey + self._fKeylen) @property @@ -2482,6 +2477,7 @@ def get(self): del self._file.object_cache[self.cache_key] else: return out + if self._fClassName in must_be_attached: selffile = self._file parent = self @@ -2701,4 +2697,4 @@ def hook_after_interpret(self, **kwargs): This is the last hook called in the :doc:`uproot.reading.ReadOnlyKey` constructor. - """ + """ \ No newline at end of file From fd162699ba96bf7845e50d746150fd401ab5ff9c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 5 Jul 2024 08:29:50 +0000 Subject: [PATCH 18/20] style: pre-commit fixes --- src/uproot/reading.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uproot/reading.py b/src/uproot/reading.py index 14b6a9f51..0dce3a970 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -466,6 +466,7 @@ def __init__(self, file): _file_header_fields_small = struct.Struct(">4siiiiiiiBiiiH16s") _file_header_fields_big = struct.Struct(">4siiqqiiiBiqiH16s") + class ReadOnlyFile(CommonFileMethods): """ Args: @@ -1263,6 +1264,7 @@ def hook_after_interpret_streamers(self, **kwargs): _directory_format_big = struct.Struct(">hIIiiqqq") _directory_format_num_keys = struct.Struct(">i") + class ReadOnlyDirectory(Mapping): """ Args: @@ -2697,4 +2699,4 @@ def hook_after_interpret(self, **kwargs): This is the last hook called in the :doc:`uproot.reading.ReadOnlyKey` constructor. - """ \ No newline at end of file + """ From 5a711ee56de7280538820ab111a3b79580ca907c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Fri, 5 Jul 2024 10:38:29 +0200 Subject: [PATCH 19/20] edited docs --- src/uproot/writing/writable.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 6b5ccffaf..0069644df 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -1352,10 +1352,9 @@ def add_branches( # variation of mktree for copying ttree ): """ Args: - source (TTree): Name of existing TTree to copy/replace - branch_types (dict or pairs of str \u2192 NumPy dtype/Awkward type): Name - and type specification for the TBranches. - title (str): Title for the new TTree. + source (TTree): Name of existing TTree to copy/replace. TTree must be version 20. + branches (dict of pairs of str \u2192 NumPy dtype/Awkward type): Names and data + of branches to be added to the TTree. counter_name (callable of str \u2192 str): Function to generate counter-TBranch names for Awkward Arrays of variable-length lists. field_name (callable of str \u2192 str): Function to generate TBranch @@ -1366,8 +1365,9 @@ def add_branches( # variation of mktree for copying ttree this specifies how many more TBasket slots to allocate as a multiplicative factor. Adds new branches to existing TTrees by rewriting the whole TTree with the new data. - To maintain custom ``counter_name``, ``field_name``, ``initial_basket_capacity`` or - ``resize_factor`` values for the new branches, pass the custom values to the parameters. + This function can only copy TTrees version 20, TBranches version 13, and TBranchElements + version 10. To maintain custom ``counter_name``, ``field_name``, ``initial_basket_capacity`` + or ``resize_factor`` values for the new branches, pass the custom values to the parameters. Currently, writing new branches in batches is not possible; data in new ``branches`` must fit in memory. From a1cbd0c55e7bfbae54f6c39f8a51e1c31d623487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Bilodeau?= Date: Wed, 17 Jul 2024 10:12:03 +0200 Subject: [PATCH 20/20] Now adding subbranch TLeaf refs to TTree metadata --- src/uproot/writing/_cascadetree.py | 114 +++- ...155_feat_add_branches_to_existing_ttree.py | 515 ++++++++++++++++++ 2 files changed, 605 insertions(+), 24 deletions(-) create mode 100644 tests/test_1155_feat_add_branches_to_existing_ttree.py diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 4432938c2..d26a7308c 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -835,7 +835,6 @@ def extend(self, file, sink, data): self._num_baskets += 1 self._metadata["fTotBytes"] += uncompressed_bytes self._metadata["fZipBytes"] += compressed_bytes - self.write_updates(sink) def write_anew(self, sink): @@ -1181,7 +1180,6 @@ def write_anew(self, sink): uproot.const.kNewClassTag, ) ) - out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) @@ -1648,7 +1646,6 @@ def _write_with_new_branches(self, sink, old_key): self._metadata["fEstimate"], ) ) - # speedbump (0), fClusterRangeEnd (empty array), # speedbump (0), fClusterSize (empty array) # fIOFeatures (TIOFeatures) @@ -1674,7 +1671,6 @@ def _write_with_new_branches(self, sink, old_key): 0, # TObjArray fLowerBound ) ) - # Write old branches if self._existing_branches: for branch in self._existing_branches: @@ -1694,29 +1690,98 @@ def _write_with_new_branches(self, sink, old_key): + 2 ) - out.append( - self._existing_ttree.chunk.raw_data.tobytes()[ - first_indx - branch_start : f_indx + 25 - ] - ) + if len(branch.branches) == 0: + # No subbranches + # Write remainder of branch + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx - branch_start : f_indx + 25 + ] + ) + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) - # Write TLeaf Reference - absolute_location = key_num_bytes + sum( - len(x) for x in out if x is not None - ) - absolute_location += 8 + 6 * ( - sum(1 if x is None else 0 for x in out) - 1 - ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) - tleaf_reference_numbers.append(absolute_location) + tleaf_reference_numbers.append(absolute_location) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + f_indx + 25 : second_indx + ] + ) + else: + # With subbranches + subbranch = branch.branches[0] + cursor = subbranch.cursor.copy() + # cursor before TObjArray of TBranches + first_indx1 = cursor.index + cursor.skip_after(subbranch) + second_indx1 = cursor.index + + f_indx1 = subbranch.member("fLeaves").cursor.index + + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx - branch_start : first_indx1 - 8 + ] + ) + for ( + subbranch + ) in branch.branches: # how to get it to not copy all subbranches? + cursor = subbranch.cursor.copy() + # cursor before TObjArray of TBranches + first_indx1 = cursor.index + cursor.skip_after(subbranch) + second_indx1 = cursor.index + + f_indx1 = subbranch.member("fLeaves").cursor.index + + branch_start = ( + len( + uproot.writing.identify.to_TString( + subbranch.classname + ).serialize() + ) + + 2 + ) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + first_indx1 - 8 : f_indx1 + 25 + ] + ) + # Write TLeaf Reference + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) - # Write remainder of branch - out.append( - self._existing_ttree.chunk.raw_data.tobytes()[ - f_indx + 25 : second_indx - ] - ) + tleaf_reference_numbers.append(absolute_location) + + # Write remainder of branch + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + f_indx1 + 25 : second_indx1 + ] + ) + # Write TLeaf Reference + absolute_location = key_num_bytes + sum( + len(x) for x in out if x is not None + ) + absolute_location += 8 + 6 * ( + sum(1 if x is None else 0 for x in out) - 1 + ) + tleaf_reference_numbers.append(absolute_location) + out.append( + self._existing_ttree.chunk.raw_data.tobytes()[ + second_indx1:second_indx + ] + ) for datum in self._branch_data: if datum["kind"] == "record": continue @@ -1995,13 +2060,13 @@ def _write_with_new_branches(self, sink, old_key): out[tobjarray_of_branches_index] = uproot.serialization.numbytes_version( sum(len(x) for x in out[tobjarray_of_branches_index + 1 :]), 3 # TObjArray ) - # TODO find tleaf reference numbers and append them ?? or update and then append # TObjArray of TLeaf references tleaf_reference_bytes = uproot._util.tobytes( numpy.array(tleaf_reference_numbers, ">u4") ) + out.append( # This is still fine struct.pack( ">I13sI4s", @@ -2029,6 +2094,7 @@ def _write_with_new_branches(self, sink, old_key): self._metadata_start = sum(len(x) for x in out[:metadata_out_index]) raw_data = b"".join(out) + self._key = self._directory.add_object( sink, "TTree", diff --git a/tests/test_1155_feat_add_branches_to_existing_ttree.py b/tests/test_1155_feat_add_branches_to_existing_ttree.py new file mode 100644 index 000000000..879bbf2aa --- /dev/null +++ b/tests/test_1155_feat_add_branches_to_existing_ttree.py @@ -0,0 +1,515 @@ +import uproot +import os +import pytest + +ROOT = pytest.importorskip("ROOT") + +import numpy as np + +import awkward as ak +from skhep_testdata import data_path + + +def test_vector(tmp_path): + data = [1, 2, 3, 4, 5] + with uproot.open( + os.path.join(tmp_path, "uproot-vectorVectorDouble.root"), + minimal_ttree_metadata=False, + ) as read: + with pytest.raises(TypeError): + with uproot.update( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + ) as write: + write.add_branches("t", {"branch": data}) + + with uproot.open( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), + minimal_ttree_metadata=False, + ) as new: + for i in read["t"].keys(): + assert ak.all(read["t"][i].array() == new["t"][i].array()) + assert ak.all(new["t"]["branch"].array() == data) + + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "cp-vectorVectorDouble.root"), "READ" + ) + tree = inFile.Get("t;1") + indx = 0 + + for x in tree: + indx2 = 0 + for i in getattr(x, "x"): + assert ak.all(list(i) == read["t"]["x"].array()[indx][indx2]) + indx2 += 1 + assert getattr(x, "branch") == data[indx] + indx += 1 + + +def simple_test(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array( + [ + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + ], + dtype=np.int32, + ) + + with uproot.recreate(os.path.join(tmp_path, "arrays1.root")) as f: + f["whatever"] = {"b1": data, "b2": data1, "b3": data, "b4": data1} + + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + f["whatever"] = {"b1": data, "b2": data1} + + with uproot.update(os.path.join(tmp_path, "arrays2.root")) as f: + f.add_branches("whatever", {"b3": data, "b4": data1}) + + with uproot.open( + os.path.join(tmp_path, "arrays1.root"), minimal_ttree_metadata=False + ) as check: + with uproot.open( + os.path.join(tmp_path, "arrays2.root"), minimal_ttree_metadata=False + ) as new: + print(new["whatever"].arrays()) + for key in new["whatever"].keys(): + assert ak.all( + new["whatever"].arrays()[key] == check["whatever"].arrays()[key] + ) + assert ak.all(new["whatever"]["b1"].array() == data) + assert ak.all(new["whatever"]["b2"].array() == data1) + assert ak.all(new["whatever"]["b3"].array() == data) + assert ak.all(new["whatever"]["b4"].array() == data1) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "arrays2.root"), "READ") + tree = inFile.Get("whatever;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == data[indx] + assert getattr(x, "b2") == data1[indx] + indx += 1 + + +def test_multiple_trees(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array( + [ + 2.0, + 3.0, + 4.0, + 5.0, + 6.0, + ], + dtype=np.int32, + ) + + with uproot.recreate(os.path.join(tmp_path, "mult_trees.root")) as f: + f["whatever"] = {"b1": data, "b2": data1} + f["whatever1"] = {"b1": data, "b2": data1, "b3": data} + + with uproot.update(os.path.join(tmp_path, "mult_trees.root")) as f: + f.add_branches("whatever", {"b3": data, "b4": data1}) + f.add_branches("whatever1", {"b4": data1}) + + with uproot.open( + os.path.join(tmp_path, "mult_trees.root"), minimal_ttree_metadata=False + ) as new: + assert ak.all(new["whatever"]["b1"].array() == data) + assert ak.all(new["whatever1"]["b4"].array() == data1) + assert ak.all(new["whatever1"]["b2"].array() == data1) + assert ak.all(new["whatever1"]["b4"].array() == data1) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "mult_trees.root"), "READ") + tree = inFile.Get("whatever;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == data[indx] + assert getattr(x, "b2") == data1[indx] + indx += 1 + + +def test_different_fEntries(tmp_path): + data = np.array([1, 2, 3, 4, 5], dtype=np.int64) + data1 = np.array([2.0, 3.0, 4.0, 5.0, 6.0], dtype=np.int32) + + with uproot.recreate(os.path.join(tmp_path, "arrays2.root")) as f: + with pytest.raises(ValueError): + f["whatever"] = {"b1": data, "b2": data1} + f.add_branches( + "whatever", + { + "b3": data, + "b4": np.array([2.0, 3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.int32), + }, + ) + + +def test_dtypes(tmp_path): # tleaf types? + data = [ + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int64, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int32, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.int8, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 4.0, + ], + dtype=np.float32, + ), + np.array( + [ + 1.0, + 2.0, + 3.0, + 4.0, + ], + dtype=np.float64, + ), + np.array( + [ + 1, + 2, + 3, + 4, + ], + dtype=np.double, + ), + np.array([True, False, True, False], dtype=bool), + ] + + with uproot.recreate(os.path.join(tmp_path, "all_dtypes.root")) as f: + f["whatever"] = { + "b1": data[0], + "b2": data[1], + "b3": data[2], + "b4": data[3], + "b5": data[4], + "b6": data[5], + "b7": data[6], + } + + with uproot.update(os.path.join(tmp_path, "all_dtypes.root")) as write: + write.add_branches( + "whatever", + { + "b8": data[0], + "b9": data[1], + "b10": data[2], + "b12": data[3], + "b13": data[4], + "b14": data[5], + "b15": data[6], + }, + ) + + with uproot.open(os.path.join(tmp_path, "all_dtypes.root")) as read: + + read["whatever"] + + +def test_ak_arrays(tmp_path): + data = np.array( + [ + 1, + 2, + ], + dtype=np.int64, + ) + data1 = np.array([2, 3, 4, 5], dtype=np.int64) + data2 = np.array([3, 4, 5], dtype=np.int64) + + with uproot.recreate(os.path.join(tmp_path, "ak_test.root")) as file: + file["whatever"] = { + "b1": ak.Array([data, data1, data2]), + "b2": ak.Array([data1, data2, data]), + } + + with uproot.update(os.path.join(tmp_path, "ak_test.root")) as write: + write.add_branches( + "whatever", + { + "b3": ak.Array([data2, data, data1]), + }, + ) + + with uproot.open( + os.path.join(tmp_path, "ak_test.root"), minimal_ttree_metadata=False + ) as new: + new["whatever"].arrays() + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "ak_test.root"), "READ") + tree = inFile.Get("whatever") + for x in tree: + getattr(x, "b1") + inFile.Close() + df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True) + assert ak.all(npy3["b1"] == [data, data1, data2]) + assert ak.all(npy3["b2"] == [data1, data2, data]) + assert ak.all(npy3["b3"] == [data2, data, data1]) + + +def test_streamers_same_dtypes(tmp_path): + # Make an example file with ROOT + inFile = ROOT.TFile(os.path.join(tmp_path, "root_same_dtypes.root"), "RECREATE") + tree = ROOT.TTree("tree1", "tree") + npa = np.zeros(4, dtype=np.float32) + tree.Branch("b1", npa, "b1/F") + for i in range(4): + npa[0] = i**0 + tree.Fill() + inFile.Write() + inFile.Close() + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_same_dtypes.root"), "OPEN") + tree = inFile.Get("tree1") + data = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32) + + with uproot.update(os.path.join(tmp_path, "root_same_dtypes.root")) as file: + file.add_branches("tree1", {"b2": data}) + + with uproot.open( + os.path.join(tmp_path, "root_same_dtypes.root"), minimal_ttree_metadata=False + ) as file: + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "root_same_dtypes.root"), "READ" + ) + # inFile.ShowStreamerInfo() + tree = inFile.Get("tree1;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] + assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] + indx += 1 + + # tree.Scan() + check = [ + "TBranch", + "TAttLine", + "TCollection", + "TLeafF", + "listOfRules", + "TString", + "TObjArray", + "TAttFill", + "TBranchRef", + "TList", + "ROOT::TIOFeatures", + "TSeqCollection", + "TAttMarker", + "TTree", + "TNamed", + "TObject", + "TAttLine", + "TLeaf", + "TRefTable", + ] + for i in set(file.file.streamers): + assert i in check + inFile.Close() + + +def test_streamers_diff_dtypes(tmp_path): + # Make an example file with ROOT + inFile = ROOT.TFile( + "/Users/zobil/Desktop/directory/root_diff_dtypes.root", "RECREATE" + ) + tree = ROOT.TTree("tree1", "tree") + npa = np.zeros(4, dtype=float) + tree.Branch("b1", npa, "b1F") + for i in range(4): + npa[0] = i**0 + tree.Fill() + inFile.Write() + inFile.Close() + + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "root_diff_dtypes.root"), "OPEN") + tree = inFile.Get("tree1") + data = np.array([5, 6, 7, 8], dtype=np.int64) + data1 = np.array([5.2, 6.3, 7.4, 8.5], dtype=np.float64) + with uproot.update(os.path.join(tmp_path, "root_diff_dtypes.root")) as file: + file.add_branches("tree1", {"b2": data, "b3": data1}) + + with uproot.open( + os.path.join(tmp_path, "root_diff_dtypes.root"), minimal_ttree_metadata=False + ) as file: + file["tree1"]["b2"].member("fLeaves")[0].all_members + inFile = ROOT.TFile.Open( + os.path.join(tmp_path, "root_diff_dtypes.root"), "READ" + ) + tree = inFile.Get("tree1;1") + indx = 0 + for x in tree: + assert getattr(x, "b1") == file["tree1"]["b1"].array()[indx] + assert getattr(x, "b2") == file["tree1"]["b2"].array()[indx] + indx += 1 + # tree.Scan() + check = [ + "TBranch", + "TAttLine", + "TCollection", + "TLeafF", + "listOfRules", + "TString", + "TObjArray", + "TAttFill", + "TBranchRef", + "TList", + "ROOT::TIOFeatures", + "TSeqCollection", + "TAttMarker", + "TTree", + "TNamed", + "TObject", + "TAttLine", + "TLeaf", + "TRefTable", + "TLeafL", + "TLeafD", + ] + for i in set(file.file.streamers): + assert i in check + inFile.Close() + + +def test_old_versions(tmp_path): + with pytest.raises(TypeError): + with uproot.update(os.path.join(tmp_path, "uproot-HZZ.root")) as file: + file.add_branches("events", {"b2": [1, 2, 3]}) + + +def test_TreeEventSimple0(tmp_path): + with uproot.update(os.path.join(tmp_path, "cp/TreeEventTreeSimple0.root")) as file: + file.add_branches( + "TreeEventTreeSimple0", {"b1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} + ) + with uproot.open( + os.path.join(tmp_path, "cp/TreeEventTreeSimple0.root") + ) as new: # Okay can't read with arrays() + print(new.file.chunk(0, 20000).raw_data.tobytes()) + # print(new['TreeEventTreeSimple0']['b1'].array()) + # inFile = ROOT.TFile.Open( + # os.path.join(tmp_path, "TreeEventTreeSimple0.root"), "READ" + # ) + # tree = inFile.Get("TreeEventTreeSimple0;1") + # indx = 0 + # for x in tree: + # assert getattr(x, "Event_branch") + # print(getattr(x, "Event_branch")) + # indx += 1 + + +def test_TreeEventSimple1(tmp_path): + with uproot.update(os.path.join(tmp_path, "cp/TreeEventTreeSimple1.root")) as file: + file.add_branches( + "TreeEventTreeSimple1", + {"new_v": np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], np.float32)}, + ) + with uproot.open( + os.path.join(tmp_path, "TreeEventTreeSimple1.root") + ) as file: # can't read with arrays() + with uproot.open( + os.path.join(tmp_path, "cp/TreeEventTreeSimple1.root") + ) as copy: + print(file["TreeEventTreeSimple1"]) + + +def test_TreeEventSimple3(tmp_path): + with uproot.update( + os.path.join(tmp_path, "TreeEventTreeSimple3.root") + ) as file: # can't read with arrays() + file["tree"] = {"b1": [1, 2, 3, 4, 5], "b2": [3, 4, 5, 6, 7]} + with uproot.open(os.path.join(tmp_path, "TreeEventTreeSimple3.root")) as copy: + print(file["TreeEventTreeSimple1"].chunk.raw_data.tobytes()) + # print(copy['TreeEventTreeSimple1']) + # inFile = ROOT.TFile.Open(os.path.join(tmp_path,"cp/TreeEventTreeSimple3.root"), "READ") + # tree = inFile.Get("TreeEventTreeSimple1") + # # for x in tree: + # # getattr(x, "new_v") + # inFile.Close() + # df3 = ROOT.RDataFrame("whatever", os.path.join(tmp_path, "ak_test.root")) + # npy3 = ak.from_rdataframe(df3, columns=("b1", "b2", "b3"), keep_order=True)] + + # for x in tree: + # assert getattr(x, "Event_branch") + # print(getattr(x, "Event_branch")) + # print(getattr(x, "a")) + # indx += 1 + # file.Write() + # file.Close() + + +def test_TreeEventSimple2(tmp_path): + # with uproot.update(os.path.join(tmp_path, "cp/TreeEventTreeSimple2.root")) as file: + # file.add_branches("TreeEventTreeSimple2", {"b1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with uproot.open( + os.path.join(tmp_path, "TreeEventTreeSimple2.root") + ) as file: # Okay can't read with arrays() + print(file["TreeEventTreeSimple2"]) + with uproot.open( + os.path.join(tmp_path, "cp/TreeEventTreeSimple2.root") + ) as new: # Okay can't read with arrays() + print(new["TreeEventTreeSimple2"].asdfa) + # inFile = ROOT.TFile.Open( + # os.path.join(tmp_path, "TreeEventTreeSimple0.root"), "READ" + # ) + # tree = inFile.Get("TreeEventTreeSimple0;1") + # indx = 0 + # for x in tree: + # assert getattr(x, "Event_branch") + # print(getattr(x, "Event_branch")) + # indx += 1 + + +def test_TreeClass0(tmp_path): + + with uproot.update(os.path.join(tmp_path, "cp/TreeClass0.root")) as file: + file.add_branches( + "TreeClass0", + {"b1": np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.float64)}, + ) + with uproot.open( + os.path.join(tmp_path, "cp/TreeClass0.root") + ) as file: # Okay can't read with arrays() + print(file["TreeClass0"]["ClassC_branch"]) + # with uproot.open(os.path.join(tmp_path,"cp/TreeClass0.root")) as new: # Okay can't read with arrays() + # print(new['TreeEventTreeSimple2'].asdfa) + inFile = ROOT.TFile.Open(os.path.join(tmp_path, "cp/TreeClass0.root"), "READ") + tree = inFile.Get("TreeClass0;1") + indx = 0 + for x in tree: + assert getattr(x, "ClassC_branch") + print(getattr(x, "ClassC_branch")) + indx += 1 + + +def look(): + with uproot.open( + "/Users/zobil/Documents/trees/modified/TreeEventTreeSimple1.root" + ) as file: + print(file["TreeEventTreeSimple1"]["Event_branch"].member("fLeaves"))