diff --git a/src/uproot/behaviors/RNTuple.py b/src/uproot/behaviors/RNTuple.py index 705212755..e3caeea06 100644 --- a/src/uproot/behaviors/RNTuple.py +++ b/src/uproot/behaviors/RNTuple.py @@ -458,8 +458,13 @@ def fields(self): fields = [ rntuple.all_fields[i] for i, f in enumerate(rntuple.field_records) - if f.parent_field_id == self._fid and f.parent_field_id != i + if f.parent_field_id == self._fid + and f.parent_field_id != i + and not rntuple.all_fields[i].is_ignored ] + # If the child field is anonymous, we return the grandchildren + if len(fields) == 1 and fields[0].is_anonymous: + fields = fields[0].fields self._fields = fields return self._fields @@ -468,15 +473,21 @@ def path(self): """ The full path of the field in the :doc:`uproot.models.RNTuple.RNTuple`. When it is the ``RNTuple`` itself, this is ``"."``. + + Note that this is not the full path within the ROOT file. """ if isinstance(self, uproot.behaviors.RNTuple.RNTuple): return "." + # For some anonymous fields, the path is not available + if self.is_anonymous or self.is_ignored: + return None if self._path is None: path = self.name parent = self.parent field = self while not isinstance(parent, uproot.behaviors.RNTuple.RNTuple): - path = f"{parent.name}.{path}" + if not parent.is_anonymous: + path = f"{parent.name}.{path}" field = parent parent = field.parent self._path = path @@ -1423,7 +1434,7 @@ def iteritems( and (filter_typename is no_filter or filter_typename(field.typename)) and (filter_field is no_filter or filter_field(field)) ): - if ignore_duplicates and field.name in keys_set: + if field.is_anonymous or (ignore_duplicates and field.name in keys_set): pass else: keys_set.add(field.name) @@ -1437,7 +1448,11 @@ def iteritems( filter_field=filter_field, full_paths=full_paths, ): - k2 = f"{field.name}.{k1}" if full_paths else k1 + k2 = ( + f"{field.name}.{k1}" + if full_paths and not field.is_anonymous + else k1 + ) if filter_name is no_filter or _filter_name_deep( filter_name, self, v ): @@ -1483,9 +1498,10 @@ def itertypenames( for k, v in self.iteritems( filter_name=filter_name, filter_typename=filter_typename, - filter_branch=filter_branch, + filter_field=filter_field, recursive=recursive, full_paths=full_paths, + filter_branch=filter_branch, ): yield k, v.typename @@ -1616,8 +1632,8 @@ def __getitem__(self, where): raise uproot.KeyInFileError( original_where, keys=self.keys(recursive=recursive), - file_path=self._file.file_path, # TODO - object_path=self.object_path, # TODO + file_path=self.ntuple.parent._file.file_path, + object_path=self.path, ) from None return this @@ -1629,8 +1645,8 @@ def __getitem__(self, where): raise uproot.KeyInFileError( original_where, keys=self.keys(recursive=recursive), - file_path=self._file.file_path, - object_path=self.object_path, + file_path=self.ntuple.parent._file.file_path, + object_path=self.path, ) else: @@ -1826,20 +1842,6 @@ def _filter_name_deep(filter_name, hasfields, field): return filter_name("." + name) -def _keys_deep(hasbranches): - out = set() - for branch in hasbranches.itervalues(recursive=True): - name = branch.name - out.add(name) - while branch is not hasbranches: - branch = branch.parent # noqa: PLW2901 (overwriting branch) - if branch is not hasbranches: - name = branch.name + "/" + name - out.add(name) - out.add("/" + name) - return out - - def _get_recursive(hasfields, where): if hasfields._lookup is None: hasfields._lookup = {f.name: f for f in hasfields.fields} diff --git a/src/uproot/models/RNTuple.py b/src/uproot/models/RNTuple.py index 322d5d3c3..56f802d06 100644 --- a/src/uproot/models/RNTuple.py +++ b/src/uproot/models/RNTuple.py @@ -6,6 +6,7 @@ from __future__ import annotations import dataclasses +import re import struct import sys from collections import defaultdict @@ -511,10 +512,13 @@ def field_form(self, this_id, keys, ak_add_doc=False): recordlist = [] namelist = [] for i in newids: - if any( - key.startswith(f"{self.all_fields[i].path}.") - or key == self.all_fields[i].path - for key in keys + if ( + any( + key.startswith(f"{self.all_fields[i].path}.") + or key == self.all_fields[i].path + for key in keys + ) + or self.all_fields[i].is_anonymous ): recordlist.append( self.field_form(i, keys, ak_add_doc=ak_add_doc) @@ -1469,6 +1473,8 @@ def __init__(self, fid, ntuple): self._fields = None self._lookup = None self._path = None + self._is_anonymous = None + self._is_ignored = None def __repr__(self): if len(self) == 0: @@ -1497,6 +1503,47 @@ def typename(self): """ return self._ntuple.field_records[self._fid].type_name + @property + def record(self): + """ + The field record of the ``RField``. + """ + return self._ntuple.field_records[self._fid] + + @property + def is_anonymous(self): + """ + There are some anonymous fields in the RNTuple specification that we hide from the user + to simplify the interface. These are fields named `_0` that are children of a collection + or variant field. + """ + if self._is_anonymous is None: + self._is_anonymous = not self.top_level and ( + self.parent.record.struct_role + in ( + uproot.const.RNTupleFieldRole.COLLECTION, + uproot.const.RNTupleFieldRole.VARIANT, + ) + or self.parent.record.flags & uproot.const.RNTupleFieldFlags.REPETITIVE + ) + return self._is_anonymous + + @property + def is_ignored(self): + """ + There are some fields in the RNTuple specification named `:_i` (for `i=0,1,2,...`) + that encode class hierarchy. These are not useful in Uproot, so they are ignored. + """ + if self._is_ignored is None: + self._is_ignored = ( + not self.top_level + and self.parent.record.struct_role + == uproot.const.RNTupleFieldRole.RECORD + and re.fullmatch(r":_[0-9]+", self.name) is not None + ) + + return self._is_ignored + @property def parent(self): """ @@ -1513,6 +1560,7 @@ def index(self): """ Integer position of this ``RField`` in its parent's list of fields. """ + # TODO: This needs to be optimized for performance for i, field in enumerate(self.parent.fields): if field is self: return i diff --git a/tests/test_1250_rntuple_improvements.py b/tests/test_1250_rntuple_improvements.py index 8dffecad3..aadb4ded6 100644 --- a/tests/test_1250_rntuple_improvements.py +++ b/tests/test_1250_rntuple_improvements.py @@ -28,7 +28,7 @@ def test_field_class(): ) v = sub_sub_struct["v"] - assert len(v) == 1 + assert len(v) == 0 def test_array_methods(): diff --git a/tests/test_1406_improved_rntuple_methods.py b/tests/test_1406_improved_rntuple_methods.py index 2b9dc32e6..336cf8076 100644 --- a/tests/test_1406_improved_rntuple_methods.py +++ b/tests/test_1406_improved_rntuple_methods.py @@ -2,9 +2,7 @@ import os -import numpy import pytest -import skhep_testdata import uproot @@ -29,6 +27,7 @@ }, ], "struct5": [(1, 2, 3), (4, 5, 6)], + "struct6": [[(1, 2, 3), (4, 5, 6)], [(7, 8, 9)]], } ) @@ -41,23 +40,23 @@ def test_keys(tmp_path): obj = uproot.open(filepath)["ntuple"] - assert len(obj) == 5 - assert len(obj.keys(recursive=False)) == 5 + assert len(obj) == 6 + assert len(obj.keys(recursive=False)) == 6 - assert len(obj.keys()) == 29 - assert len(obj.keys(full_paths=False)) == 29 - assert len(obj.keys(full_paths=False, ignore_duplicates=True)) == 16 + assert len(obj.keys()) == 31 + assert len(obj.keys(full_paths=False)) == 31 + assert len(obj.keys(full_paths=False, ignore_duplicates=True)) == 17 assert len(obj.keys(filter_name="x")) == 4 assert len(obj.keys(filter_name="z")) == 2 assert len(obj.keys(filter_name="do*")) == 1 - assert len(obj.keys(filter_typename="std::int*_t")) == 16 + assert len(obj.keys(filter_typename="std::int*_t")) == 19 assert len(obj.keys(filter_field=lambda f: f.name == "up")) == 1 assert obj["struct1"].keys() == ["x", "y"] - assert len(obj["struct4"].keys()) == 12 + assert len(obj["struct4"].keys()) == 10 def test_getitem(tmp_path): @@ -73,12 +72,19 @@ def test_getitem(tmp_path): assert obj["struct3"] is obj.fields[2] assert obj["struct4"] is obj.fields[3] assert obj["struct5"] is obj.fields[4] + assert obj["struct6"] is obj.fields[5] assert obj["struct1"]["x"] is obj.fields[0].fields[0] assert obj["struct1"]["x"] is obj["struct1.x"] assert obj["struct1"]["x"] is obj["struct1/x"] assert obj["struct1"]["x"] is obj[r"struct1\x"] + # Make sure it accesses the grandchildren field instead of the "real" _0 + assert obj["struct5._0"].record.struct_role == uproot.const.RNTupleFieldRole.LEAF + assert obj["struct5._1"].record.struct_role == uproot.const.RNTupleFieldRole.LEAF + assert obj["struct5._2"].record.struct_role == uproot.const.RNTupleFieldRole.LEAF + assert obj["struct6._0"].record.struct_role == uproot.const.RNTupleFieldRole.LEAF + def test_to_akform(tmp_path): filepath = os.path.join(tmp_path, "test.root") @@ -128,3 +134,15 @@ def test_iterate_and_concatenate(tmp_path): true_array = ak.concatenate([data, data], axis=0) assert ak.array_equal(array, true_array) + + +def test_array(tmp_path): + filepath = os.path.join(tmp_path, "test.root") + + with uproot.recreate(filepath) as file: + obj = file.mkrntuple("ntuple", data) + + obj = uproot.open(filepath)["ntuple"] + + assert obj["struct5._0"].array().tolist() == [1, 4] + # assert obj["struct6._0"].array().tolist() == [[1, 4], [7]] # TODO: Need to fix this diff --git a/tests/test_1411_rntuple_physlite_ATLAS.py b/tests/test_1411_rntuple_physlite_ATLAS.py index 2e9ae28f9..7953c2d13 100644 --- a/tests/test_1411_rntuple_physlite_ATLAS.py +++ b/tests/test_1411_rntuple_physlite_ATLAS.py @@ -86,7 +86,7 @@ def test_truth_muon_containers(physlite_file): # Check values mass_evt_0 = 105.7 - AOD_type = [":_0"] # Uproot interpretation of AOD containers + AOD_type = [] # C++ class definitions are ignored mu_pdgid = [13, -13] assert (