Skip to content

fix: RNTuple form construction logic #1467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/uproot/behaviors/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,10 @@ def to_akform(
if self is rntuple:
for field in self.fields:
# the field needs to be in the keys or be a parent of a field in the keys
if any(key.startswith(field.name) for key in keys):
if any(
key.startswith(f"{field.name}.") or key == field.name
for key in keys
):
top_names.append(field.name)
record_list.append(
rntuple.field_form(field.field_id, keys, ak_add_doc=ak_add_doc)
Expand All @@ -537,7 +540,7 @@ def to_akform(
# Also include the field itself
keys = [self.path] + [f"{self.path}.{k}" for k in keys]
# The field needs to be in the keys or be a parent of a field in the keys
if any(key.startswith(self.path) for key in keys):
if any(key.startswith(f"{self.path}.") or key == self.path for key in keys):
top_names.append(self.name)
record_list.append(
rntuple.field_form(self.field_id, keys, ak_add_doc=ak_add_doc)
Expand Down
58 changes: 26 additions & 32 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ def read_members(self, chunk, cursor, context, file):
self._field_names = None
self._column_records = None
self._alias_column_records = None
self._alias_columns_dict_ = None
self._related_ids_ = None
self._column_records_dict_ = None
self._num_entries = None
Expand Down Expand Up @@ -268,14 +267,6 @@ def alias_column_records(self):
)
return self._alias_column_records

@property
def _alias_columns_dict(self):
if self._alias_columns_dict_ is None:
self._alias_columns_dict_ = {
el.field_id: el.physical_id for el in self.alias_column_records
}
return self._alias_columns_dict_

@property
def _column_records_dict(self):
if self._column_records_dict_ is None:
Expand Down Expand Up @@ -382,23 +373,22 @@ def page_list_envelopes(self):

return self._page_list_envelopes

def base_col_form(self, cr, col_id, parameters=None, cardinality=False):
def base_col_form(self, cr, parameters=None, is_cardinality=False):
"""
Args:
cr (:doc:`uproot.models.RNTuple.MetaData`): The column record.
col_id (int): The column id.
parameters (dict): The parameters to pass to the ``NumpyForm``.
cardinality (bool): Whether the column is a cardinality column.
is_cardinality (bool): Whether the column is a cardinality column.

Returns an Awkward Form describing the column if applicable, or a form key otherwise.
"""
ak = uproot.extras.awkward()

form_key = f"column-{col_id}" + ("-cardinality" if cardinality else "")
form_key = f"column-{cr.idx}" + ("-cardinality" if is_cardinality else "")
dtype_byte = cr.type
if dtype_byte == uproot.const.rntuple_col_type_to_num_dict["switch"]:
return form_key
elif dtype_byte in uproot.const.rntuple_index_types and not cardinality:
elif dtype_byte in uproot.const.rntuple_index_types and not is_cardinality:
return form_key
dt_str = uproot.const.rntuple_col_num_to_dtype_dict[dtype_byte]
if dt_str == "bit":
Expand All @@ -411,7 +401,7 @@ def base_col_form(self, cr, col_id, parameters=None, cardinality=False):
parameters=parameters,
)

def col_form(self, field_id, extra_parameters=None):
def col_form(self, field_id, extra_parameters=None, is_cardinality=False):
"""
Args:
field_id (int): The field id.
Expand All @@ -423,8 +413,6 @@ def col_form(self, field_id, extra_parameters=None):
cfid = field_id
if self.field_records[cfid].source_field_id is not None:
cfid = self.field_records[cfid].source_field_id
if cfid in self._alias_columns_dict:
cfid = self._alias_columns_dict[cfid]
if cfid not in self._column_records_dict:
raise (
RuntimeError(
Expand All @@ -437,21 +425,17 @@ def col_form(self, field_id, extra_parameters=None):
rel_crs = [c for c in rel_crs if c.repr_idx == 0]

if len(rel_crs) == 1: # base case
cardinality = "RNTupleCardinality" in self.field_records[field_id].type_name
return self.base_col_form(
rel_crs[0],
rel_crs[0].idx,
parameters=extra_parameters,
cardinality=cardinality,
is_cardinality=is_cardinality,
)
elif (
len(rel_crs) == 2
and rel_crs[1].type == uproot.const.rntuple_col_type_to_num_dict["char"]
):
# string field splits->2 in col records
inner = self.base_col_form(
rel_crs[1], rel_crs[1].idx, parameters={"__array__": "char"}
)
inner = self.base_col_form(rel_crs[1], parameters={"__array__": "char"})
form_key = f"column-{rel_crs[0].idx}"
parameters = {"__array__": "string"}
if extra_parameters is not None:
Expand Down Expand Up @@ -492,17 +476,21 @@ def field_form(self, this_id, keys, ak_add_doc=False):
structural_role == uproot.const.RNTupleFieldRole.LEAF
and this_record.repetition == 0
):
is_cardinality = "RNTupleCardinality" in this_record.type_name
if self.field_records[this_id].source_field_id is not None:
this_id = self.field_records[this_id].source_field_id
# deal with std::atomic
# they have no associated column, but exactly one subfield containing the underlying data
tmp_id = self._alias_columns_dict.get(this_id, this_id)
if (
tmp_id not in self._column_records_dict
and len(self._related_ids[tmp_id]) == 1
this_id not in self._column_records_dict
and len(self._related_ids[this_id]) == 1
):
this_id = self._related_ids[tmp_id][0]
this_id = self._related_ids[this_id][0]
# base case of recursion
# n.b. the split may happen in column
return self.col_form(this_id, extra_parameters=parameters)
return self.col_form(
this_id, extra_parameters=parameters, is_cardinality=is_cardinality
)
elif structural_role == uproot.const.RNTupleFieldRole.LEAF:
if this_id in self._related_ids:
# std::array has only one subfield
Expand All @@ -523,7 +511,11 @@ def field_form(self, this_id, keys, ak_add_doc=False):
recordlist = []
namelist = []
for i in newids:
if any(key.startswith(self.all_fields[i].path) for key in keys):
if any(
key.startswith(f"{self.all_fields[i].path}.")
or key == self.all_fields[i].path
for key in keys
):
recordlist.append(
self.field_form(i, keys, ak_add_doc=ak_add_doc)
)
Expand All @@ -536,8 +528,6 @@ def field_form(self, this_id, keys, ak_add_doc=False):
cfid = this_id
if self.field_records[cfid].source_field_id is not None:
cfid = self.field_records[cfid].source_field_id
if cfid in self._alias_columns_dict:
cfid = self._alias_columns_dict[cfid]
if cfid not in self._column_records_dict:
raise (
RuntimeError(
Expand All @@ -561,7 +551,11 @@ def field_form(self, this_id, keys, ak_add_doc=False):
recordlist = []
namelist = []
for i in newids:
if any(key.startswith(self.all_fields[i].path) for key in keys):
if any(
key.startswith(f"{self.all_fields[i].path}.")
or key == self.all_fields[i].path
for key in keys
):
recordlist.append(self.field_form(i, keys, ak_add_doc=ak_add_doc))
namelist.append(field_records[i].field_name)
if all(name == f"_{i}" for i, name in enumerate(namelist)):
Expand Down
21 changes: 21 additions & 0 deletions tests/test_1467_rntuple_akform_construction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import pytest
import skhep_testdata

import uproot

ak = pytest.importorskip("awkward")


def test_akform_logic():
    """Read all arrays of the ``Events`` RNTuple and check the field count.

    Opens the NanoAOD RNTuple sample shipped with ``skhep_testdata``, reads
    every field into an array, and asserts the expected number of top-level
    fields.
    """
    sample_path = skhep_testdata.data_path(
        "cmsopendata2015_ttbar_19980_NANOAOD_RNTupleImporter_rntuple_v1-0-0-1.root"
    )

    with uproot.open(sample_path) as root_file:
        events = root_file["Events"]
        all_arrays = events.arrays()

    # Deliberately minimal: if anything were wrong, reading the arrays above
    # would have crashed before execution reached this assertion.
    assert len(all_arrays.fields) == 969
Loading