Skip to content

Add DataTree.prune() method … #10598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
16b92e3
Add DataTree.is_data_empty property and .prune() method …
aladinor Aug 1, 2025
0b5ee3b
documenting changes in whats-new.rst file
aladinor Aug 1, 2025
d5d5621
removing blank lines
aladinor Aug 1, 2025
043a037
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 2, 2025
ba73805
removing new property instead using data_vars and fixing correspondin…
aladinor Aug 5, 2025
6a9664b
removing .is_empty_data entry
aladinor Aug 5, 2025
239c53f
Merge branch 'feature/datatree-prune-method' of https://github.com/al…
aladinor Aug 5, 2025
d141fbe
updating github url
aladinor Aug 5, 2025
d4b3970
fixing test accordingly
aladinor Aug 5, 2025
4431834
fixing doctest
aladinor Aug 5, 2025
7fc2e8b
fixing doctest
aladinor Aug 5, 2025
6e1956f
fixing doctest
aladinor Aug 5, 2025
6f20286
replacing doctest
aladinor Aug 5, 2025
6da389d
replacing doctest
aladinor Aug 5, 2025
d7f85b8
removing empty line
aladinor Aug 5, 2025
ecf186d
removing empty line
aladinor Aug 5, 2025
03c78fa
Update xarray/core/datatree.py
aladinor Aug 5, 2025
6161755
Update xarray/core/datatree.py
aladinor Aug 5, 2025
a7cd8d5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 5, 2025
338c76a
improving doctests
aladinor Aug 5, 2025
aea2e67
fixing typo
aladinor Aug 5, 2025
730c7aa
refactoring test accordingly to Tom's suggestion
aladinor Aug 5, 2025
7722cfe
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 5, 2025
7aa25c9
fixing test_prune_after_filtering
aladinor Aug 6, 2025
1c78329
Merge branch 'feature/datatree-prune-method' of https://github.com/al…
aladinor Aug 6, 2025
d99ee78
refactoring test to use assert_identical
aladinor Aug 6, 2025
82ea57e
refactoring test to use assert_equal
aladinor Aug 6, 2025
7cd493d
adding reference to .prune method in Subsetting Tree Nodes
aladinor Aug 6, 2025
8223810
adding # doctest: +NORMALIZE_WHITESPACE to avoid error with trailing …
aladinor Aug 6, 2025
bbcaf91
Fix doctest trailing space issue in prune method
aladinor Aug 6, 2025
219a4e6
trial 2 fixing trailing space
aladinor Aug 6, 2025
27c964a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/user-guide/hierarchical-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,8 @@ The result is a new tree, containing only the nodes matching the condition.

(Yes, under the hood :py:meth:`~xarray.DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !)

If you want to filter out empty nodes (nodes without any data variables), you can use :py:meth:`~xarray.DataTree.prune`.

.. _Tree Contents:

Tree Contents
Expand Down
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ v2025.07.2 (unreleased)

New Features
~~~~~~~~~~~~
- Added :py:meth:`DataTree.prune` method to remove empty nodes while preserving tree structure.
  Useful for cleaning up a DataTree after time-based filtering operations (:issue:`10590`, :pull:`10598`).
  By `Alfonso Ladino <https://github.com/aladinor>`_.


Breaking changes
Expand Down
67 changes: 67 additions & 0 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,73 @@ def filter_like(self, other: DataTree) -> DataTree:
other_keys = {key for key, _ in other.subtree_with_keys}
return self.filter(lambda node: node.relative_to(self) in other_keys)

def prune(self, drop_size_zero_vars: bool = False) -> DataTree:
    """
    Remove empty nodes from the tree.

    Returns a new tree containing only nodes that hold at least one data
    variable. Emptiness is decided from ``node.data_vars`` alone; nothing
    else stored on a node is consulted.
    Intermediate nodes are kept if they are required to support non-empty children.

    Parameters
    ----------
    drop_size_zero_vars : bool, default False
        If True, also considers variables with zero size as empty, so a
        node is kept only if at least one of its data variables has a
        size greater than zero.
        If False, keeps nodes with data variables even if they have zero size.

    Returns
    -------
    DataTree
        A new tree with empty nodes removed.

    See Also
    --------
    filter

    Examples
    --------
    >>> dt = xr.DataTree.from_dict(
    ...     {
    ...         "/a": xr.Dataset({"foo": ("x", [1, 2])}),
    ...         "/b": xr.Dataset({"bar": ("x", [])}),
    ...         "/c": xr.Dataset(),
    ...     }
    ... )
    >>> dt.prune()
    <xarray.DataTree>
    Group: /
    ├── Group: /a
    │       Dimensions:  (x: 2)
    │       Dimensions without coordinates: x
    │       Data variables:
    │           foo      (x) int64 16B 1 2
    └── Group: /b
            Dimensions:  (x: 0)
            Dimensions without coordinates: x
            Data variables:
                bar      (x) float64 0B

    The ``drop_size_zero_vars`` parameter controls whether variables
    with zero size are considered empty:

    >>> dt.prune(drop_size_zero_vars=True)
    <xarray.DataTree>
    Group: /
    └── Group: /a
            Dimensions:  (x: 2)
            Dimensions without coordinates: x
            Data variables:
                foo      (x) int64 16B 1 2
    """

    def has_data(node: DataTree) -> bool:
        # Predicate handed to ``filter``: True means the node counts as non-empty.
        data_vars = node.data_vars
        if drop_size_zero_vars:
            # ``any`` over an empty mapping is already False, so a node
            # without data variables needs no separate length check here.
            return any(var.size > 0 for var in data_vars.values())
        return len(data_vars) > 0

    return self.filter(has_data)

def match(self, pattern: str) -> DataTree:
"""
Return nodes with paths matching pattern.
Expand Down
79 changes: 79 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1942,6 +1942,85 @@ def test_filter(self) -> None:
)
assert_identical(actual, expected)

def test_prune_basic(self) -> None:
    """A node without data variables is dropped; its populated sibling is kept."""
    source = DataTree.from_dict(
        {
            "/a": xr.Dataset({"foo": ("x", [1, 2])}),
            "/b": xr.Dataset(),
        }
    )

    result = source.prune()
    remaining = result.children

    assert "a" in remaining
    assert "b" not in remaining
    # The surviving node must carry exactly the dataset it had before pruning.
    kept_ds = remaining["a"].to_dataset()
    original_ds = source.children["a"].to_dataset()
    assert_identical(kept_ds, original_ds)

def test_prune_with_zero_size_vars(self) -> None:
    """Zero-size variables keep a node by default and drop it in strict mode."""

    def populated() -> xr.Dataset:
        return xr.Dataset({"foo": ("x", [1, 2])})

    def zero_size() -> xr.Dataset:
        return xr.Dataset({"empty": ("dim", [])})

    source = DataTree.from_dict(
        {"/a": populated(), "/b": zero_size(), "/c": xr.Dataset()}
    )

    # Default mode: only the node with no data variables at all ("/c") goes.
    default_result = source.prune()
    assert_identical(
        default_result,
        DataTree.from_dict({"/a": populated(), "/b": zero_size()}),
    )

    # Strict mode: the node whose only variable has zero size ("/b") goes too.
    strict_result = source.prune(drop_size_zero_vars=True)
    assert_identical(
        strict_result,
        DataTree.from_dict({"/a": populated()}),
    )

def test_prune_with_intermediate_nodes(self) -> None:
    """An empty parent survives when one of its children still holds data."""
    layout = {
        "/": xr.Dataset(),
        "/group1": xr.Dataset(),
        "/group1/subA": xr.Dataset({"temp": ("x", [1, 2])}),
        "/group1/subB": xr.Dataset(),
        "/group2": xr.Dataset({"empty": ("dim", [])}),
    }
    source = DataTree.from_dict(layout)

    result = source.prune()

    # "/group1" has no data variables itself but is needed as the parent
    # of "/group1/subA"; "/group1/subB" is dropped.
    survivors = {
        "/group1/subA": xr.Dataset({"temp": ("x", [1, 2])}),
        "/group2": xr.Dataset({"empty": ("dim", [])}),
    }
    assert_identical(result, DataTree.from_dict(survivors))

def test_prune_after_filtering(self) -> None:
    """Strict pruning removes a node left without data by a time selection."""
    from pandas import date_range

    early = xr.Dataset(
        {"foo": ("time", [1, 2, 3, 4, 5])},
        coords={"time": date_range("2023-01-01", periods=5, freq="D")},
    )
    # Starts on 2023-01-04, i.e. after the selection window below ends.
    late = xr.Dataset(
        {"var": ("time", [1, 2, 3, 4, 5])},
        coords={"time": date_range("2023-01-04", periods=5, freq="D")},
    )
    window = slice("2023-01-01", "2023-01-03")

    source = DataTree.from_dict({"a": early, "b": late})
    selected = source.sel(time=window)

    result = selected.prune(drop_size_zero_vars=True)

    # Only "a" is expected to remain after strict pruning.
    assert_identical(result, DataTree.from_dict({"a": early.sel(time=window)}))


class TestIndexing:
def test_isel_siblings(self) -> None:
Expand Down
Loading