Skip to content

Commit 7ba5f85

Browse files
committed
Improve reading and writing of NetCDF files to/from bytes or file-like objects.
* Allows use of the h5netcdf engine when writing to file-like objects (such as BytesIO), and stops forcing use of the scipy backend in this case (which is incompatible with groups and DataTree). Makes h5netcdf the default engine for DataTree.to_netcdf rather than leaving the choice of default up to Dataset.to_netcdf. * Allows use of the h5netcdf engine to read from a bytes object. * Allows DataTree.to_netcdf to return bytes when the filepath argument is omitted (similar to Dataset.to_netcdf).
1 parent 4d16307 commit 7ba5f85

File tree

7 files changed

+96
-24
lines changed

7 files changed

+96
-24
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ v2025.07.2 (unreleased)
1313
New Features
1414
~~~~~~~~~~~~
1515

16+
- :py:meth:`DataTree.to_netcdf` can now write to a file-like object, or return bytes if called without a filepath. (:issue:`10570`)
17+
By `Matthew Willson <https://github.com/mjwillson>`_.
1618

1719
Breaking changes
1820
~~~~~~~~~~~~~~~~

xarray/backends/api.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from xarray.core.indexes import Index
4444
from xarray.core.treenode import group_subtrees
4545
from xarray.core.types import NetcdfWriteModes, ZarrWriteModes
46-
from xarray.core.utils import is_remote_uri
46+
from xarray.core.utils import emit_user_level_warning, is_remote_uri
4747
from xarray.namedarray.daskmanager import DaskManager
4848
from xarray.namedarray.parallelcompat import guess_chunkmanager
4949
from xarray.structure.chunks import _get_chunk, _maybe_chunk
@@ -1911,11 +1911,11 @@ def to_netcdf(
19111911
if path_or_file is None:
19121912
if engine is None:
19131913
engine = "scipy"
1914-
elif engine != "scipy":
1914+
elif engine not in ("scipy", "h5netcdf"):
19151915
raise ValueError(
19161916
"invalid engine for creating bytes with "
1917-
f"to_netcdf: {engine!r}. Only the default engine "
1918-
"or engine='scipy' is supported"
1917+
f"to_netcdf: {engine!r}. Only the default engine, "
1918+
"engine='scipy' or engine='h5netcdf' is supported."
19191919
)
19201920
if not compute:
19211921
raise NotImplementedError(
@@ -1927,7 +1927,13 @@ def to_netcdf(
19271927
engine = _get_default_engine(path_or_file)
19281928
path_or_file = _normalize_path(path_or_file)
19291929
else: # file-like object
1930-
engine = "scipy"
1930+
if engine not in ("scipy", "h5netcdf"):
1931+
emit_user_level_warning(
1932+
f"Requested {engine=} is not compatible with writing to a file-like object. "
1933+
"This will raise an error in the future, for now defaulting to engine='scipy'.",
1934+
FutureWarning,
1935+
)
1936+
engine = "scipy"
19311937

19321938
# validate Dataset keys, DataArray names, and attr keys/values
19331939
_validate_dataset_names(dataset)

xarray/backends/h5netcdf_.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,9 @@ def open(
159159
)
160160

161161
if isinstance(filename, bytes):
162-
raise ValueError(
163-
"can't open netCDF4/HDF5 as bytes "
164-
"try passing a path or file-like object"
165-
)
166-
elif isinstance(filename, io.IOBase):
162+
filename = io.BytesIO(filename)
163+
164+
if isinstance(filename, io.IOBase) and mode == "r":
167165
magic_number = read_magic_number_from_file(filename)
168166
if not magic_number.startswith(b"\211HDF\r\n\032\n"):
169167
raise ValueError(

xarray/core/datatree.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from __future__ import annotations
22

33
import functools
4+
import io
45
import itertools
6+
from os import PathLike
57
import textwrap
68
from collections import ChainMap
79
from collections.abc import (
@@ -1659,9 +1661,11 @@ def _inplace_binary_op(self, other, f) -> Self:
16591661
def __eq__(self, other: DtCompatible) -> Self: # type: ignore[override]
16601662
return super().__eq__(other)
16611663

1664+
# filepath=None writes to bytes
1665+
@overload
16621666
def to_netcdf(
16631667
self,
1664-
filepath,
1668+
filepath: None = None,
16651669
mode: NetcdfWriteModes = "w",
16661670
encoding=None,
16671671
unlimited_dims=None,
@@ -1671,14 +1675,45 @@ def to_netcdf(
16711675
write_inherited_coords: bool = False,
16721676
compute: bool = True,
16731677
**kwargs,
1674-
):
1678+
) -> bytes: ...
1679+
1680+
@overload
1681+
def to_netcdf(
1682+
self,
1683+
filepath: str | PathLike,
1684+
mode: NetcdfWriteModes = "w",
1685+
encoding=None,
1686+
unlimited_dims=None,
1687+
format: T_DataTreeNetcdfTypes | None = None,
1688+
engine: T_DataTreeNetcdfEngine | None = None,
1689+
group: str | None = None,
1690+
write_inherited_coords: bool = False,
1691+
compute: bool = True,
1692+
**kwargs,
1693+
) -> None: ...
1694+
1695+
def to_netcdf(
1696+
self,
1697+
filepath: str | PathLike | io.IOBase | None = None,
1698+
mode: NetcdfWriteModes = "w",
1699+
encoding=None,
1700+
unlimited_dims=None,
1701+
format: T_DataTreeNetcdfTypes | None = None,
1702+
engine: T_DataTreeNetcdfEngine | None = None,
1703+
group: str | None = None,
1704+
write_inherited_coords: bool = False,
1705+
compute: bool = True,
1706+
**kwargs,
1707+
) -> None | bytes:
16751708
"""
16761709
Write datatree contents to a netCDF file.
16771710
16781711
Parameters
16791712
----------
1680-
filepath : str or Path
1681-
Path to which to save this datatree.
1713+
filepath : str or PathLike or file-like object or None
1714+
Path to which to save this datatree, or a file-like object to write
1715+
it to (which must support read and write and be seekable) or None
1716+
to return in-memory bytes.
16821717
mode : {"w", "a"}, default: "w"
16831718
Write ('w') or append ('a') mode. If mode='w', any existing file at
16841719
this location will be overwritten. If mode='a', existing variables
@@ -1717,14 +1752,18 @@ def to_netcdf(
17171752
kwargs :
17181753
Additional keyword arguments to be passed to ``xarray.Dataset.to_netcdf``
17191754
1755+
Returns
1756+
-------
1757+
A bytes object with the byte content of the netCDF file, if filepath was None.
1758+
17201759
Note
17211760
----
17221761
Due to file format specifications the on-disk root group name
17231762
is always ``"/"`` overriding any given ``DataTree`` root node name.
17241763
"""
17251764
from xarray.core.datatree_io import _datatree_to_netcdf
17261765

1727-
_datatree_to_netcdf(
1766+
return _datatree_to_netcdf(
17281767
self,
17291768
filepath,
17301769
mode=mode,

xarray/core/datatree_io.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from __future__ import annotations
22

33
from collections.abc import Mapping
4+
import io
45
from os import PathLike
5-
from typing import TYPE_CHECKING, Any, Literal, get_args
6+
from typing import TYPE_CHECKING, Any, Literal, get_args, overload
67

78
from xarray.core.datatree import DataTree
89
from xarray.core.types import NetcdfWriteModes, ZarrWriteModes
@@ -13,10 +14,9 @@
1314
if TYPE_CHECKING:
1415
from xarray.core.types import ZarrStoreLike
1516

16-
1717
def _datatree_to_netcdf(
1818
dt: DataTree,
19-
filepath: str | PathLike,
19+
filepath: str | PathLike | io.IOBase | None = None,
2020
mode: NetcdfWriteModes = "w",
2121
encoding: Mapping[str, Any] | None = None,
2222
unlimited_dims: Mapping | None = None,
@@ -26,18 +26,21 @@ def _datatree_to_netcdf(
2626
write_inherited_coords: bool = False,
2727
compute: bool = True,
2828
**kwargs,
29-
) -> None:
29+
) -> None | bytes:
3030
"""This function creates an appropriate datastore for writing a datatree to
3131
disk as a netCDF file.
3232
3333
See `DataTree.to_netcdf` for full API docs.
3434
"""
3535

3636
if format not in [None, *get_args(T_DataTreeNetcdfTypes)]:
37-
raise ValueError("to_netcdf only supports the NETCDF4 format")
37+
raise ValueError("DataTree.to_netcdf only supports the NETCDF4 format")
3838

3939
if engine not in [None, *get_args(T_DataTreeNetcdfEngine)]:
40-
raise ValueError("to_netcdf only supports the netcdf4 and h5netcdf engines")
40+
raise ValueError("DataTree.to_netcdf only supports the netcdf4 and h5netcdf engines")
41+
42+
if engine is None:
43+
engine = "h5netcdf"
4144

4245
if group is not None:
4346
raise NotImplementedError(
@@ -58,6 +61,9 @@ def _datatree_to_netcdf(
5861
f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}"
5962
)
6063

64+
if filepath is None:
65+
filepath = io.BytesIO()
66+
6167
if unlimited_dims is None:
6268
unlimited_dims = {}
6369

@@ -78,6 +84,8 @@ def _datatree_to_netcdf(
7884
)
7985
mode = "a"
8086

87+
return filepath.getvalue() if isinstance(filepath, io.BytesIO) else None
88+
8189

8290
def _datatree_to_zarr(
8391
dt: DataTree,

xarray/tests/test_backends.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4543,9 +4543,6 @@ class TestH5NetCDFFileObject(TestH5NetCDFData):
45434543
engine: T_NetcdfEngine = "h5netcdf"
45444544

45454545
def test_open_badbytes(self) -> None:
4546-
with pytest.raises(ValueError, match=r"HDF5 as bytes"):
4547-
with open_dataset(b"\211HDF\r\n\032\n", engine="h5netcdf"): # type: ignore[arg-type]
4548-
pass
45494546
with pytest.raises(
45504547
ValueError, match=r"match in any of xarray's currently installed IO"
45514548
):

xarray/tests/test_backends_datatree.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,28 @@ def test_phony_dims_warning(self, tmpdir) -> None:
539539
"phony_dim_3": 25,
540540
}
541541

542+
def test_roundtrip_via_bytes(self, simple_datatree):
543+
original_dt = simple_datatree
544+
roundtrip_dt = open_datatree(original_dt.to_netcdf())
545+
assert_equal(original_dt, roundtrip_dt)
546+
547+
def test_roundtrip_via_bytes_engine_specified(self, simple_datatree):
548+
original_dt = simple_datatree
549+
roundtrip_dt = open_datatree(original_dt.to_netcdf(engine=self.engine))
550+
assert_equal(original_dt, roundtrip_dt)
551+
552+
def test_roundtrip_using_filelike_object(self, tmpdir, simple_datatree):
553+
original_dt = simple_datatree
554+
filepath = tmpdir + '/test.nc'
555+
# h5py requires both read and write access when writing, it will
556+
# work with file-like objects provided they support both, and are
557+
# seekable.
558+
with open(filepath, 'wb+') as file:
559+
original_dt.to_netcdf(file, engine=self.engine)
560+
with open(filepath, 'rb') as file:
561+
roundtrip_dt = open_datatree(file, engine=self.engine)
562+
assert_equal(original_dt, roundtrip_dt)
563+
542564

543565
@requires_zarr
544566
@parametrize_zarr_format

0 commit comments

Comments
 (0)