
Support for DataTree.to_netcdf to write to a file-like object or bytes #10571

Merged · 17 commits · Aug 8, 2025
2 changes: 2 additions & 0 deletions doc/whats-new.rst
@@ -13,6 +13,8 @@ v2025.07.2 (unreleased)
New Features
~~~~~~~~~~~~

- :py:meth:`DataTree.to_netcdf` can now write to a file-like object, or return bytes if called without a filepath. (:issue:`10570`)
By `Matthew Willson <https://github.com/mjwillson>`_.

Breaking changes
~~~~~~~~~~~~~~~~
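To make the new entry concrete, here is a minimal sketch assuming the h5netcdf engine is installed; the tree structure and variable names are invented for illustration, and the exact return type (bytes vs. memoryview) depends on the engine used:

```python
import io

import xarray as xr

# Hypothetical two-group tree, used only for illustration.
tree = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"temperature": ("x", [1.0, 2.0, 3.0])}),
        "/child": xr.Dataset({"pressure": ("x", [10.0, 20.0, 30.0])}),
    }
)

# With no filepath, to_netcdf() returns the serialized file contents.
# A tree with child groups needs the netCDF4 format, hence h5netcdf here.
data = tree.to_netcdf(engine="h5netcdf")

# Alternatively, write into an existing file-like object.
buffer = io.BytesIO()
tree.to_netcdf(buffer, engine="h5netcdf")

# Either result can be read back with open_datatree.
roundtripped = xr.open_datatree(io.BytesIO(bytes(data)), engine="h5netcdf")
```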
89 changes: 52 additions & 37 deletions xarray/backends/api.py
@@ -11,7 +11,7 @@
Sequence,
)
from functools import partial
from io import BytesIO
from io import IOBase
from itertools import starmap
from numbers import Number
from typing import (
@@ -31,6 +31,8 @@
from xarray.backends.common import (
AbstractDataStore,
ArrayWriter,
BytesIOProxy,
T_PathFileOrDataStore,
_find_absolute_paths,
_normalize_path,
)
@@ -503,7 +505,7 @@ def _datatree_from_backend_datatree(


def open_dataset(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: T_PathFileOrDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
@@ -533,12 +535,13 @@ def open_dataset(

Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
(netCDF4).
Comment on lines +538 to +544

Contributor: I'm wondering if the explicit mention of a netCDF file here (and in the other open_* functions) is still valid in light of all the other engines, which handle files of any provenance. A change to this might be better done in another PR; I just stumbled over this and wanted to keep a log of it.

Member: Yes, this is a good consideration for updating later.

engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
@@ -743,7 +746,7 @@ def open_dataset(


def open_dataarray(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: T_PathFileOrDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
@@ -774,12 +777,13 @@ def open_dataarray(

Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
(netCDF4).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
@@ -970,7 +974,7 @@ def open_dataarray(


def open_datatree(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: T_PathFileOrDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
Expand Down Expand Up @@ -1001,8 +1005,10 @@ def open_datatree(

Parameters
----------
filename_or_obj : str, Path, file-like, or DataStore
Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
filename_or_obj : str, Path, file-like, bytes or DataStore
Strings and Path objects are interpreted as a path to a netCDF file or
Zarr store. Bytes and memoryview objects are interpreted as file
contents.
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
installed backend or xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
@@ -1208,7 +1214,7 @@ def open_datatree(


def open_groups(
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: T_PathFileOrDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
Expand Down Expand Up @@ -1243,8 +1249,10 @@ def open_groups(

Parameters
----------
filename_or_obj : str, Path, file-like, or DataStore
Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
Strings and Path objects are interpreted as a path to a netCDF file or
Zarr store. Bytes and memoryview objects are interpreted as file
contents.
engine : {"netcdf4", "h5netcdf", "zarr", None}, \
installed backend or xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
@@ -1780,7 +1788,7 @@ def to_netcdf(
) -> tuple[ArrayWriter, AbstractDataStore]: ...


# path=None writes to bytes
# path=None writes to bytes or memoryview, depending on store
@overload
def to_netcdf(
dataset: Dataset,
@@ -1795,7 +1803,7 @@ def to_netcdf(
multifile: Literal[False] = False,
invalid_netcdf: bool = False,
auto_complex: bool | None = None,
) -> bytes: ...
) -> bytes | memoryview: ...


# compute=False returns dask.Delayed
@@ -1821,7 +1829,7 @@ def to_netcdf(
@overload
def to_netcdf(
dataset: Dataset,
path_or_file: str | os.PathLike,
path_or_file: str | os.PathLike | IOBase,
mode: NetcdfWriteModes = "w",
format: T_NetcdfTypes | None = None,
group: str | None = None,
@@ -1877,7 +1885,7 @@ def to_netcdf(
@overload
def to_netcdf(
dataset: Dataset,
path_or_file: str | os.PathLike | None,
path_or_file: str | os.PathLike | IOBase | None,
mode: NetcdfWriteModes = "w",
format: T_NetcdfTypes | None = None,
group: str | None = None,
@@ -1888,12 +1896,12 @@ def to_netcdf(
multifile: bool = False,
invalid_netcdf: bool = False,
auto_complex: bool | None = None,
) -> tuple[ArrayWriter, AbstractDataStore] | bytes | Delayed | None: ...
) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None: ...


def to_netcdf(
dataset: Dataset,
path_or_file: str | os.PathLike | None = None,
path_or_file: str | os.PathLike | IOBase | None = None,
mode: NetcdfWriteModes = "w",
format: T_NetcdfTypes | None = None,
group: str | None = None,
@@ -1904,7 +1912,7 @@ def to_netcdf(
multifile: bool = False,
invalid_netcdf: bool = False,
auto_complex: bool | None = None,
) -> tuple[ArrayWriter, AbstractDataStore] | bytes | Delayed | None:
) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None:
"""This function creates an appropriate datastore for writing a dataset to
disk as a netCDF file

@@ -1918,26 +1926,27 @@ def to_netcdf(
if encoding is None:
encoding = {}

if path_or_file is None:
if isinstance(path_or_file, str):
if engine is None:
engine = _get_default_engine(path_or_file)
path_or_file = _normalize_path(path_or_file)
else:
# writing to bytes/memoryview or a file-like object
if engine is None:
# TODO: only use 'scipy' if format is None or a netCDF3 format
engine = "scipy"
elif engine != "scipy":
elif engine not in ("scipy", "h5netcdf"):
raise ValueError(
"invalid engine for creating bytes with "
f"to_netcdf: {engine!r}. Only the default engine "
"or engine='scipy' is supported"
"invalid engine for creating bytes/memoryview or writing to a "
f"file-like object with to_netcdf: {engine!r}. Only "
"engine=None, engine='scipy' and engine='h5netcdf' is "
"supported."
)
if not compute:
raise NotImplementedError(
"to_netcdf() with compute=False is not yet implemented when "
"returning bytes"
)
elif isinstance(path_or_file, str):
if engine is None:
engine = _get_default_engine(path_or_file)
path_or_file = _normalize_path(path_or_file)
else: # file-like object
engine = "scipy"

# validate Dataset keys, DataArray names, and attr keys/values
_validate_dataset_names(dataset)
@@ -1962,7 +1971,11 @@ def to_netcdf(
f"is not currently supported with dask's {scheduler} scheduler"
)

target = path_or_file if path_or_file is not None else BytesIO()
if path_or_file is None:
target = BytesIOProxy()
else:
target = path_or_file # type: ignore[assignment]

kwargs = dict(autoclose=True) if autoclose else {}
if invalid_netcdf:
if engine == "h5netcdf":
@@ -2002,17 +2015,19 @@ def to_netcdf(

writes = writer.sync(compute=compute)

if isinstance(target, BytesIO):
store.sync()
return target.getvalue()
finally:
if not multifile and compute: # type: ignore[redundant-expr]
store.close()

if path_or_file is None:
assert isinstance(target, BytesIOProxy) # created in this function
return target.getvalue_or_getbuffer()

if not compute:
import dask

return dask.delayed(_finalize_store)(writes, store)

return None


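A short sketch of what the widened overloads and the engine check above mean for callers, assuming scipy and h5netcdf are installed; the result is bytes or a memoryview, matching the annotations rather than promising one or the other:

```python
import io

import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(3.0))})

# With no target, the default engine remains scipy (netCDF3); with this
# change engine="h5netcdf" (netCDF4) is also accepted. Other engines still
# raise the ValueError shown above.
netcdf3 = ds.to_netcdf()
netcdf4 = ds.to_netcdf(engine="h5netcdf")

# A file-like target now goes through the same branch, so an open buffer
# can be passed directly.
buffer = io.BytesIO()
ds.to_netcdf(buffer, engine="h5netcdf")

# In-memory contents can be read back; bytes() normalizes a memoryview.
roundtripped = xr.open_dataset(bytes(netcdf4), engine="h5netcdf")
```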
64 changes: 58 additions & 6 deletions xarray/backends/common.py
@@ -4,9 +4,18 @@
import os
import time
import traceback
from collections.abc import Hashable, Iterable, Mapping, Sequence
from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence
from dataclasses import dataclass
from glob import glob
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, Union, overload
from typing import (
TYPE_CHECKING,
Any,
ClassVar,
Generic,
TypeVar,
Union,
overload,
)

import numpy as np
import pandas as pd
@@ -188,6 +197,24 @@ def _normalize_path_list(
return _normalize_path_list(paths)


BytesOrMemory = TypeVar("BytesOrMemory", bytes, memoryview)


@dataclass
class BytesIOProxy(Generic[BytesOrMemory]):
"""Proxy object for a write that returns either bytes or a memoryview."""

# TODO: remove this in favor of BytesIO when Dataset.to_netcdf() stops
# returning bytes from the scipy engine
getvalue: Callable[[], BytesOrMemory] | None = None

def getvalue_or_getbuffer(self) -> BytesOrMemory:
"""Get the value of this write as bytes or memory."""
if self.getvalue is None:
raise ValueError("must set getvalue before fetching value")
return self.getvalue()


def _open_remote_file(file, mode, storage_options=None):
import fsspec

@@ -324,6 +351,11 @@ def __exit__(self, exception_type, exception_value, traceback):
self.close()


T_PathFileOrDataStore = (
str | os.PathLike[Any] | ReadBuffer | bytes | memoryview | AbstractDataStore
)


class ArrayWriter:
__slots__ = ("lock", "regions", "sources", "targets")

@@ -705,7 +737,12 @@ def __repr__(self) -> str:

def open_dataset(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: str
| os.PathLike[Any]
| ReadBuffer
| bytes
| memoryview
| AbstractDataStore,
*,
drop_variables: str | Iterable[str] | None = None,
) -> Dataset:
@@ -717,7 +754,12 @@ def open_dataset(

def guess_can_open(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: str
| os.PathLike[Any]
| ReadBuffer
| bytes
| memoryview
| AbstractDataStore,
) -> bool:
"""
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
@@ -727,7 +769,12 @@ def guess_can_open(

def open_datatree(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: str
| os.PathLike[Any]
| ReadBuffer
| bytes
| memoryview
| AbstractDataStore,
*,
drop_variables: str | Iterable[str] | None = None,
) -> DataTree:
@@ -739,7 +786,12 @@ def open_datatree(

def open_groups_as_dict(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
filename_or_obj: str
| os.PathLike[Any]
| ReadBuffer
| bytes
| memoryview
| AbstractDataStore,
*,
drop_variables: str | Iterable[str] | None = None,
) -> dict[str, Dataset]:
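A simplified sketch of the hand-off BytesIOProxy enables; the real backends set getvalue internally, and the BytesIO here only stands in for a finished file:

```python
from io import BytesIO

from xarray.backends.common import BytesIOProxy

target = BytesIOProxy()

# A store writing to `target` is expected to set `getvalue` to a callable
# producing the finished file contents: a scipy-backed store can expose
# bytes, while an h5netcdf-backed store can expose a memoryview.
buffer = BytesIO()
buffer.write(b"stand-in for finished netCDF contents")
target.getvalue = buffer.getvalue

# Once writing is complete, to_netcdf() fetches the result exactly once;
# calling getvalue_or_getbuffer() before getvalue is set raises ValueError.
result = target.getvalue_or_getbuffer()
```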
7 changes: 5 additions & 2 deletions xarray/backends/file_manager.py
@@ -339,8 +339,11 @@ def __hash__(self):
class DummyFileManager(FileManager):
"""FileManager that simply wraps an open file in the FileManager interface."""

def __init__(self, value):
def __init__(self, value, *, close=None):
if close is None:
close = value.close
self._value = value
self._close = close

def acquire(self, needs_lock=True):
del needs_lock # ignored
@@ -353,4 +356,4 @@ def acquire_context(self, needs_lock=True):

def close(self, needs_lock=True):
del needs_lock # ignored
self._value.close()
self._close()
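A small sketch of the new close keyword on DummyFileManager, assuming an already-open in-memory buffer; the no-op close callable is illustrative only:

```python
from io import BytesIO

from xarray.backends.file_manager import DummyFileManager

buffer = BytesIO(b"already-open, in-memory file contents")

# Default behaviour is unchanged: close() delegates to the wrapped
# object's own close() method.
manager = DummyFileManager(buffer)
assert manager.acquire() is buffer

# The new keyword lets a caller supply a custom close callable, for example
# to keep a buffer alive after the store that wraps it is closed.
keep_open = DummyFileManager(buffer, close=lambda: None)
keep_open.close()
assert not buffer.closed
```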