Support for DataTree.to_netcdf to write to a file-like object or bytes #10571
Changes from 15 commits
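
Taken together, the changes below let to_netcdf target memory instead of disk. As a hedged sketch of the intended round-trip on this branch (the tree contents and the h5netcdf engine choice are illustrative, not taken from the diff):

import io

import xarray as xr

tree = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"a": ("x", [1, 2, 3])}),
        "/child": xr.Dataset({"b": ("y", [4.0, 5.0])}),
    }
)

# Write the whole tree into a file-like object instead of a path; groups
# need a netCDF4-capable engine, hence h5netcdf.
buffer = io.BytesIO()
tree.to_netcdf(buffer, engine="h5netcdf")

# Read the tree back from the same buffer.
buffer.seek(0)
roundtripped = xr.open_datatree(buffer, engine="h5netcdf")
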
@@ -11,7 +11,7 @@
     Sequence,
 )
 from functools import partial
-from io import BytesIO
+from io import IOBase
 from itertools import starmap
 from numbers import Number
 from typing import (

@@ -31,6 +31,7 @@
 from xarray.backends.common import (
     AbstractDataStore,
     ArrayWriter,
+    BytesIOProxy,
     _find_absolute_paths,
     _normalize_path,
 )

@@ -503,7 +504,12 @@ def _datatree_from_backend_datatree(


 def open_dataset(
-    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+    filename_or_obj: str
+    | os.PathLike[Any]
+    | ReadBuffer
+    | bytes
+    | memoryview
+    | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,

@@ -533,12 +539,13 @@

     Parameters
     ----------
-    filename_or_obj : str, Path, file-like or DataStore
+    filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
         Strings and Path objects are interpreted as a path to a netCDF file
         or an OpenDAP URL and opened with python-netCDF4, unless the filename
         ends with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
-        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
+        scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
+        file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
+        (netCDF4).
     engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
         , installed backend \
         or subclass of xarray.backends.BackendEntrypoint, optional

[Comment on lines +538 to +544]
First comment: I'm wondering if the explicit mention of netCDF file here (and in the other …
Reply: Yes, this is a good consideration for updating later.

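A minimal illustration of what the updated docstring describes, assuming this branch is installed: bytes produced in memory can be handed straight back to open_dataset (netCDF3 via scipy here):

import xarray as xr

ds = xr.Dataset({"a": ("x", [1.0, 2.0, 3.0])})

# With no path, the default scipy engine serializes to netCDF3 in memory.
raw = ds.to_netcdf()

# Bytes are interpreted as file contents and opened by scipy.
reopened = xr.open_dataset(bytes(raw), engine="scipy")
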
@@ -743,7 +750,12 @@ def open_dataset(


 def open_dataarray(
-    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+    filename_or_obj: str
+    | os.PathLike[Any]
+    | ReadBuffer
+    | bytes
+    | memoryview
+    | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,

@@ -774,12 +786,13 @@ def open_dataarray(

     Parameters
     ----------
-    filename_or_obj : str, Path, file-like or DataStore
+    filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
         Strings and Path objects are interpreted as a path to a netCDF file
         or an OpenDAP URL and opened with python-netCDF4, unless the filename
         ends with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
-        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
+        scipy.io.netcdf (only netCDF3 supported). Bytes, memoryview and
+        file-like objects are opened by scipy.io.netcdf (netCDF3) or h5netcdf
+        (netCDF4).
     engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
         , installed backend \
         or subclass of xarray.backends.BackendEntrypoint, optional

@@ -970,7 +983,12 @@ def open_dataarray(


 def open_datatree(
-    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+    filename_or_obj: str
+    | os.PathLike[Any]
+    | ReadBuffer
+    | bytes
+    | memoryview
+    | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,

@@ -1001,8 +1019,10 @@ def open_datatree(

     Parameters
     ----------
-    filename_or_obj : str, Path, file-like, or DataStore
-        Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
+    filename_or_obj : str, Path, file-like, bytes or DataStore
+        Strings and Path objects are interpreted as a path to a netCDF file or
+        Zarr store. Bytes and memoryview objects are interpreted as file
+        contents.
     engine : {"netcdf4", "h5netcdf", "zarr", None}, \
         installed backend or xarray.backends.BackendEntrypoint, optional
         Engine to use when reading files. If not provided, the default engine

@@ -1208,7 +1228,12 @@ def open_datatree(


 def open_groups(
-    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
+    filename_or_obj: str
+    | os.PathLike[Any]
+    | ReadBuffer
+    | bytes
+    | memoryview
+    | AbstractDataStore,
     *,
     engine: T_Engine = None,
     chunks: T_Chunks = None,

@@ -1243,8 +1268,10 @@ def open_groups(

     Parameters
     ----------
-    filename_or_obj : str, Path, file-like, or DataStore
-        Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
+    filename_or_obj : str, Path, file-like, bytes, memoryview or DataStore
+        Strings and Path objects are interpreted as a path to a netCDF file or
+        Zarr store. Bytes and memoryview objects are interpreted as file
+        contents.
     engine : {"netcdf4", "h5netcdf", "zarr", None}, \
         installed backend or xarray.backends.BackendEntrypoint, optional
         Engine to use when reading files. If not provided, the default engine

@@ -1780,7 +1807,7 @@ def to_netcdf(
 ) -> tuple[ArrayWriter, AbstractDataStore]: ...


-# path=None writes to bytes
+# path=None writes to bytes or memoryview, depending on store
 @overload
 def to_netcdf(
     dataset: Dataset,

@@ -1795,7 +1822,7 @@ def to_netcdf(
     multifile: Literal[False] = False,
     invalid_netcdf: bool = False,
     auto_complex: bool | None = None,
-) -> bytes: ...
+) -> bytes | memoryview: ...


 # compute=False returns dask.Delayed

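The widened overload means callers should no longer assume bytes specifically; which concrete type comes back depends on the store behind the chosen engine. A defensive pattern, as a sketch:

import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3])})

# scipy's in-memory store hands back bytes; per this PR, the h5netcdf
# store may hand back a memoryview instead.
result = ds.to_netcdf(engine="h5netcdf")

# bytes() accepts both bytes and memoryview, so normalize before use.
payload = bytes(result)
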
@@ -1821,7 +1848,7 @@
 @overload
 def to_netcdf(
     dataset: Dataset,
-    path_or_file: str | os.PathLike,
+    path_or_file: str | os.PathLike | IOBase,
     mode: NetcdfWriteModes = "w",
     format: T_NetcdfTypes | None = None,
     group: str | None = None,

@@ -1877,7 +1904,7 @@
 @overload
 def to_netcdf(
     dataset: Dataset,
-    path_or_file: str | os.PathLike | None,
+    path_or_file: str | os.PathLike | IOBase | None,
     mode: NetcdfWriteModes = "w",
     format: T_NetcdfTypes | None = None,
     group: str | None = None,

@@ -1888,12 +1915,12 @@
     multifile: bool = False,
     invalid_netcdf: bool = False,
     auto_complex: bool | None = None,
-) -> tuple[ArrayWriter, AbstractDataStore] | bytes | Delayed | None: ...
+) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None: ...


 def to_netcdf(
     dataset: Dataset,
-    path_or_file: str | os.PathLike | None = None,
+    path_or_file: str | os.PathLike | IOBase | None = None,
     mode: NetcdfWriteModes = "w",
     format: T_NetcdfTypes | None = None,
     group: str | None = None,

@@ -1904,7 +1931,7 @@
     multifile: bool = False,
     invalid_netcdf: bool = False,
     auto_complex: bool | None = None,
-) -> tuple[ArrayWriter, AbstractDataStore] | bytes | Delayed | None:
+) -> tuple[ArrayWriter, AbstractDataStore] | bytes | memoryview | Delayed | None:
     """This function creates an appropriate datastore for writing a dataset to
     disk as a netCDF file

@@ -1918,26 +1945,27 @@
     if encoding is None:
         encoding = {}

-    if path_or_file is None:
+    if isinstance(path_or_file, str):
+        if engine is None:
+            engine = _get_default_engine(path_or_file)
+        path_or_file = _normalize_path(path_or_file)
+    else:
+        # writing to bytes/memoryview or a file-like object
         if engine is None:
+            # TODO: only use 'scipy' if format is None or a netCDF3 format
             engine = "scipy"
-        elif engine != "scipy":
+        elif engine not in ("scipy", "h5netcdf"):
             raise ValueError(
-                "invalid engine for creating bytes with "
-                f"to_netcdf: {engine!r}. Only the default engine "
-                "or engine='scipy' is supported"
+                "invalid engine for creating bytes/memoryview or writing to a "
+                f"file-like object with to_netcdf: {engine!r}. Only "
+                "engine=None, engine='scipy' and engine='h5netcdf' is "
+                "supported."
             )
         if not compute:
             raise NotImplementedError(
                 "to_netcdf() with compute=False is not yet implemented when "
                 "returning bytes"
             )
-    elif isinstance(path_or_file, str):
-        if engine is None:
-            engine = _get_default_engine(path_or_file)
-        path_or_file = _normalize_path(path_or_file)
-    else:  # file-like object
-        engine = "scipy"

     # validate Dataset keys, DataArray names, and attr keys/values
     _validate_dataset_names(dataset)

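The reworked branch above normalizes only string paths and funnels every other target (None, a file-like object) through the same engine check. A quick sketch of the failure mode it guards against:

import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3])})

try:
    # netcdf4-python writes only to real paths, so asking for an in-memory
    # result with it should hit the ValueError constructed above.
    ds.to_netcdf(engine="netcdf4")
except ValueError as err:
    print(err)
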
@@ -1962,7 +1990,11 @@ def to_netcdf(
             f"is not currently supported with dask's {scheduler} scheduler"
         )

-    target = path_or_file if path_or_file is not None else BytesIO()
+    if path_or_file is None:
+        target = BytesIOProxy()
+    else:
+        target = path_or_file  # type: ignore[assignment]
+
     kwargs = dict(autoclose=True) if autoclose else {}
     if invalid_netcdf:
         if engine == "h5netcdf":

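BytesIOProxy comes from xarray.backends.common, and its definition is not part of this diff. A hypothetical minimal shape, inferred only from how getvalue_or_getbuffer() is consumed at the end of to_netcdf:

from collections.abc import Callable


class BytesIOProxy:
    """Hypothetical sketch, not the real xarray class: a placeholder target
    that the backend store fills in with a way to read what was written."""

    getvalue: Callable[[], bytes] | None = None
    getbuffer: Callable[[], memoryview] | None = None

    def getvalue_or_getbuffer(self) -> bytes | memoryview:
        # Assumption: a scipy-backed store attaches getvalue() -> bytes,
        # while an h5netcdf-backed store attaches getbuffer() -> memoryview.
        if self.getvalue is not None:
            return self.getvalue()
        if self.getbuffer is not None:
            return self.getbuffer()
        raise ValueError("store did not produce any in-memory contents")
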
@@ -2002,17 +2034,19 @@
         writes = writer.sync(compute=compute)

-        if isinstance(target, BytesIO):
-            store.sync()
-            return target.getvalue()
     finally:
         if not multifile and compute:  # type: ignore[redundant-expr]
             store.close()

+    if path_or_file is None:
+        assert isinstance(target, BytesIOProxy)  # created in this function
+        return target.getvalue_or_getbuffer()
+
     if not compute:
         import dask

         return dask.delayed(_finalize_store)(writes, store)

     return None
|
||
|