Skip to content

Commit 7b2d8aa

Browse files
committed
Add a test for Dataset.to_netcdf(engine='h5netcdf') and fix a bug where the bytes were returned before the h5py.File had been closed; closing the file appears to be required for the h5netcdf backend to finish writing a valid file. This required a further workaround to prevent the scipy backend from closing the BytesIO when it is used in a similar way.
1 parent 7ba5f85 commit 7b2d8aa

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

xarray/backends/api.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1958,7 +1958,18 @@ def to_netcdf(
19581958
f"is not currently supported with dask's {scheduler} scheduler"
19591959
)
19601960

1961-
target = path_or_file if path_or_file is not None else BytesIO()
1961+
if path_or_file is None:
1962+
target = BytesIO()
1963+
# We can't get the BytesIO's value *before* closing the store, since
1964+
# the h5netcdf backend won't finish writing until its close is called.
1965+
# However if we try to get the BytesIO's value *after* closing the store,
1966+
# the scipy backend will close the BytesIO, preventing its value from
1967+
# being read. The solution is to prevent the BytesIO from being closed:
1968+
close_bytesio = target.close
1969+
target.close = lambda: None # type: ignore[method-assign]
1970+
else:
1971+
target = path_or_file # type: ignore[assignment]
1972+
19621973
kwargs = dict(autoclose=True) if autoclose else {}
19631974
if invalid_netcdf:
19641975
if engine == "h5netcdf":
@@ -1998,13 +2009,15 @@ def to_netcdf(
19982009

19992010
writes = writer.sync(compute=compute)
20002011

2001-
if isinstance(target, BytesIO):
2002-
store.sync()
2003-
return target.getvalue()
20042012
finally:
20052013
if not multifile and compute: # type: ignore[redundant-expr]
20062014
store.close()
20072015

2016+
if path_or_file is None:
2017+
value = target.getvalue()
2018+
close_bytesio()
2019+
return value
2020+
20082021
if not compute:
20092022
import dask
20102023

xarray/tests/test_backends.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4601,6 +4601,15 @@ def test_open_fileobj(self) -> None:
46014601
pass
46024602

46034603

4604+
@requires_h5netcdf
4605+
class TestH5NetCDFInMemoryData:
4606+
def test_roundtrip_via_bytes(self) -> None:
4607+
original = create_test_data()
4608+
netcdf_bytes = original.to_netcdf(engine="h5netcdf")
4609+
roundtrip = open_dataset(netcdf_bytes, engine="h5netcdf") # type: ignore[arg-type]
4610+
assert_identical(roundtrip, original)
4611+
4612+
46044613
@requires_h5netcdf
46054614
@requires_dask
46064615
@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager")

0 commit comments

Comments
 (0)