-
-
Notifications
You must be signed in to change notification settings. Fork: 1.2k
Add chunks='auto' support for cftime datasets #10527
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 40 commits
eb1a967
852476d
c921c59
1aba531
9429c3d
3c9d27e
5153d2d
62e71e6
cfdc31b
2f16bc7
ce720fa
4fa58c1
e58d6d7
590e503
f953976
6706524
4e56acd
0d008cd
49c4e9c
4594099
5d00b0a
80421ef
d1f7ad3
1b7de62
4407185
d8f45b2
20226c1
11ac9f0
8485df5
2c27877
0983261
c4ec31f
adbf5b2
6c93bc4
74bc0ea
0b9bbd0
e58322f
dbc6ebd
5680663
b5933ed
5db9225
600c0fd
9fcc6eb
dc83692
1e1bbf3
9443815
db52c62
85ebafd
e2627c6
0bca828
a930a65
cbcb640
70208e0
3f0d3aa
e944eb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,6 +1,7 @@ | ||||||||||||||||||
from __future__ import annotations | ||||||||||||||||||
|
||||||||||||||||||
import importlib | ||||||||||||||||||
import sys | ||||||||||||||||||
import warnings | ||||||||||||||||||
from collections.abc import Hashable, Iterable, Iterator, Mapping | ||||||||||||||||||
from functools import lru_cache | ||||||||||||||||||
|
@@ -23,7 +24,9 @@ | |||||||||||||||||
DaskArray = NDArray # type: ignore[assignment, misc] | ||||||||||||||||||
DaskCollection: Any = NDArray # type: ignore[no-redef] | ||||||||||||||||||
|
||||||||||||||||||
from xarray.core.variable import Variable | ||||||||||||||||||
from xarray.namedarray._typing import _Dim, duckarray | ||||||||||||||||||
from xarray.namedarray.parallelcompat import T_ChunkedArray | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
K = TypeVar("K") | ||||||||||||||||||
|
@@ -195,6 +198,31 @@ def either_dict_or_kwargs( | |||||||||||||||||
return pos_kwargs | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
def fake_target_chunksize( | ||||||||||||||||||
data: Variable | T_ChunkedArray, | ||||||||||||||||||
target_chunksize: int, | ||||||||||||||||||
) -> tuple[int, np.dtype[Any]]: | ||||||||||||||||||
""" | ||||||||||||||||||
Naughty trick - let's get the ratio of our cftime_nbytes, and then compute a rescaled byte `limit`: `normalize_chunks` takes a size `limit` in bytes but will not work for object dtypes, so we rescale the limit to an equivalent one based on the `float64` dtype.
|
Naughty trick - let's get the ratio of our cftime_nbytes, and then compute | |
The `normalize_chunks` algorithm takes a size `limit` in bytes, but will not work for object dtypes. | |
So we rescale the `limit` to an appropriate one based on `float64` dtype, and pass that to `normalize_chunks`. | |
Naughty trick - let's get the ratio of our cftime_nbytes, and then compute |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if data.dtype == object: | |
nbytes_approx: int = sys.getsizeof(first_n_items(data, 1)) # type: ignore[no-untyped-call] | |
else: | |
nbytes_approx = data.dtype.itemsize | |
if data.dtype != object: | |
return limit, var.dtype | |
nbytes_approx: int = sys.getsizeof(first_n_items(data, 1)) # type: ignore[no-untyped-call] |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -18,11 +18,13 @@ | |||||||||
get_chunked_array_type, | ||||||||||
guess_chunkmanager, | ||||||||||
) | ||||||||||
from xarray.namedarray.utils import fake_target_chunksize | ||||||||||
|
||||||||||
if TYPE_CHECKING: | ||||||||||
from xarray.core.dataarray import DataArray | ||||||||||
from xarray.core.dataset import Dataset | ||||||||||
from xarray.core.types import T_ChunkDim | ||||||||||
from xarray.core.variable import IndexVariable, Variable | ||||||||||
|
||||||||||
MissingCoreDimOptions = Literal["raise", "copy", "drop"] | ||||||||||
|
||||||||||
|
@@ -83,8 +85,15 @@ def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): | |||||||||
for dim, preferred_chunk_sizes in zip(dims, preferred_chunk_shape, strict=True) | ||||||||||
) | ||||||||||
|
||||||||||
limit = chunkmanager.get_auto_chunk_size() | ||||||||||
limit, var_dtype = fake_target_chunksize(var, limit) | ||||||||||
|
limit, var_dtype = fake_target_chunksize(var, limit) | |
# The `normalize_chunks` algorithm takes a size `limit` in bytes, but will not work for object dtypes. | |
# So we rescale the `limit` to an appropriate one based on `float64` dtype, and pass that to `normalize_chunks`. | |
limit, var_dtype = fake_target_chunksize(var, limit) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this can be deleted
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Had a play and I don't think I can fully get rid of it, I've reused as much of the abstracted logic as possible though.