Skip to content

Commit eb08bd5

Browse files
committed
(feat): async full reading of sparse
1 parent 37ad8f4 commit eb08bd5

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

src/anndata/_core/sparse_dataset.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# - think about supporting the COO format
1313
from __future__ import annotations
1414

15+
import asyncio
1516
import warnings
1617
from abc import ABC
1718
from collections.abc import Iterable
@@ -661,6 +662,31 @@ def to_memory(self) -> CSMatrix | CSArray:
661662
mtx.indptr = self._indptr
662663
return mtx
663664

665+
async def to_memory_async(self) -> CSMatrix | CSArray:
    """Asynchronously read this sparse dataset fully into memory.

    An empty container of the in-memory class for ``self.format`` is
    created (``settings.use_sparse_array_on_read`` is forwarded to
    ``get_memory_class`` as ``use_sparray_in_io``), ``self._indptr`` is
    attached, and then ``data`` and ``indices`` are filled in.

    Returns
    -------
    The populated in-memory sparse container.
    """
    memory_cls = get_memory_class(
        self.format, use_sparray_in_io=settings.use_sparse_array_on_read
    )
    result = memory_cls(self.shape, dtype=self.dtype)
    result.indptr = self._indptr

    if not isinstance(self._data, ZarrArray):
        # Not a ZarrArray: fall back to synchronous full reads.
        result.data = self._data[...]
        result.indices = self._indices[...]
        return result

    # ZarrArray-backed: schedule both reads together so they can overlap.
    pending = [
        self.set_memory_async_from_zarr(result, name)
        for name in ("data", "indices")
    ]
    await asyncio.gather(*pending)
    return result
682+
683+
async def set_memory_async_from_zarr(
    self, mtx: CSMatrix | CSArray, attr: Literal["indptr", "data", "indices"]
) -> None:
    """Await the contents of ``self._<attr>`` and assign them to ``mtx.<attr>``.

    Parameters
    ----------
    mtx
        In-memory sparse container that receives the fetched values.
    attr
        Name of the component to copy; read from ``self`` under the same
        name with a leading underscore.

    Notes
    -----
    Goes through the array's private ``_async_array`` attribute and awaits
    ``getitem(())``. Presumably the empty-tuple selection yields the whole
    array -- confirm against the zarr API.
    """
    # TODO: better way to asyncify
    source = getattr(self, f"_{attr}")
    values = await source._async_array.getitem(())
    setattr(mtx, attr, values)
689+
664690

665691
class _CSRDataset(BaseCompressedSparseDataset, abc.CSRDataset):
666692
"""Internal concrete version of :class:`anndata.abc.CSRDataset`."""

src/anndata/_io/specs/methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]:
831831
@_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0"))
@_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0"))
async def read_sparse(elem: GroupStorageType, *, _reader: Reader) -> CSMatrix | CSArray:
    """Read a stored CSR/CSC element fully into memory.

    Registered as the reader for ``ZarrGroup`` elements with the
    ``csc_matrix`` and ``csr_matrix`` specs (version ``0.1.0``). The element
    is wrapped with ``sparse_dataset`` and loaded via ``to_memory_async``.
    ``_reader`` is accepted but not used in this function.
    """
    dataset = sparse_dataset(elem)
    return await dataset.to_memory_async()
835835

836836

837837
#################

0 commit comments

Comments
 (0)