Merged
1 change: 1 addition & 0 deletions pyproject.toml
@@ -42,6 +42,7 @@ doc = [
"sphinx_rtd_theme",
]
test = [
"packaging",
"numpy<2 ; python_version == '3.9'",
"blosc2>=2.5.1",
"blosc2-grok>=0.2.2",
135 changes: 118 additions & 17 deletions src/hdf5plugin/test.py
@@ -35,6 +35,7 @@

import h5py
import numpy
from packaging.version import parse as parse_version

import hdf5plugin

@@ -58,6 +59,21 @@ def should_test(filter_name: str) -> bool:
)


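# Mapping of the filter names used in the tests to their hdf5plugin compression
# classes, shared by the read/write tests and the string tests below.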
compression_name_to_class = {
"blosc": hdf5plugin.Blosc,
"blosc2": hdf5plugin.Blosc2,
"bshuf": hdf5plugin.Bitshuffle,
"bzip2": hdf5plugin.BZip2,
"lz4": hdf5plugin.LZ4,
"fcidecomp": hdf5plugin.FciDecomp,
"sperr": hdf5plugin.Sperr,
"sz": hdf5plugin.SZ,
"sz3": hdf5plugin.SZ3,
"zfp": hdf5plugin.Zfp,
"zstd": hdf5plugin.Zstd,
}


class BaseTestHDF5PluginRW(unittest.TestCase):
"""Base class for testing write/read HDF5 dataset with the plugins"""

@@ -88,30 +104,15 @@ def _test(
"""
data = numpy.ones((self._data_natoms,), dtype=dtype).reshape(self._data_shape)
filename = os.path.join(self.tempdir, "test_" + filter_name + ".h5")

compression_class = {
"blosc": hdf5plugin.Blosc,
"blosc2": hdf5plugin.Blosc2,
"bshuf": hdf5plugin.Bitshuffle,
"bzip2": hdf5plugin.BZip2,
"lz4": hdf5plugin.LZ4,
"fcidecomp": hdf5plugin.FciDecomp,
"sperr": hdf5plugin.Sperr,
"sz": hdf5plugin.SZ,
"sz3": hdf5plugin.SZ3,
"zfp": hdf5plugin.Zfp,
"zstd": hdf5plugin.Zstd,
}[filter_name]
compression_class = compression_name_to_class[filter_name]

# Write
f = h5py.File(filename, "w")
if options is None:
options = {}
f.create_dataset(
"data",
data=data,
chunks=data.shape,
compression=compression_class(**options),
compression=compression_class(**(options or {})),
)
f.close()

@@ -399,6 +400,105 @@ def testZstd(self):
self._test("zstd", dtype=dtype, options=options)


class TestStrings(unittest.TestCase):
"""Test strings compression"""

def setUp(self):
self.tempdir = tempfile.TemporaryDirectory()
N = 100
self.string_arrays = [
# Note: h5py does not support dtype="U"
numpy.array(["test", "strings", "ascii"] * N, dtype="S"),
numpy.array([b"test", b"strings", b"binary"] * N, dtype="O"),
]
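# NumPy 2 introduces the variable-width string dtype "T" (StringDType);
# it is only exercised when both numpy >= 2.0 and h5py >= 3.14 are available.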
has_h5py_314 = parse_version(h5py.__version__) >= parse_version("3.14")
has_numpy_2 = parse_version(numpy.__version__) >= parse_version("2.0")
if has_h5py_314 and has_numpy_2:
self.string_arrays.append(
numpy.array(["test", "strings", "Crème brûlée"] * N, dtype="T")
)

def tearDown(self):
self.tempdir.cleanup()

def _test_strings(
self,
filter_name: str,
options: dict[str, Any] | None = None,
) -> None:
"""Test string compression for a particular filter

:param filter_name: The name of the filter to use
:param options: Filter options passed to the compression class, if any
"""
filename = os.path.join(self.tempdir.name, f"{filter_name}.h5")
compression_class = compression_name_to_class[filter_name]

for data in self.string_arrays:
with self.subTest(name=data.dtype.kind):
ds_name = f"data{data.dtype.kind}"
# Write
with h5py.File(filename, "w") as f:
f.create_dataset(
ds_name,
data=data,
chunks=data.shape,
compression=compression_class(**(options or {})),
)

# Read
with h5py.File(filename, "r") as f:
if data.dtype.kind == "T":
# Use h5py accessor. Note that this is very different from
# f[ds_name][()].astype("T")
saved = f[ds_name].astype("T")[()]
else:
saved = f[ds_name][()]

plist = f[ds_name].id.get_create_plist()
filters = [plist.get_filter(i) for i in range(plist.get_nfilters())]

# Read chunk raw (compressed) data
chunk = f[ds_name].id.read_direct_chunk((0,))[1]
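# (read_direct_chunk returns a (filter_mask, raw_bytes) tuple; [1] is the compressed payload)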

# Check if chunk is actually compressed
self.assertLess(len(chunk), data.nbytes)

self.assertTrue(numpy.array_equal(saved, data))
self.assertEqual(saved.dtype, data.dtype)

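# hdf5plugin.FILTERS maps filter names to their HDF5 filter IDs;
# the first element of each get_filter() tuple is the filter ID.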
self.assertEqual(len(filters), 1)
self.assertEqual(filters[0][0], hdf5plugin.FILTERS[filter_name])

@unittest.skip(reason="segfault (#364)")
@unittest.skipUnless(should_test("blosc"), "Blosc filter not available")
def testStringsBlosc(self):
"""Strings write/read test with blosc filter plugin"""
self._test_strings("blosc") # Default options

@unittest.skip(reason="segfault (#364)")
@unittest.skipUnless(should_test("blosc2"), "Blosc filter not available")
def testStringsBlosc2(self):
"""Strings write/read test with blosc2 filter plugin"""
self._test_strings("blosc2")

@unittest.skipUnless(should_test("bzip2"), "BZip2 filter not available")
def testStringsBZip2(self):
"""Strings write/read test with BZip2 filter plugin"""
self._test_strings("bzip2")

@unittest.skipUnless(should_test("lz4"), "LZ4 filter not available")
def testStringsLZ4(self):
"""Strings write/read test with LZ4 filter plugin"""
self._test_strings("lz4")

@unittest.skipUnless(should_test("zstd"), "Zstd filter not available")
def testStringsZstd(self):
"""Strings write/read test with Zstd filter plugin"""
self._test_strings("zstd")


class TestPackage(unittest.TestCase):
"""Test general features of the hdf5plugin package"""

@@ -641,6 +741,7 @@ def suite() -> unittest.TestSuite:
test_suite = unittest.TestSuite()
for cls in (
TestHDF5PluginRW,
TestStrings,
TestPackage,
TestRegisterFilter,
TestGetFilters,
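For reference, a minimal sketch of running only the new string round-trip tests with the standard unittest loader, assuming hdf5plugin is installed together with its bundled hdf5plugin.test module (the file changed above):

import unittest

from hdf5plugin.test import TestStrings

# Load and run only the TestStrings case added in this change
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestStrings)
unittest.TextTestRunner(verbosity=2).run(suite)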