Skip to content

Commit 257b200

Browse files
committed
Adopt libzim changes around cache management
1 parent 63b94fa commit 257b200

File tree

6 files changed

+64
-113
lines changed

6 files changed

+64
-113
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
- libzim 9.4.0 Cache Control API
11+
- remove `Archive.dirent_lookup_cache_max_size`, which no longer exists in libzim
12+
- move `Archive.cluster_cache_max_size` and `Archive.cluster_cache_current_size` to module-level functions outside of the `Archive` object: `get_cluster_cache_max_size`, `set_cluster_cache_max_size`, `get_cluster_cache_current_size`
13+
- deprecate usage of `get_illustration_sizes()`
14+
15+
1016
## [3.7.0] - 2025-04-18
1117

1218
### Added

libzim/libwrapper.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,9 @@ class Archive : public Wrapper<zim::Archive>
172172
FORWARD(bool, hasTitleIndex)
173173
FORWARD(bool, hasChecksum)
174174
FORWARD(bool, check)
175-
FORWARD(zim::size_type, getClusterCacheMaxSize)
176-
FORWARD(zim::size_type, getClusterCacheCurrentSize)
177-
FORWARD(void, setClusterCacheMaxSize)
178175
FORWARD(zim::size_type, getDirentCacheMaxSize)
179176
FORWARD(zim::size_type, getDirentCacheCurrentSize)
180177
FORWARD(void, setDirentCacheMaxSize)
181-
FORWARD(zim::size_type, getDirentLookupCacheMaxSize)
182-
FORWARD(void, setDirentLookupCacheMaxSize)
183178
};
184179

185180
class SearchResultSet : public Wrapper<zim::SearchResultSet>

libzim/libzim.pyx

Lines changed: 29 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,37 +1343,6 @@ cdef class Archive:
13431343
except RuntimeError as e:
13441344
raise KeyError(str(e))
13451345
1346-
@property
1347-
def cluster_cache_max_size(self) -> pyint:
1348-
"""Maximum size of the cluster cache.
1349-
1350-
Returns:
1351-
(int): maximum number of clusters stored in the cache.
1352-
"""
1353-
return self.c_archive.getClusterCacheMaxSize()
1354-
1355-
@cluster_cache_max_size.setter
1356-
def cluster_cache_max_size(self, nb_clusters: pyint):
1357-
"""Set the size of the cluster cache.
1358-
1359-
If the new size is lower than the number of currently stored clusters
1360-
some clusters will be dropped from cache to respect the new size.
1361-
1362-
Args:
1363-
nb_clusters (int): maximum number of clusters stored in the cache
1364-
"""
1365-
1366-
self.c_archive.setClusterCacheMaxSize(nb_clusters)
1367-
1368-
@property
1369-
def cluster_cache_current_size(self) -> pyint:
1370-
"""Size of the cluster cache.
1371-
1372-
Returns:
1373-
(int): number of clusters currently stored in the cache.
1374-
"""
1375-
return self.c_archive.getClusterCacheCurrentSize()
1376-
13771346
@property
13781347
def dirent_cache_max_size(self) -> pyint:
13791348
"""Maximum size of the dirent cache.
@@ -1404,36 +1373,38 @@ cdef class Archive:
14041373
"""
14051374
return self.c_archive.getDirentCacheCurrentSize()
14061375
1407-
@property
1408-
def dirent_lookup_cache_max_size(self) -> pyint:
1409-
"""Size of the dirent lookup cache.
1376+
def __repr__(self) -> str:
1377+
return f"{self.__class__.__name__}(filename={self.filename})"
14101378
1411-
The returned size returns the default size or the last set size.
1412-
This may not correspond to the actual size of the dirent lookup cache.
1413-
See set_dirent_lookup_cache_max_size for more information.
14141379
1415-
Returns:
1416-
(int): maximum number of sub ranges created in the lookup cache.
1417-
"""
1418-
return self.c_archive.getDirentLookupCacheMaxSize()
1380+
def get_cluster_cache_max_size() -> pyint:
1381+
"""Get the maximum size of the cluster cache.
14191382

1420-
@dirent_lookup_cache_max_size.setter
1421-
def dirent_lookup_cache_max_size(self, nb_ranges: pyint):
1422-
"""Set the size of the dirent lookup cache.
1383+
Returns:
1384+
(int): the maximum memory size used by the cluster cache (in bytes).
1385+
"""
1386+
return zim.getClusterCacheMaxSize()
14231387
1424-
Contrary to other set_<foo>_cache_max_size, this method is useless
1425-
once the lookup cache is created.
1426-
The lookup cache is created at first access to a entry in the archive.
1427-
So this method must be called before any access to content (including metadata).
1428-
It is best to call this method first, just after the archive creation.
1388+
def set_cluster_cache_max_size(size_in_bytes: pyint):
1389+
"""Set the size of the cluster cache.
14291390

1430-
Args:
1431-
nb_ranges (int): maximum number of sub ranges created in the lookup cache.
1432-
"""
1433-
self.c_archive.setDirentLookupCacheMaxSize(nb_ranges)
1391+
If the new size is lower than the memory currently used by cached clusters,
1392+
some clusters will be dropped from cache to respect the new size.
1393+
1394+
Args:
1395+
size_in_bytes (int): the memory limit (in bytes) for the cluster cache.
1396+
"""
1397+
1398+
zim.setClusterCacheMaxSize(size_in_bytes)  # forwards the byte limit to zim::setClusterCacheMaxSize
1399+
1400+
def get_cluster_cache_current_size() -> pyint:
1401+
"""Get the current size of the cluster cache.
1402+
1403+
Returns:
1404+
(int): the current memory size (in bytes) used by the cluster cache.
1405+
"""
1406+
return zim.getClusterCacheCurrentSize()
14341407
1435-
def __repr__(self) -> str:
1436-
return f"{self.__class__.__name__}(filename={self.filename})"
14371408
14381409
reader_module_doc = """libzim reader module
14391410

@@ -1453,6 +1424,9 @@ reader_public_objects = [
14531424
Archive,
14541425
Entry,
14551426
Item,
1427+
get_cluster_cache_max_size,
1428+
set_cluster_cache_max_size,
1429+
get_cluster_cache_current_size,
14561430
]
14571431
reader = create_module(reader_module_name, reader_module_doc, reader_public_objects)
14581432

libzim/reader.pyi

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,13 @@ class Archive:
7878
def has_illustration(self, size: int | None = None) -> bool: ...
7979
def get_illustration_item(self, size: int | None = None) -> Item: ...
8080
@property
81-
def cluster_cache_max_size(self) -> int: ...
82-
@cluster_cache_max_size.setter
83-
def cluster_cache_max_size(self, nb_clusters: int): ...
84-
@property
85-
def cluster_cache_current_size(self) -> int: ...
86-
@property
8781
def dirent_cache_max_size(self) -> int: ...
8882
@dirent_cache_max_size.setter
8983
def dirent_cache_max_size(self, nb_dirents: int): ...
9084
@property
9185
def dirent_cache_current_size(self) -> int: ...
92-
@property
93-
def dirent_lookup_cache_max_size(self) -> int: ...
94-
@dirent_lookup_cache_max_size.setter
95-
def dirent_lookup_cache_max_size(self, nb_ranges: int): ...
9686
def __repr__(self) -> str: ...
87+
88+
def get_cluster_cache_max_size() -> int: ...
89+
def set_cluster_cache_max_size(size_in_bytes: int): ...  # parameter name matches the libzim.pyx implementation
90+
def get_cluster_cache_current_size() -> int: ...

libzim/zim.pxd

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,14 +178,9 @@ cdef extern from "libwrapper.h" namespace "wrapper":
178178
bool hasChecksum() except +
179179
bool check() except +
180180

181-
uint64_t getClusterCacheMaxSize() except +
182-
uint64_t getClusterCacheCurrentSize() except +
183-
void setClusterCacheMaxSize(uint64_t nbClusters) except +
184181
uint64_t getDirentCacheMaxSize() except +
185182
uint64_t getDirentCacheCurrentSize() except +
186183
void setDirentCacheMaxSize(uint64_t nbDirents) except +
187-
uint64_t getDirentLookupCacheMaxSize() except +
188-
void setDirentLookupCacheMaxSize(uint64_t nbRanges) except +
189184

190185
cdef cppclass Searcher:
191186
Searcher()
@@ -233,3 +228,8 @@ cdef extern from "libwrapper.h" namespace "wrapper":
233228

234229
cdef extern from "zim/version.h" namespace "zim":
235230
cdef vector[pair[string, string]] getVersions()
231+
232+
cdef extern from "zim/archive.h" namespace "zim":
233+
cdef uint64_t getClusterCacheMaxSize() except +
234+
cdef uint64_t getClusterCacheCurrentSize() except +
235+
cdef void setClusterCacheMaxSize(uint64_t sizeInB) except +

tests/test_libzim_reader.py

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import pytest
1010

1111
import libzim.writer # pyright: ignore [reportMissingModuleSource]
12-
from libzim.reader import Archive, Entry # pyright: ignore [reportMissingModuleSource]
12+
from libzim.reader import ( # pyright: ignore [reportMissingModuleSource]
13+
Archive,
14+
Entry,
15+
get_cluster_cache_current_size,
16+
get_cluster_cache_max_size,
17+
set_cluster_cache_max_size,
18+
)
1319
from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource]
1420
from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource]
1521
SuggestionSearcher,
@@ -621,28 +627,25 @@ def test_reader_get_random_entry(all_zims):
621627
@pytest.mark.parametrize(*parametrize_for(["filename"]))
622628
def test_cluster_cache(all_zims, filename):
623629
zim = Archive(all_zims / filename)
624-
default_value = 16
625-
new_value = 1
626-
empty_value = 0
630+
default_value = 536870912 # 512M
631+
new_value = 1024
627632

628-
assert zim.cluster_cache_max_size == default_value
633+
assert get_cluster_cache_max_size() == default_value
629634

630-
zim.cluster_cache_max_size = new_value
631-
assert zim.cluster_cache_max_size == new_value
635+
# modify cluster cache max size
636+
set_cluster_cache_max_size(new_value)
637+
assert get_cluster_cache_max_size() == new_value
632638

633639
# test index access
634640
for index in range(0, zim.entry_count - 1):
635641
bytes(zim._get_entry_by_id(index).get_item().content)
636642

637-
assert zim.cluster_cache_current_size <= new_value
638-
639-
zim.cluster_cache_max_size = empty_value
640-
assert zim.cluster_cache_max_size == empty_value
641-
642-
for index in range(0, zim.entry_count - 1):
643-
bytes(zim._get_entry_by_id(index).get_item().content)
643+
# check current size is not too big (not really relevant since cache keeps at least
644+
# one cluster in memory, so this value depends on maximum cluster size)
645+
assert get_cluster_cache_current_size() <= new_value
644646

645-
assert zim.cluster_cache_current_size == empty_value
647+
# restore default value for next tests
648+
set_cluster_cache_max_size(default_value)
646649

647650

648651
@skip_if_offline
@@ -671,26 +674,5 @@ def test_dirent_cache(all_zims, filename):
671674
for index in range(0, zim.entry_count - 1):
672675
bytes(zim._get_entry_by_id(index).get_item().content)
673676

674-
assert zim.dirent_cache_current_size == empty_value
675-
676-
677-
@skip_if_offline
678-
@pytest.mark.parametrize(*parametrize_for(["filename"]))
679-
def test_dirent_lookup_cache(all_zims, filename):
680-
zim = Archive(all_zims / filename)
681-
default_value = 1024
682-
new_value = 2
683-
empty_value = 0
684-
685-
assert zim.dirent_lookup_cache_max_size == default_value
686-
687-
zim.dirent_lookup_cache_max_size = new_value
688-
assert zim.dirent_lookup_cache_max_size == new_value
689-
690-
# test index access
691-
for index in range(0, zim.entry_count - 1):
692-
bytes(zim._get_entry_by_id(index).get_item().content)
693-
694-
# setting after reading records the value but it has no use
695-
zim.dirent_lookup_cache_max_size = empty_value
696-
assert zim.dirent_lookup_cache_max_size == empty_value
677+
# always at least one entry is kept in cache unless ZIM is empty
678+
assert zim.dirent_cache_current_size == (1 if zim.entry_count else 0)

0 commit comments

Comments
 (0)