Skip to content

Commit 75c4e87

Browse files
committed
Adopt libzim changes around cache management
1 parent 6a54e2f commit 75c4e87

File tree

6 files changed

+63
-113
lines changed

6 files changed

+63
-113
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1818
- Upgrade Github CI Actions
1919
- Run tests on minimum supported platforms + more recent stable ones
2020
- Fix various Cython warnings and deprecation notices (#239)
21+
- libzim 9.4.0 Cache Control API
22+
- remove `Archive.dirent_lookup_cache_max_size`, which no longer exists in libzim
23+
- move `Archive.cluster_cache_max_size` and `Archive.cluster_cache_current_size` to module-level functions outside of the `Archive` object: `get_cluster_cache_max_size`, `set_cluster_cache_max_size`, `get_cluster_cache_current_size` (size is now in bytes)
24+
- deprecate usage of `get_illustration_sizes()`
25+
2126

2227
## [3.7.0] - 2025-04-18
2328

libzim/libwrapper.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,9 @@ class Archive : public Wrapper<zim::Archive>
172172
FORWARD(bool, hasTitleIndex)
173173
FORWARD(bool, hasChecksum)
174174
FORWARD(bool, check)
175-
FORWARD(zim::size_type, getClusterCacheMaxSize)
176-
FORWARD(zim::size_type, getClusterCacheCurrentSize)
177-
FORWARD(void, setClusterCacheMaxSize)
178175
FORWARD(zim::size_type, getDirentCacheMaxSize)
179176
FORWARD(zim::size_type, getDirentCacheCurrentSize)
180177
FORWARD(void, setDirentCacheMaxSize)
181-
FORWARD(zim::size_type, getDirentLookupCacheMaxSize)
182-
FORWARD(void, setDirentLookupCacheMaxSize)
183178
};
184179

185180
class SearchResultSet : public Wrapper<zim::SearchResultSet>

libzim/libzim.pyx

Lines changed: 29 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,37 +1342,6 @@ cdef class Archive:
13421342
except RuntimeError as e:
13431343
raise KeyError(str(e))
13441344
1345-
@property
1346-
def cluster_cache_max_size(self) -> pyint:
1347-
"""Maximum size of the cluster cache.
1348-
1349-
Returns:
1350-
(int): maximum number of clusters stored in the cache.
1351-
"""
1352-
return self.c_archive.getClusterCacheMaxSize()
1353-
1354-
@cluster_cache_max_size.setter
1355-
def cluster_cache_max_size(self, nb_clusters: pyint):
1356-
"""Set the size of the cluster cache.
1357-
1358-
If the new size is lower than the number of currently stored clusters
1359-
some clusters will be dropped from cache to respect the new size.
1360-
1361-
Args:
1362-
nb_clusters (int): maximum number of clusters stored in the cache
1363-
"""
1364-
1365-
self.c_archive.setClusterCacheMaxSize(nb_clusters)
1366-
1367-
@property
1368-
def cluster_cache_current_size(self) -> pyint:
1369-
"""Size of the cluster cache.
1370-
1371-
Returns:
1372-
(int): number of clusters currently stored in the cache.
1373-
"""
1374-
return self.c_archive.getClusterCacheCurrentSize()
1375-
13761345
@property
13771346
def dirent_cache_max_size(self) -> pyint:
13781347
"""Maximum size of the dirent cache.
@@ -1403,36 +1372,38 @@ cdef class Archive:
14031372
"""
14041373
return self.c_archive.getDirentCacheCurrentSize()
14051374
1406-
@property
1407-
def dirent_lookup_cache_max_size(self) -> pyint:
1408-
"""Size of the dirent lookup cache.
1375+
def __repr__(self) -> str:
1376+
return f"{self.__class__.__name__}(filename={self.filename})"
14091377
1410-
The returned size returns the default size or the last set size.
1411-
This may not correspond to the actual size of the dirent lookup cache.
1412-
See set_dirent_lookup_cache_max_size for more information.
14131378
1414-
Returns:
1415-
(int): maximum number of sub ranges created in the lookup cache.
1416-
"""
1417-
return self.c_archive.getDirentLookupCacheMaxSize()
1379+
def get_cluster_cache_max_size() -> pyint:
1380+
"""Get the maximum size of the cluster cache.
14181381

1419-
@dirent_lookup_cache_max_size.setter
1420-
def dirent_lookup_cache_max_size(self, nb_ranges: pyint):
1421-
"""Set the size of the dirent lookup cache.
1382+
Returns:
1383+
(int): the maximum memory size used by the cluster cache (in bytes).
1384+
"""
1385+
return zim.getClusterCacheMaxSize()
14221386
1423-
Contrary to other set_<foo>_cache_max_size, this method is useless
1424-
once the lookup cache is created.
1425-
The lookup cache is created at first access to a entry in the archive.
1426-
So this method must be called before any access to content (including metadata).
1427-
It is best to call this method first, just after the archive creation.
1387+
def set_cluster_cache_max_size(size_in_bytes: pyint):
1388+
"""Set the maximum size of the cluster cache.
14281389

1429-
Args:
1430-
nb_ranges (int): maximum number of sub ranges created in the lookup cache.
1431-
"""
1432-
self.c_archive.setDirentLookupCacheMaxSize(nb_ranges)
1390+
If the new size is lower than the memory currently used by cached clusters
1391+
some clusters will be dropped from cache to respect the new size.
1392+
1393+
Args:
1394+
size_in_bytes (int): the memory limit (in bytes) for the cluster cache.
1395+
"""
1396+
1397+
zim.setClusterCacheMaxSize(size_in_bytes)
1398+
1399+
def get_cluster_cache_current_size() -> pyint:
1400+
"""Get the current size of the cluster cache.
1401+
1402+
Returns:
1403+
(int): the current memory size (in bytes) used by the cluster cache.
1404+
"""
1405+
return zim.getClusterCacheCurrentSize()
14331406
1434-
def __repr__(self) -> str:
1435-
return f"{self.__class__.__name__}(filename={self.filename})"
14361407
14371408
reader_module_doc = """libzim reader module
14381409

@@ -1452,6 +1423,9 @@ reader_public_objects = [
14521423
Archive,
14531424
Entry,
14541425
Item,
1426+
get_cluster_cache_max_size,
1427+
set_cluster_cache_max_size,
1428+
get_cluster_cache_current_size,
14551429
]
14561430
reader = create_module(reader_module_name, reader_module_doc, reader_public_objects)
14571431

libzim/reader.pyi

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,19 +78,13 @@ class Archive:
7878
def has_illustration(self, size: int | None = None) -> bool: ...
7979
def get_illustration_item(self, size: int | None = None) -> Item: ...
8080
@property
81-
def cluster_cache_max_size(self) -> int: ...
82-
@cluster_cache_max_size.setter
83-
def cluster_cache_max_size(self, nb_clusters: int): ...
84-
@property
85-
def cluster_cache_current_size(self) -> int: ...
86-
@property
8781
def dirent_cache_max_size(self) -> int: ...
8882
@dirent_cache_max_size.setter
8983
def dirent_cache_max_size(self, nb_dirents: int): ...
9084
@property
9185
def dirent_cache_current_size(self) -> int: ...
92-
@property
93-
def dirent_lookup_cache_max_size(self) -> int: ...
94-
@dirent_lookup_cache_max_size.setter
95-
def dirent_lookup_cache_max_size(self, nb_ranges: int): ...
9686
def __repr__(self) -> str: ...
87+
88+
def get_cluster_cache_max_size() -> int: ...
89+
# Parameter name mirrors the implementation in libzim.pyx: the limit is a memory size in bytes, not a cluster count.
def set_cluster_cache_max_size(size_in_bytes: int): ...
90+
def get_cluster_cache_current_size() -> int: ...

libzim/zim.pxd

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,14 +178,9 @@ cdef extern from "libwrapper.h" namespace "wrapper":
178178
bool hasChecksum() except +
179179
bool check() except +
180180

181-
uint64_t getClusterCacheMaxSize() except +
182-
uint64_t getClusterCacheCurrentSize() except +
183-
void setClusterCacheMaxSize(uint64_t nbClusters) except +
184181
uint64_t getDirentCacheMaxSize() except +
185182
uint64_t getDirentCacheCurrentSize() except +
186183
void setDirentCacheMaxSize(uint64_t nbDirents) except +
187-
uint64_t getDirentLookupCacheMaxSize() except +
188-
void setDirentLookupCacheMaxSize(uint64_t nbRanges) except +
189184

190185
cdef cppclass Searcher:
191186
Searcher()
@@ -233,3 +228,8 @@ cdef extern from "libwrapper.h" namespace "wrapper":
233228

234229
cdef extern from "zim/version.h" namespace "zim":
235230
cdef vector[pair[string, string]] getVersions()
231+
232+
cdef extern from "zim/archive.h" namespace "zim":
233+
cdef uint64_t getClusterCacheMaxSize() except +
234+
cdef uint64_t getClusterCacheCurrentSize() except +
235+
cdef void setClusterCacheMaxSize(uint64_t sizeInB) except +

tests/test_libzim_reader.py

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import pytest
1010

1111
import libzim.writer # pyright: ignore [reportMissingModuleSource]
12-
from libzim.reader import Archive, Entry # pyright: ignore [reportMissingModuleSource]
12+
from libzim.reader import ( # pyright: ignore [reportMissingModuleSource]
13+
Archive,
14+
Entry,
15+
get_cluster_cache_current_size,
16+
get_cluster_cache_max_size,
17+
set_cluster_cache_max_size,
18+
)
1319
from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource]
1420
from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource]
1521
SuggestionSearcher,
@@ -621,28 +627,25 @@ def test_reader_get_random_entry(all_zims):
621627
@pytest.mark.parametrize(*parametrize_for(["filename"]))
622628
def test_cluster_cache(all_zims, filename):
623629
zim = Archive(all_zims / filename)
624-
default_value = 16
625-
new_value = 1
626-
empty_value = 0
630+
default_value = 536870912 # 512M
631+
new_value = 1024
627632

628-
assert zim.cluster_cache_max_size == default_value
633+
assert get_cluster_cache_max_size() == default_value
629634

630-
zim.cluster_cache_max_size = new_value
631-
assert zim.cluster_cache_max_size == new_value
635+
# modify cluster cache max size
636+
set_cluster_cache_max_size(new_value)
637+
assert get_cluster_cache_max_size() == new_value
632638

633639
# test index access
634640
for index in range(0, zim.entry_count - 1):
635641
bytes(zim._get_entry_by_id(index).get_item().content)
636642

637-
assert zim.cluster_cache_current_size <= new_value
638-
639-
zim.cluster_cache_max_size = empty_value
640-
assert zim.cluster_cache_max_size == empty_value
641-
642-
for index in range(0, zim.entry_count - 1):
643-
bytes(zim._get_entry_by_id(index).get_item().content)
643+
# check current size is not too big (not really relevant since cache keeps at least
644+
# one cluster in memory, so this value depends on maximum cluster size)
645+
assert get_cluster_cache_current_size() <= new_value
644646

645-
assert zim.cluster_cache_current_size == empty_value
647+
# restore default value for next tests
648+
set_cluster_cache_max_size(default_value)
646649

647650

648651
@skip_if_offline
@@ -671,26 +674,5 @@ def test_dirent_cache(all_zims, filename):
671674
for index in range(0, zim.entry_count - 1):
672675
bytes(zim._get_entry_by_id(index).get_item().content)
673676

674-
assert zim.dirent_cache_current_size == empty_value
675-
676-
677-
@skip_if_offline
678-
@pytest.mark.parametrize(*parametrize_for(["filename"]))
679-
def test_dirent_lookup_cache(all_zims, filename):
680-
zim = Archive(all_zims / filename)
681-
default_value = 1024
682-
new_value = 2
683-
empty_value = 0
684-
685-
assert zim.dirent_lookup_cache_max_size == default_value
686-
687-
zim.dirent_lookup_cache_max_size = new_value
688-
assert zim.dirent_lookup_cache_max_size == new_value
689-
690-
# test index access
691-
for index in range(0, zim.entry_count - 1):
692-
bytes(zim._get_entry_by_id(index).get_item().content)
693-
694-
# setting after reading records the value but it has no use
695-
zim.dirent_lookup_cache_max_size = empty_value
696-
assert zim.dirent_lookup_cache_max_size == empty_value
677+
# at least one entry is always kept in cache unless the ZIM is empty
678+
assert zim.dirent_cache_current_size == (1 if zim.entry_count else 0)

0 commit comments

Comments
 (0)