Merge pull request #174 from opentensor/feat/thewhaleking/add-cache-size-env-vars

basfroman · web-flow · commit 028a1a15de7e · 2025-08-08T09:34:20.000-07:00
Adds env var support for setting cache size
diff --git a/README.md b/README.md
@@ -54,6 +54,41 @@ async def main():
 asyncio.run(main())
 ```
 
+### Caching
+There are a few different cache types used in this library to improve the performance overall. The one with which
+you are probably familiar is the typical `functools.lru_cache` used in `sync_substrate.SubstrateInterface`.
+
+By default, it uses a max cache size of 512 for smaller returns, and 16 for larger ones. These cache sizes are 
+user-configurable using the respective env vars, `SUBSTRATE_CACHE_METHOD_SIZE` and `SUBSTRATE_RUNTIME_CACHE_SIZE`.
+
+They are applied only on methods whose results cannot change — such as the block hash for a given block number 
+(small, 512 default), or the runtime for a given runtime version (large, 16 default).
+
+Additionally, in `AsyncSubstrateInterface`, because of its asynchronous nature, we developed our own asyncio-friendly 
+LRU caches. The primary one is the `CachedFetcher` which wraps the same methods as `functools.lru_cache` does in 
+`SubstrateInterface`, but the key difference here is that each request is assigned a future that resolves when the 
+initial request completes, and its result is returned to every caller awaiting it. So, if you were to do:
+
+```python
+bn = 5000
+bh1, bh2 = await asyncio.gather(
+    asi.get_block_hash(bn),
+    asi.get_block_hash(bn)
+)
+```
+it would make only a single network call, and return the result to both requests. Like `SubstrateInterface`,
+it also takes the `SUBSTRATE_CACHE_METHOD_SIZE` and `SUBSTRATE_RUNTIME_CACHE_SIZE` vars to set cache size.
+
+The third and final caching mechanism we use is `async_substrate_interface.async_substrate.DiskCachedAsyncSubstrateInterface`,
+which functions the same as the normal `AsyncSubstrateInterface`, but also saves its cache to disk, so the cache
+is preserved between runs. This is intended for a fairly niche use-case (such as `btcli`). As you may call different networks
+with entirely different results, this cache is keyed by the uri supplied at instantiation of the `DiskCachedAsyncSubstrateInterface`
+object, so `DiskCachedAsyncSubstrateInterface(network_1)` and `DiskCachedAsyncSubstrateInterface(network_2)` will not share
+the same on-disk cache.
+
+As with the other two caches, this also takes `SUBSTRATE_CACHE_METHOD_SIZE` and `SUBSTRATE_RUNTIME_CACHE_SIZE` env vars.
+
+
 ## Contributing
 
 Contributions are welcome! Please open an issue or submit a pull request to the `staging` branch.
diff --git a/async_substrate_interface/async_substrate.py b/async_substrate_interface/async_substrate.py
@@ -7,6 +7,7 @@
 import asyncio
 import inspect
 import logging
+import os
 import ssl
 import warnings
 from contextlib import suppress
@@ -42,7 +43,6 @@
     SubstrateRequestException,
     ExtrinsicNotFound,
     BlockNotFound,
-    MaxRetriesExceeded,
     StateDiscardedError,
 )
 from async_substrate_interface.protocols import Keypair
@@ -81,6 +81,10 @@
 logger = logging.getLogger("async_substrate_interface")
 raw_websocket_logger = logging.getLogger("raw_websocket")
 
+# env vars dictating the cache size of the cached methods
+SUBSTRATE_CACHE_METHOD_SIZE = int(os.getenv("SUBSTRATE_CACHE_METHOD_SIZE", "512"))
+SUBSTRATE_RUNTIME_CACHE_SIZE = int(os.getenv("SUBSTRATE_RUNTIME_CACHE_SIZE", "16"))
+
 
 class AsyncExtrinsicReceipt:
     """
@@ -1178,7 +1182,7 @@ async def init_runtime(
         else:
             return await self.get_runtime_for_version(runtime_version, block_hash)
 
-    @cached_fetcher(max_size=16, cache_key_index=0)
+    @cached_fetcher(max_size=SUBSTRATE_RUNTIME_CACHE_SIZE, cache_key_index=0)
     async def get_runtime_for_version(
         self, runtime_version: int, block_hash: Optional[str] = None
     ) -> Runtime:
@@ -2111,7 +2115,7 @@ async def get_metadata(self, block_hash=None) -> MetadataV15:
 
         return runtime.metadata_v15
 
-    @cached_fetcher(max_size=512)
+    @cached_fetcher(max_size=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_parent_block_hash(self, block_hash) -> str:
         """
         Retrieves the block hash of the parent of the given block hash
@@ -2166,7 +2170,7 @@ async def get_storage_by_key(self, block_hash: str, storage_key: str) -> Any:
                 "Unknown error occurred during retrieval of events"
             )
 
-    @cached_fetcher(max_size=16)
+    @cached_fetcher(max_size=SUBSTRATE_RUNTIME_CACHE_SIZE)
     async def get_block_runtime_info(self, block_hash: str) -> dict:
         """
         Retrieve the runtime info of given block_hash
@@ -2179,7 +2183,7 @@ async def _get_block_runtime_info(self, block_hash: str) -> dict:
         response = await self.rpc_request("state_getRuntimeVersion", [block_hash])
         return response.get("result")
 
-    @cached_fetcher(max_size=512)
+    @cached_fetcher(max_size=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_block_runtime_version_for(self, block_hash: str):
         """
         Retrieve the runtime version of the parent of a given block_hash
@@ -2494,7 +2498,7 @@ async def rpc_request(
         else:
             raise SubstrateRequestException(result[payload_id][0])
 
-    @cached_fetcher(max_size=512)
+    @cached_fetcher(max_size=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_block_hash(self, block_id: int) -> str:
         """
         Retrieves the hash of the specified block number
@@ -4022,19 +4026,19 @@ class DiskCachedAsyncSubstrateInterface(AsyncSubstrateInterface):
     Experimental new class that uses disk-caching in addition to memory-caching for the cached methods
     """
 
-    @async_sql_lru_cache(maxsize=512)
+    @async_sql_lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_parent_block_hash(self, block_hash):
         return await self._get_parent_block_hash(block_hash)
 
-    @async_sql_lru_cache(maxsize=16)
+    @async_sql_lru_cache(maxsize=SUBSTRATE_RUNTIME_CACHE_SIZE)
     async def get_block_runtime_info(self, block_hash: str) -> dict:
         return await self._get_block_runtime_info(block_hash)
 
-    @async_sql_lru_cache(maxsize=512)
+    @async_sql_lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_block_runtime_version_for(self, block_hash: str):
         return await self._get_block_runtime_version_for(block_hash)
 
-    @async_sql_lru_cache(maxsize=512)
+    @async_sql_lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     async def get_block_hash(self, block_id: int) -> str:
         return await self._get_block_hash(block_id)
 
diff --git a/async_substrate_interface/sync_substrate.py b/async_substrate_interface/sync_substrate.py
@@ -1,5 +1,6 @@
 import functools
 import logging
+import os
 import socket
 from hashlib import blake2b
 from typing import Optional, Union, Callable, Any
@@ -55,6 +56,10 @@
 logger = logging.getLogger("async_substrate_interface")
 raw_websocket_logger = logging.getLogger("raw_websocket")
 
+# env vars dictating the cache size of the cached methods
+SUBSTRATE_CACHE_METHOD_SIZE = int(os.getenv("SUBSTRATE_CACHE_METHOD_SIZE", "512"))
+SUBSTRATE_RUNTIME_CACHE_SIZE = int(os.getenv("SUBSTRATE_RUNTIME_CACHE_SIZE", "16"))
+
 
 class ExtrinsicReceipt:
     """
@@ -813,6 +818,7 @@ def init_runtime(
         self.runtime = runtime
         return self.runtime
 
+    @functools.lru_cache(maxsize=SUBSTRATE_RUNTIME_CACHE_SIZE)
     def get_runtime_for_version(
         self, runtime_version: int, block_hash: Optional[str] = None
     ) -> Runtime:
@@ -1668,7 +1674,7 @@ def get_metadata(self, block_hash=None) -> MetadataV15:
 
         return runtime.metadata_v15
 
-    @functools.lru_cache(maxsize=512)
+    @functools.lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     def get_parent_block_hash(self, block_hash):
         block_header = self.rpc_request("chain_getHeader", [block_hash])
 
@@ -1708,7 +1714,7 @@ def get_storage_by_key(self, block_hash: str, storage_key: str) -> Any:
                 "Unknown error occurred during retrieval of events"
             )
 
-    @functools.lru_cache(maxsize=16)
+    @functools.lru_cache(maxsize=SUBSTRATE_RUNTIME_CACHE_SIZE)
     def get_block_runtime_info(self, block_hash: str) -> dict:
         """
         Retrieve the runtime info of given block_hash
@@ -1718,7 +1724,7 @@ def get_block_runtime_info(self, block_hash: str) -> dict:
 
     get_block_runtime_version = get_block_runtime_info
 
-    @functools.lru_cache(maxsize=512)
+    @functools.lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     def get_block_runtime_version_for(self, block_hash: str):
         """
         Retrieve the runtime version of the parent of a given block_hash
@@ -1959,7 +1965,7 @@ def _make_rpc_request(
 
         return request_manager.get_results()
 
-    @functools.lru_cache(maxsize=512)
+    @functools.lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     def supports_rpc_method(self, name: str) -> bool:
         """
         Check if substrate RPC supports given method
@@ -2036,7 +2042,7 @@ def rpc_request(
         else:
             raise SubstrateRequestException(result[payload_id][0])
 
-    @functools.lru_cache(maxsize=512)
+    @functools.lru_cache(maxsize=SUBSTRATE_CACHE_METHOD_SIZE)
     def get_block_hash(self, block_id: int) -> str:
         return self.rpc_request("chain_getBlockHash", [block_id])["result"]
 
diff --git a/async_substrate_interface/utils/cache.py b/async_substrate_interface/utils/cache.py
@@ -219,6 +219,7 @@ def __init__(
         """
         self._inflight: dict[Hashable, asyncio.Future] = {}
         self._method = method
+        self._max_size = max_size
         self._cache = LRUCache(max_size=max_size)
         self._cache_key_index = cache_key_index
 
diff --git a/tests/helpers/fixtures.py b/tests/helpers/fixtures.py
diff --git a/tests/unit_tests/asyncio_/test_env_vars.py b/tests/unit_tests/asyncio_/test_env_vars.py
@@ -0,0 +1,23 @@
+from tests.helpers.fixtures import import_fresh
+
+
+def test_env_vars(monkeypatch):
+    monkeypatch.setenv("SUBSTRATE_CACHE_METHOD_SIZE", "10")  # setenv expects str values
+    monkeypatch.setenv("SUBSTRATE_RUNTIME_CACHE_SIZE", "9")  # ints trigger a PytestWarning
+    async_substrate = import_fresh("async_substrate_interface.async_substrate")
+    asi = async_substrate.AsyncSubstrateInterface("", _mock=True)
+    assert asi.get_runtime_for_version._max_size == 9
+    assert asi.get_block_runtime_info._max_size == 9
+    assert asi.get_parent_block_hash._max_size == 10
+    assert asi.get_block_runtime_version_for._max_size == 10
+    assert asi.get_block_hash._max_size == 10
+
+
+def test_defaults():
+    async_substrate = import_fresh("async_substrate_interface.async_substrate")
+    asi = async_substrate.AsyncSubstrateInterface("", _mock=True)
+    assert asi.get_runtime_for_version._max_size == 16
+    assert asi.get_block_runtime_info._max_size == 16
+    assert asi.get_parent_block_hash._max_size == 512
+    assert asi.get_block_runtime_version_for._max_size == 512
+    assert asi.get_block_hash._max_size == 512
diff --git a/tests/unit_tests/sync/test_env_vars.py b/tests/unit_tests/sync/test_env_vars.py
@@ -0,0 +1,23 @@
+from tests.helpers.fixtures import import_fresh
+
+
+def test_env_vars(monkeypatch):
+    monkeypatch.setenv("SUBSTRATE_CACHE_METHOD_SIZE", "10")  # setenv expects str values
+    monkeypatch.setenv("SUBSTRATE_RUNTIME_CACHE_SIZE", "9")  # ints trigger a PytestWarning
+    sync_substrate = import_fresh("async_substrate_interface.sync_substrate")
+    asi = sync_substrate.SubstrateInterface("", _mock=True)
+    assert asi.get_runtime_for_version.cache_parameters()["maxsize"] == 9
+    assert asi.get_block_runtime_info.cache_parameters()["maxsize"] == 9
+    assert asi.get_parent_block_hash.cache_parameters()["maxsize"] == 10
+    assert asi.get_block_runtime_version_for.cache_parameters()["maxsize"] == 10
+    assert asi.get_block_hash.cache_parameters()["maxsize"] == 10
+
+
+def test_defaults():
+    sync_substrate = import_fresh("async_substrate_interface.sync_substrate")
+    asi = sync_substrate.SubstrateInterface("", _mock=True)
+    assert asi.get_runtime_for_version.cache_parameters()["maxsize"] == 16
+    assert asi.get_block_runtime_info.cache_parameters()["maxsize"] == 16
+    assert asi.get_parent_block_hash.cache_parameters()["maxsize"] == 512
+    assert asi.get_block_runtime_version_for.cache_parameters()["maxsize"] == 512
+    assert asi.get_block_hash.cache_parameters()["maxsize"] == 512