diff --git a/docs/getting_started/django_settings.rst b/docs/getting_started/django_settings.rst index 7d68ffe..18db5c5 100644 --- a/docs/getting_started/django_settings.rst +++ b/docs/getting_started/django_settings.rst @@ -100,3 +100,64 @@ own unique page in the cache, set this value to ``None`` or ``[]``. If you feel as though the spammers have won, and want the nuclear option, you can set this to ``[r".*"]`` which will ignore all querystrings. This is surely a terrible idea, but it can be done. + + +.. _WAGTAIL_CACHE_CLEAR_EXPIRED_ON_SET: + +WAGTAIL_CACHE_CLEAR_EXPIRED_ON_SET +---------------------------------- + +.. versionadded:: + + This setting will clear any expired `KeyringItems` as a new item is set, + and is OFF by default. + +If set to `True`, as a cache item is set the manager will delete any expired +items from the database. If there are likely to be many expired items in the +cache, then that might be time-consuming so this setting can be turned off. +You can use the Django management command `clear_wagtail_expired_cache_items` +periodically to clear expired items instead. + + +.. _WAGTAIL_CACHE_USE_RAW_DELETE: + +WAGTAIL_CACHE_USE_RAW_DELETE +---------------------------- + +.. versionadded:: 2.3.0 + + This setting will use Django's ``QuerySet._raw_delete`` method to clear + KeyringItems from the database. This is fast but means that signals are not + sent during that process. This is OFF by default. + +If your cache is large, then there can be many ``KeyringItem`` objects in the +database. When you publish a Wagtail page that is high in the tree, many +of those items may be deleted. + +If the delete process is too slow, then you can change this setting to use +Django's ``QuerySet._raw_delete`` method. That runs significantly faster than +``QuerySet.delete`` but it means that signals are not sent during that process. + + +WAGTAIL_CACHE_TIMEOUT_JITTER_FUNC +--------------------------------- + +.. 
versionadded:: + + An optional function that will be called to adjust the cache timeout each + time a cache item is set. Set to None by default. + +This can be used to add a random jitter to the cache timeout to avoid cache +stampedes. + +The function should take the timeout as an argument and return a new +timeout. For example, to add a random jitter of up to 10% to the timeout: + +.. code-block:: python + + import random + + def jitter_timeout(timeout): + return timeout * random.uniform(0.9, 1.1) + + WAGTAIL_CACHE_TIMEOUT_JITTER_FUNC = jitter_timeout diff --git a/testproject/home/tests.py b/testproject/home/tests.py index 88fd078..b608e8e 100644 --- a/testproject/home/tests.py +++ b/testproject/home/tests.py @@ -1,3 +1,4 @@ +import datetime import time from django.contrib.auth.models import User @@ -7,6 +8,7 @@ from django.test import modify_settings from django.test import override_settings from django.urls import reverse +from django.utils.timezone import now from wagtail import hooks from wagtail.models import PageViewRestriction @@ -19,7 +21,9 @@ from wagtailcache.cache import CacheControl from wagtailcache.cache import Status from wagtailcache.cache import clear_cache +from wagtailcache.models import KeyringItem from wagtailcache.settings import wagtailcache_settings +from wagtailcache.utils import batched def hook_true(obj, is_cacheable: bool) -> bool: @@ -35,6 +39,9 @@ def hook_any(obj, is_cacheable: bool): class WagtailCacheTest(TestCase): + # Django's default `testserver` is not a valid domain name. + client_headers = {"SERVER_NAME": "example.com"} + @classmethod def get_content_type(cls, modelname: str): ctype, _ = ContentType.objects.get_or_create( @@ -141,7 +148,7 @@ def head_hit(self, url: str): """ HEAD a page and test that it was served from the cache. 
""" - response = self.client.head(url) + response = self.client.head(url, **self.client_headers) self.assertEqual(response.get(self.header_name, None), Status.HIT.value) return response @@ -149,7 +156,7 @@ def get_hit(self, url: str): """ Gets a page and tests that it was served from the cache. """ - response = self.client.get(url) + response = self.client.get(url, **self.client_headers) self.assertEqual(response.get(self.header_name, None), Status.HIT.value) return response @@ -157,7 +164,7 @@ def head_miss(self, url: str): """ HEAD a page and test that it was not served from the cache. """ - response = self.client.head(url) + response = self.client.head(url, **self.client_headers) self.assertEqual( response.get(self.header_name, None), Status.MISS.value ) @@ -166,7 +173,7 @@ def get_miss(self, url: str): """ Gets a page and tests that it was not served from the cache. """ - response = self.client.get(url) + response = self.client.get(url, **self.client_headers) self.assertEqual( response.get(self.header_name, None), Status.MISS.value ) @@ -177,7 +184,7 @@ def head_skip(self, url: str): HEAD a page and test that it was intentionally not served from the cache. """ - response = self.client.head(url) + response = self.client.head(url, **self.client_headers) self.assertEqual( response.get(self.header_name, None), Status.SKIP.value ) @@ -191,7 +198,7 @@ def get_skip(self, url: str): Gets a page and tests that it was intentionally not served from the cache. """ - response = self.client.get(url) + response = self.client.get(url, **self.client_headers) self.assertEqual( response.get(self.header_name, None), Status.SKIP.value ) @@ -205,7 +212,7 @@ def get_error(self, url: str): """ Gets a page and tests that an error in the cache backend was handled. 
""" - response = self.client.get(url) + response = self.client.get(url, **self.client_headers) self.assertEqual(response.status_code, 200) self.assertEqual( response.get(self.header_name, None), Status.ERROR.value @@ -216,7 +223,7 @@ def head_error(self, url: str): """ HEAD a page and tests that an error in the cache backend was handled. """ - response = self.client.head(url) + response = self.client.head(url, **self.client_headers) self.assertEqual(response.status_code, 200) self.assertEqual( response.get(self.header_name, None), Status.ERROR.value @@ -228,7 +235,7 @@ def post_skip(self, url: str): POSTS a page and tests that it was intentionally not served from the cache. """ - response = self.client.post(url) + response = self.client.post(url, **self.client_headers) self.assertEqual( response.get(self.header_name, None), Status.SKIP.value ) @@ -288,6 +295,11 @@ def test_querystrings(self): # A get with both should also hit, since it is the second request. self.head_hit(page.get_url() + "?valid=0&utm_code=0") self.get_hit(page.get_url() + "?valid=0&utm_code=0") + # A get with a very long querysting should be cached. + self.head_miss(page.get_url() + "?" + "a" * 2000) + self.get_miss(page.get_url() + "?" + "a" * 2000) + self.head_hit(page.get_url() + "?" + "a" * 2000) + self.get_hit(page.get_url() + "?" 
+ "a" * 2000) @override_settings(WAGTAIL_CACHE_IGNORE_COOKIES=False) def test_cookie_page(self): @@ -380,6 +392,7 @@ def test_page_restricted(self): "password": "the cybers", "return_url": self.page_cachedpage_restricted.get_url(), }, + **self.client_headers, ) self.assertRedirects( response, self.page_cachedpage_restricted.get_url() @@ -489,7 +502,9 @@ def test_template_response_view_hit(self): def test_admin(self): self.client.force_login(self.user) - response = self.client.get(reverse("wagtailcache:index")) + response = self.client.get( + reverse("wagtailcache:index"), **self.client_headers + ) self.client.logout() self.assertEqual(response.status_code, 200) @@ -500,7 +515,9 @@ def test_admin_clearcache(self): self.get_hit(self.page_cachedpage.get_url()) # Now log in as admin and clear the cache. self.client.force_login(self.user) - response = self.client.get(reverse("wagtailcache:clearcache")) + response = self.client.get( + reverse("wagtailcache:clearcache"), **self.client_headers + ) self.client.logout() self.assertEqual(response.status_code, 302) # Now the page should miss cache. @@ -510,33 +527,28 @@ def test_admin_clearcache(self): def test_cache_keyring(self): # Check if keyring is not present - self.assertEqual(self.cache.get("keyring"), None) + self.assertEqual(KeyringItem.objects.count(), 0) # Get should hit cache. 
self.get_miss(self.page_cachedpage.get_url()) + self.assertEqual(KeyringItem.objects.count(), 1) # Get first key from keyring - key = next(iter(self.cache.get("keyring"))) - url = "http://%s%s" % ("testserver", self.page_cachedpage.get_url()) + url = "http://%s%s" % ("example.com", self.page_cachedpage.get_url()) + keyring_item = KeyringItem.objects.active_for_url_regexes(url).first() # Compare Keys - self.assertEqual(key, url) + self.assertEqual(keyring_item.url, url) - @override_settings(WAGTAIL_CACHE_BACKEND="one_second") - def test_cache_keyring_no_uri_key_duplication(self): - # First get to populate keyring + def test_clear_cache(self): + # First get should miss cache. self.get_miss(self.page_cachedpage.get_url()) - # Wait a short time - time.sleep(0.5) - # Fetch a different page - self.get_miss(self.page_wagtailpage.get_url()) - # Wait until the first page is expired, but not the keyring - time.sleep(0.6) - # Fetch the first page again + # Second get should hit cache. + self.get_hit(self.page_cachedpage.get_url()) + # clear all from Cache + clear_cache() + # Now the page should miss cache. self.get_miss(self.page_cachedpage.get_url()) - # Check the keyring does not contain duplicate uri_keys - url = "http://%s%s" % ("testserver", self.page_cachedpage.get_url()) - keyring = self.cache.get("keyring") - self.assertEqual(len(keyring.get(url, [])), 1) - def test_clear_cache(self): + @override_settings(WAGTAIL_CACHE_USE_RAW_DELETE=True) + def test_clear_cache_raw_delete(self): # First get should miss cache. self.get_miss(self.page_cachedpage.get_url()) # Second get should hit cache. @@ -570,22 +582,30 @@ def test_clear_cache_url(self): @override_settings(WAGTAIL_CACHE=True) def test_enable_wagtailcache(self): # Intentionally enable wagtail-cache, make sure it works. 
- response = self.client.get(self.page_cachedpage.get_url()) + response = self.client.get( + self.page_cachedpage.get_url(), **self.client_headers + ) self.assertIsNotNone(response.get(self.header_name, None)) @override_settings(WAGTAIL_CACHE=False) def test_disable_wagtailcache(self): # Intentionally disable wagtail-cache, make sure it is inactive. - response = self.client.get(self.page_cachedpage.get_url()) + response = self.client.get( + self.page_cachedpage.get_url(), **self.client_headers + ) self.assertIsNone(response.get(self.header_name, None)) @override_settings(WAGTAIL_CACHE_BACKEND="zero") def test_zero_timeout(self): # Wagtail-cache should ignore the page when a timeout is zero. - response = self.client.get(self.page_cachedpage.get_url()) + response = self.client.get( + self.page_cachedpage.get_url(), **self.client_headers + ) self.assertIsNone(response.get(self.header_name, None)) # Second should also not cache. - response = self.client.get(self.page_cachedpage.get_url()) + response = self.client.get( + self.page_cachedpage.get_url(), **self.client_headers + ) self.assertIsNone(response.get(self.header_name, None)) # Load admin panel to render the zero timeout. self.test_admin() @@ -612,15 +632,30 @@ def test_page_error_set(self): self.head_error(page.get_url()) self.get_error(page.get_url()) + @override_settings( + WAGTAIL_CACHE_BACKEND="one_second", + WAGTAIL_CACHE_TIMEOUT_JITTER_FUNC=lambda timeout: timeout * 2, + ) + def test_timeout_jitter(self): + # Wagtail-cache should apply jitter to the timeout. + url = self.page_cachedpage.get_url() + self.client.get(url, **self.client_headers) + time.sleep(1.5) + self.get_hit(url) + # ---- HOOKS --------------------------------------------------------------- def test_request_hook_true(self): # A POST should never be cached. 
- response = self.client.post(reverse("cached_view")) + response = self.client.post( + reverse("cached_view"), **self.client_headers + ) self.assertEqual( response.get(self.header_name, None), Status.SKIP.value ) - response = self.client.post(reverse("cached_view")) + response = self.client.post( + reverse("cached_view"), **self.client_headers + ) self.assertEqual( response.get(self.header_name, None), Status.SKIP.value ) @@ -632,11 +667,15 @@ def test_request_hook_true(self): # the response still has the final say in whether or not the response is # cached. However a simple POST request where the response does not # forbid caching will in fact get cached! - response = self.client.post(reverse("cached_view")) + response = self.client.post( + reverse("cached_view"), **self.client_headers + ) self.assertEqual( response.get(self.header_name, None), Status.MISS.value ) - response = self.client.post(reverse("cached_view")) + response = self.client.post( + reverse("cached_view"), **self.client_headers + ) self.assertEqual(response.get(self.header_name, None), Status.HIT.value) def test_request_hook_false(self): @@ -684,3 +723,201 @@ def test_response_hook_any(self): self.assertEqual(hook_fns, [hook_any]) # The page should be cached normally due to hook returning garbage. 
self.test_page_hit() + + # ---- MODELS -------------------------------------------------------------- + def test_keyring_update_or_create(self): + expiry = now() + datetime.timedelta(hours=1) + key = "abc123" + url = "https://example.com/" + + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.assertEqual(KeyringItem.objects.count(), 1) + self.assertEqual(KeyringItem.objects.first().url, url) + + expiry2 = now() + datetime.timedelta(hours=1) + KeyringItem.objects.set( + expiry=expiry2, + key=key, + url=url, + ) + self.assertEqual(KeyringItem.objects.count(), 1) + self.assertEqual(KeyringItem.objects.first().expiry, expiry2) + + def test_keyring_update_or_create__long_url(self): + expiry = now() + datetime.timedelta(hours=1) + key = "abc123" + url = f"https://example.com/?query={ 'a' * 900 }" + + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.assertEqual(KeyringItem.objects.count(), 1) + + def test_delete_expired(self): + """ + Cache items expire by themselves, so we only need to actively + delete database items + """ + expiry1 = now() + datetime.timedelta(seconds=1) + expiry2 = now() + datetime.timedelta(seconds=2) + used_keys = [] + + for exp in [expiry1, expiry2]: + exp_iso = exp.isoformat() + key = f"key-{exp_iso}" + url = f"https://example.com/{exp_iso}" + KeyringItem.objects.set( + expiry=exp, + key=key, + url=url, + ) + # Item should not expire + self.cache.set(key, url, 100) + used_keys.append(key) + self.assertEqual(KeyringItem.objects.count(), 2) + time.sleep(1) + KeyringItem.objects.clear_expired() + self.assertEqual(KeyringItem.objects.count(), 1) + # Cache items remain + for key in used_keys: + self.assertTrue(self.cache.get(key)) + + @override_settings(WAGTAIL_CACHE_BATCH_SIZE=2) + def test_bulk_delete(self): + """ + Bulk delete removes cache items and database items that refer to them + """ + timeout = 10 + expiry = now() + datetime.timedelta(seconds=timeout) + keys = [f"key-{counter}" for counter in 
range(8)] + + for key in keys: + url = "https://example.com/" + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.cache.set(key, url, timeout) + + KeyringItem.objects.bulk_delete_cache_keys( + KeyringItem.objects.filter(key__in=keys[:4]) + ) + + for key in keys[:4]: + self.assertFalse(KeyringItem.objects.filter(key=key).exists()) + self.assertFalse(self.cache.get(key)) + + for key in keys[4:]: + self.assertTrue(KeyringItem.objects.filter(key=key).exists()) + self.assertTrue(self.cache.get(key)) + + @override_settings(WAGTAIL_CACHE_USE_RAW_DELETE=True) + def test_bulk_delete_raw_delete(self): + """ + You can optionally use Django's `_raw_delete` + for speed with many cache keys. + """ + timeout = 10 + expiry = now() + datetime.timedelta(seconds=timeout) + keys = [f"key-{counter}" for counter in range(8)] + + for key in keys: + url = "https://example.com/" + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.cache.set(key, url, timeout) + + KeyringItem.objects.bulk_delete_cache_keys( + KeyringItem.objects.filter(key__in=keys[:4]) + ) + + for key in keys[:4]: + self.assertFalse(KeyringItem.objects.filter(key=key).exists()) + self.assertFalse(self.cache.get(key)) + + for key in keys[4:]: + self.assertTrue(KeyringItem.objects.filter(key=key).exists()) + self.assertTrue(self.cache.get(key)) + + def test_active_for_url_regexes(self): + past_expiry = now() - datetime.timedelta(seconds=1) + future_expiry = now() + datetime.timedelta(seconds=1) + url = "https://example.com" + + KeyringItem.objects.set( + expiry=past_expiry, + key="key", + url=url, + ) + KeyringItem.objects.set( + expiry=future_expiry, + key="key-2", + url=url, + ) + KeyringItem.objects.set( + expiry=future_expiry, + key="key-3", + url=f"{url}/key-3/", + ) + self.assertEqual( + KeyringItem.objects.active_for_url_regexes([url]).count(), 2 + ) + + def test_active_for_urls_no_regexes(self): + past_expiry = now() - datetime.timedelta(seconds=1) + future_expiry = 
now() + datetime.timedelta(seconds=1) + url = "https://example.com" + url2 = "https://test.example.com" + + KeyringItem.objects.set( + expiry=past_expiry, + key="key", + url=url, + ) + KeyringItem.objects.set( + expiry=future_expiry, + key="key-2", + url=url, + ) + KeyringItem.objects.set( + expiry=future_expiry, + key="key-3", + url=url2, + ) + self.assertEqual( + KeyringItem.objects.active_for_url_regexes([]).count(), 2 + ) + + def test_keyringitem_str(self): + future_expiry = datetime.datetime(year=2030, month=1, day=1) + url = "https://example.com" + + KeyringItem.objects.set( + expiry=future_expiry, + key="key-2", + url=url, + ) + self.assertEqual( + str(KeyringItem.objects.first()), + "https://example.com -> key-2 (Expires: 2030-01-01 00:00:00+00:00)", + ) + + def test_batched(self): + self.assertEqual( + [batch for batch in batched("ABCDEFG", 3)], + [("A", "B", "C"), ("D", "E", "F"), ("G",)], + ) + + def test_batched_invalid_batch_size(self): + with self.assertRaises(ValueError): + next(batched("ABCDEFG", 0)) diff --git a/wagtailcache/cache.py b/wagtailcache/cache.py index 0d92c75..eff87e9 100644 --- a/wagtailcache/cache.py +++ b/wagtailcache/cache.py @@ -2,6 +2,7 @@ Functionality to set, serve from, and clear the cache. 
""" +import datetime import logging import re from enum import Enum @@ -24,8 +25,10 @@ from django.utils.cache import learn_cache_key from django.utils.cache import patch_response_headers from django.utils.deprecation import MiddlewareMixin +from django.utils.timezone import now from wagtail import hooks +from wagtailcache.models import KeyringItem from wagtailcache.settings import wagtailcache_settings @@ -329,6 +332,10 @@ def process_response( timeout = get_max_age(response) if timeout is None: timeout = self._wagcache.default_timeout + if wagtailcache_settings.WAGTAIL_CACHE_TIMEOUT_JITTER_FUNC: + timeout = wagtailcache_settings.WAGTAIL_CACHE_TIMEOUT_JITTER_FUNC( + timeout + ) patch_response_headers(response, timeout) if timeout: try: @@ -339,15 +346,14 @@ def process_response( # (of the chopped request, not the real one). cr = _chop_querystring(request) uri = unquote(cr.build_absolute_uri()) - keyring = self._wagcache.get("keyring", {}) - # Get current cache keys belonging to this URI. - # This should be a list of keys. - uri_keys: List[str] = keyring.get(uri, []) - # Append the key to this list if not already present and save. - if cache_key not in uri_keys: - uri_keys.append(cache_key) - keyring[uri] = uri_keys - self._wagcache.set("keyring", keyring) + + expiry = now() + datetime.timedelta(seconds=timeout) + KeyringItem.objects.set( + expiry=expiry, + key=cache_key, + url=uri, + ) + if isinstance(response, SimpleTemplateResponse): def callback(r): @@ -378,26 +384,14 @@ def clear_cache(urls: List[str] = []) -> None: return _wagcache = caches[wagtailcache_settings.WAGTAIL_CACHE_BACKEND] - if urls and "keyring" in _wagcache: - keyring = _wagcache.get("keyring") - # Check the provided URL matches a key in our keyring. - matched_urls = [] - for regex in urls: - for key in keyring: - if re.match(regex, key): - matched_urls.append(key) - # If it matches, delete each entry from the cache, - # and delete the URL from the keyring. 
class Command(BaseCommand):
    """Management command that purges expired ``KeyringItem`` rows."""

    help = "Clear expired KeyringItems from the database"

    def handle(self, *args, **options):
        """
        Delete every expired keyring row and report the outcome on stdout.

        Any exception raised while clearing (or while reporting success) is
        caught and reported as a styled error message instead of propagating.
        """
        try:
            removed = KeyringItem.objects.clear_expired()
            self.stdout.write(
                self.style.SUCCESS(
                    f"Successfully cleared {removed} expired KeyringItems"
                )
            )
        except Exception as exc:
            self.stdout.write(
                self.style.ERROR(
                    f"Failed to clear expired KeyringItems: {exc}"
                )
            )
fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("expiry", models.DateTimeField()), + ("key", models.CharField(max_length=512)), + ("url", models.URLField()), + ], + options={ + "ordering": ["url"], + }, + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["expiry"], name="wagtailcach_expiry_b9702b_idx"), + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["key"], name="wagtailcach_key_0c2934_idx"), + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["url"], name="wagtailcach_url_04699f_idx"), + ), + migrations.AlterUniqueTogether( + name="keyringitem", + unique_together={("url", "key")}, + ), + ] diff --git a/wagtailcache/migrations/0002_increase_url_length.py b/wagtailcache/migrations/0002_increase_url_length.py new file mode 100644 index 0000000..b52bb22 --- /dev/null +++ b/wagtailcache/migrations/0002_increase_url_length.py @@ -0,0 +1,15 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("wagtailcache", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="keyringitem", + name="url", + field=models.URLField(max_length=1000), + ), + ] diff --git a/wagtailcache/migrations/0003_alter_keyringitem_url.py b/wagtailcache/migrations/0003_alter_keyringitem_url.py new file mode 100644 index 0000000..2432e4f --- /dev/null +++ b/wagtailcache/migrations/0003_alter_keyringitem_url.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.15 on 2024-10-24 00:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("wagtailcache", "0002_increase_url_length"), + ] + + operations = [ + migrations.AlterField( + model_name="keyringitem", + name="url", + field=models.TextField(), + ), + ] diff --git a/wagtailcache/migrations/__init__.py b/wagtailcache/migrations/__init__.py 
class KeyringItemManager(models.Manager):
    """
    Manager that keeps ``KeyringItem`` rows and the wagtail-cache backend
    in step with each other.
    """

    def __init__(self):
        super().__init__()
        # Cache backend named by WAGTAIL_CACHE_BACKEND; looked up once, when
        # the manager is instantiated.
        self._wagcache = caches[wagtailcache_settings.WAGTAIL_CACHE_BACKEND]

    def set(self, url, key, expiry) -> "KeyringItem":
        """
        Create or update a keyring item, clearing expired items too.

        The row is identified by ``(url, key)``; an existing row only has its
        ``expiry`` replaced. Expired rows are purged afterwards only when
        ``WAGTAIL_CACHE_CLEAR_EXPIRED_ON_SET`` is enabled.

        Raises ``ValidationError`` (from ``full_clean``) when the values do
        not validate against the model.
        """
        # Ensure `full_clean` is called to validate the model.
        try:
            item = self.get(url=url, key=key)
            item.expiry = expiry
        except KeyringItem.DoesNotExist:
            item = KeyringItem(url=url, key=key, expiry=expiry)
        item.full_clean()
        item.save()

        if wagtailcache_settings.WAGTAIL_CACHE_CLEAR_EXPIRED_ON_SET:
            self.clear_expired()

        return item

    def _delete_qs(self, keys_qs: QuerySet) -> None:
        """
        Delete the rows selected by ``keys_qs`` from the database.

        With ``WAGTAIL_CACHE_USE_RAW_DELETE`` enabled only the private
        ``QuerySet._raw_delete`` is used: it is faster with many rows but
        does not send signals. Otherwise the regular ``QuerySet.delete`` runs.
        """
        if wagtailcache_settings.WAGTAIL_CACHE_USE_RAW_DELETE:
            # Exactly one delete path must run; calling `delete()` first
            # would do the slow, signal-sending work the setting is meant
            # to avoid.
            keys_qs._raw_delete(using=self.db)
        else:
            keys_qs.delete()

    def bulk_delete_cache_keys(self, keys_qs: QuerySet) -> None:
        """
        Bulk delete the keys from the cache in batches, and the
        KeyringItem instances.
        """
        # Delete from cache first, WAGTAIL_CACHE_BATCH_SIZE keys at a time.
        for key_batch in batched(
            keys_qs.values_list("key", flat=True),
            wagtailcache_settings.WAGTAIL_CACHE_BATCH_SIZE,
        ):
            self._wagcache.delete_many(key_batch)

        # Then remove the matching rows from the database.
        self._delete_qs(keys_qs)

    def bulk_clear_cache(self) -> None:
        """
        Clear the whole cache and all KeyringItem instances.
        """
        self._wagcache.clear()
        self._delete_qs(self.all())

    def clear_expired(self) -> int:
        """
        Clear all items whose expiry has passed.

        Returns the total reported by ``QuerySet.delete`` (number of objects
        deleted).
        """
        return self.filter(expiry__lt=now()).delete()[0]

    def active(self):
        """Return the items whose expiry is still in the future."""
        return self.filter(expiry__gt=now())

    def active_for_url_regexes(self, urls: List[str]):
        """
        Return active items whose URL matches at least one regex in ``urls``.

        An empty ``urls`` returns every active item. A single pattern passed
        as a bare string is treated as a one-element list; iterating it
        character by character would turn every character into its own
        regex.
        """
        qs = self.active()
        if not urls:
            return qs
        if isinstance(urls, str):
            urls = [urls]
        q_objects = Q()
        for url in urls:
            q_objects.add(Q(url__regex=url), Q.OR)
        return qs.filter(q_objects)


class KeyringItem(models.Model):
    """
    KeyringItems relate the URL of a page on the site to the key of an item
    in the cache.
    """

    # Moment after which the item is no longer considered active.
    expiry = models.DateTimeField()
    # Key of the related entry in the cache backend.
    key = models.CharField(max_length=512)
    # URL the cache entry was stored for; TextField, so no length limit.
    url = models.TextField()

    objects = KeyringItemManager()

    class Meta:
        ordering = ["url"]
        indexes = [
            models.Index(fields=["expiry"]),
            models.Index(fields=["key"]),
            models.Index(fields=["url"]),
        ]
        unique_together = [["url", "key"]]

    def __str__(self):
        return f"{self.url} -> {self.key} (Expires: {self.expiry})"

{% trans "Contents" %}

{% trans "Note that 301/302 redirects and 404s may also be cached." %}