
fix(search): Refactor cl.lib.search_utils.fetch_and_paginate_results … #5931

Open · wants to merge 4 commits into base: main
Changes from 3 commits
119 changes: 88 additions & 31 deletions cl/lib/search_utils.py
@@ -9,10 +9,11 @@
from django.conf import settings
from django.core.cache import cache
from django.core.exceptions import PermissionDenied
from django.core.paginator import EmptyPage, Page, PageNotAnInteger
from django.core.paginator import EmptyPage, Page, PageNotAnInteger, Paginator
from django.http import HttpRequest
from django.http.request import QueryDict
from django_elasticsearch_dsl.search import Search
from elasticsearch_dsl.response import Response
from eyecite.models import FullCaseCitation
from eyecite.tokenizers import HyperscanTokenizer

@@ -322,7 +323,8 @@ def store_search_api_query(


class CachedESSearchResults(TypedDict):
results: Page | list
results: Page | list # Deprecated. See #5562
hits: Response | list
main_total: int | None
child_total: int | None

@@ -335,7 +337,7 @@ def retrieve_cached_search_results(

:param get_params: The GET parameters provided by the user.
:return: A two-tuple containing either the cached search results and the
cache key based ona prefix and the get parameters, or None and the cache key
cache key based on a prefix and the get parameters, or None and the cache key
if no cached results were found.
"""

@@ -355,6 +357,40 @@
return None, cache_key


def get_results_from_paginator(
paginator: Paginator, page_num: int = 1
) -> Page:
"""Get results Page from Paginator

:param paginator: The paginator to get results from
:param page_num: Page number to request
:return: Page object
"""
try:
results = paginator.page(page_num)
except PageNotAnInteger:
results = paginator.page(1)
except EmptyPage:
results = paginator.page(paginator.num_pages)

return results


def enrich_search_results(results: Page, search_type: str, get_params: dict):
"""Enrich dataset before returning to user

:param results: An ESPaginator page object
:param search_type: The search type
:param get_params: A dictionary of parameters sent in request
:return: None
"""
convert_str_date_fields_to_date_objects(results, search_type)
merge_courts_from_db(results, search_type)
limit_inner_hits(get_params, results, search_type)
set_results_highlights(results, search_type)
merge_unavailable_fields_on_parent_document(results, search_type)


def fetch_and_paginate_results(
get_params: QueryDict,
search_query: Search,
@@ -375,63 +411,84 @@ def fetch_and_paginate_results(
the total number of hits for the child document.
"""

# Run the query and set up pagination
# Get or set default params
try:
page = int(get_params.get("page", 1))
except ValueError:
page = 1
search_type = get_params.get("type", SEARCH_TYPES.OPINION)

# Check cache for displaying insights on the Home Page.
if cache_key is not None:
# Check cache for displaying insights on the Home Page.
results = cache.get(cache_key)
if results is not None:
cache_data = cache.get(cache_key)
if cache_data is not None:
if type(cache_data) is Page:
# Handle existing cache entries that still contain the Page object
results = cache_data

elif type(cache_data) is Response:
# Create Django paginator for insights as ES metadata is not stored
paginator = Paginator(cache_data, page)
Contributor:

I think we need to use rows_per_page here; otherwise, only one result will be displayed on the home page for the latest opinions.

Suggested change:
paginator = Paginator(cache_data, page)
paginator = Paginator(cache_data, rows_per_page)

Contributor (Author):

Yep, you're right. Fixed in latest commit.
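For context, a minimal standalone sketch of the behavior described in that comment, using a plain Django Paginator with illustrative numbers (not CourtListener code): the second Paginator argument is per_page, so passing the requested page number there caps every page at that many results.

from django.core.paginator import Paginator

cached_hits = list(range(20))  # stand-in for cached ES hits

# Passing the page number (here 1) where per_page belongs yields one result per page.
wrong = Paginator(cached_hits, 1)
assert wrong.page(1).object_list == [0]

# Passing the page size (here 5, an illustrative rows_per_page) keeps a full page.
right = Paginator(cached_hits, 5)
assert right.page(1).object_list == [0, 1, 2, 3, 4]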

results = get_results_from_paginator(paginator, page)
enrich_search_results(results, search_type, get_params)

return results, 0, False, None, None

# Check micro-cache for all other search requests.
results_dict, micro_cache_key = retrieve_cached_search_results(get_params)
if results_dict:
if "hits" in results_dict:
# Create paginator from ES hits
paginator = ESPaginator(
results_dict["main_total"], results_dict["hits"], rows_per_page
)

# Get appropriate page
results = get_results_from_paginator(paginator, page)

# Enrich results
enrich_search_results(results, search_type, get_params)

# Handle existing cache entries that still contain the Page object
elif "results" in results_dict:
results = results_dict["results"]

# Return results and counts. Set query time to 1ms.
return (
results_dict["results"],
results,
1,
False,
results_dict["main_total"],
results_dict["child_total"],
)

try:
page = int(get_params.get("page", 1))
except ValueError:
page = 1

# Check pagination depth
check_pagination_depth(page)

# Fetch results from ES
hits, query_time, error, main_total, child_total = fetch_es_results(
get_params, search_query, child_docs_count_query, page, rows_per_page
)

if error:
return [], query_time, error, main_total, child_total

# Create paginator from ES hits
paginator = ESPaginator(main_total, hits, rows_per_page)
try:
results = paginator.page(page)
except PageNotAnInteger:
results = paginator.page(1)
except EmptyPage:
results = paginator.page(paginator.num_pages)

search_type = get_params.get("type", SEARCH_TYPES.OPINION)
# Set highlights in results.
convert_str_date_fields_to_date_objects(results, search_type)
merge_courts_from_db(results, search_type)
limit_inner_hits(get_params, results, search_type)
set_results_highlights(results, search_type)
merge_unavailable_fields_on_parent_document(results, search_type)
# Get appropriate page
results = get_results_from_paginator(paginator, page)

# Enrich results
enrich_search_results(results, search_type, get_params)

if cache_key is not None:
# Cache only Page results for displaying insights on the Home Page.
cache.set(cache_key, results, settings.QUERY_RESULTS_CACHE)
# Cache only ES hits for displaying insights on the Home Page.
cache.set(cache_key, hits, settings.QUERY_RESULTS_CACHE)
elif settings.ELASTICSEARCH_MICRO_CACHE_ENABLED:
# Cache Page results and counts for all other search requests.
# Cache ES hits and counts for all other search requests.
results_dict = {
"results": results,
"results": [],
"hits": hits,
"main_total": main_total,
"child_total": child_total,
}
4 changes: 3 additions & 1 deletion cl/search/tests/tests_es_opinion.py
@@ -273,7 +273,9 @@ async def test_can_filtering_by_citation_count(self) -> None:

search_params = {"q": "*", "cited_lt": 100, "cited_gt": 80}

r = self._test_api_results_count(search_params, 0, "citation_count")
r = await self._test_api_results_count(
search_params, 0, "citation_count"
)

@skip_if_common_tests_skipped
async def test_citation_ordering_by_citation_count(self) -> None:
72 changes: 72 additions & 0 deletions cl/search/tests/tests_es_recap.py
@@ -12,6 +12,7 @@
from django.core.cache import cache
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.management import call_command
from django.core.paginator import Paginator
from django.test import RequestFactory, override_settings
from django.urls import reverse
from django.utils.timezone import now
@@ -34,6 +35,10 @@
get_parties_from_case_name,
get_parties_from_case_name_bankr,
)
from cl.lib.search_utils import (
enrich_search_results,
get_results_from_paginator,
)
from cl.lib.test_helpers import (
RECAPSearchTestCase,
rd_type_v4_api_keys,
@@ -2680,6 +2685,73 @@ def test_initial_document_button(self) -> None:
for docket in dockets_to_remove:
docket.delete()

async def test_get_results_from_paginator(self) -> None:
"""Assert Paginator Page objects are handled as expected."""

# Make basic paginator with 50 elements, 10 per page
num_items = 50
per_page = 10
num_pages = num_items // per_page
self.assertEqual(num_pages, 5)

a = [1] * num_items
p = Paginator(a, per_page)

# Default page_num 1
r = get_results_from_paginator(p)
self.assertEqual(r.number, 1)
self.assertEqual(r.has_previous(), False)

# Non-integer page_num should return first page
r = get_results_from_paginator(p, page_num="a")
self.assertEqual(r.number, 1)
self.assertEqual(r.has_previous(), False)

# Higher page number should return last page
r = get_results_from_paginator(p, page_num=num_pages + 1)
self.assertEqual(r.number, num_pages)
self.assertEqual(r.has_previous(), True)
self.assertEqual(r.has_next(), False)

# Empty paginator should return a single empty page
p = Paginator([], 1)

# Default page_num 1
r = get_results_from_paginator(p)
self.assertEqual(r.number, 1)
self.assertEqual(r.has_previous(), False)
self.assertEqual(r.has_next(), False)

# Non-integer page_num should return first page
r = get_results_from_paginator(p, page_num="a")
self.assertEqual(r.number, 1)
self.assertEqual(r.has_previous(), False)
self.assertEqual(r.has_next(), False)

# Higher page number should return last page
r = get_results_from_paginator(p, page_num=2)
self.assertEqual(r.number, 1)
self.assertEqual(r.has_previous(), False)
self.assertEqual(r.has_next(), False)

async def test_enrich_search_results(self) -> None:
"""Assert Paginator Page objects are enriched as expected.
Ideally, this would validate each step of the enrichment,
but those tests do not appear to exist yet
"""

# Create empty paginator
paginator = Paginator([], 1)
page = paginator.page(1)

search_type = SEARCH_TYPES.OPINION
get_params = {}

r = enrich_search_results(page, search_type, get_params)

# Shouldn't be a response
self.assertIsNone(r)

@mock.patch("cl.lib.search_utils.fetch_es_results")
@override_settings(
RECAP_SEARCH_PAGE_SIZE=2, ELASTICSEARCH_MICRO_CACHE_ENABLED=True