Skip to content

feat(recap): Adds support for ACMS attachment page purchases #5971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
22 changes: 18 additions & 4 deletions cl/corpus_importer/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from juriscraper.lib.exceptions import PacerLoginException, ParsingException
from juriscraper.lib.string_utils import CaseNameTweaker, harmonize
from juriscraper.pacer import (
ACMSAttachmentPage,
AppellateAttachmentPage,
AppellateDocketReport,
AttachmentPage,
Expand Down Expand Up @@ -1868,11 +1869,24 @@ def get_att_report_by_rd(
cookies=session_data.cookies, proxy=session_data.proxy_address
)
pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id)
if is_appellate_court(pacer_court_id):
att_report = AppellateAttachmentPage(pacer_court_id, s)
is_appellate_case = is_appellate_court(pacer_court_id)
is_acms_document = rd.is_acms_document()

if is_acms_document:
report_class = ACMSAttachmentPage
elif is_appellate_case:
report_class = AppellateAttachmentPage
else:
report_class = AttachmentPage

att_report = report_class(pacer_court_id, s)

if is_acms_document:
docket_case_id = rd.docket_entry.docket.pacer_case_id
rd_entry_id = rd.pacer_doc_id
Comment on lines +1885 to +1886
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we start ingesting ACMS email notifications that don't include a pacer_case_id, we'll retrieve it using the AcmsCaseSearch API. However, the pacer_doc_id will be empty for documents merged from ACMS email notifications.

So, I think it would be a good idea to check these values before querying the report as a safeguard. If either is missing, we could raise an error and store it in the FQ, so users understand why the fetch can't complete.

It might be better to perform this check in fetch_attachment_page, since that’s where these values are initially retrieved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, I think it would be a good idea to check these values before querying the report as a safeguard.

Great idea — thanks! I'll update the code.

att_report.query(docket_case_id, rd_entry_id)
else:
att_report = AttachmentPage(pacer_court_id, s)
att_report.query(rd.pacer_doc_id)
att_report.query(rd.pacer_doc_id)
return att_report


Expand Down
42 changes: 25 additions & 17 deletions cl/recap/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import concurrent.futures
import hashlib
import json
import logging
from dataclasses import dataclass
from datetime import datetime
Expand Down Expand Up @@ -2128,12 +2129,6 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]:
self.request.chain = None
return []

if rd.is_acms_document():
msg = "ACMS attachment pages are not currently supported"
mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
self.request.chain = None
return []

session_data = get_pacer_cookie_from_cache(fq.user_id)
if not session_data:
msg = "Unable to find cached cookies. Aborting request."
Expand Down Expand Up @@ -2188,32 +2183,45 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]:
)
raise self.retry(exc=exc)

text = r.response.text
is_appellate = is_appellate_court(court_id)
# Determine the appropriate parser function based on court jurisdiction
# (appellate or district)
att_data_parser = (
get_data_from_appellate_att_report
if is_appellate
else get_data_from_att_report
)
att_data = att_data_parser(text, court_id)
is_acms_case = rd.is_acms_document()
if not is_acms_case:
text = r.response.text
# Determine the appropriate parser function based on court jurisdiction
# (appellate or district)
att_data_parser = (
get_data_from_appellate_att_report
if is_appellate
else get_data_from_att_report
)
att_data = att_data_parser(text, court_id)
else:
att_data = r.data
text = json.dumps(r.data, default=str)

if att_data == {}:
msg = "Not a valid attachment page upload"
mark_fq_status(fq, msg, PROCESSING_STATUS.INVALID_CONTENT)
self.request.chain = None
return []

if is_acms_case:
document_number = att_data["entry_number"]
elif is_appellate:
# Appellate attachments don't contain a document_number
document_number = None
else:
document_number = att_data["document_number"]

try:
async_to_sync(merge_attachment_page_data)(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be a good idea to update the PacerHtmlFiles storage for the attachment data within merge_attachment_page_data, so the attachment data is saved as JSON, similar to what you did for the docket data?

rd.docket_entry.docket.court,
pacer_case_id,
att_data["pacer_doc_id"],
# Appellate attachments don't contain a document_number
None if is_appellate else att_data["document_number"],
document_number,
text,
att_data["attachments"],
is_acms_attachment=is_acms_case,
)
except RECAPDocument.MultipleObjectsReturned:
msg = (
Expand Down
108 changes: 74 additions & 34 deletions cl/recap/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3490,6 +3490,30 @@ def setUp(self) -> None:
recap_document_id=self.rd_appellate.pk,
)

self.acms_court = CourtFactory(
id="ca9", jurisdiction=Court.FEDERAL_APPELLATE
)
self.acms_docket = DocketFactory(
source=Docket.RECAP,
court=self.acms_court,
pacer_case_id="5d8e355d-b229-4b16-b00f-7552d2f79d4f",
)
self.rd_acms = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(
docket=self.acms_docket, entry_number=9
),
document_number=9,
pacer_doc_id="4e108d6c-ad5b-f011-bec2-001dd80b194b",
is_available=False,
document_type=RECAPDocument.PACER_DOCUMENT,
)

self.fq_acms = PacerFetchQueue.objects.create(
user=User.objects.get(username="recap"),
request_type=REQUEST_TYPE.ATTACHMENT_PAGE,
recap_document_id=self.rd_acms.pk,
)

def test_fetch_attachment_page_no_pacer_doc_id(
self, mock_court_accessible
) -> None:
Expand All @@ -3503,40 +3527,6 @@ def test_fetch_attachment_page_no_pacer_doc_id(
self.fq.refresh_from_db()
self.assertEqual(self.fq.status, PROCESSING_STATUS.NEEDS_INFO)

@mock.patch(
"cl.recap.tasks.get_pacer_cookie_from_cache",
return_value=None,
)
def test_fetch_att_page_no_cookies(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to remove this test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry about that — I accidentally removed it while deleting the other test related to ACMS purchases.

self, mock_get_cookies, mock_court_accessible
) -> None:
result = do_pacer_fetch(self.fq)
result.get()

self.fq.refresh_from_db()
self.assertEqual(self.fq.status, PROCESSING_STATUS.FAILED)
self.assertIn("Unable to find cached cookies", self.fq.message)

def test_fetch_acms_att_page(self, mock_court_accessible) -> None:
rd_acms = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(docket=DocketFactory()),
pacer_doc_id="784459c4-e2cd-ef11-b8e9-001dd804c0b4",
)
fq_acms = PacerFetchQueue.objects.create(
user=User.objects.get(username="recap"),
request_type=REQUEST_TYPE.ATTACHMENT_PAGE,
recap_document_id=rd_acms.pk,
)
result = do_pacer_fetch(fq_acms)
result.get()

fq_acms.refresh_from_db()
self.assertEqual(fq_acms.status, PROCESSING_STATUS.FAILED)
self.assertIn(
"ACMS attachment pages are not currently supported",
fq_acms.message,
)

@mock.patch(
"cl.recap.tasks.get_pacer_cookie_from_cache",
)
Expand Down Expand Up @@ -3630,6 +3620,56 @@ def test_fetch_att_page_from_appellate(
"Successfully completed fetch", self.fq_appellate.message
)

@mock.patch(
    "cl.recap.tasks.get_pacer_cookie_from_cache",
)
@mock.patch(
    "cl.corpus_importer.tasks.ACMSAttachmentPage",
    new=fakes.FakeAcmsAttachmentPage,
)
@mock.patch(
    "cl.corpus_importer.tasks.AppellateAttachmentPage",
)
@mock.patch(
    "cl.corpus_importer.tasks.AttachmentPage",
)
@mock.patch(
    "cl.corpus_importer.tasks.is_appellate_court", wraps=is_appellate_court
)
@mock.patch("cl.recap.tasks.is_appellate_court", wraps=is_appellate_court)
def test_fetch_att_page_from_acms(
    self,
    mock_court_check_task,
    mock_court_check_parser,
    mock_district_report_parser,
    mock_appellate_report_parser,
    mock_get_cookies,
    mock_court_accessible,
):
    """Fetch an ACMS attachment page and merge its attachments.

    Patches ``ACMSAttachmentPage`` with a fake that returns canned data
    for three attachments, then verifies the fetch queue completes
    successfully, the ACMS report (and only the ACMS report) is used,
    and one RECAPDocument per attachment is merged onto the entry.
    """
    # Trigger the fetch operation for an ACMS attachment page
    result = do_pacer_fetch(self.fq_acms)
    result.get()

    self.fq_acms.refresh_from_db()

    docket_entry = self.rd_acms.docket_entry
    # Fix: local was misspelled "amcs_court_id"
    acms_court_id = docket_entry.docket.court_id
    # Verify court validation calls with expected court ID
    mock_court_check_task.assert_called_with(acms_court_id)
    mock_court_check_parser.assert_called_with(acms_court_id)

    # Ensure that only the ACMS parser was used
    mock_district_report_parser.assert_not_called()
    mock_appellate_report_parser.assert_not_called()

    # Assert successful fetch status and expected message
    self.assertEqual(self.fq_acms.status, PROCESSING_STATUS.SUCCESSFUL)
    self.assertIn("Successfully completed fetch", self.fq_acms.message)

    # Verify that 3 RECAPDocument objects were created for the docket entry
    docket_entry.refresh_from_db()
    self.assertEqual(docket_entry.recap_documents.count(), 3)


class ProcessingQueueApiFilterTest(TestCase):
def setUp(self) -> None:
Expand Down
48 changes: 48 additions & 0 deletions cl/tests/fakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,54 @@ def data(self, *args, **kwargs):
}


class FakeAcmsAttachmentPage(FakeAppellateAttachmentPage):
    """Fake ACMS attachment page report with canned attachment data.

    Mimics the payload shape of juriscraper's ``ACMSAttachmentPage.data``
    (GUID-style ``pacer_doc_id``/``pacer_case_id``, per-attachment
    ``acms_document_guid``, ``cost``, ``permission`` and ``file_size``)
    so tests can exercise ACMS attachment-page purchases without PACER.
    """

    @property
    def data(self):
        # Note: a property getter only ever receives ``self``; the
        # ``*args, **kwargs`` carried by the original signature were dead
        # parameters and have been dropped.
        return {
            "pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
            "pacer_case_id": "5d8e355d-b229-4b16-b00f-7552d2f79d4f",
            "entry_number": 9,
            "description": "MOTION [Entered: 07/07/2025 08:41 PM]",
            "date_filed": date(2025, 7, 8),
            "date_end": date(2025, 7, 7),
            "attachments": [
                {
                    "attachment_number": 1,
                    "description": "Motion",
                    "page_count": 30,
                    "pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
                    "acms_document_guid": "d1358903-ad5b-f011-a2da-001dd80b00cb",
                    "cost": 3.0,
                    "date_filed": date(2025, 7, 8),
                    "permission": "Public",
                    "file_size": 864.0,
                },
                {
                    "attachment_number": 2,
                    "description": "Declaration",
                    "page_count": 4,
                    "pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
                    "acms_document_guid": "2f373c0f-ad5b-f011-a2da-001dd80b00cb",
                    "cost": 0.4,
                    "date_filed": date(2025, 7, 8),
                    "permission": "Public",
                    "file_size": 288.0,
                },
                {
                    "attachment_number": 3,
                    "description": "Declaration",
                    "page_count": 30,
                    "pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
                    "acms_document_guid": "c6aae921-ad5b-f011-a2da-001dd80b00cb",
                    "cost": 3.0,
                    "date_filed": date(2025, 7, 8),
                    "permission": "Public",
                    "file_size": 11264.0,
                },
            ],
        }


class FakeFreeOpinionReport:
def __init__(self, *args, **kwargs):
pass
Expand Down