feat(recap): Adds support for ACMS attachment page purchases #5971

Merged
22 changes: 18 additions & 4 deletions cl/corpus_importer/tasks.py
@@ -34,6 +34,7 @@
from juriscraper.lib.exceptions import PacerLoginException, ParsingException
from juriscraper.lib.string_utils import CaseNameTweaker, harmonize
from juriscraper.pacer import (
ACMSAttachmentPage,
AppellateAttachmentPage,
AppellateDocketReport,
AttachmentPage,
@@ -1868,11 +1869,24 @@ def get_att_report_by_rd(
cookies=session_data.cookies, proxy=session_data.proxy_address
)
pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id)
if is_appellate_court(pacer_court_id):
att_report = AppellateAttachmentPage(pacer_court_id, s)
is_appellate_case = is_appellate_court(pacer_court_id)
is_acms_document = rd.is_acms_document()

if is_acms_document:
report_class = ACMSAttachmentPage
elif is_appellate_case:
report_class = AppellateAttachmentPage
else:
report_class = AttachmentPage

att_report = report_class(pacer_court_id, s)

if is_acms_document:
docket_case_id = rd.docket_entry.docket.pacer_case_id
rd_entry_id = rd.pacer_doc_id
Comment on lines +1885 to +1886

Contributor

When we start ingesting ACMS email notifications that don't include a pacer_case_id, we'll retrieve it using the AcmsCaseSearch API. However, the pacer_doc_id will be empty for documents merged from ACMS email notifications.

So, I think it would be a good idea to check these values before querying the report as a safeguard. If either is missing, we could raise an error and store it in the FQ, so users understand why the fetch can't complete.

It might be better to perform this check in fetch_attachment_page, since that’s where these values are initially retrieved.

Contributor Author

> So, I think it would be a good idea to check these values before querying the report as a safeguard.

Great idea — thanks! I'll update the code.
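A minimal sketch of the safeguard discussed in this thread, reusing the mark_fq_status helper and PROCESSING_STATUS values that appear elsewhere in this diff. The wrapper function, its name, and the import paths are assumptions; only the pacer_case_id branch is what the hunk below actually ships:

```python
# Sketch only: mirrors the guard added to fetch_attachment_page in this PR;
# the wrapper function and the pacer_doc_id branch are hypothetical.
from cl.recap.models import PROCESSING_STATUS  # assumed import path
from cl.recap.tasks import mark_fq_status  # assumed import path


def acms_inputs_present(fq, rd, pacer_case_id) -> bool:
    """Mark the queue NEEDS_INFO and bail out if an ACMS identifier is missing."""
    if not pacer_case_id:
        msg = f"Unable to complete purchase: Missing case_id for RECAP Document object {rd.pk}."
        mark_fq_status(fq, msg, PROCESSING_STATUS.NEEDS_INFO)
        return False
    if not rd.pacer_doc_id:
        # Covers documents merged from ACMS email notifications, which can
        # arrive without a pacer_doc_id (per the review comment above).
        msg = f"Unable to complete purchase: Missing pacer_doc_id for RECAP Document object {rd.pk}."
        mark_fq_status(fq, msg, PROCESSING_STATUS.NEEDS_INFO)
        return False
    return True
```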

att_report.query(docket_case_id, rd_entry_id)
else:
att_report = AttachmentPage(pacer_court_id, s)
att_report.query(rd.pacer_doc_id)
att_report.query(rd.pacer_doc_id)
return att_report
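The net effect of this hunk: ACMS lookups are keyed by two GUIDs (the case's pacer_case_id and the entry's pacer_doc_id), while district and appellate lookups still take a single pacer_doc_id. A rough sketch of the resulting call shapes, using the GUIDs from this PR's test fixtures; the court IDs, session handling, and numeric doc ID are illustrative:

```python
from juriscraper.pacer import ACMSAttachmentPage, AttachmentPage

# `session` is assumed to be an authenticated PacerSession.
acms_report = ACMSAttachmentPage("ca9", session)
acms_report.query(
    "5d8e355d-b229-4b16-b00f-7552d2f79d4f",  # docket.pacer_case_id (case GUID)
    "4e108d6c-ad5b-f011-bec2-001dd80b194b",  # rd.pacer_doc_id (document GUID)
)

district_report = AttachmentPage("nysd", session)  # hypothetical district court
district_report.query("123019137279")  # classic numeric pacer_doc_id
```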


7 changes: 6 additions & 1 deletion cl/recap/mergers.py
@@ -1813,8 +1813,13 @@ async def merge_attachment_page_data(
pacer_file = await sync_to_async(PacerHtmlFiles)(
content_object=de, upload_type=UPLOAD_TYPE.ATTACHMENT_PAGE
)
pacer_file_name = (
"attachment_page.json"
if is_acms_attachment
else "attachment_page.html"
)
await sync_to_async(pacer_file.filepath.save)(
"attachment_page.html", # Irrelevant b/c S3PrivateUUIDStorageTest
pacer_file_name, # Irrelevant b/c S3PrivateUUIDStorageTest
ContentFile(text.encode()),
)

42 changes: 28 additions & 14 deletions cl/recap/tasks.py
@@ -1,6 +1,7 @@
import asyncio
import concurrent.futures
import hashlib
import json
import logging
from dataclasses import dataclass
from datetime import datetime
@@ -2119,9 +2120,10 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]:
self.request.chain = None
return []

if rd.is_acms_document():
msg = "ACMS attachment pages are not currently supported"
mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED)
is_acms_case = rd.is_acms_document()
if is_acms_case and not pacer_case_id:
msg = f"Unable to complete purchase: Missing case_id for RECAP Document object {rd.pk}."
mark_fq_status(fq, msg, PROCESSING_STATUS.NEEDS_INFO)
self.request.chain = None
return []

@@ -2179,32 +2181,44 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]:
)
raise self.retry(exc=exc)

text = r.response.text
is_appellate = is_appellate_court(court_id)
# Determine the appropriate parser function based on court jurisdiction
# (appellate or district)
att_data_parser = (
get_data_from_appellate_att_report
if is_appellate
else get_data_from_att_report
)
att_data = att_data_parser(text, court_id)
if not is_acms_case:
text = r.response.text
# Determine the appropriate parser function based on court jurisdiction
# (appellate or district)
att_data_parser = (
get_data_from_appellate_att_report
if is_appellate
else get_data_from_att_report
)
att_data = att_data_parser(text, court_id)
else:
att_data = r.data
text = json.dumps(r.data, default=str)

if att_data == {}:
msg = "Not a valid attachment page upload"
mark_fq_status(fq, msg, PROCESSING_STATUS.INVALID_CONTENT)
self.request.chain = None
return []

if is_acms_case:
document_number = att_data["entry_number"]
elif is_appellate:
# Appellate attachments don't contain a document_number
document_number = None
else:
document_number = att_data["document_number"]

try:
async_to_sync(merge_attachment_page_data)(
Contributor

Would it be a good idea to update the PacerHtmlFiles storage for the attachment data within merge_attachment_page_data, so the attachment data is saved as JSON, similar to what you did for the docket data?

rd.docket_entry.docket.court,
pacer_case_id,
att_data["pacer_doc_id"],
# Appellate attachments don't contain a document_number
None if is_appellate else att_data["document_number"],
document_number,
text,
att_data["attachments"],
is_acms_attachment=is_acms_case,
)
except RECAPDocument.MultipleObjectsReturned:
msg = (
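The cl/recap/mergers.py hunk earlier in this diff implements the reviewer's suggestion above. A condensed sketch of the combined behavior, pairing the filename selection from mergers.py with the json.dumps(..., default=str) serialization from tasks.py; the standalone helper is hypothetical:

```python
import json

from django.core.files.base import ContentFile


def save_attachment_page(pacer_file, response_data, is_acms_attachment: bool) -> None:
    """Persist the raw attachment page: JSON for ACMS, HTML otherwise."""
    if is_acms_attachment:
        # ACMS responses arrive as parsed dicts, so serialize them first.
        text = json.dumps(response_data, default=str)
        file_name = "attachment_page.json"
    else:
        text = response_data  # District/appellate pages are raw HTML strings.
        file_name = "attachment_page.html"
    pacer_file.filepath.save(file_name, ContentFile(text.encode()))
```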
94 changes: 74 additions & 20 deletions cl/recap/tests/tests.py
@@ -3490,6 +3490,30 @@ def setUp(self) -> None:
recap_document_id=self.rd_appellate.pk,
)

self.acms_court = CourtFactory(
id="ca9", jurisdiction=Court.FEDERAL_APPELLATE
)
self.acms_docket = DocketFactory(
source=Docket.RECAP,
court=self.acms_court,
pacer_case_id="5d8e355d-b229-4b16-b00f-7552d2f79d4f",
)
self.rd_acms = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(
docket=self.acms_docket, entry_number=9
),
document_number=9,
pacer_doc_id="4e108d6c-ad5b-f011-bec2-001dd80b194b",
is_available=False,
document_type=RECAPDocument.PACER_DOCUMENT,
)

self.fq_acms = PacerFetchQueue.objects.create(
user=User.objects.get(username="recap"),
request_type=REQUEST_TYPE.ATTACHMENT_PAGE,
recap_document_id=self.rd_acms.pk,
)

def test_fetch_attachment_page_no_pacer_doc_id(
self, mock_court_accessible
) -> None:
@@ -3517,26 +3541,6 @@ def test_fetch_att_page_no_cookies(
self.assertEqual(self.fq.status, PROCESSING_STATUS.FAILED)
self.assertIn("Unable to find cached cookies", self.fq.message)

def test_fetch_acms_att_page(self, mock_court_accessible) -> None:
rd_acms = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(docket=DocketFactory()),
pacer_doc_id="784459c4-e2cd-ef11-b8e9-001dd804c0b4",
)
fq_acms = PacerFetchQueue.objects.create(
user=User.objects.get(username="recap"),
request_type=REQUEST_TYPE.ATTACHMENT_PAGE,
recap_document_id=rd_acms.pk,
)
result = do_pacer_fetch(fq_acms)
result.get()

fq_acms.refresh_from_db()
self.assertEqual(fq_acms.status, PROCESSING_STATUS.FAILED)
self.assertIn(
"ACMS attachment pages are not currently supported",
fq_acms.message,
)

@mock.patch(
"cl.recap.tasks.get_pacer_cookie_from_cache",
)
@@ -3630,6 +3634,56 @@ def test_fetch_att_page_from_appellate(
"Successfully completed fetch", self.fq_appellate.message
)

@mock.patch(
"cl.recap.tasks.get_pacer_cookie_from_cache",
)
@mock.patch(
"cl.corpus_importer.tasks.ACMSAttachmentPage",
new=fakes.FakeAcmsAttachmentPage,
)
@mock.patch(
"cl.corpus_importer.tasks.AppellateAttachmentPage",
)
@mock.patch(
"cl.corpus_importer.tasks.AttachmentPage",
)
@mock.patch(
"cl.corpus_importer.tasks.is_appellate_court", wraps=is_appellate_court
)
@mock.patch("cl.recap.tasks.is_appellate_court", wraps=is_appellate_court)
def test_fetch_att_page_from_acms(
self,
mock_court_check_task,
mock_court_check_parser,
mock_district_report_parser,
mock_appellate_report_parser,
mock_get_cookies,
mock_court_accessible,
):
# Trigger the fetch operation for an ACMS attachment page
result = do_pacer_fetch(self.fq_acms)
result.get()

self.fq_acms.refresh_from_db()

docket_entry = self.rd_acms.docket_entry
acms_court_id = docket_entry.docket.court_id
# Verify court validation calls with expected court ID
mock_court_check_task.assert_called_with(acms_court_id)
mock_court_check_parser.assert_called_with(acms_court_id)

# Ensure that only the ACMS parser was used
mock_district_report_parser.assert_not_called()
mock_appellate_report_parser.assert_not_called()

# Assert successful fetch status and expected message
self.assertEqual(self.fq_acms.status, PROCESSING_STATUS.SUCCESSFUL)
self.assertIn("Successfully completed fetch", self.fq_acms.message)

# Verify that 3 RECAPDocument objects were created for the docket entry
docket_entry.refresh_from_db()
self.assertEqual(docket_entry.recap_documents.count(), 3)


class ProcessingQueueApiFilterTest(TestCase):
def setUp(self) -> None:
48 changes: 48 additions & 0 deletions cl/tests/fakes.py
@@ -170,6 +170,54 @@ def data(self, *args, **kwargs):
}


class FakeAcmsAttachmentPage(FakeAppellateAttachmentPage):
@property
def data(self, *args, **kwargs):
return {
"pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
"pacer_case_id": "5d8e355d-b229-4b16-b00f-7552d2f79d4f",
"entry_number": 9,
"description": "MOTION [Entered: 07/07/2025 08:41 PM]",
"date_filed": date(2025, 7, 8),
"date_end": date(2025, 7, 7),
"attachments": [
{
"attachment_number": 1,
"description": "Motion",
"page_count": 30,
"pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
"acms_document_guid": "d1358903-ad5b-f011-a2da-001dd80b00cb",
"cost": 3.0,
"date_filed": date(2025, 7, 8),
"permission": "Public",
"file_size": 864.0,
},
{
"attachment_number": 2,
"description": "Declaration",
"page_count": 4,
"pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
"acms_document_guid": "2f373c0f-ad5b-f011-a2da-001dd80b00cb",
"cost": 0.4,
"date_filed": date(2025, 7, 8),
"permission": "Public",
"file_size": 288.0,
},
{
"attachment_number": 3,
"description": "Declaration",
"page_count": 30,
"pacer_doc_id": "4e108d6c-ad5b-f011-bec2-001dd80b194b",
"acms_document_guid": "c6aae921-ad5b-f011-a2da-001dd80b00cb",
"cost": 3.0,
"date_filed": date(2025, 7, 8),
"permission": "Public",
"file_size": 11264.0,
},
],
}


class FakeFreeOpinionReport:
def __init__(self, *args, **kwargs):
pass