-
-
Notifications
You must be signed in to change notification settings - Fork 182
feat(recap): Adds support for ACMS attachment page purchases #5971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
57c6a56
d5b197f
f92e20b
8f2912a
924eccc
0322c73
3a7eef8
9e94ffd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import asyncio | ||
import concurrent.futures | ||
import hashlib | ||
import json | ||
import logging | ||
from dataclasses import dataclass | ||
from datetime import datetime | ||
|
@@ -2128,12 +2129,6 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]: | |
self.request.chain = None | ||
return [] | ||
|
||
if rd.is_acms_document(): | ||
msg = "ACMS attachment pages are not currently supported" | ||
mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED) | ||
self.request.chain = None | ||
return [] | ||
|
||
session_data = get_pacer_cookie_from_cache(fq.user_id) | ||
if not session_data: | ||
msg = "Unable to find cached cookies. Aborting request." | ||
|
@@ -2188,32 +2183,45 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> list[int]: | |
) | ||
raise self.retry(exc=exc) | ||
|
||
text = r.response.text | ||
is_appellate = is_appellate_court(court_id) | ||
# Determine the appropriate parser function based on court jurisdiction | ||
# (appellate or district) | ||
att_data_parser = ( | ||
get_data_from_appellate_att_report | ||
if is_appellate | ||
else get_data_from_att_report | ||
) | ||
att_data = att_data_parser(text, court_id) | ||
is_acms_case = rd.is_acms_document() | ||
if not is_acms_case: | ||
text = r.response.text | ||
# Determine the appropriate parser function based on court jurisdiction | ||
# (appellate or district) | ||
att_data_parser = ( | ||
get_data_from_appellate_att_report | ||
if is_appellate | ||
else get_data_from_att_report | ||
) | ||
att_data = att_data_parser(text, court_id) | ||
else: | ||
att_data = r.data | ||
text = json.dumps(r.data, default=str) | ||
|
||
if att_data == {}: | ||
msg = "Not a valid attachment page upload" | ||
mark_fq_status(fq, msg, PROCESSING_STATUS.INVALID_CONTENT) | ||
self.request.chain = None | ||
return [] | ||
|
||
if is_acms_case: | ||
document_number = att_data["entry_number"] | ||
elif is_appellate: | ||
# Appellate attachments don't contain a document_number | ||
document_number = None | ||
else: | ||
document_number = att_data["document_number"] | ||
|
||
try: | ||
async_to_sync(merge_attachment_page_data)( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be a good idea to update the |
||
rd.docket_entry.docket.court, | ||
pacer_case_id, | ||
att_data["pacer_doc_id"], | ||
# Appellate attachments don't contain a document_number | ||
None if is_appellate else att_data["document_number"], | ||
document_number, | ||
text, | ||
att_data["attachments"], | ||
is_acms_attachment=is_acms_case, | ||
) | ||
except RECAPDocument.MultipleObjectsReturned: | ||
msg = ( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3490,6 +3490,30 @@ def setUp(self) -> None: | |
recap_document_id=self.rd_appellate.pk, | ||
) | ||
|
||
self.acms_court = CourtFactory( | ||
id="ca9", jurisdiction=Court.FEDERAL_APPELLATE | ||
) | ||
self.acms_docket = DocketFactory( | ||
source=Docket.RECAP, | ||
court=self.acms_court, | ||
pacer_case_id="5d8e355d-b229-4b16-b00f-7552d2f79d4f", | ||
) | ||
self.rd_acms = RECAPDocumentFactory( | ||
docket_entry=DocketEntryWithParentsFactory( | ||
docket=self.acms_docket, entry_number=9 | ||
), | ||
document_number=9, | ||
pacer_doc_id="4e108d6c-ad5b-f011-bec2-001dd80b194b", | ||
is_available=False, | ||
document_type=RECAPDocument.PACER_DOCUMENT, | ||
) | ||
|
||
self.fq_acms = PacerFetchQueue.objects.create( | ||
user=User.objects.get(username="recap"), | ||
request_type=REQUEST_TYPE.ATTACHMENT_PAGE, | ||
recap_document_id=self.rd_acms.pk, | ||
) | ||
|
||
def test_fetch_attachment_page_no_pacer_doc_id( | ||
self, mock_court_accessible | ||
) -> None: | ||
|
@@ -3503,40 +3527,6 @@ def test_fetch_attachment_page_no_pacer_doc_id( | |
self.fq.refresh_from_db() | ||
self.assertEqual(self.fq.status, PROCESSING_STATUS.NEEDS_INFO) | ||
|
||
@mock.patch( | ||
"cl.recap.tasks.get_pacer_cookie_from_cache", | ||
return_value=None, | ||
) | ||
def test_fetch_att_page_no_cookies( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason to remove this test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry about that — I accidentally removed it while deleting the other test related to ACMS purchases |
||
self, mock_get_cookies, mock_court_accessible | ||
) -> None: | ||
result = do_pacer_fetch(self.fq) | ||
result.get() | ||
|
||
self.fq.refresh_from_db() | ||
self.assertEqual(self.fq.status, PROCESSING_STATUS.FAILED) | ||
self.assertIn("Unable to find cached cookies", self.fq.message) | ||
|
||
def test_fetch_acms_att_page(self, mock_court_accessible) -> None: | ||
rd_acms = RECAPDocumentFactory( | ||
docket_entry=DocketEntryWithParentsFactory(docket=DocketFactory()), | ||
pacer_doc_id="784459c4-e2cd-ef11-b8e9-001dd804c0b4", | ||
) | ||
fq_acms = PacerFetchQueue.objects.create( | ||
user=User.objects.get(username="recap"), | ||
request_type=REQUEST_TYPE.ATTACHMENT_PAGE, | ||
recap_document_id=rd_acms.pk, | ||
) | ||
result = do_pacer_fetch(fq_acms) | ||
result.get() | ||
|
||
fq_acms.refresh_from_db() | ||
self.assertEqual(fq_acms.status, PROCESSING_STATUS.FAILED) | ||
self.assertIn( | ||
"ACMS attachment pages are not currently supported", | ||
fq_acms.message, | ||
) | ||
|
||
@mock.patch( | ||
"cl.recap.tasks.get_pacer_cookie_from_cache", | ||
) | ||
|
@@ -3630,6 +3620,56 @@ def test_fetch_att_page_from_appellate( | |
"Successfully completed fetch", self.fq_appellate.message | ||
) | ||
|
||
@mock.patch( | ||
"cl.recap.tasks.get_pacer_cookie_from_cache", | ||
) | ||
@mock.patch( | ||
"cl.corpus_importer.tasks.ACMSAttachmentPage", | ||
new=fakes.FakeAcmsAttachmentPage, | ||
) | ||
@mock.patch( | ||
"cl.corpus_importer.tasks.AppellateAttachmentPage", | ||
) | ||
@mock.patch( | ||
"cl.corpus_importer.tasks.AttachmentPage", | ||
) | ||
@mock.patch( | ||
"cl.corpus_importer.tasks.is_appellate_court", wraps=is_appellate_court | ||
) | ||
@mock.patch("cl.recap.tasks.is_appellate_court", wraps=is_appellate_court) | ||
def test_fetch_att_page_from_acms( | ||
self, | ||
mock_court_check_task, | ||
mock_court_check_parser, | ||
mock_district_report_parser, | ||
mock_appellate_report_parser, | ||
mock_get_cookies, | ||
mock_court_accessible, | ||
): | ||
# Trigger the fetch operation for an ACMS attachment page | ||
result = do_pacer_fetch(self.fq_acms) | ||
result.get() | ||
|
||
self.fq_acms.refresh_from_db() | ||
|
||
docket_entry = self.rd_acms.docket_entry | ||
amcs_court_id = docket_entry.docket.court_id | ||
# Verify court validation calls with expected court ID | ||
mock_court_check_task.assert_called_with(amcs_court_id) | ||
mock_court_check_parser.assert_called_with(amcs_court_id) | ||
|
||
# Ensure that only the ACMS parser was used | ||
mock_district_report_parser.assert_not_called() | ||
mock_appellate_report_parser.assert_not_called() | ||
|
||
# Assert successful fetch status and expected message | ||
self.assertEqual(self.fq_acms.status, PROCESSING_STATUS.SUCCESSFUL) | ||
self.assertIn("Successfully completed fetch", self.fq_acms.message) | ||
|
||
# Verify that 3 RECAPDocument objects were created for the docket entry | ||
docket_entry.refresh_from_db() | ||
self.assertEqual(docket_entry.recap_documents.count(), 3) | ||
|
||
|
||
class ProcessingQueueApiFilterTest(TestCase): | ||
def setUp(self) -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When we start ingesting ACMS email notifications that don't include a
`pacer_case_id`, we'll retrieve it using the `AcmsCaseSearch` API. However, the
`pacer_doc_id` will be empty for documents merged from ACMS email notifications.
So, I think it would be a good idea to check these values before querying the report as a safeguard. If either is missing, we could raise an error and store it in the FQ, so users understand why the fetch can't complete.
It might be better to perform this check in `fetch_attachment_page`, since that's where these values are initially retrieved.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great idea — thanks! I'll update the code.