Skip to content

Commit aa49b12

Browse files
asmacdo authored and jjnesbitt committed
enh: Add Dandiset DOIs
- Dandiset DOI will redirect to the DLP
  - Example: 10.80507/dandi.000004
- Dandiset DOI is stored in the doi field of the draft version
- Dandiset DOI metadata (on DataCite) will match the draft version until first publication
- Once a Dandiset is published, the Dandiset DOI metadata will match the latest publication

See the design document for more details: #2012
1 parent f222f03 commit aa49b12

File tree

14 files changed

+1090
-113
lines changed

14 files changed

+1090
-113
lines changed

dandiapi/api/datacite.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
"""
2+
DataCite API client implementation.
3+
4+
This module provides the implementation details for interacting with the DataCite API.
5+
The public interface is exposed through doi.py.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import copy
11+
import logging
12+
from typing import TYPE_CHECKING
13+
14+
from django.conf import settings
15+
import requests
16+
17+
if TYPE_CHECKING:
18+
from dandiapi.api.models import Version
19+
20+
# Every DOI setting that must be present, stored as (current value, setting name) pairs.
# Lives here rather than in doi.py to avoid circular imports.
DANDI_DOI_SETTINGS = [
    (getattr(settings, setting_name), setting_name)
    for setting_name in (
        'DANDI_DOI_API_URL',
        'DANDI_DOI_API_USER',
        'DANDI_DOI_API_PASSWORD',
        'DANDI_DOI_API_PREFIX',
    )
]

logger = logging.getLogger(__name__)
30+
31+
32+
class DataCiteClient:
    """
    Client for interacting with the DataCite API.

    Connection details (URL, credentials, DOI prefix) are read from Django settings once, at
    construction time. The methods that send requests to DataCite first check
    ``is_configured()`` and turn into a logged no-op when a required setting is missing.
    """

    # Status code that is treated as "this DOI already exists" when a POST is rejected.
    # NOTE(review): DataCite may also answer 422 for other validation errors; in that case the
    # PUT fallback below is expected to fail as well and the error is logged and re-raised.
    _ALREADY_EXISTS_STATUS = 422

    def __init__(self):
        self.api_url = settings.DANDI_DOI_API_URL
        self.api_user = settings.DANDI_DOI_API_USER
        self.api_password = settings.DANDI_DOI_API_PASSWORD
        self.api_prefix = settings.DANDI_DOI_API_PREFIX
        self.auth = requests.auth.HTTPBasicAuth(self.api_user, self.api_password)
        self.headers = {'Accept': 'application/vnd.api+json'}
        # Timeout passed to every request made by this client.
        self.timeout = 30

    def is_configured(self) -> bool:
        """Check if the DOI client is properly configured (every required setting is set)."""
        return all(setting is not None for setting, _ in DANDI_DOI_SETTINGS)

    def format_doi(self, dandiset_id: str, version_id: str | None = None) -> str:
        """
        Format a DOI string for a dandiset or version.

        Args:
            dandiset_id: The dandiset identifier.
            version_id: Optional version identifier. If provided (and non-empty), creates a
                Version DOI. If omitted, creates a Dandiset DOI.

        Returns:
            Formatted DOI string: ``<prefix>/dandi.<dandiset_id>[/<version_id>]``.
        """
        # TODO(asmacdo) replace "dandi" with non-hardcoded ID_PATTERN
        # https://github.com/dandi/dandi-schema/pull/294/files#diff-43c9cc813638d87fd33e527a7baccb2fd7dff85595a7e686bfaf61f0409bd403R47
        dandiset_doi = f'{self.api_prefix}/dandi.{dandiset_id}'
        if version_id:
            return f'{dandiset_doi}/{version_id}'
        return dandiset_doi

    def generate_doi_data(
        self, version: Version, version_doi: bool = True, event: str | None = None
    ) -> tuple[str, dict]:
        """
        Generate DOI data for a version or dandiset.

        The version's metadata is deep-copied before being modified, so ``version.metadata``
        itself is left untouched.

        Args:
            version: Version object containing metadata.
            version_doi: If True, generate a Version DOI, otherwise generate a Dandiset DOI.
            event: The DOI event type.
                - None: Creates a Draft DOI.
                - "publish": Creates or promotes to a Findable DOI.
                - "hide": Converts to a Registered DOI.

        Returns:
            Tuple of (doi_string, datacite_payload)

        Raises:
            KeyError: If a Dandiset DOI is requested and the metadata has no ``url`` entry.
        """
        # TODO(asmacdo) if not datacite configured make sure we dont save any dois to model
        from dandischema.datacite import to_datacite

        dandiset_id = version.dandiset.identifier
        version_id = version.version
        metadata = copy.deepcopy(version.metadata)

        # Generate the appropriate DOI string
        if version_doi:
            doi = self.format_doi(dandiset_id, version_id)
        else:
            doi = self.format_doi(dandiset_id)
            # Dandiset DOI is the same as version url without version
            metadata['url'] = metadata['url'].rsplit('/', 1)[0]

        metadata['doi'] = doi

        # Generate the datacite payload with the appropriate event
        datacite_payload = to_datacite(metadata, event=event)

        return (doi, datacite_payload)

    def _doi_url(self, doi: str) -> str:
        """Return the API URL of a single DOI, tolerating a trailing slash on the base URL."""
        base_url = self.api_url.rstrip('/')
        return f'{base_url}/{doi}'

    @staticmethod
    def _log_request_failure(summary: str, response, payload: dict) -> None:
        """Log a failed request with the response body (if any), the payload and traceback."""
        details = summary
        # Compare with None explicitly: a requests.Response is falsy for 4xx/5xx statuses,
        # which are exactly the responses worth logging here.
        if response is not None:
            details += f'\nResponse: {response.text}'
        details += f'\nPayload: {payload}'
        logger.exception(details)

    def create_or_update_doi(self, original_datacite_payload: dict) -> str | None:
        """
        Create or update a DOI with the DataCite API.

        The DOI is first created with a POST; if that is rejected with HTTP 422 the DOI is
        assumed to exist already and the same payload is sent again with a PUT.

        Args:
            original_datacite_payload: The DOI payload to send to DataCite. It is deep-copied
                and never modified.

        Returns:
            The DOI string on success, None when the DOI API is not configured.

        Raises:
            KeyError: If the payload has no ``data.attributes.doi`` entry.
            requests.exceptions.HTTPError: If the API request fails.
        """
        datacite_payload = copy.deepcopy(original_datacite_payload)
        attributes = datacite_payload['data']['attributes']
        doi = attributes['doi']

        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return None

        # Check if we're trying to create a non-draft DOI when it's not allowed
        event = attributes.get('event')
        if not settings.DANDI_DOI_PUBLISH and event in ['publish', 'hide']:
            # Remove the event to make it a draft DOI
            attributes.pop('event', None)
            logger.warning(
                'DANDI_DOI_PUBLISH is not enabled. DOI %s will be created as draft.', doi
            )

        try:
            response = requests.post(
                self.api_url,
                json=datacite_payload,
                auth=self.auth,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as create_error:
            create_response = create_error.response
            if (
                create_response is None
                or create_response.status_code != self._ALREADY_EXISTS_STATUS
            ):
                self._log_request_failure(
                    f'Failed to create DOI {doi}', create_response, datacite_payload
                )
                raise

            # Retry with PUT if DOI already exists
            try:
                update_response = requests.put(
                    self._doi_url(doi),
                    json=datacite_payload,
                    auth=self.auth,
                    headers=self.headers,
                    timeout=self.timeout,
                )
                update_response.raise_for_status()
            except Exception as update_error:
                # Report the response of the failed PUT, not of the earlier POST.
                self._log_request_failure(
                    f'Failed to update existing DOI {doi}',
                    getattr(update_error, 'response', None),
                    datacite_payload,
                )
                raise

        # Reached after a successful POST or a successful PUT fallback.
        return doi

    def delete_or_hide_doi(self, doi: str) -> None:
        """
        Delete a draft DOI or hide a findable DOI depending on its state.

        This method first checks the DOI's state and then either deletes it (if it's a draft)
        or hides it (if it's findable). Hiding a DOI requires DANDI_DOI_PUBLISH to be enabled.
        A 404 from DataCite is logged as a warning and otherwise ignored.

        Args:
            doi: The DOI to delete or hide.

        Raises:
            requests.exceptions.HTTPError: If the API request fails with anything but a 404.
        """
        if not self.is_configured():
            logger.warning('DOI API not configured. Skipping operations for %s', doi)
            return

        doi_url = self._doi_url(doi)

        try:
            # First, get DOI information to check its state
            response = requests.get(
                doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
            )
            response.raise_for_status()

            doi_data = response.json()
            # Get the state, defaulting to 'draft' if absent
            doi_state = doi_data.get('data', {}).get('attributes', {}).get('state', 'draft')

            if doi_state == 'draft':
                # Draft DOIs can be deleted
                delete_response = requests.delete(
                    doi_url, auth=self.auth, headers=self.headers, timeout=self.timeout
                )
                delete_response.raise_for_status()
                logger.info('Successfully deleted draft DOI: %s', doi)
                return

            # Findable DOIs must be hidden
            # Check if DANDI_DOI_PUBLISH is enabled for hiding
            if not settings.DANDI_DOI_PUBLISH:
                logger.warning(
                    'DANDI_DOI_PUBLISH is not enabled. DOI %s will remain findable.', doi
                )
                return

            # Create hide payload
            hide_payload = {
                'data': {'id': doi, 'type': 'dois', 'attributes': {'event': 'hide'}}
            }

            hide_response = requests.put(
                doi_url,
                json=hide_payload,
                auth=self.auth,
                headers=self.headers,
                timeout=self.timeout,
            )
            hide_response.raise_for_status()
            logger.info('Successfully hid findable DOI: %s', doi)

        except requests.exceptions.HTTPError as e:
            # `e.response` must be compared with None: a 404 response is falsy, so a plain
            # truthiness test would never reach the status-code comparison.
            if e.response is not None and e.response.status_code == requests.codes.not_found:
                logger.warning('Tried to get data for nonexistent DOI %s', doi)
                return
            logger.exception('Failed to delete or hide DOI %s', doi)
            raise
245+
246+

dandiapi/api/doi.py

Lines changed: 62 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,74 @@
1+
"""
2+
DOI management interface for the DANDI Archive.
3+
4+
This module provides the public interface for DOI operations,
5+
while the implementation details are in datacite.py.
6+
"""
7+
18
from __future__ import annotations
29

310
import logging
411
from typing import TYPE_CHECKING
512

6-
from django.conf import settings
7-
import requests
13+
from dandiapi.api.datacite import DataCiteClient
814

915
if TYPE_CHECKING:
1016
from dandiapi.api.models import Version
1117

12-
# All of the required DOI configuration settings
13-
DANDI_DOI_SETTINGS = [
14-
(settings.DANDI_DOI_API_URL, 'DANDI_DOI_API_URL'),
15-
(settings.DANDI_DOI_API_USER, 'DANDI_DOI_API_USER'),
16-
(settings.DANDI_DOI_API_PASSWORD, 'DANDI_DOI_API_PASSWORD'),
17-
(settings.DANDI_DOI_API_PREFIX, 'DANDI_DOI_API_PREFIX'),
18-
]
19-
2018
logger = logging.getLogger(__name__)
2119

2220

23-
def doi_configured() -> bool:
24-
return any(setting is not None for setting, _ in DANDI_DOI_SETTINGS)
25-
26-
27-
def _generate_doi_data(version: Version):
28-
from dandischema.datacite import to_datacite
29-
30-
publish = settings.DANDI_DOI_PUBLISH
31-
# Use the DANDI test datacite instance as a placeholder if PREFIX isn't set
32-
prefix = settings.DANDI_DOI_API_PREFIX or '10.80507'
33-
dandiset_id = version.dandiset.identifier
34-
version_id = version.version
35-
doi = f'{prefix}/dandi.{dandiset_id}/{version_id}'
36-
metadata = version.metadata
37-
metadata['doi'] = doi
38-
return (doi, to_datacite(metadata, publish=publish))
39-
40-
41-
def create_doi(version: Version) -> str:
42-
doi, request_body = _generate_doi_data(version)
43-
# If DOI isn't configured, skip the API call
44-
if doi_configured():
45-
try:
46-
requests.post(
47-
settings.DANDI_DOI_API_URL,
48-
json=request_body,
49-
auth=requests.auth.HTTPBasicAuth(
50-
settings.DANDI_DOI_API_USER,
51-
settings.DANDI_DOI_API_PASSWORD,
52-
),
53-
timeout=30,
54-
).raise_for_status()
55-
except requests.exceptions.HTTPError as e:
56-
logger.exception('Failed to create DOI %s', doi)
57-
logger.exception(request_body)
58-
if e.response:
59-
logger.exception(e.response.text)
60-
raise
61-
return doi
62-
63-
64-
def delete_doi(doi: str) -> None:
65-
# If DOI isn't configured, skip the API call
66-
if doi_configured():
67-
doi_url = settings.DANDI_DOI_API_URL.rstrip('/') + '/' + doi
68-
with requests.Session() as s:
69-
s.auth = (settings.DANDI_DOI_API_USER, settings.DANDI_DOI_API_PASSWORD)
70-
try:
71-
r = s.get(doi_url, headers={'Accept': 'application/vnd.api+json'})
72-
r.raise_for_status()
73-
except requests.exceptions.HTTPError as e:
74-
if e.response and e.response.status_code == requests.codes.not_found:
75-
logger.warning('Tried to get data for nonexistent DOI %s', doi)
76-
return
77-
logger.exception('Failed to fetch data for DOI %s', doi)
78-
raise
79-
if r.json()['data']['attributes']['state'] == 'draft':
80-
try:
81-
s.delete(doi_url).raise_for_status()
82-
except requests.exceptions.HTTPError:
83-
logger.exception('Failed to delete DOI %s', doi)
84-
raise
85-
else:
86-
logger.debug('Skipping DOI deletion for %s since not configured', doi)
21+
# Module-level DataCiteClient instance shared by the public functions in this module
22+
datacite_client = DataCiteClient()
23+
24+
25+
def generate_doi_data(
    version: Version, version_doi: bool = True, event: str | None = None
) -> tuple[str, dict]:
    """
    Build the DOI string and DataCite payload for a version or its dandiset.

    Public entry point; the work is delegated to the module-level ``datacite_client``.

    Args:
        version: Version object whose metadata is used.
        version_doi: True for a Version DOI, False for a Dandiset DOI.
        event: The DOI event type.
            - None: Creates a Draft DOI.
            - "publish": Creates or promotes to a Findable DOI.
            - "hide": Converts to a Registered DOI.

    Returns:
        Tuple of (doi_string, datacite_payload)
    """
    doi_and_payload = datacite_client.generate_doi_data(version, version_doi, event)
    return doi_and_payload
43+
44+
45+
def create_or_update_doi(datacite_payload: dict) -> str | None:
    """
    Send a DOI payload to DataCite, creating the DOI or updating an existing one.

    Public entry point; the work is delegated to the module-level ``datacite_client``.

    Args:
        datacite_payload: The DOI payload to send to DataCite.

    Returns:
        The DOI string on success, None when the DOI API is not configured.

    Raises:
        requests.exceptions.HTTPError: If the API request fails.
    """
    doi = datacite_client.create_or_update_doi(datacite_payload)
    return doi
59+
60+
61+
def delete_or_hide_doi(doi: str) -> None:
    """
    Remove a DOI from public view: delete it if it is a draft, hide it if it is findable.

    Public entry point; the work is delegated to the module-level ``datacite_client``, which
    looks up the DOI's state first. Hiding a DOI requires DANDI_DOI_PUBLISH to be enabled.

    Args:
        doi: The DOI to delete or hide.

    Raises:
        requests.exceptions.HTTPError: If the API request fails.
    """
    client = datacite_client
    client.delete_or_hide_doi(doi)

0 commit comments

Comments
 (0)