diff --git a/bioimageio_collection_backoffice/backup.py b/bioimageio_collection_backoffice/backup.py index 6bc0e87d..02bf0e3c 100644 --- a/bioimageio_collection_backoffice/backup.py +++ b/bioimageio_collection_backoffice/backup.py @@ -2,8 +2,8 @@ from datetime import datetime from io import BytesIO from pathlib import PurePosixPath -from typing import Any, Dict, List -from urllib.parse import quote_plus +from typing import Dict, List +from urllib.parse import quote_plus, urlparse import markdown import requests @@ -17,11 +17,11 @@ from bioimageio.spec.utils import download from loguru import logger + from ._settings import settings from .remote_collection import Record, RemoteCollection -from .requests_utils import put_file, raise_for_status_discretely from .s3_client import Client - +import bioimageio_collection_backoffice.zenodo as zd class SkipForNow(NotImplementedError): pass @@ -30,6 +30,11 @@ class SkipForNow(NotImplementedError): def backup(client: Client): """backup all published resources to their own zenodo records""" remote_collection = RemoteCollection(client=client) + zenodo_client = zd.Client( + session=requests.Session(), + access_token=settings.zenodo_api_access_token.get_secret_value(), + api_hostname=urlparse(settings.zenodo_url).netloc, + ) backed_up: List[str] = [] error = None @@ -38,7 +43,7 @@ def backup(client: Client): continue try: - backup_published_version(v) + backup_published_version(v, zenodo_client=zenodo_client) except SkipForNow as e: logger.warning("{}\n{}", e, traceback.format_exc()) except Exception as e: @@ -53,7 +58,7 @@ def backup(client: Client): def backup_published_version( - v: Record, + v: Record, zenodo_client: zd.Client ): with ValidationContext(perform_io_checks=False): rdf = load_description(v.rdf_url) @@ -79,34 +84,12 @@ def backup_published_version( if rdf.license is None: raise ValueError(f"Missing license for {v.id}") - headers = {"Content-Type": "application/json"} - access_token = 
settings.zenodo_api_access_token.get_secret_value() - assert len(access_token) > 1, "missing zenodo api access token" - params = {"access_token": access_token} - if v.concept.doi is None: - # Create empty deposition - r_create = requests.post( - f"{settings.zenodo_url}/api/deposit/depositions", - params=params, - json={}, - headers=headers, - ) + deposition_info = zenodo_client.create_new_concept() else: - concept_id = v.concept.doi.split("/zenodo.")[1] + concept_doi = zd.ZenodoDoi[zd.ConceptId].model_validate(v.concept.doi) # create a new deposition version with different deposition_id from the existing deposition - r_create = requests.post( - settings.zenodo_url - + "/api/deposit/depositions/" - + concept_id - + "/actions/newversion", - params=params, - ) - - raise_for_status_discretely(r_create) - deposition_info = r_create.json() - - bucket_url = deposition_info["links"]["bucket"] + deposition_info: zd.Record = zenodo_client.create_new_concept_version(concept_id=concept_doi.id) # # use the new version's deposit link # newversion_draft_url = deposition_info["links"]['latest_draft'] @@ -119,16 +102,7 @@ def backup_published_version( file_data = v.client.load_file(file_path) assert file_data is not None filename = PurePosixPath(file_path).name - put_file(BytesIO(file_data), f"{bucket_url}/{filename}", params) - - # Report deposition URL - deposition_id = str(deposition_info["id"]) - concept_id = str(deposition_info["conceptrecid"]) - doi = deposition_info["metadata"]["prereserve_doi"]["doi"] - assert isinstance(doi, str) - concept_doi = doi.replace(deposition_id, concept_id) - - # base_url = f"{settings.zenodo_url}/record/{concept_id}/files/" + zenodo_client.add_file_to_record(record=deposition_info, file_name=filename, data=BytesIO(file_data)) metadata = rdf_to_zenodo_metadata( rdf, @@ -136,40 +110,17 @@ def backup_published_version( publication_date=v.info.created, ) - put_url = f"{settings.zenodo_url}/api/deposit/depositions/{deposition_id}" - logger.debug("PUT 
{} with metadata: {}", put_url, metadata) - r_metadata = requests.put( - put_url, - params=params, - json={"metadata": metadata}, - headers=headers, - ) - raise_for_status_discretely(r_metadata) + zenodo_client.add_metadata_to_record(record_id=deposition_info.id, metadata=metadata) - publish_url = ( - f"{settings.zenodo_url}/api/deposit/depositions/{deposition_id}/actions/publish" - ) - logger.debug("POST {}", publish_url) - r_publish = requests.post( - publish_url, - params=params, + published_record = zenodo_client.publish(record_id=deposition_info.id) + if published_record.doi is None: + raise TypeError("Expected published record to have a DOI, found None") + if published_record.conceptdoi is None: + raise TypeError("Expected published record to have a concept DOI, found None") + v.set_dois( + doi=published_record.doi.as_str(), + concept_doi=published_record.conceptdoi.as_str(), ) - raise_for_status_discretely(r_publish) - v.set_dois(doi=doi, concept_doi=concept_doi) - - -def rdf_authors_to_metadata_creators(rdf: ResourceDescr): - creators: List[Dict[str, str]] = [] - for author in rdf.authors: - creator = {"name": str(author.name)} - if author.affiliation: - creator["affiliation"] = author.affiliation - - if author.orcid: - creator["orcid"] = str(author.orcid) - - creators.append(creator) - return creators def rdf_to_zenodo_metadata( @@ -178,9 +129,30 @@ def rdf_to_zenodo_metadata( additional_note: str = "\n(Uploaded via https://bioimage.io)", publication_date: datetime, rdf_file_name: str, -) -> Dict[str, Any]: +) -> "zd.OpenAccessSoftwareMetadataArgs | zd.OpenAccessDatasetMetadataArgs": + if rdf.license is None: + raise ValueError(f"Missing license for {rdf.id}") + license = str(rdf.license).lower() + if not zd.is_zenodo_license(license): + message = f"License '{rdf.license}' not known to Zenodo." 
+ logger.error( + ( + message + + " Please add manually as custom license" + + " (as this is currently not supported to do via REST API)" + ) + ) + raise ValueError(message) + + creators = [ + zd.RecordCreator( + name=str(author.name), + affiliation=None if author.affiliation is None else str(author.affiliation), + orcid = None if author.orcid is None else str(author.orcid), + ) + for author in rdf.authors + ] - creators = rdf_authors_to_metadata_creators(rdf) docstring = "" if rdf.documentation is not None: docstring = download(rdf.documentation).path.read_text() @@ -190,42 +162,33 @@ def rdf_to_zenodo_metadata( description = markdown.markdown(description_md) logger.debug("html description:\n{}", description_md) keywords = ["backup.bioimage.io", "bioimage.io", "bioimage.io:" + rdf.type] - # related_identifiers = generate_related_identifiers_from_rdf(rdf, rdf_file_name) # TODO: add related identifiers - - ret: Dict[str, Any] = { - "title": f"bioimage.io upload: {rdf.id}", - "description": description, - "access_right": "open", - "upload_type": "dataset" if rdf.type == "dataset" else "software", - "creators": creators, - "publication_date": publication_date.date().isoformat(), - "keywords": keywords + rdf.tags, - "notes": rdf.description + additional_note, - # "related_identifiers": related_identifiers, - # "communities": [], - } - - if rdf.license is not None: - # check if license id is valid: - license_response = requests.get( - f"https://zenodo.org/api/vocabularies/licenses/{rdf.license.lower()}" - ) - try: - raise_for_status_discretely(license_response) - except Exception as e: - logger.error(str(e)) - logger.error( - ( - f"License '{rdf.license}' not known to Zenodo." 
- + " Please add manually as custom license" - + " (as this is currently not supported to do via REST API)" - ) - ) - else: - ret["license"] = rdf.license - - return ret + if rdf.type == "dataset": + return zd.OpenAccessDatasetMetadataArgs( + title=f"bioimage.io upload: {rdf.id}", + description=description, + access_right="open", + upload_type="dataset", + creators=creators, + publication_date=publication_date.date(), + keywords=keywords + rdf.tags, + notes=rdf.description + additional_note, + license=license, + prereserve_doi=True, + ) + else: + return zd.OpenAccessSoftwareMetadataArgs( + title=f"bioimage.io upload: {rdf.id}", + description=description, + access_right="open", + upload_type="software", + creators=creators, + publication_date=publication_date.date(), + keywords=keywords + rdf.tags, + notes=rdf.description + additional_note, + license=license, + prereserve_doi=True, + ) def generate_related_identifiers_from_rdf(rdf: ResourceDescr, rdf_file_name: str): related_identifiers: List[Dict[str, str]] = [] diff --git a/bioimageio_collection_backoffice/remote_collection.py b/bioimageio_collection_backoffice/remote_collection.py index 9046c592..8229c768 100644 --- a/bioimageio_collection_backoffice/remote_collection.py +++ b/bioimageio_collection_backoffice/remote_collection.py @@ -1025,7 +1025,7 @@ def update_info(self, update: RecordInfo): def set_dois(self, *, doi: str, concept_doi: str): if self.doi is not None: raise ValueError(f"May not overwrite existing doi={self.doi} with {doi}") - if self.concept_doi is not None: + if self.concept_doi is not None and concept_doi != self.concept_doi: raise ValueError( f"May not overwrite existing concept_doi={self.concept_doi} with {concept_doi}" ) diff --git a/bioimageio_collection_backoffice/zenodo/__init__.py b/bioimageio_collection_backoffice/zenodo/__init__.py new file mode 100644 index 00000000..7c5ff463 --- /dev/null +++ b/bioimageio_collection_backoffice/zenodo/__init__.py @@ -0,0 +1,12 @@ +from .client import 
( + Client +) +from .response import ( + Record +) +from .request_args import ( + OpenAccessSoftwareMetadataArgs, OpenAccessDatasetMetadataArgs, RecordCreator +) +from .metadata import ( + RecordId, ConceptId, ZenodoDoi, is_zenodo_license, +) diff --git a/bioimageio_collection_backoffice/zenodo/client.py b/bioimageio_collection_backoffice/zenodo/client.py new file mode 100644 index 00000000..64bf6c30 --- /dev/null +++ b/bioimageio_collection_backoffice/zenodo/client.py @@ -0,0 +1,138 @@ +from pathlib import PurePosixPath +from typing import Any, Optional, Sequence, Mapping, Literal +import sys +from io import IOBase +import json + +import pydantic +import requests +from loguru import logger + +from bioimageio_collection_backoffice.requests_utils import raise_for_status_discretely +from bioimageio_collection_backoffice.zenodo.metadata import ConceptId, RecordId +from bioimageio_collection_backoffice.zenodo.request_args import OpenAccessDatasetMetadataArgs, OpenAccessSoftwareMetadataArgs +from bioimageio_collection_backoffice.zenodo.response import QueriedRecord, Record, RecordQueryResponse + +class Client: + def __init__( + self, + *, + session: Optional[requests.Session]=None, + access_token: str, + api_hostname: str, + ) -> None: + self.session = session or requests.Session() + self.access_token = access_token + self.api_hostname = api_hostname + super().__init__() + + def _get(self, *, endpoint: PurePosixPath, params: Mapping[str, str]) -> requests.Response: + url = f'https://{self.api_hostname}{endpoint}' + params={ + "access_token": self.access_token, + **params, + } + logger.debug(f"GET to {url}") + logger.debug(f"PAYLOAD: {json.dumps({**params, 'access_token': '--redacted--'}, indent=4)}") + resp = self.session.get( + url, + params=params, + json={}, + headers={"Content-Type": "application/json"} + ) + logger.debug(f"RESPONSE PAYLOAD: {json.dumps(resp.json(), indent=4)}") + raise_for_status_discretely(resp) + return resp + + def _send( + self, + *, + method: 
Literal["post", "put", "delete"], + endpoint: PurePosixPath, + payload: pydantic.JsonValue + ) -> requests.Response: + url = f'https://{self.api_hostname}{endpoint}' + logger.debug(f"{method} to {url}", file=sys.stderr) + logger.debug(f"PAYLOAD: {json.dumps(payload, indent=4)}") + resp = self.session.request( + method, + url, + params={"access_token": self.access_token}, + data=json.dumps(payload), + headers={"Content-Type": "application/json"} + ) + if resp.headers.get("Content-Type") == "application/json": + logger.debug(f"Response: {json.dumps(resp.json(), indent=4)}") + raise_for_status_discretely(resp) + return resp + + def _post(self, *, endpoint: PurePosixPath, payload: pydantic.JsonValue) -> requests.Response: + return self._send(method="post", endpoint=endpoint, payload=payload) + + def _put(self, *, endpoint: PurePosixPath, payload: pydantic.JsonValue) -> requests.Response: + return self._send(method="put", endpoint=endpoint, payload=payload) + + def _delete(self, *, endpoint: PurePosixPath) -> requests.Response: + return self._send(method="delete", endpoint=endpoint, payload={}) + + def delete_record(self, *, record: RecordId): + _ = self._delete(endpoint=PurePosixPath(f"/api/deposit/depositions/{record.id}")) + + def get_concept_versions(self, concept_id: Optional[ConceptId]) -> Sequence[QueriedRecord]: + params = {"all_versions": "1"} + if concept_id is not None: + params["q"] = f'conceptrecid:{concept_id}' + resp = self._get( + endpoint=PurePosixPath('/api/records'), + params=params, + ) + parsed_resp = RecordQueryResponse.model_validate_json(resp.content) + return parsed_resp.hits.hits + + def create_new_concept_version(self, concept_id: ConceptId) -> Record: + version_ids = sorted([v.id for v in self.get_concept_versions(concept_id)]) + if len(version_ids) == 0: + raise RuntimeError(f"A limitation of zenodo's API prevents creating new versions for concept {concept_id}") + latest_version_id = version_ids[-1] + resp = self._post( + 
endpoint=PurePosixPath(f"/api/deposit/depositions/{latest_version_id}/actions/newversion"), + payload={}, + ) + parsed_resp = Record.model_validate_json(resp.content) + return parsed_resp + + def create_new_concept(self) -> Record: + resp = self._post(endpoint=PurePosixPath('/api/deposit/depositions'), payload={}) + return Record.model_validate_json(resp.content) + + def add_metadata_to_record( + self, + *, + record_id: RecordId, + metadata: "OpenAccessSoftwareMetadataArgs | OpenAccessDatasetMetadataArgs", + ): + _ = self._put( + endpoint=PurePosixPath(f'/api/deposit/depositions/{record_id}'), + payload={"metadata": metadata.model_dump()}, + ) + + def add_file_to_record(self, *, record: Record, file_name: str, data: IOBase) -> Any: + bucket_url = record.links.bucket + if bucket_url is None: + raise ValueError(f"Record has no bucket url") # FIXME: should bucket really be optional? + resp = self.session.put( + f"{bucket_url.geturl()}/{file_name}", # FIXME: use a URL implementation instead of fstring + data=data, + params={"access_token": self.access_token}, + ) + if resp.headers.get("Content-Type") == "application/json": + logger.debug(f"Response: {json.dumps(resp.json(), indent=4)}") + raise_for_status_discretely(resp) + + def publish(self, *, record_id: RecordId) -> Record: + resp = self._post( + endpoint=PurePosixPath(f'/api/deposit/depositions/{record_id}/actions/publish'), + payload={}, + ) + return Record.model_validate_json(resp.content) + diff --git a/bioimageio_collection_backoffice/zenodo/metadata.py b/bioimageio_collection_backoffice/zenodo/metadata.py new file mode 100644 index 00000000..70c4992e --- /dev/null +++ b/bioimageio_collection_backoffice/zenodo/metadata.py @@ -0,0 +1,538 @@ +from collections.abc import Mapping +from typing import Any, Generic, TypeAlias, Literal, TypeGuard, TypeVar + +import pydantic + +PublicationType: TypeAlias = Literal[ + "annotationcollection", + "book", + "section", + "conferencepaper", + "datamanagementplan", + 
"article", + "patent", + "preprint", + "deliverable", + "milestone", + "proposal", + "report", + "softwaredocumentation", + "taxonomictreatment", + "technicalnote", + "thesis", + "workingpaper", + "other", +] + +LicenseName: TypeAlias = Literal[ + "glide", + "amdplpa", + "antlr-pd", + "antlr-pd-fallback", + "abstyles", + "afl-1.1", + "afl-1.2", + "afl-2.0", + "afl-2.1", + "afl-3.0", + "ampas", + "apl-1.0", + "adobe-glyph", + "apafml", + "adobe-2006", + "agpl-1.0-only", + "agpl-1.0-or-later", + "afmparse", + "against-drm", + "aladdin", + "adsl", + "apache-1.0", + "apache-1.1", + "apache-2.0", + "aml", + "apsl-1.0", + "apsl-1.1", + "apsl-1.2", + "apsl-2.0", + "artistic-1.0", + "artistic-1.0-perl", + "artistic-1.0-cl8", + "artistic-2.0", + "aal", + "bsd-1-clause", + "bsd-2-clause", + "bsd-2-clause-views", + "bsd-3-clause", + "bsd-3-clause-clear", + "bsd-3-clause-no-nuclear-license", + "bsd-3-clause-no-nuclear-license-2014", + "bsd-3-clause-no-nuclear-warranty", + "bsd-3-clause-open-mpi", + "bsd-4-clause", + "bsd-protection", + "bsd-source-code", + "0bsd", + "bsd-3-clause-attribution", + "bsd-2-clause-patent", + "bsd-4-clause-uc", + "bahyph", + "barr", + "beerware", + "bittorrent-1.0", + "bittorrent-1.1", + "blueoak-1.0.0", + "bsl-1.0", + "borceux", + "busl-1.1", + "cern-ohl-p-2.0", + "cern-ohl-s-2.0", + "cern-ohl-w-2.0", + "cern-ohl-1.1", + "cern-ohl-1.2", + "mit-cmu", + "cnri-jython", + "cnri-python", + "cnri-python-gpl-compatible", + "cua-opl-1.0", + "caldera", + "cecill-1.0", + "cecill-1.1", + "cecill-2.0", + "cecill-2.1", + "cecill-b", + "cecill-c", + "clartistic", + "cpol-1.02", + "cddl-1.0", + "cddl-1.1", + "cpal-1.0", + "cpl-1.0", + "cdla-permissive-1.0", + "cdla-sharing-1.0", + "catosl-1.1", + "condor-1.1", + "cc-by-1.0", + "cc-by-2.0", + "cc-by-2.5", + "cc-by-3.0-at", + "cc-by-3.0-us", + "cc-by-3.0", + "cc-by-4.0", + "cc-by-nd-1.0", + "cc-by-nd-2.0", + "cc-by-nd-2.5", + "cc-by-nd-3.0", + "cc-by-nd-4.0", + "cc-by-nc-1.0", + "cc-by-nc-2.0", + "cc-by-nc-2.5", + 
"cc-by-nc-3.0", + "cc-by-nc-4.0", + "cc-by-nc-nd-1.0", + "cc-by-nc-nd-2.0", + "cc-by-nc-nd-2.5", + "cc-by-nc-nd-3.0-igo", + "cc-by-nc-nd-3.0", + "cc-by-nc-nd-4.0", + "cc-by-nc-sa-1.0", + "cc-by-nc-sa-2.0", + "cc-by-nc-sa-2.5", + "cc-by-nc-sa-3.0", + "cc-by-nc-sa-4.0", + "cc-by-sa-1.0", + "cc-by-sa-2.0-uk", + "cc-by-sa-2.0", + "cc-by-sa-2.5", + "cc-by-sa-3.0", + "cc-by-sa-4.0", + "cc-by-sa-3.0-at", + "cc-pddc", + "cc0-1.0", + "crossword", + "cal-1.0", + "cal-1.0-combined-work-exception", + "crystalstacker", + "cube", + "doc", + "dsdp", + "dsl", + "d-fsl-1.0", + "wtfpl", + "dotseqn", + "epics", + "eudatagrid", + "epl-1.0", + "epl-2.0", + "ecl-1.0", + "ecl-2.0", + "efl-1.0", + "efl-2.0", + "mit-advertising", + "entessa", + "erlpl-1.1", + "etalab-2.0", + "eurofound", + "eupl-1.0", + "eupl-1.1", + "eupl-1.2", + "eurosym", + "fsfap", + "fsful", + "fsfullr", + "fair", + "frameworx-1.0", + "zenodo-freetoread-1.0", + "freeimage", + "ftl", + "gl2ps", + "agpl-3.0-only", + "agpl-3.0-or-later", + "gfdl-1.1-only", + "gfdl-1.1-invariants-only", + "gfdl-1.1-no-invariants-only", + "gfdl-1.1-or-later", + "gfdl-1.1-invariants-or-later", + "gfdl-1.1-no-invariants-or-later", + "gfdl-1.2-only", + "gfdl-1.2-invariants-only", + "gfdl-1.2-no-invariants-only", + "gfdl-1.2-or-later", + "gfdl-1.2-invariants-or-later", + "gfdl-1.2-no-invariants-or-later", + "gfdl-1.3-only", + "gfdl-1.3-invariants-only", + "gfdl-1.3-no-invariants-only", + "gfdl-1.3-or-later", + "gfdl-1.3-invariants-or-later", + "gfdl-1.3-no-invariants-or-later", + "gpl-1.0-only", + "gpl-1.0-or-later", + "gpl-2.0-only", + "gpl-2.0-or-later", + "gpl-3.0-only", + "gpl-3.0-or-later", + "lgpl-2.1-only", + "lgpl-2.1-or-later", + "lgpl-3.0-only", + "lgpl-3.0-or-later", + "lgpl-2.0-only", + "lgpl-2.0-or-later", + "geogratis", + "giftware", + "glulxe", + "glwtpl", + "htmltidy", + "haskellreport", + "hesa-withrights", + "hippocratic-2.1", + "hpnd", + "hpnd-sell-variant", + "ibm-pibs", + "ipl-1.0", + "icu", + "ipa", + "isc", + 
"imagemagick", + "imlib2", + "ijg", + "info-zip", + "intel-acpi", + "intel", + "interbase-1.0", + "user-jsim", + "json", + "jabber-osl", + "jpnic", + "jasper-2.0", + "lppl-1.0", + "lppl-1.1", + "lppl-1.2", + "lppl-1.3a", + "lppl-1.3c", + "latex2e", + "bsd-3-clause-lbnl", + "leptonica", + "lgpllr", + "lal-1.2", + "lal-1.3", + "liliq-p-1.1", + "liliq-rplus-1.1", + "liliq-r-1.1", + "notspecified", + "linux-openib", + "localauth-withrights", + "lucent-plan9", + "lpl-1.0", + "lpl-1.02", + "mitnfa", + "mit", + "mit-0", + "mit-open-group", + "mitre", + "makeindex", + "mtll", + "met-office-cp", + "ms-pl", + "ms-rl", + "motosoto", + "mpl-1.0", + "mpl-1.1", + "mpl-2.0", + "mpl-2.0-no-copyleft-exception", + "mulanpsl-1.0", + "mulanpsl-2.0", + "multics", + "mup", + "nasa-1.3", + "nist-pd", + "nist-pd-fallback", + "nrl", + "ntp", + "ntp-0", + "naumen", + "nbpl-1.0", + "net-snmp", + "netcdf", + "ngpl", + "nosl", + "npl-1.0", + "npl-1.1", + "newsletr", + "geo-no-fee-unrestricted", + "nlpl", + "nokia", + "ncgl-uk-2.0", + "nposl-3.0", + "nlod-1.0", + "noweb", + "oclc-2.0", + "odbl-1.0", + "pddl-1.0", + "ogc-1.0", + "oset-pl-2.1", + "occt-pl", + "odc-by-1.0", + "ogl-canada-2.0", + "ogl-uk-1.0", + "ogl-uk-2.0", + "ogl-uk-3.0", + "ogtsl", + "oldap-2.2.2", + "oldap-1.1", + "oldap-1.2", + "oldap-1.3", + "oldap-1.4", + "oldap-2.0", + "oldap-2.0.1", + "oldap-2.1", + "oldap-2.2", + "oldap-2.2.1", + "oldap-2.3", + "oldap-2.4", + "oldap-2.5", + "oldap-2.6", + "oldap-2.7", + "oldap-2.8", + "oml", + "opl-1.0", + "osl-1.0", + "osl-1.1", + "osl-2.0", + "osl-2.1", + "osl-3.0", + "o-uda-1.0", + "openssl", + "other-at", + "other-nc", + "other-closed", + "other-open", + "other-pd", + "php-3.0", + "php-3.01", + "libpng-2.0", + "plexus", + "polyform-noncommercial-1.0.0", + "polyform-small-business-1.0.0", + "postgresql", + "python-2.0", + "psf-2.0", + "qpl-1.0", + "qhull", + "qtpl", + "rsa-md", + "rdisc", + "rpsl-1.0", + "rpl-1.1", + "rpl-1.5", + "rhecos-1.1", + "rscpl", + "ruby", + "scea", + 
"sgi-b-1.0", + "sgi-b-1.1", + "sgi-b-2.0", + "ofl-1.0", + "ofl-1.0-rfn", + "ofl-1.0-no-rfn", + "ofl-1.1", + "ofl-1.1-rfn", + "ofl-1.1-no-rfn", + "snia", + "blessing", + "ssh-openssh", + "ssh-short", + "sax-pd", + "saxpath", + "swl", + "smppl", + "sendmail", + "sendmail-8.23", + "sspl-1.0", + "simpl-2.0", + "sleepycat", + "shl-0.5", + "shl-0.51", + "spencer-86", + "spencer-94", + "spencer-99", + "smlnj", + "dli-model-use", + "sugarcrm-1.1.3", + "sissl", + "sissl-1.2", + "spl-1.0", + "watcom-1.0", + "tapr-ohl-1.0", + "tcl", + "tcp-wrappers", + "tmate", + "torque-1.1", + "tu-berlin-1.0", + "tu-berlin-2.0", + "miros", + "parity-6.0.0", + "parity-7.0.0", + "unlicense", + "tosl", + "ukclickusepsi", + "ukcrown", + "ukcrown-withrights", + "ukpsi", + "unicode-dfs-2015", + "unicode-dfs-2016", + "unicode-tou", + "upl-1.0", + "ncsa", + "ucl-1.0", + "vostrom", + "vim", + "vsl-1.0", + "w3c-20150513", + "w3c-19980720", + "w3c", + "wsuipa", + "xnet", + "x11", + "xfree86-1.1", + "xpp", + "xskat", + "xerox", + "ypl-1.0", + "ypl-1.1", + "zed", + "zend-2.0", + "zimbra-1.3", + "zimbra-1.4", + "zpl-1.1", + "zpl-2.0", + "zpl-2.1", + "bzip2-1.0.5", + "bzip2-1.0.6", + "copyleft-next-0.3.0", + "copyleft-next-0.3.1", + "curl", + "diffmark", + "dvipdfm", + "egenix", + "mit-enna", + "mit-feh", + "gsoap-1.3b", + "gnuplot", + "imatix", + "libpng", + "libselinux-1.0", + "libtiff", + "mpich2", + "psfrag", + "psutils", + "wxwindows", + "xinetd", + "zlib", + "zlib-acknowledgement", +] +def is_zenodo_license(license: str) -> TypeGuard[LicenseName]: + return license in LicenseName.__args__ + +class _ZenodoEntryId(pydantic.BaseModel): + id: int + + @pydantic.model_serializer + def serialize_model(self) -> int: + return self.id + + @pydantic.model_validator(mode="before") + @classmethod + def parse_int(cls, value: Any) -> Mapping[str, int]: + if isinstance(value, int): + id = value + elif isinstance(value, str): + id = int(value) + elif isinstance(value, Mapping): + return value # pyright: ignore + 
else: + raise TypeError(f"Expected int or str, found {type(value).__name__}") + return {"id": id} + + def __str__(self) -> str: + return str(self.id) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, self.__class__): + return False + return self.id == other.id + + def __lt__(self, other: object) -> bool: + if not isinstance(other, self.__class__): + return NotImplemented + return self.id < other.id + +class ConceptId(_ZenodoEntryId): + pass + +class RecordId(_ZenodoEntryId): + pass + +Id = TypeVar("Id", ConceptId, RecordId) +class ZenodoDoi(pydantic.BaseModel, Generic[Id]): + prefix: str + id: Id + + def as_str(self) -> str: + return self.serialize_model() + + @pydantic.model_serializer + def serialize_model(self) -> str: + return f"{self.prefix}/zenodo.{self.id}" + + @pydantic.model_validator(mode="before") + @classmethod + def parse_str(cls, value: Any) -> Mapping[str, "str | int"]: + if isinstance(value, Mapping): + return value # pyright: ignore + if isinstance(value, str): + parts = value.split("/zenodo.") + return { + "prefix": parts[0], + "id": int(parts[1]) + } + raise TypeError(f"Expected str or mapping, found {type(value).__name__}") diff --git a/bioimageio_collection_backoffice/zenodo/request_args.py b/bioimageio_collection_backoffice/zenodo/request_args.py new file mode 100644 index 00000000..f210107d --- /dev/null +++ b/bioimageio_collection_backoffice/zenodo/request_args.py @@ -0,0 +1,47 @@ +from typing import List, Optional, Literal, Any +import datetime + + +import pydantic + +from bioimageio_collection_backoffice.zenodo.metadata import LicenseName + + +class RecordCreator(pydantic.BaseModel): + name: str + affiliation: Optional[str] = None + orcid: Optional[str] = None + gnd: Optional[str] = None + +class _MetadataArgsBase(pydantic.BaseModel): + title: str + description: str + creators: List[RecordCreator] + keywords: Optional[List[str]] + notes: Optional[str] + publication_date: datetime.date + prereserve_doi: bool + + 
@pydantic.field_serializer('publication_date') + def serialize_publication_date(self, date: datetime.date, _info: Any): + return date.isoformat() + +class _SoftwareMetadataArgs(_MetadataArgsBase): + upload_type: Literal["software"] = "software" + +class _DatasetMetadataArgs(_MetadataArgsBase): + upload_type: Literal["dataset"] = "dataset" + +class _OpenAccess(pydantic.BaseModel): + access_right: Literal["open"] = "open" + license: LicenseName + +# Subclassing and multi-inheritance was the most natural way of +# flattening fields into the metadata base class + +class OpenAccessSoftwareMetadataArgs(_SoftwareMetadataArgs, _OpenAccess): + pass + +class OpenAccessDatasetMetadataArgs(_DatasetMetadataArgs, _OpenAccess): + pass + diff --git a/bioimageio_collection_backoffice/zenodo/response.py b/bioimageio_collection_backoffice/zenodo/response.py new file mode 100644 index 00000000..93ed5dd1 --- /dev/null +++ b/bioimageio_collection_backoffice/zenodo/response.py @@ -0,0 +1,48 @@ +from typing import Sequence, Optional +from urllib.parse import urlparse, ParseResult + +import pydantic + +from bioimageio_collection_backoffice.zenodo.metadata import ConceptId, RecordId, ZenodoDoi + +class RecordLinks(pydantic.BaseModel): + bucket: Optional[ParseResult] = None + + @pydantic.field_validator("bucket", mode="before") + @classmethod + def deserialize_url(cls, raw_url: Optional[str]) -> Optional[ParseResult]: + if raw_url is None: + return None + return urlparse(raw_url) + +class PrereservedDoi(pydantic.BaseModel): + doi: ZenodoDoi[RecordId] + +class RecordMetadata(pydantic.BaseModel): + prereserve_doi: PrereservedDoi + +class Record(pydantic.BaseModel): + concept_id: ConceptId = pydantic.Field(alias='conceptrecid') + id: RecordId + state: str + links: RecordLinks + metadata: RecordMetadata + doi: Optional[ZenodoDoi[RecordId]] = None + conceptdoi: Optional[ZenodoDoi[ConceptId]] = None + +class QueriedRecordMetadata(pydantic.BaseModel): + prereserved_doi: PrereservedDoi + +# When 
querying zenodo, records show in a different format than when e.g. creating them +class QueriedRecord(pydantic.BaseModel): + concept_record_id: ConceptId = pydantic.Field(alias='conceptrecid') + id: RecordId + state: str + +class RecordQueryResult(pydantic.BaseModel): + hits: Sequence[QueriedRecord] + +class RecordQueryResponse(pydantic.BaseModel): + hits: RecordQueryResult + +