diff --git a/CHANGELOG.md b/CHANGELOG.md index a2676b507..5c8d92878 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- In bounding box `spatial_extent`s of `load_collection`, `load_stac`, ...: normalize non-standard "EPSG:123"-style CRS strings to proper integer value ([#259](https://github.com/Open-EO/openeo-python-client/issues/259), [Open-EO/openeo-processes#391](https://github.com/Open-EO/openeo-processes/issues/391)) + ### Removed ### Fixed diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index 147454195..6c56cd37f 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -67,7 +67,14 @@ from openeo.rest.service import Service from openeo.rest.udp import RESTUserDefinedProcess from openeo.rest.vectorcube import VectorCube -from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339 +from openeo.util import ( + BBoxDict, + dict_no_none, + guess_format, + load_json, + normalize_crs, + rfc3339, +) if typing.TYPE_CHECKING: # Imports for type checking only (circular import issue at runtime). @@ -627,7 +634,7 @@ def filter_bbox( west, south, east, north = bbox.bounds elif isinstance(bbox, (list, tuple)) and len(bbox) == 4: west, south, east, north = bbox[:4] - elif isinstance(bbox, dict): + elif BBoxDict.is_compatible_dict(bbox): west, south, east, north = (bbox[k] for k in ["west", "south", "east", "north"]) if "crs" in bbox: crs = bbox["crs"] @@ -3038,12 +3045,8 @@ def _get_geometry_argument( return argument.from_node() elif allow_none and argument is None: return argument - elif ( - allow_bounding_box - and isinstance(argument, dict) - and all(k in argument for k in ["west", "south", "east", "north"]) - ): - return argument + elif allow_bounding_box and BBoxDict.is_compatible_dict(argument): + return BBoxDict.from_dict(argument, warn_on_crs_change=True) # Support URL based geometry references (with `load_url` and best-effort format guess) if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I): diff --git a/openeo/util.py b/openeo/util.py index 53550ccb8..c4e734494 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -6,6 +6,7 @@ from __future__ import annotations +import copy import datetime as dt import functools import json @@ -16,7 +17,7 @@ from collections import OrderedDict from enum import Enum from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Tuple, Union from urllib.parse import urljoin import requests @@ -523,10 +524,19 @@ class BBoxDict(dict): .. versionadded:: 0.10.1 """ - def __init__(self, *, west: float, south: float, east: float, north: float, crs: Optional[Union[str, int]] = None): + def __init__( + self, + *, + west: float, + south: float, + east: float, + north: float, + crs: Optional[Union[str, int]] = None, + warn_on_crs_change: bool = False, + ): super().__init__(west=west, south=south, east=east, north=north) if crs is not None: - self.update(crs=normalize_crs(crs)) + self.update(crs=normalize_crs(crs, warn_on_change=warn_on_crs_change)) # TODO: provide west, south, east, north, crs as @properties? Read-only or read-write? @@ -545,7 +555,12 @@ def from_any(cls, x: Any, *, crs: Optional[str] = None) -> BBoxDict: raise InvalidBBoxException(f"Can not construct BBoxDict from {x!r}") @classmethod - def from_dict(cls, data: dict) -> BBoxDict: + def is_compatible_dict(cls, data) -> bool: + """Check if given value is a valid bounding box dictionary.""" + return isinstance(data, dict) and all(k in data for k in ("west", "south", "east", "north")) + + @classmethod + def from_dict(cls, data: dict, *, warn_on_crs_change: bool = False) -> BBoxDict: """Build from dictionary with at least keys "west", "south", "east", and "north".""" expected_fields = {"west", "south", "east", "north"} # TODO: also support upper case fields? @@ -553,10 +568,18 @@ def from_dict(cls, data: dict) -> BBoxDict: missing = expected_fields.difference(data.keys()) if missing: raise InvalidBBoxException(f"Missing bbox fields {sorted(missing)}") + # TODO: also support parameters? invalid = {k: data[k] for k in expected_fields if not isinstance(data[k], (int, float))} if invalid: raise InvalidBBoxException(f"Non-numerical bbox fields {invalid}.") - return cls(west=data["west"], south=data["south"], east=data["east"], north=data["north"], crs=data.get("crs")) + return cls( + west=data["west"], + south=data["south"], + east=data["east"], + north=data["north"], + crs=data.get("crs"), + warn_on_crs_change=warn_on_crs_change, + ) @classmethod def from_sequence(cls, seq: Union[list, tuple], crs: Optional[str] = None) -> BBoxDict: @@ -626,7 +649,7 @@ def get(self, fraction: float) -> str: return f"{self.left}{bar:{self.fill}<{width}s}{self.right}" -def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str]: +def normalize_crs(crs: Any, *, use_pyproj: bool = True, warn_on_change: bool = False) -> Union[None, int, str]: """ Normalize the given value (describing a CRS or Coordinate Reference System) to an openEO compatible EPSG code (int) or WKT2 CRS string. @@ -654,16 +677,18 @@ def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str] :param use_pyproj: whether ``pyproj`` should be leveraged at all (mainly useful for testing the "no pyproj available" code path) + :param warn_on_change: whether to emit a warning when the normalization involves a change in value. + :return: EPSG code as int, or WKT2 string. Or None if input was empty. :raises ValueError: When the given CRS data can not be parsed/converted/normalized. """ + orig_crs = copy.deepcopy(crs) if crs in (None, "", {}): - return None - - if pyproj and use_pyproj: + crs = None + elif pyproj and use_pyproj: try: # (if available:) let pyproj do the validation/parsing crs_obj = pyproj.CRS.from_user_input(crs) @@ -689,4 +714,8 @@ def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str] else: raise ValueError(f"Can not normalize CRS data {type(crs)}") + if warn_on_change and orig_crs != crs: + # TODO: is this warning actually useful? + logger.warning(f"Normalized CRS {orig_crs!r} to {crs!r}") + return crs diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index 397cbd70c..587388f1f 100644 --- a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -10,6 +10,7 @@ from contextlib import nullcontext from pathlib import Path +import dirty_equals import pytest import requests import requests_mock @@ -2703,6 +2704,31 @@ def test_load_collection_spatial_extent_vector_cube(self, dummy_backend): }, } + @pytest.mark.parametrize("crs", ["EPSG:4326", "epsg:4326", 4326, "4326"]) + def test_load_collection_normalize_epsg_crs(self, dummy_backend, crs, caplog): + caplog.set_level(level=logging.WARNING) + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": crs} + temporal_extent = ["2019-01-01", "2019-01-22"] + cube = dummy_backend.connection.load_collection( + "S2", spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=["B2", "B3"] + ) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "S2", + "spatial_extent": {"east": 3, "north": 4, "south": 2, "west": 1, "crs": 4326}, + "temporal_extent": ["2019-01-01", "2019-01-22"], + "bands": ["B2", "B3"], + }, + "result": True, + } + } + if crs != 4326: + assert f"Normalized CRS {crs!r} to 4326" in caplog.text + else: + assert "Normalized CRS" not in caplog.text def test_load_result(requests_mock): requests_mock.get(API_URL, json={"api_version": "1.0.0"}) @@ -3026,6 +3052,23 @@ def test_load_stac_spatial_extent_bbox(self, dummy_backend): "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4}, } + @pytest.mark.parametrize("crs", ["EPSG:32632", "epsg:32632", 32632, "32632"]) + def test_load_stac_spatial_extent_bbox_normalize_epsg_crs(self, dummy_backend, crs, caplog): + caplog.set_level(level=logging.WARNING) + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": crs} + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4, "crs": 32632}, + } + + if crs != 32632: + assert f"Normalized CRS {crs!r} to 32632" in caplog.text + else: + assert "Normalized CRS" not in caplog.text + @pytest.mark.parametrize( "spatial_extent", [ diff --git a/tests/test_util.py b/tests/test_util.py index 4b53f9246..9002aba50 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -693,6 +693,26 @@ def test_to_json(self): d = BBoxDict(west=1, south=2, east=3, north=4) assert json.dumps(d) == '{"west": 1, "south": 2, "east": 3, "north": 4}' + @pytest.mark.parametrize("crs", ["EPSG:4326", "epsg:4326", 4326, "4326"]) + def test_from_dict_normalize_crs(self, crs): + d = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": crs} + assert BBoxDict.from_dict(d) == {"west": 1, "south": 2, "east": 3, "north": 4, "crs": 4326} + + @pytest.mark.parametrize("crs", ["EPSG:4326", "epsg:4326", 4326, "4326"]) + def test_from_dict_normalize_crs_warn_on_change(self, crs, caplog): + d = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": crs} + assert BBoxDict.from_dict(d, warn_on_crs_change=True) == { + "west": 1, + "south": 2, + "east": 3, + "north": 4, + "crs": 4326, + } + if crs != 4326: + assert f"Normalized CRS {crs!r} to 4326" in caplog.text + else: + assert "Normalized CRS" not in caplog.text + def test_to_bbox_dict_from_sequence(self): assert to_bbox_dict([1, 2, 3, 4]) == {"west": 1, "south": 2, "east": 3, "north": 4} assert to_bbox_dict((1, 2, 3, 4)) == {"west": 1, "south": 2, "east": 3, "north": 4}