Skip to content

Commit 967126a

Browse files
authored
Merge pull request #44 from trchudley/development
v1.0.1
2 parents 8501769 + 20f8df3 commit 967126a

File tree

5 files changed

+37
-11
lines changed

5 files changed

+37
-11
lines changed

docs/appendix/faq.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,7 @@ We would encourage users who are interested in larger-scale analysis to get in t
1717
Strips can appear empty (i.e. `NaN` values) when photogrammetry fails (due to cloud, water, etc) or it is masked by the PGC bitmask. Data previewed by the `pdt.load.preview()` function is masked according to this bitmask, and data loaded by the `pdt.load.from_search()` function is also masked by default (this can be disabled by using the `bitmask = False` option).
1818

1919
As a result, it is entirely possible that the `search()` function can return a valid datastrip that covers a sufficient proportion of the AOI to meet the `min_aoi_frac` requirements, but it will appear empty (i.e. all-`NaN`) when viewed. The `preview()` function will help you identify these 'empty' scenes, but there may still be (poor-quality) data present if it is downloaded using the `load.from_search()` function with `bitmask = False`.
20+
21+
#### Q: Can I load DEMs as `dask` arrays, and/or enable lazy evaluation for downloads?
22+
23+
Yes! Like the [`rioxarray.load_rasterio()`](https://corteva.github.io/rioxarray/html/rioxarray.html#rioxarray-open-rasterio) function they wrap, `load.from_search()`, `load.mosaic()`, and `load.from_fpath()` accept a `chunks` variable (e.g.`(50, 50)`, `True`, `"auto"`), which triggers loading as a `dask` array (and, as a bonus, will result in 'lazy evaluation', where the data is not downloaded or computed until it is needed by a further command).

notebooks/get_icebergs.ipynb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@
175175
"\n",
176176
"if not os.path.exists(out_fpath):\n",
177177
" dem = pdt.load.from_search(gdf.iloc[i], bounds=bounds, bitmask=True)\n",
178-
" dem.compute() # rioxarray uses lazy evaluation, so we can force the download using the `.compute()` function.\n",
179178
" dem.rio.to_raster(out_fpath, compress='ZSTD', predictor=3, zlevel=1)\n",
180179
" \n",
181180
"else:\n",
@@ -492,7 +491,7 @@
492491
"name": "python",
493492
"nbconvert_exporter": "python",
494493
"pygments_lexer": "ipython3",
495-
"version": "3.12.4"
494+
"version": "3.12.10"
496495
}
497496
},
498497
"nbformat": 4,

notebooks/strip_search.ipynb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -514,9 +514,7 @@
514514
" if not os.path.exists(out_fpath):\n",
515515
" \n",
516516
" dem = pdt.load.from_search(gdf_row, bounds=bounds, bitmask=True)\n",
517-
" \n",
518-
" dem.compute() # rioxarray uses lazy evaluation, so we can force the download using the `.compute()` function.\n",
519-
" \n",
517+
" \n",
520518
" dem.rio.to_raster(out_fpath, compress='ZSTD', predictor=3, zlevel=1)\n",
521519
" \n",
522520
" return dem\n",
@@ -533,6 +531,9 @@
533531
"source": [
534532
"Note, these are 2 m strips that will take a while to download! To save on added time when rerunning this notebook, I've added an additional test to the function: if we've already downloaded the DEM and saved it to the local directory, this function will instead load it from the local file location using the `load.from_fpath()` function.\n",
535533
"\n",
534+
"> ⚠️\n",
535+
"> **NOTE**: More advanced geospatial python users may wish to note that DEMs can be loaded as `dask` arrays, enabling lazy evaluation and only triggering download when required by a further command. This is done providing a `chunks` parameter to `load.from_search()`, `load.from_id()`, or `load.mosaic()` (e.g.`(50, 50)`, `True`, `\"auto\"`), as is the case for [the `rioxarray.load_rasterio()` function that it wraps](https://corteva.github.io/rioxarray/html/rioxarray.html#rioxarray-open-rasterio).\n",
536+
"\n",
536537
"Regardless, we can now use this function to select and download relevant rows from our geodataframe using the standard Pandax indexing method (`DataFrame.iloc[[i]]`, where `i` is the desired row index):"
537538
]
538539
},
@@ -704,7 +705,7 @@
704705
"name": "python",
705706
"nbconvert_exporter": "python",
706707
"pygments_lexer": "ipython3",
707-
"version": "3.12.4"
708+
"version": "3.12.10"
708709
}
709710
},
710711
"nbformat": 4,

src/pdemtools/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
from . import _geomorphometry
1414
from . import _utils
1515

16-
__version__ = "1.0.0"
16+
__version__ = "1.1.0"
1717

1818
__all__ = ["search", "DemAccessor"]

src/pdemtools/load.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def from_fpath(
4040
bounds: Optional[Union[tuple, Polygon]] = None,
4141
bitmask_fpath: Optional[str] = None,
4242
pad: Optional[bool] = False,
43+
chunks: Optional[Union[int, tuple, dict]] = None,
4344
) -> DataArray:
4445
"""Given a filepath (local or an AWS link), loads the desired ArcticDEM/REMA DEM
4546
strip as an ``xarray`` ``DataArray``. Option to filter to bounds and bitmask, if
@@ -59,6 +60,9 @@ def from_fpath(
5960
:param pad: If the DEM strip is not the full extent of the given bounds,
6061
pad with NaNs to match the full bounds. Defaults to False.
6162
:type pad: bool
63+
:param chunks: Chunk size for `rioxarray.open_rasterio`, triggering `dask`
64+
parallelisation and lazy evaluation/loading. Defaults to `None`.
65+
:type chunks: int | tuple | dict
6266
6367
:returns: xarray DataArray of DEM strip
6468
:rtype: DataArray
@@ -69,7 +73,7 @@ def from_fpath(
6973
raise ValueError("pad must be True or False")
7074

7175
# Open dataarray using rioxarray
72-
dem = rxr.open_rasterio(dem_fpath)
76+
dem = rxr.open_rasterio(dem_fpath, chunks=chunks)
7377

7478
# Convert shapely geometry to bounds
7579
if isinstance(bounds, Polygon):
@@ -169,6 +173,7 @@ def from_search(
169173
bounds: Optional[Union[tuple, Polygon]] = None,
170174
bitmask: Optional[bool] = True,
171175
pad: Optional[bool] = False,
176+
chunks: Optional[Union[int, tuple, dict]] = None,
172177
):
173178
"""Given a row from the GeoDataFrame output of ``pdemtools.search()``, loads the 2
174179
m DEM strip of the desired ArcticDEM/REMA DEM strip as an xarray DataArray.
@@ -190,6 +195,9 @@ def from_search(
190195
:param pad: If the DEM strip is not the full extent of the given bounds,
191196
pad with NaNs to match the full bounds. Defaults to False.
192197
:type pad: bool
198+
:param chunks: Chunk size for `rioxarray.open_rasterio`, triggering `dask`
199+
parallelisation and lazy evaluation/loading. Defaults to `None`.
200+
:type chunks: int | tuple | dict
193201
194202
:returns: xarray DataArray of DEM strip
195203
:rtype: DataArray
@@ -223,6 +231,7 @@ def from_search(
223231
bounds,
224232
bitmask_url,
225233
pad,
234+
chunks,
226235
)
227236

228237

@@ -236,6 +245,7 @@ def from_id(
236245
version: Optional[str] = "s2s041",
237246
preview: Optional[bool] = False,
238247
pad: Optional[bool] = False,
248+
chunks: Optional[Union[int, tuple, dict]] = None,
239249
) -> DataArray:
240250
"""An alternative method of loading the selected ArcticDEM/REMA strip, which
241251
requires only the geocell and the dem_id (e.g. ``geocell = 'n70w051'``, ``dem_id =
@@ -270,6 +280,9 @@ def from_id(
270280
:param pad: If the DEM strip is not the full extent of the given bounds,
271281
pad with NaNs to match the full bounds. Defaults to False.
272282
:type pad: bool
283+
:param chunks: Chunk size for `rioxarray.open_rasterio`, triggering `dask`
284+
parallelisation and lazy evaluation/loading. Defaults to `None`.
285+
:type chunks: int | tuple | dict
273286
274287
:return: xarray DataArray of DEM strip
275288
:rtype: DataArray
@@ -309,6 +322,7 @@ def from_id(
309322
bounds,
310323
bitmask_fpath,
311324
pad,
325+
chunks,
312326
)
313327

314328

@@ -317,6 +331,7 @@ def mosaic(
317331
resolution: Literal["2m", "10m", "32m"],
318332
bounds: Union[tuple, Polygon] = None,
319333
version: Optional[Literal["v2.0", "v3.0", "v4.1"]] = None,
334+
chunks: Optional[Union[int, tuple, dict]] = None,
320335
):
321336
"""Given a dataset, resolution, and bounding box, downloads the ArcticDEM or REMA
322337
mosiac from AWS.
@@ -333,6 +348,12 @@ def mosaic(
333348
:param bounds: Clip to bounds [xmin, ymin, xmax, ymax], in EPSG:3413 (ArcticDEM) or
334349
EPSG:3031 (REMA). Will accept a shapely geometry to extract bounds from.
335350
:type bounds: tuple | Polygon, optional
351+
:param chunks: Chunk size for `rioxarray.open_rasterio`, triggering `dask`
352+
parallelisation and lazy evaluation/loading. Defaults to `None`.
353+
:type chunks: int | tuple | dict
354+
355+
:return: xarray DataArray of DEM mosaic
356+
:rtype: DataArray
336357
"""
337358

338359
# sanity check that datset and versioning is correct versioning is valid for selected dataset
@@ -401,13 +422,14 @@ def mosaic(
401422
# load dem(s)
402423
dems = []
403424
for fpath in fpaths:
404-
dem = rxr.open_rasterio(fpath).rio.clip_box(*bounds)
425+
dem = rxr.open_rasterio(fpath, chunks=chunks).rio.clip_box(*bounds)
405426
dems.append(dem)
406427

407428
if len(fpaths) == 1:
408-
dem = rxr.open_rasterio(fpaths[0]).rio.clip_box(*bounds)
429+
dem = rxr.open_rasterio(fpaths[0], chunks=chunks).rio.clip_box(*bounds)
409430

410-
# If multiple dems, merge them
431+
# If multiple dems, merge them - NB I don't know whether this breaks lazy
432+
# evaluation for chunked data
411433
if len(dems) > 1:
412434
dem = merge_arrays(dems)
413435
else:

0 commit comments

Comments
 (0)