4 changes: 2 additions & 2 deletions src/dolphin/filtering.py
@@ -114,7 +114,7 @@ def filter_long_wavelength(
         quiet=True,
     )
 
-    filled_data = io.load_gdal(temp_dst, masked=True).filled(0)
+    filled_data = io.load_masked(temp_dst).filled(0)
 
     # Apply Gaussian filter
     lowpass_filtered = fft.fft2(filled_data, workers=workers)
@@ -277,7 +277,7 @@ def _filter_and_save(
     if cor_path is not None:
         bad_pixel_mask |= io.load_gdal(cor_path) < correlation_cutoff
     if conncomp_path is not None:
-        bad_pixel_mask |= io.load_gdal(conncomp_path, masked=True).astype(bool) == 0
+        bad_pixel_mask |= io.load_masked(conncomp_path).astype(bool) == 0
 
     unw = io.load_gdal(unw_filename)
     filt_arr = filter_long_wavelength(
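This call-site change repeats throughout the PR: code that previously requested masking through a keyword flag now calls a dedicated helper, while the downstream `.filled(...)` handling stays the same. A minimal before/after sketch (the path is hypothetical):

from dolphin import io

path = "some_raster.tif"  # hypothetical file

# Previously: filled_data = io.load_gdal(path, masked=True).filled(0)
# Now the masked read is its own helper; the .filled() call is unchanged:
filled_data = io.load_masked(path).filled(0)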
114 changes: 88 additions & 26 deletions src/dolphin/io/_core.py
@@ -48,6 +48,8 @@
     "get_raster_units",
     "get_raster_xysize",
     "load_gdal",
+    "load_masked",
+    "load_overview",
     "set_raster_description",
     "set_raster_metadata",
     "set_raster_nodata",
@@ -96,11 +98,9 @@ def load_gdal(
     *,
     band: Optional[int] = None,
     subsample_factor: Union[int, tuple[int, int]] = 1,
-    overview: Optional[int] = None,
     rows: Optional[slice] = None,
     cols: Optional[slice] = None,
-    masked: bool = False,
-) -> np.ndarray | np.ma.MaskedArray:
+) -> np.ndarray:
     """Load a gdal file into a numpy array.
 
     Parameters
@@ -112,38 +112,21 @@
     subsample_factor : int or tuple[int, int], optional
         Subsample the data by this factor. Default is 1 (no subsampling).
         Uses nearest neighbor resampling.
-    overview: int, optional
-        If passed, will load an overview of the file.
-        Raster must have existing overviews, or ValueError is raised.
     rows : slice, optional
         Rows to load. Default is None (load all rows).
     cols : slice, optional
         Columns to load. Default is None (load all columns).
-    masked : bool, optional
-        If True, return a masked array using the raster's `nodata` value.
-        Default is False.
 
     Returns
     -------
-    arr : np.ndarray or np.ma.MaskedArray
+    arr : np.ndarray
         Array of shape (bands, y, x) or (y, x) if `band` is specified,
         where y = height // subsample_factor and x = width // subsample_factor.
 
     """
     ds = _get_gdal_ds(filename)
     nrows, ncols = ds.RasterYSize, ds.RasterXSize
 
-    if overview is not None:
-        # We can handle the overviews most easily
-        bnd = ds.GetRasterBand(band or 1)
-        ovr_count = bnd.GetOverviewCount()
-        if ovr_count > 0:
-            idx = ovr_count + overview if overview < 0 else overview
-            out = bnd.GetOverview(idx).ReadAsArray()
-            bnd = ds = None
-            return out
-        logger.warning(f"Requested {overview = }, but none found for {filename}")
-
     # if rows or cols are not specified, load all rows/cols
     rows = slice(0, nrows) if rows in (None, slice(None)) else rows
     cols = slice(0, ncols) if cols in (None, slice(None)) else cols
@@ -184,14 +167,93 @@ def load_gdal(
         bnd = ds.GetRasterBand(band)
         bnd.ReadAsArray(xoff, yoff, xsize, ysize, buf_obj=out, resample_alg=resamp)
 
-    if not masked:
-        return out
-    # Get the nodata value
+    return out
+
+
+def load_masked(
+    filename: Filename,
+    *,
+    band: Optional[int] = None,
+    subsample_factor: Union[int, tuple[int, int]] = 1,
+    rows: Optional[slice] = None,
+    cols: Optional[slice] = None,
+) -> np.ma.MaskedArray:
+    """Load a gdal file into a numpy array, and mask the nodata value.
+
+    Parameters
+    ----------
+    filename : str or Path
+        Path to the file to load.
+    band : int, optional
+        Band to load. If None, load all bands as 3D array.
+    subsample_factor : int or tuple[int, int], optional
+        Subsample the data by this factor. Default is 1 (no subsampling).
+        Uses nearest neighbor resampling.
+    rows : slice, optional
+        Rows to load. Default is None (load all rows).
+    cols : slice, optional
+        Columns to load. Default is None (load all columns).
+
+    Returns
+    -------
+    arr : np.ma.MaskedArray
+        Array of shape (bands, y, x) or (y, x) if `band` is specified,
+        where y = height // subsample_factor and x = width // subsample_factor.
+        The nodata value is masked.
+
+    """
+    data = load_gdal(
+        filename,
+        band=band,
+        subsample_factor=subsample_factor,
+        rows=rows,
+        cols=cols,
+    )
     nd = get_raster_nodata(filename)
     if nd is not None and np.isnan(nd):
-        return np.ma.masked_invalid(out)
+        return np.ma.masked_invalid(data)
     else:
-        return np.ma.masked_equal(out, nd)
+        return np.ma.masked_equal(data, nd)
+
+
+def load_overview(filename: Filename, *, level: int = 1) -> np.ndarray:
+    """Load an overview from a gdal file into a numpy array.
+
+    Parameters
+    ----------
+    filename : str or Path
+        Path to the file to load.
+    level : int, optional
+        Level of the overview to load. Negative values count back from the
+        coarsest overview, like negative list indexing.
+
+    Returns
+    -------
+    arr : np.ndarray
+        Array of shape (y, x), where y = height // 2**level and
+        x = width // 2**level.
+
+    Raises
+    ------
+    ValueError
+        If the raster has no overviews, or the requested level doesn't exist.
+
+    """
+    ds = _get_gdal_ds(filename)
+    bnd = ds.GetRasterBand(1)
+    ovr_count = bnd.GetOverviewCount()
+    assert isinstance(ovr_count, int)
+    if ovr_count == 0:
+        msg = f"No overviews found for {filename}"
+        raise ValueError(msg)
+    idx = ovr_count + level if level < 0 else level
+    if idx < 0 or idx >= ovr_count:
+        msg = (
+            f"Requested {level = }, but only {ovr_count} overviews found for {filename}"
+        )
+        raise ValueError(msg)
+    out = bnd.GetOverview(idx).ReadAsArray()
+    bnd = ds = None
+    return out
 
 
 def format_nc_filename(filename: Filename, ds_name: Optional[str] = None) -> str:
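With the split, `load_gdal` always returns a plain `np.ndarray`, while the masked and overview reads live in their own helpers. A usage sketch under the new API (the file name and overview level are illustrative only):

import numpy as np

from dolphin import io

path = "unwrapped_phase.tif"  # hypothetical file

# Masked read: nodata pixels come back masked, and the caller picks the fill.
unw = io.load_masked(path).filled(np.nan)

# Overview read: requires pre-built overviews; negative levels count back from
# the coarsest one, and a missing level now raises ValueError instead of the
# old warn-and-load-full-resolution fallback.
try:
    coarse = io.load_overview(path, level=-1)
except ValueError:
    coarse = io.load_gdal(path)  # no overviews: read the full raster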
5 changes: 3 additions & 2 deletions src/dolphin/io/_readers.py
@@ -958,14 +958,15 @@ def read_stack(
         """Read in the SLC stack."""
         if masked is None:
             masked = self._read_masked
-        data = io.load_gdal(
+        read_func = io.load_masked if masked else io.load_gdal
+        data = read_func(
             self.outfile,
             band=band,
             subsample_factor=subsample_factor,
             rows=rows,
             cols=cols,
-            masked=masked,
         )
 
         # Check to get around gdal `ds.ReadAsArray()` squashing dimensions
         if len(self) == 1 and keepdims:
             # Add the front (1,) dimension which is missing for a single file
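The reader keeps its public `masked` flag; only the internals change, picking whichever loader applies since both share the same keyword arguments. A standalone sketch of the same dispatch idiom (function name is illustrative):

import numpy as np
from dolphin import io

def read(filename, masked: bool = False) -> np.ndarray | np.ma.MaskedArray:
    # Choose the loader once, then pass the shared arguments through.
    read_func = io.load_masked if masked else io.load_gdal
    return read_func(filename, band=1)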
4 changes: 2 additions & 2 deletions src/dolphin/masking.py
@@ -112,7 +112,7 @@ def combine_mask_files(
 
     for input_convention, mask_file in zip(input_conventions, mask_files):
         # TODO: if we separate input missing data from mask 1/0, this changes
-        mask = io.load_gdal(mask_file, masked=True).astype(bool)
+        mask = io.load_masked(mask_file).astype(bool)
         # Fill with "mask" value
         mask = mask.filled(bool(input_convention.value))
         if input_convention != MaskConvention.NUMPY:
@@ -160,7 +160,7 @@ def load_mask_as_numpy(mask_file: PathOrStr) -> np.ndarray:
 
     """
     # The mask file will have 0s at invalid data, 1s at good
-    nodata_mask = io.load_gdal(mask_file, masked=True).astype(bool).filled(False)
+    nodata_mask = io.load_masked(mask_file).astype(bool).filled(False)
     # invert the mask so Trues are the missing data pixels
     nodata_mask = ~nodata_mask
     return nodata_mask
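The fill value chosen before inverting matters here: nodata pixels must end up flagged as missing data. A numpy-only sketch of the pattern in `load_mask_as_numpy`, with a synthetic array standing in for `io.load_masked(mask_file)`:

import numpy as np

# Stand-in for io.load_masked(mask_file): 1 = good, 0 = invalid,
# with the third pixel masked as nodata.
raw = np.ma.MaskedArray([1, 0, 1, 1], mask=[False, False, True, False])

# Nodata is treated like invalid data (filled with False), then the result
# is inverted so True marks the missing-data pixels.
good = raw.astype(bool).filled(False)  # [ True, False, False,  True]
nodata_mask = ~good                    # [False,  True,  True, False]
print(nodata_mask)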
6 changes: 3 additions & 3 deletions src/dolphin/ps.py
@@ -253,7 +253,7 @@ def _use_existing_files(
     output_amp_dispersion_file: Filename,
     amp_dispersion_threshold: float,
 ) -> None:
-    amp_disp = io.load_gdal(existing_amp_dispersion_file, masked=True)
+    amp_disp = io.load_masked(existing_amp_dispersion_file)
     ps = amp_disp < amp_dispersion_threshold
     ps = ps.astype(np.uint8)
     # Set the PS nodata value to the max uint8 value
@@ -308,7 +308,7 @@ def multilook_ps_files(
     if Path(ps_out_path).exists():
         logger.info(f"{ps_out_path} exists, skipping.")
     else:
-        ps_mask = io.load_gdal(ps_mask_file, masked=True).astype(bool)
+        ps_mask = io.load_masked(ps_mask_file).astype(bool)
         ps_mask_looked = utils.take_looks(
             ps_mask, strides["y"], strides["x"], func_type="any", edge_strategy="pad"
         )
@@ -330,7 +330,7 @@
     if amp_disp_out_path.exists():
         logger.info(f"{amp_disp_out_path} exists, skipping.")
     else:
-        amp_disp = io.load_gdal(amp_dispersion_file, masked=True)
+        amp_disp = io.load_masked(amp_dispersion_file)
         # We use `nanmin` assuming that the multilooked PS is using
         # the strongest PS (the one with the lowest amplitude dispersion)
         amp_disp_looked = utils.take_looks(
10 changes: 5 additions & 5 deletions src/dolphin/timeseries.py
@@ -270,7 +270,7 @@ def _redo_reference(
     ref_date = secondary_dates[extra_ref_idx]
     logger.info(f"Re-referencing later timeseries files to {ref_date}")
     extra_ref_img = inverted_phase_paths[extra_ref_idx]
-    ref = io.load_gdal(extra_ref_img, masked=True)
+    ref = io.load_masked(extra_ref_img)
 
     # Use a temp directory while re-referencing
     extra_out_dir = inverted_phase_paths[0].parent / "extra"
@@ -285,7 +285,7 @@
         cur_img = inverted_phase_paths[idx]
         new_stem = format_dates(ref_date, secondary_dates[idx])
         cur_output_name = extra_out_dir / f"{new_stem}.tif"
-        cur = io.load_gdal(cur_img, masked=True)
+        cur = io.load_masked(cur_img)
         new_out = cur - ref
         io.write_arr(
             arr=new_out,
@@ -341,7 +341,7 @@ def _convert_and_reference(
         if target.exists():  # Check to prevent overwriting
             continue
 
-        arr_radians = io.load_gdal(p, masked=True)
+        arr_radians = io.load_masked(p)
         nodataval = io.get_raster_nodata(p)
         # Reference to the
         ref_value = arr_radians.filled(np.nan)[ref_row, ref_col]
@@ -1258,7 +1258,7 @@ def select_reference_point(
         return ref_point
 
     logger.info("Selecting reference point")
-    quality_file_values = io.load_gdal(quality_file, masked=True)
+    quality_file_values = io.load_masked(quality_file)
 
     # Start with all points as valid candidates
     isin_largest_conncomp = np.ones(quality_file_values.shape, dtype=bool)
@@ -1349,7 +1349,7 @@ def intersect_conncomp(arr: np.ma.MaskedArray, axis: int) -> np.ndarray:
         read_masked=True,
     )
 
-    conncomp_intersection = io.load_gdal(conncomp_intersection_file, masked=True)
+    conncomp_intersection = io.load_masked(conncomp_intersection_file)
 
     # Find the largest conncomp region in the intersection
     label, n_labels = ndimage.label(
10 changes: 5 additions & 5 deletions src/dolphin/unwrap/_unwrap.py
@@ -318,7 +318,7 @@ def unwrap(
         unwrapper_corr_filename = Path(corr_filename)
         name_change = "."
 
-    ifg = io.load_gdal(ifg_filename, masked=True)
+    ifg = io.load_masked(ifg_filename)
     if unwrap_options.run_goldstein:
         suf = Path(unw_filename).suffix
         if suf == ".tif":
@@ -372,17 +372,17 @@
             str(pre_interp_unw_filename).split(".")[0] + (name_change + "unw" + suf)
         )
 
-        pre_interp_ifg = io.load_gdal(pre_interp_ifg_filename, masked=True).filled(0)
+        pre_interp_ifg = io.load_masked(pre_interp_ifg_filename).filled(0)
 
-        corr = io.load_gdal(corr_filename, masked=True).filled(0)
+        corr = io.load_masked(corr_filename).filled(0)
         cutoff = preproc_options.interpolation_cor_threshold
         logger.info(f"Masking pixels with correlation below {cutoff}")
         coherent_pixel_mask = corr >= cutoff
         if similarity_filename and (
             sim_cutoff := preproc_options.interpolation_similarity_threshold
         ):
             logger.info(f"Masking pixels with similarity below {sim_cutoff}")
-            sim = io.load_gdal(similarity_filename, masked=True).filled(0)
+            sim = io.load_masked(similarity_filename).filled(0)
             coherent_pixel_mask &= sim >= sim_cutoff
 
         logger.info(f"Interpolating {pre_interp_ifg_filename} -> {interp_ifg_filename}")
@@ -478,7 +478,7 @@ def unwrap(
         "Transferring ambiguity numbers from filtered/interpolated"
         f" ifg {unwrapper_unw_filename}"
     )
-    unw_arr = io.load_gdal(unwrapper_unw_filename, masked=True).filled(unw_nodata)
+    unw_arr = io.load_masked(unwrapper_unw_filename).filled(unw_nodata)
 
     final_arr = transfer_ambiguities(np.angle(ifg), unw_arr)
     final_arr[ifg.mask] = unw_nodata
6 changes: 3 additions & 3 deletions src/dolphin/unwrap/_unwrap_3d.py
@@ -41,12 +41,12 @@ def unwrap_spurt(
     # NOTE: we are working around spurt currently wanting "temporal_coherence.tif",
     # and a temporal coherence threshold.
     # we'll make our own mask of 0=bad, 1=good, then pass a threshold of 0.5
-    temp_coh = io.load_gdal(temporal_coherence_filename, masked=True).filled(0)
+    temp_coh = io.load_masked(temporal_coherence_filename).filled(0)
     # Mark the "bad" pixels (good=1, bad=0, following the unwrapper mask convention)
     temp_coh_mask = temp_coh > options.temporal_coherence_threshold
     combined_mask = temp_coh_mask
     if similarity_filename and options.similarity_threshold:
-        sim = io.load_gdal(similarity_filename, masked=True).filled(0)
+        sim = io.load_masked(similarity_filename).filled(0)
         sim_mask = sim > options.similarity_threshold
         # A good pixel can have good similarity, or good temp. coherence
         combined_mask = combined_mask | sim_mask
@@ -167,7 +167,7 @@ def _create_conncomps_from_mask(
     unw_filenames: Sequence[PathOrStr],
     dilate_by: int = 25,
 ) -> list[Path]:
-    arr = io.load_gdal(temporal_coherence_filename, masked=True)
+    arr = io.load_masked(temporal_coherence_filename)
     good_pixels = arr > temporal_coherence_threshold
     strel = np.ones((dilate_by, dilate_by))
     # "1" pixels will be spread out and have (approximately) 1.0 in surrounding pixels
6 changes: 3 additions & 3 deletions src/dolphin/workflows/single.py
@@ -379,7 +379,7 @@ def _get_ps_mask(
     ps_mask_file: Optional[Filename], nrows: int, ncols: int
 ) -> np.ndarray:
     if ps_mask_file is not None:
-        ps_mask = io.load_gdal(ps_mask_file, masked=True)
+        ps_mask = io.load_masked(ps_mask_file)
         # Fill the nodata values with false
         ps_mask = ps_mask.astype(bool).filled(False)
     else:
@@ -393,8 +393,8 @@ def _get_amp_mean_variance(
 ) -> tuple[np.ndarray, np.ndarray]:
     if amp_mean_file is not None and amp_dispersion_file is not None:
         # Note: have to fill, since numba (as of 0.57) can't do masked arrays
-        amp_mean = io.load_gdal(amp_mean_file, masked=True).filled(np.nan)
-        amp_dispersion = io.load_gdal(amp_dispersion_file, masked=True).filled(np.nan)
+        amp_mean = io.load_masked(amp_mean_file).filled(np.nan)
+        amp_dispersion = io.load_masked(amp_dispersion_file).filled(np.nan)
         # convert back to variance from dispersion: amp_disp = std_dev / mean
         amp_variance = (amp_dispersion * amp_mean) ** 2
     else: