diff --git a/src/dolphin/filtering.py b/src/dolphin/filtering.py index ce0ea7350..40ede437a 100644 --- a/src/dolphin/filtering.py +++ b/src/dolphin/filtering.py @@ -114,7 +114,7 @@ def filter_long_wavelength( quiet=True, ) - filled_data = io.load_gdal(temp_dst, masked=True).filled(0) + filled_data = io.load_masked(temp_dst).filled(0) # Apply Gaussian filter lowpass_filtered = fft.fft2(filled_data, workers=workers) @@ -277,7 +277,7 @@ def _filter_and_save( if cor_path is not None: bad_pixel_mask |= io.load_gdal(cor_path) < correlation_cutoff if conncomp_path is not None: - bad_pixel_mask |= io.load_gdal(conncomp_path, masked=True).astype(bool) == 0 + bad_pixel_mask |= io.load_masked(conncomp_path).astype(bool) == 0 unw = io.load_gdal(unw_filename) filt_arr = filter_long_wavelength( diff --git a/src/dolphin/io/_core.py b/src/dolphin/io/_core.py index a7da3a8f5..171d9972e 100644 --- a/src/dolphin/io/_core.py +++ b/src/dolphin/io/_core.py @@ -48,6 +48,8 @@ "get_raster_units", "get_raster_xysize", "load_gdal", + "load_masked", + "load_overview", "set_raster_description", "set_raster_metadata", "set_raster_nodata", @@ -96,11 +98,9 @@ def load_gdal( *, band: Optional[int] = None, subsample_factor: Union[int, tuple[int, int]] = 1, - overview: Optional[int] = None, rows: Optional[slice] = None, cols: Optional[slice] = None, - masked: bool = False, -) -> np.ndarray | np.ma.MaskedArray: +) -> np.ndarray: """Load a gdal file into a numpy array. Parameters @@ -112,20 +112,14 @@ def load_gdal( subsample_factor : int or tuple[int, int], optional Subsample the data by this factor. Default is 1 (no subsampling). Uses nearest neighbor resampling. - overview: int, optional - If passed, will load an overview of the file. - Raster must have existing overviews, or ValueError is raised. rows : slice, optional Rows to load. Default is None (load all rows). cols : slice, optional Columns to load. Default is None (load all columns). - masked : bool, optional - If True, return a masked array using the raster's `nodata` value. - Default is False. Returns ------- - arr : np.ndarray or np.ma.MaskedArray + arr : np.ndarray Array of shape (bands, y, x) or (y, x) if `band` is specified, where y = height // subsample_factor and x = width // subsample_factor. @@ -133,17 +127,6 @@ def load_gdal( ds = _get_gdal_ds(filename) nrows, ncols = ds.RasterYSize, ds.RasterXSize - if overview is not None: - # We can handle the overviews most easily - bnd = ds.GetRasterBand(band or 1) - ovr_count = bnd.GetOverviewCount() - if ovr_count > 0: - idx = ovr_count + overview if overview < 0 else overview - out = bnd.GetOverview(idx).ReadAsArray() - bnd = ds = None - return out - logger.warning(f"Requested {overview = }, but none found for {filename}") - # if rows or cols are not specified, load all rows/cols rows = slice(0, nrows) if rows in (None, slice(None)) else rows cols = slice(0, ncols) if cols in (None, slice(None)) else cols @@ -184,14 +167,93 @@ def load_gdal( bnd = ds.GetRasterBand(band) bnd.ReadAsArray(xoff, yoff, xsize, ysize, buf_obj=out, resample_alg=resamp) - if not masked: - return out - # Get the nodata value + return out + + +def load_masked( + filename: Filename, + *, + band: Optional[int] = None, + subsample_factor: Union[int, tuple[int, int]] = 1, + rows: Optional[slice] = None, + cols: Optional[slice] = None, +) -> np.ma.MaskedArray: + """Load a gdal file into a numpy array, and mask the nodata value. + + Parameters + ---------- + filename : str or Path + Path to the file to load. + band : int, optional + Band to load. If None, load all bands as 3D array. + subsample_factor : int or tuple[int, int], optional + Subsample the data by this factor. Default is 1 (no subsampling). + Uses nearest neighbor resampling. + rows : slice, optional + Rows to load. Default is None (load all rows). + cols : slice, optional + Columns to load. Default is None (load all columns). + + Returns + ------- + arr : np.ma.MaskedArray + Array of shape (bands, y, x) or (y, x) if `band` is specified, + where y = height // subsample_factor and x = width // subsample_factor. + The nodata value is masked. + + """ + data = load_gdal( + filename, + band=band, + subsample_factor=subsample_factor, + rows=rows, + cols=cols, + ) nd = get_raster_nodata(filename) if nd is not None and np.isnan(nd): - return np.ma.masked_invalid(out) + return np.ma.masked_invalid(data) else: - return np.ma.masked_equal(out, nd) + return np.ma.masked_equal(data, nd) + + +def load_overview(filename: Filename, *, level: int = 1) -> np.ndarray: + """Load an overview from a gdal file into a numpy array. + + Parameters + ---------- + filename : str or Path + Path to the file to load. + level : int, optional + Level of the overview to load. + + Returns + ------- + arr : np.ndarray + Array of shape (bands, y, x) or (y, x) if `band` is specified, + where y = height // 2**level and x = width // 2**level. + + Raises + ------ + FileNotFoundError + If the overview doesn't exist. + + """ + ds = _get_gdal_ds(filename) + bnd = ds.GetRasterBand(1) + ovr_count = bnd.GetOverviewCount() + assert isinstance(ovr_count, int) + if ovr_count == 0: + msg = f"No overviews found for {filename}" + raise ValueError(msg) + idx = ovr_count + level if level < 0 else level + if idx < 0 or idx >= ovr_count: + msg = ( + f"Requested {level = }, but only {ovr_count} overviews found for {filename}" + ) + raise ValueError(msg) + out = bnd.GetOverview(idx).ReadAsArray() + bnd = ds = None + return out def format_nc_filename(filename: Filename, ds_name: Optional[str] = None) -> str: diff --git a/src/dolphin/io/_readers.py b/src/dolphin/io/_readers.py index 45e902844..0399616a7 100644 --- a/src/dolphin/io/_readers.py +++ b/src/dolphin/io/_readers.py @@ -958,14 +958,15 @@ def read_stack( """Read in the SLC stack.""" if masked is None: masked = self._read_masked - data = io.load_gdal( + read_func = io.load_masked if masked else io.load_gdal + data = read_func( self.outfile, band=band, subsample_factor=subsample_factor, rows=rows, cols=cols, - masked=masked, ) + # Check to get around gdal `ds.ReadAsArray()` squashing dimensions if len(self) == 1 and keepdims: # Add the front (1,) dimension which is missing for a single file diff --git a/src/dolphin/masking.py b/src/dolphin/masking.py index 1c6cda314..c230e29b1 100644 --- a/src/dolphin/masking.py +++ b/src/dolphin/masking.py @@ -112,7 +112,7 @@ def combine_mask_files( for input_convention, mask_file in zip(input_conventions, mask_files): # TODO: if we separate input missing data from mask 1/0, this changes - mask = io.load_gdal(mask_file, masked=True).astype(bool) + mask = io.load_masked(mask_file).astype(bool) # Fill with "mask" value mask = mask.filled(bool(input_convention.value)) if input_convention != MaskConvention.NUMPY: @@ -160,7 +160,7 @@ def load_mask_as_numpy(mask_file: PathOrStr) -> np.ndarray: """ # The mask file will by have 0s at invalid data, 1s at good - nodata_mask = io.load_gdal(mask_file, masked=True).astype(bool).filled(False) + nodata_mask = io.load_masked(mask_file).astype(bool).filled(False) # invert the mask so Trues are the missing data pixels nodata_mask = ~nodata_mask return nodata_mask diff --git a/src/dolphin/ps.py b/src/dolphin/ps.py index 2bd3204c3..c0409bb9b 100644 --- a/src/dolphin/ps.py +++ b/src/dolphin/ps.py @@ -253,7 +253,7 @@ def _use_existing_files( output_amp_dispersion_file: Filename, amp_dispersion_threshold: float, ) -> None: - amp_disp = io.load_gdal(existing_amp_dispersion_file, masked=True) + amp_disp = io.load_masked(existing_amp_dispersion_file) ps = amp_disp < amp_dispersion_threshold ps = ps.astype(np.uint8) # Set the PS nodata value to the max uint8 value @@ -308,7 +308,7 @@ def multilook_ps_files( if Path(ps_out_path).exists(): logger.info(f"{ps_out_path} exists, skipping.") else: - ps_mask = io.load_gdal(ps_mask_file, masked=True).astype(bool) + ps_mask = io.load_masked(ps_mask_file).astype(bool) ps_mask_looked = utils.take_looks( ps_mask, strides["y"], strides["x"], func_type="any", edge_strategy="pad" ) @@ -330,7 +330,7 @@ def multilook_ps_files( if amp_disp_out_path.exists(): logger.info(f"{amp_disp_out_path} exists, skipping.") else: - amp_disp = io.load_gdal(amp_dispersion_file, masked=True) + amp_disp = io.load_masked(amp_dispersion_file) # We use `nanmin` assuming that the multilooked PS is using # the strongest PS (the one with the lowest amplitude dispersion) amp_disp_looked = utils.take_looks( diff --git a/src/dolphin/timeseries.py b/src/dolphin/timeseries.py index cc0bf44bc..bbc70aff9 100644 --- a/src/dolphin/timeseries.py +++ b/src/dolphin/timeseries.py @@ -270,7 +270,7 @@ def _redo_reference( ref_date = secondary_dates[extra_ref_idx] logger.info(f"Re-referencing later timeseries files to {ref_date}") extra_ref_img = inverted_phase_paths[extra_ref_idx] - ref = io.load_gdal(extra_ref_img, masked=True) + ref = io.load_masked(extra_ref_img) # Use a temp directory while re-referencing extra_out_dir = inverted_phase_paths[0].parent / "extra" @@ -285,7 +285,7 @@ def _redo_reference( cur_img = inverted_phase_paths[idx] new_stem = format_dates(ref_date, secondary_dates[idx]) cur_output_name = extra_out_dir / f"{new_stem}.tif" - cur = io.load_gdal(cur_img, masked=True) + cur = io.load_masked(cur_img) new_out = cur - ref io.write_arr( arr=new_out, @@ -341,7 +341,7 @@ def _convert_and_reference( if target.exists(): # Check to prevent overwriting continue - arr_radians = io.load_gdal(p, masked=True) + arr_radians = io.load_masked(p) nodataval = io.get_raster_nodata(p) # Reference to the ref_value = arr_radians.filled(np.nan)[ref_row, ref_col] @@ -1258,7 +1258,7 @@ def select_reference_point( return ref_point logger.info("Selecting reference point") - quality_file_values = io.load_gdal(quality_file, masked=True) + quality_file_values = io.load_masked(quality_file) # Start with all points as valid candidates isin_largest_conncomp = np.ones(quality_file_values.shape, dtype=bool) @@ -1349,7 +1349,7 @@ def intersect_conncomp(arr: np.ma.MaskedArray, axis: int) -> np.ndarray: read_masked=True, ) - conncomp_intersection = io.load_gdal(conncomp_intersection_file, masked=True) + conncomp_intersection = io.load_masked(conncomp_intersection_file) # Find the largest conncomp region in the intersection label, n_labels = ndimage.label( diff --git a/src/dolphin/unwrap/_unwrap.py b/src/dolphin/unwrap/_unwrap.py index d32f27076..faf31f57b 100644 --- a/src/dolphin/unwrap/_unwrap.py +++ b/src/dolphin/unwrap/_unwrap.py @@ -318,7 +318,7 @@ def unwrap( unwrapper_corr_filename = Path(corr_filename) name_change = "." - ifg = io.load_gdal(ifg_filename, masked=True) + ifg = io.load_masked(ifg_filename) if unwrap_options.run_goldstein: suf = Path(unw_filename).suffix if suf == ".tif": @@ -372,9 +372,9 @@ def unwrap( str(pre_interp_unw_filename).split(".")[0] + (name_change + "unw" + suf) ) - pre_interp_ifg = io.load_gdal(pre_interp_ifg_filename, masked=True).filled(0) + pre_interp_ifg = io.load_masked(pre_interp_ifg_filename).filled(0) - corr = io.load_gdal(corr_filename, masked=True).filled(0) + corr = io.load_masked(corr_filename).filled(0) cutoff = preproc_options.interpolation_cor_threshold logger.info(f"Masking pixels with correlation below {cutoff}") coherent_pixel_mask = corr >= cutoff @@ -382,7 +382,7 @@ def unwrap( sim_cutoff := preproc_options.interpolation_similarity_threshold ): logger.info(f"Masking pixels with similarity below {sim_cutoff}") - sim = io.load_gdal(similarity_filename, masked=True).filled(0) + sim = io.load_masked(similarity_filename).filled(0) coherent_pixel_mask &= sim >= sim_cutoff logger.info(f"Interpolating {pre_interp_ifg_filename} -> {interp_ifg_filename}") @@ -478,7 +478,7 @@ def unwrap( "Transferring ambiguity numbers from filtered/interpolated" f" ifg {unwrapper_unw_filename}" ) - unw_arr = io.load_gdal(unwrapper_unw_filename, masked=True).filled(unw_nodata) + unw_arr = io.load_masked(unwrapper_unw_filename).filled(unw_nodata) final_arr = transfer_ambiguities(np.angle(ifg), unw_arr) final_arr[ifg.mask] = unw_nodata diff --git a/src/dolphin/unwrap/_unwrap_3d.py b/src/dolphin/unwrap/_unwrap_3d.py index c813fda67..3aeb81023 100644 --- a/src/dolphin/unwrap/_unwrap_3d.py +++ b/src/dolphin/unwrap/_unwrap_3d.py @@ -41,12 +41,12 @@ def unwrap_spurt( # NOTE: we are working around spurt currently wanting "temporal_coherence.tif", # and a temporal coherence threshold. # we'll make our own mask of 0=bad, 1=good, then pass a threshold of 0.5 - temp_coh = io.load_gdal(temporal_coherence_filename, masked=True).filled(0) + temp_coh = io.load_masked(temporal_coherence_filename).filled(0) # Mark the "bad" pixels (good=1, bad=0, following the unwrapper mask convention) temp_coh_mask = temp_coh > options.temporal_coherence_threshold combined_mask = temp_coh_mask if similarity_filename and options.similarity_threshold: - sim = io.load_gdal(similarity_filename, masked=True).filled(0) + sim = io.load_masked(similarity_filename).filled(0) sim_mask = sim > options.similarity_threshold # A good pixel can have good similarity, or good temp. coherence combined_mask = combined_mask | sim_mask @@ -167,7 +167,7 @@ def _create_conncomps_from_mask( unw_filenames: Sequence[PathOrStr], dilate_by: int = 25, ) -> list[Path]: - arr = io.load_gdal(temporal_coherence_filename, masked=True) + arr = io.load_masked(temporal_coherence_filename) good_pixels = arr > temporal_coherence_threshold strel = np.ones((dilate_by, dilate_by)) # "1" pixels will be spread out and have (approximately) 1.0 in surrounding pixels diff --git a/src/dolphin/workflows/single.py b/src/dolphin/workflows/single.py index 6bf0a3aca..9c88ac520 100644 --- a/src/dolphin/workflows/single.py +++ b/src/dolphin/workflows/single.py @@ -379,7 +379,7 @@ def _get_ps_mask( ps_mask_file: Optional[Filename], nrows: int, ncols: int ) -> np.ndarray: if ps_mask_file is not None: - ps_mask = io.load_gdal(ps_mask_file, masked=True) + ps_mask = io.load_masked(ps_mask_file) # Fill the nodata values with false ps_mask = ps_mask.astype(bool).filled(False) else: @@ -393,8 +393,8 @@ def _get_amp_mean_variance( ) -> tuple[np.ndarray, np.ndarray]: if amp_mean_file is not None and amp_dispersion_file is not None: # Note: have to fill, since numba (as of 0.57) can't do masked arrays - amp_mean = io.load_gdal(amp_mean_file, masked=True).filled(np.nan) - amp_dispersion = io.load_gdal(amp_dispersion_file, masked=True).filled(np.nan) + amp_mean = io.load_masked(amp_mean_file).filled(np.nan) + amp_dispersion = io.load_masked(amp_dispersion_file).filled(np.nan) # convert back to variance from dispersion: amp_disp = std_dev / mean amp_variance = (amp_dispersion * amp_mean) ** 2 else: diff --git a/tests/test_io_core.py b/tests/test_io_core.py index 24266024a..25e9270b0 100644 --- a/tests/test_io_core.py +++ b/tests/test_io_core.py @@ -63,21 +63,28 @@ def test_load_slice_oob(self, raster_100_by_200): ) def test_load_masked(self, raster_with_nan_block): - arr = io.load_gdal(raster_with_nan_block, masked=True) + # Test load_masked function returns masked array + arr = io.load_masked(raster_with_nan_block) assert isinstance(arr, np.ma.masked_array) assert np.ma.is_masked(arr) assert arr[arr.mask].size == 32 * 32 assert np.all(arr.mask[:32, :32]) + # Test that load_gdal returns regular array (not masked) arr = io.load_gdal(raster_with_nan_block) assert not isinstance(arr, np.ma.masked_array) assert not np.ma.is_masked(arr) - assert np.all(np.isnan(arr[:32, :32])) + # Complex arrays with NaN values + if np.iscomplexobj(arr): + assert np.any(np.isnan(arr.real[:32, :32]) | np.isnan(arr.imag[:32, :32])) + else: + assert np.any(np.isnan(arr[:32, :32])) def test_load_masked_empty_nodata(self, raster_100_by_200): - arr = io.load_gdal(raster_100_by_200, masked=True) + # Test load_masked with file that has no nodata + arr = io.load_masked(raster_100_by_200) assert isinstance(arr, np.ma.masked_array) - assert arr.mask == np.ma.nomask + assert arr.mask is np.ma.nomask or np.all(~arr.mask) def test_load_band(self, tmp_path, slc_stack, slc_file_list): # Check on a VRT, which has multiple bands