From c161618f55c2d69377418b28efdb50ef0553bceb Mon Sep 17 00:00:00 2001 From: ghiggi Date: Sat, 23 Aug 2025 12:00:08 +0200 Subject: [PATCH 1/2] Add open_files --- gpm/__init__.py | 2 +- gpm/dataset/conventions.py | 35 ++- gpm/dataset/coords.py | 26 +- gpm/dataset/dataset.py | 87 ++++++ gpm/dataset/decoding/coordinates.py | 38 +-- gpm/dataset/decoding/dataarray_attrs.py | 10 +- gpm/dataset/granule.py | 36 ++- gpm/etc/products_attributes.yaml | 312 +++++++++++++++++++ gpm/io/checks.py | 2 +- gpm/io/products.py | 9 + gpm/tests/test_dataset/test_granule_files.py | 7 + 11 files changed, 509 insertions(+), 55 deletions(-) create mode 100644 gpm/etc/products_attributes.yaml diff --git a/gpm/__init__.py b/gpm/__init__.py index fc586a51..ca5cd6c3 100644 --- a/gpm/__init__.py +++ b/gpm/__init__.py @@ -40,7 +40,7 @@ define_configs, read_configs, ) -from gpm.dataset.dataset import open_dataset, open_datatree # noqa +from gpm.dataset.dataset import open_dataset, open_datatree, open_files # noqa from gpm.dataset.datatree import open_raw_datatree # noqa from gpm.dataset.granule import open_granule, open_granule_dataset, open_granule_datatree # noqa from gpm.dataset.tcprimed import open_granule_tcprimed # noqa diff --git a/gpm/dataset/conventions.py b/gpm/dataset/conventions.py index caaac75f..542aea52 100644 --- a/gpm/dataset/conventions.py +++ b/gpm/dataset/conventions.py @@ -124,6 +124,15 @@ def reshape_dataset(ds): return ds +def add_gpm_api_product(ds, product): + """Add gpm_api_product attribute to Dataset and DataArray variables.""" + product = "UNDEFINED" if product is None else product + ds.attrs["gpm_api_product"] = product + for var in ds.data_vars: + ds[var].attrs["gpm_api_product"] = product + return ds + + def finalize_dataset(ds, product, decode_cf, scan_mode, start_time=None, end_time=None): """Finalize GPM xarray.Dataset object.""" import pyproj @@ -141,9 +150,7 @@ def finalize_dataset(ds, product, decode_cf, scan_mode, start_time=None, end_tim # - Units --> units # - Remove DimensionNames # - Sanitize LongName --> description - - # - Add : key : value - ds = standardize_dataarrays_attrs(ds, product) + ds = standardize_dataarrays_attrs(ds) ##------------------------------------------------------------------------. # Decode dataset @@ -154,11 +161,6 @@ def finalize_dataset(ds, product, decode_cf, scan_mode, start_time=None, end_tim if "time_bnds" in ds: ds["time_bnds"] = ds["time_bnds"].astype("M8[ns]").compute() - ##------------------------------------------------------------------------. - # Set relevant coordinates - # - Add range id, radar and pmw frequencies ... - ds = set_coordinates(ds, product, scan_mode) - ###-----------------------------------------------------------------------. ## Check swath time coordinate # --> Ensure validity of the time dimension @@ -166,10 +168,20 @@ def finalize_dataset(ds, product, decode_cf, scan_mode, start_time=None, end_tim # - Do not check for regular time dimension ! ds = ensure_time_validity(ds, limit=10) + ##------------------------------------------------------------------------. + # Set relevant coordinates + # - Add range id, radar and pmw frequencies ... + ds = set_coordinates(ds, product=product, scan_mode=scan_mode) + + ##------------------------------------------------------------------------. + # Add gpm_api product name to Dataset and DataArrays attributes + # - This is required in decode_variables for some products ! + ds = add_gpm_api_product(ds, product) + ##------------------------------------------------------------------------. # Decode variables - if config.get("decode_variables"): - ds = decode_variables(ds, product) + if config.get("decode_variables") and product is not None: + ds = decode_variables(ds, product=product) ##------------------------------------------------------------------------. # Add CF-compliant coordinates attributes and encoding @@ -201,9 +213,8 @@ def finalize_dataset(ds, product, decode_cf, scan_mode, start_time=None, end_tim warnings.warn(msg, GPM_Warning, stacklevel=2) ##------------------------------------------------------------------------. - # Add GPM-API global attributes + # Add history into dataset attributes ds = add_history(ds) - ds.attrs["gpm_api_product"] = product ##------------------------------------------------------------------------. # Subset dataset for start_time and end_time diff --git a/gpm/dataset/coords.py b/gpm/dataset/coords.py index e62cf562..1bf20433 100644 --- a/gpm/dataset/coords.py +++ b/gpm/dataset/coords.py @@ -51,29 +51,45 @@ def _get_orbit_scan_time(dt, scan_mode): def get_orbit_coords(dt, scan_mode): """Get coordinates from Orbit objects.""" + # Decode FileHeader string attrs = decode_string(dt.attrs["FileHeader"]) + # Retrieve Granule ID granule_id = attrs["GranuleNumber"] + # Retrieve time and lat/lon coordinates ds = dt[scan_mode] time = _get_orbit_scan_time(dt, scan_mode) - lon = ds["Longitude"].data lat = ds["Latitude"].data - n_along_track, n_cross_track = lon.shape + + # Define other coordinates + shape = lon.shape + if len(shape) == 2: + n_along_track, n_cross_track = shape + geolocation_dims = ["along_track", "cross_track"] + else: # 1 (along-track only, e.g GMI-1A S3) + n_along_track = shape[0] + n_cross_track = 0 + geolocation_dims = ["along_track"] + granule_id = np.repeat(granule_id, n_along_track) along_track_id = np.arange(n_along_track) cross_track_id = np.arange(n_cross_track) gpm_id = [str(g) + "-" + str(z) for g, z in zip(granule_id, along_track_id, strict=False)] - return { - "lon": xr.DataArray(lon, dims=["along_track", "cross_track"]), - "lat": xr.DataArray(lat, dims=["along_track", "cross_track"]), + # Define dictionary with DataArray coordinates + dict_coords = { + "lon": xr.DataArray(lon, dims=geolocation_dims), + "lat": xr.DataArray(lat, dims=geolocation_dims), "time": xr.DataArray(time, dims="along_track"), "gpm_id": xr.DataArray(gpm_id, dims="along_track"), "gpm_granule_id": xr.DataArray(granule_id, dims="along_track"), "gpm_cross_track_id": xr.DataArray(cross_track_id, dims="cross_track"), "gpm_along_track_id": xr.DataArray(along_track_id, dims="along_track"), } + if n_cross_track == 0: + _ = dict_coords.pop("gpm_cross_track_id") + return dict_coords def get_time_delta_from_time_interval(time_interval): diff --git a/gpm/dataset/dataset.py b/gpm/dataset/dataset.py index d8a04550..757e8198 100644 --- a/gpm/dataset/dataset.py +++ b/gpm/dataset/dataset.py @@ -105,6 +105,9 @@ def _get_scan_modes_datasets_and_closers(filepaths, parallel, scan_modes, decode if parallel: list_info = dask.compute(*list_info) + # Retrieve scan modes list + scan_modes = list(list_info[0][0]) + # ----------------------------------------------------. # Retrieve datatree closers list_dt_closers = [dt_closer for _, dt_closer in list_info] @@ -488,3 +491,87 @@ def open_datatree( ##------------------------------------------------------------------------. return dt + + +def _infer_product_name(ds) -> str | None: + """Infer product name from GPM Dataset attributes.""" + from gpm.io.products import get_products_attributes_dict + + products_dict = get_products_attributes_dict() + for product, attrs in products_dict.items(): + if ( + attrs["AlgorithmID"] == ds.attrs["AlgorithmID"] + and attrs["SatelliteName"] == ds.attrs["SatelliteName"] + and attrs["InstrumentName"] == ds.attrs["InstrumentName"] + ): + return product + return None + + +def open_files( + filepaths, + parallel=False, + scan_modes=None, + groups=None, + variables=None, + prefix_group=False, + start_time=None, + end_time=None, + chunks=-1, + decode_cf=True, + **kwargs, +): + + ##------------------------------------------------------------------------. + # Ensure filepaths is a list + if isinstance(filepaths, str): + filepaths = [filepaths] + + ##------------------------------------------------------------------------. + dict_scan_modes, list_dt_closers = _get_scan_modes_datasets_and_closers( + filepaths=filepaths, + parallel=parallel, + scan_modes=scan_modes, + decode_cf=False, + # Custom options + variables=variables, + groups=groups, + prefix_group=prefix_group, + chunks=chunks, + **kwargs, + ) + + # Retrieve scan_modes from dictionary + scan_modes = sorted(dict_scan_modes) + + # Infer product from file + product = _infer_product_name(dict_scan_modes[scan_modes[0]]) + + # Warn if product is unknown + if product is None: + msg = "GPM-API didn't apply specialized variables decoding because product is unknown !" + warnings.warn(msg, GPM_Warning, stacklevel=2) + + # Finalize datatree + dict_scan_modes = { + scan_mode: finalize_dataset( + ds=ds, + product=product, + scan_mode=scan_mode, + decode_cf=decode_cf, + start_time=start_time, + end_time=end_time, + ) + for scan_mode, ds in dict_scan_modes.items() + } + + # Create datatree + dt = xr.DataTree.from_dict(dict_scan_modes) + + # Specify scan modes closers + for scan_mode, ds in dict_scan_modes.items(): + dt[scan_mode].set_close(ds._close) + + # Specify files closers + dt.set_close(partial(_multi_file_closer, list_dt_closers)) + return dt diff --git a/gpm/dataset/decoding/coordinates.py b/gpm/dataset/decoding/coordinates.py index 3abfdcb0..ac4bee86 100644 --- a/gpm/dataset/decoding/coordinates.py +++ b/gpm/dataset/decoding/coordinates.py @@ -246,23 +246,25 @@ def set_coordinates(ds, product, scan_mode): ds = _parse_sun_local_time(ds) ds = ds.set_coords("sunLocalTime") - #### PMW - # - 1B and 1C products - if product.startswith("1C") or product.startswith("1B"): - ds = _add_pmw_coordinates(ds, product, scan_mode) - # - Deal with incidenceAngleIndex in PMW 1C products - if product.startswith("1C"): - ds = _deal_with_pmw_incidence_angle_index(ds) - #### RADAR - if product in ["2A-DPR", "2A-Ku", "2A-Ka", "2A-PR", "2A-ENV-DPR", "2A-ENV-PR", "2A-ENV-Ka", "2A-ENV-Ku"]: - ds = _add_radar_coordinates(ds, product, scan_mode) - - #### CMB - if product in ["2B-GPM-CORRA", "2B-TRMM-CORRA"]: - ds = _add_cmb_coordinates(ds, product, scan_mode) - - #### SLH and CSH products - if product in ["2A-GPM-SLH", "2B-GPM-CSH"] and "range" in list(ds.dims): - ds = add_lh_height(ds) + # Add specific coordinates depending on product + if product is not None: + #### PMW + # - 1B and 1C products + if product.startswith("1C") or product.startswith("1B"): + ds = _add_pmw_coordinates(ds, product, scan_mode) + # - Deal with incidenceAngleIndex in PMW 1C products + if product.startswith("1C"): + ds = _deal_with_pmw_incidence_angle_index(ds) + #### RADAR + if product in ["2A-DPR", "2A-Ku", "2A-Ka", "2A-PR", "2A-ENV-DPR", "2A-ENV-PR", "2A-ENV-Ka", "2A-ENV-Ku"]: + ds = _add_radar_coordinates(ds, product, scan_mode) + + #### CMB + if product in ["2B-GPM-CORRA", "2B-TRMM-CORRA"]: + ds = _add_cmb_coordinates(ds, product, scan_mode) + + #### SLH and CSH products + if product in ["2A-GPM-SLH", "2B-GPM-CSH"] and "range" in list(ds.dims): + ds = add_lh_height(ds) return ds diff --git a/gpm/dataset/decoding/dataarray_attrs.py b/gpm/dataset/decoding/dataarray_attrs.py index 2c2b74fd..1db98485 100644 --- a/gpm/dataset/decoding/dataarray_attrs.py +++ b/gpm/dataset/decoding/dataarray_attrs.py @@ -94,7 +94,7 @@ def _sanitize_attributes(attrs): return attrs -def _format_dataarray_attrs(da, product=None): +def _format_dataarray_attrs(da): attrs = da.attrs # Ensure fill values are numbers @@ -116,20 +116,16 @@ def _format_dataarray_attrs(da, product=None): if "source_dtype" not in attrs and "dtype" in da.encoding: attrs["source_dtype"] = da.encoding["dtype"] - # Add gpm_api product name - if product is not None: - attrs["gpm_api_product"] = product - # Attach attributes da.attrs = attrs return da -def standardize_dataarrays_attrs(ds, product): +def standardize_dataarrays_attrs(ds): # Sanitize variable attributes for var, da in ds.items(): - ds[var] = _format_dataarray_attrs(da, product) + ds[var] = _format_dataarray_attrs(da) # Drop attributes from bounds coordinates # - https://github.com/pydata/xarray/issues/8368 diff --git a/gpm/dataset/granule.py b/gpm/dataset/granule.py index b56c13ef..386e8cfa 100644 --- a/gpm/dataset/granule.py +++ b/gpm/dataset/granule.py @@ -231,7 +231,7 @@ def _get_scan_mode_dataset( return ds -def get_scan_modes_datasets(filepath, scan_modes, groups, variables, decode_cf, chunks, prefix_group, **kwargs): +def get_scan_modes_datasets(filepath, groups, variables, decode_cf, chunks, prefix_group, scan_modes=None, **kwargs): """Return a dictionary with a dataset for each scan mode.""" from gpm.dataset.datatree import open_raw_datatree from gpm.dataset.granule import _get_scan_mode_dataset @@ -240,17 +240,31 @@ def get_scan_modes_datasets(filepath, scan_modes, groups, variables, decode_cf, dt = open_raw_datatree(filepath=filepath, chunks=chunks, decode_cf=decode_cf, use_api_defaults=True, **kwargs) dt_closer = dt._close + # List scan modes if not specified + if scan_modes is None: + nodes = list(dt) + invalid_nodes = [ + "gmi1aHeader", + "tmi1aHeader", + "DiagGroup", + "AlgorithmRuntimeInfo", + "GprofDHeadr", + ] + scan_modes = set(nodes) - set(invalid_nodes) + # Retrieve scan mode dataset (without cf decoding) - dict_scan_modes = { - scan_mode: _get_scan_mode_dataset( - dt=dt, - scan_mode=scan_mode, - groups=groups, - variables=variables, - prefix_group=prefix_group, - ) - for scan_mode in scan_modes - } + dict_scan_modes = {} + for scan_mode in scan_modes: + try: + dict_scan_modes[scan_mode] = _get_scan_mode_dataset( + dt=dt, + scan_mode=scan_mode, + groups=groups, + variables=variables, + prefix_group=prefix_group, + ) + except Exception as e: + print(f"Skipping scan mode {scan_mode}: {e}") return dict_scan_modes, dt_closer diff --git a/gpm/etc/products_attributes.yaml b/gpm/etc/products_attributes.yaml new file mode 100644 index 00000000..738d26c8 --- /dev/null +++ b/gpm/etc/products_attributes.yaml @@ -0,0 +1,312 @@ +1A-GMI: + AlgorithmID: 1AGMI + SatelliteName: GPM + InstrumentName: GMI +1A-TMI: + AlgorithmID: 1ATMI + SatelliteName: TRMM + InstrumentName: TMI +1B-GMI: + AlgorithmID: 1BGMI + SatelliteName: GPM + InstrumentName: GMI +1B-Ka: + AlgorithmID: 1BKa + SatelliteName: GPM + InstrumentName: DPR +1B-Ku: + AlgorithmID: 1BKu + SatelliteName: GPM + InstrumentName: DPR +1B-PR: + AlgorithmID: 1BPR + SatelliteName: TRMM + InstrumentName: PR +1B-TMI: + AlgorithmID: 1BTMI + SatelliteName: TRMM + InstrumentName: TMI +1C-AMSR2-GCOMW1: + AlgorithmID: 1CAMSR2 + SatelliteName: GCOMW1 + InstrumentName: AMSR2 +1C-AMSRE-AQUA: + AlgorithmID: 1CAMSRE + SatelliteName: AQUA + InstrumentName: AMSRE +1C-AMSUB-NOAA15: + AlgorithmID: 1CAMSUB + SatelliteName: NOAA15 + InstrumentName: AMSUB +1C-AMSUB-NOAA16: + AlgorithmID: 1CAMSUB + SatelliteName: NOAA16 + InstrumentName: AMSUB +1C-AMSUB-NOAA17: + AlgorithmID: 1CAMSUB + SatelliteName: NOAA17 + InstrumentName: AMSUB +1C-ATMS-NOAA20: + AlgorithmID: 1CATMS + SatelliteName: NOAA20 + InstrumentName: ATMS +1C-ATMS-NOAA21: + AlgorithmID: 1CATMS + SatelliteName: NOAA21 + InstrumentName: ATMS +1C-ATMS-NPP: + AlgorithmID: 1CATMS + SatelliteName: NPP + InstrumentName: ATMS +1C-GMI-R: + AlgorithmID: 1CGMI + SatelliteName: GPM + InstrumentName: GMI +1C-GMI: + AlgorithmID: 1CGMI + SatelliteName: GPM + InstrumentName: GMI +1C-MHS-METOPA: + AlgorithmID: 1CMHS + SatelliteName: METOPA + InstrumentName: MHS +1C-MHS-METOPB: + AlgorithmID: 1CMHS + SatelliteName: METOPB + InstrumentName: MHS +1C-MHS-METOPC: + AlgorithmID: 1CMHS + SatelliteName: METOPC + InstrumentName: MHS +1C-MHS-NOAA18: + AlgorithmID: 1CMHS + SatelliteName: NOAA18 + InstrumentName: MHS +1C-MHS-NOAA19: + AlgorithmID: 1CMHS + SatelliteName: NOAA19 + InstrumentName: MHS +1C-SAPHIR-MT1: + AlgorithmID: 1CSAPHIR + SatelliteName: MT1 + InstrumentName: SAPHIR +1C-SSMI-F08: + AlgorithmID: 1CSSMI + SatelliteName: F08 + InstrumentName: SSMI +1C-SSMI-F10: + AlgorithmID: 1CSSMI + SatelliteName: F10 + InstrumentName: SSMI +1C-SSMI-F11: + AlgorithmID: 1CSSMI + SatelliteName: F11 + InstrumentName: SSMI +1C-SSMI-F13: + AlgorithmID: 1CSSMI + SatelliteName: F13 + InstrumentName: SSMI +1C-SSMI-F14: + AlgorithmID: 1CSSMI + SatelliteName: F14 + InstrumentName: SSMI +1C-SSMI-F15: + AlgorithmID: 1CSSMI + SatelliteName: F15 + InstrumentName: SSMI +1C-SSMIS-F16: + AlgorithmID: 1CSSMIS + SatelliteName: F16 + InstrumentName: SSMIS +1C-SSMIS-F17: + AlgorithmID: 1CSSMIS + SatelliteName: F17 + InstrumentName: SSMIS +1C-SSMIS-F18: + AlgorithmID: 1CSSMIS + SatelliteName: F18 + InstrumentName: SSMIS +1C-SSMIS-F19: + AlgorithmID: 1CSSMIS + SatelliteName: F19 + InstrumentName: SSMIS +1C-TMI: + AlgorithmID: 1CTMI + SatelliteName: TRMM + InstrumentName: TMI +2A-AMSR2-GCOMW1-CLIM: + AlgorithmID: 2AGPROFAMSR2 + SatelliteName: GCOMW1 + InstrumentName: AMSR2 +2A-AMSRE-AQUA-CLIM: + AlgorithmID: 2AGPROFAMSRE + SatelliteName: AQUA + InstrumentName: AMSRE +2A-AMSUB-NOAA15-CLIM: + AlgorithmID: 2AGPROFAMSUB + SatelliteName: NOAA15 + InstrumentName: AMSUB +2A-AMSUB-NOAA16-CLIM: + AlgorithmID: 2AGPROFAMSUB + SatelliteName: NOAA16 + InstrumentName: AMSUB +2A-AMSUB-NOAA17-CLIM: + AlgorithmID: 2AGPROFAMSUB + SatelliteName: NOAA17 + InstrumentName: AMSUB +2A-ATMS-NOAA20-CLIM: + AlgorithmID: 2AGPROFATMS + SatelliteName: NOAA20 + InstrumentName: ATMS +2A-ATMS-NOAA21-CLIM: + AlgorithmID: 2AGPROFATMS + SatelliteName: NOAA21 + InstrumentName: ATMS +2A-ATMS-NOAA21: + AlgorithmID: 2AGPROFATMS + SatelliteName: NOAA21 + InstrumentName: ATMS +2A-ATMS-NPP-CLIM: + AlgorithmID: 2AGPROFATMS + SatelliteName: NPP + InstrumentName: ATMS +2A-DPR: + AlgorithmID: 2ADPR + SatelliteName: GPM + InstrumentName: DPR +2A-ENV-DPR: + AlgorithmID: 2ADPRENV + SatelliteName: GPM + InstrumentName: DPR +2A-ENV-Ka: + AlgorithmID: 2AKaENV + SatelliteName: GPM + InstrumentName: DPR +2A-ENV-Ku: + AlgorithmID: 2AKuENV + SatelliteName: GPM + InstrumentName: DPR +2A-ENV-PR: + AlgorithmID: 2APRENV + SatelliteName: TRMM + InstrumentName: PR +2A-GMI-CLIM: + AlgorithmID: 2AGPROFGMI + SatelliteName: GPM + InstrumentName: GMI +2A-GMI: + AlgorithmID: 2AGPROFGMI + SatelliteName: GPM + InstrumentName: GMI +2A-GPM-SLH: + AlgorithmID: 2HSLH + SatelliteName: GPM + InstrumentName: DPR +2A-Ka: + AlgorithmID: 2AKa + SatelliteName: GPM + InstrumentName: DPR +2A-Ku: + AlgorithmID: 2AKu + SatelliteName: GPM + InstrumentName: DPR +2A-MHS-METOPA-CLIM: + AlgorithmID: 2AGPROFMHS + SatelliteName: METOPA + InstrumentName: MHS +2A-MHS-METOPB-CLIM: + AlgorithmID: 2AGPROFMHS + SatelliteName: METOPB + InstrumentName: MHS +2A-MHS-METOPC-CLIM: + AlgorithmID: 2AGPROFMHS + SatelliteName: METOPC + InstrumentName: MHS +2A-MHS-NOAA18-CLIM: + AlgorithmID: 2AGPROFMHS + SatelliteName: NOAA18 + InstrumentName: MHS +2A-MHS-NOAA19-CLIM: + AlgorithmID: 2AGPROFMHS + SatelliteName: NOAA19 + InstrumentName: MHS +2A-PR: + AlgorithmID: 2APR + SatelliteName: TRMM + InstrumentName: PR +2A-SAPHIR-MT1-CLIM: + AlgorithmID: 2APRPSSAPHIR + SatelliteName: MT1 + InstrumentName: SAPHIR +2A-SAPHIR-MT1: + AlgorithmID: 2APRPSSAPHIR + SatelliteName: MT1 + InstrumentName: SAPHIR +2A-SSMI-F08-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F08 + InstrumentName: SSMI +2A-SSMI-F10-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F10 + InstrumentName: SSMI +2A-SSMI-F11-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F11 + InstrumentName: SSMI +2A-SSMI-F13-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F13 + InstrumentName: SSMI +2A-SSMI-F14-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F14 + InstrumentName: SSMI +2A-SSMI-F15-CLIM: + AlgorithmID: 2AGPROFSSMI + SatelliteName: F15 + InstrumentName: SSMI +2A-SSMIS-F16-CLIM: + AlgorithmID: 2AGPROFSSMIS + SatelliteName: F16 + InstrumentName: SSMIS +2A-SSMIS-F17-CLIM: + AlgorithmID: 2AGPROFSSMIS + SatelliteName: F17 + InstrumentName: SSMIS +2A-SSMIS-F18-CLIM: + AlgorithmID: 2AGPROFSSMIS + SatelliteName: F18 + InstrumentName: SSMIS +2A-SSMIS-F19-CLIM: + AlgorithmID: 2AGPROFSSMIS + SatelliteName: F19 + InstrumentName: SSMIS +2A-TMI-CLIM: + AlgorithmID: 2AGPROFTMI + SatelliteName: TRMM + InstrumentName: TMI +2A-TRMM-SLH: + AlgorithmID: 2HSLHT + SatelliteName: TRMM + InstrumentName: PR +2B-GPM-CORRA: + AlgorithmID: 2BCMB + SatelliteName: GPM + InstrumentName: DPRGMI +2B-GPM-CSH: + AlgorithmID: 2HCSH + SatelliteName: GPM + InstrumentName: DPRGMI +2B-TRMM-CORRA: + AlgorithmID: 2BCMBT + SatelliteName: TRMM + InstrumentName: PRTMI +2B-TRMM-CSH: + AlgorithmID: 2HCSHT + SatelliteName: TRMM + InstrumentName: PRTMI +IMERG-FR: + AlgorithmID: 3IMERGHH + SatelliteName: MULTI + InstrumentName: MERGED diff --git a/gpm/io/checks.py b/gpm/io/checks.py index e1f8c57c..b8ff809d 100644 --- a/gpm/io/checks.py +++ b/gpm/io/checks.py @@ -162,7 +162,7 @@ def check_transfer_tool(transfer_tool): return transfer_tool -def check_product(product, product_type): +def check_product(product, product_type=None): """Check product validity.""" from gpm.io.products import available_products diff --git a/gpm/io/products.py b/gpm/io/products.py index d71cc8ca..e8cbda24 100644 --- a/gpm/io/products.py +++ b/gpm/io/products.py @@ -95,6 +95,15 @@ def get_info_dict(): return read_yaml(filepath) +@functools.cache +def get_products_attributes_dict(): + """Get products attributes dictionary.""" + from gpm import _root_path + + filepath = os.path.join(_root_path, "gpm", "etc", "products_attributes.yaml") + return read_yaml(filepath) + + def get_info_dict_subset( sensors=None, satellites=None, diff --git a/gpm/tests/test_dataset/test_granule_files.py b/gpm/tests/test_dataset/test_granule_files.py index d601989e..5bcec8dc 100644 --- a/gpm/tests/test_dataset/test_granule_files.py +++ b/gpm/tests/test_dataset/test_granule_files.py @@ -162,3 +162,10 @@ def test_open_granule_datatree(self, filepath): dt = gpm.open_granule_datatree(filepath, cache=False, lock=False, decode_cf=True) assert isinstance(dt, xr.DataTree) dt.close() + + @pytest.mark.parametrize("filepath", [ORBIT_EXAMPLE_FILEPATH, GRID_EXAMPLE_FILEPATH]) + def test_open_files(self, filepath): + """Test open file with open_files.""" + dt = gpm.open_files(filepath, cache=False, lock=False, decode_cf=True) + assert isinstance(dt, xr.DataTree) + dt.close() From 61a3bbb729513bf9317f5bce2d97bade7572da08 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Mon, 25 Aug 2025 10:50:31 +0200 Subject: [PATCH 2/2] Fix polars categorical casting to float --- gpm/bucket/dataframe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gpm/bucket/dataframe.py b/gpm/bucket/dataframe.py index ae1eb474..5a0e42c7 100644 --- a/gpm/bucket/dataframe.py +++ b/gpm/bucket/dataframe.py @@ -46,12 +46,11 @@ def pl_cut(values, bounds, include_lowest=True, right=True): left_closed=not right, # left_closed=False equivalent of pandas right=True include_breaks=False, ) - - indices = indices.cast(float) + indices = indices.cast(str).cast(float) # NaN are represented as null # Include values of first bins (include_lowest=True of pd.cut) if include_lowest: indices[values == bounds[0]] = 0 - # Replace -1 and len(bounds) + # Replace -1 and len(bounds) with null indices[indices == -1.0] = None indices[indices == len(bounds) - 1] = None indices[indices.is_nan()] = None