1 change: 1 addition & 0 deletions .git-blame-ignore-revs
@@ -75,3 +75,4 @@ cdf40d265cc82775607a1bf25f5f527bacc97405
49ad0f7ebe0b07459abc00a5c33c55a646f1e7e0
ac03492012837799b7111607188acff9f739044a
d858665d799690d73b56bcb961684382551193f4
6d6c74123ac964593e6c078e4627ba1c4aa81043
4 changes: 3 additions & 1 deletion cime_config/SystemTests/rxcropmaturity.py
@@ -86,7 +86,9 @@ def __init__(self, case):
self._run_Nyears = int(stop_n)

# Only allow RXCROPMATURITY to be called with test cropMonthOutput
if casebaseid.split("-")[-1] != "cropMonthOutput":
if casebaseid.split("-")[-1] != "cropMonthOutput" and not casebaseid.endswith(
"clm-cropMonthOutput--clm-h2a"
):
@samsrabin (Member, Author) commented on Aug 23, 2025:

Add comment explaining this.

error_message = (
"Only call RXCROPMATURITY with test cropMonthOutput "
+ "to avoid potentially huge sets of daily outputs."
11 changes: 11 additions & 0 deletions cime_config/testdefs/testlist_clm.xml
@@ -4329,6 +4329,17 @@
</options>
</test>

<test name="RXCROPMATURITY_Lm61" grid="f10_f10_mg37" compset="IHistClm60BgcCrop" testmods="clm/cropMonthOutput--clm/h2a">
<machines>
<machine name="derecho" compiler="intel" category="rxcropmaturity"/>
<machine name="derecho" compiler="intel" category="crop_calendars"/>
</machines>
<options>
<option name="wallclock">6:00:00</option>
<option name="comment">This test is a copy of the 10x15 RXCROPMATURITY_Lm61 test but with the h2a testmod added. This allows us to test the GDD-generating Python scripts using time-averaged outputs of certain variables that we prefer instantaneous values of. We could theoretically just have the scripts fail if those outputs aren't instantaneous, but (a) it doesn't make that much of a difference and (b) it would be annoying to have to redo the long GDD-generating runs just for this.</option>
</options>
</test>

<test name="RXCROPMATURITYSKIPGEN_Ld1097" grid="f10_f10_mg37" compset="IHistClm60BgcCrop" testmods="clm/cropMonthOutput">
<machines>
<machine name="derecho" compiler="intel" category="aux_clm"/>
3 changes: 3 additions & 0 deletions cime_config/testdefs/testmods_dirs/clm/h2a/user_nl_clm
@@ -0,0 +1,3 @@

hist_avgflag_pertape(3) = 'A'

13 changes: 9 additions & 4 deletions python/ctsm/crop_calendars/cropcal_utils.py
@@ -3,11 +3,18 @@
copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py
"""

import warnings
from importlib.util import find_spec

import numpy as np
import xarray as xr

from ctsm.utils import is_instantaneous

with warnings.catch_warnings():
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
DASK_UNAVAILABLE = find_spec("dask") is None


def define_pftlist():
"""
@@ -281,10 +288,8 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None):
f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}"
)

if vegtype_str.shape == ():
indices = np.array([-1])
else:
indices = np.full(len(vegtype_str), -1)
vegtype_str = np.atleast_1d(vegtype_str)
indices = np.full(len(vegtype_str), -1)
for vegtype_str_2 in np.unique(vegtype_str):
indices[np.where(vegtype_str == vegtype_str_2)] = vegtype_mainlist.index(vegtype_str_2)
if convert_to_ndarray:
9 changes: 7 additions & 2 deletions python/ctsm/crop_calendars/generate_gdds_functions.py
@@ -6,7 +6,6 @@
import warnings
import os
import glob
from importlib import util as importlib_util
import numpy as np
import xarray as xr

@@ -256,9 +255,11 @@ def import_and_process_1yr(
log(logger, f"netCDF year {this_year}...")

# Without dask, this can take a LONG time at resolutions finer than 2-deg
if importlib_util.find_spec("dask"):
if not utils.DASK_UNAVAILABLE:
log(logger, "dask available")
chunks = {"time": 1}
else:
log(logger, "dask NOT available")
chunks = None

# Get h1 file (list)
@@ -550,6 +551,10 @@ def import_and_process_1yr(
clm_gdd_var = "GDDACCUM"
my_vars = [clm_gdd_var, "GDDHARV"]
patterns = [f"*h2i.{this_year-1}-01*.nc", f"*h2i.{this_year-1}-01*.nc.base"]

# TODO: Undo this, replacing with just h2i or h2a
patterns += [f"*h2a.{this_year-1}-01*.nc", f"*h2a.{this_year-1}-01*.nc.base"]
Member Author (@samsrabin) commented:

Look for h2i files first, falling back to h2a files if (a) h2i files not found or (b) variable not in h2i files.
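
A minimal sketch of that fallback, assuming hypothetical helper names (find_h2_files, get_gdd_files) rather than anything in this PR:

# Hypothetical sketch; find_h2_files() and get_gdd_files() are illustrative
# names, not functions added by this PR.
import glob
import os

import xarray as xr


def find_h2_files(indir, year, stream, required_var):
    """Return files for `stream` ("h2i" or "h2a") containing `required_var`, else []."""
    files = sorted(glob.glob(os.path.join(indir, f"*{stream}.{year}-01*.nc")))
    if not files:
        return []
    # (b) Variable not in the files: treat as "not found" so the caller can fall back
    with xr.open_dataset(files[0]) as ds:
        if required_var not in ds:
            return []
    return files


def get_gdd_files(indir, year, required_var="GDDACCUM"):
    # (a) Prefer instantaneous (h2i) files; fall back to time-averaged (h2a) ones
    return find_h2_files(indir, year, "h2i", required_var) or find_h2_files(
        indir, year, "h2a", required_var
    )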


for pat in patterns:
pattern = os.path.join(indir, pat)
h2_files = glob.glob(pattern)
2 changes: 1 addition & 1 deletion python/ctsm/crop_calendars/grid_one_variable.py
@@ -100,7 +100,7 @@ def create_filled_array(this_ds, fill_value, thisvar_da, new_dims):
if fill_value:
thisvar_gridded[:] = fill_value
else:
thisvar_gridded[:] = np.NaN
thisvar_gridded[:] = np.nan
return thisvar_gridded


9 changes: 3 additions & 6 deletions python/ctsm/crop_calendars/import_ds.py
@@ -7,8 +7,6 @@
"""

import re
import warnings
from importlib.util import find_spec
import numpy as np
import xarray as xr
from ctsm.utils import is_instantaneous
@@ -276,12 +274,11 @@ def import_ds(
if isinstance(filelist, list) and len(filelist) == 1:
filelist = filelist[0]
if isinstance(filelist, list):
with warnings.catch_warnings():
warnings.filterwarnings(action="ignore", category=DeprecationWarning)
dask_unavailable = find_spec("dask") is None
if dask_unavailable:
if utils.DASK_UNAVAILABLE:
log(logger, "dask NOT available")
this_ds = manual_mfdataset(filelist, my_vars, my_vegtypes, time_slice, logger=logger)
else:
log(logger, "dask available")
this_ds = xr.open_mfdataset(
sorted(filelist),
data_vars="minimal",
129 changes: 129 additions & 0 deletions python/ctsm/test/test_unit_grid_one_variable.py
@@ -0,0 +1,129 @@
#!/usr/bin/env python3

"""Unit tests for grid_one_variable.py"""

import unittest
import numpy as np
import xarray as xr

from ctsm import unit_testing
from ctsm.crop_calendars import grid_one_variable as g1v

# Allow names that pylint doesn't like, because otherwise I find it hard
# to make readable unit test names
# pylint: disable=invalid-name


class TestGridOneVariable(unittest.TestCase):
"""Tests of grid_one_variable"""

def setUp(self):
self.lat_da = xr.DataArray(
data=[-45.0, 45.0],
dims=["lat"],
)
self.lon_da = xr.DataArray(
data=[-145.0, 145.0],
dims=["lon"],
)
self.result = None
self.expected = None

def _compare_arrays(self):
print(f"Result:\n{self.result}")
print(f"Expected:\n{self.expected}")
self.assertTrue(np.array_equal(self.result, self.expected, equal_nan=True))

def test_create_filled_array_fillnan(self):
"""Test create_filled_array() with NaN fill_value"""
this_da = xr.DataArray(
data=np.array([[1, 2], [3, 4]]),
dims=["lat", "lon"],
coords={"lat": self.lat_da, "lon": self.lon_da},
)
dummy_ds = xr.Dataset()

fill_value = np.nan
self.result = g1v.create_filled_array(dummy_ds, fill_value, this_da, ["lat"])

self.expected = np.full_like(self.lat_da, fill_value)
self._compare_arrays()

def test_create_filled_array_fill6(self):
"""Test create_filled_array() with fill_value = 6.0"""
this_da = xr.DataArray(
data=np.array([[1, 2], [3, 4]]),
dims=["lat", "lon"],
coords={"lat": self.lat_da, "lon": self.lon_da},
)
dummy_ds = xr.Dataset()

fill_value = 6.0
self.result = g1v.create_filled_array(dummy_ds, fill_value, this_da, ["lat"])

self.expected = np.full_like(self.lat_da, fill_value)
self._compare_arrays()

def test_create_filled_array_fill6_ivtstr(self):
"""Test create_filled_array() with fill_value = 6.0 and ivt_str in new_dims"""
this_da = xr.DataArray(
data=np.array([[1, 2], [3, 4]]),
dims=["lat", "lon"],
coords={"lat": self.lat_da, "lon": self.lon_da},
)

ivt_da = xr.DataArray(
data=[14, 15, 16],
dims=["ivt"],
)
this_ds = xr.Dataset(coords={"ivt": ivt_da})

fill_value = 6.0
self.result = g1v.create_filled_array(this_ds, fill_value, this_da, ["ivt_str"])

self.expected = np.full_like(ivt_da, fill_value)
self._compare_arrays()

def test_create_filled_array_fill6_extradim(self):
"""
Test create_filled_array() with fill_value = -7.0 and some non-ivt_str in new_dims that is
also not in this_da.coords
"""
this_da = xr.DataArray(
data=np.array([[1, 2], [3, 4]]),
dims=["lat", "lon"],
coords={"lat": self.lat_da, "lon": self.lon_da},
)

extradim_name = "extra_dim"
extradim_da = xr.DataArray(
data=[14, 15, 16],
dims=[extradim_name],
)
this_ds = xr.Dataset(coords={extradim_name: extradim_da})

fill_value = -7.0
self.result = g1v.create_filled_array(this_ds, fill_value, this_da, [extradim_name])

self.expected = np.full_like(extradim_da, fill_value)
self._compare_arrays()

def test_create_filled_array_fillfalse(self):
"""Test create_filled_array() with false(y) fill_value"""
this_da = xr.DataArray(
data=np.array([[1, 2], [3, 4]]),
dims=["lat", "lon"],
coords={"lat": self.lat_da, "lon": self.lon_da},
)
dummy_ds = xr.Dataset()

fill_value = False
self.result = g1v.create_filled_array(dummy_ds, fill_value, this_da, ["lat"])

self.expected = np.full_like(self.lat_da, np.nan)
self._compare_arrays()


if __name__ == "__main__":
unit_testing.setup_for_tests()
unittest.main()
46 changes: 45 additions & 1 deletion python/ctsm/utils.py
@@ -2,11 +2,14 @@

import logging
import os
import stat
import sys
import glob
import string
import re
import pdb
import tempfile
import subprocess

from datetime import date, timedelta, datetime
from getpass import getuser
@@ -191,6 +194,7 @@ def write_output(file, file_in, file_out, file_type):
"""

# update attributes
logger.info("Updating metadata: %s", file_out)
title = "Modified " + file_type + " file"
summary = "Modified " + file_type + " file"
contact = "N/A"
@@ -205,11 +209,51 @@
description=description,
)

logger.info("Writing: %s", file_out)
# mode 'w' overwrites file if it exists
file.to_netcdf(path=file_out, mode="w", format="NETCDF3_64BIT")
file.to_netcdf(path=file_out, mode="w", format="NETCDF4_CLASSIC")
logger.info("Successfully created: %s", file_out)
file.close()

logger.info("Trying to convert to NETCDF3_CLASSIC: %s", file_out)
if convert_netcdf_to_classic(file_out):
logger.info("Conversion succeeded")
else:
logger.info("Conversion failed, perhaps because nccopy wasn't found in your shell")
Member Author (@samsrabin) commented:

  • Check whether nccopy is available in shell before the initial file.to_netcdf() call. If it's not, just skip straight to old behavior of "NETCDF3_64BIT".
  • If conversion fails, try saving again via file.to_netcdf(), this time straight to "NETCDF3_64BIT".
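
A rough sketch of that flow, assuming shutil.which() is an acceptable way to detect nccopy; save_netcdf() is a hypothetical wrapper, while convert_netcdf_to_classic() is the helper added to python/ctsm/utils.py below:

# Sketch only; not the PR's implementation.
import shutil

from ctsm.utils import convert_netcdf_to_classic


def save_netcdf(file, file_out):
    """Write NETCDF3_64BIT directly if nccopy is unavailable; otherwise write
    NETCDF4_CLASSIC and convert, falling back to NETCDF3_64BIT on failure."""
    if shutil.which("nccopy") is None:
        # nccopy not in PATH: skip straight to the old behavior
        file.to_netcdf(path=file_out, mode="w", format="NETCDF3_64BIT")
        return
    file.to_netcdf(path=file_out, mode="w", format="NETCDF4_CLASSIC")
    if not convert_netcdf_to_classic(file_out):
        # Conversion failed anyway: save again, straight to NETCDF3_64BIT
        file.to_netcdf(path=file_out, mode="w", format="NETCDF3_64BIT")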

Collaborator commented:

Hey @samsrabin, I'm just wondering about the reasons behind the conversion to classic being done here?

I don't know of any compelling reasons to prefer classic over other formats. And classic has some pretty strong restrictions. And it seems preferable to me to write it out in the preferred format from the get go rather than convert. But I would like to hear the thinking here.

Member Author (@samsrabin) commented:

Yeah, I think I meant for this all to be netCDF-3 64-bit. Will fix.



def convert_netcdf_to_classic(filepath):
"""
Try to convert netCDF to netCDF-3 Classic format using nccopy.
"""
Member Author (@samsrabin) commented:

convert_netcdf_to_classic() should be moved to netcdf_utils and unit-tested.
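
As a sketch of what such a unit test might look like, assuming the function stays importable from ctsm.utils and mocking subprocess.run so nccopy isn't required:

#!/usr/bin/env python3

"""Illustrative unit-test sketch; not part of this PR."""

import os
import subprocess
import tempfile
import unittest
from unittest import mock

from ctsm import unit_testing, utils


class TestConvertNetcdfToClassic(unittest.TestCase):
    """Tests of convert_netcdf_to_classic()"""

    def test_returns_true_when_nccopy_succeeds(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "some_file.nc")
            with open(filepath, "w") as placeholder:
                placeholder.write("stand-in for a netCDF file")
            # Mock subprocess.run so the "conversion" trivially succeeds
            with mock.patch("ctsm.utils.subprocess.run"):
                self.assertTrue(utils.convert_netcdf_to_classic(filepath))

    def test_returns_false_when_nccopy_fails(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "some_file.nc")
            error = subprocess.CalledProcessError(1, "nccopy")
            with mock.patch("ctsm.utils.subprocess.run", side_effect=error):
                self.assertFalse(utils.convert_netcdf_to_classic(filepath))


if __name__ == "__main__":
    unit_testing.setup_for_tests()
    unittest.main()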


# Get temporary file path
dirpath = os.path.dirname(filepath)
with tempfile.NamedTemporaryFile(delete=False, dir=dirpath, suffix=".nc") as tmp:
temp_path = tmp.name

try:
subprocess.run(["nccopy", "-3", filepath, temp_path], check=True)
Member Author (@samsrabin) commented:

Use xarray's Dataset.to_netcdf() instead?
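
If so, a sketch of the xarray-based route might look like the following; the helper name and the write-to-temp-then-replace structure are assumptions, not the PR's code:

# Illustrative alternative to shelling out to nccopy.
import os
import tempfile

import xarray as xr


def convert_to_netcdf3_with_xarray(filepath):
    """Rewrite `filepath` as netCDF-3 classic using xarray instead of nccopy."""
    dirpath = os.path.dirname(filepath)
    with tempfile.NamedTemporaryFile(delete=False, dir=dirpath, suffix=".nc") as tmp:
        temp_path = tmp.name

    # Load into memory so the source file can be replaced afterwards.
    # Note: this raises if the dataset uses features netCDF-3 can't represent
    # (e.g., 64-bit integers or compression).
    with xr.open_dataset(filepath) as ds:
        ds.load().to_netcdf(path=temp_path, mode="w", format="NETCDF3_CLASSIC")

    # As with nccopy, the original file's permissions would still need copying
    # onto temp_path (see copy_permissions()) before the replace.
    os.replace(temp_path, filepath)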

copy_permissions(filepath, temp_path) # nccopy doesn't preserve permissions
os.replace(temp_path, filepath)
return True
except subprocess.CalledProcessError:
return False


def copy_permissions(src_file, dst_file):
"""
Copy file permissions from src to dest
"""
# Get the mode (permissions) of the source file
src_mode = os.stat(src_file).st_mode

# Extract the permission bits
permissions = stat.S_IMODE(src_mode)

# Apply them to the destination file
os.chmod(dst_file, permissions)


def get_isosplit(iso_string, split):
"""