|
| 1 | +from collections.abc import Sequence |
| 2 | + |
| 3 | +import jax.numpy as jnp |
| 4 | +from flax import nnx |
| 5 | +from jax.scipy import stats |
| 6 | + |
| 7 | + |
class MLPModel(nnx.Module):
    """Multi-layer Perceptron (MLP) model for the u function.

    The network maps an input parameter vector to exp(-MLP(x)), so the
    output is strictly positive.

    Parameters
    ----------
    d_input : int
        Number of input parameters, e.g. length of theta
    d_middle : list of int
        Size of hidden layers, e.g. [64, 32, 16]
    d_output : int
        Number of output parameters, 1 for u, 2 for [u, l].
    rngs : flax.nnx.Rngs
        Random number generator for parameter initialization.
    """

    def __init__(
        self, d_input: int, *, d_middle: Sequence[int] = (300, 300, 400), d_output: int = 1, rngs: nnx.Rngs
    ):
        layers = []
        dims = [d_input] + list(d_middle) + [d_output]
        # dims[:-1] and dims[1:] always have equal length; strict=True makes
        # any future mismatch fail loudly instead of silently truncating.
        for i, (d1, d2) in enumerate(zip(dims[:-1], dims[1:], strict=True)):
            layers.append(nnx.Linear(d1, d2, rngs=rngs, kernel_init=nnx.initializers.normal()))
            if i < len(dims) - 2:  # hidden layers get nonlinearity + dropout; the output layer stays linear
                layers.append(nnx.relu)
                layers.append(nnx.Dropout(0.2, rngs=rngs))
        self.layers = nnx.Sequential(*layers)

    def __call__(self, x):
        """Compute the output of the model, exp(-MLP(x))."""
        return jnp.exp(-self.layers(x))
| 38 | + |
| 39 | + |
def chi2_lc_train_step(model: nnx.Module, optimizer: nnx.Optimizer, theta, flux, err):
    """Training step on a single light curve, with chi2 probability based loss.

    This gets a single light curve, gets u=model(theta), computes chi-squared
    statistics for a constant-flux model using `flux` and `err`, and uses
    minus logarithm of chi-squared probability as the loss function.

    Parameters
    ----------
    model : flax.nnx.Module
        Model to train, input vector size is d_input.
    optimizer : flax.nnx.Optimizer
        Optimizer to use for training
    theta : array-like
        Input parameter vector for the model, (n_obs, d_input).
    flux : array-like
        Flux vector, (n_obs,).
    err : array-like
        Error vector, (n_obs,).

    Returns
    -------
    loss : jax.Array
        Scalar loss value: minus log-probability of the chi-squared statistic.
    """

    def minus_lnprob_chi2(model):
        # Per-observation error-scaling factor predicted by the model;
        # only the first output column is used as u.
        u = model(theta)[:, 0]
        total_err = u * err
        # Inverse-variance weighted mean flux: best-fit constant-flux model.
        avg_flux = jnp.average(flux, weights=total_err**-2)
        chi2 = jnp.sum(jnp.square((flux - avg_flux) / total_err))
        # n_obs - 1 degrees of freedom: one parameter (the mean) is fitted.
        lnprob = stats.chi2.logpdf(chi2, jnp.size(flux) - 1)
        return -lnprob

    loss, grads = nnx.value_and_grad(minus_lnprob_chi2)(model)
    optimizer.update(model, grads)

    return loss
0 commit comments