From 03cb0b72174ca6712b0eaa92983c53aa4465398c Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:40:59 +0100 Subject: [PATCH 01/10] Implement the ability to pass arbitrary scipy.stats objects as priors, superceeding bounds. --- docs/source/arrhenius_t.ipynb | 20 ++-- kinisi/arrhenius.py | 27 ++--- kinisi/fitting.py | 72 +++++++++---- kinisi/tests/test_arrhenius.py | 161 +++------------------------- kinisi/tests/test_fitting.py | 182 ++++++++++++++++++++++++++++++++ kinisi/tests/test_yeh_hummer.py | 34 +++--- kinisi/yeh_hummer.py | 63 +++++++---- 7 files changed, 341 insertions(+), 218 deletions(-) create mode 100644 kinisi/tests/test_fitting.py diff --git a/docs/source/arrhenius_t.ipynb b/docs/source/arrhenius_t.ipynb index 74f7e671..612dcf8f 100644 --- a/docs/source/arrhenius_t.ipynb +++ b/docs/source/arrhenius_t.ipynb @@ -31,6 +31,7 @@ "import matplotlib.pyplot as plt\n", "from kinisi.analyze import DiffusionAnalyzer\n", "from kinisi.arrhenius import Arrhenius\n", + "from scipy.stats import uniform\n", "import warnings\n", "np.random.seed(42)\n", "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", @@ -145,8 +146,9 @@ "metadata": {}, "outputs": [], "source": [ - "s = Arrhenius(td, bounds=[[0.1 * sc.Unit('eV'), 0.2 * sc.Unit('eV')], \n", - " [1e-5 * td.data.unit, 1e-4 * td.data.unit]])" + "priors = [uniform(0.1, 0.1), uniform(1e-5, 9e-5)]\n", + "\n", + "s = Arrhenius(td, priors=priors)" ] }, { @@ -154,15 +156,19 @@ "id": "507098fb-3afe-40b7-977d-1db0d8eb20a4", "metadata": {}, "source": [ - "When you create a `TemperatureDependent` object, of which `Arrhenius` is a subclass, you can optionally provide bounds as a list of lists. \n", + "When you create a `TemperatureDependent` object, of which `Arrhenius` is a subclass, you can optionally provide priors as a list of `scipy.stats` objects. \n", "For example, if we wanted the activation energy to be restricted between 0.1 and 0.2 eV and the preexponential factor between 10-5 and 10-4 cm2/s, then we would use the following: \n", "\n", "```python\n", - "Arrhenius(td, bounds=[[0.1 * sc.Unit('eV'), 0.2 * sc.Unit('eV')], \n", - " [1e-5 * td.data.unit, 1e-4 * td.data.unit]])\n", + "priors = [uniform(0.1, 0.1), uniform(1e-5, 9e-5)]\n", + "\n", + "s = Arrhenius(td, priors=priors)\n", "```\n", "\n", - "If you do not set bounds, they are automatically set to 0.5 and 1.5 of the best fit values.\n", + "So there are two uniform distributions, one for the activation energy and another for the preexponential factor.\n", + "The first value in `uniform` is the minimum bound, and the maximum bound is found be the second value plus the minimum. \n", + "We recommend looks at the `scipy.stats` documentation to help clarify how to use these object. \n", + "If you do not set `priors`, they are automatically set to be uniform with a min and max of 0.5 and 1.5 of maximum likelihood values.\n", "\n", "The `Arrhenius` object automatically estimates the maximum likelihood values for the activation energy and preexponential factor and stores them is an appropriate `sc.DataGroup`. " ] @@ -308,7 +314,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.13.11" } }, "nbformat": 4, diff --git a/kinisi/arrhenius.py b/kinisi/arrhenius.py index 6291e2a4..fc5bf515 100644 --- a/kinisi/arrhenius.py +++ b/kinisi/arrhenius.py @@ -33,8 +33,9 @@ class TemperatureDependent(FittingBase): :param function: A callable function that describes the relationship between temperature and diffusion. :param parameter_names: A tuple of parameter names for the function. :param parameter_units: A tuple of sc.Unit objects corresponding to the parameter names. - :param bounds: Optional bounds for the parameters of the function. Defaults to None, in which case these - are defined as +/- 50 percent of the best fit values. + :param priors: Optional prior probability distributions for the parameters of the function. + Defaults to None, in which case a uniform distribution is defined with limits of + +/- 50 percent of the best fit values. """ def __init__( @@ -43,7 +44,7 @@ def __init__( function: Callable, parameter_names: tuple[str], parameter_units: tuple[sc.Unit], - bounds: None | list = None, + priors: None | list = None, ) -> 'TemperatureDependent': self.diffusion = diffusion self.temperature = diffusion.coords['temperature'] @@ -53,7 +54,7 @@ def __init__( function=function, parameter_names=parameter_names, parameter_units=parameter_units, - bounds=bounds, + priors=priors, coordinate_name='temperature', ) @@ -94,19 +95,20 @@ class Arrhenius(TemperatureDependent): Evaluate the data with a standard Arrhenius relationship. :param diffusion: Diffusion coefficient sc.DataFrame with a temperature coordinate and variances. - :param bounds: Optional bounds for the parameters of the function. Defaults to None, in which case these - are defined as +/- 50 percent of the best fit values. + :param priors: Optional prior probability distributions for the parameters of the function. + Defaults to None, in which case a uniform distribution is defined with limits of + +/- 50 percent of the max likelihood values. """ def __init__( self, diffusion, - bounds: tuple[tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike]] | None = None, + priors: tuple[tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike]] | None = None, ) -> 'Arrhenius': parameter_names = ('activation_energy', 'preexponential_factor') parameter_units = (sc.Unit('eV'), sc.Unit('cm^2/s')) - super().__init__(diffusion, arrhenius, parameter_names, parameter_units, bounds=bounds) + super().__init__(diffusion, arrhenius, parameter_names, parameter_units, priors=priors) @property def activation_energy(self) -> VariableLike | Samples: @@ -144,14 +146,15 @@ class VogelFulcherTammann(TemperatureDependent): Evaluate the data with a Vogel-Fulcher-Tammann relationship. :param diffusion: Diffusion coefficient sc.DataFrame with a temperature coordinate and variances. - :param bounds: Optional bounds for the parameters of the function. Defaults to None, in which case these - are defined as +/- 50 percent of the best fit values. + :param priors: Optional prior probability distributions for the parameters of the function. + Defaults to None, in which case a uniform distribution is defined with limits of + +/- 50 percent of the max likelihood values. """ def __init__( self, diffusion, - bounds: tuple[ + priors: tuple[ tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike] ] | None = None, @@ -159,7 +162,7 @@ def __init__( parameter_names = ('activation_energy', 'preexponential_factor', 'T0') parameter_units = (sc.Unit('eV'), sc.Unit('cm^2/s'), sc.Unit('K')) - super().__init__(diffusion, vtf_equation, parameter_names, parameter_units, bounds=bounds) + super().__init__(diffusion, vtf_equation, parameter_names, parameter_units, priors=priors) @property def activation_energy(self) -> VariableLike | Samples: diff --git a/kinisi/fitting.py b/kinisi/fitting.py index 5ff15737..2f6730a4 100644 --- a/kinisi/fitting.py +++ b/kinisi/fitting.py @@ -15,6 +15,7 @@ from scipy.linalg import pinvh from scipy.optimize import minimize from scipy.stats import uniform +from scipy.stats._distn_infrastructure import rv_frozen from kinisi.samples import Samples @@ -31,8 +32,9 @@ class FittingBase: :param function: A callable function that describes the relationship :param parameter_names: A tuple of parameter names for the function :param parameter_units: A tuple of sc.Unit objects corresponding to the parameter names - :param bounds: Optional bounds for the parameters of the function. Defaults to None, - in which case these are defined as +/- 50 percent of the best fit values + :param priors: Prior probability distributions for the parameters of the function. + Defaults to None, in which case a uniform distribution is defined with limits of + +/- 50 percent of the max likelihood fit values. :param coordinate_name: Name of the coordinate to use as independent variable """ @@ -42,28 +44,36 @@ def __init__( function: Callable, parameter_names: tuple[str, ...], parameter_units: tuple[sc.Unit, ...], - bounds: None | list = None, + priors: None | list = None, coordinate_name: str | None = None, ) -> 'FittingBase': self.data = data self.data_group = sc.DataGroup({'data': data}) self.function = function - self.bounds = bounds + self.priors = priors self.parameter_names = parameter_names self.parameter_units = parameter_units self.coordinate_name = coordinate_name - if bounds is not None and len(bounds) != len(self.parameter_names): - raise ValueError( - f'Bounds must be a tuple of length {len(self.parameter_names)}, got {len(bounds)} instead.' - ) - - # Perform initial fit - self.max_likelihood() + if priors is not None: + if len(priors) != len(self.parameter_names): + raise ValueError( + f'Priors must be a list of length {len(self.parameter_names)}, got {len(priors)} instead.' + ) + if not all([isinstance(p, rv_frozen) for p in priors]): + raise ValueError( + f'Priors must be a list of rv_frozen scipy.stats objects.' + ) + + if self.priors is None: + # Perform initial fit + self.max_likelihood() + else: + self.max_aposteriori() - # Set default bounds if not provided - if self.bounds is None: - self.bounds = tuple( + # Set default priors if not provided + if self.priors is None: + bounds = tuple( [ ( self.data_group[p].value * 0.5 * self.data_group[p].unit, @@ -73,8 +83,8 @@ def __init__( ] ) - # Set up priors for nested sampling - self.priors = [uniform(b[0].value, b[1].value - b[0].value) for b in self.bounds] + # Set up priors for nested sampling + self.priors = [uniform(b[0].value, b[1].value - b[0].value) for b in bounds] def __repr__(self): """String representation.""" @@ -150,6 +160,15 @@ def log_posterior(self, parameters: tuple[float]) -> float: :return: The log posterior probability of the model parameters. """ return self.log_likelihood(parameters) + self.log_prior(parameters) + + def nlp(self, parameters: tuple[float]) -> float: + """ + Calculate the negative log posterior of the model given the data. + + :param parameters: The parameters of the model. + :return: The negative log likelihood of the model. + """ + return -self.log_posterior(parameters) def prior_transform(self, parameters: tuple[float]) -> tuple[float]: """ @@ -164,13 +183,22 @@ def prior_transform(self, parameters: tuple[float]) -> tuple[float]: return x def max_likelihood(self) -> tuple[float]: - """Find the best fit parameters for the model.""" - if self.bounds is not None: - x0 = [((b[1] + b[0]) / 2) for b in self.bounds] + """Find the max likelihood fit parameters for the model.""" + if self.priors is not None: + x0 = [p.mean() for p in self.priors] + else: + x0 = [1 for u in self.parameter_units] + result = minimize(self.nll, x0).x + for i, name in enumerate(self.parameter_names): + self.data_group[name] = result[i] * self.parameter_units[i] + + def max_aposteriori(self) -> tuple[float]: + """Find the max aposteriori fit parameters for the model.""" + if self.priors is not None: + x0 = [p.mean() for p in self.priors] else: - x0 = [1 * u for u in self.parameter_units] - bounds = [(b[0].value, b[1].value) for b in self.bounds] if self.bounds is not None else None - result = minimize(self.nll, [x.value for x in x0], bounds=bounds).x + x0 = [1 for u in self.parameter_units] + result = minimize(self.nlp, x0).x for i, name in enumerate(self.parameter_names): self.data_group[name] = result[i] * self.parameter_units[i] diff --git a/kinisi/tests/test_arrhenius.py b/kinisi/tests/test_arrhenius.py index b5518141..321deeec 100644 --- a/kinisi/tests/test_arrhenius.py +++ b/kinisi/tests/test_arrhenius.py @@ -10,10 +10,8 @@ import unittest -import numpy as np import scipp as sc from numpy.testing import assert_almost_equal, assert_equal -from scipp import testing from scipy.stats import uniform from kinisi import arrhenius @@ -39,129 +37,9 @@ def straight_line(x, m, c): class TestTemperatureDependent(unittest.TestCase): """ - Unit tests for TemperatureDependent class + Unit tests for the TemperatureDependent class """ - def test_init(self): - """ - Test the initialisation of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - u = uniform(loc=0, scale=1) - assert_equal(td.function, straight_line) - assert td.parameter_names == ('m', 'c') - assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) - assert isinstance(td.data_group['m'], sc.Variable) - assert isinstance(td.data_group['c'], sc.Variable) - assert isinstance(td.bounds[0][0], sc.Variable) - assert isinstance(td.bounds[0][1], sc.Variable) - assert isinstance(td.bounds[1][0], sc.Variable) - assert isinstance(td.bounds[1][1], sc.Variable) - assert isinstance(td.priors[0], type(u)) - assert isinstance(td.priors[1], type(u)) - - def test_init_bounds(self): - """ - Test the initialisation of TemperatureDependent class with bounds - """ - bounds = ((0 * sc.Unit('m/s'), 1 * sc.Unit('m/s')), (0 * sc.Unit('m'), 1e20 * sc.Unit('m'))) - td = arrhenius.TemperatureDependent( - data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), bounds=bounds - ) - assert_equal(td.function, straight_line) - assert td.parameter_names == ('m', 'c') - assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) - testing.assert_allclose(td.bounds[0][0], bounds[0][0]) - testing.assert_allclose(td.bounds[0][1], bounds[0][1]) - testing.assert_allclose(td.bounds[1][0], bounds[1][0]) - testing.assert_allclose(td.bounds[1][1], bounds[1][1]) - - def test_init_wrong_bounds(self): - """ - Test the initialisation of TemperatureDependent class with wrong bounds - """ - with self.assertRaises(ValueError): - arrhenius.TemperatureDependent( - data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), bounds=((0, 1), (0, 1e20), (0, 1)) - ) - - def test_repr(self): - """ - Test the string representation of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert str(td.__repr__()) == str(td.data_group.__repr__()) - - def test_str(self): - """ - Test the string representation of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert str(td) == str(td.data_group) - - def test_repr_html(self): - """ - Test the HTML representation of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert isinstance(td._repr_html_(), str) - - def test_log_likelihood(self): - """ - Test the log-likelihood function of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert isinstance(td.log_likelihood([1, 0]), float) - assert_almost_equal(td.log_likelihood([1, 0]), -13.275855715784758) - - def test_nll(self): - """ - Test the negative log-likelihood function of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert isinstance(td.nll([1, 0]), float) - assert_almost_equal(td.nll([1, 0]), 13.275855715784758) - - def test_log_prior(self): - """ - Test the log-prior function of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - assert isinstance(td.log_prior([1, 0]), float) - assert_almost_equal(td.log_prior([1, 0]), -np.inf) - - def test_mcmc(self): - """ - Test the MCMC sampling function of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - td.mcmc(n_samples=10, n_burn=5, n_walkers=32) - assert isinstance(td.data_group['m'], Samples) - assert isinstance(td.data_group['c'], Samples) - assert td.data_group['m'].shape == (32,) - assert td.data_group['c'].shape == (32,) - - def test_nested_sampling(self): - """ - Test the nested sampling function of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - td.nested_sampling() - assert isinstance(td.data_group['m'], Samples) - assert isinstance(td.data_group['c'], Samples) - assert isinstance(td.logz, sc.Variable) - - def test_flatchain(self): - """ - Test the flatchain property of TemperatureDependent class - """ - td = arrhenius.TemperatureDependent(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - td.mcmc(n_samples=10, n_burn=5, n_walkers=32) - td.mcmc(n_samples=10, n_burn=5, n_walkers=32) - assert isinstance(td.flatchain, sc.DataGroup) - assert len(td.flatchain) == 2 - assert td.flatchain.shape == (32,) - def test_extrapolate(self): """ Test the extrapolate function of TemperatureDependent class @@ -199,19 +77,17 @@ def test_init(self): assert isinstance(arr.activation_energy, sc.Variable) assert isinstance(arr.preexponential_factor, sc.Variable) - def test_init_bounds(self): + def test_init_priros(self): """ - Test the initialisation of Arrhenius class with bounds + Test the initialisation of Arrhenius class with priors """ - bounds = ((0 * sc.Unit('eV'), 1 * sc.Unit('eV')), (0 * sc.Unit('cm^2/s'), 1e20 * sc.Unit('cm^2/s'))) - arr = arrhenius.Arrhenius(data, bounds=bounds) + priors = [uniform(0, 1), uniform(0, 1e20)] + arr = arrhenius.Arrhenius(data, priors=priors) assert_equal(arr.function, arrhenius.arrhenius) assert arr.parameter_names == ('activation_energy', 'preexponential_factor') assert arr.parameter_units == (sc.Unit('eV'), sc.Unit('cm^2/s')) - testing.assert_allclose(arr.bounds[0][0], bounds[0][0]) - testing.assert_allclose(arr.bounds[0][1], bounds[0][1]) - testing.assert_allclose(arr.bounds[1][0], bounds[1][0]) - testing.assert_allclose(arr.bounds[1][1], bounds[1][1]) + assert arr.priors[0] == priors[0] + assert arr.priors[1] == priors[1] def test_arrhenius(self): """ @@ -237,25 +113,22 @@ def test_init(self): assert isinstance(vtf.preexponential_factor, sc.Variable) assert isinstance(vtf.T0, sc.Variable) - def test_init_bounds(self): + def test_init_priors(self): """ - Test the initialisation of VogelFulcherTammann class with bounds + Test the initialisation of VogelFulcherTammann class with priors """ - bounds = ( - (0 * sc.Unit('eV'), 1 * sc.Unit('eV')), - (0 * sc.Unit('cm^2/s'), 1e20 * sc.Unit('cm^2/s')), - (0 * sc.Unit('K'), 1000 * sc.Unit('K')), + priors = ( + uniform(0, 1), + uniform(0, 1e20), + uniform(0, 1000), ) - vtf = arrhenius.VogelFulcherTammann(data, bounds=bounds) + vtf = arrhenius.VogelFulcherTammann(data, priors=priors) assert_equal(vtf.function, arrhenius.vtf_equation) assert vtf.parameter_names == ('activation_energy', 'preexponential_factor', 'T0') assert vtf.parameter_units == (sc.Unit('eV'), sc.Unit('cm^2/s'), sc.Unit('K')) - testing.assert_allclose(vtf.bounds[0][0], bounds[0][0]) - testing.assert_allclose(vtf.bounds[0][1], bounds[0][1]) - testing.assert_allclose(vtf.bounds[1][0], bounds[1][0]) - testing.assert_allclose(vtf.bounds[1][1], bounds[1][1]) - testing.assert_allclose(vtf.bounds[2][0], bounds[2][0]) - testing.assert_allclose(vtf.bounds[2][1], bounds[2][1]) + assert vtf.priors[0] == priors[0] + assert vtf.priors[1] == priors[1] + assert vtf.priors[2] == priors[2] def test_vtf_equation(self): """ diff --git a/kinisi/tests/test_fitting.py b/kinisi/tests/test_fitting.py new file mode 100644 index 00000000..3aef51ac --- /dev/null +++ b/kinisi/tests/test_fitting.py @@ -0,0 +1,182 @@ +""" +Tests for fitting module +""" + +# Copyright (c) kinisi developers. +# Distributed under the terms of the MIT License. +# author: Andrew R. McCluskey (arm61) + +# pylint: disable=R0201 + +import unittest + +import numpy as np +import scipp as sc +from numpy.testing import assert_almost_equal, assert_equal +from scipy.stats import uniform, norm +from scipy.stats._distn_infrastructure import rv_frozen + +from kinisi import fitting +from kinisi.samples import Samples + +temp = sc.linspace(start=5, stop=50, num=10, dim='temperature', unit=sc.units.K) +D = sc.linspace(start=5, stop=50, num=10, unit='cm^2 / s', dim='temperature') +D.variances = D.values * 0.1 # 10% uncertainty +data = sc.DataArray(data=D, coords={'temperature': temp}) + + +def straight_line(x, m, c): + """ + A simple linear function for testing purposes. + + :param x: The independent variable. + :param m: The slope of the line. + :param c: The y-intercept of the line. + :return: The value of the linear function at x. + """ + return m * x + c + + +class TestFittingBase(unittest.TestCase): + """ + Unit tests for FittingBase class + """ + + def test_init(self): + """ + Test the initialisation of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + u = uniform(loc=0, scale=1) + assert_equal(td.function, straight_line) + assert td.parameter_names == ('m', 'c') + assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) + assert isinstance(td.data_group['m'], sc.Variable) + assert isinstance(td.data_group['c'], sc.Variable) + assert isinstance(td.priors[0], rv_frozen) + assert isinstance(td.priors[1], rv_frozen) + + def test_init_priors(self): + """ + Test the initialisation of FittingBase class with priors + """ + priors = [uniform(0, 1), uniform(0, 1e20)] + td = fitting.FittingBase( + data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors + ) + assert_equal(td.function, straight_line) + assert td.parameter_names == ('m', 'c') + assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) + assert td.priors[0] == priors[0] + assert td.priors[1] == priors[1] + + def test_init_norm_priors(self): + """ + Test the initialisation of the FittingBase Class + """ + priors = [norm(10, 1), norm(0, 1)] + td = fitting.FittingBase( + data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors + ) + assert_equal(td.function, straight_line) + assert td.parameter_names == ('m', 'c') + assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) + assert td.priors[0] == priors[0] + assert td.priors[1] == priors[1] + + def test_init_wrong_priors(self): + """ + Test the initialisation of FittingBase class with wrong number of priors + """ + with self.assertRaises(ValueError): + priors = [uniform(0, 1), uniform(0, 1e20), uniform(0, 1)] + fitting.FittingBase( + data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors + ) + + def test_repr(self): + """ + Test the string representation of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert str(td.__repr__()) == str(td.data_group.__repr__()) + + def test_str(self): + """ + Test the string representation of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert str(td) == str(td.data_group) + + def test_repr_html(self): + """ + Test the HTML representation of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert isinstance(td._repr_html_(), str) + + def test_log_likelihood(self): + """ + Test the log-likelihood function of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert isinstance(td.log_likelihood([1, 0]), float) + assert_almost_equal(td.log_likelihood([1, 0]), -13.275855715784758) + + def test_nll(self): + """ + Test the negative log-likelihood function of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert isinstance(td.nll([1, 0]), float) + assert_almost_equal(td.nll([1, 0]), 13.275855715784758) + + def test_log_prior(self): + """ + Test the log-prior function of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + assert isinstance(td.log_prior([1, 0]), float) + assert_almost_equal(td.log_prior([1, 0]), -np.inf) + + def test_mcmc(self): + """ + Test the MCMC sampling function of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + td.mcmc(n_samples=10, n_burn=5, n_walkers=32) + assert isinstance(td.data_group['m'], Samples) + assert isinstance(td.data_group['c'], Samples) + assert td.data_group['m'].shape == (32,) + assert td.data_group['c'].shape == (32,) + + def test_nested_sampling(self): + """ + Test the nested sampling function of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + td.nested_sampling() + assert isinstance(td.data_group['m'], Samples) + assert isinstance(td.data_group['c'], Samples) + assert isinstance(td.logz, sc.Variable) + + def test_nested_sampling_norm_priors(self): + """ + Test the nested sampling function of FittingBase class where priors are norm + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=[norm(1, 1), norm(0, 1)]) + td.nested_sampling() + assert isinstance(td.data_group['m'], Samples) + assert isinstance(td.data_group['c'], Samples) + assert isinstance(td.logz, sc.Variable) + + def test_flatchain(self): + """ + Test the flatchain property of FittingBase class + """ + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) + td.mcmc(n_samples=10, n_burn=5, n_walkers=32) + td.mcmc(n_samples=10, n_burn=5, n_walkers=32) + assert isinstance(td.flatchain, sc.DataGroup) + assert len(td.flatchain) == 2 + assert td.flatchain.shape == (32,) diff --git a/kinisi/tests/test_yeh_hummer.py b/kinisi/tests/test_yeh_hummer.py index 92d48cf5..0209c83e 100644 --- a/kinisi/tests/test_yeh_hummer.py +++ b/kinisi/tests/test_yeh_hummer.py @@ -5,6 +5,7 @@ import numpy as np import pytest import scipp as sc +from scipy.stats import uniform from kinisi.yeh_hummer import YehHummer @@ -61,6 +62,9 @@ def test_yeh_hummer_tip3p_data(self): assert yh.D_infinite.value > np.max(D_values) # Check that viscosity is in reasonable range for water at 298K (should be around 1e-3 Pa*s) + print(yh.shear_viscosity) + print('a') + print(yh.shear_viscosity.value) assert 1e-4 < yh.shear_viscosity.value < 1e-2 def test_yeh_hummer_mcmc(self): @@ -91,8 +95,8 @@ def test_yeh_hummer_mcmc(self): assert dist.shape[0] == len(box_lengths) # Number of data points assert dist.shape[1] > 0 # Number of samples - def test_yeh_hummer_bounds(self): - """Test YehHummer with custom bounds.""" + def test_yeh_hummer_priors(self): + """Test YehHummer with custom priors.""" box_lengths = np.array([20.0, 30.0, 40.0]) D_values = np.array([5.0e-5, 5.2e-5, 5.4e-5]) D_errors = np.array([0.1e-5, 0.1e-5, 0.1e-5]) @@ -102,17 +106,17 @@ def test_yeh_hummer_bounds(self): coords={'box_length': sc.Variable(dims=['system'], values=box_lengths, unit='angstrom')}, ) - # Custom bounds - bounds = ( - (4e-5 * sc.Unit('cm^2/s'), 7e-5 * sc.Unit('cm^2/s')), # D_0 bounds - (1e-4 * sc.Unit('Pa*s'), 1e-2 * sc.Unit('Pa*s')), # viscosity bounds + # Custom priors + priors = ( + uniform(4e-5, 7e-5), # D_0 prior + uniform(1e-4, 1e-2), # viscosity prior ) - yh = YehHummer(td, temperature=sc.scalar(298, unit='K'), bounds=bounds) + yh = YehHummer(td, temperature=sc.scalar(298, unit='K'), priors=priors) - # Check that fitted values are within bounds - assert bounds[0][0].value <= yh.D_infinite.value <= bounds[0][1].value - assert bounds[1][0].value <= yh.shear_viscosity.value <= bounds[1][1].value + # Check that fitted values are within priors + assert priors[0].a <= yh.D_infinite.value <= (priors[0].b + priors[0].a) + assert priors[1].a <= yh.shear_viscosity.value <= (priors[1].b + priors[1].a) def test_yeh_hummer_properties(self): """Test YehHummer property accessors.""" @@ -137,8 +141,8 @@ def test_yeh_hummer_properties(self): assert len(str(yh)) > 0 assert len(repr(yh)) > 0 - def test_yeh_hummer_invalid_bounds(self): - """Test YehHummer with invalid bounds.""" + def test_yeh_hummer_invalid_priors(self): + """Test YehHummer with invalid priors.""" box_lengths = np.array([20.0, 30.0, 40.0]) D_values = np.array([5.0e-5, 5.2e-5, 5.4e-5]) D_errors = np.array([0.1e-5, 0.1e-5, 0.1e-5]) @@ -149,7 +153,7 @@ def test_yeh_hummer_invalid_bounds(self): ) # Wrong number of bounds - bounds = ((4e-5 * sc.Unit('cm^2/s'), 7e-5 * sc.Unit('cm^2/s')),) # Only one bound + priors = [uniform(4e-5, 7e-5)] # Only one bound - with pytest.raises(ValueError, match='Bounds must be a tuple of length 2'): - YehHummer(td, temperature=sc.scalar(298, unit='K'), bounds=bounds) + with pytest.raises(ValueError, match='Priors must be a list of length 2.'): + YehHummer(td, temperature=sc.scalar(298, unit='K'), priors=priors) diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index ba2136e0..a957de68 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -13,6 +13,7 @@ import scipp as sc import scipp.constants as const from scipy.optimize import curve_fit, minimize +from scipy.stats import uniform from kinisi import __version__ from kinisi.fitting import FittingBase @@ -43,7 +44,7 @@ class YehHummer(FittingBase): :param bounds: Optional bounds for [D_0, viscosity] parameters (viscosity in Pa*s) """ - def __init__(self, diffusion, temperature: sc.Variable, bounds=None): + def __init__(self, diffusion, temperature: sc.Variable, priors=None): self.diffusion = diffusion # Extract box lengths from coordinates @@ -60,22 +61,22 @@ def __init__(self, diffusion, temperature: sc.Variable, bounds=None): parameter_names = ('D_0', 'slope') parameter_units = (diffusion.unit, self._slope_unit) - # Compute bounds: use provided or defaults - if bounds is None: + # Compute priors: use provided or defaults + if priors is None: D_max = np.max(diffusion.values) - D_bounds = (D_max * 0.8 * diffusion.unit, D_max * 2.0 * diffusion.unit) + D_prior = uniform(D_max * 0.8, D_max * 2.0) visc_lower, visc_upper = 1e-5 * sc.Unit('Pa*s'), 1e-1 * sc.Unit('Pa*s') + + # Higher viscosity = lower slope, so bounds are inverted + slope_bounds = ( + self.viscosity_to_slope(visc_upper) * self._slope_unit, + self.viscosity_to_slope(visc_lower) * self._slope_unit, + ) + slope_prior = uniform(*(v.value for v in slope_bounds)) + priors = [D_prior, slope_prior] else: - if len(bounds) != 2: - raise ValueError('Bounds must be a tuple of length 2: (D_0_bounds, viscosity_bounds)') - D_bounds = (bounds[0][0].to(unit=parameter_units[0]), bounds[0][1].to(unit=parameter_units[0])) - visc_lower, visc_upper = bounds[1] - - # Higher viscosity = lower slope, so bounds are inverted - slope_bounds = ( - self.viscosity_to_slope(visc_upper) * self._slope_unit, - self.viscosity_to_slope(visc_lower) * self._slope_unit, - ) + if len(priors) != 2: + raise ValueError('Priors must be a list of length 2.') # Initialize base class with linear function super().__init__( @@ -83,7 +84,7 @@ def __init__(self, diffusion, temperature: sc.Variable, bounds=None): function=self._yeh_hummer_function, parameter_names=parameter_names, parameter_units=parameter_units, - bounds=[D_bounds, slope_bounds], + priors=priors, coordinate_name='box_length', ) @@ -159,9 +160,35 @@ def linear_func(x, a, b): # Use these as initial parameters for optimization x0 = [D_0_init, slope_init] - # Convert bounds to format expected by scipy - bounds_scipy = [(b[0].value, b[1].value) for b in self.bounds] - result = minimize(self.nll, x0, bounds=bounds_scipy, method='L-BFGS-B') + result = minimize(self.nll, x0, method='L-BFGS-B') + + # Store results + self.data_group['D_0'] = result.x[0] * self.parameter_units[0] + self.data_group['slope'] = result.x[1] * self.parameter_units[1] + + def max_aposteriori(self): + """Find maximum aposteriori parameters with better initial guess for YehHummer.""" + # Use linear fit for initial parameters + inv_L, D_values, D_errors = self._prepare_data_for_fit() + + def linear_func(x, a, b): + return a - b * x + + popt, _ = curve_fit( + linear_func, + inv_L, + D_values, + sigma=D_errors if np.any(D_errors > 0) else None, + p0=[np.max(D_values), (D_values[0] - D_values[-1]) / (inv_L[0] - inv_L[-1])], + ) + + D_0_init = popt[0] + slope_init = popt[1] + + # Use these as initial parameters for optimization + x0 = [D_0_init, slope_init] + + result = minimize(self.nlp, x0, method='L-BFGS-B') # Store results self.data_group['D_0'] = result.x[0] * self.parameter_units[0] From d32aaa2536ee3f61a6c970ca2c29209486c0c454 Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:44:28 +0100 Subject: [PATCH 02/10] Remove print statements --- kinisi/tests/test_yeh_hummer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kinisi/tests/test_yeh_hummer.py b/kinisi/tests/test_yeh_hummer.py index 0209c83e..e1472a3b 100644 --- a/kinisi/tests/test_yeh_hummer.py +++ b/kinisi/tests/test_yeh_hummer.py @@ -62,9 +62,6 @@ def test_yeh_hummer_tip3p_data(self): assert yh.D_infinite.value > np.max(D_values) # Check that viscosity is in reasonable range for water at 298K (should be around 1e-3 Pa*s) - print(yh.shear_viscosity) - print('a') - print(yh.shear_viscosity.value) assert 1e-4 < yh.shear_viscosity.value < 1e-2 def test_yeh_hummer_mcmc(self): From 24179c9f5fa3fe7041c6c90b64053856194b0202 Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:45:00 +0100 Subject: [PATCH 03/10] Version number bump --- kinisi/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kinisi/__init__.py b/kinisi/__init__.py index 5683f0eb..99358322 100644 --- a/kinisi/__init__.py +++ b/kinisi/__init__.py @@ -1,4 +1,4 @@ -__version__ = '2.0.5' +__version__ = '2.1.0' from .due import Doi, due From 626790b2162fa5321cc4b9590aca947e54bc7aea Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:47:09 +0100 Subject: [PATCH 04/10] Remove random assignment --- kinisi/tests/test_fitting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kinisi/tests/test_fitting.py b/kinisi/tests/test_fitting.py index 3aef51ac..c23cc217 100644 --- a/kinisi/tests/test_fitting.py +++ b/kinisi/tests/test_fitting.py @@ -47,7 +47,6 @@ def test_init(self): Test the initialisation of FittingBase class """ td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m'))) - u = uniform(loc=0, scale=1) assert_equal(td.function, straight_line) assert td.parameter_names == ('m', 'c') assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) From 65eacbd1bf7ffd08db4faab9340ca6e756e28d54 Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:49:18 +0100 Subject: [PATCH 05/10] Ruff --- kinisi/arrhenius.py | 6 +++--- kinisi/fitting.py | 16 +++++++--------- kinisi/tests/test_fitting.py | 18 +++++++----------- kinisi/yeh_hummer.py | 2 +- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/kinisi/arrhenius.py b/kinisi/arrhenius.py index fc5bf515..329738e1 100644 --- a/kinisi/arrhenius.py +++ b/kinisi/arrhenius.py @@ -33,7 +33,7 @@ class TemperatureDependent(FittingBase): :param function: A callable function that describes the relationship between temperature and diffusion. :param parameter_names: A tuple of parameter names for the function. :param parameter_units: A tuple of sc.Unit objects corresponding to the parameter names. - :param priors: Optional prior probability distributions for the parameters of the function. + :param priors: Optional prior probability distributions for the parameters of the function. Defaults to None, in which case a uniform distribution is defined with limits of +/- 50 percent of the best fit values. """ @@ -95,7 +95,7 @@ class Arrhenius(TemperatureDependent): Evaluate the data with a standard Arrhenius relationship. :param diffusion: Diffusion coefficient sc.DataFrame with a temperature coordinate and variances. - :param priors: Optional prior probability distributions for the parameters of the function. + :param priors: Optional prior probability distributions for the parameters of the function. Defaults to None, in which case a uniform distribution is defined with limits of +/- 50 percent of the max likelihood values. """ @@ -146,7 +146,7 @@ class VogelFulcherTammann(TemperatureDependent): Evaluate the data with a Vogel-Fulcher-Tammann relationship. :param diffusion: Diffusion coefficient sc.DataFrame with a temperature coordinate and variances. - :param priors: Optional prior probability distributions for the parameters of the function. + :param priors: Optional prior probability distributions for the parameters of the function. Defaults to None, in which case a uniform distribution is defined with limits of +/- 50 percent of the max likelihood values. """ diff --git a/kinisi/fitting.py b/kinisi/fitting.py index 2f6730a4..749d8123 100644 --- a/kinisi/fitting.py +++ b/kinisi/fitting.py @@ -32,7 +32,7 @@ class FittingBase: :param function: A callable function that describes the relationship :param parameter_names: A tuple of parameter names for the function :param parameter_units: A tuple of sc.Unit objects corresponding to the parameter names - :param priors: Prior probability distributions for the parameters of the function. + :param priors: Prior probability distributions for the parameters of the function. Defaults to None, in which case a uniform distribution is defined with limits of +/- 50 percent of the max likelihood fit values. :param coordinate_name: Name of the coordinate to use as independent variable @@ -61,9 +61,7 @@ def __init__( f'Priors must be a list of length {len(self.parameter_names)}, got {len(priors)} instead.' ) if not all([isinstance(p, rv_frozen) for p in priors]): - raise ValueError( - f'Priors must be a list of rv_frozen scipy.stats objects.' - ) + raise ValueError('Priors must be a list of rv_frozen scipy.stats objects.') if self.priors is None: # Perform initial fit @@ -160,11 +158,11 @@ def log_posterior(self, parameters: tuple[float]) -> float: :return: The log posterior probability of the model parameters. """ return self.log_likelihood(parameters) + self.log_prior(parameters) - - def nlp(self, parameters: tuple[float]) -> float: + + def nlp(self, parameters: tuple[float]) -> float: """ - Calculate the negative log posterior of the model given the data. - + Calculate the negative log posterior of the model given the data. + :param parameters: The parameters of the model. :return: The negative log likelihood of the model. """ @@ -191,7 +189,7 @@ def max_likelihood(self) -> tuple[float]: result = minimize(self.nll, x0).x for i, name in enumerate(self.parameter_names): self.data_group[name] = result[i] * self.parameter_units[i] - + def max_aposteriori(self) -> tuple[float]: """Find the max aposteriori fit parameters for the model.""" if self.priors is not None: diff --git a/kinisi/tests/test_fitting.py b/kinisi/tests/test_fitting.py index c23cc217..322c91f8 100644 --- a/kinisi/tests/test_fitting.py +++ b/kinisi/tests/test_fitting.py @@ -13,7 +13,7 @@ import numpy as np import scipp as sc from numpy.testing import assert_almost_equal, assert_equal -from scipy.stats import uniform, norm +from scipy.stats import norm, uniform from scipy.stats._distn_infrastructure import rv_frozen from kinisi import fitting @@ -60,9 +60,7 @@ def test_init_priors(self): Test the initialisation of FittingBase class with priors """ priors = [uniform(0, 1), uniform(0, 1e20)] - td = fitting.FittingBase( - data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors - ) + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors) assert_equal(td.function, straight_line) assert td.parameter_names == ('m', 'c') assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) @@ -74,9 +72,7 @@ def test_init_norm_priors(self): Test the initialisation of the FittingBase Class """ priors = [norm(10, 1), norm(0, 1)] - td = fitting.FittingBase( - data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors - ) + td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors) assert_equal(td.function, straight_line) assert td.parameter_names == ('m', 'c') assert td.parameter_units == (sc.Unit('m/s'), sc.Unit('m')) @@ -89,9 +85,7 @@ def test_init_wrong_priors(self): """ with self.assertRaises(ValueError): priors = [uniform(0, 1), uniform(0, 1e20), uniform(0, 1)] - fitting.FittingBase( - data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors - ) + fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=priors) def test_repr(self): """ @@ -163,7 +157,9 @@ def test_nested_sampling_norm_priors(self): """ Test the nested sampling function of FittingBase class where priors are norm """ - td = fitting.FittingBase(data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=[norm(1, 1), norm(0, 1)]) + td = fitting.FittingBase( + data, straight_line, ('m', 'c'), (sc.Unit('m/s'), sc.Unit('m')), priors=[norm(1, 1), norm(0, 1)] + ) td.nested_sampling() assert isinstance(td.data_group['m'], Samples) assert isinstance(td.data_group['c'], Samples) diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index a957de68..9e63cf8e 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -66,7 +66,7 @@ def __init__(self, diffusion, temperature: sc.Variable, priors=None): D_max = np.max(diffusion.values) D_prior = uniform(D_max * 0.8, D_max * 2.0) visc_lower, visc_upper = 1e-5 * sc.Unit('Pa*s'), 1e-1 * sc.Unit('Pa*s') - + # Higher viscosity = lower slope, so bounds are inverted slope_bounds = ( self.viscosity_to_slope(visc_upper) * self._slope_unit, From 8049e3b242dfeff7c1a61084f342e25891e231be Mon Sep 17 00:00:00 2001 From: Andrew McCluskey Date: Wed, 17 Jun 2026 14:55:00 +0100 Subject: [PATCH 06/10] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- docs/source/arrhenius_t.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/arrhenius_t.ipynb b/docs/source/arrhenius_t.ipynb index 612dcf8f..1564ddb9 100644 --- a/docs/source/arrhenius_t.ipynb +++ b/docs/source/arrhenius_t.ipynb @@ -166,11 +166,11 @@ "```\n", "\n", "So there are two uniform distributions, one for the activation energy and another for the preexponential factor.\n", - "The first value in `uniform` is the minimum bound, and the maximum bound is found be the second value plus the minimum. \n", - "We recommend looks at the `scipy.stats` documentation to help clarify how to use these object. \n", - "If you do not set `priors`, they are automatically set to be uniform with a min and max of 0.5 and 1.5 of maximum likelihood values.\n", - "\n", - "The `Arrhenius` object automatically estimates the maximum likelihood values for the activation energy and preexponential factor and stores them is an appropriate `sc.DataGroup`. " + "The first value in `uniform` is `loc` (the minimum), and the second is `scale` (the width, i.e., max - min). \n", + "We recommend looking at the `scipy.stats` documentation to clarify how to use these objects. \n", + "If you do not set `priors`, they are automatically set to be uniform with a min and max of 0.5 and 1.5 times the maximum-likelihood values.\n", + "\n", + "The `Arrhenius` object automatically estimates the maximum-likelihood values for the activation energy and preexponential factor and stores them in an appropriate `sc.DataGroup`. " ] }, { From d0bdbc1f30361a17e8e831dc9d4f9ec4148a9e59 Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:55:56 +0100 Subject: [PATCH 07/10] Small fixes from copilot --- kinisi/arrhenius.py | 7 ++----- kinisi/fitting.py | 10 +++++----- kinisi/tests/test_arrhenius.py | 2 +- kinisi/yeh_hummer.py | 7 ++++--- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/kinisi/arrhenius.py b/kinisi/arrhenius.py index 329738e1..7c21dda7 100644 --- a/kinisi/arrhenius.py +++ b/kinisi/arrhenius.py @@ -103,7 +103,7 @@ class Arrhenius(TemperatureDependent): def __init__( self, diffusion, - priors: tuple[tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike]] | None = None, + priors: list | None = None, ) -> 'Arrhenius': parameter_names = ('activation_energy', 'preexponential_factor') parameter_units = (sc.Unit('eV'), sc.Unit('cm^2/s')) @@ -154,10 +154,7 @@ class VogelFulcherTammann(TemperatureDependent): def __init__( self, diffusion, - priors: tuple[ - tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike], tuple[VariableLike, VariableLike] - ] - | None = None, + priors: list | None = None, ) -> 'VogelFulcherTammann': parameter_names = ('activation_energy', 'preexponential_factor', 'T0') parameter_units = (sc.Unit('eV'), sc.Unit('cm^2/s'), sc.Unit('K')) diff --git a/kinisi/fitting.py b/kinisi/fitting.py index 749d8123..58d358f4 100644 --- a/kinisi/fitting.py +++ b/kinisi/fitting.py @@ -60,8 +60,8 @@ def __init__( raise ValueError( f'Priors must be a list of length {len(self.parameter_names)}, got {len(priors)} instead.' ) - if not all([isinstance(p, rv_frozen) for p in priors]): - raise ValueError('Priors must be a list of rv_frozen scipy.stats objects.') + if not all(hasattr(p, 'logpdf') and hasattr(p, 'ppf') for p in priors): + raise ValueError("Priors must provide 'logpdf' and 'ppf' methods (e.g., frozen scipy.stats distributions).") if self.priors is None: # Perform initial fit @@ -164,7 +164,7 @@ def nlp(self, parameters: tuple[float]) -> float: Calculate the negative log posterior of the model given the data. :param parameters: The parameters of the model. - :return: The negative log likelihood of the model. + :return: The negative log posterior of the model. """ return -self.log_posterior(parameters) @@ -180,7 +180,7 @@ def prior_transform(self, parameters: tuple[float]) -> tuple[float]: x[i] = self.priors[i].ppf(parameters[i]) return x - def max_likelihood(self) -> tuple[float]: + def max_likelihood(self): """Find the max likelihood fit parameters for the model.""" if self.priors is not None: x0 = [p.mean() for p in self.priors] @@ -190,7 +190,7 @@ def max_likelihood(self) -> tuple[float]: for i, name in enumerate(self.parameter_names): self.data_group[name] = result[i] * self.parameter_units[i] - def max_aposteriori(self) -> tuple[float]: + def max_aposteriori(self): """Find the max aposteriori fit parameters for the model.""" if self.priors is not None: x0 = [p.mean() for p in self.priors] diff --git a/kinisi/tests/test_arrhenius.py b/kinisi/tests/test_arrhenius.py index 321deeec..32aef08d 100644 --- a/kinisi/tests/test_arrhenius.py +++ b/kinisi/tests/test_arrhenius.py @@ -77,7 +77,7 @@ def test_init(self): assert isinstance(arr.activation_energy, sc.Variable) assert isinstance(arr.preexponential_factor, sc.Variable) - def test_init_priros(self): + def test_init_priors(self): """ Test the initialisation of Arrhenius class with priors """ diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index 9e63cf8e..4601451d 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -41,7 +41,8 @@ class YehHummer(FittingBase): :param diffusion: sc.DataArray with diffusion coefficients and box_length coordinate :param temperature: Temperature (will be extracted from coords if not provided) - :param bounds: Optional bounds for [D_0, viscosity] parameters (viscosity in Pa*s) + :param priors: Optional priors for [D_0, viscosity] parameters using scipy.stats objects + (viscosity in Pa*s) """ def __init__(self, diffusion, temperature: sc.Variable, priors=None): @@ -64,7 +65,7 @@ def __init__(self, diffusion, temperature: sc.Variable, priors=None): # Compute priors: use provided or defaults if priors is None: D_max = np.max(diffusion.values) - D_prior = uniform(D_max * 0.8, D_max * 2.0) + D_prior = uniform(D_max * 0.8, (D_max * 2.0) - (D_max - 0.8)) visc_lower, visc_upper = 1e-5 * sc.Unit('Pa*s'), 1e-1 * sc.Unit('Pa*s') # Higher viscosity = lower slope, so bounds are inverted @@ -72,7 +73,7 @@ def __init__(self, diffusion, temperature: sc.Variable, priors=None): self.viscosity_to_slope(visc_upper) * self._slope_unit, self.viscosity_to_slope(visc_lower) * self._slope_unit, ) - slope_prior = uniform(*(v.value for v in slope_bounds)) + slope_prior = uniform(slope_bounds[0], slope_bounds[1] - slope_bounds[0]) priors = [D_prior, slope_prior] else: if len(priors) != 2: From 581dd103fcae69f5345c75b025c11e57a0c14ea1 Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 14:58:16 +0100 Subject: [PATCH 08/10] Ruff --- kinisi/fitting.py | 5 +++-- kinisi/yeh_hummer.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kinisi/fitting.py b/kinisi/fitting.py index 58d358f4..cf96bf11 100644 --- a/kinisi/fitting.py +++ b/kinisi/fitting.py @@ -15,7 +15,6 @@ from scipy.linalg import pinvh from scipy.optimize import minimize from scipy.stats import uniform -from scipy.stats._distn_infrastructure import rv_frozen from kinisi.samples import Samples @@ -61,7 +60,9 @@ def __init__( f'Priors must be a list of length {len(self.parameter_names)}, got {len(priors)} instead.' ) if not all(hasattr(p, 'logpdf') and hasattr(p, 'ppf') for p in priors): - raise ValueError("Priors must provide 'logpdf' and 'ppf' methods (e.g., frozen scipy.stats distributions).") + raise ValueError( + "Priors must provide 'logpdf' and 'ppf' methods (e.g., frozen scipy.stats distributions)." + ) if self.priors is None: # Perform initial fit diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index 4601451d..2c58b5df 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -41,7 +41,7 @@ class YehHummer(FittingBase): :param diffusion: sc.DataArray with diffusion coefficients and box_length coordinate :param temperature: Temperature (will be extracted from coords if not provided) - :param priors: Optional priors for [D_0, viscosity] parameters using scipy.stats objects + :param priors: Optional priors for [D_0, viscosity] parameters using scipy.stats objects (viscosity in Pa*s) """ From 76dd67bd6d51882bedde06826b21eeb32496bca1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:06:40 +0000 Subject: [PATCH 09/10] Fix docstring, default prior construction, return annotation, and test slope/viscosity labeling --- kinisi/fitting.py | 4 ++-- kinisi/tests/test_yeh_hummer.py | 4 ++-- kinisi/yeh_hummer.py | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/kinisi/fitting.py b/kinisi/fitting.py index cf96bf11..82d5e4b1 100644 --- a/kinisi/fitting.py +++ b/kinisi/fitting.py @@ -181,7 +181,7 @@ def prior_transform(self, parameters: tuple[float]) -> tuple[float]: x[i] = self.priors[i].ppf(parameters[i]) return x - def max_likelihood(self): + def max_likelihood(self) -> None: """Find the max likelihood fit parameters for the model.""" if self.priors is not None: x0 = [p.mean() for p in self.priors] @@ -191,7 +191,7 @@ def max_likelihood(self): for i, name in enumerate(self.parameter_names): self.data_group[name] = result[i] * self.parameter_units[i] - def max_aposteriori(self): + def max_aposteriori(self) -> None: """Find the max aposteriori fit parameters for the model.""" if self.priors is not None: x0 = [p.mean() for p in self.priors] diff --git a/kinisi/tests/test_yeh_hummer.py b/kinisi/tests/test_yeh_hummer.py index e1472a3b..2601ae7a 100644 --- a/kinisi/tests/test_yeh_hummer.py +++ b/kinisi/tests/test_yeh_hummer.py @@ -106,14 +106,14 @@ def test_yeh_hummer_priors(self): # Custom priors priors = ( uniform(4e-5, 7e-5), # D_0 prior - uniform(1e-4, 1e-2), # viscosity prior + uniform(1e-4, 1e-2), # slope prior ) yh = YehHummer(td, temperature=sc.scalar(298, unit='K'), priors=priors) # Check that fitted values are within priors assert priors[0].a <= yh.D_infinite.value <= (priors[0].b + priors[0].a) - assert priors[1].a <= yh.shear_viscosity.value <= (priors[1].b + priors[1].a) + assert priors[1].a <= yh.data_group['slope'].value <= (priors[1].b + priors[1].a) def test_yeh_hummer_properties(self): """Test YehHummer property accessors.""" diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index 2c58b5df..463cb7fa 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -41,8 +41,8 @@ class YehHummer(FittingBase): :param diffusion: sc.DataArray with diffusion coefficients and box_length coordinate :param temperature: Temperature (will be extracted from coords if not provided) - :param priors: Optional priors for [D_0, viscosity] parameters using scipy.stats objects - (viscosity in Pa*s) + :param priors: Optional priors for [D_0, slope] parameters using scipy.stats objects, + where slope has units of [diffusion] * [length] """ def __init__(self, diffusion, temperature: sc.Variable, priors=None): @@ -65,15 +65,14 @@ def __init__(self, diffusion, temperature: sc.Variable, priors=None): # Compute priors: use provided or defaults if priors is None: D_max = np.max(diffusion.values) - D_prior = uniform(D_max * 0.8, (D_max * 2.0) - (D_max - 0.8)) + D_prior = uniform(D_max * 0.8, D_max * 1.2) visc_lower, visc_upper = 1e-5 * sc.Unit('Pa*s'), 1e-1 * sc.Unit('Pa*s') - # Higher viscosity = lower slope, so bounds are inverted - slope_bounds = ( - self.viscosity_to_slope(visc_upper) * self._slope_unit, - self.viscosity_to_slope(visc_lower) * self._slope_unit, - ) - slope_prior = uniform(slope_bounds[0], slope_bounds[1] - slope_bounds[0]) + # Higher viscosity = lower slope, so bounds are inverted; viscosity_to_slope + # already returns plain floats so no unit multiplication needed here + slope_min = self.viscosity_to_slope(visc_upper) + slope_max = self.viscosity_to_slope(visc_lower) + slope_prior = uniform(slope_min, slope_max - slope_min) priors = [D_prior, slope_prior] else: if len(priors) != 2: From d920d547e851de9bc99313bd2c9e07e4c874123a Mon Sep 17 00:00:00 2001 From: "Andrew R. McCluskey" Date: Wed, 17 Jun 2026 15:17:44 +0100 Subject: [PATCH 10/10] Use value --- kinisi/yeh_hummer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kinisi/yeh_hummer.py b/kinisi/yeh_hummer.py index 2c58b5df..d8e1f2cb 100644 --- a/kinisi/yeh_hummer.py +++ b/kinisi/yeh_hummer.py @@ -73,7 +73,7 @@ def __init__(self, diffusion, temperature: sc.Variable, priors=None): self.viscosity_to_slope(visc_upper) * self._slope_unit, self.viscosity_to_slope(visc_lower) * self._slope_unit, ) - slope_prior = uniform(slope_bounds[0], slope_bounds[1] - slope_bounds[0]) + slope_prior = uniform(slope_bounds[0].value, (slope_bounds[1] - slope_bounds[0]).value) priors = [D_prior, slope_prior] else: if len(priors) != 2: