# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Stochastic reconfiguration for optimization of a variational ansatz."""

from __future__ import annotations

import math
from typing import Any, Callable

import numpy as np
import scipy.linalg
from scipy.optimize import OptimizeResult, minimize
from scipy.sparse.linalg import LinearOperator

from ffsim.optimize._util import (
    WrappedCallable,
    WrappedLinearOperator,
    jacobian_finite_diff,
    orthogonalize_columns,
)


def minimize_stochastic_reconfiguration(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    hamiltonian: LinearOperator,
    x0: np.ndarray,
    *,
    maxiter: int = 1000,
    variation: float = 1.0,
    cond: float = 1e-8,
    epsilon: float = 1e-8,
    gtol: float = 1e-5,
    optimize_hyperparameters: bool = True,
    optimize_hyperparameters_args: dict | None = None,
    callback: Callable[[OptimizeResult], Any] | None = None,
) -> OptimizeResult:
    """Minimize the energy of a variational ansatz using stochastic reconfiguration.

    References:

    - `Generalized Lanczos algorithm for variational quantum Monte Carlo`_

    Args:
        params_to_vec: Function representing the wavefunction ansatz. It takes as input
            a vector of real-valued parameters and outputs the state vector represented
            by those parameters.
        hamiltonian: The Hamiltonian representing the energy to be minimized.
        x0: Initial guess for the parameters.
        maxiter: Maximum number of optimization iterations to perform.
        variation: Hyperparameter controlling the size of parameter variations
            used in the linear expansion of the wavefunction. Its value must be
            positive. A larger value results in larger variations.
        cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
            solve for the parameter update.
        epsilon: Increment to use for approximating the gradient using
            finite differences.
        gtol: Convergence threshold for the norm of the projected gradient.
        optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
            each iteration. Optimizing the hyperparameter incurs more function and
            energy evaluations in each iteration, but may speed up convergence, leading
            to fewer iterations overall. The optimization is performed using
            `scipy.optimize.minimize`_.
        optimize_hyperparameters_args: Arguments to use when calling
            `scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
            constructed as

            .. code::

                scipy.optimize.minimize(f, x0, **optimize_hyperparameters_args)

            If not specified, the default value of `dict(method="L-BFGS-B")` will be
            used.

        callback: A callable called after each iteration. It is called with the
            signature

            .. code::

                callback(intermediate_result: OptimizeResult)

            where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
            with attributes ``x`` and ``fun``, the present values of the parameter
            vector and objective function. For all iterations except for the last,
            it also contains the ``jac`` attribute holding the present value of the
            gradient, as well as ``overlap_mat`` and ``variation`` attributes
            holding the present values of the overlap matrix and the `variation`
            hyperparameter.
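
            For example, a callback that records the energy after each
            parameter update might look like this (an illustrative sketch,
            not part of this module):

            .. code::

                energies = []

                def callback(intermediate_result: OptimizeResult):
                    # Collect the energy reported at each iteration.
                    energies.append(intermediate_result.fun)
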
    Returns:
        The optimization result represented as a `scipy.optimize.OptimizeResult`_
        object. Note the following definitions of selected attributes:

        - ``x``: The final parameters of the optimization.
        - ``fun``: The energy associated with the final parameters. That is, the
          expectation value of the Hamiltonian with respect to the state vector
          corresponding to the parameters.
        - ``nfev``: The number of times the ``params_to_vec`` function was called.
        - ``nlinop``: The number of times the ``hamiltonian`` linear operator was
          applied to a vector.
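
    Example:

        A minimal usage sketch. The two-parameter ansatz and the random
        Hermitian ``hamiltonian`` below are illustrative placeholders, not
        part of this module:

        .. code::

            import numpy as np
            from scipy.sparse.linalg import aslinearoperator

            rng = np.random.default_rng(1234)
            mat = rng.standard_normal((4, 4)) + 1j * rng.standard_normal((4, 4))
            hamiltonian = aslinearoperator(mat + mat.conj().T)

            def params_to_vec(params: np.ndarray) -> np.ndarray:
                # A normalized two-parameter state, for illustration only.
                a, b = params
                return np.array(
                    [
                        np.cos(a) * np.cos(b),
                        np.cos(a) * np.sin(b),
                        np.sin(a) * np.cos(b),
                        np.sin(a) * np.sin(b),
                    ],
                    dtype=complex,
                )

            result = minimize_stochastic_reconfiguration(
                params_to_vec, hamiltonian, x0=np.array([0.1, 0.2])
            )
            print(result.fun)
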
    .. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
    .. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
    .. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
    .. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
    """  # noqa: E501
    if variation <= 0:
        raise ValueError(f"variation must be positive. Got {variation}.")
    if maxiter < 1:
        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

    if optimize_hyperparameters_args is None:
        optimize_hyperparameters_args = dict(method="L-BFGS-B")

    # The variation hyperparameter is optimized via its square root, so that
    # the squared value stays nonnegative during the unconstrained search.
    variation_param = math.sqrt(variation)
    params = x0.copy()
    converged = False
    intermediate_result = OptimizeResult(
        x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
    )

    # Wrap the ansatz and the Hamiltonian so that function evaluations and
    # operator applications are counted in the intermediate result.
    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)

    for i in range(maxiter):
        # Evaluate the ansatz state and its finite-difference Jacobian, then
        # orthogonalize the Jacobian columns against the current state.
        vec = params_to_vec(params)
        jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon)
        jac = orthogonalize_columns(jac, vec)

        energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian)
        intermediate_result.njev += 1

        # Report the state reached by the previous parameter update.
        if i > 0 and callback is not None:
            intermediate_result.x = params
            intermediate_result.fun = energy
            intermediate_result.jac = grad
            intermediate_result.overlap_mat = overlap_mat
            intermediate_result.variation = variation
            callback(intermediate_result)

        if np.linalg.norm(grad) < gtol:
            converged = True
            break

        if optimize_hyperparameters:
            # Minimize the energy of the updated state as a function of the
            # square root of the variation hyperparameter.
            def f(x: np.ndarray) -> float:
                (variation_param,) = x
                variation = variation_param**2
                param_update = _get_param_update(
                    grad,
                    overlap_mat,
                    variation,
                    cond=cond,
                )
                vec = params_to_vec(params + param_update)
                return np.vdot(vec, hamiltonian @ vec).real

            result = minimize(
                f,
                x0=[variation_param],
                **optimize_hyperparameters_args,
            )
            (variation_param,) = result.x
            variation = variation_param**2

        # Update the parameters using the chosen variation.
        param_update = _get_param_update(
            grad,
            overlap_mat,
            variation,
            cond=cond,
        )
        params = params + param_update
        intermediate_result.nit += 1

    # Evaluate the energy of the final parameters and report the final state.
    # The gradient at the final parameters is not computed, so the ``jac``
    # attribute is dropped from the intermediate result.
    vec = params_to_vec(params)
    energy = np.vdot(vec, hamiltonian @ vec).real

    intermediate_result.x = params
    intermediate_result.fun = energy
    del intermediate_result.jac
    if callback is not None:
        callback(intermediate_result)

    if converged:
        success = True
        message = "Convergence: Norm of projected gradient <= gtol."
    else:
        success = False
        message = "Stop: Total number of iterations reached limit."

    return OptimizeResult(
        x=params,
        success=success,
        message=message,
        fun=energy,
        jac=grad,
        nfev=intermediate_result.nfev,
        njev=intermediate_result.njev,
        nlinop=intermediate_result.nlinop,
        nit=intermediate_result.nit,
    )


def _sr_matrices(
    vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator
) -> tuple[float, np.ndarray, np.ndarray]:
    """Compute the energy, gradient, and overlap matrix for an SR step."""
    energy = np.vdot(vec, hamiltonian @ vec)
    # The energy gradient is twice the real part of <psi|H|d psi>, and the
    # overlap matrix is the Gram matrix of the Jacobian columns.
    grad = vec.conj() @ (hamiltonian @ jac)
    overlap_mat = jac.T.conj() @ jac
    return energy.real, 2 * grad.real, overlap_mat.real


def _get_param_update(
    grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float
) -> np.ndarray:
    # Solve overlap_mat @ x = -0.5 * variation * grad in the least-squares sense.
    x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond)
    return x