# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Stochastic reconfiguration for optimization of a variational ansatz."""

from __future__ import annotations

import math
from typing import Any, Callable

import numpy as np
import scipy.linalg
from scipy.optimize import OptimizeResult, minimize
from scipy.sparse.linalg import LinearOperator

from ffsim.optimize._util import (
    WrappedCallable,
    WrappedLinearOperator,
    jacobian_finite_diff,
    orthogonalize_columns,
)


def minimize_stochastic_reconfiguration(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    hamiltonian: LinearOperator,
    x0: np.ndarray,
    *,
    maxiter: int = 1000,
    variation: float = 1.0,
    cond: float = 1e-8,
    epsilon: float = 1e-8,
    gtol: float = 1e-5,
    optimize_hyperparameters: bool = True,
    optimize_hyperparameters_args: dict | None = None,
    callback: Callable[[OptimizeResult], Any] | None = None,
) -> OptimizeResult:
    """Minimize the energy of a variational ansatz using stochastic reconfiguration.

    References:

    - `Generalized Lanczos algorithm for variational quantum Monte Carlo`_

    Args:
        params_to_vec: Function representing the wavefunction ansatz. It takes as input
            a vector of real-valued parameters and outputs the state vector represented
            by those parameters.
        hamiltonian: The Hamiltonian representing the energy to be minimized.
        x0: Initial guess for the parameters.
        maxiter: Maximum number of optimization iterations to perform.
        variation: Hyperparameter controlling the size of parameter variations
            used in the linear expansion of the wavefunction. Its value must be
            positive. A larger value results in larger variations.
        cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
            solve for the parameter update.
        epsilon: Increment to use for approximating the gradient using
            finite differences.
        gtol: Convergence threshold for the norm of the projected gradient.
        optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
            each iteration. Optimizing the hyperparameter incurs more function and
            energy evaluations in each iteration, but may speed up convergence, leading
            to fewer iterations overall. The optimization is performed using
            `scipy.optimize.minimize`_.
        optimize_hyperparameters_args: Arguments to use when calling
            `scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
            constructed as

            .. code::

                scipy.optimize.minimize(f, x0, **optimize_hyperparameters_args)

            If not specified, the default value of `dict(method="L-BFGS-B")` will be
            used.

        callback: A callable called after each iteration. It is called with the
            signature

            .. code::

                callback(intermediate_result: OptimizeResult)

            where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
            with attributes ``x`` and ``fun``, the present values of the parameter
            vector and objective function. For all iterations except for the last,
            it also contains the ``jac`` attribute holding the present value of the
            gradient, as well as ``overlap_mat`` and ``variation`` attributes
            holding the present values of the overlap matrix and the `variation`
            hyperparameter.
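
            For example, a callback that records the energy after each
            parameter update might look like this (an illustrative sketch,
            not part of this module):

            .. code::

                energies = []

                def callback(intermediate_result: OptimizeResult):
                    # Collect the energy reported at each iteration.
                    energies.append(intermediate_result.fun)
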
    Returns:
        The optimization result represented as a `scipy.optimize.OptimizeResult`_
        object. Note the following definitions of selected attributes:

        - ``x``: The final parameters of the optimization.
        - ``fun``: The energy associated with the final parameters. That is, the
          expectation value of the Hamiltonian with respect to the state vector
          corresponding to the parameters.
        - ``nfev``: The number of times the ``params_to_vec`` function was called.
        - ``nlinop``: The number of times the ``hamiltonian`` linear operator was
          applied to a vector.
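
    Example:

        A minimal usage sketch. The two-parameter ansatz and the random
        Hermitian ``hamiltonian`` below are illustrative placeholders, not
        part of this module:

        .. code::

            import numpy as np
            from scipy.sparse.linalg import aslinearoperator

            rng = np.random.default_rng(1234)
            mat = rng.standard_normal((4, 4)) + 1j * rng.standard_normal((4, 4))
            hamiltonian = aslinearoperator(mat + mat.conj().T)

            def params_to_vec(params: np.ndarray) -> np.ndarray:
                # A normalized two-parameter state, for illustration only.
                a, b = params
                return np.array(
                    [
                        np.cos(a) * np.cos(b),
                        np.cos(a) * np.sin(b),
                        np.sin(a) * np.cos(b),
                        np.sin(a) * np.sin(b),
                    ],
                    dtype=complex,
                )

            result = minimize_stochastic_reconfiguration(
                params_to_vec, hamiltonian, x0=np.array([0.1, 0.2])
            )
            print(result.fun)
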
    .. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
    .. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
    .. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
    .. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
    """  # noqa: E501
    if variation <= 0:
        raise ValueError(f"variation must be positive. Got {variation}.")
    if maxiter < 1:
        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

    if optimize_hyperparameters_args is None:
        optimize_hyperparameters_args = dict(method="L-BFGS-B")

    # The variation hyperparameter is optimized via its square root, so that
    # the squared value stays nonnegative during the unconstrained search.
    variation_param = math.sqrt(variation)
    params = x0.copy()
    converged = False
    intermediate_result = OptimizeResult(
        x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
    )

    # Wrap the ansatz and the Hamiltonian so that function evaluations and
    # operator applications are counted in the intermediate result.
    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)

    for i in range(maxiter):
        # Evaluate the ansatz state and its finite-difference Jacobian, then
        # orthogonalize the Jacobian columns against the current state.
        vec = params_to_vec(params)
        jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon)
        jac = orthogonalize_columns(jac, vec)

        energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian)
        intermediate_result.njev += 1

        # Report the state reached by the previous parameter update.
        if i > 0 and callback is not None:
            intermediate_result.x = params
            intermediate_result.fun = energy
            intermediate_result.jac = grad
            intermediate_result.overlap_mat = overlap_mat
            intermediate_result.variation = variation
            callback(intermediate_result)

        if np.linalg.norm(grad) < gtol:
            converged = True
            break

        if optimize_hyperparameters:
            # Minimize the energy of the updated state as a function of the
            # square root of the variation hyperparameter.
            def f(x: np.ndarray) -> float:
                (variation_param,) = x
                variation = variation_param**2
                param_update = _get_param_update(
                    grad,
                    overlap_mat,
                    variation,
                    cond=cond,
                )
                vec = params_to_vec(params + param_update)
                return np.vdot(vec, hamiltonian @ vec).real

            result = minimize(
                f,
                x0=[variation_param],
                **optimize_hyperparameters_args,
            )
            (variation_param,) = result.x
            variation = variation_param**2

        # Update the parameters using the chosen variation.
        param_update = _get_param_update(
            grad,
            overlap_mat,
            variation,
            cond=cond,
        )
        params = params + param_update
        intermediate_result.nit += 1

    # Evaluate the energy of the final parameters and report the final state.
    # The gradient at the final parameters is not computed, so the ``jac``
    # attribute is dropped from the intermediate result.
    vec = params_to_vec(params)
    energy = np.vdot(vec, hamiltonian @ vec).real

    intermediate_result.x = params
    intermediate_result.fun = energy
    del intermediate_result.jac
    if callback is not None:
        callback(intermediate_result)

    if converged:
        success = True
        message = "Convergence: Norm of projected gradient <= gtol."
    else:
        success = False
        message = "Stop: Total number of iterations reached limit."

    return OptimizeResult(
        x=params,
        success=success,
        message=message,
        fun=energy,
        jac=grad,
        nfev=intermediate_result.nfev,
        njev=intermediate_result.njev,
        nlinop=intermediate_result.nlinop,
        nit=intermediate_result.nit,
    )


def _sr_matrices(
    vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator
) -> tuple[float, np.ndarray, np.ndarray]:
    """Compute the energy, gradient, and overlap matrix for an SR step."""
    energy = np.vdot(vec, hamiltonian @ vec)
    # The energy gradient is twice the real part of <psi|H|d psi>, and the
    # overlap matrix is the Gram matrix of the Jacobian columns.
    grad = vec.conj() @ (hamiltonian @ jac)
    overlap_mat = jac.T.conj() @ jac
    return energy.real, 2 * grad.real, overlap_mat.real


def _get_param_update(
    grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float
) -> np.ndarray:
    # Solve overlap_mat @ x = -0.5 * variation * grad in the least-squares sense.
    x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond)
    return x