From 5ded72d4803f460e92b418040e9af58280c75acc Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sun, 12 Oct 2025 11:22:47 -0700 Subject: [PATCH 01/11] snes solver: Pseudo Transient Continuation method Setting `pseudo_time = true` now enables Pseudo-timestepping, in which each cell has a separate timestep. The timestep in each cell is set inversely proportional to the residual (RMS time-derivative of all quantities in cell). Recommend enabling `pid_controller` that multiplies all timesteps by the same factor, to control the number of nonlinear iterations. Output time uses the minimum timestep in the domain, so simulations shouldn't need to run for as long as when all cells are evolved at the same speed. --- src/solver/impls/snes/snes.cxx | 522 +++++++++++++++++++++++++++------ src/solver/impls/snes/snes.hxx | 22 +- 2 files changed, 456 insertions(+), 88 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index de6c54388d..49650b7b97 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -83,8 +84,7 @@ PetscErrorCode FormFunctionForDifferencing(void* ctx, Vec x, Vec f) { * * This can be a linearised and simplified form of FormFunction */ -PetscErrorCode FormFunctionForColoring(void* UNUSED(snes), Vec x, Vec f, - void* ctx) { +PetscErrorCode FormFunctionForColoring(void* UNUSED(snes), Vec x, Vec f, void* ctx) { return static_cast(ctx)->snes_function(x, f, true); } @@ -96,7 +96,7 @@ PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) { PetscFunctionReturn(s->precon(x, y)); } -} +} // namespace SNESSolver::SNESSolver(Options* opts) : Solver(opts), @@ -138,6 +138,21 @@ SNESSolver::SNESSolver(Options* opts) (*options)["timestep_factor_on_lower_its"] .doc("Multiply timestep if iterations are below lower_its") .withDefault(1.4)), + pseudo_time((*options)["pseudo_time"] + .doc("Pseudo-Transient Continuation method?") + .withDefault(false)), + pseudo_alpha((*options)["pseudo_alpha"] + .doc("Sets timestep using dt = alpha / residual") + .withDefault(100. * atol * timestep)), + pseudo_growth_factor((*options)["pseudo_growth_factor"] + .doc("PTC growth factor on success") + .withDefault(1.1)), + pseudo_reduction_factor((*options)["pseudo_reduction_factor"] + .doc("PTC reduction factor on failure") + .withDefault(0.5)), + pseudo_max_ratio((*options)["pseudo_max_ratio"] + .doc("PTC maximum timestep ratio between neighbors") + .withDefault(2.)), pid_controller( (*options)["pid_controller"].doc("Use PID controller?").withDefault(false)), target_its((*options)["target_its"].doc("Target snes iterations").withDefault(7)), @@ -277,6 +292,21 @@ int SNESSolver::init() { CHKERRQ(ierr); } + if (pseudo_time) { + // Storage for per-variable timestep + PetscCall(VecDuplicate(snes_x, &dt_vec)); + // Starting timestep + PetscCall(VecSet(dt_vec, timestep)); + + // Storage for previous residual. Used to compare + // residuals before and after step, and adjust timestep accordingly + PetscCall(VecDuplicate(snes_x, &previous_f)); + + // Diagnostic outputs + pseudo_timestep = timestep; + pseudo_residual = 0.0; + } + // Nonlinear solver interface (SNES) output_info.write("Create SNES\n"); SNESCreate(BoutComm::get(), &snes); @@ -767,6 +797,19 @@ int SNESSolver::run() { CHKERRQ(ierr); } + if (pseudo_time) { + // Calculate the initial residual in snes_f + run_rhs(simtime); + { + BoutReal* fdata = nullptr; + ierr = VecGetArray(snes_f, &fdata); + CHKERRQ(ierr); + save_derivs(fdata); + ierr = VecRestoreArray(snes_f, &fdata); + CHKERRQ(ierr); + } + } + BoutReal target = simtime; for (int s = 0; s < getNumberOutputSteps(); s++) { target += getOutputTimestep(); @@ -837,46 +880,67 @@ int SNESSolver::run() { // Copy the state (snes_x) into initial values (x0) VecCopy(snes_x, x0); - if (timestep < dt_min_reset) { - // Hit the minimum timestep, probably through repeated failures + if (pseudo_time) { + // Pseudo-Transient Continuation + // Each evolving quantity may have its own timestep + // Set timestep and dt scalars to the minimum of dt_vec + + PetscCall(VecMin(dt_vec, nullptr, ×tep)); + dt = timestep; - if (saved_jacobian_lag != 0) { - // Already tried this and it didn't work - throw BoutException("Solver failed after many attempts"); + // Copy residual from snes_f to previous_f + // The value of snes_f is set before the timestepping loop + // then calculated after the timestep + PetscCall(VecCopy(snes_f, previous_f)); + + looping = true; + if (simtime + timestep >= target) { + looping = false; } + } else { + // Timestepping - // Try resetting the preconditioner, turn off predictor, and use a large timestep - SNESGetLagJacobian(snes, &saved_jacobian_lag); - SNESSetLagJacobian(snes, 1); - timestep = getOutputTimestep(); - predictor = false; // Predictor can cause problems in near steady-state. - } + if (timestep < dt_min_reset) { + // Hit the minimum timestep, probably through repeated failures - // Set the timestep - dt = timestep; - looping = true; - if (simtime + dt >= target) { - // Note: When the timestep is changed the preconditioner needs to be updated - // => Step over the output time and interpolate if not matrix free - - if (matrix_free) { - // Ensure that the timestep goes to the next output time and then stops. - // This avoids the need to interpolate - dt = target - simtime; + if (saved_jacobian_lag != 0) { + // Already tried this and it didn't work + throw BoutException("Solver failed after many attempts"); + } + + // Try resetting the preconditioner, turn off predictor, and use a large timestep + SNESGetLagJacobian(snes, &saved_jacobian_lag); + SNESSetLagJacobian(snes, 1); + timestep = getOutputTimestep(); + predictor = false; // Predictor can cause problems in near steady-state. } - looping = false; - } - if (predictor and (time1 > 0.0)) { - // Use (time1, x1) and (simtime, x0) to make prediction - // snes_x <- x0 + (dt / (simtime - time1)) * (x0 - x1) - // snes_x <- -β * x1 + (1 + β) * snes_x - BoutReal beta = dt / (simtime - time1); - VecAXPBY(snes_x, -beta, (1. + beta), x1); - } + // Set the timestep + dt = timestep; + looping = true; + if (simtime + dt >= target) { + // Note: When the timestep is changed the preconditioner needs to be updated + // => Step over the output time and interpolate if not matrix free + + if (matrix_free) { + // Ensure that the timestep goes to the next output time and then stops. + // This avoids the need to interpolate + dt = target - simtime; + } + looping = false; + } + + if (predictor and (time1 > 0.0)) { + // Use (time1, x1) and (simtime, x0) to make prediction + // snes_x <- x0 + (dt / (simtime - time1)) * (x0 - x1) + // snes_x <- -β * x1 + (1 + β) * snes_x + BoutReal beta = dt / (simtime - time1); + VecAXPBY(snes_x, -beta, (1. + beta), x1); + } - if (pid_controller) { - SNESSetLagJacobian(snes, lag_jacobian); + if (pid_controller) { + SNESSetLagJacobian(snes, lag_jacobian); + } } // Run the solver @@ -917,8 +981,16 @@ int SNESSolver::run() { ++snes_failures; steps_since_snes_failure = 0; - // Try a smaller timestep - timestep *= timestep_factor_on_failure; + if (pseudo_time) { + // Global scaling of timesteps + // Note: A better strategy might be to reduce timesteps + // in problematic cells. + PetscCall(VecScale(dt_vec, timestep_factor_on_failure)); + + } else { + // Try a smaller timestep + timestep *= timestep_factor_on_failure; + } // Restore state VecCopy(x0, snes_x); @@ -972,7 +1044,8 @@ int SNESSolver::run() { if (nl_its == 0) { // This can occur even with SNESSetForceIteration - // Results in simulation state freezing and rapidly going to the end + // Results in simulation state freezing and rapidly going + // to the end if (scale_vars) { // scaled_x <- snes_x * var_scaling_factors @@ -1016,8 +1089,14 @@ int SNESSolver::run() { // Gather and print diagnostic information output.print("\r"); // Carriage return for printing to screen + + // if (pseudo_time) { + // output.write("\tTime {} alpha {} min_timestep {} nl_iter {} lin_iter {} reason {}", + // simtime, pseudo_alpha, timestep, nl_its, lin_its, static_cast(reason)); + // } else { output.write("Time: {}, timestep: {}, nl iter: {}, lin iter: {}, reason: {}", simtime, timestep, nl_its, lin_its, static_cast(reason)); + // } if (snes_failures > 0) { output.write(", SNES failures: {}", snes_failures); } @@ -1069,62 +1148,256 @@ int SNESSolver::run() { } #endif // PETSC_VERSION_GE(3,20,0) - if (looping) { + if (pseudo_time) { + // Adjust local timesteps - if (pid_controller) { - // Changing the timestep. - // Note: The preconditioner depends on the timestep, - // so we recalculate the jacobian and the preconditioner - // every time the timestep changes + // Calculate residuals + if (scale_vars) { + // scaled_x <- snes_x * var_scaling_factors + PetscCall(VecPointwiseMult(scaled_x, snes_x, var_scaling_factors)); + const BoutReal* xdata = nullptr; + PetscCall(VecGetArrayRead(scaled_x, &xdata)); + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); + } else { + const BoutReal* xdata = nullptr; + PetscCall(VecGetArrayRead(snes_x, &xdata)); + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(snes_x, &xdata)); + } + run_rhs(simtime); + { + BoutReal* fdata = nullptr; + PetscCall(VecGetArray(snes_f, &fdata)); + save_derivs(fdata); + PetscCall(VecRestoreArray(snes_f, &fdata)); + } - timestep = pid(timestep, nl_its); + // Compare previous_f with snes_f + // Use a per-cell timestep so that e.g density and pressure + // evolve in a way consistent with the equation of state. + + // Reading the residual vectors + const BoutReal* previous_residual = nullptr; + PetscCall(VecGetArrayRead(previous_f, &previous_residual)); + const BoutReal* current_residual = nullptr; + PetscCall(VecGetArrayRead(snes_f, ¤t_residual)); + // Modifying the dt_vec values + BoutReal* dt_data = nullptr; + PetscCall(VecGetArray(dt_vec, &dt_data)); + + // Note: The ordering of quantities in the PETSc vectors + // depends on the Solver::loop_vars function + Mesh* mesh = bout::globals::mesh; + int idx = 0; // Index into PETSc Vecs + + // Boundary cells + for (const auto& i2d : mesh->getRegion2D("RGN_BNDRY")) { + // Field2D quantities evolved together + int count = 0; + BoutReal R0 = 0.0; + BoutReal R1 = 0.0; - // NOTE(malamast): Do we really need this? - // Recompute Jacobian (for now) - if (saved_jacobian_lag == 0) { - SNESGetLagJacobian(snes, &saved_jacobian_lag); - SNESSetLagJacobian(snes, 1); + for (const auto& f : f2d) { + if (!f.evolve_bndry) { + continue; // Not evolving boundary => Skip + } + R0 += SQ(previous_residual[idx + count]); + R1 += SQ(current_residual[idx + count]); + ++count; + } + if (count > 0) { + R0 = sqrt(R0 / count); + R1 = sqrt(R1 / count); + + // Adjust timestep for these quantities + BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); + for (int i = 0; i != count; ++i) { + dt_data[idx++] = new_timestep; + } } - } else { + // Field3D quantities evolved together within a cell + for (int jz = 0; jz < mesh->LocalNz; jz++) { + count = 0; + R0 = 0.0; + R1 = 0.0; + for (const auto& f : f3d) { + if (!f.evolve_bndry) { + continue; // Not evolving boundary => Skip + } + R0 += SQ(previous_residual[idx + count]); + R1 += SQ(current_residual[idx + count]); + ++count; + } + if (count > 0) { + R0 = sqrt(R0 / count); + R1 = sqrt(R1 / count); + + BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); + + auto i3d = mesh->ind2Dto3D(i2d, jz); + pseudo_residual[i3d] = R1; + pseudo_timestep[i3d] = new_timestep; + + for (int i = 0; i != count; ++i) { + dt_data[idx++] = new_timestep; + } + } + } + } + + // Bulk of domain. + // These loops don't check the boundary flags + for (const auto& i2d : mesh->getRegion2D("RGN_NOBNDRY")) { + // Field2D quantities evolved together + if (f2d.size() > 0) { + BoutReal R0 = 0.0; + BoutReal R1 = 0.0; + for (std::size_t i = 0; i != f2d.size(); ++i) { + R0 += SQ(previous_residual[idx + i]); + R1 += SQ(current_residual[idx + i]); + } + R0 = sqrt(R0 / f2d.size()); + R1 = sqrt(R1 / f2d.size()); + + // Adjust timestep for these quantities + BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); + for (std::size_t i = 0; i != f2d.size(); ++i) { + dt_data[idx++] = new_timestep; + } + } - // Consider changing the timestep. - // Note: The preconditioner depends on the timestep, - // so if it is not recalculated the it will be less - // effective. - if ((nl_its <= lower_its) && (timestep < max_timestep) - && (steps_since_snes_failure > 2)) { - // Increase timestep slightly - timestep *= timestep_factor_on_lower_its; - - timestep = std::min(timestep, max_timestep); - - // Note: Setting the SNESJacobianFn to NULL retains - // previously set evaluation function. - // - // The SNES Jacobian is a combination of the RHS Jacobian - // and a factor involving the timestep. - // Depends on equation_form - // -> Probably call SNESSetJacobian(snes, Jfd, Jfd, NULL, fdcoloring); - - if (static_cast(lin_its) / nl_its > 4) { - // Recompute Jacobian (for now) - if (saved_jacobian_lag == 0) { - SNESGetLagJacobian(snes, &saved_jacobian_lag); - SNESSetLagJacobian(snes, 1); + // Field3D quantities evolved together within a cell + if (f3d.size() > 0) { + for (int jz = 0; jz < mesh->LocalNz; jz++) { + BoutReal R0 = 0.0; + BoutReal R1 = 0.0; + for (std::size_t i = 0; i != f3d.size(); ++i) { + R0 += SQ(previous_residual[idx + i]); + R1 += SQ(current_residual[idx + i]); + } + R0 = sqrt(R0 / f3d.size()); + R1 = sqrt(R1 / f3d.size()); + BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); + + auto i3d = mesh->ind2Dto3D(i2d, jz); + pseudo_residual[i3d] = R1; + + // Compare to neighbors + BoutReal min_neighboring_dt = max_timestep; + BoutReal mean_neighboring_dt = 0.0; + int neighbor_count = 0; + if (i3d.x() != 0) { + BoutReal val = pseudo_timestep[i3d.xm()]; + min_neighboring_dt = std::min(min_neighboring_dt, val); + mean_neighboring_dt += val; + ++neighbor_count; + } + if (i3d.x() != mesh->LocalNx - 1) { + BoutReal val = pseudo_timestep[i3d.xp()]; + min_neighboring_dt = std::min(min_neighboring_dt, val); + mean_neighboring_dt += val; + ++neighbor_count; + } + if (i3d.y() != 0) { + BoutReal val = pseudo_timestep[i3d.ym()]; + min_neighboring_dt = std::min(min_neighboring_dt, val); + mean_neighboring_dt += val; + ++neighbor_count; + } + if (i3d.x() != mesh->LocalNy - 1) { + BoutReal val = pseudo_timestep[i3d.yp()]; + min_neighboring_dt = std::min(min_neighboring_dt, val); + mean_neighboring_dt += val; + ++neighbor_count; + } + mean_neighboring_dt /= neighbor_count; + + // Smooth + //new_timestep = 0.1 * mean_neighboring_dt + 0.9 * new_timestep; + + // Limit ratio of timestep between neighboring cells + new_timestep = + std::min(new_timestep, pseudo_max_ratio * min_neighboring_dt); + + pseudo_timestep[i3d] = new_timestep; + + for (std::size_t i = 0; i != f3d.size(); ++i) { + dt_data[idx++] = new_timestep; } } + } + } + + // Restore Vec data arrays + PetscCall(VecRestoreArrayRead(previous_f, &previous_residual)); + PetscCall(VecRestoreArrayRead(snes_f, ¤t_residual)); + PetscCall(VecRestoreArray(dt_vec, &dt_data)); + + // Need timesteps on neighboring processors + // to limit ratio + mesh->communicate(pseudo_timestep); + // Neumann boundary so timestep isn't pinned at boundaries + pseudo_timestep.applyBoundary("neumann"); + + if (pid_controller) { + // Adjust pseudo_alpha based on nonlinear iterations + pseudo_alpha = pid(pseudo_alpha, nl_its, max_timestep * atol * 100); + } - } else if (nl_its >= upper_its) { - // Reduce timestep slightly - timestep *= timestep_factor_on_upper_its; + } else if (pid_controller) { + // Changing the timestep using a PID controller. + // Note: The preconditioner depends on the timestep, + // so we recalculate the jacobian and the preconditioner + // every time the timestep changes - // Recompute Jacobian + timestep = pid(timestep, nl_its, max_timestep); + + // NOTE(malamast): Do we really need this? + // Recompute Jacobian (for now) + if (saved_jacobian_lag == 0) { + SNESGetLagJacobian(snes, &saved_jacobian_lag); + SNESSetLagJacobian(snes, 1); + } + + } else { + // Consider changing the timestep. + // Note: The preconditioner depends on the timestep, + // so if it is not recalculated the it will be less + // effective. + if ((nl_its <= lower_its) && (timestep < max_timestep) + && (steps_since_snes_failure > 2)) { + // Increase timestep slightly + timestep *= timestep_factor_on_lower_its; + + timestep = std::min(timestep, max_timestep); + + // Note: Setting the SNESJacobianFn to NULL retains + // previously set evaluation function. + // + // The SNES Jacobian is a combination of the RHS Jacobian + // and a factor involving the timestep. + // Depends on equation_form + // -> Probably call SNESSetJacobian(snes, Jfd, Jfd, NULL, fdcoloring); + + if (static_cast(lin_its) / nl_its > 4) { + // Recompute Jacobian (for now) if (saved_jacobian_lag == 0) { SNESGetLagJacobian(snes, &saved_jacobian_lag); SNESSetLagJacobian(snes, 1); } } + + } else if (nl_its >= upper_its) { + // Reduce timestep slightly + timestep *= timestep_factor_on_upper_its; + + // Recompute Jacobian + if (saved_jacobian_lag == 0) { + SNESGetLagJacobian(snes, &saved_jacobian_lag); + SNESSetLagJacobian(snes, 1); + } } } snes_failures = 0; @@ -1132,7 +1405,7 @@ int SNESSolver::run() { if (!matrix_free) { ASSERT2(simtime >= target); - ASSERT2(simtime - dt < target); + ASSERT2(simtime - dt <= target); // Stepped over output timestep => Interpolate // snes_x is the solution at t = simtime // x0 is the solution at t = simtime - dt @@ -1181,6 +1454,56 @@ int SNESSolver::run() { return 0; } +/// Switched Evolution Relaxation (SER) +BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, + BoutReal previous_residual, + BoutReal current_residual) { + + // Timestep inversely proportional to the residual, clipped to avoid + // rapid changes in timestep. + return std::min( + {std::max({pseudo_alpha / current_residual, previous_timestep / 1.5, dt_min_reset}), + 1.5 * previous_timestep, max_timestep}); + + /* + + // Alternative strategy based on history of residuals + // A hybrid strategy may be most effective, in which the timestep is + // inversely proportional to residual initially, or when residuals are large, + // and then the method transitions to being history-based + + const BoutReal converged_threshold = 100 * atol; + const BoutReal transition_threshold = 1000 * atol; + + if (current_residual < converged_threshold) { + return max_timestep; + } + + // Smoothly transition from SER updates to frozen timestep + if (current_residual < transition_threshold) { + BoutReal reduction_ratio = current_residual / previous_residual; + + if (reduction_ratio < 0.8) { + return std::min(0.5*(pseudo_growth_factor + 1.) * previous_timestep, + max_timestep); + } else if (reduction_ratio > 1.2) { + return std::max(0.5*(pseudo_reduction_factor + 1) * previous_timestep, dt_min_reset); + } + // Leave unchanged if residual changes are small + return previous_timestep; + } + + if (current_residual <= previous_residual) { + // Success + return std::min(pseudo_growth_factor * previous_timestep, + max_timestep); + } + // Failed + // Consider rejecting the step + return std::max(pseudo_reduction_factor * previous_timestep, dt_min_reset); + */ +} + // f = rhs PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { // Get data from PETSc into BOUT++ fields @@ -1240,13 +1563,24 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { // f = (x0 - x)/Δt + f // First calculate x - x0 to minimise floating point issues VecWAXPY(delta_x, -1.0, x0, x); // delta_x = x - x0 - VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt + if (pseudo_time) { + // dt can be different for each quantity + VecPointwiseDivide(delta_x, delta_x, dt_vec); // delta_x /= dt + VecAXPY(f, -1., delta_x); // f <- f - delta_x + } else { + VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt + } break; } case BoutSnesEquationForm::backward_euler: { // Backward Euler // Set f = x - x0 - Δt*f - VecAYPX(f, -dt, x); // f <- x - Δt*f + if (pseudo_time) { + VecPointwiseMult(f, f, dt_vec); // f <- Δt*f + VecAYPX(f, -1, x); // f <- x - f + } else { + VecAYPX(f, -dt, x); // f <- x - Δt*f + } VecAXPY(f, -1.0, x0); // f <- f - x0 break; } @@ -1443,7 +1777,7 @@ void SNESSolver::updateColoring() { } } -BoutReal SNESSolver::pid(BoutReal timestep, int nl_its) { +BoutReal SNESSolver::pid(BoutReal timestep, int nl_its, BoutReal max_dt) { /* ---------- multiplicative PID factors ---------- */ const BoutReal facP = std::pow(double(target_its) / double(nl_its), kP); @@ -1452,11 +1786,11 @@ BoutReal SNESSolver::pid(BoutReal timestep, int nl_its) { / double(nl_its) / double(nl_its_prev2), kD); - // clamp groth factor to avoid huge changes + // clamp growth factor to avoid huge changes const BoutReal fac = std::clamp(facP * facI * facD, 0.2, 5.0); /* ---------- update timestep and history ---------- */ - const BoutReal dt_new = std::min(timestep * fac, max_timestep); + const BoutReal dt_new = std::min(timestep * fac, max_dt); nl_its_prev2 = nl_its_prev; nl_its_prev = nl_its; @@ -1464,4 +1798,20 @@ BoutReal SNESSolver::pid(BoutReal timestep, int nl_its) { return dt_new; } +void SNESSolver::outputVars(Options& output_options, bool save_repeat) { + // Call base class function + Solver::outputVars(output_options, save_repeat); + + if (!save_repeat) { + return; // Don't save diagnostics to restart files + } + + if (pseudo_time) { + output_options["snes_pseudo_residual"].assignRepeat(pseudo_residual, "t", save_repeat, + "SNESSolver"); + output_options["snes_pseudo_timestep"].assignRepeat(pseudo_timestep, "t", save_repeat, + "SNESSolver"); + } +} + #endif // BOUT_HAS_PETSC diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 31deae6f06..c1bb3ecc56 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -39,6 +39,7 @@ class SNESSolver; #include #include +#include #include #include @@ -88,9 +89,12 @@ public: /// finite difference approximated Jacobian. PetscErrorCode scaleJacobian(Mat B); + /// Save diagnostics to output + void outputVars(Options& output_options, bool save_repeat = true) override; + private: BoutReal timestep; ///< Internal timestep - BoutReal dt; ///< Current timestep used in snes_function + BoutReal dt; ///< Current timestep used in snes_function. BoutReal dt_min_reset; ///< If dt falls below this, reset solve BoutReal max_timestep; ///< Maximum timestep @@ -107,6 +111,20 @@ private: BoutReal timestep_factor_on_upper_its; BoutReal timestep_factor_on_lower_its; + // Pseudo-Transient Continuation (PTC) variables + bool pseudo_time; ///< Use Pseudo time-stepping + BoutReal pseudo_alpha; ///< dt = alpha / residual + BoutReal pseudo_growth_factor; ///< Timestep increase 1.1 - 1.2 + BoutReal pseudo_reduction_factor; ///< Timestep decrease 0.5 + BoutReal pseudo_max_ratio; ///< Maximum timestep ratio between neighboring cells + Vec dt_vec; ///< Each quantity can have its own timestep + Vec previous_f; ///< Previous residual + /// Decide the next pseudo-timestep + BoutReal updatePseudoTimestep(BoutReal previous_timestep, BoutReal previous_residual, + BoutReal current_residual); + Field3D pseudo_residual; ///< Diagnostic output + Field3D pseudo_timestep; + ///< PID controller parameters bool pid_controller; ///< Use PID controller? int target_its; ///< Target number of nonlinear iterations for the PID controller. @@ -118,7 +136,7 @@ private: int nl_its_prev; int nl_its_prev2; - BoutReal pid(BoutReal timestep, int nl_its); ///< Updates the timestep + BoutReal pid(BoutReal timestep, int nl_its, BoutReal max_dt); ///< Updates the timestep bool diagnose; ///< Output additional diagnostics bool diagnose_failures; ///< Print diagnostics on SNES failures From e9b8d12c45b8047f5b5c6a4da881db0c8df0f803 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 10:16:15 -0700 Subject: [PATCH 02/11] snes: Remove pseudo_time switch, use equation_form=pseudo_transient Rather than having a separate switch, just use equation_form to select the Pseudo-Transient Continuation method. --- include/bout/mesh.hxx | 2 +- manual/sphinx/user_docs/time_integration.rst | 186 ++++++++++++++----- src/solver/impls/snes/snes.cxx | 62 +++---- src/solver/impls/snes/snes.hxx | 8 +- 4 files changed, 168 insertions(+), 90 deletions(-) diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index 02e2a23905..1db25adf78 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -757,7 +757,7 @@ public: void addRegionPerp(const std::string& region_name, const Region& region); /// Converts an Ind2D to an Ind3D using calculation - Ind3D ind2Dto3D(const Ind2D& ind2D, int jz = 0) { + Ind3D ind2Dto3D(const Ind2D& ind2D, int jz = 0) const { return {ind2D.ind * LocalNz + jz, LocalNy, LocalNz}; } diff --git a/manual/sphinx/user_docs/time_integration.rst b/manual/sphinx/user_docs/time_integration.rst index c1526c14e2..373b5cdbe6 100644 --- a/manual/sphinx/user_docs/time_integration.rst +++ b/manual/sphinx/user_docs/time_integration.rst @@ -361,16 +361,145 @@ And the adaptive timestepping options: Backward Euler - SNES --------------------- -The `beuler` or `snes` solver type (either name can be used) is -intended mainly for solving steady-state problems, so integrates in -time using a stable but low accuracy method (Backward Euler). It uses -PETSc's SNES solvers to solve the nonlinear system at each timestep, -and adjusts the internal timestep to keep the number of SNES -iterations within a given range. +The `beuler` or `snes` solver type (either name can be used) is a PETSc-based implicit +solver for finding steady-state solutions to systems of partial differential equations. +It supports multiple solution strategies including backward Euler timestepping, +direct Newton iteration, and Pseudo-Transient Continuation (PTC) with Switched +Evolution Relaxation (SER). + +Basic Configuration +~~~~~~~~~~~~~~~~~~~ + +The SNES solver is configured through the ``[solver]`` section of the input file: + +.. code-block:: ini + + [solver] + type = snes + + # Nonlinear solver settings + snes_type = newtonls # anderson, newtonls, newtontr, nrichardson + atol = 1e-7 # Absolute tolerance + rtol = 1e-6 # Relative tolerance + stol = 1e-12 # Solution change tolerance + max_nonlinear_iterations = 20 # Maximum SNES iterations per solve + + # Linear solver settings + ksp_type = fgmres # Linear solver: gmres, bicgstab, etc. + maxl = 20 # Maximum linear iterations + pc_type = ilu # Preconditioner: ilu, bjacobi, hypre, etc. + +Timestepping Modes +------------------ + +The solver supports several timestepping strategies controlled by ``equation_form``: + +**Backward Euler (default)** + Standard implicit backward Euler method. Good for general timestepping. + + .. code-block:: ini + + equation_form = rearranged_backward_euler # Default + + This method has low accuracy in time but its dissipative properties + are helpful when evolving to steady state solutions. + +**Direct Newton** + Solves the steady-state problem F(u) = 0 directly without timestepping. + + .. code-block:: ini + + equation_form = direct_newton + + This method is unlikely to converge unless the system is very close + to steady state. + +**Pseudo-Transient Continuation** + Uses pseudo-time to guide the solution to steady state. Recommended for + highly nonlinear problems where Newton's method fails. + + .. code-block:: ini + + equation_form = pseudo_transient + + This uses the same form as rearranged_backward_euler, but the time step + can be different for each cell. + + +Jacobian Finite Difference with Coloring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default and recommended approach for most problems: + +.. code-block:: ini + + [solver] + use_coloring = true # Enable (default) + lag_jacobian = 50 # Reuse Jacobian for this many iterations + + # Stencil shape (determines Jacobian sparsity pattern) + stencil:taxi = 2 # Taxi-cab distance (default) + stencil:square = 0 # Square stencil extent + stencil:cross = 0 # Cross stencil extent + +The coloring algorithm exploits the sparse structure of the Jacobian to reduce +the number of function evaluations needed for finite differencing. + +Jacobian coloring stencil +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The stencil used to create the Jacobian colouring can be varied, +depending on which numerical operators are in use. + +``solver:stencil:cross = N`` +e.g. for N == 2 + +.. code-block:: bash + + * + * + * * x * * + * + * + + +``solver:stencil:square = N`` +e.g. for N == 2 + +.. code-block:: bash + + * * * * * + * * * * * + * * x * * + * * * * * + * * * * * + +``solver:stencil:taxi = N`` +e.g. for N == 2 + +.. code-block:: bash + + * + * * * + * * x * * + * * * + * + +Setting ``solver:force_symmetric_coloring = true``, will make sure +that the jacobian colouring matrix is symmetric. This will often +include a few extra non-zeros that the stencil will miss otherwise + + + +---------------------------+---------------+----------------------------------------------------+ | Option | Default |Description | +===========================+===============+====================================================+ +| pseudo_time | false | Pseudo-Transient Continuation (PTC) method, using | +| | | a different timestep for each cell. | ++---------------------------+---------------+----------------------------------------------------+ +| pseudo_max_ratio | 2. | Maximum timestep ratio between neighboring cells | ++---------------------------+---------------+----------------------------------------------------+ | snes_type | newtonls | PETSc SNES nonlinear solver (try anderson, qn) | +---------------------------+---------------+----------------------------------------------------+ | ksp_type | gmres | PETSc KSP linear solver | @@ -423,6 +552,8 @@ iterations within a given range. The predictor is linear extrapolation from the last two timesteps. It seems to be effective, but can be disabled by setting ``predictor = false``. + + The default `newtonls` SNES type can be very effective if combined with Jacobian coloring: The coloring enables the Jacobian to be calculated relatively efficiently; once a Jacobian matrix has been @@ -456,50 +587,13 @@ Preconditioner types: Enable with command-line args ``-pc_type hypre -pc_hypre_type euclid -pc_hypre_euclid_levels k`` where ``k`` is the level (1-8 typically). -Jacobian coloring stencil -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The stencil used to create the Jacobian colouring can be varied, -depending on which numerical operators are in use. - - -``solver:stencil:cross = N`` -e.g. for N == 2 - -.. code-block:: bash - - * - * - * * x * * - * - * +Pseudo-Transient Continuation (PTC) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``solver:stencil:square = N`` -e.g. for N == 2 -.. code-block:: bash - - * * * * * - * * * * * - * * x * * - * * * * * - * * * * * +Saves diagnostic ``Field3D`` output variables ``snes_pseudo_residual`` and ``snes_pseudo_timestep``. -``solver:stencil:taxi = N`` -e.g. for N == 2 - -.. code-block:: bash - - * - * * * - * * x * * - * * * - * - -Setting ``solver:force_symmetric_coloring = true``, will make sure -that the jacobian colouring matrix is symmetric. This will often -include a few extra non-zeros that the stencil will miss otherwise ODE integration --------------- diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 49650b7b97..b48afccc48 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -106,6 +106,11 @@ SNESSolver::SNESSolver(Options* opts) .doc("If dt falls below this, reset to starting dt") .withDefault(1e-6)), max_timestep((*options)["max_timestep"].doc("Maximum timestep").withDefault(1e37)), + equation_form( + (*options)["equation_form"] + .doc("Form of equation to solve: rearranged_backward_euler (default);" + " pseudo_transient; backward_euler; direct_newton") + .withDefault(BoutSnesEquationForm::rearranged_backward_euler)), snes_type((*options)["snes_type"] .doc("PETSc nonlinear solver method to use") .withDefault("anderson")), @@ -138,9 +143,6 @@ SNESSolver::SNESSolver(Options* opts) (*options)["timestep_factor_on_lower_its"] .doc("Multiply timestep if iterations are below lower_its") .withDefault(1.4)), - pseudo_time((*options)["pseudo_time"] - .doc("Pseudo-Transient Continuation method?") - .withDefault(false)), pseudo_alpha((*options)["pseudo_alpha"] .doc("Sets timestep using dt = alpha / residual") .withDefault(100. * atol * timestep)), @@ -164,11 +166,6 @@ SNESSolver::SNESSolver(Options* opts) diagnose_failures((*options)["diagnose_failures"] .doc("Print more diagnostics when SNES fails") .withDefault(false)), - equation_form( - (*options)["equation_form"] - .doc("Form of equation to solve: rearranged_backward_euler (default);" - " pseudo_transient; backward_euler; direct_newton") - .withDefault(BoutSnesEquationForm::rearranged_backward_euler)), predictor((*options)["predictor"].doc("Use linear predictor?").withDefault(true)), use_precon((*options)["use_precon"] .doc("Use user-supplied preconditioner?") @@ -292,7 +289,7 @@ int SNESSolver::init() { CHKERRQ(ierr); } - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Storage for per-variable timestep PetscCall(VecDuplicate(snes_x, &dt_vec)); // Starting timestep @@ -797,7 +794,7 @@ int SNESSolver::run() { CHKERRQ(ierr); } - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Calculate the initial residual in snes_f run_rhs(simtime); { @@ -880,7 +877,7 @@ int SNESSolver::run() { // Copy the state (snes_x) into initial values (x0) VecCopy(snes_x, x0); - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Pseudo-Transient Continuation // Each evolving quantity may have its own timestep // Set timestep and dt scalars to the minimum of dt_vec @@ -981,7 +978,7 @@ int SNESSolver::run() { ++snes_failures; steps_since_snes_failure = 0; - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Global scaling of timesteps // Note: A better strategy might be to reduce timesteps // in problematic cells. @@ -1089,14 +1086,8 @@ int SNESSolver::run() { // Gather and print diagnostic information output.print("\r"); // Carriage return for printing to screen - - // if (pseudo_time) { - // output.write("\tTime {} alpha {} min_timestep {} nl_iter {} lin_iter {} reason {}", - // simtime, pseudo_alpha, timestep, nl_its, lin_its, static_cast(reason)); - // } else { output.write("Time: {}, timestep: {}, nl iter: {}, lin iter: {}, reason: {}", simtime, timestep, nl_its, lin_its, static_cast(reason)); - // } if (snes_failures > 0) { output.write(", SNES failures: {}", snes_failures); } @@ -1148,7 +1139,7 @@ int SNESSolver::run() { } #endif // PETSC_VERSION_GE(3,20,0) - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Adjust local timesteps // Calculate residuals @@ -1552,35 +1543,27 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { CHKERRQ(ierr); switch (equation_form) { - case BoutSnesEquationForm::pseudo_transient: { - // Pseudo-transient timestepping (as in UEDGE) - // f <- f - x/Δt - VecAXPY(f, -1. / dt, x); - break; - } case BoutSnesEquationForm::rearranged_backward_euler: { // Rearranged Backward Euler // f = (x0 - x)/Δt + f // First calculate x - x0 to minimise floating point issues VecWAXPY(delta_x, -1.0, x0, x); // delta_x = x - x0 - if (pseudo_time) { - // dt can be different for each quantity - VecPointwiseDivide(delta_x, delta_x, dt_vec); // delta_x /= dt - VecAXPY(f, -1., delta_x); // f <- f - delta_x - } else { - VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt - } + VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt + break; + } + case BoutSnesEquationForm::pseudo_transient: { + // Pseudo-transient timestepping. Same as Rearranged Backward Euler + // except that Δt is a vector + // f = (x0 - x)/Δt + f + VecWAXPY(delta_x, -1.0, x0, x); + VecPointwiseDivide(delta_x, delta_x, dt_vec); // delta_x /= dt + VecAXPY(f, -1., delta_x); // f <- f - delta_x break; } case BoutSnesEquationForm::backward_euler: { // Backward Euler // Set f = x - x0 - Δt*f - if (pseudo_time) { - VecPointwiseMult(f, f, dt_vec); // f <- Δt*f - VecAYPX(f, -1, x); // f <- x - f - } else { - VecAYPX(f, -dt, x); // f <- x - Δt*f - } + VecAYPX(f, -dt, x); // f <- x - Δt*f VecAXPY(f, -1.0, x0); // f <- f - x0 break; } @@ -1806,7 +1789,8 @@ void SNESSolver::outputVars(Options& output_options, bool save_repeat) { return; // Don't save diagnostics to restart files } - if (pseudo_time) { + if (equation_form == BoutSnesEquationForm::pseudo_transient) { + output_options["snes_pseudo_alpha"].assignRepeat(pseudo_alpha, "t", save_repeat, "SNESSolver"); output_options["snes_pseudo_residual"].assignRepeat(pseudo_residual, "t", save_repeat, "SNESSolver"); output_options["snes_pseudo_timestep"].assignRepeat(pseudo_timestep, "t", save_repeat, diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index c1bb3ecc56..257588e2a9 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -98,6 +98,9 @@ private: BoutReal dt_min_reset; ///< If dt falls below this, reset solve BoutReal max_timestep; ///< Maximum timestep + /// Form of the equation to solve + BoutSnesEquationForm equation_form; + std::string snes_type; BoutReal atol; ///< Absolute tolerance BoutReal rtol; ///< Relative tolerance @@ -112,7 +115,7 @@ private: BoutReal timestep_factor_on_lower_its; // Pseudo-Transient Continuation (PTC) variables - bool pseudo_time; ///< Use Pseudo time-stepping + // These are used if equation_form = pseudo_transient BoutReal pseudo_alpha; ///< dt = alpha / residual BoutReal pseudo_growth_factor; ///< Timestep increase 1.1 - 1.2 BoutReal pseudo_reduction_factor; ///< Timestep decrease 0.5 @@ -144,9 +147,6 @@ private: int nlocal; ///< Number of variables on local processor int neq; ///< Number of variables in total - /// Form of the equation to solve - BoutSnesEquationForm equation_form; - PetscLib lib; ///< Handles initialising, finalising PETSc Vec snes_f; ///< Used by SNES to store function Vec snes_x; ///< Result of SNES From a024d5858789f697c0a654e97dfcb99840a2b2ab Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 11:14:11 -0700 Subject: [PATCH 03/11] snes: Refactoring, split rhs_function from snes_function The rhs_function returns the time derivatives (residuals) that are used in the PTC method. The snes_function calls rhs_function, then modifies the function depending on equation_form. --- src/solver/impls/snes/snes.cxx | 137 +++++++++++++-------------------- src/solver/impls/snes/snes.hxx | 9 +++ 2 files changed, 63 insertions(+), 83 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index b48afccc48..cec0c3e058 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -18,6 +18,8 @@ #include #include "petscsnes.h" +#include "petscsys.h" +#include "petscvec.h" class ColoringStencil { private: @@ -253,8 +255,9 @@ int SNESSolver::init() { VecDuplicate(snes_x, &snes_f); VecDuplicate(snes_x, &x0); - if (equation_form == BoutSnesEquationForm::rearranged_backward_euler) { - // Need an intermediate vector for rearranged Backward Euler + if ((equation_form == BoutSnesEquationForm::rearranged_backward_euler) + || (equation_form == BoutSnesEquationForm::pseudo_transient)) { + // Need an intermediate vector for rearranged Backward Euler or Pseudo-Transient Continuation ierr = VecDuplicate(snes_x, &delta_x); CHKERRQ(ierr); } @@ -931,7 +934,7 @@ int SNESSolver::run() { // Use (time1, x1) and (simtime, x0) to make prediction // snes_x <- x0 + (dt / (simtime - time1)) * (x0 - x1) // snes_x <- -β * x1 + (1 + β) * snes_x - BoutReal beta = dt / (simtime - time1); + const BoutReal beta = dt / (simtime - time1); VecAXPBY(snes_x, -beta, (1. + beta), x1); } @@ -1142,27 +1145,7 @@ int SNESSolver::run() { if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Adjust local timesteps - // Calculate residuals - if (scale_vars) { - // scaled_x <- snes_x * var_scaling_factors - PetscCall(VecPointwiseMult(scaled_x, snes_x, var_scaling_factors)); - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(scaled_x, &xdata)); - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); - } else { - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(snes_x, &xdata)); - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(snes_x, &xdata)); - } - run_rhs(simtime); - { - BoutReal* fdata = nullptr; - PetscCall(VecGetArray(snes_f, &fdata)); - save_derivs(fdata); - PetscCall(VecRestoreArray(snes_f, &fdata)); - } + PetscCall(rhs_function(snes_x, snes_f, false)); // Compare previous_f with snes_f // Use a per-cell timestep so that e.g density and pressure @@ -1280,25 +1263,25 @@ int SNESSolver::run() { BoutReal mean_neighboring_dt = 0.0; int neighbor_count = 0; if (i3d.x() != 0) { - BoutReal val = pseudo_timestep[i3d.xm()]; + const BoutReal val = pseudo_timestep[i3d.xm()]; min_neighboring_dt = std::min(min_neighboring_dt, val); mean_neighboring_dt += val; ++neighbor_count; } if (i3d.x() != mesh->LocalNx - 1) { - BoutReal val = pseudo_timestep[i3d.xp()]; + const BoutReal val = pseudo_timestep[i3d.xp()]; min_neighboring_dt = std::min(min_neighboring_dt, val); mean_neighboring_dt += val; ++neighbor_count; } if (i3d.y() != 0) { - BoutReal val = pseudo_timestep[i3d.ym()]; + const BoutReal val = pseudo_timestep[i3d.ym()]; min_neighboring_dt = std::min(min_neighboring_dt, val); mean_neighboring_dt += val; ++neighbor_count; } if (i3d.x() != mesh->LocalNy - 1) { - BoutReal val = pseudo_timestep[i3d.yp()]; + const BoutReal val = pseudo_timestep[i3d.yp()]; min_neighboring_dt = std::min(min_neighboring_dt, val); mean_neighboring_dt += val; ++neighbor_count; @@ -1495,29 +1478,23 @@ BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, */ } -// f = rhs -PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { +PetscErrorCode SNESSolver::rhs_function(Vec x, Vec f, bool linear) { // Get data from PETSc into BOUT++ fields if (scale_vars) { // scaled_x <- x * var_scaling_factors - int ierr = VecPointwiseMult(scaled_x, x, var_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecPointwiseMult(scaled_x, x, var_scaling_factors)); const BoutReal* xdata = nullptr; - ierr = VecGetArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); - load_vars(const_cast( - xdata)); // const_cast needed due to load_vars API. Not writing to xdata. - ierr = VecRestoreArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(scaled_x, &xdata)); + // const_cast needed due to load_vars API. Not writing to xdata. + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); } else { const BoutReal* xdata = nullptr; - int ierr = VecGetArrayRead(x, &xdata); - CHKERRQ(ierr); - load_vars(const_cast( - xdata)); // const_cast needed due to load_vars API. Not writing to xdata. - ierr = VecRestoreArrayRead(x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(x, &xdata)); + // const_cast needed due to load_vars API. Not writing to xdata. + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(x, &xdata)); } try { @@ -1527,28 +1504,35 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { // Simulation might fail, e.g. negative densities // if timestep too large output_warn.write("WARNING: BoutException thrown: {}\n", e.what()); + return 2; + } + + // Copy derivatives back + BoutReal* fdata = nullptr; + PetscCall(VecGetArray(f, &fdata)); + save_derivs(fdata); + PetscCall(VecRestoreArray(f, &fdata)); + return PETSC_SUCCESS; +} +// Result in f depends on equation_form +PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { + + // Call the RHS function + if (rhs_function(x, f, linear) != PETSC_SUCCESS) { // Tell SNES that the input was out of domain SNESSetFunctionDomainError(snes); // Note: Returning non-zero error here leaves vectors in locked state return 0; } - // Copy derivatives back - BoutReal* fdata = nullptr; - int ierr = VecGetArray(f, &fdata); - CHKERRQ(ierr); - save_derivs(fdata); - ierr = VecRestoreArray(f, &fdata); - CHKERRQ(ierr); - switch (equation_form) { case BoutSnesEquationForm::rearranged_backward_euler: { // Rearranged Backward Euler // f = (x0 - x)/Δt + f // First calculate x - x0 to minimise floating point issues VecWAXPY(delta_x, -1.0, x0, x); // delta_x = x - x0 - VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt + VecAXPY(f, -1. / dt, delta_x); // f <- f - delta_x / dt break; } case BoutSnesEquationForm::pseudo_transient: { @@ -1563,7 +1547,7 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { case BoutSnesEquationForm::backward_euler: { // Backward Euler // Set f = x - x0 - Δt*f - VecAYPX(f, -dt, x); // f <- x - Δt*f + VecAYPX(f, -dt, x); // f <- x - Δt*f VecAXPY(f, -1.0, x0); // f <- f - x0 break; } @@ -1578,8 +1562,7 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { if (scale_rhs) { // f <- f * rhs_scaling_factors - ierr = VecPointwiseMult(f, f, rhs_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecPointwiseMult(f, f, rhs_scaling_factors)); } return 0; @@ -1594,36 +1577,28 @@ PetscErrorCode SNESSolver::precon(Vec x, Vec f) { throw BoutException("No user preconditioner"); } - int ierr; - // Get data from PETSc into BOUT++ fields Vec solution; - SNESGetSolution(snes, &solution); + PetscCall(SNESGetSolution(snes, &solution)); BoutReal* soldata; - ierr = VecGetArray(solution, &soldata); - CHKERRQ(ierr); + PetscCall(VecGetArray(solution, &soldata)); load_vars(soldata); - ierr = VecRestoreArray(solution, &soldata); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(solution, &soldata)); // Load vector to be inverted into ddt() variables const BoutReal* xdata; - ierr = VecGetArrayRead(x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(x, &xdata)); load_derivs(const_cast(xdata)); // Note: load_derivs does not modify data - ierr = VecRestoreArrayRead(x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArrayRead(x, &xdata)); // Run the preconditioner runPreconditioner(simtime + dt, dt, 0.0); // Save the solution from F_vars BoutReal* fdata; - ierr = VecGetArray(f, &fdata); - CHKERRQ(ierr); + PetscCall(VecGetArray(f, &fdata)); save_derivs(fdata); - ierr = VecRestoreArray(f, &fdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(f, &fdata)); return 0; } @@ -1635,8 +1610,6 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { return 0; // Not scaling the RHS values } - int ierr; - // Get index of rows owned by this processor int rstart, rend; MatGetOwnershipRange(Jac_new, &rstart, &rend); @@ -1651,8 +1624,7 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { // Calculate the norm of each row of the Jacobian PetscScalar* row_inv_norm_data; - ierr = VecGetArray(jac_row_inv_norms, &row_inv_norm_data); - CHKERRQ(ierr); + PetscCall(VecGetArray(jac_row_inv_norms, &row_inv_norm_data)); PetscInt ncols; const PetscScalar* vals; @@ -1676,12 +1648,11 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { MatRestoreRow(Jac_new, row, &ncols, nullptr, &vals); } - ierr = VecRestoreArray(jac_row_inv_norms, &row_inv_norm_data); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(jac_row_inv_norms, &row_inv_norm_data)); // Modify the RHS scaling: factor = factor / norm - ierr = VecPointwiseMult(rhs_scaling_factors, rhs_scaling_factors, jac_row_inv_norms); - CHKERRQ(ierr); + PetscCall( + VecPointwiseMult(rhs_scaling_factors, rhs_scaling_factors, jac_row_inv_norms)); if (diagnose) { // Print maximum and minimum scaling factors @@ -1692,10 +1663,9 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { } // Scale the Jacobian rows by multiplying on the left by 1/norm - ierr = MatDiagonalScale(Jac_new, jac_row_inv_norms, nullptr); - CHKERRQ(ierr); + PetscCall(MatDiagonalScale(Jac_new, jac_row_inv_norms, nullptr)); - return 0; + return PETSC_SUCCESS; } /// @@ -1790,7 +1760,8 @@ void SNESSolver::outputVars(Options& output_options, bool save_repeat) { } if (equation_form == BoutSnesEquationForm::pseudo_transient) { - output_options["snes_pseudo_alpha"].assignRepeat(pseudo_alpha, "t", save_repeat, "SNESSolver"); + output_options["snes_pseudo_alpha"].assignRepeat(pseudo_alpha, "t", save_repeat, + "SNESSolver"); output_options["snes_pseudo_residual"].assignRepeat(pseudo_residual, "t", save_repeat, "SNESSolver"); output_options["snes_pseudo_timestep"].assignRepeat(pseudo_timestep, "t", save_repeat, diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 257588e2a9..53b87d6c28 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -69,6 +69,8 @@ public: /// /// f = (x - gamma*G(x)) - rhs /// + /// The form depends on equation_form + /// /// /// @param[in] x The state vector /// @param[out] f The vector for the result f(x) @@ -93,6 +95,13 @@ public: void outputVars(Options& output_options, bool save_repeat = true) override; private: + /// Call the physics model RHS function + /// + /// @param[in] x The state vector. Will be scaled if scale_vars=true + /// @param[out] f The vector for the result f(x) + /// @param[in] linear Specifies that the SNES solver is in a linear (KSP) inner loop + PetscErrorCode rhs_function(Vec x, Vec f, bool linear); + BoutReal timestep; ///< Internal timestep BoutReal dt; ///< Current timestep used in snes_function. BoutReal dt_min_reset; ///< If dt falls below this, reset solve From 37187e332893dea215871337258db3c1d812e314 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 12:23:27 -0700 Subject: [PATCH 04/11] snes: Add pseudo_strategy option Switches between different strategies for choosing cell time steps. --- src/solver/impls/snes/snes.cxx | 78 +++++++++++++++++++++------------- src/solver/impls/snes/snes.hxx | 11 +++++ 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index cec0c3e058..1adb2c4b46 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -145,6 +146,9 @@ SNESSolver::SNESSolver(Options* opts) (*options)["timestep_factor_on_lower_its"] .doc("Multiply timestep if iterations are below lower_its") .withDefault(1.4)), + pseudo_strategy((*options)["pseudo_strategy"] + .doc("PTC strategy to use when setting timesteps") + .withDefault(BoutPTCStrategy::inverse_residual)), pseudo_alpha((*options)["pseudo_alpha"] .doc("Sets timestep using dt = alpha / residual") .withDefault(100. * atol * timestep)), @@ -401,18 +405,13 @@ int SNESSolver::init() { auto n_square = (*options)["stencil:square"] .doc("Extent of stencil (square)") .withDefault(0); - auto n_taxi = (*options)["stencil:taxi"] - .doc("Extent of stencil (taxi-cab norm)") - .withDefault(0); auto n_cross = (*options)["stencil:cross"] .doc("Extent of stencil (cross)") .withDefault(0); // Set n_taxi 2 if nothing else is set - // Probably a better way to do this - if (n_square == 0 && n_taxi == 0 && n_cross == 0) { - output_info.write("Setting solver:stencil:taxi = 2\n"); - n_taxi = 2; - } + auto n_taxi = (*options)["stencil:taxi"] + .doc("Extent of stencil (taxi-cab norm)") + .withDefault((n_square == 0 && n_cross == 0) ? 2 : 0); auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); { @@ -1428,26 +1427,21 @@ int SNESSolver::run() { return 0; } -/// Switched Evolution Relaxation (SER) -BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, - BoutReal previous_residual, - BoutReal current_residual) { - - // Timestep inversely proportional to the residual, clipped to avoid - // rapid changes in timestep. +/// Timestep inversely proportional to the residual, clipped to avoid +/// rapid changes in timestep. +BoutReal SNESSolver::updatePseudoTimestep_inverse_residual(BoutReal previous_timestep, + BoutReal current_residual) { return std::min( {std::max({pseudo_alpha / current_residual, previous_timestep / 1.5, dt_min_reset}), 1.5 * previous_timestep, max_timestep}); +} - /* - - // Alternative strategy based on history of residuals - // A hybrid strategy may be most effective, in which the timestep is - // inversely proportional to residual initially, or when residuals are large, - // and then the method transitions to being history-based - - const BoutReal converged_threshold = 100 * atol; - const BoutReal transition_threshold = 1000 * atol; +// Strategy based on history of residuals +BoutReal SNESSolver::updatePseudoTimestep_history_based(BoutReal previous_timestep, + BoutReal previous_residual, + BoutReal current_residual) { + const BoutReal converged_threshold = 10 * atol; + const BoutReal transition_threshold = 100 * atol; if (current_residual < converged_threshold) { return max_timestep; @@ -1455,13 +1449,14 @@ BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, // Smoothly transition from SER updates to frozen timestep if (current_residual < transition_threshold) { - BoutReal reduction_ratio = current_residual / previous_residual; + const BoutReal reduction_ratio = current_residual / previous_residual; if (reduction_ratio < 0.8) { - return std::min(0.5*(pseudo_growth_factor + 1.) * previous_timestep, + return std::min(0.5 * (pseudo_growth_factor + 1.) * previous_timestep, max_timestep); } else if (reduction_ratio > 1.2) { - return std::max(0.5*(pseudo_reduction_factor + 1) * previous_timestep, dt_min_reset); + return std::max(0.5 * (pseudo_reduction_factor + 1) * previous_timestep, + dt_min_reset); } // Leave unchanged if residual changes are small return previous_timestep; @@ -1469,13 +1464,36 @@ BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, if (current_residual <= previous_residual) { // Success - return std::min(pseudo_growth_factor * previous_timestep, - max_timestep); + return std::min(pseudo_growth_factor * previous_timestep, max_timestep); } // Failed // Consider rejecting the step return std::max(pseudo_reduction_factor * previous_timestep, dt_min_reset); - */ +} + +/// Switched Evolution Relaxation (SER) +BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, + BoutReal previous_residual, + BoutReal current_residual) { + switch (pseudo_strategy) { + case BoutPTCStrategy::inverse_residual: + return updatePseudoTimestep_inverse_residual(previous_timestep, current_residual); + + case BoutPTCStrategy::history_based: + return updatePseudoTimestep_history_based(previous_timestep, previous_residual, + current_residual); + + case BoutPTCStrategy::hybrid: + // A hybrid strategy may be most effective, in which the timestep is + // inversely proportional to residual initially, or when residuals are large, + // and then the method transitions to being history-based + if (current_residual > 1000. * atol) { + return updatePseudoTimestep_inverse_residual(previous_timestep, current_residual); + } else { + return updatePseudoTimestep_history_based(previous_timestep, previous_residual, + current_residual); + } + }; } PetscErrorCode SNESSolver::rhs_function(Vec x, Vec f, bool linear) { diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 53b87d6c28..a7ed032a8f 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -53,6 +53,11 @@ RegisterSolver registersolverbeuler("beuler"); BOUT_ENUM_CLASS(BoutSnesEquationForm, pseudo_transient, rearranged_backward_euler, backward_euler, direct_newton); +BOUT_ENUM_CLASS(BoutPTCStrategy, + inverse_residual, ///< dt = pseudo_alpha / residual + history_based, ///< Grow/shrink dt based on residual decrease/increase + hybrid); ///< Combine inverse_residual and history_based strategies + /// Uses PETSc's SNES interface to find a steady state solution to a /// nonlinear ODE by integrating in time with Backward Euler class SNESSolver : public Solver { @@ -125,6 +130,7 @@ private: // Pseudo-Transient Continuation (PTC) variables // These are used if equation_form = pseudo_transient + BoutPTCStrategy pseudo_strategy; ///< Strategy to use when setting timesteps BoutReal pseudo_alpha; ///< dt = alpha / residual BoutReal pseudo_growth_factor; ///< Timestep increase 1.1 - 1.2 BoutReal pseudo_reduction_factor; ///< Timestep decrease 0.5 @@ -134,6 +140,11 @@ private: /// Decide the next pseudo-timestep BoutReal updatePseudoTimestep(BoutReal previous_timestep, BoutReal previous_residual, BoutReal current_residual); + BoutReal updatePseudoTimestep_inverse_residual(BoutReal previous_timestep, + BoutReal current_residual); + BoutReal updatePseudoTimestep_history_based(BoutReal previous_timestep, + BoutReal previous_residual, + BoutReal current_residual); Field3D pseudo_residual; ///< Diagnostic output Field3D pseudo_timestep; From 07c2882ef08b26b0f8f0c7405e372c0b47c9247e Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 13:03:58 -0700 Subject: [PATCH 05/11] snes: Replace CHKERRQ with PetscCall macro Minor tidying --- src/solver/impls/snes/snes.cxx | 135 ++++++++++++--------------------- 1 file changed, 47 insertions(+), 88 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 1adb2c4b46..dbcaa0fa6a 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -94,8 +94,7 @@ PetscErrorCode FormFunctionForColoring(void* UNUSED(snes), Vec x, Vec f, void* c PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) { // Get the context SNESSolver* s; - int ierr = PCShellGetContext(pc, reinterpret_cast(&s)); - CHKERRQ(ierr); + PetscCall(PCShellGetContext(pc, reinterpret_cast(&s))); PetscFunctionReturn(s->precon(x, y)); } @@ -245,55 +244,43 @@ int SNESSolver::init() { n3Dvars(), n2Dvars(), neq, nlocal); // Initialise PETSc components - int ierr; // Vectors output_info.write("Creating vector\n"); - ierr = VecCreate(BoutComm::get(), &snes_x); - CHKERRQ(ierr); - ierr = VecSetSizes(snes_x, nlocal, PETSC_DECIDE); - CHKERRQ(ierr); - ierr = VecSetFromOptions(snes_x); - CHKERRQ(ierr); + PetscCall(VecCreate(BoutComm::get(), &snes_x)); + PetscCall(VecSetSizes(snes_x, nlocal, PETSC_DECIDE)); + PetscCall(VecSetFromOptions(snes_x)); - VecDuplicate(snes_x, &snes_f); - VecDuplicate(snes_x, &x0); + PetscCall(VecDuplicate(snes_x, &snes_f)); + PetscCall(VecDuplicate(snes_x, &x0)); if ((equation_form == BoutSnesEquationForm::rearranged_backward_euler) || (equation_form == BoutSnesEquationForm::pseudo_transient)) { // Need an intermediate vector for rearranged Backward Euler or Pseudo-Transient Continuation - ierr = VecDuplicate(snes_x, &delta_x); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &delta_x)); } if (predictor) { // Storage for previous solution - ierr = VecDuplicate(snes_x, &x1); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &x1)); } if (scale_rhs) { // Storage for rhs factors, one per evolving variable - ierr = VecDuplicate(snes_x, &rhs_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &rhs_scaling_factors)); // Set all factors to 1 to start with - ierr = VecSet(rhs_scaling_factors, 1.0); - CHKERRQ(ierr); + PetscCall(VecSet(rhs_scaling_factors, 1.0)); // Array to store inverse Jacobian row norms - ierr = VecDuplicate(snes_x, &jac_row_inv_norms); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &jac_row_inv_norms)); } if (scale_vars) { // Storage for var factors, one per evolving variable - ierr = VecDuplicate(snes_x, &var_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &var_scaling_factors)); // Set all factors to 1 to start with - ierr = VecSet(var_scaling_factors, 1.0); - CHKERRQ(ierr); + PetscCall(VecSet(var_scaling_factors, 1.0)); // Storage for scaled 'x' state vectors - ierr = VecDuplicate(snes_x, &scaled_x); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &scaled_x)); } if (equation_form == BoutSnesEquationForm::pseudo_transient) { @@ -360,8 +347,7 @@ int SNESSolver::init() { // Create a vector to store interpolated output solution // Used so that the timestep does not have to be adjusted, // because that would require updating the preconditioner. - ierr = VecDuplicate(snes_x, &output_x); - CHKERRQ(ierr); + PetscCall(VecDuplicate(snes_x, &output_x)); if (use_coloring) { // Use matrix coloring. @@ -560,8 +546,7 @@ int SNESSolver::init() { // Depends on all variables on this cell for (int j = 0; j < n2d; j++) { PetscInt col = ind2 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - CHKERRQ(ierr); + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); } } } @@ -578,8 +563,7 @@ int SNESSolver::init() { // Depends on 2D fields for (int j = 0; j < n2d; j++) { PetscInt col = ind0 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - CHKERRQ(ierr); + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); } // Star pattern @@ -604,9 +588,9 @@ int SNESSolver::init() { // 3D fields on this cell for (int j = 0; j < n3d; j++) { PetscInt col = ind2 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); + PetscErrorCode ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - if (ierr != 0) { + if (ierr != PETSC_SUCCESS) { output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", row, x, y, xi, yi, ind2, ind2 + n3d - 1); } @@ -631,8 +615,7 @@ int SNESSolver::init() { // Test if the matrix is symmetric // Values are 0 or 1 so tolerance (1e-5) shouldn't matter PetscBool symmetric; - ierr = MatIsSymmetric(Jfd, 1e-5, &symmetric); - CHKERRQ(ierr); + PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); if (!symmetric) { output_warn.write("Jacobian pattern is not symmetric\n"); } @@ -657,8 +640,7 @@ int SNESSolver::init() { if (prune_jacobian) { // Will remove small elements from the Jacobian. // Save a copy to recover from over-pruning - ierr = MatDuplicate(Jfd, MAT_SHARE_NONZERO_PATTERN, &Jfd_original); - CHKERRQ(ierr); + PetscCall(MatDuplicate(Jfd, MAT_SHARE_NONZERO_PATTERN, &Jfd_original)); } } else { // Brute force calculation @@ -785,15 +767,13 @@ int SNESSolver::init() { int SNESSolver::run() { TRACE("SNESSolver::run()"); - int ierr; + // Set initial guess at the solution from variables { BoutReal* xdata = nullptr; - int ierr = VecGetArray(snes_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArray(snes_x, &xdata)); save_vars(xdata); - ierr = VecRestoreArray(snes_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(snes_x, &xdata)); } if (equation_form == BoutSnesEquationForm::pseudo_transient) { @@ -801,11 +781,9 @@ int SNESSolver::run() { run_rhs(simtime); { BoutReal* fdata = nullptr; - ierr = VecGetArray(snes_f, &fdata); - CHKERRQ(ierr); + PetscCall(VecGetArray(snes_f, &fdata)); save_derivs(fdata); - ierr = VecRestoreArray(snes_f, &fdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(snes_f, &fdata)); } } @@ -834,14 +812,11 @@ int SNESSolver::run() { // Take ownership of snes_x and var_scaling_factors data PetscScalar* snes_x_data = nullptr; - ierr = VecGetArray(snes_x, &snes_x_data); - CHKERRQ(ierr); + PetscCall(VecGetArray(snes_x, &snes_x_data)); PetscScalar* x1_data; - ierr = VecGetArray(x1, &x1_data); - CHKERRQ(ierr); + PetscCall(VecGetArray(x1, &x1_data)); PetscScalar* var_scaling_factors_data; - ierr = VecGetArray(var_scaling_factors, &var_scaling_factors_data); - CHKERRQ(ierr); + PetscCall(VecGetArray(var_scaling_factors, &var_scaling_factors_data)); // Normalise each value in the state // Limit normalisation so scaling factor is never smaller than rtol @@ -854,12 +829,9 @@ int SNESSolver::run() { } // Restore vector underlying data - ierr = VecRestoreArray(var_scaling_factors, &var_scaling_factors_data); - CHKERRQ(ierr); - ierr = VecRestoreArray(x1, &x1_data); - CHKERRQ(ierr); - ierr = VecRestoreArray(snes_x, &snes_x_data); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(var_scaling_factors, &var_scaling_factors_data)); + PetscCall(VecRestoreArray(x1, &x1_data)); + PetscCall(VecRestoreArray(snes_x, &snes_x_data)); if (diagnose) { // Print maximum and minimum scaling factors @@ -955,7 +927,7 @@ int SNESSolver::run() { int lin_its; SNESGetLinearSolveIterations(snes, &lin_its); - if ((ierr != 0) or (reason < 0)) { + if ((ierr != PETSC_SUCCESS) or (reason < 0)) { // Diverged or SNES failed if (diagnose_failures) { @@ -1001,8 +973,7 @@ int SNESSolver::run() { if (diagnose) { output.write("\nRestoring Jacobian\n"); } - ierr = MatCopy(Jfd_original, Jfd, DIFFERENT_NONZERO_PATTERN); - CHKERRQ(ierr); + PetscCall(MatCopy(Jfd_original, Jfd, DIFFERENT_NONZERO_PATTERN)); // The non-zero pattern has changed, so update coloring updateColoring(); jacobian_pruned = false; // Reset flag. Will be set after pruning. @@ -1048,33 +1019,26 @@ int SNESSolver::run() { if (scale_vars) { // scaled_x <- snes_x * var_scaling_factors - ierr = VecPointwiseMult(scaled_x, snes_x, var_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecPointwiseMult(scaled_x, snes_x, var_scaling_factors)); const BoutReal* xdata = nullptr; - ierr = VecGetArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(scaled_x, &xdata)); load_vars(const_cast(xdata)); - ierr = VecRestoreArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); } else { const BoutReal* xdata = nullptr; - ierr = VecGetArrayRead(snes_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(snes_x, &xdata)); load_vars(const_cast(xdata)); - ierr = VecRestoreArrayRead(snes_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArrayRead(snes_x, &xdata)); } run_rhs(simtime); // Copy derivatives back { BoutReal* fdata = nullptr; - ierr = VecGetArray(snes_f, &fdata); - CHKERRQ(ierr); + PetscCall(VecGetArray(snes_f, &fdata)); save_derivs(fdata); - ierr = VecRestoreArray(snes_f, &fdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArray(snes_f, &fdata)); } // Forward Euler @@ -1130,7 +1094,7 @@ int SNESSolver::run() { } // Prune Jacobian, keeping diagonal elements - ierr = MatFilter(Jfd, prune_abstol, PETSC_TRUE, PETSC_TRUE); + PetscCall(MatFilter(Jfd, prune_abstol, PETSC_TRUE, PETSC_TRUE)); // Update the coloring from Jfd matrix updateColoring(); @@ -1400,22 +1364,17 @@ int SNESSolver::run() { // Put the result into variables if (scale_vars) { // scaled_x <- output_x * var_scaling_factors - int ierr = VecPointwiseMult(scaled_x, output_x, var_scaling_factors); - CHKERRQ(ierr); + PetscCall(VecPointwiseMult(scaled_x, output_x, var_scaling_factors)); const BoutReal* xdata = nullptr; - ierr = VecGetArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(scaled_x, &xdata)); load_vars(const_cast(xdata)); - ierr = VecRestoreArrayRead(scaled_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); } else { const BoutReal* xdata = nullptr; - int ierr = VecGetArrayRead(output_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecGetArrayRead(output_x, &xdata)); load_vars(const_cast(xdata)); - ierr = VecRestoreArrayRead(output_x, &xdata); - CHKERRQ(ierr); + PetscCall(VecRestoreArrayRead(output_x, &xdata)); } run_rhs(target); // Run RHS to calculate auxilliary variables From cc58791f15881b4fdf722916f672029d28bee584 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 15:06:10 -0700 Subject: [PATCH 06/11] snes: Split PTC implementation into separate functions Trying to simplify the `run()` function by splitting out pieces into separate functions. --- src/solver/impls/snes/snes.cxx | 353 +++++++++++++++------------------ src/solver/impls/snes/snes.hxx | 9 +- 2 files changed, 172 insertions(+), 190 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index dbcaa0fa6a..1840dd21c1 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -284,18 +284,7 @@ int SNESSolver::init() { } if (equation_form == BoutSnesEquationForm::pseudo_transient) { - // Storage for per-variable timestep - PetscCall(VecDuplicate(snes_x, &dt_vec)); - // Starting timestep - PetscCall(VecSet(dt_vec, timestep)); - - // Storage for previous residual. Used to compare - // residuals before and after step, and adjust timestep accordingly - PetscCall(VecDuplicate(snes_x, &previous_f)); - - // Diagnostic outputs - pseudo_timestep = timestep; - pseudo_residual = 0.0; + PetscCall(initPseudoTimestepping()); } // Nonlinear solver interface (SNES) @@ -588,7 +577,8 @@ int SNESSolver::init() { // 3D fields on this cell for (int j = 0; j < n3d; j++) { PetscInt col = ind2 + j; - PetscErrorCode ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); + PetscErrorCode ierr = + MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); if (ierr != PETSC_SUCCESS) { output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", @@ -859,11 +849,6 @@ int SNESSolver::run() { PetscCall(VecMin(dt_vec, nullptr, ×tep)); dt = timestep; - // Copy residual from snes_f to previous_f - // The value of snes_f is set before the timestepping loop - // then calculated after the timestep - PetscCall(VecCopy(snes_f, previous_f)); - looping = true; if (simtime + timestep >= target) { looping = false; @@ -1107,176 +1092,7 @@ int SNESSolver::run() { if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Adjust local timesteps - - PetscCall(rhs_function(snes_x, snes_f, false)); - - // Compare previous_f with snes_f - // Use a per-cell timestep so that e.g density and pressure - // evolve in a way consistent with the equation of state. - - // Reading the residual vectors - const BoutReal* previous_residual = nullptr; - PetscCall(VecGetArrayRead(previous_f, &previous_residual)); - const BoutReal* current_residual = nullptr; - PetscCall(VecGetArrayRead(snes_f, ¤t_residual)); - // Modifying the dt_vec values - BoutReal* dt_data = nullptr; - PetscCall(VecGetArray(dt_vec, &dt_data)); - - // Note: The ordering of quantities in the PETSc vectors - // depends on the Solver::loop_vars function - Mesh* mesh = bout::globals::mesh; - int idx = 0; // Index into PETSc Vecs - - // Boundary cells - for (const auto& i2d : mesh->getRegion2D("RGN_BNDRY")) { - // Field2D quantities evolved together - int count = 0; - BoutReal R0 = 0.0; - BoutReal R1 = 0.0; - - for (const auto& f : f2d) { - if (!f.evolve_bndry) { - continue; // Not evolving boundary => Skip - } - R0 += SQ(previous_residual[idx + count]); - R1 += SQ(current_residual[idx + count]); - ++count; - } - if (count > 0) { - R0 = sqrt(R0 / count); - R1 = sqrt(R1 / count); - - // Adjust timestep for these quantities - BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); - for (int i = 0; i != count; ++i) { - dt_data[idx++] = new_timestep; - } - } - - // Field3D quantities evolved together within a cell - for (int jz = 0; jz < mesh->LocalNz; jz++) { - count = 0; - R0 = 0.0; - R1 = 0.0; - for (const auto& f : f3d) { - if (!f.evolve_bndry) { - continue; // Not evolving boundary => Skip - } - R0 += SQ(previous_residual[idx + count]); - R1 += SQ(current_residual[idx + count]); - ++count; - } - if (count > 0) { - R0 = sqrt(R0 / count); - R1 = sqrt(R1 / count); - - BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); - - auto i3d = mesh->ind2Dto3D(i2d, jz); - pseudo_residual[i3d] = R1; - pseudo_timestep[i3d] = new_timestep; - - for (int i = 0; i != count; ++i) { - dt_data[idx++] = new_timestep; - } - } - } - } - - // Bulk of domain. - // These loops don't check the boundary flags - for (const auto& i2d : mesh->getRegion2D("RGN_NOBNDRY")) { - // Field2D quantities evolved together - if (f2d.size() > 0) { - BoutReal R0 = 0.0; - BoutReal R1 = 0.0; - for (std::size_t i = 0; i != f2d.size(); ++i) { - R0 += SQ(previous_residual[idx + i]); - R1 += SQ(current_residual[idx + i]); - } - R0 = sqrt(R0 / f2d.size()); - R1 = sqrt(R1 / f2d.size()); - - // Adjust timestep for these quantities - BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); - for (std::size_t i = 0; i != f2d.size(); ++i) { - dt_data[idx++] = new_timestep; - } - } - - // Field3D quantities evolved together within a cell - if (f3d.size() > 0) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal R0 = 0.0; - BoutReal R1 = 0.0; - for (std::size_t i = 0; i != f3d.size(); ++i) { - R0 += SQ(previous_residual[idx + i]); - R1 += SQ(current_residual[idx + i]); - } - R0 = sqrt(R0 / f3d.size()); - R1 = sqrt(R1 / f3d.size()); - BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], R0, R1); - - auto i3d = mesh->ind2Dto3D(i2d, jz); - pseudo_residual[i3d] = R1; - - // Compare to neighbors - BoutReal min_neighboring_dt = max_timestep; - BoutReal mean_neighboring_dt = 0.0; - int neighbor_count = 0; - if (i3d.x() != 0) { - const BoutReal val = pseudo_timestep[i3d.xm()]; - min_neighboring_dt = std::min(min_neighboring_dt, val); - mean_neighboring_dt += val; - ++neighbor_count; - } - if (i3d.x() != mesh->LocalNx - 1) { - const BoutReal val = pseudo_timestep[i3d.xp()]; - min_neighboring_dt = std::min(min_neighboring_dt, val); - mean_neighboring_dt += val; - ++neighbor_count; - } - if (i3d.y() != 0) { - const BoutReal val = pseudo_timestep[i3d.ym()]; - min_neighboring_dt = std::min(min_neighboring_dt, val); - mean_neighboring_dt += val; - ++neighbor_count; - } - if (i3d.x() != mesh->LocalNy - 1) { - const BoutReal val = pseudo_timestep[i3d.yp()]; - min_neighboring_dt = std::min(min_neighboring_dt, val); - mean_neighboring_dt += val; - ++neighbor_count; - } - mean_neighboring_dt /= neighbor_count; - - // Smooth - //new_timestep = 0.1 * mean_neighboring_dt + 0.9 * new_timestep; - - // Limit ratio of timestep between neighboring cells - new_timestep = - std::min(new_timestep, pseudo_max_ratio * min_neighboring_dt); - - pseudo_timestep[i3d] = new_timestep; - - for (std::size_t i = 0; i != f3d.size(); ++i) { - dt_data[idx++] = new_timestep; - } - } - } - } - - // Restore Vec data arrays - PetscCall(VecRestoreArrayRead(previous_f, &previous_residual)); - PetscCall(VecRestoreArrayRead(snes_f, ¤t_residual)); - PetscCall(VecRestoreArray(dt_vec, &dt_data)); - - // Need timesteps on neighboring processors - // to limit ratio - mesh->communicate(pseudo_timestep); - // Neumann boundary so timestep isn't pinned at boundaries - pseudo_timestep.applyBoundary("neumann"); + PetscCall(updatePseudoTimestepping(snes_x)); if (pid_controller) { // Adjust pseudo_alpha based on nonlinear iterations @@ -1386,6 +1202,167 @@ int SNESSolver::run() { return 0; } +PetscErrorCode SNESSolver::initPseudoTimestepping() { + // Storage for per-variable timestep + PetscCall(VecDuplicate(snes_x, &dt_vec)); + // Starting timestep + PetscCall(VecSet(dt_vec, timestep)); + + // Diagnostic outputs + pseudo_timestep = timestep; + pseudo_residual = 0.0; + pseudo_residual_2d = 0.0; + + return PETSC_SUCCESS; +} + +PetscErrorCode SNESSolver::updatePseudoTimestepping(Vec x) { + // Call RHS function to get time derivatives + PetscCall(rhs_function(x, snes_f, false)); + + // Use a per-cell timestep so that e.g density and pressure + // evolve in a way consistent with the equation of state. + + // Reading the residual vectors + const BoutReal* current_residual = nullptr; + PetscCall(VecGetArrayRead(snes_f, ¤t_residual)); + // Modifying the dt_vec values + BoutReal* dt_data = nullptr; + PetscCall(VecGetArray(dt_vec, &dt_data)); + + // Note: The ordering of quantities in the PETSc vectors + // depends on the Solver::loop_vars function + Mesh* mesh = bout::globals::mesh; + int idx = 0; // Index into PETSc Vecs + + // Boundary cells + for (const auto& i2d : mesh->getRegion2D("RGN_BNDRY")) { + // Field2D quantities evolved together + int count = 0; + BoutReal residual = 0.0; + + for (const auto& f : f2d) { + if (!f.evolve_bndry) { + continue; // Not evolving boundary => Skip + } + residual += SQ(current_residual[idx + count]); + ++count; + } + if (count > 0) { + residual = sqrt(residual / count); + + // Adjust timestep for these quantities + BoutReal new_timestep = + updatePseudoTimestep(dt_data[idx], pseudo_residual_2d[i2d], residual); + for (int i = 0; i != count; ++i) { + dt_data[idx++] = new_timestep; + } + pseudo_residual_2d[i2d] = residual; + } + + // Field3D quantities evolved together within a cell + for (int jz = 0; jz < mesh->LocalNz; jz++) { + count = 0; + residual = 0.0; + for (const auto& f : f3d) { + if (!f.evolve_bndry) { + continue; // Not evolving boundary => Skip + } + residual += SQ(current_residual[idx + count]); + ++count; + } + if (count > 0) { + residual = sqrt(residual / count); + + auto i3d = mesh->ind2Dto3D(i2d, jz); + BoutReal new_timestep = + updatePseudoTimestep(dt_data[idx], pseudo_residual[i3d], residual); + + pseudo_residual[i3d] = residual; + pseudo_timestep[i3d] = new_timestep; + + for (int i = 0; i != count; ++i) { + dt_data[idx++] = new_timestep; + } + } + } + } + + // Bulk of domain. + // These loops don't check the boundary flags + for (const auto& i2d : mesh->getRegion2D("RGN_NOBNDRY")) { + // Field2D quantities evolved together + if (f2d.size() > 0) { + BoutReal residual = 0.0; + for (std::size_t i = 0; i != f2d.size(); ++i) { + residual += SQ(current_residual[idx + i]); + } + residual = sqrt(residual / f2d.size()); + + // Adjust timestep for these quantities + BoutReal new_timestep = + updatePseudoTimestep(dt_data[idx], pseudo_residual_2d[i2d], residual); + for (std::size_t i = 0; i != f2d.size(); ++i) { + dt_data[idx++] = new_timestep; + } + pseudo_residual_2d[i2d] = residual; + } + + // Field3D quantities evolved together within a cell + if (f3d.size() > 0) { + for (int jz = 0; jz < mesh->LocalNz; jz++) { + auto i3d = mesh->ind2Dto3D(i2d, jz); + + BoutReal residual = 0.0; + for (std::size_t i = 0; i != f3d.size(); ++i) { + residual += SQ(current_residual[idx + i]); + } + residual = sqrt(residual / f3d.size()); + BoutReal new_timestep = + updatePseudoTimestep(dt_data[idx], pseudo_residual[i3d], residual); + + pseudo_residual[i3d] = residual; + + // Compare to neighbors + BoutReal min_neighboring_dt = max_timestep; + if (i3d.x() != 0) { + min_neighboring_dt = std::min(min_neighboring_dt, pseudo_timestep[i3d.xm()]); + } + if (i3d.x() != mesh->LocalNx - 1) { + min_neighboring_dt = std::min(min_neighboring_dt, pseudo_timestep[i3d.xp()]); + } + if (i3d.y() != 0) { + min_neighboring_dt = std::min(min_neighboring_dt, pseudo_timestep[i3d.ym()]); + } + if (i3d.x() != mesh->LocalNy - 1) { + min_neighboring_dt = std::min(min_neighboring_dt, pseudo_timestep[i3d.yp()]); + } + + // Limit ratio of timestep between neighboring cells + new_timestep = std::min(new_timestep, pseudo_max_ratio * min_neighboring_dt); + + pseudo_timestep[i3d] = new_timestep; + + for (std::size_t i = 0; i != f3d.size(); ++i) { + dt_data[idx++] = new_timestep; + } + } + } + } + + // Restore Vec data arrays + PetscCall(VecRestoreArrayRead(snes_f, ¤t_residual)); + PetscCall(VecRestoreArray(dt_vec, &dt_data)); + + // Need timesteps on neighboring processors + // to limit ratio + mesh->communicate(pseudo_timestep); + // Neumann boundary so timestep isn't pinned at boundaries + pseudo_timestep.applyBoundary("neumann"); + + return PETSC_SUCCESS; +} + /// Timestep inversely proportional to the residual, clipped to avoid /// rapid changes in timestep. BoutReal SNESSolver::updatePseudoTimestep_inverse_residual(BoutReal previous_timestep, diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index a7ed032a8f..9086106ecd 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -136,8 +136,12 @@ private: BoutReal pseudo_reduction_factor; ///< Timestep decrease 0.5 BoutReal pseudo_max_ratio; ///< Maximum timestep ratio between neighboring cells Vec dt_vec; ///< Each quantity can have its own timestep - Vec previous_f; ///< Previous residual - /// Decide the next pseudo-timestep + + /// Initialize the Pseudo-Transient Continuation method + PetscErrorCode initPseudoTimestepping(); + /// Update dt_vec based on new solution x + PetscErrorCode updatePseudoTimestepping(Vec x); + /// Decide the next pseudo-timestep. Called by updatePseudoTimestepping BoutReal updatePseudoTimestep(BoutReal previous_timestep, BoutReal previous_residual, BoutReal current_residual); BoutReal updatePseudoTimestep_inverse_residual(BoutReal previous_timestep, @@ -146,6 +150,7 @@ private: BoutReal previous_residual, BoutReal current_residual); Field3D pseudo_residual; ///< Diagnostic output + Field2D pseudo_residual_2d; Field3D pseudo_timestep; ///< PID controller parameters From dcbdc5d13dcb4e44801a55d5ff6b27ce23679bf0 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 13 Oct 2025 19:45:56 -0700 Subject: [PATCH 07/11] snes: Refactoring Jacobian coloring and pruning Moving Jacobian calculations into separate functions. --- src/solver/impls/snes/snes.cxx | 728 +++++++++++++++++---------------- src/solver/impls/snes/snes.hxx | 11 + 2 files changed, 380 insertions(+), 359 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 1840dd21c1..838da5fa5a 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -18,6 +18,7 @@ #include +#include "petscerror.h" #include "petscsnes.h" #include "petscsys.h" #include "petscvec.h" @@ -100,6 +101,371 @@ PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) { } } // namespace +PetscErrorCode SNESSolver::FDJinitialise() { + if (use_coloring) { + // Use matrix coloring. + // This greatly reduces the number of times the rhs() function + // needs to be evaluated when calculating the Jacobian. + + // Use global mesh for now + Mesh* mesh = bout::globals::mesh; + + ////////////////////////////////////////////////// + // Get the local indices by starting at 0 + Field3D index = globalIndex(0); + + ////////////////////////////////////////////////// + // Pre-allocate PETSc storage + + output_progress.write("Setting Jacobian matrix sizes\n"); + + const int n2d = f2d.size(); + const int n3d = f3d.size(); + + // Set size of Matrix on each processor to nlocal x nlocal + MatCreate(BoutComm::get(), &Jfd); + MatSetOption(Jfd, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE); + MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE); + MatSetFromOptions(Jfd); + // Determine which row/columns of the matrix are locally owned + int Istart, Iend; + MatGetOwnershipRange(Jfd, &Istart, &Iend); + // Convert local into global indices + // Note: Not in the boundary cells, to keep -1 values + for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { + index[i] += Istart; + } + // Now communicate to fill guard cells + mesh->communicate(index); + + // Non-zero elements on this processor + std::vector d_nnz; + std::vector o_nnz; + auto n_square = (*options)["stencil:square"] + .doc("Extent of stencil (square)") + .withDefault(0); + auto n_cross = + (*options)["stencil:cross"].doc("Extent of stencil (cross)").withDefault(0); + // Set n_taxi 2 if nothing else is set + auto n_taxi = (*options)["stencil:taxi"] + .doc("Extent of stencil (taxi-cab norm)") + .withDefault((n_square == 0 && n_cross == 0) ? 2 : 0); + + auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); + { + // This is ugly but can't think of a better and robust way to + // count the non-zeros for some arbitrary stencil + // effectively the same loop as the one that sets the non-zeros below + std::vector> d_nnz_map2d(nlocal); + std::vector> o_nnz_map2d(nlocal); + std::vector> d_nnz_map3d(nlocal); + std::vector> o_nnz_map3d(nlocal); + // Loop over every element in 2D to count the *unique* non-zeros + for (int x = mesh->xstart; x <= mesh->xend; x++) { + for (int y = mesh->ystart; y <= mesh->yend; y++) { + + const int ind0 = ROUND(index(x, y, 0)) - Istart; + + // 2D fields + for (int i = 0; i < n2d; i++) { + const PetscInt row = ind0 + i; + // Loop through each point in the stencil + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } + + const int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // A boundary point + } + + // Depends on all variables on this cell + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } + } + } + // 3D fields + for (int z = 0; z < mesh->LocalNz; z++) { + const int ind = ROUND(index(x, y, z)) - Istart; + + for (int i = 0; i < n3d; i++) { + PetscInt row = ind + i; + if (z == 0) { + row += n2d; + } + + // Depends on 2D fields + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind0 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } + + // Star pattern + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } + + int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // Boundary point + } + + if (z == 0) { + ind2 += n2d; + } + + // 3D fields on this cell + for (int j = 0; j < n3d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map3d[row].insert(col); + } else { + o_nnz_map3d[row].insert(col); + } + } + } + } + } + } + } + + d_nnz.reserve(nlocal); + d_nnz.reserve(nlocal); + + for (int i = 0; i < nlocal; ++i) { + // Assume all elements in the z direction are potentially coupled + d_nnz.emplace_back(d_nnz_map3d[i].size() * mesh->LocalNz + d_nnz_map2d[i].size()); + o_nnz.emplace_back(o_nnz_map3d[i].size() * mesh->LocalNz + o_nnz_map2d[i].size()); + } + } + + output_progress.write("Pre-allocating Jacobian\n"); + // Pre-allocate + MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data()); + MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data()); + MatSetUp(Jfd); + MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); + + ////////////////////////////////////////////////// + // Mark non-zero entries + + output_progress.write("Marking non-zero Jacobian entries\n"); + PetscScalar val = 1.0; + for (int x = mesh->xstart; x <= mesh->xend; x++) { + for (int y = mesh->ystart; y <= mesh->yend; y++) { + + const int ind0 = ROUND(index(x, y, 0)); + + // 2D fields + for (int i = 0; i < n2d; i++) { + const PetscInt row = ind0 + i; + + // Loop through each point in the stencil + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { + continue; + } + + int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // A boundary point + } + + // Depends on all variables on this cell + for (int j = 0; j < n2d; j++) { + PetscInt col = ind2 + j; + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); + } + } + } + // 3D fields + for (int z = 0; z < mesh->LocalNz; z++) { + int ind = ROUND(index(x, y, z)); + + for (int i = 0; i < n3d; i++) { + PetscInt row = ind + i; + if (z == 0) { + row += n2d; + } + + // Depends on 2D fields + for (int j = 0; j < n2d; j++) { + PetscInt col = ind0 + j; + PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); + } + + // Star pattern + for (const auto& [x_off, y_off] : xy_offsets) { + int xi = x + x_off; + int yi = y + y_off; + + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } + for (int zi = 0; zi < mesh->LocalNz; ++zi) { + int ind2 = ROUND(index(xi, yi, zi)); + if (ind2 < 0) { + continue; // Boundary point + } + + if (z == 0) { + ind2 += n2d; + } + + // 3D fields on this cell + for (int j = 0; j < n3d; j++) { + PetscInt col = ind2 + j; + PetscErrorCode ierr = + MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); + + if (ierr != PETSC_SUCCESS) { + output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", row, + x, y, xi, yi, ind2, ind2 + n3d - 1); + } + CHKERRQ(ierr); + } + } + } + } + } + } + } + + // Finished marking non-zero entries + + output_progress.write("Assembling Jacobian matrix\n"); + + // Assemble Matrix + MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY); + + { + // Test if the matrix is symmetric + // Values are 0 or 1 so tolerance (1e-5) shouldn't matter + PetscBool symmetric; + PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); + if (!symmetric) { + output_warn.write("Jacobian pattern is not symmetric\n"); + } + } + + // The above can miss entries around the X-point branch cut: + // The diagonal terms are complicated because moving in X then Y + // is different from moving in Y then X at the X-point. + // Making sure the colouring matrix is symmetric does not + // necessarily give the correct stencil but may help. + if ((*options)["force_symmetric_coloring"] + .doc("Modifies coloring matrix to force it to be symmetric") + .withDefault(false)) { + Mat Jfd_T; + MatCreateTranspose(Jfd, &Jfd_T); + MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN); + } + + output_progress.write("Creating Jacobian coloring\n"); + updateColoring(); + + if (prune_jacobian) { + // Will remove small elements from the Jacobian. + // Save a copy to recover from over-pruning + PetscCall(MatDuplicate(Jfd, MAT_SHARE_NONZERO_PATTERN, &Jfd_original)); + } + } else { + // Brute force calculation + // There is usually no reason to use this, except as a check of + // the coloring calculation. + + MatCreateAIJ( + BoutComm::get(), nlocal, nlocal, // Local sizes + PETSC_DETERMINE, PETSC_DETERMINE, // Global sizes + 3, // Number of nonzero entries in diagonal portion of local submatrix + nullptr, + 0, // Number of nonzeros per row in off-diagonal portion of local submatrix + nullptr, &Jfd); + + if (matrix_free_operator) { + SNESSetJacobian(snes, Jmf, Jfd, SNESComputeJacobianDefault, this); + } else { + SNESSetJacobian(snes, Jfd, Jfd, SNESComputeJacobianDefault, this); + } + + MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); + } +} + +PetscErrorCode SNESSolver::FDJpruneJacobian() { +#if PETSC_VERSION_GE(3, 20, 0) + + // Remove small elements from the Jacobian and recompute the coloring + // Only do this if there are a significant number of small elements. + int small_elements = 0; + int total_elements = 0; + + // Get index of rows owned by this processor + int rstart, rend; + MatGetOwnershipRange(Jfd, &rstart, &rend); + + PetscInt ncols; + const PetscScalar* vals; + for (int row = rstart; row < rend; row++) { + MatGetRow(Jfd, row, &ncols, nullptr, &vals); + for (int col = 0; col < ncols; col++) { + if (std::abs(vals[col]) < prune_abstol) { + ++small_elements; + } + ++total_elements; + } + MatRestoreRow(Jfd, row, &ncols, nullptr, &vals); + } + + if (small_elements > prune_fraction * total_elements) { + if (diagnose) { + output.write("\nPruning Jacobian elements: {} / {}\n", small_elements, + total_elements); + } + + // Prune Jacobian, keeping diagonal elements + PetscCall(MatFilter(Jfd, prune_abstol, PETSC_TRUE, PETSC_TRUE)); + + // Update the coloring from Jfd matrix + updateColoring(); + + // Mark the Jacobian as pruned. This is so that it is only restored if pruned. + jacobian_pruned = true; + } +#endif // PETSC_VERSION_GE(3,20,0) +} + +PetscErrorCode SNESSolver::FDJrestoreFromPruning() { + // Restore pruned non-zero elements + PetscCall(MatCopy(Jfd_original, Jfd, DIFFERENT_NONZERO_PATTERN)); + // The non-zero pattern has changed, so update coloring + updateColoring(); + jacobian_pruned = false; // Reset flag. Will be set after pruning. +} + SNESSolver::SNESSolver(Options* opts) : Solver(opts), timestep( @@ -338,321 +704,7 @@ int SNESSolver::init() { // because that would require updating the preconditioner. PetscCall(VecDuplicate(snes_x, &output_x)); - if (use_coloring) { - // Use matrix coloring. - // This greatly reduces the number of times the rhs() function - // needs to be evaluated when calculating the Jacobian. - - // Use global mesh for now - Mesh* mesh = bout::globals::mesh; - - ////////////////////////////////////////////////// - // Get the local indices by starting at 0 - Field3D index = globalIndex(0); - - ////////////////////////////////////////////////// - // Pre-allocate PETSc storage - - output_progress.write("Setting Jacobian matrix sizes\n"); - - const int n2d = f2d.size(); - const int n3d = f3d.size(); - - // Set size of Matrix on each processor to nlocal x nlocal - MatCreate(BoutComm::get(), &Jfd); - MatSetOption(Jfd, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE); - MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE); - MatSetFromOptions(Jfd); - // Determine which row/columns of the matrix are locally owned - int Istart, Iend; - MatGetOwnershipRange(Jfd, &Istart, &Iend); - // Convert local into global indices - // Note: Not in the boundary cells, to keep -1 values - for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { - index[i] += Istart; - } - // Now communicate to fill guard cells - mesh->communicate(index); - - // Non-zero elements on this processor - std::vector d_nnz; - std::vector o_nnz; - auto n_square = (*options)["stencil:square"] - .doc("Extent of stencil (square)") - .withDefault(0); - auto n_cross = (*options)["stencil:cross"] - .doc("Extent of stencil (cross)") - .withDefault(0); - // Set n_taxi 2 if nothing else is set - auto n_taxi = (*options)["stencil:taxi"] - .doc("Extent of stencil (taxi-cab norm)") - .withDefault((n_square == 0 && n_cross == 0) ? 2 : 0); - - auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); - { - // This is ugly but can't think of a better and robust way to - // count the non-zeros for some arbitrary stencil - // effectively the same loop as the one that sets the non-zeros below - std::vector> d_nnz_map2d(nlocal); - std::vector> o_nnz_map2d(nlocal); - std::vector> d_nnz_map3d(nlocal); - std::vector> o_nnz_map3d(nlocal); - // Loop over every element in 2D to count the *unique* non-zeros - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)) - Istart; - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - const int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - } - } - // 3D fields - for (int z = 0; z < mesh->LocalNz; z++) { - const int ind = ROUND(index(x, y, z)) - Istart; - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - const PetscInt col = ind0 + j; - if (col >= Istart && col < Iend) { - d_nnz_map2d[row].insert(col); - } else { - o_nnz_map2d[row].insert(col); - } - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - if (col >= Istart && col < Iend) { - d_nnz_map3d[row].insert(col); - } else { - o_nnz_map3d[row].insert(col); - } - } - } - } - } - } - } - - d_nnz.reserve(nlocal); - d_nnz.reserve(nlocal); - - for (int i = 0; i < nlocal; ++i) { - // Assume all elements in the z direction are potentially coupled - d_nnz.emplace_back(d_nnz_map3d[i].size() * mesh->LocalNz - + d_nnz_map2d[i].size()); - o_nnz.emplace_back(o_nnz_map3d[i].size() * mesh->LocalNz - + o_nnz_map2d[i].size()); - } - } - - output_progress.write("Pre-allocating Jacobian\n"); - // Pre-allocate - MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data()); - MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data()); - MatSetUp(Jfd); - MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); - - ////////////////////////////////////////////////// - // Mark non-zero entries - - output_progress.write("Marking non-zero Jacobian entries\n"); - PetscScalar val = 1.0; - for (int x = mesh->xstart; x <= mesh->xend; x++) { - for (int y = mesh->ystart; y <= mesh->yend; y++) { - - const int ind0 = ROUND(index(x, y, 0)); - - // 2D fields - for (int i = 0; i < n2d; i++) { - const PetscInt row = ind0 + i; - - // Loop through each point in the stencil - for (const auto& [x_off, y_off] : xy_offsets) { - const int xi = x + x_off; - const int yi = y + y_off; - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - - int ind2 = ROUND(index(xi, yi, 0)); - if (ind2 < 0) { - continue; // A boundary point - } - - // Depends on all variables on this cell - for (int j = 0; j < n2d; j++) { - PetscInt col = ind2 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - } - } - // 3D fields - for (int z = 0; z < mesh->LocalNz; z++) { - int ind = ROUND(index(x, y, z)); - - for (int i = 0; i < n3d; i++) { - PetscInt row = ind + i; - if (z == 0) { - row += n2d; - } - - // Depends on 2D fields - for (int j = 0; j < n2d; j++) { - PetscInt col = ind0 + j; - PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); - } - - // Star pattern - for (const auto& [x_off, y_off] : xy_offsets) { - int xi = x + x_off; - int yi = y + y_off; - - if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) - || (yi >= mesh->LocalNy)) { - continue; - } - for (int zi = 0; zi < mesh->LocalNz; ++zi) { - int ind2 = ROUND(index(xi, yi, zi)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - PetscInt col = ind2 + j; - PetscErrorCode ierr = - MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - - if (ierr != PETSC_SUCCESS) { - output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", - row, x, y, xi, yi, ind2, ind2 + n3d - 1); - } - CHKERRQ(ierr); - } - } - } - } - } - } - } - - // Finished marking non-zero entries - - output_progress.write("Assembling Jacobian matrix\n"); - - // Assemble Matrix - MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY); - - { - // Test if the matrix is symmetric - // Values are 0 or 1 so tolerance (1e-5) shouldn't matter - PetscBool symmetric; - PetscCall(MatIsSymmetric(Jfd, 1e-5, &symmetric)); - if (!symmetric) { - output_warn.write("Jacobian pattern is not symmetric\n"); - } - } - - // The above can miss entries around the X-point branch cut: - // The diagonal terms are complicated because moving in X then Y - // is different from moving in Y then X at the X-point. - // Making sure the colouring matrix is symmetric does not - // necessarily give the correct stencil but may help. - if ((*options)["force_symmetric_coloring"] - .doc("Modifies coloring matrix to force it to be symmetric") - .withDefault(false)) { - Mat Jfd_T; - MatCreateTranspose(Jfd, &Jfd_T); - MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN); - } - - output_progress.write("Creating Jacobian coloring\n"); - updateColoring(); - - if (prune_jacobian) { - // Will remove small elements from the Jacobian. - // Save a copy to recover from over-pruning - PetscCall(MatDuplicate(Jfd, MAT_SHARE_NONZERO_PATTERN, &Jfd_original)); - } - } else { - // Brute force calculation - // There is usually no reason to use this, except as a check of - // the coloring calculation. - - MatCreateAIJ( - BoutComm::get(), nlocal, nlocal, // Local sizes - PETSC_DETERMINE, PETSC_DETERMINE, // Global sizes - 3, // Number of nonzero entries in diagonal portion of local submatrix - nullptr, - 0, // Number of nonzeros per row in off-diagonal portion of local submatrix - nullptr, &Jfd); - - if (matrix_free_operator) { - SNESSetJacobian(snes, Jmf, Jfd, SNESComputeJacobianDefault, this); - } else { - SNESSetJacobian(snes, Jfd, Jfd, SNESComputeJacobianDefault, this); - } - - MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); - } + FDJinitialise(); // Re-use Jacobian // Note: If the 'Amat' Jacobian is matrix free, SNESComputeJacobian @@ -954,14 +1006,10 @@ int SNESSolver::run() { if (jacobian_pruned and (snes_failures > 2) and (4 * lin_its > 3 * maxl)) { // Taking 3/4 of maximum linear iterations on average per linear step // May indicate a preconditioner problem. - // Restore pruned non-zero elements if (diagnose) { output.write("\nRestoring Jacobian\n"); } - PetscCall(MatCopy(Jfd_original, Jfd, DIFFERENT_NONZERO_PATTERN)); - // The non-zero pattern has changed, so update coloring - updateColoring(); - jacobian_pruned = false; // Reset flag. Will be set after pruning. + PetscCall(FDJrestoreFromPruning()); } if (saved_jacobian_lag == 0) { @@ -1045,50 +1093,12 @@ int SNESSolver::run() { output.write("\n"); } -#if PETSC_VERSION_GE(3, 20, 0) // MatFilter and MatEliminateZeros(Mat, bool) require PETSc >= 3.20 if (jacobian_recalculated and prune_jacobian) { jacobian_recalculated = false; // Reset flag - // Remove small elements from the Jacobian and recompute the coloring - // Only do this if there are a significant number of small elements. - int small_elements = 0; - int total_elements = 0; - - // Get index of rows owned by this processor - int rstart, rend; - MatGetOwnershipRange(Jfd, &rstart, &rend); - - PetscInt ncols; - const PetscScalar* vals; - for (int row = rstart; row < rend; row++) { - MatGetRow(Jfd, row, &ncols, nullptr, &vals); - for (int col = 0; col < ncols; col++) { - if (std::abs(vals[col]) < prune_abstol) { - ++small_elements; - } - ++total_elements; - } - MatRestoreRow(Jfd, row, &ncols, nullptr, &vals); - } - - if (small_elements > prune_fraction * total_elements) { - if (diagnose) { - output.write("\nPruning Jacobian elements: {} / {}\n", small_elements, - total_elements); - } - - // Prune Jacobian, keeping diagonal elements - PetscCall(MatFilter(Jfd, prune_abstol, PETSC_TRUE, PETSC_TRUE)); - - // Update the coloring from Jfd matrix - updateColoring(); - - // Mark the Jacobian as pruned. This is so that it is only restored if pruned. - jacobian_pruned = true; - } + FDJpruneJacobian(); } -#endif // PETSC_VERSION_GE(3,20,0) if (equation_form == BoutSnesEquationForm::pseudo_transient) { // Adjust local timesteps diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 9086106ecd..98e96d3e4e 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -58,6 +58,13 @@ BOUT_ENUM_CLASS(BoutPTCStrategy, history_based, ///< Grow/shrink dt based on residual decrease/increase hybrid); ///< Combine inverse_residual and history_based strategies +// class FiniteDifferenceJacobian { +// public: +// PetscError initialise(); +// private: + +// }; + /// Uses PETSc's SNES interface to find a steady state solution to a /// nonlinear ODE by integrating in time with Backward Euler class SNESSolver : public Solver { @@ -100,6 +107,10 @@ public: void outputVars(Options& output_options, bool save_repeat = true) override; private: + PetscErrorCode FDJinitialise(); + PetscErrorCode FDJpruneJacobian(); + PetscErrorCode FDJrestoreFromPruning(); + /// Call the physics model RHS function /// /// @param[in] x The state vector. Will be scaled if scale_vars=true From 48c0b180f677183aaeb8ffa42e5a063b1a72c0b3 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Tue, 14 Oct 2025 11:03:06 -0700 Subject: [PATCH 08/11] snes: Expand manual and minor tidying Add manual sections on adaptive timestepping and PTC method. Minor tidying to address some Clang-Tidy comments. --- manual/sphinx/user_docs/time_integration.rst | 230 +++++++++++++++++-- src/solver/impls/snes/snes.cxx | 7 +- src/solver/impls/snes/snes.hxx | 8 +- 3 files changed, 214 insertions(+), 31 deletions(-) diff --git a/manual/sphinx/user_docs/time_integration.rst b/manual/sphinx/user_docs/time_integration.rst index 373b5cdbe6..7658602bc8 100644 --- a/manual/sphinx/user_docs/time_integration.rst +++ b/manual/sphinx/user_docs/time_integration.rst @@ -390,7 +390,7 @@ The SNES solver is configured through the ``[solver]`` section of the input file pc_type = ilu # Preconditioner: ilu, bjacobi, hypre, etc. Timestepping Modes ------------------- +~~~~~~~~~~~~~~~~~~ The solver supports several timestepping strategies controlled by ``equation_form``: @@ -425,6 +425,183 @@ The solver supports several timestepping strategies controlled by ``equation_for This uses the same form as rearranged_backward_euler, but the time step can be different for each cell. +Adaptive Timestepping +~~~~~~~~~~~~~~~~~~~~~ + +When ``equation_form = rearranged_backward_euler`` (default), the +solver uses global timestepping with adaptive timestep control based +on nonlinear iteration count. + +.. code-block:: ini + + [solver] + type = snes + equation_form = rearranged_backward_euler + + # Initial and maximum timesteps + timestep = 1.0 # Initial timestep + max_timestep = 1e10 # Upper limit on timestep + dt_min_reset = 1e-6 # Reset the solver when timestep < this + + # Timestep adaptation + lower_its = 3 # Increase dt if iterations < this + upper_its = 10 # Decrease dt if iterations > this + timestep_factor_on_lower_its = 1.4 # Growth factor + timestep_factor_on_upper_its = 0.9 # Reduction factor + timestep_factor_on_failure = 0.5 # Reduction on convergence failure + +PID Controller +^^^^^^^^^^^^^^ + +An alternative adaptive strategy using a PID controller: + +.. code-block:: ini + + [solver] + pid_controller = true + target_its = 7 # Target number of nonlinear iterations + kP = 0.7 # Proportional gain + kI = 0.3 # Integral gain + kD = 0.2 # Derivative gain + +The PID controller adjusts the timestep to maintain approximately ``target_its`` +nonlinear iterations per solve, providing smoother adaptation than threshold-based +methods. + +Pseudo-Transient Continuation and Switched Evolution Relaxation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When ``equation_form = pseudo_transient`` the solver uses +Pseudo-Transient Continuation (PTC). This is a robust numerical +technique for solving steady-state problems that are too nonlinear for +direct Newton iteration. Instead of solving the steady-state system +**F(u) = 0** directly, PTC solves a modified time-dependent problem: + +.. math:: + + M(u) \frac{\partial u}{\partial \tau} + F(u) = 0 + +where :math:`\tau` is a pseudo-time variable (not physical time) and :math:`M(u)` +is a preconditioning matrix. As :math:`\tau \to \infty`, the solution converges +to the steady state **F(u) = 0**. + +The key advantage of PTC is that it transforms a difficult root-finding problem +into a sequence of easier initial value problems. Poor initial guesses that would +cause Newton's method to diverge can still reach the solution via a stable +pseudo-transient path. + +The Switched Evolution Relaxation (SER) method is a spatially adaptive +variant of PTC that allows each cell to use a different +pseudo-timestep :math:`\Delta\tau_i`. The timestep in each cell adapts +based on the local residual, allowing the algorithm to take large +timesteps in well-behaved regions (fast convergence), while taking +small timesteps in difficult regions (stable advancement). The the +same :math:`\Delta\tau_i` is used for all equations (density, +momentum, energy etc.) within each cell. This maintains coupling +between temperature, pressure, and composition through the equation of +state. + +**Key parameters:** + +``pseudo_max_ratio`` (default: 2.0) + Maximum allowed ratio of timesteps between neighboring cells. This prevents + sharp spatial gradients in convergence rate. + +**Example PTC configuration:** + +.. code-block:: ini + + [solver] + type = snes + equation_form = pseudo_transient + + timestep = 1.0 # Initial timestep + + # SER parameters + pid_controller = true # Scale timesteps based on iterations + pseudo_max_ratio = 2.0 # Limit neighbor timestep ratio + + # Tolerances + atol = 1e-7 + rtol = 1e-6 + stol = 1e-12 + +SER timestep strategy +^^^^^^^^^^^^^^^^^^^^^ + +After each nonlinear solve the timesteps in each cell are adjusted. +The strategy used depends on the ``pseudo_strategy`` option: + +**inverse_residual** (default) + +If ``pseudo_strategy = inverse_residual`` then the timestep is inversely +proportional to the RMS residual in each cell. +``pseudo_alpha`` (default: 100 × atol × timestep) +Controls the relationship between residual and timestep. The local timestep +is computed as: + +.. math:: + + \Delta\tau_i = \frac{\alpha}{||R_i||} + +Larger values allow more aggressive timestepping. The default is to use +a fixed ``pseudo_alpha`` but a better strategy is to enable the PID controller +that adjusts this parameter based on the nonlinear solver convergence. + +The timestep is limited to be between ``dt_min_reset`` and +``max_timestep``. In addition the timestep is limited between 0.67 × +previous timestep and 1.5 × previous timestep, to limit sudden changes +in timestep. + +In practice this strategy seems to work well, though problems could +arise when residuals become very small. + +**history_based** + +When ``pseudo_strategy = history_based`` the history of residuals +within each cell is used to adjust the timestep. The key parameters +are: + +``pseudo_growth_factor`` (default: 1.1) + Factor by which timestep increases when residual decreases successfully. + +``pseudo_reduction_factor`` (default: 0.5) + Factor by which timestep decreases when residual increases (step rejected). + +This method may be less susceptible to fluctuations when residuals +become small, but tends to be slower to converge when residuals are +large. + +**hybrid** + +When ``pseudo_strategy = hybrid`` the ``inverse_residual`` and +``history_based`` strategies are combined: When the residuals are +large the ``inverse_residual`` method is used, and when residuals +become small the method switches to ``history_based``. + +PID Controller +^^^^^^^^^^^^^^ + +When using the PTC method the PID controller can be used to dynamically +adjust ``pseudo_alpha`` depending on the nonlinearity of the system: + +.. code-block:: ini + + [solver] + pid_controller = true + target_its = 7 # Target number of nonlinear iterations + kP = 0.7 # Proportional gain + kI = 0.3 # Integral gain + kD = 0.2 # Derivative gain + +The PID controller adjusts ``pseudo_alpha``, scaling all cell +timesteps together, to maintain approximately ``target_its`` nonlinear +iterations per solve. + +With this enabled the solver uses the number of nonlinear iterations +to scale timesteps globally, and residuals to scale timesteps locally. +Note that the PID controller has no effect on the ``history_based`` +strategy because that strategy does not use ``pseudo_alpha``. Jacobian Finite Difference with Coloring ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -435,7 +612,7 @@ The default and recommended approach for most problems: [solver] use_coloring = true # Enable (default) - lag_jacobian = 50 # Reuse Jacobian for this many iterations + lag_jacobian = 5 # Reuse Jacobian for this many iterations # Stencil shape (determines Jacobian sparsity pattern) stencil:taxi = 2 # Taxi-cab distance (default) @@ -449,7 +626,17 @@ Jacobian coloring stencil ^^^^^^^^^^^^^^^^^^^^^^^^^ The stencil used to create the Jacobian colouring can be varied, -depending on which numerical operators are in use. +depending on which numerical operators are in use. It is important to +note that the coloring won't work for every problem: It assumes that +each evolving quantity is coupled to all other evolving quantities on +the same grid cell, and on all the neighbouring grid cells. If the RHS +function includes Fourier transforms, or matrix inversions +(e.g. potential solves) then these will introduce longer-range +coupling and the Jacobian calculation will give spurious +results. Generally the method will then fail to converge. Two +solutions are to a) switch to matrix-free (``matrix_free=true``), +or b) solve the matrix inversion as a constraint. + ``solver:stencil:cross = N`` e.g. for N == 2 @@ -489,8 +676,26 @@ Setting ``solver:force_symmetric_coloring = true``, will make sure that the jacobian colouring matrix is symmetric. This will often include a few extra non-zeros that the stencil will miss otherwise +Diagnostics and Monitoring +--------------------------- + +.. code-block:: ini + + [solver] + diagnose = true # Print iteration info to screen + diagnose_failures = true # Detailed diagnostics on failures + +When ``equation_form = pseudo_transient``, the solver saves additional diagnostic fields: + +- ``snes_pseudo_residual``: Local residual in each cell +- ``snes_pseudo_timestep``: Local pseudo-timestep in each cell +- ``snes_pseudo_alpha``: Global timestep scaling +These can be visualized to understand convergence behavior and identify +problematic regions. +Summary of solver options +~~~~~~~~~~~~~~~~~~~~~~~~~ +---------------------------+---------------+----------------------------------------------------+ | Option | Default |Description | @@ -552,22 +757,11 @@ include a few extra non-zeros that the stencil will miss otherwise The predictor is linear extrapolation from the last two timesteps. It seems to be effective, but can be disabled by setting ``predictor = false``. - - The default `newtonls` SNES type can be very effective if combined with Jacobian coloring: The coloring enables the Jacobian to be calculated relatively efficiently; once a Jacobian matrix has been calculated, effective preconditioners can be used to speed up -convergence. It is important to note that the coloring assumes a star -stencil and so won't work for every problem: It assumes that each -evolving quantity is coupled to all other evolving quantities on the -same grid cell, and on all the neighbouring grid cells. If the RHS -function includes Fourier transforms, or matrix inversions -(e.g. potential solves) then these will introduce longer-range -coupling and the Jacobian calculation will give spurious -results. Generally the method will then fail to converge. Two -solutions are to a) switch to matrix-free (``matrix_free=true``), or b) -solve the matrix inversion as a constraint. +convergence. The `SNES type `_ @@ -587,12 +781,6 @@ Preconditioner types: Enable with command-line args ``-pc_type hypre -pc_hypre_type euclid -pc_hypre_euclid_levels k`` where ``k`` is the level (1-8 typically). -Pseudo-Transient Continuation (PTC) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Saves diagnostic ``Field3D`` output variables ``snes_pseudo_residual`` and ``snes_pseudo_timestep``. ODE integration diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 838da5fa5a..1207fcee1b 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -19,6 +19,7 @@ #include #include "petscerror.h" +#include "petscpc.h" #include "petscsnes.h" #include "petscsys.h" #include "petscvec.h" @@ -1435,11 +1436,11 @@ BoutReal SNESSolver::updatePseudoTimestep(BoutReal previous_timestep, // and then the method transitions to being history-based if (current_residual > 1000. * atol) { return updatePseudoTimestep_inverse_residual(previous_timestep, current_residual); - } else { - return updatePseudoTimestep_history_based(previous_timestep, previous_residual, - current_residual); } + return updatePseudoTimestep_history_based(previous_timestep, previous_residual, + current_residual); }; + throw BoutException("SNESSolver::updatePseudoTimestep invalid BoutPTCStrategy"); } PetscErrorCode SNESSolver::rhs_function(Vec x, Vec f, bool linear) { diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 98e96d3e4e..c37cc0b37a 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -39,6 +39,7 @@ class SNESSolver; #include #include +#include #include #include @@ -58,13 +59,6 @@ BOUT_ENUM_CLASS(BoutPTCStrategy, history_based, ///< Grow/shrink dt based on residual decrease/increase hybrid); ///< Combine inverse_residual and history_based strategies -// class FiniteDifferenceJacobian { -// public: -// PetscError initialise(); -// private: - -// }; - /// Uses PETSc's SNES interface to find a steady state solution to a /// nonlinear ODE by integrating in time with Backward Euler class SNESSolver : public Solver { From ca04bc81b374cbdd5a91f2e0a7843b9322a2a1f6 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Tue, 14 Oct 2025 15:17:58 -0700 Subject: [PATCH 09/11] snes: Add missing returns, clang tidying --- src/solver/impls/snes/snes.cxx | 32 ++++++++++++++++++++------------ src/solver/impls/snes/snes.hxx | 6 +++--- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 1207fcee1b..d349193ee4 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -14,14 +14,18 @@ #include #include +#include +#include #include #include #include "petscerror.h" +#include "petscmat.h" #include "petscpc.h" #include "petscsnes.h" #include "petscsys.h" +#include "petscsystypes.h" #include "petscvec.h" class ColoringStencil { @@ -270,7 +274,7 @@ PetscErrorCode SNESSolver::FDJinitialise() { // Mark non-zero entries output_progress.write("Marking non-zero Jacobian entries\n"); - PetscScalar val = 1.0; + const PetscScalar val = 1.0; for (int x = mesh->xstart; x <= mesh->xend; x++) { for (int y = mesh->ystart; y <= mesh->yend; y++) { @@ -288,14 +292,14 @@ PetscErrorCode SNESSolver::FDJinitialise() { continue; } - int ind2 = ROUND(index(xi, yi, 0)); + const int ind2 = ROUND(index(xi, yi, 0)); if (ind2 < 0) { continue; // A boundary point } // Depends on all variables on this cell for (int j = 0; j < n2d; j++) { - PetscInt col = ind2 + j; + const PetscInt col = ind2 + j; PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); } } @@ -312,7 +316,7 @@ PetscErrorCode SNESSolver::FDJinitialise() { // Depends on 2D fields for (int j = 0; j < n2d; j++) { - PetscInt col = ind0 + j; + const PetscInt col = ind0 + j; PetscCall(MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES)); } @@ -337,7 +341,7 @@ PetscErrorCode SNESSolver::FDJinitialise() { // 3D fields on this cell for (int j = 0; j < n3d; j++) { - PetscInt col = ind2 + j; + const PetscInt col = ind2 + j; PetscErrorCode ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); @@ -414,6 +418,7 @@ PetscErrorCode SNESSolver::FDJinitialise() { MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); } + return PETSC_SUCCESS; } PetscErrorCode SNESSolver::FDJpruneJacobian() { @@ -457,6 +462,7 @@ PetscErrorCode SNESSolver::FDJpruneJacobian() { jacobian_pruned = true; } #endif // PETSC_VERSION_GE(3,20,0) + return PETSC_SUCCESS; } PetscErrorCode SNESSolver::FDJrestoreFromPruning() { @@ -465,6 +471,7 @@ PetscErrorCode SNESSolver::FDJrestoreFromPruning() { // The non-zero pattern has changed, so update coloring updateColoring(); jacobian_pruned = false; // Reset flag. Will be set after pruning. + return PETSC_SUCCESS; } SNESSolver::SNESSolver(Options* opts) @@ -705,7 +712,8 @@ int SNESSolver::init() { // because that would require updating the preconditioner. PetscCall(VecDuplicate(snes_x, &output_x)); - FDJinitialise(); + // Initialize the Finite Difference Jacobian + PetscCall(FDJinitialise()); // Re-use Jacobian // Note: If the 'Amat' Jacobian is matrix free, SNESComputeJacobian @@ -1286,7 +1294,7 @@ PetscErrorCode SNESSolver::updatePseudoTimestepping(Vec x) { residual = sqrt(residual / count); auto i3d = mesh->ind2Dto3D(i2d, jz); - BoutReal new_timestep = + const BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], pseudo_residual[i3d], residual); pseudo_residual[i3d] = residual; @@ -1303,7 +1311,7 @@ PetscErrorCode SNESSolver::updatePseudoTimestepping(Vec x) { // These loops don't check the boundary flags for (const auto& i2d : mesh->getRegion2D("RGN_NOBNDRY")) { // Field2D quantities evolved together - if (f2d.size() > 0) { + if (!f2d.empty()) { BoutReal residual = 0.0; for (std::size_t i = 0; i != f2d.size(); ++i) { residual += SQ(current_residual[idx + i]); @@ -1311,7 +1319,7 @@ PetscErrorCode SNESSolver::updatePseudoTimestepping(Vec x) { residual = sqrt(residual / f2d.size()); // Adjust timestep for these quantities - BoutReal new_timestep = + const BoutReal new_timestep = updatePseudoTimestep(dt_data[idx], pseudo_residual_2d[i2d], residual); for (std::size_t i = 0; i != f2d.size(); ++i) { dt_data[idx++] = new_timestep; @@ -1320,7 +1328,7 @@ PetscErrorCode SNESSolver::updatePseudoTimestepping(Vec x) { } // Field3D quantities evolved together within a cell - if (f3d.size() > 0) { + if (!f3d.empty()) { for (int jz = 0; jz < mesh->LocalNz; jz++) { auto i3d = mesh->ind2Dto3D(i2d, jz); @@ -1530,7 +1538,7 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) { PetscCall(VecPointwiseMult(f, f, rhs_scaling_factors)); } - return 0; + return PETSC_SUCCESS; } /* @@ -1572,7 +1580,7 @@ PetscErrorCode SNESSolver::scaleJacobian(Mat Jac_new) { jacobian_recalculated = true; if (!scale_rhs) { - return 0; // Not scaling the RHS values + return PETSC_SUCCESS; // Not scaling the RHS values } // Get index of rows owned by this processor diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index c37cc0b37a..c988f2cc3f 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -101,9 +101,9 @@ public: void outputVars(Options& output_options, bool save_repeat = true) override; private: - PetscErrorCode FDJinitialise(); - PetscErrorCode FDJpruneJacobian(); - PetscErrorCode FDJrestoreFromPruning(); + PetscErrorCode FDJinitialise(); ///< Finite Difference Jacobian initialise + PetscErrorCode FDJpruneJacobian(); ///< Remove small elements from the Jacobian + PetscErrorCode FDJrestoreFromPruning(); ///< Restore Jacobian to original pattern /// Call the physics model RHS function /// From 9b1eb95f3d2dfb0f2a325fc938949ce33edc5362 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Wed, 15 Oct 2025 15:39:48 -0700 Subject: [PATCH 10/11] snes: pid_consider_failures and asinh_vars Experimental features, off by default 1. pid_consider_failures (bool, default: false) If enabled, this reduces the magnitude of timestep increases when recent solves have frequently failed. Makes the solver more cautious so may reduce failures but also may not increase timesteps as quickly. Net effect on performance is likely problem dependent. 2. asinh_vars (bool, default: false) If enabled, evolves asinh() of all variables. This might be a good idea when variable magnitudes vary widely, either due to normalisation or strong variations across the mesh. - Behaves like log() for large values - Close to linear for small values, so the solver doesn't over-resolve e.g low density regions - Retains sign, unlike log: asinh(-x) = -asinh(x) --- src/solver/impls/snes/snes.cxx | 121 ++++++++++++++++++++++++--------- src/solver/impls/snes/snes.hxx | 6 ++ 2 files changed, 96 insertions(+), 31 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index d349193ee4..4325a11250 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -540,6 +540,10 @@ SNESSolver::SNESSolver(Options* opts) kP((*options)["kP"].doc("Proportional PID parameter").withDefault(0.7)), kI((*options)["kI"].doc("Integral PID parameter").withDefault(0.3)), kD((*options)["kD"].doc("Derivative PID parameter").withDefault(0.2)), + pid_consider_failures( + (*options)["pid_consider_failures"] + .doc("Reduce timestep increases if recent solves have failed") + .withDefault(false)), diagnose( (*options)["diagnose"].doc("Print additional diagnostics").withDefault(false)), diagnose_failures((*options)["diagnose_failures"] @@ -594,6 +598,9 @@ SNESSolver::SNESSolver(Options* opts) .withDefault(false)), scale_vars((*options)["scale_vars"] .doc("Scale variables (Jacobian column scaling)?") + .withDefault(false)), + asinh_vars((*options)["asinh_vars"] + .doc("Apply asinh() to all variables?") .withDefault(false)) {} int SNESSolver::init() { @@ -655,6 +662,8 @@ int SNESSolver::init() { PetscCall(VecSet(var_scaling_factors, 1.0)); // Storage for scaled 'x' state vectors PetscCall(VecDuplicate(snes_x, &scaled_x)); + } else if (asinh_vars) { + PetscCall(VecDuplicate(snes_x, &scaled_x)); } if (equation_form == BoutSnesEquationForm::pseudo_transient) { @@ -824,18 +833,17 @@ int SNESSolver::run() { BoutReal* xdata = nullptr; PetscCall(VecGetArray(snes_x, &xdata)); save_vars(xdata); - PetscCall(VecRestoreArray(snes_x, &xdata)); - } - if (equation_form == BoutSnesEquationForm::pseudo_transient) { - // Calculate the initial residual in snes_f - run_rhs(simtime); - { - BoutReal* fdata = nullptr; - PetscCall(VecGetArray(snes_f, &fdata)); - save_derivs(fdata); - PetscCall(VecRestoreArray(snes_f, &fdata)); + if (asinh_vars) { + // Evolving asinh(vars) + PetscInt size; + PetscCall(VecGetLocalSize(snes_x, &size)); + for (PetscInt i = 0; i != size; ++i) { + xdata[i] = std::asinh(xdata[i] / asinh_scale); + } } + + PetscCall(VecRestoreArray(snes_x, &xdata)); } BoutReal target = simtime; @@ -847,6 +855,7 @@ int SNESSolver::run() { int steps_since_snes_failure = 0; int saved_jacobian_lag = 0; int loop_count = 0; + recent_failure_rate = 0.0; do { if (simtime >= target) break; // Could happen if step over multiple outputs @@ -973,9 +982,14 @@ int SNESSolver::run() { int lin_its; SNESGetLinearSolveIterations(snes, &lin_its); + // Rolling average of recent failures + recent_failure_rate *= 1. - inv_failure_window; + if ((ierr != PETSC_SUCCESS) or (reason < 0)) { // Diverged or SNES failed + recent_failure_rate += inv_failure_window; + if (diagnose_failures) { // Print diagnostics to help identify source of the problem @@ -1200,17 +1214,29 @@ int SNESSolver::run() { if (scale_vars) { // scaled_x <- output_x * var_scaling_factors PetscCall(VecPointwiseMult(scaled_x, output_x, var_scaling_factors)); - - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(scaled_x, &xdata)); - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); + } else if (asinh_vars) { + PetscCall(VecCopy(output_x, scaled_x)); } else { - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(output_x, &xdata)); - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(output_x, &xdata)); + scaled_x = output_x; + } + + if (asinh_vars) { + PetscInt size; + PetscCall(VecGetLocalSize(scaled_x, &size)); + + BoutReal* scaled_data = nullptr; + PetscCall(VecGetArray(scaled_x, &scaled_data)); + for (PetscInt i = 0; i != size; ++i) { + scaled_data[i] = asinh_scale * std::sinh(scaled_data[i]); + } + PetscCall(VecRestoreArray(scaled_x, &scaled_data)); } + + const BoutReal* xdata = nullptr; + PetscCall(VecGetArrayRead(scaled_x, &xdata)); + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); + run_rhs(target); // Run RHS to calculate auxilliary variables if (call_monitors(target, s, getNumberOutputSteps()) != 0) { @@ -1456,20 +1482,30 @@ PetscErrorCode SNESSolver::rhs_function(Vec x, Vec f, bool linear) { if (scale_vars) { // scaled_x <- x * var_scaling_factors PetscCall(VecPointwiseMult(scaled_x, x, var_scaling_factors)); - - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(scaled_x, &xdata)); - // const_cast needed due to load_vars API. Not writing to xdata. - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); + } else if (asinh_vars) { + PetscCall(VecCopy(x, scaled_x)); } else { - const BoutReal* xdata = nullptr; - PetscCall(VecGetArrayRead(x, &xdata)); - // const_cast needed due to load_vars API. Not writing to xdata. - load_vars(const_cast(xdata)); - PetscCall(VecRestoreArrayRead(x, &xdata)); + scaled_x = x; + } + + if (asinh_vars) { + PetscInt size; + PetscCall(VecGetLocalSize(scaled_x, &size)); + + BoutReal* scaled_data = nullptr; + PetscCall(VecGetArray(scaled_x, &scaled_data)); + for (PetscInt i = 0; i != size; ++i) { + scaled_data[i] = asinh_scale * std::sinh(scaled_data[i]); + } + PetscCall(VecRestoreArray(scaled_x, &scaled_data)); } + const BoutReal* xdata = nullptr; + PetscCall(VecGetArrayRead(scaled_x, &xdata)); + // const_cast needed due to load_vars API. Not writing to xdata. + load_vars(const_cast(xdata)); + PetscCall(VecRestoreArrayRead(scaled_x, &xdata)); + try { // Call RHS function run_rhs(simtime + dt, linear); @@ -1484,6 +1520,24 @@ PetscErrorCode SNESSolver::rhs_function(Vec x, Vec f, bool linear) { BoutReal* fdata = nullptr; PetscCall(VecGetArray(f, &fdata)); save_derivs(fdata); + + if (asinh_vars) { + // Modify time-derivatives for asinh(var) using chain rule + // Evolving u = asinh(var / scale) + // + // du/dt = dvar/dt * du/dvar + // + // du/var = 1 / sqrt(var^2 + scale^2) + PetscInt size; + PetscCall(VecGetLocalSize(f, &size)); + const BoutReal* scaled_data = nullptr; + PetscCall(VecGetArrayRead(scaled_x, &scaled_data)); + for (PetscInt i = 0; i != size; ++i) { + fdata[i] /= std::sqrt(SQ(scaled_data[i]) + SQ(asinh_scale)); + } + PetscCall(VecRestoreArrayRead(scaled_x, &scaled_data)); + } + PetscCall(VecRestoreArray(f, &fdata)); return PETSC_SUCCESS; } @@ -1713,7 +1767,12 @@ BoutReal SNESSolver::pid(BoutReal timestep, int nl_its, BoutReal max_dt) { kD); // clamp growth factor to avoid huge changes - const BoutReal fac = std::clamp(facP * facI * facD, 0.2, 5.0); + BoutReal fac = std::clamp(facP * facI * facD, 0.2, 5.0); + + if (pid_consider_failures && (fac > 1.0)) { + // Reduce aggressiveness if recent steps have failed often + fac = pow(fac, std::max(0.3, 1.0 - 2.0 * recent_failure_rate)); + } /* ---------- update timestep and history ---------- */ const BoutReal dt_new = std::min(timestep * fac, max_dt); diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index c988f2cc3f..7039954418 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -165,6 +165,9 @@ private: BoutReal kP; ///< (0.6 - 0.8) Proportional parameter (main response to current step) BoutReal kI; ///< (0.2 - 0.4) Integral parameter (smooths history of changes) BoutReal kD; ///< (0.1 - 0.3) Derivative (dampens oscillation - optional) + bool pid_consider_failures; ///< Reduce timestep increases if recent solves have failed + BoutReal recent_failure_rate; ///< Rolling average of recent failure rate + const BoutReal inv_failure_window = 0.1; ///< 1 / number of recent solves int nl_its_prev; int nl_its_prev2; @@ -222,6 +225,9 @@ private: bool scale_vars; ///< Scale individual variables? Vec var_scaling_factors; ///< Factors to multiply variables when passing to user Vec scaled_x; ///< The values passed to the user RHS + + bool asinh_vars; ///< Evolve asinh(vars) to compress magnitudes while preserving signs + const BoutReal asinh_scale = 1e-5; // Scale below which asinh response becomes ~linear }; #else From 9d63267793d31a12677d34f5d9384801734a17c6 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Fri, 31 Oct 2025 23:48:34 -0700 Subject: [PATCH 11/11] snes: Add max_snes_failures option Abort if too many consecutive failures. Prevents getting stuck in an infinite loop. --- src/solver/impls/snes/snes.cxx | 32 ++++++++++++++++++++++++-------- src/solver/impls/snes/snes.hxx | 2 ++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index 6a06bf21f5..b8bf8c3e58 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -508,6 +508,9 @@ SNESSolver::SNESSolver(Options* opts) upper_its((*options)["upper_its"] .doc("Iterations above which the next timestep is reduced") .withDefault(static_cast(maxits * 0.8))), + max_snes_failures((*options)["max_snes_failures"] + .doc("Abort after this number of consecutive failures") + .withDefault(10)), timestep_factor_on_failure((*options)["timestep_factor_on_failure"] .doc("Multiply timestep on convergence failure") .withDefault(0.5)), @@ -990,7 +993,10 @@ int SNESSolver::run() { recent_failure_rate += inv_failure_window; - if (diagnose_failures) { + ++snes_failures; + steps_since_snes_failure = 0; + + if (diagnose_failures or (snes_failures == max_snes_failures)) { // Print diagnostics to help identify source of the problem output.write("\n======== SNES failed =========\n"); @@ -1009,15 +1015,25 @@ int SNESSolver::run() { } } - ++snes_failures; - steps_since_snes_failure = 0; + if (snes_failures == max_snes_failures) { + output.write("Too many SNES failures ({}). Aborting."); + return 1; + } if (equation_form == BoutSnesEquationForm::pseudo_transient) { - // Global scaling of timesteps - // Note: A better strategy might be to reduce timesteps - // in problematic cells. - PetscCall(VecScale(dt_vec, timestep_factor_on_failure)); - + if (snes_failures == max_snes_failures - 1) { + // Last chance. Set to uniform smallest timestep + PetscCall(VecSet(dt_vec, dt_min_reset)); + + } else if (snes_failures == 5) { + // Set uniform timestep + PetscCall(VecSet(dt_vec, timestep)); + } else { + // Global scaling of timesteps + // Note: A better strategy might be to reduce timesteps + // in problematic cells. + PetscCall(VecScale(dt_vec, timestep_factor_on_failure)); + } } else { // Try a smaller timestep timestep *= timestep_factor_on_failure; diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 7039954418..5d007f8ad7 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -129,6 +129,8 @@ private: int maxits; ///< Maximum nonlinear iterations int lower_its, upper_its; ///< Limits on iterations for timestep adjustment + int max_snes_failures; ///< Maximum number of consecutive SNES failures before abort. + BoutReal timestep_factor_on_failure; BoutReal timestep_factor_on_upper_its; BoutReal timestep_factor_on_lower_its;