diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/Project.toml b/Project.toml index 762eea3b..e7120f01 100644 --- a/Project.toml +++ b/Project.toml @@ -3,6 +3,8 @@ uuid = "10dff2fc-5484-5881-a0e0-c90441020f8a" version = "0.14.0" [deps] +Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +GenericLinearAlgebra = "14197337-ba66-59df-a3e3-ca00e7dcff7a" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" @@ -10,18 +12,26 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +QRMumps = "422b30a1-cc69-4d85-abe7-cc07b540c444" SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" SolverParameters = "d076d87d-d1f9-4ea3-a44b-64b4cdd1e470" SolverTools = "b5612192-2639-5dc1-abfe-fbedd65fab29" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" [compat] -Krylov = "0.10.0" +Arpack = "0.5.4" +GenericLinearAlgebra = "0.3.18" +Krylov = "0.10.1" LinearOperators = "2.0" NLPModels = "0.21" NLPModelsModifiers = "0.7" +QRMumps = "0.3.1" SolverCore = "0.3" SolverParameters = "0.1" SolverTools = "0.9" +SparseArrays = "1.11.0" +TSVD = "0.4.4" julia = "1.10" [extras] diff --git a/README.md b/README.md index cd21ff94..b4299756 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,9 @@ This package provides an implementation of four classic algorithms for unconstra > high-order regularized models. *Mathematical Programming*, 163(1), 359-368. > DOI: [10.1007/s10107-016-1065-8](https://doi.org/10.1007/s10107-016-1065-8) +- `R2N`: An inexact second-order quadratic regularization method for unconstrained optimization (with shifted L-BFGS or shifted Hessian operator); +- `R2NLS`: an inexact second-order quadratic regularization method for nonlinear least-squares problems; + - `fomo`: a first-order method with momentum for unconstrained optimization; - `tron`: a pure Julia implementation of TRON, a trust-region solver for bound-constrained optimization described in diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 322f7c2e..0ee48194 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -7,11 +7,13 @@ - [`trunk`](@ref) - [`R2`](@ref) - [`fomo`](@ref) +- [`R2N`](@ref) +- [`R2NLS`](@ref) | Problem type | Solvers | | --------------------- | -------- | -| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)| -| Unconstrained NLS | [`trunk`](@ref), [`tron`](@ref) | +| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref), ['R2N'](@ref)| +| Unconstrained NLS | [`trunk`](@ref), [`tron`](@ref), [`R2NLS`](@ref) | | Bound-constrained NLP | [`tron`](@ref) | | Bound-constrained NLS | [`tron`](@ref) | @@ -22,5 +24,7 @@ lbfgs tron trunk R2 +R2N +R2NLS fomo ``` diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 0e86dfd6..4280848e 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -1,7 +1,7 @@ module JSOSolvers # stdlib -using LinearAlgebra, Logging, Printf +using LinearAlgebra, Logging, Printf, Arpack, SparseArrays # JSO packages using Krylov, @@ -41,13 +41,18 @@ function normM!(n, x, M, z) end end +# Utilities +include("utilities.jl") + # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") include("fomo.jl") +include("R2N.jl") # Unconstrained solvers for NLS include("trunkls.jl") +include("R2NLS.jl") # List of keywords accepted by TRONTrustRegion const tron_keys = ( diff --git a/src/R2N.jl b/src/R2N.jl new file mode 100644 index 00000000..64fefa8f --- /dev/null +++ b/src/R2N.jl @@ -0,0 +1,585 @@ +export R2N, R2NSolver +export ShiftedLBFGSSolver +using LinearOperators, LinearAlgebra + +# Define a new mutable operator for A = H + σI +mutable struct ShiftedOperator{T, V, OpH <: AbstractLinearOperator{T}} <: AbstractLinearOperator{T} + H::OpH + σ::T + n::Int + symmetric::Bool + hermitian::Bool +end + +# Constructor for the new operator +function ShiftedOperator(H::OpH) where {T, OpH <: AbstractLinearOperator{T}} + return ShiftedOperator{T, Vector{T}, OpH}(H, zero(T), H.n, H.symmetric, H.hermitian) +end + +# Define required properties for AbstractLinearOperator +Base.size(A::ShiftedOperator) = (A.n, A.n) +LinearAlgebra.isreal(A::ShiftedOperator{T}) where {T <: Real} = true +LinearOperators.is_symmetric(A::ShiftedOperator) = A.symmetric +LinearOperators.is_hermitian(A::ShiftedOperator) = A.hermitian + +# Define the core multiplication rules: y = (H + σI)x +function LinearAlgebra.mul!(y::V, A::ShiftedOperator{T, V}, x::V) where {T, V} + # y = Hx + σx + mul!(y, A.H, x) + axpy!(A.σ, x, y) # y += A.σ * x + return y +end + +function LinearAlgebra.mul!(y::V, A::ShiftedOperator{T, V}, x::V, α::Number, β::Number) where {T, V} + # y = α(Hx + σx) + βy + mul!(y, A.H, x, α, β) # y = α*Hx + β*y + axpy!(α * A.σ, x, y) # y += α*A.σ*x + return y +end + +abstract type AbstractShiftedLBFGSSolver end + +struct ShiftedLBFGSSolver <: AbstractShiftedLBFGSSolver + # Shifted LBFGS-specific fields +end + +# const R2N_allowed_subsolvers = [:cg_lanczos_shift, :minres, :shifted_lbfgs] +const R2N_allowed_subsolvers = [:cg, :cr, :minres, :shifted_lbfgs] +#TODO CgLanczosShiftSolver is not implemented yet for negative curvature problems + +const npc_handler_allowed = [:armijo, :sigma, :prev, :cp] + +""" + R2N(nlp; kwargs...) +An inexact second-order quadratic regularization method for unconstrained optimization (with shifted L-BFGS or shifted Hessian operator). +For advanced usage, first define a `R2NSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + solver = R2NSolver(nlp) + solve!(solver, nlp; kwargs...) +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. +- `σmin = eps(T)`: step parameter for R2N algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `subsolver::Symbol = :shifted_lbfgs`: the subsolver to solve the shifted system. The `MinresSolver` which solves the shifted linear system exactly at each iteration. Using the exact solver is only possible if `nlp` is an `LBFGSModel`. +- `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver if KrylovWorkspace type is selected. +See `JSOSolvers.R2N_allowed_subsolvers` for a list of available `SubSolver`. +- `scp_flag::Bool = true`: if true, we compare the norm of the calculate step with `θ2 * norm(scp)`, each iteration, selecting the smaller step. +- `npc_handler::Symbol = :armijo`: the non_positive_curve handling strategy. + - `:armijo`: uses the Armijo rule to handle non-positive curvature. + - `:sigma`: increase the regularization parameter σ. + - `:prev`: if subsolver return after first iteration, increase the sigma, but if subsolver return after second iteration, set s_k = s_k^(t-1). + - `:cp`: set s_k to Cauchy point. +See `JSOSolvers.npc_handler_allowed` for a list of available `npc_handler` strategies. +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. +# Callback +$(Callback_docstring) +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2N(nlp) +# output +"Execution stats: first-order stationary" +``` +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = R2NSolver(nlp); +stats = solve!(solver, nlp) +# output +"Execution stats: first-order stationary" +``` +""" + +mutable struct R2NSolver{ + T, + V, + Op <: AbstractLinearOperator{T}, + OpA <: ShiftedOperator{T, V}, + Sub <: Union{KrylovWorkspace{T, T, V}, ShiftedLBFGSSolver}, +} <: AbstractOptimizationSolver + x::V + cx::V + gx::V + gn::V + σ::T + H::Op + A::OpA # The new combined operator A = H + σI + Hs::V + s::V + scp::V + obj_vec::V # used for non-monotone + r2_subsolver::Sub + cgtol::T +end + +function R2NSolver( + nlp::AbstractNLPModel{T, V}; + non_mono_size = 1, + subsolver::Symbol = :minres, +) where {T, V} + subsolver in R2N_allowed_subsolvers || + error("subproblem solver must be one of $(R2N_allowed_subsolvers)") + + !(subsolver == :shifted_lbfgs) || + (nlp isa LBFGSModel) || + error("Unsupported subsolver type, ShiftedLBFGSSolver can only be used by LBFGSModel") + + non_mono_size >= 1 || error("non_mono_size must be greater than or equal to 1") + + nvar = nlp.meta.nvar + x = V(undef, nvar) + cx = V(undef, nvar) + gx = V(undef, nvar) + gn = isa(nlp, QuasiNewtonModel) ? V(undef, nvar) : V(undef, 0) + Hs = V(undef, nvar) + H = isa(nlp, QuasiNewtonModel) ? nlp.op : hess_op!(nlp, x, Hs) + # Create the single, reusable operator A + A = ShiftedOperator(H) + OpA = typeof(A) + Op = typeof(H) + σ = zero(T) + s = V(undef, nvar) + scp = V(undef, nvar) # Cauchy point + cgtol = one(T) + obj_vec = fill(typemin(T), non_mono_size) + + if subsolver == :shifted_lbfgs + r2_subsolver = ShiftedLBFGSSolver() + else + r2_subsolver = krylov_workspace(Val(subsolver), nequ, nvar, V) + end + + Sub = typeof(r2_subsolver) + return R2NSolver{T, V, Op, OpA, Sub}( + x, + cx, + gx, + gn, + σ, + H, + A, + Hs, + s, + scp, + obj_vec, + r2_subsolver, + cgtol, + ) +end + +function SolverCore.reset!(solver::R2NSolver{T}) where {T} + fill!(solver.obj_vec, typemin(T)) + reset!(solver.H) + solver +end +function SolverCore.reset!(solver::R2NSolver{T}, nlp::AbstractNLPModel) where {T} + fill!(solver.obj_vec, typemin(T)) + # @assert (length(solver.gn) == 0) || isa(nlp, QuasiNewtonModel) + solver.H = isa(nlp, QuasiNewtonModel) ? nlp.op : hess_op!(nlp, solver.x, solver.Hs) + + solver +end + +@doc (@doc R2NSolver) function R2N( + nlp::AbstractNLPModel{T, V}; + subsolver::Symbol = :minres, + non_mono_size = 1, + kwargs..., +) where {T, V} + solver = R2NSolver(nlp; non_mono_size = non_mono_size, subsolver = subsolver) + return solve!(solver, nlp; non_mono_size = non_mono_size, kwargs...) +end + +function SolverCore.solve!( + solver::R2NSolver{T, V}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + η1 = T(0.0001), + η2 = T(0.001), + θ1 = T(0.5), + θ2 = eps(T)^(-1), + λ = T(2), + σmin = zero(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + verbose::Int = 0, + subsolver_verbose::Int = 0, + non_mono_size = 1, + npc_handler::Symbol = :armijo, + scp_flag::Bool = true, +) where {T, V} + unconstrained(nlp) || error("R2N should only be called on unconstrained problems.") + npc_handler in npc_handler_allowed || error("npc_handler must be one of $(npc_handler_allowed)") + @assert(λ > 1) + + reset!(stats) + start_time = time() + set_time!(stats, 0.0) + n = nlp.meta.nvar + x = solver.x .= x + ck = solver.cx + ∇fk = solver.gx # k-1 + ∇fn = solver.gn #current + s = solver.s + scp = solver.scp + H = solver.H + Hs = solver.Hs + A = solver.A + σk = solver.σ + cgtol = solver.cgtol + subsolver_solved = false + + set_iter!(stats, 0) + f0 = obj(nlp, x) + set_objective!(stats, f0) + + grad!(nlp, x, ∇fk) + isa(nlp, QuasiNewtonModel) && (∇fn .= ∇fk) + norm_∇fk = norm(∇fk) + set_dual_residual!(stats, norm_∇fk) + + σk = 2^round(log2(norm_∇fk + 1)) / norm_∇fk + A.σ = σk + ρk = zero(T) + + # Stopping criterion: + fmin = min(-one(T), f0) / eps(T) + unbounded = f0 < fmin + + ϵ = atol + rtol * norm_∇fk + optimal = norm_∇fk ≤ ϵ + + if optimal + @info("Optimal point found at initial point") + @info log_header( + [:iter, :f, :grad_norm, :sigma, :rho, :dir], + [Int, Float64, Float64, Float64, Float64, String], + hdr_override = Dict( + :f => "f(x)", + :grad_norm => "‖∇f‖", + :sigma => "σ", + :rho => "ρ", + :dir => "DIR", + ), + ) + + # Define and log the row information with corresponding data values + @info log_row([stats.iter, stats.objective, norm_∇fk, σk, ρk, ""]) + end + + cp_step_log = " " + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info log_header( + [:iter, :f, :grad_norm, :sigma, :rho, :dir, :cp_step_log, :sub_status], + [Int, Float64, Float64, Float64, Float64, String, String, String], + hdr_override = Dict( + :f => "f(x)", + :grad_norm => "‖∇f‖", + :sigma => "σ", + :rho => "ρ", + :dir => "DIR", + :cp_step_log => "Cauchy Step", + :sub_status => "Subsolver Status", + ), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, σk, ρk, "", cp_step_log, ""]) + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + cgtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * cgtol)) + γ_k = zero(T) + ν_k = one(T) + scp_recal = true + + while !done + scp_recal = true # Recalculate the Cauchy point if needed + # Compute the Cauchy step. + # Note that we use buffer values Hs to avoid reallocating memory. + mul!(Hs, H, ∇fk) + curv = dot(∇fk, Hs) + slope = σk * norm_∇fk^2 # slope= σ * ||∇f||^2 + γ_k = (curv + slope) / norm_∇fk^2 + if γ_k < 0 + cp_step_log = "Cauchy step" + ν_k = 2*(1-δ1) / (γ_k) + else + # we have to calcualte the scp, since we have encounter a negative curvature + λmax, found_λ = opnorm(H) + cp_step_log = "ν_k" + ν_k = θ1 / (λmax + σk) + end + + # Solving for step direction + ∇fk .*= -1 + # update the operator A with the current σk + A.σ = σk + A.H = H + # solve for the step direction + subsolver_solved, sub_stats, subiter = + subsolve!(r2_subsolver, solver, s, zero(T), n, subsolver_verbose) + + if !(subsolver == :shifted_lbfgs) #not exact solver + if r2_subsolver.stats.npcCount >= 1 #npc case + if npc_handler == :armijo + c = 1e-4 + α = 1.0 + increase_factor = 1.5 + decrease_factor = 0.5 + min_alpha = 1e-8 + max_alpha = 1e2 + dir = r2_subsolver.npc + f0 = stats.objective + grad_dir = dot(∇fk, dir) + # First, try α = 1 + ck .= x .+ α * dir + fck = obj(nlp, ck) + if fck > f0 + c * α * grad_dir + # Backward tracking + while fck > f0 + c * α * grad_dir && α > min_alpha + α *= decrease_factor + ck .= x .+ α * dir + fck = obj(nlp, ck) + end + else + # Forward tracking + while true + α_new = α * increase_factor + ck .= x .+ α_new * dir + fck_new = obj(nlp, ck) + if fck_new > f0 + c * α_new * grad_dir || α_new > max_alpha + break + end + α = α_new + fck = fck_new + end + end + s .= α * dir + elseif npc_handler == :prev #Cr and cg will return the last iteration s + s .= r2_subsolver.x + elseif npc_handler == :cp + # Cauchy point + scp .= ν_k * ∇fk # the ∇fk is already negative + s .= scp + scp_recal = false # we have already calculated the Cauchy point + end + end + end + + if !subsolver_solved + @warn("Subsolver failed to solve the shifted system") + break + end + + if scp_flag && scp_recal + # Based on the flag, scp is calcualted + scp .= ν_k * ∇fk # the ∇fk is already negative + if norm(s) > θ2 * norm(scp) + s .= scp + end + end + if npc_handler == :sigma && r2_subsolver.stats.npcCount >= 1 # non-positive curvature case happen and the npc_handler is sigma + step_accepted = false + else + slope = dot(s, ∇fk) # = -∇fkᵀ s because we flipped the sign of ∇fk + mul!(Hs, H, s) + curv = dot(s, Hs) + + ΔTk = slope - curv / 2 + ck .= x .+ s + fck = obj(nlp, ck) + + if non_mono_size > 1 #non-monotone behaviour + k = mod(stats.iter, non_mono_size) + 1 + solver.obj_vec[k] = stats.objective + fck_max = maximum(solver.obj_vec) + ρk = (fck_max - fck) / (fck_max - stats.objective + ΔTk) + else + ρk = (stats.objective - fck) / ΔTk + end + + # Update regularization parameters and Acceptance of the new candidate + step_accepted = ρk >= η1 + end + + if step_accepted + x .= ck + grad!(nlp, x, ∇fk) + if isa(nlp, QuasiNewtonModel) + ∇fn .-= ∇fk + ∇fn .*= -1 # = ∇f(xₖ₊₁) - ∇f(xₖ) + push!(nlp, s, ∇fn) + ∇fn .= ∇fk + end + set_objective!(stats, fck) + unbounded = fck < fmin + norm_∇fk = norm(∇fk) + if ρk >= η2 + σk = max(σmin, γ3 * σk) + else # η1 ≤ ρk < η2 + σk = min(σmin, γ1 * σk) + end + else # η1 > ρk + σk = max(σmin, γ2 * σk) + ∇fk .*= -1 + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + cgtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * cgtol)) + set_dual_residual!(stats, norm_∇fk) + + solver.σ = σk + solver.cgtol = cgtol + set_dual_residual!(stats, norm_∇fk) + + callback(nlp, solver, stats) + + norm_∇fk = stats.dual_feas # if the user change it, they just change the stats.norm , they also have to change cgtol + σk = solver.σ + cgtol = solver.cgtol + norm_∇fk = stats.dual_feas + + optimal = norm_∇fk ≤ ϵ + if verbose > 0 && mod(stats.iter, verbose) == 0 + dir_stat = step_accepted ? "↘" : "↗" + @info log_row([ + stats.iter, + stats.objective, + norm_∇fk, + σk, + ρk, + subiter, + cp_step_log, + dir_stat, + sub_stats, + ]) + end + + if stats.status == :user + done = true + else + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + done = stats.status != :unknown + end + end + + set_solution!(stats, x) + return stats +end +# Dispatch for subsolvers KrylovWorkspace: cg and cr +function subsolve!( + r2_subsolver::KrylovWorkspace{T, T, V}, + R2N::R2NSolver, + s, + atol, + n, + subsolver_verbose, +) where {T, V} + krylov_solve!( + r2_subsolver, + R2N.A, # Use the ShiftedOperator A + R2N.gx, + itmax = max(2 * n, 50), + atol = atol, + rtol = R2N.cgtol, + verbose = subsolver_verbose, + linesearch = true, + ) + s .= r2_subsolver.x + return Krylov.issolved(r2_subsolver), r2_subsolver.stats.status, r2_subsolver.stats.niter +end + +# Dispatch for MinresSolver +function subsolve!(r2_subsolver::MinresSolver, R2N::R2NSolver, s, atol, n, subsolver_verbose) + krylov_solve!( + r2_subsolver, + R2N.H, + R2N.gx, + λ = R2N.σ, + itmax = max(2 * n, 50), + atol = atol, + rtol = R2N.cgtol, + verbose = subsolver_verbose, + linesearch = true, + ) + s .= r2_subsolver.x + return Krylov.issolved(r2_subsolver), r2_subsolver.stats.status, r2_subsolver.stats.niter +end + +# Dispatch for KrylovWorkspace +function subsolve!( + r2_subsolver::CgLanczosShiftSolver, + R2N::R2NSolver, + s, + atol, + n, + subsolver_verbose, +) + krylov_solve!( + r2_subsolver, + R2N.H, + R2N.gx, + R2N.opI * R2N.σ, #shift vector σ * I + λ = R2N.σ, + itmax = max(2 * n, 50), + atol = atol, + rtol = R2N.cgtol, + verbose = subsolver_verbose, + linesearch = true, + ) + s .= r2_subsolver.x + return issolved(r2_subsolver), r2_subsolver.stats.status, r2_subsolver.stats.niter +end + +# Dispatch for ShiftedLBFGSSolver +function subsolve!(r2_subsolver::ShiftedLBFGSSolver, R2N::R2NSolver, s, atol, n, subsolver_verbose) + ∇f_neg = R2N.gx + H = R2N.H + σ = R2N.σ + solve_shifted_system!(s, H, ∇f_neg, σ) + return true, :first_order, 1 +end diff --git a/src/R2NLS.jl b/src/R2NLS.jl new file mode 100644 index 00000000..e42c522b --- /dev/null +++ b/src/R2NLS.jl @@ -0,0 +1,612 @@ +export R2NLS, R2SolverNLS +export QRMumpsSolver +using SparseMatricesCOO + + +using QRMumps, LinearAlgebra, SparseArrays + +abstract type AbstractQRMumpsSolver end + +""" + QRMumpsSolver + +A solver structure for handling the linear least-squares subproblems within R2NLS +using the QRMumps package. This structure pre-allocates all necessary memory +for the sparse matrix representation and the factorization. +""" +mutable struct QRMumpsSolver{T} <: AbstractQRMumpsSolver + # QRMumps structures + spmat::qrm_spmat{T} + spfct::qrm_spfct{T} + + # COO storage for the augmented matrix [J; sqrt(σ) * I] + irn::Vector{Int} + jcn::Vector{Int} + val::Vector{T} + + # Augmented RHS vector + b_aug::Vector{T} + + # Problem dimensions + m::Int + n::Int + nnzj::Int + + closed::Bool # Avoid double-destroy + + function QRMumpsSolver(nlp::AbstractNLSModel{T}) where {T} + # Safely initialize QRMumps library + qrm_init() + + # 1. Get problem dimensions and Jacobian structure + meta_nls = nls_meta(nlp) + n = nlp.nls_meta.nvar + m = nlp.nls_meta.nequ + nnzj = meta_nls.nnzj + + # 2. Allocate COO arrays for the augmented matrix [J; sqrt(σ)I] + # Total non-zeros = non-zeros in Jacobian (nnzj) + n diagonal entries for the identity block. + irn = Vector{Int}(undef, nnzj + n) + jcn = Vector{Int}(undef, nnzj + n) + val = Vector{T}(undef, nnzj + n) + + # 3. Fill in the sparsity pattern of the Jacobian J(x) + jac_structure_residual!(nlp, view(irn, 1:nnzj), view(jcn, 1:nnzj)) + + # 4. Fill in the sparsity pattern for the √σ·Iₙ block + # This block lives in rows m+1 to m+n and columns 1 to n. + @inbounds for i = 1:n + irn[nnzj + i] = m + i + jcn[nnzj + i] = i + end + + # 5. Initialize QRMumps sparse matrix and factorization structures + spmat = qrm_spmat_init(m + n, n, irn, jcn, val; sym = false) + spfct = qrm_spfct_init(spmat) + qrm_analyse!(spmat, spfct; transp = 'n') + + # 6. Pre-allocate the augmented right-hand-side vector + b_aug = Vector{T}(undef, m+n) + + # 7. Create the solver object and set a finalizer for safe cleanup. + solver = new{T}(spmat, spfct, irn, jcn, val, b_aug, m, n, nnzj) + + # register a finalizer that calls close! (anonymous closure avoids ordering issues) + finalizer(solver) do s + try + close!(s) + catch + @warn "Error during finalization of QRMumpsSolver. Ensure all resources are properly released." + end + end + return solver + end + + function close!(s::QRMumpsSolver) #TODO move to Finilizer + if s.closed + return + end + s.closed = true + # best-effort; qrm_spfct_destroy! and qrm_spmat_destroy! documented functions. + try + qrm_spfct_destroy!(s.spfct) + catch err + @warn "qrm_spfct_destroy! failed" err + end + try + qrm_spmat_destroy!(s.spmat) + catch err + @warn "qrm_spmat_destroy! failed" err + end + return nothing + end +end + +const R2NLS_allowed_subsolvers = (:cgls, :crls, :lsqr, :lsmr, :qrmumps) + +""" + R2NLS(nlp; kwargs...) +An inexact second-order quadratic regularization method designed specifically for nonlinear least-squares problems. +The objective is to solve + min ½‖F(x)‖² +where `F: ℝⁿ → ℝᵐ` is a vector-valued function defining the least-squares residuals. +For advanced usage, first create a `R2SolverNLS` to preallocate the necessary memory for the algorithm, and then call `solve!`: + solver = R2SolverNLS(nlp) + solve!(solver, nlp; kwargs...) +# Arguments +- `nlp::AbstractNLSModel{T, V}` is the nonlinear least-squares model to solve. See `NLPModels.jl` for additional details. +# Keyword Arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance; the algorithm stops when ‖J(x)ᵀF(x)‖ ≤ atol + rtol * ‖J(x₀)ᵀF(x₀)‖. +- `η1 =T(0.0001) eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `θ1 = T(0.5)`, `θ2 = eps(T)^-1`: Cauchy step parameters. +- `γ1 = T(1.5)`, `γ2 = T(2.5)`, `γ3 = T(0.5)`: regularization update parameters. +- `δ1 = T(0.5)`: used for Cauchy point calculate. +- `σmin = eps(T)`: minimum step parameter for the R2NLS algorithm. +- `max_eval::Int = -1`: maximum number of objective function evaluations. +- `max_time::Float64 = 30.0`: maximum allowed time in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, displays iteration details every `verbose` iterations. +- `scp_flag::Bool = true`: if true, we compare the norm of the calculate step with `θ2 * norm(scp)`, each iteration, selecting the smaller step. +- `subsolver::Symbol = :lsmr`: method used as subproblem solver, see `JSOSolvers.R2N_allowed_subsolvers` for a list. +- `subsolver_verbose::Int = 0`: if > 0, displays subsolver iteration details every `subsolver_verbose` iterations when a KrylovWorkspace type is selected. +- `non_mono_size = 1`: the size of the non-monotone behaviour. If > 1, the algorithm will use a non-monotone strategy to accept steps. + +# Output +Returns a `GenericExecutionStats` object containing statistics and information about the optimization process (see `SolverCore.jl`). +# Callback +$(Callback_docstring) +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] +model = ADNLSModel(F, [-1.2; 1.0], 2) +stats = R2NLS(model) +# output +"Execution stats: first-order stationary" +``` +```jldoctest +using JSOSolvers, ADNLPModels +F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] +model = ADNLSModel(F, [-1.2; 1.0], 2) +solver = R2SolverNLS(model) +stats = solve!(solver, model) +# output +"Execution stats: first-order stationary" +``` + +# Example with QRMumps subsolver and explicit cleanup #TODO remove +```jldoctest +using JSOSolvers, ADNLPModels +F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] +model = ADNLSModel(F, [-1.2; 1.0], 2) +solver = R2SolverNLS(model; subsolver = :qrmumps) +stats = solve!(solver, model) +close!(solver.ls_subsolver) # Free QRMumps resources +qrm_finalize() # Shutdown QRMumps library globally #TODO remove +# output +"Execution stats: first-order stationary" +``` +""" +mutable struct R2SolverNLS{ + T, + V, + Op <: union{AbstractLinearOperator{T},SparseMatrixCOO{T,Int}}, + Sub <: Union{KrylovWorkspace{T, T, V}, QRMumpsSolver{T}}, +} <: AbstractOptimizationSolver + x::V + xt::V + temp::V + gx::V + Fx::V + rt::V + Jv::V + Jtv::V + Jx::Op + ls_subsolver::Sub + obj_vec::V # used for non-monotone behaviour + subtol::T + s::V + scp::V + σ::T +end + +function R2SolverNLS( + nlp::AbstractNLSModel{T, V}; + non_mono_size = 1, + subsolver::Symbol = :lsmr, +) where {T, V} + subsolver in R2NLS_allowed_subsolvers || + error("subproblem solver must be one of $(R2NLS_allowed_subsolvers)") + non_mono_size >= 1 || error("non_mono_size must be greater than or equal to 1") + + nvar = nlp.meta.nvar + nequ = nlp.nls_meta.nequ + x = V(undef, nvar) + xt = V(undef, nvar) + temp = V(undef, nequ) + gx = V(undef, nvar) + Fx = V(undef, nequ) + rt = V(undef, nequ) + Jv = V(undef, nequ) + Jtv = V(undef, nvar) + s = V(undef, nvar) + scp = V(undef, nvar) + σ = eps(T)^(1 / 5) + if subsolver == :qrmumps + Jv = V(undef, 0) + Jtv = V(undef, 0) + ls_subsolver = QRMumpsSolver(nlp) + Jx = SparseMatrixCOO(nequ, nvar, view(ls_subsolver.irn, 1:ls_subsolver.nnzj), view(ls_subsolver.jcn, 1:ls_subsolver.nnzj), view(ls_subsolver.val, 1:ls_subsolver.nnzj)) + else + Jx = jac_op_residual!(nlp, x, Jv, Jtv) + ls_subsolver = krylov_workspace(Val(subsolver), nequ, nvar, V) + end + Sub = typeof(ls_subsolver) + Op = typeof(Jx) + + + subtol = one(T) # must be ≤ 1.0 + obj_vec = fill(typemin(T), non_mono_size) + + return R2SolverNLS{T, V, Op, Sub}( + x, + xt, + temp, + gx, + Fx, + rt, + Jv, + Jtv, + Jx, + ls_subsolver, + obj_vec, + subtol, + s, + scp, + σ, + ) +end + +function SolverCore.reset!(solver::R2SolverNLS{T}) where {T} + fill!(solver.obj_vec, typemin(T)) + solver +end +function SolverCore.reset!(solver::R2SolverNLS{T}, nlp::AbstractNLSModel) where {T} + fill!(solver.obj_vec, typemin(T)) + solver +end + +@doc (@doc R2SolverNLS) function R2NLS( + nlp::AbstractNLSModel{T, V}; + subsolver::Symbol = :lsmr, + non_mono_size = 1, + kwargs..., +) where {T, V} + solver = R2SolverNLS(nlp; non_mono_size = non_mono_size, subsolver = subsolver) + return solve!(solver, nlp; non_mono_size = non_mono_size, kwargs...) +end + +function SolverCore.solve!( + solver::R2SolverNLS{T, V}, + nlp::AbstractNLSModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + Fatol::T = zero(T), + Frtol::T = zero(T), + η1 = eps(T)^(1 / 4), + η2 = T(0.95), + θ1 = T(0.5), + θ2 = eps(T)^(-1), + γ1 = T(1.5), + γ2 = T(2.5), + γ3 = T(0.5), + δ1 = T(0.5), + σmin = eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + verbose::Int = 0, + scp_flag::Bool = true, + subsolver_verbose::Int = 0, + non_mono_size = 1, +) where {T, V} + unconstrained(nlp) || error("R2NLS should only be called on unconstrained problems.") + if !(nlp.meta.minimize) + error("R2NLS only works for minimization problem") + end + + reset!(stats) + @assert(η1 > 0 && η1 < 1) + @assert(θ1 > 0 && θ1 < 1) + @assert(θ2 > 1) + @assert(γ1 >= 1 && γ1 <= γ2 && γ3 <= 1) + @assert(δ1>0 && δ1<1) + + start_time = time() + set_time!(stats, 0.0) + + n = nlp.nls_meta.nvar + m = nlp.nls_meta.nequ + x = solver.x .= x + xt = solver.xt + ∇f = solver.gx # k-1 + ls_subsolver = solver.ls_subsolver + r, rt = solver.Fx, solver.rt + s = solver.s + scp = solver.scp + subtol = solver.subtol + + σk = solver.σ + residual!(nlp, x, r) + # f, ∇f = objgrad!(nlp, x, ∇f, r, recompute = false) #TODO it is expensive for QRMumps do this mul!(∇f, Jx', r) + f = obj(nlp, x, r, recompute = false) #TODO it is expensive for QRMumps do this mul!(∇f, Jx', r) + f0 = f + + # preallocate storage for products with Jx and Jx' + Jx = solver.Jx + if Jx isa SparseMatrixCOO + jac_coord_residual!(nlp, x, view(ls_subsolver.val, 1:ls_subsolver.nnzj)) + Jx.vals .= view(ls_subsolver.val, 1:ls_subsolver.nnzj) #TODO check if this is needed? + end + + mul!(∇f, Jx', r) + + norm_∇fk = norm(∇f) + ρk = zero(T) + + # Stopping criterion: + fmin = min(-one(T), f0) / eps(T) + unbounded = f < fmin + + σk = 2^round(log2(norm_∇fk + 1)) / norm_∇fk + ϵ = atol + rtol * norm_∇fk + ϵF = Fatol + Frtol * 2 * √f + + # Preallocate xt. + xt = solver.xt + temp = solver.temp + + optimal = norm_∇fk ≤ ϵ + small_residual = 2 * √f ≤ ϵF + + set_iter!(stats, 0) + set_objective!(stats, f) + set_dual_residual!(stats, norm_∇fk) + + if optimal + @info "Optimal point found at initial point" + @info log_header( + [:iter, :f, :dual, :σ, :ρ], + [Int, Float64, Float64, Float64, Float64], + hdr_override = Dict(:f => "f(x)", :dual => "‖∇f‖"), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, σk, ρk]) + end + cp_step_log = " " + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info log_header( + [:iter, :f, :dual, :σ, :ρ, :sub_iter, :dir, :cp_step_log, :sub_status], + [Int, Float64, Float64, Float64, Float64, Int, String, String, String], + hdr_override = Dict( + :f => "f(x)", + :dual => "‖∇f‖", + :sub_iter => "subiter", + :dir => "dir", + :cp_step_log => "cp step", + :sub_status => "status", + ), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, σk, ρk, 0, " ", " ", " "]) + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + small_residual = small_residual, + max_iter = max_iter, + max_time = max_time, + ), + ) + + subtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * subtol)) + solver.σ = σk + solver.subtol = subtol + + callback(nlp, solver, stats) + + subtol = solver.subtol + σk = solver.σ + + done = stats.status != :unknown + ν_k = one(T) + + while !done + + # Compute the Cauchy step. + mul!(temp, Jx, ∇f) # temp <- Jx'*∇f + curv = dot(temp, temp) # curv = ∇f' Jx'Jx *∇f + slope = σk * norm_∇fk^2 # slope= σ * ||∇f||^2 + γ_k = (curv + slope) / norm_∇fk^2 + temp .= .-r + solver.σ = σk + + if γ_k > 0 + ν_k = 2*(1-δ1) / (γ_k) + cp_step_log = "α_k" + # Compute the step s. + subsolver_solved, sub_stats, subiter = + subsolve!(ls_subsolver, solver, nlp, s, atol, n, m, max_time, subsolver_verbose) + if scp_flag + # Based on the flag, scp is calcualted + scp .= -ν_k * ∇f + if norm(s) > θ2 * norm(scp) + s .= scp + end + end + else # when zero curvature occures + # we have to calcualte the scp, since we have encounter a negative curvature + λmax, found_λ = opnorm(Jx) + found_λ || error("operator norm computation failed") + cp_step_log = "ν_k" + ν_k = θ1 / (λmax + σk) + scp .= -ν_k * ∇f + s .= scp + end + + # Compute actual vs. predicted reduction. + xt .= x .+ s + mul!(temp, Jx, s) + slope = dot(r, temp) + curv = dot(temp, temp) + + residual!(nlp, xt, rt) + fck = obj(nlp, x, rt, recompute = false) + + ΔTk = -slope - curv / 2 + if non_mono_size > 1 #non-monotone behaviour + k = mod(stats.iter, non_mono_size) + 1 + solver.obj_vec[k] = stats.objective + fck_max = maximum(solver.obj_vec) + ρk = (fck_max - fck) / (fck_max - stats.objective + ΔTk) + else + ρk = (stats.objective - fck) / ΔTk + end + + # Update regularization parameters and Acceptance of the new candidate + step_accepted = ρk >= η1 + if step_accepted + if Jx isa SparseMatrixCOO # we need to update the values of Jx in QRMumpsSolver + jac_coord_residual!(nlp, x, view(ls_subsolver.val, 1:ls_subsolver.nnzj)) + Jx.vals .= view(ls_subsolver.val, 1:ls_subsolver.nnzj) + end + # update Jx implicitly for other solvers + x .= xt + r .= rt + f = fck + # grad!(nlp, x, ∇f, r, recompute = false) + mul!(∇f, Jx', r) # ∇f = Jx' * r + set_objective!(stats, fck) + unbounded = fck < fmin + norm_∇fk = norm(∇f) + if ρk >= η2 + σk = max(σmin, γ3 * σk) + else # η1 ≤ ρk < η2 + σk = min(σmin, γ1 * σk) + end + else # η1 > ρk + σk = max(σmin, γ2 * σk) + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + subtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * subtol)) + + solver.σ = σk + solver.subtol = subtol + set_dual_residual!(stats, norm_∇fk) + + callback(nlp, solver, stats) + + σk = solver.σ + subtol = solver.subtol + norm_∇fk = stats.dual_feas + + optimal = norm_∇fk ≤ ϵ + small_residual = 2 * √f ≤ ϵF + + if verbose > 0 && mod(stats.iter, verbose) == 0 + dir_stat = step_accepted ? "↘" : "↗" + @info log_row([ + stats.iter, + stats.objective, + norm_∇fk, + σk, + ρk, + subiter, + cp_step_log, + dir_stat, + sub_stats, + ]) + end + + if stats.status == :user + done = true + else + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + small_residual = small_residual, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + end + + done = stats.status != :unknown + end + + set_solution!(stats, x) + return stats +end + +# Dispatch for KrylovWorkspace +function subsolve!( + ls_subsolver::KrylovWorkspace, + R2NLS::R2SolverNLS, + nlp, + s, + atol, + n, + m, + max_time, + subsolver_verbose, +) + krylov_solve!( + ls_subsolver, + R2NLS.Jx, + R2NLS.temp, + atol = atol, + rtol = R2NLS.subtol, + λ = √(R2NLS.σ), # λ ≥ 0 is a regularization parameter. + itmax = max(2 * (n + m), 50), + # timemax = max_time - R2SolverNLS.stats.elapsed_time, + verbose = subsolver_verbose, + ) + s .= ls_subsolver.x + return Krylov.issolved(ls_subsolver), ls_subsolver.stats.status, ls_subsolver.stats.niter +end + +# Dispatch for QRMumpsSolver +function subsolve!( + ls::QRMumpsSolver, + R2NLS::R2SolverNLS, + nlp, + s, + atol, + n, + m, + max_time, + subsolver_verbose, +) + + # 1. Update Jacobian values at the current point x + # jac_coord_residual!(nlp, R2NLS.x, view(ls.val, 1:ls.nnzj)) + + # 2. Update regularization parameter σ + sqrt_σ = sqrt(R2NLS.σ) + @inbounds for i = 1:n + ls.val[ls.nnzj + i] = sqrt_σ + end + + # 3. Build the augmented right-hand side vector: b_aug = [-F(x); 0] + ls.b_aug[1:m] .= R2NLS.temp # -F(x) + fill!(view(ls.b_aug, (m + 1):(m + n)), zero(eltype(ls.b_aug))) # we have to do this for some reason #Applying all of its Householder (or Givens) transforms to the entire RHS vector b_aug—i.e. computing QTbQTb. + # Update spmat + qrm_update!(ls.spmat, ls.val) + + # 4. Solve the least-squares system + qrm_factorize!(ls.spmat, ls.spfct; transp = 'n') + qrm_apply!(ls.spfct, ls.b_aug; transp = 't') + qrm_solve!(ls.spfct, ls.b_aug, s; transp = 'n') + + # 5. Return status. For a direct solver, we assume success. + return true, "QRMumps", 1 +end diff --git a/src/utilities.jl b/src/utilities.jl new file mode 100644 index 00000000..9b13f3b8 --- /dev/null +++ b/src/utilities.jl @@ -0,0 +1,106 @@ +import LinearAlgebra: opnorm # bring the Base name into your namespace +using GenericLinearAlgebra +using TSVD + +export opnorm +# — Top‐level dispatch — +function LinearAlgebra.opnorm(B; kwargs...) + _opnorm(B, eltype(B); kwargs...) +end + +# This method will be picked if eltype is one of the four types Arpack supports +# (Float32, Float64, ComplexF32, ComplexF64). +function _opnorm( + B, + ::Type{T}; + kwargs..., +) where {T <: Union{Float32, Float64, ComplexF32, ComplexF64}} + m, n = size(B) + return (m == n ? opnorm_eig : opnorm_svd)(B; kwargs...) +end + +# Fallback for everything else +function _opnorm(B, ::Type{T}; kwargs...) where {T} + _, s, _ = tsvd(B) + return s[1], true # return largest singular value +end + +function opnorm_eig(B; max_attempts::Int = 3) + n = size(B, 1) + # 1) tiny dense Float64: direct LAPACK + if n ≤ 5 + return maximum(abs, eigen(Matrix(B)).values), true + end + + # 2) iterative ARPACK + nev, ncv = 1, max(20, 2*1 + 1) + attempt, λ, have_eig = 0, zero(eltype(B)), false + + while !(have_eig || attempt >= max_attempts) + attempt += 1 + try + # Estimate largest eigenvalue in absolute value + d, nconv, niter, nmult, resid = + eigs(B; nev = nev, ncv = ncv, which = :LM, ritzvec = false, check = 1) + + # Check if eigenvalue has converged + have_eig = nconv == 1 + if have_eig + λ = abs(d[1]) # Take absolute value of the largest eigenvalue + break # Exit loop if successful + else + # Increase NCV for the next attempt if convergence wasn't achieved + ncv = min(2 * ncv, n) + end + catch e + if occursin("XYAUPD_Exception", string(e)) && ncv < n + @warn "Arpack error: $e. Increasing NCV to $ncv and retrying." + ncv = min(2 * ncv, n) # Increase NCV but don't exceed matrix size + else + rethrow(e) # Re-raise if it's a different error + end + end + end + + return λ, have_eig +end + +function opnorm_svd(J; max_attempts::Int = 3) + m, n = size(J) + # 1) tiny dense Float64: direct LAPACK + if min(m, n) ≤ 5 + return maximum(svd(Matrix(J)).S), true + end + + # 2) iterative ARPACK‐SVD + nsv, ncv = 1, 10 + attempt, σ, have_svd = 0, zero(eltype(J)), false + n = min(m, n) + + while !(have_svd || attempt >= max_attempts) + attempt += 1 + try + # Estimate largest singular value + s, nconv, niter, nmult, resid = svds(J; nsv = nsv, ncv = ncv, ritzvec = false, check = 1) + + # Check if singular value has converged + have_svd = nconv >= 1 + if have_svd + σ = maximum(s.S) # Take the largest singular value + break # Exit loop if successful + else + # Increase NCV for the next attempt if convergence wasn't achieved + ncv = min(2 * ncv, n) + end + catch e + if occursin("XYAUPD_Exception", string(e)) && ncv < n + @warn "Arpack error: $e. Increasing NCV to $ncv and retrying." + ncv = min(2 * ncv, n) # Increase NCV but don't exceed matrix size + else + rethrow(e) # Re-raise if it's a different error + end + end + end + + return σ, have_svd +end \ No newline at end of file diff --git a/test/allocs.jl b/test/allocs.jl index 70fbd214..b3603e81 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,17 +30,27 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in - (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$name" for (name, symsolver) in ( + # (:R2N, :R2NSolver), + (:R2N_exact, :R2NSolver), + (:R2, :FoSolver), + (:fomo, :FomoSolver), + (:lbfgs, :LBFGSSolver), + (:tron, :TronSolver), + (:trunk, :TrunkSolver), + ) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() - if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) - if (symsolver == :FoSolver || symsolver == :FomoSolver) + if unconstrained(nlp) || (bound_constrained(nlp) && (name == :TronSolver)) + if (name == :FoSolver || name == :FomoSolver) solver = eval(symsolver)(nlp; M = 2) # nonmonotone configuration allocates extra memory + elseif name == :R2N_exact + solver = + eval(symsolver)(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver) else solver = eval(symsolver)(nlp) end - if symsolver == :FomoSolver + if name == :FomoSolver T = eltype(nlp.meta.x0) stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0))) else @@ -48,8 +58,8 @@ if Sys.isunix() end with_logger(NullLogger()) do SolverCore.solve!(solver, nlp, stats) - SolverCore.reset!(solver) - NLPModels.reset!(nlp) + reset!(solver) + reset!(nlp) al = @wrappedallocs SolverCore.solve!(solver, nlp, stats) @test al == 0 end @@ -57,11 +67,32 @@ if Sys.isunix() end end - @testset "$symsolver" for symsolver in (:TrunkSolverNLS, :TronSolverNLS) + @testset "$name" for (name, symsolver) in ( + (:TrunkSolverNLS, :TrunkSolverNLS), + (:R2SolverNLS, :R2SolverNLS), + (:R2SolverNLS_CG, :R2SolverNLS), + (:R2SolverNLS_LSQR, :R2SolverNLS), + (:R2SolverNLS_CR, :R2SolverNLS), + (:R2SolverNLS_LSMR, :R2SolverNLS), + (:R2SolverNLS_QRMumps, :R2SolverNLS), + (:TronSolverNLS, :TronSolverNLS), + ) for model in NLPModelsTest.nls_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolverNLS)) - solver = eval(symsolver)(nlp) + if name == :R2SolverNLS_CG + solver = eval(symsolver)(nlp, subsolver = :cgls) + elseif name == :R2SolverNLS_LSQR + solver = eval(symsolver)(nlp, subsolver = :lsqr) + elseif name == :R2SolverNLS_CR + solver = eval(symsolver)(nlp, subsolver = :crls) + elseif name == :R2SolverNLS_LSMR + solver = eval(symsolver)(nlp, subsolver = :lsmr) + elseif name == :R2SolverNLS_QRMumps + solver = eval(symsolver)(nlp, subsolver = :qrmumps) + else + solver = eval(symsolver)(nlp) + end stats = GenericExecutionStats(nlp) with_logger(NullLogger()) do SolverCore.solve!(solver, nlp, stats) diff --git a/test/callback.jl b/test/callback.jl index ddadc799..c4e5144c 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -36,6 +36,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots fomo(nlp, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + R2N(nlp, callback = cb) + end + @test stats.iter == 8 end @testset "Test callback for NLS" begin @@ -56,6 +61,11 @@ end tron(nls, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + R2NLS(nls, callback = cb) + end + @test stats.iter == 8 end @testset "Testing Solver Values" begin diff --git a/test/consistency.jl b/test/consistency.jl index 8ec758fa..37e46c3e 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -10,53 +10,85 @@ function consistency() @testset "Consistency" begin args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0] - @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] + @testset "NLP with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + # ("R2N", R2N), + ( + "R2N_exact", + (nlp; kwargs...) -> + R2N(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver; kwargs...), + ), + ("fomo", fomo), + ] with_logger(NullLogger()) do - NLPModels.reset!(unlp) - stats = mtd(unlp; args...) + reset!(unlp) + stats = solver(unlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order - NLPModels.reset!(unlp) - stats = mtd(unlp; max_eval = 1) + reset!(unlp) + stats = solver(unlp; max_eval = 1) @test stats.status == :max_eval slow_nlp = ADNLPModel(x -> begin sleep(0.1) f(x) end, unlp.meta.x0) - stats = mtd(slow_nlp; max_time = 0.0) + stats = solver(slow_nlp; max_time = 0.0) @test stats.status == :max_time end end - @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] + @testset "Quasi-Newton NLP with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + # ("R2N", R2N), + ( + "R2N_exact", + (nlp; kwargs...) -> + R2N(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver; kwargs...), + ), + ("fomo", fomo), + ] with_logger(NullLogger()) do - NLPModels.reset!(qnlp) - stats = mtd(qnlp; args...) + reset!(qnlp) + stats = solver(qnlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order end end - @testset "NLS with $mtd" for mtd in [trunk] + @testset "NLS with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("R2NLS", (unls; kwargs...) -> R2NLS(unls; kwargs...)), + ("R2NLS_CGLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :cgls; kwargs...)), + ("R2NLS_LSQR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsqr; kwargs...)), + ("R2NLS_CRLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsqr; kwargs...)), + ("R2NLS_LSMR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsmr; kwargs...)), + ("R2NLS_QRMumps", (unls; kwargs...) -> R2NLS(unls, subsolver = :qrmumps; kwargs...)), + ] with_logger(NullLogger()) do - stats = mtd(unls; args...) + stats = solver(unls; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order NLPModels.reset!(unls) - stats = mtd(unls; max_eval = 1) + stats = solver(unls; max_eval = 1) @test stats.status == :max_eval slow_nls = ADNLSModel(x -> begin sleep(0.1) F(x) end, unls.meta.x0, nls_meta(unls).nequ) - stats = mtd(slow_nls; max_time = 0.0) + stats = solver(slow_nls; max_time = 0.0) @test stats.status == :max_time end end - @testset "Quasi-Newton NLS with $mtd" for mtd in [trunk] + @testset "Quasi-Newton NLS with $mtd" for (mtd, solver) in [("trunk", trunk)] with_logger(NullLogger()) do - stats = mtd(qnls; args...) + stats = solver(qnls; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order end diff --git a/test/restart.jl b/test/restart.jl index 38765465..a3857f12 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,6 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( (:R2, :FoSolver), + (:R2N_exact, :R2NSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), @@ -9,8 +10,13 @@ nlp = ADNLPModel(f, [-1.2; 1.0]) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) - stats = SolverCore.solve!(solver, nlp, stats) + if fun == :R2N_exact + nlp = LBFGSModel(nlp) + solver = eval(s)(nlp,subsolver_type = JSOSolvers.ShiftedLBFGSSolver) + else + solver = eval(s)(nlp) + end + stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) @@ -25,12 +31,30 @@ end @testset "Test restart NLS with a different initial guess: $fun" for (fun, s) in ( (:tron, :TronSolverNLS), (:trunk, :TrunkSolverNLS), + (:R2SolverNLS, :R2SolverNLS), + (:R2SolverNLS_CG, :R2SolverNLS), + (:R2SolverNLS_LSQR, :R2SolverNLS), + (:R2SolverNLS_CR, :R2SolverNLS), + (:R2SolverNLS_LSMR, :R2SolverNLS), + (:R2SolverNLS_QRMumps, :R2SolverNLS), ) F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(F, [-1.2; 1.0], 2) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) + if fun == :R2SolverNLS_CG + solver = eval(s)(nlp, subsolver = :cgls) + elseif fun == :R2SolverNLS_LSQR + solver = eval(s)(nlp, subsolver = :lsqr) + elseif fun == :R2SolverNLS_CR + solver = eval(s)(nlp, subsolver = :crls) + elseif fun == :R2SolverNLS_LSMR + solver = eval(s)(nlp, subsolver = :lsmr) + elseif fun == :R2SolverNLS_QRMumps + solver = eval(s)(nlp, subsolver = :qrmumps) + else + solver = eval(s)(nlp) + end stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) @@ -45,6 +69,7 @@ end @testset "Test restart with a different problem: $fun" for (fun, s) in ( (:R2, :FoSolver), + (:R2N_exact, :R2NSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), @@ -54,7 +79,12 @@ end nlp = ADNLPModel(f, [-1.2; 1.0]) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) + if fun == :R2N_exact + nlp = LBFGSModel(nlp) + solver = eval(s)(nlp,subsolver_type = JSOSolvers.ShiftedLBFGSSolver) + else + solver = eval(s)(nlp) + end stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) @@ -71,12 +101,30 @@ end @testset "Test restart NLS with a different problem: $fun" for (fun, s) in ( (:tron, :TronSolverNLS), (:trunk, :TrunkSolverNLS), + (:R2SolverNLS, :R2SolverNLS), + (:R2SolverNLS_CG, :R2SolverNLS), + (:R2SolverNLS_LSQR, :R2SolverNLS), + (:R2SolverNLS_CR, :R2SolverNLS), + (:R2SolverNLS_LSMR, :R2SolverNLS), + (:R2SolverNLS_QRMumps, :R2SolverNLS), ) F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(F, [-1.2; 1.0], 2) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) + if fun == :R2SolverNLS_CG + solver = eval(s)(nlp, subsolver = :cgls) + elseif fun == :R2SolverNLS_LSQR + solver = eval(s)(nlp, subsolver = :lsqr) + elseif fun == :R2SolverNLS_CR + solver = eval(s)(nlp, subsolver = :crls) + elseif fun == :R2SolverNLS_LSMR + solver = eval(s)(nlp, subsolver = :lsmr) + elseif fun == :R2SolverNLS_QRMumps + solver = eval(s)(nlp, subsolver = :qrmumps) + else + solver = eval(s)(nlp) + end stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) diff --git a/test/runtests.jl b/test/runtests.jl index e2036e39..efca77fd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,13 +1,15 @@ # stdlib -using Printf, LinearAlgebra, Logging, SparseArrays, Test +using Printf, LinearAlgebra, Logging, SparseArrays, Test, Arpack # additional packages -using ADNLPModels, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools +using ADNLPModels, Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools using NLPModelsTest # this package using JSOSolvers +include("test_Utilities.jl") + @testset "Test small residual checks $solver" for solver in (:TrunkSolverNLS, :TronSolverNLS) nls = ADNLSModel(x -> [x[1] - 1; sin(x[2])], [-1.2; 1.0], 2) stats = GenericExecutionStats(nls) @@ -18,37 +20,53 @@ using JSOSolvers end @testset "Test iteration limit" begin - @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) + @testset "$name" for (name, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + # ("R2N", R2N), + ("R2N_exact", (nlp; kwargs...) -> R2N(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver; kwargs...)), + ("fomo", fomo), + ] f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) - - stats = eval(fun)(nlp, max_iter = 1) + stats = eval(solver)(nlp, max_iter = 1) + stats = solver(nlp, max_iter = 1) @test stats.status == :max_iter end + @testset "$(fun)-NLS" for fun in (tron, trunk) f(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(f, [-1.2; 1.0], 2) - stats = eval(fun)(nlp, max_iter = 1) @test stats.status == :max_iter end end @testset "Test unbounded below" begin - @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) + @testset "$name" for (name, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + # ("R2N", R2N), + ("R2N_exact", (nlp; kwargs...) -> R2N(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver; kwargs...)), + ("fomo", fomo), + ] T = Float64 x0 = [T(0)] f(x) = -exp(x[1]) nlp = ADNLPModel(f, x0) - - stats = eval(fun)(nlp) + stats = solver(nlp) @test stats.status == :unbounded @test stats.objective < -one(T) / eps(T) end end include("restart.jl") +include("test_edge_cases.jl") include("callback.jl") include("consistency.jl") include("test_solvers.jl") diff --git a/test/test_edge_cases.jl b/test/test_edge_cases.jl new file mode 100644 index 00000000..92da9c65 --- /dev/null +++ b/test/test_edge_cases.jl @@ -0,0 +1,64 @@ +# 1. Test error for constrained problems +@testset "Constrained Problems Error" begin + f(x) = (x[1] - 1.0)^2 + 100 * (x[2] - x[1]^2)^2 + x0 = [-1.2; 1.0] + lvar = [-Inf; 0.1] + uvar = [0.5; 0.5] + c(x) = [x[1] + x[2] - 2; x[1]^2 + x[2]^2] + lcon = [0.0; -Inf] + ucon = [Inf; 1.0] + nlp_constrained = ADNLPModel(f, x0, lvar, uvar, c, lcon, ucon) + + solver = R2NSolver(nlp_constrained) + stats = GenericExecutionStats(nlp_constrained) + + @test_throws ErrorException begin + SolverCore.solve!(solver, nlp_constrained, stats) + end +end + +# 2. Test error when non_mono_size < 1 +@testset "non_mono_size < 1 Error" begin + f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp = ADNLPModel(f, [-1.2; 1.0]) + + @test_throws ErrorException begin + R2NSolver(nlp; non_mono_size = 0) + end + @test_throws ErrorException begin + R2N(nlp; non_mono_size = 0) + end + + @test_throws ErrorException begin + R2NSolver(nlp; non_mono_size = -1) + end + @test_throws ErrorException begin + R2N(nlp; non_mono_size = -1) + end +end + +# 3. Test error when subsolver_type is ShiftedLBFGSSolver but nlp is not of type LBFGSModel +@testset "ShiftedLBFGSSolver with wrong nlp type Error" begin + f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp = ADNLPModel(f, [-1.2; 1.0]) + + @test_throws ErrorException begin + R2NSolver(nlp; subsolver_type = ShiftedLBFGSSolver) + end + @test_throws ErrorException begin + R2N(nlp; subsolver_type = ShiftedLBFGSSolver) + end +end + +# 4. Test error when subsolver_type is not a subtype of R2N_allowed_subsolvers +@testset "Invalid subsolver type Error" begin + f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp = ADNLPModel(f, [-1.2; 1.0]) + + @test_throws ErrorException begin + R2NSolver(nlp; subsolver_type = CgSolver) + end + @test_throws ErrorException begin + R2N(nlp; subsolver_type = CgSolver) + end +end \ No newline at end of file diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 30216e4e..1bd67790 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,6 +8,7 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), + ("R2N_exact", (nlp; kwargs...) -> R2N(LBFGSModel(nlp), subsolver_type = JSOSolvers.ShiftedLBFGSSolver; kwargs...)), ("fomo_r2", fomo), ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)), ] @@ -41,6 +42,12 @@ function tests() ("trunk full Hessian", (nls; kwargs...) -> trunk(nls, variant = :Newton; kwargs...)), ("tron+cgls", (nls; kwargs...) -> tron(nls, subsolver = :cgls; kwargs...)), ("tron full Hessian", (nls; kwargs...) -> tron(nls, variant = :Newton; kwargs...)), + ("R2NLS", (unls; kwargs...) -> R2NLS(unls; kwargs...)), + ("R2NLS_CGLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :cgls; kwargs...)), + ("R2NLS_LSQR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsqr; kwargs...)), + ("R2NLS_CRLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsqr; kwargs...)), + ("R2NLS_LSMR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsmr; kwargs...)), + # ("R2NLS_QRMumps", (unls; kwargs...) -> R2NLS(unls, subsolver = :qrmumps; kwargs...)), #TODO SolverTEST be able to turn off or on tests ] unconstrained_nls(solver) multiprecision_nls(solver, :unc) diff --git a/test/test_utilities.jl b/test/test_utilities.jl new file mode 100644 index 00000000..2ce1f999 --- /dev/null +++ b/test/test_utilities.jl @@ -0,0 +1,29 @@ +@testset "opnorm and TSVD tests (LinearOperators)" begin + # 1) Square Float64 via direct LAPACK or ARPACK + A_mat = [2.0 0.0; 0.0 -1.0] + A_op = LinearOperator(A_mat) + λ, ok = opnorm(A_op) + @test ok == true + @test isapprox(λ, 2.0; rtol=1e-12) + + # 2) Rectangular Float64 via direct LAPACK or ARPACK SVD + J_mat = [3.0 0.0 0.0; 0.0 1.0 0.0] + J_op = LinearOperator(J_mat) + σ, ok_sv = opnorm(J_op) + @test ok_sv == true + @test isapprox(σ, 3.0; rtol=1e-12) + + # 3) Square BigFloat via TSVD + B_mat = Matrix{BigFloat}([2.0 0.0; 0.0 -1.0]) + B_op = LinearOperator(B_mat) + λ_bf, ok_bf = opnorm(B_op) + @test ok_bf == true + @test isapprox(λ_bf, BigFloat(2); rtol=1e-12) + + # 4) Rectangular BigFloat via rectangular TSVD + JR_mat = Matrix{BigFloat}([3.0 0.0 0.0; 0.0 1.0 0.0]) + JR_op = LinearOperator(JR_mat) + σ_bf, ok_bf2 = opnorm(JR_op) + @test ok_bf2 == true + @test isapprox(σ_bf, BigFloat(3); rtol=1e-12) +end