diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b7eb25c1a..14c8dcd4d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,6 +19,7 @@ jobs: matrix: group: - Core + - OptimizationBase - OptimizationBBO - OptimizationCMAEvolutionStrategy - OptimizationEvolutionary @@ -66,7 +67,7 @@ jobs: GROUP: ${{ matrix.group }} - uses: julia-actions/julia-processcoverage@v1 with: - directories: src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationGCMAES/src,lib/OptimizationManopt/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src + directories: src,lib/OptimizationBase/src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationGCMAES/src,lib/OptimizationManopt/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src - uses: codecov/codecov-action@v5 with: file: lcov.info diff --git a/Project.toml b/Project.toml index 67f39e158..c77373cfe 100644 --- a/Project.toml +++ b/Project.toml @@ -6,17 +6,23 @@ version = "4.4.0" ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" ConsoleProgressMonitor = "88cd18e8-d9cc-4ea6-8889-5259c0d15c8b" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" LBFGSB = "5be7bae1-8223-5378-bac3-9e7378a2f6e6" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb" +PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" +SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed" [compat] @@ -28,8 +34,10 @@ Boltz = "1" ComponentArrays = ">= 0.13.9" ConsoleProgressMonitor = "0.1.1" DiffEqFlux = "2, 3, 4" +DifferentiationInterface = "0.7" DocStringExtensions = "0.9" Enzyme = "0.13" +FastClosures = "0.3" FiniteDiff = "2" Flux = "0.13, 0.14, 0.15, 0.16" ForwardDiff = "0.10, 1" @@ -43,27 +51,29 @@ Lux = "1.12.4" MLUtils = "0.4" ModelingToolkit = "9" Optim = ">= 1.4.1" -OptimizationBase = "2" +Optimisers = ">= 0.2.5" OptimizationMOI = "0.5" OptimizationOptimJL = "0.4" OptimizationOptimisers = "0.3" OrdinaryDiffEqTsit5 = "1" +PDMats = "0.11" Pkg = "1" Printf = "1.10" ProgressLogging = "0.1" -Random = "1.10" +Random = "1.10" Reexport = "1.2" ReverseDiff = "1" SafeTestsets = "0.1" SciMLBase = "2.39.0" SciMLSensitivity = "7" SparseArrays = "1.10" 
+SparseConnectivityTracer = "0.6, 1" SparseDiffTools = "2" +SparseMatrixColorings = "0.4" Symbolics = "6" TerminalLoggers = "0.1" Test = "1.10" Tracker = "0.2" -Optimisers = ">= 0.2.5" Zygote = "0.6, 0.7" julia = "1.10" @@ -101,7 +111,4 @@ Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "BenchmarkTools", "Boltz", "ComponentArrays", "DiffEqFlux", "Enzyme", "FiniteDiff", "Flux", "ForwardDiff", - "Ipopt", "IterTools", "Lux", "MLUtils", "ModelingToolkit", "Optim", "OptimizationMOI", "OptimizationOptimJL", "OptimizationOptimisers", - "OrdinaryDiffEqTsit5", "Pkg", "Random", "ReverseDiff", "SafeTestsets", "SciMLSensitivity", "SparseArrays", "SparseDiffTools", - "Symbolics", "Test", "Tracker", "Zygote"] +test = ["Aqua", "BenchmarkTools", "Boltz", "ComponentArrays", "DiffEqFlux", "Enzyme", "FiniteDiff", "Flux", "ForwardDiff", "Ipopt", "IterTools", "Lux", "MLUtils", "ModelingToolkit", "Optim", "OptimizationMOI", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEqTsit5", "Pkg", "Random", "ReverseDiff", "SafeTestsets", "SciMLSensitivity", "SparseArrays", "SparseDiffTools", "Symbolics", "Test", "Tracker", "Zygote"] diff --git a/lib/OptimizationBase/LICENSE b/lib/OptimizationBase/LICENSE new file mode 100644 index 000000000..5056c1c66 --- /dev/null +++ b/lib/OptimizationBase/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Vaibhav Dixit and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
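The dependency changes above add DifferentiationInterface together with SparseConnectivityTracer and SparseMatrixColorings, the sparse-AD stack that the new OptimizationBase code later in this patch builds on. As a rough orientation only (not part of the patch; the dense backend chosen here is an arbitrary assumption for illustration), these packages are typically combined through an ADTypes.AutoSparse wrapper, which OptimizationBase.instantiate_function then hands to DifferentiationInterface's prepare_*/hessian!/jacobian! calls:

# Illustrative sketch, not part of this diff.
using ADTypes, SparseConnectivityTracer, SparseMatrixColorings

# AutoSparse wraps a dense AD backend (AutoForwardDiff is an arbitrary choice here)
# with a sparsity detector and a coloring algorithm; the resulting preparation objects
# expose the coloring_result fields read by the sparse code paths later in this patch.
sparse_ad = AutoSparse(AutoForwardDiff();
    sparsity_detector = TracerSparsityDetector(),
    coloring_algorithm = GreedyColoringAlgorithm())

# sparse_ad can then be passed as the adtype of an OptimizationFunction so that sparse
# Hessians and constraint Jacobians are prepared via DifferentiationInterface.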
diff --git a/lib/OptimizationBase/Project.toml b/lib/OptimizationBase/Project.toml new file mode 100644 index 000000000..5c81f3e6f --- /dev/null +++ b/lib/OptimizationBase/Project.toml @@ -0,0 +1,69 @@ +name = "OptimizationBase" +uuid = "bca83a33-5cc9-4baa-983d-23429ab6bcbb" +authors = ["Vaibhav Dixit and contributors"] +version = "2.10.0" + +[deps] +ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" +SparseMatrixColorings = "0a514795-09f3-496d-8182-132a7b665d35" + +[weakdeps] +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +SymbolicAnalysis = "4297ee4d-0239-47d8-ba5d-195ecdf594fe" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[extensions] +OptimizationEnzymeExt = "Enzyme" +OptimizationFiniteDiffExt = "FiniteDiff" +OptimizationForwardDiffExt = "ForwardDiff" +OptimizationMLDataDevicesExt = "MLDataDevices" +OptimizationMLUtilsExt = "MLUtils" +OptimizationMTKExt = "ModelingToolkit" +OptimizationReverseDiffExt = "ReverseDiff" +OptimizationSymbolicAnalysisExt = "SymbolicAnalysis" +OptimizationZygoteExt = "Zygote" + +[compat] +ADTypes = "1.9" +ArrayInterface = "7.6" +DifferentiationInterface = "0.7" +DocStringExtensions = "0.9" +Enzyme = "0.13.2" +FastClosures = "0.3" +FiniteDiff = "2.12" +ForwardDiff = "0.10.26, 1" +LinearAlgebra = "1.9, 1.10" +MLDataDevices = "1" +MLUtils = "0.4" +ModelingToolkit = "9, 10" +PDMats = "0.11" +Reexport = "1.2" +ReverseDiff = "1.14" +SciMLBase = "2" +SparseConnectivityTracer = "0.6, 1" +SparseMatrixColorings = "0.4" +SymbolicAnalysis = "0.3" +Zygote = "0.6.67, 0.7" +julia = "1.10" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/lib/OptimizationBase/ext/OptimizationEnzymeExt.jl b/lib/OptimizationBase/ext/OptimizationEnzymeExt.jl new file mode 100644 index 000000000..d53b4495f --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationEnzymeExt.jl @@ -0,0 +1,731 @@ +module OptimizationEnzymeExt + +import OptimizationBase, OptimizationBase.ArrayInterface +import OptimizationBase.SciMLBase: OptimizationFunction +import OptimizationBase.SciMLBase +import OptimizationBase.LinearAlgebra: I, dot +import OptimizationBase.ADTypes: AutoEnzyme +using Enzyme +using Core: Vararg + +@inline function firstapply(f::F, θ, p) where {F} + res = f(θ, p) + if isa(res, AbstractFloat) + res + else + first(res) + end +end + +function inner_grad(mode::Mode, θ, bθ, f, p) where {Mode} + Enzyme.autodiff(mode, + Const(firstapply), + Active, + Const(f), + Enzyme.Duplicated(θ, bθ), + Const(p) + ) + return nothing +end + +function hv_f2_alloc(mode::Mode, x, f, p) where {Mode} + dx = Enzyme.make_zero(x) + Enzyme.autodiff(mode, + Const(firstapply), + Active, + 
Const(f), + Enzyme.Duplicated(x, dx), + Const(p) + ) + return dx +end + +function inner_cons(x, fcons::Function, p::Union{SciMLBase.NullParameters, Nothing}, + num_cons::Int, i::Int) + res = zeros(eltype(x), num_cons) + fcons(res, x, p) + return res[i] +end + +function cons_f2(mode, x, dx, fcons, p, num_cons, i) + Enzyme.autodiff_deferred( + mode, Const(inner_cons), Active, Enzyme.Duplicated(x, dx), + Const(fcons), Const(p), Const(num_cons), Const(i)) + return nothing +end + +function inner_cons_oop( + x::Vector{T}, fcons::Function, p::Union{SciMLBase.NullParameters, Nothing}, + i::Int) where {T} + return fcons(x, p)[i] +end + +function cons_f2_oop(mode, x, dx, fcons, p, i) + Enzyme.autodiff_deferred( + mode, Const(inner_cons_oop), Active, Enzyme.Duplicated(x, dx), + Const(fcons), Const(p), Const(i)) + return nothing +end + +function lagrangian(x, _f::Function, cons::Function, p, λ, σ = one(eltype(x)))::Float64 + res = zeros(eltype(x), length(λ)) + cons(res, x, p) + return σ * _f(x, p) + dot(λ, res) +end + +function lag_grad(mode, x, dx, lagrangian::Function, _f::Function, cons::Function, p, σ, λ) + Enzyme.autodiff_deferred( + mode, Const(lagrangian), Active, Enzyme.Duplicated(x, dx), + Const(_f), Const(cons), Const(p), Const(λ), Const(σ)) + return nothing +end + +function set_runtime_activity2( + a::Mode1, ::Enzyme.Mode{ABI, Err, RTA}) where {Mode1, ABI, Err, RTA} + Enzyme.set_runtime_activity(a, RTA) +end +function_annotation(::Nothing) = Nothing +function_annotation(::AutoEnzyme{<:Any, A}) where A = A +function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, x, + adtype::AutoEnzyme, p, num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + rmode = if adtype.mode isa Nothing + Enzyme.Reverse + else + set_runtime_activity2(Enzyme.Reverse, adtype.mode) + end + + fmode = if adtype.mode isa Nothing + Enzyme.Forward + else + set_runtime_activity2(Enzyme.Forward, adtype.mode) + end + + func_annot = function_annotation(adtype) + + if g == true && f.grad === nothing + function grad(res, θ, p = p) + Enzyme.make_zero!(res) + Enzyme.autodiff(rmode, + Const(firstapply), + Active, + Const(f.f), + Enzyme.Duplicated(θ, res), + Const(p) + ) + end + elseif g == true + grad = (G, θ, p = p) -> f.grad(G, θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + function fg!(res, θ, p = p) + Enzyme.make_zero!(res) + y = Enzyme.autodiff(WithPrimal(rmode), + Const(firstapply), + Active, + Const(f.f), + Enzyme.Duplicated(θ, res), + Const(p) + )[2] + return y + end + elseif fg == true + fg! = (res, θ, p = p) -> f.fg(res, θ, p) + else + fg! = nothing + end + + if h == true && f.hess === nothing + vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + bθ = zeros(eltype(x), length(x)) + + if f.hess_prototype === nothing + vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + else + #useless right now, looks like there is no way to tell Enzyme the sparsity pattern? 
+ vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) + end + + function hess(res, θ, p = p) + Enzyme.make_zero!(bθ) + Enzyme.make_zero!.(vdbθ) + + Enzyme.autodiff(fmode, + inner_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, vdθ), + Enzyme.BatchDuplicatedNoNeed(bθ, vdbθ), + Const(f.f), + Const(p) + ) + + for i in eachindex(θ) + res[i, :] .= vdbθ[i] + end + end + elseif h == true + hess = (H, θ, p = p) -> f.hess(H, θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(G, H, θ, p = p) + vdθ = Tuple((Array(r) for r in eachrow(I(length(θ)) * one(eltype(θ))))) + vdbθ = Tuple(zeros(eltype(θ), length(θ)) for i in eachindex(θ)) + + Enzyme.autodiff(fmode, + inner_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, vdθ), + Enzyme.BatchDuplicatedNoNeed(G, vdbθ), + Const(f.f), + Const(p) + ) + + for i in eachindex(θ) + H[i, :] .= vdbθ[i] + end + end + elseif fgh == true + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + function hv!(H, θ, v, p = p) + H .= Enzyme.autodiff( + fmode, hv_f2_alloc, Const(rmode), Duplicated(θ, v), + Const(f.f), Const(p) + )[1] + end + elseif hv == true + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = (res, θ) -> f.cons(res, θ, p) + end + + if cons !== nothing && cons_j == true && f.cons_j === nothing + # if num_cons > length(x) + seeds = Enzyme.onehot(x) + Jaccache = Tuple(zeros(eltype(x), num_cons) for i in 1:length(x)) + basefunc = f.cons + if func_annot <: Enzyme.Const + basefunc = Enzyme.Const(basefunc) + elseif func_annot <: Enzyme.Duplicated || func_annot <: Enzyme.BatchDuplicated + basefunc = Enzyme.BatchDuplicated(basefunc, Tuple(make_zero(basefunc) for i in 1:length(x))) + elseif func_annot <: Enzyme.DuplicatedNoNeed || func_annot <: Enzyme.BatchDuplicatedNoNeed + basefunc = Enzyme.BatchDuplicatedNoNeed(basefunc, Tuple(make_zero(basefunc) for i in 1:length(x))) + end + # else + # seeds = Enzyme.onehot(zeros(eltype(x), num_cons)) + # Jaccache = Tuple(zero(x) for i in 1:num_cons) + # end + + y = zeros(eltype(x), num_cons) + + function cons_j!(J, θ) + for jc in Jaccache + Enzyme.make_zero!(jc) + end + Enzyme.make_zero!(y) + if func_annot <: Enzyme.Duplicated || func_annot <: Enzyme.BatchDuplicated || func_annot <: Enzyme.DuplicatedNoNeed || func_annot <: Enzyme.BatchDuplicatedNoNeed + for bf in basefunc.dval + Enzyme.make_zero!(bf) + end + end + Enzyme.autodiff(fmode, basefunc , BatchDuplicated(y, Jaccache), + BatchDuplicated(θ, seeds), Const(p)) + for i in eachindex(θ) + if J isa Vector + J[i] = Jaccache[i][1] + else + copyto!(@view(J[:, i]), Jaccache[i]) + end + end + # else + # Enzyme.autodiff(Enzyme.Reverse, f.cons, BatchDuplicated(y, seeds), + # BatchDuplicated(θ, Jaccache), Const(p)) + # for i in 1:num_cons + # if J isa Vector + # J .= Jaccache[1] + # else + # J[i, :] = Jaccache[i] + # end + # end + # end + end + elseif cons_j == true && cons !== nothing + cons_j! = (J, θ) -> f.cons_j(J, θ, p) + else + cons_j! = nothing + end + + if cons !== nothing && cons_vjp == true && f.cons_vjp === nothing + cons_res = zeros(eltype(x), num_cons) + function cons_vjp!(res, θ, v) + Enzyme.make_zero!(res) + Enzyme.make_zero!(cons_res) + + Enzyme.autodiff(rmode, + f.cons, + Const, + Duplicated(cons_res, v), + Duplicated(θ, res), + Const(p) + ) + end + elseif cons_vjp == true && cons !== nothing + cons_vjp! = (Jv, θ, σ) -> f.cons_vjp(Jv, θ, σ, p) + else + cons_vjp! 
= nothing + end + + if cons !== nothing && cons_jvp == true && f.cons_jvp === nothing + cons_res = zeros(eltype(x), num_cons) + + function cons_jvp!(res, θ, v) + Enzyme.make_zero!(res) + Enzyme.make_zero!(cons_res) + + Enzyme.autodiff(fmode, + f.cons, + Duplicated(cons_res, res), + Duplicated(θ, v), + Const(p) + ) + end + elseif cons_jvp == true && cons !== nothing + cons_jvp! = (Jv, θ, v) -> f.cons_jvp(Jv, θ, v, p) + else + cons_jvp! = nothing + end + + if cons !== nothing && cons_h == true && f.cons_h === nothing + cons_vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + cons_bθ = zeros(eltype(x), length(x)) + cons_vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + + function cons_h!(res, θ) + for i in 1:num_cons + Enzyme.make_zero!(cons_bθ) + Enzyme.make_zero!.(cons_vdbθ) + Enzyme.autodiff(fmode, + cons_f2, + Const(rmode), + Enzyme.BatchDuplicated(θ, cons_vdθ), + Enzyme.BatchDuplicated(cons_bθ, cons_vdbθ), + Const(f.cons), + Const(p), + Const(num_cons), + Const(i)) + + for j in eachindex(θ) + res[i][j, :] .= cons_vdbθ[j] + end + end + end + elseif cons !== nothing && cons_h == true + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! = nothing + end + + if lag_h == true && f.lag_h === nothing && cons !== nothing + lag_vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + lag_bθ = zeros(eltype(x), length(x)) + + if f.hess_prototype === nothing + lag_vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + else + #useless right now, looks like there is no way to tell Enzyme the sparsity pattern? + lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) + end + + function lag_h!(h, θ, σ, μ, p = p) + Enzyme.make_zero!(lag_bθ) + Enzyme.make_zero!.(lag_vdbθ) + + Enzyme.autodiff(fmode, + lag_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, lag_vdθ), + Enzyme.BatchDuplicatedNoNeed(lag_bθ, lag_vdbθ), + Const(lagrangian), + Const(f.f), + Const(f.cons), + Const(p), + Const(σ), + Const(μ) + ) + k = 0 + + for i in eachindex(θ) + vec_lagv = lag_vdbθ[i] + h[(k + 1):(k + i)] .= @view(vec_lagv[1:i]) + k += i + end + end + + function lag_h!(H::AbstractMatrix, θ, σ, μ, p = p) + Enzyme.make_zero!(H) + Enzyme.make_zero!(lag_bθ) + Enzyme.make_zero!.(lag_vdbθ) + + Enzyme.autodiff(fmode, + lag_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, lag_vdθ), + Enzyme.BatchDuplicatedNoNeed(lag_bθ, lag_vdbθ), + Const(lagrangian), + Const(f.f), + Const(f.cons), + Const(p), + Const(σ), + Const(μ) + ) + + for i in eachindex(θ) + H[i, :] .= lag_vdbθ[i] + end + end + elseif lag_h == true && cons !== nothing + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) + else + lag_h! = nothing + end + + return OptimizationFunction{true}(f.f, adtype; + grad = grad, fg = fg!, fgh = fgh!, + hess = hess, hv = hv!, + cons = cons, cons_j = cons_j!, + cons_jvp = cons_jvp!, cons_vjp = cons_vjp!, + cons_h = cons_h!, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + lag_h = lag_h!, + lag_hess_prototype = f.lag_hess_prototype, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function OptimizationBase.instantiate_function(f::OptimizationFunction{true}, + cache::OptimizationBase.ReInitCache, + adtype::AutoEnzyme, + num_cons = 0; kwargs...) + p = cache.p + x = cache.u0 + + return OptimizationBase.instantiate_function(f, x, adtype, p, num_cons; kwargs...) 
+end + +function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, x, + adtype::AutoEnzyme, p, num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + rmode = if adtype.mode isa Nothing + Enzyme.Reverse + else + set_runtime_activity2(Enzyme.Reverse, adtype.mode) + end + + fmode = if adtype.mode isa Nothing + Enzyme.Forward + else + set_runtime_activity2(Enzyme.Forward, adtype.mode) + end + + if g == true && f.grad === nothing + res = zeros(eltype(x), size(x)) + function grad(θ, p = p) + Enzyme.make_zero!(res) + Enzyme.autodiff(rmode, + Const(firstapply), + Active, + Const(f.f), + Enzyme.Duplicated(θ, res), + Const(p) + ) + return res + end + elseif fg == true + grad = (θ, p = p) -> f.grad(θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + res_fg = zeros(eltype(x), size(x)) + function fg!(θ, p = p) + Enzyme.make_zero!(res_fg) + y = Enzyme.autodiff(WithPrimal(rmode), + Const(firstapply), + Active, + Const(f.f), + Enzyme.Duplicated(θ, res_fg), + Const(p) + )[2] + return y, res + end + elseif fg == true + fg! = (θ, p = p) -> f.fg(θ, p) + else + fg! = nothing + end + + if h == true && f.hess === nothing + vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + bθ = zeros(eltype(x), length(x)) + vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + + function hess(θ, p = p) + Enzyme.make_zero!(bθ) + Enzyme.make_zero!.(vdbθ) + + Enzyme.autodiff(fmode, + inner_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, vdθ), + Enzyme.BatchDuplicated(bθ, vdbθ), + Const(f.f), + Const(p) + ) + + return reduce( + vcat, [reshape(vdbθ[i], (1, length(vdbθ[i]))) for i in eachindex(θ)]) + end + elseif h == true + hess = (θ, p = p) -> f.hess(θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + vdθ_fgh = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + vdbθ_fgh = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + G_fgh = zeros(eltype(x), length(x)) + H_fgh = zeros(eltype(x), length(x), length(x)) + + function fgh!(θ, p = p) + Enzyme.make_zero!(G_fgh) + Enzyme.make_zero!(H_fgh) + Enzyme.make_zero!.(vdbθ_fgh) + + Enzyme.autodiff(fmode, + inner_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, vdθ_fgh), + Enzyme.BatchDuplicatedNoNeed(G_fgh, vdbθ_fgh), + Const(f.f), + Const(p) + ) + + for i in eachindex(θ) + H_fgh[i, :] .= vdbθ_fgh[i] + end + return G_fgh, H_fgh + end + elseif fgh == true + fgh! = (θ, p = p) -> f.fgh(θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + function hv!(θ, v, p = p) + return Enzyme.autodiff( + fmode, hv_f2_alloc, DuplicatedNoNeed, Const(rmode), Duplicated(θ, v), + Const(_f), Const(f.f), Const(p) + )[1] + end + elseif hv == true + hv! = (θ, v, p = p) -> f.hv(θ, v, p) + else + hv! = f.hv + end + + if f.cons === nothing + cons = nothing + else + function cons(θ) + return f.cons(θ, p) + end + end + + if cons_j == true && cons !== nothing && f.cons_j === nothing + seeds = Enzyme.onehot(x) + Jaccache = Tuple(zeros(eltype(x), num_cons) for i in 1:length(x)) + + function cons_j!(θ) + for i in eachindex(Jaccache) + Enzyme.make_zero!(Jaccache[i]) + end + Jaccache, y = Enzyme.autodiff(WithPrimal(fmode), f.cons, Duplicated, + BatchDuplicated(θ, seeds), Const(p)) + if size(y, 1) == 1 + return reduce(vcat, Jaccache) + else + return reduce(hcat, Jaccache) + end + end + elseif cons_j == true && cons !== nothing + cons_j! 
= (θ) -> f.cons_j(θ, p) + else + cons_j! = nothing + end + + if cons_vjp == true && cons !== nothing && f.cons_vjp == true + res_vjp = zeros(eltype(x), size(x)) + cons_vjp_res = zeros(eltype(x), num_cons) + + function cons_vjp!(θ, v) + Enzyme.make_zero!(res_vjp) + Enzyme.make_zero!(cons_vjp_res) + + Enzyme.autodiff(WithPrimal(rmode), + f.cons, + Const, + Duplicated(cons_vjp_res, v), + Duplicated(θ, res_vjp), + Const(p) + ) + return res_vjp + end + elseif cons_vjp == true && cons !== nothing + cons_vjp! = (θ, v) -> f.cons_vjp(θ, v, p) + else + cons_vjp! = nothing + end + + if cons_jvp == true && cons !== nothing && f.cons_jvp == true + res_jvp = zeros(eltype(x), size(x)) + cons_jvp_res = zeros(eltype(x), num_cons) + + function cons_jvp!(θ, v) + Enzyme.make_zero!(res_jvp) + Enzyme.make_zero!(cons_jvp_res) + + Enzyme.autodiff(fmode, + f.cons, + Duplicated(cons_jvp_res, res_jvp), + Duplicated(θ, v), + Const(p) + ) + return res_jvp + end + elseif cons_jvp == true && cons !== nothing + cons_jvp! = (θ, v) -> f.cons_jvp(θ, v, p) + else + cons_jvp! = nothing + end + + if cons_h == true && cons !== nothing && f.cons_h === nothing + cons_vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + cons_bθ = zeros(eltype(x), length(x)) + cons_vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + + function cons_h!(θ) + return map(1:num_cons) do i + Enzyme.make_zero!(cons_bθ) + Enzyme.make_zero!.(cons_vdbθ) + Enzyme.autodiff(fmode, + cons_f2_oop, + Const(rmode), + Enzyme.BatchDuplicated(θ, cons_vdθ), + Enzyme.BatchDuplicated(cons_bθ, cons_vdbθ), + Const(f.cons), + Const(p), + Const(i)) + + return reduce(hcat, cons_vdbθ) + end + end + elseif cons_h == true && cons !== nothing + cons_h! = (θ) -> f.cons_h(θ, p) + else + cons_h! = nothing + end + + if lag_h == true && f.lag_h === nothing && cons !== nothing + lag_vdθ = Tuple((Array(r) for r in eachrow(I(length(x)) * one(eltype(x))))) + lag_bθ = zeros(eltype(x), length(x)) + if f.hess_prototype === nothing + lag_vdbθ = Tuple(zeros(eltype(x), length(x)) for i in eachindex(x)) + else + lag_vdbθ = Tuple((copy(r) for r in eachrow(f.hess_prototype))) + end + + function lag_h!(θ, σ, μ, p = p) + Enzyme.make_zero!(lag_bθ) + Enzyme.make_zero!.(lag_vdbθ) + + Enzyme.autodiff(fmode, + lag_grad, + Const(rmode), + Enzyme.BatchDuplicated(θ, lag_vdθ), + Enzyme.BatchDuplicatedNoNeed(lag_bθ, lag_vdbθ), + Const(lagrangian), + Const(f.f), + Const(f.cons), + Const(p), + Const(σ), + Const(μ) + ) + + k = 0 + + for i in eachindex(θ) + vec_lagv = lag_vdbθ[i] + res[(k + 1):(k + i), :] .= @view(vec_lagv[1:i]) + k += i + end + return res + end + elseif lag_h == true && cons !== nothing + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) + else + lag_h! = nothing + end + + return OptimizationFunction{false}(f.f, adtype; grad = grad, + fg = fg!, fgh = fgh!, + hess = hess, hv = hv!, + cons = cons, cons_j = cons_j!, + cons_jvp = cons_jvp!, cons_vjp = cons_vjp!, + cons_h = cons_h!, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + lag_h = lag_h!, + lag_hess_prototype = f.lag_hess_prototype, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function OptimizationBase.instantiate_function(f::OptimizationFunction{false}, + cache::OptimizationBase.ReInitCache, + adtype::AutoEnzyme, + num_cons = 0; kwargs...) + p = cache.p + x = cache.u0 + + return OptimizationBase.instantiate_function(f, x, adtype, p, num_cons; kwargs...) 
+end + +end diff --git a/lib/OptimizationBase/ext/OptimizationFiniteDiffExt.jl b/lib/OptimizationBase/ext/OptimizationFiniteDiffExt.jl new file mode 100644 index 000000000..ed95f2a93 --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationFiniteDiffExt.jl @@ -0,0 +1,5 @@ +module OptimizationFiniteDiffExt + +using DifferentiationInterface, FiniteDiff + +end diff --git a/lib/OptimizationBase/ext/OptimizationForwardDiffExt.jl b/lib/OptimizationBase/ext/OptimizationForwardDiffExt.jl new file mode 100644 index 000000000..0ff3e5ffb --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationForwardDiffExt.jl @@ -0,0 +1,5 @@ +module OptimizationForwardDiffExt + +using DifferentiationInterface, ForwardDiff + +end diff --git a/lib/OptimizationBase/ext/OptimizationMLDataDevicesExt.jl b/lib/OptimizationBase/ext/OptimizationMLDataDevicesExt.jl new file mode 100644 index 000000000..ae8d5106a --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationMLDataDevicesExt.jl @@ -0,0 +1,8 @@ +module OptimizationMLDataDevicesExt + +using MLDataDevices +using OptimizationBase + +OptimizationBase.isa_dataiterator(::DeviceIterator) = true + +end diff --git a/lib/OptimizationBase/ext/OptimizationMLUtilsExt.jl b/lib/OptimizationBase/ext/OptimizationMLUtilsExt.jl new file mode 100644 index 000000000..517883129 --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationMLUtilsExt.jl @@ -0,0 +1,8 @@ +module OptimizationMLUtilsExt + +using MLUtils +using OptimizationBase + +OptimizationBase.isa_dataiterator(::MLUtils.DataLoader) = true + +end diff --git a/lib/OptimizationBase/ext/OptimizationMTKExt.jl b/lib/OptimizationBase/ext/OptimizationMTKExt.jl new file mode 100644 index 000000000..a15ba3171 --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationMTKExt.jl @@ -0,0 +1,210 @@ +module OptimizationMTKExt + +import OptimizationBase, OptimizationBase.ArrayInterface +import OptimizationBase.SciMLBase +import OptimizationBase.SciMLBase: OptimizationFunction +import OptimizationBase.ADTypes: AutoModelingToolkit, AutoSymbolics, AutoSparse +using ModelingToolkit + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, x, adtype::AutoSparse{<:AutoSymbolics}, p, + num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + p = isnothing(p) ? SciMLBase.NullParameters() : p + + sys = complete(ModelingToolkit.modelingtoolkitize(OptimizationProblem(f, x, p; + lcons = fill(0.0, + num_cons), + ucons = fill(0.0, + num_cons)))) + #sys = ModelingToolkit.structural_simplify(sys) + # don't need to pass `x` or `p` since they're defaults now + f = OptimizationProblem(sys, nothing; grad = g, hess = h, + sparse = true, cons_j = cons_j, cons_h = cons_h, + cons_sparse = true).f + + grad = (G, θ, args...) -> f.grad(G, θ, p, args...) + + hess = (H, θ, args...) -> f.hess(H, θ, p, args...) + + hv = function (H, θ, v, args...) + res = (eltype(θ)).(f.hess_prototype) + hess(res, θ, args...) 
+ H .= res * v + end + + if !isnothing(f.cons) + cons = (res, θ) -> f.cons(res, θ, p) + cons_j = (J, θ) -> f.cons_j(J, θ, p) + cons_h = (res, θ) -> f.cons_h(res, θ, p) + else + cons = nothing + cons_j = nothing + cons_h = nothing + end + + return OptimizationFunction{true}(f.f, adtype; grad = grad, hess = hess, hv = hv, + cons = cons, cons_j = cons_j, cons_h = cons_h, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + expr = OptimizationBase.symbolify(f.expr), + cons_expr = OptimizationBase.symbolify.(f.cons_expr), + sys = sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::AutoSparse{<:AutoSymbolics}, num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + p = isnothing(cache.p) ? SciMLBase.NullParameters() : cache.p + + sys = complete(ModelingToolkit.modelingtoolkitize(OptimizationProblem(f, cache.u0, + cache.p; + lcons = fill(0.0, + num_cons), + ucons = fill(0.0, + num_cons)))) + #sys = ModelingToolkit.structural_simplify(sys) + # don't need to pass `x` or `p` since they're defaults now + f = OptimizationProblem(sys, nothing; grad = g, hess = h, + sparse = true, cons_j = cons_j, cons_h = cons_h, + cons_sparse = true).f + + grad = (G, θ, args...) -> f.grad(G, θ, cache.p, args...) + + hess = (H, θ, args...) -> f.hess(H, θ, cache.p, args...) + + hv = function (H, θ, v, args...) + res = (eltype(θ)).(f.hess_prototype) + hess(res, θ, args...) + H .= res * v + end + + if !isnothing(f.cons) + cons = (res, θ) -> f.cons(res, θ, cache.p) + cons_j = (J, θ) -> f.cons_j(J, θ, cache.p) + cons_h = (res, θ) -> f.cons_h(res, θ, cache.p) + else + cons = nothing + cons_j = nothing + cons_h = nothing + end + + return OptimizationFunction{true}(f.f, adtype; grad = grad, hess = hess, hv = hv, + cons = cons, cons_j = cons_j, cons_h = cons_h, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + expr = OptimizationBase.symbolify(f.expr), + cons_expr = OptimizationBase.symbolify.(f.cons_expr), + sys = sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, x, adtype::AutoSymbolics, p, + num_cons = 0; g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + p = isnothing(p) ? SciMLBase.NullParameters() : p + + sys = complete(ModelingToolkit.modelingtoolkitize(OptimizationProblem(f, x, p; + lcons = fill(0.0, + num_cons), + ucons = fill(0.0, + num_cons)))) + #sys = ModelingToolkit.structural_simplify(sys) + # don't need to pass `x` or `p` since they're defaults now + f = OptimizationProblem(sys, nothing; grad = g, hess = h, + sparse = false, cons_j = cons_j, cons_h = cons_h, + cons_sparse = false).f + + grad = (G, θ, args...) -> f.grad(G, θ, p, args...) + + hess = (H, θ, args...) -> f.hess(H, θ, p, args...) + + hv = function (H, θ, v, args...) + res = ArrayInterface.zeromatrix(θ) + hess(res, θ, args...) 
+ H .= res * v + end + + if !isnothing(f.cons) + cons = (res, θ) -> f.cons(res, θ, p) + cons_j = (J, θ) -> f.cons_j(J, θ, p) + cons_h = (res, θ) -> f.cons_h(res, θ, p) + else + cons = nothing + cons_j = nothing + cons_h = nothing + end + + return OptimizationFunction{true}(f.f, adtype; grad = grad, hess = hess, hv = hv, + cons = cons, cons_j = cons_j, cons_h = cons_h, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + expr = OptimizationBase.symbolify(f.expr), + cons_expr = OptimizationBase.symbolify.(f.cons_expr), + sys = sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::AutoSymbolics, num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + p = isnothing(cache.p) ? SciMLBase.NullParameters() : cache.p + + sys = complete(ModelingToolkit.modelingtoolkitize(OptimizationProblem(f, cache.u0, + cache.p; + lcons = fill(0.0, + num_cons), + ucons = fill(0.0, + num_cons)))) + #sys = ModelingToolkit.structural_simplify(sys) + # don't need to pass `x` or `p` since they're defaults now + f = OptimizationProblem(sys, nothing; grad = g, hess = h, + sparse = false, cons_j = cons_j, cons_h = cons_h, + cons_sparse = false).f + + grad = (G, θ, args...) -> f.grad(G, θ, cache.p, args...) + + hess = (H, θ, args...) -> f.hess(H, θ, cache.p, args...) + + hv = function (H, θ, v, args...) + res = ArrayInterface.zeromatrix(θ) + hess(res, θ, args...) + H .= res * v + end + + if !isnothing(f.cons) + cons = (res, θ) -> f.cons(res, θ, cache.p) + cons_j = (J, θ) -> f.cons_j(J, θ, cache.p) + cons_h = (res, θ) -> f.cons_h(res, θ, cache.p) + else + cons = nothing + cons_j = nothing + cons_h = nothing + end + + return OptimizationFunction{true}(f.f, adtype; grad = grad, hess = hess, hv = hv, + cons = cons, cons_j = cons_j, cons_h = cons_h, + hess_prototype = f.hess_prototype, + cons_jac_prototype = f.cons_jac_prototype, + cons_hess_prototype = f.cons_hess_prototype, + expr = OptimizationBase.symbolify(f.expr), + cons_expr = OptimizationBase.symbolify.(f.cons_expr), + sys = sys, + observed = f.observed) +end + +end diff --git a/lib/OptimizationBase/ext/OptimizationReverseDiffExt.jl b/lib/OptimizationBase/ext/OptimizationReverseDiffExt.jl new file mode 100644 index 000000000..11e57cf3b --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationReverseDiffExt.jl @@ -0,0 +1,5 @@ +module OptimizationReverseDiffExt + +using DifferentiationInterface, ReverseDiff + +end diff --git a/lib/OptimizationBase/ext/OptimizationSymbolicAnalysisExt.jl b/lib/OptimizationBase/ext/OptimizationSymbolicAnalysisExt.jl new file mode 100644 index 000000000..ecf690915 --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationSymbolicAnalysisExt.jl @@ -0,0 +1,118 @@ +module OptimizationSymbolicAnalysisExt + +using OptimizationBase, SciMLBase, SymbolicAnalysis, SymbolicAnalysis.Symbolics, + OptimizationBase.ArrayInterface +using SymbolicAnalysis: AnalysisResult +import SymbolicAnalysis.Symbolics: variable, Equation, Inequality, unwrap, @variables + +function OptimizationBase.symify_cache( + f::OptimizationFunction{iip, AD, F, G, FG, H, FGH, HV, C, CJ, CJV, CVJ, CH, HP, + CJP, CHP, O, EX, CEX, SYS, LH, LHP, HCV, CJCV, CHCV, LHCV}, + prob, num_cons, + manifold) where { + iip, AD, F, G, FG, H, FGH, HV, C, CJ, CJV, CVJ, CH, HP, CJP, CHP, O, + EX <: Nothing, CEX 
<: Nothing, SYS, LH, LHP, HCV, CJCV, CHCV, LHCV} + obj_expr = f.expr + cons_expr = f.cons_expr === nothing ? nothing : getfield.(f.cons_expr, Ref(:lhs)) + + if obj_expr === nothing || cons_expr === nothing + try + vars = if prob.u0 isa Matrix + @variables X[1:size(prob.u0, 1), 1:size(prob.u0, 2)] + else + ArrayInterface.restructure( + prob.u0, [variable(:x, i) for i in eachindex(prob.u0)]) + end + params = if prob.p isa SciMLBase.NullParameters + [] + elseif prob.p isa MTK.MTKParameters + [variable(:α, i) for i in eachindex(vcat(p...))] + else + ArrayInterface.restructure(p, [variable(:α, i) for i in eachindex(p)]) + end + + if prob.u0 isa Matrix + vars = vars[1] + end + + if obj_expr === nothing + obj_expr = f.f(vars, params) + end + + if cons_expr === nothing && SciMLBase.isinplace(prob) && !isnothing(prob.f.cons) + lhs = Array{Symbolics.Num}(undef, num_cons) + f.cons(lhs, vars) + cons = Union{Equation, Inequality}[] + + if !isnothing(prob.lcons) + for i in 1:num_cons + if !isinf(prob.lcons[i]) + if prob.lcons[i] != prob.ucons[i] + push!(cons, prob.lcons[i] ≲ lhs[i]) + else + push!(cons, lhs[i] ~ prob.ucons[i]) + end + end + end + end + + if !isnothing(prob.ucons) + for i in 1:num_cons + if !isinf(prob.ucons[i]) && prob.lcons[i] != prob.ucons[i] + push!(cons, lhs[i] ≲ prob.ucons[i]) + end + end + end + if (isnothing(prob.lcons) || all(isinf, prob.lcons)) && + (isnothing(prob.ucons) || all(isinf, prob.ucons)) + throw(ArgumentError("Constraints passed have no proper bounds defined. + Ensure you pass equal bounds (the scalar that the constraint should evaluate to) for equality constraints + or pass the lower and upper bounds for inequality constraints.")) + end + cons_expr = lhs + elseif cons_expr === nothing && !isnothing(prob.f.cons) + cons_expr = f.cons(vars, params) + end + catch err + throw(ArgumentError("Automatic symbolic expression generation with failed with error: $err. 
+ Try by setting `structural_analysis = false` instead if the solver doesn't require symbolic expressions.")) + end + end + + if obj_expr !== nothing + obj_expr = obj_expr |> Symbolics.unwrap + if manifold === nothing + obj_res = analyze(obj_expr) + else + obj_res = analyze(obj_expr, manifold) + end + @info "Objective Euclidean curvature: $(obj_res.curvature)" + if obj_res.gcurvature !== nothing + @info "Objective Geodesic curvature: $(obj_res.gcurvature)" + end + else + obj_res = nothing + end + + if cons_expr !== nothing + cons_expr = cons_expr .|> Symbolics.unwrap + if manifold === nothing + cons_res = analyze.(cons_expr) + else + cons_res = analyze.(cons_expr, Ref(manifold)) + end + for i in 1:num_cons + @info "Constraints Euclidean curvature: $(cons_res[i].curvature)" + + if cons_res[i].gcurvature !== nothing + @info "Constraints Geodesic curvature: $(cons_res[i].gcurvature)" + end + end + else + cons_res = nothing + end + + return obj_res, cons_res +end + +end diff --git a/lib/OptimizationBase/ext/OptimizationZygoteExt.jl b/lib/OptimizationBase/ext/OptimizationZygoteExt.jl new file mode 100644 index 000000000..7574af77d --- /dev/null +++ b/lib/OptimizationBase/ext/OptimizationZygoteExt.jl @@ -0,0 +1,563 @@ +module OptimizationZygoteExt + +using OptimizationBase, SparseArrays +using OptimizationBase.FastClosures +import OptimizationBase.ArrayInterface +import OptimizationBase.SciMLBase: OptimizationFunction +import OptimizationBase.LinearAlgebra: I, dot +import DifferentiationInterface +import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, + prepare_jacobian, value_and_gradient!, value_and_gradient, + value_derivative_and_second_derivative!, + value_derivative_and_second_derivative, + gradient!, hessian!, hvp!, jacobian!, gradient, hessian, + hvp, jacobian, Constant +using ADTypes, SciMLBase +import Zygote, Zygote.ForwardDiff + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, x, + adtype::Union{ADTypes.AutoZygote, + DifferentiationInterface.SecondOrder{ + <:ADTypes.AbstractADType, <:ADTypes.AutoZygote}}, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = OptimizationBase.generate_adtype(adtype) + + if g == true && f.grad === nothing + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p), strict=Val(false)) + function grad(res, θ) + gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function grad(res, θ, p) + gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + end + end + elseif g == true + grad = (G, θ, p = p) -> f.grad(G, θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p), strict=Val(false)) + end + function fg!(res, θ) + (y, _) = value_and_gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fg!(res, θ, p) + (y, _) = value_and_gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + return y + end + end + elseif fg == true + fg! = (G, θ, p = p) -> f.fg(G, θ, p) + else + fg! 
= nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if h == true && f.hess === nothing + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p), strict=Val(false)) + function hess(res, θ) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (H, θ, p = p) -> f.hess(H, θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(G, H, θ) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype, θ, Constant(p)) + return y + end + end + elseif fgh == true + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp(f.f, soadtype, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(H, θ, v) + hvp!(f.f, (H,), prep_hvp, soadtype, θ, (v,), Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + hvp!(f.f, (H,), prep_hvp, soadtype, θ, (v,), Constant(p)) + end + end + elseif hv == true + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = (res, θ) -> f.cons(res, θ, p) + + function cons_oop(x) + _res = Zygote.Buffer(x, num_cons) + f.cons(_res, x, p) + return copy(_res) + end + + function cons_oop(x, i) + _res = Zygote.Buffer(x, num_cons) + f.cons(_res, x, p) + return _res[i] + end + + function lagrangian(θ, σ, λ, p) + return σ * f.f(θ, p) + dot(λ, cons_oop(θ)) + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(cons_oop, adtype, x, strict=Val(false)) + function cons_j!(J, θ) + jacobian!(cons_oop, J, prep_jac, adtype, θ) + if size(J, 1) == 1 + J = vec(J) + end + end + elseif cons !== nothing && cons_j == true + cons_j! = (J, θ) -> f.cons_j(J, θ, p) + else + cons_j! = nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing + prep_pullback = prepare_pullback(cons_oop, adtype, x, (ones(eltype(x), num_cons),), strict=Val(false)) + function cons_vjp!(J, θ, v) + pullback!(cons_oop, (J,), prep_pullback, adtype, θ, (v,)) + end + elseif cons_vjp == true + cons_vjp! = (J, θ, v) -> f.cons_vjp(J, θ, v, p) + else + cons_vjp! = nothing + end + + if cons !== nothing && f.cons_jvp === nothing && cons_jvp == true + prep_pushforward = prepare_pushforward( + cons_oop, adtype, x, (ones(eltype(x), length(x)),), strict=Val(false)) + function cons_jvp!(J, θ, v) + pushforward!(cons_oop, (J,), prep_pushforward, adtype, θ, (v,)) + end + elseif cons_jvp == true + cons_jvp! = (J, θ, v) -> f.cons_jvp(J, θ, v, p) + else + cons_jvp! 
= nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if cons !== nothing && cons_h == true && f.cons_h === nothing + prep_cons_hess = [prepare_hessian(cons_oop, soadtype, x, Constant(i), strict=Val(false)) + for i in 1:num_cons] + + function cons_h!(H, θ) + for i in 1:num_cons + hessian!(cons_oop, H[i], prep_cons_hess[i], soadtype, θ, Constant(i)) + end + end + elseif cons !== nothing && cons_h == true + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! = nothing + end + + lag_hess_prototype = f.lag_hess_prototype + + if f.lag_h === nothing && cons !== nothing && lag_h == true + lag_extras = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p), strict=Val(false)) + lag_hess_prototype = zeros(Bool, num_cons, length(x)) + + function lag_h!(H::AbstractMatrix, θ, σ, λ) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ) + H = hessian( + lagrangian, lag_extras, soadtype, θ, Constant(σ), Constant(λ), Constant(p)) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ, p) + H = hessian(lagrangian, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + end + elseif cons !== nothing && lag_h == true + lag_h! = (res, θ, σ, μ, p = p) -> f.lag_h(res, θ, σ, μ, p) + else + lag_h! = nothing + end + + return OptimizationFunction{true}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + cons_vjp = cons_vjp!, cons_jvp = cons_jvp!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AutoZygote, num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return OptimizationBase.instantiate_function( + f, x, adtype, p, num_cons; kwargs...) 
+end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, x, + adtype::ADTypes.AutoSparse{<:Union{ADTypes.AutoZygote, + DifferentiationInterface.SecondOrder{ + <:ADTypes.AbstractADType, <:ADTypes.AutoZygote}}}, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = OptimizationBase.generate_sparse_adtype(adtype) + + if g == true && f.grad === nothing + extras_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p), strict=Val(false)) + function grad(res, θ) + gradient!(f.f, res, extras_grad, adtype.dense_ad, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function grad(res, θ, p) + gradient!(f.f, res, extras_grad, adtype.dense_ad, θ, Constant(p)) + end + end + elseif g == true + grad = (G, θ, p = p) -> f.grad(G, θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + extras_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p), strict=Val(false)) + end + function fg!(res, θ) + (y, _) = value_and_gradient!( + f.f, res, extras_grad, adtype.dense_ad, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fg!(res, θ, p) + (y, _) = value_and_gradient!( + f.f, res, extras_grad, adtype.dense_ad, θ, Constant(p)) + return y + end + end + elseif fg == true + fg! = (G, θ, p = p) -> f.fg(G, θ, p) + else + fg! = nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if h == true && f.hess === nothing + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p), strict=Val(false)) + function hess(res, θ) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + hess_sparsity = prep_hess.coloring_result.A + hess_colors = prep_hess.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (H, θ, p = p) -> f.hess(H, θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(G, H, θ) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, θ, prep_hess, soadtype, Constant(p)) + return y + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, θ, prep_hess, soadtype, Constant(p)) + return y + end + end + elseif fgh == true + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp( + f.f, soadtype.dense_ad, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(H, θ, v) + hvp!(f.f, (H,), prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + hvp!(f.f, (H,), prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p)) + end + end + elseif hv == true + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) + else + hv! 
= nothing + end + + if f.cons === nothing + cons = nothing + else + cons = (res, θ) -> f.cons(res, θ, p) + + function cons_oop(x) + _res = Zygote.Buffer(x, num_cons) + f.cons(_res, x, p) + return copy(_res) + end + + function cons_oop(x, i) + _res = Zygote.Buffer(x, num_cons) + f.cons(_res, x, p) + return _res[i] + end + + function lagrangian(θ, σ, λ, p) + return σ * f.f(θ, p) + dot(λ, cons_oop(θ)) + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(cons_oop, adtype, x) + function cons_j!(J, θ) + jacobian!(cons_oop, J, prep_jac, adtype, θ) + if size(J, 1) == 1 + J = vec(J) + end + end + cons_jac_prototype = prep_jac.coloring_result.A + cons_jac_colorvec = prep_jac.coloring_result.color + elseif cons !== nothing && cons_j == true + cons_j! = (J, θ) -> f.cons_j(J, θ, p) + else + cons_j! = nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && cons !== nothing + extras_pullback = prepare_pullback( + cons_oop, adtype.dense_ad, x, (ones(eltype(x), num_cons),)) + function cons_vjp!(J, θ, v) + pullback!( + cons_oop, (J,), extras_pullback, adtype.dense_ad, θ, (v,)) + end + elseif cons_vjp == true + cons_vjp! = (J, θ, v) -> f.cons_vjp(J, θ, v, p) + else + cons_vjp! = nothing + end + + if f.cons_jvp === nothing && cons_jvp == true && cons !== nothing + extras_pushforward = prepare_pushforward( + cons_oop, adtype.dense_ad, x, (ones(eltype(x), length(x)),)) + function cons_jvp!(J, θ, v) + pushforward!( + cons_oop, (J,), extras_pushforward, adtype.dense_ad, θ, (v,)) + end + elseif cons_jvp == true + cons_jvp! = (J, θ, v) -> f.cons_jvp(J, θ, v, p) + else + cons_jvp! = nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if cons !== nothing && f.cons_h === nothing && cons_h == true + prep_cons_hess = [prepare_hessian(cons_oop, soadtype, x, Constant(i), strict=Val(false)) + for i in 1:num_cons] + colores = getfield.(prep_cons_hess, :coloring_result) + conshess_sparsity = getfield.(colores, :A) + conshess_colors = getfield.(colores, :color) + function cons_h!(H, θ) + for i in 1:num_cons + hessian!(cons_oop, H[i], prep_cons_hess[i], soadtype, θ, Constant(i)) + end + end + elseif cons_h == true + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! 
= nothing + end + + lag_hess_prototype = f.lag_hess_prototype + lag_hess_colors = f.lag_hess_colorvec + if cons !== nothing && f.lag_h === nothing && lag_h == true + lag_extras = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p), strict=Val(false)) + lag_hess_prototype = lag_extras.coloring_result.A + lag_hess_colors = lag_extras.coloring_result.color + + function lag_h!(H::AbstractMatrix, θ, σ, λ) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h, θ, σ, λ) + H = hessian( + lagrangian, lag_extras, soadtype, θ, Constant(σ), Constant(λ), Constant(p)) + k = 0 + rows, cols, _ = findnz(H) + for (i, j) in zip(rows, cols) + if i <= j + k += 1 + h[k] = H[i, j] + end + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ, p) + H = hessian(lagrangian, lag_extras, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + end + elseif cons !== nothing && cons_h == true + lag_h! = (res, θ, σ, μ, p = p) -> f.lag_h(res, θ, σ, μ, p) + else + lag_h! = nothing + end + return OptimizationFunction{true}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + lag_hess_colorvec = lag_hess_colors, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AutoSparse{<:AutoZygote}, num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return OptimizationBase.instantiate_function(f, x, adtype, p, num_cons; kwargs...) +end + +end diff --git a/lib/OptimizationBase/src/OptimizationBase.jl b/lib/OptimizationBase/src/OptimizationBase.jl new file mode 100644 index 000000000..c849a5dde --- /dev/null +++ b/lib/OptimizationBase/src/OptimizationBase.jl @@ -0,0 +1,33 @@ +module OptimizationBase + +using DocStringExtensions +using Reexport +@reexport using SciMLBase, ADTypes + +using ArrayInterface, Base.Iterators, SparseArrays, LinearAlgebra +import SciMLBase: OptimizationProblem, + OptimizationFunction, ObjSense, + MaxSense, MinSense, OptimizationStats +export ObjSense, MaxSense, MinSense + +using FastClosures + +struct NullCallback end +(x::NullCallback)(args...) 
= false +const DEFAULT_CALLBACK = NullCallback() + +struct NullData end +const DEFAULT_DATA = Iterators.cycle((NullData(),)) +Base.iterate(::NullData, i = 1) = nothing +Base.length(::NullData) = 0 + +include("adtypes.jl") +include("symify.jl") +include("cache.jl") +include("OptimizationDIExt.jl") +include("OptimizationDISparseExt.jl") +include("function.jl") + +export solve, OptimizationCache, DEFAULT_CALLBACK, DEFAULT_DATA + +end diff --git a/lib/OptimizationBase/src/OptimizationDIExt.jl b/lib/OptimizationBase/src/OptimizationDIExt.jl new file mode 100644 index 000000000..f32e6cf33 --- /dev/null +++ b/lib/OptimizationBase/src/OptimizationDIExt.jl @@ -0,0 +1,502 @@ +import ArrayInterface +import SciMLBase: OptimizationFunction +import LinearAlgebra: I +import DifferentiationInterface +import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, + prepare_pullback, prepare_pushforward, pullback!, + pushforward!, + pullback, pushforward, + prepare_jacobian, value_and_gradient!, value_and_gradient, + value_derivative_and_second_derivative!, + value_derivative_and_second_derivative, + gradient!, hessian!, hvp!, jacobian!, gradient, hessian, + hvp, jacobian, Constant +using ADTypes, SciMLBase + +function instantiate_function( + f::OptimizationFunction{true}, x, adtype::ADTypes.AbstractADType, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = generate_adtype(adtype) + + if g == true && f.grad === nothing + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p)) + function grad(res, θ) + gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function grad(res, θ, p) + gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + end + end + elseif g == true + grad = (G, θ, p = p) -> f.grad(G, θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p)) + end + function fg!(res, θ) + (y, _) = value_and_gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fg!(res, θ, p) + (y, _) = value_and_gradient!(f.f, res, prep_grad, adtype, θ, Constant(p)) + return y + end + end + elseif fg == true + fg! = (G, θ, p = p) -> f.fg(G, θ, p) + else + fg! = nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if h == true && f.hess === nothing + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p)) + function hess(res, θ) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (H, θ, p = p) -> f.hess(H, θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(G, H, θ) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype, θ, Constant(p)) + return y + end + end + elseif fgh == true + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) + else + fgh! 
= nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp(f.f, soadtype, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(H, θ, v) + only(hvp!(f.f, (H,), prep_hvp, soadtype, θ, (v,), Constant(p))) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + only(hvp!(f.f, (H,), soadtype, θ, (v,), Constant(p))) + end + end + elseif hv == true + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = (res, x) -> f.cons(res, x, p) + function cons_oop(x) + _res = zeros(eltype(x), num_cons) + f.cons(_res, x, p) + return _res + end + + function cons_oop(x, i) + _res = zeros(eltype(x), num_cons) + f.cons(_res, x, p) + return _res[i] + end + + function lagrangian(θ, σ, λ, p) + return σ * f.f(θ, p) + dot(λ, cons_oop(θ)) + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if f.cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(cons_oop, adtype, x) + function cons_j!(J, θ) + jacobian!(cons_oop, J, prep_jac, adtype, θ) + if size(J, 1) == 1 + J = vec(J) + end + end + elseif cons_j == true && f.cons !== nothing + cons_j! = (J, θ) -> f.cons_j(J, θ, p) + else + cons_j! = nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && f.cons !== nothing + prep_pullback = prepare_pullback(cons_oop, adtype, x, (ones(eltype(x), num_cons),)) + function cons_vjp!(J, θ, v) + only(pullback!(cons_oop, (J,), prep_pullback, adtype, θ, (v,))) + end + elseif cons_vjp == true && f.cons !== nothing + cons_vjp! = (J, θ, v) -> f.cons_vjp(J, θ, v, p) + else + cons_vjp! = nothing + end + + if f.cons_jvp === nothing && cons_jvp == true && f.cons !== nothing + prep_pushforward = prepare_pushforward( + cons_oop, adtype, x, (ones(eltype(x), length(x)),)) + function cons_jvp!(J, θ, v) + only(pushforward!(cons_oop, (J,), prep_pushforward, adtype, θ, (v,))) + end + elseif cons_jvp == true && f.cons !== nothing + cons_jvp! = (J, θ, v) -> f.cons_jvp(J, θ, v, p) + else + cons_jvp! = nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if f.cons !== nothing && f.cons_h === nothing && cons_h == true + prep_cons_hess = [prepare_hessian(cons_oop, soadtype, x, Constant(i)) + for i in 1:num_cons] + + function cons_h!(H, θ) + for i in 1:num_cons + hessian!(cons_oop, H[i], prep_cons_hess[i], soadtype, θ, Constant(i)) + end + end + elseif cons_h == true && f.cons !== nothing + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! 
= nothing + end + + lag_hess_prototype = f.lag_hess_prototype + + if f.cons !== nothing && lag_h == true && f.lag_h === nothing + lag_prep = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p)) + lag_hess_prototype = zeros(Bool, num_cons, length(x)) + + function lag_h!(H::AbstractMatrix, θ, σ, λ) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ) + H = hessian( + lagrangian, lag_prep, soadtype, θ, Constant(σ), Constant(λ), Constant(p)) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h::AbstractVector, θ, σ, λ, p) + H = hessian(lagrangian, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + k = 0 + for i in 1:length(θ) + for j in 1:i + k += 1 + h[k] = H[i, j] + end + end + end + end + elseif lag_h == true && f.cons !== nothing + lag_h! = (res, θ, σ, μ, p = p) -> f.lag_h(res, θ, σ, μ, p) + else + lag_h! = nothing + end + + return OptimizationFunction{true}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + cons_vjp = cons_vjp!, cons_jvp = cons_jvp!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AbstractADType, num_cons = 0; + kwargs...) + x = cache.u0 + p = cache.p + + return instantiate_function(f, x, adtype, p, num_cons; kwargs...) +end + +function instantiate_function( + f::OptimizationFunction{false}, x, adtype::ADTypes.AbstractADType, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = generate_adtype(adtype) + + if g == true && f.grad === nothing + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p)) + function grad(θ) + gradient(f.f, prep_grad, adtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function grad(θ, p) + gradient(f.f, prep_grad, adtype, θ, Constant(p)) + end + end + elseif g == true + grad = (θ, p = p) -> f.grad(θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + prep_grad = prepare_gradient(f.f, adtype, x, Constant(p)) + end + function fg!(θ) + (y, res) = value_and_gradient(f.f, prep_grad, adtype, θ, Constant(p)) + return y, res + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fg!(θ, p) + (y, res) = value_and_gradient(f.f, prep_grad, adtype, θ, Constant(p)) + return y, res + end + end + elseif fg == true + fg! = (θ, p = p) -> f.fg(θ, p) + else + fg! 
= nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if h == true && f.hess === nothing + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p)) + function hess(θ) + hessian(f.f, prep_hess, soadtype, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(θ, p) + hessian(f.f, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (θ, p = p) -> f.hess(θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(θ) + (y, G, H) = value_derivative_and_second_derivative( + f.f, prep_hess, adtype, θ, Constant(p)) + return y, G, H + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(θ, p) + (y, G, H) = value_derivative_and_second_derivative( + f.f, prep_hess, adtype, θ, Constant(p)) + return y, G, H + end + end + elseif fgh == true + fgh! = (θ, p = p) -> f.fgh(θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp(f.f, soadtype, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(θ, v) + only(hvp(f.f, prep_hvp, soadtype, θ, (v,), Constant(p))) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(θ, v, p) + only(hvp(f.f, prep_hvp, soadtype, θ, (v,), Constant(p))) + end + end + elseif hv == true + hv! = (θ, v, p = p) -> f.hv(θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = Base.Fix2(f.cons, p) + + function lagrangian(θ, σ, λ, p) + return σ * f.f(θ, p) + dot(λ, f.cons(θ, p)) + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if f.cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(f.cons, adtype, x, Constant(p)) + function cons_j!(θ) + J = jacobian(f.cons, prep_jac, adtype, θ, Constant(p)) + if size(J, 1) == 1 + J = vec(J) + end + return J + end + elseif cons_j == true && f.cons !== nothing + cons_j! = (θ) -> f.cons_j(θ, p) + else + cons_j! = nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && f.cons !== nothing + prep_pullback = prepare_pullback( + f.cons, adtype, x, (ones(eltype(x), num_cons),), Constant(p)) + function cons_vjp!(θ, v) + return only(pullback(f.cons, prep_pullback, adtype, θ, (v,), Constant(p))) + end + elseif cons_vjp == true && f.cons !== nothing + cons_vjp! = (θ, v) -> f.cons_vjp(θ, v, p) + else + cons_vjp! = nothing + end + + if f.cons_jvp === nothing && cons_jvp == true && f.cons !== nothing + prep_pushforward = prepare_pushforward( + f.cons, adtype, x, (ones(eltype(x), length(x)),), Constant(p)) + function cons_jvp!(θ, v) + return only(pushforward(f.cons, prep_pushforward, adtype, θ, (v,), Constant(p))) + end + elseif cons_jvp == true && f.cons !== nothing + cons_jvp! = (θ, v) -> f.cons_jvp(θ, v, p) + else + cons_jvp! = nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if f.cons !== nothing && cons_h == true && f.cons_h === nothing + function cons_i(x, i) + return f.cons(x, p)[i] + end + prep_cons_hess = [prepare_hessian(cons_i, soadtype, x, Constant(i)) + for i in 1:num_cons] + + function cons_h!(θ) + H = map(1:num_cons) do i + hessian(cons_i, prep_cons_hess[i], soadtype, θ, Constant(i)) + end + return H + end + elseif cons_h == true && f.cons !== nothing + cons_h! = (θ) -> f.cons_h(θ, p) + else + cons_h! 
= nothing + end + + lag_hess_prototype = f.lag_hess_prototype + + if f.cons !== nothing && lag_h == true && f.lag_h === nothing + lag_prep = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p)) + lag_hess_prototype = zeros(Bool, num_cons, length(x)) + + function lag_h!(θ, σ, λ) + if σ == zero(eltype(θ)) + return λ .* cons_h(θ) + else + return hessian(lagrangian, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(θ, σ, λ, p) + if σ == zero(eltype(θ)) + return λ .* cons_h(θ) + else + return hessian(lagrangian, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + end + elseif lag_h == true && f.cons !== nothing + lag_h! = (θ, σ, λ, p = p) -> f.lag_h(θ, σ, λ, p) + else + lag_h! = nothing + end + + return OptimizationFunction{false}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + cons_vjp = cons_vjp!, cons_jvp = cons_jvp!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function instantiate_function( + f::OptimizationFunction{false}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AbstractADType, num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return instantiate_function(f, x, adtype, p, num_cons; kwargs...) +end diff --git a/lib/OptimizationBase/src/OptimizationDISparseExt.jl b/lib/OptimizationBase/src/OptimizationDISparseExt.jl new file mode 100644 index 000000000..dd3219fe5 --- /dev/null +++ b/lib/OptimizationBase/src/OptimizationDISparseExt.jl @@ -0,0 +1,532 @@ +import ArrayInterface +import SciMLBase: OptimizationFunction +import LinearAlgebra: I +import DifferentiationInterface +import DifferentiationInterface: prepare_gradient, prepare_hessian, prepare_hvp, + prepare_jacobian, value_and_gradient!, + value_derivative_and_second_derivative!, + value_and_gradient, value_derivative_and_second_derivative, + gradient!, hessian!, hvp!, jacobian!, gradient, hessian, + hvp, jacobian +using ADTypes +using SparseConnectivityTracer, SparseMatrixColorings + +function instantiate_function( + f::OptimizationFunction{true}, x, adtype::ADTypes.AutoSparse{<:AbstractADType}, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = generate_sparse_adtype(adtype) + + if g == true && f.grad === nothing + prep_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p)) + function grad(res, θ) + gradient!(f.f, res, prep_grad, adtype.dense_ad, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() + function grad(res, θ, p) + gradient!(f.f, res, prep_grad, adtype.dense_ad, θ, Constant(p)) + end + end + elseif g == true + grad = (G, θ, p = p) -> f.grad(G, θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + prep_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p)) + end + function fg!(res, θ) + (y, _) = value_and_gradient!( + f.f, res, prep_grad, adtype.dense_ad, θ, Constant(p)) + return y + end + if p !== 
SciMLBase.NullParameters() + function fg!(res, θ, p) + (y, _) = value_and_gradient!( + f.f, res, prep_grad, adtype.dense_ad, θ, Constant(p)) + return y + end + end + elseif fg == true + fg! = (G, θ, p = p) -> f.fg(G, θ, p) + else + fg! = nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if f.hess === nothing && h == true + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p)) + function hess(res, θ) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + hess_sparsity = prep_hess.coloring_result.A + hess_colors = prep_hess.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(res, θ, p) + hessian!(f.f, res, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (H, θ, p = p) -> f.hess(H, θ, p) + else + hess = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(G, H, θ) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype.dense_ad, θ, Constant(p)) + return y + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(G, H, θ, p) + (y, _, _) = value_derivative_and_second_derivative!( + f.f, G, H, prep_hess, soadtype.dense_ad, θ, Constant(p)) + return y + end + end + elseif fgh == true + fgh! = (G, H, θ, p = p) -> f.fgh(G, H, θ, p) + else + fgh! = nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp( + f.f, soadtype.dense_ad, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(H, θ, v) + only(hvp!(f.f, (H,), prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p))) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(H, θ, v, p) + only(hvp!(f.f, (H,), prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p))) + end + end + elseif hv == true + hv! = (H, θ, v, p = p) -> f.hv(H, θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = (res, θ) -> f.cons(res, θ, p) + + function cons_oop(x) + _res = zeros(eltype(x), num_cons) + f.cons(_res, x, p) + return _res + end + + function cons_oop(x, i) + _res = zeros(eltype(x), num_cons) + f.cons(_res, x, p) + return _res[i] + end + + function lagrangian(θ, σ, λ, p) + if eltype(θ) <: SparseConnectivityTracer.AbstractTracer || !iszero(θ) + return σ * f.f(θ, p) + dot(λ, cons_oop(θ)) + else + return dot(λ, cons_oop(θ)) + end + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if f.cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(cons_oop, adtype, x) + function cons_j!(J, θ) + jacobian!(cons_oop, J, prep_jac, adtype, θ) + if size(J, 1) == 1 + J = vec(J) + end + end + cons_jac_prototype = prep_jac.coloring_result.A + cons_jac_colorvec = prep_jac.coloring_result.color + elseif cons_j === true && f.cons !== nothing + cons_j! = (J, θ) -> f.cons_j(J, θ, p) + else + cons_j! = nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && f.cons !== nothing + prep_pullback = prepare_pullback( + cons_oop, adtype.dense_ad, x, (ones(eltype(x), num_cons),)) + function cons_vjp!(J, θ, v) + only(pullback!(cons_oop, (J,), prep_pullback, adtype.dense_ad, θ, (v,))) + end + elseif cons_vjp === true && f.cons !== nothing + cons_vjp! = (J, θ, v) -> f.cons_vjp(J, θ, v, p) + else + cons_vjp! 
= nothing + end + + if f.cons_jvp === nothing && cons_jvp == true && f.cons !== nothing + prep_pushforward = prepare_pushforward( + cons_oop, adtype.dense_ad, x, (ones(eltype(x), length(x)),)) + function cons_jvp!(J, θ, v) + only(pushforward!(cons_oop, (J,), prep_pushforward, adtype.dense_ad, θ, (v,))) + end + elseif cons_jvp === true && f.cons !== nothing + cons_jvp! = (J, θ, v) -> f.cons_jvp(J, θ, v, p) + else + cons_jvp! = nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if f.cons !== nothing && f.cons_h === nothing && cons_h == true + prep_cons_hess = [prepare_hessian(cons_oop, soadtype, x, Constant(i)) + for i in 1:num_cons] + colores = getfield.(prep_cons_hess, :coloring_result) + conshess_sparsity = getfield.(colores, :A) + conshess_colors = getfield.(colores, :color) + function cons_h!(H, θ) + for i in 1:num_cons + hessian!(cons_oop, H[i], prep_cons_hess[i], soadtype, θ, Constant(i)) + end + end + elseif cons_h == true && f.cons !== nothing + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! = nothing + end + + lag_hess_prototype = f.lag_hess_prototype + lag_hess_colors = f.lag_hess_colorvec + if f.cons !== nothing && lag_h == true && f.lag_h === nothing + lag_prep = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p)) + lag_hess_prototype = lag_prep.coloring_result.A + lag_hess_colors = lag_prep.coloring_result.color + + function lag_h!(H::AbstractMatrix, θ, σ, λ) + if σ == zero(eltype(θ)) + cons_h!(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h, θ, σ, λ) + H = hessian( + lagrangian, lag_prep, soadtype, θ, Constant(σ), Constant(λ), Constant(p)) + k = 0 + rows, cols, _ = findnz(H) + for (i, j) in zip(rows, cols) + if i <= j + k += 1 + h[k] = H[i, j] + end + end + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(H::AbstractMatrix, θ, σ, λ, p) + if σ == zero(eltype(θ)) + cons_h(H, θ) + H *= λ + else + hessian!(lagrangian, H, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + end + end + + function lag_h!(h, θ, σ, λ, p) + H = hessian(lagrangian, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + k = 0 + rows, cols, _ = findnz(H) + for (i, j) in zip(rows, cols) + if i <= j + k += 1 + h[k] = H[i, j] + end + end + end + end + elseif lag_h == true + lag_h! = (H, θ, σ, λ, p = p) -> f.lag_h(H, θ, σ, λ, p) + else + lag_h! = nothing + end + return OptimizationFunction{true}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + cons_vjp = cons_vjp!, cons_jvp = cons_jvp!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + lag_hess_colorvec = lag_hess_colors, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function instantiate_function( + f::OptimizationFunction{true}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AutoSparse{<:AbstractADType}, num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return instantiate_function(f, x, adtype, p, num_cons; kwargs...) 
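+    # This method just unpacks the `ReInitCache` (initial point `u0` and parameters `p`)
+    # and forwards to the positional method above, so re-initialized problems reuse the
+    # same sparse preparation path.
+    #
+    # Hedged usage sketch (hypothetical objective/constraint, not part of this file): with a
+    # plain `AutoSparse` wrapper, `generate_sparse_adtype` fills in the defaults
+    # `TracerSparsityDetector()` and `GreedyColoringAlgorithm()`.
+    #
+    #     rosen(x, p) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
+    #     con(res, x, p) = (res[1] = x[1]^2 + x[2]^2; nothing)
+    #     optf = OptimizationFunction(rosen, AutoSparse(AutoForwardDiff()); cons = con)
+    #     inst = instantiate_function(optf, zeros(2), AutoSparse(AutoForwardDiff()), nothing, 1;
+    #         g = true, h = true, cons_j = true)
+    #     # `inst.hess_prototype` and `inst.cons_jac_prototype` now carry the detected sparsity patterns.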
+end + +function instantiate_function( + f::OptimizationFunction{false}, x, adtype::ADTypes.AutoSparse{<:AbstractADType}, + p = SciMLBase.NullParameters(), num_cons = 0; + g = false, h = false, hv = false, fg = false, fgh = false, + cons_j = false, cons_vjp = false, cons_jvp = false, cons_h = false, + lag_h = false) + adtype, soadtype = generate_sparse_adtype(adtype) + + if g == true && f.grad === nothing + prep_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p)) + function grad(θ) + gradient(f.f, prep_grad, adtype.dense_ad, θ, Constant(p)) + end + if p !== SciMLBase.NullParameters() && p !== nothing + function grad(θ, p) + gradient(f.f, prep_grad, adtype.dense_ad, θ, Constant(p)) + end + end + elseif g == true + grad = (θ, p = p) -> f.grad(θ, p) + else + grad = nothing + end + + if fg == true && f.fg === nothing + if g == false + prep_grad = prepare_gradient(f.f, adtype.dense_ad, x, Constant(p)) + end + function fg!(θ) + (y, G) = value_and_gradient(f.f, prep_grad, adtype.dense_ad, θ, Constant(p)) + return y, G + end + if p !== SciMLBase.NullParameters() && p !== nothing + function fg!(θ, p) + (y, G) = value_and_gradient(f.f, prep_grad, adtype.dense_ad, θ, Constant(p)) + return y, G + end + end + elseif fg == true + fg! = (θ, p = p) -> f.fg(θ, p) + else + fg! = nothing + end + + if fgh == true && f.fgh === nothing + function fgh!(θ) + (y, G, H) = value_derivative_and_second_derivative( + f.f, prep_hess, soadtype, θ, Constant(p)) + return y, G, H + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function fgh!(θ, p) + (y, G, H) = value_derivative_and_second_derivative( + f.f, prep_hess, soadtype, θ, Constant(p)) + return y, G, H + end + end + elseif fgh == true + fgh! = (θ, p = p) -> f.fgh(θ, p) + else + fgh! = nothing + end + + hess_sparsity = f.hess_prototype + hess_colors = f.hess_colorvec + if h == true && f.hess === nothing + prep_hess = prepare_hessian(f.f, soadtype, x, Constant(p)) + function hess(θ) + hessian(f.f, prep_hess, soadtype, θ, Constant(p)) + end + hess_sparsity = prep_hess.coloring_result.A + hess_colors = prep_hess.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function hess(θ, p) + hessian(f.f, prep_hess, soadtype, θ, Constant(p)) + end + end + elseif h == true + hess = (θ, p = p) -> f.hess(θ, p) + else + hess = nothing + end + + if hv == true && f.hv === nothing + prep_hvp = prepare_hvp( + f.f, soadtype.dense_ad, x, (zeros(eltype(x), size(x)),), Constant(p)) + function hv!(θ, v) + only(hvp(f.f, prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p))) + end + + if p !== SciMLBase.NullParameters() && p !== nothing + function hv!(θ, v, p) + only(hvp(f.f, prep_hvp, soadtype.dense_ad, θ, (v,), Constant(p))) + end + end + elseif hv == true + hv! = (θ, v, p = p) -> f.hv(θ, v, p) + else + hv! = nothing + end + + if f.cons === nothing + cons = nothing + else + cons = Base.Fix2(f.cons, p) + + function lagrangian(θ, σ, λ, p) + return σ * f.f(θ, p) + dot(λ, f.cons(θ, p)) + end + end + + cons_jac_prototype = f.cons_jac_prototype + cons_jac_colorvec = f.cons_jac_colorvec + if f.cons !== nothing && cons_j == true && f.cons_j === nothing + prep_jac = prepare_jacobian(f.cons, adtype, x, Constant(p)) + function cons_j!(θ) + J = jacobian(f.cons, prep_jac, adtype, θ, Constant(p)) + if size(J, 1) == 1 + J = vec(J) + end + return J + end + cons_jac_prototype = prep_jac.coloring_result.A + cons_jac_colorvec = prep_jac.coloring_result.color + elseif cons_j === true && f.cons !== nothing + cons_j! = (θ) -> f.cons_j(θ, p) + else + cons_j! 
= nothing + end + + if f.cons_vjp === nothing && cons_vjp == true && f.cons !== nothing + prep_pullback = prepare_pullback( + f.cons, adtype.dense_ad, x, (ones(eltype(x), num_cons),), Constant(p)) + function cons_vjp!(θ, v) + only(pullback(f.cons, prep_pullback, adtype.dense_ad, θ, (v,), Constant(p))) + end + elseif cons_vjp === true && f.cons !== nothing + cons_vjp! = (θ, v) -> f.cons_vjp(θ, v, p) + else + cons_vjp! = nothing + end + + if f.cons_jvp === nothing && cons_jvp == true && f.cons !== nothing + prep_pushforward = prepare_pushforward( + f.cons, adtype.dense_ad, x, (ones(eltype(x), length(x)),), Constant(p)) + function cons_jvp!(θ, v) + only(pushforward( + f.cons, prep_pushforward, adtype.dense_ad, θ, (v,), Constant(p))) + end + elseif cons_jvp === true && f.cons !== nothing + cons_jvp! = (θ, v) -> f.cons_jvp(θ, v, p) + else + cons_jvp! = nothing + end + + conshess_sparsity = f.cons_hess_prototype + conshess_colors = f.cons_hess_colorvec + if f.cons !== nothing && cons_h == true && f.cons_h === nothing + function cons_i(x, i) + f.cons(x, p)[i] + end + prep_cons_hess = [prepare_hessian(cons_i, soadtype, x, Constant(i)) + for i in 1:num_cons] + + function cons_h!(θ) + H = map(1:num_cons) do i + hessian(cons_i, prep_cons_hess[i], soadtype, θ, Constant(i)) + end + return H + end + colores = getfield.(prep_cons_hess, :coloring_result) + conshess_sparsity = getfield.(colores, :A) + conshess_colors = getfield.(colores, :color) + elseif cons_h == true && f.cons !== nothing + cons_h! = (res, θ) -> f.cons_h(res, θ, p) + else + cons_h! = nothing + end + + lag_hess_prototype = f.lag_hess_prototype + lag_hess_colors = f.lag_hess_colorvec + if f.cons !== nothing && lag_h == true && f.lag_h === nothing + lag_prep = prepare_hessian( + lagrangian, soadtype, x, Constant(one(eltype(x))), + Constant(ones(eltype(x), num_cons)), Constant(p)) + function lag_h!(θ, σ, λ) + if σ == zero(eltype(θ)) + return λ .* cons_h!(θ) + else + hess = hessian(lagrangian, lag_prep, soadtype, θ, + Constant(σ), Constant(λ), Constant(p)) + return hess + end + end + lag_hess_prototype = lag_prep.coloring_result.A + lag_hess_colors = lag_prep.coloring_result.color + + if p !== SciMLBase.NullParameters() && p !== nothing + function lag_h!(θ, σ, λ, p) + if σ == zero(eltype(θ)) + return λ .* cons_h!(θ) + else + hess = hessian( + lagrangian, lag_prep, θ, Constant(σ), Constant(λ), Constant(p)) + return hess + end + end + end + elseif lag_h == true && f.cons !== nothing + lag_h! = (θ, σ, μ, p = p) -> f.lag_h(θ, σ, μ, p) + else + lag_h! = nothing + end + return OptimizationFunction{false}(f.f, adtype; + grad = grad, fg = fg!, hess = hess, hv = hv!, fgh = fgh!, + cons = cons, cons_j = cons_j!, cons_h = cons_h!, + cons_vjp = cons_vjp!, cons_jvp = cons_jvp!, + hess_prototype = hess_sparsity, + hess_colorvec = hess_colors, + cons_jac_prototype = cons_jac_prototype, + cons_jac_colorvec = cons_jac_colorvec, + cons_hess_prototype = conshess_sparsity, + cons_hess_colorvec = conshess_colors, + lag_h = lag_h!, + lag_hess_prototype = lag_hess_prototype, + lag_hess_colorvec = lag_hess_colors, + sys = f.sys, + expr = f.expr, + cons_expr = f.cons_expr) +end + +function instantiate_function( + f::OptimizationFunction{false}, cache::OptimizationBase.ReInitCache, + adtype::ADTypes.AutoSparse{<:AbstractADType}, num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return instantiate_function(f, x, adtype, p, num_cons; kwargs...) 
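+    # Same cache-unpacking forwarder as the in-place case above, but for out-of-place
+    # (`OptimizationFunction{false}`) problems: every generated callable returns its result
+    # (gradient vector, Hessian matrix, Jacobian, ...) instead of writing into a
+    # preallocated buffer, e.g. `inst.grad(θ)` and `inst.hess(θ)` rather than
+    # `inst.grad(G, θ)` / `inst.hess(H, θ)`.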
+end diff --git a/lib/OptimizationBase/src/adtypes.jl b/lib/OptimizationBase/src/adtypes.jl new file mode 100644 index 000000000..071528dc4 --- /dev/null +++ b/lib/OptimizationBase/src/adtypes.jl @@ -0,0 +1,288 @@ +""" + AutoEnzyme <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: +```julia +OptimizationFunction(f, AutoEnzyme(); kwargs...) +``` +This uses the [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl) package. Enzyme performs automatic differentiation on the LLVM IR code generated from julia. +It is highly-efficient and its ability perform AD on optimized code allows Enzyme to meet or exceed the performance of state-of-the-art AD tools. + - Compatible with GPUs + - Compatible with Hessian-based optimization + - Compatible with Hv-based optimization + - Compatible with constraints +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the Hessian +is not defined via Enzyme. +""" +AutoEnzyme + +""" + AutoFiniteDiff{T1,T2,T3} <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoFiniteDiff(); kwargs...) +``` + +This uses [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl). +While not necessarily the most efficient, this is the only +choice that doesn't require the `f` function to be automatically +differentiable, which means it applies to any choice. However, because +it's using finite differencing, one needs to be careful as this procedure +introduces numerical error into the derivative estimates. + + - Compatible with GPUs + - Compatible with Hessian-based optimization + - Compatible with Hv-based optimization + - Compatible with constraint functions + +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not defined via FiniteDiff. + +## Constructor + +```julia +AutoFiniteDiff(; fdtype = Val(:forward)fdjtype = fdtype, fdhtype = Val(:hcentral)) +``` + + - `fdtype`: the method used for defining the gradient + - `fdjtype`: the method used for defining the Jacobian of constraints. + - `fdhtype`: the method used for defining the Hessian + +For more information on the derivative type specifiers, see the +[FiniteDiff.jl documentation](https://github.com/JuliaDiff/FiniteDiff.jl). +""" +AutoFiniteDiff + +""" + AutoForwardDiff{chunksize} <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoForwardDiff(); kwargs...) +``` + +This uses the [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) +package. It is the fastest choice for small systems, especially with +heavy scalar interactions. It is easy to use and compatible with most +Julia functions which have loose type restrictions. However, +because it's forward-mode, it scales poorly in comparison to other AD +choices. Hessian construction is suboptimal as it uses the forward-over-forward +approach. + + - Compatible with GPUs + - Compatible with Hessian-based optimization + - Compatible with Hv-based optimization + - Compatible with constraints + +Note that only the unspecified derivative functions are defined. 
For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not defined via ForwardDiff. +""" +AutoForwardDiff + +""" + AutoModelingToolkit <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoModelingToolkit(); kwargs...) +``` + +This uses the [ModelingToolkit.jl](https://github.com/SciML/ModelingToolkit.jl) +package's `modelingtookitize` functionality to generate the derivatives and other fields of an `OptimizationFunction`. +This backend creates the symbolic expressions for the objective and its derivatives as well as +the constraints and their derivatives. Through `structural_simplify`, it enforces simplifications +that can reduce the number of operations needed to compute the derivatives of the constraints. This automatically +generates the expression graphs that some solver interfaces through OptimizationMOI like +[AmplNLWriter.jl](https://github.com/jump-dev/AmplNLWriter.jl) require. + + - Compatible with GPUs + - Compatible with Hessian-based optimization + - Compatible with Hv-based optimization + - Compatible with constraints + +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not generated via ModelingToolkit. + +## Constructor + +```julia +AutoModelingToolkit(false, false) +``` + + - `obj_sparse`: to indicate whether the objective hessian is sparse. + - `cons_sparse`: to indicate whether the constraints' jacobian and hessian are sparse. + +""" +AutoModelingToolkit + +""" + AutoReverseDiff <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoReverseDiff(); kwargs...) +``` + +This uses the [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) +package. `AutoReverseDiff` has a default argument, `compile`, which +denotes whether the reverse pass should be compiled. **`compile` should only +be set to `true` if `f` contains no branches (if statements, while loops) +otherwise it can produce incorrect derivatives!** + +`AutoReverseDiff` is generally applicable to many pure Julia codes, +and with `compile=true` it is one of the fastest options on code with +heavy scalar interactions. Hessian calculations are fast by mixing +ForwardDiff with ReverseDiff for forward-over-reverse. However, its +performance can falter when `compile=false`. + + - Not compatible with GPUs + - Compatible with Hessian-based optimization by mixing with ForwardDiff + - Compatible with Hv-based optimization by mixing with ForwardDiff + - Not compatible with constraint functions + +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not defined via ReverseDiff. + +## Constructor + +```julia +AutoReverseDiff(; compile = false) +``` + +#### Note: currently, compilation is not defined/used! +""" +AutoReverseDiff + +""" + AutoTracker <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoTracker(); kwargs...) +``` + +This uses the [Tracker.jl](https://github.com/FluxML/Tracker.jl) package. 
+Generally slower than ReverseDiff, it is generally applicable to many +pure Julia codes. + + - Compatible with GPUs + - Not compatible with Hessian-based optimization + - Not compatible with Hv-based optimization + - Not compatible with constraint functions + +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not defined via Tracker. +""" +AutoTracker + +""" + AutoZygote <: AbstractADType + +An AbstractADType choice for use in OptimizationFunction for automatically +generating the unspecified derivative functions. Usage: + +```julia +OptimizationFunction(f, AutoZygote(); kwargs...) +``` + +This uses the [Zygote.jl](https://github.com/FluxML/Zygote.jl) package. +This is the staple reverse-mode AD that handles a large portion of +Julia with good efficiency. Hessian construction is fast via +forward-over-reverse mixing ForwardDiff.jl with Zygote.jl + + - Compatible with GPUs + - Compatible with Hessian-based optimization via ForwardDiff + - Compatible with Hv-based optimization via ForwardDiff + - Not compatible with constraint functions + +Note that only the unspecified derivative functions are defined. For example, +if a `hess` function is supplied to the `OptimizationFunction`, then the +Hessian is not defined via Zygote. +""" +AutoZygote + +function generate_adtype(adtype) + if !(adtype isa SciMLBase.NoAD || adtype isa DifferentiationInterface.SecondOrder || + adtype isa AutoZygote) + soadtype = DifferentiationInterface.SecondOrder(adtype, adtype) + elseif adtype isa AutoZygote + soadtype = DifferentiationInterface.SecondOrder(AutoForwardDiff(), adtype) + elseif adtype isa DifferentiationInterface.SecondOrder + soadtype = adtype + adtype = adtype.inner + elseif adtype isa SciMLBase.NoAD + soadtype = adtype + adtype = adtype + end + return adtype, soadtype +end + +function spadtype_to_spsoadtype(adtype) + if !(adtype.dense_ad isa SciMLBase.NoAD || + adtype.dense_ad isa DifferentiationInterface.SecondOrder || + adtype.dense_ad isa AutoZygote) + soadtype = AutoSparse( + DifferentiationInterface.SecondOrder(adtype.dense_ad, adtype.dense_ad), + sparsity_detector = adtype.sparsity_detector, + coloring_algorithm = adtype.coloring_algorithm) + elseif adtype.dense_ad isa AutoZygote + soadtype = AutoSparse( + DifferentiationInterface.SecondOrder(AutoForwardDiff(), adtype.dense_ad), + sparsity_detector = adtype.sparsity_detector, + coloring_algorithm = adtype.coloring_algorithm) + else + soadtype = adtype + end + return soadtype +end + +function filled_spad(adtype) + if adtype.sparsity_detector isa ADTypes.NoSparsityDetector && + adtype.coloring_algorithm isa ADTypes.NoColoringAlgorithm + adtype = AutoSparse(adtype.dense_ad; sparsity_detector = TracerSparsityDetector(), + coloring_algorithm = GreedyColoringAlgorithm()) + elseif adtype.sparsity_detector isa ADTypes.NoSparsityDetector && + !(adtype.coloring_algorithm isa ADTypes.NoColoringAlgorithm) + adtype = AutoSparse(adtype.dense_ad; sparsity_detector = TracerSparsityDetector(), + coloring_algorithm = adtype.coloring_algorithm) + elseif !(adtype.sparsity_detector isa ADTypes.NoSparsityDetector) && + adtype.coloring_algorithm isa ADTypes.NoColoringAlgorithm + adtype = AutoSparse(adtype.dense_ad; sparsity_detector = adtype.sparsity_detector, + coloring_algorithm = GreedyColoringAlgorithm()) + end +end + +function generate_sparse_adtype(adtype) + if !(adtype.dense_ad isa DifferentiationInterface.SecondOrder) + adtype = 
filled_spad(adtype) + soadtype = spadtype_to_spsoadtype(adtype) + else + soadtype = adtype + adtype = AutoSparse( + adtype.dense_ad.inner, + sparsity_detector = soadtype.sparsity_detector, + coloring_algorithm = soadtype.coloring_algorithm) + adtype = filled_spad(adtype) + soadtype = filled_spad(soadtype) + end + + return adtype, soadtype +end diff --git a/lib/OptimizationBase/src/augmented_lagrangian.jl b/lib/OptimizationBase/src/augmented_lagrangian.jl new file mode 100644 index 000000000..879090089 --- /dev/null +++ b/lib/OptimizationBase/src/augmented_lagrangian.jl @@ -0,0 +1,13 @@ +function generate_auglag(θ) + x = cache.f(θ, cache.p) + cons_tmp .= zero(eltype(θ)) + cache.f.cons(cons_tmp, θ) + cons_tmp[eq_inds] .= cons_tmp[eq_inds] - cache.lcons[eq_inds] + cons_tmp[ineq_inds] .= cons_tmp[ineq_inds] .- cache.ucons[ineq_inds] + opt_state = Optimization.OptimizationState(u = θ, objective = x[1]) + if cache.callback(opt_state, x...) + error("Optimization halted by callback.") + end + return x[1] + sum(@. λ * cons_tmp[eq_inds] + ρ / 2 * (cons_tmp[eq_inds] .^ 2)) + + 1 / (2 * ρ) * sum((max.(Ref(0.0), μ .+ (ρ .* cons_tmp[ineq_inds]))) .^ 2) +end diff --git a/lib/OptimizationBase/src/cache.jl b/lib/OptimizationBase/src/cache.jl new file mode 100644 index 000000000..7d529324b --- /dev/null +++ b/lib/OptimizationBase/src/cache.jl @@ -0,0 +1,98 @@ +isa_dataiterator(data) = false + +struct AnalysisResults{O, C} + objective::O + constraints::C +end + +struct OptimizationCache{F, RC, LB, UB, LC, UC, S, O, P, C, M} <: + SciMLBase.AbstractOptimizationCache + f::F + reinit_cache::RC + lb::LB + ub::UB + lcons::LC + ucons::UC + sense::S + opt::O + progress::P + callback::C + manifold::M + analysis_results::AnalysisResults + solver_args::NamedTuple +end + +function OptimizationCache(prob::SciMLBase.OptimizationProblem, opt; + callback = DEFAULT_CALLBACK, + maxiters::Union{Number, Nothing} = nothing, + maxtime::Union{Number, Nothing} = nothing, + abstol::Union{Number, Nothing} = nothing, + reltol::Union{Number, Nothing} = nothing, + progress = false, + structural_analysis = false, + manifold = nothing, + kwargs...) + if isa_dataiterator(prob.p) + reinit_cache = OptimizationBase.ReInitCache(prob.u0, iterate(prob.p)[1]) + reinit_cache_passedon = OptimizationBase.ReInitCache(prob.u0, prob.p) + else + reinit_cache = OptimizationBase.ReInitCache(prob.u0, prob.p) + reinit_cache_passedon = reinit_cache + end + + num_cons = prob.ucons === nothing ? 0 : length(prob.ucons) + + if !(prob.f.adtype isa DifferentiationInterface.SecondOrder || + prob.f.adtype isa AutoZygote) && + (SciMLBase.requireshessian(opt) || SciMLBase.requiresconshess(opt) || + SciMLBase.requireslagh(opt)) + @warn "The selected optimization algorithm requires second order derivatives, but `SecondOrder` ADtype was not provided. + So a `SecondOrder` with $(prob.f.adtype) for both inner and outer will be created, this can be suboptimal and not work in some cases so + an explicit `SecondOrder` ADtype is recommended." + elseif prob.f.adtype isa AutoZygote && + (SciMLBase.requiresconshess(opt) || SciMLBase.requireslagh(opt) || + SciMLBase.requireshessian(opt)) + @warn "The selected optimization algorithm requires second order derivatives, but `AutoZygote` ADtype was provided. + So a `SecondOrder` with `AutoZygote` for inner and `AutoForwardDiff` for outer will be created, for choosing another pair + an explicit `SecondOrder` ADtype is recommended." 
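+        # In both branches the fallback in `generate_adtype` builds the `SecondOrder`
+        # pairing automatically. To choose the pairing explicitly (and silence these
+        # warnings), construct the problem with a `SecondOrder` ADtype up front, e.g.
+        # (sketch, hypothetical objective `f`):
+        #
+        #     using DifferentiationInterface: SecondOrder
+        #     optf = OptimizationFunction(f, SecondOrder(AutoForwardDiff(), AutoZygote()))
+        #     prob = OptimizationProblem(optf, u0, p)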
+ end + + f = OptimizationBase.instantiate_function( + prob.f, reinit_cache, prob.f.adtype, num_cons; + g = SciMLBase.requiresgradient(opt), h = SciMLBase.requireshessian(opt), + hv = SciMLBase.requireshessian(opt), fg = SciMLBase.allowsfg(opt), + fgh = SciMLBase.allowsfgh(opt), cons_j = SciMLBase.requiresconsjac(opt), cons_h = SciMLBase.requiresconshess(opt), + cons_vjp = SciMLBase.allowsconsjvp(opt), cons_jvp = SciMLBase.allowsconsjvp(opt), lag_h = SciMLBase.requireslagh(opt), sense = prob.sense) + + if structural_analysis + obj_res, cons_res = symify_cache(f, prob, num_cons, manifold) + else + obj_res = nothing + cons_res = nothing + end + + return OptimizationCache(f, reinit_cache_passedon, prob.lb, prob.ub, prob.lcons, + prob.ucons, prob.sense, + opt, progress, callback, manifold, AnalysisResults(obj_res, cons_res), + merge((; maxiters, maxtime, abstol, reltol), + NamedTuple(kwargs))) +end + +function SciMLBase.__init(prob::SciMLBase.OptimizationProblem, opt; + callback = DEFAULT_CALLBACK, + maxiters::Union{Number, Nothing} = nothing, + maxtime::Union{Number, Nothing} = nothing, + abstol::Union{Number, Nothing} = nothing, + reltol::Union{Number, Nothing} = nothing, + progress = false, + kwargs...) + return OptimizationCache(prob, opt; maxiters, maxtime, abstol, callback, + reltol, progress, + kwargs...) +end + +# Wrapper for fields that may change in `reinit!(cache)` of a cache. +mutable struct ReInitCache{uType, P} + u0::uType + p::P +end diff --git a/lib/OptimizationBase/src/function.jl b/lib/OptimizationBase/src/function.jl new file mode 100644 index 000000000..822a3587e --- /dev/null +++ b/lib/OptimizationBase/src/function.jl @@ -0,0 +1,242 @@ + +function symbolify(e::Expr) + if !(e.args[1] isa Symbol) + e.args[1] = Symbol(e.args[1]) + end + symbolify.(e.args) + return e +end + +function symbolify(e) + return e +end + +function rep_pars_vals!(e::Expr, p) + rep_pars_vals!.(e.args, Ref(p)) + replace!(e.args, p...) +end + +function rep_pars_vals!(e, p) end + +""" + instantiate_function(f, x, ::AbstractADType, p, num_cons = 0)::OptimizationFunction + +This function is used internally by Optimization.jl to construct +the necessary extra functions (gradients, Hessians, etc.) before +optimization. Each of the ADType dispatches use the supplied automatic +differentiation type in order to specify how the construction process +occurs. + +If no ADType is given, then the default `NoAD` dispatch simply +defines closures on any supplied gradient function to enclose the +parameters to match the interfaces for the specific optimization +libraries (i.e. (G,x)->f.grad(G,x,p)). If a function is not given +and the `NoAD` dispatch is used, or if the AD dispatch is currently +not capable of defining said derivative, then the constructed +`OptimizationFunction` will simply use `nothing` to specify and undefined +function. + +The return of `instantiate_function` is an `OptimizationFunction` which +is then used in the optimization process. If an optimizer requires a +function that is not defined, an error is thrown. + +For more information on the use of automatic differentiation, see the +documentation of the `AbstractADType` types. +""" +function OptimizationBase.instantiate_function( + f::MultiObjectiveOptimizationFunction, x, ::SciMLBase.NoAD, + p, num_cons = 0; kwargs...) + jac = f.jac === nothing ? nothing : (J, x, args...) -> f.jac(J, x, p, args...) + hess = f.hess === nothing ? nothing : + [(H, x, args...) -> h(H, x, p, args...) for h in f.hess] + hv = f.hv === nothing ? 
nothing : (H, x, v, args...) -> f.hv(H, x, v, p, args...) + cons = f.cons === nothing ? nothing : (res, x) -> f.cons(res, x, p) + cons_j = f.cons_j === nothing ? nothing : (res, x) -> f.cons_j(res, x, p) + cons_jvp = f.cons_jvp === nothing ? nothing : (res, x) -> f.cons_jvp(res, x, p) + cons_vjp = f.cons_vjp === nothing ? nothing : (res, x) -> f.cons_vjp(res, x, p) + cons_h = f.cons_h === nothing ? nothing : (res, x) -> f.cons_h(res, x, p) + hess_prototype = f.hess_prototype === nothing ? nothing : + convert.(eltype(x), f.hess_prototype) + cons_jac_prototype = f.cons_jac_prototype === nothing ? nothing : + convert.(eltype(x), f.cons_jac_prototype) + cons_hess_prototype = f.cons_hess_prototype === nothing ? nothing : + [convert.(eltype(x), f.cons_hess_prototype[i]) + for i in 1:num_cons] + expr = symbolify(f.expr) + cons_expr = symbolify.(f.cons_expr) + + return MultiObjectiveOptimizationFunction{true}( + f.f, SciMLBase.NoAD(); jac = jac, hess = hess, + hv = hv, + cons = cons, cons_j = cons_j, cons_jvp = cons_jvp, cons_vjp = cons_vjp, cons_h = cons_h, + hess_prototype = hess_prototype, + cons_jac_prototype = cons_jac_prototype, + cons_hess_prototype = cons_hess_prototype, + expr = expr, cons_expr = cons_expr, + sys = f.sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::MultiObjectiveOptimizationFunction, cache::ReInitCache, ::SciMLBase.NoAD, + num_cons = 0; kwargs...) + jac = f.jac === nothing ? nothing : (J, x, args...) -> f.jac(J, x, cache.p, args...) + hess = f.hess === nothing ? nothing : + [(H, x, args...) -> h(H, x, cache.p, args...) for h in f.hess] + hv = f.hv === nothing ? nothing : (H, x, v, args...) -> f.hv(H, x, v, cache.p, args...) + cons = f.cons === nothing ? nothing : (res, x) -> f.cons(res, x, cache.p) + cons_j = f.cons_j === nothing ? nothing : (res, x) -> f.cons_j(res, x, cache.p) + cons_jvp = f.cons_jvp === nothing ? nothing : (res, x) -> f.cons_jvp(res, x, cache.p) + cons_vjp = f.cons_vjp === nothing ? nothing : (res, x) -> f.cons_vjp(res, x, cache.p) + cons_h = f.cons_h === nothing ? nothing : (res, x) -> f.cons_h(res, x, cache.p) + hess_prototype = f.hess_prototype === nothing ? nothing : + convert.(eltype(cache.u0), f.hess_prototype) + cons_jac_prototype = f.cons_jac_prototype === nothing ? nothing : + convert.(eltype(cache.u0), f.cons_jac_prototype) + cons_hess_prototype = f.cons_hess_prototype === nothing ? nothing : + [convert.(eltype(cache.u0), f.cons_hess_prototype[i]) + for i in 1:num_cons] + expr = symbolify(f.expr) + cons_expr = symbolify.(f.cons_expr) + + return MultiObjectiveOptimizationFunction{true}( + f.f, SciMLBase.NoAD(); jac = jac, hess = hess, + hv = hv, + cons = cons, cons_j = cons_j, cons_jvp = cons_jvp, cons_vjp = cons_vjp, cons_h = cons_h, + hess_prototype = hess_prototype, + cons_jac_prototype = cons_jac_prototype, + cons_hess_prototype = cons_hess_prototype, + expr = expr, cons_expr = cons_expr, + sys = f.sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, x, ::SciMLBase.NoAD, + p, num_cons = 0; sense, kwargs...) 
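+    # `NoAD` dispatch: no derivatives are generated here. Each user-supplied callable
+    # (`grad`, `fg`, `hess`, `fgh`, `hv`, `cons*`, `lag_h`) is only wrapped in a closure
+    # that fixes the parameters `p`, with an extra `(..., p)` method kept when `p` is not
+    # `NullParameters()`. Anything the user did not supply stays `nothing`, and the
+    # objective is negated below when `sense == MaxSense` so maximization problems can be
+    # handed to minimizers unchanged.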
+ if f.grad === nothing + grad = nothing + else + function grad(G, x) + return f.grad(G, x, p) + end + if p != SciMLBase.NullParameters() + function grad(G, x, p) + return f.grad(G, x, p) + end + end + end + if f.fg === nothing + fg = nothing + else + function fg(G, x) + return f.fg(G, x, p) + end + if p != SciMLBase.NullParameters() + function fg(G, x, p) + return f.fg(G, x, p) + end + end + end + if f.hess === nothing + hess = nothing + else + function hess(H, x) + return f.hess(H, x, p) + end + if p != SciMLBase.NullParameters() + function hess(H, x, p) + return f.hess(H, x, p) + end + end + end + + if f.fgh === nothing + fgh = nothing + else + function fgh(G, H, x) + return f.fgh(G, H, x, p) + end + if p != SciMLBase.NullParameters() + function fgh(G, H, x, p) + return f.fgh(G, H, x, p) + end + end + end + + if f.hv === nothing + hv = nothing + else + function hv(H, x, v) + return f.hv(H, x, v, p) + end + if p != SciMLBase.NullParameters() + function hv(H, x, v, p) + return f.hv(H, x, v, p) + end + end + end + + cons = f.cons === nothing ? nothing : (res, x) -> f.cons(res, x, p) + cons_j = f.cons_j === nothing ? nothing : (res, x) -> f.cons_j(res, x, p) + cons_vjp = f.cons_vjp === nothing ? nothing : (res, x) -> f.cons_vjp(res, x, p) + cons_jvp = f.cons_jvp === nothing ? nothing : (res, x) -> f.cons_jvp(res, x, p) + cons_h = f.cons_h === nothing ? nothing : (res, x) -> f.cons_h(res, x, p) + + if f.lag_h === nothing + lag_h = nothing + else + function lag_h(res, x) + return f.lag_h(res, x, p) + end + if p != SciMLBase.NullParameters() + function lag_h(res, x, p) + return f.lag_h(res, x, p) + end + end + end + hess_prototype = f.hess_prototype === nothing ? nothing : + convert.(eltype(x), f.hess_prototype) + cons_jac_prototype = f.cons_jac_prototype === nothing ? nothing : + convert.(eltype(x), f.cons_jac_prototype) + cons_hess_prototype = f.cons_hess_prototype === nothing ? nothing : + [convert.(eltype(x), f.cons_hess_prototype[i]) + for i in 1:num_cons] + expr = symbolify(f.expr) + cons_expr = symbolify.(f.cons_expr) + + obj_f = (x,p) -> sense == MaxSense ? -1.0* f.f(x,p) : f.f(x,p) + return OptimizationFunction{true}(obj_f, SciMLBase.NoAD(); + grad = grad, fg = fg, hess = hess, fgh = fgh, hv = hv, + cons = cons, cons_j = cons_j, cons_h = cons_h, + cons_vjp = cons_vjp, cons_jvp = cons_jvp, + lag_h = lag_h, + hess_prototype = hess_prototype, + cons_jac_prototype = cons_jac_prototype, + cons_hess_prototype = cons_hess_prototype, + expr = expr, cons_expr = cons_expr, + sys = f.sys, + observed = f.observed) +end + +function OptimizationBase.instantiate_function( + f::OptimizationFunction{true}, cache::ReInitCache, ::SciMLBase.NoAD, + num_cons = 0; kwargs...) + x = cache.u0 + p = cache.p + + return instantiate_function(f, x, SciMLBase.NoAD(), p, num_cons; kwargs...) +end + +function instantiate_function(f::OptimizationFunction, x, adtype::ADTypes.AbstractADType, + p, num_cons = 0; kwargs...) + adtypestr = string(adtype) + _strtind = findfirst('.', adtypestr) + strtind = isnothing(_strtind) ? 5 : _strtind + 5 + open_nrmlbrkt_ind = findfirst('(', adtypestr) + open_squigllybrkt_ind = findfirst('{', adtypestr) + open_brkt_ind = isnothing(open_squigllybrkt_ind) ? open_nrmlbrkt_ind : + min(open_nrmlbrkt_ind, open_squigllybrkt_ind) + adpkg = adtypestr[strtind:(open_brkt_ind - 1)] + throw(ArgumentError("The passed automatic differentiation backend choice is not available. 
Please load the corresponding AD package $adpkg.")) +end diff --git a/lib/OptimizationBase/src/symify.jl b/lib/OptimizationBase/src/symify.jl new file mode 100644 index 000000000..43f353165 --- /dev/null +++ b/lib/OptimizationBase/src/symify.jl @@ -0,0 +1,3 @@ +function symify_cache(f::OptimizationFunction, prob, num_cons, manifold) + throw("Structural analysis requires SymbolicAnalysis.jl to be loaded, either add `using SymbolicAnalysis` to your script or set `structural_analysis = false`.") +end diff --git a/lib/OptimizationBase/test/Project.toml b/lib/OptimizationBase/test/Project.toml new file mode 100644 index 000000000..25549349b --- /dev/null +++ b/lib/OptimizationBase/test/Project.toml @@ -0,0 +1,45 @@ +[deps] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" +ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationManopt = "e57b7fff-7ee7-4550-b4f0-90e9476e9fb6" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseConnectivityTracer = "9f842d2f-2579-4b1d-911e-f412cf18a3f5" +SymbolicAnalysis = "4297ee4d-0239-47d8-ba5d-195ecdf594fe" +Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[compat] +Aqua = "0.8" +ComponentArrays = ">= 0.13.9" +Enzyme = "0.13" +IterTools = ">= 1.3.0" +Lux = "1.12" +Manifolds = "0.9" +Optim = ">= 1.4.1" +Optimisers = ">= 0.2.5" +Optimization = "4" +OptimizationManopt = "0.0.4" +SparseConnectivityTracer = "0.6" +SymbolicAnalysis = "0.3.0" +SafeTestsets = ">= 0.0.1" diff --git a/lib/OptimizationBase/test/adtests.jl b/lib/OptimizationBase/test/adtests.jl new file mode 100644 index 000000000..6fe4eea05 --- /dev/null +++ b/lib/OptimizationBase/test/adtests.jl @@ -0,0 +1,1189 @@ +using OptimizationBase, Test, DifferentiationInterface, SparseArrays, Symbolics +using ForwardDiff, Zygote, ReverseDiff, FiniteDiff, Tracker +using ModelingToolkit, Enzyme, Random + +x0 = zeros(2) +rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2 +l1 = rosenbrock(x0) + +function g!(G, x) + G[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1] + G[2] = 200.0 * (x[2] - x[1]^2) +end + +function h!(H, x) + H[1, 1] = 2.0 - 400.0 * x[2] + 1200.0 * x[1]^2 + H[1, 2] = -400.0 * x[1] + H[2, 1] = -400.0 * x[1] + H[2, 2] = 200.0 +end + +G1 = Array{Float64}(undef, 2) +G2 = Array{Float64}(undef, 2) +H1 = Array{Float64}(undef, 2, 2) +H2 = Array{Float64}(undef, 2, 2) + +g!(G1, x0) +h!(H1, x0) + +cons = (res, x, p) -> 
(res[1] = x[1]^2 + x[2]^2; return nothing) +optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoModelingToolkit(), cons = cons) +optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoModelingToolkit(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) +optprob.grad(G2, x0) +@test G1 == G2 +optprob.hess(H2, x0) +@test H1 == H2 +res = Array{Float64}(undef, 1) +optprob.cons(res, x0) +@test res == [0.0] +J = Array{Float64}(undef, 2) +optprob.cons_j(J, [5.0, 3.0]) +@test J == [10.0, 6.0] +H3 = [Array{Float64}(undef, 2, 2)] +optprob.cons_h(H3, x0) +@test H3 == [[2.0 0.0; 0.0 2.0]] + +function con2_c(res, x, p) + res[1] = x[1]^2 + x[2]^2 + res[2] = x[2] * sin(x[1]) - x[1] + return nothing +end +optf = OptimizationFunction(rosenbrock, + OptimizationBase.AutoModelingToolkit(), + cons = con2_c) +optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoModelingToolkit(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) +optprob.grad(G2, x0) +@test G1 == G2 +optprob.hess(H2, x0) +@test H1 == H2 +res = Array{Float64}(undef, 2) +optprob.cons(res, x0) +@test res == [0.0, 0.0] +J = Array{Float64}(undef, 2, 2) +optprob.cons_j(J, [5.0, 3.0]) +@test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) +H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] +optprob.cons_h(H3, x0) +@test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + +@testset "one constraint tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoForwardDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] 
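+    # The Lagrangian Hessian tests below rely on L(θ, σ, μ) = σ * f(θ) + μ' * c(θ), so for a
+    # single constraint `lag_h(H, x, σ, μ)` should reproduce σ * ∇²f(x) + μ[1] * ∇²c₁(x),
+    # which is exactly what `@test H4 ≈ σ * H2 + μ[1] * H3[1]` verifies.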
+ H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoReverseDiff(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoReverseDiff(true), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, AutoZygote(), cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, AutoZygote(), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res == [0.0] + J = Array{Float64}(undef, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J == [10.0, 6.0] + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ == [10.0, 6.0] + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == [8.0] + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0]] + H4 = 
Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction(rosenbrock, + DifferentiationInterface.SecondOrder( + ADTypes.AutoFiniteDiff(), ADTypes.AutoReverseDiff()), + cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, + DifferentiationInterface.SecondOrder( + ADTypes.AutoFiniteDiff(), ADTypes.AutoReverseDiff()), + nothing, 1, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 1) + optprob.cons(res, x0) + @test res ≈ [0.0] + J = Array{Float64}(undef, 1, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test J≈[10.0 6.0] rtol=1e-5 + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0]) + @test vJ≈[10.0, 6.0] rtol=1e-5 + Jv = Array{Float64}(undef, 1) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈[8.0] rtol=1e-5 + H3 = [Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3≈[[2.0 0.0; 0.0 2.0]] rtol=1e-5 + Random.seed!(123) + H4 = Array{Float64}(undef, 2, 2) + μ = randn(1) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H2 + μ[1] * H3[1] rtol=1e-6 +end + +@testset "two constraints tests" begin + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoEnzyme(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv ≈ 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 
6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoReverseDiff(true), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(true), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, OptimizationBase.AutoForwardDiff(), cons = con2_c) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoForwardDiff(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, AutoZygote(), cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, AutoZygote(), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + 
@test G1 == G2 + optprob.hess(H2, x0) + @test H1 == H2 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv == [2.0, 200.0] + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res == [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ == sum(J, dims = 1)[:] + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv == 0.5 * sum(J, dims = 2)[:] + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3 == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 + + G2 = Array{Float64}(undef, 2) + H2 = Array{Float64}(undef, 2, 2) + + optf = OptimizationFunction( + rosenbrock, DifferentiationInterface.SecondOrder( + ADTypes.AutoFiniteDiff(), ADTypes.AutoReverseDiff()), + cons = con2_c) + optprob = OptimizationBase.instantiate_function( + optf, x0, + DifferentiationInterface.SecondOrder( + ADTypes.AutoFiniteDiff(), ADTypes.AutoReverseDiff()), + nothing, 2, g = true, h = true, hv = true, + cons_j = true, cons_h = true, cons_vjp = true, + cons_jvp = true, lag_h = true) + optprob.grad(G2, x0) + @test G1≈G2 rtol=1e-5 + optprob.hess(H2, x0) + @test H1≈H2 rtol=1e-5 + Hv = Array{Float64}(undef, 2) + optprob.hv(Hv, x0, [1.0, 1.0]) + @test Hv≈[2.0, 200.0] rtol=1e-5 + res = Array{Float64}(undef, 2) + optprob.cons(res, x0) + @test res ≈ [0.0, 0.0] + J = Array{Float64}(undef, 2, 2) + optprob.cons_j(J, [5.0, 3.0]) + @test all(isapprox(J, [10.0 6.0; -0.149013 -0.958924]; rtol = 1e-3)) + vJ = Array{Float64}(undef, 2) + optprob.cons_vjp(vJ, [5.0, 3.0], [1.0, 1.0]) + @test vJ≈sum(J, dims = 1)[:] rtol=1e-5 + Jv = Array{Float64}(undef, 2) + optprob.cons_jvp(Jv, [5.0, 3.0], [0.5, 0.5]) + @test Jv≈0.5 * sum(J, dims = 2)[:] rtol=1e-5 + H3 = [Array{Float64}(undef, 2, 2), Array{Float64}(undef, 2, 2)] + optprob.cons_h(H3, x0) + @test H3≈[[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] rtol=1e-5 + H4 = Array{Float64}(undef, 2, 2) + μ = randn(2) + σ = rand() + optprob.lag_h(H4, x0, σ, μ) + @test H4≈σ * H1 + sum(μ .* H3) rtol=1e-6 +end + +@testset "Sparse Tests" begin + # Define a sparse objective function + function sparse_objective(x, p) + return x[1]^2 + 100 * (x[3] - x[2]^2)^2 + end + + # Define sparse constraints + function sparse_constraints(res, x, p) + res[1] = x[1] + x[2] + (x[2] * x[3])^2 - 1 + res[2] = x[1]^2 + x[3]^2 - 1 + end + + # Initial point + x0 = [0.5, 0.5, 0.5] + + # Create OptimizationFunction + optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseForwardDiff(), + cons = sparse_constraints) + + # Instantiate the optimization problem + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true) + # Test gradient + G = zeros(3) + optprob.grad(G, x0) + @test G ≈ [1.0, -50.0, 50.0] + + # Test Hessian + H_expected = sparse( + [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3) + H = similar(optprob.hess_prototype, Float64) + optprob.hess(H, x0) + @test H ≈ H_expected + @test nnz(H) == 5 # Check sparsity + + # Test constraints + res = zeros(2) + optprob.cons(res, x0) + @test res ≈ [0.0625, -0.5] + + # Test constraint Jacobian + J_expected = 
sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3) + J = similar(optprob.cons_jac_prototype, Float64) + optprob.cons_j(J, x0) + @test J ≈ J_expected + @test nnz(J) == 5 # Check sparsity + + # Test constraint Hessians + H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3), + sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)] + H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype] + optprob.cons_h(H_cons, x0) + @test all(H_cons .≈ H_cons_expected) + @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity + + lag_H_expected = sparse( + [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3) + σ = 1.0 + λ = [1.0, 2.0] + lag_H = similar(optprob.lag_hess_prototype, Float64) + optprob.lag_h(lag_H, x0, σ, λ) + @test lag_H ≈ lag_H_expected + @test nnz(lag_H) == 5 + + optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseReverseDiff(), + cons = sparse_constraints) + + # Instantiate the optimization problem + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true) + # Test gradient + G = zeros(3) + optprob.grad(G, x0) + @test G ≈ [1.0, -50.0, 50.0] + + # Test Hessian + H_expected = sparse( + [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3) + H = similar(optprob.hess_prototype, Float64) + optprob.hess(H, x0) + @test H ≈ H_expected + @test nnz(H) == 5 # Check sparsity + + # Test constraints + res = zeros(2) + optprob.cons(res, x0) + @test res ≈ [0.0625, -0.5] + + # Test constraint Jacobian + J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3) + J = similar(optprob.cons_jac_prototype, Float64) + optprob.cons_j(J, x0) + @test J ≈ J_expected + @test nnz(J) == 5 # Check sparsity + + # Test constraint Hessians + H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3), + sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)] + H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype] + optprob.cons_h(H_cons, x0) + @test all(H_cons .≈ H_cons_expected) + @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity + + lag_H_expected = sparse( + [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3) + σ = 1.0 + λ = [1.0, 2.0] + lag_H = similar(optprob.lag_hess_prototype, Float64) + optprob.lag_h(lag_H, x0, σ, λ) + @test lag_H ≈ lag_H_expected + @test nnz(lag_H) == 5 + + optf = OptimizationFunction( + sparse_objective, OptimizationBase.AutoSparseReverseDiff(true), + cons = sparse_constraints) + + # Instantiate the optimization problem + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true) + # Test gradient + G = zeros(3) + optprob.grad(G, x0) + @test G ≈ [1.0, -50.0, 50.0] + + # Test Hessian + H_expected = sparse( + [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3) + H = similar(optprob.hess_prototype, Float64) + optprob.hess(H, x0) + @test H ≈ H_expected + @test nnz(H) == 5 # Check sparsity + + # Test constraints + res = zeros(2) + optprob.cons(res, x0) + @test res ≈ [0.0625, -0.5] + + # Test constraint Jacobian + J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3) + J = similar(optprob.cons_jac_prototype, Float64) + optprob.cons_j(J, x0) + @test J ≈ J_expected + @test nnz(J) == 5 # Check sparsity + + # Test 
constraint Hessians + H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3), + sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)] + H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype] + optprob.cons_h(H_cons, x0) + @test all(H_cons .≈ H_cons_expected) + @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity + + lag_H_expected = sparse( + [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3) + σ = 1.0 + λ = [1.0, 2.0] + lag_H = similar(optprob.lag_hess_prototype, Float64) + optprob.lag_h(lag_H, x0, σ, λ) + @test lag_H ≈ lag_H_expected + @test nnz(lag_H) == 5 + + optf = OptimizationFunction(sparse_objective, OptimizationBase.AutoSparseFiniteDiff(), + cons = sparse_constraints) + + # Instantiate the optimization problem + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true) + # Test gradient + G = zeros(3) + optprob.grad(G, x0) + @test G ≈ [1.0, -50.0, 50.0] + + # Test Hessian + H_expected = sparse( + [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3) + H = similar(optprob.hess_prototype, Float64) + optprob.hess(H, x0) + @test H ≈ H_expected + @test nnz(H) == 5 # Check sparsity + + # Test constraints + res = zeros(2) + optprob.cons(res, x0) + @test res ≈ [0.0625, -0.5] + + # Test constraint Jacobian + J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3) + J = similar(optprob.cons_jac_prototype, Float64) + optprob.cons_j(J, x0) + @test J ≈ J_expected + @test nnz(J) == 5 # Check sparsity + + # Test constraint Hessians + H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3), + sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)] + H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype] + optprob.cons_h(H_cons, x0) + @test all(H_cons .≈ H_cons_expected) + @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity + + lag_H_expected = sparse( + [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3) + σ = 1.0 + λ = [1.0, 2.0] + lag_H = similar(optprob.lag_hess_prototype, Float64) + optprob.lag_h(lag_H, x0, σ, λ) + @test lag_H ≈ lag_H_expected + @test nnz(lag_H) == 5 + + optf = OptimizationFunction(sparse_objective, + AutoSparse(DifferentiationInterface.SecondOrder( + ADTypes.AutoForwardDiff(), ADTypes.AutoZygote())), + cons = sparse_constraints) + + # Instantiate the optimization problem + optprob = OptimizationBase.instantiate_function(optf, x0, + AutoSparse(DifferentiationInterface.SecondOrder( + ADTypes.AutoForwardDiff(), ADTypes.AutoZygote())), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true, lag_h = true) + # Test gradient + G = zeros(3) + optprob.grad(G, x0) + @test G ≈ [1.0, -50.0, 50.0] + + # Test Hessian + H_expected = sparse( + [1, 2, 2, 3, 3], [1, 2, 3, 2, 3], [2.0, 100.0, -200.0, -200.0, 200.0], 3, 3) + H = similar(optprob.hess_prototype, Float64) + optprob.hess(H, x0) + @test H ≈ H_expected + @test nnz(H) == 5 # Check sparsity + + # Test constraints + res = zeros(2) + optprob.cons(res, x0) + @test res ≈ [0.0625, -0.5] + + # Test constraint Jacobian + J_expected = sparse([1, 1, 1, 2, 2], [1, 2, 3, 1, 3], [1.0, 1.25, 0.25, 1.0, 1.0], 2, 3) + J = similar(optprob.cons_jac_prototype, Float64) + optprob.cons_j(J, x0) + @test J ≈ J_expected + @test nnz(J) == 5 # Check sparsity + + # Test constraint Hessians + H_cons_expected = [sparse([2, 2, 3, 3], [2, 3, 2, 3], [0.5, 1.0, 1.0, 0.5], 3, 3), + 
sparse([1, 3], [1, 3], [2.0, 2.0], 3, 3)] + H_cons = [similar(h, Float64) for h in optprob.cons_hess_prototype] + optprob.cons_h(H_cons, x0) + @test all(H_cons .≈ H_cons_expected) + @test all(nnz.(H_cons) .== [4, 2]) # Check sparsity + + lag_H_expected = sparse( + [1, 2, 3, 2, 3], [1, 2, 2, 3, 3], [6.0, 100.5, -199.0, -199.0, 204.5], 3, 3) + σ = 1.0 + λ = [1.0, 2.0] + lag_H = similar(optprob.lag_hess_prototype, Float64) + optprob.lag_h(lag_H, x0, σ, λ) + @test lag_H ≈ lag_H_expected + @test nnz(lag_H) == 5 +end + +@testset "OOP" begin + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoEnzyme(), + cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoEnzyme(), + cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, OptimizationBase.AutoEnzyme(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoFiniteDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoFiniteDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0)≈G1 rtol=1e-6 + @test optprob.hess(x0)≈H1 rtol=1e-6 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0])≈[10.0, 6.0] rtol=1e-6 + + @test optprob.cons_h(x0) ≈ [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoFiniteDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoFiniteDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0)≈G1 rtol=1e-6 + @test optprob.hess(x0)≈H1 rtol=1e-6 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test optprob.cons_h(x0) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoForwardDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoForwardDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoForwardDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h 
= true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoReverseDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoReverseDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoReverseDiff(true), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(true), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoReverseDiff(true), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoReverseDiff(true), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseForwardDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseForwardDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test Array(optprob.cons_j([5.0, 3.0]))≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test Array.(optprob.cons_h(x0)) ≈ [[2.0 
0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseFiniteDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseFiniteDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0)≈G1 rtol=1e-4 + @test Array(optprob.hess(x0)) ≈ H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) ≈ [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseFiniteDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseForwardDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test Array(optprob.cons_j([5.0, 3.0]))≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test Array.(optprob.cons_h(x0)) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseReverseDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseReverseDiff(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseReverseDiff(), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseReverseDiff(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test Array(optprob.cons_j([5.0, 3.0]))≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test Array.(optprob.cons_h(x0)) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseReverseDiff(true), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseReverseDiff(true), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + OptimizationBase.AutoSparseReverseDiff(true), + cons = cons) + optprob = OptimizationBase.instantiate_function(optf, x0, + OptimizationBase.AutoSparseReverseDiff(true), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test Array(optprob.cons_j([5.0, 3.0]))≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test Array.(optprob.cons_h(x0)) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2] + optf = OptimizationFunction{false}(rosenbrock, + 
AutoZygote(), + cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, AutoZygote(), + nothing, 1, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test optprob.hess(x0) == H1 + @test optprob.cons(x0) == [0.0] + + @test optprob.cons_j([5.0, 3.0]) == [10.0, 6.0] + + @test optprob.cons_h(x0) == [[2.0 0.0; 0.0 2.0]] + + cons = (x, p) -> [x[1]^2 + x[2]^2, x[2] * sin(x[1]) - x[1]] + optf = OptimizationFunction{false}(rosenbrock, + AutoZygote(), + cons = cons) + optprob = OptimizationBase.instantiate_function( + optf, x0, AutoZygote(), + nothing, 2, g = true, h = true, cons_j = true, cons_h = true) + + @test optprob.grad(x0) == G1 + @test Array(optprob.hess(x0)) ≈ H1 + @test optprob.cons(x0) == [0.0, 0.0] + @test optprob.cons_j([5.0, 3.0])≈[10.0 6.0; -0.149013 -0.958924] rtol=1e-6 + @test Array.(optprob.cons_h(x0)) ≈ [[2.0 0.0; 0.0 2.0], [-0.0 1.0; 1.0 0.0]] +end + +using MLUtils + +@testset "Stochastic gradient" begin + x0 = rand(10000) + y0 = sin.(x0) + data = MLUtils.DataLoader((x0, y0), batchsize = 100) + + function loss(coeffs, data) + ypred = [evalpoly(data[1][i], coeffs) for i in eachindex(data[1])] + return sum(abs2, ypred .- data[2]) + end + + optf = OptimizationFunction(loss, AutoForwardDiff()) + optf = OptimizationBase.instantiate_function( + optf, rand(3), AutoForwardDiff(), iterate(data)[1], g = true, fg = true) + G0 = zeros(3) + optf.grad(G0, ones(3), (x0, y0)) + stochgrads = [] + i = 0 + for (x, y) in data + G = zeros(3) + optf.grad(G, ones(3), (x, y)) + push!(stochgrads, copy(G)) + G1 = zeros(3) + optf.fg(G1, ones(3), (x, y)) + @test G≈G1 rtol=1e-6 + end + @test G0≈sum(stochgrads) rtol=1e-1 + + optf = OptimizationFunction(loss, AutoReverseDiff()) + optf = OptimizationBase.instantiate_function( + optf, rand(3), AutoReverseDiff(), iterate(data)[1], g = true, fg = true) + G0 = zeros(3) + optf.grad(G0, ones(3), (x0, y0)) + stochgrads = [] + for (x, y) in data + G = zeros(3) + optf.grad(G, ones(3), (x, y)) + push!(stochgrads, copy(G)) + G1 = zeros(3) + optf.fg(G1, ones(3), (x, y)) + @test G≈G1 rtol=1e-6 + end + @test G0≈sum(stochgrads) rtol=1e-1 + + optf = OptimizationFunction(loss, AutoZygote()) + optf = OptimizationBase.instantiate_function( + optf, rand(3), AutoZygote(), iterate(data)[1], g = true, fg = true) + G0 = zeros(3) + optf.grad(G0, ones(3), (x0, y0)) + stochgrads = [] + for (x, y) in data + G = zeros(3) + optf.grad(G, ones(3), (x, y)) + push!(stochgrads, copy(G)) + G1 = zeros(3) + optf.fg(G1, ones(3), (x, y)) + @test G≈G1 rtol=1e-6 + end + @test G0≈sum(stochgrads) rtol=1e-1 + + optf = OptimizationFunction(loss, AutoEnzyme()) + optf = OptimizationBase.instantiate_function( + optf, rand(3), AutoEnzyme(mode = set_runtime_activity(Reverse)), + iterate(data)[1], g = true, fg = true) + G0 = zeros(3) + optf.grad(G0, ones(3), (x0, y0)) + stochgrads = [] + for (x, y) in data + G = zeros(3) + optf.grad(G, ones(3), (x, y)) + push!(stochgrads, copy(G)) + G1 = zeros(3) + optf.fg(G1, ones(3), (x, y)) + @test G≈G1 rtol=1e-6 + end + @test G0≈sum(stochgrads) rtol=1e-1 +end diff --git a/lib/OptimizationBase/test/cvxtest.jl b/lib/OptimizationBase/test/cvxtest.jl new file mode 100644 index 000000000..cef7859bb --- /dev/null +++ b/lib/OptimizationBase/test/cvxtest.jl @@ -0,0 +1,52 @@ +using Optimization, OptimizationBase, ForwardDiff, SymbolicAnalysis, LinearAlgebra, + Manifolds, OptimizationManopt + +function f(x, p = nothing) + return exp(x[1]) + x[1]^2 +end + +optf = OptimizationFunction(f, Optimization.AutoForwardDiff()) +prob = 
OptimizationProblem(optf, [0.4], structural_analysis = true) + +@time sol = solve(prob, Optimization.LBFGS(), maxiters = 1000) +@test sol.cache.analysis_results.objective.curvature == SymbolicAnalysis.Convex +@test sol.cache.analysis_results.constraints === nothing + +x0 = zeros(2) +rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2 +l1 = rosenbrock(x0) + +optf = OptimizationFunction(rosenbrock, AutoEnzyme()) +prob = OptimizationProblem(optf, x0, structural_analysis = true) +@time res = solve(prob, Optimization.LBFGS(), maxiters = 100) +@test res.cache.analysis_results.objective.curvature == SymbolicAnalysis.UnknownCurvature + +function con2_c(res, x, p) + res .= [x[1]^2 + x[2]^2, (x[2] * sin(x[1]) + x[1]) - 5] +end + +optf = OptimizationFunction(rosenbrock, AutoZygote(), cons = con2_c) +prob = OptimizationProblem(optf, x0, lcons = [1.0, -Inf], ucons = [1.0, 0.0], + lb = [-1.0, -1.0], ub = [1.0, 1.0], structural_analysis = true) +@time res = solve(prob, Optimization.LBFGS(), maxiters = 100) +@test res.cache.analysis_results.objective.curvature == SymbolicAnalysis.UnknownCurvature +@test res.cache.analysis_results.constraints[1].curvature == SymbolicAnalysis.Convex +@test res.cache.analysis_results.constraints[2].curvature == + SymbolicAnalysis.UnknownCurvature + +m = 100 +σ = 0.005 +q = Matrix{Float64}(LinearAlgebra.I(5)) .+ 2.0 + +M = SymmetricPositiveDefinite(5) +data2 = [exp(M, q, σ * rand(M; vector_at = q)) for i in 1:m]; + +f(x, p = nothing) = sum(SymbolicAnalysis.distance(M, data2[i], x)^2 for i in 1:5) +optf = OptimizationFunction(f, Optimization.AutoForwardDiff()) +prob = OptimizationProblem(optf, data2[1]; manifold = M, structural_analysis = true) + +opt = OptimizationManopt.GradientDescentOptimizer() +@time sol = solve(prob, opt, maxiters = 100) +@test sol.minimum < 1e-1 +@test sol.cache.analysis_results.objective.curvature == SymbolicAnalysis.UnknownCurvature +@test sol.cache.analysis_results.objective.gcurvature == SymbolicAnalysis.GConvex diff --git a/lib/OptimizationBase/test/matrixvalued.jl b/lib/OptimizationBase/test/matrixvalued.jl new file mode 100644 index 000000000..7a60823ac --- /dev/null +++ b/lib/OptimizationBase/test/matrixvalued.jl @@ -0,0 +1,81 @@ +using OptimizationBase, LinearAlgebra, ForwardDiff, Zygote, FiniteDiff, + DifferentiationInterface, SparseConnectivityTracer +using Test, ReverseDiff + +@testset "Matrix Valued" begin + for adtype in [AutoForwardDiff(), SecondOrder(AutoForwardDiff(), AutoZygote()), + SecondOrder(AutoForwardDiff(), AutoFiniteDiff()), + AutoSparse(AutoForwardDiff(), sparsity_detector = TracerLocalSparsityDetector()), + AutoSparse(SecondOrder(AutoForwardDiff(), AutoZygote()), + sparsity_detector = TracerLocalSparsityDetector()), + AutoSparse(SecondOrder(AutoForwardDiff(), AutoFiniteDiff()), + sparsity_detector = TracerLocalSparsityDetector())] + # 1. 
Matrix Factorization + @show adtype + function matrix_factorization_objective(X, A) + U, V = @view(X[1:size(A, 1), 1:Int(size(A, 2) / 2)]), + @view(X[1:size(A, 1), (Int(size(A, 2) / 2) + 1):size(A, 2)]) + return norm(A - U * V') + end + function non_negative_constraint(X, A) + U, V = X + return [all(U .>= 0) && all(V .>= 0)] + end + A_mf = rand(4, 4) # Original matrix + U_mf = rand(4, 2) # Factor matrix U + V_mf = rand(4, 2) # Factor matrix V + + optf = OptimizationFunction{false}( + matrix_factorization_objective, adtype, cons = non_negative_constraint) + optf = OptimizationBase.instantiate_function( + optf, hcat(U_mf, V_mf), adtype, A_mf, g = true, h = true, + cons_j = true, cons_h = true) + optf.grad(hcat(U_mf, V_mf)) + optf.hess(hcat(U_mf, V_mf)) + if !(adtype isa ADTypes.AutoSparse) + optf.cons_j(hcat(U_mf, V_mf)) + optf.cons_h(hcat(U_mf, V_mf)) + end + + # 2. Principal Component Analysis (PCA) + function pca_objective(X, A) + return -tr(X' * A * X) # Minimize the negative of the trace for maximization + end + function orthogonality_constraint(X, A) + return [norm(X' * X - I) < 1e-6] + end + A_pca = rand(4, 4) # Covariance matrix (can be symmetric positive definite) + X_pca = rand(4, 2) # Matrix to hold principal components + + optf = OptimizationFunction{false}( + pca_objective, adtype, cons = orthogonality_constraint) + optf = OptimizationBase.instantiate_function( + optf, X_pca, adtype, A_pca, g = true, h = true, + cons_j = true, cons_h = true) + optf.grad(X_pca) + optf.hess(X_pca) + if !(adtype isa ADTypes.AutoSparse) + optf.cons_j(X_pca) + optf.cons_h(X_pca) + end + + # 3. Matrix Completion + function matrix_completion_objective(X, P) + A, Omega = P + return norm(Omega .* (A - X)) + end + # r = 2 # Rank of the matrix to be completed + # function rank_constraint(X, P) + # return [rank(X) <= r] + # end + A_mc = rand(4, 4) # Original matrix with missing entries + Omega_mc = rand(4, 4) .> 0.5 # Mask for observed entries (boolean matrix) + X_mc = rand(4, 4) # Matrix to be completed + optf = OptimizationFunction{false}( + matrix_completion_objective, adtype) + optf = OptimizationBase.instantiate_function( + optf, X_mc, adtype, (A_mc, Omega_mc), g = true, h = true) + optf.grad(X_mc) + optf.hess(X_mc) + end +end diff --git a/lib/OptimizationBase/test/runtests.jl b/lib/OptimizationBase/test/runtests.jl new file mode 100644 index 000000000..fad4b35ef --- /dev/null +++ b/lib/OptimizationBase/test/runtests.jl @@ -0,0 +1,8 @@ +using OptimizationBase +using Test + +@testset "OptimizationBase.jl" begin + include("adtests.jl") + include("cvxtest.jl") + include("matrixvalued.jl") +end diff --git a/src/Optimization.jl b/src/Optimization.jl index 8d0257dd1..5e716c8cf 100644 --- a/src/Optimization.jl +++ b/src/Optimization.jl @@ -5,7 +5,12 @@ module Optimization using DocStringExtensions using Reexport -@reexport using SciMLBase, ADTypes, OptimizationBase +@reexport using SciMLBase, ADTypes + +# Include OptimizationBase as a submodule +include("../lib/OptimizationBase/src/OptimizationBase.jl") +using .OptimizationBase +export OptimizationBase if !isdefined(Base, :get_extension) using Requires @@ -14,7 +19,7 @@ end using Logging, ProgressLogging, ConsoleProgressMonitor, TerminalLoggers, LoggingExtras using ArrayInterface, Base.Iterators, SparseArrays, LinearAlgebra -import OptimizationBase: instantiate_function, OptimizationCache, ReInitCache +import .OptimizationBase: instantiate_function, OptimizationCache, ReInitCache import SciMLBase: OptimizationProblem, OptimizationFunction, ObjSense, MaxSense, 
MinSense, OptimizationStats diff --git a/test/runtests.jl b/test/runtests.jl index 2e1b7f39f..cc49b3f5b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,6 +15,7 @@ function activate_subpkg_env(subpkg) end if GROUP == "All" || GROUP == "Core" + dev_subpkg("OptimizationBase") dev_subpkg("OptimizationOptimJL") dev_subpkg("OptimizationOptimisers") dev_subpkg("OptimizationMOI")
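
Note on the oracle interface exercised throughout the testsets above: every block follows the same pattern, so a condensed usage sketch is collected here for reference. It is distilled from the calls made in the tests and is only an illustration; the variable names are mine, SciMLBase is assumed to provide OptimizationFunction (as in the test files), and nothing beyond what the tests themselves assert is implied.

using SciMLBase, OptimizationBase

rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
cons(res, x, p) = (res[1] = x[1]^2 + x[2]^2; return nothing)

optf = OptimizationFunction(rosenbrock, OptimizationBase.AutoForwardDiff(), cons = cons)
cache = OptimizationBase.instantiate_function(optf, zeros(2), OptimizationBase.AutoForwardDiff(),
    nothing, 1, g = true, h = true, hv = true,
    cons_j = true, cons_h = true, cons_vjp = true, cons_jvp = true, lag_h = true)

G = zeros(2); cache.grad(G, zeros(2))                    # in-place gradient
H = zeros(2, 2); cache.hess(H, zeros(2))                 # in-place Hessian
Hv = zeros(2); cache.hv(Hv, zeros(2), [1.0, 1.0])        # Hessian-vector product
r = zeros(1); cache.cons(r, zeros(2))                    # constraint values
J = zeros(2); cache.cons_j(J, [5.0, 3.0])                # constraint Jacobian (a vector when there is one constraint)
HL = zeros(2, 2); cache.lag_h(HL, zeros(2), 1.0, [1.0])  # Lagrangian Hessian at (σ, μ) = (1.0, [1.0])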
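
The lag_h assertions all check the same identity, namely that the Lagrangian Hessian equals σ * ∇²f(x) + Σᵢ μᵢ * ∇²cᵢ(x). The sketch below builds that reference value by hand with ForwardDiff for the two-constraint case; it illustrates the quantity being tested, not how lag_h is implemented internally.

using ForwardDiff

rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
c1(x) = x[1]^2 + x[2]^2           # first component of con2_c
c2(x) = x[2] * sin(x[1]) - x[1]   # second component of con2_c

function reference_lag_h(x, σ, μ)
    Hf = ForwardDiff.hessian(z -> rosenbrock(z), x)
    Hc = [ForwardDiff.hessian(c1, x), ForwardDiff.hessian(c2, x)]
    return σ * Hf + sum(μ .* Hc)  # the same expression as σ * H1 + sum(μ .* H3) in the tests
end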
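
The nnz checks in the "Sparse Tests" testset (5 stored entries for the objective Hessian and 5 for the constraint Jacobian) come from a sparsity-detection pass over the same functions. The sketch below reproduces those two patterns with the ADTypes detector interface and SparseConnectivityTracer, both of which this patch adds as dependencies; whether AutoSparseForwardDiff uses exactly this detector by default is an assumption made only for illustration.

using ADTypes, SparseConnectivityTracer

sparse_objective(x) = x[1]^2 + 100 * (x[3] - x[2]^2)^2
function sparse_constraints!(res, x)
    res[1] = x[1] + x[2] + (x[2] * x[3])^2 - 1
    res[2] = x[1]^2 + x[3]^2 - 1
    return res
end

detector = TracerLocalSparsityDetector()
x0 = [0.5, 0.5, 0.5]
hess_pattern = ADTypes.hessian_sparsity(sparse_objective, x0, detector)               # 5 stored entries
jac_pattern = ADTypes.jacobian_sparsity(sparse_constraints!, zeros(2), x0, detector)  # 5 stored entries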
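
The "Stochastic gradient" testset relies on the fact that the polynomial-fit loss is a plain sum over data points, so the gradient over the full data set equals the sum of the gradients over any partition into batches; the loose rtol = 1e-1 only has to absorb floating-point accumulation. A standalone restatement of that property, using ForwardDiff and MLUtils:

using ForwardDiff, MLUtils

x0 = rand(10_000)
y0 = sin.(x0)
loss(coeffs, data) = sum(abs2, [evalpoly(xi, coeffs) for xi in data[1]] .- data[2])

full_grad = ForwardDiff.gradient(c -> loss(c, (x0, y0)), ones(3))
batch_grads = [ForwardDiff.gradient(c -> loss(c, (x, y)), ones(3))
               for (x, y) in MLUtils.DataLoader((x0, y0), batchsize = 100)]
# full_grad ≈ sum(batch_grads), because the batches partition (x0, y0) and the loss is additive.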
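
The matrixvalued.jl tests exercise matrix-shaped decision variables: the objective slices the matrix X into factor blocks with views, and the differentiation oracles return arrays of the same shape as X. A minimal standalone illustration with ForwardDiff, hard-coding the 4 x (2 + 2) layout used in that test:

using ForwardDiff, LinearAlgebra

A = rand(4, 4)
function matrix_factorization_objective(X)
    U = @view X[:, 1:2]   # factor U occupies the first block of columns
    V = @view X[:, 3:4]   # factor V occupies the second block
    return norm(A - U * V')
end

X0 = rand(4, 4)
G = ForwardDiff.gradient(matrix_factorization_objective, X0)  # gradient has the same 4×4 shape as X0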