-
Notifications
You must be signed in to change notification settings - Fork 0
Fix QUBO local-minimum polish #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -100,6 +100,183 @@ function tensorize(p::AbstractPolynomial{T}; cutoff = zero(T)) where T | |
| return isempty(os) ? MPO(T,sites) : MPO(T, os, sites) | ||
| end | ||
|
|
||
| function _qubo_coefficients(Q::AbstractMatrix{T}, l::Union{AbstractVector{T}, Nothing}, c::T) where T | ||
| n = size(Q, 1) | ||
| R = promote_type(T, Float64) | ||
| linear = Vector{R}(undef, n) | ||
| quadratic = zeros(R, n, n) | ||
|
|
||
| for i in 1:n | ||
| linear[i] = Q[i, i] + maybe(v -> v[i], l; default=zero(T)) | ||
| end | ||
|
|
||
| for i in 1:n | ||
| for j in (i + 1):n | ||
| quadratic[i, j] = Q[i, j] + Q[j, i] | ||
| end | ||
| end | ||
|
|
||
| return linear, quadratic, R(c) | ||
| end | ||
|
|
||
| function _qubo_value(x, linear, quadratic, c) | ||
| n = length(x) | ||
| value = c | ||
|
|
||
| @inbounds for i in 1:n | ||
| if x[i] == 1 | ||
| value += linear[i] | ||
|
|
||
| for j in (i + 1):n | ||
| if x[j] == 1 | ||
| value += quadratic[i, j] | ||
| end | ||
| end | ||
| end | ||
| end | ||
|
|
||
| return value | ||
| end | ||
|
|
||
| function _branch_bound_qubo( | ||
| Q::AbstractMatrix{T}, | ||
| l::Union{AbstractVector{T}, Nothing}, | ||
| c::T, | ||
| incumbent_x, | ||
| incumbent; | ||
| max_variables::Integer = 36, | ||
| time_limit::Real = 1.0, | ||
| ) where T | ||
| n = size(Q, 1) | ||
|
|
||
| if n > max_variables || time_limit <= 0 | ||
| return nothing | ||
| end | ||
|
|
||
| linear, quadratic, constant = _qubo_coefficients(Q, l, c) | ||
| x0 = Int.(incumbent_x) | ||
| best = min(float(incumbent), _qubo_value(x0, linear, quadratic, constant)) | ||
| tol = sqrt(eps(Float64)) | ||
|
|
||
| score = [ | ||
| abs(linear[i]) + sum(abs(quadratic[min(i, j), max(i, j)]) for j in 1:n if j != i) | ||
| for i in 1:n | ||
| ] | ||
| order = sortperm(score; rev=true) | ||
|
|
||
| ordered_linear = linear[order] | ||
| ordered_quadratic = zeros(eltype(quadratic), n, n) | ||
|
|
||
| for i in 1:n | ||
| for j in (i + 1):n | ||
| oi, oj = order[i], order[j] | ||
| ordered_quadratic[i, j] = quadratic[min(oi, oj), max(oi, oj)] | ||
| end | ||
| end | ||
|
|
||
| negative_suffix = zeros(eltype(quadratic), n + 1) | ||
| for i in n:-1:1 | ||
| negative_suffix[i] = negative_suffix[i + 1] | ||
|
|
||
| for j in (i + 1):n | ||
| q = ordered_quadratic[i, j] | ||
| if q < 0 | ||
| negative_suffix[i] += q | ||
| end | ||
| end | ||
| end | ||
|
|
||
| assignment = zeros(Int, n) | ||
| best_assignment = x0[order] | ||
| coeff = copy(ordered_linear) | ||
| deadline = time() + time_limit | ||
| nodes = 0 | ||
| timed_out = false | ||
|
|
||
| function lower_bound(depth, partial) | ||
| bound = partial | ||
|
|
||
| @inbounds for i in depth:n | ||
| bound += min(zero(coeff[i]), coeff[i]) | ||
| end | ||
|
|
||
| return bound + negative_suffix[depth] | ||
| end | ||
|
|
||
| function search(depth, partial) | ||
| if timed_out | ||
| return | ||
| end | ||
|
|
||
| nodes += 1 | ||
| if nodes % 1024 == 0 && time() >= deadline | ||
| timed_out = true | ||
| return | ||
| end | ||
|
|
||
| if lower_bound(depth, partial) >= best - tol | ||
| return | ||
| end | ||
|
|
||
| if depth > n | ||
| if partial < best - tol | ||
| best = partial | ||
| best_assignment = copy(assignment) | ||
| end | ||
|
|
||
| return | ||
| end | ||
|
|
||
| one_partial = partial + coeff[depth] | ||
|
|
||
| if one_partial < partial | ||
| assignment[depth] = 1 | ||
|
|
||
| @inbounds for j in (depth + 1):n | ||
| coeff[j] += ordered_quadratic[depth, j] | ||
| end | ||
|
|
||
| search(depth + 1, one_partial) | ||
|
|
||
| @inbounds for j in (depth + 1):n | ||
| coeff[j] -= ordered_quadratic[depth, j] | ||
| end | ||
|
|
||
| assignment[depth] = 0 | ||
| search(depth + 1, partial) | ||
| else | ||
| assignment[depth] = 0 | ||
| search(depth + 1, partial) | ||
| assignment[depth] = 1 | ||
|
|
||
| @inbounds for j in (depth + 1):n | ||
| coeff[j] += ordered_quadratic[depth, j] | ||
| end | ||
|
|
||
| search(depth + 1, one_partial) | ||
|
|
||
| @inbounds for j in (depth + 1):n | ||
| coeff[j] -= ordered_quadratic[depth, j] | ||
| end | ||
|
|
||
| assignment[depth] = 0 | ||
| end | ||
| end | ||
|
|
||
| search(1, constant) | ||
|
|
||
| if best >= float(incumbent) - tol | ||
| return nothing | ||
| end | ||
|
|
||
| x = zeros(Int, n) | ||
| for (i, original_index) in pairs(order) | ||
| x[original_index] = best_assignment[i] | ||
| end | ||
|
|
||
| return best, x | ||
| end | ||
|
|
||
|
|
||
| """ | ||
| minimize(Q::Matrix[, l::Vector[, c::Number ; device, cutoff, kwargs...) | ||
|
|
@@ -136,6 +313,12 @@ Keyword arguments: | |
| - `vtol :: Float64` - If specified, determines the variance tolerance before the algorithm stops. | ||
| The variance test determines whether DMRG converged to an eigenstate (not necessarily the ground state), | ||
| but is expensive to calculate. | ||
| - `polish :: Bool` - Run bounded branch-and-bound postprocessing for QUBO matrix inputs. Defaults to `true`. | ||
| - `polish_max_variables :: Int` - Maximum number of variables eligible for bounded polishing. Defaults to `36`. | ||
| - `polish_time_limit :: Float64` - Maximum seconds spent in bounded polishing. Defaults to `1.0`. | ||
| The polish step returns the best improved incumbent found within this budget. | ||
| If polishing finds a strictly better bitstring, the returned `Solution` samples that single | ||
| polished bitstring rather than the original DMRG distribution. | ||
| - `noise` - A float or array of floats (per iteration) specifying the noise term added to the system to help with convergence. | ||
| It is recommended to use a large noise (~ 1e-5) on the initial iterations and let it go to zero on later iterations. | ||
| - `eigsolve_krylovdim :: Int = 3` - Maximum Krylov space dimension used in the local eigensolver. | ||
|
|
@@ -170,11 +353,22 @@ See also [`maximize`](@ref). | |
| """ | ||
| function minimize end | ||
|
|
||
| function minimize(Q :: AbstractMatrix{T} , l :: Union{AbstractVector{T}, Nothing} = nothing , c :: T = zero(T); cutoff=1e-8, kwargs...) where T | ||
| function minimize(Q :: AbstractMatrix{T} , l :: Union{AbstractVector{T}, Nothing} = nothing , c :: T = zero(T); | ||
| cutoff=1e-8, polish::Bool=true, polish_max_variables::Integer=36, | ||
| polish_time_limit::Real=1.0, kwargs...) where T | ||
| H = tensorize(Q, isnothing(l) ? diag(Q) : diag(Q) + l; cutoff) | ||
| obj(x) = dot(x, Q, x) + c + maybe(l -> dot(l,x), l; default=zero(T)) | ||
|
|
||
| return _minimize(H, c, obj; cutoff, kwargs...) | ||
| postprocess = polish ? (x, incumbent, remaining_time) -> _branch_bound_qubo( | ||
| Q, | ||
| l, | ||
| c, | ||
| x, | ||
| incumbent; | ||
| max_variables=polish_max_variables, | ||
| time_limit=min(polish_time_limit, remaining_time), | ||
| ) : nothing | ||
|
|
||
| return _minimize(H, c, obj; cutoff, postprocess, kwargs...) | ||
| end | ||
|
|
||
| function minimize(p::AbstractPolynomial{T}; cutoff=1e-8, kwargs...) where T | ||
|
|
@@ -234,6 +428,7 @@ function _minimize( H :: MPO | |
| , eigsolve_krylovdim :: Int = 3 | ||
| , eigsolve_maxiter :: Int = 2 | ||
| , eigsolve_tol :: Float64 = 1e-14 | ||
| , postprocess :: Union{Nothing, Function} = nothing | ||
| # Iteration callback | ||
| , on_iteration :: Union{Nothing, Function} = nothing | ||
| , callback_every :: Int = 1 | ||
|
|
@@ -326,6 +521,26 @@ function _minimize( H :: MPO | |
| dist = Solution{T}(psi, energies_log, bond_dims_log, elapsed_times_log) | ||
| x = sample(dist) | ||
| optimal = obj(x) | ||
|
|
||
| if !isnothing(postprocess) | ||
| remaining_time = if isfinite(time_limit) | ||
| max(0.0, Float64(time_limit) - (time() - initial_time)) | ||
| else | ||
| time_limit | ||
| end | ||
| polished = remaining_time > 0 ? postprocess(x, optimal, remaining_time) : nothing | ||
|
|
||
| if !isnothing(polished) | ||
| polished_optimal, polished_x = polished | ||
|
|
||
| if polished_optimal < optimal - sqrt(eps(Float64)) | ||
| optimal = polished_optimal | ||
| psi = MPS(sites, string.(Int.(polished_x))) |> device | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nonblocking: rebuilding
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed in 27e3f11. The |
||
| dist = Solution{T}(psi, energies_log, bond_dims_log, elapsed_times_log) | ||
| end | ||
| end | ||
| end | ||
|
|
||
| elapsed_time = time() - initial_time | ||
|
|
||
| iterlog_footer(verbosity, optimal, elapsed_time) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,6 +151,28 @@ | |
| @test length(unique(ids)) == 3 | ||
| end | ||
|
|
||
| @testset "polish respects solve time limit" begin | ||
| Q = [0.0 0.0; 0.0 0.0] | ||
| H = TenSolver.tensorize(Q, diag(Q); cutoff=1e-8) | ||
| called = Ref(false) | ||
| postprocess = (x, incumbent, remaining_time) -> begin | ||
| called[] = true | ||
| return incumbent - 1, ones(Int, 2) | ||
| end | ||
|
|
||
| TenSolver._minimize( | ||
| H, | ||
| 0.0, | ||
| x -> dot(x, Q, x); | ||
| iterations=1, | ||
| time_limit=0.0, | ||
| postprocess, | ||
| verbosity=0, | ||
| ) | ||
|
|
||
| @test !called[] | ||
| end | ||
|
|
||
| end | ||
|
|
||
| @testset "Pure quadratic" begin | ||
|
|
@@ -209,4 +231,82 @@ | |
| # Solution is sampleable | ||
| @test x0 in psi | ||
| end | ||
|
|
||
| @testset "Issue #19 polish escapes local minima" begin | ||
| rows = [ | ||
| 1, 1, 2, 1, 2, 3, 5, 1, 2, 3, 4, 5, 6, 5, 6, 7, 9, 9, 10, 8, 9, 10, 11, | ||
| 9, 10, 11, 12, 1, 2, 4, 7, 1, 2, 3, 1, 2, 3, 4, 14, 4, 5, 6, 7, 14, 8, 9, | ||
| 10, 11, 12, 12, 13, 12, 13, 1, 2, 3, 4, 14, 5, 6, 7, 9, 10, 11, 12, 1, 4, | ||
| 7, 1, 4, 7, 25, 2, 4, 7, 2, 4, 7, 27, 3, 4, 7, 3, 4, 7, 29, 1, 2, 14, 1, | ||
| 2, 14, 31, 7, 14, 9, 13, 10, 13, 11, 13, | ||
| ] | ||
| cols = [ | ||
| 2, 3, 3, 4, 4, 4, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 10, 11, 11, 12, 12, 12, | ||
| 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16, 16, 16, 16, 17, | ||
| 17, 17, 17, 17, 18, 18, 18, 18, 18, 19, 19, 20, 20, 21, 21, 21, 22, 22, | ||
| 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 28, | ||
| 28, 28, 28, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33, | ||
| 33, 34, 34, 35, 35, 36, 36, | ||
| ] | ||
| vals = [ | ||
| 341.9864, 256.4898, 256.4898, -85.4966, -85.4966, 85.4966, 256.4898, 85.4966, 85.4966, | ||
| 85.4966, -85.4966, 256.4898, 256.4898, -85.4966, -85.4966, -85.4966, 256.4898, 256.4898, | ||
| 256.4898, -85.4966, 85.4966, 85.4966, 85.4966, -170.9932, -170.9932, -170.9932, 170.9932, | ||
| 85.4966, 85.4966, 256.4898, 85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, | ||
| -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, | ||
| -85.4966, -85.4966, -85.4966, 85.4966, -85.4966, -85.4966, 85.4966, 85.4966, 85.4966, | ||
| 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, | ||
| 85.4966, 85.4966, -85.4966, 85.4966, 85.4966, -85.4966, 85.4966, 85.4966, 85.4966, -85.4966, | ||
| 85.4966, 85.4966, -85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, 85.4966, | ||
| 85.4966, 85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, -85.4966, 85.4966, | ||
| -85.4966, -85.4966, 85.4966, -85.4966, 85.4966, -85.4966, 85.4966, -85.4966, | ||
| ] | ||
|
|
||
| Q = zeros(36, 36) | ||
| for (i, j, v) in zip(rows, cols, vals) | ||
| Q[i, j] = v | ||
| end | ||
|
|
||
| l = zeros(36) | ||
| l[ | ||
| [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, | ||
| 11, 12, 13, 14, 16, 17, 18, 20, | ||
| 21, 22, 23, 24, 25, 26, 27, 28, | ||
| 29, 30, 31, 32, 33, 34, 35, 36] | ||
| ] = [ | ||
| -42.7194, -42.7084, -85.3741, 175.952, 48.9413, 52.4653, -210.389, 91.6596, 86.2356, 86.3366, | ||
| 86.7676, 6.99, 172.326, -42.7483, 85.4966, 85.4966, 85.4966, -42.7483, | ||
| -42.7483, -42.7483, -42.7483, -42.7483, -42.7483, -42.7483, -42.7483, -42.7483, | ||
| -128.245, -128.245, 128.245, 128.245, 128.245, 42.7483, 42.7483, 42.7483, | ||
| ] | ||
| c = 641.2245 | ||
| x0 = zeros(Int, 36) | ||
|
|
||
| polished = TenSolver._branch_bound_qubo( | ||
| Q, | ||
| l, | ||
| c, | ||
| x0, | ||
| dot(x0, Q, x0) + dot(l, x0) + c; | ||
| time_limit=Inf, | ||
| ) | ||
|
|
||
| @test !isnothing(polished) | ||
| E, x = polished | ||
| @test E ≈ 11.7099 | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nonblocking (flaky test): this asserts exact optimality (
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed in 27e3f11. The exact |
||
| @test dot(x, Q, x) + dot(l, x) + c ≈ E | ||
|
|
||
| Random.seed!(1) | ||
| incumbent_E, incumbent_psi = TenSolver.minimize(Q, l, c; iterations=1, polish=false, verbosity=0) | ||
| incumbent_x = TenSolver.sample(incumbent_psi) | ||
| incumbent = dot(incumbent_x, Q, incumbent_x) + dot(l, incumbent_x) + c | ||
| @test incumbent ≈ incumbent_E | ||
|
|
||
| Random.seed!(1) | ||
| E, psi = TenSolver.minimize(Q, l, c; iterations=1, polish=true, polish_time_limit=1.0, verbosity=0) | ||
| x = TenSolver.sample(psi) | ||
|
|
||
| @test E <= incumbent + 1e-8 | ||
| @test dot(x, Q, x) + dot(l, x) + c ≈ E | ||
| end | ||
| end | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Blocking:
time_limitis only checked inside the DMRG loop, then polish runs with its own default 1s budget even if the caller's solve budget is already exhausted. This also affects the JuMP path becauseMOI.TimeLimitSec()maps totime_limitinQUBODrivers.sample, but the defaultpolish_time_limitstill runs afterwards. Users relying on time limits can now exceed them by up to the polish budget on every <=36 variable QUBO. Please cap the polish budget by the remaining solve time for finitetime_limit, or skip polish when no time remains, and add a small test covering that interaction.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in b59237b. Polish now receives the remaining overall solve budget and is skipped when no time remains;
polish_time_limitis only an additional cap. Addedtest/qubo.jlcoverage inpolish respects solve time limit.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in 27e3f11. The remaining solve budget cap is preserved after merging current
main, andtest/qubo.jlstill covers skipping polish when no solve time remains.