|
# Precomputed standard-normal CDF values at +1 and -1. Used by `_mcse_quantile`
# to place two quadrature points one standard deviation either side of the mean.
const normcdf1 = 0.8413447460685429 # StatsFuns.normcdf(1)
const normcdfn1 = 0.15865525393145705 # StatsFuns.normcdf(-1)
| 3 | + |
"""
    mcse(estimator, samples::AbstractArray{<:Union{Missing,Real}}; kwargs...)

Estimate the Monte Carlo standard errors (MCSE) of the `estimator` applied to `samples` of
shape `(draws, chains, parameters)`.

See also: [`ess_rhat`](@ref)

## Estimators

`estimator` must accept a vector of the same `eltype` as `samples` and return a real estimate.

For the following estimators, the effective sample size [`ess_rhat`](@ref) and an estimate
of the asymptotic variance are used to compute the MCSE, and `kwargs` are forwarded to
`ess_rhat`:
- `Statistics.mean`
- `Statistics.median`
- `Statistics.std`
- `Base.Fix2(Statistics.quantile, p::Real)`

For other estimators, the subsampling bootstrap method (SBM)[^FlegalJones2011][^Flegal2012]
is used as a fallback, and the only accepted keyword argument is `batch_size`, which
indicates the size of the overlapping batches used to estimate the MCSE, defaulting to
`floor(Int, sqrt(draws * chains))`. Note that SBM tends to underestimate the MCSE,
especially for highly autocorrelated chains. One should verify that autocorrelation is low
by checking the bulk- and tail-[`ess_rhat`](@ref) values.

[^FlegalJones2011]: Flegal JM, Jones GL. (2011) Implementing MCMC: estimating with confidence.
    Handbook of Markov Chain Monte Carlo. pp. 175-97.
    [pdf](http://faculty.ucr.edu/~jflegal/EstimatingWithConfidence.pdf)
[^Flegal2012]: Flegal JM. (2012) Applicability of subsampling bootstrap methods in Markov chain Monte Carlo.
    Monte Carlo and Quasi-Monte Carlo Methods 2010. pp. 363-72.
    doi: [10.1007/978-3-642-27440-4_18](https://doi.org/10.1007/978-3-642-27440-4_18)
"""
# Fallback: with no estimator-specific method available, estimate the MCSE of an
# arbitrary estimator `f` via the subsampling bootstrap.
mcse(f, x::AbstractArray{<:Union{Missing,Real},3}; kwargs...) = _mcse_sbm(f, x; kwargs...)

# MCSE of the sample mean: per-parameter standard deviation divided by the square
# root of the mean-specific effective sample size.
function mcse(
    ::typeof(Statistics.mean), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    ess = first(ess_rhat(Statistics.mean, samples; kwargs...))
    sd = dropdims(Statistics.std(samples; dims=(1, 2)); dims=(1, 2))
    return sd ./ sqrt.(ess)
end
# MCSE of the sample standard deviation, via the delta method.
function mcse(
    ::typeof(Statistics.std), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    # squared deviations form an expectand proxy whose mean is the variance
    sq_dev = (samples .- Statistics.mean(samples; dims=(1, 2))) .^ 2
    ess = first(ess_rhat(Statistics.mean, sq_dev; kwargs...))
    # The asymptotic variance of the sample variance is Var[var] = E[μ₄] - E[var]²,
    # where μ₄ is the 4th central moment. By the delta method,
    # Var[std] = Var[var] / (4 E[var]) = (E[μ₄]/E[var] - E[var]) / 4.
    # See e.g. Chapter 3 of Van der Vaart, AW. (2000) Asymptotic Statistics. Vol. 3.
    mean_var = dropdims(Statistics.mean(sq_dev; dims=(1, 2)); dims=(1, 2))
    mean_moment4 = dropdims(Statistics.mean(abs2, sq_dev; dims=(1, 2)); dims=(1, 2))
    return @. sqrt((mean_moment4 / mean_var - mean_var) / ess) / 2
end
# MCSE of a fixed quantile estimator `Base.Fix2(quantile, p)`.
function mcse(
    f::Base.Fix2{typeof(Statistics.quantile),<:Real},
    samples::AbstractArray{<:Union{Missing,Real},3};
    kwargs...,
)
    prob = f.x
    ess = first(ess_rhat(f, samples; kwargs...))
    Tess = eltype(ess)
    # result element type after dividing a sample value by sqrt of an ESS value
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    for (j, slab, essj) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[j] = _mcse_quantile(vec(slab), prob, essj)
    end
    return out
end
# MCSE of the sample median: the p = 1/2 special case of the quantile estimator.
function mcse(
    ::typeof(Statistics.median), samples::AbstractArray{<:Union{Missing,Real},3}; kwargs...
)
    ess = first(ess_rhat(Statistics.median, samples; kwargs...))
    Tess = eltype(ess)
    # result element type after dividing a sample value by sqrt of an ESS value
    Tout = promote_type(
        eltype(samples), typeof(oneunit(eltype(samples)) / sqrt(oneunit(Tess)))
    )
    out = similar(ess, Tout)
    for (j, slab, essj) in zip(eachindex(out), eachslice(samples; dims=3), ess)
        out[j] = _mcse_quantile(vec(slab), 1//2, essj)
    end
    return out
end
26 | 86 |
|
# MCSE of the `p`-quantile of `x` given effective sample size `Seff`.
function _mcse_quantile(x, p, Seff)
    # propagate a missing ESS rather than erroring
    Seff === missing && return missing
    n = length(x)
    # The error of a quantile estimate is asymptotically normal; estimate its σ
    # (the MCSE) from two quadrature points xl and xu chosen so that xu - xl = 2σ.
    # A Beta(α, β) distribution approximates the error of the quantile estimate in
    # probability space (i.e. quantiles passed through the CDF).
    α = Seff * p + 1
    β = Seff * (1 - p) + 1
    prob_upper = StatsFuns.betainvcdf(α, β, normcdf1)
    prob_lower = StatsFuns.betainvcdf(α, β, normcdfn1)
    # map probabilities back to sample (x) space through the inverse ECDF
    lo = max(floor(Int, prob_lower * n), 1)
    hi = min(ceil(Int, prob_upper * n), n)
    order = partialsortperm(x, lo:hi) # sort as little of x as possible
    xl = x[first(order)]
    xu = x[last(order)]
    # the two quadrature points straddle 2σ, so σ is half their spread
    return (xu - xl) / 2
end
34 | 107 |
|
# Apply the subsampling bootstrap MCSE parameter-wise over a (draws, chains,
# parameters) array, flattening each parameter's draws and chains into one vector.
function _mcse_sbm(
    f,
    x::AbstractArray{<:Union{Missing,Real},3};
    batch_size::Int=floor(Int, sqrt(size(x, 1) * size(x, 2))),
)
    # element type after division so integer (and missing) inputs promote cleanly
    T = promote_type(eltype(x), typeof(zero(eltype(x)) / 1))
    estimates = similar(x, T, (axes(x, 3),))
    for (j, slab) in zip(eachindex(estimates), eachslice(x; dims=3))
        estimates[j] = _mcse_sbm(f, vec(slab), batch_size)
    end
    return estimates
end
# Subsampling-bootstrap MCSE of estimator `f` over a single flat vector `x`,
# using overlapping batches of length `batch_size`.
function _mcse_sbm(f, x, batch_size)
    # SBM cannot handle missing draws; propagate missing instead of erroring
    any(ismissing, x) && return missing
    n = length(x)
    start = firstindex(x)
    # estimator applied to every overlapping batch of `batch_size` consecutive draws
    batch_stats = (
        f(view(x, j:(j + batch_size - 1))) for j in start:(start + n - batch_size)
    )
    v = Statistics.var(batch_stats; corrected=false)
    # rescale the batch-level variance to the full-sample standard error
    return sqrt(v * (batch_size//n))
end
0 commit comments