From 85ec534c0ca2558faff27864915d2f536466fa5e Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:10:47 +0000 Subject: [PATCH 01/33] added an initial implementation of `RAM` --- src/AdvancedMH.jl | 4 + src/RobustAdaptiveMetropolis.jl | 142 ++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/RobustAdaptiveMetropolis.jl diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl index 7ff9759..3dcbf95 100644 --- a/src/AdvancedMH.jl +++ b/src/AdvancedMH.jl @@ -160,4 +160,8 @@ include("mh-core.jl") include("emcee.jl") include("MALA.jl") +include("RobustAdaptiveMetropolis.jl") +using .RobustAdaptiveMetropolis +export RAM + end # module AdvancedMH diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl new file mode 100644 index 0000000..f9e58c6 --- /dev/null +++ b/src/RobustAdaptiveMetropolis.jl @@ -0,0 +1,142 @@ +module RobustAdaptiveMetropolis + +using Random, LogDensityProblems, LinearAlgebra, AbstractMCMC +using DocStringExtensions: FIELDS + +using AdvancedMH: AdvancedMH + +export RAM + +# TODO: Should we generalise this arbitrary symmetric proposals? +# TODO: Implement checking of eigenvalues to avoid degenerate covariance estimates, as in the paper. +""" + RAM + +Robust Adaptive Metropolis-Hastings (RAM). + +This is a simple implementation of the RAM algorithm described in [^VIH12]. + +# Fields + +$(FIELDS) + +# References +[^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. +""" +Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler + "target acceptance rate" + α::T=0.234 + "negative exponent of the adaptation decay rate" + γ::T=0.6 + "initial covariance matrix" + S::A=nothing +end + +# TODO: Should we record anything like the acceptance rates? 
+struct RAMState{T1,L,A,T2,T3} + x::T1 + logprob::L + S::A + logα::T2 + η::T3 + iteration::Int + isaccept::Bool +end + +function step_inner( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RAM, + state::RAMState +) + # This is the initial state. + f = model.logdensity + d = LogDensityProblems.dimension(f) + + # Sample the proposal. + x = state.x + U = randn(rng, d) + x_new = x + state.S * U + + # Compute the acceptance probability. + lp = state.logprob + lp_new = LogDensityProblems.logdensity(f, x_new) + logα = min(lp_new - lp, zero(lp)) # `min` because we'll use this for updating + # TODO: use `randexp` instead. + isaccept = log(rand(rng)) < logα + + return x_new, lp_new, U, logα, isaccept +end + +function adapt(sampler::RAM, state::RAMState, logα::Real, U::AbstractVector) + # Update ` + Δα = exp(logα) - sampler.α + S = state.S + # TODO: Make this configurable by defining a more general path. + η = state.iteration^(-sampler.γ) + ΔS = η * abs(Δα) * S * U / norm(U) + # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it. + S_new = if sign(Δα) == 1 + # One rank update. + LinearAlgebra.lowrankupdate(Cholesky(S), ΔS).L + else + # One rank downdate. + LinearAlgebra.lowrankdowndate(Cholesky(S), ΔS).L + end + return S_new, η +end + +function AbstractMCMC.step( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RAM; + initial_params=nothing, + kwargs... +) + # This is the initial state. + f = model.logdensity + d = LogDensityProblems.dimension(f) + + # Initial parameter state. + x = initial_params === nothing ? rand(rng, d) : initial_params + # Initialize the Cholesky factor of the covariance matrix. + S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(eltype(sampler.γ), d)) : sampler.S) + + # Constuct the initial state. 
+ lp = LogDensityProblems.logdensity(f, x) + state = RAMState(x, lp, S, 0.0, 0, 1, true) + + return AdvancedMH.Transition(x, lp, true), state +end + +function AbstractMCMC.step( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RAM, + state::RAMState; + kwargs... +) + # Take the inner step. + x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state) + # Adapt the proposal. + state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new +end + +function AbstractMCMC.step_warmup( + rng::Random.AbstractRNG, + model::AbstractMCMC.LogDensityModel, + sampler::RAM, + state::RAMState; + kwargs... +) + # Take the inner step. + x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state) + # Adapt the proposal. + S_new, η = adapt(sampler, state, logα, U) + # Update state. + state_new = RAMState(isaccept ? x_new : state.x, isaccept ? 
lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new +end + +end From de519a4ae5a05f66c8220b3af4792d3bf57d316d Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:30:48 +0000 Subject: [PATCH 02/33] added proper docs for RAM --- docs/Project.toml | 3 +++ docs/src/api.md | 6 +++++ src/RobustAdaptiveMetropolis.jl | 40 +++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/docs/Project.toml b/docs/Project.toml index 1814eb3..5603db2 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,8 @@ [deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" [compat] Documenter = "1" diff --git a/docs/src/api.md b/docs/src/api.md index c24b0c9..37197ef 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -12,3 +12,9 @@ MetropolisHastings ```@docs DensityModel ``` + +## Samplers + +```@docs +RAM +``` diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index f9e58c6..f937981 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -20,6 +20,46 @@ This is a simple implementation of the RAM algorithm described in [^VIH12]. $(FIELDS) +# Examples + +The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm. + +```jldoctest +julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems + +julia> # Define a Gaussian with zero mean and some covariance. + struct Gaussian{A} + Σ::A + end + +julia> # Implement the LogDensityProblems interface. 
+ LogDensityProblems.dimension(model::Gaussian) = size(model.Σ, 1) + +julia> function LogDensityProblems.logdensity(model::Gaussian, x) + d = LogDensityProblems.dimension(model) + return logpdf(MvNormal(zeros(d),model.Σ), x) + end + +julia> LogDensityProblems.capabilities(::Gaussian) = LogDensityProblems.LogDensityOrder{0}() + +julia> # Construct the model. We'll use a correlation of 0.5. + model = Gaussian([1.0 0.5; 0.5 1.0]); + +julia> # Number of samples we want in the resulting chain. + num_samples = 10_000; + +julia> # Number of warmup steps, i.e. the number of steps to adapt the covariance of the proposal. + # Note that these are not included in the resulting chain, as `discard_initial=num_warmup` + # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`. + num_warmup = 10_000; + +julia> # Sample! + chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup=10_000, progress=false); + +julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.1 +true +``` + # References [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. 
""" From 40ebb7ecdd03e0797792a6e768676d41a7073771 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:39:50 +0000 Subject: [PATCH 03/33] fixed doctest for `RAM` + added impls of `getparams` and `setparams!!` --- docs/Project.toml | 1 + src/RobustAdaptiveMetropolis.jl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 5603db2..402ce5d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,7 @@ [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index f937981..9885aef 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -25,7 +25,7 @@ $(FIELDS) The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm. ```jldoctest -julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems +julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra julia> # Define a Gaussian with zero mean and some covariance. 
struct Gaussian{A} @@ -83,6 +83,9 @@ struct RAMState{T1,L,A,T2,T3} isaccept::Bool end +AbstractMCMC.getparams(state::RAMState) = state.x +AbstractMCMC.setparams!!(state::RAMState, x) = RAMState(x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept) + function step_inner( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, From 2dec18a29f78987ee057bde781577940e98e87cd Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:40:25 +0000 Subject: [PATCH 04/33] added DocStringExtensions as a dep --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index f89912d..d4c3a95 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.8.4" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" @@ -26,6 +27,7 @@ AdvancedMHStructArraysExt = "StructArrays" AbstractMCMC = "5.6" DiffResults = "1" Distributions = "0.25" +DocStringExtensions = "0.9" FillArrays = "1" ForwardDiff = "0.10" LinearAlgebra = "1.6" From 045f8c533f763397c2e61f7fa5aaba98b7273300 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:40:52 +0000 Subject: [PATCH 05/33] bump patch version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d4c3a95..1d517f7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "AdvancedMH" uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" -version = "0.8.4" +version = "0.8.5" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" From 755a1808bf7ed81b45e160e7799b6b92e75b4294 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 18:46:21 +0000 Subject: [PATCH 06/33] 
attempt at making the doctest a bit more consistent

---
 docs/Project.toml               | 1 +
 src/RobustAdaptiveMetropolis.jl | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 402ce5d..793233c 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -4,6 +4,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

 [compat]
 Documenter = "1"
diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl
index 9885aef..62e3ebe 100644
--- a/src/RobustAdaptiveMetropolis.jl
+++ b/src/RobustAdaptiveMetropolis.jl
@@ -25,7 +25,7 @@ $(FIELDS)
 The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm.

 ```jldoctest
-julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra
+julia> using AdvancedMH, Random, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra

 julia> # Define a Gaussian with zero mean and some covariance.
        struct Gaussian{A}
           Σ::A
        end
julia> # Number of warmup steps, i.e. the number of steps to adapt the covarianc
        # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`.
        num_warmup = 10_000;

+julia> # Set the seed so get some consistency.
+       Random.seed!(1234);
+
 julia> # Sample!
- chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup=10_000, progress=false); + chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup=10_000, progress=false, initial_params=zeros(2)); -julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.1 +julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 true ``` From 5c1c6f57c558f4df656e21608bab6a552315c40f Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Wed, 4 Dec 2024 19:02:14 +0000 Subject: [PATCH 07/33] a --- src/RobustAdaptiveMetropolis.jl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 62e3ebe..8a60064 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -73,6 +73,10 @@ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSa γ::T=0.6 "initial covariance matrix" S::A=nothing + "lower bound on eigenvalues of the adapted covariance matrix" + eigenvalue_lower_bound::T=0 + "upper bound on eigenvalues of the adapted covariance matrix" + eigenvalue_upper_bound::T=Inf end # TODO: Should we record anything like the acceptance rates? @@ -164,11 +168,20 @@ function AbstractMCMC.step( ) # Take the inner step. x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state) - # Adapt the proposal. + # Accept / reject the proposal. state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept) return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new end +function valid_eigenvalues(S, lower_bound, upper_bound) + # Short-circuit if the bounds are the default. + (lower_bound == 0 && upper_bound == Inf) && return true + + # Note that this is just the diagonal when `S` is triangular. 
+    eigenvals = LinearAlgebra.eigenvals(S)
+    return all(lower_bound .<= eigenvals .<= upper_bound)
+end
+
 function AbstractMCMC.step_warmup(
     rng::Random.AbstractRNG,
     model::AbstractMCMC.LogDensityModel,
@@ -180,6 +193,12 @@ function AbstractMCMC.step_warmup(
     x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state)
     # Adapt the proposal.
     S_new, η = adapt(sampler, state, logα, U)
+    # Check that `S_new` has eigenvalues in the desired range.
+    if !valid_eigenvalues(S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound)
+        # In this case, we just keep the old `S` (p. 13 in Vihola, 2012).
+        S_new = state.S
+    end
+
     # Update state.
     state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept)
     return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new
 end

From cddf8d102cc846bf4ba3610368542737ee3703f0 Mon Sep 17 00:00:00 2001
From: Tor Erlend Fjelde
Date: Wed, 4 Dec 2024 19:02:32 +0000
Subject: [PATCH 08/33] added checks for eigenvalues according to p. 13 in Vihola (2012) (in previous commit)

---
 src/RobustAdaptiveMetropolis.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl
index 8a60064..71061ee 100644
--- a/src/RobustAdaptiveMetropolis.jl
+++ b/src/RobustAdaptiveMetropolis.jl
@@ -176,7 +176,6 @@ function valid_eigenvalues(S, lower_bound, upper_bound)
     # Short-circuit if the bounds are the default.
     (lower_bound == 0 && upper_bound == Inf) && return true
-
     # Note that this is just the diagonal when `S` is triangular.
eigenvals = LinearAlgebra.eigenvals(S) return all(lower_bound .<= eigenvals .<= upper_bound) From 29c90788ad3b1651fdf411639bd6184be90f01d8 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Thu, 5 Dec 2024 07:13:27 +0000 Subject: [PATCH 09/33] fixed default value for `eigenvalue_lower_bound` --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 71061ee..2c76604 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -74,7 +74,7 @@ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSa "initial covariance matrix" S::A=nothing "lower bound on eigenvalues of the adapted covariance matrix" - eigenvalue_lower_bound::T=0 + eigenvalue_lower_bound::T=0.0 "upper bound on eigenvalues of the adapted covariance matrix" eigenvalue_upper_bound::T=Inf end From 78a5f513a42ce0701433b75483d34d1543d8dd2c Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 13:52:41 +0000 Subject: [PATCH 10/33] applied suggestions from @mhauru --- src/RobustAdaptiveMetropolis.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 2c76604..f6df353 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -8,7 +8,6 @@ using AdvancedMH: AdvancedMH export RAM # TODO: Should we generalise this arbitrary symmetric proposals? -# TODO: Implement checking of eigenvalues to avoid degenerate covariance estimates, as in the paper. 
""" RAM @@ -71,7 +70,7 @@ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSa α::T=0.234 "negative exponent of the adaptation decay rate" γ::T=0.6 - "initial covariance matrix" + "initial lower-triangular Cholesky factor" S::A=nothing "lower bound on eigenvalues of the adapted covariance matrix" eigenvalue_lower_bound::T=0.0 @@ -177,7 +176,7 @@ function valid_eigenvalues(S, lower_bound, upper_bound) # Short-circuit if the bounds are the default. (lower_bound == 0 && upper_bound == Inf) && return true # Note that this is just the diagonal when `S` is triangular. - eigenvals = LinearAlgebra.eigenvals(S) + eigenvals = LinearAlgebra.eigvals(S) return all(lower_bound .<= eigenvals .<= upper_bound) end From 652a227fff572686cdbfd53973238743df463e0e Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 14:00:14 +0000 Subject: [PATCH 11/33] more doctesting of RAM + improved docstrings --- src/RobustAdaptiveMetropolis.jl | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index f6df353..0156b2e 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -23,7 +23,7 @@ $(FIELDS) The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm. -```jldoctest +```jldoctest ram-gaussian julia> using AdvancedMH, Random, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra julia> # Define a Gaussian with zero mean and some covariance. @@ -56,25 +56,39 @@ julia> # Set the seed so get some consistency. Random.seed!(1234); julia> # Sample! 
- chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup=10_000, progress=false, initial_params=zeros(2)); + chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)); julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 true ``` +It's also possible to restrict the eigenvalues to avoid either too small or too large values. See p. 13 in [^VIH12]. + +```jldoctest ram-gaussian` +julia> chain = sample( + model, + RAM(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), + 10_000; + chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) + ); + +julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 +true +```` + # References [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. """ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler - "target acceptance rate" + "target acceptance rate. Default: 0.234." α::T=0.234 - "negative exponent of the adaptation decay rate" + "negative exponent of the adaptation decay rate. Default: `0.6`." γ::T=0.6 - "initial lower-triangular Cholesky factor" + "initial lower-triangular Cholesky factor. Default: `nothing`." S::A=nothing - "lower bound on eigenvalues of the adapted covariance matrix" + "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`." eigenvalue_lower_bound::T=0.0 - "upper bound on eigenvalues of the adapted covariance matrix" + "upper bound on eigenvalues of the adapted Cholesky factor. Default: `Inf`." 
eigenvalue_upper_bound::T=Inf end From 5eaff522cb36a059d34866e92d2648f34d00e52b Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 14:03:06 +0000 Subject: [PATCH 12/33] added docstring for `RAMState` --- src/RobustAdaptiveMetropolis.jl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 0156b2e..59b8d8c 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -92,14 +92,30 @@ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSa eigenvalue_upper_bound::T=Inf end -# TODO: Should we record anything like the acceptance rates? +""" + RAMState + +State of the Robust Adaptive Metropolis-Hastings (RAM) algorithm. + +See also: [`RAM`](@ref). + +# Fields +$(FIELDS) +""" struct RAMState{T1,L,A,T2,T3} + "current realization of the chain." x::T1 + "log density of `x` under the target model." logprob::L + "current lower-triangular Cholesky factor." S::A + "log acceptance ratio of the previous iteration (not necessarily of `x`)." logα::T2 + "current step size for adaptation of `S`." η::T3 + "current iteration." iteration::Int + "whether the previous iteration was accepted." 
isaccept::Bool end From d8688fa7053e6b25d8a5f4e0d3ac1b33bd031b3a Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 14:25:57 +0000 Subject: [PATCH 13/33] added proper testing of RAM --- test/RobustAdaptiveMetropolis.jl | 72 ++++++++++++++++++++++++++++++++ test/runtests.jl | 2 + 2 files changed, 74 insertions(+) create mode 100644 test/RobustAdaptiveMetropolis.jl diff --git a/test/RobustAdaptiveMetropolis.jl b/test/RobustAdaptiveMetropolis.jl new file mode 100644 index 0000000..b053618 --- /dev/null +++ b/test/RobustAdaptiveMetropolis.jl @@ -0,0 +1,72 @@ +struct Gaussian{A} + Σ::A +end +LogDensityProblems.dimension(model::Gaussian) = size(model.Σ, 1) +LogDensityProblems.capabilities(::Gaussian) = LogDensityProblems.LogDensityOrder{0}() +function LogDensityProblems.logdensity(model::Gaussian, x) + d = LogDensityProblems.dimension(model) + return logpdf(MvNormal(zeros(d), model.Σ), x) +end + +Base.@kwdef struct StatesExtractor{A} + states::A = Vector{Any}() +end +function (callback::StatesExtractor)( + rng, + model, + sampler, + sample, + state, + iteration; + kwargs..., +) + if iteration == 1 + empty!(callback.states) + end + + push!(callback.states, state) +end + +@testset "RobustAdaptiveMetropolis" begin + # Testing of sampling is done in the docstring. Here we test explicit properties of the sampler. + @testset "eigenvalue bounds" begin + for (σ², lower_bound, upper_bound) in [ + (10.0, 0.5, 1.1), # should hit upper bound easily + (0.01, 0.5, 1.1), # should hit lower bound easily + ] + ρ = σ² / 2 + model = Gaussian([σ² ρ; ρ σ²]) + callback = StatesExtractor() + + # Use aggressive adaptation. 
+ sampler = + RAM(γ = 0.51, eigenvalue_lower_bound = 0.9, eigenvalue_upper_bound = 1.1) + num_warmup = 1000 + discard_initial = 0 # we're only keeping the warmup samples + chain = sample( + model, + sampler, + num_warmup; + chain_type = Chains, + num_warmup, + discard_initial, + progress = false, + initial_params = zeros(2), + callback = callback, + ) + S_samples = getproperty.(callback.states, :S) + eigval_min = map(minimum, eachrow(mapreduce(eigvals, hcat, S_samples))) + eigval_max = map(maximum, eachrow(mapreduce(eigvals, hcat, S_samples))) + @test all(>=(sampler.eigenvalue_lower_bound), eigval_min) + @test all(<=(sampler.eigenvalue_upper_bound), eigval_max) + + if σ² < lower_bound + # We should hit the lower bound. + @test all(≈(sampler.eigenvalue_lower_bound, atol = 0.05), eigval_min) + else + # We should hit the upper bound. + @test all(≈(sampler.eigenvalue_upper_bound, atol = 0.05), eigval_max) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index fd06296..76a1631 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -366,4 +366,6 @@ include("util.jl") end @testset "EMCEE" begin include("emcee.jl") end + + include("RobustAdaptiveMetropolis.jl") end From f5fc3010bf774f836ac7f9944d47a1b58ecde7bf Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 14:26:22 +0000 Subject: [PATCH 14/33] Update src/RobustAdaptiveMetropolis.jl Co-authored-by: Markus Hauru --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 59b8d8c..fd35a01 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -181,7 +181,7 @@ function AbstractMCMC.step( # Initialize the Cholesky factor of the covariance matrix. S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(eltype(sampler.γ), d)) : sampler.S) - # Constuct the initial state. + # Construct the initial state. 
lp = LogDensityProblems.logdensity(f, x) state = RAMState(x, lp, S, 0.0, 0, 1, true) From 56ec7178a79814bba0751174b6ce2684f3852c17 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 15:27:36 +0000 Subject: [PATCH 15/33] added compat entries to docs --- docs/Project.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/Project.toml b/docs/Project.toml index 793233c..2ec693c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,3 +8,8 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [compat] Documenter = "1" +Distributions = "0.25" +LinearAlgebra = "1.6" +LogDensityProblems = "2" +MCMCChains = "6.0.4" +Random = "1.6" From da431b4fb32531c8b16d15ce84ca53992f4c280d Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Fri, 6 Dec 2024 15:39:04 +0000 Subject: [PATCH 16/33] apply suggestions from @devmotion Co-authored-by: David Widmann --- src/RobustAdaptiveMetropolis.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index fd35a01..4e0d213 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -58,7 +58,7 @@ julia> # Set the seed so get some consistency. julia> # Sample! chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)); -julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 +julia> isapprox(cov(Array(chain)), model.A; rtol = 0.2) true ``` @@ -134,15 +134,14 @@ function step_inner( # Sample the proposal. x = state.x - U = randn(rng, d) - x_new = x + state.S * U + U = randn(rng, eltype(x), d) + x_new = muladd(state.S, U, x) # Compute the acceptance probability. lp = state.logprob lp_new = LogDensityProblems.logdensity(f, x_new) logα = min(lp_new - lp, zero(lp)) # `min` because we'll use this for updating - # TODO: use `randexp` instead. 
-    isaccept = log(rand(rng)) < logα
+    isaccept = randexp(rng) > -logα

     return x_new, lp_new, U, logα, isaccept
 end
@@ -177,13 +176,14 @@ function AbstractMCMC.step(
     d = LogDensityProblems.dimension(f)

     # Initial parameter state.
-    x = initial_params === nothing ? rand(rng, d) : initial_params
+    T = initial_params === nothing ? eltype(sampler.γ) : Base.promote_type(eltype(sampler.γ), eltype(initial_params))
+    x = initial_params === nothing ? rand(rng, T, d) : convert(AbstractVector{T}, initial_params)
     # Initialize the Cholesky factor of the covariance matrix.
-    S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(eltype(sampler.γ), d)) : sampler.S)
+    S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(T, d)) : convert(AbstractMatrix{T}, sampler.S))

     # Construct the initial state.
     lp = LogDensityProblems.logdensity(f, x)
-    state = RAMState(x, lp, S, 0.0, 0, 1, true)
+    state = RAMState(x, lp, S, zero(T), 0, 1, true)

     return AdvancedMH.Transition(x, lp, true), state
 end
@@ -207,7 +207,7 @@ function valid_eigenvalues(S, lower_bound, upper_bound)
     (lower_bound == 0 && upper_bound == Inf) && return true
     # Note that this is just the diagonal when `S` is triangular.
     eigenvals = LinearAlgebra.eigvals(S)
-    return all(lower_bound .<= eigenvals .<= upper_bound)
+    return all(x -> lower_bound <= x <= upper_bound, eigenvals)
 end

From 924728124b311799564e079f357f60a477d4126e Mon Sep 17 00:00:00 2001
From: Tor Fjelde
Date: Tue, 10 Dec 2024 08:50:34 +0000
Subject: [PATCH 17/33] renamed `RAM` to `RobustAdaptiveMetropolis` + removed the separate module for this

---
 src/AdvancedMH.jl               |  3 +-
 src/RobustAdaptiveMetropolis.jl | 59 ++++++++++++++-------------------
 2 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl
index 3dcbf95..906d2d1 100644
--- a/src/AdvancedMH.jl
+++ b/src/AdvancedMH.jl
@@ -3,8 +3,9 @@ module AdvancedMH

 # Import the relevant libraries.
using AbstractMCMC using Distributions -using LinearAlgebra: I +using LinearAlgebra: LinearAlgebra, I using FillArrays: Zeros +using DocStringExtensions: FIELDS using LogDensityProblems: LogDensityProblems diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 4e0d213..bf13407 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -1,15 +1,6 @@ -module RobustAdaptiveMetropolis - -using Random, LogDensityProblems, LinearAlgebra, AbstractMCMC -using DocStringExtensions: FIELDS - -using AdvancedMH: AdvancedMH - -export RAM - # TODO: Should we generalise this arbitrary symmetric proposals? """ - RAM + RobustAdaptiveMetropolis Robust Adaptive Metropolis-Hastings (RAM). @@ -56,7 +47,7 @@ julia> # Set the seed so get some consistency. Random.seed!(1234); julia> # Sample! - chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)); + chain = sample(model, RobustAdaptiveMetropolis(), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)); julia> isapprox(cov(Array(chain)), model.A; rtol = 0.2) true @@ -67,7 +58,7 @@ It's also possible to restrict the eigenvalues to avoid either too small or too ```jldoctest ram-gaussian` julia> chain = sample( model, - RAM(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), + RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) ); @@ -79,7 +70,7 @@ true # References [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. """ -Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler +Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler "target acceptance rate. Default: 0.234." α::T=0.234 "negative exponent of the adaptation decay rate. Default: `0.6`." 
@@ -93,16 +84,16 @@ Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSa end """ - RAMState + RobustAdaptiveMetropolisState State of the Robust Adaptive Metropolis-Hastings (RAM) algorithm. -See also: [`RAM`](@ref). +See also: [`RobustAdaptiveMetropolis`](@ref). # Fields $(FIELDS) """ -struct RAMState{T1,L,A,T2,T3} +struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3} "current realization of the chain." x::T1 "log density of `x` under the target model." @@ -119,14 +110,14 @@ struct RAMState{T1,L,A,T2,T3} isaccept::Bool end -AbstractMCMC.getparams(state::RAMState) = state.x -AbstractMCMC.setparams!!(state::RAMState, x) = RAMState(x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept) +AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x +AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) = RobustAdaptiveMetropolisState(x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept) -function step_inner( +function ram_step_inner( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, - sampler::RAM, - state::RAMState + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState ) # This is the initial state. f = model.logdensity @@ -146,8 +137,7 @@ function step_inner( return x_new, lp_new, U, logα, isaccept end -function adapt(sampler::RAM, state::RAMState, logα::Real, U::AbstractVector) - # Update ` +function ram_adapt(sampler::RobustAdaptiveMetropolis, state::RobustAdaptiveMetropolisState, logα::Real, U::AbstractVector) Δα = exp(logα) - sampler.α S = state.S # TODO: Make this configurable by defining a more general path. @@ -167,7 +157,7 @@ end function AbstractMCMC.step( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, - sampler::RAM; + sampler::RobustAdaptiveMetropolis; initial_params=nothing, kwargs... ) @@ -183,7 +173,7 @@ function AbstractMCMC.step( # Construct the initial state. 
lp = LogDensityProblems.logdensity(f, x) - state = RAMState(x, lp, S, zero(T), 0, 1, true) + state = RobustAdaptiveMetropolisState(x, lp, S, zero(T), 0, 1, true) return AdvancedMH.Transition(x, lp, true), state end @@ -191,14 +181,14 @@ end function AbstractMCMC.step( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, - sampler::RAM, - state::RAMState; + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState; kwargs... ) # Take the inner step. - x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state) + x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) # Accept / reject the proposal. - state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept) + state_new = RobustAdaptiveMetropolisState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept) return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new end @@ -213,14 +203,14 @@ end function AbstractMCMC.step_warmup( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, - sampler::RAM, - state::RAMState; + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState; kwargs... ) # Take the inner step. - x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state) + x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) # Adapt the proposal. - S_new, η = adapt(sampler, state, logα, U) + S_new, η = ram_adapt(sampler, state, logα, U) # Check that `S_new` has eigenvalues in the desired range. if !valid_eigenvalues(S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound) # In this case, we just keep the old `S` (p. 13 in Vihola, 2012). @@ -228,8 +218,7 @@ function AbstractMCMC.step_warmup( end # Update state. - state_new = RAMState(isaccept ? x_new : state.x, isaccept ? 
lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept) + state_new = RobustAdaptiveMetropolisState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept) return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new end -end From 47641201f2590bc130e0af4d6285ffca2b06b8e0 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 08:51:09 +0000 Subject: [PATCH 18/33] formatting --- src/RobustAdaptiveMetropolis.jl | 87 ++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index bf13407..eed6bca 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -70,17 +70,18 @@ true # References [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. """ -Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler +Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <: + AdvancedMH.MHSampler "target acceptance rate. Default: 0.234." - α::T=0.234 + α::T = 0.234 "negative exponent of the adaptation decay rate. Default: `0.6`." - γ::T=0.6 + γ::T = 0.6 "initial lower-triangular Cholesky factor. Default: `nothing`." - S::A=nothing + S::A = nothing "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`." - eigenvalue_lower_bound::T=0.0 + eigenvalue_lower_bound::T = 0.0 "upper bound on eigenvalues of the adapted Cholesky factor. Default: `Inf`." 
- eigenvalue_upper_bound::T=Inf + eigenvalue_upper_bound::T = Inf end """ @@ -111,13 +112,22 @@ struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3} end AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x -AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) = RobustAdaptiveMetropolisState(x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept) +AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) = + RobustAdaptiveMetropolisState( + x, + state.logprob, + state.S, + state.logα, + state.η, + state.iteration, + state.isaccept, + ) function ram_step_inner( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, sampler::RobustAdaptiveMetropolis, - state::RobustAdaptiveMetropolisState + state::RobustAdaptiveMetropolisState, ) # This is the initial state. f = model.logdensity @@ -137,7 +147,12 @@ function ram_step_inner( return x_new, lp_new, U, logα, isaccept end -function ram_adapt(sampler::RobustAdaptiveMetropolis, state::RobustAdaptiveMetropolisState, logα::Real, U::AbstractVector) +function ram_adapt( + sampler::RobustAdaptiveMetropolis, + state::RobustAdaptiveMetropolisState, + logα::Real, + U::AbstractVector, +) Δα = exp(logα) - sampler.α S = state.S # TODO: Make this configurable by defining a more general path. @@ -158,18 +173,25 @@ function AbstractMCMC.step( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, sampler::RobustAdaptiveMetropolis; - initial_params=nothing, - kwargs... + initial_params = nothing, + kwargs..., ) # This is the initial state. f = model.logdensity d = LogDensityProblems.dimension(f) # Initial parameter state. - T = initial_params === nothing ? eltype(sampler.γ) : Base.promote_type(eltype(sampler.γ), eltype(initial_params)) - x = initial_params === nothing ? rand(rng, T, d) : convert(AbstractVector{T}, initial_params) + T = + initial_params === nothing ? 
eltype(sampler.γ) : + Base.promote_type(eltype(sampler.γ), eltype(initial_params)) + x = + initial_params === nothing ? rand(rng, T, d) : + convert(AbstractVector{T}, initial_params) # Initialize the Cholesky factor of the covariance matrix. - S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(T, d)) : convert(AbstractMatrix{T}, sampler.S)) + S = LowerTriangular( + sampler.S === nothing ? diagm(0 => ones(T, d)) : + convert(AbstractMatrix{T}, sampler.S), + ) # Construct the initial state. lp = LogDensityProblems.logdensity(f, x) @@ -183,13 +205,22 @@ function AbstractMCMC.step( model::AbstractMCMC.LogDensityModel, sampler::RobustAdaptiveMetropolis, state::RobustAdaptiveMetropolisState; - kwargs... + kwargs..., ) # Take the inner step. x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) # Accept / reject the proposal. - state_new = RobustAdaptiveMetropolisState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept) - return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new + state_new = RobustAdaptiveMetropolisState( + isaccept ? x_new : state.x, + isaccept ? lp_new : state.logprob, + state.S, + logα, + state.η, + state.iteration + 1, + isaccept, + ) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), + state_new end function valid_eigenvalues(S, lower_bound, upper_bound) @@ -205,20 +236,32 @@ function AbstractMCMC.step_warmup( model::AbstractMCMC.LogDensityModel, sampler::RobustAdaptiveMetropolis, state::RobustAdaptiveMetropolisState; - kwargs... + kwargs..., ) # Take the inner step. x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state) # Adapt the proposal. S_new, η = ram_adapt(sampler, state, logα, U) # Check that `S_new` has eigenvalues in the desired range. 
- if !valid_eigenvalues(S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound) + if !valid_eigenvalues( + S_new, + sampler.eigenvalue_lower_bound, + sampler.eigenvalue_upper_bound, + ) # In this case, we just keep the old `S` (p. 13 in Vihola, 2012). S_new = state.S end # Update state. - state_new = RobustAdaptiveMetropolisState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept) - return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new + state_new = RobustAdaptiveMetropolisState( + isaccept ? x_new : state.x, + isaccept ? lp_new : state.logprob, + S_new, + logα, + η, + state.iteration + 1, + isaccept, + ) + return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), + state_new end - From 11f3b646fcebe7a05e6beb97109c9b23a1f02713 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 08:53:10 +0000 Subject: [PATCH 19/33] made the docstring for RAM a bit nicer --- src/RobustAdaptiveMetropolis.jl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index eed6bca..7693615 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -14,8 +14,8 @@ $(FIELDS) The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm. -```jldoctest ram-gaussian -julia> using AdvancedMH, Random, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra +```jldoctest ram-gaussian; setup=:(using Random; Random.seed!(1234);) +julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra julia> # Define a Gaussian with zero mean and some covariance. struct Gaussian{A} @@ -43,11 +43,13 @@ julia> # Number of warmup steps, i.e. the number of steps to adapt the covarianc # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`. 
num_warmup = 10_000; -julia> # Set the seed so get some consistency. - Random.seed!(1234); - julia> # Sample! - chain = sample(model, RobustAdaptiveMetropolis(), 10_000; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)); + chain = sample( + model, + RobustAdaptiveMetropolis(), + num_samples; + chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) + ); julia> isapprox(cov(Array(chain)), model.A; rtol = 0.2) true @@ -59,7 +61,7 @@ It's also possible to restrict the eigenvalues to avoid either too small or too julia> chain = sample( model, RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), - 10_000; + num_samples; chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) ); From df4feb1b0a52e17eff19b7c884390485e6334686 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:00:07 +0000 Subject: [PATCH 20/33] fixed doctest --- src/AdvancedMH.jl | 6 ++---- src/RobustAdaptiveMetropolis.jl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl index 906d2d1..a3d2ece 100644 --- a/src/AdvancedMH.jl +++ b/src/AdvancedMH.jl @@ -23,7 +23,8 @@ export SymmetricRandomWalkProposal, Ensemble, StretchProposal, - MALA + MALA, + RobustAdaptiveMetropolis # Reexports export sample, MCMCThreads, MCMCDistributed, MCMCSerial @@ -160,9 +161,6 @@ include("proposal.jl") include("mh-core.jl") include("emcee.jl") include("MALA.jl") - include("RobustAdaptiveMetropolis.jl") -using .RobustAdaptiveMetropolis -export RAM end # module AdvancedMH diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 7693615..0b159ab 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -51,13 +51,13 @@ julia> # Sample! 
chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2) ); -julia> isapprox(cov(Array(chain)), model.A; rtol = 0.2) +julia> isapprox(cov(Array(chain)), model.Σ; rtol = 0.2) true ``` It's also possible to restrict the eigenvalues to avoid either too small or too large values. See p. 13 in [^VIH12]. -```jldoctest ram-gaussian` +```jldoctest ram-gaussian julia> chain = sample( model, RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0), @@ -144,7 +144,7 @@ function ram_step_inner( lp = state.logprob lp_new = LogDensityProblems.logdensity(f, x_new) logα = min(lp_new - lp, zero(lp)) # `min` because we'll use this for updating - isaccept = randexp(rng) > -logα + isaccept = Random.randexp(rng) > -logα return x_new, lp_new, U, logα, isaccept end @@ -159,14 +159,14 @@ function ram_adapt( S = state.S # TODO: Make this configurable by defining a more general path. η = state.iteration^(-sampler.γ) - ΔS = η * abs(Δα) * S * U / norm(U) + ΔS = η * abs(Δα) * S * U / LinearAlgebra.norm(U) # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it. S_new = if sign(Δα) == 1 # One rank update. - LinearAlgebra.lowrankupdate(Cholesky(S), ΔS).L + LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S), ΔS).L else # One rank downdate. - LinearAlgebra.lowrankdowndate(Cholesky(S), ΔS).L + LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S), ΔS).L end return S_new, η end @@ -190,8 +190,8 @@ function AbstractMCMC.step( initial_params === nothing ? rand(rng, T, d) : convert(AbstractVector{T}, initial_params) # Initialize the Cholesky factor of the covariance matrix. - S = LowerTriangular( - sampler.S === nothing ? diagm(0 => ones(T, d)) : + S = LinearAlgebra.LowerTriangular( + sampler.S === nothing ? 
LinearAlgebra.diagm(0 => ones(T, d)) : convert(AbstractMatrix{T}, sampler.S), ) From f78449224beb6cc5c255d43b94e2f34dfef1c9f0 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:00:54 +0000 Subject: [PATCH 21/33] formatting --- src/RobustAdaptiveMetropolis.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 0b159ab..f87c301 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -183,12 +183,16 @@ function AbstractMCMC.step( d = LogDensityProblems.dimension(f) # Initial parameter state. - T = - initial_params === nothing ? eltype(sampler.γ) : + T = if initial_params === nothing + eltype(sampler.γ) + else Base.promote_type(eltype(sampler.γ), eltype(initial_params)) - x = - initial_params === nothing ? rand(rng, T, d) : + end + x = if initial_params === nothing + rand(rng, T, d) + else convert(AbstractVector{T}, initial_params) + end # Initialize the Cholesky factor of the covariance matrix. S = LinearAlgebra.LowerTriangular( sampler.S === nothing ? LinearAlgebra.diagm(0 => ones(T, d)) : From 45820d2453d13989bb9847ef73739b843252eeef Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:07:39 +0000 Subject: [PATCH 22/33] minor improvement to docstring of RAM --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index f87c301..5495bd2 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -78,7 +78,7 @@ Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T} α::T = 0.234 "negative exponent of the adaptation decay rate. Default: `0.6`." γ::T = 0.6 - "initial lower-triangular Cholesky factor. Default: `nothing`." + "initial lower-triangular Cholesky factor. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`." 
S::A = nothing "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`." eigenvalue_lower_bound::T = 0.0 From 7405a19cf01c68a1d44dfcacfbc49d14a4f789a3 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:13:30 +0000 Subject: [PATCH 23/33] fused scalar operations --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 5495bd2..96099f8 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -159,7 +159,7 @@ function ram_adapt( S = state.S # TODO: Make this configurable by defining a more general path. η = state.iteration^(-sampler.γ) - ΔS = η * abs(Δα) * S * U / LinearAlgebra.norm(U) + ΔS = (η * abs(Δα)) * S * U / LinearAlgebra.norm(U) # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it. S_new = if sign(Δα) == 1 # One rank update. From 5dce26540ec8b4dceb833629693f644025981504 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:13:34 +0000 Subject: [PATCH 24/33] added dimensionality check of the provided `S` matrix --- src/RobustAdaptiveMetropolis.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 96099f8..a0aa8bb 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -194,10 +194,16 @@ function AbstractMCMC.step( convert(AbstractVector{T}, initial_params) end # Initialize the Cholesky factor of the covariance matrix. - S = LinearAlgebra.LowerTriangular( - sampler.S === nothing ? LinearAlgebra.diagm(0 => ones(T, d)) : - convert(AbstractMatrix{T}, sampler.S), - ) + S_data if sampler.S === nothing + LinearAlgebra.diagm(0 => ones(T, d)) + else + # Check the dimensionality of the provided `S`. 
+ if size(sampler.S) != (d, d) + throw(ArgumentError("The provided `S` has the wrong dimensionality.")) + end + convert(AbstractMatrix{T}, sampler.S) + end + S = LinearAlgebra.LowerTriangular(S_data) # Construct the initial state. lp = LogDensityProblems.logdensity(f, x) From 5ee44e3da3057dfe17dfded1e1d3a19da81159a1 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:22:44 +0000 Subject: [PATCH 25/33] fixed typo --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index a0aa8bb..a584994 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -194,7 +194,7 @@ function AbstractMCMC.step( convert(AbstractVector{T}, initial_params) end # Initialize the Cholesky factor of the covariance matrix. - S_data if sampler.S === nothing + S_data = if sampler.S === nothing LinearAlgebra.diagm(0 => ones(T, d)) else # Check the dimensionality of the provided `S`. 
From 37a2189705023aaff96345ca6914b9bfa662fa72 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Tue, 10 Dec 2024 09:33:12 +0000 Subject: [PATCH 26/33] Update docs/src/api.md Co-authored-by: David Widmann --- docs/src/api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/api.md b/docs/src/api.md index 37197ef..9924859 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -16,5 +16,5 @@ DensityModel ## Samplers ```@docs -RAM +RobustAdaptiveMetropolis ``` From 5193119ed9bc06c13b0554b620445aed94740708 Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:33:31 +0000 Subject: [PATCH 27/33] use `randn` instead of `rand` for initialisation --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index a584994..fe33feb 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -189,7 +189,7 @@ function AbstractMCMC.step( Base.promote_type(eltype(sampler.γ), eltype(initial_params)) end x = if initial_params === nothing - rand(rng, T, d) + randn(rng, T, d) else convert(AbstractVector{T}, initial_params) end From d4a144e533c041afdcdb9a58e2856bd8d440071f Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:36:25 +0000 Subject: [PATCH 28/33] added an explanation of the `min` --- src/RobustAdaptiveMetropolis.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index fe33feb..88013ea 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -143,7 +143,13 @@ function ram_step_inner( # Compute the acceptance probability. lp = state.logprob lp_new = LogDensityProblems.logdensity(f, x_new) - logα = min(lp_new - lp, zero(lp)) # `min` because we'll use this for updating + # Technically, the `min` here is unnecessary for sampling according to `min(..., 1)`. 
+ # However, `ram_adapt` assumes that `logα` actually represents the log acceptance probability + # and is thus bounded at 0. Moreover, users might be interested in inspecting the average + # acceptance rate to check that the algorithm achieves something similar to the target rate. + # Hence, it's a bit more convenient for the user if we just perform the `min` here + # so they can just take an average of (`exp` of) the `logα` values. + logα = min(lp_new - lp, zero(lp)) isaccept = Random.randexp(rng) > -logα return x_new, lp_new, U, logα, isaccept From 6295e7821902014477e842b09a1b77cc53c80b54 Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Tue, 10 Dec 2024 09:37:09 +0000 Subject: [PATCH 29/33] Update test/RobustAdaptiveMetropolis.jl Co-authored-by: David Widmann --- test/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/RobustAdaptiveMetropolis.jl b/test/RobustAdaptiveMetropolis.jl index b053618..fe82f14 100644 --- a/test/RobustAdaptiveMetropolis.jl +++ b/test/RobustAdaptiveMetropolis.jl @@ -40,7 +40,7 @@ end # Use aggressive adaptation. 
sampler = - RAM(γ = 0.51, eigenvalue_lower_bound = 0.9, eigenvalue_upper_bound = 1.1) + RobustAdaptiveMetropolis(γ = 0.51, eigenvalue_lower_bound = 0.9, eigenvalue_upper_bound = 1.1) num_warmup = 1000 discard_initial = 0 # we're only keeping the warmup samples chain = sample( From 6f8fda42e78baec96452ce0e1994f08aaa10456a Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 09:51:02 +0000 Subject: [PATCH 30/33] use explicit `Cholesky` constructor for backwards compat --- src/RobustAdaptiveMetropolis.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 88013ea..81d5abe 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -169,10 +169,10 @@ function ram_adapt( # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it. S_new = if sign(Δα) == 1 # One rank update. - LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S), ΔS).L + LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L else # One rank downdate. - LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S), ΔS).L + LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L end return S_new, η end From 5815a9b41e0e1b1068824bc2a0f9135e78e7a46a Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Tue, 10 Dec 2024 10:38:11 +0000 Subject: [PATCH 31/33] Fix typo: ```` -> ``` --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 81d5abe..02a7155 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -67,7 +67,7 @@ julia> chain = sample( julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2 true -```` +``` # References [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing. 
From 1b38ca63248d31368f318c1462f5a40c8470d0eb Mon Sep 17 00:00:00 2001 From: Tor Fjelde Date: Tue, 10 Dec 2024 11:37:35 +0000 Subject: [PATCH 32/33] formatted according to `blue` --- src/RobustAdaptiveMetropolis.jl | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 02a7155..1fbd248 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -114,16 +114,11 @@ struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3} end AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x -AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) = - RobustAdaptiveMetropolisState( - x, - state.logprob, - state.S, - state.logα, - state.η, - state.iteration, - state.isaccept, +function AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) + return RobustAdaptiveMetropolisState( + x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept ) +end function ram_step_inner( rng::Random.AbstractRNG, @@ -181,7 +176,7 @@ function AbstractMCMC.step( rng::Random.AbstractRNG, model::AbstractMCMC.LogDensityModel, sampler::RobustAdaptiveMetropolis; - initial_params = nothing, + initial_params=nothing, kwargs..., ) # This is the initial state. @@ -262,9 +257,7 @@ function AbstractMCMC.step_warmup( S_new, η = ram_adapt(sampler, state, logα, U) # Check that `S_new` has eigenvalues in the desired range. if !valid_eigenvalues( - S_new, - sampler.eigenvalue_lower_bound, - sampler.eigenvalue_upper_bound, + S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound ) # In this case, we just keep the old `S` (p. 13 in Vihola, 2012). 
S_new = state.S From f426d0d328fd40807ef96c80852dad1f56a21c3c Mon Sep 17 00:00:00 2001 From: Tor Erlend Fjelde Date: Tue, 10 Dec 2024 11:38:21 +0000 Subject: [PATCH 33/33] Update src/RobustAdaptiveMetropolis.jl Co-authored-by: Markus Hauru --- src/RobustAdaptiveMetropolis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl index 1fbd248..e0284f3 100644 --- a/src/RobustAdaptiveMetropolis.jl +++ b/src/RobustAdaptiveMetropolis.jl @@ -78,7 +78,7 @@ Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T} α::T = 0.234 "negative exponent of the adaptation decay rate. Default: `0.6`." γ::T = 0.6 - "initial lower-triangular Cholesky factor. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`." + "initial lower-triangular Cholesky factor of the covariance matrix. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`, which is interpreted as the identity matrix." S::A = nothing "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`." eigenvalue_lower_bound::T = 0.0