diff --git a/src/stan/mcmc/auto_adaptation.hpp b/src/stan/mcmc/auto_adaptation.hpp new file mode 100644 index 0000000000..8ff6df92ca --- /dev/null +++ b/src/stan/mcmc/auto_adaptation.hpp @@ -0,0 +1,314 @@ +#ifndef STAN_MCMC_AUTO_ADAPTATION_HPP +#define STAN_MCMC_AUTO_ADAPTATION_HPP + +#include +#include +#include + +namespace stan { + +namespace mcmc { +template +struct log_prob_wrapper_covar { + const Model& model_; + explicit log_prob_wrapper_covar(const Model& model) : model_(model) {} + + template + T operator()(const Eigen::Matrix& q) const { + return model_.template log_prob( + const_cast&>(q), &std::cout); + } +}; + +namespace internal { +/** + * Compute the covariance of data in Y. + * + * Columns of Y are different variables. Rows are different samples. + * + * When there is only one row in Y, return a covariance matrix of the expected + * size filled with zeros. + * + * @param Y Data + * @return Covariance of Y + */ +Eigen::MatrixXd covariance(const Eigen::MatrixXd& Y) { + stan::math::check_nonzero_size("covariance", "Y", Y); + + Eigen::MatrixXd centered = Y.rowwise() - Y.colwise().mean(); + return centered.transpose() * centered / std::max(centered.rows() - 1.0, 1.0); +} + +/** + * Compute the largest magnitude eigenvalue of a symmetric matrix using the + * power method. The function f should return the product of that matrix with an + * abitrary vector. + * + * f should take one Eigen::VectorXd argument, x, and return the product of a + * matrix with x as an Eigen::VectorXd argument of the same size. + * + * The eigenvalue is estimated iteratively. If the kth estimate is e_k, then the + * function returns when either abs(e_{k + 1} - e_k) < tol * abs(e_k) or the + * maximum number of iterations have been performed + * + * This means the returned eigenvalue might not be computed to full precision + * + * @param initial_guess Initial guess of the eigenvector of the largest + * eigenvalue + * @param[in,out] max_iterations Maximum number of power iterations, on return + * number of iterations used + * @param[in,out] tol Relative tolerance, on return the relative error in the + * eigenvalue estimate + * @return Largest magnitude eigenvalue of operator f + */ +template +double power_method(F& f, const Eigen::VectorXd& initial_guess, + int& max_iterations, double& tol) { + Eigen::VectorXd v = initial_guess; + double eval = 0.0; + Eigen::VectorXd Av = f(v); + stan::math::check_matching_sizes("power_method", "matrix vector product", Av, + "vector", v); + + int i = 0; + for (; i < max_iterations; ++i) { + double v_norm = v.norm(); + double new_eval = v.dot(Av) / (v_norm * v_norm); + if (i == max_iterations - 1 + || std::abs(new_eval - eval) <= tol * std::abs(eval)) { + tol = std::abs(new_eval - eval) / std::abs(eval); + eval = new_eval; + max_iterations = i + 1; + break; + } + + eval = new_eval; + v = Av / Av.norm(); + + Av = f(v); + } + + return eval; +} + +/** + * Compute the largest eigenvalue of the sample covariance rescaled by a metric, + * that is, the largest eigenvalue of L^{-1} \Sigma L^{-T} + * + * @param L Cholesky decomposition of Metric + * @param Sigma Sample covariance + * @return Largest eigenvalue + */ +double eigenvalue_scaled_covariance(const Eigen::MatrixXd& L, + const Eigen::MatrixXd& Sigma) { + Eigen::MatrixXd S = L.template triangularView() + .solve(L.template triangularView() + .solve(Sigma) + .transpose()) + .transpose(); + + auto Sx = [&](Eigen::VectorXd x) -> Eigen::VectorXd { return S * x; }; + + int max_iterations = 200; + double tol = 1e-5; + + return internal::power_method(Sx, Eigen::VectorXd::Random(Sigma.cols()), + max_iterations, tol); +} + +/** + * Compute the largest eigenvalue of the Hessian of the log density rescaled by + * a metric, that is, the largest eigenvalue of L^T \nabla^2_{qq} H(q) L + * + * @tparam Model Type of model + * @param model Defines the log density + * @param q Point around which to compute the Hessian + * @param L Cholesky decomposition of Metric + * @return Largest eigenvalue + */ +template +double eigenvalue_scaled_hessian(const Model& model, const Eigen::MatrixXd& L, + const Eigen::VectorXd& q) { + Eigen::VectorXd eigenvalues; + Eigen::MatrixXd eigenvectors; + + auto hessian_vector = [&](const Eigen::VectorXd& x) -> Eigen::VectorXd { + double lp; + Eigen::VectorXd grad1; + Eigen::VectorXd grad2; + // stan::math::hessian_times_vector(log_prob_wrapper_covar(model), q, + // x, lp, Ax); + double dx = 1e-5; + Eigen::VectorXd dr = L * x * dx; + stan::math::gradient(log_prob_wrapper_covar(model), q + dr / 2.0, lp, + grad1); + stan::math::gradient(log_prob_wrapper_covar(model), q - dr / 2.0, lp, + grad2); + return L.transpose() * (grad1 - grad2) / dx; + }; + + int max_iterations = 200; + double tol = 1e-5; + + return internal::power_method( + hessian_vector, Eigen::VectorXd::Random(q.size()), max_iterations, tol); +} +} // namespace internal + +class auto_adaptation : public windowed_adaptation { + public: + explicit auto_adaptation(int n) : windowed_adaptation("covariance") {} + /** + * Update the metric if at the end of an adaptation window. + * + * @tparam Model Type of model + * @param model Defines the log density + * @param covar[out] New metric + * @param covar_is_diagonal[out] Set to true if metric is diagonal, false + * otherwise + * @param q New MCMC draw + * @return True if this was the end of an adaptation window, false otherwise + */ + template + bool learn_covariance(const Model& model, Eigen::MatrixXd& covar, + bool& covar_is_diagonal, const Eigen::VectorXd& q) { + if (adaptation_window()) + qs_.push_back(q); + + if (end_adaptation_window()) { + compute_next_window(); + + int M = q.size(); + int N = qs_.size(); + + Eigen::MatrixXd Y = Eigen::MatrixXd::Zero(M, N); + std::vector idxs(N); + for (int i = 0; i < qs_.size(); i++) + idxs[i] = i; + + std::random_shuffle(idxs.begin(), idxs.end()); + for (int i = 0; i < qs_.size(); i++) + Y.block(0, i, M, 1) = qs_[idxs[i]]; + + try { + bool use_dense = false; + // 's' stands for selection + // 'r' stands for refinement + for (char state : {'s', 'r'}) { + Eigen::MatrixXd Ytrain; + Eigen::MatrixXd Ytest; + + // If in selection state + if (state == 's') { + int Mtest; + Mtest = static_cast(0.2 * Y.cols()); + if (Mtest < 5) { + Mtest = 5; + } + + if (Y.cols() < 10) { + throw std::runtime_error( + "Each warmup stage must have at least 10 samples"); + } + + Ytrain = Y.block(0, 0, M, Y.cols() - Mtest); + Ytest = Y.block(0, Ytrain.cols(), M, Mtest); + } else { + Ytrain = Y; + } + + Eigen::MatrixXd cov_train + = (Ytrain.cols() > 0) ? internal::covariance(Ytrain.transpose()) + : Eigen::MatrixXd::Zero(M, M); + Eigen::MatrixXd cov_test + = (Ytest.cols() > 0) ? internal::covariance(Ytest.transpose()) + : Eigen::MatrixXd::Zero(M, M); + + Eigen::MatrixXd dense = (N / (N + 5.0)) * cov_train + + 1e-3 * (5.0 / (N + 5.0)) + * Eigen::MatrixXd::Identity( + cov_train.rows(), cov_train.cols()); + + Eigen::MatrixXd diag = dense.diagonal().asDiagonal(); + + covar = dense; + + // If in selection state + if (state == 's') { + Eigen::MatrixXd L_dense = dense.llt().matrixL(); + Eigen::MatrixXd L_diag + = diag.diagonal().array().sqrt().matrix().asDiagonal(); + + double low_eigenvalue_dense + = -1.0 + / internal::eigenvalue_scaled_covariance(L_dense, cov_test); + double low_eigenvalue_diag + = -1.0 + / internal::eigenvalue_scaled_covariance(L_diag, cov_test); + + double c_dense = 0.0; + double c_diag = 0.0; + for (int i = 0; i < 5; i++) { + double high_eigenvalue_dense + = internal::eigenvalue_scaled_hessian( + model, L_dense, Ytest.block(0, i, M, 1)); + double high_eigenvalue_diag = internal::eigenvalue_scaled_hessian( + model, L_diag, Ytest.block(0, i, M, 1)); + + c_dense = std::max(c_dense, std::sqrt(high_eigenvalue_dense + / low_eigenvalue_dense)); + c_diag = std::max(c_diag, std::sqrt(high_eigenvalue_diag + / low_eigenvalue_diag)); + } + + std::cout << "adapt: " << adapt_window_counter_ + << ", which: dense, max: " << c_dense << std::endl; + std::cout << "adapt: " << adapt_window_counter_ + << ", which: diag, max: " << c_diag << std::endl; + + if (c_dense < c_diag) { + use_dense = true; + } else { + use_dense = false; + } + } else { + if (use_dense) { + covar = dense; + covar_is_diagonal = false; + } else { + covar = diag; + covar_is_diagonal = true; + } + } + } + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + std::cout + << "Exception while using auto adaptation, falling back to diagonal" + << std::endl; + Eigen::MatrixXd cov = internal::covariance(Y.transpose()); + covar = ((N / (N + 5.0)) * cov.diagonal() + + 1e-3 * (5.0 / (N + 5.0)) * Eigen::VectorXd::Ones(cov.cols())) + .asDiagonal(); + covar_is_diagonal = true; + } + + ++adapt_window_counter_; + qs_.clear(); + + return true; + } + + ++adapt_window_counter_; + return false; + } + + protected: + std::vector qs_; +}; + +} // namespace mcmc + +} // namespace stan + +#endif diff --git a/src/stan/mcmc/hmc/hamiltonians/auto_e_metric.hpp b/src/stan/mcmc/hmc/hamiltonians/auto_e_metric.hpp new file mode 100644 index 0000000000..3041252a65 --- /dev/null +++ b/src/stan/mcmc/hmc/hamiltonians/auto_e_metric.hpp @@ -0,0 +1,70 @@ +#ifndef STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_METRIC_HPP +#define STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_METRIC_HPP + +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace mcmc { + +// Euclidean manifold with dense metric +template +class auto_e_metric : public base_hamiltonian { + public: + explicit auto_e_metric(const Model& model) + : base_hamiltonian(model) {} + + double T(auto_e_point& z) { + return 0.5 * z.p.transpose() * z.inv_e_metric_ * z.p; + } + + double tau(auto_e_point& z) { return T(z); } + + double phi(auto_e_point& z) { return this->V(z); } + + double dG_dt(auto_e_point& z, callbacks::logger& logger) { + return 2 * T(z) - z.q.dot(z.g); + } + + Eigen::VectorXd dtau_dq(auto_e_point& z, callbacks::logger& logger) { + return Eigen::VectorXd::Zero(this->model_.num_params_r()); + } + + Eigen::VectorXd dtau_dp(auto_e_point& z) { + if (z.is_diagonal_) { + return z.inv_e_metric_.diagonal().cwiseProduct(z.p); + } else { + return z.inv_e_metric_ * z.p; + } + } + + Eigen::VectorXd dphi_dq(auto_e_point& z, callbacks::logger& logger) { + return z.g; + } + + void sample_p(auto_e_point& z, BaseRNG& rng) { + typedef typename stan::math::index_type::type idx_t; + boost::variate_generator > rand_gaus( + rng, boost::normal_distribution<>()); + + if (z.is_diagonal_) { + for (int i = 0; i < z.p.size(); ++i) + z.p(i) = rand_gaus() / sqrt(z.inv_e_metric_(i, i)); + } else { + Eigen::VectorXd u(z.p.size()); + + for (idx_t i = 0; i < u.size(); ++i) + u(i) = rand_gaus(); + + z.p = z.inv_e_metric_.llt().matrixU().solve(u); + } + } +}; + +} // namespace mcmc +} // namespace stan +#endif diff --git a/src/stan/mcmc/hmc/hamiltonians/auto_e_point.hpp b/src/stan/mcmc/hmc/hamiltonians/auto_e_point.hpp new file mode 100644 index 0000000000..3a0c2fa8e1 --- /dev/null +++ b/src/stan/mcmc/hmc/hamiltonians/auto_e_point.hpp @@ -0,0 +1,66 @@ +#ifndef STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_POINT_HPP +#define STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_POINT_HPP + +#include +#include + +namespace stan { +namespace mcmc { +/** + * Point in a phase space with a base + * Euclidean manifold with auto metric + */ +class auto_e_point : public ps_point { + public: + /** + * Inverse mass matrix. + */ + Eigen::MatrixXd inv_e_metric_; + + /** + * Is inv_e_metric_ diagonal or not + */ + bool is_diagonal_; + + /** + * Construct a auto point in n-dimensional phase space + * with identity matrix as inverse mass matrix. + * + * @param n number of dimensions + */ + explicit auto_e_point(int n) + : ps_point(n), inv_e_metric_(n, n), is_diagonal_(true) { + inv_e_metric_.setIdentity(); + } + + /** + * Set elements of mass matrix + * + * @param inv_e_metric initial mass matrix + */ + void set_metric(const Eigen::MatrixXd& inv_e_metric) { + inv_e_metric_ = inv_e_metric; + is_diagonal_ = false; + } + + /** + * Write elements of mass matrix to string and handoff to writer. + * + * @param writer Stan writer callback + */ + inline void write_metric(stan::callbacks::writer& writer) { + writer("Elements of inverse mass matrix:"); + for (int i = 0; i < inv_e_metric_.rows(); ++i) { + std::stringstream inv_e_metric_ss; + inv_e_metric_ss << inv_e_metric_(i, 0); + for (int j = 1; j < inv_e_metric_.cols(); ++j) + inv_e_metric_ss << ", " << inv_e_metric_(i, j); + writer(inv_e_metric_ss.str()); + } + } +}; + +} // namespace mcmc +} // namespace stan + +#endif diff --git a/src/stan/mcmc/hmc/nuts/adapt_auto_e_nuts.hpp b/src/stan/mcmc/hmc/nuts/adapt_auto_e_nuts.hpp new file mode 100644 index 0000000000..56ebe6c160 --- /dev/null +++ b/src/stan/mcmc/hmc/nuts/adapt_auto_e_nuts.hpp @@ -0,0 +1,57 @@ +#ifndef STAN_MCMC_HMC_NUTS_ADAPT_AUTO_E_NUTS_HPP +#define STAN_MCMC_HMC_NUTS_ADAPT_AUTO_E_NUTS_HPP + +#include +#include +#include + +namespace stan { +namespace mcmc { +/** + * The No-U-Turn sampler (NUTS) with multinomial sampling + * with a Gaussian-Euclidean disintegration and adaptive + * dense metric and adaptive step size + */ +template +class adapt_auto_e_nuts : public auto_e_nuts, + public stepsize_auto_adapter { + protected: + const Model& model_; + + public: + adapt_auto_e_nuts(const Model& model, BaseRNG& rng) + : model_(model), + auto_e_nuts(model, rng), + stepsize_auto_adapter(model.num_params_r()) {} + + ~adapt_auto_e_nuts() {} + + sample transition(sample& init_sample, callbacks::logger& logger) { + sample s = auto_e_nuts::transition(init_sample, logger); + + if (this->adapt_flag_) { + this->stepsize_adaptation_.learn_stepsize(this->nom_epsilon_, + s.accept_stat()); + + bool update = this->auto_adaptation_.learn_covariance( + model_, this->z_.inv_e_metric_, this->z_.is_diagonal_, this->z_.q); + + if (update) { + this->init_stepsize(logger); + + this->stepsize_adaptation_.set_mu(log(10 * this->nom_epsilon_)); + this->stepsize_adaptation_.restart(); + } + } + return s; + } + + void disengage_adaptation() { + base_adapter::disengage_adaptation(); + this->stepsize_adaptation_.complete_adaptation(this->nom_epsilon_); + } +}; + +} // namespace mcmc +} // namespace stan +#endif diff --git a/src/stan/mcmc/hmc/nuts/auto_e_nuts.hpp b/src/stan/mcmc/hmc/nuts/auto_e_nuts.hpp new file mode 100644 index 0000000000..2974c8df07 --- /dev/null +++ b/src/stan/mcmc/hmc/nuts/auto_e_nuts.hpp @@ -0,0 +1,25 @@ +#ifndef STAN_MCMC_HMC_NUTS_AUTO_E_NUTS_HPP +#define STAN_MCMC_HMC_NUTS_AUTO_E_NUTS_HPP + +#include +#include +#include +#include + +namespace stan { +namespace mcmc { +/** + * The No-U-Turn sampler (NUTS) with multinomial sampling + * with a Gaussian-Euclidean disintegration and dense metric + */ +template +class auto_e_nuts + : public base_nuts { + public: + auto_e_nuts(const Model& model, BaseRNG& rng) + : base_nuts(model, rng) {} +}; + +} // namespace mcmc +} // namespace stan +#endif diff --git a/src/stan/mcmc/stepsize_auto_adapter.hpp b/src/stan/mcmc/stepsize_auto_adapter.hpp new file mode 100644 index 0000000000..7645ec460a --- /dev/null +++ b/src/stan/mcmc/stepsize_auto_adapter.hpp @@ -0,0 +1,39 @@ +#ifndef STAN_MCMC_STEPSIZE_AUTO_ADAPTER_HPP +#define STAN_MCMC_STEPSIZE_AUTO_ADAPTER_HPP + +#include +#include +#include +#include + +namespace stan { + +namespace mcmc { + +class stepsize_auto_adapter : public base_adapter { + public: + explicit stepsize_auto_adapter(int n) : auto_adaptation_(n) {} + + stepsize_adaptation& get_stepsize_adaptation() { + return stepsize_adaptation_; + } + + auto_adaptation& get_auto_adaptation() { return auto_adaptation_; } + + void set_window_params(unsigned int num_warmup, unsigned int init_buffer, + unsigned int term_buffer, unsigned int base_window, + callbacks::logger& logger) { + auto_adaptation_.set_window_params(num_warmup, init_buffer, term_buffer, + base_window, logger); + } + + protected: + stepsize_adaptation stepsize_adaptation_; + auto_adaptation auto_adaptation_; +}; + +} // namespace mcmc + +} // namespace stan + +#endif diff --git a/src/stan/services/sample/hmc_nuts_auto_e_adapt.hpp b/src/stan/services/sample/hmc_nuts_auto_e_adapt.hpp new file mode 100644 index 0000000000..d66693426c --- /dev/null +++ b/src/stan/services/sample/hmc_nuts_auto_e_adapt.hpp @@ -0,0 +1,162 @@ +#ifndef STAN_SERVICES_SAMPLE_HMC_NUTS_AUTO_E_ADAPT_HPP +#define STAN_SERVICES_SAMPLE_HMC_NUTS_AUTO_E_ADAPT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace services { +namespace sample { + +/** + * Runs HMC with NUTS with adaptation using dense Euclidean metric + * with a pre-specified Euclidean metric. + * + * @tparam Model Model class + * @param[in] model Input model to test (with data already instantiated) + * @param[in] init var context for initialization + * @param[in] init_inv_metric var context exposing an initial dense + inverse Euclidean metric (must be positive definite) + * @param[in] random_seed random seed for the random number generator + * @param[in] chain chain id to advance the pseudo random number generator + * @param[in] init_radius radius to initialize + * @param[in] num_warmup Number of warmup samples + * @param[in] num_samples Number of samples + * @param[in] num_thin Number to thin the samples + * @param[in] save_warmup Indicates whether to save the warmup iterations + * @param[in] refresh Controls the output + * @param[in] stepsize initial stepsize for discrete evolution + * @param[in] stepsize_jitter uniform random jitter of stepsize + * @param[in] max_depth Maximum tree depth + * @param[in] delta adaptation target acceptance statistic + * @param[in] gamma adaptation regularization scale + * @param[in] kappa adaptation relaxation exponent + * @param[in] t0 adaptation iteration offset + * @param[in] init_buffer width of initial fast adaptation interval + * @param[in] term_buffer width of final fast adaptation interval + * @param[in] window initial width of slow adaptation interval + * @param[in,out] interrupt Callback for interrupts + * @param[in,out] logger Logger for messages + * @param[in,out] init_writer Writer callback for unconstrained inits + * @param[in,out] sample_writer Writer for draws + * @param[in,out] diagnostic_writer Writer for diagnostic information + * @return error_codes::OK if successful + */ +template +int hmc_nuts_auto_e_adapt( + Model& model, stan::io::var_context& init, + stan::io::var_context& init_inv_metric, unsigned int random_seed, + unsigned int chain, double init_radius, int num_warmup, int num_samples, + int num_thin, bool save_warmup, int refresh, double stepsize, + double stepsize_jitter, int max_depth, double delta, double gamma, + double kappa, double t0, unsigned int init_buffer, unsigned int term_buffer, + unsigned int window, callbacks::interrupt& interrupt, + callbacks::logger& logger, callbacks::writer& init_writer, + callbacks::writer& sample_writer, callbacks::writer& diagnostic_writer) { + boost::ecuyer1988 rng = util::create_rng(random_seed, chain); + + std::vector disc_vector; + std::vector cont_vector = util::initialize( + model, init, rng, init_radius, true, logger, init_writer); + + Eigen::MatrixXd inv_metric; + try { + inv_metric = util::read_dense_inv_metric(init_inv_metric, + model.num_params_r(), logger); + util::validate_dense_inv_metric(inv_metric, logger); + } catch (const std::domain_error& e) { + return error_codes::CONFIG; + } + + stan::mcmc::adapt_auto_e_nuts sampler(model, rng); + + sampler.set_metric(inv_metric); + + sampler.set_nominal_stepsize(stepsize); + sampler.set_stepsize_jitter(stepsize_jitter); + sampler.set_max_depth(max_depth); + + sampler.get_stepsize_adaptation().set_mu(log(10 * stepsize)); + sampler.get_stepsize_adaptation().set_delta(delta); + sampler.get_stepsize_adaptation().set_gamma(gamma); + sampler.get_stepsize_adaptation().set_kappa(kappa); + sampler.get_stepsize_adaptation().set_t0(t0); + + sampler.set_window_params(num_warmup, init_buffer, term_buffer, window, + logger); + + util::run_adaptive_sampler( + sampler, model, cont_vector, num_warmup, num_samples, num_thin, refresh, + save_warmup, rng, interrupt, logger, sample_writer, diagnostic_writer); + + return error_codes::OK; +} + +/** + * Runs HMC with NUTS with adaptation using dense Euclidean metric, + * with identity matrix as initial inv_metric. + * + * @tparam Model Model class + * @param[in] model Input model to test (with data already instantiated) + * @param[in] init var context for initialization + * @param[in] random_seed random seed for the random number generator + * @param[in] chain chain id to advance the pseudo random number generator + * @param[in] init_radius radius to initialize + * @param[in] num_warmup Number of warmup samples + * @param[in] num_samples Number of samples + * @param[in] num_thin Number to thin the samples + * @param[in] save_warmup Indicates whether to save the warmup iterations + * @param[in] refresh Controls the output + * @param[in] stepsize initial stepsize for discrete evolution + * @param[in] stepsize_jitter uniform random jitter of stepsize + * @param[in] max_depth Maximum tree depth + * @param[in] delta adaptation target acceptance statistic + * @param[in] gamma adaptation regularization scale + * @param[in] kappa adaptation relaxation exponent + * @param[in] t0 adaptation iteration offset + * @param[in] init_buffer width of initial fast adaptation interval + * @param[in] term_buffer width of final fast adaptation interval + * @param[in] window initial width of slow adaptation interval + * @param[in,out] interrupt Callback for interrupts + * @param[in,out] logger Logger for messages + * @param[in,out] init_writer Writer callback for unconstrained inits + * @param[in,out] sample_writer Writer for draws + * @param[in,out] diagnostic_writer Writer for diagnostic information + * @return error_codes::OK if successful + */ +template +int hmc_nuts_auto_e_adapt( + Model& model, stan::io::var_context& init, unsigned int random_seed, + unsigned int chain, double init_radius, int num_warmup, int num_samples, + int num_thin, bool save_warmup, int refresh, double stepsize, + double stepsize_jitter, int max_depth, double delta, double gamma, + double kappa, double t0, unsigned int init_buffer, unsigned int term_buffer, + unsigned int window, callbacks::interrupt& interrupt, + callbacks::logger& logger, callbacks::writer& init_writer, + callbacks::writer& sample_writer, callbacks::writer& diagnostic_writer) { + stan::io::dump dmp + = util::create_unit_e_dense_inv_metric(model.num_params_r()); + stan::io::var_context& unit_e_metric = dmp; + + return hmc_nuts_auto_e_adapt( + model, init, unit_e_metric, random_seed, chain, init_radius, num_warmup, + num_samples, num_thin, save_warmup, refresh, stepsize, stepsize_jitter, + max_depth, delta, gamma, kappa, t0, init_buffer, term_buffer, window, + interrupt, logger, init_writer, sample_writer, diagnostic_writer); +} + +} // namespace sample +} // namespace services +} // namespace stan +#endif diff --git a/src/test/test-models/good/model/correlated_gaussian.stan b/src/test/test-models/good/model/correlated_gaussian.stan new file mode 100644 index 0000000000..fc55ebd83e --- /dev/null +++ b/src/test/test-models/good/model/correlated_gaussian.stan @@ -0,0 +1,7 @@ +parameters { + vector[2] x; +} + +model { + x ~ multi_normal([0.0, 0.0], [[1.0, 0.99], [0.99, 1.0]]); +} diff --git a/src/test/test-models/good/model/independent_gaussian.stan b/src/test/test-models/good/model/independent_gaussian.stan new file mode 100644 index 0000000000..451a1251c8 --- /dev/null +++ b/src/test/test-models/good/model/independent_gaussian.stan @@ -0,0 +1,7 @@ +parameters { + vector[2] x; +} + +model { + x ~ normal(0.0, 1.0); +} diff --git a/src/test/test-models/good/model/known_hessian.stan b/src/test/test-models/good/model/known_hessian.stan new file mode 100644 index 0000000000..da35f67443 --- /dev/null +++ b/src/test/test-models/good/model/known_hessian.stan @@ -0,0 +1,7 @@ +parameters { + real x[3]; +} + +model { + x ~ normal(0, 1); +} diff --git a/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_dense_test.cpp b/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_dense_test.cpp new file mode 100644 index 0000000000..8b278d5395 --- /dev/null +++ b/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_dense_test.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include + +TEST(McmcVarAdaptation, learn_covariance_pick_dense) { + std::fstream data_stream(std::string("").c_str(), std::fstream::in); + stan::io::dump data_var_context(data_stream); + data_stream.close(); + + std::stringstream output; + correlated_gaussian_model_namespace::correlated_gaussian_model + correlated_gaussian_model(data_var_context, 0, &output); + + stan::test::unit::instrumented_logger logger; + + const int M = 2; + const int N = 20; + Eigen::MatrixXd qs(N, M); + qs << 0.256173753306128, -0.0238087093098673, -1.63218152810157, + -1.5309929638363, -0.812451331685826, -1.15062373620068, + -1.49814775191801, -1.51310110681996, 0.738630631536685, 1.03588205799336, + 0.472288580035284, 0.250286770328584, -1.63634486169493, -1.6222798835089, + -0.400790615207103, -0.337669147200631, -0.568779612417544, + -0.424833495378187, 0.103690913176746, 0.272885200284842, + -0.453017424229528, -0.504634004215693, 3.34484533887237, + 3.29418872328382, -1.3376507113241, -1.32724775403694, -0.137543235057544, + -0.0290938109919368, -1.58194496352741, -1.39338740677379, + 0.312166136194586, 0.336989933768233, -0.628941448228566, + -0.850758612234264, -0.766816808981044, -0.645020468024267, + -0.75078110234827, -0.502544092120385, -0.00694807494461906, + -0.186748159558166; + + Eigen::MatrixXd covar(M, M); + bool covar_is_diagonal; + + Eigen::MatrixXd target_covar(M, M); + + target_covar << 1.0311414783609130, 1.0100577463968425, 1.0100577463968425, + 1.0148380697138280; + + stan::mcmc::auto_adaptation adapter(M); + adapter.set_window_params(50, 0, 0, N, logger); + + for (int i = 0; i < N; ++i) { + Eigen::VectorXd q = qs.block(i, 0, 1, M).transpose(); + adapter.learn_covariance(correlated_gaussian_model, covar, + covar_is_diagonal, q); + } + + for (int i = 0; i < covar.size(); ++i) { + EXPECT_FLOAT_EQ(target_covar(i), covar(i)); + } + + EXPECT_EQ(covar_is_diagonal, false); + + EXPECT_EQ(0, logger.call_count()); +} diff --git a/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_diag_test.cpp b/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_diag_test.cpp new file mode 100644 index 0000000000..64e3a946be --- /dev/null +++ b/src/test/unit/mcmc/auto_adaptation_learn_covariance_pick_diag_test.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include + +TEST(McmcVarAdaptation, learn_covariance_pick_diagonal) { + std::fstream data_stream(std::string("").c_str(), std::fstream::in); + stan::io::dump data_var_context(data_stream); + data_stream.close(); + + std::stringstream output; + independent_gaussian_model_namespace::independent_gaussian_model + independent_gaussian_model(data_var_context, 0, &output); + + stan::test::unit::instrumented_logger logger; + + const int M = 2; + const int N = 20; + Eigen::MatrixXd qs(N, M); + qs << -1.12310858, -0.98044486, -0.40288484, -0.31777537, -0.46665535, + -0.41468940, 0.77996512, 0.79419535, -0.08336907, 0.12997631, 0.25331851, + 0.17888355, -0.02854676, -0.25660897, -0.04287046, 0.06199044, 1.36860228, + 1.16082198, -0.22577099, -0.27199475, 1.51647060, 1.46738068, -1.54875280, + -1.42235482, 0.58461375, 0.40408060, 0.12385424, 0.12959917, 0.21594157, + 0.18045828, 0.37963948, 0.34225195, -0.50232345, -0.41356307, -0.33320738, + -0.33695265, -1.01857538, -0.85228019, -1.07179123, -0.98666076; + + Eigen::MatrixXd covar(M, M); + bool covar_is_diagonal; + + Eigen::MatrixXd target_covar(M, M); + + target_covar << 0.50172809066980716963, 0.0, 0.0, 0.41270751419364998247; + + stan::mcmc::auto_adaptation adapter(M); + adapter.set_window_params(50, 0, 0, N, logger); + + for (int i = 0; i < N; ++i) { + Eigen::VectorXd q = qs.block(i, 0, 1, M).transpose(); + adapter.learn_covariance(independent_gaussian_model, covar, + covar_is_diagonal, q); + } + + for (int i = 0; i < covar.size(); ++i) { + EXPECT_FLOAT_EQ(target_covar(i), covar(i)); + } + + EXPECT_EQ(covar_is_diagonal, true); + + EXPECT_EQ(0, logger.call_count()); +} diff --git a/src/test/unit/mcmc/auto_adaptation_test.cpp b/src/test/unit/mcmc/auto_adaptation_test.cpp new file mode 100644 index 0000000000..a281b40cc6 --- /dev/null +++ b/src/test/unit/mcmc/auto_adaptation_test.cpp @@ -0,0 +1,173 @@ +#include +#include +#include +#include + +TEST(McmcAutoAdaptation, test_covariance_zero_rows_zero_cols) { + Eigen::MatrixXd X1(0, 5); + + EXPECT_THROW(stan::mcmc::internal::covariance(X1), std::invalid_argument); + + Eigen::MatrixXd X2(1, 0); + + EXPECT_THROW(stan::mcmc::internal::covariance(X2), std::invalid_argument); +} + +TEST(McmcAutoAdaptation, test_covariance_one_row_one_col) { + Eigen::MatrixXd X1(1, 2); + Eigen::MatrixXd X2(3, 1); + + X1 << 1.0, 2.0; + X2 << 1.0, 2.0, 3.0; + + Eigen::MatrixXd cov1 = stan::mcmc::internal::covariance(X1); + Eigen::MatrixXd cov2 = stan::mcmc::internal::covariance(X2); + + ASSERT_EQ(cov1.rows(), 2); + ASSERT_EQ(cov1.cols(), 2); + + ASSERT_EQ(cov2.rows(), 1); + ASSERT_EQ(cov2.cols(), 1); + + for (int i = 0; i < cov1.size(); ++i) { + ASSERT_FLOAT_EQ(cov1(i), 0.0); + } + + ASSERT_FLOAT_EQ(cov2(0), 1.0); +} + +TEST(McmcAutoAdaptation, test_covariance) { + Eigen::MatrixXd X1(3, 2); + Eigen::MatrixXd X2(2, 3); + + X1 << 0.0, -1.0, 0.5, -2.7, 3.0, 5.0; + X2 << 0.0, 3, -2.7, 0.5, -1, 5.0; + + Eigen::MatrixXd cov1 = stan::mcmc::internal::covariance(X1); + Eigen::MatrixXd cov2 = stan::mcmc::internal::covariance(X2); + + Eigen::MatrixXd cov1_ref(2, 2); + Eigen::MatrixXd cov2_ref(3, 3); + + cov1_ref << 2.5833333333333335, 6.0666666666666664, 6.0666666666666664, + 16.3633333333333333; + + cov2_ref << 0.125, -1.0, 1.925, -1.000, 8.0, -15.4, 1.925, -15.4, 29.645; + + ASSERT_EQ(cov1.rows(), cov1_ref.rows()); + ASSERT_EQ(cov1.cols(), cov1_ref.cols()); + + ASSERT_EQ(cov2.rows(), cov2_ref.rows()); + ASSERT_EQ(cov2.cols(), cov2_ref.cols()); + + for (int i = 0; i < cov1_ref.size(); ++i) { + ASSERT_FLOAT_EQ(cov1(i), cov1_ref(i)); + } + + for (int i = 0; i < cov2_ref.size(); ++i) { + ASSERT_FLOAT_EQ(cov2(i), cov2_ref(i)); + } +} + +TEST(McmcAutoAdaptation, power_method) { + Eigen::MatrixXd X(2, 2); + Eigen::VectorXd x0(2); + + X << 2.0, 0.5, 0.5, 1.0; + x0 << 1.0, 0.0; + + const int max_iterations = 10; + const double tol = 1e-10; + + auto Av = [&](const Eigen::VectorXd& v) { return X * v; }; + + int max_iterations_1 = max_iterations; + double tol_1 = tol; + + double eval + = stan::mcmc::internal::power_method(Av, x0, max_iterations_1, tol_1); + + EXPECT_FLOAT_EQ(eval, 2.20710678118654746); +} + +TEST(McmcAutoAdaptation, power_method_tol_check) { + Eigen::MatrixXd X(2, 2); + Eigen::VectorXd x0(2); + + X << 2.0, 0.5, 0.5, 1.0; + x0 << 1.0, 0.0; + + const int max_iterations = 1000; + const double tol = 1e-12; + + auto Av = [&](const Eigen::VectorXd& v) { return X * v; }; + + int max_iterations_1 = max_iterations; + double tol_1 = tol; + double eval + = stan::mcmc::internal::power_method(Av, x0, max_iterations_1, tol_1); + + EXPECT_LT(tol_1, tol); +} + +TEST(McmcAutoAdaptation, power_method_iter_check) { + Eigen::MatrixXd X(2, 2); + Eigen::VectorXd x0(2); + + X << 2.0, 0.5, 0.5, 1.0; + x0 << 1.0, 0.0; + + const int max_iterations = 10; + const double tol = 1e-50; + + auto Av = [&](const Eigen::VectorXd& v) { return X * v; }; + + int max_iterations_1 = max_iterations; + double tol_1 = tol; + double eval + = stan::mcmc::internal::power_method(Av, x0, max_iterations_1, tol_1); + + EXPECT_GT(tol_1, tol); + EXPECT_EQ(max_iterations_1, max_iterations); +} + +// The checks in here are very coarse because eigenvalue_scaled_covariance +// only estimates things with low precision +TEST(McmcAutoAdaptation, eigenvalue_scaled_covariance) { + Eigen::MatrixXd L(2, 2), Sigma(2, 2); + + L << 1.0, 0.0, 0.5, 1.0; + Sigma << 2.0, 0.7, 0.7, 1.3; + + double eval = stan::mcmc::internal::eigenvalue_scaled_covariance(L, Sigma); + + EXPECT_LT(std::abs(eval - 2.0908326913195983) / eval, 1e-2); + + L << 2.0, 0.0, 0.7, 1.3; + + eval = stan::mcmc::internal::eigenvalue_scaled_covariance(L, Sigma); + + EXPECT_LT(std::abs(eval - 0.62426035502958577) / eval, 1e-2); +} + +// The checks in here are very coarse because eigenvalue_scaled_hessian +// only estimates things with low precision +TEST(McmcAutoAdaptation, eigenvalue_scaled_hessian) { + std::fstream data_stream(std::string("").c_str(), std::fstream::in); + stan::io::dump data_var_context(data_stream); + data_stream.close(); + + std::stringstream output; + known_hessian_model_namespace::known_hessian_model known_hessian_model( + data_var_context, 0, &output); + + Eigen::MatrixXd L(3, 3); + Eigen::VectorXd q(3); + L << 2.0, 0.0, 0.0, 0.7, 1.3, 0.0, -1.5, 2.0, 4.0; + q << 0.0, 0.0, 0.0; + + double eval = stan::mcmc::internal::eigenvalue_scaled_hessian( + known_hessian_model, L, q); + + EXPECT_LT(std::abs(eval - 22.8141075806892850) / eval, 1e-2); +}