diff --git a/UserManual/src/chapter_Laplace.Rmd b/UserManual/src/chapter_Laplace.Rmd
index 394da9db3..598367aef 100644
--- a/UserManual/src/chapter_Laplace.Rmd
+++ b/UserManual/src/chapter_Laplace.Rmd
@@ -3,7 +3,7 @@
 ```{r, echo=FALSE}
 require(nimble)
-require(nimbleSMC, quietly = TRUE, warn.conflicts = FALSE)
+require(nimbleQuad, quietly = TRUE, warn.conflicts = FALSE)
 ```
 
 # Laplace, AGHQ, and nested approximations {#cha-laplace}
diff --git a/install_requirements.R b/install_requirements.R
index 021768aec..8efd4b274 100755
--- a/install_requirements.R
+++ b/install_requirements.R
@@ -12,6 +12,7 @@ requirements <- c(
     'pracma',   ## for AD
     'numDeriv', ## for AD
     'mcmcse'    ## for MCEM
+    # 'nimbleQuad' ## for Laplace, used in the MCEM tests
    # 'lme4' ## for test-ADlaplace.R
 )
@@ -26,3 +27,4 @@ for (package in requirements) {
 install.packages("lme4", type = "source")
+
diff --git a/packages/nimble/DESCRIPTION b/packages/nimble/DESCRIPTION
index 9d6134424..9d798aa12 100644
--- a/packages/nimble/DESCRIPTION
+++ b/packages/nimble/DESCRIPTION
@@ -28,7 +28,7 @@ Authors@R: c(
     person("Claudia", "Wehrhahn Cortes", role = "aut", comment = "Bayesian nonparametrics system"),
     person("Abel", "Rodr\u00edguez", role = "aut", comment = "Bayesian nonparametrics system"),
     person("Duncan", "Temple Lang", role = "aut", comment = "packaging configuration"),
-    person("Wei", "Zhang", role = "aut", comment = "Laplace approximation"),
+    person("Wei", "Zhang", role = "aut", comment = "node marginalization"),
     person("Sally", "Paganin", role = "aut", comment = "reversible jump MCMC"),
     person("Joshua", "Hug", role = "aut", comment = "WAIC"),
     person("Paul", "van Dam-Bates", role = "aut", comment = "P\u00f3lya-Gamma sampler, nimIntegrate, matrix exponential"),
@@ -129,7 +129,6 @@ Collate:
     initializeModel.R
     parameterTransform.R
     CAR.R
-    Laplace.R
     MCMC_utils.R
     MCMC_configuration.R
     MCMC_build.R
@@ -144,13 +143,14 @@ Collate:
     crossValidation.R
     BNP_distributions.R
     BNP_samplers.R
+    setupMargNodes.R
     NF_utils.R
     miscFunctions.R
+    normTooling.R
+    miscAlgorithms.R
     makevars.R
     setNimbleInternalFunctions.R
     registration.R
     nimble-package.r
-    QuadratureGrids.R
-    miscAlgorithms.R
     zzz.R
 RoxygenNote: 7.3.2
diff --git a/packages/nimble/NAMESPACE b/packages/nimble/NAMESPACE
index 870bdaac5..daee8b53b 100644
--- a/packages/nimble/NAMESPACE
+++ b/packages/nimble/NAMESPACE
@@ -18,8 +18,10 @@ S3method(as.list, modelValuesBaseClass)
 S3method(length, nimPointerList)
 export(calc_dmnormConjugacyContributions)
 export(calc_dmnormAltParams)
+export(calc_dmnorm_inv_ld_AltParams)
 export(calc_dwishAltParams)
 export(calc_dcatConjugacyContributions)
+export(PDinverse_logdet)
 export(calc_dcar_normalConjugacyContributionShape)
 export(calc_dcar_normalConjugacyContributionRate)
 export(CAR_calcM)
diff --git a/packages/nimble/R/Laplace.R b/packages/nimble/R/Laplace.R
deleted file mode 100644
index 41d0a4fc6..000000000
--- a/packages/nimble/R/Laplace.R
+++ /dev/null
@@ -1,4565 +0,0 @@
-## NIMBLE Laplace approximation
-## AGHQuad/Laplace base class
-AGHQuad_BASE <- nimbleFunctionVirtual(
-  run = function() {},
-  methods = list(
-    calcLogLik1 = function(p = double(1)){
-      returnType(double())
-    },
-    calcLogLik2 = function(p = double(1)){
-      returnType(double())
-    },
-    calcLogLik3 = function(p = double(1)){
-      returnType(double())
-    },
-    gr_logLik1 = function(p = double(1)){
-      returnType(double(1))
-    },
-    gr_logLik2 = function(p = double(1)){
-      returnType(double(1))
-    },
-    gr_logLik3 = function(p = double(1)){
-      returnType(double(1))
-    },
-    negHess = function(p = double(1), reTransform =
double(1)){ - returnType(double(2)) - }, - update_max_inner_logLik = function(p = double(1)){ - returnType(double(1)) - }, - update_max_inner_logLik_internal = function(p = double(1)){ - returnType(double(1)) - }, - hess_joint_logLik_wrt_p_wrt_re = function(p = double(1), reTransform = double(1)){ - returnType(double(2)) - }, - hess_joint_logLik_wrt_p_wrt_re_internal = function(p = double(1), reTransform = double(1)){ - returnType(double(2)) - }, - reset_outer_logLik = function(){}, - save_outer_logLik = function(logLikVal = double()){}, - get_param_value = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - }, - get_inner_mode = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - }, - get_inner_negHessian = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - }, - get_inner_negHessian_chol = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - }, - check_convergence = function(){ - returnType(double()) - }, - updateSettings = function(optimMethod = character(0, default="NULL"), - optimStart = character(0, default="NULL"), - optimStartValues = double(1, default=Inf), - optimWarning = integer(0, default=-1), - useInnerCache = integer(0, default=-1), - nQuad = integer(0, default=-1), - gridType = character(0, default="NULL"), - optimControl = optimControlNimbleList(default=nimOptimDefaultControl()), - replace_optimControl = logical(0, default=FALSE)) { - }, - ## set_nQuad = function(nQUpdate = integer()){}, - ## set_transformation = function(transformation = character()){}, - ## set_warning = function(warn = logical()){}, - ## set_reInitMethod = function(method = character(), value=double(1)){}, - set_randomeffect_values = function(p = double(1)){} - ## set_inner_cache = function(cache = logical(0, default = TRUE)){} - ) -) - -setup_OneAGHQuad <- function(model, paramNodes, randomEffectsNodes, calcNodes, control) { - # common setup steps for 1D and >1D cases - optimControl_ <- extractControlElement(control, 'optimControl', nimOptimDefaultControl()) - optimMethod_ <- extractControlElement(control, 'optimMethod', 'nlminb') - optimStart_ <- extractControlElement(control, 'optimStart', 'last.best') - optimStartValues_ <- extractControlElement(control, 'optimStartValues', 0) - nre <- length(model$expandNodeNames(randomEffectsNodes, returnScalarComponents = TRUE)) - - paramDeps <- model$getDependencies(paramNodes, determOnly = TRUE, self=FALSE) - if(length(paramDeps) > 0) { - keep_paramDeps <- logical(length(paramDeps)) - for(i in seq_along(paramDeps)) { - if(any(paramDeps[i] == calcNodes)) keep_paramDeps[i] <- FALSE - else { - nextDeps <- model$getDependencies(paramDeps[i]) - keep_paramDeps[i] <- any(nextDeps %in% calcNodes) - } - } - paramDeps <- paramDeps[keep_paramDeps] - } - innerCalcNodes <- calcNodes - calcNodes <- model$expandNodeNames(c(paramDeps, calcNodes), sort = TRUE) - wrtNodes <- c(paramNodes, randomEffectsNodes) - reTrans <- parameterTransform(model, randomEffectsNodes) - npar <- length(model$expandNodeNames(paramNodes, returnScalarComponents = TRUE)) - - if(npar > 1) p_indices <- as.numeric(1:npar) - else p_indices <- as.numeric(c(1, -1)) - - list(optimControl_=optimControl_, - optimMethod_=optimMethod_, - optimStart_=optimStart_, - optimStartValues_=optimStartValues_, - nre = nre, - paramDeps = paramDeps, - innerCalcNodes = innerCalcNodes, - calcNodes = calcNodes, - wrtNodes = wrtNodes, - reTrans = reTrans, - npar = npar, - p_indices = p_indices - ) -} - -## A single Laplace approximation 
for only one scalar random effect node
-buildOneLaplace1D <- function(model, paramNodes, randomEffectsNodes, calcNodes, control = list()) {
-  buildOneAGHQuad1D(model, nQuad = 1, paramNodes, randomEffectsNodes, calcNodes, control)
-}
-
-buildOneAGHQuad1D <- nimbleFunction(
-  contains = AGHQuad_BASE,
-  setup = function(model, nQuad, paramNodes, randomEffectsNodes, calcNodes, control = list()) {
-    ## Check the number of random effects is 1
-    ## optimControl_ <- extractControlElement(control, 'optimControl', nimOptimDefaultControl())
-    ## optimMethod_ <- extractControlElement(control, 'optimMethod', 'BFGS')
-    ## optimStart_ <- extractControlElement(control, 'optimStart', 'constant')
-    ## optimStartValues_ <- extractControlElement(control, 'optimStartValues', 0)
-    nQuad_ <- nQuad
-    S <- setup_OneAGHQuad(model, paramNodes, randomEffectsNodes, calcNodes, control)
-    optimControl_ <- S$optimControl_
-    optimMethod_ <- S$optimMethod_
-    optimStart_ <- S$optimStart_
-    optimStartValues_ <- S$optimStartValues_
-    nre <- S$nre
-    paramDeps <- S$paramDeps
-    innerCalcNodes <- S$innerCalcNodes
-    calcNodes <- S$calcNodes
-    wrtNodes <- S$wrtNodes
-    reTrans <- S$reTrans
-    npar <- S$npar
-    p_indices <- S$p_indices
-
-    ## nre <- length(model$expandNodeNames(randomEffectsNodes, returnScalarComponents = TRUE))
-    if(nre != 1) stop("buildOneAGHQuad1D: Number of random effects for buildOneAGHQuad1D or buildOneLaplace1D must be 1")
-    ## Check and add necessary upstream deterministic nodes into calcNodes
-    ## This ensures that deterministic nodes between paramNodes and calcNodes are used.
-    ## paramDeps <- model$getDependencies(paramNodes, determOnly = TRUE, self=FALSE)
-    ## if(length(paramDeps) > 0) {
-    ##   keep_paramDeps <- logical(length(paramDeps))
-    ##   for(i in seq_along(paramDeps)) {
-    ##     if(any(paramDeps[i] == calcNodes)) keep_paramDeps[i] <- FALSE
-    ##     else {
-    ##       nextDeps <- model$getDependencies(paramDeps[i])
-    ##       keep_paramDeps[i] <- any(nextDeps %in% calcNodes)
-    ##     }
-    ##   }
-    ##   paramDeps <- paramDeps[keep_paramDeps]
-    ## }
-    ## innerCalcNodes <- calcNodes
-    ## calcNodes <- model$expandNodeNames(c(paramDeps, calcNodes), sort = TRUE)
-    ## wrtNodes <- c(paramNodes, randomEffectsNodes)
-    ## Indices of randomEffectsNodes and paramNodes inside wrtNodes
-    ## npar <- length(model$expandNodeNames(paramNodes, returnScalarComponents = TRUE))
-    re_indices <- as.numeric(c(npar+1, -1))
-    ## if(npar > 1) p_indices <- as.numeric(1:npar)
-    ## else p_indices <- as.numeric(c(1, -1))
-    ## ## Indices of randomEffectsNodes inside randomEffectsNodes for use in getting the derivative of
-    ## ## the inner log-likelihood (paramNodes fixed) w.r.t. randomEffectsNodes.
- re_indices_inner <- as.numeric(c(1, -1)) - p_and_re_indices <- as.numeric(1:(npar + 1)) - - ## Set up start values for the inner optimization of Laplace approximation - if(!is.character(optimStart_) | length(optimStart_) != 1) stop("buildOneAGHQuad1D: There is a problem with `optimStart`: ", optimStart_) - startID <- switch(optimStart_, last=1, last.best=2, constant=3, random=4, model=5) - if(startID==5) { - constant_init_par <- c(values(model, randomEffectsNodes), -1) - } else - constant_init_par <- c(optimStartValues_, -1) - - ## Update and constant nodes for obtaining derivatives using AD - inner_derivsInfo <- makeModelDerivsInfo(model = model, wrtNodes = randomEffectsNodes, calcNodes = innerCalcNodes) - inner_updateNodes <- inner_derivsInfo$updateNodes - inner_constantNodes <- inner_derivsInfo$constantNodes - joint_derivsInfo <- makeModelDerivsInfo(model = model, wrtNodes = wrtNodes, calcNodes = calcNodes) - joint_updateNodes <- joint_derivsInfo$updateNodes - joint_constantNodes <- joint_derivsInfo$constantNodes - - ## Automated transformation for random effects to ensure range of (-Inf, Inf) - ## reTrans <- parameterTransform(model, randomEffectsNodes) - - ## The following are used for caching values and gradient in the Laplace3 system - logLik3_saved_value <- -Inf # numeric(1) - logLik3_saved_gr <- if(npar > 1) numeric(npar) else as.numeric(c(1, -1)) - logLik3_previous_p <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1)) - ## The following are used for caching values for init purposes - max_inner_logLik_last_argmax <- constant_init_par - max_inner_logLik_last_value <- -Inf - max_inner_logLik_previous_p <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1)) - cache_inner_max <- TRUE - ## Record the maximum Laplace loglikelihood value for obtaining inner optimization start values - max_logLik <- -Inf - max_logLik_last_best_argmax <- constant_init_par - - ## Last call cache of neg Hessian. - saved_inner_negHess <- matrix(0, nrow = 1, ncol = 1) - ## Cache log like saved value to keep track of 3 methods. - logLik_saved_value <- -Inf - - ## Values to save when max inner log lik reached. - max_outer_logLik <- -Inf - outer_mode_inner_negHess <- matrix(0, nrow = 1, ncol = 1) - outer_mode_max_inner_logLik_last_argmax <- as.numeric(c(1, -1)) - outer_param_max <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1)) - - ## Cached gradients for AGHQ. - gr_sigmahatwrtre <- numeric(1) - gr_sigmahatwrtp <- if(npar > 1) numeric(npar) else as.numeric(c(1, -1)) - gr_rehatwrtp <- if(npar > 1) numeric(npar) else as.numeric(c(1, -1)) # double(1) - gr_QuadSum_value <- if(npar > 1) numeric(npar) else as.numeric(c(1, -1)) - AGHQuad_saved_gr <- if(npar > 1) numeric(npar) else as.numeric(c(1, -1)) - quadrature_previous_p <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1)) - - ## Convergence check for outer function. - converged <- 0 - - ## Build AGHQ grid for 1D: - AGHQuad_grid <- buildAGHQGrid(d = 1, nQuad = nQuad_) - - ## The following is used to ensure the one_time_fixes are run when needed. 
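## For intuition, the quadrature step that AGHQuad_grid performs can be sketched in
## plain R with pracma::gaussHermite() (pracma is already listed in install_requirements.R).
## With inner mode m and sd s = 1/sqrt(-h''(m)), adaptive Gauss-Hermite approximates
## integral exp(h(x)) dx by sqrt(2)*s*sum_k w_k*exp(h(m + sqrt(2)*s*z_k) + z_k^2);
## the toy log-density h below is an illustrative assumption (its true integral is 1):
##   gh <- pracma::gaussHermite(5)                  # nodes gh$x, weights gh$w
##   h  <- function(x) dnorm(x, 1, 2, log = TRUE)   # toy joint log-likelihood in x
##   m  <- 1; s <- 2                                # inner mode and sd at the mode
##   sqrt(2) * s * sum(gh$w * exp(h(m + sqrt(2) * s * gh$x) + gh$x^2))  # ~= 1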
- one_time_fixes_done <- FALSE - - warn_optim <- extractControlElement(control, 'optimWarning', FALSE) ## Warn about inner optimization issues - }, - run = function(){}, - methods = list( - fix_one_vec = function(x = double(1)) { - if(length(x) == 2) { - if(x[2] == -1) { - ans <- numeric(length = 1, value = x[1]) - return(ans) - } - } - return(x) - returnType(double(1)) - }, - one_time_fixes = function() { - ## Run this once after compiling; remove extraneous -1 if necessary - if(one_time_fixes_done) return() - re_indices <<- fix_one_vec(re_indices) - re_indices_inner <<- fix_one_vec(re_indices_inner) - max_inner_logLik_last_argmax <<- fix_one_vec(max_inner_logLik_last_argmax) - outer_mode_max_inner_logLik_last_argmax <<- fix_one_vec(outer_mode_max_inner_logLik_last_argmax) - max_logLik_last_best_argmax <<- fix_one_vec(max_logLik_last_best_argmax) - constant_init_par <<- fix_one_vec(constant_init_par) - # if(startID == 3) optStart <<- fix_one_vec(optStart) - if(npar == 1) { - p_indices <<- fix_one_vec(p_indices) - logLik3_saved_gr <<- fix_one_vec(logLik3_saved_gr) - logLik3_previous_p <<- fix_one_vec(logLik3_previous_p) - max_inner_logLik_previous_p <<- fix_one_vec(max_inner_logLik_previous_p) - outer_param_max <<- fix_one_vec(outer_param_max) - gr_sigmahatwrtp <<- fix_one_vec(gr_sigmahatwrtp) - gr_rehatwrtp <<- fix_one_vec(gr_rehatwrtp) - gr_QuadSum_value <<- fix_one_vec(gr_QuadSum_value) - AGHQuad_saved_gr <<- fix_one_vec(AGHQuad_saved_gr) - quadrature_previous_p <<- fix_one_vec(quadrature_previous_p) - } - reInit <- values(model, randomEffectsNodes) - set_reInit(reInit) - one_time_fixes_done <<- TRUE - }, - updateSettings = function(optimMethod = character(0, default="NULL"), - optimStart = character(0, default="NULL"), - optimStartValues = double(1, default=Inf), - optimWarning = integer(0, default=-1), - useInnerCache = integer(0, default=-1), - nQuad = integer(0, default=-1), - gridType = character(0, default="NULL"), - optimControl = optimControlNimbleList(default=nimOptimDefaultControl()), - replace_optimControl = logical(0, default=FALSE)) { - # Checking should have been done already. Or, if this is being called directly, - # it will be for development or advanced uses and we can skip checking. 
- if(optimMethod != "NULL") optimMethod_ <<- optimMethod - if(optimStart != "NULL") { - if(optimStart == "last") startID <<- 1 # last - else if(optimStart == "last.best") startID <<- 2 # last.best - else if(optimStart == "constant") startID <<- 3 # use fixed vector optimStart provided at setup time - else if(optimStart == "random") startID <<- 4 - else if(optimStart == "model") { - startID <<- 3 - constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - } - } - if((length(optimStartValues) != 1) | (optimStartValues[1] != Inf) ) { - if((length(optimStartValues) == 1) & (optimStartValues[1] == -Inf) ) { # numeric code for "model" setting - constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - } else { - if(startID <= 3) { - constant_init_par <<- optimStartValues - if(length(constant_init_par) == 1) - if(nre > 1) - constant_init_par <<- rep(constant_init_par, nre) - } - } - } - if((!one_time_fixes_done) & (length(constant_init_par) == 1)) - constant_init_par <<- c(constant_init_par, -1) - if(optimWarning != -1) { - warn_optim <<- optimWarning != 0 - } - if(useInnerCache != -1) { - cache_inner_max <<- useInnerCache != 0 - } - if(nQuad != -1) { - AGHQuad_grid$setGridSize(nQUpdate = nQuad) - nQuad_ <<- nQuad - } - ## if(gridType != "") { - ## transMethod <<- gridType - ## } - if(replace_optimControl) { - optimControl$fnscale <- -1 - optimControl_ <<- optimControl - } - }, - set_reInit = function(re = double(1)) { - reInitTrans <- reTrans$transform(re) - max_inner_logLik_last_argmax <<- reInitTrans - }, - get_reInitTrans = function() { - if(startID == 1) ans <- max_inner_logLik_last_argmax ## last - else if(startID == 2) ans <- max_logLik_last_best_argmax ## last.best - else if(startID == 3) ans <- constant_init_par ## constant - else if(startID == 4){ ## random (prior). - model$simulate(randomEffectsNodes) - ans <- reTrans$transform(values(model, randomEffectsNodes)) ## From prior: - } - return(ans) - returnType(double(1)) - }, - ## Joint log-likelihood with values of parameters fixed: used only for inner optimization - inner_logLik = function(reTransform = double(1)) { - re <- reTrans$inverseTransform(reTransform) - values(model, randomEffectsNodes) <<- re - ans <- model$calculate(innerCalcNodes) + reTrans$logDetJacobian(reTransform) - return(ans) - returnType(double()) - }, - # Gradient of the joint log-likelihood (p fixed) w.r.t. transformed random effects: used only for inner optimization - gr_inner_logLik_internal = function(reTransform = double(1)) { - ans <- derivs(inner_logLik(reTransform), wrt = re_indices_inner, order = 1, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - return(ans$jacobian[1,]) - returnType(double(1)) - }, - ## Double taping for efficiency - gr_inner_logLik = function(reTransform = double(1)) { - ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = re_indices_inner, order = 0, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - return(ans$value) - returnType(double(1)) - }, - # Hessian of the joint log-likelihood (p fixed) w.r.t. transformed random effects: used only for inner optimization - # This is being added to experiment with Newton's methods for inner optimization. If this approach provides good - # numerical behavior, we can revisit the efficiency of how to get derivatives, such as getting gradient and hessian together - # or whether it is better to keep them separate, as both may not always be jointly requested. 
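## The double taping above rests on the identity that the Hessian equals the
## Jacobian of the gradient. A minimal standalone check of that identity with the
## numDeriv package (already listed in install_requirements.R); the toy objective
## f is an assumption for illustration:
##   library(numDeriv)
##   f  <- function(x) sum(dnorm(x, mean = c(1, 2), sd = c(2, 3), log = TRUE))
##   g  <- function(x) grad(f, x)          # first-order "tape": the gradient
##   H1 <- hessian(f, c(0, 0))             # second order taken directly
##   H2 <- jacobian(g, c(0, 0))            # second order as Jacobian of the gradient
##   all.equal(H1, H2, tolerance = 1e-6)   # TRUE: the two routes agree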
- he_inner_logLik_internal = function(reTransform = double(1)) { - ans <- derivs(inner_logLik(reTransform), wrt = re_indices_inner, order = 2, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - res <- ans$hessian[,,1] - return(res) - returnType(double(2)) - }, - he_inner_logLik_internal_as_vec = function(reTransform = double(1)) { - ans <- he_inner_logLik_internal(reTransform) - res <- nimNumeric(value = ans, length = length(reTransform)*length(reTransform)) - return(res) - returnType(double(1)) - }, - # Double taping for possible efficiency - he_inner_logLik = function(reTransform = double(1)) { - ans <- derivs(he_inner_logLik_internal_as_vec(reTransform), wrt = re_indices_inner, order = 0, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - res <- matrix(value = ans$value, nrow = length(reTransform), ncol = length(reTransform)) - return(res) - returnType(double(2)) - }, - ## Solve the inner optimization for Laplace approximation - max_inner_logLik = function(p = double(1)) { - values(model, paramNodes) <<- p - model$calculate(paramDeps) - reInitTrans <- get_reInitTrans() - fn_init <- inner_logLik(reInitTrans) - if((fn_init == Inf) | (fn_init == -Inf) | (is.nan(fn_init)) | (is.na(fn_init))) { - optRes <- optimResultNimbleList$new() - optRes$par <- reInitTrans - optRes$value <- -Inf - optRes$convergence <- -1 - return(optRes) - } - optRes <- optim(reInitTrans, inner_logLik, gr = gr_inner_logLik, he = he_inner_logLik, method = optimMethod_, control = optimControl_) - if(optRes$convergence != 0 & warn_optim){ - print(" [Warning] `optim` did not converge for the inner optimization of AGHQ or Laplace approximation.") - } - converged <<- optRes$convergence - return(optRes) - returnType(optimResultNimbleList()) - }, - ## Outer check for inner convergence - check_convergence = function(){ - returnType(double()) - return(converged) - }, - ## Inner optimization using single-taped gradient - max_inner_logLik_internal = function(p = double(1)) { - values(model, paramNodes) <<- p - model$calculate(paramDeps) - reInitTrans <- get_reInitTrans() - fn_init <- inner_logLik(reInitTrans) - if((fn_init == Inf) | (fn_init == -Inf) | (is.nan(fn_init)) | (is.na(fn_init))) { - optRes <- optimResultNimbleList$new() - optRes$par <- reInitTrans - optRes$value <- -Inf - optRes$convergence <- -1 - return(optRes) - } - optRes <- optim(reInitTrans, inner_logLik, gr = gr_inner_logLik_internal, he = he_inner_logLik_internal, method = optimMethod_, control = optimControl_) - if(optRes$convergence != 0 & warn_optim){ - print(" [Warning] `optim` did not converge for the inner optimization of AGHQ or Laplace approximation.") - } - converged <<- optRes$convergence - return(optRes) - returnType(optimResultNimbleList()) - }, - ## These two update methods for max_inner_logLik use the same member data caches - update_max_inner_logLik = function(p = double(1)) { - optRes <- max_inner_logLik(p) - max_inner_logLik_last_argmax <<- optRes$par - max_inner_logLik_last_value <<- optRes$value - max_inner_logLik_previous_p <<- p - return(max_inner_logLik_last_argmax) - returnType(double(1)) - }, - update_max_inner_logLik_internal = function(p = double(1)) { - optRes <- max_inner_logLik_internal(p) - max_inner_logLik_last_argmax <<- optRes$par - max_inner_logLik_last_value <<- optRes$value - max_inner_logLik_previous_p <<- p - return(max_inner_logLik_last_argmax) - returnType(double(1)) - }, - ## Joint log-likelihood in terms of parameters and transformed random 
effects
-    joint_logLik = function(p = double(1), reTransform = double(1)) {
-      re <- reTrans$inverseTransform(reTransform)
-      values(model, paramNodes) <<- p
-      values(model, randomEffectsNodes) <<- re
-      ans <- model$calculate(calcNodes) + reTrans$logDetJacobian(reTransform)
-      return(ans)
-      returnType(double())
-    },
-    ## 1st order partial derivative w.r.t. parameters
-    gr_joint_logLik_wrt_p_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(joint_logLik(p, reTransform), wrt = p_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_joint_logLik_wrt_p = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_p_internal(p, reTransform), wrt = p_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## 1st order partial derivative w.r.t. transformed random effects
-    gr_joint_logLik_wrt_re_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(joint_logLik(p, reTransform), wrt = re_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_joint_logLik_wrt_re = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_re_internal(p, reTransform), wrt = re_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## 2nd order mixed partial derivative w.r.t. parameters and transformed random effects
-    hess_joint_logLik_wrt_p_wrt_re_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_p_internal(p, reTransform), wrt = re_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian)
-      returnType(double(2))
-    },
-    ## Double taping
-    hess_joint_logLik_wrt_p_wrt_re = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform), wrt = re_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      derivmat <- matrix(value = ans$value, nrow = npar)
-      return(derivmat)
-      returnType(double(2))
-    },
-    ## Negative Hessian: 2nd order unmixed partial derivative w.r.t. transformed random effects
-    negHess_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_re_internal(p, reTransform), wrt = re_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(-ans$jacobian)
-      returnType(double(2))
-    },
-    ## Double taping
-    negHess = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(negHess_internal(p, reTransform), wrt = re_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      neghess <- matrix(ans$value, nrow = nre)
-      return(neghess)
-      returnType(double(2))
-    },
-    ## Logdet negative Hessian
-    logdetNegHess = function(p = double(1), reTransform = double(1)) {
-      negHessian <- negHess(p, reTransform)
-      ans <- log(negHessian[1,1])
-      return(ans)
-      returnType(double())
-    },
-    ## Gradient of logdet (negative) Hessian w.r.t.
parameters - gr_logdetNegHess_wrt_p_internal = function(p = double(1), reTransform = double(1)) { - ans <- derivs(logdetNegHess(p, reTransform), wrt = p_indices, order = 1, model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - return(ans$jacobian[1,]) - returnType(double(1)) - }, - ## Double taping - gr_logdetNegHess_wrt_p = function(p = double(1), reTransform = double(1)) { - ans <- derivs(gr_logdetNegHess_wrt_p_internal(p, reTransform), wrt = p_indices, order = 0, model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - return(ans$value) - returnType(double(1)) - }, - ## Gradient of logdet (negative) Hessian w.r.t. transformed random effects - gr_logdetNegHess_wrt_re_internal = function(p = double(1), reTransform = double(1)) { - ans <- derivs(logdetNegHess(p, reTransform), wrt = re_indices, order = 1, model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - return(ans$jacobian[1,]) - returnType(double(1)) - }, - ## Double taping - gr_logdetNegHess_wrt_re = function(p = double(1), reTransform = double(1)) { - ans <- derivs(gr_logdetNegHess_wrt_re_internal(p, reTransform), wrt = re_indices, order = 0, model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - return(ans$value) - returnType(double(1)) - }, - ## Put everything (gradient and Hessian) together for Laplace3 - joint_logLik_with_grad_and_hess = function(p = double(1), reTransform = double(1)) { - # This returns a vector of concatenated key quantities (see comment below for details) - # reTransform is the arg max of the inner logLik - # We could consider returning only upper triangular elements of chol(-Hessian), - # and re-constituting as a matrix when needed. - joint_logLik_res <- derivs(joint_logLik(p, reTransform), wrt = p_and_re_indices, order = c(1, 2), - model = model, updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - negHessValue <- -joint_logLik_res$hessian[npar + 1, npar + 1, 1] - logdetNegHessAns <- log(negHessValue) - hess_wrt_p_wrt_re <- joint_logLik_res$hessian[1:npar, npar + 1, 1] - ans <- c(joint_logLik_res$jacobian[1, 1:npar], logdetNegHessAns, negHessValue, hess_wrt_p_wrt_re) - ## If cholNegHess is considered, indices to components are: - ## gr_joint_logLik_wrt_p = (1:npar) [size = npar] - ## logdetNegHess = npar + 1 [1] - ## cholNegHess = npar + 1 + (1 : nre*nre) [nre x nre] - ## hess_wrt_p_wrt_re = npar + 1 + nre*nre + (1:npar*nre) [npar x nre] - return(ans) - returnType(double(1)) - }, - joint_logLik_with_higher_derivs = function(p = double(1), reTransform = double(1)) { - # value gives results from joint_logLik_with_grad_and_hess - # jacobian gives derivs of these outputs wrt (p, re). 
- # We only need gradient of logdetNegHess, which is the - # (1 + npar + 1, given in that order for sanity) row of jacobian - # Other rows of the jacobian are wasted, but when this function - # is meta-taped and optimized (part of CppAD), those calculations should be omitted - higher_order_deriv_res <- derivs(joint_logLik_with_grad_and_hess(p, reTransform), wrt = p_and_re_indices, order = c(0, 1), - model = model, updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - ans <- c(higher_order_deriv_res$value, higher_order_deriv_res$jacobian[npar + 1,]) - return(ans) - returnType(double(1)) - }, - update_logLik3_with_gr = function(p = double(1), reset = logical(0, default = FALSE)) { - if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) { - update_max_inner_logLik(p) - } - reTransform <- max_inner_logLik_last_argmax - maxValue <- max_inner_logLik_last_value - ans <- derivs(joint_logLik_with_higher_derivs(p, reTransform), wrt = p_and_re_indices, order = 0, - model = model, updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - ind <- 1 - # all "logLik" here is joint log likelihood (i.e. for p and re) - gr_logLik_wrt_p <- numeric(value = ans$value[(ind):(ind + npar - 1)], length = npar) - ind <- ind + npar - logdetNegHess_value <- ans$value[ind] - ind <- ind + 1 - # chol_negHess <- matrix(ans$value[(ind):(ind + nre*nre - 1)], nrow = nre, ncol = nre) - negHessValue <- ans$value[ind] - saved_inner_negHess <<- matrix(negHessValue, ncol = 1, nrow = 1) - ind <- ind + 1 - hess_cross_terms <- numeric(value = ans$value[(ind):(ind + npar*1 - 1)], length = npar*1) - ind <- ind + npar*1 - gr_logdetNegHess_wrt_p_v <- numeric(value = ans$value[(ind):(ind + npar - 1)], length = npar) - ind <- ind + npar - gr_logdetNegHess_wrt_re_v <- ans$value[ind] - - if( nQuad_ == 1) { - ## Laplace Approximation - logLik_saved_value <<- maxValue - 0.5 * logdetNegHess_value + 0.5 * 1 * log(2*pi) - }else{ - ## AGHQ Approximation: - calcLogLik_AGHQuad(p) - } - logLik3_saved_value <<- logLik_saved_value - - if( nQuad_ == 1 ){ - ## Gradient of Laplace Approx - AGHQuad_saved_gr <<- gr_logLik_wrt_p - 0.5*(gr_logdetNegHess_wrt_p_v + hess_cross_terms * (gr_logdetNegHess_wrt_re_v / negHessValue)) - }else{ - ## Gradient of AGHQ Approx. - ## dre_hat/dp = d^2ll/drep / d^2ll/dre^2 - gr_rehatwrtp <<- hess_cross_terms/negHessValue - ## dsigma_hat/dp (needed at real scale) - sigma_hat <- 1/sqrt(negHessValue) - gr_sigmahatwrtp <<- -0.5*gr_logdetNegHess_wrt_p_v*sigma_hat - gr_sigmahatwrtre <<- -0.5*gr_logdetNegHess_wrt_re_v*sigma_hat - - grp_AGHQuad_sum <- gr_AGHQuad_nodes(p = p, method = 2) ## Use method 2 for these? 
AGHQuad_saved_gr <<- grp_AGHQuad_sum - 0.5 * (gr_logdetNegHess_wrt_p_v + gr_logdetNegHess_wrt_re_v * gr_rehatwrtp)
-      }
-      logLik3_saved_gr <<- AGHQuad_saved_gr
-
-      return(ans$value)
-      returnType(double(1))
-    },
-    logLik3_update = function(p = double(1)) {
-      if(any(p != logLik3_previous_p)) {
-        update_logLik3_with_gr(p)
-        logLik3_previous_p <<- p
-      }
-    },
-    calcLogLik3 = function(p = double(1)) {
-      if(!one_time_fixes_done) one_time_fixes()
-      logLik3_update(p)
-      if(logLik3_saved_value > max_logLik) {
-        max_logLik <<- logLik3_saved_value
-        max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax
-      }
-      return(logLik3_saved_value)
-      returnType(double())
-    },
-    gr_logLik3 = function(p = double(1)) {
-      if(!one_time_fixes_done) one_time_fixes()
-      logLik3_update(p)
-      return(logLik3_saved_gr)
-      returnType(double(1))
-    },
-    ## Laplace approximation 2: double taping with separate components
-    calcLogLik2 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
-      maxValue <- max_inner_logLik_last_value
-      logdetNegHessian <- logdetNegHess(p, reTransform)
-      saved_inner_negHess <<- matrix(exp(logdetNegHessian), nrow = 1, ncol = 1)
-
-      if(nQuad_ == 1){
-        ## Laplace approximation.
-        logLik_saved_value <<- maxValue - 0.5 * logdetNegHessian + 0.5 * 1 * log(2*pi)
-      }else{
-        ## Do Quadrature:
-        calcLogLik_AGHQuad(p)
-      }
-
-      if(logLik_saved_value > max_logLik) {
-        max_logLik <<- logLik_saved_value
-        max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax
-      }
-      return(logLik_saved_value)
-      returnType(double())
-    },
-    ## Laplace approximation 1: single taping with separate components
-    calcLogLik1 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik_internal(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
-      maxValue <- max_inner_logLik_last_value
-      logdetNegHessian <- logdetNegHess(p, reTransform)
-      saved_inner_negHess <<- matrix(exp(logdetNegHessian), nrow = 1, ncol = 1)
-
-      if(nQuad_ == 1){
-        ## Laplace approximation.
-        logLik_saved_value <<- maxValue - 0.5 * logdetNegHessian + 0.5 * 1 * log(2*pi)
-      }else{
-        ## Do Quadrature:
-        calcLogLik_AGHQuad(p)
-      }
-
-      if(logLik_saved_value > max_logLik) {
-        max_logLik <<- logLik_saved_value
-        max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax
-      }
-
-      return(logLik_saved_value)
-      returnType(double())
-    },
-    calcLogLik_AGHQuad = function(p = double(1)){
-      ## AGHQ Approximation: 3 steps. build grid (happens once), transform z to re, save log density.
-      AGHQuad_grid$buildGrid()
-      nQ <- AGHQuad_grid$getGridSize()
-      AGHQuad_grid$transformGrid1D(negHess = saved_inner_negHess, inner_mode = max_inner_logLik_last_argmax)
-      modeIndex <- AGHQuad_grid$getModeIndex() ## if even, this is -1
-      AGHQuad_grid$saveLogDens( -1, max_inner_logLik_last_value ) ## Cache this value regardless of even or odd.
-      for(i in 1:nQ) {
-        if(i != modeIndex) AGHQuad_grid$saveLogDens(i, joint_logLik(p = p, reTransform = AGHQuad_grid$getNodesTransformed(i) ) )
-      }
-
-      ## Given all the saved values, weights and log density, do quadrature sum.
-      logLik_saved_value <<- AGHQuad_grid$quadSum()
-      quadrature_previous_p <<- p ## Cache this to make sure you have it for the gradient calculation.
-    },
-    ## Gradient of the Laplace approximation (version 2) w.r.t. parameters
-    gr_logLik2 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
-      saved_inner_negHess <<- negHess(p, reTransform)
-      negHessian <- saved_inner_negHess[1, 1]
-
-      # invNegHessian <- inverse(negHessian)
-      grlogdetNegHesswrtp <- gr_logdetNegHess_wrt_p(p, reTransform)
-      grlogdetNegHesswrtre <- gr_logdetNegHess_wrt_re(p, reTransform)[1]
-      hesslogLikwrtpre <- hess_joint_logLik_wrt_p_wrt_re(p, reTransform)[,1]
-
-      if( nQuad_ == 1 ){
-        ## Gradient of Laplace Approx
-        p1 <- gr_joint_logLik_wrt_p(p, reTransform)
-        AGHQuad_saved_gr <<- p1 - 0.5 * (grlogdetNegHesswrtp + hesslogLikwrtpre * (grlogdetNegHesswrtre / negHessian))
-      }else{
-        ## Gradient of AGHQ Approx.
-        ## dre_hat/dp = d^2ll/drep / d^2ll/dre^2
-        gr_rehatwrtp <<- hesslogLikwrtpre/negHessian
-        ## dsigma_hat/dp (needed at real scale)
-        sigma_hat <- 1/sqrt(negHessian)
-        gr_sigmahatwrtp <<- -0.5*grlogdetNegHesswrtp*sigma_hat
-        gr_sigmahatwrtre <<- -0.5*grlogdetNegHesswrtre*sigma_hat
-        ## Sum gradient of each node.
-        grp_AGHQuad_sum <- gr_AGHQuad_nodes(p = p, method = 2)
-        AGHQuad_saved_gr <<- grp_AGHQuad_sum - 0.5 * (grlogdetNegHesswrtp + grlogdetNegHesswrtre * gr_rehatwrtp)
-      }
-      return(AGHQuad_saved_gr)
-      returnType(double(1))
-    },
-    ## Gradient of the Laplace approximation (version 1) w.r.t. parameters
-    gr_logLik1 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik_internal(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
-      saved_inner_negHess <<- negHess_internal(p, reTransform) ## repeated comp. pvdb.
-      negHessian <- saved_inner_negHess[1, 1]
-
-      ## invNegHessian <- inverse(negHessian)
-      grlogdetNegHesswrtp <- gr_logdetNegHess_wrt_p_internal(p, reTransform)
-      grlogdetNegHesswrtre <- gr_logdetNegHess_wrt_re_internal(p, reTransform)[1]
-      hesslogLikwrtpre <- hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform)[,1]
-
-      if( nQuad_ == 1 ){
-        ## Gradient of Laplace Approx
-        p1 <- gr_joint_logLik_wrt_p_internal(p, reTransform)
-        AGHQuad_saved_gr <<- p1 - 0.5 * (grlogdetNegHesswrtp + hesslogLikwrtpre * (grlogdetNegHesswrtre / negHessian))
-      }else{
-        ## Gradient of AGHQ Approx.
-        ## dre_hat/dp = d^2ll/drep / d^2ll/dre^2
-        gr_rehatwrtp <<- hesslogLikwrtpre/negHessian
-        ## dsigma_hat/dp (needed at real scale)
-        sigma_hat <- 1/sqrt(negHessian)
-        gr_sigmahatwrtp <<- -0.5*grlogdetNegHesswrtp*sigma_hat
-        gr_sigmahatwrtre <<- -0.5*grlogdetNegHesswrtre*sigma_hat
-        ## Sum gradient of each node.
-        grp_AGHQuad_sum <- gr_AGHQuad_nodes(p = p, method = 1)
-        AGHQuad_saved_gr <<- grp_AGHQuad_sum - 0.5 * (grlogdetNegHesswrtp + grlogdetNegHesswrtre * gr_rehatwrtp)
-      }
-
-      return(AGHQuad_saved_gr)
-      returnType(double(1))
-    },
-    ## Partial gradient of AGHQ nodes with respect to p.
-    gr_AGHQuad_nodes = function(p = double(1), method = double()){
-
-      ## Need to have quadrature sum for gradient:
-      if(any(p != quadrature_previous_p)){
-        calcLogLik_AGHQuad(p)
-      }
-
-      ## Method 2 implies double taping.
- modeIndex <- AGHQuad_grid$getModeIndex() - nQ <- AGHQuad_grid$getGridSize() - gr_margLogLik_wrt_p <- numeric(value = 0, length = dim(p)[1]) - wgts_lik <- numeric(value = 0, length = nQ) - for(i in 1:nQ) { - z_node_i <- AGHQuad_grid$getNodes(i)[1] - reTrans_i <- AGHQuad_grid$getNodesTransformed(i) - wgts_lik[i] <- exp(AGHQuad_grid$getLogDensity(i) - max_inner_logLik_last_value)*AGHQuad_grid$getWeights(i) - - ## At the mode (z = 0, don't have additional z*sigma_hat gr complication). - if( modeIndex == i ){ - if( method == 2 ) gr_jointlogLikwrtp <- gr_joint_logLik_wrt_p(p, reTrans_i) - else gr_jointlogLikwrtp <- gr_joint_logLik_wrt_p_internal(p, reTrans_i) - gr_margLogLik_wrt_p <- gr_margLogLik_wrt_p + wgts_lik[i]*gr_jointlogLikwrtp - }else{ - ## Chain Rule: dll/dre * ( dre_hat/dp + dsigma_hat/dp*z_i ) - ## dll/dp - if(method == 2){ - gr_logLikwrtrewrtre_i <- gr_joint_logLik_wrt_re(p, reTrans_i)[1] - gr_logLikewrtp_i <- gr_joint_logLik_wrt_p(p, reTrans_i) - }else{ - gr_logLikwrtrewrtre_i <- gr_joint_logLik_wrt_re_internal(p, reTrans_i)[1] - gr_logLikewrtp_i <- gr_joint_logLik_wrt_p_internal(p, reTrans_i) - } - gr_logLikwrtrewrtp_i <- gr_logLikwrtrewrtre_i * - ( (1 + gr_sigmahatwrtre*z_node_i) * gr_rehatwrtp + gr_sigmahatwrtp*z_node_i ) - ## The weighted gradient for the ith sum. - gr_margLogLik_wrt_p <- gr_margLogLik_wrt_p + wgts_lik[i]*( gr_logLikewrtp_i + gr_logLikwrtrewrtp_i ) - } - } - - return(gr_margLogLik_wrt_p / sum(wgts_lik[1:nQ])) - returnType(double(1)) - }, - get_inner_mode = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - if(atOuterMode) return(outer_mode_max_inner_logLik_last_argmax) - return(max_inner_logLik_last_argmax) - }, - get_inner_negHessian = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - if(atOuterMode) return(outer_mode_inner_negHess) - return(saved_inner_negHess) - }, - get_inner_negHessian_chol = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - if(atOuterMode) return(sqrt(outer_mode_inner_negHess)) - return(sqrt(saved_inner_negHess)) - }, - ## Update the maximum mode and neg hess based on the log likelihood passed via optim. - ## For efficient saving of values for calculating MLE values of random-effects and INLA simulation of them. - save_outer_logLik = function(logLikVal = double()){ - if(logLikVal >= max_outer_logLik) { - max_outer_logLik <<- logLikVal - outer_mode_inner_negHess <<- saved_inner_negHess - outer_mode_max_inner_logLik_last_argmax <<- max_inner_logLik_last_argmax - outer_param_max <<- max_inner_logLik_previous_p - } - }, - get_param_value = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - ## Ensures that the inner value will not match and cached values will not be used. - if(!cache_inner_max) return(numeric(value = Inf, length = npar)) - if(atOuterMode) return(outer_param_max) - return(max_inner_logLik_previous_p) - }, - ## Need to reset every time optim is called to recache. - reset_outer_logLik = function(){ - max_outer_logLik <<- -Inf - }, - ## Allow the user to explore using different sized quadrature grids. - ## set_nQuad = function(nQUpdate = integer()){ - ## AGHQuad_grid$setGridSize(nQUpdate = nQUpdate) - ## nQuad <<- nQUpdate - ## }, - ## set_transformation = function(transformation = character()){}, ## Not applicable to 1 Dimension. - ## set_warning = function(warn = logical()){ - ## warn_optim <<- warn - ## }, - ## Internal option to change initial values. 
- ## set_reInitMethod = function(method = character(), values = double(1)) { - ## if(method == "last") startID <<- 1 # last - ## else if(method == "last.best") startID <<- 2 # last.best - ## else if(method == "constant") startID <<- 3 # use fixed vector optimStart provided at setup time - ## else if(method == "random") startID <<- 4 - ## else if(method == "model") { - ## startID <<- 3 - ## constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - ## } else { - ## stop("invalid method for RE initialization") - ## } - ## if(startID <= 3) { - ## constant_init_par <<- values - ## } - ## }, - set_randomeffect_values = function(p = double(1)){ - foundIt <- FALSE - ## Last value called: - if(all(p == max_inner_logLik_previous_p)) { - re <- reTrans$inverseTransform(max_inner_logLik_last_argmax) - foundIt <- TRUE - } - ## Best value called: - if(all(p == outer_param_max)) { - re <- reTrans$inverseTransform(outer_mode_max_inner_logLik_last_argmax) - foundIt <- TRUE - } - if(foundIt){ - values(model, paramNodes) <<- p - model$calculate(paramDeps) - }else{ - # It would be nice to emit a message here, but different optimizers (e.g. BFGS vs nlminb) - # behave differently as to whether the previous (last) parameters were always the MLE. - # print(" [Warning] Have not cached the inner optimization. Running optimization now.") - update_max_inner_logLik(p) - re <- reTrans$inverseTransform(max_inner_logLik_last_argmax) - } - ## Ensure the model is up to date for all nodes. - values(model, randomEffectsNodes) <<- re - model$calculate(innerCalcNodes) - } - ## set_inner_cache = function(cache = logical(0, default = TRUE)){ - ## cache_inner_max <<- cache - ## } - ), - buildDerivs = list(inner_logLik = list(), - joint_logLik = list(), - gr_joint_logLik_wrt_re = list(), - negHess = list(), - logdetNegHess = list(), - gr_inner_logLik_internal = list(), - he_inner_logLik_internal = list(), - he_inner_logLik_internal_as_vec = list(), - gr_joint_logLik_wrt_p_internal = list(), - gr_joint_logLik_wrt_re_internal = list(), - hess_joint_logLik_wrt_p_wrt_re_internal = list(), - negHess_internal = list(), - gr_logdetNegHess_wrt_p_internal = list(), - gr_logdetNegHess_wrt_re_internal = list(), - joint_logLik_with_grad_and_hess = list(ignore = c("i","j")), - joint_logLik_with_higher_derivs = list()) -) ## End of buildOneAGHQuad1D - - -## A single Laplace approximation for models with more than one scalar random effect node -buildOneLaplace <- function(model, paramNodes, randomEffectsNodes, calcNodes, control = list()) { - buildOneAGHQuad(model, nQuad = 1, paramNodes, randomEffectsNodes, calcNodes, control) -} - -buildOneAGHQuad <- nimbleFunction( - contains = AGHQuad_BASE, - setup = function(model, nQuad = 1, paramNodes, randomEffectsNodes, calcNodes, control = list()) { - ## Check and add necessary (upstream) deterministic nodes into calcNodes - ## This ensures that deterministic nodes between paramNodes and calcNodes are used. 
- ## optimControl_ <- extractControlElement(control, 'optimControl', nimOptimDefaultControl()) - ## optimMethod_ <- extractControlElement(control, 'optimMethod', 'BFGS') - ## optimStart_ <- extractControlElement(control, 'optimStart', 'constant') - ## optimStartValues_ <- extractControlElement(control, 'optimStartValues', 0) - nQuad_ <- nQuad - S <- setup_OneAGHQuad(model, paramNodes, randomEffectsNodes, calcNodes, control) - optimControl_ <- S$optimControl_ - optimMethod_ <- S$optimMethod_ - optimStart_ <- S$optimStart_ - optimStartValues_ <- S$optimStartValues_ - nre <- S$nre - paramDeps <- S$paramDeps - innerCalcNodes <- S$innerCalcNodes - calcNodes <- S$calcNodes - wrtNodes <- S$wrtNodes - reTrans <- S$reTrans - npar <- S$npar - p_indices <- S$p_indices - - ## paramDeps <- model$getDependencies(paramNodes, determOnly = TRUE, self=FALSE) - ## if(length(paramDeps) > 0) { - ## keep_paramDeps <- logical(length(paramDeps)) - ## for(i in seq_along(paramDeps)) { - ## if(any(paramDeps[i] == calcNodes)) keep_paramDeps[i] <- FALSE - ## else { - ## nextDeps <- model$getDependencies(paramDeps[i]) - ## keep_paramDeps[i] <- any(nextDeps %in% calcNodes) - ## } - ## } - ## paramDeps <- paramDeps[keep_paramDeps] - ## } - ## innerCalcNodes <- calcNodes - ## calcNodes <- model$expandNodeNames(c(paramDeps, calcNodes), sort = TRUE) - ## wrtNodes <- c(paramNodes, randomEffectsNodes) - ## ## Indices of randomEffectsNodes and paramNodes inside wrtNodes - ## reTrans <- parameterTransform(model, randomEffectsNodes) - ## npar <- length(model$expandNodeNames(paramNodes, returnScalarComponents = TRUE)) - ## nre <- length(model$expandNodeNames(randomEffectsNodes, returnScalarComponents = TRUE)) - nreTrans <- reTrans$getTransformedLength() - if(nreTrans > 1) reTrans_indices <- as.numeric((npar+1):(npar+nreTrans)) - else reTrans_indices <- as.numeric(c(npar+1, -1)) - ## if(npar > 1) p_indices <- as.numeric(1:npar) - ## else p_indices <- as.numeric(c(1, -1)) - ## ## Indices of randomEffectsNodes inside randomEffectsNodes for use in getting the derivative of - ## ## the inner log-likelihood (paramNodes fixed) w.r.t. randomEffectsNodes. 
if(nreTrans > 1) reTrans_indices_inner <- as.numeric(1:nreTrans)
-    else reTrans_indices_inner <- as.numeric(c(1, -1))
-    p_and_reTrans_indices <- as.numeric(1:(npar + nreTrans))
-
-    ## Set up start values for the inner optimization of Laplace approximation
-    if(!is.character(optimStart_) | length(optimStart_) != 1) stop("problem with optimStart ", optimStart_)
-    startID <- switch(optimStart_, last=1, last.best=2, constant=3, random=4, model=5)
-    if(startID==5) {
-      constant_init_par <- reTrans$transform(c(values(model, randomEffectsNodes)))
-    } else {
-      if(length(optimStartValues_) == 1)
-        constant_init_par <- rep(optimStartValues_, nreTrans)
-      else
-        constant_init_par <- optimStartValues_
-    }
-    if(length(constant_init_par) != nreTrans)
-      stop("buildOneAGHQuad: Found ", length(constant_init_par), " initial values for inner optimization in Laplace or AGHQuad when expecting ", nreTrans)
-    if(length(constant_init_par) == 1) constant_init_par <- c(constant_init_par, -1)
-
-    ## Update and constant nodes info for obtaining derivatives using AD
-    inner_derivsInfo <- makeModelDerivsInfo(model = model, wrtNodes = randomEffectsNodes, calcNodes = innerCalcNodes)
-    inner_updateNodes <- inner_derivsInfo$updateNodes
-    inner_constantNodes <- inner_derivsInfo$constantNodes
-    joint_derivsInfo <- makeModelDerivsInfo(model = model, wrtNodes = wrtNodes, calcNodes = calcNodes)
-    joint_updateNodes <- joint_derivsInfo$updateNodes
-    joint_constantNodes <- joint_derivsInfo$constantNodes
-
-    ## The following are used for caching values and gradient in the Laplace3 system
-    logLik3_saved_value <- -Inf #numeric(1)
-    logLik3_saved_gr <- if(npar > 1) numeric(npar) else as.numeric(c(0, -1))
-    logLik3_previous_p <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1))
-
-    max_inner_logLik_last_argmax <- constant_init_par #if(nreTrans > 1) rep(Inf, nreTrans) else as.numeric(c(Inf, -1))
-    max_inner_logLik_last_value <- -Inf #numeric(1)
-    max_inner_logLik_previous_p <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1))
-    cache_inner_max <- TRUE
-
-    ## Record the maximum Laplace loglikelihood value for obtaining inner optimization start values
-    max_logLik <- -Inf
-    max_logLik_last_best_argmax <- constant_init_par #if(nreTrans > 1) rep(Inf, nreTrans) else as.numeric(c(0, -1))
-
-    ## The following is used to ensure the one_time_fixes are run when needed.
-    one_time_fixes_done <- FALSE
-    update_once <- TRUE
-    gr_inner_update_once <- TRUE
-    gr_inner_logLik_force_update <- TRUE
-    gr_inner_logLik_first <- TRUE
-    negHess_inner_update_once <- TRUE
-    negHess_inner_logLik_force_update <- TRUE
-    negHess_inner_logLik_first <- TRUE
-
-    ## Cache values for access in outer function:
-    saved_inner_negHess <- matrix(0, nrow = nre, ncol = nre)
-    saved_inner_negHess_chol <- matrix(0, nrow = nre, ncol = nre)
-
-    ## Cache the saved log-likelihood value to keep track of the 3 methods.
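## The Laplace value cached below follows the usual identity
##   log integral exp(h(re)) d(re) ~= h(re_hat) + (d/2)*log(2*pi) - 0.5*logdet(-H(re_hat)),
## with re_hat the inner mode and H the inner Hessian. A minimal numerical check with
## numDeriv, using a toy 2-D Gaussian h (an illustrative assumption) whose marginal
## likelihood is exactly 1, so the expression should return ~0:
##   library(numDeriv)
##   h <- function(x) sum(dnorm(x, mean = c(1, 2), sd = c(2, 3), log = TRUE))
##   re_hat <- c(1, 2)                      # inner mode of h
##   H <- hessian(h, re_hat)                # inner Hessian at the mode
##   h(re_hat) + log(2*pi) - 0.5*determinant(-H, logarithm = TRUE)$modulus[1]  # ~= 0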
- logLik_saved_value <- -Inf - - max_outer_logLik <- -Inf - outer_mode_inner_negHess <- matrix(0, nrow = nre, ncol = nre) - outer_mode_inner_negHess_chol <- matrix(0, nrow = nre, ncol = nre) - outer_mode_max_inner_logLik_last_argmax <- if(nreTrans > 1) numeric(nreTrans) else as.numeric(c(0, -1)) - outer_param_max <- if(npar > 1) rep(Inf, npar) else as.numeric(c(Inf, -1)) - - ## Build AGHQ grid: - AGHQuad_grid <- buildAGHQGrid(d = nre, nQuad = nQuad_) - transMethod <- extractControlElement(control, "gridType", "cholesky") - - converged <- 0 - warn_optim <- extractControlElement(control, 'optimWarning', FALSE) ## Warn about inner optimization issues - }, - run = function(){}, - methods = list( - fix_one_vec = function(x = double(1)) { - if(length(x) == 2) { - if(x[2] == -1) { - ans <- numeric(length = 1, value = x[1]) - return(ans) - } - } - return(x) - returnType(double(1)) - }, - one_time_fixes = function() { - if(one_time_fixes_done) return() - if(nre == 1) { - reTrans_indices <<- fix_one_vec(reTrans_indices) - reTrans_indices_inner <<- fix_one_vec(reTrans_indices_inner) - max_inner_logLik_last_argmax <<- fix_one_vec(max_inner_logLik_last_argmax) - max_logLik_last_best_argmax <<- fix_one_vec(max_logLik_last_best_argmax) - constant_init_par <<- fix_one_vec(max_logLik_last_best_argmax) - outer_mode_max_inner_logLik_last_argmax <<- fix_one_vec(outer_mode_max_inner_logLik_last_argmax) - } - if(npar == 1) { - p_indices <<- fix_one_vec(p_indices) - logLik3_saved_gr <<- fix_one_vec(logLik3_saved_gr) - logLik3_previous_p <<- fix_one_vec(logLik3_previous_p) - max_inner_logLik_previous_p <<- fix_one_vec(max_inner_logLik_previous_p) - outer_param_max <<- fix_one_vec(outer_param_max) - } - reInit <- values(model, randomEffectsNodes) - set_reInit(reInit) - one_time_fixes_done <<- TRUE - }, - updateSettings = function(optimMethod = character(0, default="NULL"), - optimStart = character(0, default="NULL"), - optimStartValues = double(1, default=Inf), - optimWarning = integer(0, default=-1), - useInnerCache = integer(0, default=-1), - nQuad = integer(0, default=-1), - gridType = character(0, default="NULL"), - optimControl = optimControlNimbleList(default=nimOptimDefaultControl()), - replace_optimControl = logical(0, default=FALSE)) { - # Checking should have been done already. Or, if this is being called directly, - # it will be for development or advanced uses and we can skip checking. 
- if(optimMethod != "NULL") optimMethod_ <<- optimMethod - if(optimStart != "NULL") { - if(optimStart == "last") startID <<- 1 # last - else if(optimStart == "last.best") startID <<- 2 # last.best - else if(optimStart == "constant") startID <<- 3 # use fixed vector optimStart provided at setup time - else if(optimStart == "random") startID <<- 4 - else if(optimStart == "model") { - startID <<- 3 - constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - } - } - if((length(optimStartValues) != 1) | (optimStartValues[1] != Inf) ) { - if((length(optimStartValues) == 1) & (optimStartValues[1] == -Inf) ) { # numeric code for "model" setting - constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - } else { - if(startID <= 3) { - constant_init_par <<- optimStartValues - if(length(constant_init_par) == 1) - if(nreTrans > 1) - constant_init_par <<- rep(constant_init_par, nreTrans) - } - } - } - if((!one_time_fixes_done) & (length(constant_init_par) == 1)){ - constant_init_par <<- c(constant_init_par, -1) - } - if(optimWarning != -1) { - warn_optim <<- optimWarning != 0 - } - if(useInnerCache != -1) { - cache_inner_max <<- useInnerCache != 0 - } - if(nQuad != -1) { - AGHQuad_grid$setGridSize(nQUpdate = nQuad) - nQuad_ <<- nQuad - } - if(gridType != "NULL") { - transMethod <<- gridType - } - if(replace_optimControl) { - optimControl$fnscale <- -1 - optimControl_ <<- optimControl - } - }, - set_reInit = function(re = double(1)) { - reInitTrans <- reTrans$transform(re) - max_inner_logLik_last_argmax <<- reInitTrans - }, - get_reInitTrans = function() { - if(startID == 1) ans <- max_inner_logLik_last_argmax ## last - else if(startID == 2) ans <- max_logLik_last_best_argmax ## last best - else if(startID == 3) ans <- constant_init_par ## constant - else if(startID == 4){ ## random - model$simulate(randomEffectsNodes) - ans <- reTrans$transform(values(model, randomEffectsNodes)) - } - return(ans) - returnType(double(1)) - }, - ## set_gr_inner_update = function(update = logical(0, default = TRUE)) { - ## gr_inner_update_once <<- update - ## }, - ## set_negHess_inner_update = function(update = logical(0, default = TRUE)) { - ## negHess_inner_update_once <<- update - ## }, - set_params = function(p = double(1)) { - values(model, paramNodes) <<- p - model$calculate(paramDeps) - gr_inner_update_once <<- TRUE - negHess_inner_update_once <<- TRUE - }, - ## Joint log-likelihood with values of parameters fixed: used only for inner optimization - inner_logLik = function(reTransform = double(1)) { - re <- reTrans$inverseTransform(reTransform) - values(model, randomEffectsNodes) <<- re - ans <- model$calculate(innerCalcNodes) + reTrans$logDetJacobian(reTransform) - return(ans) - returnType(double()) - }, - # Gradient of the joint log-likelihood (p fixed) w.r.t. 
transformed random effects: used only for inner optimization - gr_inner_logLik_internal = function(reTransform = double(1)) { - ans <- derivs(inner_logLik(reTransform), wrt = reTrans_indices_inner, order = 1, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - return(ans$jacobian[1,]) - returnType(double(1)) - }, - ## Double taping for efficiency - gr_inner_logLik = function(reTransform = double(1)) { - ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = reTrans_indices_inner, order = 0, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes, - do_update = gr_inner_logLik_force_update | gr_inner_update_once) - gr_inner_update_once <<- FALSE - return(ans$value) - returnType(double(1)) - }, - # Hessian of the joint log-likelihood (p fixed) w.r.t. transformed random effects: used only for inner optimization - # This is being added to experiment with Newton's methods for inner optimization. If this approach provides good - # numerical behavior, we can revisit the efficiency of how to get derivatives, such as getting gradient and hessian together - # or whether it is better to keep them separate, as both may not always be jointly requested. - he_inner_logLik_internal = function(reTransform = double(1)) { - ans <- derivs(inner_logLik(reTransform), wrt = reTrans_indices_inner, order = 2, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - res <- ans$hessian[,,1] - return(res) - returnType(double(2)) - }, - he_inner_logLik_internal_as_vec = function(reTransform = double(1)) { - ans <- he_inner_logLik_internal(reTransform) - res <- nimNumeric(value = ans, length = nreTrans * nreTrans) - return(res) - returnType(double(1)) - }, - # Double taping for possible efficiency - he_inner_logLik = function(reTransform = double(1)) { - ans <- derivs(he_inner_logLik_internal_as_vec(reTransform), wrt = reTrans_indices_inner, order = 0, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - res <- matrix(value = ans$value, nrow = length(reTransform), ncol = length(reTransform)) - return(res) - returnType(double(2)) - }, - negHess_inner_logLik_internal = function(reTransform = double(1)) { - ans <- derivs(gr_inner_logLik_internal(reTransform), wrt = reTrans_indices_inner, order = 1, model = model, - updateNodes = inner_updateNodes, constantNodes = inner_constantNodes) - return(-ans$jacobian) - returnType(double(2)) - }, - # We also tried double-taping straight to second order. That was a bit slower. 
-    negHess_inner_logLik = function(reTransform = double(1)) {
-      ans <- derivs(negHess_inner_logLik_internal(reTransform), wrt = reTrans_indices_inner, order = 0, model = model,
-                    updateNodes = inner_updateNodes, constantNodes = inner_constantNodes,
-                    do_update = negHess_inner_logLik_force_update | negHess_inner_update_once)
-      negHess_inner_update_once <<- FALSE
-      neghess <- matrix(ans$value, nrow = nreTrans)
-      return(neghess)
-      returnType(double(2))
-    },
-    record_negHess_inner_logLik = function(reTransform = double(1)) {
-      negHess_inner_logLik_force_update <<- TRUE
-      negHess_inner_logLik(reTransform) # record
-      negHess_inner_logLik_first <<- FALSE
-      negHess_inner_logLik_force_update <<- FALSE
-    },
-    ## Solve the inner optimization for Laplace approximation
-    max_inner_logLik = function(p = double(1)) {
-      set_params(p)
-      reInitTrans <- get_reInitTrans()
-      fn_init <- inner_logLik(reInitTrans)
-      if((fn_init == Inf) | (fn_init == -Inf) | (is.nan(fn_init)) | (is.na(fn_init))) {
-        optRes <- optimResultNimbleList$new()
-        optRes$par <- reInitTrans
-        optRes$value <- -Inf
-        optRes$convergence <- -1
-        return(optRes)
-      }
-      if(gr_inner_logLik_first) {
-        gr_inner_logLik_force_update <<- TRUE
-        gr_inner_logLik(reInitTrans)
-        gr_inner_logLik_first <<- FALSE
-        gr_inner_logLik_force_update <<- FALSE
-      }
-      optRes <- optim(reInitTrans, inner_logLik, gr = gr_inner_logLik, he = he_inner_logLik, method = optimMethod_, control = optimControl_)
-      if(optRes$convergence != 0 & warn_optim){
-        print(" [Warning] `optim` did not converge for the inner optimization of AGHQ or Laplace approximation.")
-      }
-      converged <<- optRes$convergence
-      return(optRes)
-      returnType(optimResultNimbleList())
-    },
-    max_inner_logLik_internal = function(p = double(1)) {
-      set_params(p)
-      reInitTrans <- get_reInitTrans()
-      fn_init <- inner_logLik(reInitTrans)
-      if((fn_init == Inf) | (fn_init == -Inf) | (is.nan(fn_init)) | (is.na(fn_init))) {
-        optRes <- optimResultNimbleList$new()
-        optRes$par <- reInitTrans
-        optRes$value <- -Inf
-        optRes$convergence <- -1
-        return(optRes)
-      }
-      optRes <- optim(reInitTrans, inner_logLik, gr = gr_inner_logLik_internal, he = he_inner_logLik_internal, method = optimMethod_, control = optimControl_)
-      if(optRes$convergence != 0 & warn_optim){
-        print(" [Warning] `optim` did not converge for the inner optimization of AGHQ or Laplace approximation.")
-      }
-      converged <<- optRes$convergence
-      return(optRes)
-      returnType(optimResultNimbleList())
-    },
-    ## Outer check on inner convergence.
-    check_convergence = function(){
-      returnType(double())
-      return(converged)
-    },
-    ## These two update methods for max_inner_logLik use the same member data caches
-    update_max_inner_logLik = function(p = double(1)) {
-      optRes <- max_inner_logLik(p)
-      max_inner_logLik_last_argmax <<- optRes$par
-      max_inner_logLik_last_value <<- optRes$value
-      max_inner_logLik_previous_p <<- p
-      return(max_inner_logLik_last_argmax)
-      returnType(double(1))
-    },
-    update_max_inner_logLik_internal = function(p = double(1)) {
-      optRes <- max_inner_logLik_internal(p)
-      max_inner_logLik_last_argmax <<- optRes$par
-      max_inner_logLik_last_value <<- optRes$value
-      max_inner_logLik_previous_p <<- p
-      return(max_inner_logLik_last_argmax)
-      returnType(double(1))
-    },
-    ## Joint log-likelihood in terms of parameters and transformed random effects
-    joint_logLik = function(p = double(1), reTransform = double(1)) {
-      re <- reTrans$inverseTransform(reTransform)
-      values(model, paramNodes) <<- p
-      values(model, randomEffectsNodes) <<- re
-      ans <- model$calculate(calcNodes) + reTrans$logDetJacobian(reTransform)
-      return(ans)
-      returnType(double())
-    },
-    ## 1st order partial derivative w.r.t. parameters
-    gr_joint_logLik_wrt_p_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(joint_logLik(p, reTransform), wrt = p_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_joint_logLik_wrt_p = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_p_internal(p, reTransform), wrt = p_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## 1st order partial derivative w.r.t. transformed random effects
-    gr_joint_logLik_wrt_re_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(joint_logLik(p, reTransform), wrt = reTrans_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_joint_logLik_wrt_re = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_re_internal(p, reTransform), wrt = reTrans_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## 2nd order mixed partial derivative w.r.t. parameters and transformed random effects
-    hess_joint_logLik_wrt_p_wrt_re_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_p_internal(p, reTransform), wrt = reTrans_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian)
-      returnType(double(2))
-    },
-    ## Double taping
-    hess_joint_logLik_wrt_p_wrt_re = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform), wrt = reTrans_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      derivmat <- matrix(value = ans$value, nrow = npar)
-      return(derivmat)
-      returnType(double(2))
-    },
-    ## Negative Hessian: 2nd order unmixed partial derivative w.r.t. transformed random effects
-    negHess_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_joint_logLik_wrt_re_internal(p, reTransform), wrt = reTrans_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(-ans$jacobian)
-      returnType(double(2))
-    },
-    ## Double taping
-    negHess = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(negHess_internal(p, reTransform), wrt = reTrans_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes, do_update = update_once)
-      # update_once <<- FALSE
-      neghess <- matrix(ans$value, nrow = nreTrans)
-      return(neghess)
-      returnType(double(2))
-    },
-    reset_update = function(update = logical(0, default = TRUE)) {
-      update_once <<- update
-    },
-    ## Cholesky factor of the negative Hessian
-    cholNegHessian = function(p = double(1), reTransform = double(1)) {
-      negHessian <- negHess(p, reTransform)
-      ans <- chol(negHessian)
-      return(ans)
-      returnType(double(2))
-    },
-    ## Logdet negative Hessian
-    logdetNegHess = function(p = double(1), reTransform = double(1)) {
-      ans <- 2 * sum(log(diag(cholNegHessian(p, reTransform))))
-      return(ans)
-      returnType(double())
-    },
-    ## Gradient of logdet (negative) Hessian w.r.t. parameters
-    gr_logdetNegHess_wrt_p_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(logdetNegHess(p, reTransform), wrt = p_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_logdetNegHess_wrt_p = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_logdetNegHess_wrt_p_internal(p, reTransform), wrt = p_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## Gradient of logdet (negative) Hessian w.r.t. transformed random effects
-    gr_logdetNegHess_wrt_re_internal = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(logdetNegHess(p, reTransform), wrt = reTrans_indices, order = 1, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$jacobian[1,])
-      returnType(double(1))
-    },
-    ## Double taping
-    gr_logdetNegHess_wrt_re = function(p = double(1), reTransform = double(1)) {
-      ans <- derivs(gr_logdetNegHess_wrt_re_internal(p, reTransform), wrt = reTrans_indices, order = 0, model = model,
-                    updateNodes = joint_updateNodes, constantNodes = joint_constantNodes)
-      return(ans$value)
-      returnType(double(1))
-    },
-    ## Put everything (gradient and Hessian) together for Laplace3
-    joint_logLik_with_grad_and_hess = function(p = double(1), reTransform = double(1)) {
-      # This returns a vector of concatenated key quantities (see comment below for details)
-      # reTransform is the arg max of the inner logLik
-      # We could consider returning only upper triangular elements of chol(-Hessian),
-      # and re-constituting as a matrix when needed.
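## Aside (editorial example, not package code): logdetNegHess() relies on the
## identity log det(M) = 2 * sum(log(diag(chol(M)))) for positive-definite M,
## which a base-R toy matrix verifies directly:
## set.seed(1)
## M <- crossprod(matrix(rnorm(16), 4, 4)) + diag(4)     # positive definite
## 2 * sum(log(diag(chol(M))))                           # Cholesky route, as above
## as.numeric(determinant(M, logarithm = TRUE)$modulus)  # same value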
- joint_logLik_res <- derivs(joint_logLik(p, reTransform), wrt = p_and_reTrans_indices, order = c(1, 2), - model = model, updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - negHessUpper <- matrix(init = FALSE, nrow = nre, ncol = nreTrans) - for(i in 1:nreTrans){ - for(j in i:nreTrans){ - negHessUpper[i,j] <- -joint_logLik_res$hessian[npar + i, npar + j, 1] - } - } - # for(i in 1:nreTrans) negHessUpper[i,i:nreTrans] <- -joint_logLik_res$hessian[npar + i, npar + i:nreTrans, 1] - cholNegHess <- chol(negHessUpper) - logdetNegHessAns <- 2 * sum(log(diag(cholNegHess))) - hess_wrt_p_wrt_re <- matrix(init = FALSE, nrow = npar, ncol = nre) - for(i in 1:npar){ - for(j in 1:nreTrans){ - hess_wrt_p_wrt_re[i, j] <- joint_logLik_res$hessian[i, npar + j, 1] - } - } - # hess_wrt_p_wrt_re <- joint_logLik_res$hessian[1:npar, npar + (1:nreTrans), 1] # Wasn't working. - - ans <- c(joint_logLik_res$jacobian[1, 1:npar], logdetNegHessAns, cholNegHess, hess_wrt_p_wrt_re) - ## Indices to components of this are: - ## gr_joint_logLik_wrt_p = (1:npar) [size = npar] - ## logdetNegHess = npar + 1 [1] - ## cholNegHess = npar + 1 + (1 : nreTrans * nreTrans) [nreTrans x nreTrans] - ## hess_wrt_p_wrt_re = npar + 1 + nre*nre + (1:npar*nreTrans) [npar x nreTrans] - return(ans) - returnType(double(1)) - # return a concatenated vector - }, - joint_logLik_with_higher_derivs = function(p = double(1), reTransform = double(1)) { - higher_order_deriv_res <- derivs(joint_logLik_with_grad_and_hess(p, reTransform), wrt = p_and_reTrans_indices, - order = c(0, 1), model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - # value gives results from joint_logLik_with_grad_and_hess - # jacobian gives derivs of these outputs wrt (p, re). - # We only need gradient of logdetNegHess, which is the - # (1 + npar + 1, given in that order for sanity) row of jacobian - # Other rows of the jacobian are wasted, but when this function - # is meta-taped and optimized (part of CppAD), those calculations should be omitted - ans <- c(higher_order_deriv_res$value, higher_order_deriv_res$jacobian[npar + 1,]) - return(ans) - returnType(double(1)) - }, - update_logLik3_with_gr = function(p = double(1), reset = logical(0, default = FALSE)) { - if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) { - update_max_inner_logLik(p) - } - reTransform <- max_inner_logLik_last_argmax - maxValue <- max_inner_logLik_last_value - ans <- derivs(joint_logLik_with_higher_derivs(p, reTransform), wrt = p_and_reTrans_indices, order = 0, model = model, - updateNodes = joint_updateNodes, constantNodes = joint_constantNodes) - ind <- 1 - # all "logLik" here is joint log likelihood (i.e. for p and re) - gr_logLik_wrt_p <- ans$value[(ind):(ind + npar - 1)] - ind <- ind + npar - logdetNegHess_value <- ans$value[ind] - ind <- ind + 1 - chol_negHess <- matrix(ans$value[(ind):(ind + nreTrans*nreTrans - 1)], nrow = nreTrans, ncol = nreTrans) - saved_inner_negHess_chol <<- chol_negHess ## Method 3 doesn't cache neg Hessian.*** Should we calc here? 
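## Aside (editorial example, not package code): the index layout documented in
## the comments above can be mocked up in base R with arbitrary toy dimensions:
## npar <- 2; nre <- 3
## grad <- rnorm(npar); logdet <- 0.7
## cholH <- chol(diag(nre) + 0.1); cross <- matrix(rnorm(npar * nre), npar, nre)
## packed <- c(grad, logdet, cholH, cross)  # matrices flatten column-major
## ind <- 1
## grad2 <- packed[ind:(ind + npar - 1)];                         ind <- ind + npar
## logdet2 <- packed[ind];                                        ind <- ind + 1
## cholH2 <- matrix(packed[ind:(ind + nre*nre - 1)], nre, nre);   ind <- ind + nre*nre
## cross2 <- matrix(packed[ind:(ind + npar*nre - 1)], npar, nre)
## stopifnot(identical(cholH2, cholH), identical(cross2, cross))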
-      ind <- ind + nreTrans*nreTrans
-      hess_cross_terms <- matrix(ans$value[(ind):(ind + npar*nreTrans - 1)], nrow = npar, ncol = nreTrans)
-      ind <- ind + npar*nreTrans
-      gr_logdetNegHess_wrt_p_v <- ans$value[(ind):(ind + npar - 1)]
-      ind <- ind + npar
-      gr_logdetNegHess_wrt_re_v <- ans$value[(ind):(ind + nreTrans - 1)]
-
-      if(nQuad_ == 1) {
-        ## Laplace Approximation
-        logLik_saved_value <<- maxValue - 0.5 * logdetNegHess_value + 0.5 * nreTrans * log(2*pi)
-      }else{
-        ## AGHQ Approximation:
-        calcLogLik_AGHQuad(p)
-      }
-      logLik3_saved_value <<- logLik_saved_value
-
-      # We need A^T inverse(negHess) B
-      # where A = gr_logdetNegHess_wrt_re_v (a vector treated by default as a one-column matrix)
-      # and B = t(hess_cross_terms)
-      # We avoid forming the matrix inverse because we have negHess = U^T U, where U = chol(negHess)
-      # so inverse(negHess) = inverse(U) inverse(U^T), and inverse(U^T) = inverse(U)^T
-      # Since U is upper triangular, it is typically more efficient to do forwardsolve and/or backsolve
-      # than to actually form inverse(U) or inverse(negHess)
-      # We have (A^T inverse(U) ) ( inverse(U^T) B) = v^T w
-      # v^T = A^T inverse(U), so v = inverse(U^T) A = forwardsolve(U^T, gr_logdetNegHess_wrt_re_v )
-      # w = inverse(U^T) B, so w = forwardsolve(U^T, t(hess_cross_terms))
-      #
-      # We could return the chol and hess_cross_terms from the derivs steps
-      # in transposed form since that's how we need them here.
-      v <- forwardsolve(t(chol_negHess), gr_logdetNegHess_wrt_re_v)
-      w <- forwardsolve(t(chol_negHess), t(hess_cross_terms))
-      gr_logLik_v <- gr_logLik_wrt_p - 0.5*(gr_logdetNegHess_wrt_p_v + v %*% w )
-      # print( gr_logLik_v )
-      logLik3_saved_gr <<- numeric(gr_logLik_v, length = npar)
-      return(ans$value)
-      returnType(double(1))
-    },
-    logLik3_update = function(p = double(1)) {
-      if(any(p != logLik3_previous_p)) {
-        update_logLik3_with_gr(p)
-        logLik3_previous_p <<- p
-      }
-    },
-    calcLogLik3 = function(p = double(1)) {
-      if(!one_time_fixes_done) one_time_fixes()
-      logLik3_update(p)
-      ans <- logLik3_saved_value
-      if(ans > max_logLik) {
-        max_logLik <<- ans
-        max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax
-      }
-      return(ans)
-      returnType(double())
-    },
-    gr_logLik3 = function(p = double(1)) {
-      if(!one_time_fixes_done) one_time_fixes()
-      logLik3_update(p)
-      return(logLik3_saved_gr)
-      returnType(double(1))
-    },
-    ## Laplace approximation 2: double taping with separate components
-    calcLogLik2 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
-      maxValue <- max_inner_logLik_last_value
-      if(maxValue == -Inf) return(-Inf) # This would mean inner optimization failed
-      saved_inner_negHess_chol <<- cholNegHessian(p, reTransform)
-      logdetNegHessian <- 2 * sum(log(diag(saved_inner_negHess_chol)))
-
-      if(nQuad_ == 1){
-        logLik_saved_value <<- maxValue - 0.5 * logdetNegHessian + 0.5 * nreTrans * log(2*pi)
-      }else{
-        calcLogLik_AGHQuad(p)
-      }
-      if(logLik_saved_value > max_logLik) {
-        max_logLik <<- logLik_saved_value
-        max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax
-      }
-
-      return(logLik_saved_value)
-      returnType(double())
-    },
-    ## Laplace approximation 1: single taping with separate components
-    calcLogLik1 = function(p = double(1)){
-      if(!one_time_fixes_done) one_time_fixes()
-      if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) {
-        update_max_inner_logLik_internal(p)
-      }
-      reTransform <- max_inner_logLik_last_argmax
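## Aside (editorial example, not package code): the forwardsolve identity used
## above, A^T H^{-1} B = v^T w with H = t(U) %*% U, U = chol(H),
## v = forwardsolve(t(U), A), and w = forwardsolve(t(U), B), checks out
## numerically on toy matrices:
## set.seed(2)
## H <- crossprod(matrix(rnorm(9), 3, 3)) + diag(3)
## U <- chol(H)
## A <- rnorm(3); B <- matrix(rnorm(6), 3, 2)
## v <- forwardsolve(t(U), A)
## w <- forwardsolve(t(U), B)
## t(v) %*% w            # never forms solve(H)
## t(A) %*% solve(H, B)  # direct version, same numbers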
- maxValue <- max_inner_logLik_last_value - if(maxValue == -Inf) return(-Inf) # This would mean inner optimization failed - saved_inner_negHess_chol <<- cholNegHessian(p, reTransform) - logdetNegHessian <- 2 * sum(log(diag(saved_inner_negHess_chol))) - - if(nQuad_ == 1){ - ## Laplace Approx - logLik_saved_value <<- maxValue - 0.5 * logdetNegHessian + 0.5 * nreTrans * log(2*pi) - }else{ - ## AGHQ Approx - calcLogLik_AGHQuad(p) - } - if(logLik_saved_value > max_logLik) { - max_logLik <<- logLik_saved_value - max_logLik_last_best_argmax <<- max_inner_logLik_last_argmax - } - return(logLik_saved_value) - returnType(double()) - }, - calcLogLik_AGHQuad = function(p = double(1)){ - ## AGHQ Approximation: 3 steps. build grid (happens once), transform z to re, save log density. - AGHQuad_grid$buildGrid() - AGHQuad_grid$transformGrid(cholNegHess = saved_inner_negHess_chol, - inner_mode = max_inner_logLik_last_argmax, method = transMethod) - modeIndex <- AGHQuad_grid$getModeIndex() - nQ <- AGHQuad_grid$getGridSize() - AGHQuad_grid$saveLogDens(-1, max_inner_logLik_last_value ) - for(i in 1:nQ) { - if(i != modeIndex) AGHQuad_grid$saveLogDens(i, joint_logLik(p = p, reTransform = AGHQuad_grid$getNodesTransformed(i) ) ) - } - ## Given all the saved values, weights and log density, do quadrature sum. - logLik_saved_value <<- AGHQuad_grid$quadSum() - }, - ## Gradient of the Laplace approximation 2 w.r.t. parameters - gr_logLik2 = function(p = double(1)){ - if(!one_time_fixes_done) one_time_fixes() - if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) { - update_max_inner_logLik(p) - } - reTransform <- max_inner_logLik_last_argmax - negHessian <- negHess(p, reTransform) - invNegHessian <- inverse(negHessian) - grlogdetNegHesswrtp <- gr_logdetNegHess_wrt_p(p, reTransform) - grlogdetNegHesswrtre <- gr_logdetNegHess_wrt_re(p, reTransform) - hesslogLikwrtpre <- hess_joint_logLik_wrt_p_wrt_re(p, reTransform) - ans <- gr_joint_logLik_wrt_p(p, reTransform) - - 0.5 * (grlogdetNegHesswrtp + (grlogdetNegHesswrtre %*% invNegHessian) %*% t(hesslogLikwrtpre)) - return(ans[1,]) - returnType(double(1)) - }, - ## Gradient of the Laplace approximation 1 w.r.t. 
parameters - gr_logLik1 = function(p = double(1)){ - if(!one_time_fixes_done) one_time_fixes() - if(any(p != max_inner_logLik_previous_p) | !cache_inner_max) { - update_max_inner_logLik_internal(p) - } - reTransform <- max_inner_logLik_last_argmax - negHessian <- negHess_internal(p, reTransform) - invNegHessian <- inverse(negHessian) - grlogdetNegHesswrtp <- gr_logdetNegHess_wrt_p_internal(p, reTransform) - grlogdetNegHesswrtre <- gr_logdetNegHess_wrt_re_internal(p, reTransform) - hesslogLikwrtpre <- hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform) - ans <- gr_joint_logLik_wrt_p_internal(p, reTransform) - - 0.5 * (grlogdetNegHesswrtp + (grlogdetNegHesswrtre %*% invNegHessian) %*% t(hesslogLikwrtpre)) - return(ans[1,]) - returnType(double(1)) - }, - get_inner_mode = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - if(atOuterMode) return(outer_mode_max_inner_logLik_last_argmax) - return(max_inner_logLik_last_argmax) - }, - get_inner_negHessian = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - if(atOuterMode) return(outer_mode_inner_negHess) - return(saved_inner_negHess) - }, - get_inner_negHessian_chol = function(atOuterMode = integer(0, default = 0)){ - returnType(double(2)) - if(atOuterMode) return(outer_mode_inner_negHess_chol) - return(saved_inner_negHess_chol) - }, - ## Update the maximum mode and neg hess based on the log likelihood passed via optim. - ## For efficient saving of values for calculating MLE values of random-effects. - save_outer_logLik = function(logLikVal = double()){ - if(logLikVal >= max_outer_logLik) { - max_outer_logLik <<- logLikVal - outer_mode_inner_negHess <<- saved_inner_negHess - outer_mode_max_inner_logLik_last_argmax <<- max_inner_logLik_last_argmax - outer_mode_inner_negHess_chol <<- saved_inner_negHess_chol - outer_param_max <<- max_inner_logLik_previous_p - } - }, - get_param_value = function(atOuterMode = integer(0, default = 0)){ - returnType(double(1)) - ## Ensures that the inner value will not match and cached values will not be used. - if(!cache_inner_max) return(numeric(value = Inf, length = npar)) - if(atOuterMode) return(outer_param_max) - return(max_inner_logLik_previous_p) - }, - ## Need to reset every call optim to recache. - reset_outer_logLik = function(){ - max_outer_logLik <<- -Inf - }, - ## set_nQuad = function(nQUpdate = integer()){ - ## AGHQuad_grid$setGridSize(nQUpdate = nQUpdate) - ## nQuad <<- nQUpdate - ## }, - ## Choose spectral vs cholesky. 
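## Aside (editorial example, not package code): the nQuad_ == 1 (Laplace) formula
## and the AGHQ quadrature sum above can be cross-checked in base R on a
## one-dimensional toy, y ~ dpois(exp(b)) with b ~ dnorm(0, 1). Gauss-Hermite
## nodes and weights come from the standard Golub-Welsch eigendecomposition;
## all names and values below are invented for illustration.
## gauss_hermite <- function(k) {  # rule for integral of exp(-x^2) * f(x) dx
##   J <- matrix(0, k, k)
##   if(k > 1) {
##     i <- seq_len(k - 1)
##     J[cbind(i, i + 1)] <- J[cbind(i + 1, i)] <- sqrt(i / 2)
##   }
##   e <- eigen(J, symmetric = TRUE)
##   list(nodes = e$values, weights = sqrt(pi) * e$vectors[1, ]^2)
## }
## y <- 3
## g <- function(b) dpois(y, exp(b), log = TRUE) + dnorm(b, 0, 1, log = TRUE)
## opt <- optimize(g, c(-5, 5), maximum = TRUE)  # plays the role of the inner optimization
## bhat <- opt$maximum; h <- 1e-4
## negHess <- -(g(bhat + h) - 2 * g(bhat) + g(bhat - h)) / h^2  # finite-difference -g''(bhat)
## sigma <- 1 / sqrt(negHess)
## ## Laplace, exactly the nQuad_ == 1 expression above with nreTrans = 1:
## opt$objective - 0.5 * log(negHess) + 0.5 * log(2 * pi)
## ## AGHQ with k nodes centered at bhat and scaled by sigma; k = 1 reproduces Laplace:
## aghq <- function(k) {
##   gh <- gauss_hermite(k)
##   z <- bhat + sqrt(2) * sigma * gh$nodes
##   log(sqrt(2) * sigma * sum(gh$weights * exp(g(z) + gh$nodes^2)))
## }
## aghq(1); aghq(5)
## log(integrate(function(b) exp(g(b)), -Inf, Inf)$value)  # brute-force reference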
- ## set_transformation = function(transformation = character()){ - ## transMethod <<- transformation - ## }, - ## set_warning = function(warn = logical()){ - ## warn_optim <<- warn - ## }, - ## set_reInitMethod = function(method = character(), values = double(1)) { - ## if(method == "last") startID <<- 1 # last - ## else if(method == "last.best") startID <<- 2 # last.best - ## else if(method == "constant") startID <<- 3 # use fixed vector optimStart provided at setup time - ## else if(method == "random") startID <<- 4 - ## else if(method == "model") { - ## startID <<- 3 - ## constant_init_par <<- reTrans$transform(values(model, randomEffectsNodes)) - ## } else { - ## stop("invalid method for RE initialization") - ## } - ## if(startID <= 3) { - ## constant_init_par <<- values - ## if(length(values) == 1) - ## if(nreTrans > 1) - ## constant_init_par <<- rep(values, nreTrans) - ## } - ## }, - set_randomeffect_values = function(p = double(1)){ - foundIt <- FALSE - ## Last value called: - if(all(p == max_inner_logLik_previous_p)) { - re <- reTrans$inverseTransform(max_inner_logLik_last_argmax) - foundIt <- TRUE - } - ## Best value called: - if(all(p == outer_param_max)) { - re <- reTrans$inverseTransform(outer_mode_max_inner_logLik_last_argmax) - foundIt <- TRUE - } - if(foundIt){ - values(model, paramNodes) <<- p - ans <- model$calculate(paramDeps) - }else{ - # It would be nice to emit a message here, but different optimizers (e.g. BFGS vs nlminb) - # behave differently as to whether the previous (last) parameters were always the MLE. - # print(" [Warning] Have not cached the inner optimization. Running optimization now.") - update_max_inner_logLik(p) - re <- reTrans$inverseTransform(max_inner_logLik_last_argmax) - } - ## Ensure the model is up to date for all nodes. - values(model, randomEffectsNodes) <<- re - model$calculate(innerCalcNodes) - } - ## set_inner_cache = function(cache = logical(0, default = TRUE)){ - ## cache_inner_max <<- cache - ## } - ), - buildDerivs = list(inner_logLik = list(), - joint_logLik = list(), - gr_joint_logLik_wrt_re = list(), - negHess = list(), - cholNegHessian = list(), - logdetNegHess = list(), - gr_inner_logLik_internal = list(), - he_inner_logLik_internal = list(), - he_inner_logLik_internal_as_vec = list(), - gr_joint_logLik_wrt_p_internal = list(), - gr_joint_logLik_wrt_re_internal = list(), - hess_joint_logLik_wrt_p_wrt_re_internal = list(), - negHess_internal = list(), - gr_logdetNegHess_wrt_p_internal = list(), - gr_logdetNegHess_wrt_re_internal = list(), - joint_logLik_with_grad_and_hess = list(ignore = c("i","j")), - joint_logLik_with_higher_derivs = list(), - negHess_inner_logLik_internal = list()) -) ## End of buildOneAGHQuad - -#' Organize model nodes for marginalization -#' -#' Process model to organize nodes for marginalization (integration over latent -#' nodes or random effects) as by Laplace approximation. -#' -#' @param model A nimble model such as returned by \code{nimbleModel}. -#' -#' @param paramNodes A character vector of names of stochastic nodes that are -#' parameters of nodes to be marginalized over (\code{randomEffectsNodes}). -#' See details for default. -#' -#' @param randomEffectsNodes A character vector of nodes to be marginalized over -#' (or "integrated out"). In the case of calculating the likelihood of a model -#' with continuous random effects, the nodes to be marginalized over are the -#' random effects, hence the name of this argument. 
However, one can
-#' marginalize over any nodes desired as long as they are continuous.
-#' See details for default.
-#'
-#' @param calcNodes A character vector of nodes to be calculated as the
-#' integrand for marginalization. Typically this will include
-#' \code{randomEffectsNodes} and some data nodes. See details for default.
-#'
-#' @param calcNodesOther A character vector of nodes to be calculated as part of
-#' the log likelihood that are not connected to the \code{randomEffectsNodes}
-#' and so are not actually part of the marginalization. These are somewhat
-#' extraneous to the purpose of this function, but it is convenient to handle
-#' them here because often the purpose of marginalization is to calculate log
-#' likelihoods, including from "other" parts of the model.
-#'
-#' @param split A logical indicating whether to split \code{randomEffectsNodes}
-#' into conditionally independent sets that can be marginalized separately
-#' (\code{TRUE}) or to keep them all in one set for a single marginalization
-#' calculation.
-#'
-#' @param check A logical indicating whether to try to give reasonable warnings
-#' of badly formed inputs that might be missing important nodes or include
-#' unnecessary nodes.
-#'
-#' @param allowDiscreteLatent A logical indicating whether to
-#' allow discrete latent states. (default = \code{FALSE})
-#'
-#' @details
-#'
-#' This function is used by \code{buildLaplace} to organize model nodes into
-#' roles needed for setting up the (approximate) marginalization done by Laplace
-#' approximation. It is also possible to call this function directly and pass
-#' the resulting list (possibly modified for your needs) to \code{buildLaplace}.
-#'
-#' Any of the input node vectors, when provided, will be processed using
-#' \code{nodes <- model$expandNodeNames(nodes)}, where \code{nodes} may be
-#' \code{paramNodes}, \code{randomEffectsNodes}, and so on. This step allows
-#' any of the inputs to include node-name-like syntax that might contain
-#' multiple nodes. For example, \code{paramNodes = 'beta[1:10]'} can be
-#' provided if there are actually 10 scalar parameters, 'beta[1]' through
-#' 'beta[10]'. The actual node names in the model will be determined by the
-#' \code{expandNodeNames} step.
-#'
-#' This function does not do any of the marginalization calculations. It only
-#' organizes nodes into roles of parameters, random effects, integrand
-#' calculations, and other log likelihood calculations.
-#'
-#' The checking done if `check=TRUE` tries to be reasonable, but it can't cover
-#' all cases perfectly. If it gives an unnecessary warning, simply set `check=FALSE`.
-#'
-#' If \code{paramNodes} is not provided, its default depends on what other
-#' arguments were provided. If neither \code{randomEffectsNodes} nor
-#' \code{calcNodes} were provided, \code{paramNodes} defaults to all
-#' top-level, stochastic nodes, excluding any posterior predictive nodes
-#' (those with no data anywhere downstream). These are determined by
-#' \code{model$getNodeNames(topOnly = TRUE, stochOnly = TRUE,
-#' includePredictive = FALSE)}. If \code{randomEffectsNodes} was provided,
-#' \code{paramNodes} defaults to stochastic parents of
-#' \code{randomEffectsNodes}. In these cases, any provided \code{calcNodes} or
-#' \code{calcNodesOther} are excluded from default \code{paramNodes}.
If -#' \code{calcNodes} but not \code{randomEffectsNodes} was provided, then the -#' default for \code{randomEffectsNodes} is determined first, and then -#' \code{paramNodes} defaults to stochastic parents of -#' \code{randomEffectsNodes}. Finally, any stochastic parents of -#' \code{calcNodes} (whether provided or default) that are not in -#' \code{calcNodes} are added to the default for \code{paramNodes}, but only -#' after \code{paramNodes} has been used to determine the defaults for -#' \code{randomEffectsNodes}, if necessary. -#' -#' Note that to obtain sensible defaults, some nodes must have been marked as -#' data, either by the \code{data} argument in \code{nimbleModel} or by -#' \code{model$setData}. Otherwise, all nodes will appear to be posterior -#' predictive nodes, and the default \code{paramNodes} may be empty. -#' -#' For purposes of \code{buildLaplace}, \code{paramNodes} does not need to (but -#' may) include deterministic nodes between the parameters and any -#' \code{calcNodes}. Such deterministic nodes will be included in -#' calculations automatically when needed. -#' -#' If \code{randomEffectsNodes} is missing, the default is a bit complicated: it -#' includes all latent nodes that are descendants (or "downstream") of -#' \code{paramNodes} (if provided) and are either (i) ancestors (or -#' "upstream") of data nodes (if \code{calcNodes} is missing), or (ii) -#' ancestors or elements of \code{calcNodes} (if \code{calcNodes} and -#' \code{paramNodes} are provided), or (iii) elements of \code{calcNodes} (if -#' \code{calcNodes} is provided but \code{paramNodes} is missing). In all -#' cases, discrete nodes (with warning if \code{check=TRUE}), posterior -#' predictive nodes and \code{paramNodes} are excluded. -#' -#' \code{randomEffectsNodes} should only include stochastic nodes. -#' -#' If \code{calcNodes} is missing, the default is \code{randomEffectsNodes} and -#' their descendants to the next stochastic nodes, excluding posterior -#' predictive nodes. These are determined by -#' \code{model$getDependencies(randomEffectsNodes, includePredictive=FALSE)}. -#' -#' If \code{calcNodesOther} is missing, the default is all stochastic -#' descendants of \code{paramNodes}, excluding posterior predictive nodes -#' (from \code{model$getDependencies(paramNodes, stochOnly=TRUE, self=FALSE, -#' includePosterior=FALSE)}) that are not part of \code{calcNodes}. -#' -#' For purposes of \code{buildLaplace}, neither \code{calcNodes} nor -#' \code{calcNodesOther} needs to (but may) contain deterministic nodes -#' between \code{paramNodes} and \code{calcNodes} or \code{calcNodesOther}, -#' respectively. These will be included in calculations automatically when -#' needed. -#' -#' If \code{split} is \code{TRUE}, \code{model$getConditionallyIndependentSets} -#' is used to determine sets of the \code{randomEffectsNodes} that can be -#' independently marginalized. The \code{givenNodes} are the -#' \code{paramNodes} and \code{calcNodes} excluding any -#' \code{randomEffectsNodes} and their deterministic descendants. The -#' \code{nodes} (to be split into sets) are the \code{randomEffectsNodes}. -#' -#' If \code{split} is a numeric vector, \code{randomEffectsNodes} will be split -#' by \code{split}(\code{randomEffectsNodes}, \code{control$split}). The last -#' option allows arbitrary control over how \code{randomEffectsNodes} are -#' blocked. -#' -#' If \code{check=TRUE}, then defaults for each of the four categories of nodes -#' are created even if the corresponding argument was provided. 
Then warnings -#' are emitted if there are any extra (potentially unnecessary) nodes provided -#' compared to the default or if there are any nodes in the default that were -#' not provided (potentially necessary). These checks are not perfect and may -#' be simply turned off if you are confident in your inputs. -#' -#' (If \code{randomEffectsNodes} was provided but \code{calcNodes} was not -#' provided, the default (for purposes of \code{check=TRUE} only) for -#' \code{randomEffectsNodes} differs from the above description. It uses -#' stochastic descendants of \code{randomEffectsNodes} in place of the -#' "data nodes" when determining ancestors of data nodes. And it uses item -#' (ii) instead of (iii) in the list above.) -#' -#' @author Wei Zhang, Perry de Valpine, Paul van Dam-Bates -#' @return -#' -#' A list is returned with elements: -#' -#' \itemize{ -#' -#' \item \code{paramNodes}: final processed version of \code{paramNodes} -#' -#' \item \code{randomEffectsNodes}: final processed version of \code{randomEffectsNodes} -#' -#' \item \code{calcNodes}: final processed version of \code{calcNodes} -#' -#' \item \code{calcNodesOther}: final processed version of \code{calcNodesOther} -#' -#' \item \code{givenNodes}: Input to \code{model$getConditionallyIndependentSets}, if \code{split=TRUE}. -#' -#' \item \code{randomEffectsSets}: Output from -#' \code{model$getConditionallyIndependentSets}, if \code{split=TRUE}. This -#' will be a list of vectors of node names. The node names in one list element -#' can be marginalized independently from those in other list elements. The -#' union of the list elements should be all of \code{randomEffectsNodes}. If -#' \code{split=FALSE}, \code{randomEffectsSets} will be a list with one -#' element, simply containing \code{randomEffectsNodes}. If \code{split} is a -#' numeric vector, \code{randomEffectsSets} will be the result of -#' \code{split}(\code{randomEffectsNodes}, \code{control$split}). -#' -#' } -#' -#' @export -setupMargNodes <- function(model, paramNodes, randomEffectsNodes, calcNodes, - calcNodesOther, - split = TRUE, - check = TRUE, - allowDiscreteLatent = FALSE) { - paramProvided <- !missing(paramNodes) - reProvided <- !missing(randomEffectsNodes) - calcProvided <- !missing(calcNodes) - calcOtherProvided <- !missing(calcNodesOther) - - normalizeNodes <- function(nodes, sort = FALSE) { - if(is.null(nodes) || isFALSE(nodes)) character(0) - else model$expandNodeNames(nodes, sort = sort) - } - if(paramProvided) paramNodes <- normalizeNodes(paramNodes) - if(reProvided) randomEffectsNodes <- normalizeNodes(randomEffectsNodes) - if(calcProvided) calcNodes <- normalizeNodes(calcNodes, sort = TRUE) - if(calcOtherProvided) calcNodesOther <- normalizeNodes(calcNodesOther, sort = TRUE) - - if(reProvided) { - if(check && !allowDiscreteLatent) - if(any(model$isDiscrete(randomEffectsNodes))) - messageIfVerbose(" [Warning] Some elements of `randomEffectsNodes` follow discrete distributions. That is likely to cause problems.") - } - - # We considered a feature to allow params to be nodes without priors. This is a placeholder in case - # we ever pursue that again. - # allowNonPriors <- FALSE - # We may need to use determ and stochastic dependencies of parameters multiple times below - # Define these to avoid repeated computation - # A note for future: determ nodes between parameters and calcNodes are needed inside buildOneAGHQuad - # and buildOneAGHQuad1D. 
In the future, these could be all done here to be more efficient - paramDetermDeps <- character(0) - paramStochDeps <- character(0) - paramDetermDepsCalculated <- FALSE - paramStochDepsCalculated <- FALSE - - # 1. Default parameters are stochastic top-level nodes. (We previously - # considered an argument allowNonPriors, defaulting to FALSE. If TRUE, the - # default params would be all top-level stochastic nodes with no RHSonly - # nodes as parents and RHSonly nodes (handling of constants TBD, since - # non-scalars would be converted to data) that have stochastic dependencies - # (And then top-level stochastic nodes with RHSonly nodes as parents are - # essentially latent/data nodes, some of which would need to be added to - # randomEffectsNodes below.) However this got too complicated. It is - # simpler and clearer to require "priors" for parameters, even though prior - # probs may not be used. - paramsHandled <- TRUE - if(!paramProvided) { - if(!reProvided) { - if(!calcProvided) { - paramNodes <- model$getNodeNames(topOnly = TRUE, stochOnly = TRUE, includePredictive = FALSE) - } else { - # calcNodes were provided, but RE nodes were not, so delay creating default params - paramsHandled <- FALSE - } - } else { - nodesToFindParentsFrom <- randomEffectsNodes - paramNodes <- model$getParents(nodesToFindParentsFrom, self=FALSE, stochOnly=TRUE) - # self=FALSE doesn't omit if one RE node is a parent of another, so we have to do the next step - paramNodes <- setdiff(paramNodes, nodesToFindParentsFrom) - } - if(paramsHandled) { - if(calcProvided) paramNodes <- setdiff(paramNodes, calcNodes) - if(calcOtherProvided) paramNodes <- setdiff(paramNodes, calcNodesOther) - } - } - - # 2. Default random effects are latent nodes that are downstream stochastic dependencies of params. - # In step 3, default random effects are also limited to those that are upstream parents of calcNodes - if((!reProvided) || check) { - latentNodes <- model$getNodeNames(latentOnly = TRUE, stochOnly = TRUE, - includeData = FALSE, includePredictive = FALSE) - if(!allowDiscreteLatent) { - latentDiscrete <- model$isDiscrete(latentNodes) - if(any(latentDiscrete)) { - if((!reProvided) && check) { - messageIfVerbose(" [Note] In trying to determine default `randomEffectsNodes`, there are some nodes\n", - " that follow discrete distributions. These will be omitted.") - } - latentNodes <- latentNodes[!latentDiscrete] - } - } - if(paramsHandled) { - paramDownstream <- model$getDependencies(paramNodes, stochOnly = TRUE, self = FALSE, - downstream = TRUE, includePredictive = FALSE) - # paramStochDeps <- model$getDependencies(paramNodes, stochOnly = TRUE, self = FALSE) - # paramStochDepsCalculated <- TRUE - reNodesDefault <- intersect(latentNodes, paramDownstream) - } else { - reNodesDefault <- latentNodes - } - # Next, if calcNodes were not provided, we create a temporary - # dataNodesDefault for purposes of updating reNodesDefault if needed. The - # idea is that reNodesDefault should be trimmed to include only nodes - # upstream of "data" nodes, where "data" means nodes in the role of data for - # purposes of marginalization. - # The tempDataNodesDefault is either dependencies of RE nodes if provided, or - # actual data nodes in the model if RE nodes not provided. - # If calcNodes were provided, then they are used directly to trim reNodesDefault. 
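## Aside (editorial example, not package code): the upstream/downstream queries
## used for these defaults can be seen on a throwaway chain model
## param -> b -> y (all names hypothetical):
## library(nimble)
## demoModel <- nimbleModel(nimbleCode({
##   param ~ dnorm(0, 1)
##   b ~ dnorm(param, 1)
##   y ~ dnorm(b, 1)
## }), data = list(y = 0.5))
## demoModel$getNodeNames(latentOnly = TRUE, stochOnly = TRUE, includeData = FALSE)       # "b"
## demoModel$getParents("y", upstream = TRUE, stochOnly = TRUE)                           # "param" "b"
## demoModel$getDependencies("param", stochOnly = TRUE, self = FALSE, downstream = TRUE)  # "b" "y"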
- if(!calcProvided) { - if(reProvided) - tempDataNodesDefault <- model$getDependencies(randomEffectsNodes, stochOnly = TRUE, - self = FALSE, includePredictive = FALSE) - else - tempDataNodesDefault <- model$getNodeNames(dataOnly = TRUE) - if(paramsHandled) - tempDataNodesDefault <- setdiff(tempDataNodesDefault, paramNodes) - tempDataNodesDefaultParents <- model$getParents(tempDataNodesDefault, upstream = TRUE, stochOnly = TRUE) - # See comment above about why this is necessary: - tempDataNodesDefaultParents <- setdiff(tempDataNodesDefaultParents, tempDataNodesDefault) - reNodesDefault <- intersect(reNodesDefault, tempDataNodesDefaultParents) - } else { - # Update reNodesDefault to exclude nodes that lack downstream connection to a calcNode - if(paramsHandled) { # This means reProvided OR paramsProvided. Including parents allows checking - # of potentially missing REs. - reNodesDefault <- intersect(reNodesDefault, - model$getParents(calcNodes, upstream=TRUE, stochOnly = TRUE)) - } else { # This means !paramsHandled and hence !reProvided AND !paramsProvided - reNodesDefault <- intersect(reNodesDefault, - calcNodes) - reNodesDefault <- intersect(reNodesDefault, - model$getParents(calcNodes, upstream=TRUE, stochOnly = TRUE)) - } - } - } - - # If only calcNodes were provided, we have now created reNodesDefault from calcNodes, - # and are now ready to create default paramNodes - if(!paramsHandled) { - paramNodes <- model$getParents(reNodesDefault, self=FALSE, stochOnly=TRUE) - # See comment above about why this is necessary: - paramNodes <- setdiff(paramNodes, reNodesDefault) - if(calcOtherProvided) paramNodes <- setdiff(paramNodes, calcNodesOther) - } - - # 3. Optionally check random effects if they were provided (not default) - if(reProvided && check) { - # First check is for random effects that should have been included but weren't - reCheck <- setdiff(reNodesDefault, randomEffectsNodes) - if(length(reCheck)) { - errorNodes <- paste0(head(reCheck, n = 4), sep = "", collapse = ", ") - if(length(reCheck) > 4) errorNodes <- paste(errorNodes, "...") - messageIfVerbose(" [Warning] There are some random effects (latent states) in the model that look like\n", - " they should be included for the provided (or default) `paramNodes`,\n", - " but are not included in `randomEffectsNodes`: ", errorNodes, ".\n", - " To silence this warning, one can usually include `check = FALSE`\n", - " (potentially in the control list) for the algorithm or as\n", - " an argument to `setupMargNodes`.") - } - # Second check is for random effects that were included but look unnecessary - reCheck <- setdiff(randomEffectsNodes, reNodesDefault) - if(length(reCheck)) { - # Top nodes should never trigger warning. 
- # Descendants of top nodes that are in randomEffectsNodes should not trigger warning - topNodes <- model$getNodeNames(topOnly=TRUE) - reCheckTopNodes <- intersect(reCheck, topNodes) - if(length(reCheckTopNodes)) { - # Simple downstream=TRUE here is not a perfect check of connection among all nodes - # but it will avoid false alarms - reCheck <- setdiff(reCheck, model$getDependencies(reCheckTopNodes, downstream=TRUE, stochOnly=TRUE)) - } - if(length(reCheck)) { - errorNodes <- paste0(head(reCheck, n = 4), sep = "", collapse = ", ") - if(length(reCheck) > 4) errorNodes <- paste(errorNodes, "...") - extraMsg <- if(isTRUE(getNimbleOption('includeUnneededLatents'))) "" else " They will be omitted, but one can force inclusion with\n `nimbleOptions(includeUnneededLatents=TRUE)`.\n" - messageIfVerbose(" [Warning] There are some `randomEffectsNodes` provided that look like\n", - " they are not needed for the provided (or default) `paramNodes`:\n", - " ", errorNodes, ".\n", extraMsg, - " To silence this warning, one can usually include `check = FALSE`\n", - " (potentially in the control list) for the algorithm or as\n", - " an argument to `setupMargNodes`.") - if(!isTRUE(getNimbleOption('includeUnneededLatents'))) - randomEffectsNodes <- setdiff(randomEffectsNodes, reCheck) - } - } - } - # Set final choice of randomEffectsNodes - if(!reProvided) { - randomEffectsNodes <- reNodesDefault - } - - # Set actual default calcNodes. This time it has self=TRUE (default) - if((!calcProvided) || check) { - calcNodesDefault <- model$getDependencies(randomEffectsNodes, includePredictive = FALSE) - } - # 5. Optionally check calcNodes if they were provided (not default) - if(calcProvided && check) { - # First check is for calcNodes that look necessary but were omitted - calcCheck <- setdiff(calcNodesDefault, calcNodes) - if(length(calcCheck)) { - errorNodes <- paste0(head(calcCheck, n = 4), sep = "", collapse = ", ") - if(length(calcCheck) > 4) errorNodes <- paste(errorNodes, "...") - messageIfVerbose(" [Warning] There are some model nodes that look like they should be\n", - " included in the `calcNodes` because\n", - " they are dependencies of some `randomEffectsNodes`: ", errorNodes, ".\n", - " To silence this warning, one can usually include `check = FALSE`\n", - " (potentially in the control list) for the algorithm or as\n", - " an argument to `setupMargNodes`.") - } - # Second check is for calcNodes that look unnecessary - # If some determ nodes between paramNodes and randomEffectsNodes are provided in calcNodes - # then that's ok and we should not throw a warning message. - calcCheck <- setdiff(calcNodes, calcNodesDefault) - errorNodes <- calcCheck[model$getNodeType(calcCheck)=="stoch"] - # N.B. I commented out this checking of deterministic nodes for now. - # Iterating through individual nodes for getDependencies can be slow - # and I'd like to think more about how to do this. 
-Perry - ## determCalcCheck <- setdiff(calcCheck, errorNodes) - ## lengthDetermCalcCheck <- length(determCalcCheck) - ## # Check other determ nodes - ## if(lengthDetermCalcCheck){ - ## paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE) - ## paramDetermDepsCalculated <- TRUE - ## for(i in 1:lengthDetermCalcCheck){ - ## if(!(determCalcCheck[i] %in% paramDetermDeps) || - ## !(any(model$getDependencies(determCalcCheck[i], self = FALSE) %in% calcNodesDefault))){ - ## errorNodes <- c(errorNodes, determCalcCheck[i]) - ## } - ## } - ## } - if(length(errorNodes)){ - outErrorNodes <- paste0(head(errorNodes, n = 4), sep = "", collapse = ", ") - if(length(errorNodes) > 4) outErrorNodes <- paste(outErrorNodes, "...") - messageIfVerbose(" [Warning] There are some `calcNodes` provided that look like\n", - " they are not needed for the provided (or default) `randomEffectsNodes`:\n", - " ", outErrorNodes, ".\n", - " To silence this warning, one can usually include `check = FALSE`\n", - " (potentially in the control list) for the algorithm or as\n", - " an argument to `setupMargNodes`.") - } - } - # Finish step 4 - if(!calcProvided){ - calcNodes <- calcNodesDefault - } - if(!paramProvided) { - possibleNewParamNodes <- model$getParents(calcNodes, self=FALSE, stochOnly=TRUE, includeData=FALSE) - # includeData=FALSE as data nodes cannot be parameters - # self=FALSE doesn't omit if one node is a parent of another, so we have to do the next step - possibleNewParamNodes <- setdiff(possibleNewParamNodes, calcNodesDefault) - paramNodes <- unique(c(paramNodes, possibleNewParamNodes)) - } - - # 6. Default calcNodesOther: nodes needed for full model likelihood but - # that are not involved in the marginalization done by Laplace. - # Default is a bit complicated: All dependencies from paramNodes to - # stochastic nodes that are not part of calcNodes. Note that calcNodes - # does not necessarily contain deterministic nodes between paramNodes and - # randomEffectsNodes. We don't want to include those in calcNodesOther. - # (A deterministic that is needed for both calcNodes and calcNodesOther should be included.) - # So we have to first do a setdiff on stochastic nodes and then fill in the - # deterministics that are needed. - if(!calcOtherProvided || check) { - paramStochDeps <- model$getDependencies(paramNodes, stochOnly = TRUE, # Should this be dataOnly=TRUE? 
- self = FALSE, includePredictive = FALSE) - calcNodesOtherDefault <- setdiff(paramStochDeps, calcNodes) - } - if(calcOtherProvided) { - if((length(calcNodesOther) > 0) && !any(model$getNodeType(calcNodesOther)=="stoch")){ - messageIfVerbose(" [Warning] There are no stochastic nodes in the `calcNodesOther` provided for Laplace or AGHQ approximation.") - } - } - if(!calcOtherProvided){ - calcNodesOther <- calcNodesOtherDefault - } - if(calcOtherProvided && check) { - calcOtherCheck <- setdiff(calcNodesOtherDefault, calcNodesOther) - if(length(calcOtherCheck)) { - # We only check missing stochastic nodes; determ nodes will be added below - missingStochNodesInds <- which((model$getNodeType(calcOtherCheck)) == "stoch") - lengthMissingStochNodes <- length(missingStochNodesInds) - if(lengthMissingStochNodes){ - missingStochNodes <- calcOtherCheck[missingStochNodesInds] - errorNodes <- paste0(head(missingStochNodes, n = 4), sep = "", collapse = ", ") - if(lengthMissingStochNodes > 4) errorNodes <- paste(errorNodes, "...") - messageIfVerbose(" [Warning] There are some model nodes (stochastic) that look like they should be\n", - " included in the `calcNodesOther` for parts of the likelihood calculation\n", - " outside of Laplace or AGHQ approximation: ", errorNodes, ".\n", - " To silence this warning, include `check = FALSE` in the control list\n", - " to `buildLaplace` or as an argument to `setupMargNodes`.") - } - } - # Check redundant stochastic nodes - calcOtherCheck <- setdiff(calcNodesOther, calcNodesOtherDefault) - stochCalcOtherCheck <- calcOtherCheck[model$getNodeType(calcOtherCheck) == "stoch"] - errorNodes <- stochCalcOtherCheck - # Check redundant determ nodes - # N.B. I commented-out this deterministic node checking for reasons similar to above. -Perry - ## determCalcOtherCheck <- setdiff(calcOtherCheck, stochCalcOtherCheck) - ## lengthDetermCalcOtherCheck <- length(determCalcOtherCheck) - ## errorNodes <- character(0) - ## if(lengthDetermCalcOtherCheck){ - ## if(!paramDetermDepsCalculated) { - ## paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE) - ## paramDetermDepsCalculated <- TRUE - ## } - ## for(i in 1:lengthDetermCalcOtherCheck){ - ## if(!(determCalcOtherCheck[i] %in% paramDetermDeps) || - ## !(any(model$getDependencies(determCalcOtherCheck[i], self = FALSE) %in% calcNodesOtherDefault))){ - ## errorNodes <- c(errorNodes, determCalcOtherCheck[i]) - ## } - ## } - ## } - ## errorNodes <- c(stochCalcOtherCheck, errorNodes) - if(length(errorNodes)){ - outErrorNodes <- paste0(head(errorNodes, n = 4), sep = "", collapse = ", ") - if(length(errorNodes) > 4) outErrorNodes <- paste(outErrorNodes, "...") - messageIfVerbose(" [Warning] There are some nodes provided in `calcNodesOther` that look like\n", - " they are not needed for parts of the likelihood calculation\n", - " outside of Laplace or AGHQ approximation: ", outErrorNodes, ".\n", - " To silence this warning, include `check = FALSE` in the control list\n", - " to `buildLaplace` or as an argument to `setupMargNodes`.") - } - } - # Check and add necessary (upstream) deterministic nodes into calcNodesOther - # This ensures that deterministic nodes between paramNodes and calcNodesOther are used. 
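## Aside (editorial example, not package code): on the log scale, calcNodesOther
## simply adds to the marginalized part of the likelihood. A toy with an exact
## Gaussian marginal, y1 ~ N(b, 1) with b ~ N(theta, 1) marginalized and
## y2 ~ N(theta, 1) as the "other" likelihood piece (all values invented):
## theta <- 0.3; y1 <- 1.1; y2 <- -0.4
## marg  <- dnorm(y1, theta, sqrt(2), log = TRUE)  # b integrated out analytically
## other <- dnorm(y2, theta, 1, log = TRUE)        # the calcNodesOther contribution
## marg + other                                    # total log likelihood at theta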
- num_calcNodesOther <- length(calcNodesOther) - if(num_calcNodesOther > 0){ - if(!paramDetermDepsCalculated) { - paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE) - paramDetermDepsCalculated <- TRUE - } - numParamDetermDeps <- length(paramDetermDeps) - if(numParamDetermDeps > 0) { - keep_paramDetermDeps <- logical(numParamDetermDeps) - for(i in seq_along(paramDetermDeps)) { - nextDeps <- model$getDependencies(paramDetermDeps[i]) - keep_paramDetermDeps[i] <- any(nextDeps %in% calcNodesOther) - } - paramDetermDeps <- paramDetermDeps[keep_paramDetermDeps] - } - calcNodesOther <- model$expandNodeNames(c(paramDetermDeps, calcNodesOther), sort = TRUE) - } - - # 7. Do the splitting into sets (if given) or conditionally independent sets (if TRUE) - givenNodes <- NULL - reSets <- list() - if(length(randomEffectsNodes)) { - if(isFALSE(split)) { - reSets <- list(randomEffectsNodes) - } else { - if(isTRUE(split)) { - # givenNodes should only be stochastic - givenNodes <- setdiff(c(paramNodes, calcNodes), - c(randomEffectsNodes, - model$getDependencies(randomEffectsNodes, determOnly=TRUE))) - reSets <- model$getConditionallyIndependentSets( - nodes = randomEffectsNodes, givenNodes = givenNodes, - unknownAsGiven = TRUE) - } - else if(is.numeric(split)){ - reSets <- split(randomEffectsNodes, split) - } - else stop("setupMargNodes: Invalid value for `split`") - } - } - list(paramNodes = paramNodes, - randomEffectsNodes = randomEffectsNodes, - calcNodes = calcNodes, - calcNodesOther = calcNodesOther, - givenNodes = givenNodes, - randomEffectsSets = reSets - ) -} - - -## Main function for Laplace approximation -#' @rdname laplace -#' @export -buildLaplace <- function(model, paramNodes, randomEffectsNodes, calcNodes, calcNodesOther, control = list()) { - buildAGHQ(model, nQuad = 1, paramNodes, randomEffectsNodes, calcNodes, calcNodesOther, control) -} - -## Main function for Adaptive Gauss-Hermite Quadrature -#' @rdname laplace -#' @export -buildAGHQ <- nimbleFunction( - name = 'AGHQ', - setup = function(model, nQuad = 1, paramNodes, randomEffectsNodes, calcNodes, - calcNodesOther, control = list()) { - split <- extractControlElement(control, 'split', TRUE) - check <- extractControlElement(control, 'check', TRUE) - innerOptimWarning <- extractControlElement(control, 'innerOptimWarning', FALSE) - - if(nQuad > 35) { - print(" [Note] Currently only a maximum of 35 quadrature points are allowed, setting nQuad to 35.") - nQuad <- 35 - } - nQuad_ <- nQuad - MargNodes <- NULL - if(!missing(paramNodes)) { - if(is.list(paramNodes)) { - # The user called setupMargNodes and provided a list of that format to paramNodes. 
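## Aside (editorial example, not package code): the list format mentioned in the
## comment above, i.e. calling setupMargNodes() directly and passing its result
## back through `paramNodes`, can be sketched on a hypothetical random-intercept
## model (all names and data invented):
## library(nimble)
## code <- nimbleCode({
##   beta0 ~ dnorm(0, sd = 10)
##   sigma ~ dunif(0, 10)
##   for(i in 1:5) {
##     b[i] ~ dnorm(0, sd = sigma)
##     y[i] ~ dpois(exp(beta0 + b[i]))
##   }
## })
## m <- nimbleModel(code, data = list(y = c(1, 0, 2, 4, 1)),
##                  inits = list(beta0 = 0, sigma = 1, b = rep(0, 5)),
##                  buildDerivs = TRUE)
## mn <- setupMargNodes(m)    # defaults: paramNodes beta0, sigma; randomEffectsNodes b[1:5]
## mn$randomEffectsSets       # conditionally independent sets, one per b[i] here
## lap <- buildLaplace(m, paramNodes = mn)  # the whole list rides in via paramNodes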
- MargNodes <- paramNodes - } - } - if(is.null(MargNodes)) { - MargNodes <- setupMargNodes(model = model, paramNodes = paramNodes, - randomEffectsNodes = randomEffectsNodes, - calcNodes = calcNodes, - calcNodesOther = calcNodesOther, - split = split, - check = check) - } - paramNodes <- MargNodes$paramNodes - randomEffectsNodes <- MargNodes$randomEffectsNodes - calcNodes <- MargNodes$calcNodes - calcNodesOther <- MargNodes$calcNodesOther - num_calcNodesOther <- length(calcNodesOther) - # MargNodes$randomEffectsSets will be extracted below if needed - - if(length(calcNodesOther)) { - otherLogLik_derivsInfo <- makeModelDerivsInfo(model = model, wrtNodes = paramNodes, calcNodes = calcNodesOther) - otherLogLik_updateNodes <- otherLogLik_derivsInfo$updateNodes - otherLogLik_constantNodes <- otherLogLik_derivsInfo$constantNodes - } - else { ## calcNodesOther is empty - otherLogLik_updateNodes <- character(0) - otherLogLik_constantNodes <- character(0) - } - ## Out and inner optimization settings - outerOptimControl_ <- nimOptimDefaultControl() - innerOptimControl_ <- nimOptimDefaultControl() - optimControlArgNames <- c("trace", "fnscale", "parscale", "ndeps", "maxit", "abstol", "reltol", "alpha", - "beta", "gamma", "REPORT", "type", "lmm", "factr", "pgtol", "temp", "tmax") - if(!is.null(control$outerOptimControl)){ - validNames <- intersect(names(control$outerOptimControl), optimControlArgNames) - numValidNames <- length(validNames) - if(numValidNames > 0){ - for(i in 1:numValidNames){ - outerOptimControl_[[validNames[i]]] <- control$outerOptimControl[[validNames[i]]] - } - } - } - if(!is.null(control$innerOptimControl)) { - validNames_inner <- intersect(names(control$innerOptimControl), optimControlArgNames) - numValidNames_inner <- length(validNames_inner) - if(numValidNames_inner > 0){ - for(i in 1:numValidNames_inner) - innerOptimControl_[[validNames_inner[i]]] <- control$innerOptimControl[[validNames_inner[i]]] - } - } - outerOptimControl_$fnscale <- -1 - innerOptimControl_$fnscale <- -1 - if(!is.null(control$innerOptimMethod) && - ((control$innerOptimMethod %in% c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B")) || - control$innerOptimMethod %in% ls(nimbleUserNamespace$.optimizers))){ # .optimizers by default contains 'nlminb'. - innerOptimMethod <- control$innerOptimMethod - } else innerOptimMethod <- "nlminb" - - if(!is.null(control$outerOptimMethod) && - ((control$outerOptimMethod %in% c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B")) || - control$outerOptimMethod %in% ls(nimbleUserNamespace$.optimizers))){ # .optimizers by default contains 'nlminb'. 
- outerOptimMethod_ <- control$outerOptimMethod - } else outerOptimMethod_ <- "nlminb" - - innerOptimStart <- extractControlElement(control, "innerOptimStart", "last.best") - if(!is.character(innerOptimStart) | - length(innerOptimStart) != 1 | - !(innerOptimStart %in% (validIOS <- c("last", "last.best", "constant", "random", "model", "zero")))) - stop("buildAGHQ: `control$innerOptimStart` must be one of ", paste0('\'', validIOS, '\'', collapse=",")) - - innerOptimStartValues <- NULL - if(innerOptimStart == "model") { - innerOptimStartValues <- 0 # will be ignored but is need to trigger next message - } - if(innerOptimStart == "zero") { - innerOptimStart <- "constant" - innerOptimStartValues <- 0 - } - if(!is.null(innerOptimStartValues) & - !is.null(control$innerOptimStartValues)) { - messageIfVerbose(" [Note] Ignoring `control$innerOptimStartValues` because `control$innerOptimStart` is `", innerOptimStart, "`.") - } else { - innerOptimStartValues <- extractControlElement(control, "innerOptimStartValues", 0) - if(is.character(innerOptimStartValues)) - if(length(innerOptimStartValues) != 1 | - !(innerOptimStartValues == "model")) - stop("buildAGHQ: The only valid character value for `control$innerOptimStartValues` is 'model'") - } - - ## Create an AGHQuad (Adaptive Gauss-Hermite Quadrature) nimbleFunctionList - AGHQuad_nfl <- nimbleFunctionList(AGHQuad_BASE) - scalarRENodes <- model$expandNodeNames(randomEffectsNodes, returnScalarComponents = TRUE) - nre <- length(scalarRENodes) - multiSetsCheck <- FALSE ## AGHQ vs Laplace Check in findMLE. - gridType <- extractControlElement(control, "gridType", "cholesky") - innerControlList <- list(optimControl=innerOptimControl_, - optimMethod=innerOptimMethod, - optimStart=innerOptimStart, - optimStartValues=innerOptimStartValues, - optimWarning=innerOptimWarning, - gridType=gridType) - if(nre > 0){ - ## Record the order of random effects processed internally - internalRandomEffectsNodes <- NULL - lenInternalRENodeSets <- NULL - if(isFALSE(split)) { ## Do all randomEffectsNodes in one set - internalRandomEffectsNodes <- randomEffectsNodes - lenInternalRENodeSets <- nre - reNodesAsScalars <- model$expandNodeNames(internalRandomEffectsNodes, returnScalarComponents = TRUE) - - # old default was "model". 
new default is "last.best" with values=0 - # Essentially the following steps will now b done in buildOneAGHQuad[1D] - ## if(is.null(control$innerOptimStart)) innerOptimStart <- values(model, randomEffectsNodes) - ## else { - ## providedStart <- control$innerOptimStart - ## if(any(providedStart %in% c("last", "last.best"))) innerOptimStart <- providedStart - ## else if(is.numeric(sum(providedStart)) && (length(providedStart) == nre)) innerOptimStart <- providedStart - ## else innerOptimStart <- values(model, randomEffectsNodes) - ## } - ## ## In case random effects are not properly initialized - ## if(!any(innerOptimStart %in% c("last", "last.best")) & any(is.infinite(innerOptimStart) | is.na(innerOptimStart) | is.nan(innerOptimStart))){ - ## all_reTransform <- parameterTransform(model, randomEffectsNodes) - ## all_reTransform_length <- all_reTransform$getTransformedLength() - ## innerOptimStart <- all_reTransform$inverseTransform(rep(0, all_reTransform_length)) - ## } - ## Build AGHQuad - if(nre > 1 | isTRUE(control[['force_nDim']])) { - AGHQuad_nfl[[1]] <- buildOneAGHQuad(model, nQuad = nQuad_, paramNodes, randomEffectsNodes, - calcNodes, - innerControlList) - multiSetsCheck <- TRUE - } else AGHQuad_nfl[[1]] <- buildOneAGHQuad1D(model, nQuad = nQuad_, paramNodes, randomEffectsNodes, - calcNodes, - innerControlList) - } - else {## Split randomEffectsNodes into conditionally independent sets - reSets <- MargNodes$randomEffectsSets - num_reSets <- length(reSets) - reNodesAsScalars <- character() - if(num_reSets == 0){ - stop("buildAGHQ: There was a problem determining conditionally independent random effects sets for this model") - } - for(i in seq_along(reSets)){ - ## Work with one conditionally independent set of latent states - these_reNodes <- reSets[[i]] - internalRandomEffectsNodes <- c(internalRandomEffectsNodes, these_reNodes) - ## find paramNodes and calcNodes for this set of reNodes - ## paramNodes are the same for all AGHQuad_nfl elements. In the future this could be customized. - these_reDeps <- model$getDependencies(these_reNodes) ## candidate calcNodes via reNodes - these_calcNodes <- intersect(calcNodes, these_reDeps) ## definite calcNodes - these_reNodesAsScalars <- model$expandNodeNames(these_reNodes, returnScalarComponents = TRUE) - reNodesAsScalars <- c(reNodesAsScalars, these_reNodesAsScalars) - nre_these <- length(these_reNodesAsScalars) - lenInternalRENodeSets <- c(lenInternalRENodeSets, nre_these) - ## Process start values for inner optimisation - - if(is.numeric(innerOptimStartValues) && (length(innerOptimStartValues) == nre)) { - # input was a vector of all REs, so we must split it accordingly - if(is.null(names(innerOptimStartValues))) { - # split by order, because there are no names. use internalRandomEffectsNodes order. 
-            # The last portion will be for the present set of values.
-            these_reNodes_inds <- length(reNodesAsScalars) - nre_these + (1:nre_these)
-          } else {
-            # Split by names; match node names against the names of the values.
-            these_reNodes_inds <- match(these_reNodesAsScalars, names(innerOptimStartValues), nomatch=0)
-            if((length(these_reNodes_inds) != nre_these) |
-               (length(unique(these_reNodes_inds)) != length(these_reNodes_inds)) |
-               any(these_reNodes_inds==0))
-              messageIfVerbose(" [Warning] There appears to be an incorrect name in `control$innerOptimStartValues`.")
-          }
-          these_innerOptimStartValues <- innerOptimStartValues[these_reNodes_inds]
-        } else
-          these_innerOptimStartValues <- innerOptimStartValues
-        ## Carry the per-set start values into this set's control list.
-        these_innerControlList <- innerControlList
-        these_innerControlList$optimStartValues <- these_innerOptimStartValues
-
-        ## if(is.null(control$innerOptimStart)) innerOptimStart <- values(model, these_reNodes)
-        ## else {
-        ##   providedStart <- control$innerOptimStart
-        ##   if(any(providedStart %in% c("last", "last.best"))) innerOptimStart <- providedStart
-        ##   else if(is.numeric(sum(providedStart)) && (length(providedStart) == nre)){
-        ##     these_reNodes_inds <- unlist(lapply(model$expandNodeNames(these_reNodes, returnScalarComponents = TRUE), function(x) {which(scalarRENodes == x)}))
-        ##     innerOptimStart <- providedStart[these_reNodes_inds]
-        ##   }
-        ##   else innerOptimStart <- values(model, these_reNodes)
-        ## }
-        ## ## In case random effects are not properly initialized
-        ## if(!any(innerOptimStart %in% c("last", "last.best")) & any(is.infinite(innerOptimStart) | is.na(innerOptimStart) | is.nan(innerOptimStart))){
-        ##   these_reTransform <- parameterTransform(model, these_reNodes)
-        ##   these_reTransform_length <- these_reTransform$getTransformedLength()
-        ##   innerOptimStart <- these_reTransform$inverseTransform(rep(0, these_reTransform_length))
-        ## }
-        ## Build AGHQuad for each set
-        if(nre_these > 1 | isTRUE(control[['force_nDim']])){
-          AGHQuad_nfl[[i]] <- buildOneAGHQuad(model, nQuad = nQuad_, paramNodes, these_reNodes, these_calcNodes,
-                                              these_innerControlList)
-          #innerOptimControl_, innerOptimMethod, innerOptimStart, these_innerOptimStartValues)
-          multiSetsCheck <- TRUE
-        }
-        else AGHQuad_nfl[[i]] <- buildOneAGHQuad1D(model, nQuad = nQuad_, paramNodes, these_reNodes, these_calcNodes,
-                                                   these_innerControlList)
-          #innerOptimControl_, innerOptimMethod, innerOptimStart, these_innerOptimStartValues)
-      }
-    }
-    if(length(lenInternalRENodeSets) == 1) lenInternalRENodeSets <- c(lenInternalRENodeSets, -1)
-    reTransform <- parameterTransform(model, internalRandomEffectsNodes)
-    reTransform_length <- reTransform$getTransformedLength()
-    if(reTransform_length > 1) reTransform_indices <- 1:reTransform_length
-    else reTransform_indices <- c(1, -1)
-
-    reNodesAsScalars_vec <- reNodesAsScalars
-    if(nre == 1) reNodesAsScalars_vec <- c(reNodesAsScalars, "_EXTRA_")
-    reNodesAsScalars_first <- reNodesAsScalars[1]
-  }
-  else{
-    ## No random effects
-    lenInternalRENodeSets <- numeric(2)
-    reTransform <- parameterTransform(model, paramNodes[1], control = list(allowDeterm = FALSE)) ## Won't be needed at all
-    reTransform_indices <- numeric(2)
-    reNodesAsScalars_vec <- character(0)
-    reNodesAsScalars_first <- character(1)
-    if(num_calcNodesOther == 0)
-      stop("buildAGHQ: Both `calcNodesOther` and `randomEffectsNodes` are empty for Laplace or AGHQ for the given model")
-  }
-
-  paramNodesAsScalars <- model$expandNodeNames(paramNodes, returnScalarComponents = TRUE)
-  npar <- length(paramNodesAsScalars)
-  paramNodesAsScalars_vec <- paramNodesAsScalars
-  if(npar == 1) paramNodesAsScalars_vec <- c(paramNodesAsScalars, "_EXTRA_")
-  paramNodesAsScalars_first <- paramNodesAsScalars[1]
-  if(npar == 1) p_indices <- c(1, -1)
-  else p_indices <- 1:npar
-  ## setupOutputs(reNodesAsScalars, paramNodesAsScalars)
-
-  ## Automated transformation for parameters
-  paramsTransform <- parameterTransform(model, paramNodes, control = list(allowDeterm = FALSE))
-  pTransform_length <- paramsTransform$getTransformedLength()
-  if(pTransform_length > 1) pTransform_indices <- 1:pTransform_length
-  else pTransform_indices <- c(1, -1)
-
-  ## Indicator for removing the redundant index -1 in pTransform_indices
-  one_time_fixes_done <- FALSE
-  ## Default calculation method for AGHQuad
-  computeMethod_ <- extractControlElement(control, "computeMethod", 2)
-
-  useInnerCache_ <- extractControlElement(control, "useInnerCache", TRUE)
-
-  ## The nimbleList definitions AGHQuad_params and AGHQuad_summary
-  ## have moved to predefined nimbleLists.
-},## End of setup
-run = function(){},
-methods = list(
-  getNodeNamesVec = function(returnParams = logical(0, default = TRUE)) {
-    one_time_fixes()
-    returnType(character(1))
-    if(returnParams) return(paramNodesAsScalars_vec)
-    else return(reNodesAsScalars_vec)
-  },
-  getNodeNameSingle = function(returnParams = logical(0, default = TRUE)) {
-    returnType(character())
-    if(returnParams) return(paramNodesAsScalars_first)
-    else return(reNodesAsScalars_first)
-  },
-  updateSettings = function(innerOptimMethod = character(0, default="NULL"),
-                            innerOptimStart = character(0, default="NULL"),
-                            innerOptimStartValues = double(1, default=Inf),
-                            innerOptimWarning = integer(0, default = -1),
-                            useInnerCache = integer(0, default=-1),
-                            nQuad = integer(0, default=-1),
-                            gridType = character(0, default="NULL"),
-                            innerOptimControl = optimControlNimbleList(default=nimOptimDefaultControl()),
-                            outerOptimMethod = character(0, default="NULL"),
-                            replace_innerOptimControl = logical(0, default=FALSE),
-                            outerOptimControl = optimControlNimbleList(default=nimOptimDefaultControl()),
-                            replace_outerOptimControl = logical(0, default=FALSE),
-                            computeMethod = integer(0, default=-1)) {
-    # checks
-    if(innerOptimStart != "NULL") {
-      if(innerOptimStart=="zero") {
-        stop("updateSettings: `innerOptimStart` choice of 'zero' is not supported in `updateSettings`. Use `innerOptimStart='constant'` and `innerOptimStartValues = 0` to achieve 'zero' behavior")
-      }
-      if(innerOptimStart != "last" & innerOptimStart != "last.best" &
-         innerOptimStart != "constant" & innerOptimStart != "random" &
-         innerOptimStart != "model")
-        stop("updateSettings: invalid value for `innerOptimStart`")
-    }
-    if(length(innerOptimStartValues) > 1) {
-      if(length(innerOptimStartValues) != nre)
-        stop("updateSettings: length of `innerOptimStartValues` must be 1 or the total number of random effects")
-    }
-    if(nQuad != -1) {
-      if(nQuad < 1) stop("updateSettings: choose a positive number of grid points")
-      if(nQuad > 35) stop("updateSettings: currently only a maximum of 35 quadrature points is allowed")
-      threshold <- log(50000) # in text below too
-      for(i in seq_along(AGHQuad_nfl)) {
-        if(lenInternalRENodeSets[i] * log(nQuad) > threshold) {
-          print("updateSettings: choice of `nQuad` would yield >50000 nodes for ", lenInternalRENodeSets[i], " integration dimensions in conditionally independent set ", i, ".")
-          stop("too many integration nodes")
-        }
-      }
-    }
-    if(computeMethod != -1) {
-      if(!any(c(1, 2, 3) == computeMethod)) ## Cannot use `%in%` in nf code.
- stop("updateSettings: `computeMethod` must be 1, 2, or 3") - } - if(gridType != "NULL") { - if(gridType != "spectral" & gridType != "cholesky") - stop("updateSettings: `gridType` must be either 'cholesky' or 'spectral'.") - } - # actions - one_time_fixes() - if(nQuad != -1) nQuad_ <<- nQuad - these_initsValues <- innerOptimStartValues - iStart <- 1 - for(i in seq_along(AGHQuad_nfl)) { - if(length(innerOptimStartValues) > 1) { - these_values <- innerOptimStartValues[iStart:(iStart + lenInternalRENodeSets[i] - 1)] - iStart <- iStart + lenInternalRENodeSets[i] - } - AGHQuad_nfl[[i]]$updateSettings(optimMethod = innerOptimMethod, - optimStart = innerOptimStart, - optimStartValues = innerOptimStartValues, - optimWarning = innerOptimWarning, - useInnerCache = useInnerCache, - nQuad = nQuad_, - gridType = gridType, - optimControl = innerOptimControl, - replace_optimControl = replace_innerOptimControl) - } - # TO-DO: create useInnerCache_ and allow control arg. - if(useInnerCache != -1) useInnerCache_ <<- useInnerCache != 0 - if(computeMethod != -1) computeMethod_ <<- computeMethod - if(replace_outerOptimControl) { - outerOptimControl$fnscale <- -1 - outerOptimControl_ <<- outerOptimControl - } - if(outerOptimMethod != "NULL") - outerOptimMethod_ <<- outerOptimMethod - }, - ## setMethod = function(method = integer()) { - ## if(nre == 0) print("AGHQuad or Laplace approximation is not needed for the given model: no random effects") - ## if(!any(c(1, 2, 3) == method)) stop("Choose a valid method ID from 1, 2, and 3") - ## methodID <<- method - ## }, - ## getMethod = function() { - ## return(methodID) - ## returnType(integer()) - ## }, - ## Let the user experiment with different quadrature grids: - ## setQuadSize = function(nQUpdate = integer()){ - ## nQuad0 <- nQuad_ - ## if(nQUpdate < 1) stop("Choose a positive number of grid points.") - ## if(nQUpdate > 35) stop("Currently only a maximum of 35 quadrature points are allowed.") - ## nQuad_ <<- nQUpdate - ## for(i in seq_along(AGHQuad_nfl)) { - ## if( lenInternalRENodeSets[i]^nQuad_ > 50000 ){ - ## nQuad_ <<- nQuad0 - ## stop("You have exceeded the maximum quadrature grid of 50,000 points.") - ## } - ## AGHQuad_nfl[[i]]$set_nQuad(nQuad_) - ## } - ## }, - ## setAGHQTransformation = function(method = character()){ - ## if(method != "spectral" & method != "cholesky") stop("Must choose either cholesky or spectral.") - ## for(i in seq_along(AGHQuad_nfl)) AGHQuad_nfl[[i]]$set_transformation(transformation = method) - ## }, - ## setInnerOptimInits = function(method = character(0), values = double(1)){ - ## full_values <- length(values) > 1 - ## if(full_values) - ## if(length(values) != nre) - ## stop("values may be empty, or have length = 1 or to the total number of scalar random effects.") - ## if(length(values) == 0) values <- c(0) - ## if(!full_values) these_values <- values - ## iStart <- 1 - ## for(i in seq_along(AGHQuad_nfl)) { - ## if(full_values) { - ## these_values <- values[ iStart:(iStart + lenInternalRENodeSets[i] - 1) ] - ## iStart <- iStart + lenInternalRENodeSets[i] - ## } - ## AGHQuad_nfl[[i]]$set_reInitMethod(method, these_values) - ## } - ## }, - one_time_fixes = function() { - if(one_time_fixes_done) return() - if(pTransform_length == 1){ - if(length(pTransform_indices) == 2){ - pTransform_indices <<- numeric(length = 1, value = 1) - } - } - if(npar == 1){ - if(length(p_indices) == 2){ - p_indices <<- numeric(length = 1, value = 1) - } - } - one_time_fixes_done <<- TRUE - }, - ## Check to see if the inner optimizations converged. 
- checkInnerConvergence = function(message = logical(0, default = FALSE)){ - converged <- 0 - for(i in seq_along(AGHQuad_nfl)){ - conCheck <- AGHQuad_nfl[[i]]$check_convergence() - if(conCheck != 0) { - converged <- 1 - if(message) print(" [Warning] Inner optimization did not converge for conditionally independent set ", i, " with code ", conCheck, ".") - } - } - returnType(double()) - return(converged) - }, - ## Other log-likelihood (parts not involving random effects, i.e. simply - ## additional calculations in the model) in terms of original parameters - otherLogLik = function(p = double(1)) { - if(num_calcNodesOther == 0) stop("`calcNodesOther` is empty: there is no exact likelihood component for the model") - values(model, paramNodes) <<- p - ans <- model$calculate(calcNodesOther) - return(ans) - returnType(double()) - }, - ## Gradient of the exact log-likelihood w.r.t parameters - gr_otherLogLik_internal = function(p = double(1)) { - if(num_calcNodesOther == 0) stop("`calcNodesOther` is empty: there is no exact likelihood component for the model") - if(!one_time_fixes_done) one_time_fixes() - ans <- derivs(otherLogLik(p), wrt = p_indices, order = 1, model = model, - updateNodes = otherLogLik_updateNodes, constantNodes = otherLogLik_constantNodes) - return(ans$jacobian[1,]) - returnType(double(1)) - }, - ## Double taping for efficiency - gr_otherLogLik = function(p = double(1)) { - if(num_calcNodesOther == 0) stop("`calcNodesOther` is empty: there is no exact likelihood component for the model") - if(!one_time_fixes_done) one_time_fixes() - ans <- derivs(gr_otherLogLik_internal(p), wrt = p_indices, order = 0, model = model, - updateNodes = otherLogLik_updateNodes, constantNodes = otherLogLik_constantNodes) - return(ans$value) - returnType(double(1)) - }, - ## AGHQuad approximation in terms of original parameters - calcLogLik = function(p = double(1), trans = logical(0, default = FALSE)) { - if(!one_time_fixes_done) one_time_fixes() - checkInterrupt() - if(trans) { - if(length(p) != pTransform_length) { - ## We cannot have variables in a nimStop. - print("For `calcLogLik` (or `calcLaplace`) with `trans = TRUE`, `p` should be length ", pTransform_length, " but was provided with length ", length(p), ".") - stop("incorrect length for `p`") - } - p <- paramsTransform$inverseTransform(p) - } - if(length(p) != npar) { - print("For `calcLogLik` (or `calcLaplace`), `p` should be length ", npar, " but is length ", length(p), ".") - stop("incorrect length for `p`") - } - if(num_calcNodesOther > 0) ans <- otherLogLik(p) - else ans <- 0 - if(nre > 0){ - for(i in seq_along(AGHQuad_nfl)){ - if(computeMethod_ == 1) ans <- ans + AGHQuad_nfl[[i]]$calcLogLik1(p) - else if(computeMethod_ == 2) ans <- ans + AGHQuad_nfl[[i]]$calcLogLik2(p) - else ans <- ans + AGHQuad_nfl[[i]]$calcLogLik3(p) - } - } - if(is.nan(ans) | is.na(ans)) ans <- -Inf - return(ans) - returnType(double()) - }, - calcLaplace = function(p = double(1), trans = logical(0, default = FALSE)) { - if(nQuad_ > 1) { - stop("`nQuad` must be equal to 1 to use `calcLaplace`. Either call `calcLogLik` or use `updateSettings()` to change `nQuad`") - } - ans <- calcLogLik(p, trans) - return(ans) - returnType(double()) - }, - ## Gradient of the AGHQuad approximation w.r.t. 
parameters
-  gr_logLik = function(p = double(1), trans = logical(0, default=FALSE)) {
-    if(!one_time_fixes_done) one_time_fixes()
-    if(trans) {
-      if(length(p) != pTransform_length) {
-        print("For `gr_logLik` (or `gr_Laplace`) with `trans = TRUE`, `p` should be length ", pTransform_length, " but was provided with length ", length(p), ".")
-        stop("incorrect length for `p`")
-      }
-      pDerivs <- derivs_pInverseTransform(p, c(0, 1))
-      p <- pDerivs$value
-    }
-    if(length(p) != npar) {
-      print("For `gr_logLik` (or `gr_Laplace`), `p` should be length ", npar, " but is length ", length(p), ".")
-      stop("incorrect length for `p`")
-    }
-    if(num_calcNodesOther > 0) ans <- gr_otherLogLik(p) else ans <- numeric(length = npar)
-    if(nre > 0){
-      for(i in seq_along(AGHQuad_nfl)) {
-        if(computeMethod_ == 1) ans <- ans + AGHQuad_nfl[[i]]$gr_logLik1(p)
-        else if(computeMethod_ == 2) ans <- ans + AGHQuad_nfl[[i]]$gr_logLik2(p)
-        else ans <- ans + AGHQuad_nfl[[i]]$gr_logLik3(p)
-      }
-    }
-    if(trans) {
-      ans <- (ans %*% pDerivs$jacobian)[1,]
-    }
-    return(ans)
-    returnType(double(1))
-  },
-  gr_Laplace = function(p = double(1), trans = logical(0, default=FALSE)) {
-    if(nQuad_ > 1)
-      stop("`nQuad` must be equal to 1 to use `gr_Laplace`. Either call `gr_logLik` or use `updateSettings()` to change `nQuad`")
-    ans <- gr_logLik(p, trans)
-    return(ans)
-    returnType(double(1))
-  },
-  ## AGHQuad approximation in terms of transformed parameters
-  calcLogLik_pTransformed = function(pTransform = double(1)) {
-    ans <- calcLogLik(pTransform, trans = TRUE)
-    ## if(!one_time_fixes_done) one_time_fixes()
-    ## p <- paramsTransform$inverseTransform(pTransform)
-    ## ans <- calcLogLik(p)
-    ## if(is.nan(ans) | is.na(ans)) ans <- -Inf
-    cache_outer_logLik(ans) ## Save the outer value in the inner objects to cache values at the outer mode.
-    return(ans)
-    returnType(double())
-  },
-  ## Inverse transform parameters to original scale
-  pInverseTransform = function(pTransform = double(1)) {
-    p <- paramsTransform$inverseTransform(pTransform)
-    return(p)
-    returnType(double(1))
-  },
-  ## Jacobian of the inverse transformation for parameters
-  derivs_pInverseTransform = function(pTransform = double(1), order = double(1)) {
-    if(!one_time_fixes_done) one_time_fixes()
-    ans <- derivs(pInverseTransform(pTransform), wrt = pTransform_indices, order = order)
-    return(ans)
-    returnType(ADNimbleList())
-  },
-  ## Inverse transform random effects to original scale
-  reInverseTransform = function(reTrans = double(1)) {
-    if(nre == 0) stop("no random effects in the model")
-    re <- reTransform$inverseTransform(reTrans)
-    return(re)
-    returnType(double(1))
-  },
-  ## Jacobian of the inverse transformation
-  derivs_reInverseTransform = function(reTrans = double(1), order = double(1)) {
-    if(!one_time_fixes_done) one_time_fixes()
-    if(nre == 0) stop("no random effects in the model")
-    ans <- derivs(reInverseTransform(reTrans), wrt = reTransform_indices, order = order)
-    return(ans)
-    returnType(ADNimbleList())
-  },
-  ## Gradient of the AGHQuad approximation in terms of transformed parameters
-  gr_logLik_pTransformed = function(pTransform = double(1)) {
-    ans <- gr_logLik(pTransform, trans = TRUE)
-    ## if(!one_time_fixes_done) one_time_fixes()
-    ## pDerivs <- derivs_pInverseTransform(pTransform, c(0, 1))
-    ## gr <- gr_logLik(pDerivs$value) ## pDerivs$value gives original param values
-    ## ans <- (gr %*% pDerivs$jacobian)[1,]
-    return(ans)
-    returnType(double(1))
-  },
-  ## Prior contribution to the posterior
-  calcPrior_p = function(p = double(1)){
-    ## Prior log likelihood:
-    values(model, paramNodes) <<- p
-    ans <- model$calculate(paramNodes)
-    return(ans)
-    returnType(double())
-  },
-  ## Prior contribution to the posterior on the transformed scale.
-  calcPrior_pTransformed = function(pTransform = double(1)) {
-    p <- paramsTransform$inverseTransform(pTransform)
-    ans <- calcPrior_p(p) + logDetJacobian(pTransform)
-    return(ans)
-    returnType(double())
-  },
-  ## Calculate the posterior log density at p: log likelihood + log prior.
-  calcPostLogDens = function(p = double(1), trans = logical(0, default = FALSE)) {
-    ans <- 0
-    if(trans) {
-      pstar <- paramsTransform$inverseTransform(p) ## Just want to do this once.
-      ans <- ans + logDetJacobian(p) ## p is transformed, add Jacobian here.
-    }else{
-      pstar <- p
-    }
-    ## Error checking when calling calcLogLik.
-    ans <- ans + calcLogLik(pstar, FALSE) + calcPrior_p(pstar)
-    returnType(double())
-    return(ans)
-  },
-  ## Calculate the posterior log density at transformed p: log likelihood + log prior (transformed scale).
-  calcPostLogDens_pTransformed = function(pTransform = double(1)) {
-    ans <- calcPostLogDens(pTransform, TRUE)
-    cache_outer_logLik(ans) ## Update internal cache w/ prior.
-
-    if(is.nan(ans) | is.na(ans)) ans <- -Inf
-    returnType(double())
-    return(ans)
-  },
-  ## Gradient of the log determinant of the Jacobian for parameter transformations.
-  gr_logDetJacobian = function(pTransform = double(1)){
-    ans <- derivs(logDetJacobian(pTransform), wrt = pTransform_indices, order = 1)
-    return(ans$jacobian[1,])
-    returnType(double(1))
-  },
-  ## Gradient of the prior distribution.
-  gr_prior = function(p = double(1)){
-    ans <- derivs(calcPrior_p(p), wrt = p_indices, order = 1)
-    return(ans$jacobian[1,])
-    returnType(double(1))
-  },
-  ## Gradient of the posterior density on the transformed scale.
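-  ## With p = pInverseTransform(pTransform) and Jacobian J = dp/dpTransform,
-  ## the chain rule gives the three terms summed in the body below:
-  ##   d logPost / d pTransform = gr_logLik(., trans = TRUE) + gr_prior(p) %*% J + gr_logDetJacobian(.)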
-  gr_postLogDens_pTransformed = function(pTransform = double(1)){
-    pDerivs <- derivs_pInverseTransform(pTransform, c(0, 1))
-    grLogDetJacobian <- gr_logDetJacobian(pTransform)
-    grLogLikTrans <- gr_logLik(pTransform, TRUE)
-
-    p <- pDerivs$value
-    grPrior <- gr_prior(p)
-    grPriorTrans <- (grPrior %*% pDerivs$jacobian)[1,]
-
-    ans <- grLogLikTrans + grPriorTrans + grLogDetJacobian
-    return(ans)
-    returnType(double(1))
-  },
-  ## For internal purposes of building the gradient
-  logDetJacobian = function(pTransform = double(1)){
-    ans <- paramsTransform$logDetJacobian(pTransform)
-    return(ans)
-    returnType(double())
-  },
-  ## Calculate MLE of parameters
-  findMLE = function(pStart = double(1, default = Inf),
-                     hessian = logical(0, default = TRUE) ){
-    mleRes <- optimize(pStart = pStart,
-                       hessian = hessian,
-                       parscale = "real")
-    return(mleRes)
-    returnType(optimResultNimbleList())
-  },
-  ## General maximization function
-  optimize = function(pStart = double(1, default = Inf),
-                      hessian = logical(0, default = TRUE),
-                      parscale = character(0, default = "transformed")) {
-    if(!one_time_fixes_done) one_time_fixes() ## Otherwise summary will look bad.
-    if(multiSetsCheck & nQuad_ > 1) stop("Currently only Laplace (`nQuad = 1`) is supported for maximization when integrations have more than one dimension at a time. Use `updateSettings(nQuad = 1)` to change.")
-    if(any(abs(pStart) == Inf)) pStart <- values(model, paramNodes)
-    if(length(pStart) != npar) {
-      print(" [Warning] For maximization, `pStart` should be length ", npar, " but is length ", length(pStart), ".")
-      ans <- optimResultNimbleList$new()
-      return(ans)
-      # stop("Wrong length for pStart in findMLE.")
-    }
-    ## Reset log likelihood internally for cache.
-    reset_outer_inner_logLik()
-
-    ## In case parameter nodes are not properly initialized
-    if(any_na(pStart) | any_nan(pStart) | any(abs(pStart)==Inf)) pStartTransform <- rep(0, pTransform_length)
-    else pStartTransform <- paramsTransform$transform(pStart)
-    ## In case bad start values are provided
-    if(any_na(pStartTransform) | any_nan(pStartTransform) | any(abs(pStartTransform)==Inf)) pStartTransform <- rep(0, pTransform_length)
-
-    optRes <- optim(pStartTransform, calcLogLik_pTransformed, gr_logLik_pTransformed, method = outerOptimMethod_, control = outerOptimControl_, hessian = hessian)
-
-    if(optRes$convergence != 0)
-      print(" [Warning] `optim` has a non-zero convergence code: ", optRes$convergence, ".\n",
-            "          The control parameters of `optim` can be adjusted in the control argument of\n",
-            "          `buildLaplace` or `buildAGHQ` via `list(outerOptimControl = list())`.")
-
-    ## Print out warning about inner convergence.
-    if( checkInnerConvergence(FALSE) != 0 )
-      print(" [Warning] Inner optimization had a non-zero convergence code.\n",
-            "          Use `checkInnerConvergence(TRUE)` to see details.")
-
-    ## Back transform results to original scale if requested.
-    p <- paramsTransform$inverseTransform(optRes$par)
-    if(parscale == "real") optRes$par <- p
-    setModelValues(p) ## Make sure the model object contains all the updated parameter values.
-
-    ## By default results are returned on the transformed scale, just like optim;
-    ## parscale = "real" returns `par` on the original scale.
-    return(optRes)
-    returnType(optimResultNimbleList())
-  },
-  ## User can update whether or not a warning is set for inner optimization.
-  ## setInnerOptimWarning = function(warn = logical(0, default = FALSE)){
-  ##   for(i in seq_along(AGHQuad_nfl)){
-  ##     AGHQuad_nfl[[i]]$set_warning(warn)
-  ##   }
-  ## },
-  ## Save the outer log-likelihood value into each inner approximation's cache.
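-  ## Called after each outer objective evaluation so that each inner
-  ## approximation can track and cache the results associated with the best
-  ## (outer-mode) log likelihood seen so far.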
- cache_outer_logLik = function(logLikVal = double()){ - for(i in seq_along(AGHQuad_nfl)){ - AGHQuad_nfl[[i]]$save_outer_logLik(logLikVal) - } - }, - ## Set cached log lik values to -Inf internally. - reset_outer_inner_logLik = function(){ - for(i in seq_along(AGHQuad_nfl)){ - AGHQuad_nfl[[i]]$reset_outer_logLik() - } - }, - ## Grab the inner Cholesky from the cached last values. - get_inner_cholesky = function(atOuterMode = integer(0, default = 0)){ - if(nre == 0) stop("no random effects in the model") - cholesky <- matrix(value = 0, nrow = nre, ncol = nre) - tot <- 0 - for(i in seq_along(AGHQuad_nfl)){ - numre <- lenInternalRENodeSets[i] - cholesky[(tot+1):(tot+numre), (tot+1):(tot+numre)] <- AGHQuad_nfl[[i]]$get_inner_negHessian_chol(atOuterMode) - tot <- tot + numre - } - return(cholesky) - returnType(double(2)) - }, - ## Grab the inner mode from the cached last values. - get_inner_mode = function(atOuterMode = integer(0, default = 0)){ - if(nre == 0) stop("no random effects in the model") - raneff <- numeric(nre) - tot <- 0 - for(i in seq_along(AGHQuad_nfl)){ - numre <- lenInternalRENodeSets[i] - raneff[(tot+1):(tot+numre)] <- AGHQuad_nfl[[i]]$get_inner_mode(atOuterMode) - tot <- tot + numre - } - return(raneff) - returnType(double(1)) - }, - ## Optimized random effects given transformed parameter values - optimRandomEffects = function(pTransform = double(1)){ - if(nre == 0) stop("no random effects in the model") - p <- pInverseTransform(pTransform) - raneff <- numeric(nre) - tmp <- numeric(nre) ## Not sure this is needed. - tot <- 0 - - computeMethod <- -1 - if(useInnerCache_){ - pMLE <- AGHQuad_nfl[[1]]$get_param_value(atOuterMode = 1) - pLast <- AGHQuad_nfl[[1]]$get_param_value(atOuterMode = 0) - ## Cache check for either last value or MLE - if(all(p == pMLE)) computeMethod <- 1 - else if(all(p == pLast)) computeMethod <- 0 - } - - for(i in seq_along(AGHQuad_nfl)){ - if(computeMethod == -1 ){ - if(computeMethod_ == 1) tmp <- AGHQuad_nfl[[i]]$update_max_inner_logLik_internal(p) - else tmp <- AGHQuad_nfl[[i]]$update_max_inner_logLik(p) - }else{ - tmp <- AGHQuad_nfl[[i]]$get_inner_mode(atOuterMode = computeMethod) - } - numre <- dim(tmp)[1] - raneff[(tot+1):(tot+numre)] <- tmp - tot <- tot + numre - } - return(raneff) - returnType(double(1)) - }, - ## Inverse of the negative Hessian of log-likelihood wrt transformed random effects - inverse_negHess = function(p = double(1), reTransform = double(1)){ - if(nre == 0) stop("no random effects in the model") - invHess <- matrix(value = 0, nrow = nre, ncol = nre) - tot <- 0 - - outer_mode_case <- -1 - if(useInnerCache_){ - pMLE <- AGHQuad_nfl[[1]]$get_param_value(atOuterMode = 1) - pLast <- AGHQuad_nfl[[1]]$get_param_value(atOuterMode = 0) - ## Cache check for either last value or MLE - if(all(p == pMLE)) outer_mode_case <- 1 - else if(all(p == pLast)) outer_mode_case <- 0 - } - - for(i in seq_along(AGHQuad_nfl)){ - numre <- lenInternalRENodeSets[i] - if(outer_mode_case == -1){ - tmp <- AGHQuad_nfl[[i]]$negHess(p, reTransform[(tot+1):(tot+numre)]) - }else{ - U <- AGHQuad_nfl[[i]]$get_inner_negHessian_chol(atOuterMode = outer_mode_case) - tmp <- t(U) %*% U - } - invHess[(tot+1):(tot+numre), (tot+1):(tot+numre)] <- inverse(tmp) - tot <- tot + numre - } - return(invHess) - returnType(double(2)) - }, - ## Hessian of joint log-likelihood wrt parameters and (transformed) random effects - hess_logLik_wrt_p_wrt_re = function(p = double(1), reTransform = double(1)){ - if(nre == 0) stop("no random effects in the model") - ans <- matrix(value = 0, nrow 
= npar, ncol = nre)
-    tot <- 0
-    for(i in seq_along(AGHQuad_nfl)){
-      numre <- lenInternalRENodeSets[i]
-      if(computeMethod_ == 1) tmp <- AGHQuad_nfl[[i]]$hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform[(tot+1):(tot+numre)])
-      else tmp <- AGHQuad_nfl[[i]]$hess_joint_logLik_wrt_p_wrt_re(p, reTransform[(tot+1):(tot+numre)])
-      ans[1:npar, (tot+1):(tot+numre)] <- tmp
-      tot <- tot + numre
-    }
-    return(ans)
-    returnType(double(2))
-  },
-  ## Gives the user control to start fresh by removing internally saved values.
-  ## setInnerCache = function(useCache = logical(0, default = TRUE)){
-  ##   innerCache <<- useCache
-  ##   for(i in seq_along(AGHQuad_nfl)) AGHQuad_nfl[[i]]$set_inner_cache(useCache)
-  ## },
-  ## Set all model values after finding the MLE. The function will repeat the
-  ## inner optimization if the inner cached values don't match p.
-  setModelValues = function(p = double(1)){
-    for(i in seq_along(AGHQuad_nfl))
-      AGHQuad_nfl[[i]]$set_randomeffect_values(p)
-  },
-  ## Summarize AGHQuad MLE results
-  summary = function(MLEoutput = optimResultNimbleList(),
-                     originalScale = logical(0, default = TRUE),
-                     randomEffectsStdError = logical(0, default = TRUE),
-                     jointCovariance = logical(0, default = FALSE)){
-    if(dim(MLEoutput$hessian)[1] == 0) stop("Hessian matrix was not calculated for Laplace or AGHQ MLE")
-    ## Output lists
-    ans <- AGHQuad_summary$new()
-    pres <- AGHQuad_params$new()
-    ranres <- AGHQuad_params$new()
-    ## Parameters
-    p <- MLEoutput$par
-    pTransform <- paramsTransform$transform(p)
-    vcov_pTransform <- -inverse(MLEoutput$hessian)
-    stdErr_pTransform <- sqrt(diag(vcov_pTransform))
-    if(nre == 0) { ## No random effects
-      ranres$estimate <- numeric(0)
-      ranres$stdError <- numeric(0)
-      if(originalScale){
-        derivspInvTransform <- derivs_pInverseTransform(pTransform, c(0, 1))
-        JacobpInvTransform <- derivspInvTransform$jacobian
-        stdErr_p <- numeric(npar)
-        if(jointCovariance) {
-          vcov <- JacobpInvTransform %*% vcov_pTransform %*% t(JacobpInvTransform)
-          stdErr_p <- sqrt(diag(vcov))
-          ans$vcov <- vcov
-        }
-        else{
-          for(i in 1:npar){
-            var_p_i <- (JacobpInvTransform[i,,drop=FALSE] %*% vcov_pTransform %*% t(JacobpInvTransform[i,,drop=FALSE]))[1,1]
-            stdErr_p[i] <- sqrt(var_p_i)
-          }
-          ans$vcov <- matrix(nrow = 0, ncol = 0)
-        }
-        pres$estimate <- p
-        pres$stdError <- stdErr_p
-      }
-      else {
-        pres$estimate <- pTransform
-        pres$stdError <- stdErr_pTransform
-        if(jointCovariance) ans$vcov <- vcov_pTransform
-        else ans$vcov <- matrix(0, nrow = 0, ncol = 0)
-      }
-    }
-    else{
-      ## Random effects
-      optreTransform <- optimRandomEffects(pTransform) ## *** Replace this with cached inner modes.
-      optre <- reInverseTransform(optreTransform)
-      ntot <- npar + nre
-      if(jointCovariance) {
-        ## Inverse of the negative Hessian of log-likelihood wrt transformed random effects at MLEs
-        inv_negHess <- inverse_negHess(p, optreTransform) ## *** Replace this with cached inner modes.
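-        ## Delta method on the transformed scale: the joint covariance of
-        ## (params, random effects) assembled below is
-        ##   vcov_Transform = [0, 0; 0, H^{-1}] + jointJacob %*% vcov_pTransform %*% t(jointJacob),
-        ## where H is the negative Hessian wrt the transformed random effects and
-        ## jointJacob stacks an identity block (for the transformed parameters)
-        ## above the Jacobian of the optimized random effects wrt the transformed parameters.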
-        jointInvNegHessZero <- matrix(0, nrow = ntot, ncol = ntot)
-        #jointInvNegHessZero[1:nre, 1:nre] <- inv_negHess
-        jointInvNegHessZero[(npar+1):ntot, (npar+1):ntot] <- inv_negHess
-        ## Hessian of log-likelihood wrt to params and transformed random effects
-        hessLoglikwrtpre <- hess_logLik_wrt_p_wrt_re(p, optreTransform)
-        ## Derivative of inverse transformation for params
-        derivspInvTransform <- derivs_pInverseTransform(pTransform, c(0, 1))
-        JacobpInvTransform <- derivspInvTransform$jacobian
-        ## Jacobian of optimized random effects wrt transformed parameters
-        JacobOptreWrtParams <- inv_negHess %*% t(hessLoglikwrtpre) %*% JacobpInvTransform
-        jointJacob <- matrix(init = FALSE, nrow = ntot, ncol = npar)
-        #jointJacob[1:nre, 1:npar] <- JacobOptreWrtParams
-        jointJacob[(npar+1):ntot, 1:npar] <- JacobOptreWrtParams
-        #jointJacob[(nre+1):ntot, 1:npar] <- diag(npar)
-        jointJacob[1:npar, 1:npar] <- diag(npar)
-        ## Joint covariance matrix on transformed scale
-        vcov_Transform <- jointInvNegHessZero + jointJacob %*% vcov_pTransform %*% t(jointJacob)
-        if(originalScale){
-          derivs_reInvTransform <- derivs_reInverseTransform(optreTransform, c(0, 1))
-          Jacob_reInvTransform <- derivs_reInvTransform$jacobian
-          Jacob_JointInvTransform <- matrix(0, nrow = ntot, ncol = ntot)
-          #Jacob_JointInvTransform[1:nre, 1:nre] <- Jacob_reInvTransform
-          Jacob_JointInvTransform[(npar+1):ntot, (npar+1):ntot] <- Jacob_reInvTransform
-          #Jacob_JointInvTransform[(nre+1):ntot, (nre+1):ntot] <- JacobpInvTransform
-          Jacob_JointInvTransform[1:npar, 1:npar] <- JacobpInvTransform
-          vcov <- Jacob_JointInvTransform %*% vcov_Transform %*% t(Jacob_JointInvTransform)
-          stdErr_p_re <- sqrt(diag(vcov))
-          stdErr_p <- stdErr_p_re[1:npar]
-          if(randomEffectsStdError){
-            ranres$stdError <- stdErr_p_re[(npar+1):ntot]
-          }
-          else{
-            ranres$stdError <- numeric(0)
-          }
-          ans$vcov <- vcov
-          pres$estimate <- p
-          pres$stdError <- stdErr_p
-          ranres$estimate <- optre
-        }## End of if(originalScale)
-        else { ## On transformed scale
-          if(randomEffectsStdError){
-            stdErr_reTransform <- sqrt(diag(vcov_Transform)[(npar+1):ntot])
-            ranres$stdError <- stdErr_reTransform
-          }
-          else{
-            ranres$stdError <- numeric(0)
-          }
-          ans$vcov <- vcov_Transform
-          pres$estimate <- pTransform
-          pres$stdError <- sqrt(diag(vcov_Transform)[1:npar])
-          ranres$estimate <- optreTransform
-        }
-      }## End of if(jointCovariance)
-      else { ## Do not return joint covariance matrix
-        if(originalScale){## On original scale
-          pres$estimate <- p
-          ranres$estimate <- optre
-          if(randomEffectsStdError){
-            ## Joint covariance matrix on transformed scale
-            inv_negHess <- inverse_negHess(p, optreTransform)
-            # jointInvNegHessZero <- matrix(0, nrow = ntot, ncol = ntot)
-            # jointInvNegHessZero[1:nre, 1:nre] <- inv_negHess
-            ## Hessian of log-likelihood wrt to params and transformed random effects
-            hessLoglikwrtpre <- hess_logLik_wrt_p_wrt_re(p, optreTransform)
-            ## Derivative of inverse transformation for params
-            derivspInvTransform <- derivs_pInverseTransform(pTransform, c(0, 1))
-            JacobpInvTransform <- derivspInvTransform$jacobian
-            ## Covariance matrix for params on the original scale
-            vcov_p <- JacobpInvTransform %*% vcov_pTransform %*% t(JacobpInvTransform)
-            ## Jacobian of optimized random effects wrt transformed parameters
-            JacobOptreWrtParams <- inv_negHess %*% t(hessLoglikwrtpre) %*% JacobpInvTransform
-            # jointJacob <- matrix(NA, nrow = ntot, ncol = npar)
-            # jointJacob[1:nre, 1:npar] <- JacobOptreWrtParams
-            # jointJacob[(nre+1):ntot, 1:npar] <- diag(npar)
-            ## Joint covariance matrix on
transformed scale - # vcov_Transform <- jointInvNegHessZero + jointJacob %*% vcov_pTransform %*% t(jointJacob) - ## Covariance matrix for random effects (transformed) - vcov_reTransform <- inv_negHess + JacobOptreWrtParams %*% vcov_pTransform %*% t(JacobOptreWrtParams) - ## Derivatives information - derivs_reInvTransform <- derivs_reInverseTransform(optreTransform, c(0, 1)) - Jacob_reInvTransform <- derivs_reInvTransform$jacobian - # Jacob_JointInvTransform <- matrix(0, nrow = ntot, ncol = ntot) - # Jacob_JointInvTransform[1:nre, 1:nre] <- Jacob_reInvTransform - # Jacob_JointInvTransform[(nre+1):ntot, (nre+1):ntot] <- JacobpInvTransform - stdErr_re <- numeric(nre) - for(i in 1:nre){ - var_i <- (Jacob_reInvTransform[i,,drop=FALSE] %*% vcov_reTransform %*% t(Jacob_reInvTransform[i,,drop=FALSE]))[1,1] - stdErr_re[i] <- sqrt(var_i) - } - stdErr_p <- sqrt(diag(vcov_p)) - pres$stdError <- stdErr_p - ranres$stdError <- stdErr_re - ans$vcov <- vcov_p - }## End of if(randomEffectsStdError) - else { ## Do not calculate standard errors of random effects estimates - derivspInvTransform <- derivs_pInverseTransform(pTransform, c(0, 1)) - JacobpInvTransform <- derivspInvTransform$jacobian - ## Covariance matrix for params on the original scale - vcov_p <- JacobpInvTransform %*% vcov_pTransform %*% t(JacobpInvTransform) - # stdErr_p <- numeric(npar) - # for(i in 1:npar){ - # var_p_i <- (JacobpInvTransform[i,,drop=FALSE] %*% vcov_pTransform %*% t(JacobpInvTransform[i,,drop=FALSE]))[1,1] - # stdErr_p[i] <- sqrt(var_p_i) - # } - stdErr_p <- sqrt(diag(vcov_p)) - pres$stdError <- stdErr_p - ranres$stdError <- numeric(0) - ans$vcov <- vcov_p - } - }## End of if(originalScale) - else {## On transformed scale - pres$estimate <- pTransform - pres$stdError <- stdErr_pTransform - ranres$estimate <- optreTransform - ans$vcov <- vcov_pTransform - if(randomEffectsStdError){ - inv_negHess <- inverse_negHess(p, optreTransform) - jointInvNegHessZero <- matrix(0, nrow = ntot, ncol = ntot) - jointInvNegHessZero[1:nre, 1:nre] <- inv_negHess - ## Hessian of log-likelihood wrt to params and transformed random effects - hessLoglikwrtpre <- hess_logLik_wrt_p_wrt_re(p, optreTransform) - ## Derivative of inverse transformation for params - derivspInvTransform <- derivs_pInverseTransform(pTransform, c(0, 1)) - JacobpInvTransform <- derivspInvTransform$jacobian - ## Jacobian of optimized random effects wrt transformed parameters - JacobOptreWrtParams <- inv_negHess %*% t(hessLoglikwrtpre) %*% JacobpInvTransform - stdErr_reTransform <- numeric(nre) - for(i in 1:nre){ - var_reTransform_i <- inv_negHess[i, i] + (JacobOptreWrtParams[i,,drop=FALSE] %*% vcov_pTransform %*% t(JacobOptreWrtParams[i,,drop=FALSE]))[1,1] - stdErr_reTransform[i] <- sqrt(var_reTransform_i) - } - ranres$stdError <- stdErr_reTransform - } - else{ - ranres$stdError <- numeric(0) - } - } - } - } - pres$names <- paramNodesAsScalars_vec - ranres$names <- reNodesAsScalars_vec - ans$params <- pres - ans$randomEffects <- ranres - ans$originalScale <- originalScale - return(ans) - returnType(AGHQuad_summary()) - } - ), - buildDerivs = list(pInverseTransform = list(), - reInverseTransform = list(), - otherLogLik = list(), - gr_otherLogLik_internal = list(), - logDetJacobian = list(), - calcPrior_p = list() - ) -) - -#' Summarize results from Laplace or adaptive Gauss-Hermite quadrature approximation -#' -#' Process the results of the `findMLE` method of a nimble Laplace or AGHQ approximation -#' into a more useful format. 
-#'
-#' @param laplace The Laplace approximation object, typically the compiled one.
-#'   This would be the result of compiling an object returned from
-#'   `buildLaplace`.
-#'
-#' @param AGHQ Same as \code{laplace}. Note that `buildLaplace` and
-#'   `buildAGHQ` create the same kind of algorithm object that can be used
-#'   interchangeably. `buildLaplace` simply sets the number of quadrature points
-#'   (`nQuad`) to 1 to achieve Laplace approximation as a special case of AGHQ.
-#'
-#' @param MLEoutput The maximum likelihood estimate using Laplace or AGHQ,
-#'   returned from e.g. `approx$findMLE(...)`, where \code{approx} is the
-#'   algorithm object returned by `buildLaplace` or `buildAGHQ`, or (more
-#'   typically) the result of compiling that object with `compileNimble`. See
-#'   `help(buildLaplace)` for more information.
-#'
-#' @param originalScale Should results be returned using the original
-#'   parameterization in the model code (TRUE) or the potentially transformed
-#'   parameterization used internally by the Laplace approximation (FALSE).
-#'   Transformations are used for any parameters and/or random effects that have
-#'   constrained ranges of valid values, so that in the transformed parameter
-#'   space there are no constraints. (default = TRUE)
-#'
-#' @param randomEffectsStdError If TRUE, calculate the standard error of the
-#'   estimates of random effects values. (default = TRUE)
-#'
-#' @param jointCovariance If TRUE, calculate the joint covariance matrix of
-#'   the parameters and random effects together. If FALSE, calculate the
-#'   covariance matrix of the parameters. (default = FALSE)
-#'
-#' @details
-#'
-#' The numbers obtained by this function can be obtained more directly by
-#' `approx$summary(...)`. The added benefit of `summaryLaplace` is to arrange
-#' the results into data frames (for parameters and random effects), with row
-#' names for the model nodes, and also adding row and column names to the
-#' covariance matrix.
-#'
-#' @return
-#'
-#' A list with data frames `params` and `randomEffects`, each with columns for
-#' `estimate` and (possibly) `stdError` (standard error) and row names for model
-#' nodes, a matrix `vcov` with the covariance matrix with row and column names,
-#' and `originalScale` with the input value of `originalScale` so it is recorded
-#' for later use if wanted.
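-#'
-#' A minimal usage sketch (here \code{cLap} stands for a compiled Laplace or
-#' AGHQ approximation of some model; the object names are illustrative only):
-#' \preformatted{
-#' MLE <- cLap$findMLE()
-#' res <- summaryLaplace(cLap, MLE)
-#' res$params        # data frame: estimate, stdError; row names are parameter nodes
-#' res$randomEffects # data frame for random effects (conditional modes)
-#' res$vcov          # covariance matrix with row and column names
-#' }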
-#'
-#' @aliases summaryAGHQ
-#'
-#' @name summaryLaplace
-#'
-#' @export
-summaryLaplace <- function(laplace, MLEoutput,
-                           originalScale = TRUE,
-                           randomEffectsStdError = TRUE,
-                           jointCovariance = FALSE) {
-  summary <- laplace$summary(MLEoutput, originalScale = originalScale,
-                             randomEffectsStdError = randomEffectsStdError,
-                             jointCovariance = jointCovariance)
-  paramNames <- summary$params$names
-  paramEsts <- summary$params$estimate
-  if(length(paramEsts) < length(paramNames)) paramNames <- paramNames[1:(length(paramNames)-1)]
-  names(paramEsts) <- paramNames
-  stdErrParams <- summary$params$stdError
-  paramsDF <- data.frame(estimate = paramEsts, stdError = stdErrParams, row.names = paramNames)
-
-  REnames <- summary$randomEffects$names
-  REests <- summary$randomEffects$estimate
-  if(length(REests) < length(REnames)) REnames <- REnames[1:(length(REnames)-1)]
-  REstdErrs <- summary$randomEffects$stdError
-  if(length(REstdErrs))
-    REDF <- data.frame(estimate = REests, stdError = REstdErrs, row.names = REnames)
-  else
-    REDF <- data.frame(estimate = REests, row.names = REnames)
-
-  vcov <- summary$vcov
-  if (dim(vcov)[1] == length(paramNames)) {
-    colnames(vcov) <- rownames(vcov) <- c(paramNames)
-  } else {
-    colnames(vcov) <- rownames(vcov) <- c(paramNames, REnames)
-  }
-  list(params = paramsDF,
-       randomEffects = REDF,
-       vcov = vcov,
-       logLik = MLEoutput$value,
-       df = length(paramEsts),
-       originalScale = originalScale)
-}
-
-#' @rdname summaryLaplace
-#' @export
-summaryAGHQ <- function(AGHQ, MLEoutput,
-                        originalScale = TRUE,
-                        randomEffectsStdError = TRUE,
-                        jointCovariance = FALSE) {
-  summaryLaplace(AGHQ, MLEoutput, originalScale, randomEffectsStdError, jointCovariance)
-}
-
-#' Combine steps of running Laplace or adaptive Gauss-Hermite quadrature approximation
-#'
-#' Use an approximation (compiled or uncompiled) returned from
-#' `buildLaplace` or `buildAGHQ` to find the maximum likelihood estimate and return it
-#' with random effects estimates and/or standard errors.
-#'
-#' @aliases runAGHQ runLaplace
-#'
-#' @param laplace A (compiled or uncompiled) nimble Laplace approximation object
-#'   returned from `buildLaplace` or `buildAGHQ`. These return the same type of
-#'   approximation algorithm object. `buildLaplace` is simply `buildAGHQ`
-#'   with `nQuad=1`.
-#'
-#' @param AGHQ Same as \code{laplace}.
-#'
-#' @param pStart Initial values for parameters to begin optimization search for
-#'   the maximum likelihood estimates. If omitted, the values currently in the
-#'   (compiled or uncompiled) model object will be used.
-#'
-#' @param originalScale If \code{TRUE}, return all results on the original scale
-#'   of the parameters and/or random effects as written in the model. Otherwise,
-#'   return all results on potentially unconstrained transformed scales that are
-#'   used in the actual computations. Transformed scales (parameterizations) are
-#'   used if any parameter or random effect has constraint(s) on its support
-#'   (range of allowed values). Default = \code{TRUE}.
-#'
-#' @param randomEffectsStdError If \code{TRUE}, include standard errors for the
-#'   random effects estimates. Default = \code{TRUE}.
-#'
-#' @param jointCovariance If \code{TRUE}, return the full joint covariance
-#'   matrix (from the inverse of the negative Hessian) of parameters and random
-#'   effects. Default = \code{FALSE}.
-#'
-#' @details
-#'
-#' Adaptive Gauss-Hermite quadrature is a generalization of Laplace
-#' approximation. \code{runLaplace} simply calls \code{runAGHQ} and provides a
-#' convenient name.
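-#'
-#' (For orientation: AGHQ with \code{nQuad} points per dimension evaluates the
-#' integrand at Gauss-Hermite nodes recentered and rescaled using the inner
-#' mode and curvature; with \code{nQuad = 1} the single node is the mode
-#' itself, which recovers the Laplace approximation.)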
-#'
-#' These functions manage the steps of calling the `findMLE` method to obtain
-#' the maximum likelihood estimate of the parameters and then the
-#' `summaryLaplace` function to obtain standard errors, (optionally) random
-#' effects estimates (conditional modes), their standard errors, and the full
-#' parameter-random effects covariance matrix.
-#'
-#' Note that for `nQuad > 1` (see \code{\link{buildAGHQ}}), i.e., AGHQ with
-#' higher order than Laplace approximation, maximum likelihood estimation is
-#' available only if all random effects integrations are univariate. With
-#' multivariate random effects integrations, one can use `nQuad > 1` only to
-#' calculate marginal log likelihoods at given parameter values. This is useful
-#' for checking the accuracy of the log likelihood at the MLE obtained for
-#' Laplace approximation (`nQuad == 1`). `nQuad` can be changed using the
-#' `updateSettings` method of the approximation object.
-#'
-#' See \code{\link{summaryLaplace}}, which is called for the summary components.
-#'
-#' @return
-#'
-#' A list with elements \code{MLE} and \code{summary}.
-#'
-#' \code{MLE} is the result of the \code{findMLE} method, which contains the
-#' parameter estimates and Hessian matrix. This is considered raw output, and
-#' one should normally use instead the contents of \code{summary}. (For example,
-#' note that the Hessian matrix in \code{MLE} may not correspond to the same
-#' scale as the parameter estimates if a transformation was used to operate in
-#' an unconstrained parameter space.)
-#'
-#' \code{summary} is the result of \code{summaryLaplace} (or equivalently
-#' \code{summaryAGHQ}), which contains parameter estimates and standard errors,
-#' and optionally other requested components. All results in this object will be
-#' on the same scale (parameterization), either original or transformed, as
-#' requested.
-#'
-#' @export
-runLaplace <- function(laplace, pStart,
-                       originalScale = TRUE,
-                       randomEffectsStdError = TRUE,
-                       jointCovariance = FALSE) {
-  if(missing(pStart)) pStart <- Inf # code to use values in model
-  runAGHQ(AGHQ = laplace, pStart, originalScale, randomEffectsStdError,
-          jointCovariance)
-}
-
-#' @rdname runLaplace
-#' @export
-runAGHQ <- function(AGHQ, pStart,
-                    originalScale = TRUE,
-                    randomEffectsStdError = TRUE,
-                    jointCovariance = FALSE) {
-  if(missing(AGHQ)) stop('runAGHQ: must provide a NIMBLE Laplace or AGHQ algorithm')
-  if(!identical(nfGetDefVar(AGHQ, 'name'), 'AGHQ'))
-    stop('runAGHQ: AGHQ or Laplace argument must be a NIMBLE Laplace or AGHQ algorithm (compiled or uncompiled) from `buildLaplace` or `buildAGHQ`.')
-  if(!is.Cnf(AGHQ)) {
-    messageIfVerbose(' [Warning] Running an uncompiled Laplace or AGHQ algorithm.\n',
-                     '           Use `compileNimble()` for faster execution.')
-    tmp <- AGHQ$gr_logLik_pTransformed
-    tmp <- AGHQ$calcLogLik_pTransformed
-    for(i in seq_along(AGHQ$AGHQuad_nfl)) {
-      tmp <- AGHQ$AGHQuad_nfl[[i]]$gr_inner_logLik
-      tmp <- AGHQ$AGHQuad_nfl[[i]]$he_inner_logLik
-    }
-  }
-
-  if(missing(pStart)) pStart <- Inf # code to use values in the model
-
-  opt <- try(AGHQ$findMLE(pStart = pStart, hessian = TRUE))
-  if(inherits(opt, "try-error"))
-    stop("runAGHQ: the `findMLE` method had an error.")
-
-  summary <- try(summaryLaplace(laplace=AGHQ, MLEoutput=opt,
-                                originalScale=originalScale,
-                                randomEffectsStdError=randomEffectsStdError,
-                                jointCovariance=jointCovariance))
-  if(inherits(summary, "try-error")) {
-    messageIfVerbose(" [Warning] `summaryLaplace` had an error. Only the MLE result will be returned.")
-    summary <- NULL
-  }
-  list(MLE = opt, summary=summary)
-}
-
-#' Laplace approximation and adaptive Gauss-Hermite quadrature
-#'
-#' Build a Laplace or AGHQ approximation algorithm for a given NIMBLE model.
-#'
-#' @param model a NIMBLE model object, such as returned by \code{nimbleModel}.
-#'   The model must have automatic derivatives (AD) turned on, e.g. by using
-#'   \code{buildDerivs=TRUE} in \code{nimbleModel}.
-#' @param nQuad number of quadrature points for AGHQ (in one dimension). Laplace
-#'   approximation is AGHQ with `nQuad=1`. Odd numbers of quadrature points are
-#'   recommended, since they place one point at the adapted mode. Often only one
-#'   or a few points can achieve high accuracy. A maximum of 35 points is
-#'   supported. Note that for multivariate quadratures, the number of quadrature
-#'   nodes will be nQuad^(number of dimensions).
-#' @param paramNodes a character vector of names of parameter nodes in the
-#'   model; defaults are provided by \code{\link{setupMargNodes}}.
-#'   Alternatively, \code{paramNodes} can be a list in the format returned by
-#'   \code{setupMargNodes}, in which case \code{randomEffectsNodes},
-#'   \code{calcNodes}, and \code{calcNodesOther} are not needed (and will be
-#'   ignored).
-#' @param randomEffectsNodes a character vector of names of continuous
-#'   unobserved (latent) nodes to marginalize (integrate) over using Laplace/AGHQ
-#'   approximation; defaults are provided by \code{\link{setupMargNodes}}.
-#' @param calcNodes a character vector of names of nodes for calculating the
-#'   integrand for Laplace/AGHQ approximation; defaults are provided by
-#'   \code{\link{setupMargNodes}}. There may be deterministic nodes between
-#'   \code{paramNodes} and \code{calcNodes}. These will be included in
-#'   calculations automatically and thus do not need to be included in
-#'   \code{calcNodes} (but there is no problem if they are).
-#' @param calcNodesOther a character vector of names of nodes for calculating
-#'   terms in the log-likelihood that do not depend on any
-#'   \code{randomEffectsNodes}, and thus are not part of the marginalization,
-#'   but should be included for purposes of finding the MLE. This defaults to
-#'   stochastic nodes that depend on \code{paramNodes} but are not part of and
-#'   do not depend on \code{randomEffectsNodes}. There may be deterministic
-#'   nodes between \code{paramNodes} and \code{calcNodesOther}. These will be
-#'   included in calculations automatically and thus do not need to be included
-#'   in \code{calcNodesOther} (but there is no problem if they are).
-#' @param control a named list for providing additional settings used in Laplace/AGHQ
-#'   approximation. See \code{control} section below. Most of these can be
-#'   updated later with the `updateSettings` method.
-#'
-#' @section \code{buildLaplace} and \code{buildAGHQ}:
-#'
-#' \code{buildLaplace} creates an object that can run Laplace approximation
-#' for a given model or part of a model. \code{buildAGHQ} creates an object
-#' that can run adaptive Gauss-Hermite quadrature (AGHQ, sometimes called
-#' "adaptive Gaussian quadrature") for a given model or part of a model.
-#' Laplace approximation is AGHQ with one quadrature point, hence
-#' `buildLaplace` simply calls `buildAGHQ` with `nQuad=1`. These methods
-#' approximate the integration over continuous random effects in a
-#' hierarchical model to calculate the (marginal) likelihood.
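-#'
-#' A minimal sketch of the workflow (an illustrative random-intercept model,
-#' not an example from this package):
-#' \preformatted{
-#' code <- nimbleCode({
-#'   mu ~ dflat()
-#'   sigma ~ dhalfflat()
-#'   for(i in 1:10) {
-#'     re[i] ~ dnorm(mu, sd = sigma)
-#'     y[i] ~ dpois(exp(re[i]))
-#'   }
-#' })
-#' model <- nimbleModel(code, data = list(y = rpois(10, 5)),
-#'                      inits = list(mu = 0, sigma = 1, re = rep(0, 10)),
-#'                      buildDerivs = TRUE)
-#' lap  <- buildLaplace(model)
-#' comp <- compileNimble(model, lap)
-#' res  <- runLaplace(comp$lap)
-#' res$summary$params
-#' }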
-#'
-#' \code{buildAGHQ} and \code{buildLaplace} will by default (unless changed
-#' manually via `control$split`) determine from the model which random effects
-#' can be integrated over (marginalized) independently. For example, in a GLMM
-#' with a grouping factor and an independent random effect intercept for each
-#' group, the random effects can be marginalized as a set of univariate
-#' approximations rather than one multivariate approximation. On the other hand,
-#' correlated or nested random effects would require multivariate marginalization.
-#'
-#' Maximum likelihood estimation is available for Laplace approximation
-#' (`nQuad=1`) with univariate or multivariate integrations. With `nQuad > 1`,
-#' maximum likelihood estimation is available only if all integrations are
-#' univariate (e.g., a set of univariate random effects). If there are
-#' multivariate integrations, these can be calculated at chosen input parameters
-#' but not maximized over parameters. For example, one can find the MLE based on
-#' Laplace approximation and then increase `nQuad` (using the `updateSettings`
-#' method below) to check on accuracy of the marginal log likelihood at the MLE.
-#'
-#' Beware that quadrature will use `nQuad^k` quadrature points, where `k` is the
-#' dimension of each integration. Therefore quadrature for `k` greater than 2 or
-#' 3 can be slow. As just noted, `buildAGHQ` will determine independent
-#' dimensions of quadrature, so it is fine to have a set of univariate random
-#' effects, as these will each have k=1. Multivariate quadrature (k>1) is only
-#' necessary for nested, correlated, or otherwise dependent random effects.
-#'
-#' The recommended way to find the maximum likelihood estimate and associated
-#' outputs is by calling \code{\link{runLaplace}} or \code{\link{runAGHQ}}. The
-#' input should be the compiled Laplace or AGHQ algorithm object. This would be
-#' produced by running \code{\link{compileNimble}} with input that is the result
-#' of \code{buildLaplace} or \code{buildAGHQ}.
-#'
-#' For more granular control, see below for methods \code{findMLE} and
-#' \code{summary}. See function \code{\link{summaryLaplace}} for an easier way
-#' to call the \code{summary} method and obtain results that include node
-#' names. These steps are all done within \code{runLaplace} and
-#' \code{runAGHQ}.
-#'
-#' The NIMBLE User Manual at r-nimble.org also contains an example of Laplace
-#' approximation.
-#'
-#' @section How input nodes are processed:
-#'
-#' \code{buildLaplace} and \code{buildAGHQ} try to make sensible default
-#' decisions about what to do with the input model and any (optional) node
-#' arguments. However, random effects (over which approximate integration will
-#' be done) can be written in models in multiple equivalent ways, and customized
-#' use cases may call for integrating over chosen parts of a model. Hence, one
-#' can take full charge of how different parts of the model will be used.
-#'
-#' Any of the input node vectors, when provided, will be processed using
-#' \code{nodes <- model$expandNodeNames(nodes)}, where \code{nodes} may be
-#' \code{paramNodes}, \code{randomEffectsNodes}, and so on. This step allows
-#' any of the inputs to include node-name-like syntax that might contain
-#' multiple nodes. For example, \code{paramNodes = 'beta[1:10]'} can be
-#' provided if there are actually 10 scalar parameters, 'beta[1]' through
-#' 'beta[10]'. The actual node names in the model will be determined by the
-#' \code{expandNodeNames} step.
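-#'
-#' \preformatted{
-#' ## Illustration (a hypothetical model with scalar parameters beta[1]..beta[10]):
-#' model$expandNodeNames('beta[1:10]')
-#' ## [1] "beta[1]" "beta[2]" ... "beta[10]"
-#' }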
-#'
-#' In many (but not all) cases, one only needs to provide a NIMBLE model object
-#' and then the function will construct reasonable defaults necessary for
-#' Laplace approximation to marginalize over all continuous latent states
-#' (aka random effects) in a model. The default values for the four groups of
-#' nodes are obtained by calling \code{\link{setupMargNodes}}, whose arguments
-#' match those here (except for a few arguments which are taken from control
-#' list elements here).
-#'
-#' \code{setupMargNodes} tries to give sensible defaults from
-#' any combination of \code{paramNodes}, \code{randomEffectsNodes},
-#' \code{calcNodes}, and \code{calcNodesOther} that are provided. For example,
-#' if you provide only \code{randomEffectsNodes} (perhaps you want to
-#' marginalize over only some of the random effects in your model),
-#' \code{setupMargNodes} will try to determine appropriate choices for the
-#' others.
-#'
-#' \code{setupMargNodes} also determines which integration dimensions are
-#' conditionally independent, i.e., which can be done separately from each
-#' other. For example, when possible, 10 univariate random effects will be split
-#' into 10 univariate integration problems rather than one 10-dimensional
-#' integration problem.
-#'
-#' The defaults make general assumptions such as that
-#' \code{randomEffectsNodes} have \code{paramNodes} as parents. However, the
-#' steps for determining defaults are not simple, and it is possible that they
-#' will be refined in the future. It is also possible that they simply don't
-#' give what you want for a particular model. One example where they will not
-#' give desired results is when random effects have no prior
-#' parameters, such as `N(0,1)` nodes that will be multiplied by a scale
-#' factor (e.g. sigma) and added to other explanatory terms in a model. Such
-#' nodes look like top-level parameters in terms of model structure, so
-#' you must provide a \code{randomEffectsNodes} argument to indicate which
-#' they are.
-#'
-#' It can be helpful to call \code{setupMargNodes} directly to see exactly how
-#' nodes will be arranged for Laplace approximation. For example, you may want
-#' to verify the choice of \code{randomEffectsNodes} or get the order of
-#' parameters it has established to use for making sense of the MLE and
-#' results from the \code{summary} method. One can also call
-#' \code{setupMargNodes}, customize the returned list, and then provide that
-#' to \code{buildLaplace} as \code{paramNodes}. In that case,
-#' \code{setupMargNodes} will not be called (again) by \code{buildLaplace}.
-#'
-#' If \code{setupMargNodes} is emitting an unnecessary warning, simply use
-#' \code{control=list(check=FALSE)}.
-#'
-#' @section Managing parameter transformations that may be used internally:
-#'
-#' If any \code{paramNodes} (parameters) or \code{randomEffectsNodes} (random
-#' effects / latent states) have constraints on the range of valid values
-#' (because of the distribution they follow), they will be used on a
-#' transformed scale determined by \code{parameterTransform}. This means the
-#' Laplace approximation itself will be done on the transformed scale for
-#' random effects and finding the MLE will be done on the transformed scale
-#' for parameters. For parameters, prior distributions are not included in
-#' calculations, but they are used to determine valid parameter ranges and
-#' hence to set up any transformations.
-#' For example, if \code{sigma} is a
-#' standard deviation, you can declare it with a prior such as \code{sigma ~
-#' dhalfflat()} to indicate that it must be greater than 0.
-#'
-#' For default determination of when transformations are needed, all parameters
-#' must have a prior distribution simply to indicate the range of valid
-#' values. For a parameter \code{p} that has no constraint, a simple choice is
-#' \code{p ~ dflat()}.
-#'
-#' @section Understanding inner and outer optimizations:
-#'
-#' Note that there are two numerical optimizations when finding maximum
-#' likelihood estimates with a Laplace or (1D) AGHQ algorithm: (1) maximizing
-#' the joint log-likelihood of random effects and data given a parameter value
-#' to construct the approximation to the marginal log-likelihood at the given
-#' parameter value; (2) maximizing the approximation to the marginal
-#' log-likelihood over the parameters. In what follows, the prefix 'inner'
-#' refers to optimization (1) and 'outer' refers to optimization (2). Currently
-#' both optimizations default to using method \code{"nlminb"}. However, one can
-#' use other optimizers or simply run optimization (2) manually from R; see the
-#' example below. In some problems, choice of inner and/or outer optimizer can
-#' make a big difference for obtaining accurate results, especially for standard
-#' errors. Hence it is worth experimenting if one is in doubt.
-#'
-#' @section \code{control} list arguments:
-#'
-#' The \code{control} list allows additional settings to be made using named
-#' elements of the list. Most (or all) of these can be updated later using the
-#' `updateSettings` method. Supported elements include:
-#'
-#' \itemize{
-#'
-#' \item \code{split}. If TRUE (default), \code{randomEffectsNodes} will be
-#'       split into conditionally independent sets if possible. This
-#'       facilitates more efficient Laplace or AGHQ approximation because each
-#'       conditionally independent set can be marginalized independently. If
-#'       FALSE, \code{randomEffectsNodes} will be handled as one multivariate
-#'       block, with one multivariate approximation. If \code{split} is a
-#'       numeric vector, \code{randomEffectsNodes} will be split by calling
-#'       \code{split}(\code{randomEffectsNodes}, \code{control$split}). The
-#'       last option allows arbitrary control over how
-#'       \code{randomEffectsNodes} are blocked.
-#'
-#' \item \code{check}. If TRUE (default), a warning is issued if
-#'       \code{paramNodes}, \code{randomEffectsNodes} and/or \code{calcNodes}
-#'       are provided but seem to have missing or unnecessary
-#'       elements based on some default inspections of the model. If
-#'       unnecessary warnings are emitted, simply set \code{check=FALSE}.
-#'
-#' \item \code{innerOptimControl}. A list (either an R list or an
-#'       `optimControlNimbleList`) of control parameters for the inner
-#'       optimization of Laplace approximation using \code{nimOptim}. See
-#'       'Details' of \code{\link{nimOptim}} for further information. Default
-#'       is `nimOptimDefaultControl()`.
-#'
-#' \item \code{innerOptimMethod}. Optimization method to be used in
-#'       \code{nimOptim} for the inner optimization. See 'Details' of
-#'       \code{\link{nimOptim}}. Currently \code{nimOptim} in NIMBLE supports:
-#'       \code{"Nelder-Mead"}, \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"},
-#'       \code{"nlminb"}, \code{"bobyqa"}, and user-provided optimizers. By default, method
-#'       \code{"nlminb"} is used for both univariate and multivariate cases.
-#'
-#' @section Understanding inner and outer optimizations:
-#'
-#' Note that there are two numerical optimizations when finding maximum
-#' likelihood estimates with a Laplace or (1D) AGHQ algorithm: (1) maximizing
-#' the joint log-likelihood of random effects and data given a parameter value
-#' to construct the approximation to the marginal log-likelihood at the given
-#' parameter value; (2) maximizing the approximation to the marginal
-#' log-likelihood over the parameters. In what follows, the prefix 'inner'
-#' refers to optimization (1) and 'outer' refers to optimization (2). Currently
-#' both optimizations default to using method \code{"nlminb"}. However, one can
-#' use other optimizers or simply run optimization (2) manually from R; see the
-#' example below. In some problems, the choice of inner and/or outer optimizer
-#' can make a big difference for obtaining accurate results, especially for
-#' standard errors. Hence it is worth experimenting if one is in doubt.
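-#'
-#' For example (a sketch; \code{CLaplace} stands for a compiled approximation
-#' object):
-#'
-#' \preformatted{
-#' CLaplace$updateSettings(innerOptimMethod = "BFGS")
-#' mle1 <- CLaplace$findMLE()
-#' CLaplace$updateSettings(innerOptimMethod = "nlminb")
-#' mle2 <- CLaplace$findMLE()   # compare mle1$par and mle2$par
-#' }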
-#'
-#' @section \code{control} list arguments:
-#'
-#' The \code{control} list allows additional settings to be made using named
-#' elements of the list. Most (or all) of these can be updated later using the
-#' `updateSettings` method. Supported elements include:
-#'
-#' \itemize{
-#'
-#' \item \code{split}. If TRUE (default), \code{randomEffectsNodes} will be
-#'       split into conditionally independent sets if possible. This
-#'       facilitates more efficient Laplace or AGHQ approximation because each
-#'       conditionally independent set can be marginalized independently. If
-#'       FALSE, \code{randomEffectsNodes} will be handled as one multivariate
-#'       block, with one multivariate approximation. If \code{split} is a
-#'       numeric vector, \code{randomEffectsNodes} will be split by calling
-#'       \code{split}(\code{randomEffectsNodes}, \code{control$split}). The
-#'       last option allows arbitrary control over how
-#'       \code{randomEffectsNodes} are blocked.
-#'
-#' \item \code{check}. If TRUE (default), a warning is issued if
-#'       \code{paramNodes}, \code{randomEffectsNodes} and/or \code{calcNodes}
-#'       are provided but seem to have missing or unnecessary
-#'       elements based on some default inspections of the model. If
-#'       unnecessary warnings are emitted, simply set \code{check=FALSE}.
-#'
-#' \item \code{innerOptimControl}. A list (either an R list or an
-#'       `optimControlNimbleList`) of control parameters for the inner
-#'       optimization of Laplace approximation using \code{nimOptim}. See
-#'       'Details' of \code{\link{nimOptim}} for further information. Default
-#'       is `nimOptimDefaultControl()`.
-#'
-#' \item \code{innerOptimMethod}. Optimization method to be used in
-#'       \code{nimOptim} for the inner optimization. See 'Details' of
-#'       \code{\link{nimOptim}}. Currently \code{nimOptim} in NIMBLE supports:
-#'       \code{"Nelder-Mead"}, \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"},
-#'       \code{"nlminb"}, \code{"bobyqa"}, and user-provided optimizers. By default, method
-#'       \code{"nlminb"} is used for both univariate and multivariate cases. For
-#'       \code{"nlminb"}, \code{"bobyqa"}, or user-provided optimizers, only a subset of
-#'       elements of the \code{innerOptimControl} list are supported. (Note
-#'       that control over the outer optimization method is available as an
-#'       argument to `findMLE`.) Choice of optimizers can be important and so
-#'       can be worth exploring.
-#'
-#' \item \code{innerOptimStart}. Method for determining starting values for
-#'       the inner optimization. Options are:
-#'
-#'       \itemize{
-#'
-#'       \item \code{"last.best"} (default): use optimized random effects values corresponding to
-#'             the best outer optimization (i.e. the largest marginal log likelihood value) so far
-#'             for each conditionally independent part of the approximation;
-#'
-#'       \item \code{"last"}: use the result of the last inner optimization;
-#'
-#'       \item \code{"zero"}: use all zeros;
-#'
-#'       \item \code{"constant"}: always use the same values, determined by
-#'             \code{innerOptimStartValues};
-#'
-#'       \item \code{"random"}: randomly draw new starting values from the
-#'             model (i.e., from the prior);
-#'
-#'       \item \code{"model"}: use values for random effects stored in the
-#'             model, which are determined from the first call.
-#'
-#'       }
-#'
-#'       Note that \code{"model"} and \code{"zero"} are shorthand for
-#'       \code{"constant"} with particular choices of
-#'       \code{innerOptimStartValues}. Note that \code{"last"} and
-#'       \code{"last.best"} require a choice for the very first values, which will
-#'       come from \code{innerOptimStartValues}. The default is
-#'       \code{innerOptimStart="last.best"} and may change in the future.
-#'
-#' \item \code{innerOptimStartValues}. Values for some of
-#'       \code{innerOptimStart} approaches. If a scalar is provided, that
-#'       value is used for all elements of random effects for each
-#'       conditionally independent set. If a vector is provided, it must be
-#'       the length of *all* random effects. If these are named (by node
-#'       names), the names will be used to split them correctly among each
-#'       conditionally independent set of random effects. If they are not
-#'       named, it is not always obvious what the order should be because it
-#'       may depend on the conditionally independent sets of random
-#'       effects. It should match the order of names returned as part of
-#'       `summaryLaplace`.
-#'
-#' \item \code{innerOptimWarning}. If FALSE (default), do not emit warnings
-#'       from the inner optimization. Optimization methods may sometimes emit a
-#'       warning such as for bad parameter values encountered during the
-#'       optimization search. Often, a method can recover and still find the
-#'       optimum. In the approximations here, sometimes the inner optimization
-#'       search can fail entirely, yet the outer optimization sees this as one failed
-#'       parameter value and can recover. Hence, it is often desirable to silence
-#'       warnings from the inner optimizer, and this is done by default. Set
-#'       \code{innerOptimWarning=TRUE} to see all warnings.
-#'
-#' \item \code{useInnerCache}. If TRUE (default), use a caching system for
-#'       efficiency of inner optimizations. The caching system records one set of
-#'       previous parameters and uses the corresponding results if those parameters
-#'       are used again (e.g., in a gradient call). This should generally not be
-#'       modified.
-#'
-#' \item \code{outerOptimMethod}. Optimization method to be used in
-#'       \code{nimOptim} for the outer optimization. See 'Details' of
-#'       \code{\link{nimOptim}}. Currently \code{nimOptim} in NIMBLE supports:
-#'       \code{"Nelder-Mead"}, \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"},
-#'       \code{"nlminb"}, \code{"bobyqa"}, and user-provided optimizers. By default, method
-#'       \code{"nlminb"} is used for both univariate and multivariate cases,
-#'       although some problems may benefit from other choices. For
-#'       \code{"nlminb"}, \code{"bobyqa"}, or user-provided optimizers, only a subset of
-#'       elements of the \code{outerOptimControl} list are supported. (Note
-#'       that control over the outer optimization method is also available as an
-#'       argument to `findMLE`.) Choice of optimizers can be important and so
-#'       can be worth exploring.
-#'
-#' \item \code{outerOptimControl}. A list of control parameters for maximizing
-#'       the Laplace log-likelihood using \code{nimOptim}. See 'Details' of
-#'       \code{\link{nimOptim}} for further information.
-#'
-#' \item \code{computeMethod}. There are three approaches available for
-#'       internal details of how the approximations, and specifically derivatives
-#'       involved in their calculation, are handled. These are labeled simply 1, 2,
-#'       and 3, and the default is 2. The relative performance of the methods will
-#'       depend on the specific model. Users wanting to explore efficiency can try
-#'       switching from method 2 (default) to methods 1 or 3 and comparing
-#'       performance. The first Laplace approximation with each method will be
-#'       (much) slower than subsequent Laplace approximations. Further details are
-#'       not provided at this time.
-#'
-#' \item \code{gridType} (relevant only when \code{nQuad>1}). For multivariate AGHQ,
-#'       a grid must be constructed based on the Hessian at the inner mode. Options
-#'       include "cholesky" (default) and "spectral" (i.e., eigenvectors and
-#'       eigenvalues) for the corresponding matrix decompositions on which the grid
-#'       can be based.
-#'
-#' } # end itemize
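-#'
-#' For example, some of these elements could be set at build time as in the
-#' following sketch (values are purely illustrative):
-#'
-#' \preformatted{
-#' approx <- buildLaplace(model,
-#'   control = list(innerOptimMethod = "BFGS",
-#'                  innerOptimStart = "constant",
-#'                  innerOptimStartValues = 0,
-#'                  check = FALSE))
-#' }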
-#'
-#' @section Available methods:
-#'
-#' The object returned by \code{buildLaplace} or \code{buildAGHQ} is a nimbleFunction object with
-#' numerous methods (functions). Here these are described in three tiers of user
-#' relevance.
-#'
-#' @section Most useful methods:
-#'
-#' The most relevant methods to a user are:
-#'
-#' \itemize{
-#'
-#' \item \code{calcLogLik(p, trans=FALSE)}. Calculate the approximation to the
-#'       marginal log-likelihood function at parameter value \code{p}, which (if
-#'       \code{trans} is FALSE) should match the order of \code{paramNodes}. For
-#'       any non-scalar nodes in \code{paramNodes}, the order within the node is
-#'       column-major. The order of names can be obtained from method
-#'       \code{getNodeNamesVec(TRUE)}. Return value is the scalar (approximate,
-#'       marginal) log likelihood.
-#'
-#'       If \code{trans} is TRUE, then \code{p} is the vector of parameters on
-#'       the transformed scale, if any, described above. In this case, the
-#'       parameters on the original scale (as the model was written) will be
-#'       determined by calling the method \code{pInverseTransform(p)}. Note that
-#'       the length of the parameter vector on the transformed scale might not
-#'       be the same as on the original scale (because some constraints of
-#'       non-scalar parameters result in fewer free transformed parameters than
-#'       original parameters).
-#'
-#' \item \code{calcLaplace(p, trans)}. This is the same as \code{calcLogLik} but
-#'       requires that the approximation be Laplace (i.e., \code{nQuad} is 1),
-#'       and results in an error otherwise.
-#'
-#' \item \code{findMLE(pStart, hessian)}. Find the maximum likelihood
-#'       estimates of parameters using the approximated marginal likelihood.
-#'       This can be used if \code{nQuad} is 1 (Laplace case) or if
-#'       \code{nQuad>1} and all marginalizations involve only univariate
-#'       random effects. Arguments are \code{pStart}: initial parameter
-#'       values (defaults to parameter values currently in the model);
-#'       and \code{hessian}: whether to calculate and return the
-#'       Hessian matrix (defaults to \code{TRUE}, which is required for
-#'       subsequent use of the \code{summary} method). Second derivatives in the
-#'       Hessian are determined by finite differences of the gradients
-#'       obtained by automatic differentiation (AD). Return value is a
-#'       nimbleList of type \code{optimResultNimbleList}, similar to what is
-#'       returned by R's optim. See \code{help(nimOptim)}. Note that
-#'       parameters (\code{par}) are returned on the natural scale, i.e. as
-#'       they are defined in the model. But the \code{hessian}, if requested, is
-#'       computed for the parameters as transformed for optimization if
-#'       necessary. Hence one must be careful interpreting `hessian` if any
-#'       parameters have constraints, and the safest next step is to use the
-#'       \code{summary} method or \code{summaryLaplace} function.
-#'
-#' \item \code{summary(MLEoutput, originalScale, randomEffectsStdError,
-#'       jointCovariance)}. Summarize the maximum likelihood estimation
-#'       results, given object \code{MLEoutput} that was returned by
-#'       \code{findMLE}. The summary can include a covariance matrix for the
-#'       parameters, the random effects, or both, and these can be returned on
-#'       the original parameter scale or on the (potentially) transformed
-#'       scale(s) used in estimation. It is often preferred instead to call the
-#'       function (not method) `summaryLaplace` because this will attach
-#'       parameter and random effects names (i.e., node names) to the results.
-#'
-#'       In more detail, \code{summary} accepts the following optional arguments:
-#'
-#'       \itemize{
-#'
-#'       \item \code{originalScale}. Logical. If TRUE, the function returns
-#'             results on the original scale(s) of parameters and random effects;
-#'             otherwise, it returns results on the transformed scale(s). If there
-#'             are no constraints, the two scales are identical. Defaults to TRUE.
-#'
-#'       \item \code{randomEffectsStdError}. Logical. If TRUE, standard
-#'             errors of random effects will be calculated.
-#'             Defaults to TRUE.
-#'
-#'       \item \code{jointCovariance}. Logical. If TRUE, the joint
-#'             variance-covariance matrix of the parameters and the random effects
-#'             will be returned. If FALSE, the variance-covariance matrix of the
-#'             parameters will be returned. Defaults to FALSE.
-#'
-#'       }
-#'
-#'       The object returned by \code{summary} is an \code{AGHQuad_summary}
-#'       nimbleList with elements:
-#'
-#'       \itemize{
-#'
-#'       \item \code{params}. A nimbleList that contains estimates and
-#'             standard errors of parameters (on the original or transformed
-#'             scale, as chosen by \code{originalScale}).
-#'
-#'       \item \code{randomEffects}. A nimbleList that contains estimates of
-#'             random effects and, if requested
-#'             (\code{randomEffectsStdError=TRUE}), their standard errors, on
-#'             original or transformed scale. Standard errors are calculated
-#'             following the generalized delta method of Kass and Steffey (1989).
-#'
-#'       \item \code{vcov}. If requested (i.e.,
-#'             \code{jointCovariance=TRUE}), the joint variance-covariance
-#'             matrix of the parameters and random effects, on original or
-#'             transformed scale. If \code{jointCovariance=FALSE}, the
-#'             covariance matrix of the parameters, on original or transformed
-#'             scale.
-#'
-#'       \item \code{scale}. \code{"original"} or \code{"transformed"}, the
-#'             scale on which results were requested.
-#'       }
-#' }
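-#'
-#' A typical workflow with these methods is sketched below (\code{CLaplace}
-#' stands for a compiled approximation object):
-#'
-#' \preformatted{
-#' MLE <- CLaplace$findMLE()        # hessian = TRUE by default
-#' res <- summaryLaplace(CLaplace, MLE, randomEffectsStdError = TRUE)
-#' res$params                       # named estimates and standard errors
-#' }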
-#'
-#'
-#' @section Methods for more advanced uses:
-#'
-#' Additional methods to access or control more details of the Laplace/AGHQ
-#' approximation include:
-#'
-#' \itemize{
-#'
-#' \item \code{updateSettings}. This provides a single function through which
-#'       many of the settings described above (mostly for the \code{control} list)
-#'       can be later changed. Options that can be changed include:
-#'       \code{innerOptimMethod}, \code{innerOptimStart},
-#'       \code{innerOptimStartValues}, \code{useInnerCache}, \code{nQuad},
-#'       \code{gridType}, \code{innerOptimControl}, \code{outerOptimMethod},
-#'       \code{outerOptimControl}, and \code{computeMethod}.
-#'       For \code{innerOptimStart}, method "zero" cannot be
-#'       specified but can be achieved by choosing method "constant" with
-#'       \code{innerOptimStartValues=0}. Only provided options will be modified. The
-#'       exceptions are \code{innerOptimControl} and \code{outerOptimControl}, which
-#'       are replaced only when \code{replace_innerOptimControl=TRUE} or
-#'       \code{replace_outerOptimControl=TRUE}, respectively.
-#'
-#' \item \code{getNodeNamesVec(returnParams)}. Return a vector (>1) of names
-#'       of parameters/random effects nodes, according to \code{returnParams =
-#'       TRUE/FALSE}. Use this if there is more than one node.
-#'
-#' \item \code{getNodeNameSingle(returnParams)}. Return the name of a
-#'       single parameter/random effect node, according to \code{returnParams =
-#'       TRUE/FALSE}. Use this if there is only one node.
-#'
-#' \item \code{checkInnerConvergence(message)}. Checks whether all internal
-#'       optimizers converged. Returns a zero if everything converged and one
-#'       otherwise. If \code{message = TRUE}, it will print more details about
-#'       convergence for each conditionally independent set.
-#'
-#' \item \code{gr_logLik(p, trans)}. Gradient of the (approximated)
-#'       marginal log-likelihood at parameter value \code{p}. Argument \code{trans}
-#'       is similar to that in \code{calcLaplace}. If there are multiple parameters,
-#'       the vector \code{p} is given in the order of parameter names returned by
-#'       \code{getNodeNamesVec(returnParams=TRUE)}.
-#'
-#' \item \code{gr_Laplace(p, trans)}. This is the same as \code{gr_logLik}.
-#'
-#' \item \code{otherLogLik(p)}. Calculate the \code{calcNodesOther}
-#'       nodes, returning the log-likelihood of the parts of the model that are
-#'       not included in the Laplace or AGHQ approximation.
-#'
-#' \item \code{gr_otherLogLik(p)}. Gradient (vector of derivatives with
-#'       respect to each parameter) of \code{otherLogLik(p)}. Results should
-#'       match \code{gr_otherLogLik_internal(p)} but may be more efficient after
-#'       the first call.
-#'
-#' }
-#'
-#' @section Internal or development methods:
-#'
-#' Some methods are included for calculating the (approximate) marginal log
-#' posterior density by including the prior distribution of the parameters. This
-#' is useful for finding the maximum a posteriori probability (MAP) estimate.
-#' Currently these are provided for point calculations without estimation methods.
-#'
-#' \itemize{
-#'
-#' \item \code{calcPrior_p(p)}. Log density of prior distribution.
-#'
-#' \item \code{calcPrior_pTransformed(pTransform)}. Log density of prior distribution on transformed scale, which includes the Jacobian.
-#'
-#' \item \code{calcPostLogDens(p)}. Marginal log posterior density in terms of the parameter \code{p}.
-#'
-#' \item \code{calcPostLogDens_pTransformed(pTransform)}. Marginal log posterior density in terms of the transformed
-#'       parameter, which includes the Jacobian transformation.
-#'
-#' \item \code{gr_postLogDens_pTransformed(pTransform)}. Gradient of the marginal log posterior density on the transformed scale.
-#'       Other methods used in this derivative, available for more flexible use, include \code{logDetJacobian(pTransform)} and
-#'       \code{gr_logDetJacobian(pTransform)}, as well as \code{gr_prior(p)}.
-#' }
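-#'
-#' For example, a MAP estimate could be sought manually, as in this sketch
-#' (\code{CLaplace} stands for a compiled approximation object and \code{pT}
-#' for starting values on the transformed scale):
-#'
-#' \preformatted{
-#' MAP <- nlminb(pT,
-#'               function(x) -CLaplace$calcPostLogDens_pTransformed(x),
-#'               function(x) -CLaplace$gr_postLogDens_pTransformed(x))
-#' CLaplace$pInverseTransform(MAP$par)   # back to the model's parameter scale
-#' }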
-#'
-#' Finally, methods that are primarily for internal use by other methods include:
-#'
-#' \itemize{
-#'
-#' \item \code{gr_logLik_pTransformed}. Gradient of the Laplace
-#'       approximation (\code{calcLogLik_pTransformed(pTransform)}) at transformed
-#'       (unconstrained) parameter value \code{pTransform}.
-#'
-#' \item \code{pInverseTransform(pTransform)}. Back-transform the transformed
-#'       parameter value \code{pTransform} to original scale.
-#'
-#' \item \code{derivs_pInverseTransform(pTransform, order)}. Derivatives of
-#'       the back-transformation (i.e. inverse of parameter transformation) with
-#'       respect to transformed parameters at \code{pTransform}. Derivative order
-#'       is given by \code{order} (any of 0, 1, and/or 2).
-#'
-#' \item \code{reInverseTransform(reTrans)}. Back-transform the transformed
-#'       random effects value \code{reTrans} to original scale.
-#'
-#' \item \code{derivs_reInverseTransform(reTrans, order)}. Derivatives of the
-#'       back-transformation (i.e. inverse of random effects transformation) with
-#'       respect to transformed random effects at \code{reTrans}. Derivative order
-#'       is given by \code{order} (any of 0, 1, and/or 2).
-#'
-#' \item \code{optimRandomEffects(pTransform)}. Calculate the optimized
-#'       random effects given transformed parameter value \code{pTransform}. The
-#'       optimized random effects are the mode of the conditional distribution of
-#'       random effects given data at parameters \code{pTransform}, i.e. the
-#'       calculation of \code{calcNodes}.
-#'
-#' \item \code{inverse_negHess(p, reTransform)}. Calculate the inverse of the
-#'       negative Hessian matrix of the joint (parameters and random effects)
-#'       log-likelihood with respect to transformed random effects, evaluated at
-#'       parameter value \code{p} and transformed random effects
-#'       \code{reTransform}.
-#'
-#' \item \code{hess_logLik_wrt_p_wrt_re(p, reTransform)}. Calculate the
-#'       Hessian matrix of the joint log-likelihood with respect to parameters and
-#'       transformed random effects, evaluated at parameter value \code{p} and
-#'       transformed random effects \code{reTransform}.
-#'
-#' \item \code{one_time_fixes()}. Users never need to run this. It is called
-#'       when necessary internally to fix dimensionality issues if there is only
-#'       one parameter in the model.
-#'
-#' \item \code{calcLogLik_pTransformed(pTransform)}. Laplace approximation at
-#'       transformed (unconstrained) parameter value \code{pTransform}. To
-#'       make maximizing the Laplace likelihood unconstrained, an automated
-#'       transformation via \code{\link{parameterTransform}} is performed on
-#'       any parameters with constraints indicated by their priors (even
-#'       though the prior probabilities are not used).
-#'
-#' \item \code{gr_otherLogLik_internal(p)}. Gradient (vector of
-#'       derivatives with respect to each parameter) of \code{otherLogLik(p)}.
-#'       This is obtained using automatic differentiation (AD) with single-taping.
-#'       The first call will always be slower than later calls.
-#'
-#' \item \code{cache_outer_logLik(logLikVal)}. Save the marginal log likelihood value
-#'       to the inner Laplace marginalization functions to track the outer maximum internally.
-#'
-#' \item \code{reset_outer_inner_logLik()}. Reset the internal saved maximum marginal log likelihood.
-#'
-#' \item \code{get_inner_cholesky(atOuterMode = integer(0, default = 0))}. Returns the Cholesky factor
-#'       of the negative Hessian with respect to the random effects. If \code{atOuterMode = 1}, the value
-#'       at the overall best marginal likelihood is returned; otherwise (\code{atOuterMode = 0}) the most
-#'       recent value is returned.
-#'
-#' \item \code{get_inner_mode(atOuterMode = integer(0, default = 0))}. Returns the mode of the random effects
-#'       from either the last call to the inner quadrature functions (\code{atOuterMode = 0}) or the last best
-#'       value for the marginal log likelihood (\code{atOuterMode = 1}).
-#'
-#' }
-#'
-#' @author Wei Zhang, Perry de Valpine, Paul van Dam-Bates
-#'
-#' @name laplace
-#'
-#' @aliases Laplace buildLaplace AGHQuad buildAGHQ AGHQ
-#'
-#' @examples
-#' pumpCode <- nimbleCode({
-#'   for (i in 1:N){
-#'     theta[i] ~ dgamma(alpha, beta)
-#'     lambda[i] <- theta[i] * t[i]
-#'     x[i] ~ dpois(lambda[i])
-#'   }
-#'   alpha ~ dexp(1.0)
-#'   beta ~ dgamma(0.1, 1.0)
-#' })
-#' pumpConsts <- list(N = 10, t = c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5))
-#' pumpData <- list(x = c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22))
-#' pumpInits <- list(alpha = 0.1, beta = 0.1, theta = rep(0.1, pumpConsts$N))
-#' pump <- nimbleModel(code = pumpCode, name = "pump", constants = pumpConsts,
-#'                     data = pumpData, inits = pumpInits, buildDerivs = TRUE)
-#'
-#' # Build Laplace approximation
-#' pumpLaplace <- buildLaplace(pump)
-#'
-#' \dontrun{
-#' # Compile the model
-#' Cpump <- compileNimble(pump)
-#' CpumpLaplace <- compileNimble(pumpLaplace, project = pump)
-#' # Calculate MLEs of parameters
-#' MLEres <- CpumpLaplace$findMLE()
-#' # Calculate estimates and standard errors for parameters and random effects on original scale
-#' allres <- CpumpLaplace$summary(MLEres, randomEffectsStdError = TRUE)
-#'
-#' # Change the settings and also illustrate runLaplace
-#' CpumpLaplace$updateSettings(innerOptimControl = list(maxit = 1000),
-#'                             replace_innerOptimControl = TRUE)
-#' newres <- runLaplace(CpumpLaplace)
-#'
-#' # Illustrate use of the component log likelihood and gradient functions to
-#' # run an optimizer manually from R.
-#' # Use nlminb to find MLEs
-#' MLEres.manual <- nlminb(c(0.1, 0.1),
-#'                         function(x) -CpumpLaplace$calcLogLik(x),
-#'                         function(x) -CpumpLaplace$gr_Laplace(x))
-#' }
-#'
-#' @references
-#'
-#' Kass, R. and Steffey, D. (1989). Approximate Bayesian inference in
-#' conditionally independent hierarchical models (parametric empirical Bayes
-#' models). \emph{Journal of the American Statistical Association}, 84(407),
-#' 717-726.
-#'
-#' Liu, Q. and Pierce, D. A. (1994). A Note on Gauss-Hermite Quadrature. \emph{Biometrika}, 81(3), 624-629.
-#'
-#' Jackel, P. (2005). A note on multivariate Gauss-Hermite quadrature. London: \emph{ABN-Amro. Re.}
-#'
-#' Skaug, H. and Fournier, D. (2006). Automatic approximation of the marginal
-#' likelihood in non-Gaussian hierarchical models. \emph{Computational
-#' Statistics & Data Analysis}, 56, 699-709.
-#'
-NULL
-
-# The following code takes as input a compiled Laplace approximation and returns
-# a list of functions sharing an environment that provide access to the pieces
-# of Laplace approximation. The main trick is to build callable interfaces to
-# the AGHQuad_nfl elements (conditionally independent Laplace approx's), which
-# are nested and so not interfaced by default. The original purpose was to
-# experiment with inner (and outer) optimization methods. This is very useful
-# but is not a package feature, so I am leaving the source code on display here
-# but commenting it out.
-## laplaceRpieces <- function(cLaplace) {
-##   # limited to methodID==2
-##   # uses -logLik as the working sign, so
-##   # optimization will be minimization instead of maximization
-##   cLaplace$one_time_fixes()
-##   RLaplace <- cLaplace$Robject
-##   cModel <- RLaplace$model$CobjectInterface
-##   paramNodes <- RLaplace$paramNodes
-##   param_values <- function(v) {
-##     if(missing(v)) return(values(cModel, paramNodes))
-##     else values(cModel, paramNodes) <- v
-##   }
-##   promoteCallable <- function(RoneLaplace, modify=TRUE) {
-##     # This function is modified from the cppDef for nimbleFunctions
-##     # where it is very rarely used (see comment there).
-##     # There is a bug there because the indexing of existingExtPtrs is not set up
-##     # so I modify here to fix that. In future, we could fix this small bug,
-##     # either in promoteCallable or in the multi interface getExtPtrs method
-##     # N.B. By default this modifies its argument by updating
-##     # its .CobjectInterface
-##     RCobj <- nimble:::nf_getRefClassObject(RoneLaplace)
-##     oldCobjectInterface <- RCobj$.CobjectInterface
-##     if(!is.list(oldCobjectInterface)) return(oldCobjectInterface)
-##     extPtrs <- oldCobjectInterface[[1]]$getExtPtrs(oldCobjectInterface[[2]])
-##     extPtrTypeIndex <- oldCobjectInterface[[1]]$extPtrTypeIndex
-##     existingExtPtrs <- vector('list', length(extPtrTypeIndex))
-##     existingExtPtrs[[1]] <- extPtrs[[1]]
-##     existingExtPtrs[[ extPtrTypeIndex['NamedObjects'] ]] <- extPtrs[[2]]
-##     thisDll <- oldCobjectInterface[[1]]$dll
-##     nimbleProject <- oldCobjectInterface[[1]]$compiledNodeFun$nimbleProject
-##     Rgenerator <- oldCobjectInterface[[1]]$compiledNodeFun$Rgenerator
-##     newCobjectInterface <- Rgenerator(RoneLaplace, thisDll,
-##                                       project = nimbleProject, existingExtPtrs = existingExtPtrs)
-##     RCobj$.CobjectInterface <- newCobjectInterface
-##     newCobjectInterface
-##   }
-##   # make sure all AGHQs (conditionally independent) are promoted
-##   # to having a fully callable interface object
-##   AGHQ_list_ <- RLaplace$AGHQuad_nfl$contentsList |> lapply(promoteCallable)
-##   AGHQ_list_ |> lapply(\(x) x$one_time_fixes())
-##   # Also make sure the parameter transformation is fully callable
-##   outerParamsTransform <- promoteCallable(RLaplace$paramsTransform)
-##   # reTrans refers to random effects in transformed (unconstrained) coordinates
-##   # Set up some objects to manage information for inner
-##   # optimizations.
-##   num_condIndSets <- length(AGHQ_list_)
-##   # list of last optimizer results
-##   last_inner_opt_list <- vector('list', num_condIndSets)
-##   # list of last *best* results of inner optima
-##   best_inner_opt_list <- seq_along(AGHQ_list_) |> lapply(\(x) list(value = Inf))
-##   # list of constant values for reTrans to use for initializing
-##   # inner optimizations with default option and mode "constant"
-##   constant_reTrans_list <- AGHQ_list_ |> lapply(
-##     \(x) {
-##       startID <- x$startID
-##       x$startID <- 3
-##       ans <- x$get_reInitTrans()
-##       x$startID <- startID
-##       ans
-##     })
-##   # lists of outer parameter values at the best and most recent
-##   # inner optima (best_inner_p_list and last_inner_p_list)
-##   best_inner_p_list <- constant_reTrans_list |>
-##     lapply(\(x) rep(Inf, length(x)))
-##   last_inner_p_list <- best_inner_p_list
-##   reset_last_best <- function(i) {
-##     if(missing(i)) i <- seq_along(AGHQ_list_)
-##     for(ii in i) {
-##       best_inner_opt_list[[ii]] <<- list(value = Inf)
-##       last_inner_opt_list[[ii]] <<- list()
-##       best_inner_p_list[[ii]] <<- rep(Inf, length(best_inner_p_list[[ii]]))
-##       last_inner_p_list[[ii]] <<- best_inner_p_list[[ii]]
-##     }
-##   }
-##   # current value of outer params
-##   current_params <- numeric()
-##   # current index of conditionally independent set being used
-##   current_CondIndSet <- 1
-##   # default inner optimizer
-##   default_inner_opt_fn <- \(re, fn, gr, he) {
-##     nimOptim(re, fn, gr, method = "nlminb")
-##   }
-##   inner_opt_fn_ <- default_inner_opt_fn
-##   inner_opt_fn <- function(f) {
-##     if(missing(f)) return(inner_opt_fn_)
-##     inner_opt_fn_ <<- f
-##     f
-##   }
-##   # outer optimizer
-##   default_outer_opt_fn <- \(p, fn, gr) {
-##     optim(p, fn, gr, method = "BFGS")
-##   }
-##   outer_opt_fn_ <- default_outer_opt_fn
-##   outer_opt_fn <- function(f) {
-##     if(missing(f)) return(outer_opt_fn_)
-##     outer_opt_fn_ <<- f
-##     f
-##   }
-##   # access the list of conditionally independent AGHQs
-##   AGHQ_list <- function() AGHQ_list_
-##   # Objects for controlling initialization of inner optimization:
-##   # Three modes are available in the default method.
-##   reInitTrans_mode_ <- "constant" # or "last" or "last.best"
-##   # function to get or set this mode
-##   reInitTrans_mode <- function(mode) {
-##     if(missing(mode)) return(reInitTrans_mode_)
-##     reInitTrans_mode_ <<- mode
-##   }
-##   default_reInitTrans_fn <- function(AGHQobj, i) {
-##     optStart <- switch(reInitTrans_mode_,
-##                        last = last_inner_opt_list[[i]]$par,
-##                        last.best = best_inner_opt_list[[i]]$par,
-##                        constant = constant_reTrans_list[[i]])
-##     optStart
-##   }
-##   reInitTrans_fn_ <- default_reInitTrans_fn
-##   reInitTrans_fn <- function(f) {
-##     if(missing(f)) return(reInitTrans_fn_)
-##     reInitTrans_fn_ <<- f
-##     return(f)
-##   }
-##   # neg inner logLik for one conditionally independent set
-##   inner_negLogLik <- function(reTrans, i = current_CondIndSet) {
-##     -AGHQ_list_[[i]]$inner_logLik(reTrans)
-##   }
-##   # neg gradient of inner logLik for one conditionally independent set
-##   gr_inner_negLogLik <- function(reTrans, i = current_CondIndSet) {
-##     -AGHQ_list_[[i]]$gr_inner_logLik(reTrans)
-##   }
-##   # neg Hessian of inner logLik for one conditionally independent set
-##   he_inner_negLogLik <- function(reTrans, i = current_CondIndSet, p = current_params) {
-##     AGHQ_list_[[i]]$negHess(p, reTrans)
-##   }
-##   closure <- environment()
-##   # minimize neg inner logLik
-##   update_min_inner_negLogLik <- function(p,
-##                                          reInitTrans,
-##                                          i = current_CondIndSet,
-##                                          inner_opt_fn,
-##                                          reInitTrans_fn) {
-##     optRes <- min_inner_negLogLik(p, reInitTrans, i, inner_opt_fn, reInitTrans_fn)
-##     last_inner_opt_list[[i]] <<- optRes
-##     last_inner_p_list[[i]] <<- p
-##     optRes
-##   }
-##   min_inner_negLogLik <- function(p, reInitTrans, i = current_CondIndSet,
-##                                   inner_opt_fn,
-##                                   reInitTrans_fn) {
-##     if(missing(inner_opt_fn)) inner_opt_fn <- get("inner_opt_fn", envir = closure)()
-##     if(missing(reInitTrans_fn)) reInitTrans_fn <- get("reInitTrans_fn", envir = closure)()
-##     innerObj <- AGHQ_list_[[i]]
-##     if(missing(reInitTrans)) reInitTrans <- reInitTrans_fn(innerObj, i)
-##     # The 1D and nD versions differ in the set_params method.
-##     if(length(reInitTrans) > 1) {
-##       innerObj$set_params(p)
-##     } else {
-##       param_values(p)
-##       paramDeps <- RLaplace$AGHQuad_nfl[[i]]$paramDeps
-##       cModel$calculate(paramDeps)
-##     }
-##     fn_init <- inner_negLogLik(reInitTrans, i)
-##     current_params <<- p
-##     current_CondIndSet <<- i
-##     if(is.nan(fn_init) || is.na(fn_init) || fn_init == Inf || fn_init == -Inf) {
-##       ans <- list(par = reInitTrans, value = Inf, convergence = -1)
-##       return(ans)
-##     }
-##     if(length(reInitTrans) > 1) {
-##       if(innerObj$gr_inner_logLik_first) {
-##         innerObj$gr_inner_logLik_force_update <- TRUE
-##         innerObj$gr_inner_logLik(reInitTrans)
-##         innerObj$gr_inner_logLik_first <- FALSE
-##         innerObj$gr_inner_logLik_force_update <- FALSE
-##       }
-##     }
-##     optRes <- inner_opt_fn(reInitTrans, fn = inner_negLogLik,
-##                            gr = gr_inner_negLogLik, he = he_inner_negLogLik)
-##     optRes
-##   }
-##   # get inner opt result
-##   last_inner_opt <- function(i = current_CondIndSet) {
-##     last_inner_opt_list[[i]]
-##   }
-##   # do one conditionally independent Laplace approx
-##   one_negLaplace <- function(p, reInitTrans, i = current_CondIndSet,
-##                              inner_opt_fn,
-##                              reInitTrans_fn,
-##                              opt) {
-##     if(missing(opt))
-##       if(any(p!=last_inner_p_list[[i]])) {
-##         if(missing(inner_opt_fn)) inner_opt_fn <- get("inner_opt_fn", envir = closure)()
-##         if(missing(reInitTrans_fn)) reInitTrans_fn <- get("reInitTrans_fn", envir = closure)()
-##         opt <- update_min_inner_negLogLik(p, reInitTrans, i,
-##                                           inner_opt_fn, reInitTrans_fn)
-##       } else {
-##         opt <- last_inner_opt_list[[i]]
-##       }
-##     reTransform <- opt$par
-##     logdetNegHessian <- AGHQ_list_[[i]]$logdetNegHess(p, reTransform)
-##     nreTrans <- length(reTransform)
-##     ans <- opt$value + 0.5 * logdetNegHessian - 0.5 * nreTrans * log(2*pi)
-##     if(ans < best_inner_opt_list[[i]]$value) {
-##       best_inner_opt_list[[i]] <<- list(value = ans, par = reTransform)
-##       best_inner_p_list[[i]] <<- p
-##     }
-##     ans
-##   }
-##   one_gr_negLaplace <- function(p, reInitTrans, i = current_CondIndSet,
-##                                 inner_opt_fn,
-##                                 reInitTrans_fn,
-##                                 opt) {
-##     if(missing(opt))
-##       if(any(p!=last_inner_p_list[[i]])) {
-##         if(missing(inner_opt_fn)) inner_opt_fn <- get("inner_opt_fn", envir = closure)()
-##         if(missing(reInitTrans_fn)) reInitTrans_fn <- get("reInitTrans_fn", envir = closure)()
-##         opt <- update_min_inner_negLogLik(p, reInitTrans, i,
-##                                           inner_opt_fn, reInitTrans_fn)
-##       } else {
-##         opt <- last_inner_opt_list[[i]]
-##       }
-##     innerObj <- AGHQ_list_[[i]]
-##     reTransform <- opt$par
-##     negHessian <- innerObj$negHess(p, reTransform)
-##     invNegHessian <- inverse(negHessian)
-##     grlogdetNegHesswrtp <- innerObj$gr_logdetNegHess_wrt_p_internal(p, reTransform)
-##     grlogdetNegHesswrtre <- innerObj$gr_logdetNegHess_wrt_re_internal(p, reTransform)
-##     hesslogLikwrtpre <- innerObj$hess_joint_logLik_wrt_p_wrt_re_internal(p, reTransform)
-##     ans <- -innerObj$gr_joint_logLik_wrt_p_internal(p, reTransform) +
-##       0.5 * (grlogdetNegHesswrtp + (grlogdetNegHesswrtre %*% invNegHessian) %*% t(hesslogLikwrtpre))
-##     ans[1,]
-##   }
-##   negLaplace <- function(p, trans = FALSE,
-##                          reInitTrans,
-##                          inner_opt_fn,
-##                          reInitTrans_fn) {
-##     if(missing(inner_opt_fn)) inner_opt_fn <- get("inner_opt_fn", envir = closure)()
-##     if(missing(reInitTrans_fn)) reInitTrans_fn <- get("reInitTrans_fn", envir = closure)()
-##     if(trans)
-##       p <- outerParamsTransform$inverseTransform(p)
-##     ans <- 0
-##     if(cLaplace$num_calcNodesOther > 0) ans <- -cLaplace$otherLogLik(p)
-##     missing_reInitTrans <- missing(reInitTrans)
-##     for(i in seq_along(AGHQ_list_)) {
-##       if(missing_reInitTrans) reIT <- reInitTrans_fn(AGHQ_list_[[i]], i)
-##       else reIT <- reInitTrans[[i]]
-##       one_ans <- one_negLaplace(p, reIT, i, inner_opt_fn, reInitTrans_fn)
-##       ans <- ans + one_ans
-##     }
-##     if(is.nan(ans) || is.na(ans)) ans <- Inf
-##     ans
-##   }
-##   gr_negLaplace <- function(p, trans = FALSE,
-##                             reInitTrans,
-##                             inner_opt_fn,
-##                             reInitTrans_fn,
-##                             reset = reset_last_best) {
-##     if(missing(inner_opt_fn)) inner_opt_fn <- get("inner_opt_fn", envir = closure)()
-##     if(missing(reInitTrans_fn)) reInitTrans_fn <- get("reInitTrans_fn", envir = closure)()
-##     if(trans) {
-##       pDerivs <- cLaplace$derivs_pInverseTransform(p, 0:1)
-##       p <- pDerivs$value
-##     }
-##     if(cLaplace$num_calcNodesOther > 0) ans <- -cLaplace$gr_otherLogLik(p)
-##     else ans <- rep(0, length(p))
-##     missing_reInitTrans <- missing(reInitTrans)
-##     for(i in seq_along(AGHQ_list_)) {
-##       if(missing_reInitTrans) reIT <- reInitTrans_fn(AGHQ_list_[[i]], i)
-##       else reIT <- reInitTrans[[i]]
-##       one_ans <- one_gr_negLaplace(p, reIT, i, inner_opt_fn, reInitTrans_fn)
-##       ans <- ans + one_ans
-##     }
-##     if(trans) {
-##       ans <- (ans %*% pDerivs$jacobian)[1,]
-##     }
-##     ans
-##   }
-##   findMLE <- function(pStart,
-##                       outer_opt_fn,
-##                       inner_opt_fn,
-##                       reInitTrans_fn,
-##                       reset = reset_last_best) {
-##     if(missing(outer_opt_fn)) outer_opt_fn <- get("outer_opt_fn", envir = closure)()
-##     if(!missing(inner_opt_fn)) get("inner_opt_fn", envir = closure)(inner_opt_fn)
-##     if(!missing(reInitTrans_fn)) get("reInitTrans_fn", envir = closure)(reInitTrans_fn)
-##     if(is.function(reset)) reset()
-##     if(missing(pStart))
-##       pStart <- param_values()
-##     pStartTransform <- outerParamsTransform$transform(pStart)
-##     optRes <- outer_opt_fn(pStartTransform, \(p) negLaplace(p, TRUE), \(p) gr_negLaplace(p, TRUE))
-##     if(optRes$convergence != 0)
-##       warning("Bad outer convergence")
-##     optRes$par <- outerParamsTransform$inverseTransform(optRes$par)
-##     return(optRes)
-##   }
-##   list(promoteCallable = promoteCallable,
-##        param_values = param_values,
-##        inner_opt_fn = inner_opt_fn,
-##        outer_opt_fn = outer_opt_fn,
-##        AGHQ_list = AGHQ_list,
-##        reInitTrans_mode = reInitTrans_mode,
-##        reInitTrans_fn = reInitTrans_fn,
-##        inner_negLogLik = inner_negLogLik,
-##        gr_inner_negLogLik = gr_inner_negLogLik,
-##        he_inner_negLogLik = he_inner_negLogLik,
-##        min_inner_negLogLik = min_inner_negLogLik,
-##        last_inner_opt = last_inner_opt,
-##        one_negLaplace = one_negLaplace,
-##        one_gr_negLaplace = one_gr_negLaplace,
-##        negLaplace = negLaplace,
-##        gr_negLaplace = gr_negLaplace,
-##        outerParamsTransform = outerParamsTransform,
-##        findMLE = findMLE
-##   )
-## }
diff --git a/packages/nimble/R/MCMC_samplers.R b/packages/nimble/R/MCMC_samplers.R
index 31334f154..827ac9126 100644
--- a/packages/nimble/R/MCMC_samplers.R
+++ b/packages/nimble/R/MCMC_samplers.R
@@ -2742,83 +2742,6 @@ samplePolyaGamma <- nimbleFunction(
 )
 
-
-## Tooling for allowing both dnorm and dmnorm nodes.
-## Copied from INLA work; eventually consider handling as a general tool.
-
-getParam_BASE <- nimbleFunctionVirtual(
-  run = function() {},
-  methods = list(
-    getMean = function(index = integer()) { returnType(double(1)) },
-    getPrecision = function(index = integer()) { returnType(double(2)) }
-  )
-)
-
-## A place holder to not take up much memory.
-emptyParam <- nimbleFunction(
-  contains = getParam_BASE,
-  setup = function() {},
-  run = function() {},
-  methods = list(
-    getPrecision = function(index = integer()){
-      returnType(double(2))
-      return(matrix(1, nrow = 1, ncol = 1))
-    },
-    getMean = function(index = integer()){
-      returnType(double(1))
-      return(numeric(1, length = 1))
-    }
-  )
-)
-
-## Need at least one dnorm to use this.
-## NodeNames relate to node names in the model that are dnorm distributed.
-## gNodes indicates a 1 if dnorm, 0 o/w.
-## This makes it easy to get the correct indices when passing in the node index in a loop.
-gaussParam <- nimbleFunction(
-  contains = getParam_BASE,
-  setup = function(model, nodeNames, gNodes) {
-    indexConvert <- cumsum(gNodes)
-    if(length(indexConvert) == 1)
-      indexConvert <- c(indexConvert, -1)
-  },
-  run = function() {},
-  methods = list(
-    getPrecision = function(index = integer()){
-      i <- indexConvert[index]
-      return(matrix(model$getParam(nodeNames[i], "tau"), nrow = 1, ncol = 1))
-      returnType(double(2))
-    },
-    getMean = function(index = integer()){
-      i <- indexConvert[index]
-      return(numeric(model$getParam(nodeNames[i], "mean"), length = 1))
-      returnType(double(1))
-    }
-  )
-)
-
-## Need at least one dmnorm to use this.
-multiGaussParam <- nimbleFunction(
-  contains = getParam_BASE,
-  setup = function(model, nodeNames, gNodes) {
-    indexConvert <- cumsum(gNodes)
-    if(length(indexConvert) == 1)
-      indexConvert <- c(indexConvert, -1)
-  },
-  run = function(){},
-  methods = list(
-    getPrecision = function(index = integer()){
-      i <- indexConvert[index]
-      return(model$getParam(nodeNames[i], "prec"))
-      returnType(double(2))
-    },
-    getMean = function(index = integer()){
-      i <- indexConvert[index]
-      return(model$getParam(nodeNames[i], "mean"))
-      returnType(double(1))
-    }
-  )
-)
-
 ####################################################################
 ### Polya-Gamma Data Augmentation ##################################
 ####################################################################
diff --git a/packages/nimble/R/QuadratureGrids.R b/packages/nimble/R/QuadratureGrids.R
deleted file mode 100644
index b93b4358c..000000000
--- a/packages/nimble/R/QuadratureGrids.R
+++ /dev/null
@@ -1,344 +0,0 @@
-## Choosing not to export this right now, but documentation was written just in case; it is very basic (PVDB).
-
-buildAGHQGrid <- nimbleFunction(
-  setup = function(d = 1, nQuad = 3){
-
-    odd <- TRUE
-    if(nQuad %% 2 == 0) odd <- FALSE
-
-    if(nQuad > 35) {
-      print("We don't currently support more than 35 quadrature nodes per dimension. Setting nQuad to 35")
-      nQuad <- 35
-    }
-
-    ## nQ will be the total number of quadrature nodes.
-    nQ <- nQuad^d ## May be dimension-reduced if we prune.
-    zVals <- matrix(0, nrow = nQ, ncol = d)
-    nodeVals <- matrix(0, nrow = nQ, ncol = d)
-
-    ## Need to do a reverse for Eigen Vectors:
-    inner_max <- 121
-    reverse <- inner_max:1
-
-    ## One-time fixes if we run into some scalar issues for compilation.
-    ## This is exclusively if the user requests Laplace (nQ = 1 AGHQ).
-    gridFix <- 0
-    if(nQ == 1) {
-      gridFix <- 1
-    }
-
-    ## One-time fixes for scalar / vector changes.
-    one_time_fixes_done <- FALSE
-    wgt <- numeric(nQ + gridFix)
-    logDensity <- numeric(nQ + gridFix)
-    logdetNegHessian <- 0
-    margDens <- 0
-
-    gridBuilt <- FALSE
-
-    ## AGHQ mode will be in the middle.
- modeIndex <- -1 - maxLogDensity <- 0 - }, - run=function(){}, - methods = list( - one_time_fixes = function() { - ## Run this once after compiling; remove extraneous -1 if necessary - if(one_time_fixes_done) return() - if(nQ == 1) { - logDensity <<- numeric(length = 1, value = logDensity[1]) - wgt <<- numeric(length = 1, value = wgt[1]) - } - one_time_fixes_done <<- TRUE - }, - buildAGHQOne = function(nQ1 = integer()){ - res <- matrix(0, nrow = nQ1, ncol = 2) - if( nQ1 == 1 ){ - ## Laplace Approximation: - res[,1] <- 0 - res[,2] <- sqrt(2*pi) - }else{ - i <- 1:(nQ1-1) - dv <- sqrt(i/2) - ## Recreate pracma::Diag for this problem. - y <- matrix(0, nrow = nQ1, ncol = nQ1) - y[1:(nQ1-1), 1:(nQ1-1) + 1] <- diag(dv) - y[1:(nQ1-1) + 1, 1:(nQ1-1)] <- diag(dv) - E <- eigen(y, symmetric = TRUE) - L <- E$values # Always biggest to smallest. - V <- E$vectors - inds <- reverse[(inner_max-nQ1+1):inner_max] ## Hard coded to maximum 120. - x <- L[inds] - ## Make mode hard zero. We know nQ is odd and > 1. - if(odd) x[ceiling(nQ1 / 2 ) ] <- 0 - V <- t(V[, inds]) - ## Update nodes and weights in terms of z = x/sqrt(2) - ## and include Gaussian kernel in weight to integrate an arbitrary function. - w <- V[, 1]^2 * sqrt(2*pi) * exp(x^2) - x <- sqrt(2) * x - res[,1] <- x - res[,2] <- w - } - returnType(double(2)) - return(res) - }, - buildAGHQ = function(){ - one_time_fixes() - if( nQuad == 1 ){ - ## Laplace Approximation: - zVals <<- matrix(0, nrow = 1, ncol = d) - wgt <<- numeric(value = exp(0.5 * d * log(2*pi)), length = nQ) - modeIndex <<- 1 - }else{ - nodes <- buildAGHQOne(nQuad) - ## If d = 1, then we are done. - if(d == 1){ - zVals[,1] <<- nodes[,1] - wgt <<- nodes[,2] - if(odd) modeIndex <<- which(zVals[,1] == 0)[1] - }else{ - ## Build the multivariate quadrature rule. - wgt <<- rep(1, nQ) - - ## A counter for when to swap. - swp <- numeric(value = 0, length = d) - for( ii in 1:d ) swp[ii] <- nQuad^(ii-1) - - ## Repeat x for each dimension swp times. - for(j in 1:d ) { - indx <- 1 - for( ii in 1:nQ ) { - zVals[ii, j] <<- nodes[indx,1] - wgt[ii] <<- wgt[ii]*nodes[indx,2] - k <- ii %% swp[j] - if(k == 0) indx <- indx + 1 - if(indx > nQuad) indx <- 1 - } - } - ## Assuming mode index is the middle number. - if(odd) { - modeIndex <<- ceiling(nQ/2) - ## Just in case that goes horribly wrong... - if(sum(abs(zVals[modeIndex,])) != 0) { - for(ii in 1:nQ) { - if(sum(abs(zVals[ii,])) == 0) modeIndex <<- ii - } - } - } - } - } - }, - ## Doesn't default to building the grid. - buildGrid = function(){ - one_time_fixes() - if(!gridBuilt) buildAGHQ() - gridBuilt <<- TRUE - }, - quadSum = function(){ - if(!odd) modeIndex <<- -1 ## Make sure it's negative. - margDens <<- 0 - for( k in 1:nQ ){ - if(k == modeIndex) margDens <<- margDens + wgt[k] - else margDens <<- margDens + exp(logDensity[k] - maxLogDensity)*wgt[k] - } - ans <- log(margDens) + maxLogDensity - 0.5 * logdetNegHessian - returnType(double()) - return(ans) - }, - ## Reset the sizes of the storage to change the grid if the user wants more/less AGHQ. - setGridSize = function(nQUpdate = integer()){ - one_time_fixes() - - if(nQuad != nQUpdate){ - nQ <<- nQUpdate^d - nQuad <<- nQUpdate - - if(nQ %% 2 == 0) { - odd <<- FALSE - modeIndex <<- -1 - }else{ - odd <<- TRUE - } - ## Update weights and nodes. - setSize(wgt, nQ) - setSize(zVals, c(nQ, d)) - setSize(nodeVals, c(nQ, d)) - setSize(logDensity, nQ) - - ## Build the new grid (updates modeIndex). 
-        buildAGHQ()
-      }
-    },
-    saveLogDens = function(i = integer(0, default = -1), logDens = double()){
-      if(i == -1){
-        if(odd) logDensity[modeIndex] <<- logDens
-        maxLogDensity <<- logDens
-      }else{
-        logDensity[i] <<- logDens
-      }
-    },
-    transformGrid1D = function(negHess = double(2), inner_mode = double(1)){
-      SD <- 1/sqrt(negHess[1,1])
-      for( i in 1:nQ) nodeVals[i,] <<- inner_mode + SD*zVals[i,]
-      logdetNegHessian <<- log(negHess[1,1])
-    },
-    transformGrid = function(cholNegHess = double(2), inner_mode = double(1), method = character()){
-      if(method == "spectral"){
-        ## Spectral transformation.
-        negHess <- t(cholNegHess) %*% cholNegHess
-        eigenDecomp <- nimEigen(negHess)
-        ATransform <- matrix(0, nrow = d, ncol = d)
-        for( i in 1:d ){
-          ATransform[,i] <- eigenDecomp$vectors[,i]/sqrt(eigenDecomp$values[i]) # eigenDecomp$vectors %*% diag(1/sqrt(eigenDecomp$values))
-        }
-        for( i in 1:nQ) nodeVals[i, ] <<- inner_mode + (ATransform %*% zVals[i,])
-      }else{
-        ## Cholesky transformation.
-        for( i in 1:nQ) nodeVals[i, ] <<- inner_mode + backsolve(cholNegHess, zVals[i,])
-      }
-      logdetNegHessian <<- 2*sum(log(diag(cholNegHess)))
-    },
-    getWeights = function(i=integer()){
-      returnType(double())
-      if(i == -1 & odd) return(wgt[modeIndex])
-      return(wgt[i])
-    },
-    getAllWeights = function(){
-      returnType(double(1))
-      return(wgt)
-    },
-    getNodesTransformed = function(i=integer()){
-      if(i == -1 & odd) return(nodeVals[modeIndex,])
-      returnType(double(1));
-      return(nodeVals[i,])
-    },
-    getAllNodesTransformed = function(){
-      returnType(double(2));
-      return(nodeVals)
-    },
-    getNodes = function(i=integer()){
-      if(i == -1 & odd) return(zVals[modeIndex,])
-      returnType(double(1));
-      return(zVals[i,])
-    },
-    getAllNodes = function(){
-      returnType(double(2));
-      return(zVals)
-    },
-    getLogDensity = function(i=integer()){
-      returnType(double())
-      if(i == -1 & odd) return(logDensity[modeIndex])
-      return(logDensity[i])
-    },
-    getModeIndex = function(){
-      returnType(integer())
-      return(modeIndex)
-    },
-    getGridSize = function(){
-      returnType(double())
-      return(nQ)
-    }
-  )
-)## End of buildAGHQGrid
-
-#' Build Adaptive Gauss-Hermite Quadrature Grid
-#'
-#' Create quadrature grid for use in AGHQuad methods in Nimble.
-#'
-#' @param d Dimension of quadrature grid being requested.
-#'
-#' @param nQuad Number of quadrature nodes requested on build.
-#'
-#' @name buildAGHQGrid
-#'
-#' @details
-#'
-#' This function is used by \code{buildOneAGHQuad1D}
-#' and \code{buildOneAGHQuad} to create the quadrature grid using
-#' adaptive Gauss-Hermite quadrature. It handles single or multiple dimension
-#' grids and computes both grid locations and weights. Additionally, it acts
-#' as a caching system for transformations and returns the marginalized log density.
-#'
-#' Any of the input node vectors, when provided, will be processed using
-#' \code{nodes <- model$expandNodeNames(nodes)}, where \code{nodes} may be
-#' \code{paramNodes}, \code{randomEffectsNodes}, and so on. This step allows
-#' any of the inputs to include node-name-like syntax that might contain
-#' multiple nodes. For example, \code{paramNodes = 'beta[1:10]'} can be
-#' provided if there are actually 10 scalar parameters, 'beta[1]' through
-#' 'beta[10]'. The actual node names in the model will be determined by the
-#' \code{expandNodeNames} step.
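-#'
-#' As a minimal usage sketch (illustrative only, since this function is not
-#' exported):
-#'
-#' \preformatted{
-#' grid <- buildAGHQGrid(d = 1, nQuad = 5)
-#' grid$buildGrid()             # construct nodes and weights once
-#' z <- grid$getAllNodes()      # standardized quadrature nodes
-#' w <- grid$getAllWeights()    # corresponding quadrature weights
-#' }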
-#'
-#' Available methods include
-#'
-#' \itemize{
-#'
-#' \item \code{buildAGHQ}. Builds an adaptive Gauss-Hermite quadrature grid in d dimensions.
-#'       Calls \code{buildAGHQOne} to build the one-dimensional grid and then expands it in each dimension.
-#'       Some numerical issues occur in the Eigen decomposition, making the grid weights accurate only up to
-#'       35 quadrature nodes.
-#'
-#' \item Options to get internally cached values are \code{getGridSize},
-#'       \code{getModeIndex} for when there are an odd number of quadrature nodes,
-#'       \code{getLogDensity} for the cached values, \code{getAllNodes} for the
-#'       quadrature grids, \code{getNodes} for getting a single indexed node,
-#'       \code{getAllNodesTransformed} for nodes transformed to the parameter scale,
-#'       \code{getNodesTransformed} for a single transformed node, \code{getAllWeights}
-#'       to get all quadrature weights, and \code{getWeights} for a single indexed weight.
-#'
-#' \item \code{transformGrid(cholNegHess, inner_mode, method)} transforms
-#'       the grid using either Cholesky transformations, as the default, or
-#'       spectral ones that make use of the Eigen decomposition. For a single
-#'       dimension, \code{transformGrid1D} is used.
-#'
-#' \item As the log density is evaluated externally, it is saved via \code{saveLogDens},
-#'       which is then summed via \code{quadSum}.
-#'
-#' \item \code{buildGrid} builds the grid the first time and is run only once in code. Afterwards,
-#'       the user must call \code{setGridSize} to update the grid size.
-#'
-#' \item \code{check}. If TRUE (default), a warning is issued if
-#'       \code{paramNodes}, \code{randomEffectsNodes} and/or \code{calcNodes}
-#'       are provided but seem to have missing or unnecessary
-#'       elements based on some default inspection of the model. If
-#'       unnecessary warnings are emitted, simply set \code{check=FALSE}.
-#'
-#' \item \code{innerOptimControl}. A list of control parameters for the inner
-#'       optimization of Laplace approximation using \code{optim}. See
-#'       'Details' of \code{\link{optim}} for further information.
-#'
-#' \item \code{innerOptimMethod}. Optimization method to be used in
-#'       \code{optim} for the inner optimization. See 'Details' of
-#'       \code{\link{optim}}. Currently \code{optim} in NIMBLE supports:
-#'       "\code{Nelder-Mead}", "\code{BFGS}", "\code{CG}", and
-#'       "\code{L-BFGS-B}". By default, method "\code{CG}" is used when
-#'       marginalizing over a single (scalar) random effect, and "\code{BFGS}"
-#'       is used for multiple random effects being jointly marginalized over.
-#'
-#' \item \code{innerOptimStart}. Choice of starting values for the inner
-#'       optimization. This could be \code{"last"}, \code{"last.best"}, or a
-#'       vector of user-provided values. \code{"last"} means the most recent
-#'       random effects values left in the model will be used. When finding
-#'       the MLE, the most recent values will be the result of the most recent
-#'       inner optimization for Laplace. \code{"last.best"} means the random
-#'       effects values corresponding to the largest Laplace likelihood (from
-#'       any call to the \code{calcLaplace} or \code{calcLogLik} method,
-#'       including during an MLE search) will be used (even if it was not the
-#'       most recent Laplace likelihood). By default, the initial random
-#'       effects values will be used for inner optimization.
-#'
-#' \item \code{outOptimControl}. A list of control parameters for maximizing
-#'       the Laplace log-likelihood using \code{optim}. See 'Details' of
-#'       \code{\link{optim}} for further information.
-#' }
-#'
-#' @references
-#'
-#' Golub, G. H. and Welsch, J. H. (1969). Calculation of Gauss Quadrature Rules.
-#' Mathematics of Computation 23 (106): 221-230.
-#'
-#' Liu, Q. and Pierce, D. A. (1994).
A Note on Gauss-Hermite Quadrature. Biometrika, 81(3) 624-629. -#' -#' Jackel, P. (2005). A note on multivariate Gauss-Hermite quadrature. London: ABN-Amro. Re. -#' -NULL diff --git a/packages/nimble/R/miscFunctions.R b/packages/nimble/R/miscFunctions.R index 97fb7a10e..cfad84e05 100644 --- a/packages/nimble/R/miscFunctions.R +++ b/packages/nimble/R/miscFunctions.R @@ -1,3 +1,70 @@ +#' Placeholder for buildLaplace +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +buildLaplace <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + +#' Placeholder for buildAGHQ +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +buildAGHQ <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + +#' Placeholder for runLaplace +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +runLaplace <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + +#' Placeholder for runAGHQ +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +runAGHQ <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + +#' Placeholder for summaryLaplace +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +summaryLaplace <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + +#' Placeholder for summaryAGHQ +#' +#' This function has been moved to the `nimbleQuad` package. +#' +#' @param ... arguments +#' +#' @export +#' +summaryAGHQ <- function(...) + cat("NIMBLE's Laplace/AGHQ functionality, including this function, now resides in the 'nimbleQuad' package.\n") + + #' Placeholder for buildAuxiliaryFilter #' #' This function has been moved to the `nimbleSMC` package. diff --git a/packages/nimble/R/normTooling.R b/packages/nimble/R/normTooling.R new file mode 100644 index 000000000..ec5bb264e --- /dev/null +++ b/packages/nimble/R/normTooling.R @@ -0,0 +1,126 @@ +## Tooling that allows us to use known derivative information for latent +## normal nodes in Laplace, rather than computing via AD. + +getParam_BASE <- nimbleFunctionVirtual( + run = function() {}, + methods = list( + getMean = function(index = integer()) { + returnType(double(1)) + }, + getPrecision = function(index = integer()) { + returnType(double(2)) + }, + calcGradient = function(reTransform = double(1), index = integer(), first = integer(), + last = integer()) { + returnType(double(1)) + } + ) +) + +## A place holder to not take up much memory. 
+emptyParam <- nimbleFunction(
+  contains = getParam_BASE,
+  setup = function() {},
+  run = function() {},
+  methods = list(
+    getPrecision = function(index = integer()) {
+      returnType(double(2))
+      return(matrix(1, nrow = 1, ncol = 1))
+    },
+    getMean = function(index = integer()) {
+      returnType(double(1))
+      return(numeric(1, length = 1))
+    },
+    calcGradient = function(reTransform = double(1), index = integer(), first = integer(),
+                            last = integer()) {
+      returnType(double(1))
+      return(numeric(1, length = 1))
+    }
+  )
+)
+
+## Need at least one dnorm to use this. NodeNames relate to node names in the
+## model that are dnorm distributed. gNodes (of the length of all
+## randomEffectsNodes) indicates a 1 if dnorm, 0 o/w. This makes it easy to get
+## the correct indices when passing in the random-effect index in a loop.
+gaussParam <- nimbleFunction(
+  contains = getParam_BASE,
+  setup = function(model, nodeNames, gNodes) {
+    indexConvert <- cumsum(gNodes)
+    if(length(indexConvert) == 1)
+      indexConvert <- c(indexConvert, -1)
+  },
+  run = function() {},
+  methods = list(
+    getPrecision = function(index = integer()) {
+      i <- indexConvert[index]
+      Q <- matrix(model$getParam(nodeNames[i], "tau"), nrow = 1, ncol = 1)
+      returnType(double(2))
+      return(Q)
+    },
+    getMean = function(index = integer()) {
+      i <- indexConvert[index]
+      mu <- numeric(model$getParam(nodeNames[i], "mean"), length = 1)
+      returnType(double(1))
+      return(mu)
+    },
+    ## Avoid too much memory creation by adding this internal method.
+    calcGradient = function(reTransform = double(1), index = integer(), first = integer(),
+                            last = integer()) {
+      i <- indexConvert[index]
+      ans <- -model$getParam(nodeNames[i], "tau") * (reTransform[first] -
+        model$getParam(nodeNames[i], "mean"))
+      returnType(double(1))
+      return(numeric(value = ans, length = 1))
+    }
+  )
+)
+
+## Need at least one dmnorm to use this. NodeNames relate to node names in the
+## model that are dmnorm distributed. gNodes (of the length of all
+## randomEffectsNodes) indicates a 1 if dmnorm, 0 o/w. This makes it easy to get
+## the correct indices when passing in the random-effect index in a loop.
+multiGaussParam <- nimbleFunction(
+  contains = getParam_BASE,
+  setup = function(model, nodeNames, gNodes) {
+    indexConvert <- cumsum(gNodes)
+    if(length(indexConvert) == 1)
+      indexConvert <- c(indexConvert, -1)
+  },
+  run = function() {},
+  methods = list(
+    getPrecision = function(index = integer()) {
+      i <- indexConvert[index]
+      Q <- model$getParam(nodeNames[i], "prec")
+      returnType(double(2))
+      return(Q)
+    },
+    getMean = function(index = integer()) {
+      i <- indexConvert[index]
+      mu <- model$getParam(nodeNames[i], "mean")
+      returnType(double(1))
+      return(mu)
+    },
+    ## Avoid too much memory creation by adding this internal method.
+    calcGradient = function(reTransform = double(1), index = integer(), first = integer(),
+                            last = integer()) {
+      i <- indexConvert[index]
+      bstar <- (reTransform[first:last] - model$getParam(nodeNames[i], "mean"))
+      Q <- model$getParam(nodeNames[i], "prec")
+      ans <- -(Q %*% bstar)[, 1]
+
+      ## This assumes use of dmnormAD, where `prec` is "free".
+      ## If we somehow wanted to use this with `dmnorm`, we should
+      ## create a version of this that uses `cholesky`:
+      ##   U <- model$getParam(nodeNames[i], "cholesky")
+      ##   if (model$getParam(nodeNames[i], "prec_param") == 1) {
+      ##     ans <- -(t(U) %*% (U %*% bstar))[, 1]
+      ##   } else {
+      ##     ans <- -backsolve(U, forwardsolve(t(U), bstar))
+      ##   }
+
+      returnType(double(1))
+      return(ans)
+    }
+  )
+)
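+
+## Illustrative sketch (not run; names are hypothetical): with random-effect
+## nodes `reNodes` and an indicator vector `gNodes` marking which of them are
+## dmnorm, one might build and query the tool as
+##   reParam <- multiGaussParam(model, reNodes[gNodes == 1], gNodes)
+##   Q  <- reParam$getPrecision(3)   # precision matrix for random effect 3
+##   mu <- reParam$getMean(3)        # its mean vector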
The actual node names in the model will be determined by the +#' \code{exapndNodeNames} step. +#' +#' This function does not do any of the marginalization calculations. It only +#' organizes nodes into roles of parameters, random effects, integrand +#' calculations, and other log likelihood calculations. +#' +#' The checking done if `check=TRUE` tries to be reasonable, but it can't cover +#' all cases perfectly. If it gives an unnecessary warning, simply set `check=FALSE`. +#' +#' If \code{paramNodes} is not provided, its default depends on what other +#' arguments were provided. If neither \code{randomEffectsNodes} nor +#' \code{calcNodes} were provided, \code{paramNodes} defaults to all +#' top-level, stochastic nodes, excluding any posterior predictive nodes +#' (those with no data anywhere downstream). These are determined by +#' \code{model$getNodeNames(topOnly = TRUE, stochOnly = TRUE, +#' includePredictive = FALSE)}. If \code{randomEffectsNodes} was provided, +#' \code{paramNodes} defaults to stochastic parents of +#' \code{randomEffectsNodes}. In these cases, any provided \code{calcNodes} or +#' \code{calcNodesOther} are excluded from default \code{paramNodes}. If +#' \code{calcNodes} but not \code{randomEffectsNodes} was provided, then the +#' default for \code{randomEffectsNodes} is determined first, and then +#' \code{paramNodes} defaults to stochastic parents of +#' \code{randomEffectsNodes}. Finally, any stochastic parents of +#' \code{calcNodes} (whether provided or default) that are not in +#' \code{calcNodes} are added to the default for \code{paramNodes}, but only +#' after \code{paramNodes} has been used to determine the defaults for +#' \code{randomEffectsNodes}, if necessary. +#' +#' Note that to obtain sensible defaults, some nodes must have been marked as +#' data, either by the \code{data} argument in \code{nimbleModel} or by +#' \code{model$setData}. Otherwise, all nodes will appear to be posterior +#' predictive nodes, and the default \code{paramNodes} may be empty. +#' +#' For purposes of \code{buildLaplace}, \code{paramNodes} does not need to (but +#' may) include deterministic nodes between the parameters and any +#' \code{calcNodes}. Such deterministic nodes will be included in +#' calculations automatically when needed. +#' +#' If \code{randomEffectsNodes} is missing, the default is a bit complicated: it +#' includes all latent nodes that are descendants (or "downstream") of +#' \code{paramNodes} (if provided) and are either (i) ancestors (or +#' "upstream") of data nodes (if \code{calcNodes} is missing), or (ii) +#' ancestors or elements of \code{calcNodes} (if \code{calcNodes} and +#' \code{paramNodes} are provided), or (iii) elements of \code{calcNodes} (if +#' \code{calcNodes} is provided but \code{paramNodes} is missing). In all +#' cases, discrete nodes (with warning if \code{check=TRUE}), posterior +#' predictive nodes and \code{paramNodes} are excluded. +#' +#' \code{randomEffectsNodes} should only include stochastic nodes. +#' +#' If \code{calcNodes} is missing, the default is \code{randomEffectsNodes} and +#' their descendants to the next stochastic nodes, excluding posterior +#' predictive nodes. These are determined by +#' \code{model$getDependencies(randomEffectsNodes, includePredictive=FALSE)}. 
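+#'
+#' As an illustration (a hypothetical model, not from the package), consider:
+#'
+#' \preformatted{
+#' code <- nimbleCode({
+#'   sigma ~ dhalfflat()
+#'   for(i in 1:10) {
+#'     b[i] ~ dnorm(0, sd = sigma) # latent (random effect)
+#'     y[i] ~ dnorm(b[i], sd = 1)  # data
+#'   }
+#' })
+#' }
+#'
+#' With \code{y} marked as data, calling \code{setupMargNodes(model)} with no
+#' node arguments would typically give \code{paramNodes = "sigma"},
+#' \code{randomEffectsNodes} as the \code{b[i]}, and \code{calcNodes} as the
+#' \code{b[i]} together with the \code{y[i]}.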
+#'
+#' If \code{calcNodesOther} is missing, the default is all stochastic
+#' descendants of \code{paramNodes}, excluding posterior predictive nodes
+#' (from \code{model$getDependencies(paramNodes, stochOnly=TRUE, self=FALSE,
+#' includePredictive=FALSE)}) that are not part of \code{calcNodes}.
+#'
+#' For purposes of \code{buildLaplace}, neither \code{calcNodes} nor
+#' \code{calcNodesOther} needs to (but may) contain deterministic nodes
+#' between \code{paramNodes} and \code{calcNodes} or \code{calcNodesOther},
+#' respectively. These will be included in calculations automatically when
+#' needed.
+#'
+#' If \code{split} is \code{TRUE}, \code{model$getConditionallyIndependentSets}
+#' is used to determine sets of the \code{randomEffectsNodes} that can be
+#' independently marginalized. The \code{givenNodes} are the
+#' \code{paramNodes} and \code{calcNodes} excluding any
+#' \code{randomEffectsNodes} and their deterministic descendants. The
+#' \code{nodes} (to be split into sets) are the \code{randomEffectsNodes}.
+#'
+#' If \code{split} is a numeric vector, \code{randomEffectsNodes} will be split
+#' by \code{split(randomEffectsNodes, control$split)} (using R's \code{split}
+#' function). The last option allows arbitrary control over how
+#' \code{randomEffectsNodes} are blocked.
+#'
+#' If \code{check=TRUE}, then defaults for each of the four categories of nodes
+#' are created even if the corresponding argument was provided. Then warnings
+#' are emitted if there are any extra (potentially unnecessary) nodes provided
+#' compared to the default or if there are any nodes in the default that were
+#' not provided (potentially necessary). These checks are not perfect and may
+#' be simply turned off if you are confident in your inputs.
+#'
+#' (If \code{randomEffectsNodes} was provided but \code{calcNodes} was not
+#' provided, the default (for purposes of \code{check=TRUE} only) for
+#' \code{randomEffectsNodes} differs from the above description. It uses
+#' stochastic descendants of \code{randomEffectsNodes} in place of the
+#' "data nodes" when determining ancestors of data nodes. And it uses item
+#' (ii) instead of (iii) in the list above.)
+#'
+#' @author Wei Zhang, Perry de Valpine, Paul van Dam-Bates
+#' @return
+#'
+#' A list is returned with elements:
+#'
+#' \itemize{
+#'
+#' \item \code{paramNodes}: final processed version of \code{paramNodes}
+#'
+#' \item \code{randomEffectsNodes}: final processed version of \code{randomEffectsNodes}
+#'
+#' \item \code{calcNodes}: final processed version of \code{calcNodes}
+#'
+#' \item \code{calcNodesOther}: final processed version of \code{calcNodesOther}
+#'
+#' \item \code{givenNodes}: Input to \code{model$getConditionallyIndependentSets}, if \code{split=TRUE}.
+#'
+#' \item \code{randomEffectsSets}: Output from
+#' \code{model$getConditionallyIndependentSets}, if \code{split=TRUE}. This
+#' will be a list of vectors of node names. The node names in one list element
+#' can be marginalized independently from those in other list elements. The
+#' union of the list elements should be all of \code{randomEffectsNodes}. If
+#' \code{split=FALSE}, \code{randomEffectsSets} will be a list with one
+#' element, simply containing \code{randomEffectsNodes}. If \code{split} is a
+#' numeric vector, \code{randomEffectsSets} will be the result of
+#' \code{split(randomEffectsNodes, control$split)}. 
+#'
+#' }
+#'
+#' @export
+setupMargNodes <- function(model, paramNodes, randomEffectsNodes, calcNodes,
+                           calcNodesOther,
+                           split = TRUE,
+                           check = TRUE,
+                           allowDiscreteLatent = FALSE) {
+  paramProvided <- !missing(paramNodes)
+  reProvided <- !missing(randomEffectsNodes)
+  calcProvided <- !missing(calcNodes)
+  calcOtherProvided <- !missing(calcNodesOther)
+
+  normalizeNodes <- function(nodes, sort = FALSE) {
+    if(is.null(nodes) || isFALSE(nodes)) character(0)
+    else model$expandNodeNames(nodes, sort = sort)
+  }
+  if(paramProvided) paramNodes <- normalizeNodes(paramNodes)
+  if(reProvided) randomEffectsNodes <- normalizeNodes(randomEffectsNodes)
+  if(calcProvided) calcNodes <- normalizeNodes(calcNodes, sort = TRUE)
+  if(calcOtherProvided) calcNodesOther <- normalizeNodes(calcNodesOther, sort = TRUE)
+
+  if(reProvided) {
+    if(check && !allowDiscreteLatent)
+      if(any(model$isDiscrete(randomEffectsNodes)))
+        messageIfVerbose(" [Warning] Some elements of `randomEffectsNodes` follow discrete distributions. That is likely to cause problems.")
+  }
+
+  # We considered a feature to allow params to be nodes without priors. This is a placeholder in case
+  # we ever pursue that again.
+  # allowNonPriors <- FALSE
+  # We may need to use determ and stochastic dependencies of parameters multiple times below.
+  # Define these to avoid repeated computation.
+  # A note for the future: determ nodes between parameters and calcNodes are needed inside buildOneAGHQuad
+  # and buildOneAGHQuad1D. In the future, these could all be done here to be more efficient.
+  paramDetermDeps <- character(0)
+  paramStochDeps <- character(0)
+  paramDetermDepsCalculated <- FALSE
+  paramStochDepsCalculated <- FALSE
+
+  # 1. Default parameters are stochastic top-level nodes. (We previously
+  # considered an argument allowNonPriors, defaulting to FALSE. If TRUE, the
+  # default params would be all top-level stochastic nodes with no RHSonly
+  # nodes as parents, plus RHSonly nodes (handling of constants TBD, since
+  # non-scalars would be converted to data) that have stochastic dependencies.
+  # And then top-level stochastic nodes with RHSonly nodes as parents would be
+  # essentially latent/data nodes, some of which would need to be added to
+  # randomEffectsNodes below. However, this got too complicated. It is
+  # simpler and clearer to require "priors" for parameters, even though prior
+  # probs may not be used.)
+  paramsHandled <- TRUE
+  if(!paramProvided) {
+    if(!reProvided) {
+      if(!calcProvided) {
+        paramNodes <- model$getNodeNames(topOnly = TRUE, stochOnly = TRUE, includePredictive = FALSE)
+      } else {
+        # calcNodes were provided, but RE nodes were not, so delay creating default params
+        paramsHandled <- FALSE
+      }
+    } else {
+      nodesToFindParentsFrom <- randomEffectsNodes
+      paramNodes <- model$getParents(nodesToFindParentsFrom, self=FALSE, stochOnly=TRUE)
+      # self=FALSE doesn't omit if one RE node is a parent of another, so we have to do the next step
+      paramNodes <- setdiff(paramNodes, nodesToFindParentsFrom)
+    }
+    if(paramsHandled) {
+      if(calcProvided) paramNodes <- setdiff(paramNodes, calcNodes)
+      if(calcOtherProvided) paramNodes <- setdiff(paramNodes, calcNodesOther)
+    }
+  }
+
+  # 2. Default random effects are latent nodes that are downstream stochastic dependencies of params. 
+  # In step 3, default random effects are also limited to those that are upstream parents of calcNodes.
+  if((!reProvided) || check) {
+    latentNodes <- model$getNodeNames(latentOnly = TRUE, stochOnly = TRUE,
+                                      includeData = FALSE, includePredictive = FALSE)
+    if(!allowDiscreteLatent) {
+      latentDiscrete <- model$isDiscrete(latentNodes)
+      if(any(latentDiscrete)) {
+        if((!reProvided) && check) {
+          messageIfVerbose(" [Note] In trying to determine default `randomEffectsNodes`, there are some nodes\n",
+                           "        that follow discrete distributions. These will be omitted.")
+        }
+        latentNodes <- latentNodes[!latentDiscrete]
+      }
+    }
+    if(paramsHandled) {
+      paramDownstream <- model$getDependencies(paramNodes, stochOnly = TRUE, self = FALSE,
+                                               downstream = TRUE, includePredictive = FALSE)
+      # paramStochDeps <- model$getDependencies(paramNodes, stochOnly = TRUE, self = FALSE)
+      # paramStochDepsCalculated <- TRUE
+      reNodesDefault <- intersect(latentNodes, paramDownstream)
+    } else {
+      reNodesDefault <- latentNodes
+    }
+    # Next, if calcNodes were not provided, we create a temporary
+    # dataNodesDefault for purposes of updating reNodesDefault if needed. The
+    # idea is that reNodesDefault should be trimmed to include only nodes
+    # upstream of "data" nodes, where "data" means nodes in the role of data for
+    # purposes of marginalization.
+    # The tempDataNodesDefault is either dependencies of RE nodes if provided, or
+    # actual data nodes in the model if RE nodes were not provided.
+    # If calcNodes were provided, then they are used directly to trim reNodesDefault.
+    if(!calcProvided) {
+      if(reProvided)
+        tempDataNodesDefault <- model$getDependencies(randomEffectsNodes, stochOnly = TRUE,
+                                                      self = FALSE, includePredictive = FALSE)
+      else
+        tempDataNodesDefault <- model$getNodeNames(dataOnly = TRUE)
+      if(paramsHandled)
+        tempDataNodesDefault <- setdiff(tempDataNodesDefault, paramNodes)
+      tempDataNodesDefaultParents <- model$getParents(tempDataNodesDefault, upstream = TRUE, stochOnly = TRUE)
+      # See comment above about why this is necessary:
+      tempDataNodesDefaultParents <- setdiff(tempDataNodesDefaultParents, tempDataNodesDefault)
+      reNodesDefault <- intersect(reNodesDefault, tempDataNodesDefaultParents)
+    } else {
+      # Update reNodesDefault to exclude nodes that lack downstream connection to a calcNode
+      if(paramsHandled) { # This means reProvided OR paramProvided. Including parents allows checking
+                          # of potentially missing REs.
+        reNodesDefault <- intersect(reNodesDefault,
+                                    model$getParents(calcNodes, upstream=TRUE, stochOnly = TRUE))
+      } else { # This means !paramsHandled and hence !reProvided AND !paramProvided
+        reNodesDefault <- intersect(reNodesDefault,
+                                    calcNodes)
+        reNodesDefault <- intersect(reNodesDefault,
+                                    model$getParents(calcNodes, upstream=TRUE, stochOnly = TRUE))
+      }
+    }
+  }
+
+  # If only calcNodes were provided, we have now created reNodesDefault from calcNodes,
+  # and are now ready to create default paramNodes.
+  if(!paramsHandled) {
+    paramNodes <- model$getParents(reNodesDefault, self=FALSE, stochOnly=TRUE)
+    # See comment above about why this is necessary:
+    paramNodes <- setdiff(paramNodes, reNodesDefault)
+    if(calcOtherProvided) paramNodes <- setdiff(paramNodes, calcNodesOther)
+  }
+
+  # 3. Optionally check random effects if they were provided (not default)
+  if(reProvided && check) {
+    # First check is for random effects that should have been included but weren't
+    reCheck <- setdiff(reNodesDefault, randomEffectsNodes)
+    if(length(reCheck)) {
+      errorNodes <- paste0(head(reCheck, n = 4), sep = "", collapse = ", ")
+      if(length(reCheck) > 4) errorNodes <- paste(errorNodes, "...")
+      messageIfVerbose(" [Warning] There are some random effects (latent states) in the model that look like\n",
+                       "           they should be included for the provided (or default) `paramNodes`,\n",
+                       "           but are not included in `randomEffectsNodes`: ", errorNodes, ".\n",
+                       "           To silence this warning, one can usually include `check = FALSE`\n",
+                       "           (potentially in the control list) for the algorithm or as\n",
+                       "           an argument to `setupMargNodes`.")
+    }
+    # Second check is for random effects that were included but look unnecessary
+    reCheck <- setdiff(randomEffectsNodes, reNodesDefault)
+    if(length(reCheck)) {
+      # Top nodes should never trigger a warning.
+      # Descendants of top nodes that are in randomEffectsNodes should not trigger a warning.
+      topNodes <- model$getNodeNames(topOnly=TRUE)
+      reCheckTopNodes <- intersect(reCheck, topNodes)
+      if(length(reCheckTopNodes)) {
+        # A simple downstream=TRUE here is not a perfect check of connection among all nodes
+        # but it will avoid false alarms.
+        reCheck <- setdiff(reCheck, model$getDependencies(reCheckTopNodes, downstream=TRUE, stochOnly=TRUE))
+      }
+      if(length(reCheck)) {
+        errorNodes <- paste0(head(reCheck, n = 4), sep = "", collapse = ", ")
+        if(length(reCheck) > 4) errorNodes <- paste(errorNodes, "...")
+        extraMsg <- if(isTRUE(getNimbleOption('includeUnneededLatents'))) "" else " They will be omitted, but one can force inclusion with\n `nimbleOptions(includeUnneededLatents=TRUE)`.\n"
+        messageIfVerbose(" [Warning] There are some `randomEffectsNodes` provided that look like\n",
+                         "           they are not needed for the provided (or default) `paramNodes`:\n",
+                         "           ", errorNodes, ".\n", extraMsg,
+                         "           To silence this warning, one can usually include `check = FALSE`\n",
+                         "           (potentially in the control list) for the algorithm or as\n",
+                         "           an argument to `setupMargNodes`.")
+        if(!isTRUE(getNimbleOption('includeUnneededLatents')))
+          randomEffectsNodes <- setdiff(randomEffectsNodes, reCheck)
+      }
+    }
+  }
+  # Set final choice of randomEffectsNodes
+  if(!reProvided) {
+    randomEffectsNodes <- reNodesDefault
+  }
+
+  # 4. Set actual default calcNodes. This time it has self=TRUE (default).
+  if((!calcProvided) || check) {
+    calcNodesDefault <- model$getDependencies(randomEffectsNodes, includePredictive = FALSE)
+  }
+  # 5. 
Optionally check calcNodes if they were provided (not default) + if(calcProvided && check) { + # First check is for calcNodes that look necessary but were omitted + calcCheck <- setdiff(calcNodesDefault, calcNodes) + if(length(calcCheck)) { + errorNodes <- paste0(head(calcCheck, n = 4), sep = "", collapse = ", ") + if(length(calcCheck) > 4) errorNodes <- paste(errorNodes, "...") + messageIfVerbose(" [Warning] There are some model nodes that look like they should be\n", + " included in the `calcNodes` because\n", + " they are dependencies of some `randomEffectsNodes`: ", errorNodes, ".\n", + " To silence this warning, one can usually include `check = FALSE`\n", + " (potentially in the control list) for the algorithm or as\n", + " an argument to `setupMargNodes`.") + } + # Second check is for calcNodes that look unnecessary + # If some determ nodes between paramNodes and randomEffectsNodes are provided in calcNodes + # then that's ok and we should not throw a warning message. + calcCheck <- setdiff(calcNodes, calcNodesDefault) + errorNodes <- calcCheck[model$getNodeType(calcCheck)=="stoch"] + # N.B. I commented out this checking of deterministic nodes for now. + # Iterating through individual nodes for getDependencies can be slow + # and I'd like to think more about how to do this. -Perry + ## determCalcCheck <- setdiff(calcCheck, errorNodes) + ## lengthDetermCalcCheck <- length(determCalcCheck) + ## # Check other determ nodes + ## if(lengthDetermCalcCheck){ + ## paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE) + ## paramDetermDepsCalculated <- TRUE + ## for(i in 1:lengthDetermCalcCheck){ + ## if(!(determCalcCheck[i] %in% paramDetermDeps) || + ## !(any(model$getDependencies(determCalcCheck[i], self = FALSE) %in% calcNodesDefault))){ + ## errorNodes <- c(errorNodes, determCalcCheck[i]) + ## } + ## } + ## } + if(length(errorNodes)){ + outErrorNodes <- paste0(head(errorNodes, n = 4), sep = "", collapse = ", ") + if(length(errorNodes) > 4) outErrorNodes <- paste(outErrorNodes, "...") + messageIfVerbose(" [Warning] There are some `calcNodes` provided that look like\n", + " they are not needed for the provided (or default) `randomEffectsNodes`:\n", + " ", outErrorNodes, ".\n", + " To silence this warning, one can usually include `check = FALSE`\n", + " (potentially in the control list) for the algorithm or as\n", + " an argument to `setupMargNodes`.") + } + } + # Finish step 4 + if(!calcProvided){ + calcNodes <- calcNodesDefault + } + if(!paramProvided) { + possibleNewParamNodes <- model$getParents(calcNodes, self=FALSE, stochOnly=TRUE, includeData=FALSE) + # includeData=FALSE as data nodes cannot be parameters + # self=FALSE doesn't omit if one node is a parent of another, so we have to do the next step + possibleNewParamNodes <- setdiff(possibleNewParamNodes, calcNodesDefault) + paramNodes <- unique(c(paramNodes, possibleNewParamNodes)) + } + + # 6. Default calcNodesOther: nodes needed for full model likelihood but + # that are not involved in the marginalization done by Laplace. + # Default is a bit complicated: All dependencies from paramNodes to + # stochastic nodes that are not part of calcNodes. Note that calcNodes + # does not necessarily contain deterministic nodes between paramNodes and + # randomEffectsNodes. We don't want to include those in calcNodesOther. + # (A deterministic that is needed for both calcNodes and calcNodesOther should be included.) 
+ # So we have to first do a setdiff on stochastic nodes and then fill in the + # deterministics that are needed. + if(!calcOtherProvided || check) { + paramStochDeps <- model$getDependencies(paramNodes, stochOnly = TRUE, # Should this be dataOnly=TRUE? + self = FALSE, includePredictive = FALSE) + calcNodesOtherDefault <- setdiff(paramStochDeps, calcNodes) + } + if(calcOtherProvided) { + if((length(calcNodesOther) > 0) && !any(model$getNodeType(calcNodesOther)=="stoch")){ + messageIfVerbose(" [Warning] There are no stochastic nodes in the `calcNodesOther` provided for Laplace or AGHQ approximation.") + } + } + if(!calcOtherProvided){ + calcNodesOther <- calcNodesOtherDefault + } + if(calcOtherProvided && check) { + calcOtherCheck <- setdiff(calcNodesOtherDefault, calcNodesOther) + if(length(calcOtherCheck)) { + # We only check missing stochastic nodes; determ nodes will be added below + missingStochNodesInds <- which((model$getNodeType(calcOtherCheck)) == "stoch") + lengthMissingStochNodes <- length(missingStochNodesInds) + if(lengthMissingStochNodes){ + missingStochNodes <- calcOtherCheck[missingStochNodesInds] + errorNodes <- paste0(head(missingStochNodes, n = 4), sep = "", collapse = ", ") + if(lengthMissingStochNodes > 4) errorNodes <- paste(errorNodes, "...") + messageIfVerbose(" [Warning] There are some model nodes (stochastic) that look like they should be\n", + " included in the `calcNodesOther` for parts of the likelihood calculation\n", + " outside of Laplace or AGHQ approximation: ", errorNodes, ".\n", + " To silence this warning, include `check = FALSE` in the control list\n", + " to `buildLaplace` or as an argument to `setupMargNodes`.") + } + } + # Check redundant stochastic nodes + calcOtherCheck <- setdiff(calcNodesOther, calcNodesOtherDefault) + stochCalcOtherCheck <- calcOtherCheck[model$getNodeType(calcOtherCheck) == "stoch"] + errorNodes <- stochCalcOtherCheck + # Check redundant determ nodes + # N.B. I commented-out this deterministic node checking for reasons similar to above. -Perry + ## determCalcOtherCheck <- setdiff(calcOtherCheck, stochCalcOtherCheck) + ## lengthDetermCalcOtherCheck <- length(determCalcOtherCheck) + ## errorNodes <- character(0) + ## if(lengthDetermCalcOtherCheck){ + ## if(!paramDetermDepsCalculated) { + ## paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE) + ## paramDetermDepsCalculated <- TRUE + ## } + ## for(i in 1:lengthDetermCalcOtherCheck){ + ## if(!(determCalcOtherCheck[i] %in% paramDetermDeps) || + ## !(any(model$getDependencies(determCalcOtherCheck[i], self = FALSE) %in% calcNodesOtherDefault))){ + ## errorNodes <- c(errorNodes, determCalcOtherCheck[i]) + ## } + ## } + ## } + ## errorNodes <- c(stochCalcOtherCheck, errorNodes) + if(length(errorNodes)){ + outErrorNodes <- paste0(head(errorNodes, n = 4), sep = "", collapse = ", ") + if(length(errorNodes) > 4) outErrorNodes <- paste(outErrorNodes, "...") + messageIfVerbose(" [Warning] There are some nodes provided in `calcNodesOther` that look like\n", + " they are not needed for parts of the likelihood calculation\n", + " outside of Laplace or AGHQ approximation: ", outErrorNodes, ".\n", + " To silence this warning, include `check = FALSE` in the control list\n", + " to `buildLaplace` or as an argument to `setupMargNodes`.") + } + } + # Check and add necessary (upstream) deterministic nodes into calcNodesOther + # This ensures that deterministic nodes between paramNodes and calcNodesOther are used. 
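+  # Illustrative sketch (hypothetical names): if `eta <- a + b` is a
+  # deterministic dependency of parameters `a` and `b`, and
+  # `z ~ dnorm(eta, sd = 1)` is in calcNodesOther, then `eta` is kept and
+  # added to calcNodesOther below because its dependencies reach calcNodesOther.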
+  num_calcNodesOther <- length(calcNodesOther)
+  if(num_calcNodesOther > 0){
+    if(!paramDetermDepsCalculated) {
+      paramDetermDeps <- model$getDependencies(paramNodes, determOnly = TRUE, includePredictive = FALSE)
+      paramDetermDepsCalculated <- TRUE
+    }
+    numParamDetermDeps <- length(paramDetermDeps)
+    if(numParamDetermDeps > 0) {
+      keep_paramDetermDeps <- logical(numParamDetermDeps)
+      for(i in seq_along(paramDetermDeps)) {
+        nextDeps <- model$getDependencies(paramDetermDeps[i])
+        keep_paramDetermDeps[i] <- any(nextDeps %in% calcNodesOther)
+      }
+      paramDetermDeps <- paramDetermDeps[keep_paramDetermDeps]
+    }
+    calcNodesOther <- model$expandNodeNames(c(paramDetermDeps, calcNodesOther), sort = TRUE)
+  }
+
+  # 7. Do the splitting into sets (if given) or conditionally independent sets (if TRUE)
+  givenNodes <- NULL
+  reSets <- list()
+  if(length(randomEffectsNodes)) {
+    if(isFALSE(split)) {
+      reSets <- list(randomEffectsNodes)
+    } else {
+      if(isTRUE(split)) {
+        # givenNodes should only be stochastic
+        givenNodes <- setdiff(c(paramNodes, calcNodes),
+                              c(randomEffectsNodes,
+                                model$getDependencies(randomEffectsNodes, determOnly=TRUE)))
+        reSets <- model$getConditionallyIndependentSets(
+          nodes = randomEffectsNodes, givenNodes = givenNodes,
+          unknownAsGiven = TRUE)
+      }
+      else if(is.numeric(split)){
+        reSets <- split(randomEffectsNodes, split)
+      }
+      else stop("setupMargNodes: Invalid value for `split`")
+    }
+  }
+  list(paramNodes = paramNodes,
+       randomEffectsNodes = randomEffectsNodes,
+       calcNodes = calcNodes,
+       calcNodesOther = calcNodesOther,
+       givenNodes = givenNodes,
+       randomEffectsSets = reSets
+  )
+}
+
+
+splitLatents <- function(model, paramNodes, latentNodes, calcNodes, calcNodesOther,
+                         control = list()) {
+  stochNodes <- model$getNodeNames(stochOnly = TRUE, includeData = FALSE)
+  discreteStochNodes <- model$isDiscrete(stochNodes)
+  if (any(discreteStochNodes))
+    stop("splitLatents: found discrete non-data stochastic nodes in processing nodes for quadrature-based posterior approximation: ",
+         paste0(stochNodes[discreteStochNodes], collapse = ", "),
+         ". Discrete non-data stochastic nodes cannot be handled by the posterior approximation algorithm.")
+  split <- extractControlElement(control, "split", TRUE)
+  check <- extractControlElement(control, "check", TRUE)
+  margNodes <- setupMargNodes(model = model, paramNodes = paramNodes, randomEffectsNodes = latentNodes,
+                              calcNodes = calcNodes, calcNodesOther = calcNodesOther, split = split, check = check)
+  if (missing(paramNodes) && missing(latentNodes)) {
+    if (!missing(calcNodes) || !missing(calcNodesOther))
+      messageIfVerbose(" [Note] Ignoring provided `calcNodes` and `calcNodesOther` because `paramNodes` and `latentNodes` were not provided and are being determined automatically.")
+    paramNodes <- margNodes$paramNodes
+    latentNodes <- margNodes$randomEffectsNodes
+    deps <- model$getDependencies(latentNodes, includeData = FALSE, self = FALSE)
+    ## By default, we treat "siblings" of latent nodes as latents.
+    ## This attempts to include fixed effects among the latents,
+    ## along with random effects. 
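+    ## Illustrative sketch (hypothetical node names): with latents `b[j]`,
+    ## `mu[i] <- b[g[i]] + beta * x[i]`, and `y[i] ~ dnorm(mu[i], sd = 1)`,
+    ## the deterministic `mu[i]` is a dependency of the latents and `beta` is
+    ## its stochastic parent, so `beta` is moved from `paramNodes` into
+    ## `latentNodes` below.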
+ newLatents <- model$getParents(deps, stochOnly = TRUE, includeData = FALSE) + paramNodes <- setdiff(paramNodes, newLatents) + latentNodes <- unique(c(latentNodes, newLatents)) + margNodes <- setupMargNodes(model = model, paramNodes = paramNodes, + randomEffectsNodes = latentNodes, split = split, check = check) + } + + return(margNodes) +} + diff --git a/packages/nimble/man/buildAGHQGrid.Rd b/packages/nimble/man/buildAGHQGrid.Rd deleted file mode 100644 index 69d877492..000000000 --- a/packages/nimble/man/buildAGHQGrid.Rd +++ /dev/null @@ -1,101 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/QuadratureGrids.R -\name{buildAGHQGrid} -\alias{buildAGHQGrid} -\title{Build Adaptive Gauss-Hermite Quadrature Grid} -\arguments{ -\item{d}{Dimension of quadrature grid being requested.} - -\item{nQuad}{Number of quadrature nodes requested on build.} -} -\description{ -Create quadrature grid for use in AGHQuad methods in Nimble. -} -\details{ -This function is used by used by \code{buildOneAGHQuad1D} -and \code{buildOneAGHQuad} create the quadrature grid using -adaptive Gauss-Hermite quadrature. Handles single or multiple dimension -grids and computes both grid locations and weights. Additionally, acts -as a cache system to do transformations, and return marginalized log density. - -Any of the input node vectors, when provided, will be processed using - \code{nodes <- model$expandNodeNames(nodes)}, where \code{nodes} may be - \code{paramNodes}, \code{randomEffectsNodes}, and so on. This step allows - any of the inputs to include node-name-like syntax that might contain - multiple nodes. For example, \code{paramNodes = 'beta[1:10]'} can be - provided if there are actually 10 scalar parameters, 'beta[1]' through - 'beta[10]'. The actual node names in the model will be determined by the - \code{exapndNodeNames} step. - -Available methods include - -\itemize{ - - \item \code{buildAGHQ}. Builds a adaptive Gauss-Hermite quadrature grid in d dimensions. - Calls \code{buildAGHQOne} to build the one dimensional grid and then expands in each dimension. - Some numerical issues occur in Eigen decomposition making the grid weights only accurate up to - 35 quadrature nodes. - - \item Options to get internally cached values are \code{getGridSize}, - \code{getModeIndex} for when there are an odd number of quadrature nodes, - \code{getLogDensity} for the cached values, \code{getAllNodes} for the - quadrature grids, \code{getNodes} for getting a single indexed nodes, - \code{getAllNodesTransformed} for nodes transformed to the parameter scale, - \code{getNodesTransformed} for a single transformed node, \code{getAllWeights} - to get all quadrature weights, \code{getWeights} single indexed weight. - - \item \code{transformGrid(cholNegHess, inner_mode, method)} transforms - the grid using either cholesky trasnformations, - as default, or spectral that makes use of the Eigen decomposition. For a single - dimension \code{transformGrid1D} is used. - - \item As the log density is evaluated externally, it is saved via \code{saveLogDens}, - which then is summed via \code{quadSum}. - - \item \code{buildGrid} builds the grid the initial time and is only run once in code. After, - the user must choose to \code{setGridSize} to update the grid size. - - - \item \code{check}. 
If TRUE (default), a warning is issued if - \code{paramNodes}, \code{randomEffectsNodes} and/or \code{calcNodes} - are provided but seek to have missing elements or unnecessary - elements based on some default inspection of the model. If - unnecessary warnings are emitted, simply set \code{check=FALSE}. - - \item \code{innerOptimControl}. A list of control parameters for the inner - optimization of Laplace approximation using \code{optim}. See - 'Details' of \code{\link{optim}} for further information. - - \item \code{innerOptimMethod}. Optimization method to be used in - \code{optim} for the inner optimization. See 'Details' of - \code{\link{optim}}. Currently \code{optim} in NIMBLE supports: - "\code{Nelder-Mead}", "\code{BFGS}", "\code{CG}", and - "\code{L-BFGS-B}". By default, method "\code{CG}" is used when - marginalizing over a single (scalar) random effect, and "\code{BFGS}" - is used for multiple random effects being jointly marginalized over. - - \item \code{innerOptimStart}. Choice of starting values for the inner - optimization. This could be \code{"last"}, \code{"last.best"}, or a - vector of user provided values. \code{"last"} means the most recent - random effects values left in the model will be used. When finding - the MLE, the most recent values will be the result of the most recent - inner optimization for Laplace. \code{"last.best"} means the random - effects values corresponding to the largest Laplace likelihood (from - any call to the \code{calcLaplace} or \code{calcLogLik} method, - including during an MLE search) will be used (even if it was not the - most recent Laplace likelihood). By default, the initial random - effects values will be used for inner optimization. - - \item \code{outOptimControl}. A list of control parameters for maximizing - the Laplace log-likelihood using \code{optim}. See 'Details' of - \code{\link{optim}} for further information. -} -} -\references{ -Golub, G. H. and Welsch, J. H. (1969). Calculation of Gauss Quadrature Rules. -Mathematics of Computation 23 (106): 221-230. - -Liu, Q. and Pierce, D. A. (1994). A Note on Gauss-Hermite Quadrature. Biometrika, 81(3) 624-629. - -Jackel, P. (2005). A note on multivariate Gauss-Hermite quadrature. London: ABN-Amro. Re. -} diff --git a/packages/nimble/man/laplace.Rd b/packages/nimble/man/laplace.Rd deleted file mode 100644 index e68235fa1..000000000 --- a/packages/nimble/man/laplace.Rd +++ /dev/null @@ -1,696 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Laplace.R -\name{buildLaplace} -\alias{buildLaplace} -\alias{buildAGHQ} -\alias{laplace} -\alias{Laplace} -\alias{AGHQuad} -\alias{AGHQ} -\title{Laplace approximation and adaptive Gauss-Hermite quadrature} -\usage{ -buildLaplace( - model, - paramNodes, - randomEffectsNodes, - calcNodes, - calcNodesOther, - control = list() -) - -buildAGHQ( - model, - nQuad = 1, - paramNodes, - randomEffectsNodes, - calcNodes, - calcNodesOther, - control = list() -) -} -\arguments{ -\item{model}{a NIMBLE model object, such as returned by \code{nimbleModel}. -The model must have automatic derivatives (AD) turned on, e.g. by using -\code{buildDerivs=TRUE} in \code{nimbleModel}.} - -\item{paramNodes}{a character vector of names of parameter nodes in the -model; defaults are provided by \code{\link{setupMargNodes}}. 
-Alternatively, \code{paramNodes} can be a list in the format returned by -\code{setupMargNodes}, in which case \code{randomEffectsNodes}, -\code{calcNodes}, and \code{calcNodesOther} are not needed (and will be -ignored).} - -\item{randomEffectsNodes}{a character vector of names of continuous -unobserved (latent) nodes to marginalize (integrate) over using Laplace/AGHQ -approximation; defaults are provided by \code{\link{setupMargNodes}}.} - -\item{calcNodes}{a character vector of names of nodes for calculating the -integrand for Laplace/AGHQ approximation; defaults are provided by -\code{\link{setupMargNodes}}. There may be deterministic nodes between -\code{paramNodes} and \code{calcNodes}. These will be included in -calculations automatically and thus do not need to be included in -\code{calcNodes} (but there is no problem if they are).} - -\item{calcNodesOther}{a character vector of names of nodes for calculating -terms in the log-likelihood that do not depend on any -\code{randomEffectsNodes}, and thus are not part of the marginalization, -but should be included for purposes of finding the MLE. This defaults to -stochastic nodes that depend on \code{paramNodes} but are not part of and -do not depend on \code{randomEffectsNodes}. There may be deterministic -nodes between \code{paramNodes} and \code{calcNodesOther}. These will be -included in calculations automatically and thus do not need to be included -in \code{calcNodesOther} (but there is no problem if they are).} - -\item{control}{a named list for providing additional settings used in Laplace/AGHQ -approximation. See \code{control} section below. Most of these can be -updated later with the `updateSettings` method.} - -\item{nQuad}{number of quadrature points for AGHQ (in one dimension). Laplace approximation is -AGHQ with `nQuad=1`. Only odd numbers of nodes really -make sense. Often only one or a few nodes can achieve high accuracy. A maximum of -35 nodes is supported. Note that for multivariate quadratures, the number -of nodes will be (number of dimensions)^nQuad.} -} -\description{ -Build a Laplace or AGHQ approximation algorithm for a given NIMBLE model. -} -\section{\code{buildLaplace} and \code{buildAGHQ}}{ - - -\code{buildLaplace} creates an object that can run Laplace approximation - for a given model or part of a model. \code{buildAGHQ} creates an object - that can run adaptive Gauss-Hermite quadrature (AGHQ, sometimes called - "adaptive Gaussian quadrature") for a given model or part of a model. - Laplace approximation is AGHQ with one quadrature point, hence - `buildLaplace` simply calls `buildAGHQ` with `nQuad=1`. These methods - approximate the integration over continuous random effects in a - hierarchical model to calculate the (marginal) likelihood. - -\code{buildAGHQ} and \code{buildLaplace} will by default (unless changed -manually via `control$split`) determine from the model which random effects -can be integrated over (marginalized) independently. For example, in a GLMM -with a grouping factor and an independent random effect intercept for each -group, the random effects can be marginalized as a set of univariate -approximations rather than one multivariate approximation. On the other hand, -correlated or nested random effects would require multivariate marginalization. - -Maximum likelihood estimation is available for Laplace approximation -(`nQuad=1`) with univariate or multivariate integrations. 
With `nQuad > 1`, -maximum likelihood estimation is available only if all integrations are -univariate (e.g., a set of univariate random effects). If there are -multivariate integrations, these can be calculated at chosen input parameters -but not maximized over parameters. For example, one can find the MLE based on -Laplace approximation and then increase `nQuad` (using the `updateSettings` -method below) to check on accuracy of the marginal log likelihood at the MLE. - -Beware that quadrature will use `nQuad^k` quadrature points, where `k` is the -dimension of each integration. Therefore quadrature for `k` greater that 2 or -3 can be slow. As just noted, `buildAGHQ` will determine independent -dimensions of quadrature, so it is fine to have a set of univariate random -effects, as these will each have k=1. Multivariate quadrature (k>1) is only -necessary for nested, correlated, or otherwise dependent random effects. - -The recommended way to find the maximum likelihood estimate and associated -outputs is by calling \code{\link{runLaplace}} or \code{\link{runAGHQ}}. The -input should be the compiled Laplace or AGHQ algorithm object. This would be -produced by running \code{\link{compileNimble}} with input that is the result -of \code{buildLaplace} or \code{buildAGHQ}. - -For more granular control, see below for methods \code{findMLE} and - \code{summary}. See function \code{\link{summaryLaplace}} for an easier way - to call the \code{summary} method and obtain results that include node - names. These steps are all done within \code{runLaplace} and - \code{runAGHQ}. - -The NIMBLE User Manual at r-nimble.org also contains an example of Laplace -approximation. -} - -\section{How input nodes are processed}{ - - -\code{buildLaplace} and \code{buildAGHQ} make good tries at deciding what -to do with the input model and any (optional) of the node arguments. However, -random effects (over which approximate integration will be done) can be -written in models in multiple equivalent ways, and customized use cases may -call for integrating over chosen parts of a model. Hence, one can take full -charge of how different parts of the model will be used. - -Any of the input node vectors, when provided, will be processed using - \code{nodes <- model$expandNodeNames(nodes)}, where \code{nodes} may be - \code{paramNodes}, \code{randomEffectsNodes}, and so on. This step allows - any of the inputs to include node-name-like syntax that might contain - multiple nodes. For example, \code{paramNodes = 'beta[1:10]'} can be - provided if there are actually 10 scalar parameters, 'beta[1]' through - 'beta[10]'. The actual node names in the model will be determined by the - \code{exapndNodeNames} step. - -In many (but not all) cases, one only needs to provide a NIMBLE model object - and then the function will construct reasonable defaults necessary for - Laplace approximation to marginalize over all continuous latent states - (aka random effects) in a model. The default values for the four groups of - nodes are obtained by calling \code{\link{setupMargNodes}}, whose arguments - match those here (except for a few arguments which are taken from control - list elements here). - -\code{setupMargNodes} tries to give sensible defaults from - any combination of \code{paramNodes}, \code{randomEffectsNodes}, - \code{calcNodes}, and \code{calcNodesOther} that are provided. 
For example, - if you provide only \code{randomEffectsNodes} (perhaps you want to - marginalize over only some of the random effects in your model), - \code{setupMargNodes} will try to determine appropriate choices for the - others. - -\code{setupMargNodes} also determines which integration dimensions are -conditionally independent, i.e., which can be done separately from each -other. For example, when possible, 10 univariate random effects will be split -into 10 univariate integration problems rather than one 10-dimensional -integration problem. - -The defaults make general assumptions such as that - \code{randomEffectsNodes} have \code{paramNodes} as parents. However, The - steps for determining defaults are not simple, and it is possible that they - will be refined in the future. It is also possible that they simply don't - give what you want for a particular model. One example where they will not - give desired results can occur when random effects have no prior - parameters, such as `N(0,1)` nodes that will be multiplied by a scale - factor (e.g. sigma) and added to other explanatory terms in a model. Such - nodes look like top-level parameters in terms of model structure, so - you must provide a \code{randomEffectsNodes} argument to indicate which - they are. - -It can be helpful to call \code{setupMargNodes} directly to see exactly how - nodes will be arranged for Laplace approximation. For example, you may want - to verify the choice of \code{randomEffectsNodes} or get the order of - parameters it has established to use for making sense of the MLE and - results from the \code{summary} method. One can also call - \code{setupMargNodes}, customize the returned list, and then provide that - to \code{buildLaplace} as \code{paramNodes}. In that case, - \code{setupMargNodes} will not be called (again) by \code{buildLaplace}. - -If \code{setupMargNodes} is emitting an unnecessary warning, simply use - \code{control=list(check=FALSE)}. -} - -\section{Managing parameter transformations that may be used internally}{ - - -If any \code{paramNodes} (parameters) or \code{randomEffectsNodes} (random - effects / latent states) have constraints on the range of valid values - (because of the distribution they follow), they will be used on a - transformed scale determined by \code{parameterTransform}. This means the - Laplace approximation itself will be done on the transformed scale for - random effects and finding the MLE will be done on the transformed scale - for parameters. For parameters, prior distributions are not included in - calculations, but they are used to determine valid parameter ranges and - hence to set up any transformations. For example, if \code{sigma} is a - standard deviation, you can declare it with a prior such as \code{sigma ~ - dhalfflat()} to indicate that it must be greater than 0. - -For default determination of when transformations are needed, all parameters - must have a prior distribution simply to indicate the range of valid - values. For a param \code{p} that has no constraint, a simple choice is - \code{p ~ dflat()}. 
-} - -\section{Understanding inner and outer optimizations}{ - - -Note that there are two numerical optimizations when finding maximum -likelihood estimates with a Laplace or (1D) AGHQ algorithm: (1) maximizing -the joint log-likelihood of random effects and data given a parameter value -to construct the approximation to the marginal log-likelihood at the given -parameter value; (2) maximizing the approximation to the marginal -log-likelihood over the parameters. In what follows, the prefix 'inner' -refers to optimization (1) and 'outer' refers to optimization (2). Currently -both optimizations default to using method \code{"nlminb"}. However, one can -use other optimizers or simply run optimization (2) manually from R; see the -example below. In some problems, choice of inner and/or outer optimizer can -make a big difference for obtaining accurate results, especially for standard -errors. Hence it is worth experimenting if one is in doubt. -} - -\section{\code{control} list arguments}{ - - -The \code{control} list allows additional settings to be made using named -elements of the list. Most (or all) of these can be updated later using the -`updateSettings` method. Supported elements include: - -\itemize{ - - \item \code{split}. If TRUE (default), \code{randomEffectsNodes} will be - split into conditionally independent sets if possible. This - facilitates more efficient Laplace or AGHQ approximation because each - conditionally independent set can be marginalized independently. If - FALSE, \code{randomEffectsNodes} will be handled as one multivariate - block, with one multivariate approximation. If \code{split} is a - numeric vector, \code{randomEffectsNodes} will be split by calling - \code{split}(\code{randomEffectsNodes}, \code{control$split}). The - last option allows arbitrary control over how - \code{randomEffectsNodes} are blocked. - - \item \code{check}. If TRUE (default), a warning is issued if - \code{paramNodes}, \code{randomEffectsNodes} and/or \code{calcNodes} - are provided but seem to have missing or unnecessary - elements based on some default inspections of the model. If - unnecessary warnings are emitted, simply set \code{check=FALSE}. - - \item \code{innerOptimControl}. A list (either an R list or a - `optimControlNimbleList`) of control parameters for the inner - optimization of Laplace approximation using \code{nimOptim}. See - 'Details' of \code{\link{nimOptim}} for further information. Default - is `nimOptimDefaultControl()`. - - \item \code{innerOptimMethod}. Optimization method to be used in - \code{nimOptim} for the inner optimization. See 'Details' of - \code{\link{nimOptim}}. Currently \code{nimOptim} in NIMBLE supports: - \code{"Nelder-Mead"}", \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"}, - \code{"nlminb"}, \code{"bobyqa"}, and user-provided optimizers. By default, method - \code{"nlminb"} is used for both univariate and multivariate cases. For - \code{"nlminb"}, \code{"bobyqa"}, or user-provided optimizers, only a subset of - elements of the \code{innerOptimControlList} are supported. (Note - that control over the outer optimization method is available as an - argument to `findMLE`). Choice of optimizers can be important and so - can be worth exploring. - - \item \code{innerOptimStart}. Method for determining starting values for - the inner optimization. Options are: - -\itemize{ - -\item \code{"last.best"} (default): use optimized random effects values corresponding to - the best outer optimization (i.e. 
the largest marginal log likelihood value) so far - for each conditionally independent part of the approximation; - -\item \code{"last"}: use the result of the last inner optimization; - -\item \code{"zero"}: use all zeros; - -\item \code{"constant"}: always use the same values, determined by - \code{innerOptimStartValues}; - -\item \code{"random"}: randomly draw new starting values from the - model (i.e., from the prior); - -\item \code{"model"}: use values for random effects stored in the - model, which are determined from the first call. - -} - - Note that \code{"model"} and \code{"zero"} are shorthand for - \code{"constant"} with particular choices of - \code{innerOptimStartValues}. Note that \code{"last"} and - \code{"last.best"} require a choice for the very first values, which will - come from \code{innerOptimStartValues}. The default is - \code{innerOptimStart="zero"} and may change in the future. - - \item \code{innerOptimStartValues}. Values for some of - \code{innerOptimStart} approaches. If a scalar is provided, that - value is used for all elements of random effects for each - conditionally independent set. If a vector is provided, it must be - the length of *all* random effects. If these are named (by node - names), the names will be used to split them correctly among each - conditionally independent set of random effects. If they are not - named, it is not always obvious what the order should be because it - may depend on the conditionally independent sets of random - effects. It should match the order of names returned as part of - `summaryLaplace`. - - \item \code{innerOptimWarning}. If FALSE (default), do not emit warnings - from the inner optimization. Optimization methods may sometimes emit a - warning such as for bad parameter values encountered during the - optimization search. Often, a method can recover and still find the - optimum. In the approximations here, sometimes the inner optimization - search can fail entirely, yet the outer optimization see this as one failed - parameter value and can recover. Hence, it is often desirable to silence - warnings from the inner optimizer, and this is done by default. Set - \code{innerOptimWarning=TRUE} to see all warnings. - - \item \code{useInnerCache}. If TRUE (default), use caching system for - efficiency of inner optimizations. The caching system records one set of - previous parameters and uses the corresponding results if those parameters - are used again (e.g., in a gradient call). This should generally not be - modified. - - \item \code{outerOptimMethod}. Optimization method to be used in - \code{nimOptim} for the outer optimization. See 'Details' of - \code{\link{nimOptim}}. Currently \code{nimOptim} in NIMBLE supports: - \code{"Nelder-Mead"}", \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"}, - \code{"nlminb"}, \code{"bobyqa"}, and user-provided optimizers. By default, method - \code{"nlminb"} is used for both univariate and multivariate cases, - although some problems may benefit from other choices. For - \code{"nlminb"}, \code{"bobyqa"}, or user-provided optimizers, only a subset of - elements of the \code{innerOptimControlList} are supported. (Note - that control over the outer optimization method is available as an - argument to `findMLE`). Choice of optimizers can be important and so - can be worth exploring. - -\item \code{outerOptimControl}. A list of control parameters for maximizing - the Laplace log-likelihood using \code{nimOptim}. See 'Details' of - \code{\link{nimOptim}} for further information. 
- - \item \code{computeMethod}. There are three approaches available for - internal details of how the approximations, and specifically derivatives - involved in their calculation, are handled. These are labeled simply 1, 2, - and 3, and the default is 2. The relative performance of the methods will - depend on the specific model. Users wanting to explore efficiency can try - switching from method 2 (default) to methods 1 or 3 and comparing - performance. The first Laplace approximation with each method will be - (much) slower than subsequent Laplace approximations. Further details are - not provided at this time. - - \item \code{gridType} (relevant only \code{nQuad>1}). For multivariate AGHQ, - a grid must be constructed based on the Hessian at the inner mode. Options - include "cholesky" (default) and "spectral" (i.e., eigenvectors and - eigenvalues) for the corresponding matrix decompositions on which the grid - can be based. - -} # end itemize -} - -\section{Available methods}{ - - -The object returned by \code{buildLaplace} or \code{buildAGHQ} is a nimbleFunction object with -numerous methods (functions). Here these are described in three tiers of user -relevance. -} - -\section{Most useful methods}{ - - -The most relevant methods to a user are: - -\itemize{ - -\item \code{calcLogLik(p, trans=FALSE)}. Calculate the approximation to the - marginal log-likelihood function at parameter value \code{p}, which (if - \code{trans} is FALSE) should match the order of \code{paramNodes}. For - any non-scalar nodes in \code{paramNodes}, the order within the node is - column-major. The order of names can be obtained from method - \code{getNodeNamesVec(TRUE)}. Return value is the scalar (approximate, - marginal) log likelihood. - - If \code{trans} is TRUE, then \code{p} is the vector of parameters on - the transformed scale, if any, described above. In this case, the - parameters on the original scale (as the model was written) will be - determined by calling the method \code{pInverseTransform(p)}. Note that - the length of the parameter vector on the transformed scale might not - be the same as on the original scale (because some constraints of - non-scalar parameters result in fewer free transformed parameters than - original parameters). - -\item \code{calcLaplace(p, trans)}. This is the same as \code{calcLogLik} but - requires that the approximation be Laplace (i.e \code{nQuad} is 1), - and results in an error otherwise. - -\item \code{findMLE(pStart, hessian)}. Find the maximum likelihood - estimates of parameters using the approximated marginal likelihood. - This can be used if \code{nQuad} is 1 (Laplace case) or if - \code{nQuad>1} and all marginalizations involve only univariate - random effects. Arguments are \code{pStart}: initial parameter - values (defaults to parameter values currently in the model); - and \code{hessian}: whether to calculate and return the - Hessian matrix (defaults to \code{TRUE}, which is required for - subsequent use of \code{summary} method). Second derivatives in the - Hessian are determined by finite differences of the gradients - obtained by automatic differentiation (AD). Return value is a - nimbleList of type \code{optimResultNimbleList}, similar to what is - returned by R's optim. See \code{help(nimOptim)}. Note that - parameters (\code{par}) are returned for the natural parameters, i.e. how - they are defined in the model. But the \code{hessian}, if requested, is - computed for the parameters as transformed for optimization if - necessary. 
Hence one must be careful interpreting `hessian` if any - parameters have constraints, and the safest next step is to use the - \code{summary} method or \code{summaryLaplace} function. - -\item \code{summary(MLEoutput, originalScale, randomEffectsStdError, - jointCovariance)}. Summarize the maximum likelihood estimation - results, given object \code{MLEoutput} that was returned by - \code{findMLE}. The summary can include a covariance matrix for the - parameters, the random effects, or both), and these can be returned on - the original parameter scale or on the (potentially) transformed - scale(s) used in estimation. It is often preferred instead to call - function (not method) `summaryLaplace` because this will attach - parameter and random effects names (i.e., node names) to the results. - -In more detail, \code{summary} accepts the following optional arguments: - - \itemize{ - - \item \code{originalScale}. Logical. If TRUE, the function returns - results on the original scale(s) of parameters and random effects; - otherwise, it returns results on the transformed scale(s). If there - are no constraints, the two scales are identical. Defaults to TRUE. - - \item \code{randomEffectsStdError}. Logical. If TRUE, standard - errors of random effects will be calculated. - Defaults to TRUE. - - \item \code{jointCovariance}. Logical. If TRUE, the joint - variance-covariance matrix of the parameters and the random effects - will be returned. If FALSE, the variance-covariance matrix of the - parameters will be returned. Defaults to FALSE. - - } - - The object returned by \code{summary} is an \code{AGHQuad_summary} - nimbleList with elements: - - \itemize{ - - \item \code{params}. A nimbleList that contains estimates and - standard errors of parameters (on the original or transformed - scale, as chosen by \code{originalScale}). - - \item \code{randomEffects}. A nimbleList that contains estimates of - random effects and, if requested - (\code{randomEffectsStdError=TRUE}) their standard errors, on - original or transformed scale. Standard errors are calculated - following the generalized delta method of Kass and Steffey (1989). - - \item \code{vcov}. If requested (i.e. - \code{jointCovariance=TRUE}), the joint variance-covariance - matrix of the parameters and random effects, on original or - transformed scale. If \code{jointCovariance=FALSE}, the - covariance matrix of the parameters, on original or transformed - scale. - - \item \code{scale}. \code{"original"} or \code{"transformed"}, the - scale on which results were requested. - } - } -} - -\section{Methods for more advanced uses}{ - - -Additional methods to access or control more details of the Laplace/AGHQ -approximation include: - -\itemize{ - - \item \code{updateSettings}. This provides a single function through which - many of the settings described above (mostly for the \code{control} list) - can be later changed. Options that can be changed include: - \code{innerOptimMethod}, \code{innerOptimStart}, - \code{innerOptimStartValues}, \code{useInnerCache}, \code{nQuad}, - \code{gridType}, \code{innerOptimControl}, \code{outerOptimMethod}, - \code{outerOptimControl}, and \code{computeMethod}. - For \code{innerOptimStart}, method "zero" cannot be - specified but can be achieved by choosing method "constant" with - \code{innerOptimStartValues=0}. Only provided options will be modified. 
The - exceptions are \code{innerOptimControl}, \code{outerOptimControl}, which - are replaced only when \code{replace_innerOptimControl=TRUE} or - \code{replace_outerOptimControl=TRUE}, respectively. - - \item \code{getNodeNamesVec(returnParams)}. Return a vector (>1) of names - of parameters/random effects nodes, according to \code{returnParams = - TRUE/FALSE}. Use this if there is more than one node. - - \item \code{getNodeNameSingle(returnParams)}. Return the name of a - single parameter/random effect node, according to \code{returnParams = - TRUE/FALSE}. Use this if there is only one node. - - \item \code{checkInnerConvergence(message)}. Checks whether all internal - optimizers converged. Returns a zero if everything converged and one - otherwise. If \code{message = TRUE}, it will print more details about - convergence for each conditionally independent set. - - \item \code{gr_logLik(p, trans)}. Gradient of the (approximated) - marginal log-likelihood at parameter value \code{p}. Argument \code{trans} - is similar to that in \code{calcLaplace}. If there are multiple parameters, - the vector \code{p} is given in the order of parameter names returned by - \code{getNodeNamesVec(returnParams=TRUE)}. - - \item \code{gr_Laplace(p, trans)}. This is the same as \code{gr_logLik}. - - \item \code{otherLogLik(p)}. Calculate the \code{calcNodesOther} - nodes, which returns the log-likelihood of the parts of the model that are - not included in the Laplace or AGHQ approximation. - - \item \code{gr_otherLogLik(p)}. Gradient (vector of derivatives with - respect to each parameter) of \code{otherLogLik(p)}. Results should - match \code{gr_otherLogLik_internal(p)} but may be more efficient after - the first call. - -} -} - -\section{Internal or development methods}{ - - -Some methods are included for calculating the (approximate) marginal log -posterior density by including the prior distribution of the parameters. This -is useful for finding the maximum a posteriori probability (MAP) estimate. -Currently these are provided for point calculations without estimation methods. - -\itemize{ - - \item \code{calcPrior_p(p)}. Log density of prior distribution. - - \item \code{calcPrior_pTransformed(pTransform)}. Log density of prior distribution on transformed scale, includes the Jacobian. - - \item \code{calcPostLogDens(p)}. Marginal log posterior density in terms of the parameter p. - - \item \code{calcPostLogDens_pTransformed (pTransform)}. Marginal log posterior density in terms of the transformed - parameter, which includes the Jacobian transformation. - - \item \code{gr_postLogDens_pTransformed(pTransform)}. Graident of marginal log posterior density on the transformed scale. - Other available options that are used in the derivative for more flexible include \code{logDetJacobian(pTransform)} and - \code{gr_logDeJacobian(pTransform)}, as well as \code{gr_prior(p)}. -} - -Finally, methods that are primarily for internal use by other methods include: - -\itemize{ - - \item \code{gr_logLik_pTransformed}. Gradient of the Laplace - approximation (\code{calcLogLik_pTransformed(pTransform)}) at transformed - (unconstrained) parameter value \code{pTransform}. - - \item \code{pInverseTransform(pTransform)}. Back-transform the transformed - parameter value \code{pTransform} to original scale. - - \item \code{derivs_pInverseTransform(pTransform, order)}. Derivatives of - the back-transformation (i.e. inverse of parameter transformation) with - respect to transformed parameters at \code{pTransform}. 
Derivative order
-  is given by \code{order} (any of 0, 1, and/or 2).
-
-  \item \code{reInverseTransform(reTrans)}. Back-transform the transformed
-  random effects value \code{reTrans} to the original scale.
-
-  \item \code{derivs_reInverseTransform(reTrans, order)}. Derivatives of the
-  back-transformation (i.e., the inverse of the random effects transformation) with
-  respect to transformed random effects at \code{reTrans}. Derivative order
-  is given by \code{order} (any of 0, 1, and/or 2).
-
-  \item \code{optimRandomEffects(pTransform)}. Calculate the optimized
-  random effects given transformed parameter value \code{pTransform}. The
-  optimized random effects are the mode of the conditional distribution of
-  random effects given data at parameters \code{pTransform}, i.e., the
-  calculation of \code{calcNodes}.
-
-  \item \code{inverse_negHess(p, reTransform)}. Calculate the inverse of the
-  negative Hessian matrix of the joint (parameters and random effects)
-  log-likelihood with respect to transformed random effects, evaluated at
-  parameter value \code{p} and transformed random effects
-  \code{reTransform}.
-
-  \item \code{hess_logLik_wrt_p_wrt_re(p, reTransform)}. Calculate the
-  Hessian matrix of the joint log-likelihood with respect to parameters and
-  transformed random effects, evaluated at parameter value \code{p} and
-  transformed random effects \code{reTransform}.
-
-  \item \code{one_time_fixes()}. Users never need to run this. It is called
-  when necessary internally to fix dimensionality issues if there is only
-  one parameter in the model.
-
-  \item \code{calcLogLik_pTransformed(pTransform)}. Laplace approximation at
-  transformed (unconstrained) parameter value \code{pTransform}. To
-  make maximizing the Laplace likelihood unconstrained, an automated
-  transformation via \code{\link{parameterTransform}} is performed on
-  any parameters with constraints indicated by their priors (even
-  though the prior probabilities are not used).
-
-  \item \code{gr_otherLogLik_internal(p)}. Gradient (vector of
-  derivatives with respect to each parameter) of \code{otherLogLik(p)}.
-  This is obtained using automatic differentiation (AD) with single-taping.
-  The first call will always be slower than later calls.
-
-  \item \code{cache_outer_logLik(logLikVal)}. Save the marginal log likelihood value
-  to the inner Laplace marginalization functions to track the outer maximum internally.
-
-  \item \code{reset_outer_inner_logLik()}. Reset the internally saved maximum marginal log likelihood.
-
-  \item \code{get_inner_cholesky(atOuterMode = integer(0, default = 0))}. Returns the Cholesky factor
-  of the negative Hessian with respect to the random effects. If \code{atOuterMode = 1}, returns
-  the value at the overall best marginal likelihood; otherwise (\code{atOuterMode = 0}), returns the most recent value.
-
-  \item \code{get_inner_mode(atOuterMode = integer(0, default = 0))}. Returns the mode of the random effects
-  from either the most recent call to the inner quadrature functions (\code{atOuterMode = 0})
-  or at the overall best marginal log likelihood (\code{atOuterMode = 1}).
-
-}
-}
-
-\examples{
-pumpCode <- nimbleCode({
-  for (i in 1:N){
-    theta[i] ~ dgamma(alpha, beta)
-    lambda[i] <- theta[i] * t[i]
-    x[i] ~ dpois(lambda[i])
-  }
-  alpha ~ dexp(1.0)
-  beta ~ dgamma(0.1, 1.0)
-})
-pumpConsts <- list(N = 10, t = c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5))
-pumpData <- list(x = c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22))
-pumpInits <- list(alpha = 0.1, beta = 0.1, theta = rep(0.1, pumpConsts$N))
-pump <- nimbleModel(code = pumpCode, name = "pump", constants = pumpConsts,
-                    data = pumpData, inits = pumpInits, buildDerivs = TRUE)
-
-# Build Laplace approximation
-pumpLaplace <- buildLaplace(pump)
-
-\dontrun{
-# Compile the model
-Cpump <- compileNimble(pump)
-CpumpLaplace <- compileNimble(pumpLaplace, project = pump)
-# Calculate MLEs of parameters
-MLEres <- CpumpLaplace$findMLE()
-# Calculate estimates and standard errors for parameters and random effects on original scale
-allres <- CpumpLaplace$summary(MLEres, randomEffectsStdError = TRUE)
-
-# Change the settings and also illustrate runLaplace
-CpumpLaplace$updateSettings(innerOptimControl = list(maxit = 1000),
-                            replace_innerOptimControl = TRUE)
-newres <- runLaplace(CpumpLaplace)
-
-# Illustrate use of the component log likelihood and gradient functions to
-# run an optimizer manually from R.
-# Use nlminb to find MLEs
-MLEres.manual <- nlminb(c(0.1, 0.1),
-                        function(x) -CpumpLaplace$calcLogLik(x),
-                        function(x) -CpumpLaplace$gr_Laplace(x))
-}
-
-}
-\references{
-Kass, R. and Steffey, D. (1989). Approximate Bayesian inference in
-conditionally independent hierarchical models (parametric empirical Bayes
-models). \emph{Journal of the American Statistical Association}, 84(407),
-717-726.
-
-Liu, Q. and Pierce, D. A. (1994). A Note on Gauss-Hermite Quadrature. \emph{Biometrika}, 81(3), 624-629.
-
-Jackel, P. (2005). A note on multivariate Gauss-Hermite quadrature. London: \emph{ABN-Amro. Re.}
-
-Skaug, H. and Fournier, D. (2006). Automatic approximation of the marginal
-likelihood in non-Gaussian hierarchical models. \emph{Computational
-Statistics & Data Analysis}, 56, 699-709.
-}
-\author{
-Wei Zhang, Perry de Valpine, Paul van Dam-Bates
-}
diff --git a/packages/nimble/man/nimble-internal.Rd b/packages/nimble/man/nimble-internal.Rd
index d13744e18..28f358b37 100644
--- a/packages/nimble/man/nimble-internal.Rd
+++ b/packages/nimble/man/nimble-internal.Rd
@@ -76,8 +76,10 @@
 \alias{messageIfVerbose}
 \alias{calc_dmnormConjugacyContributions}
 \alias{calc_dmnormAltParams}
+\alias{calc_dmnorm_inv_ld_AltParams}
 \alias{calc_dwishAltParams}
 \alias{calc_dcatConjugacyContributions}
+\alias{PDinverse_logdet}
 \alias{CAR_calcM}
 \alias{CAR_calcC}
 \alias{CAR_calcCmatrix}
diff --git a/packages/nimble/man/nimble-package.Rd b/packages/nimble/man/nimble-package.Rd
index de7ea7191..a2cec6010 100644
--- a/packages/nimble/man/nimble-package.Rd
+++ b/packages/nimble/man/nimble-package.Rd
@@ -30,7 +30,7 @@ Authors:
   \item Claudia Wehrhahn Cortes (Bayesian nonparametrics system)
   \item Abel Rodríguez (Bayesian nonparametrics system)
   \item Duncan Temple Lang (packaging configuration)
-  \item Wei Zhang (Laplace approximation)
+  \item Wei Zhang (node marginalization)
   \item Sally Paganin (reversible jump MCMC)
   \item Joshua Hug (WAIC)
   \item Paul van Dam-Bates (Pólya-Gamma sampler, nimIntegrate, matrix exponential)
diff --git a/packages/nimble/man/runLaplace.Rd b/packages/nimble/man/runLaplace.Rd
index 304bd545a..460b1fc3b 100644
--- a/packages/nimble/man/runLaplace.Rd
+++ b/packages/nimble/man/runLaplace.Rd
@@ -1,92 +1,14 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Laplace.R
+% Please edit documentation in R/miscFunctions.R
 \name{runLaplace}
 \alias{runLaplace}
-\alias{runAGHQ}
-\title{Combine steps of running Laplace or adaptive Gauss-Hermite quadrature approximation}
+\title{Placeholder for runLaplace}
 \usage{
-runLaplace(
-  laplace,
-  pStart,
-  originalScale = TRUE,
-  randomEffectsStdError = TRUE,
-  jointCovariance = FALSE
-)
-
-runAGHQ(
-  AGHQ,
-  pStart,
-  originalScale = TRUE,
-  randomEffectsStdError = TRUE,
-  jointCovariance = FALSE
-)
+runLaplace(...)
 }
 \arguments{
-\item{laplace}{A (compiled or uncompiled) nimble Laplace approximation object
-returned from `buildLaplace` or `buildAGHQ`. These return the same type of
-approximation algorithm object. `buildLaplace` is simply `buildAGHQ`
-with `nQuad=1`.}
-
-\item{pStart}{Initial values for parameters to begin optimization search for
-the maximum likelihood estimates. If omitted, the values currently in the
-(compiled or uncompiled) model object will be used.}
-
-\item{originalScale}{If \code{TRUE}, return all results on the original scale
-of the parameters and/or random effects as written in the model. Otherwise,
-return all results on potentially unconstrained transformed scales that are
-used in the actual computations. Transformed scales (parameterizations) are
-used if any parameter or random effect has constraint(s) on its support
-(range of allowed values). Default = \code{TRUE}.}
-
-\item{randomEffectsStdError}{If \code{TRUE}, include standard errors for the
-random effects estimates. Default = \code{TRUE}.}
-
-\item{jointCovariance}{If \code{TRUE}, return the full joint covariance
-matrix (inverse of the Hessian) of parameters and random effects. Default =
-\code{FALSE}.}
-
-\item{AGHQ}{Same as \code{laplace}.}
-}
-\value{
-A list with elements \code{MLE} and \code{summary}.
-
-\code{MLE} is the result of the \code{findMLE} method, which contains the
-parameter estimates and Hessian matrix. This is considered raw output, and
-one should normally use instead the contents of \code{summary}.
(For example,
-note that the Hessian matrix in \code{MLE} may not correspond to the same
-scale as the parameter estimates if a transformation was used to operate in
-an unconstrained parameter space.)
-
-\code{summary} is the result of \code{summaryLaplace} (or equivalently
-\code{summaryAGHQ}), which contains parameter estimates and standard errors,
-and optionally other requested components. All results in this object will be
-on the same scale (parameterization), either original or transformed, as
-requested.
+\item{...}{arguments}
 }
 \description{
-Use an approximation (compiled or uncompiled) returned from
-`buildLaplace` or `buildAGHQ` to find the maximum likelihood estimate and return it
-with random effects estimates and/or standard errors.
-}
-\details{
-Adaptive Gauss-Hermite quadrature is a generalization of Laplace
-approximation. \code{runLaplace} simply calls \code{runAGHQ} and provides a
-convenient name.
-
-These functions manage the steps of calling the `findMLE` method to obtain
-the maximum likelihood estimate of the parameters and then the
-`summaryLaplace` function to obtain standard errors, (optionally) random
-effects estimates (conditional modes), their standard errors, and the full
-parameter-random effects covariance matrix.
-
-Note that for `nQuad > 1` (see \code{\link{buildAGHQ}}), i.e., AGHQ with
-higher order than Laplace approximation, maximum likelihood estimation is
-available only if all random effects integrations are univariate. With
-multivariate random effects integrations, one can use `nQuad > 1` only to
-calculate marginal log likelihoods at given parameter values. This is useful
-for checking the accuracy of the log likelihood at the MLE obtained for
-Laplace approximation (`nQuad == 1`). `nQuad` can be changed using the
-`updateSettings` method of the approximation object.
-
-See \code{\link{summaryLaplace}}, which is called for the summary components.
+This function has been moved to the `nimbleQuad` package.
 }
diff --git a/packages/nimble/man/setupMargNodes.Rd b/packages/nimble/man/setupMargNodes.Rd
index 6a3bee440..7fc1e1683 100644
--- a/packages/nimble/man/setupMargNodes.Rd
+++ b/packages/nimble/man/setupMargNodes.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Laplace.R
+% Please edit documentation in R/setupMargNodes.R
 \name{setupMargNodes}
 \alias{setupMargNodes}
 \title{Organize model nodes for marginalization}
diff --git a/packages/nimble/man/summaryLaplace.Rd b/packages/nimble/man/summaryLaplace.Rd
index 690a4098b..9e95fe5eb 100644
--- a/packages/nimble/man/summaryLaplace.Rd
+++ b/packages/nimble/man/summaryLaplace.Rd
@@ -1,71 +1,14 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Laplace.R
+% Please edit documentation in R/miscFunctions.R
 \name{summaryLaplace}
 \alias{summaryLaplace}
-\alias{summaryAGHQ}
-\title{Summarize results from Laplace or adaptive Gauss-Hermite quadrature approximation}
+\title{Placeholder for summaryLaplace}
 \usage{
-summaryLaplace(
-  laplace,
-  MLEoutput,
-  originalScale = TRUE,
-  randomEffectsStdError = TRUE,
-  jointCovariance = FALSE
-)
-
-summaryAGHQ(
-  AGHQ,
-  MLEoutput,
-  originalScale = TRUE,
-  randomEffectsStdError = TRUE,
-  jointCovariance = FALSE
-)
+summaryLaplace(...)
 }
 \arguments{
-\item{laplace}{The Laplace approximation object, typically the compiled one.
-This would be the result of compiling an object returned from -`buildLaplace`.} - -\item{MLEoutput}{The maximum likelihood estimate using Laplace or AGHQ, -returned from e.g. `approx$findMLE(...)`, where \code{approx} is the -algorithm object returned by `buildLaplace` or `buildAGHQ`, or (more -typically) the result of compiling that object with `compileNimble`. See -`help(buildLaplace)` for more information.} - -\item{originalScale}{Should results be returned using the original -parameterization in the model code (TRUE) or the potentially transformed -parameterization used internally by the Laplace approximation (FALSE). -Transformations are used for any parameters and/or random effects that have -constrained ranges of valid values, so that in the transformed parameter -space there are no constraints. (default = TRUE)} - -\item{randomEffectsStdError}{If TRUE, calculate the standard error of the -estimates of random effects values. (default = TRUE)} - -\item{jointCovariance}{If TRUE, calculate the joint covariance matrix of -the parameters and random effects together. If FALSE, calculate the -covariance matrix of the parameters. (default = FALSE)} - -\item{AGHQ}{Same as \code{laplace}. Note that `buildLaplace` and -`buildAGHQ` create the same kind of algorithm object that can be used -interchangeably. `buildLaplace` simply sets the number of quadrature points -(`nQuad`) to 1 to achieve Laplace approximation as a special case of AGHQ.} -} -\value{ -A list with data frames `params` and `randomEffects`, each with columns for -`estimate` and (possibly) `se` (standard error) and row names for model -nodes, a matrix `vcov` with the covariance matrix with row and column names, -and `originalScale` with the input value of `originalScale` so it is recorded -for later use if wanted. +\item{...}{arguments} } \description{ -Process the results of the `findMLE` method of a nimble Laplace or AGHQ approximation -into a more useful format. -} -\details{ -The numbers obtained by this function can be obtained more directly by -`approx$summary(...)`. The added benefit of `summaryLaplace` is to arrange -the results into data frames (for parameters and random effects), with row -names for the model nodes, and also adding row and column names to the -covariance matrix. +This function has been moved to the `nimbleQuad` package. 
} diff --git a/packages/nimble/tests/testthat/test-ADaghq.R b/packages/nimble/tests/testthat/test-ADaghq.R deleted file mode 100644 index c894e596d..000000000 --- a/packages/nimble/tests/testthat/test-ADaghq.R +++ /dev/null @@ -1,561 +0,0 @@ - -# Tests of AGH Quadrature approximation -source(system.file(file.path('tests', 'testthat', 'test_utils.R'), package = 'nimble')) -source(system.file(file.path('tests', 'testthat', 'AD_test_utils.R'), package = 'nimble')) -EDopt <- nimbleOptions("enableDerivs") -BMDopt <- nimbleOptions("buildModelDerivs") -nimbleOptions(enableDerivs = TRUE) -nimbleOptions(buildModelDerivs = TRUE) -nimbleOptions(allowDynamicIndexing = FALSE) - -test_that("AGH Quadrature Normal-Normal 1D works", { - set.seed(123) - n <- 50 - m <- nimbleModel(nimbleCode({ - # priors - b0 ~ dnorm(0, 1000) - sigma1 ~ dunif(0, 1000) - sigma2 ~ dunif(0, 1000) - for(i in 1:n){ - b[i] ~ dnorm(mean = 0, sd = sigma2) - mu[i] <- b0 + b[i] - y[i] ~ dnorm(mean = mu[i], sd = sigma1) - }}), data = list(y=rnorm(n, 5, sqrt(1 + 0.5^2))), constants = list(n=n), - inits = list(b0 = 3.5, sigma1 = 1, sigma2 = 1), buildDerivs = TRUE) - - mQuad <- buildAGHQ(model = m, nQuad = 5) - mLaplace <- buildAGHQ(model = m, nQuad = 1) - cm <- compileNimble(m) - cQL <- compileNimble(mQuad, mLaplace, project = m) - cmQuad <- cQL$mQuad - cmLaplace <- cQL$mLaplace - - ll.norm <- function(pars) - { - b0 <- pars[1] - sigma1 <- pars[2] - sigma2 <- pars[3] - ll <- 0 - for( i in seq_along(m$y) ) { - ll <- ll + dnorm( m$y[i], mean = b0, sd = sqrt(sigma1^2 + sigma2^2), log = TRUE ) - } - ll - } - gr.ll.norm <- function(pars) - { - b0 <- pars[1] - sigma1 <- pars[2] - sigma2 <- pars[3] - var0 <- sigma1^2+sigma2^2 - dll <- 0 - for( i in seq_along(m$y) ) { - di <- m$y[i]-b0 - dll <- dll + -0.5*(c(0, 2*sigma1, 2*sigma2)/var0) - c(-2*di/(2*var0), -0.5*(di)^2*var0^(-2)*2*sigma1, -0.5*(di)^2*var0^(-2)*2*sigma2 ) - } - dll - } - test.val1 <- c(5, 1, 0.5) - test.val2 <- c(2, 0.5, 0.1) - - ## Test against output from buildLaplace - expect_equal(cmLaplace$calcLogLik(test.val1), -72.5573684952759521, tol = 1e-10 ) - expect_equal(cmLaplace$calcLogLik(test.val2), -1000.9603427653298695, tol = 1e-10 ) - - ## Values from buildLaplace with testval1 and testval2 - grTestLaplace1 <- c(1.5385734890331042, -6.3490351165007235, -3.1745175582503542) - grTestLaplace2 <- c(584.3200662134241838, 3706.5010041520263258, 741.3001253250956779) - - ## Check Marginalization - logLik5 <- cmQuad$calcLogLik(test.val1) - logLikTru <- ll.norm(test.val1) - expect_equal(logLik5, logLikTru, tol = 1e-14) ## Should be very similar for Normal-Normal Case. - - logLik5 <- cmQuad$calcLogLik(test.val2) - logLikTru <- ll.norm(test.val2) - expect_equal(logLik5, logLikTru, tol = 1e-14) ## Should be very similar for Normal-Normal Case. - - ## Check Gradient of Marginalization - gr_quad51 <- cmQuad$gr_logLik(test.val1) - gr_laplace1 <- cmLaplace$gr_logLik(test.val1) - expect_equal(gr_quad51, gr_laplace1, tol = 1e-08) ## Should be very similar for Normal-Normal Case. - expect_equal(gr_laplace1, grTestLaplace1, tol = 1e-14) #1e-16) ## Compare against buildLaplace - - gr_quad52 <- cmQuad$gr_logLik(test.val2) - gr_laplace2 <- cmLaplace$gr_logLik(test.val2) - expect_equal(gr_quad52, gr_laplace2, tol = 1e-05) ## Approx more different for poor values. - expect_equal(gr_laplace2, grTestLaplace2, tol = 1e-04) #1e-16) ## Compare against buildLaplace. Should be equivalent. - - expect_equal(gr_quad52, gr.ll.norm(test.val2), tol = 1e-08) ## Approx more different for poor values. 
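## [Editorial sketch, not part of the original test: the analytic gradient
## gr.ll.norm() above can be spot-checked against a central finite difference
## in base R; the step size h is an arbitrary illustrative choice.]
h <- 1e-6
fd <- sapply(1:3, function(j) {
  step <- replace(numeric(3), j, h)  # unit perturbation of parameter j
  (ll.norm(test.val1 + step) - ll.norm(test.val1 - step)) / (2 * h)
})
## fd should agree with gr.ll.norm(test.val1) to roughly 1e-6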
- - opt <- cmQuad$findMLE(pStart = test.val1) ## Needs decent starting values. - mle.tru <- optim(test.val1, ll.norm, gr.ll.norm, control = list(fnscale = -1)) - expect_equal(opt$value, mle.tru$value, tol = 1e-8) # Same log likelihood. Diff parameter values. - - ## Check covariance? - - # Values from Laplace directly. - # mLaplace <- buildLaplace(model = m) - # cm <- compileNimble(m) - # cL <- compileNimble(mLaplace, project = m) - # x1 <- cL$Laplace(test.val1) - # x2 <- cL$Laplace(test.val2) - # g1 <- cL$gr_Laplace(test.val1) - # g2 <- cL$gr_Laplace(test.val2) - # sprintf("%.16f", x1) - # sprintf("%.16f", x2) -}) - -test_that("AGH Quadrature 1D Poisson-Gamma for checking nQuad", { - set.seed(123) - n <- 1 - m <- nimbleModel(nimbleCode({ - # priors - a ~ dunif(0, 1000) - b ~ dunif(0, 1000) - for(i in 1:n){ - lambda[i] ~ dgamma(a, b) - y[i] ~ dpois(lambda[i]) - }}), data = list(y=rpois(n, rgamma(n, 10, 2))), constants = list(n=n), - inits = list(a = 10, b = 2), buildDerivs = TRUE) - - ## Marginal model is equivalent to a negative binomial with this parameterization. - m.nb <- nimbleModel(nimbleCode({ - # priors - a ~ dunif(0, 1000) - b ~ dunif(0, 1000) - for(i in 1:n){ - y[i] ~ dnbinom(size=a, prob=b/(1+b)) - }}), data = list(y=m$y), constants = list(n=n), - inits = list(a = 10, b = 2), buildDerivs = TRUE) - - cm <- compileNimble(m) - mQuad <- buildAGHQ(model = m, nQuad = 20) - cmQuad <- compileNimble(mQuad, project = m) - test.val1 <- c(10, 2) - test.val2 <- c(1, 5) - cmQuad$updateSettings(innerOptimMethod = "nlminb") # The tolerances below happen to be for nlminb results - - ## Check Marginalization - logLik20 <- cmQuad$calcLogLik(test.val1) - m.nb$a <- test.val1[1]; m.nb$b <- test.val1[2] - m.nb$calculate() - logLikTru <- m.nb$calculate("y") - expect_equal(logLik20, logLikTru, tol = 1e-10) ## Should be very similar. - - ## Check Marginalization - logLik20.2 <- cmQuad$calcLogLik(test.val2) - m.nb$a <- test.val2[1]; m.nb$b <- test.val2[2] - m.nb$calculate() - logLikTru.2 <- m.nb$calculate("y") - expect_equal(logLik20.2, logLikTru.2, tol = 1e-8) ## Should be very similar. 
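## [Editorial sketch, not part of the original test: the Poisson-Gamma /
## negative-binomial identity this test relies on can be verified directly by
## integrating the gamma random effect out of the Poisson likelihood;
## aa, bb, yy are arbitrary illustrative values.]
aa <- 10; bb <- 2; yy <- 7
num <- integrate(function(l) dpois(yy, l) * dgamma(l, aa, rate = bb), 0, Inf)$value
all.equal(num, dnbinom(yy, size = aa, prob = bb / (1 + bb)))  ## TRUE up to integration error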
- - ## True Gradient - tru.ll.gr <- function(pars) - { - a <- pars[1] - b <- pars[2] - da <- digamma(a+m$y) + log(b) - digamma(a) - log(b + 1) - db <- a/b - (a + m$y)/(b+1) - return(c(da,db)) - } - - tru.gr <- tru.ll.gr(c(50,2)) - m.nb$a <- 50; m.nb$b <- 2; m.nb$calculate() - tru.logLik <- m.nb$calculate('y') - - ## Check node accuracy - ## Tolerance should decrease in loop, - for( i in 1:25 ) - { - cmQuad$updateSettings(nQuad=i) - expect_equal(cmQuad$calcLogLik(c(50,2)), tru.logLik, tol = 0.01^(sqrt(i))) - expect_equal(cmQuad$gr_logLik(c(50,2)), tru.gr, tol = 0.01^(i^0.4)) - } -}) - -test_that("AGH Quadrature 1D Binomial-Beta check 3 methods", { - set.seed(123) - n <- 50 - N <- 5 - m <- nimbleModel(nimbleCode({ - # priors - a ~ dgamma(1,1) - b ~ dgamma(1,1) - for(i in 1:n){ - p[i] ~ dbeta(a, b) - y[i] ~ dbinom(prob = p[i], size = N) - } - }), data = list(y = rbinom(n, N, rbeta(n, 10, 2))), - constants = list(N = N, n=n), inits = list(a = 10, b = 2), - buildDerivs = TRUE) - - cm <- compileNimble(m) - # mQuad <- buildLaplace(model = m) - mQuad <- buildAGHQ(model = m, nQuad = 5, control=list(innerOptimMethod="nlminb")) # tolerances set for this result - cmQuad <- compileNimble(mQuad, project = m) - - param.val <- c(7, 1) - - cmQuad$updateSettings(computeMethod=1) - ll.11 <- cmQuad$calcLogLik(param.val) - ll.12 <- cmQuad$calcLogLik(param.val+1) - gr.11 <- cmQuad$gr_logLik(param.val) - gr.12 <- cmQuad$gr_logLik(param.val+1) - cmQuad$updateSettings(computeMethod=2) - ll.21 <- cmQuad$calcLogLik(param.val) - ll.22 <- cmQuad$calcLogLik(param.val+1) - gr.21 <- cmQuad$gr_logLik(param.val) - gr.22 <- cmQuad$gr_logLik(param.val+1) - cmQuad$updateSettings(computeMethod=3) - ll.31 <- cmQuad$calcLogLik(param.val) - ll.32 <- cmQuad$calcLogLik(param.val+1) - gr.31 <- cmQuad$gr_logLik(param.val) - gr.32 <- cmQuad$gr_logLik(param.val+1) - - ## All the methods should return equivalent results, or at least nearly with some small - ## numerical differences from the calls to the AD. - expect_equal(ll.11, ll.21) - expect_equal(ll.11, ll.31) - expect_equal(ll.12, ll.22) - expect_equal(ll.12, ll.32) - expect_equal(gr.11, gr.21) - expect_equal(gr.11, gr.31) - expect_equal(gr.12, gr.22) - expect_equal(gr.12, gr.32) - - ## Check gradient and marginalization accuracy. - ll.betabin <- function(pars){ - a <- pars[1] - b <- pars[2] - ll <- 0 - for( i in seq_along(m$y)) - { - ll <- ll - lbeta(a,b) + lchoose(N, m$y[i]) + lbeta(a + m$y[i], b + N-m$y[i]) - } - ll - } - - dibeta <- function(a,b) - { - da <- digamma(a) - digamma(a+b) - db <- digamma(b) - digamma(a+b) - c(da, db) - } - - gr.betabin <- function(pars){ - a <- pars[1] - b <- pars[2] - dll <- 0 - for( i in seq_along(m$y)) - { - dll <- dll - dibeta(a,b) + dibeta(a + m$y[i], b + N-m$y[i]) - } - return(dll) - } - - cmQuad$updateSettings(computeMethod=2, nQuad=1) - ## Check Laplace against RTMB here: - #cmQuad$setQuadSize(1) - expect_equal(cmQuad$calcLogLik(param.val), -57.1448725555934729, 1e-06) - - ## Check against manual RTMB version 5 nodes. - cmQuad$updateSettings(nQuad=5) - expect_equal(cmQuad$calcLogLik(param.val), -54.7682946631443244, tol = 1e-06) ## Pretty close: - - ## Crank up the nodes to check accuracy. 15 nodes - cmQuad$updateSettings(nQuad=15) - expect_equal(cmQuad$calcLogLik(param.val), ll.betabin(param.val), tol = 1e-02) ## Accuracy is only slightly better. - expect_equal(cmQuad$gr_logLik(param.val), gr.betabin(param.val), tol = 1e-01) - - ## Lots of nodes. 35 nodes (our current max). 
- cmQuad$updateSettings(nQuad=35)
-  expect_equal(cmQuad$calcLogLik(param.val), ll.betabin(param.val), tol = 1e-5) ## Accuracy is not amazing but certainly better.
-  expect_equal(cmQuad$gr_logLik(param.val), gr.betabin(param.val), tol = 1e-03) ## Actually a case when we need a lot of quad points.
-
-  ## Quick check on Laplace here as those results are so bad.
-  # library(RTMB)
-  # dat <- list(y = m$y, N = N)
-  # pars <- list(loga = 0, logb = 0, logitp = rep(0, n))
-  # func <- function(pars){
-  #   getAll(pars, dat)
-  #   a <- exp(loga)
-  #   b <- exp(logb)
-  #   p <- 1/(1+exp(-logitp))
-  #   negll <- -sum(dbeta(p, a, b, log = TRUE))
-  #   negll <- negll - sum(dbinom(y, size = N, p = p, log = TRUE))
-  #   negll - sum(log(exp(-logitp)/(1+exp(-logitp))^2))
-  # }
-  # obj <- MakeADFun(func, pars, random = "logitp")
-  # obj$fn(log(param.val))
-})
-
-test_that("AGH Quadrature 1D Check MLE.", {
-  set.seed(123)
-  n <- 50
-  N <- 50
-  m <- nimbleModel(nimbleCode({
-    # priors
-    a ~ dgamma(1,1)
-    b ~ dgamma(1,1)
-    for(i in 1:n){
-      p[i] ~ dbeta(a, b)
-      y[i] ~ dbinom(p[i], N)
-    }
-  }), data = list(y = rbinom(n, N, rbeta(n, 10, 2))),
-  constants = list(N = N, n=n), inits = list(a = 10, b = 2),
-  buildDerivs = TRUE)
-
-  cm <- compileNimble(m)
-  mQuad <- buildAGHQ(model = m, nQuad = 5, control=list(innerOptimMethod="nlminb"))
-  cmQuad <- compileNimble(mQuad, project = m)
-
-  ## Check gradient and marginalization accuracy.
-  ll.betabin <- function(pars){
-    a <- exp(pars[1])
-    b <- exp(pars[2])
-    ll <- 0
-    for( i in seq_along(m$y)) ll <- ll - lbeta(a,b) + lchoose(N, m$y[i]) + lbeta(a + m$y[i], b + N-m$y[i])
-    ll
-  }
-
-  dibeta <- function(a,b)
-  {
-    da <- digamma(a) - digamma(a+b)
-    db <- digamma(b) - digamma(a+b)
-    c(da, db)
-  }
-
-  gr.betabin <- function(pars){
-    a <- exp(pars[1])
-    b <- exp(pars[2])
-    dll <- 0
-    for( i in seq_along(m$y)) dll <- dll - dibeta(a,b) + dibeta(a + m$y[i], b + N-m$y[i])
-    return(dll)
-  }
-
-  mle.tru <- optim(log(c(10,2)), ll.betabin, gr.betabin, control = list(fnscale = -1))
-  mle.par <- exp(mle.tru$par)
-
-  ## Check with 5 quad points.
-  mle.quad <- cmQuad$findMLE(pStart = c(10,2))
-  expect_equal(mle.quad$par, mle.par, tol = 1e-02)
-  expect_equal(mle.quad$value, mle.tru$value, tol = 1e-03)
-
-  ## Check with 35 quad points.
-  cmQuad$updateSettings(nQuad=35)
-  for(v in m$getVarNames()) cm[[v]] <- m[[v]]
-  cm$calculate()
-  mle.quad35 <- cmQuad$findMLE(pStart = c(10,2))
-  expect_equal(mle.quad35$par, mle.par, tol = 1e-04)
-  expect_equal(mle.quad35$value, mle.tru$value, tol = 1e-08)
-})
-
-test_that("AGH Quadrature Comparison to LME4 1 RE", {
-  set.seed(123)
-  n <- 50
-  J <- 10
-  nobs <- n*J
-  grp <- rep(1:n, each = J)
-  m <- nimbleModel(nimbleCode({
-    # priors
-    b0 ~ dnorm(0, 1000)
-    # sigma1 ~ dunif(0, 1000)
-    # sigma2 ~ dunif(0, 1000)
-    sigma1 ~ dgamma(1,1)
-    sigma2 ~ dgamma(1,1)
-    for(i in 1:n){
-      b[i] ~ dnorm(mean = 0, sd = sigma2)
-      mu[i] <- b0 + b[i]
-    }
-    for(i in 1:nobs){
-      y[i] ~ dnorm(mean = mu[grp[i]], sd = sigma1)
-    }}), constants = list(n=n, nobs=nobs, grp = grp),
-    inits = list(b = rnorm(n, 0, 0.5), b0 = 3.5, sigma1 = 1.5, sigma2 = 0.5), buildDerivs = TRUE)
-  m$simulate('y')
-  m$setData('y')
-  m$calculate()
-
-  cm <- compileNimble(m)
-  # N.B.
It is not clear that setting reltol values less than sqrt(.Machine$double.eps) is useful, so we may want to update this: - mQuad <- buildAGHQ(model = m, nQuad = 21, control = list(outerOptimControl = list(reltol = 1e-16))) - mLaplace <- buildAGHQ(model = m, nQuad = 1, control = list(outerOptimControl = list(reltol = 1e-16), - outerOptimMethod = 'BFGS')) - mQuad$updateSettings(innerOptimMethod="nlminb", outerOptimMethod="BFGS") - cQL <- compileNimble(mQuad, mLaplace, project = m) - cmQuad <- cQL$mQuad - cmLaplace <- cQL$mLaplace - - - # mod.lme4 <- lme4::lmer(m$y ~ 1 + (1|grp), REML = FALSE, - # control = lmerControl(optimizer= "optimx", optCtrl = list(method="L-BFGS-B"))) - # sprintf("%.16f", summary(mod.lme4)$sigma) - # sprintf("%.16f", lme4::fixef(mod.lme4)) - # sprintf("%.16f", attr(unclass(lme4::VarCorr(mod.lme4))[[1]], 'stddev')) - - # mod.tmb <- glmmTMB::glmmTMB(m$y ~ 1 + (1|grp)) - # sprintf("%.16f", summary(mod.tmb)$sigma) - # sprintf("%.16f", lme4::fixef(mod.tmb)) - # sprintf("%.16f", attr(unclass(glmmTMB::VarCorr(mod.tmb))[[1]]$grp, 'stddev')) - - # These findMLE calls work with "BFGS" and fail with "nlminb" - # But with BFGS we get lots of warnings about uncached inner optimization - mleLME4 <- c( 3.5679609790094040, 1.4736809813876610, 0.3925194078627622 ) - mleTMB <- c( 3.5679629394855974, 1.4736809255475793, 0.3925215998142128 ) - mleLaplace <- cmLaplace$findMLE()$par - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - cm$calculate() - mleQuad <- cmQuad$findMLE()$par - - expect_equal(mleLaplace, mleLME4, tol = 1e-7) - expect_equal(mleQuad, mleLME4, tol = 1e-7) - expect_equal(mleQuad, mleLaplace, tol = 1e-7) - - expect_equal(mleLaplace, mleTMB, tol = 1e-6) - expect_equal(mleQuad, mleTMB, tol = 1e-6) - - gr_mle <- cmQuad$gr_logLik(mleLME4) ## MLE gradient check. - expect_equal(gr_mle, c(0,0,0), tol = 1e-5) - - ## Compare MLE after running twice. - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - cm$calculate() - mleLaplace2 <- cmLaplace$findMLE()$par - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - cm$calculate() - mleQuad2 <- cmQuad$findMLE()$par - expect_equal(mleLaplace, mleLaplace2, tol = 1e-6) # 1e-8 - expect_equal(mleQuad, mleQuad2, tol = 1e-8) - -}) - -## This might be better to compare for MLE as lme4 does some different -## optimization steps for LMMs. 
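## [Editorial note: for the Gaussian LMM above, the Laplace approximation is
## exact because the integrand is itself Gaussian, so that comparison mainly
## exercises the optimizers; the Poisson-Normal GLMM below is a sharper test
## of quadrature accuracy. A minimal base-R illustration with arbitrary values:]
exact <- dnorm(1.2, 0, sqrt(1.5^2 + 0.5^2), log = TRUE)
quad <- log(integrate(function(b) dnorm(1.2, b, 1.5) * dnorm(b, 0, 0.5), -Inf, Inf)$value)
all.equal(exact, quad)  ## TRUE up to integration error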
-test_that("AGH Quadrature Comparison to LME4 1 RE for Poisson-Normal", { - set.seed(123) - n <- 50 - J <- 10 - nobs <- n*J - grp <- rep(1:n, each = J) - m <- nimbleModel(nimbleCode({ - # priors - b0 ~ dnorm(0, 1000) - sigma ~ dgamma(1,1) - for(i in 1:n){ - b[i] ~ dnorm(mean = 0, sd = sigma) - mu[i] <- exp(b0 + b[i]) - } - for(i in 1:nobs){ - y[i] ~ dpois(mu[grp[i]]) - }}), constants = list(n=n, nobs=nobs, grp = grp), - inits = list(b = rnorm(n, 0, 0.5), b0 = 3.5, sigma = 0.5), buildDerivs = TRUE) - m$simulate('y') - m$setData('y') - m$calculate() - - cm <- compileNimble(m) - mQuad <- buildAGHQ(model = m, nQuad = 21, control = list(outerOptimControl = list(reltol = 1e-12))) - mLaplace <- buildAGHQ(model = m, nQuad = 1, control = list(outerOptimControl = list(reltol = 1e-12))) - cQL <- compileNimble(mQuad, mLaplace, project = m) - cmQuad <- cQL$mQuad - cmLaplace <- cQL$mLaplace - - # library(lme4) - # mod.lme4 <- lme4::glmer(m$y ~ 1 + (1|grp), family = "poisson", nAGQ = 1, ## nAGQ = 21 for nQuad = 21 - # control = glmerControl(optimizer= "optimx", optCtrl = list(method="L-BFGS-B"))) - # sprintf("%.16f", lme4::fixef(mod.lme4)) - # sprintf("%.16f", attr(unclass(lme4::VarCorr(mod.lme4))[[1]], 'stddev')) - # sprintf("%.16f", logLik(mod.lme4, fixed.only = TRUE) ) - - lme4_laplace <- -1695.4383630192869532 - # lme4_nquad21 <- -360.6258864811468356 ## Only proportional in lme4, not the actual marginal. Can't use to compare. - - mleLME4_nquad21 <- c( 3.5136587320416126, 0.4568722479747411) - mleLME4_laplace <- c( 3.5136586190857675, 0.4568710881066258) - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - mleLaplace <- cmLaplace$findMLE()$par - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - mleQuad <- cmQuad$findMLE()$par - - ## Compare the marginal log likelihood for the laplace method. - logLikLaplace <- cmLaplace$calcLogLik( mleLME4_laplace ) - expect_equal(logLikLaplace, lme4_laplace, tol = 1e-7) #1e-11 ## Very very similar maximization. Even if slightly different estimates. - - ## Compare mle for laplace to lme4 laplace - ## and nQuad = 21 for both methods - expect_equal(mleLaplace, mleLME4_laplace, tol = 1e-5) - expect_equal(mleQuad, mleLME4_nquad21, tol = 1e-5) - expect_equal(mleQuad, mleLaplace, tol = 1e-5) - - ## Compare MLE after running twice. 
- mleLaplace2 <- cmLaplace$findMLE()$par - mleQuad2 <- cmQuad$findMLE()$par - expect_equal(mleLaplace, mleLaplace2, tol = 1e-5) - expect_equal(mleQuad, mleQuad2, tol = 1e-5) -}) - - -test_that("AGHQ nQuad > 1 for simple LME with correlated intercept and slope works", { - set.seed(1) - g <- rep(1:10, each = 10) - n <- length(g) - x <- runif(n) - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) { - y[i] ~ dnorm((fixed_int + random_int_slope[g[i], 1]) + (fixed_slope + random_int_slope[g[i], 2])*x[i], sd = sigma_res) - } - cov[1, 1] <- sigma_int^2 - cov[2, 2] <- sigma_slope^2 - cov[1, 2] <- rho * sigma_int * sigma_slope - cov[2, 1] <- rho * sigma_int * sigma_slope - for(i in 1:ng) { - random_int_slope[i, 1:2] ~ dmnorm(zeros[1:2], cov = cov[1:2, 1:2]) - } - sigma_int ~ dunif(0, 10) - sigma_slope ~ dunif(0, 10) - sigma_res ~ dunif(0, 10) - fixed_int ~ dnorm(0, sd = 100) - fixed_slope ~ dnorm(0, sd = 100) - rho ~ dunif(-1, 1) - }), - constants = list(g = g, ng = max(g), n = n, x = x, zeros = rep(0, 2)), - buildDerivs = TRUE - ) - params <- c("fixed_int", "fixed_slope", "sigma_int", "sigma_slope", "sigma_res", "rho") - values(m, params) <- c(10, 0.5, 3, 0.25, 0.2, 0.45) - m$simulate(m$getDependencies(params, self = FALSE)) - m$setData('y') - y <- m$y - library(lme4) - manual_fit <- lmer(y ~ x + (1 + x | g), REML = FALSE) - mLaplace <- buildAGHQ(model = m, nQuad = 1)#, control=list(innerOptimStart="last.best")) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - - params_in_order <- setupMargNodes(m)$paramNodes - - pStart <- values(m, params_in_order) - - init_llh <- cmLaplace$calcLogLik(pStart) - - opt <- cmLaplace$findMLE() - nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - nimsumm <- summaryLaplace(cmLaplace, opt, randomEffectsStdError = TRUE) - - lme4res <- summary(manual_fit) - expect_equal(nimres$params$estimate[4:5], as.vector(lme4res$coefficients[,"Estimate"]), tol=1e-4) - sdparams <- nimres$params$estimate[-c(4,5)] - expect_equal(sdparams[c(1,2,4,3)], as.data.frame(VarCorr(manual_fit))[,"sdcor"], tol = 1e-3) - expect_equal(nimres$params$stdError[4:5], as.vector(lme4res$coefficients[,"Std. 
Error"]), tol=.03) - expect_equal(nimres$randomEffects$estimate, as.vector(t(ranef(manual_fit)$g)), tol = 5e-3) - - cmLaplace$updateSettings(nQuad = 3) - init_llh_3 <- cmLaplace$calcLogLik(pStart) - max_llh_3 <- cmLaplace$calcLogLik(opt$par ) - expect_equal(init_llh, init_llh_3) - expect_equal(opt$value, max_llh_3) -}) - - -nimbleOptions(enableDerivs = EDopt) -nimbleOptions(buildModelDerivs = BMDopt) diff --git a/packages/nimble/tests/testthat/test-ADlaplace.R b/packages/nimble/tests/testthat/test-ADlaplace.R deleted file mode 100644 index 5bb0e0eb8..000000000 --- a/packages/nimble/tests/testthat/test-ADlaplace.R +++ /dev/null @@ -1,2133 +0,0 @@ - -# Tests of Laplace approximation -source(system.file(file.path('tests', 'testthat', 'test_utils.R'), package = 'nimble')) -source(system.file(file.path('tests', 'testthat', 'AD_test_utils.R'), package = 'nimble')) -EDopt <- nimbleOptions("enableDerivs") -BMDopt <- nimbleOptions("buildModelDerivs") -nimbleOptions(enableDerivs = TRUE) -nimbleOptions(buildModelDerivs = TRUE) -nimbleOptions(allowDynamicIndexing = FALSE) - -# check internal consistency of optim method variants -check_laplace_alternative_methods <- function(cL, # compiled laplace algorithm - cm, # compiled model - m, # original model (or list with values) - opt, # possibly already-run LaplaceMLE result, - methods = 1:3, # methods to check - summ_orig, # summarized Laplace MLE result (original) - summ_trans, # summarized Laplace MLE result (transformed) - expected_warning = NULL, - expected_no_re = FALSE - ) { - expect_wrapper <- ifelse(is.null(expected_warning), expect_silent, - function(expr) - expect_output(eval(expr), expected_warning)) - vars <- cm$getVarNames() - reset <- function() { - for(v in vars) cm[[v]] <- m[[v]] - } - if(missing(opt)) { - reset() - expect_wrapper(opt <- cL$findMLE()) - } - - cL$updateSettings(useInnerCache=FALSE) - #cL$setInnerCache(FALSE) ## Recalculate inner optim to check starting values. Will ensure errors are printed on summary. 
- - if(missing(summ_orig)){ - expect_wrapper(summ_orig <- cL$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE)) - } - if(missing(summ_trans)){ - expect_wrapper(summ_trans <- cL$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE)) - } - ref_method <- cL$computeMethod_ #cL$getMethod() - for(method in methods) { - if(method != ref_method) { - reset() - cL$updateSettings(computeMethod=method) - ## if(expected_no_re) - ## expect_output(cL$setMethod(method), "no random effects") else cL$setMethod(method) - expect_wrapper(opt_alt <- cL$findMLE()) - expect_equal(opt$par, opt_alt$par, tolerance = 0.01) - expect_equal(opt$value, opt_alt$value, tolerance = 1e-4) - tryResult <- try({ - expect_wrapper(summ_orig_alt <- cL$summary(opt_alt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE)) - expect_wrapper(summ_trans_alt <- cL$summary(opt_alt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE)) - }) - if(inherits(tryResult, 'try-error')) { - print("cL$summary failing.") - print(class(cL)) - print(cL) - } else { - expect_equal(summ_orig$params$estimate, summ_orig_alt$params$estimate, tol = 1e-4) - expect_equal(summ_orig$randomEffects$estimate, summ_orig_alt$randomEffects$estimate, tol = 1e-4) - expect_equal(summ_orig$params$stdError, summ_orig_alt$params$stdError, tol = 1e-4) - expect_equal(summ_orig$randomEffects$stdError, summ_orig_alt$randomEffects$stdError, tol = 1e-4) - expect_equal(summ_orig$vcov, summ_orig_alt$vcov, tol = 1e-4) - expect_equal(summ_trans$params$estimate, summ_trans_alt$params$estimate, tol = 1e-4) - expect_equal(summ_trans$randomEffects$estimate, summ_trans_alt$randomEffects$estimate, tol = 1e-4) - expect_equal(summ_trans$params$stdError, summ_trans_alt$params$stdError, tol = 1e-4) - expect_equal(summ_trans$randomEffects$stdError, summ_trans_alt$randomEffects$stdError, tol = 1e-4) - expect_equal(summ_trans$vcov, summ_trans_alt$vcov, tol = 1e-4) - } - } - } - invisible(NULL) -} - -test_that("Laplace simplest 1D works", { - m <- nimbleModel( - nimbleCode({ - y ~ dnorm(a, sd = 2) - a ~ dnorm(mu, sd = 3) - mu ~ dnorm(0, sd = 5) - }), data = list(y = 4), inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, 4, tol = 1e-6) # tolerance was reduced in this test when we switched to nlminb - # V[a] = 9 - # V[y] = 9 + 4 = 13 - # Cov[a, y] = V[a] = 9 (not needed) - # y ~ N(mu, 13) - expect_equal(opt$value, dnorm(4, 4, sd = sqrt(13), log = TRUE)) - # muhat = y = 4 - # ahat = (9*y+4*mu)/(9+4) = y = 4 - # Jacobian of ahat wrt mu is 4/13 - # Hessian of joint loglik wrt a: -(1/4 + 1/9) - # Hessian of marginal loglik wrt mu is -1/13 - summ <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summ$randomEffects$estimate, 4, tol = 1e-5) - # check behavior of summaryLaplace - summ2 <- summaryLaplace(cmLaplace, opt, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(nrow(summ2$randomEffects), 1) - expect_equal(nrow(summ2$params), 1) - expect_equal(row.names(summ2$randomEffects), "a") - expect_equal(row.names(summ2$params), "mu") - # Covariance matrix - vcov <- matrix(c(0, 0, 
0, c(1/(1/4+1/9))), nrow = 2) + matrix(c(1, 4/13), ncol = 1) %*% (13) %*% t(matrix(c(1, 4/13), ncol = 1)) - expect_equal(vcov, summ$vcov, tol = 1e-6) - # Check covariance matrix for params only - summ3 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ3$vcov, vcov[1,1,drop=FALSE], tol=1e-6) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace simplest 1D with a constrained parameter works", { - m <- nimbleModel( - nimbleCode({ - y ~ dnorm(a, sd = 2) - a ~ dnorm(mu, sd = 3) - mu ~ dexp(1.0) - }), data = list(y = 4), inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - # V[a] = 9 - # V[y] = 9 + 4 = 13 - # Cov[a, y] = V[a] = 9 (not needed) - # y ~ N(mu, 13) - # muhat = y = 4 - # ahat = (9*y+4*mu)/(9+4) = y = 4 - # Jacobian of ahat wrt transformed param log(mu) is 4/13*mu = 4*mu/13 = 16/13 - # Hessian of joint loglik wrt a: -(1/4 + 1/9) - # Hessian of marginal loglik wrt transformed param log(mu) is (y*mu - 2*mu*mu)/13 = -4^2/13 - # Variance of transformed param is 13/16 - expect_equal(opt$par, 4, tol = 1e-4) - expect_equal(opt$value, dnorm(4, 4, sd = sqrt(13), log = TRUE)) - expect_equal(opt$hessian[1,1], -4^2/13, tol = 1e-4) - summ <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summ$randomEffects$estimate, 4, tol = 1e-4) - expect_equal(summ$params$estimate, log(4), tol = 1e-4) - # check summaryLaplace - summL <- summaryLaplace(cmLaplace, opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summL$params['mu','estimate'], log(4), tol = 1e-4) - - # Covariance matrix on transformed scale - vcov_transform <- matrix(c(0, 0, 0, 1/(1/4+1/9)), nrow = 2) + matrix(c(1, 16/13), ncol = 1) %*% (13/16) %*% t(matrix(c(1, 16/13), ncol = 1)) - expect_equal(vcov_transform, summ$vcov, tol = 1e-4) - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - # Covariance matrix on original scale - vcov <- diag(c(4, 1)) %*% vcov_transform %*% diag(c(4, 1)) - expect_equal(vcov, summ2$vcov, tol = 1e-4) - # Check covariance matrix for params only - tryResult <- try({ - summ3 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE); - expect_equal(summ3$vcov, vcov[1,1,drop=FALSE], tol=1e-5) - summ4 <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ4$vcov, vcov_transform[1,1,drop=FALSE], tol=5e-5) - }) - if(inherits(tryResult, "try-error")) { - print(class(cmLaplace)) - print(cmLaplace) - } - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - 
check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace simplest 1D (constrained) with multiple data works", { - set.seed(1) - m <- nimbleModel( - nimbleCode({ - mu ~ dnorm(0, sd = 5) - a ~ dexp(rate = exp(mu)) - for (i in 1:5){ - y[i] ~ dnorm(a, sd = 2) - } - }), data = list(y = rnorm(5, 1, 2)), inits = list(mu = 2, a = 1), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - summ <- cmLaplace$summary(opt, originalScale = FALSE, jointCovariance = TRUE) - # Results are checked using those from TMB - # TMB cpp code: - #include - #template - #Type objective_function::operator() () - # { - # DATA_VECTOR(y); - # PARAMETER(mu); - # PARAMETER(log_a); - # int n = y.size(); - # Type a = exp(log_a); // Invserse transformation - # // Negative log-likelihood - # Type ans = -dexp(a, exp(mu), true); - # ans -= log_a; // logdet Jacobian of inverse transformation: exp - # for(int i = 0; i < n; i++){ - # ans -= dnorm(y[i], a, Type(2), true); - # } - # return ans; - # } - # TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y) - # parameters <- list(mu = 2, log_a = 0) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="log_a", DLL="test") - # tmbres <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - expect_equal(opt$par, 0.2895238, tol = 1e-3) - expect_equal(opt$value, -10.47905, tol = 1e-7) - expect_equal(summ$randomEffects$estimate, -0.005608619, tol = 1e-3) - vcov <- matrix(c(2.741033, -1.628299, -1.628299, 1.414499), nrow = 2, byrow = TRUE) - expect_equal(summ$vcov, vcov, 2e-3) - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, vcov[1,1,drop=FALSE], tol=1e-3) - - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace simplest 1D (constrained) with deterministic intermediates and multiple data works", { - set.seed(1) - m <- nimbleModel( - nimbleCode({ - mu ~ dnorm(0, sd = 5) - a ~ dexp(rate = exp(0.5 * mu)) - for (i in 1:5){ - y[i] ~ dnorm(0.2 * a, sd = 2) - } - }), data = list(y = rnorm(5, 1, 2)), inits = list(mu = 2, a = 1), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - summ <- cmLaplace$summary(opt, originalScale = FALSE, jointCovariance = TRUE) - # Results are checked using those from TMB - # TMB cpp code: - # #include - # template - # Type objective_function::operator() () - # { - # DATA_VECTOR(y); - # PARAMETER(mu); - # PARAMETER(log_a); - # int n = y.size(); - # Type a = exp(log_a); // Invserse transformation - # // Negative 
log-likelihood - # Type ans = -dexp(a, exp(0.5 * mu), true); - # ans -= log_a; // logdet Jacobian of inverse transformation: exp - # for(int i = 0; i < n; i++){ - # ans -= dnorm(y[i], 0.2 * a, Type(2), true); - # } - # ADREPORT(a); - # return ans; - # } - ## R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y) - # parameters <- list(mu = 2, log_a = 0) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="log_a", DLL="test") - # tmbres <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - expect_equal(opt$par, -2.639534, 2e-4) - expect_equal(opt$value, -10.47905, tol = 1e-5) - expect_equal(summ$randomEffects$estimate, 1.603742, tol = 1e-4) - vcov <- matrix(c(10.967784, -3.258191, -3.258191, 1.415167), nrow = 2, byrow = TRUE) - expect_equal(summ$vcov, vcov, 2e-3) - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, vcov[1,1,drop=FALSE], tol=2e-3) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-4) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace 1D with deterministic intermediates works", { - # Note this test has some slop. In old versions there were inner optimization - # warnings issued for this case. So we turned on warnings and checked for them. - # Now the warnings aren't issued. As a result, we really need a new test to - # check that warnings are correctly emitted. - m <- nimbleModel( - nimbleCode({ - y ~ dnorm(0.2 * a, sd = 2) - a ~ dnorm(0.5 * mu, sd = 3) - mu ~ dnorm(0, sd = 5) - }), data = list(y = 4), inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - #expect_output( - opt <- cmLaplace$findMLE() - #, "Warning: inner optimzation had a non-zero convergence code\\. 
Use checkInnerConvergence\\(TRUE\\) to see details\\.") - expect_equal(opt$par, 40, tol = 1e-4) # 40 = 4 * (1/.2) * (1/.5) - # V[a] = 9 - # V[y] = 0.2^2 * 9 + 4 = 4.36 - expect_equal(opt$value, dnorm(0.1*40, 0.1*40, sd = sqrt(4.36), log = TRUE)) - # y ~ N(0.2*0.5*mu, 4.36) - # muhat = y/(0.2*0.5) = 40 - # ahat = (9*0.2*y + 4*0.5*mu)/(4+9*0.2^2) = 20 - # Jacobian of ahat wrt mu is 4*0.5/(4+9*0.2^2) = 0.4587156 - # Hessian of joint loglik wrt a: -(0.2^2/4 + 1/9) - # Hessian of marginal loglik wrt param mu is -(0.2*0.5)^2/4.36 = -0.002293578 - cmLaplace$updateSettings(innerOptimWarning=TRUE) - #expect_output( - summ <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, - jointCovariance = TRUE) - #, "optim did not converge for the inner optimization") - expect_equal(summ$randomEffects$estimate, 20, tol = 1e-4) - # Covariance matrix - vcov <- matrix(c(0, 0, 0, 1/(0.2^2/4+1/9)), nrow = 2) + matrix(c(1, 0.4587156), ncol = 1) %*% (1/0.002293578) %*% t(matrix(c(1, 0.4587156), ncol = 1)) - expect_equal(vcov, summ$vcov, tol = 1e-4) - # Check covariance matrix for params only - #expect_output( - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - #, "does not converge") - expect_equal(summ2$vcov, vcov[1,1,drop=FALSE], tol=1e-4) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - #expect_output( - optNoSplit <- cmLaplaceNoSplit$findMLE() - #, "Warning: inner optimzation had a non-zero convergence code\\. Use checkInnerConvergence\\(TRUE\\) to see details\\.") - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - cmLaplace$updateSettings(innerOptimWarning=TRUE) ## Turn warnings on for test. - check_laplace_alternative_methods(cmLaplace, cm, m, opt)#, expected_warning = "optim did not converge for the inner optimization") - cmLaplaceNoSplit$updateSettings(innerOptimWarning=TRUE) ## Turn warnings on for test. - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit)#, expected_warning = "optim did not converge for the inner optimization") -}) - -test_that("Laplace 1D with a constrained parameter and deterministic intermediates works", { - ## Again (see above), the innerOptimWarning and expect_output are - ## defunct portions of this test. - m <- nimbleModel( - nimbleCode({ - y ~ dnorm(0.2 * a, sd = 2) - a ~ dnorm(0.5 * mu, sd = 3) - mu ~ dexp(1.0) - }), data = list(y = 4), inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - #expect_output( - opt <- cmLaplace$findMLE() - #, "Warning: inner optimzation had a non-zero convergence code\\. 
Use checkInnerConvergence\\(TRUE\\) to see details\\.") - - # V[a] = 9 - # V[y] = 0.2^2 * 9 + 4 = 4.36 - # y ~ N(0.2*0.5*mu, 4.36) - # muhat = y/(0.2*0.5) = 40 - # ahat = (9*0.2*y + 4*0.5*mu)/(4+9*0.2^2) = 20 - # Jacobian of ahat wrt transformed param log(mu) is 4*0.5*mu/(4+9*0.2^2) = 18.34862 - # Hessian of joint loglik wrt a: -(0.2^2/4 + 1/9) - # Hessian of marginal loglik wrt param mu is -(0.2*0.5)^2/4.36 - # Hessian of marginal loglik wrt transformed param log(mu) is (0.2*0.5*y*mu - 2*0.1^2*mu*mu)/4.36 = -3.669725 - expect_equal(opt$par, 40, tol = 1e-4) - expect_equal(opt$hessian[1,1], -3.669725, tol = 1e-3) - expect_equal(opt$value, dnorm(0.1*40, 0.1*40, sd = sqrt(4.36), log = TRUE)) - - cmLaplace$updateSettings(innerOptimWarning=TRUE, useInnerCache=FALSE) - #cmLaplace$setInnerOptimWarning(TRUE) - #cmLaplace$setInnerCache(FALSE) - #expect_output( - summ <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, - jointCovariance = TRUE) - #, "optim did not converge for the inner optimization") - expect_equal(summ$randomEffects$estimate, 20, tol = 1e-4) - # Covariance matrix on transformed scale - vcov_transform <- matrix(c(0, 0, 0, 1/(0.2^2/4+1/9)), nrow = 2) + matrix(c(1, 18.34862), ncol = 1) %*% (1/3.669725) %*% t(matrix(c(1, 18.34862), ncol = 1)) - expect_equal(vcov_transform, summ$vcov, tol = 1e-3) - #expect_output( - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, - jointCovariance = TRUE) - #, "optim did not converge for the inner optimization") - # Covariance matrix on original scale - vcov <- diag(c(40,1)) %*% vcov_transform %*% diag(c(40,1)) - expect_equal(vcov, summ2$vcov, tol = 1e-3) - - # Check summary based on not recomputing random effects and hessian. - summ.orig <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - # Check covariance matrix for params only - #expect_output( - summ3 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - #, "optim did not converge for the inner optimization") - - # Make sure that the recompute didn't change the values of the random effects or standard errors. - expect_equal(summ.orig$randomEffects[["estimate"]], summ3$randomEffects[["estimate"]], tol = 1e-12) - expect_equal(summ.orig$randomEffects[["stdError"]], summ3$randomEffects[["stdError"]], tol = 1e-12) - - expect_equal(summ3$vcov, vcov[1,1,drop=FALSE], tol=1e-3) - #expect_output( - summ4 <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - #, "optim did not converge for the inner optimization") - expect_equal(summ4$vcov, vcov_transform[1,1,drop=FALSE], tol=1e-4) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - cmLaplace$updateSettings(innerOptimWarning=TRUE) -# cmLaplaceNoSplit$setInnerOptimWarning(TRUE) - #expect_output( - optNoSplit <- cmLaplaceNoSplit$findMLE() - #, "optim did not converge for the inner optimization") - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - cmLaplace$updateSettings(innerOptimWarning=TRUE) - #cmLaplace$setInnerOptimWarning(TRUE) ## Turn warnings on for test. - check_laplace_alternative_methods(cmLaplace, cm, m, opt)#, expected_warning = "optim did not converge for the inner optimization") - cmLaplaceNoSplit$updateSettings(innerOptimWarning=TRUE) - #cmLaplaceNoSplit$setInnerOptimWarning(TRUE) ## Turn warnings on for test. 
- check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit)#, expected_warning = "optim did not converge for the inner optimization") -}) - -test_that("Laplace 1D with deterministic intermediates and multiple data works", { - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) - y[i] ~ dnorm(mu_y, sd = 2) # larger multiplier to amplify cov terms in result below - mu_y <- 0.8*a - a ~ dnorm(mu_a, sd = 3) - mu_a <- 0.5 * mu - mu ~ dnorm(0, sd = 5) - }), - data = list(y = c(4, 5, 6)), - constants = list(n = 3), - inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, 12.5, tol = 1e-4) # 12.5 = mean(y) * (1/.8) * (1/.5) where mean(y) = 5 - # V[a] = 9 - # V[y[i]] = 0.8^2 * 9 + 4 = 9.76 - # Cov[a, y[i]] = 0.8 * 9 = 7.2 - # Cov[y[i], y[j]] = 0.8^2 * 9 = 5.76 - Cov_ay1y2y3 <- matrix(nrow = 4, ncol = 4) - Cov_ay1y2y3[1, 1:4] <- c(9, 7.2, 7.2, 7.2) - Cov_ay1y2y3[2, 1:4] <- c(7.2, 9.76, 5.76, 5.76) - Cov_ay1y2y3[3, 1:4] <- c(7.2, 5.76, 9.76, 5.76) - Cov_ay1y2y3[4, 1:4] <- c(7.2, 5.76, 5.76, 9.76) - Cov_y1y2y3 <- Cov_ay1y2y3[2:4, 2:4] - chol_cov <- chol(Cov_y1y2y3) - res <- dmnorm_chol(c(4, 5, 6), 0.8*0.5*12.5, cholesky = chol_cov, prec_param=FALSE, log = TRUE) - expect_equal(opt$value, res) - # y[i] ~ N(0.4*mu, 9.76) - # mean(y) = 5 - # muhat = mean(y)/(0.8*0.5) = 12.5 - # ahat = (9*0.8*sum(y) + 4*0.5*mu)/(4+9*0.8^2*3) = 6.25 - # Jacobian of ahat wrt mu is 4*0.5/(4+9*0.8^2*3) = 0.09398496 - # Hessian of joint loglik wrt a: -(3*0.8^2/4 + 1/9) - # Hessian of marginal loglik wrt mu: -0.02255639 (numerical, have not got AD work) - summ <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summ$randomEffects$estimate, 6.25, tol = 1e-6) - # Covariance matrix - vcov <- matrix(c(0, 0, 0, 1/(0.8^2*3/4+1/9)), nrow = 2) + matrix(c(1, 0.09398496), ncol = 1) %*% (1/0.02255639) %*% t(matrix(c(1, 0.09398496), ncol = 1)) - expect_equal(vcov, summ$vcov, tol = 1e-7) - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, vcov[1,1,drop=FALSE], tol=1e-6) - - # check that - mLaplaceCheck <- buildLaplace(model = m, paramNodes = 'mu', randomEffectsNodes = 'a') - nim1D <- mLaplace$AGHQuad_nfl[[1]] - expect_identical(nim1D$paramNodes, "mu") - expect_identical(nim1D$paramDeps, "mu_a") - expect_identical(nim1D$randomEffectsNodes, "a") - expect_identical(nim1D$innerCalcNodes, c("a", "mu_y", "y[1]", "y[2]", "y[3]")) - expect_identical(nim1D$calcNodes, c("mu_a", nim1D$innerCalcNodes)) - expect_identical(nim1D$inner_updateNodes, "mu_a") - expect_identical(nim1D$inner_constantNodes, c("y[1]", "y[2]", "y[3]")) - expect_identical(nim1D$joint_updateNodes, character()) - expect_identical(nim1D$joint_constantNodes, c("y[1]", "y[2]", "y[3]")) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace 1D 
-test_that("Laplace 1D with a constrained parameter and deterministic intermediates and multiple data works", { - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) - y[i] ~ dnorm(mu_y, sd = 2) - mu_y <- 0.8*a - a ~ dnorm(mu_a, sd = 3) - mu_a <- 0.5 * mu - mu ~ dexp(1.0) - }), - data = list(y = c(4, 5, 6)), - constants = list(n = 3), - inits = list(a = -1, mu = 0), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, 12.5, tol = 1e-4) - # V[a] = 9 - # V[y[i]] = 0.8^2 * 9 + 4 = 9.76 - # Cov[a, y[i]] = 0.8 * 9 = 7.2 - # Cov[y[i], y[j]] = 0.8^2 * 9 = 5.76 - # y[i] ~ N(0.4*mu, 9.76) - # mean(y) = 5 - # muhat = mean(y)/(0.8*0.5) = 12.5 - # ahat = (9*0.8*sum(y) + 4*0.5*mu)/(4+9*0.8^2*3) = 6.25 - # Jacobian of ahat wrt transformed param log(mu) is 4*0.5*mu/(4+9*0.8^2*3) = 1.174812 - # Hessian of joint loglik wrt a: -(3*0.8^2/4 + 1/9) - # Hessian of marginal loglik wrt transformed param: -3.524436 (numerical; have not gotten AD to work) - Cov_ay1y2y3 <- matrix(nrow = 4, ncol = 4) - Cov_ay1y2y3[1, 1:4] <- c(9, 7.2, 7.2, 7.2) - Cov_ay1y2y3[2, 1:4] <- c(7.2, 9.76, 5.76, 5.76) - Cov_ay1y2y3[3, 1:4] <- c(7.2, 5.76, 9.76, 5.76) - Cov_ay1y2y3[4, 1:4] <- c(7.2, 5.76, 5.76, 9.76) - Cov_y1y2y3 <- Cov_ay1y2y3[2:4, 2:4] - chol_cov <- chol(Cov_y1y2y3) - res <- dmnorm_chol(c(4, 5, 6), 0.8*0.5*12.5, cholesky = chol_cov, prec_param=FALSE, log = TRUE) - expect_equal(opt$value, res) - # Check covariance matrix - summ <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summ$randomEffects$estimate, 6.25, tol = 1e-6) - # Covariance matrix on transformed scale - vcov_transform <- matrix(c(0, 0, 0, 1/(0.8^2*3/4+1/9)), nrow = 2) + matrix(c(1, 1.174812), ncol = 1) %*% (1/3.524436) %*% t(matrix(c(1, 1.174812), ncol = 1)) - expect_equal(vcov_transform, summ$vcov, tol = 1e-6) - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - # Covariance matrix on original scale - vcov <- diag(c(12.5, 1)) %*% vcov_transform %*% diag(c(12.5, 1)) - expect_equal(vcov, summ2$vcov, tol = 1e-5) - # Check covariance matrix for params only - summ3 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ3$vcov, vcov[1,1,drop=FALSE], tol=1e-5) - summ4 <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ4$vcov, vcov_transform[1,1,drop=FALSE], tol=1e-6) - - # check that the internal node setup is as expected - mLaplaceCheck <- buildLaplace(model = m, paramNodes = 'mu', randomEffectsNodes = 'a') - nim1D <- mLaplace$AGHQuad_nfl[[1]] - expect_identical(nim1D$paramNodes, "mu") - expect_identical(nim1D$paramDeps, "mu_a") - expect_identical(nim1D$randomEffectsNodes, "a") - expect_identical(nim1D$innerCalcNodes, c("a", "mu_y", "y[1]", "y[2]", "y[3]")) - expect_identical(nim1D$calcNodes, c("mu_a", nim1D$innerCalcNodes)) - expect_identical(nim1D$inner_updateNodes, "mu_a") - expect_identical(nim1D$inner_constantNodes, c("y[1]", "y[2]", "y[3]")) - expect_identical(nim1D$joint_updateNodes, character()) - expect_identical(nim1D$joint_constantNodes, c("y[1]", "y[2]", "y[3]")) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) -
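The original-scale covariance in the test above is a delta-method rescaling of the transformed-scale matrix. The step in isolation, using the numbers already given in the test's comments (an editorial sketch, not part of the deleted file):

```r
# mu ~ dexp(1) is positive, so it is optimized as log(mu); d mu / d log(mu) = mu,
# which equals 12.5 at the MLE, while the random effect a is untransformed.
vcov_transform <- matrix(c(0, 0, 0, 1/(0.8^2*3/4 + 1/9)), nrow = 2) +
  matrix(c(1, 1.174812), ncol = 1) %*% (1/3.524436) %*% t(matrix(c(1, 1.174812), ncol = 1))
J <- diag(c(12.5, 1))                        # Jacobian of (mu, a) wrt (log(mu), a)
vcov_original <- J %*% vcov_transform %*% t(J)
```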
ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace simplest 2x1D works, with multiple data for each", { - set.seed(1) - y <- matrix(rnorm(6, 4, 5), nrow = 2) - m <- nimbleModel( - nimbleCode({ - for(i in 1:2) { - mu_y[i] <- 0.8*a[i] - for(j in 1:3) - y[i, j] ~ dnorm(mu_y[i], sd = 2) - a[i] ~ dnorm(mu_a, sd = 3) - } - mu_a <- 0.5 * mu - mu ~ dnorm(0, sd = 5) - }), data = list(y = y), inits = list(a = c(-2, -1), mu = 0), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, mean(y)/(0.8*0.5), tol = 1e-4) # optim's reltol is about 1e-8 but that is for the value, not param. - # V[a] = 9 - # V[y[i]] = 0.8^2 * 9 + 4 = 9.76 - # Cov[a, y[i]] = 0.8 * 9 = 7.2 - # Cov[y[i], y[j]] = 0.8^2 * 9 = 5.76, within a group - Cov_A_Y <- matrix(nrow = 8, ncol = 8) - Cov_A_Y[1, 1:8] <- c( 9, 0, 7.2, 7.2, 7.2, 0, 0, 0) - Cov_A_Y[2, 1:8] <- c( 0, 9, 0, 0, 0, 7.2, 7.2, 7.2) - Cov_A_Y[3, 1:8] <- c(7.2, 0, 9.76, 5.76, 5.76, 0, 0, 0) - Cov_A_Y[4, 1:8] <- c(7.2, 0, 5.76, 9.76, 5.76, 0, 0, 0) - Cov_A_Y[5, 1:8] <- c(7.2, 0, 5.76, 5.76, 9.76, 0, 0, 0) - Cov_A_Y[6, 1:8] <- c( 0, 7.2, 0, 0, 0, 9.76, 5.76, 5.76) - Cov_A_Y[7, 1:8] <- c( 0, 7.2, 0, 0, 0, 5.76, 9.76, 5.76) - Cov_A_Y[8, 1:8] <- c( 0, 7.2, 0, 0, 0, 5.76, 5.76, 9.76) - Cov_Y <- Cov_A_Y[3:8, 3:8] - chol_cov <- chol(Cov_Y) - res <- dmnorm_chol(as.numeric(t(y)), mean(y), cholesky = chol_cov, prec_param=FALSE, log = TRUE) - expect_equal(opt$value, res) - # muhat = mean(y)/(0.8*0.5) - # ahat[1] = (9*0.8*sum(y[1,]) + 4*0.5*mu)/(4+9*0.8^2*3) - # ahat[2] = (9*0.8*sum(y[2,]) + 4*0.5*mu)/(4+9*0.8^2*3) - # Jacobian of ahat[i] wrt mu is 4*0.5/(4+9*0.8^2*3) = 0.09398496 - # Hessian of joint loglik wrt a[i]a[i]: -(3*0.8^2/4 + 1/9); wrt a[i]a[j]: 0 - # Hessian of marginal loglik wrt mu: -0.04511278 (numerical; have not gotten AD to work) - muhat <- mean(y)/(0.8*0.5) - ahat <- c((9*0.8*sum(y[1,]) + 4*0.5*muhat)/(4+9*0.8^2*3), (9*0.8*sum(y[2,]) + 4*0.5*muhat)/(4+9*0.8^2*3)) - summ <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(summ$randomEffects$estimate, ahat, tol = 1e-6) - # Covariance matrix - vcov <- diag(c(0, rep(1/(3*0.8^2/4 + 1/9), 2))) + matrix(c(1, rep(0.09398496, 2)), ncol = 1) %*% (1/0.04511278) %*% t(matrix(c(1, rep(0.09398496, 2)), ncol = 1)) - expect_equal(vcov, summ$vcov, tol = 1e-7) - ## Check covariance matrix for params only - tryResult <- try({ - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, vcov[1,1,drop=FALSE], tol=1e-6) - }) - if(inherits(tryResult, 'try-error')) { - print(class(cmLaplace)) - print(cL) - } - - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) -
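The 8x8 Cov[(a, y)] matrices written out by hand in the tests above and below follow directly from the linear-Gaussian structure. A compact way to rebuild the one above (editorial sketch, base R only):

```r
# y[i, j] = 0.8*a[i] + e[i, j]; rows/cols ordered a[1], a[2], y[1, 1:3], y[2, 1:3].
B <- kronecker(diag(2), matrix(0.8, nrow = 3, ncol = 1))  # 6x2 map from a to E[y | a]
V_a <- diag(9, 2)                                         # prior Cov[a]
Cov_A_Y <- rbind(cbind(V_a,       V_a %*% t(B)),
                 cbind(B %*% V_a, B %*% V_a %*% t(B) + diag(4, 6)))
```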
-test_that("Laplace with 2x1D random effects needing joint integration works, without intermediate nodes", { - set.seed(1) - y <- matrix(rnorm(6, 4, 5), nrow = 2) - m <- nimbleModel( - nimbleCode({ - for(i in 1:2) { - a[i] ~ dnorm(mu_a, sd = 3) - } - for(j in 1:3) # Note this is different from above. - # These are 3 observations each of 2D - y[1:2, j] ~ dmnorm(a[1:2], cov = cov_y[1:2, 1:2]) - mu_a <- 0.5 * mu - mu ~ dnorm(0, sd = 5) - }), - data = list(y = y), - inits = list(a = c(-2, -1), mu = 0), - constants = list(cov_y = matrix(c(2, 1.5, 1.5, 2), nrow = 2)), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, mean(y)/(0.5), tol = 1e-4) # optim's reltol is about 1e-8 but that is for the value, not param. - # V[a] = 9 - # V[y[1:2, i]] = diag(2)*9 + cov_y = [ (9 + 2), 0 + 1.5; 0+1.5, (9+2)] - # Cov[a[1], y[1,j]] = 9 - # Cov[a[1], y[2,j]] = 0 - # Cov[a[2], y[1,j]] = 0 - # Cov[a[2], y[2,j]] = 9 - # Cov[y[1,i], y[1,j]] = 9 - # Cov[y[2,i], y[2,j]] = 9 - Cov_A_Y <- matrix(nrow = 8, ncol = 8) - Cov_A_Y[1, 1:8] <- c( 9, 0, 9, 0, 9, 0, 9, 0) - Cov_A_Y[2, 1:8] <- c( 0, 9, 0, 9, 0, 9, 0, 9) - Cov_A_Y[3, 1:8] <- c( 9, 0, 11, 1.5, 9, 0, 9, 0) - Cov_A_Y[4, 1:8] <- c( 0, 9, 1.5, 11, 0, 9, 0, 9) - Cov_A_Y[5, 1:8] <- c( 9, 0, 9, 0, 11, 1.5, 9, 0) - Cov_A_Y[6, 1:8] <- c( 0, 9, 0, 9, 1.5, 11, 0, 9) - Cov_A_Y[7, 1:8] <- c( 9, 0, 9, 0, 9, 0, 11, 1.5) - Cov_A_Y[8, 1:8] <- c( 0, 9, 0, 9, 0, 9, 1.5, 11) - Cov_Y <- Cov_A_Y[3:8, 3:8] - chol_cov <- chol(Cov_Y) - res <- dmnorm_chol(as.numeric(y), mean(y), cholesky = chol_cov, prec_param=FALSE, log = TRUE) - expect_equal(opt$value, res) - # Check covariance matrix - summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - # Covariance matrix from TMB: - # TMB cpp code (test.cpp) below: - # include <TMB.hpp> - # template<class Type> - # Type objective_function<Type>::operator() () - # { - # DATA_MATRIX(y); - # DATA_MATRIX(Sigma); - # PARAMETER(mu); - # PARAMETER_VECTOR(a); - # int i; - # Type ans = 0.0; - # // Negative log-likelihood - # for(i = 0; i < 2; i++){ - # ans -= dnorm(a[i], 0.5*mu, Type(3.0), true); - # } - # vector<Type> residual(2); - # using namespace density; - # MVNORM_t<Type> neg_log_dmvnorm(Sigma); - # for(i = 0; i < 3; i++) - # { - # residual = vector<Type>(y.col(i)) - a; - # ans += neg_log_dmvnorm(residual); - # } - # return ans; - # } - # TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y, Sigma = m$cov_y) - # parameters <- list(mu = 0, a = c(-2, -1)) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="a", DLL="test") - # tmbopt <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - tmbvcov <- matrix(nrow = 3, ncol = 3) - tmbvcov[1,] <- c(20.333333, 1.1666667, 1.1666667) - tmbvcov[2,] <- c(1.166667, 0.6651515, 0.5015152) - tmbvcov[3,] <- c(1.166667, 0.5015152, 0.6651515) - expect_equal(summ$vcov, tmbvcov, tol = 1e-4) - - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, tmbvcov[1,1,drop=FALSE], tol=1e-4) - - #cmLaplace$setInnerCache(FALSE) - cmLaplace$updateSettings(useInnerCache=FALSE) - summ2_recomp <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, summ2_recomp$vcov, tol=1e-10) - - summL <- summaryLaplace(cmLaplace, opt, jointCovariance = TRUE) - expect_equal(nrow(summL$randomEffects), 2) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) -
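For the multivariate-normal observation model above, the marginal covariance of y also has a compact Kronecker form (editorial sketch, base R only; ordering y[1:2, 1], y[1:2, 2], y[1:2, 3]):

```r
# Each column y[1:2, j] = a + e_j, with a ~ N(mu_a, 9*I2) shared across columns
# and e_j ~ N(0, cov_y) independent: 9*I2 in every 2x2 block, plus cov_y on the
# block diagonal.
cov_y <- matrix(c(2, 1.5, 1.5, 2), nrow = 2)
Cov_Y <- kronecker(matrix(1, 3, 3), diag(9, 2)) + kronecker(diag(3), cov_y)
```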
-test_that("Laplace with 2x1D random effects needing joint integration works, with intermediate nodes", { - set.seed(1) - y <- matrix(rnorm(6, 4, 5), nrow = 2) - m <- nimbleModel( - nimbleCode({ - for(i in 1:2) { - mu_y[i] <- 0.8*a[i] - a[i] ~ dnorm(mu_a, sd = 3) - } - for(j in 1:3) - y[1:2, j] ~ dmnorm(mu_y[1:2], cov = cov_y[1:2, 1:2]) - mu_a <- 0.5 * mu - mu ~ dnorm(0, sd = 5) - }), - data = list(y = y), - inits = list(a = c(-2, -1), mu = 0), - constants = list(cov_y = matrix(c(2, 1.5, 1.5, 2), nrow = 2)), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, mean(y)/(0.8*0.5), tol = 1e-4) # optim's reltol is about 1e-8 but that is for the value, not param. - # V[a] = 9 - # V[y[1:2, i]] = diag(2)*0.8^2 * 9 + cov_y = [ (5.76 + 2), 0 + 1.5; 0+1.5, (5.76+2)] - # Cov[a[1], y[1,j]] = 0.8*9 = 7.2 - # Cov[a[1], y[2,j]] = 0 - # Cov[a[2], y[1,j]] = 0 - # Cov[a[2], y[2,j]] = 0.8*9 - # Cov[y[1,i], y[1,j]] = 0.8^2*9 = 5.76 - # Cov[y[2,i], y[2,j]] = 0.8^2*9 = 5.76 - Cov_A_Y <- matrix(nrow = 8, ncol = 8) - Cov_A_Y[1, 1:8] <- c( 9, 0, 7.2, 0, 7.2, 0, 7.2, 0) - Cov_A_Y[2, 1:8] <- c( 0, 9, 0, 7.2, 0, 7.2, 0, 7.2) - Cov_A_Y[3, 1:8] <- c(7.2, 0, 7.76, 1.5, 5.76, 0, 5.76, 0) - Cov_A_Y[4, 1:8] <- c( 0, 7.2, 1.5, 7.76, 0, 5.76, 0, 5.76) - Cov_A_Y[5, 1:8] <- c(7.2, 0, 5.76, 0, 7.76, 1.5, 5.76, 0) - Cov_A_Y[6, 1:8] <- c( 0, 7.2, 0, 5.76, 1.5, 7.76, 0, 5.76) - Cov_A_Y[7, 1:8] <- c(7.2, 0, 5.76, 0, 5.76, 0, 7.76, 1.5) - Cov_A_Y[8, 1:8] <- c( 0, 7.2, 0, 5.76, 0, 5.76, 1.5, 7.76) - Cov_Y <- Cov_A_Y[3:8, 3:8] - chol_cov <- chol(Cov_Y) - res <- dmnorm_chol(as.numeric(y), mean(y), cholesky = chol_cov, prec_param=FALSE, log = TRUE) - expect_equal(opt$value, res) - - # Check covariance matrix - summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - # Covariance matrix from TMB: - # TMB cpp code (test.cpp) below: - # include <TMB.hpp> - # template<class Type> - # Type objective_function<Type>::operator() () - # { - # DATA_MATRIX(y); - # DATA_MATRIX(Sigma); - # PARAMETER(mu); - # PARAMETER_VECTOR(a); - # int i; - # Type ans = 0.0; - # // Negative log-likelihood - # for(i = 0; i < 2; i++){ - # ans -= dnorm(a[i], 0.5*mu, Type(3.0), true); - # } - # vector<Type> residual(2); - # using namespace density; - # MVNORM_t<Type> neg_log_dmvnorm(Sigma); - # for(i = 0; i < 3; i++) - # { - # residual = vector<Type>(y.col(i)) - 0.8 * a; - # ans += neg_log_dmvnorm(residual); - # } - # return ans; - # } - # TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y, Sigma = m$cov_y) - # parameters <- list(mu = 0, a = c(-2, -1)) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="a", DLL="test") - # tmbopt <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- 
sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - tmbvcov <- matrix(nrow = 3, ncol = 3) - tmbvcov[1,] <- c(21.645833, 1.8229167, 1.8229167) - tmbvcov[2,] <- c(1.822917, 1.0380050, 0.7849117) - tmbvcov[3,] <- c(1.822917, 0.7849117, 1.0380050) - - expect_equal(summ$vcov, tmbvcov, tol = 1e-4) - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, tmbvcov[1,1,drop=FALSE], tol=1e-4) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace with 2x2D random effects for 1D data that are separable works, with intermediate nodes", { - set.seed(1) - # y[i, j] is jth datum from ith group - y <- array(rnorm(8, 6, 5), dim = c(2, 2, 2)) - cov_a <- matrix(c(2, 1.5, 1.5, 2), nrow = 2) - m <- nimbleModel( - nimbleCode({ - for(i in 1:2) mu[i] ~ dnorm(0, sd = 10) - mu_a[1] <- 0.8 * mu[1] - mu_a[2] <- 0.2 * mu[2] - for(i in 1:2) a[i, 1:2] ~ dmnorm(mu_a[1:2], cov = cov_a[1:2, 1:2]) - for(i in 1:2) { - for(j in 1:2) { - y[1, j, i] ~ dnorm( 0.5 * a[i, 1], sd = 1.8) # this ordering makes it easier below - y[2, j, i] ~ dnorm( 0.1 * a[i, 2], sd = 1.2) - } - } - }), - data = list(y = y), - inits = list(a = matrix(c(-2, -3, 0, -1), nrow = 2), mu = c(0, .5)), - constants = list(cov_a = cov_a), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - - ## Wei: I tested this using TMB instead of the code below - # TMB cpp code: - # #include - # template - # Type objective_function::operator() () - # { - # DATA_ARRAY(y); - # DATA_MATRIX(Sigma); - # PARAMETER_VECTOR(mu); - # PARAMETER_MATRIX(a); - # int i, j; - # Type ans = 0.0; - # vector mu_a(2); - # mu_a(0) = 0.8 * mu(0); - # mu_a(1) = 0.2 * mu(1); - # // Negative log-likelihood - # vector residual(2); - # using namespace density; - # MVNORM_t neg_log_dmvnorm(Sigma); - # for(i = 0; i < 2; i++) - # { - # residual = vector(a.row(i)) - mu_a; - # ans += neg_log_dmvnorm(residual); - # } - # for(i = 0; i < 2; i++){ - # for(j = 0; j < 2; j++){ - # ans -= dnorm(y(0, j, i), 0.5*a(i, 0), Type(1.8), true); - # ans -= dnorm(y(1, j, i), 0.1*a(i, 1), Type(1.2), true); - # } - # } - # return ans; - # } - # TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y, Sigma = m$cov_a) - # parameters <- list(mu = m$mu, a = m$a) - # ## Fit model - # obj <- MakeADFun(data, parameters, random="a", DLL="test") - # tmbopt <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - expect_equal(opt$par, c(12.98392, 406.04878), tol = 1e-4) - expect_equal(opt$value, -41.86976, tol = 1e-6) - # Check covariance matrix - summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - tmbvcov <- matrix(nrow = 6, ncol = 6) - tmbvcov[1,] <- c(6.625000e+00, 4.687500e+00, 4.050000e+00, 4.050000e+00, -2.693817e-11, -2.695275e-11) - 
tmbvcov[2,] <- c(4.687500e+00, 9.250000e+02, 2.965628e-11, 2.967848e-11, 1.800000e+02, 1.800000e+02) - tmbvcov[3,] <- c(4.050000e+00, 2.951367e-11, 3.995242e+00, 2.484758e+00, 5.596302e-01, -5.596302e-01) - tmbvcov[4,] <- c(4.050000e+00, 2.951367e-11, 2.484758e+00, 3.995242e+00, -5.596302e-01, 5.596302e-01) - tmbvcov[5,] <- c(-2.691772e-11, 1.800000e+02, 5.596302e-01, -5.596302e-01, 3.684693e+01, 3.515307e+01) - tmbvcov[6,] <- c(-2.691772e-11, 1.800000e+02, -5.596302e-01, 5.596302e-01, 3.515307e+01, 3.684693e+01) - - # The ordering of a[1, 1:2] and a[2, 1:2] is flipped between nimble and TMB: - expect_equal(summ$vcov[c(1:3, 5, 4, 6), c(1:3, 5, 4, 6)], tmbvcov, tol = 1e-4) - - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, tmbvcov[1:2,1:2], tol=1e-4) - - summL <- summaryLaplace(cmLaplace, opt, jointCovariance = TRUE) - expect_identical(summL$randomEffects$estimate, summ$randomEffects$estimate) - - # For this case, we build up the correct answer more formulaically - # Define A as the vector a[1, 1], a[1, 2], a[2, 1], a[2, 2] - # cov_A <- matrix(0, nrow = 4, ncol = 4) - # cov_A[1:2, 1:2] <- cov_a - # cov_A[3:4, 3:4] <- cov_a - # # Define Y as the vector y[1,1,1],y[2,1,1],y[1,2,1],y[2,2,1], then same with last index 2 - # # Define E[Y] as IA %*% A, where: - # IA <- matrix(0, nrow = 8, ncol = 4) - # IA[c(1, 3), 1] <- 0.5 - # IA[c(2, 4), 2] <- 0.1 - # IA[c(5, 7), 3] <- 0.5 - # IA[c(6, 8), 4] <- 0.1 - # - # # define cov_y_given_a as the Cov[Y | A] - # cov_y_given_a <- matrix(0, nrow = 8, ncol = 8) - # diag(cov_y_given_a) <- rep(c(1.8^2, 1.2^2), 4) - # # And finally get cov_Y, the marginal (over A) covariance of Y - # cov_Y <- IA %*% cov_A %*% t(IA) + cov_y_given_a - # chol_cov <- chol(cov_Y) - # - # # make a log likelihood function - # nlogL <- function(mu) { - # mean_Y <- rep(c(0.8*0.5*mu[1], 0.2*0.1*mu[2]), 4) - # -dmnorm_chol(as.numeric(y), mean_Y, cholesky = chol_cov, prec_param=FALSE, log = TRUE) - # } - # # maximize it - # opt_manual <- optim(c(20, 100), nlogL, method = "BFGS") - # expect_equal(opt$par, opt_manual$par, tol = 1e-4) - # expect_equal(opt$value, -opt_manual$value, tol = 1e-5) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-4) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace with 2x2D random effects for 2D data that need joint integration works, with intermediate nodes", { - set.seed(1) - cov_a <- matrix(c(2, 1.5, 1.5, 2), nrow = 2) - cov_y <- matrix(c(1, 0.5, 0.5, 1), nrow = 2) - y <- rmnorm_chol(1, c(1, 1), chol(cov_y), prec_param = FALSE) - y <- rbind(y, rmnorm_chol(1, c(1, 1), chol(cov_y), prec_param = FALSE)) - m <- nimbleModel( - nimbleCode({ - for(i in 1:2) mu[i] ~ dnorm(0, sd = 10) - mu_a[1] <- 0.8 * mu[1] - mu_a[2] <- 0.2 * mu[2] - for(i in 1:2) a[i, 1:2] ~ dmnorm(mu_a[1:2], cov = cov_a[1:2, 1:2]) - mu_y[1:2] <- 0.5*a[1, 1:2] + 0.1*a[2, 1:2] - for(i in 1:2) { - y[i, 1:2] ~ dmnorm(mu_y[1:2], cov = cov_y[1:2, 1:2]) - } - }), - data = list(y = y), - inits = list(a = matrix(c(-2, -3, 0, -1), nrow = 2), mu = c(0, 0.5)), - constants = list(cov_a = cov_a, cov_y = cov_y), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- 
buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - ## Check using TMB results - expect_equal(opt$par, c(0.5603309, 11.7064674 ), tol = 1e-4) - expect_equal(opt$value, -4.503796, tol = 1e-7) - # Check covariance matrix - summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - tmbvcov <- matrix(nrow = 6, ncol = 6) - tmbvcov[1,] <- c(4.4270833, 11.111111, 1.4583333, 3.1250000, 0.6597222, 1.9097222) - tmbvcov[2,] <- c(11.1111111, 70.833333, 2.6388889, 7.6388889, 5.8333333, 12.5000000) - tmbvcov[3,] <- c(1.4583333, 2.638889, 1.5000000, 0.8333333, 0.7777778, 0.2777778) - tmbvcov[4,] <- c(3.1250000, 7.638889, 0.8333333, 4.1666667, 0.2777778, 2.7777778) - tmbvcov[5,] <- c(0.6597222, 5.833333, 0.7777778, 0.2777778, 1.5000000, 0.8333333) - tmbvcov[6,] <- c(1.9097222, 12.500000, 0.2777778, 2.7777778, 0.8333333, 4.1666667) - # The ordering of a[1, 1:2] and a[2, 1:2] is flipped between nimble and TMB: - expect_equal(summ$vcov[c(1:3, 5, 4, 6), c(1:3, 5, 4, 6)], tmbvcov, tol = 1e-4) - - # Check covariance matrix for params only - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, tmbvcov[1:2,1:2], tol=1e-4) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() # some warnings are ok here - expect_equal(opt$par, optNoSplit$par, tol = 1e-4) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) - ## TMB cpp code: - #include <TMB.hpp> - #template<class Type> - #Type objective_function<Type>::operator() () - # { - # DATA_MATRIX(y); - # DATA_MATRIX(cov_a); - # DATA_MATRIX(cov_y); - # PARAMETER_VECTOR(mu); - # PARAMETER_MATRIX(a); - # int i; - # Type ans = 0.0; - # - # using namespace density; - # // Negative log-likelihood of mv normal - # vector<Type> mu_a(2); - # mu_a(0) = 0.8 * mu(0); - # mu_a(1) = 0.2 * mu(1); - # vector<Type> residual_a(2); - # MVNORM_t<Type> dmvnorm_a(cov_a); - # for(i = 0; i < 2; i++) - # { - # residual_a = vector<Type>(a.row(i)) - mu_a; - # ans += dmvnorm_a(residual_a); - # } - # vector<Type> mu_y(2); - # mu_y(0) = 0.5*a(0, 0) + 0.1*a(1, 0); - # mu_y(1) = 0.5*a(0, 1) + 0.1*a(1, 1); - # vector<Type> residual_y(2); - # MVNORM_t<Type> dmvnorm_y(cov_y); - # for(i = 0; i < 2; i++){ - # residual_y = vector<Type>(y.row(i)) - mu_y; - # ans += dmvnorm_y(residual_y); - # } - # return ans; - # } - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y, cov_a = m$cov_a, cov_y = m$cov_y) - # parameters <- list(mu = m$mu, a = m$a) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="a", DLL="test") - # tmbopt <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) -}) - -test_that("simple LME case works", { - # This test uses BFGS for both the inner and outer optimization. - # nlminb results in an outer Hessian that is not negative definite.
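The comment above points at a diagnosable failure mode. One way to see it, sketched editorially and assuming a compiled Laplace object `cmLaplace` as in the surrounding tests:

```r
# At a proper maximum of the Laplace-approximated log-likelihood, the outer
# Hessian should be negative definite; a non-negative eigenvalue signals the
# problem described above and invalidates the usual standard errors.
opt <- cmLaplace$findMLE()
eigen(opt$hessian, symmetric = TRUE, only.values = TRUE)$values  # want all < 0
```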
- set.seed(1) - g <- rep(1:10, each = 5) - n <- length(g) - x <- runif(n) - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) { - y[i] ~ dnorm((fixed_int + random_int[g[i]]) + (fixed_slope + random_slope[g[i]])*x[i], sd = sigma_res) - } - for(i in 1:ng) { - random_int[i] ~ dnorm(0, sd = sigma_int) - random_slope[i] ~ dnorm(0, sd = sigma_slope) - } - sigma_int ~ dunif(0, 10) - sigma_slope ~ dunif(0, 10) - sigma_res ~ dunif(0, 10) - fixed_int ~ dnorm(0, sd = 100) - fixed_slope ~ dnorm(0, sd = 100) - }), - constants = list(g = g, ng = max(g), n = n, x = x), - buildDerivs = TRUE - ) - params <- c("fixed_int", "fixed_slope", "sigma_int", "sigma_slope", "sigma_res") - values(m, params) <- c(10, 0.5, 3, .25, 0.2) - m$simulate(m$getDependencies(params, self = FALSE)) - m$setData('y') - y <- m$y - library(lme4) - manual_fit <- lmer(y ~ x + (1 + x || g), REML = FALSE) - - mLaplace <- buildLaplace(model = m, control=list(innerOptimMethod="BFGS", - outerOptimMethod="BFGS")) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - opt <- cmLaplace$findMLE() - nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - lme4res <- summary(manual_fit) - expect_equal(nimres$params$estimate[4:5], as.vector(lme4res$coefficients[,"Estimate"]), tol=1e-5) - expect_equal(nimres$params$estimate[1:3], as.data.frame(VarCorr(manual_fit))[,"sdcor"], tol = 1e-5) - expect_equal(nimres$params$stdError[4:5], as.vector(lme4res$coefficients[,"Std. Error"]), tol=0.03) - expect_equal(nimres$randomEffects$estimate, as.vector(t(ranef(manual_fit)$g)), tol = 1e-4) -}) - -test_that("simple LME with correlated intercept and slope works (and check with nQuad=3)", { - nimbleOptions(buildInterfacesForCompiledNestedNimbleFunctions=TRUE) - set.seed(1) - g <- rep(1:10, each = 10) - n <- length(g) - x <- runif(n) - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) { - y[i] ~ dnorm((fixed_int + random_int_slope[g[i], 1]) + (fixed_slope + random_int_slope[g[i], 2])*x[i], sd = sigma_res) - } - cov[1, 1] <- sigma_int^2 - cov[2, 2] <- sigma_slope^2 - cov[1, 2] <- rho * sigma_int * sigma_slope - cov[2, 1] <- rho * sigma_int * sigma_slope - for(i in 1:ng) { - random_int_slope[i, 1:2] ~ dmnorm(zeros[1:2], cov = cov[1:2, 1:2]) - } - sigma_int ~ dunif(0, 10) - sigma_slope ~ dunif(0, 10) - sigma_res ~ dunif(0, 10) - fixed_int ~ dnorm(0, sd = 100) - fixed_slope ~ dnorm(0, sd = 100) - rho ~ dunif(-1, 1) - }), - constants = list(g = g, ng = max(g), n = n, x = x, zeros = rep(0, 2)), - buildDerivs = TRUE - ) - params <- c("fixed_int", "fixed_slope", "sigma_int", "sigma_slope", "sigma_res", "rho") - values(m, params) <- c(10, 0.5, 3, 0.25, 0.2, 0.45) - m$simulate(m$getDependencies(params, self = FALSE)) - m$setData('y') - y <- m$y - library(lme4) - manual_fit <- lmer(y ~ x + (1 + x | g), REML = FALSE) - mLaplace <- buildLaplace(model = m)#, control=list(innerOptimStart="last.best")) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - - params_in_order <- setupMargNodes(m)$paramNodes - - pStart <- values(m, params_in_order) - - init_llh <- cmLaplace$calcLogLik(pStart) - init_gr_llh <- cmLaplace$gr_logLik(pStart) - - opt <- cmLaplace$findMLE() - nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - nimsumm <- summaryLaplace(cmLaplace, opt, randomEffectsStdError = TRUE) - - lme4res <- summary(manual_fit) - expect_equal(nimres$params$estimate[4:5], as.vector(lme4res$coefficients[,"Estimate"]), tol=1e-4) - sdparams <- nimres$params$estimate[-c(4,5)] - expect_equal(sdparams[c(1,2,4,3)], 
as.data.frame(VarCorr(manual_fit))[,"sdcor"], tol = 1e-3) - expect_equal(nimres$params$stdError[4:5], as.vector(lme4res$coefficients[,"Std. Error"]), tol=.03) - expect_equal(nimres$randomEffects$estimate, as.vector(t(ranef(manual_fit)$g)), tol = 5e-3) - - cmLaplace$updateSettings(nQuad = 3) - init_llh_3 <- cmLaplace$calcLogLik(pStart) - max_llh_3 <- cmLaplace$calcLogLik(opt$par ) - expect_equal(init_llh, init_llh_3, tolerance = 1e-7) - expect_equal(opt$value, max_llh_3, tolerance = 1e-4) - - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - cm$calculate() - cmLaplace$updateSettings(useInnerCache=FALSE) - cmLaplace$updateSettings(nQuad = 1) - CrunLaplaceRes <- runLaplace(cmLaplace, pStart = pStart) - expect_equal(opt$par, CrunLaplaceRes$MLE$par, tolerance = 1e-4) - expect_equal(opt$hessian, CrunLaplaceRes$MLE$hessian, tolerance = 1e-4) - expect_equal(nimsumm$randomEffects$estimate, - CrunLaplaceRes$summary$randomEffects$estimate, tolerance = 1e-4) - expect_equal(nimsumm$randomEffects$stdError, - CrunLaplaceRes$summary$randomEffects$stdError, tolerance = 1e-4) - - - for(v in m$getVarNames()) cm[[v]] <- m[[v]] - cm$calculate() - cmLaplace$updateSettings(useInnerCache=FALSE) - CrunLaplaceRes <- runLaplace(cmLaplace, pStart = pStart) - expect_equal(opt$par, CrunLaplaceRes$MLE$par, tolerance = 1e-4) - expect_equal(opt$hessian, CrunLaplaceRes$MLE$hessian, tolerance = 1e-4) - expect_equal(nimsumm$randomEffects$estimate, - CrunLaplaceRes$summary$randomEffects$estimate, tolerance = 1e-4) - expect_equal(nimsumm$randomEffects$stdError, - CrunLaplaceRes$summary$randomEffects$stdError, tolerance = 1e-4) - -}) - -test_that("simple LME with correlated intercept and slope works through runLaplace", { - set.seed(1) - g <- rep(1:10, each = 10) - n <- length(g) - x <- runif(n) - m <- nimbleModel( - nimbleCode({ - for(i in 1:n) { - y[i] ~ dnorm((fixed_int + random_int_slope[g[i], 1]) + (fixed_slope + random_int_slope[g[i], 2])*x[i], sd = sigma_res) - } - cov[1, 1] <- sigma_int^2 - cov[2, 2] <- sigma_slope^2 - cov[1, 2] <- rho * sigma_int * sigma_slope - cov[2, 1] <- rho * sigma_int * sigma_slope - for(i in 1:ng) { - random_int_slope[i, 1:2] ~ dmnorm(zeros[1:2], cov = cov[1:2, 1:2]) - } - sigma_int ~ dunif(0, 10) - sigma_slope ~ dunif(0, 10) - sigma_res ~ dunif(0, 10) - fixed_int ~ dnorm(0, sd = 100) - fixed_slope ~ dnorm(0, sd = 100) - rho ~ dunif(-1, 1) - }), - constants = list(g = g, ng = max(g), n = n, x = x, zeros = rep(0, 2)), - buildDerivs = TRUE - ) - params <- c("fixed_int", "fixed_slope", "sigma_int", "sigma_slope", "sigma_res", "rho") - values(m, params) <- c(10, 0.5, 3, 0.25, 0.2, 0.45) - m$simulate(m$getDependencies(params, self = FALSE)) - m$setData('y') - y <- m$y - library(lme4) - manual_fit <- lmer(y ~ x + (1 + x | g), REML = FALSE) - mLaplace <- buildLaplace(model = m)#, control=list(innerOptimStart="last.best")) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - - pStart <- values(m, params) - - res <- runLaplace(cmLaplace) - opt <- res$MLE - nimsumm <- res$summary - - #opt <- cmLaplace$findMLE() - #nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - #nimsumm <- summaryLaplace(cmLaplace, opt, randomEffectsStdError = TRUE) - - lme4res <- summary(manual_fit) - expect_equal(nimsumm$params$estimate[4:5], as.vector(lme4res$coefficients[,"Estimate"]), tol=1e-4) - sdparams <- nimsumm$params$estimate[-c(4,5)] - expect_equal(sdparams[c(1,2,4,3)], as.data.frame(VarCorr(manual_fit))[,"sdcor"], tol = 1e-3) - expect_equal(nimsumm$params$stdError[4:5], 
as.vector(lme4res$coefficients[,"Std. Error"]), tol=.03) - expect_equal(nimsumm$randomEffects$estimate, as.vector(t(ranef(manual_fit)$g)), tol = 5e-3) -}) - -test_that("Laplace with non-empty calcNodesOther works", { - m <- nimbleModel( - nimbleCode({ - for(i in 1:3) { - mu[i] ~ dnorm(0, sd = 10) - } - mu_a[1] <- mu[1] + mu[2] - mu_a[2] <- mu[2] + mu[3] - a[1] ~ dnorm(mu_a[1], sd = 2) - y[1] ~ dnorm(a[1], sd = 3) - a[2] ~ dnorm(mu_a[2], sd = 2) - y[2] ~ dnorm(a[2], sd =3) - y[3] ~ dnorm(mu[3], sd = 3) - }), - data = list(y = c(2, 3, 5)), - inits = list(a = c(1, 2), mu = c(1, 2, 3)), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - expect_equal(opt$par, c(4, -2, 5), tol = 1e-3) - expect_equal(opt$value, -6.420377, tol = 1e-6) - - ## Check covariance matrix - summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - ## TMB cpp code: - #include - #template - #Type objective_function::operator() () - # { - # DATA_VECTOR(y); - # PARAMETER_VECTOR(mu); - # PARAMETER_VECTOR(a); - # int i; - # // Negative log-likelihood - # Type ans = -dnorm(a[0], mu[0]+mu[1], Type(2.0), true); - # ans -= dnorm(a[1], mu[1]+mu[2], Type(2.0), true); - # for(i = 0; i < 2; i++){ - # ans -= dnorm(y[i], a[i], Type(3.0), true); - # } - # ans -= dnorm(y[2], mu[2], Type(3.0), true); - # return ans; - # } - ## TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y) - # parameters <- list(mu = c(1, 2, 3), a = c(1, 2)) - # - # ## Fit model - # obj <- MakeADFun(data, parameters, random="a", DLL="test") - # tmbres <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) - - ## Covariance matrix from TMB - tmbvcov <- matrix(nrow = 5, ncol = 5) - tmbvcov[1,] <- c( 35, -2.20000e+01, 9.000000e+00, 9.000000e+00, -9.000000e+00) - tmbvcov[2,] <- c(-22, 2.20000e+01, -9.000000e+00, 8.463230e-13, 9.000000e+00) - tmbvcov[3,] <- c( 9, -9.00000e+00, 9.000000e+00, -3.462231e-13, 3.462231e-13) - tmbvcov[4,] <- c( 9, 8.46323e-13, -3.462231e-13, 9.000000e+00, 3.462231e-13) - tmbvcov[5,] <- c(-9, 9.00000e+00, 3.462231e-13, 3.462231e-13, 9.000000e+00) - - expect_equal(summ$vcov, tmbvcov, tol=1e-5) - - ## Check covariance matrix for params only - tryResult <- try({ - summ2 <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ2$vcov, tmbvcov[1:3,1:3], tol=1e-5) - }) - if(inherits(tryResult, 'try-error')) { - print(class(cmLaplace)) - print(cL) - } - - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) -}) - -test_that("Laplace with 2x1D parameters (one needs transformation) and non-normal data works", { - m <- nimbleModel( - nimbleCode({ - mu ~ dnorm(0, sd = 10.0) - sigma ~ dunif(0, 100) - for (i in 1:5){ - theta[i] ~ dnorm(mu, sd = sigma) - logit(p[i]) <- theta[i] - y[i] ~ dbinom(10, prob = p[i]) - } - }), - data = list(y = c(8, 6, 5, 3, 7)), - inits = list(mu = 1, sigma = 1, theta = rep(0, 5)), - 
buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - ## Compare with results from TMB - expect_equal(opt$par, c(0.330241, 0.3059177), tol = 1e-4) - expect_equal(opt$value, -9.703857, tol = 1e-6) - ## Check covariance matrix on the transformed scale - summ <- cmLaplace$summary(opt, originalScale = FALSE, jointCovariance = TRUE) - tmbvcov <- matrix(nrow = 7, ncol = 7) - tmbvcov[1,] <- c(0.10337427, 0.04574391, 0.09719623, 0.08526807, 0.07943536, 0.06797944, 0.09118502) - tmbvcov[2,] <- c(0.04574391, 3.21994672, 0.91522073, 0.10980129, -0.28810783, -1.07845809, 0.51064309) - tmbvcov[3,] <- c(0.09719623, 0.91522073, 0.40584816, 0.09981763, -0.01342937, -0.23826114, 0.21393310) - tmbvcov[4,] <- c(0.08526807, 0.10980129, 0.09981763, 0.14821768, 0.05824110, 0.03110420, 0.08580658) - tmbvcov[5,] <- c(0.07943536, -0.28810783, -0.01342937, 0.05824110, 0.16979550, 0.16423022, 0.02255625) - tmbvcov[6,] <- c(0.06797944, -1.07845809, -0.23826114, 0.03110420, 0.16423022, 0.50464751, -0.10296956) - tmbvcov[7,] <- c(0.09118502, 0.51064309, 0.21393310, 0.08580658, 0.02255625, -0.10296956, 0.22602059) - expect_equal(summ$vcov, tmbvcov, tol=1e-3) - ## Standard error for sigma (original parameter) - summ2 <- cmLaplace$summary(opt, originalScale = TRUE) - expect_equal(summ2$params$stdError[2], 0.5472659, tol=1e-4) - - # Check covariance matrix for transformed params only - summ3 <- cmLaplace$summary(opt, originalScale = FALSE, randomEffectsStdError = TRUE, jointCovariance = FALSE) - expect_equal(summ3$vcov, tmbvcov[1:2,1:2], tol=1e-3) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-2) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) - ## TMB cpp code: - #include <TMB.hpp> - #template<class Type> - #Type objective_function<Type>::operator() () - # { - # DATA_VECTOR(y); - # PARAMETER(mu); - # PARAMETER(sigmaTrans); - # PARAMETER_VECTOR(theta); - # // Transformation for sigma - # Type sigma = 100 * exp(sigmaTrans) / (1 + exp(sigmaTrans)); - # // Negative log-likelihood - # Type ans = 0; - # vector<Type> p(5); - # for(int i = 0; i < 5; i++){ - # p[i] = exp(theta[i]) / (1 + exp(theta[i])); - # ans -= dnorm(theta[i], mu, sigma, true) + dbinom(y[i], Type(10), p[i], true); - # } - # ADREPORT(sigma); - # return ans; - # } - ## TMB R code: - # library(TMB) - # compile("test.cpp") - # dyn.load(dynlib("test")) - # data <- list(y = m$y) - # parameters <- list(mu = m$mu, sigmaTrans = logit(m$sigma/100), theta = m$theta) - # ## Fit model - # obj <- MakeADFun(data, parameters, random="theta", DLL="test") - # tmbopt <- nlminb(obj$par, obj$fn, obj$gr) - # tmbrep <- sdreport(obj, getJointPrecision = TRUE) - # tmbvcov <- inverse(tmbrep$jointPrecision) -}) - -test_that("Laplace with no random effects (simple linear regression) works", { - set.seed(1) - x <- rnorm(5) - y <- sapply(-1 + x, rnorm, n = 1, sd = 1) - m <- nimbleModel( - nimbleCode({ - a ~ dnorm(0, sd = 10.0) - b ~ dnorm(0, sd = 10.0) - sigma ~ dunif(0, 100) - for(i in 1:5){ - mu_y[i] <- a + b*x[i] - y[i] ~ dnorm(mu_y[i], sd = sigma) - } - }), - constants = list(x = x), - data = list(y = y), - inits = list(a = 
-1, b = 1, sigma = 1), - buildDerivs = TRUE - ) - - mLaplace <- buildLaplace(model = m) - mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE)) - cm <- compileNimble(m) - cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) - cmLaplace <- cL$mLaplace - cmLaplaceNoSplit <- cL$mLaplaceNoSplit - - opt <- cmLaplace$findMLE() - summ <- cmLaplace$summary(opt) - ## Compare results with those from TMB - expect_equal(opt$par, c(-0.8899436, 1.1940911, 0.5744841), tol = 1e-4) - expect_equal(opt$value, -4.323288, tol = 1e-7) - expect_equal(summ$params$stdError, c(0.2598061, 0.2988869, 0.1816661), tol = 1e-5) - - for(v in cm$getVarNames()) cm[[v]] <- m[[v]] - optNoSplit <- cmLaplaceNoSplit$findMLE() - expect_equal(opt$par, optNoSplit$par, tol = 1e-4) - expect_equal(opt$value, optNoSplit$value, tol = 1e-7) - check_laplace_alternative_methods(cmLaplace, cm, m, opt, expected_no_re = TRUE) - check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, expected_no_re = TRUE) - - summL <- summaryLaplace(cmLaplace, opt, randomEffectsStdError = TRUE, jointCovariance = TRUE) - expect_equal(nrow(summL$randomEffects), 0) - expect_equal(nrow(summL$vcov), 3) - ## TMB cpp code - #include - #template - # Type objective_function::operator() () - # { - # DATA_VECTOR(y); - # DATA_VECTOR(x); - # PARAMETER(a); - # PARAMETER(b); - # PARAMETER(sigma); - # Type nll = -sum(dnorm(y, a+b*x, sigma, true)); - # return nll; - # } - ## R code - # compile("lm.cpp") - # dyn.load(dynlib("lm")) - # set.seed(1) - # x <- rnorm(5) - # y <- sapply(-1 + x, rnorm, n = 1, sd = 1) - # data <- list(y=y, x=x) - # parameters <- list(a=-1, b=1, sigma=1) - # obj <- MakeADFun(data, parameters, DLL="lm") - # obj$hessian <- TRUE - # tmbres <- do.call("optim", obj) - # tmbsumm <- summary(sdreport(obj)) -}) - -## Possible future feature (was drafted, not completed): -## -## test_that("Laplace with no priors for unconstrained parameters works", { -## ## Here we re-use some of tests above and remove priors for parameters -## ## Test 1 -## m <- nimbleModel( -## nimbleCode({ -## y ~ dnorm(a, sd = 2) -## a ~ dnorm(mu, sd = 3) -## # mu ~ dnorm(0, sd = 5) -## }), data = list(y = 4), inits = list(a = -1), -## buildDerivs = TRUE -## ) - -## mLaplace <- buildLaplace(model = m, control = list(allowNonPriors = TRUE)) -## mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE, allowNonPriors = TRUE)) -## cm <- compileNimble(m) -## cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) -## cmLaplace <- cL$mLaplace -## cmLaplaceNoSplit <- cL$mLaplaceNoSplit - -## opt <- cmLaplace$findMLE() -## expect_equal(opt$par, 4, tol = 1e-4) -## expect_equal(opt$value, dnorm(4, 4, sd = sqrt(13), log = TRUE)) -## summ <- cmLaplace$summary(opt, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = TRUE) -## expect_equal(summ$randomEffects$estimate, 4, tol = 1e-5) -## # Covariance matrix -## vcov <- matrix(c(1/(1/4+1/9), 0, 0, 0), nrow = 2) + matrix(c(4/13, 1), ncol = 1) %*% (13) %*% t(matrix(c(4/13, 1), ncol = 1)) -## expect_equal(vcov, summ$vcov, tol = 1e-6) - -## for(v in cm$getVarNames()) cm[[v]] <- m[[v]] -## optNoSplit <- cmLaplaceNoSplit$findMLE() -## expect_equal(opt$par, optNoSplit$par, tol = 1e-2) -## expect_equal(opt$value, optNoSplit$value, tol = 1e-7) -## check_laplace_alternative_methods(cmLaplace, cm, m, opt) -## check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) - -## ## Test 2 -## set.seed(1) -## x <- rnorm(5) -## y <- sapply(-1 + x, rnorm, n = 1, sd = 1) -## m <- nimbleModel( -## 
nimbleCode({ -## sigma ~ dunif(0, 100) -## for(i in 1:5){ -## mu_y[i] <- a + b*x[i] -## y[i] ~ dnorm(mu_y[i], sd = sigma) -## } -## }), -## constants = list(x = x), -## data = list(y = y), -## buildDerivs = TRUE -## ) - -## mLaplace <- buildLaplace(model = m, control = list(allowNonPriors = TRUE)) - -## mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE, allowNonPriors = TRUE)) -## cm <- compileNimble(m) -## cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) -## cmLaplace <- cL$mLaplace -## cmLaplaceNoSplit <- cL$mLaplaceNoSplit - -## opt <- cmLaplace$findMLE() -## summ <- cmLaplace$summary(opt) -## ## Compare results with those from TMB -## expect_equal(opt$par, c(0.5744841, -0.8899436, 1.1940911), tol = 1e-5) -## expect_equal(opt$value, -4.323288, tol = 1e-7) -## expect_equal(summ$params$stdError, c(0.1816661, 0.2598061, 0.2988869), tol = 1e-5) - -## for(v in cm$getVarNames()) cm[[v]] <- m[[v]] -## optNoSplit <- cmLaplaceNoSplit$findMLE() -## expect_equal(opt$par, optNoSplit$par, tol = 1e-2) -## expect_equal(opt$value, optNoSplit$value, tol = 1e-7) -## check_laplace_alternative_methods(cmLaplace, cm, m, opt) -## check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) - -## ## Test 3 -## set.seed(1) -## y <- array(rnorm(8, 6, 5), dim = c(2, 2, 2)) -## cov_a <- matrix(c(2, 1.5, 1.5, 2), nrow = 2) -## m <- nimbleModel( -## nimbleCode({ -## # for(i in 1:2) mu[i] ~ dnorm(0, sd = 10) -## mu_a[1] <- 0.8 * mu[1] -## mu_a[2] <- 0.2 * mu[2] -## for(i in 1:2) a[i, 1:2] ~ dmnorm(mu_a[1:2], cov = cov_a[1:2, 1:2]) -## for(i in 1:2) { -## for(j in 1:2) { -## y[1, j, i] ~ dnorm( 0.5 * a[i, 1], sd = 1.8) -## y[2, j, i] ~ dnorm( 0.1 * a[i, 2], sd = 1.2) -## } -## } -## }), -## data = list(y = y), -## inits = list(a = matrix(c(-2, -3, 0, -1), nrow = 2)), -## constants = list(cov_a = cov_a), -## buildDerivs = TRUE -## ) - -## mLaplace <- buildLaplace(model = m, control = list(allowNonPriors = TRUE)) -## mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE, allowNonPriors = TRUE)) -## cm <- compileNimble(m) -## cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) -## cmLaplace <- cL$mLaplace -## cmLaplaceNoSplit <- cL$mLaplaceNoSplit - -## opt <- cmLaplace$findMLE() - -## expect_equal(opt$par, c(12.98392, 406.04878), tol = 1e-4) -## expect_equal(opt$value, -41.86976, tol = 1e-6) -## # Check covariance matrix -## summ <- cmLaplace$summary(opt, jointCovariance = TRUE) -## tmbvcov <- matrix(nrow = 6, ncol = 6) -## tmbvcov[1,] <- c(6.625000e+00, 4.687500e+00, 4.050000e+00, 4.050000e+00, -2.693817e-11, -2.695275e-11) -## tmbvcov[2,] <- c(4.687500e+00, 9.250000e+02, 2.965628e-11, 2.967848e-11, 1.800000e+02, 1.800000e+02) -## tmbvcov[3,] <- c(4.050000e+00, 2.951367e-11, 3.995242e+00, 2.484758e+00, 5.596302e-01, -5.596302e-01) -## tmbvcov[4,] <- c(4.050000e+00, 2.951367e-11, 2.484758e+00, 3.995242e+00, -5.596302e-01, 5.596302e-01) -## tmbvcov[5,] <- c(-2.691772e-11, 1.800000e+02, 5.596302e-01, -5.596302e-01, 3.684693e+01, 3.515307e+01) -## tmbvcov[6,] <- c(-2.691772e-11, 1.800000e+02, -5.596302e-01, 5.596302e-01, 3.515307e+01, 3.684693e+01) - -## expect_equal(summ$vcov[c(5,6,1,3,2,4), c(5,6,1,3,2,4)], tmbvcov, tol = 1e-4) - -## for(v in cm$getVarNames()) cm[[v]] <- m[[v]] -## optNoSplit <- cmLaplaceNoSplit$findMLE() -## expect_equal(opt$par, optNoSplit$par, tol = 1e-4) -## expect_equal(opt$value, optNoSplit$value, tol = 1e-7) -## check_laplace_alternative_methods(cmLaplace, cm, m, opt) -## check_laplace_alternative_methods(cmLaplaceNoSplit, cm, 
m, optNoSplit) - -## ## Test 4 -## m <- nimbleModel( -## nimbleCode({ -## # for(i in 1:3) { -## # mu[i] ~ dnorm(0, sd = 10) -## # } -## mu_a[1] <- mu[1] + mu[2] -## mu_a[2] <- mu[2] + mu[3] -## a[1] ~ dnorm(mu_a[1], sd = 2) -## y[1] ~ dnorm(a[1], sd = 3) -## a[2] ~ dnorm(mu_a[2], sd = 2) -## y[2] ~ dnorm(a[2], sd =3) -## y[3] ~ dnorm(mu[3], sd = 3) -## }), -## data = list(y = c(2, 3, 5)), -## inits = list(a = c(1, 2)), -## buildDerivs = TRUE -## ) - -## mLaplace <- buildLaplace(model = m, control = list(allowNonPriors = TRUE)) -## mLaplaceNoSplit <- buildLaplace(model = m, control = list(split = FALSE, allowNonPriors = TRUE)) -## cm <- compileNimble(m) -## cL <- compileNimble(mLaplace, mLaplaceNoSplit, project = m) -## cmLaplace <- cL$mLaplace -## cmLaplaceNoSplit <- cL$mLaplaceNoSplit - -## opt <- cmLaplace$findMLE() -## expect_equal(opt$par, c(4, -2, 5), tol = 1e-3) -## expect_equal(opt$value, -6.420377, tol = 1e-6) -## ## Check covariance matrix -## summ <- cmLaplace$summary(opt, jointCovariance = TRUE) - -## ## Covariance matrix from TMB -## tmbvcov <- matrix(nrow = 5, ncol = 5) -## tmbvcov[1,] <- c( 35, -2.20000e+01, 9.000000e+00, 9.000000e+00, -9.000000e+00) -## tmbvcov[2,] <- c(-22, 2.20000e+01, -9.000000e+00, 8.463230e-13, 9.000000e+00) -## tmbvcov[3,] <- c( 9, -9.00000e+00, 9.000000e+00, -3.462231e-13, 3.462231e-13) -## tmbvcov[4,] <- c( 9, 8.46323e-13, -3.462231e-13, 9.000000e+00, 3.462231e-13) -## tmbvcov[5,] <- c(-9, 9.00000e+00, 3.462231e-13, 3.462231e-13, 9.000000e+00) - -## expect_equal(summ$vcov[c(3:5, 1:2), c(3:5, 1:2)], tmbvcov, tol=1e-5) - -## for(v in cm$getVarNames()) cm[[v]] <- m[[v]] -## optNoSplit <- cmLaplaceNoSplit$findMLE() -## expect_equal(opt$par, optNoSplit$par, tol = 1e-2) -## expect_equal(opt$value, optNoSplit$value, tol = 1e-7) -## check_laplace_alternative_methods(cmLaplace, cm, m, opt) -## check_laplace_alternative_methods(cmLaplaceNoSplit, cm, m, optNoSplit) - -## }) - -test_that("Laplace with crossed random effects works", { - library(lme4) - data(Penicillin) - N <- nrow(Penicillin) - plate <- rep(1:24, each = 6) - np <- 24 - sample <- rep(1:6, 24) - ns <- 6 - - m <- nimbleModel( - nimbleCode({ - ## Intercept - beta ~ dnorm(0, sd = 100) - ## Standard deviations - sigma ~ dgamma(1.0, 1.0) - sigma_p ~ dgamma(1.0, 1.0) - sigma_s ~ dgamma(1.0, 1.0) - ## Random effects for plate - for(i in 1:np){ - mup[i] ~ dnorm(0, sd = sigma_p) - } - ## Random effects for sample - for(i in 1:ns){ - mus[i] ~ dnorm(0, sd = sigma_s) - } - ## Observations - for(i in 1:N){ - mu_y[i] <- beta + mus[sample[i]] + mup[plate[i]] - y[i] ~ dnorm(mu_y[i], sd = sigma) - } - }), - constants = list(N = N, np = np, ns = ns, plate = plate, sample = sample), - data = list(y = Penicillin$diameter), - inits = list(beta = 20, sigma = 1, sigma_p = 1, sigma_s = 1, mus = rep(0, ns), mup = rep(0, np)), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m)#, control=list(innerOptimStart = "last.best")) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - ## cmLaplace$updateSettings(innerOptimMethod = "nlminb") - opt <- cmLaplace$findMLE() - nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - - lme4_fit <- lmer(diameter ~ 1 + (1|plate) + (1|sample), data = Penicillin, REML = FALSE) - lme4res <- summary(lme4_fit) - - expect_equal(nimres$params$estimate[1], lme4res$coefficients[,"Estimate"], tol=1e-3) - expect_equal(nimres$params$estimate[c(3,4,2)], as.data.frame(VarCorr(lme4_fit))[,"sdcor"], tol = 5e-4) - # Note that with innerOptimMethod "nlminb", the 
next check is far off, within only about 0.2 - # on Mac, and getting a NaN on ubuntu CI tests. (Also I don't know why those differ.) - expect_equal(nimres$params$stdError[1], lme4res$coefficients[,"Std. Error"], tol=2e-3) - expect_equal(nimres$randomEffects$estimate[25:30], as.vector(t(ranef(lme4_fit)$sample)), tol = 1e-3) - expect_equal(nimres$randomEffects$estimate[1:24], as.vector(t(ranef(lme4_fit)$plate)), tol = 1e-4) -}) - -test_that("Laplace with nested random effects works", { - library(lme4) - data(Pastes) - lme4_fit <- lmer(strength ~ 1 + (1|batch) + (1|batch:cask), data = Pastes, REML = FALSE) - lme4res <- summary(lme4_fit) - - m <- nimbleModel( - nimbleCode({ - ## Intercept - beta ~ dnorm(0, sd = 100) - ## Standard deviations - sigma ~ dgamma(1.0, 1.0) - sigma1 ~ dgamma(1.0, 1.0) - sigma2 ~ dgamma(1.0, 1.0) - ## Random effects for batch - for(i in 1:10){ - mub[i] ~ dnorm(0, sd = sigma1) - } - ## Random effects for batch:cask - for(i in 1:30){ - mubc[i] ~ dnorm(0, sd = sigma2) - } - ## Observations - for(i in 1:60){ - mu_y[i] <- beta + mub[batch[i]] + mubc[cask[i]] - y[i] ~ dnorm(mu_y[i], sd = sigma) - } - }), - constants = list(batch = rep(1:10, each = 6), cask = rep(1:30, each = 2)), - data = list(y = Pastes$strength), - buildDerivs = TRUE - ) - mLaplace <- buildLaplace(model = m) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - ## It seems that default start values (0, 1, 1, 1) for this example do not work well - ## for optimisation; use c(2, 2, 2, 2) instead - #expect_output( - opt <- cmLaplace$findMLE(pStart = c(2,2,2,2)) - #, "optim does not converge for the inner optimization") - nimres <- cmLaplace$summary(opt, randomEffectsStdError = TRUE) - - expect_equal(nimres$params$estimate[1], lme4res$coefficients[,"Estimate"], tol = 1e-5) - expect_equal(nimres$params$estimate[c(4, 3, 2)], as.data.frame(VarCorr(lme4_fit))[,"sdcor"], tol = 5e-5) - expect_equal(nimres$params$stdError[1], lme4res$coefficients[,"Std. 
Error"], tol = 5e-5) - expect_equal(nimres$randomEffects$estimate[seq(1, 40, by = 4)], as.vector(t(ranef(lme4_fit)$batch)), tol = 5e-4) - expect_equal(nimres$randomEffects$estimate[-seq(1, 40, by = 4)], as.vector(t(ranef(lme4_fit)$`batch:cask`)), tol = 5e-4) -}) - -test_that("Laplace error trapping of wrong-length parameters works", { - library(nimble) - library(testthat) - - m <- nimbleModel( - nimbleCode({ - d[1:3] ~ ddirch(alpha[1:3]) # params - for(i in 1:3) x[i] ~ dnorm(d[i], 1) # randomEffects - for(i in 1:3) y[i] ~ dnorm(x[i], 1) # data - }), - data = list(y = rnorm(3), alpha = rep(1.1, 3)), - inits = list(x = rnorm(3), d = c(.2, .3, .5)), - buildDerivs = TRUE - ) - m$calculate() - mLaplace <- buildLaplace(model = m) - cm <- compileNimble(m) - cmLaplace <- compileNimble(mLaplace, project = m) - - ## cat("Eight messages beginning with [Warning] are expected:\n") - - # should work - expect_no_error(cmLaplace$calcLogLik(c(.4, .5, .1))) - expect_no_error(cmLaplace$calcLaplace(c(.4, .5, .1))) - expect_no_error(cmLaplace$gr_logLik(c(.4, .5, .1))) - expect_no_error(cmLaplace$gr_Laplace(c(.4, .5, .1))) - - # should throw errors - expect_output(expect_error(cmLaplace$calcLogLik(c(.4, .5))), "should be length") - expect_output(expect_error(cmLaplace$calcLaplace(c(.4, .5))), "should be length") - expect_output(expect_error(cmLaplace$gr_logLik(c(.4, .5))), "should be length") - expect_output(expect_error(cmLaplace$gr_Laplace(c(.4, .5))), "should be length") - - # should work - expect_no_error(cmLaplace$calcLogLik(c(.4, .5), trans = TRUE)) - expect_no_error(cmLaplace$calcLaplace(c(.4, .5), trans = TRUE)) - expect_no_error(cmLaplace$gr_logLik(c(.4, .5), trans = TRUE)) - expect_no_error(cmLaplace$gr_Laplace(c(.4, .5), trans = TRUE)) - - # should throw errors - expect_output(expect_error(cmLaplace$calcLogLik(c(.4, .5, .1), trans = TRUE)), "should be length") - expect_output(expect_error(cmLaplace$calcLaplace(c(.4, .5, .1), trans = TRUE)), "should be length") - expect_output(expect_error(cmLaplace$gr_logLik(c(.4, .5, .1), trans = TRUE)), "should be length") - expect_output(expect_error(cmLaplace$gr_Laplace(c(.4, .5, .1), trans = TRUE)), "should be length") - - ## - output <- cmLaplace$findMLE(c(.4, .5, .1)) - expect_true(all(output$counts > 0)) - # We couldn't throw an error from a nimbleList-returning method - # so we emit a message containing "[Warning]". - expect_output(output <- cmLaplace$findMLE(c(.4, .5)), "should be length") - expect_identical(output$counts, integer()) -}) - -test_that("Laplace works with different numbers of REs in different cond. ind. sets", { - # This checks on Issue #1312, which was really a bug with nimOptim - # that arose from having multiple nimOptim calls share the same - # control list. - # This test does not check correctness of result, only that it runs. 
-test_that("Laplace works with different numbers of REs in different cond. ind. sets", {
-  # This checks on Issue #1312, which was really a bug with nimOptim
-  # that arose from having multiple nimOptim calls share the same
-  # control list.
-  # This test does not check correctness of result, only that it runs.
-  code <- nimbleCode({
-    for(i in 1:2) {
-      param[i] ~ dnorm(0, 1)
-      for(j in 1:num_re[i]) {
-        re[i,j] ~ dnorm(param[i], 1)
-      }
-      y[i] ~ dnorm(sum(re[i,1:num_re[i]]), 1)
-    }
-  })
-
-  num_re <- c(3,7) ## different numbers of REs in two conditionally independent sets
-  constants <- list(num_re = num_re)
-  data <- list(y = c(0,0))
-
-  Rmodel <- nimbleModel(code, constants, data, buildDerivs = TRUE)
-  Rlaplace <- buildLaplace(Rmodel, 'param', 're')
-
-  Cmodel <- compileNimble(Rmodel)
-  Claplace <- compileNimble(Rlaplace, project = Rmodel)
-
-  expect_no_error(Claplace$findMLE(c(0,0)))
-})
-
-test_that("Laplace with N(0,1) random effects works", {
-  # This test also uses dflat and dhalfflat
-  set.seed(1)
-  code <- nimbleCode({
-    beta0 ~ dflat()
-    beta1 ~ dflat()
-    sigma ~ dhalfflat()
-    for(i in 1:5) eps[i] ~ dnorm(0, 1)
-    for(i in 1:5) sigma_eps[i] <- eps[i] * sigma
-    for(i in 1:25) {
-      y[i] ~ dpois(exp(beta0 + beta1*X[i] + sigma_eps[group[i]]))
-    }
-    for(i in 1:10) z[i] ~ dnorm(2*beta0, 1) # calcNodesOther
-    foo <- step(beta0)
-  })
-  X <- rnorm(25)
-  group <- rep(1:5, each = 5)
-  eps <- rnorm(5, 0, sd = 2)
-  y <- rpois(25, exp(3 + .2*X + rep(eps, each=5)))
-  z <- rnorm(10, 2*3, sd = 1)
-  m <- nimbleModel(code, data = list(y = y, z = z),
-                   constants = list(X = X, group=group), buildDerivs=TRUE)
-
-  # Defaults not expected to be useful
-  SMN <- setupMargNodes(m)
-  expect_identical(SMN$randomEffectsNodes, character())
-
-  SMN <- setupMargNodes(m, # paramNodes = c("beta0", "beta1", "sigma"),
-                        randomEffectsNodes = 'eps[1:5]')
-  expect_identical(SMN$randomEffectsSets,
-                   list('eps[1]','eps[2]','eps[3]','eps[4]','eps[5]'))
-  expect_identical(SMN$calcNodesOther,
-                   m$expandNodeNames(c('lifted_d2_times_beta0', 'z[1:10]')))
-  expect_identical(SMN$paramNodes,
-                   c("beta0", "beta1", "sigma"))
-
-  mLaplace <- buildLaplace(m, SMN)
-  cm <- compileNimble(m)
-  cmLaplace <- compileNimble(mLaplace, project = m)
-  cmLaplace$updateSettings(innerOptimMethod="nlminb") # findMLE will hang using BFGS
-  res <- cmLaplace$findMLE(c(0,0,1))
-  # TMB code in test_N01.cpp
-## #include <TMB.hpp>
-## template<class Type>
-## Type objective_function<Type>::operator() ()
-## {
-##   DATA_VECTOR(y);
-##   DATA_VECTOR(z);
-##   DATA_VECTOR(X);
-##   DATA_IVECTOR(group);
-##   PARAMETER_VECTOR(eps);
-##   PARAMETER_VECTOR(beta);
-##   PARAMETER(sigma);
-##   int i;
-##   // Negative log-likelihood
-##   Type ans = Type(0.);
-##   for(i = 0; i < 5; ++i)
-##     ans -= dnorm(eps[i], Type(0.), Type(1.), true);
-##   for(i = 0; i < 25; ++i)
-##     ans -= dpois(y[i], exp(beta[0] + beta[1] * X[i] + sigma*eps[group[i]]), true);
-##   for(i = 0; i < 10; ++i)
-##     ans -= dnorm(z[i], Type(2.)*beta[0], Type(1.), true);
-##   return ans;
-## }
-## library(TMB)
-## compile("test_N01.cpp")
-## dyn.load(dynlib("test_N01"))
-## data <- list(y = y, X = X, group = group-1, z = z)
-## parameters <- list(beta = c(0, 0), sigma = 1, eps = rep(0, 5))
-## obj <- MakeADFun(data = data, parameters = parameters, random = "eps", DLL = "test_N01")
-## tmbres <- nlminb(obj$par, obj$fn, obj$gr)
-## tmbrep <- sdreport(obj, getJointPrecision = TRUE)
-## tmbvcov <- solve(tmbrep$jointPrecision)
-## write.table(tmbvcov, file = "", sep=",", col.names = FALSE, row.names = FALSE)
-  expect_equal(res$par, c(3.1276930, 0.1645356, 1.5657498), tolerance = 1e-4)
-  summ <- cmLaplace$summary(res, randomEffectsStdError=TRUE, jointCovariance=TRUE)
-  ## From the write.table call just above
-  ## (which is symmetric anyway, so byrow = TRUE doesn't really matter)
-  TMB_vcov <- matrix(byrow = TRUE, nrow = 8, data =
-    c(c(0.0153602444576517, 0.0117648503870507, 0.0284134827252613, 0.0199805060648755, 0.00318486286937286, -0.0141707177248526, -0.00040366417968837, 0.018866970112233),
-      c(0.0117648503870507, 0.0180821401472876, 0.0412180770222714, 0.0268113103701797, -0.00119093828503259, -0.013523875123908, -0.000258765680997534, 0.0314340905527759),
-      c(0.0284134827252614, 0.0412180770222714, 0.36562970159108, 0.167252131855179, -0.0909996094062978, 0.00047823545907378, 0.000125154168856971, 0.28920161604805),
-      c(0.0199805060648755, 0.0268113103701797, 0.167252131855179, 0.10843325451055, -0.0439547462832083, -0.00708716995685574, -0.000521588459390236, 0.154869927065379),
-      c(0.00318486286937281, -0.00119093828503263, -0.0909996094062981, -0.0439547462832083, 0.0453386248870613, -0.0201751621932702, -0.000656047342397189, -0.0981200179208084),
-      c(-0.0141707177248526, -0.013523875123908, 0.000478235459074001, -0.00708716995685565, -0.0201751621932703, 0.0245443674575151, -9.27215078179135e-06, 0.0144354576429851),
-      c(-0.00040366417968837, -0.000258765680997534, 0.00012515416885698, -0.000521588459390233, -0.000656047342397191, -9.27215078179097e-06, 0.00290834901828464, 0.000631332975051338),
-      c(0.0188669701122331, 0.031434090552776, 0.28920161604805, 0.154869927065379, -0.0981200179208082, 0.0144354576429849, 0.000631332975051331, 0.283268865188007)))
-
-  expect_equal(summ$vcov, TMB_vcov[c(6:8, 1:5), c(6:8, 1:5)], tol = 1e-4)
-  # Check covariance matrix for params only
-  summ2 <- cmLaplace$summary(res, originalScale = TRUE, randomEffectsStdError = TRUE, jointCovariance = FALSE)
-  expect_equal(summ2$vcov, TMB_vcov[6:8,6:8], tol=1e-4)
-})
-
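(Editorial context for the cross-check above: NIMBLE's buildLaplace and the TMB reference both compute the standard Laplace approximation to the marginal log-likelihood, obtained by maximizing the joint log-likelihood over the random effects and correcting with the curvature at that maximum:

$$\log L(\theta) \;\approx\; \log f\big(y, \hat{u}_\theta \mid \theta\big) \;-\; \tfrac{1}{2}\log\det\!\big(-H(\hat{u}_\theta)\big) \;+\; \tfrac{n_u}{2}\log(2\pi),$$

where $\hat{u}_\theta$ maximizes the joint log-likelihood over the $n_u$ random effects $u$ at fixed parameters $\theta$ (the inner optimization) and $H$ is the Hessian of that joint log-likelihood with respect to $u$ at $\hat{u}_\theta$. Since both implementations approximate the same quantity, agreement of the MLE and joint covariance up to optimizer tolerance is the expected behavior the test asserts.)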
-## Now that innerOptim inits has controls for method and values,
-## we need to check over these tests and functionality.
-
-## test_that("Setting Different Initial Values for Inner Optim", {
-##   m <- nimbleModel(
-##     nimbleCode({
-##       y ~ dnorm(0.2 * a, sd = 2)
-##       a ~ dnorm(0.5 * mu, sd = 3)
-##       mu ~ dnorm(0, sd = 5)
-##     }), data = list(y = 4), inits = list(a = -1, mu = 0),
-##     buildDerivs = TRUE
-##   )
-
-##   mLaplace <- buildLaplace(model = m)
-##   cm <- compileNimble(m)
-##   cL <- compileNimble(mLaplace, project = m)
-
-##   cL$setInnerOptimWarning(TRUE) ## Print errors.
-##   cL$setInnerCache(FALSE)       ## Recalculate inner optim to check starting values.
-
-##   ## Test different starting values:
-##   cL$setInnerOptimInits("zero")
-##   expect_output(cL$calcLogLik(37), "Warning: optim did not converge for the inner optimization of AGHQuad or Laplace approximation")
-
-##   cL$setInnerOptimInits("last.best")
-##   expect_output(cL$calcLogLik(37), NA) # Small change to actually recalculate. No warning.
-
-##   set.seed(21)
-##   cL$setInnerOptimInits("random")
-##   expect_output(cL$calcLogLik(37), NA) # Shouldn't warn.
-
-##   values(cm, "a") <- 0 ## Bad init.
-##   cL$setInnerOptimInits("model")
-##   expect_output(cL$calcLogLik(37), "Warning: optim did not converge for the inner optimization of AGHQuad or Laplace approximation")
-
-##   values(cm, "a") <- 18
-##   cL$setInnerOptimInits("model")
-##   expect_output(cL$calcLogLik(37), NA) ## Good init.
-
-##   cL$setInnerOptimInits("last") ## "last" isn't great for this new value.
-##   expect_output(cL$calcLogLik(15), "Warning: optim did not converge for the inner optimization of AGHQuad or Laplace approximation")
-
-##   ## Inspect model to see if values are updated properly after running:
-##   cL$setInnerCache(FALSE)
-##   cL$setInnerOptimInits("random")
-##   cL$calcLogLik(15)
-##   old.val <- cm$a
-##   cL$setModelValues(15)
-##   new.val <- cm$a
-##   expect_false(old.val == new.val)
-## })
-
-nimbleOptions(enableDerivs = EDopt)
-nimbleOptions(buildModelDerivs = BMDopt)
diff --git a/packages/nimble/tests/testthat/test-mcem.R b/packages/nimble/tests/testthat/test-mcem.R
index 34725b681..a439aa5d3 100644
--- a/packages/nimble/tests/testthat/test-mcem.R
+++ b/packages/nimble/tests/testthat/test-mcem.R
@@ -933,11 +933,13 @@ test_that("MCMC for simple LME case works", {
     m2$calculate()
     cm2 <- compileNimble(m2)
 
-    Laplace <- buildLaplace(model=m2, randomEffectsNodes = c("random_int", "random_slope"))
+    if(FALSE) { # Temporarily off while we get nimbleQuad on CRAN.
+    Laplace <- nimbleQuad::buildLaplace(model=m2, randomEffectsNodes = c("random_int", "random_slope"))
     cLaplace <- compileNimble(Laplace, project = m2)
     MLE <- cLaplace$findMLE()
     expect_equal(MLE$value, cLaplace$calcLogLik(opt$par), tolerance = 0.04)
+    }
 
 })
 
 sink(NULL)
diff --git a/packages/prep_pkg.R b/packages/prep_pkg.R
index 215c4abce..f62da1ce4 100755
--- a/packages/prep_pkg.R
+++ b/packages/prep_pkg.R
@@ -81,8 +81,10 @@ explicitUndocFuns <- c("[,numberedModelValuesAccessors-method",
 
 additionalExports <- c("calc_dmnormConjugacyContributions",
                        "calc_dmnormAltParams",
+                       "calc_dmnorm_inv_ld_AltParams",
                        "calc_dwishAltParams",
                        "calc_dcatConjugacyContributions",
+                       "PDinverse_logdet",
                        "CAR_calcM",
                        "CAR_calcC",
                        "CAR_calcCmatrix",