diff --git a/.lintr b/.lintr index 56059936..301903a9 100755 --- a/.lintr +++ b/.lintr @@ -5,7 +5,8 @@ linters: linters_with_defaults( object_usage_linter = NULL, # Stops spurious warnings over imported functions cyclocomp_linter = NULL, indentation_linter = indentation_linter(4), - object_length_linter = object_length_linter(40) + object_length_linter = object_length_linter(40), + return_linter = NULL ) exclusions: list( "design/tests", diff --git a/DESCRIPTION b/DESCRIPTION index 4301c1ce..47cc2219 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,7 +46,8 @@ Imports: methods, digest, posterior, - stats + stats, + splines Suggests: cmdstanr, bayesplot, @@ -123,6 +124,7 @@ Collate: 'external-exports.R' 'jmpost-package.R' 'link_generics.R' + 'populationHR.R' 'settings.R' 'standalone-s3-register.R' 'zzz.R' diff --git a/NAMESPACE b/NAMESPACE index f87a1b51..23b88d2f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -251,6 +251,7 @@ export(linkNone) export(linkShrinkage) export(linkTTG) export(merge) +export(populationHR) export(prior_beta) export(prior_cauchy) export(prior_gamma) @@ -314,6 +315,7 @@ importFrom(Rdpack,reprompt) importFrom(ggplot2,autoplot) importFrom(ggplot2.utils,geom_km) importFrom(glue,as_glue) +importFrom(splines,bs) importFrom(stats,.checkMFClasses) importFrom(stats,acf) importFrom(stats,as.formula) diff --git a/NEWS.md b/NEWS.md index 722532d3..03299589 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # jmpost (development version) +- Included new `populationHR()` function to calculate population effects from a `JointModelSample` object, by marginalising over the patient-level random effects (#447). - Included new `LongitudinalRandomEffects()` function which can be used to extract the patient-level random effects parameter samples from a `JointModelSample` object (#423). - Introduced the `saveObject()` method for `JointModelSample` objects in order to serialise them to disk (#431). - Added support for truncated prior distributions e.g. you can now apply a normal prior to a strictly positive parameter and jmpost will take care of adjusting the density accordingly (#429). diff --git a/R/populationHR.R b/R/populationHR.R new file mode 100644 index 00000000..2769a20e --- /dev/null +++ b/R/populationHR.R @@ -0,0 +1,87 @@ +#' Calculate Population Hazard Ratios +#' +#' Calculates hazard ratios marginalised over subject specific random effects using the +#' approach proposed by \insertCite{oudenhoven2020marginal}{jmpost}. +#' +#' @param object ([`JointModelSamples`]) \cr samples as drawn from a Joint Model. +#' @param hr_formula (`formula`) \cr defines the terms to include in the hazard ratio calculation. +#' By default this uses the right side of the formula used in the survival model. +#' Set to `NULL` not include any terms +#' @param baseline (`formula`) \cr terms to model baseline hazard using variable `time`. +#' Default is a B-spline from [splines]: `~bs(time, df = 10)` +#' @param quantiles (`numeric`) \cr vector of two values in (0, 1) for calculating quantiles from log hazard ratio +#' distributions. +#' +#' @references \insertAllCited{} +#' +#' @returns A list containing a summary of parameter distributions as a `data.frame` and a +#' matrix containing the parameter estimates for each sample. +#' @export +#' @importFrom splines bs +populationHR <- function( + object, + hr_formula = object@data@survival@formula, + baseline = ~ bs(time, df = 10), + quantiles = c(0.025, 0.975) +) { + assert_class(object, "JointModelSamples") + assert_formula(hr_formula) + assert_formula(baseline) + assert_numeric(quantiles, lower = 0, upper = 1, any.missing = FALSE, unique = TRUE) + if (!"time" %in% all.vars(baseline)) stop("baseline formula should include a time term.") + + # Extract the variable names used in the data + subject_var <- object@data@subject@subject + arm_var <- object@data@subject@arm + long_time_var <- all.vars(delete.response(terms(object@data@longitudinal@formula))) + surv_time_var <- all.vars(object@data@survival@formula[[2]][[2]]) + surv_covs <- all.vars(delete.response(terms(hr_formula))) + if (!all(surv_covs %in% colnames(object@data@survival@data))) { + stop("All variables in hr_formula must be in survival data") + } + + + marginal_formula <- stats::reformulate(c( + attr(terms(baseline), "term.labels"), + attr(terms(hr_formula), "term.labels") + )) + + # Get the survival quantities at the observed longitudinal and survival times for each patient: + times_df <- rbind( + stats::setNames(object@data@longitudinal@data[, c(subject_var, long_time_var)], c("subject", "time")), + stats::setNames(object@data@survival@data[, c(subject_var, surv_time_var)], c("subject", "time")) + ) + times_df <- times_df[order(times_df$subject, times_df$time), ] + times_df <- times_df[times_df$time > 0, ] + times_df <- times_df[!duplicated.data.frame(times_df), ] + + # Generate samples of the log hazard log h_i(t) for each patient i at these time points t. + grid_spec <- split(times_df$time, times_df$subject) + log_haz_samples <- SurvivalQuantities( + object, + grid = GridManual(grid_spec), + type = "loghaz" + )@quantities@quantities |> t() + + # Construct \tilde{X} in paper's notation with one row per patient's time + W_df <- dplyr::left_join( + times_df, + stats::setNames(object@data@survival@data[, c(subject_var, surv_covs)], c("subject", surv_covs)), + by = "subject" + ) + W_mat <- model.matrix(marginal_formula, W_df) + + # As model matrix contains baseline and covariates, we don't need the intercept term + # but we want factor variables encoded relative to the intercept + W_mat <- W_mat[, colnames(W_mat) != "(Intercept)", drop = FALSE] + + estimates <- stats::lm.fit(x = W_mat, y = log_haz_samples)$coefficients + tidy_res <- apply(estimates, 1, function(x) { + quantiles <- stats::quantile(x, probs = quantiles) + c(mean = mean(x), median = median(x), quantiles) + }) |> + t() |> + data.frame() + + list(summary = tidy_res, estimates) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index fc1e1d33..2bfd8307 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -28,14 +28,14 @@ reference: contents: - jmpost-package - jmpost-settings - + - title: Data Specification contents: - DataJoint - DataSubject - DataLongitudinal - DataSurvival - + - title: Data Simulation contents: - SimJointData @@ -65,7 +65,7 @@ reference: - prior_std_normal - prior_student_t - prior_init_only - + - title: Longitudinal Model Specification contents: - LongitudinalRandomSlope @@ -73,7 +73,7 @@ reference: - LongitudinalSteinFojo - LongitudinalClaretBruno - LongitudinalModel - + - title: Survival Model Specification contents: - SurvivalExponential @@ -81,8 +81,8 @@ reference: - SurvivalModel - SurvivalWeibullPH - SurvivalGamma - - + + - title: Link Specification contents: - linkDSLD @@ -94,16 +94,16 @@ reference: - Link - LinkComponent - standard-link-user - + - title: Joint Model Specification contents: - JointModel - + - title: Joint Model Fitting contents: - compileStanModel - sampleStanModel - + - title: Postprocessing contents: - JointModelSamples @@ -119,7 +119,8 @@ reference: - autoplot.SurvivalQuantities - brierScore - brierScore.SurvivalQuantities - + - populationHR + - title: Stan Code contents: - Parameter @@ -133,7 +134,7 @@ reference: - write_stan - getParameters - getRandomEffectsNames - + - title: Convenience Functions contents: - initialValues @@ -185,7 +186,7 @@ reference: - as_formula - getPredictionNames - set_limits - + - title: Promises contents: - Promise @@ -194,8 +195,8 @@ reference: - resolvePromise - resolvePromise.Link - resolvePromise.PromiseLinkComponent - - - title: Miscellaneous + + - title: Miscellaneous contents: - STAN_BLOCKS diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index 78a6bc14..1c300f55 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -37,3 +37,17 @@ @article{beyer2020multistate year={2020}, publisher={Wiley Online Library} } + +@article{oudenhoven2020marginal, + author = {van Oudenhoven, Floor M. and Swinkels, Sophie H. N. and Ibrahim, Joseph G. and Rizopoulos, Dimitris}, + title = {A marginal estimate for the overall treatment effect on a survival outcome within the joint modeling framework}, + journal = {Statistics in Medicine}, + volume = {39}, + number = {28}, + pages = {4120-4132}, + keywords = {joint models, marginal estimates, overall treatment effect, survival outcome.}, + doi = {https://doi.org/10.1002/sim.8713}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.8713}, + eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/sim.8713}, + year = {2020} +} diff --git a/man/populationHR.Rd b/man/populationHR.Rd new file mode 100644 index 00000000..5fec3418 --- /dev/null +++ b/man/populationHR.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/populationHR.R +\name{populationHR} +\alias{populationHR} +\title{Calculate Population Hazard Ratios} +\usage{ +populationHR( + object, + hr_formula = object@data@survival@formula, + baseline = ~bs(time, df = 10), + quantiles = c(0.025, 0.975) +) +} +\arguments{ +\item{object}{(\code{\link{JointModelSamples}}) \cr samples as drawn from a Joint Model.} + +\item{hr_formula}{(\code{formula}) \cr defines the terms to include in the hazard ratio calculation. +By default this uses the right side of the formula used in the survival model. +Set to \code{NULL} not include any terms} + +\item{baseline}{(\code{formula}) \cr terms to model baseline hazard using variable \code{time}. +Default is a B-spline from \link{splines}: \code{~bs(time, df = 10)}} + +\item{quantiles}{(\code{numeric}) \cr vector of two values in (0, 1) for calculating quantiles from log hazard ratio +distributions.} +} +\value{ +A list containing a summary of parameter distributions as a \code{data.frame} and a +matrix containing the parameter estimates for each sample. +} +\description{ +Calculates hazard ratios marginalised over subject specific random effects using the +approach proposed by \insertCite{oudenhoven2020marginal}{jmpost}. +} +\references{ +\insertAllCited{} +} diff --git a/tests/testthat/test-populationHR.R b/tests/testthat/test-populationHR.R new file mode 100644 index 00000000..6af4b694 --- /dev/null +++ b/tests/testthat/test-populationHR.R @@ -0,0 +1,103 @@ + +test_data_1 <- ensure_test_data_1() + +test_that("populationHR works as expected for default parameters", { + mp <- test_data_1$jsamples + set.seed(1231) + result <- suppressWarnings(populationHR(object = mp)) + + expect_matrix( + result[[2]], + any.missing = FALSE, + ncols = 200, + nrows = 13 # 10 baseline spline + 3 covariates + ) + + # Difference of beta_cat B and C from SimSurvivalExponential() + expect_equal( + 0.2 - -0.4, + result[[1]][["cov_catC", "mean"]] - result[[1]][["cov_catB", "mean"]], + tolerance = 0.1 + ) + + expect_equal( + 0.2, + result[[1]][["cov_cont", "mean"]], + tolerance = 0.1 + ) + + # Summary calculations are match expectations + summary_stats <- apply(result[[2]], 1, function(x) c(mean(x), quantile(x, c(0.5, 0.025, 0.975)))) |> + t() |> + as.data.frame() + expect_equal( + unname(summary_stats), + unname(result[[1]]) + ) +}) + +test_that("populationHR fails for bad input", { + mp <- test_data_1$jsamples + expect_error(populationHR(object = mp@data), "inherit from class") + expect_error(populationHR(object = mp, hr_formula = "arm"), "formula") + expect_error(populationHR(object = mp, baseline = ~arm), "time term") + expect_error(populationHR(object = mp, hr_formula = ~XX), "All variables") + expect_error(populationHR(object = mp, quantiles = c(-0.3, NA, 2)), "quantiles") +}) + + +test_that("populationHR works as expected for alternative specfications", { + mp <- test_data_1$jsamples + set.seed(1231) + # Arm + continuous covariate + result_arm_cont <- populationHR( + object = mp, + baseline = ~splines::ns(time, df = 5), + hr_formula = ~arm + cov_cont, + quantiles = c(0.05, 0.95) + ) + + expect_matrix( + result_arm_cont[[2]], + any.missing = FALSE, + ncols = 200, + nrows = 7 # 5 baseline spline + 2 covariates + ) + + + ### Summary calculations match expectations + summary_stats <- apply(result_arm_cont[[2]], 1, function(x) c(mean(x), quantile(x, c(0.5, 0.05, 0.95)))) |> + t() |> + as.data.frame() + + expect_equal( + unname(summary_stats), + unname(result_arm_cont[[1]]) + ) + + + # Arm HR only + set.seed(1231) + result_arm <- populationHR( + object = mp, + baseline = ~splines::ns(time, df = 5), + hr_formula = ~arm, + quantiles = c(0.05, 0.95) + ) + + expect_matrix( + result_arm[[2]], + any.missing = FALSE, + ncols = 200, + nrows = 6 # 5 baseline spline + 1 covariates + ) + + # Summary calculations match expectations + summary_stats_arm <- apply(result_arm[[2]], 1, function(x) c(mean(x), quantile(x, c(0.5, 0.05, 0.95)))) |> + t() |> + as.data.frame() + expect_equal( + unname(summary_stats_arm), + unname(result_arm[[1]]) + ) +})