From ba22a5cdb3b772946d2371fa5b25e1a3dbb1cc23 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Wed, 29 Jan 2025 09:03:49 +0000 Subject: [PATCH 1/6] add --- NAMESPACE | 2 + R/cor_diff.R | 109 +++++++++++++++++++++++++++++++++ man/cor_diff.Rd | 34 ++++++++++ tests/testthat/test-cor_diff.R | 7 +++ 4 files changed, 152 insertions(+) create mode 100644 R/cor_diff.R create mode 100644 man/cor_diff.Rd create mode 100644 tests/testthat/test-cor_diff.R diff --git a/NAMESPACE b/NAMESPACE index dc312044..3e783711 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ S3method(pcor_to_cor,matrix) S3method(plot,easycor_test) S3method(plot,easycormatrix) S3method(plot,easycorrelation) +S3method(print,cor_diff) S3method(print,easycormatrix) S3method(print,easycorrelation) S3method(print,easymatrixlist) @@ -38,6 +39,7 @@ S3method(summary,easycorrelation) S3method(visualisation_recipe,easycor_test) S3method(visualisation_recipe,easycormatrix) S3method(visualisation_recipe,easycorrelation) +export(cor_diff) export(cor_lower) export(cor_smooth) export(cor_sort) diff --git a/R/cor_diff.R b/R/cor_diff.R new file mode 100644 index 00000000..730175f0 --- /dev/null +++ b/R/cor_diff.R @@ -0,0 +1,109 @@ +#' Test differences between correlations +#' +#' @description +#' Tests whether the correlation between two variables `x` and `y` is different +#' from the correlation between `x2` and `y2`. +#' +#' `cor_diff()` returns a table containing an index of difference precision (i.e., +#' the estimated difference divided by its standard error) and an associated p-value. +#' A significant p-value indicates that the correlation between `x` and `y` is +#' different from the correlation between `x2` and `y2`. +#' +#' @param data A data frame of observations. +#' @param x,y,x2,y2 The variable names in `data` to be used. +#' @param method Can be `"parametric"` or `"bootstrapping"`. If `"parametric"`, +#' the [psych::r.test()] function is used. If `"bootstrapping"`, a bootstrapping +#' procedure is used. +#' @param ... Other arguments to be passed, for instance `iterations` (default: 1000) +#' if method is bootstrapping. +#' +#' @examples +#' cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width") +#' cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", +#' method = "bootstrapping", iterations = 100) +#' @export +cor_diff <- function(data, x, y, x2, y2, method = "parametric", ...) { + if(method %in% c("bootstrapping")) { + out <- .cor_diff_bootstrapping(data, x, y, x2, y2, ...) + } else { + out <- .cor_diff_parametric(data, x, y, x2, y2, ...) + } + class(out) <- c("cor_diff", class(out)) + out +} + + + +# Methods ----------------------------------------------------------------- + + + +#' @keywords internal +.cor_diff_parametric <- function(data, x, y, x2, y2, ...) { + + insight::check_if_installed("psych", "for 'parametric' correlation difference method") + + args <- list(n = nrow(data), r12 = cor(data[[x]], data[[y]])) + if(x == x2 & y != y2) { + args$r13 <- cor(data[[x]], data[[y2]]) + args$r23 <- cor(data[[y]], data[[y2]]) + } else if(y == y2 & x != x2) { + args$r13 <- cor(data[[y]], data[[x2]]) + args$r23 <- cor(data[[x]], data[[x2]]) + } else { + args$r34 <- cor(data[[x2]], data[[y2]]) + } + test <- do.call(psych::r.test, args) + + out <- data.frame( + Method = "parametric" + ) + if("t" %in% names(test)){ + out$t <- test$t + } else { + out$z <- test$z + } + out$p <- test$p + out +} + +#' @keywords internal +.cor_diff_bootstrapping <- function(data, x, y, x2, y2, iterations = 1000, robust = FALSE, ...) { + diff <- rep(NA, iterations) # Initialize vector + + # Bootstrap + for(i in 1:iterations) { + # Take random sample of data + dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ] + # Compute diff + diff[i] <- cor(dat[[x]], dat[[y]]) - cor(dat[[x2]], dat[[y2]]) + } + + # Summarize + if(robust == FALSE) { + out <- data.frame( + Method = "bootstrapping", + z = mean(diff) / sd(diff), + p = bayestestR::pd_to_p(as.numeric(bayestestR::p_direction(diff))) + ) + } else { + out <- data.frame( + Method = "bootstrapping_robust", + z = median(diff) / mad(diff), + p = bayestestR::pd_to_p(as.numeric(bayestestR::p_direction(diff))) + ) + } + out +} + + + +# Printing ---------------------------------------------------------------- + +#' @export +print.cor_diff <- function(x, ...) { + insight::format_table(x, ...) |> + insight::export_table(title = "Correlation Difference Test") |> + print() + invisible(x) +} diff --git a/man/cor_diff.Rd b/man/cor_diff.Rd new file mode 100644 index 00000000..8591a278 --- /dev/null +++ b/man/cor_diff.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cor_diff.R +\name{cor_diff} +\alias{cor_diff} +\title{Test differences between correlations} +\usage{ +cor_diff(data, x, y, x2, y2, method = "parametric", ...) +} +\arguments{ +\item{data}{A data frame of observations.} + +\item{x, y, x2, y2}{The variable names in \code{data} to be used.} + +\item{method}{Can be \code{"parametric"} or \code{"bootstrapping"}. If \code{"parametric"}, +the \code{\link[psych:r.test]{psych::r.test()}} function is used. If \code{"bootstrapping"}, a bootstrapping +procedure is used.} + +\item{...}{Other arguments to be passed, for instance \code{iterations} (default: 1000) +if method is bootstrapping.} +} +\description{ +Tests whether the correlation between two variables \code{x} and \code{y} is different +from the correlation between \code{x2} and \code{y2}. + +\code{cor_diff()} returns a table containing an index of difference precision (i.e., +the estimated difference divided by its standard error) and an associated p-value. +A significant p-value indicates that the correlation between \code{x} and \code{y} is +different from the correlation between \code{x2} and \code{y2}. +} +\examples{ +cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width") +cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", + method = "bootstrapping", iterations = 100) +} diff --git a/tests/testthat/test-cor_diff.R b/tests/testthat/test-cor_diff.R new file mode 100644 index 00000000..fea85889 --- /dev/null +++ b/tests/testthat/test-cor_diff.R @@ -0,0 +1,7 @@ +test_that("cor_diff", { + expect_equal( + cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width")$t, + -10, + tolerance = 0.001 + ) +}) From a4194f103a5e6b55dea4a7cf78841d562cb1d43d Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Wed, 29 Jan 2025 09:10:18 +0000 Subject: [PATCH 2/6] allow for pairs specification --- R/cor_diff.R | 21 +++++++++++++++++---- man/cor_diff.Rd | 11 +++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/R/cor_diff.R b/R/cor_diff.R index 730175f0..5de161c1 100644 --- a/R/cor_diff.R +++ b/R/cor_diff.R @@ -10,7 +10,8 @@ #' different from the correlation between `x2` and `y2`. #' #' @param data A data frame of observations. -#' @param x,y,x2,y2 The variable names in `data` to be used. +#' @param x,y,x2,y2 The variable names in `data` to be used. `x` and `y` can also +#' be pairs of variables, in which case the second variable is used as `x2` and `y2`. #' @param method Can be `"parametric"` or `"bootstrapping"`. If `"parametric"`, #' the [psych::r.test()] function is used. If `"bootstrapping"`, a bootstrapping #' procedure is used. @@ -18,11 +19,23 @@ #' if method is bootstrapping. #' #' @examples -#' cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width") -#' cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", +#' cor_diff(iris, c("Sepal.Length", "Sepal.Width"), c("Sepal.Length", "Petal.Width")) +#' cor_diff(iris, +#' c("Sepal.Length", "Sepal.Width"), +#' c("Sepal.Length", "Petal.Width"), #' method = "bootstrapping", iterations = 100) #' @export -cor_diff <- function(data, x, y, x2, y2, method = "parametric", ...) { +cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", ...) { + + # If pairs are passed + if(length(x) == 2 & length(y) == 2) { + x2 <- x[2] + y2 <- y[2] + x <- x[1] + y <- y[1] + } + + # Compute if(method %in% c("bootstrapping")) { out <- .cor_diff_bootstrapping(data, x, y, x2, y2, ...) } else { diff --git a/man/cor_diff.Rd b/man/cor_diff.Rd index 8591a278..4059b734 100644 --- a/man/cor_diff.Rd +++ b/man/cor_diff.Rd @@ -4,12 +4,13 @@ \alias{cor_diff} \title{Test differences between correlations} \usage{ -cor_diff(data, x, y, x2, y2, method = "parametric", ...) +cor_diff(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", ...) } \arguments{ \item{data}{A data frame of observations.} -\item{x, y, x2, y2}{The variable names in \code{data} to be used.} +\item{x, y, x2, y2}{The variable names in \code{data} to be used. \code{x} and \code{y} can also +be pairs of variables, in which case the second variable is used as \code{x2} and \code{y2}.} \item{method}{Can be \code{"parametric"} or \code{"bootstrapping"}. If \code{"parametric"}, the \code{\link[psych:r.test]{psych::r.test()}} function is used. If \code{"bootstrapping"}, a bootstrapping @@ -28,7 +29,9 @@ A significant p-value indicates that the correlation between \code{x} and \code{ different from the correlation between \code{x2} and \code{y2}. } \examples{ -cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width") -cor_diff(iris, "Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", +cor_diff(iris, c("Sepal.Length", "Sepal.Width"), c("Sepal.Length", "Petal.Width")) +cor_diff(iris, + c("Sepal.Length", "Sepal.Width"), + c("Sepal.Length", "Petal.Width"), method = "bootstrapping", iterations = 100) } From e69d34bfaf205b2b4f6e0fe5e0e25e48d75ceb59 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Wed, 29 Jan 2025 09:14:59 +0000 Subject: [PATCH 3/6] hotfix --- R/cor_diff.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cor_diff.R b/R/cor_diff.R index 5de161c1..3edea6f9 100644 --- a/R/cor_diff.R +++ b/R/cor_diff.R @@ -29,10 +29,10 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. # If pairs are passed if(length(x) == 2 & length(y) == 2) { - x2 <- x[2] + x2 <- y[1] y2 <- y[2] + y <- x[2] x <- x[1] - y <- y[1] } # Compute From 6dcbffe4f9083f8b4ffa5a587724cfed85885362 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Wed, 29 Jan 2025 09:21:06 +0000 Subject: [PATCH 4/6] && --- R/cor_diff.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/cor_diff.R b/R/cor_diff.R index 3edea6f9..6c8a1aec 100644 --- a/R/cor_diff.R +++ b/R/cor_diff.R @@ -28,7 +28,7 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", ...) { # If pairs are passed - if(length(x) == 2 & length(y) == 2) { + if(length(x) == 2 && length(y) == 2) { x2 <- y[1] y2 <- y[2] y <- x[2] @@ -57,10 +57,10 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. insight::check_if_installed("psych", "for 'parametric' correlation difference method") args <- list(n = nrow(data), r12 = cor(data[[x]], data[[y]])) - if(x == x2 & y != y2) { + if(x == x2 && y != y2) { args$r13 <- cor(data[[x]], data[[y2]]) args$r23 <- cor(data[[y]], data[[y2]]) - } else if(y == y2 & x != x2) { + } else if(y == y2 && x != x2) { args$r13 <- cor(data[[y]], data[[x2]]) args$r23 <- cor(data[[x]], data[[x2]]) } else { From ca80775376fd389daf22f28327bf76855b309f09 Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Wed, 29 Jan 2025 10:13:19 +0000 Subject: [PATCH 5/6] style --- R/cor_diff.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/cor_diff.R b/R/cor_diff.R index 6c8a1aec..faf8365e 100644 --- a/R/cor_diff.R +++ b/R/cor_diff.R @@ -21,14 +21,14 @@ #' @examples #' cor_diff(iris, c("Sepal.Length", "Sepal.Width"), c("Sepal.Length", "Petal.Width")) #' cor_diff(iris, -#' c("Sepal.Length", "Sepal.Width"), -#' c("Sepal.Length", "Petal.Width"), -#' method = "bootstrapping", iterations = 100) +#' c("Sepal.Length", "Sepal.Width"), +#' c("Sepal.Length", "Petal.Width"), +#' method = "bootstrapping", iterations = 100 +#' ) #' @export cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", ...) { - # If pairs are passed - if(length(x) == 2 && length(y) == 2) { + if (length(x) == 2 && length(y) == 2) { x2 <- y[1] y2 <- y[2] y <- x[2] @@ -36,7 +36,7 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. } # Compute - if(method %in% c("bootstrapping")) { + if (method %in% c("bootstrapping")) { out <- .cor_diff_bootstrapping(data, x, y, x2, y2, ...) } else { out <- .cor_diff_parametric(data, x, y, x2, y2, ...) @@ -53,14 +53,13 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. #' @keywords internal .cor_diff_parametric <- function(data, x, y, x2, y2, ...) { - insight::check_if_installed("psych", "for 'parametric' correlation difference method") args <- list(n = nrow(data), r12 = cor(data[[x]], data[[y]])) - if(x == x2 && y != y2) { + if (x == x2 && y != y2) { args$r13 <- cor(data[[x]], data[[y2]]) args$r23 <- cor(data[[y]], data[[y2]]) - } else if(y == y2 && x != x2) { + } else if (y == y2 && x != x2) { args$r13 <- cor(data[[y]], data[[x2]]) args$r23 <- cor(data[[x]], data[[x2]]) } else { @@ -71,7 +70,7 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. out <- data.frame( Method = "parametric" ) - if("t" %in% names(test)){ + if ("t" %in% names(test)) { out$t <- test$t } else { out$z <- test$z @@ -81,11 +80,11 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. } #' @keywords internal -.cor_diff_bootstrapping <- function(data, x, y, x2, y2, iterations = 1000, robust = FALSE, ...) { - diff <- rep(NA, iterations) # Initialize vector +.cor_diff_bootstrapping <- function(data, x, y, x2, y2, iterations = 1000, robust = FALSE, ...) { + diff <- rep(NA, iterations) # Initialize vector # Bootstrap - for(i in 1:iterations) { + for (i in 1:iterations) { # Take random sample of data dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ] # Compute diff @@ -93,7 +92,7 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. } # Summarize - if(robust == FALSE) { + if (robust == FALSE) { out <- data.frame( Method = "bootstrapping", z = mean(diff) / sd(diff), From 1626307962b71d3ad31c719ce25e9d4a875026ea Mon Sep 17 00:00:00 2001 From: Dominique Makowski Date: Tue, 4 Feb 2025 10:57:06 +0000 Subject: [PATCH 6/6] minor --- R/cor_diff.R | 18 +++++++++--------- man/cor_diff.Rd | 7 ++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/cor_diff.R b/R/cor_diff.R index faf8365e..108f8385 100644 --- a/R/cor_diff.R +++ b/R/cor_diff.R @@ -55,15 +55,15 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. .cor_diff_parametric <- function(data, x, y, x2, y2, ...) { insight::check_if_installed("psych", "for 'parametric' correlation difference method") - args <- list(n = nrow(data), r12 = cor(data[[x]], data[[y]])) + args <- list(n = nrow(data), r12 = stats::cor(data[[x]], data[[y]])) if (x == x2 && y != y2) { - args$r13 <- cor(data[[x]], data[[y2]]) - args$r23 <- cor(data[[y]], data[[y2]]) + args$r13 <- stats::cor(data[[x]], data[[y2]]) + args$r23 <- stats::cor(data[[y]], data[[y2]]) } else if (y == y2 && x != x2) { - args$r13 <- cor(data[[y]], data[[x2]]) - args$r23 <- cor(data[[x]], data[[x2]]) + args$r13 <- stats::cor(data[[y]], data[[x2]]) + args$r23 <- stats::cor(data[[x]], data[[x2]]) } else { - args$r34 <- cor(data[[x2]], data[[y2]]) + args$r34 <- stats::cor(data[[x2]], data[[y2]]) } test <- do.call(psych::r.test, args) @@ -88,20 +88,20 @@ cor_diff <- function(data, x, y, x2 = NULL, y2 = NULL, method = "parametric", .. # Take random sample of data dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ] # Compute diff - diff[i] <- cor(dat[[x]], dat[[y]]) - cor(dat[[x2]], dat[[y2]]) + diff[i] <- stats::cor(dat[[x]], dat[[y]]) - stats::cor(dat[[x2]], dat[[y2]]) } # Summarize if (robust == FALSE) { out <- data.frame( Method = "bootstrapping", - z = mean(diff) / sd(diff), + z = mean(diff) / stats::sd(diff), p = bayestestR::pd_to_p(as.numeric(bayestestR::p_direction(diff))) ) } else { out <- data.frame( Method = "bootstrapping_robust", - z = median(diff) / mad(diff), + z = stats::median(diff) / stats::mad(diff), p = bayestestR::pd_to_p(as.numeric(bayestestR::p_direction(diff))) ) } diff --git a/man/cor_diff.Rd b/man/cor_diff.Rd index 4059b734..71afb1fb 100644 --- a/man/cor_diff.Rd +++ b/man/cor_diff.Rd @@ -31,7 +31,8 @@ different from the correlation between \code{x2} and \code{y2}. \examples{ cor_diff(iris, c("Sepal.Length", "Sepal.Width"), c("Sepal.Length", "Petal.Width")) cor_diff(iris, - c("Sepal.Length", "Sepal.Width"), - c("Sepal.Length", "Petal.Width"), - method = "bootstrapping", iterations = 100) + c("Sepal.Length", "Sepal.Width"), + c("Sepal.Length", "Petal.Width"), + method = "bootstrapping", iterations = 100 +) }