diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 13b91dac..aa9c0e2f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -58,7 +58,7 @@ jobs: - name: Cache R packages if: runner.os != 'Windows' && matrix.config.image == null - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ env.cache-version }}-${{ runner.os }}-bioc-${{ matrix.config.bioc }}-${{ hashFiles('depends.Rds') }} diff --git a/DESCRIPTION b/DESCRIPTION index 1237f01b..4d144379 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: einprot Type: Package Title: A collection of proteomics analysis utilities and workflows -Version: 0.9.5 +Version: 0.9.6 Authors@R: c( person("Charlotte", "Soneson", email = "charlotte.soneson@fmi.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")), diff --git a/NAMESPACE b/NAMESPACE index 8cabe74c..489f2c58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(doNormalization) export(doPCA) export(emptySampleText) export(expDesignText) +export(featureCollectionText) export(filterByModText) export(filterFragPipe) export(filterMaxQuant) @@ -20,6 +21,7 @@ export(fixFeatureIds) export(formatTableColumns) export(getCalibrationFrompdAnalysis) export(getColumnNames) +export(getComplexesToPlot) export(getContaminantsDatabaseFrompdAnalysis) export(getConvTable) export(getFirstId) diff --git a/NEWS.md b/NEWS.md index 0dc90624..255115ef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# einprot 0.9.6 + +* Sort output tables from differential abundance analysis by t-statistics instead of p-value +* Allow ComplexHeatmap::Heatmap arguments to be passed on by plotMissingValuesHeatmap +* Add option to limit plotMissingValuesHeatmap to rows with at least one missing value +* Add option to make imputation plots as density plots instead of histograms +* Add more details regarding the source of feature collections to report +* Add center.median.shared and center.mean.shared normalization methods +* Add maxComplexSimilarity argument to plotVolcano +* Update PomBase and WormBase conversion tables +* Add contamination filtering to Spectronaut (presence of contam_ prefix) + # einprot 0.9.5 * Add filtering by score and number of peptides to Spectronaut workflow diff --git a/R/checkArgumentsDIANN.R b/R/checkArgumentsDIANN.R index 5a9e9115..0c0c3792 100644 --- a/R/checkArgumentsDIANN.R +++ b/R/checkArgumentsDIANN.R @@ -17,7 +17,8 @@ minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr, volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0, volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns, - interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed, + interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, + maxComplexSimilarity, seed, includeFeatureCollections, minSizeToKeepSet, customComplexes, complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns, customYml, doRender @@ -144,7 +145,8 @@ .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE) .assertScalar(x = addHeatmaps, type = "logical") .assertScalar(x = normMethod, type = "character", - validValues = c(MsCoreUtils::normalizeMethods(), "none")) + validValues = c(MsCoreUtils::normalizeMethods(), "none", + "center.mean.shared", "center.median.shared")) .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE) .assertScalar(x = stattest, type = "character", validValues = c("limma", "ttest", "proDA", "none")) @@ -163,6 +165,7 @@ .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf)) .assertVector(x = volcanoFeaturesToLabel, type = "character") .assertVector(x = mergeGroups, type = "list") diff --git a/R/checkArgumentsFragPipe.R b/R/checkArgumentsFragPipe.R index 2da38034..d72f6ec5 100755 --- a/R/checkArgumentsFragPipe.R +++ b/R/checkArgumentsFragPipe.R @@ -16,7 +16,8 @@ minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr, volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0, volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns, - interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed, + interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, + maxComplexSimilarity, seed, includeFeatureCollections, minSizeToKeepSet, customComplexes, complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns, customYml, doRender @@ -149,7 +150,8 @@ .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE) .assertScalar(x = addHeatmaps, type = "logical") .assertScalar(x = normMethod, type = "character", - validValues = c(MsCoreUtils::normalizeMethods(), "none")) + validValues = c(MsCoreUtils::normalizeMethods(), "none", + "center.mean.shared", "center.median.shared")) .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE) .assertScalar(x = stattest, type = "character", validValues = c("limma", "ttest", "proDA", "none")) @@ -168,6 +170,7 @@ .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf)) .assertVector(x = volcanoFeaturesToLabel, type = "character") .assertVector(x = mergeGroups, type = "list") diff --git a/R/checkArgumentsMaxQuant.R b/R/checkArgumentsMaxQuant.R index ecf13df2..9f1d4d9b 100644 --- a/R/checkArgumentsMaxQuant.R +++ b/R/checkArgumentsMaxQuant.R @@ -16,7 +16,8 @@ minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr, volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0, volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns, - interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed, + interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, + maxComplexSimilarity, seed, includeFeatureCollections, minSizeToKeepSet, customComplexes, complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns, customYml, doRender @@ -134,7 +135,8 @@ .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE) .assertScalar(x = addHeatmaps, type = "logical") .assertScalar(x = normMethod, type = "character", - validValues = c(MsCoreUtils::normalizeMethods(), "none")) + validValues = c(MsCoreUtils::normalizeMethods(), "none", + "center.mean.shared", "center.median.shared")) .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE) .assertScalar(x = stattest, type = "character", validValues = c("limma", "ttest", "proDA", "none")) @@ -153,6 +155,7 @@ .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf)) .assertVector(x = volcanoFeaturesToLabel, type = "character") .assertVector(x = mergeGroups, type = "list") diff --git a/R/checkArgumentsPDTMT.R b/R/checkArgumentsPDTMT.R index 245209ad..8471d263 100644 --- a/R/checkArgumentsPDTMT.R +++ b/R/checkArgumentsPDTMT.R @@ -19,7 +19,8 @@ minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr, volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0, volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns, - interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed, + interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, + maxComplexSimilarity, seed, includeFeatureCollections, minSizeToKeepSet, customComplexes, complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns, customYml, doRender @@ -157,7 +158,8 @@ .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE) .assertScalar(x = addHeatmaps, type = "logical") .assertScalar(x = normMethod, type = "character", - validValues = c(MsCoreUtils::normalizeMethods(), "none")) + validValues = c(MsCoreUtils::normalizeMethods(), "none", + "center.mean.shared", "center.median.shared")) .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE) .assertScalar(x = stattest, type = "character", validValues = c("limma", "ttest", "proDA", "none")) @@ -176,6 +178,7 @@ .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf)) .assertVector(x = volcanoFeaturesToLabel, type = "character") .assertVector(x = mergeGroups, type = "list") diff --git a/R/checkArgumentsSpectronaut.R b/R/checkArgumentsSpectronaut.R index fb37fbc9..62052f5a 100644 --- a/R/checkArgumentsSpectronaut.R +++ b/R/checkArgumentsSpectronaut.R @@ -18,7 +18,8 @@ minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr, volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0, volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns, - interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed, + interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, + maxComplexSimilarity, seed, includeFeatureCollections, minSizeToKeepSet, customComplexes, complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns, customYml, doRender @@ -147,7 +148,8 @@ .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE) .assertScalar(x = addHeatmaps, type = "logical") .assertScalar(x = normMethod, type = "character", - validValues = c(MsCoreUtils::normalizeMethods(), "none")) + validValues = c(MsCoreUtils::normalizeMethods(), "none", + "center.mean.shared", "center.median.shared")) .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE) .assertScalar(x = stattest, type = "character", validValues = c("limma", "ttest", "proDA", "none")) @@ -166,6 +168,7 @@ .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf)) .assertVector(x = volcanoFeaturesToLabel, type = "character") .assertVector(x = mergeGroups, type = "list") diff --git a/R/constants.R b/R/constants.R index 90804642..e6152735 100644 --- a/R/constants.R +++ b/R/constants.R @@ -18,7 +18,7 @@ NULL EINPROT_COMPLEXES_FILE <- "extdata/complexes/complexdb_einprot0.9.3_20240328_orthologs.rds" #' @export #' @rdname constants -EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.5.0_20220211.rds" +EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds" #' @export #' @rdname constants -EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.5.0_20220211.rds" +EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds" diff --git a/R/doFilter.R b/R/doFilter.R index 75351a1b..689e361b 100644 --- a/R/doFilter.R +++ b/R/doFilter.R @@ -550,6 +550,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' expression) used to identify decoys (reverse hits). The pattern is #' matched against the IDs in the Spectronaut \code{PG.ProteinGroups} #' column. +#' @param contamPattern Character scalar providing the pattern (a regular +#' expression) used to identify contaminants. The pattern is +#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +#' column. #' @param exclFile Character scalar, the path to a text file where the #' features that are filtered out are written. If \code{NULL} (default), #' excluded features are not recorded. @@ -562,22 +566,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE, #' @importFrom rlang .data #' filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE, - revPattern = "_Decoy$", exclFile = NULL) { + revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = NULL) { .assertVector(x = sce, type = "SummarizedExperiment") .assertScalar(x = minScore, type = "numeric", allowNULL = TRUE) .assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE) .assertScalar(x = plotUpset, type = "logical") .assertScalar(x = revPattern, type = "character") + .assertScalar(x = contamPattern, type = "character") .assertScalar(x = exclFile, type = "character", allowNULL = TRUE) ## Make sure that the columns used for filtering later are character vectors rowData(sce)$Reverse <- ifelse(grepl(revPattern, rowData(sce)$PG.ProteinGroups), "+", "") + rowData(sce)$Contaminant <- ifelse(grepl(contamPattern, + rowData(sce)$PG.ProteinGroups), + "+", "") filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>% dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide", - "PG.Cscore"))) %>% - dplyr::mutate(across(dplyr::any_of(c("Reverse")), + "PG.Cscore", "Contaminant"))) %>% + dplyr::mutate(across(dplyr::any_of(c("Reverse", "Contaminant")), function(x) as.numeric(x == "+"))) if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) && !is.null(minPeptides)) { @@ -601,9 +610,9 @@ filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE, if ("Reverse" %in% colnames(rowData(sce))) { keep <- intersect(keep, which(rowData(sce)$Reverse == "")) } - # if ("Potential.contaminant" %in% colnames(rowData(sce))) { - # keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == "")) - # } + if ("Contaminant" %in% colnames(rowData(sce))) { + keep <- intersect(keep, which(rowData(sce)$Contaminant == "")) + } if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) && !is.null(minPeptides)) { keep <- intersect( diff --git a/R/doNormalization.R b/R/doNormalization.R index a523bcdb..1fcad555 100644 --- a/R/doNormalization.R +++ b/R/doNormalization.R @@ -5,7 +5,10 @@ #' #' @param sce A \code{SummarizedExperiment} object (or a derivative). #' @param method Character scalar giving the normalization method. Currently, -#' the methods from \code{MsCoreUtils::normalizeMethods()} are supported. +#' the methods from \code{MsCoreUtils::normalizeMethods()} are supported, +#' together with "center.mean.shared" and "center.median.shared", +#' subtracting the mean or median, respectively, across features that are +#' observed in all samples. #' If \code{spikeFeatures} is not \code{NULL}, only #' \code{"center.mean"}, \code{"center.median"}, \code{"div.mean"} and #' \code{"div.median"} are supported. @@ -56,7 +59,9 @@ doNormalization <- function(sce, method, assayName, normalizedAssayName, spikeFeatures = NULL) { .assertVector(x = sce, type = "SummarizedExperiment") .assertScalar(x = method, type = "character", - validValues = MsCoreUtils::normalizeMethods()) + validValues = c(MsCoreUtils::normalizeMethods(), + "center.mean.shared", + "center.median.shared")) .assertScalar(x = assayName, type = "character", validValues = SummarizedExperiment::assayNames(sce)) .assertScalar(x = normalizedAssayName, type = "character") @@ -94,6 +99,22 @@ doNormalization <- function(sce, method, assayName, normalizedAssayName, assayOut <- MsCoreUtils::normalize_matrix(assayIn, method = method) + } else if (method == "center.median.shared") { + idx <- which(rowSums(is.na(assayIn)) == 0) + if (length(idx) == 0) { + stop("No features observed in all samples") + } + assayOut <- sweep(assayIn, MARGIN = 2, + STATS = apply(assayIn[idx, , drop = FALSE], 2, stats::median), + FUN = "-") + } else if (method == "center.mean.shared") { + idx <- which(rowSums(is.na(assayIn)) == 0) + if (length(idx) == 0) { + stop("No features observed in all samples") + } + assayOut <- sweep(assayIn, MARGIN = 2, + STATS = apply(assayIn[idx, , drop = FALSE], 2, mean), + FUN = "-") } else { ## Should never end up here as we check the validity of method above #nocov start diff --git a/R/plotImputation.R b/R/plotImputation.R index fd076e7e..b2411e69 100644 --- a/R/plotImputation.R +++ b/R/plotImputation.R @@ -9,6 +9,8 @@ #' @param assayImputation Character scalar indicating the name of a #' logical assay of \code{sce} to use for filling the distribution plots. #' @param xlab Character scalar providing the x-axis label for the plot. +#' @param plotType Character scalar indicating the type of plot to make +#' (either "histogram" or "density"). #' #' @export #' @author Charlotte Soneson @@ -30,13 +32,15 @@ #' @importFrom rlang .data #' plotImputationDistribution <- function(sce, assayToPlot, assayImputation, - xlab = "") { + xlab = "", plotType = "histogram") { .assertVector(x = sce, type = "SummarizedExperiment") .assertScalar(x = assayToPlot, type = "character", validValues = SummarizedExperiment::assayNames(sce)) .assertScalar(x = assayImputation, type = "character", validValues = SummarizedExperiment::assayNames(sce)) .assertScalar(x = xlab, type = "character") + .assertScalar(x = plotType, type = "character", + validValues = c("histogram", "density")) plotdf <- as.data.frame( SummarizedExperiment::assay(sce, assayToPlot)) %>% @@ -49,11 +53,26 @@ plotImputationDistribution <- function(sce, assayToPlot, assayImputation, tidyr::gather(key = "sample", value = "imputed", -"pid"), by = c("pid", "sample") ) - ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity, - fill = .data$imputed)) + - ggplot2::geom_histogram(bins = 50) + - ggplot2::facet_wrap(~ sample) + - ggplot2::theme_bw() + ggplot2::labs(x = xlab) + - ggplot2::scale_fill_manual(values = c(`TRUE` = "grey", - `FALSE` = "firebrick1")) + if (plotType == "histogram") { + ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity, + fill = .data$imputed)) + + ggplot2::geom_histogram(bins = 50) + + ggplot2::facet_wrap(~ sample) + + ggplot2::theme_bw() + ggplot2::labs(x = xlab) + + ggplot2::scale_fill_manual(values = c(`TRUE` = "grey", + `FALSE` = "firebrick1")) + } else if (plotType == "density") { + ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity, + color = .data$imputed)) + + ggplot2::geom_density(linewidth = 1.5) + + ggplot2::facet_wrap(~ sample) + + ggplot2::theme_bw() + ggplot2::labs(x = xlab) + + ggplot2::scale_color_manual(values = c(`TRUE` = "grey", + `FALSE` = "firebrick1")) + } else { + ## Should never end up here as the parameter is checked above + #nocov start + stop("Unknown value of the plotType parameter") + #nocov end + } } diff --git a/R/plotMissingValues.R b/R/plotMissingValues.R index ba63f499..12a295fe 100644 --- a/R/plotMissingValues.R +++ b/R/plotMissingValues.R @@ -6,8 +6,16 @@ #' @param sce A \code{SummarizedExperiment} object. #' @param assayMissing Character scalar indicating the name of a #' logical assay of \code{sce} representing the missingness pattern. -#' \code{"FALSE"} entries should represent observed values, while -#' \code{"TRUE"} entries represent missing values. +#' \code{FALSE} entries should represent observed values, while +#' \code{TRUE} entries represent missing values. +#' @param onlyRowsWithMissing Logical scalar indicating whether to only +#' include rows with at least one missing (\code{TRUE}) value. +#' @param settings Character scalar or \code{NULL}. Setting this to +#' \code{"clustered"} creates a heatmap with rows and columns +#' clustered (used in the \code{einprot} report). +#' Setting it to \code{NULL} allows any argument to be passed to +#' \code{ComplexHeatmap::Heatmap} via the \code{...} argument. +#' @param ... Additional arguments passed to \code{ComplexHeatmap::Heatmap}. #' #' @export #' @author Charlotte Soneson @@ -29,21 +37,43 @@ #' @importFrom ComplexHeatmap Heatmap #' @importFrom SummarizedExperiment assay assayNames #' -plotMissingValuesHeatmap <- function(sce, assayMissing) { +plotMissingValuesHeatmap <- function(sce, assayMissing, + onlyRowsWithMissing = FALSE, + settings = "clustered", ...) { .assertVector(x = sce, type = "SummarizedExperiment") .assertScalar(x = assayMissing, type = "character", validValues = SummarizedExperiment::assayNames(sce)) if (any(is.na(SummarizedExperiment::assay(sce, assayMissing)))) { stop("Assay contains missing values") } + .assertScalar(x = onlyRowsWithMissing, type = "logical") + .assertScalar(x = settings, type = "character", validValues = "clustered", + allowNULL = TRUE) - col_fun <- circlize::colorRamp2(c(0, 1), c("grey50", "white")) - ComplexHeatmap::Heatmap( - SummarizedExperiment::assay(sce, assayMissing) + 0, - col = col_fun, name = "imputed", - column_title = "Missing value pattern (white = missing)", - cluster_rows = TRUE, cluster_columns = TRUE, show_row_names = FALSE, - show_heatmap_legend = FALSE) + ## rows to plot + if (onlyRowsWithMissing) { + idx <- which(rowSums(SummarizedExperiment::assay(sce, assayMissing)) > 0) + } else { + idx <- seq(nrow(sce)) + } + if (!is.null(settings) && settings == "clustered") { + col_fun <- circlize::colorRamp2(c(0, 1), c("grey50", "white")) + ComplexHeatmap::Heatmap( + SummarizedExperiment::assay(sce, assayMissing)[idx, ] + 0, + col = col_fun, name = "imputed", + column_title = "Missing value pattern (white = missing)", + cluster_rows = TRUE, cluster_columns = TRUE, show_row_names = FALSE, + show_heatmap_legend = FALSE) + } else if (is.null(settings)) { + ComplexHeatmap::Heatmap( + SummarizedExperiment::assay(sce, assayMissing)[idx, ] + 0, ... + ) + } else { + ## Should never end up here as the parameter is checked above + #nocov start + stop("Unknown value of the settings parameter") + #nocov end + } } #' Plot detection rate per sample diff --git a/R/plotVolcano.R b/R/plotVolcano.R index 83144597..9412af14 100644 --- a/R/plotVolcano.R +++ b/R/plotVolcano.R @@ -1,3 +1,74 @@ +#' @author Charlotte Soneson +#' @noRd +#' @keywords internal +.getSetSimilarity <- function(set1, set2, method) { + if (method == "jaccard") { + length(intersect(set1, set2)) / length(union(set1, set2)) + } else { + stop("Unknown similarity method") + } +} + +#' Determine which complexes to create plots for +#' +#' @param featureCollections A \code{CharacterList} with feature set +#' definitions. +#' @param complexCandidates A character vector with feature set names that are +#' candidates for plotting. More significant candidates should appear +#' earlier in the vector. +#' @param maxNbrComplexesToPlot A numeric scalar indicating the maximum +#' number of complexes that can be selected. +#' @param maxComplexSimilarity A numeric scalar. If a complex C is more +#' similar than \code{maxComplexSimilarity} to a complex earlier in the +#' vector, which has been selected for plotting, C will not be selected. +#' +#' @returns A character vector with feature sets from \code{complexCandidates} +#' selected for plotting. +#' +#' @author Charlotte Soneson +#' @export +getComplexesToPlot <- function(featureCollections, + complexCandidates, + maxNbrComplexesToPlot, + maxComplexSimilarity) { + .assertVector(x = featureCollections, type = "CharacterList") + .assertVector(x = complexCandidates, type = "character") + .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", + rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") + ## Select final list of complexes to plot. + ## Go through complexes - if encountering one with similarity + ## above maxComplexSimilarity with any previous included complex, + ## don't include it. + cplxs <- c() + i <- 1 + while ((length(cplxs) < min(length(complexCandidates), + maxNbrComplexesToPlot)) && + i <= length(complexCandidates)) { + if (length(cplxs) > 0) { + include <- TRUE + j <- 1 + while(include && j <= length(cplxs)) { + if (.getSetSimilarity( + featureCollections[[cplxs[j]]], + featureCollections[[complexCandidates[i]]], + method = "jaccard") > + maxComplexSimilarity) { + include <- FALSE + } + j <- j + 1 + } + if (include) { + cplxs <- c(cplxs, complexCandidates[i]) + } + } else { + cplxs <- c(cplxs, complexCandidates[i]) + } + i <- i + 1 + } + cplxs +} + #' @author Charlotte Soneson #' @noRd #' @keywords internal @@ -304,6 +375,11 @@ #' to be considered significant. #' @param maxNbrComplexesToPlot Numeric scalar, the largest number of #' significant complexes to generate separate volcano plots for. +#' @param maxComplexSimilarity Numeric scalar. If a significant complex C has a +#' Jaccard similarity exceeding this value with a complex for which a +#' volcano plot has already been generated, no volcano plot will be +#' generated for C. The default value is 1, which means that no complex +#' will be excluded. #' @param curveparam List with curve parameters for creating the Perseus-like #' significance curves in the volcano plots. #' @param abundanceColPat Character vector providing the column patterns used @@ -365,6 +441,7 @@ plotVolcano <- function(sce, res, testType, xv = NULL, yv = NULL, xvma = NULL, comparisonString, groupComposition = NULL, stringDb = NULL, featureCollections = list(), complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = list(), abundanceColPat = "", xlab = "log2(fold change)", ylab = "-log10(p-value)", xlabma = "Average abundance", @@ -427,6 +504,7 @@ plotVolcano <- function(sce, res, testType, xv = NULL, yv = NULL, xvma = NULL, .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1)) .assertScalar(x = maxNbrComplexesToPlot, type = "numeric", rngIncl = c(0, Inf)) + .assertScalar(x = maxComplexSimilarity, type = "numeric") .assertVector(x = curveparam, type = "list") .assertScalar(x = xlab, type = "character") .assertScalar(x = ylab, type = "character") @@ -721,8 +799,12 @@ plotVolcano <- function(sce, res, testType, xv = NULL, yv = NULL, xvma = NULL, tmpcomplx <- tmpcomplx[order(tmpcomplx[paste0(comparisonString, "_PValue")]), , drop = FALSE] - cplxs <- rownames(tmpcomplx) - cplxs <- cplxs[seq_len(min(length(cplxs), maxNbrComplexesToPlot))] + cplxs <- getComplexesToPlot( + featureCollections = featureCollections[[ftype]], + complexCandidates = rownames(tmpcomplx), + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity) + # cplxs <- cplxs[seq_len(min(length(cplxs), maxNbrComplexesToPlot))] if (length(cplxs) > 0 && !is.null(baseFileName)) { grDevices::pdf(paste0(baseFileName, "_", ftype, ".pdf"), diff --git a/R/runDIANNAnalysis.R b/R/runDIANNAnalysis.R index db6a0744..55253a49 100644 --- a/R/runDIANNAnalysis.R +++ b/R/runDIANNAnalysis.R @@ -143,7 +143,7 @@ runDIANNAnalysis <- function( volcanoS0 = 0.1, volcanoFeaturesToLabel = "", addInteractiveVolcanos = FALSE, interactiveDisplayColumns = NULL, interactiveGroupColumn = NULL, complexFDRThr = 0.1, - maxNbrComplexesToPlot = Inf, seed = 42, + maxNbrComplexesToPlot = Inf, maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, customComplexes = list(), complexSpecies = "all", complexDbPath = NULL, stringVersion = "11.5", @@ -200,7 +200,8 @@ runDIANNAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, @@ -255,6 +256,7 @@ runDIANNAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, + maxComplexSimilarity = maxComplexSimilarity, maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, diff --git a/R/runFragPipeAnalysis.R b/R/runFragPipeAnalysis.R index 0daeb3c8..225ca2e0 100755 --- a/R/runFragPipeAnalysis.R +++ b/R/runFragPipeAnalysis.R @@ -94,7 +94,8 @@ runFragPipeAnalysis <- function( volcanoS0 = 0.1, volcanoFeaturesToLabel = "", addInteractiveVolcanos = FALSE, interactiveDisplayColumns = NULL, interactiveGroupColumn = NULL, complexFDRThr = 0.1, - maxNbrComplexesToPlot = Inf, seed = 42, includeFeatureCollections = c(), + maxNbrComplexesToPlot = Inf, maxComplexSimilarity = 1, seed = 42, + includeFeatureCollections = c(), minSizeToKeepSet = 2, customComplexes = list(), complexSpecies = "all", complexDbPath = NULL, stringVersion = "11.5", stringDir = NULL, linkTableColumns = c(), customYml = NULL, doRender = TRUE @@ -149,7 +150,8 @@ runFragPipeAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, @@ -201,7 +203,8 @@ runFragPipeAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, diff --git a/R/runMaxQuantAnalysis.R b/R/runMaxQuantAnalysis.R index 5efa7119..d8724296 100644 --- a/R/runMaxQuantAnalysis.R +++ b/R/runMaxQuantAnalysis.R @@ -160,6 +160,11 @@ #' @param maxNbrComplexesToPlot Numeric, the maximum number of significant #' complexes for which to make separate volcano plots. Defaults to #' \code{Inf}, i.e., no limit. +#' @param maxComplexSimilarity Numeric scalar. If a significant complex C has a +#' Jaccard similarity exceeding this value with a complex for which a +#' volcano plot has already been generated, no volcano plot will be +#' generated for C. The default value is 1, which means that no complex +#' will be excluded. #' @param seed Numeric, random seed to use for any non-deterministic #' calculations. #' @param includeFeatureCollections Character vector, a subset of @@ -274,7 +279,7 @@ runMaxQuantAnalysis <- function( volcanoS0 = 0.1, volcanoFeaturesToLabel = "", addInteractiveVolcanos = FALSE, interactiveDisplayColumns = NULL, interactiveGroupColumn = NULL, complexFDRThr = 0.1, - maxNbrComplexesToPlot = Inf, seed = 42, + maxNbrComplexesToPlot = Inf, maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, customComplexes = list(), complexSpecies = "all", complexDbPath = NULL, stringVersion = "11.5", @@ -331,7 +336,8 @@ runMaxQuantAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, @@ -385,7 +391,8 @@ runMaxQuantAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, diff --git a/R/runPDTMTAnalysis.R b/R/runPDTMTAnalysis.R index 7f6cf5b5..b703e463 100644 --- a/R/runPDTMTAnalysis.R +++ b/R/runPDTMTAnalysis.R @@ -141,7 +141,7 @@ runPDTMTAnalysis <- function( volcanoS0 = 0.1, volcanoFeaturesToLabel = "", addInteractiveVolcanos = FALSE, interactiveDisplayColumns = NULL, interactiveGroupColumn = NULL, complexFDRThr = 0.1, - maxNbrComplexesToPlot = 10, seed = 42, + maxNbrComplexesToPlot = 10, maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, customComplexes = list(), complexSpecies = "all", complexDbPath = NULL, stringVersion = "11.5", @@ -206,7 +206,8 @@ runPDTMTAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, @@ -267,7 +268,8 @@ runPDTMTAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, diff --git a/R/runSpectronautAnalysis.R b/R/runSpectronautAnalysis.R index 8e044028..4c0c904e 100644 --- a/R/runSpectronautAnalysis.R +++ b/R/runSpectronautAnalysis.R @@ -92,7 +92,7 @@ runSpectronautAnalysis <- function( volcanoS0 = 0.1, volcanoFeaturesToLabel = "", addInteractiveVolcanos = FALSE, interactiveDisplayColumns = NULL, interactiveGroupColumn = NULL, complexFDRThr = 0.1, - maxNbrComplexesToPlot = Inf, seed = 42, + maxNbrComplexesToPlot = Inf, maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, customComplexes = list(), complexSpecies = "all", complexDbPath = NULL, stringVersion = "11.5", @@ -150,7 +150,8 @@ runSpectronautAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, @@ -206,7 +207,8 @@ runSpectronautAnalysis <- function( interactiveDisplayColumns = interactiveDisplayColumns, interactiveGroupColumn = interactiveGroupColumn, complexFDRThr = complexFDRThr, - maxNbrComplexesToPlot = maxNbrComplexesToPlot, seed = seed, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, seed = seed, includeFeatureCollections = includeFeatureCollections, minSizeToKeepSet = minSizeToKeepSet, customComplexes = customComplexes, complexSpecies = complexSpecies, diff --git a/R/runTests.R b/R/runTests.R index 34ebecc5..8b5784ec 100644 --- a/R/runTests.R +++ b/R/runTests.R @@ -721,7 +721,7 @@ runTest <- function(sce, comparisons, groupComposition = NULL, testType, if (!is.null(baseFileName)) { write.table(res %>% dplyr::filter(.data$showInVolcano) %>% - dplyr::arrange(.data$P.Value), + dplyr::arrange(dplyr::desc(.data[[camerastat]])), file = paste0(baseFileName, "_testres_", comparisonName, ".txt"), row.names = FALSE, col.names = TRUE, diff --git a/R/seqLogoApp.R b/R/seqLogoApp.R index d0fe118c..3b767542 100644 --- a/R/seqLogoApp.R +++ b/R/seqLogoApp.R @@ -94,22 +94,30 @@ seqLogoApp <- function(seqTableCsv, "xlsx files containing the retained rows of the table.\n\n")) seqlogo <- shiny::reactive({ - ## Replace ggseqlogo (removed from CRAN) with motifStack - seqs <- df[input$seqtable_rows_all, "seqWindow"] - pfm <- motifStack::pcm2pfm(Biostrings::consensusMatrix( - Biostrings::AAStringSet(seqs))) - pfm <- new("pfm", mat = pfm, name = "", - color = motifStack::colorset(alphabet = "AA", - colorScheme = "chemistry")) - pfm - # ggseqlogo::ggseqlogo(df[input$seqtable_rows_all, "seqWindow"], - # seq_type = "aa") + - # ggseqlogo::theme_logo(base_size = 15) + if (is.null(input$seqtable_rows_all)) { + NULL + } else { + ## Replace ggseqlogo (removed from CRAN) with motifStack + seqs <- df[input$seqtable_rows_all, "seqWindow"] + pfm <- motifStack::pcm2pfm(Biostrings::consensusMatrix( + Biostrings::AAStringSet(seqs))) + pfm <- new("pfm", mat = pfm, name = "", + color = motifStack::colorset(alphabet = "AA", + colorScheme = "chemistry")) + pfm + # ggseqlogo::ggseqlogo(df[input$seqtable_rows_all, "seqWindow"], + # seq_type = "aa") + + # ggseqlogo::theme_logo(base_size = 15) + } }) output$seqlogo <- shiny::renderPlot({ - motifStack::plotMotifLogo(seqlogo(), font = "sans", fontface = "plain") - ## With ggseqlogo - # seqlogo() + if (!is(seqlogo(), "pfm")) { + NULL + } else { + motifStack::plotMotifLogo(seqlogo(), font = "sans", fontface = "plain") + ## With ggseqlogo + # seqlogo() + } }) output$dl <- shiny::downloadHandler( @@ -129,8 +137,11 @@ seqLogoApp <- function(seqTableCsv, plotfile <- paste0(exportName, "-seqlogo-", timestamp, ".pdf") xlsxfile <- paste0(exportName, "-seqlogo-", timestamp, ".xlsx") csvfile <- paste0(exportName, "-seqlogo-", timestamp, ".csv") - ggplot2::ggsave(seqlogo(), file = plotfile, - width = 8, height = 5) + pdf(plotfile, width = 8, height = 5) + motifStack::plotMotifLogo(seqlogo(), font = "sans", fontface = "plain") + dev.off() + # ggplot2::ggsave(seqlogo(), file = plotfile, + # width = 8, height = 5) writexl::write_xlsx(df[input$seqtable_rows_all, ], path = xlsxfile) utils::write.table(df[input$seqtable_rows_all, ], diff --git a/R/textSnippets.R b/R/textSnippets.R index 30493e50..ac3fd2cb 100644 --- a/R/textSnippets.R +++ b/R/textSnippets.R @@ -295,3 +295,28 @@ inputText <- function(expTypeLevel) { "object. ") } } + +#' @rdname textSnippets +#' @export +featureCollectionText <- function(featureCollections) { + if (length(featureCollections) == 0) { + out <- paste0("No feature collections were tested.") + } else { + out <- "" + if ("complexes" %in% featureCollections) { + out <- paste0(out, "Complexes are obtained from Corum, CYC2008, ", + "PomBase, HuMAP2 and the Complex Portal (see ", + "the `makeComplexDB` function for more details). ") + } + if ("GO" %in% featureCollections) { + out <- paste0(out, "GO terms are obtained from MSigDB. ") + } + if ("pathways" %in% featureCollections) { + out <- paste0(out, "Pathway information is obtained from ", + "BIOCARTA, KEGG, PID, REACTOME and WIKIPATHWAYS, ", + "via MSigDB (see the `prepareFeatureCollections` ", + "function for more details). ") + } + } + out +} diff --git a/README.md b/README.md index 010163b7..12c35890 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ or Proteome Discoverer (TMT-labelled). ## Installation You can install the development version of `einprot` from -[GitHub](https://github.com/fmicompbio/einprot) with: +[GitHub](https://github.com/fmicompbio/einprot), e.g. using +[BiocManager](https://cran.r-project.org/web/packages/BiocManager/index.html): ``` r -install.packages("remotes") -remotes::install_github("fmicompbio/einprot") +install.packages("BiocManager") +BiocManager::install("fmicompbio/einprot") ``` ## Usage diff --git a/inst/extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds b/inst/extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds new file mode 100644 index 00000000..653b69a0 Binary files /dev/null and b/inst/extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds differ diff --git a/inst/extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds b/inst/extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds new file mode 100644 index 00000000..cf77062a Binary files /dev/null and b/inst/extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds differ diff --git a/inst/extdata/process_basic_template.Rmd b/inst/extdata/process_basic_template.Rmd index 023afefb..8801b972 100644 --- a/inst/extdata/process_basic_template.Rmd +++ b/inst/extdata/process_basic_template.Rmd @@ -71,7 +71,9 @@ of the data is provided via [principal component analysis](#run-pca). If you are using the results from this report in published work, please cite @Soneson2023einprot, as well as the underlying packages that are used for the -analysis (indicated in the text). +analysis (indicated in the text). Documentation of all `einprot` functions, +as well as a vignette describing typical use cases in more detail, are +available from [GitHub](https://fmicompbio.github.io/einprot/). ```{r get-basic-info} ## Get species info and define STRINGdb object @@ -222,6 +224,7 @@ settingsList <- list( "Custom complexes" = paste(names(customComplexes), collapse = ";"), "FDR Threshold for complexes" = complexFDRThr, "Max nbr complexes to plot" = maxNbrComplexesToPlot, + "Max complex similarity to plot" = maxComplexSimilarity, "Number of permutations" = nperm, "Random seed" = seed, "User-defined feature annotation columns" = paste(names(extraFeatureCols), collapse = ";"), @@ -366,7 +369,9 @@ DT::datatable(as.data.frame(colData(sce)), We already now define the names of the assays we will be generating and using later in the workflow. The first column in the table below contains generic names representing the 'stage' that each assay -corresponds to. The second column contains the actual assay names. +corresponds to. The second column contains the actual assay names. Note that +the same actual assay name can correspond to multiple stages - this is the +case, e.g., if normalization is skipped, or if no batch variable is supplied. ```{r define-assaynames} aNames <- defineAssayNames(aName = aName, normMethod = normMethod, @@ -442,6 +447,7 @@ if (expType == "MaxQuant") { sce <- filterSpectronaut(sce = sce, minScore = minScore, minPeptides = minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE))) } else if (expType == "DIANN") { @@ -612,7 +618,9 @@ values correspond to missing values, and the grey ones to observed intensities. ```{r missing-values-overall, fig.height=7, message=FALSE, fig.width = min(14, max(7, 0.5 * ncol(sce))), fig.height = 9/7 * min(14, max(7, 0.5 * ncol(sce)))} if (addHeatmaps) { - plotMissingValuesHeatmap(sce, assayMissing = aNames$assayImputIndic) + plotMissingValuesHeatmap(sce, assayMissing = aNames$assayImputIndic, + onlyRowsWithMissing = FALSE, + settings = "clustered") } ``` @@ -628,13 +636,13 @@ if (length(no_feats) > 0) { } ``` -# Number or detected features +# Number of detected features As a summary statistic, we calculate the number of features that are observed (non-missing) in at least `r minNbrValidValues` samples, or in at least `r minNbrValidValues` samples from the same group. -```{r} +```{r nbr-features-table} ## Total number of observed features nbrFeaturesObsTotal <- length(which( rowSums(!assay(sce, aNames$assayImputIndic)) >= minNbrValidValues)) @@ -666,7 +674,7 @@ sce <- doImputation(sce, method = imputeMethod, imputedAssayName = aNames$assayImputed) plotImputationDistribution(sce, assayToPlot = aNames$assayImputed, assayImputation = aNames$assayImputIndic, - xlab = aNames$assayImputed) + xlab = aNames$assayImputed, plotType = "histogram") ``` # Overall distribution of log2 feature intensities @@ -959,6 +967,7 @@ for (nm in names(testres$tests)) { featureCollections = testres$featureCollections, complexFDRThr = complexFDRThr, maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity, curveparam = testres$curveparams[[nm]], abundanceColPat = assaysForExport, xlab = "log2(fold change)", @@ -1078,7 +1087,8 @@ tmpsign <- tmpsign[rowSums(tmpsign) > 0, , drop = FALSE] colnames(tmpsign) <- gsub("\\.showInVolcano$", "", colnames(tmpsign)) if (length(tests) > 1 && sum(colSums(tmpsign) > 0) > 1) { ComplexUpset::upset(tmpsign, intersect = colnames(tmpsign), - sort_intersections_by = "cardinality") + sort_intersections_by = "cardinality", + n_intersections = 50) } ``` @@ -1092,15 +1102,24 @@ isoform level and the feature sets are defined on the protein level, since there will be many (sometimes strongly correlated) features corresponding to a single gene or protein annotated to a feature set. +```{r text-coll, results="asis", echo=FALSE} +cat(featureCollectionText(featureCollections = names(testres$featureCollections))) +``` + ```{r top-feature-sets, fig.width = 9, fig.height = 4 * length(testres$featureCollections) + 0.1, results="asis", eval = (stattest != "none"), echo = (stattest != "none")} -for (nm in names(testres$topsets)) { +for (nm in names(testres$topsets)) { ## for each comparison if (length(testres$topsets[[nm]]) > 0) { - plts <- lapply(names(testres$topsets[[nm]]), function(snm) { + plts <- lapply(names(testres$topsets[[nm]]), function(snm) { ## set type df <- testres$topsets[[nm]][[snm]] - if (nrow(df) > maxNbrComplexesToPlot) { - df <- df[seq_len(maxNbrComplexesToPlot), ] - } + # if (nrow(df) > maxNbrComplexesToPlot) { + # df <- df[seq_len(maxNbrComplexesToPlot), ] + # } if (nrow(df) > 0) { + kp <- getComplexesToPlot(testres$featureCollections[[snm]], + complexCandidates = df$set, + maxNbrComplexesToPlot = maxNbrComplexesToPlot, + maxComplexSimilarity = maxComplexSimilarity) + df <- df[match(kp, df$set), , drop = FALSE] ggplot(df %>% dplyr::mutate(set = factor(.data$set, levels = rev(.data$set))), aes(x = .data$set, y = -log10(.data[[paste0(nm, "_FDR")]]), fill = .data[[paste0(nm, "_Direction")]])) + diff --git a/man/doNormalization.Rd b/man/doNormalization.Rd index a1e57675..9675ecad 100644 --- a/man/doNormalization.Rd +++ b/man/doNormalization.Rd @@ -16,7 +16,10 @@ doNormalization( \item{sce}{A \code{SummarizedExperiment} object (or a derivative).} \item{method}{Character scalar giving the normalization method. Currently, -the methods from \code{MsCoreUtils::normalizeMethods()} are supported. +the methods from \code{MsCoreUtils::normalizeMethods()} are supported, +together with "center.mean.shared" and "center.median.shared", +subtracting the mean or median, respectively, across features that are +observed in all samples. If \code{spikeFeatures} is not \code{NULL}, only \code{"center.mean"}, \code{"center.median"}, \code{"div.mean"} and \code{"div.median"} are supported.} diff --git a/man/filterSpectronaut.Rd b/man/filterSpectronaut.Rd index 589178e3..9069330f 100644 --- a/man/filterSpectronaut.Rd +++ b/man/filterSpectronaut.Rd @@ -10,6 +10,7 @@ filterSpectronaut( minPeptides, plotUpset = TRUE, revPattern = "_Decoy$", + contamPattern = "^contam_", exclFile = NULL ) } @@ -32,6 +33,11 @@ expression) used to identify decoys (reverse hits). The pattern is matched against the IDs in the Spectronaut \code{PG.ProteinGroups} column.} +\item{contamPattern}{Character scalar providing the pattern (a regular +expression) used to identify contaminants. The pattern is +matched against the IDs in the Spectronaut \code{PG.ProteinGroups} +column.} + \item{exclFile}{Character scalar, the path to a text file where the features that are filtered out are written. If \code{NULL} (default), excluded features are not recorded.} diff --git a/man/getComplexesToPlot.Rd b/man/getComplexesToPlot.Rd new file mode 100644 index 00000000..ee559c70 --- /dev/null +++ b/man/getComplexesToPlot.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotVolcano.R +\name{getComplexesToPlot} +\alias{getComplexesToPlot} +\title{Determine which complexes to create plots for} +\usage{ +getComplexesToPlot( + featureCollections, + complexCandidates, + maxNbrComplexesToPlot, + maxComplexSimilarity +) +} +\arguments{ +\item{featureCollections}{A \code{CharacterList} with feature set +definitions.} + +\item{complexCandidates}{A character vector with feature set names that are +candidates for plotting. More significant candidates should appear +earlier in the vector.} + +\item{maxNbrComplexesToPlot}{A numeric scalar indicating the maximum +number of complexes that can be selected.} + +\item{maxComplexSimilarity}{A numeric scalar. If a complex C is more +similar than \code{maxComplexSimilarity} to a complex earlier in the +vector, which has been selected for plotting, C will not be selected.} +} +\value{ +A character vector with feature sets from \code{complexCandidates} +selected for plotting. +} +\description{ +Determine which complexes to create plots for +} +\author{ +Charlotte Soneson +} diff --git a/man/plotImputationDistribution.Rd b/man/plotImputationDistribution.Rd index 6b917123..4165a8a4 100644 --- a/man/plotImputationDistribution.Rd +++ b/man/plotImputationDistribution.Rd @@ -4,7 +4,13 @@ \alias{plotImputationDistribution} \title{Plot distribution of imputed and unimputed values} \usage{ -plotImputationDistribution(sce, assayToPlot, assayImputation, xlab = "") +plotImputationDistribution( + sce, + assayToPlot, + assayImputation, + xlab = "", + plotType = "histogram" +) } \arguments{ \item{sce}{A \code{SummarizedExperiment} object (or a derivative).} @@ -16,6 +22,9 @@ assay of \code{sce} to use for plotting.} logical assay of \code{sce} to use for filling the distribution plots.} \item{xlab}{Character scalar providing the x-axis label for the plot.} + +\item{plotType}{Character scalar indicating the type of plot to make +(either "histogram" or "density").} } \value{ A ggplot object. diff --git a/man/plotMissingValuesHeatmap.Rd b/man/plotMissingValuesHeatmap.Rd index 7319d671..ac17fc81 100644 --- a/man/plotMissingValuesHeatmap.Rd +++ b/man/plotMissingValuesHeatmap.Rd @@ -4,15 +4,32 @@ \alias{plotMissingValuesHeatmap} \title{Plot heatmap of missing values} \usage{ -plotMissingValuesHeatmap(sce, assayMissing) +plotMissingValuesHeatmap( + sce, + assayMissing, + onlyRowsWithMissing = FALSE, + settings = "clustered", + ... +) } \arguments{ \item{sce}{A \code{SummarizedExperiment} object.} \item{assayMissing}{Character scalar indicating the name of a logical assay of \code{sce} representing the missingness pattern. -\code{"FALSE"} entries should represent observed values, while -\code{"TRUE"} entries represent missing values.} +\code{FALSE} entries should represent observed values, while +\code{TRUE} entries represent missing values.} + +\item{onlyRowsWithMissing}{Logical scalar indicating whether to only +include rows with at least one missing (\code{TRUE}) value.} + +\item{settings}{Character scalar or \code{NULL}. Setting this to +\code{"clustered"} creates a heatmap with rows and columns +clustered (used in the \code{einprot} report). +Setting it to \code{NULL} allows any argument to be passed to +\code{ComplexHeatmap::Heatmap} via the \code{...} argument.} + +\item{...}{Additional arguments passed to \code{ComplexHeatmap::Heatmap}.} } \value{ A \code{ComplexHeatmap} object. diff --git a/man/plotVolcano.Rd b/man/plotVolcano.Rd index 1e646332..cce60f6c 100644 --- a/man/plotVolcano.Rd +++ b/man/plotVolcano.Rd @@ -25,6 +25,7 @@ plotVolcano( featureCollections = list(), complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = list(), abundanceColPat = "", xlab = "log2(fold change)", @@ -108,6 +109,12 @@ to be considered significant.} \item{maxNbrComplexesToPlot}{Numeric scalar, the largest number of significant complexes to generate separate volcano plots for.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{curveparam}{List with curve parameters for creating the Perseus-like significance curves in the volcano plots.} diff --git a/man/runDIANNAnalysis.Rd b/man/runDIANNAnalysis.Rd index 70f26ed7..5b811b13 100644 --- a/man/runDIANNAnalysis.Rd +++ b/man/runDIANNAnalysis.Rd @@ -60,6 +60,7 @@ runDIANNAnalysis( interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, @@ -283,6 +284,12 @@ of complexes.} complexes for which to make separate volcano plots. Defaults to \code{Inf}, i.e., no limit.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{seed}{Numeric, random seed to use for any non-deterministic calculations.} diff --git a/man/runFragPipeAnalysis.Rd b/man/runFragPipeAnalysis.Rd index 34c55369..18f9c94f 100644 --- a/man/runFragPipeAnalysis.Rd +++ b/man/runFragPipeAnalysis.Rd @@ -55,6 +55,7 @@ runFragPipeAnalysis( interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, @@ -267,6 +268,12 @@ of complexes.} complexes for which to make separate volcano plots. Defaults to \code{Inf}, i.e., no limit.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{seed}{Numeric, random seed to use for any non-deterministic calculations.} diff --git a/man/runMaxQuantAnalysis.Rd b/man/runMaxQuantAnalysis.Rd index b737027a..2d88db92 100644 --- a/man/runMaxQuantAnalysis.Rd +++ b/man/runMaxQuantAnalysis.Rd @@ -58,6 +58,7 @@ runMaxQuantAnalysis( interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, @@ -274,6 +275,12 @@ of complexes.} complexes for which to make separate volcano plots. Defaults to \code{Inf}, i.e., no limit.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{seed}{Numeric, random seed to use for any non-deterministic calculations.} diff --git a/man/runPDTMTAnalysis.Rd b/man/runPDTMTAnalysis.Rd index 3be902bd..45af9434 100644 --- a/man/runPDTMTAnalysis.Rd +++ b/man/runPDTMTAnalysis.Rd @@ -64,6 +64,7 @@ runPDTMTAnalysis( interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, @@ -313,6 +314,12 @@ of complexes.} complexes for which to make separate volcano plots. Defaults to \code{Inf}, i.e., no limit.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{seed}{Numeric, random seed to use for any non-deterministic calculations.} diff --git a/man/runSpectronautAnalysis.Rd b/man/runSpectronautAnalysis.Rd index ce361b72..25b564ee 100644 --- a/man/runSpectronautAnalysis.Rd +++ b/man/runSpectronautAnalysis.Rd @@ -59,6 +59,7 @@ runSpectronautAnalysis( interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = c(), minSizeToKeepSet = 2, @@ -284,6 +285,12 @@ of complexes.} complexes for which to make separate volcano plots. Defaults to \code{Inf}, i.e., no limit.} +\item{maxComplexSimilarity}{Numeric scalar. If a significant complex C has a +Jaccard similarity exceeding this value with a complex for which a +volcano plot has already been generated, no volcano plot will be +generated for C. The default value is 1, which means that no complex +will be excluded.} + \item{seed}{Numeric, random seed to use for any non-deterministic calculations.} diff --git a/man/textSnippets.Rd b/man/textSnippets.Rd index 66d6cc95..b38b0fee 100644 --- a/man/textSnippets.Rd +++ b/man/textSnippets.Rd @@ -10,6 +10,7 @@ \alias{introText} \alias{filterByModText} \alias{inputText} +\alias{featureCollectionText} \title{Text snippets for use in analysis reports} \usage{ emptySampleText(sce, assayName) @@ -27,6 +28,8 @@ introText(expType) filterByModText(excludeUnmodifiedPeptides, keepModifications) inputText(expTypeLevel) + +featureCollectionText(featureCollections) } \arguments{ \item{sce}{A SummarizedExperiment object.} diff --git a/tests/testthat/test-checkArgumentsDIANN.R b/tests/testthat/test-checkArgumentsDIANN.R index a8b6ae14..9aad9d1f 100644 --- a/tests/testthat/test-checkArgumentsDIANN.R +++ b/tests/testthat/test-checkArgumentsDIANN.R @@ -64,6 +64,7 @@ test_that("argument checking for DIANN works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -617,6 +618,15 @@ test_that("argument checking for DIANN works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(.checkArgumentsDIANN, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(.checkArgumentsDIANN, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-checkArgumentsFragPipe.R b/tests/testthat/test-checkArgumentsFragPipe.R index 987ad72b..3473e5af 100644 --- a/tests/testthat/test-checkArgumentsFragPipe.R +++ b/tests/testthat/test-checkArgumentsFragPipe.R @@ -61,6 +61,7 @@ test_that("argument checking for FP works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -609,6 +610,15 @@ test_that("argument checking for FP works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(.checkArgumentsFragPipe, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(.checkArgumentsFragPipe, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-checkArgumentsMaxQuant.R b/tests/testthat/test-checkArgumentsMaxQuant.R index 5e5535b3..12201442 100644 --- a/tests/testthat/test-checkArgumentsMaxQuant.R +++ b/tests/testthat/test-checkArgumentsMaxQuant.R @@ -69,6 +69,7 @@ test_that("argument checking for MQ works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -608,6 +609,15 @@ test_that("argument checking for MQ works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(.checkArgumentsMaxQuant, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(.checkArgumentsMaxQuant, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-checkArgumentsPDTMT.R b/tests/testthat/test-checkArgumentsPDTMT.R index 7fc3f604..f969280a 100644 --- a/tests/testthat/test-checkArgumentsPDTMT.R +++ b/tests/testthat/test-checkArgumentsPDTMT.R @@ -78,6 +78,7 @@ test_that("argument checking for PD-TMT works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -724,6 +725,15 @@ test_that("argument checking for PD-TMT works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(.checkArgumentsPDTMT, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(.checkArgumentsPDTMT, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-plotImputation.R b/tests/testthat/test-plotImputation.R index 5b957bc3..3e9afa73 100644 --- a/tests/testthat/test-plotImputation.R +++ b/tests/testthat/test-plotImputation.R @@ -5,46 +5,69 @@ test_that("plotting of imputation results works", { ## ------------------------------------------------------------------------- expect_error(plotImputationDistribution( sce = 1, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "'sce' must be of class 'SummarizedExperiment'") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = 1, assayImputation = "imputed_iBAQ", - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "'assayToPlot' must be of class 'character'") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = c("log2_iBAQ", "imputed_iBAQ"), assayImputation = "imputed_iBAQ", - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "'assayToPlot' must have length 1") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "missing", assayImputation = "imputed_iBAQ", - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "All values in 'assayToPlot' must be one of") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = 1, - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "'assayImputation' must be of class 'character'") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = c("log2_iBAQ", "imputed_iBAQ"), - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "'assayImputation' must have length 1") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "missing", - xlab = "log2 intensity"), + xlab = "log2 intensity", plotType = "histogram"), "All values in 'assayImputation' must be one of") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", - xlab = 1), + xlab = 1, plotType = "histogram"), "'xlab' must be of class 'character'") expect_error(plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", - xlab = c("lab1", "lab2")), + xlab = c("lab1", "lab2"), plotType = "histogram"), "'xlab' must have length 1") + expect_error(plotImputationDistribution( + sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", + xlab = "log2 intensity", plotType = 1), + "'plotType' must be of class 'character'") + expect_error(plotImputationDistribution( + sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", + xlab = "log2 intensity", plotType = c("histogram", "density")), + "'plotType' must have length 1") + expect_error(plotImputationDistribution( + sce = sce_mq_final, assayToPlot = "log2_iBAQ", assayImputation = "imputed_iBAQ", + xlab = "log2 intensity", plotType = "missing"), + "'plotType' must be one of") + + out <- plotImputationDistribution( + sce = sce_mq_final, assayToPlot = "log2_iBAQ", + assayImputation = "imputed_iBAQ", xlab = "lab", plotType = "histogram") + expect_s3_class(out, "ggplot") + expect_true(any(out$data$imputed)) + expect_true(any(!out$data$imputed)) + expect_false(any(grepl("^iBAQ\\.", out$data$sample))) + expect_equal(nrow(out$data), 150 * 9) + expect_equal(ncol(out$data), 4) + expect_named(out$data, c("pid", "sample", "log2intensity", "imputed")) out <- plotImputationDistribution( sce = sce_mq_final, assayToPlot = "log2_iBAQ", - assayImputation = "imputed_iBAQ", xlab = "lab") + assayImputation = "imputed_iBAQ", xlab = "lab", plotType = "density") expect_s3_class(out, "ggplot") expect_true(any(out$data$imputed)) expect_true(any(!out$data$imputed)) @@ -56,7 +79,7 @@ test_that("plotting of imputation results works", { ## PD input out <- plotImputationDistribution( sce = sce_pd_final, assayToPlot = "log2_Abundance", - assayImputation = "imputed_Abundance", xlab = "lab") + assayImputation = "imputed_Abundance", xlab = "lab", plotType = "histogram") expect_s3_class(out, "ggplot") expect_true(any(out$data$imputed)) expect_true(any(!out$data$imputed)) diff --git a/tests/testthat/test-plotMissingValues.R b/tests/testthat/test-plotMissingValues.R index 0e6b3eb9..9692e7ee 100644 --- a/tests/testthat/test-plotMissingValues.R +++ b/tests/testthat/test-plotMissingValues.R @@ -4,24 +4,87 @@ test_that("missing value plots work", { ## plotMissingValuesHeatmap ## ------------------------------------------------------------------------- expect_error(plotMissingValuesHeatmap( - sce = 1, assayMissing = "imputed_iBAQ"), + sce = 1, assayMissing = "imputed_iBAQ", onlyRowsWithMissing = FALSE, + settings = "clustered"), "'sce' must be of class 'SummarizedExperiment'") expect_error(plotMissingValuesHeatmap( - sce = sce_mq_preimputation, assayMissing = 1), + sce = sce_mq_preimputation, assayMissing = 1, + onlyRowsWithMissing = FALSE, settings = "clustered"), "'assayMissing' must be of class 'character'") expect_error(plotMissingValuesHeatmap( - sce = sce_mq_preimputation, assayMissing = c("log2_iBAQ", "imputed_iBAQ")), + sce = sce_mq_preimputation, assayMissing = c("log2_iBAQ", "imputed_iBAQ"), + onlyRowsWithMissing = FALSE, settings = "clustered"), "'assayMissing' must have length 1") expect_error(plotMissingValuesHeatmap( - sce = sce_mq_preimputation, assayMissing = "missing"), + sce = sce_mq_preimputation, assayMissing = "missing", + onlyRowsWithMissing = FALSE, settings = "clustered"), "All values in 'assayMissing' must be one of") expect_error(plotMissingValuesHeatmap( - sce = sce_mq_preimputation, assayMissing = "log2_iBAQ_withNA"), + sce = sce_mq_preimputation, assayMissing = "log2_iBAQ_withNA", + onlyRowsWithMissing = FALSE, settings = "clustered"), "Assay contains missing values") + expect_error(plotMissingValuesHeatmap( + sce = sce_mq_preimputation, assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = "FALSE", settings = "clustered"), + "'onlyRowsWithMissing' must be of class 'logical'") + expect_error(plotMissingValuesHeatmap( + sce = sce_mq_preimputation, assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = c(TRUE, FALSE), settings = "clustered"), + "'onlyRowsWithMissing' must have length 1") + expect_error(plotMissingValuesHeatmap( + sce = sce_mq_preimputation, assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = FALSE, settings = 1), + "'settings' must be of class 'character'") + expect_error(plotMissingValuesHeatmap( + sce = sce_mq_preimputation, assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = FALSE, settings = c("clustered", "clustered")), + "'settings' must have length 1") + + out <- plotMissingValuesHeatmap(sce = sce_mq_preimputation, + assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = FALSE, + settings = "clustered") + expect_s4_class(out, "Heatmap") + expect_length(out@column_names_param$labels, 9L) + expect_true(out@column_names_param$show) + expect_length(out@row_names_param$labels, 150L) + expect_equal(out@column_title, "Missing value pattern (white = missing)") + expect_equal(out@name, "imputed") + + out <- plotMissingValuesHeatmap(sce = sce_mq_preimputation, + assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = FALSE, + settings = NULL, + cluster_rows = FALSE, show_column_names = FALSE) + expect_s4_class(out, "Heatmap") + expect_length(out@column_names_param$labels, 9L) + expect_false(out@column_names_param$show) + expect_length(out@row_names_param$labels, 150L) + expect_equal(out@column_title, character(0)) + expect_equal(substr(out@name, 1, 7), "matrix_") out <- plotMissingValuesHeatmap(sce = sce_mq_preimputation, - assayMissing = "imputed_iBAQ") + assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = TRUE, + settings = "clustered") + expect_s4_class(out, "Heatmap") + expect_length(out@column_names_param$labels, 9L) + expect_true(out@column_names_param$show) + expect_length(out@row_names_param$labels, 103L) + expect_equal(out@column_title, "Missing value pattern (white = missing)") + expect_equal(out@name, "imputed") + + kp <- which(rowSums(assay(sce_mq_preimputation, "imputed_iBAQ")) == 0) + out <- plotMissingValuesHeatmap(sce = sce_mq_preimputation[kp, ], + assayMissing = "imputed_iBAQ", + onlyRowsWithMissing = TRUE, + settings = "clustered") expect_s4_class(out, "Heatmap") + expect_length(out@column_names_param$labels, 9L) + expect_true(out@column_names_param$show) + expect_length(out@row_names_param$labels, 0L) + expect_equal(out@column_title, "Missing value pattern (white = missing)") + expect_equal(out@name, "imputed") ## ------------------------------------------------------------------------- ## plotFractionDetectedPerSample diff --git a/tests/testthat/test-plotVolcano.R b/tests/testthat/test-plotVolcano.R index b339c48c..a5e0781f 100644 --- a/tests/testthat/test-plotVolcano.R +++ b/tests/testthat/test-plotVolcano.R @@ -1,3 +1,70 @@ +test_that("set selection for plotting works", { + expect_equal(.getSetSimilarity(set1 = c("A", "B", "X"), + set2 = c("A", "I", "J"), + method = "jaccard"), 0.2) + expect_error(.getSetSimilarity(set1 = c("A", "B", "X"), + set2 = c("A", "I", "J"), + method = "missing"), + "Unknown similarity method") + + expect_error(getComplexesToPlot( + featureCollections = 1, complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = 10, maxComplexSimilarity = 1), + "'featureCollections' must be of class 'CharacterList'") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final, complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = 10, maxComplexSimilarity = 1), + "'featureCollections' must be of class 'CharacterList'") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final$complexes, + complexCandidates = 1, + maxNbrComplexesToPlot = 10, maxComplexSimilarity = 1), + "'complexCandidates' must be of class 'character'") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = "10", maxComplexSimilarity = 1), + "'maxNbrComplexesToPlot' must be of class 'numeric'") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = c(10, 2), maxComplexSimilarity = 1), + "'maxNbrComplexesToPlot' must have length 1") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = 10, maxComplexSimilarity = "1"), + "'maxComplexSimilarity' must be of class 'numeric'") + expect_error(getComplexesToPlot( + featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex"), + maxNbrComplexesToPlot = 10, maxComplexSimilarity = c(1, 2)), + "'maxComplexSimilarity' must have length 1") + + expect_equal(getComplexesToPlot(featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex", + "mouse: Arnt-Sim1 complex", + "mouse: Arnt-Sim2 complex"), + maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1), + c("mouse: Arnt-Ahrr complex", "mouse: Arnt-Sim1 complex", + "mouse: Arnt-Sim2 complex")) + expect_equal(getComplexesToPlot(featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Ahrr complex", + "mouse: Arnt-Sim1 complex", + "mouse: Arnt-Sim2 complex"), + maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 0.9), + c("mouse: Arnt-Ahrr complex")) + expect_equal(getComplexesToPlot(featureCollections = fcoll_mq_final$complexes, + complexCandidates = c("mouse: Arnt-Sim1 complex", + "mouse: Arnt-Ahrr complex", + "mouse: Arnt-Sim2 complex"), + maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 0.9), + c("mouse: Arnt-Sim1 complex")) +}) + test_that("volcano plots work", { out_limma <- runTest( @@ -408,6 +475,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -616,6 +684,15 @@ test_that("volcano plots work", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "2" + expect_error(do.call(plotVolcano, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(plotVolcano, args), + "'maxComplexSimilarity' must have length 1") + ## curveparam args <- args0 args$curveparam <- 1 @@ -717,6 +794,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -776,6 +854,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -834,6 +913,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -887,6 +967,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -951,6 +1032,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1011,6 +1093,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1068,6 +1151,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = list(), complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1126,6 +1210,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1181,6 +1266,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1238,6 +1324,7 @@ test_that("volcano plots work", { stringDb = NULL, featureCollections = out_limma_merged$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma_merged$curveparams[[1]], abundanceColPat = c("iBAQ", "LFQ.intensity"), xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1321,6 +1408,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1376,6 +1464,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1431,6 +1520,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_proda$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1486,6 +1576,7 @@ test_that("volcano plots work", { stringDb = NULL, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.001, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1537,6 +1628,7 @@ test_that("volcano plots work", { stringDb = NULL, featureCollections = out_limma$featureCollections, complexFDRThr = 0.8, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1588,6 +1680,7 @@ test_that("volcano plots work", { stringDb = string_db, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "iBAQ", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1657,6 +1750,7 @@ test_that("volcano plots work", { stringDb = NULL, featureCollections = out_limma$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_limma$curveparams[[1]], abundanceColPat = "Abundance", xlab = "log2(fold change)", ylab = "-log10(p-value)", @@ -1716,6 +1810,7 @@ test_that("volcano plots work", { stringDb = NULL, featureCollections = out_ttest$featureCollections, complexFDRThr = 0.1, maxNbrComplexesToPlot = 10, + maxComplexSimilarity = 1, curveparam = out_ttest$curveparams[[1]], abundanceColPat = "Abundance", xlab = "log2(fold change)", ylab = "-log10(p-value)", diff --git a/tests/testthat/test-runDIANNAnalysis.R b/tests/testthat/test-runDIANNAnalysis.R index 2eb110b1..9809a793 100644 --- a/tests/testthat/test-runDIANNAnalysis.R +++ b/tests/testthat/test-runDIANNAnalysis.R @@ -65,6 +65,7 @@ test_that("runDIANNAnalysis works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -606,6 +607,15 @@ test_that("runDIANNAnalysis works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(runDIANNAnalysis, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(runDIANNAnalysis, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-runFragPipeAnalysis.R b/tests/testthat/test-runFragPipeAnalysis.R index ac5e088e..bd8ea643 100644 --- a/tests/testthat/test-runFragPipeAnalysis.R +++ b/tests/testthat/test-runFragPipeAnalysis.R @@ -63,6 +63,7 @@ test_that("runFragPipeAnalysis works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 123, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -569,6 +570,15 @@ test_that("runFragPipeAnalysis works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(runFragPipeAnalysis, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(runFragPipeAnalysis, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-runMaxQuantAnalysis.R b/tests/testthat/test-runMaxQuantAnalysis.R index 92f511b4..f50f6be8 100644 --- a/tests/testthat/test-runMaxQuantAnalysis.R +++ b/tests/testthat/test-runMaxQuantAnalysis.R @@ -67,6 +67,7 @@ test_that("runMaxQuantAnalysis works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -594,6 +595,15 @@ test_that("runMaxQuantAnalysis works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(runMaxQuantAnalysis, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(runMaxQuantAnalysis, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1" diff --git a/tests/testthat/test-runPDTMTAnalysis.R b/tests/testthat/test-runPDTMTAnalysis.R index 57e65674..33577841 100644 --- a/tests/testthat/test-runPDTMTAnalysis.R +++ b/tests/testthat/test-runPDTMTAnalysis.R @@ -77,6 +77,7 @@ test_that("runPDTMTAnalysis works", { interactiveGroupColumn = NULL, complexFDRThr = 0.1, maxNbrComplexesToPlot = Inf, + maxComplexSimilarity = 1, seed = 42, includeFeatureCollections = "complexes", minSizeToKeepSet = 2, @@ -690,6 +691,15 @@ test_that("runPDTMTAnalysis works", { "'maxNbrComplexesToPlot' must be within [0,Inf] (inclusive)", fixed = TRUE) + ## maxComplexSimilarity + args <- args0 + args$maxComplexSimilarity <- "1" + expect_error(do.call(runPDTMTAnalysis, args), + "'maxComplexSimilarity' must be of class 'numeric'") + args$maxComplexSimilarity <- c(0.1, 0.2) + expect_error(do.call(runPDTMTAnalysis, args), + "'maxComplexSimilarity' must have length 1") + ## seed args <- args0 args$seed <- "1"