fmicompbio · csoneson · Dec 31, 2024 · Sep 23, 2024 · Sep 23, 2024 · Oct 4, 2024
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -58,7 +58,7 @@ jobs:
 
       - name: Cache R packages
         if: runner.os != 'Windows' && matrix.config.image == null
-        uses: actions/cache@v1
+        uses: actions/cache@v4
         with:
           path: ${{ env.R_LIBS_USER }}
           key: ${{ env.cache-version }}-${{ runner.os }}-bioc-${{ matrix.config.bioc }}-${{ hashFiles('depends.Rds') }}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: einprot
 Type: Package
 Title: A collection of proteomics analysis utilities and workflows
-Version: 0.9.5
+Version: 0.9.6
 Authors@R: c(
     person("Charlotte", "Soneson", email = "[email protected]", 
            role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ export(doNormalization)
 export(doPCA)
 export(emptySampleText)
 export(expDesignText)
+export(featureCollectionText)
 export(filterByModText)
 export(filterFragPipe)
 export(filterMaxQuant)
@@ -20,6 +21,7 @@ export(fixFeatureIds)
 export(formatTableColumns)
 export(getCalibrationFrompdAnalysis)
 export(getColumnNames)
+export(getComplexesToPlot)
 export(getContaminantsDatabaseFrompdAnalysis)
 export(getConvTable)
 export(getFirstId)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# einprot 0.9.6
+
+* Sort output tables from differential abundance analysis by t-statistics instead of p-value
+* Allow ComplexHeatmap::Heatmap arguments to be passed on by plotMissingValuesHeatmap
+* Add option to limit plotMissingValuesHeatmap to rows with at least one missing value
+* Add option to make imputation plots as density plots instead of histograms
+* Add more details regarding the source of feature collections to report
+* Add center.median.shared and center.mean.shared normalization methods
+* Add maxComplexSimilarity argument to plotVolcano
+* Update PomBase and WormBase conversion tables
+* Add contaminant filtering to the Spectronaut workflow (features whose ID starts with the contam_ prefix)
+
 # einprot 0.9.5
 
 * Add filtering by score and number of peptides to Spectronaut workflow

diff --git a/R/checkArgumentsDIANN.R b/R/checkArgumentsDIANN.R
@@ -17,7 +17,8 @@
     minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
     volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
     volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
-    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
+    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
+    maxComplexSimilarity, seed,
     includeFeatureCollections, minSizeToKeepSet, customComplexes,
     complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
     customYml, doRender
@@ -144,7 +145,8 @@
     .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
     .assertScalar(x = addHeatmaps, type = "logical")
     .assertScalar(x = normMethod, type = "character",
-                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
+                  validValues = c(MsCoreUtils::normalizeMethods(), "none",
+                                  "center.mean.shared", "center.median.shared"))
     .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
     .assertScalar(x = stattest, type = "character",
                   validValues = c("limma", "ttest", "proDA", "none"))
@@ -163,6 +165,7 @@
     .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
     .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                   rngIncl = c(0, Inf))
+    .assertScalar(x = maxComplexSimilarity, type = "numeric")
     .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
     .assertVector(x = volcanoFeaturesToLabel, type = "character")
     .assertVector(x = mergeGroups, type = "list")

diff --git a/R/checkArgumentsFragPipe.R b/R/checkArgumentsFragPipe.R
@@ -16,7 +16,8 @@
     minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
     volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
     volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
-    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
+    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
+    maxComplexSimilarity, seed,
     includeFeatureCollections, minSizeToKeepSet, customComplexes,
     complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
     customYml, doRender
@@ -149,7 +150,8 @@
     .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
     .assertScalar(x = addHeatmaps, type = "logical")
     .assertScalar(x = normMethod, type = "character",
-                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
+                  validValues = c(MsCoreUtils::normalizeMethods(), "none",
+                                  "center.mean.shared", "center.median.shared"))
     .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
     .assertScalar(x = stattest, type = "character",
                   validValues = c("limma", "ttest", "proDA", "none"))
@@ -168,6 +170,7 @@
     .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
     .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                   rngIncl = c(0, Inf))
+    .assertScalar(x = maxComplexSimilarity, type = "numeric")
     .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
     .assertVector(x = volcanoFeaturesToLabel, type = "character")
     .assertVector(x = mergeGroups, type = "list")

diff --git a/R/checkArgumentsMaxQuant.R b/R/checkArgumentsMaxQuant.R
@@ -16,7 +16,8 @@
     minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
     volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
     volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
-    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
+    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
+    maxComplexSimilarity, seed,
     includeFeatureCollections, minSizeToKeepSet, customComplexes,
     complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
     customYml, doRender
@@ -134,7 +135,8 @@
     .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
     .assertScalar(x = addHeatmaps, type = "logical")
     .assertScalar(x = normMethod, type = "character",
-                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
+                  validValues = c(MsCoreUtils::normalizeMethods(), "none",
+                                  "center.mean.shared", "center.median.shared"))
     .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
     .assertScalar(x = stattest, type = "character",
                   validValues = c("limma", "ttest", "proDA", "none"))
@@ -153,6 +155,7 @@
     .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
     .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                   rngIncl = c(0, Inf))
+    .assertScalar(x = maxComplexSimilarity, type = "numeric")
     .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
     .assertVector(x = volcanoFeaturesToLabel, type = "character")
     .assertVector(x = mergeGroups, type = "list")

diff --git a/R/checkArgumentsPDTMT.R b/R/checkArgumentsPDTMT.R
@@ -19,7 +19,8 @@
     minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
     volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
     volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
-    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
+    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
+    maxComplexSimilarity, seed,
     includeFeatureCollections, minSizeToKeepSet, customComplexes,
     complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
     customYml, doRender
@@ -157,7 +158,8 @@
     .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
     .assertScalar(x = addHeatmaps, type = "logical")
     .assertScalar(x = normMethod, type = "character",
-                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
+                  validValues = c(MsCoreUtils::normalizeMethods(), "none",
+                                  "center.mean.shared", "center.median.shared"))
     .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
     .assertScalar(x = stattest, type = "character",
                   validValues = c("limma", "ttest", "proDA", "none"))
@@ -176,6 +178,7 @@
     .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
     .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                   rngIncl = c(0, Inf))
+    .assertScalar(x = maxComplexSimilarity, type = "numeric")
     .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
     .assertVector(x = volcanoFeaturesToLabel, type = "character")
     .assertVector(x = mergeGroups, type = "list")

diff --git a/R/checkArgumentsSpectronaut.R b/R/checkArgumentsSpectronaut.R
@@ -18,7 +18,8 @@
     minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
     volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
     volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
-    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
+    interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
+    maxComplexSimilarity, seed,
     includeFeatureCollections, minSizeToKeepSet, customComplexes,
     complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
     customYml, doRender
@@ -147,7 +148,8 @@
     .assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
     .assertScalar(x = addHeatmaps, type = "logical")
     .assertScalar(x = normMethod, type = "character",
-                  validValues = c(MsCoreUtils::normalizeMethods(), "none"))
+                  validValues = c(MsCoreUtils::normalizeMethods(), "none",
+                                  "center.mean.shared", "center.median.shared"))
     .assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
     .assertScalar(x = stattest, type = "character",
                   validValues = c("limma", "ttest", "proDA", "none"))
@@ -166,6 +168,7 @@
     .assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
     .assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
                   rngIncl = c(0, Inf))
+    .assertScalar(x = maxComplexSimilarity, type = "numeric")
     .assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
     .assertVector(x = volcanoFeaturesToLabel, type = "character")
     .assertVector(x = mergeGroups, type = "list")

diff --git a/R/constants.R b/R/constants.R
@@ -18,7 +18,7 @@ NULL
 EINPROT_COMPLEXES_FILE <- "extdata/complexes/complexdb_einprot0.9.3_20240328_orthologs.rds"
 #' @export
 #' @rdname constants
-EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.5.0_20220211.rds"
+EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds"
 #' @export
 #' @rdname constants
-EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.5.0_20220211.rds"
+EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds"
diff --git a/R/doFilter.R b/R/doFilter.R
@@ -550,6 +550,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
 #'     expression) used to identify decoys (reverse hits). The pattern is
 #'     matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
 #'     column.
+#' @param contamPattern Character scalar providing the pattern (a regular
+#'     expression) used to identify contaminants. The pattern is
+#'     matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
+#'     column.
 #' @param exclFile Character scalar, the path to a text file where the
 #'     features that are filtered out are written. If \code{NULL} (default),
 #'     excluded features are not recorded.
@@ -562,22 +566,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
 #' @importFrom rlang .data
 #'
 filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
-                              revPattern = "_Decoy$", exclFile = NULL) {
+                              revPattern = "_Decoy$",
+                              contamPattern = "^contam_", exclFile = NULL) {
     .assertVector(x = sce, type = "SummarizedExperiment")
     .assertScalar(x = minScore, type = "numeric", allowNULL = TRUE)
     .assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE)
     .assertScalar(x = plotUpset, type = "logical")
     .assertScalar(x = revPattern, type = "character")
+    .assertScalar(x = contamPattern, type = "character")
     .assertScalar(x = exclFile, type = "character", allowNULL = TRUE)
 
     ## Make sure that the columns used for filtering later are character vectors
     rowData(sce)$Reverse <- ifelse(grepl(revPattern, rowData(sce)$PG.ProteinGroups),
                                    "+", "")
+    rowData(sce)$Contaminant <- ifelse(grepl(contamPattern,
+                                             rowData(sce)$PG.ProteinGroups),
+                                       "+", "")
 
     filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>%
         dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide",
-                                      "PG.Cscore"))) %>%
-        dplyr::mutate(across(dplyr::any_of(c("Reverse")),
+                                      "PG.Cscore", "Contaminant"))) %>%
+        dplyr::mutate(across(dplyr::any_of(c("Reverse", "Contaminant")),
                              function(x) as.numeric(x == "+")))
     if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) &&
         !is.null(minPeptides)) {
@@ -601,9 +610,9 @@ filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
     if ("Reverse" %in% colnames(rowData(sce))) {
         keep <- intersect(keep, which(rowData(sce)$Reverse == ""))
     }
-    # if ("Potential.contaminant" %in% colnames(rowData(sce))) {
-    #     keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == ""))
-    # }
+    if ("Contaminant" %in% colnames(rowData(sce))) {
+        keep <- intersect(keep, which(rowData(sce)$Contaminant == ""))
+    }
     if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) &&
         !is.null(minPeptides)) {
         keep <- intersect(

diff --git a/R/doNormalization.R b/R/doNormalization.R
@@ -5,7 +5,10 @@
 #'
 #' @param sce A \code{SummarizedExperiment} object (or a derivative).
 #' @param method Character scalar giving the normalization method. Currently,
-#'     the methods from \code{MsCoreUtils::normalizeMethods()} are supported.
+#'     the methods from \code{MsCoreUtils::normalizeMethods()} are supported,
+#'     together with \code{"center.mean.shared"} and
+#'     \code{"center.median.shared"}, which subtract from each sample its mean
+#'     or median, respectively, across the features observed in all samples.
 #'     If \code{spikeFeatures} is not \code{NULL}, only
 #'     \code{"center.mean"}, \code{"center.median"}, \code{"div.mean"} and
 #'     \code{"div.median"} are supported.
@@ -56,7 +59,9 @@ doNormalization <- function(sce, method, assayName, normalizedAssayName,
                             spikeFeatures = NULL) {
     .assertVector(x = sce, type = "SummarizedExperiment")
     .assertScalar(x = method, type = "character",
-                  validValues = MsCoreUtils::normalizeMethods())
+                  validValues = c(MsCoreUtils::normalizeMethods(),
+                                  "center.mean.shared",
+                                  "center.median.shared"))
     .assertScalar(x = assayName, type = "character",
                   validValues = SummarizedExperiment::assayNames(sce))
     .assertScalar(x = normalizedAssayName, type = "character")
@@ -94,6 +99,22 @@ doNormalization <- function(sce, method, assayName, normalizedAssayName,
             assayOut <-
                 MsCoreUtils::normalize_matrix(assayIn,
                                               method = method)
+        } else if (method == "center.median.shared") {
+            idx <- which(rowSums(is.na(assayIn)) == 0)
+            if (length(idx) == 0) {
+                stop("No features observed in all samples")
+            }
+            assayOut <- sweep(assayIn, MARGIN = 2,
+                              STATS = apply(assayIn[idx, , drop = FALSE], 2, stats::median),
+                              FUN = "-")
+        } else if (method == "center.mean.shared") {
+            idx <- which(rowSums(is.na(assayIn)) == 0)
+            if (length(idx) == 0) {
+                stop("No features observed in all samples")
+            }
+            assayOut <- sweep(assayIn, MARGIN = 2,
+                              STATS = apply(assayIn[idx, , drop = FALSE], 2, mean),
+                              FUN = "-")
         } else {
             ## Should never end up here as we check the validity of method above
             #nocov start

diff --git a/R/plotImputation.R b/R/plotImputation.R
@@ -9,6 +9,8 @@
 #' @param assayImputation Character scalar indicating the name of a
 #'     logical assay of \code{sce} to use for filling the distribution plots.
 #' @param xlab Character scalar providing the x-axis label for the plot.
+#' @param plotType Character scalar indicating the type of plot to make
+#'     (either \code{"histogram"} or \code{"density"}).
 #'
 #' @export
 #' @author Charlotte Soneson
@@ -30,13 +32,15 @@
 #' @importFrom rlang .data
 #'
 plotImputationDistribution <- function(sce, assayToPlot, assayImputation,
-                                       xlab = "") {
+                                       xlab = "", plotType = "histogram") {
     .assertVector(x = sce, type = "SummarizedExperiment")
     .assertScalar(x = assayToPlot, type = "character",
                   validValues = SummarizedExperiment::assayNames(sce))
     .assertScalar(x = assayImputation, type = "character",
                   validValues = SummarizedExperiment::assayNames(sce))
     .assertScalar(x = xlab, type = "character")
+    .assertScalar(x = plotType, type = "character",
+                  validValues = c("histogram", "density"))
 
     plotdf <- as.data.frame(
         SummarizedExperiment::assay(sce, assayToPlot)) %>%
@@ -49,11 +53,26 @@ plotImputationDistribution <- function(sce, assayToPlot, assayImputation,
                 tidyr::gather(key = "sample", value = "imputed", -"pid"),
             by = c("pid", "sample")
         )
-    ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
-                                         fill = .data$imputed)) +
-        ggplot2::geom_histogram(bins = 50) +
-        ggplot2::facet_wrap(~ sample) +
-        ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
-        ggplot2::scale_fill_manual(values = c(`TRUE` = "grey",
-                                              `FALSE` = "firebrick1"))
+    if (plotType == "histogram") {
+        ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
+                                             fill = .data$imputed)) +
+            ggplot2::geom_histogram(bins = 50) +
+            ggplot2::facet_wrap(~ sample) +
+            ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
+            ggplot2::scale_fill_manual(values = c(`TRUE` = "grey",
+                                                  `FALSE` = "firebrick1"))
+    } else if (plotType == "density") {
+        ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
+                                             color = .data$imputed)) +
+            ggplot2::geom_density(linewidth = 1.5) +
+            ggplot2::facet_wrap(~ sample) +
+            ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
+            ggplot2::scale_color_manual(values = c(`TRUE` = "grey",
+                                                   `FALSE` = "firebrick1"))
+    } else {
+        ## Should never end up here as the parameter is checked above
+        #nocov start
+        stop("Unknown value of the plotType parameter")
+        #nocov end
+    }
 }