mpc-bioinformatics · ItsMeKathy · Mar 4, 2025 · Jan 27, 2025 · Feb 26, 2025 · Feb 28, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,7 +4,8 @@ Version: 1.0.0
 Authors@R: c(
     person("Karin", "Schork", , "karin.schork@rub.de", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-3756-4347")),
-    person("Katharina", "Neuhaus", , "katharina.neuhaus@rub.de", role = c("aut")))
+    person("Katharina", "Neuhaus", , "katharina.neuhaus@rub.de", role = c("aut")), 
+    person("Martin", "Eisenacher", , "martin.eisenacher@rub.de", role=c("aut", "fnd")))
 Description: This package offers workflows for the statistical analysis of proteomics data.
 License: MIT + file LICENSE
 Encoding: UTF-8

diff --git a/R/WIP_ANOVA.R → R/ANOVA.R b/R/WIP_ANOVA.R → R/ANOVA.R
@@ -24,14 +24,12 @@
 #'                             The minimum number of observations per group.
 #' @param min_perc_per_group   \strong{integer} \cr
 #'                             The minimum ratio of observations per group as an alternative to min_obs_per_group.
-#' @param filename             \strong{character} \cr
-#'                             The name of the output file.
 #'
 #' @return A data.frame with p-values and fold changes
 #' @export
 #'
 #' @examples
-#' 
+#'
 
 ANOVA <- function(D,
                   id = NULL,
@@ -43,8 +41,7 @@ ANOVA <- function(D,
                   delog_for_FC = TRUE,
                   log_base = 2,
                   min_obs_per_group = 3,
-                  min_perc_per_group = NULL,
-                  filename = "results_ANOVA.xlsx") {
+                  min_perc_per_group = NULL) {
 
   if (!is.null(min_obs_per_group) & !is.null(min_perc_per_group)) stop("Please specify only one of min_obs_per_group or min_perc_per_group.")
 
@@ -68,6 +65,7 @@ ANOVA <- function(D,
     } else {
       ### Welch ANOVA (unequal variances)
       print("Welch ANOVA")
+      i <<- 0
       RES <- pbapply::pbapply(D, 1, ANOVA_Welch_single_row, group = group, log_before_test = log_before_test,
                      delog_for_FC = delog_for_FC, min_obs_per_group = min_obs_per_group,
                      min_perc_per_group = min_perc_per_group,
@@ -84,7 +82,7 @@ ANOVA <- function(D,
   if (!is.null(id)) {
     D <- cbind(id, D)
   }
-  openxlsx::write.xlsx(cbind(D, RES), filename, keepNA = TRUE, overwrite = TRUE)
+  #openxlsx::write.xlsx(cbind(D, RES), filename, keepNA = TRUE, overwrite = TRUE)
 
   return(cbind(D, RES))
 }
@@ -201,9 +199,6 @@ ANOVA_standard_single_row <- function(x,
     x2 <- x
   }
 
-
-  ### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte
-
   fcs <- NULL
   name.fcs <- NULL
   for (j in 1:(nr_groups-1)) {
@@ -227,11 +222,6 @@ ANOVA_standard_single_row <- function(x,
 ################################################################################
 
 
-#### TODO: Derzeit müssen Factor-levels der Gruppen alphabetisch sortiert sein,
-### damit die Ergebnisse des posthoc tests richtig ausgelesen werden können
-### TODO: Option einbauen, dass nur getestet wird, wenn alle Gruppen vorhanden sind
-
-
 #' Repeated measures ANOVA (paired samples)
 #'
 #' @param x Numeric vector of protein intensities.
@@ -344,8 +334,6 @@ ANOVA_repeatedMeasurements_single_row <- function(x,
     x2 <- x
   }
 
-
-  ### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte
   fcs <- NULL
   name.fcs <- NULL
   for (j in 1:(nr_groups - 1)) {
@@ -441,7 +429,14 @@ ANOVA_Welch_single_row <- function(x,
   }
 
   model <- stats::lm(intensity ~ group, data = D_tmp_naomit)
-  ANOVA <- suppressMessages(car::Anova(model, white.adjust = TRUE))
+  ANOVA <- try({suppressMessages(car::Anova(model, white.adjust = TRUE))})
+
+  if ("try-error" %in% class(ANOVA)) {
+    res <- rep(NA, 3 * nr_comparisons + 2)
+    names(res) <- cnames
+    return(res)
+  }
+
   p.anova <- ANOVA$`Pr(>F)`[1]
   p.anova.fdr <- NA
 
@@ -465,8 +460,6 @@ ANOVA_Welch_single_row <- function(x,
     x2 <- x
   }
 
-
-  ### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte
   fcs <- NULL
   name.fcs <- NULL
   for (j in 1:(nr_groups-1)) {

diff --git a/R/Boxplots.R b/R/Boxplots.R
@@ -46,20 +46,20 @@ Boxplots <- function(D_long,
 
   mess <- ""
 
-  if(is.null(use_groups)){
-    if(is.na(D_long$group[1])){
+  if (is.null(use_groups)) {
+    if (is.na(D_long$group[1])) {
       use_groups <- FALSE
     }
     else{use_groups <- TRUE}
   }
   else{
-    if(use_groups && is.na(D_long$group[1])){
+    if (use_groups && is.na(D_long$group[1])) {
       use_groups <- FALSE
     }
   }
 
   # log-transform data if necessary
-  if(do_log_transformation) {
+  if (do_log_transformation) {
     D_long$value <- log(D_long$value, base = log_base)
   }
 
@@ -80,12 +80,12 @@ Boxplots <- function(D_long,
 
   pl_boxplot <- pl_boxplot +
     ggplot2::theme_bw(base_size = base_size) +
-    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust=1)) +
+    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1)) +
     ggplot2::ylab("Log intensity") + ggplot2::xlab("Sample") +
     ggplot2::scale_x_discrete(limits = x_axis, drop = FALSE, na.translate = TRUE)
 
 
-  if (method == "violinplot"){
+  if (method == "violinplot") {
     pl_boxplot <- pl_boxplot + ggplot2::geom_violin()
     mess <- paste0("Violin Plot generated ", mess)
   }

diff --git a/R/PCA_Plot.R b/R/PCA_Plot.R
@@ -143,6 +143,7 @@ PCA_Plot <- function(D,
   message(mess)
 
   return(list("plot" = pl, "D_PCA_plot" = cbind(D_PCA, "Sample" = colnames(D)),
+              "filtered_D" = filtered_D,
               "pca" = pca, "message" = mess))
 }
 
diff --git a/R/WIP_Heatmap.R b/R/WIP_Heatmap.R
@@ -9,11 +9,11 @@
 #' @param protein_names_col       \strong{integer} \cr
 #'                                The column with protein or gene names, if the names should be plotted.
 #' @param na_method               \strong{character} \cr
-#'                                The method with which missing values are handeled. 
+#'                                The method with which missing values are handeled.
 #'                                Options are "na.omit" (proteins with any missing values will be removed), "impute" (missing values will be imputed) and "keep" (missing values will be kept).
 #'                                Note that clustering may not work when too many missing values are present.
 #' @param filtermissings          \strong{integer} \cr
-#'                                The threshold for missing values. 
+#'                                The threshold for missing values.
 #'                                If a protein has more missing values, it will be filtered out.
 #'                                Note that rows with only 1 or 2 valid values may cause problems with clustering.
 #' @param groups                  \strong{character factor} \cr
@@ -64,10 +64,10 @@
 #' @param ...                     Further arguments to Heatmap
 #'
 #' @return A heatmap of the given data.
-#' @export
 #'
-#' @examples 
-#' 
+#'
+#' @examples
+#'
 
 Heatmap_with_groups <- function(D, id, protein_names_col = NULL,
                            na_method = "na.omit", filtermissings = 2,

diff --git a/R/WIP_OnOff_Heatmap.R b/R/WIP_OnOff_Heatmap.R
@@ -16,12 +16,12 @@
 #'                         The column in id parameter containing the protein IDs used for mapping.
 #'
 #' @return A data.frame with the number of valid values per group (absolute and relative) and on/off status
-#' @export
-#' 
+#'
+#'
 # @seealso [Onoff_plus_heatmap()]
 #'
-#' @examples 
-#' 
+#' @examples
+#'
 
 calculate_onoff <- function(D, id, group, max_vv_off, min_vv_on, protein_id_col = 1) {
 
@@ -81,14 +81,14 @@ calculate_onoff <- function(D, id, group, max_vv_off, min_vv_on, protein_id_col
 #                              The column name of the proteins.
 # @param relative              \strong{logical} \cr
 #                              If \code{TRUE}, ?
-#                         
+#
 # @return A data.frame with the number of valid values per group (absolute and relative) and on/off status
 # @export
-# 
+#
 # @seealso [calculate_onoff()]
 #
-# @examples 
-# 
+# @examples
+#
 
 Onoff_plus_heatmap <- function(RES_onoff,
                                protein_name_column = "Gene.names",

diff --git a/R/helper_ttest.R b/R/helper_ttest.R
@@ -5,7 +5,7 @@
 #'                            The path to an .xlsx file containing the input data.
 #' @param intensity_columns   \strong{integer vector} \cr
 #'                            The intensity columns of the table.
-#' 
+#'
 #' @return A list containing an intensity data.frame, an IDs data frame and a factor of the sample groups.
 #'
 #' @examples
@@ -19,19 +19,19 @@
 prepareTtestData <- function(data_path,
                              intensity_columns
 ){
-  
+
   D <- openxlsx::read.xlsx(data_path, na.strings = c("NA", "NaN", "Filtered","#NV"))
-  
+
   id <- D[, -intensity_columns]
   D <- D[, intensity_columns]
-  
+
   D[D == 0] <- NA
-  
+
   group <- factor(limma::strsplit2(colnames(D), "_")[,1])
   number_of_groups <- length(levels(group))
-  
+
   sample <- factor(limma::strsplit2(colnames(D), "_")[,2])
-  
+
   return(list("D" = D, "ID" = id, "group" = group, "number_of_groups" = number_of_groups, "sample" = sample))
 }
 
@@ -59,20 +59,20 @@ prepareTtestData <- function(data_path,
 #' @return A factor with the three significance categories.
 #' @export
 #'
-#' @examples 
-#' 
+#' @examples
+#'
 
 calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, thres_p=0.05) {
-  
+
   significance <- dplyr::case_when(
     p_adj <= thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant after FDR correction",
     p_adj > thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant",
     (p > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p) ~ "not significant",
     is.na(p) ~ NA_character_
   )
-  
+
   significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))
-  
+
   return(significance)
 }
 
@@ -97,18 +97,18 @@ calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, th
 #' @export
 #'
 #' @examples
-#' 
+#'
 
 calculate_significance_categories_ANOVA <- function(p_posthoc, p_anova_adj, p_anova, fc, thres_fc=2, thres_p=0.05) {
-  
+
   significance <- dplyr::case_when(
     p_anova_adj <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant after FDR correction", # ANOVA significant after FDR, posthoc also significant, fulfills FC threshold
     p_anova_adj > thres_p & p_anova <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant", # ANOVA significant before FDR, posthoc also significant, fulfills FC threshold
     (p_anova > thres_p | p_posthoc > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p_posthoc) & !is.na(p_anova) ~ "not significant", # ANOVA not significant or posthoc not significant or FC does not fulfill threshold
     is.na(p_posthoc) | is.na(p_anova) ~ NA_character_
   )
-  
+
   significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))
-  
+
   return(significance)
-}
+}