Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Version: 1.0.0
Authors@R: c(
person("Karin", "Schork", , "karin.schork@rub.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-3756-4347")),
person("Katharina", "Neuhaus", , "katharina.neuhaus@rub.de", role = c("aut")))
person("Katharina", "Neuhaus", , "katharina.neuhaus@rub.de", role = c("aut")),
person("Martin", "Eisenacher", , "martin.eisenacher@rub.de", role=c("aut", "fnd")))
Description: This package offers workflows for the statistical analysis of proteomics data.
License: MIT + file LICENSE
Encoding: UTF-8
Expand Down
31 changes: 12 additions & 19 deletions R/WIP_ANOVA.R → R/ANOVA.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
#' The minimum number of observations per group.
#' @param min_perc_per_group \strong{integer} \cr
#' The minimum ratio of observations per group as an alternative to min_obs_per_group.
#' @param filename \strong{character} \cr
#' The name of the output file.
#'
#' @return A data.frame with p-values and fold changes
#' @export
#'
#' @examples
#'
#'

ANOVA <- function(D,
id = NULL,
Expand All @@ -43,8 +41,7 @@ ANOVA <- function(D,
delog_for_FC = TRUE,
log_base = 2,
min_obs_per_group = 3,
min_perc_per_group = NULL,
filename = "results_ANOVA.xlsx") {
min_perc_per_group = NULL) {

if (!is.null(min_obs_per_group) & !is.null(min_perc_per_group)) stop("Please specify only one of min_obs_per_group or min_perc_per_group.")

Expand All @@ -68,6 +65,7 @@ ANOVA <- function(D,
} else {
### Welch ANOVA (unequal variances)
print("Welch ANOVA")
i <<- 0
RES <- pbapply::pbapply(D, 1, ANOVA_Welch_single_row, group = group, log_before_test = log_before_test,
delog_for_FC = delog_for_FC, min_obs_per_group = min_obs_per_group,
min_perc_per_group = min_perc_per_group,
Expand All @@ -84,7 +82,7 @@ ANOVA <- function(D,
if (!is.null(id)) {
D <- cbind(id, D)
}
openxlsx::write.xlsx(cbind(D, RES), filename, keepNA = TRUE, overwrite = TRUE)
#openxlsx::write.xlsx(cbind(D, RES), filename, keepNA = TRUE, overwrite = TRUE)

return(cbind(D, RES))
}
Expand Down Expand Up @@ -201,9 +199,6 @@ ANOVA_standard_single_row <- function(x,
x2 <- x
}


### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte

fcs <- NULL
name.fcs <- NULL
for (j in 1:(nr_groups-1)) {
Expand All @@ -227,11 +222,6 @@ ANOVA_standard_single_row <- function(x,
################################################################################


#### TODO: Derzeit müssen Factor-levels der Gruppen alphabetisch sortiert sein,
### damit die Ergebnisse des posthoc tests richtig ausgelesen werden können
### TODO: Option einbauen, dass nur getestet wird, wenn alle Gruppen vorhanden sind


#' Repeated measures ANOVA (paired samples)
#'
#' @param x Numeric vector of protein intensities.
Expand Down Expand Up @@ -344,8 +334,6 @@ ANOVA_repeatedMeasurements_single_row <- function(x,
x2 <- x
}


### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte
fcs <- NULL
name.fcs <- NULL
for (j in 1:(nr_groups - 1)) {
Expand Down Expand Up @@ -441,7 +429,14 @@ ANOVA_Welch_single_row <- function(x,
}

model <- stats::lm(intensity ~ group, data = D_tmp_naomit)
ANOVA <- suppressMessages(car::Anova(model, white.adjust = TRUE))
ANOVA <- try({suppressMessages(car::Anova(model, white.adjust = TRUE))})

if ("try-error" %in% class(ANOVA)) {
res <- rep(NA, 3 * nr_comparisons + 2)
names(res) <- cnames
return(res)
}

p.anova <- ANOVA$`Pr(>F)`[1]
p.anova.fdr <- NA

Expand All @@ -465,8 +460,6 @@ ANOVA_Welch_single_row <- function(x,
x2 <- x
}


### TODO: fold changes nur berechnen, wenn für die Gruppe auch ein posthoc test berechnet werden konnte
fcs <- NULL
name.fcs <- NULL
for (j in 1:(nr_groups-1)) {
Expand Down
12 changes: 6 additions & 6 deletions R/Boxplots.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,20 @@ Boxplots <- function(D_long,

mess <- ""

if(is.null(use_groups)){
if(is.na(D_long$group[1])){
if (is.null(use_groups)) {
if (is.na(D_long$group[1])) {
use_groups <- FALSE
}
else{use_groups <- TRUE}
}
else{
if(use_groups && is.na(D_long$group[1])){
if (use_groups && is.na(D_long$group[1])) {
use_groups <- FALSE
}
}

# log-transform data if necessary
if(do_log_transformation) {
if (do_log_transformation) {
D_long$value <- log(D_long$value, base = log_base)
}

Expand All @@ -80,12 +80,12 @@ Boxplots <- function(D_long,

pl_boxplot <- pl_boxplot +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust=1)) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1)) +
ggplot2::ylab("Log intensity") + ggplot2::xlab("Sample") +
ggplot2::scale_x_discrete(limits = x_axis, drop = FALSE, na.translate = TRUE)


if (method == "violinplot"){
if (method == "violinplot") {
pl_boxplot <- pl_boxplot + ggplot2::geom_violin()
mess <- paste0("Violin Plot generated ", mess)
}
Expand Down
1 change: 1 addition & 0 deletions R/PCA_Plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ PCA_Plot <- function(D,
message(mess)

return(list("plot" = pl, "D_PCA_plot" = cbind(D_PCA, "Sample" = colnames(D)),
"filtered_D" = filtered_D,
"pca" = pca, "message" = mess))
}

10 changes: 5 additions & 5 deletions R/WIP_Heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#' @param protein_names_col \strong{integer} \cr
#' The column with protein or gene names, if the names should be plotted.
#' @param na_method \strong{character} \cr
#' The method with which missing values are handeled.
#' The method with which missing values are handeled.
#' Options are "na.omit" (proteins with any missing values will be removed), "impute" (missing values will be imputed) and "keep" (missing values will be kept).
#' Note that clustering may not work when too many missing values are present.
#' @param filtermissings \strong{integer} \cr
#' The threshold for missing values.
#' The threshold for missing values.
#' If a protein has more missing values, it will be filtered out.
#' Note that rows with only 1 or 2 valid values may cause problems with clustering.
#' @param groups \strong{character factor} \cr
Expand Down Expand Up @@ -64,10 +64,10 @@
#' @param ... Further arguments to Heatmap
#'
#' @return A heatmap of the given data.
#' @export
#'
#' @examples
#'
#'
#' @examples
#'

Heatmap_with_groups <- function(D, id, protein_names_col = NULL,
na_method = "na.omit", filtermissings = 2,
Expand Down
16 changes: 8 additions & 8 deletions R/WIP_OnOff_Heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
#' The column in id parameter containing the protein IDs used for mapping.
#'
#' @return A data.frame with the number of valid values per group (absolute and relative) and on/off status
#' @export
#'
#'
#'
# @seealso [Onoff_plus_heatmap()]
#'
#' @examples
#'
#' @examples
#'

calculate_onoff <- function(D, id, group, max_vv_off, min_vv_on, protein_id_col = 1) {

Expand Down Expand Up @@ -81,14 +81,14 @@ calculate_onoff <- function(D, id, group, max_vv_off, min_vv_on, protein_id_col
# The column name of the proteins.
# @param relative \strong{logical} \cr
# If \code{TRUE}, ?
#
#
# @return A data.frame with the number of valid values per group (absolute and relative) and on/off status
# @export
#
#
# @seealso [calculate_onoff()]
#
# @examples
#
# @examples
#

Onoff_plus_heatmap <- function(RES_onoff,
protein_name_column = "Gene.names",
Expand Down
34 changes: 17 additions & 17 deletions R/helper_ttest.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' The path to an .xlsx file containing the input data.
#' @param intensity_columns \strong{integer vector} \cr
#' The intensity columns of the table.
#'
#'
#' @return A list containing an intensity data.frame, an IDs data frame and a factor of the sample groups.
#'
#' @examples
Expand All @@ -19,19 +19,19 @@
prepareTtestData <- function(data_path,
intensity_columns
){

D <- openxlsx::read.xlsx(data_path, na.strings = c("NA", "NaN", "Filtered","#NV"))

id <- D[, -intensity_columns]
D <- D[, intensity_columns]

D[D == 0] <- NA

group <- factor(limma::strsplit2(colnames(D), "_")[,1])
number_of_groups <- length(levels(group))

sample <- factor(limma::strsplit2(colnames(D), "_")[,2])

return(list("D" = D, "ID" = id, "group" = group, "number_of_groups" = number_of_groups, "sample" = sample))
}

Expand Down Expand Up @@ -59,20 +59,20 @@ prepareTtestData <- function(data_path,
#' @return A factor with the three significance categories.
#' @export
#'
#' @examples
#'
#' @examples
#'

calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, thres_p=0.05) {

significance <- dplyr::case_when(
p_adj <= thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant after FDR correction",
p_adj > thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant",
(p > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p) ~ "not significant",
is.na(p) ~ NA_character_
)

significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))

return(significance)
}

Expand All @@ -97,18 +97,18 @@ calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, th
#' @export
#'
#' @examples
#'
#'

calculate_significance_categories_ANOVA <- function(p_posthoc, p_anova_adj, p_anova, fc, thres_fc=2, thres_p=0.05) {

significance <- dplyr::case_when(
p_anova_adj <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant after FDR correction", # ANOVA significant after FDR, posthoc also significant, fulfills FC threshold
p_anova_adj > thres_p & p_anova <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant", # ANOVA significant before FDR, posthoc also significant, fulfills FC threshold
(p_anova > thres_p | p_posthoc > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p_posthoc) & !is.na(p_anova) ~ "not significant", # ANOVA not significant or posthoc not significant or FC does not fulfill threshold
is.na(p_posthoc) | is.na(p_anova) ~ NA_character_
)

significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))

return(significance)
}
}
Loading