Skip to content

Commit 8822a0b

Browse files
author
Karin Schork
committed
fix some stuff in volcano plots (make point size larger, donst skip empty factor levels in legend)
1 parent cdb1925 commit 8822a0b

File tree

5 files changed

+86
-102
lines changed

5 files changed

+86
-102
lines changed

R/helper_ttest.R

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#' The path to an .xlsx file containing the input data.
66
#' @param intensity_columns \strong{integer vector} \cr
77
#' The intensity columns of the table.
8-
#'
8+
#'
99
#' @return A list containing an intensity data.frame, an IDs data frame and a factor of the sample groups.
1010
#'
1111
#' @examples
@@ -19,19 +19,19 @@
1919
prepareTtestData <- function(data_path,
2020
intensity_columns
2121
){
22-
22+
2323
D <- openxlsx::read.xlsx(data_path, na.strings = c("NA", "NaN", "Filtered","#NV"))
24-
24+
2525
id <- D[, -intensity_columns]
2626
D <- D[, intensity_columns]
27-
27+
2828
D[D == 0] <- NA
29-
29+
3030
group <- factor(limma::strsplit2(colnames(D), "_")[,1])
3131
number_of_groups <- length(levels(group))
32-
32+
3333
sample <- factor(limma::strsplit2(colnames(D), "_")[,2])
34-
34+
3535
return(list("D" = D, "ID" = id, "group" = group, "number_of_groups" = number_of_groups, "sample" = sample))
3636
}
3737

@@ -59,20 +59,20 @@ prepareTtestData <- function(data_path,
5959
#' @return A factor with the three significance categories.
6060
#' @export
6161
#'
62-
#' @examples
63-
#'
62+
#' @examples
63+
#'
6464

6565
calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, thres_p=0.05) {
66-
66+
6767
significance <- dplyr::case_when(
6868
p_adj <= thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant after FDR correction",
6969
p_adj > thres_p & p <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p) ~ "significant",
7070
(p > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p) ~ "not significant",
7171
is.na(p) ~ NA_character_
7272
)
73-
73+
7474
significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))
75-
75+
7676
return(significance)
7777
}
7878

@@ -97,18 +97,18 @@ calculate_significance_categories_ttest <- function(p, p_adj, fc, thres_fc=2, th
9797
#' @export
9898
#'
9999
#' @examples
100-
#'
100+
#'
101101

102102
calculate_significance_categories_ANOVA <- function(p_posthoc, p_anova_adj, p_anova, fc, thres_fc=2, thres_p=0.05) {
103-
103+
104104
significance <- dplyr::case_when(
105105
p_anova_adj <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant after FDR correction", # ANOVA significant after FDR, posthoc also significant, fulfills FC threshold
106106
p_anova_adj > thres_p & p_anova <= thres_p & p_posthoc <= thres_p & (fc >= thres_fc | fc <= 1/thres_fc) & !is.na(p_posthoc) & !is.na(p_anova) ~ "significant", # ANOVA significant before FDR, posthoc also significant, fulfills FC threshold
107107
(p_anova > thres_p | p_posthoc > thres_p | (fc < thres_fc & fc > 1/thres_fc)) & !is.na(p_posthoc) & !is.na(p_anova) ~ "not significant", # ANOVA not significant or posthoc not significant or FC does not fulfill threshold
108108
is.na(p_posthoc) | is.na(p_anova) ~ NA_character_
109109
)
110-
110+
111111
significance <- factor(significance, levels = c("not significant", "significant", "significant after FDR correction"))
112-
112+
113113
return(significance)
114-
}
114+
}

R/volcanoPlots.R

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ VolcanoPlot <- function(p,
5555
legend_position = "bottom",
5656
base_size = NULL,
5757
xlim = NULL,
58-
ylim = NULL) {
58+
ylim = NULL, alpha = 0.5,
59+
point_size = 3) {
5960

6061

6162
### transform p-values and fold changes and thresholds
@@ -66,20 +67,24 @@ VolcanoPlot <- function(p,
6667
log_thres_p <- -log(thres_p, base = log_base_p)
6768
log_thres_fc <- log(thres_fc, base = log_base_fc)
6869

69-
RES <- data.frame(transformed_FC = transformed_FC,
70+
RES <<- data.frame(transformed_FC = transformed_FC,
7071
transformed_p = transformed_p,
7172
significance = significance_category)
7273

7374

7475
significance <- RES$significance
7576

7677
plot <- ggplot2::ggplot(data = RES, ggplot2::aes(x = transformed_FC, y = transformed_p, colour = significance)) +
77-
ggplot2::geom_point(alpha = 5/10) +
78-
ggplot2::scale_colour_manual(values = c("not significant" = colour1, "significant" = colour2, "significant after FDR correction" = colour3), drop = FALSE) +
79-
### TODO: axis labels with expressions
80-
### TODO: what if I do not have these categories?
78+
ggplot2::geom_point(alpha = alpha, show.legend = TRUE, size = point_size) +
79+
ggplot2::scale_colour_manual(values = c("not significant" = colour1,
80+
"significant" = colour2,
81+
"significant after FDR correction" = colour3),
82+
drop = FALSE,
83+
na.translate = FALSE) +
84+
8185
ggplot2::xlab(paste0("log",log_base_fc,"(FC)")) +
82-
ggplot2::ylab(paste0("-log",log_base_p,"(p)"))
86+
ggplot2::ylab(paste0("-log",log_base_p,"(p)")) +
87+
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = point_size*1.5)))
8388

8489

8590
if (!is.null(base_size)) {
@@ -88,7 +93,6 @@ VolcanoPlot <- function(p,
8893
plot <- plot + ggplot2::theme_bw()
8994
}
9095

91-
# TODO: legend_position einstellbar machen
9296
plot <- plot + ggplot2::theme(legend.position = legend_position)
9397

9498
### if xlim is not set, use max/min FC and make it symmetric
@@ -193,30 +197,23 @@ VolcanoPlot_ttest <- function(RES,
193197
columnname_p = "p",
194198
columnname_padj = "padj",
195199
columnname_FC = "FC",
200+
thres_p = 0.05,
201+
thres_fc = 2,
196202
log_base_fc = 2,
197203
log_base_p = 10,
198204
is_FC_log = FALSE,
199205
is_p_log = FALSE,
200-
thres_fc = 2,
201-
thres_p = 0.05,
202206
show_thres_line = TRUE,
203-
colour1 = "grey",
204-
colour2 = "black",
205-
colour3 = "orange",
206207
groupname1 = "group1",
207208
groupname2 = "group2",
208-
xlim = NULL,
209-
ylim = NULL,
210-
symmetric_x = FALSE,
211-
legend_position = "bottom",
212209
plot_height = 15,
213210
plot_width = 15,
214211
plot_dpi = 300,
215212
plot_device="pdf",
216213
output_path = NULL,
217214
suffix = NULL,
218-
base_size = NULL,
219-
add_annotation = TRUE){
215+
add_annotation = TRUE,
216+
...) {
220217

221218

222219
# make check work
@@ -247,18 +244,7 @@ VolcanoPlot_ttest <- function(RES,
247244
plot <- VolcanoPlot(p = p,
248245
FC = FC,
249246
significance_category = RES$significance,
250-
log_base_fc = log_base_fc,
251-
log_base_p = log_base_p,
252-
thres_p = thres_p,
253-
thres_fc = thres_fc,
254-
colour1 = colour1,
255-
colour2 = colour2,
256-
colour3 = colour3,
257-
symmetric_x = symmetric_x,
258-
legend_position = legend_position,
259-
base_size = base_size,
260-
xlim = xlim,
261-
ylim = ylim)
247+
...)
262248

263249

264250
## TODO: annotation of number of significant proteins

R/workflow_ttest.R

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ workflow_ttest <- function(data_path,
8787
delog_for_FC = TRUE,
8888
p_value_zeros_to_min = TRUE,
8989

90+
volcano_base_size = 25,
91+
9092
significant_after_FDR = TRUE,
9193
max_valid_values_off = 0,
9294
min_valid_values_on = NULL,
@@ -98,7 +100,7 @@ workflow_ttest <- function(data_path,
98100
plot_dpi = 300,
99101

100102
column_name_protein = "Protein"
101-
){
103+
) {
102104

103105
mess = ""
104106

@@ -111,7 +113,7 @@ workflow_ttest <- function(data_path,
111113

112114
#### Calculate ttest ####
113115

114-
test_results <- ttest(D = data[["D"]], id = data[["ID"]],
116+
test_results <<- ttest(D = data[["D"]], id = data[["ID"]],
115117
group = data[["group"]], sample = data[["sample"]],
116118
paired = paired, var.equal = var.equal,
117119
log_before_test = log_before_test, delog_for_FC = delog_for_FC, log_base = 2,
@@ -152,7 +154,7 @@ workflow_ttest <- function(data_path,
152154

153155
volcano_plot <- VolcanoPlot_ttest(RES = test_results,
154156
columnname_p = "p", columnname_padj = "p.fdr",
155-
columnname_FC = fc_col_name)
157+
columnname_FC = fc_col_name, base_size = volcano_base_size)
156158

157159
ggplot2::ggsave(paste0(output_path, "volcano_plot", suffix, ".", plot_device), plot = volcano_plot,
158160
device = plot_device, height = plot_height, width = plot_width, dpi = plot_dpi)
@@ -163,7 +165,7 @@ workflow_ttest <- function(data_path,
163165

164166
#### Create Histogram for p-values and fold changes ####
165167

166-
histograms <- pvalue_foldchange_histogram(RES = test_results,
168+
histograms <- ProtStatsWF::pvalue_foldchange_histogram(RES = test_results,
167169
columnname_p = "p", columnname_padj = "p.fdr",
168170
columnname_FC = fc_col_name)
169171

@@ -179,7 +181,7 @@ workflow_ttest <- function(data_path,
179181

180182
#### Get significant candidates ####
181183

182-
significance <- calculate_significance_categories_ttest(p = test_results[["p"]],
184+
significance <- ProtStatsWF::calculate_significance_categories_ttest(p = test_results[["p"]],
183185
p_adj = test_results[["p.fdr"]],
184186
fc = test_results[[fc_col_name]])
185187

@@ -198,7 +200,7 @@ workflow_ttest <- function(data_path,
198200

199201
#### Create Boxplots of Biomarker Candidates ####
200202

201-
Boxplots_candidates(D = data[["D"]][candidates, ],
203+
ProtStatsWF::Boxplots_candidates(D = data[["D"]][candidates, ],
202204
protein.names = data[["ID"]][candidates, column_name_protein],
203205
group = data[["group"]],
204206
suffix = suffix,

tests/testthat/_snaps/VolcanoPlot/volcano-plot-anova-file-2.svg

Lines changed: 21 additions & 23 deletions
Loading

0 commit comments

Comments
 (0)