-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathmodel_matrix_robust.r
More file actions
58 lines (47 loc) · 1.87 KB
/
model_matrix_robust.r
File metadata and controls
58 lines (47 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# point of this file:
# - use the zscores to create a linear model
library(dplyr)
b = import('base', attach_operators=FALSE)
import('base/operators')
io = import('io')
#' Fits a linear model on Z-scores
#'
#' @param zdata A list with the zscore matrix and index object
#' @return The coefficients matrix [gene x pathway]
zscore2model = function(zdata, hpc_args=NULL) {
b = import('base')
ar = import('array')
st = import('stats')
index = zdata$index
zscores = t(zdata$zscores) * index$sign
# fit model to pathway perturbations
pathway = t(ar$mask(index$pathway)) + 0
pathway["EGFR",] = pathway["EGFR",] + pathway["MAPK",] + pathway["PI3K",]
pathway["TNFa",] = pathway["TNFa",] + pathway["NFkB",]
# mod = st$rlm(zscores ~ 0 + pathway, data=index, min_pts=30, atomic="pathway",
pathway = index$pathway
mod = st$rlm(zscores ~ 0 + pathway, min_pts=30, atomic="pathway",
hpc_args=hpc_args) %>%
transmute(gene = zscores,
pathway = sub("^pathway", "", term),
zscore = estimate,
statistic = statistic) %>%
group_by(gene) %>%
mutate(adj.stat = -log10(abs(statistic))) %>%
ungroup()
zfit = ar$construct(zscore ~ gene + pathway, data=mod)
pval = ar$construct(adj.stat ~ gene + pathway, data=mod)
# filter zfit to only include top 100 genes per pathway
model = zfit
model[apply(pval, 2, function(p) !b$min_mask(p, 100))] = 0
list(assocs=mod, model=model)
}
if (is.null(module_name())) {
ZDATA = commandArgs(TRUE)[1] %or% "../data/zscores.RData"
OUTFILE = commandArgs(TRUE)[2] %or% "model_matrix_robust.RData"
# load speed data, index; filter for train set only
zdata = io$load(ZDATA)
result = zscore2model(zdata, hpc_args=list(n_jobs=20, memory=2048))
# save resulting object
save(result, file=OUTFILE)
}