forked from vladpetyuk/vp.misc
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlimma_a_b.R
126 lines (114 loc) · 4.44 KB
/
limma_a_b.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#' Wrapper for LIMMA
#'
#' Convenience wrappers for differential analysis with LIMMA.
#'
#' @param eset \code{eSet} (or most likely eset subclass) object
#' @param model.str character; formulation of the model (e.g. \code{"~ a + b"})
#' @param coef.str character; coefficient of interest. One of
#' \code{colnames(pData(eset))}. E.g. \code{"a"}.
#' For \code{limma_a_b}, this must be a character or factor with 2 levels
#' or a numeric vector. For \code{limma_gen}, this can be a character or
#' factor with at least 2 levels or a numeric vector.
#' @param ... arguments passed to \code{\link[limma]{lmFit}}
#'
#' @details
#' \code{limma_a_b} is for either 2-factor ANOVA or linear regression.
#' \code{limma_gen} is a more generic version and will return results
#' for all the coefficients that match \code{coef.str} pattern. It also
#' works the same as \code{limma_a_b} for linear regression.
#'
#' If performing linear regression, the \code{logFC} column contains the slopes
#' of the regression lines and the \code{AveExpr} column contains the
#' y-intercepts.
#'
#' \code{limma_a_b} outputs moderated t-statistics and associated p-values.
#' \code{limma_gen} outputs moderated F-statistics and associated p-values
#' for a set of contrasts. Also, the columns that begin with the
#' \code{coef.str} report the log2 fold-changes. Contrasts are always relative
#' to a reference level (if \code{coef.str} is a factor) or the first unique
#' entry of \code{coef.str} in terms of alphanumeric order.
#'
#' \code{\link[MSnSet.utils]{limma_contrasts}} is another LIMMA wrapper for
#' testing custom contrasts. It has a separate documentation page.
#'
#' @return
#' \code{data.frame}.
#' Basically output of the \code{\link[limma]{topTable}} function.
#'
#' @importFrom Biobase exprs pData
#' @importFrom limma lmFit topTable eBayes
#' @importFrom stats model.matrix
#'
#' @export limma_a_b
#'
#' @seealso
#' \code{\link{limma_contrasts}},
#' \code{\link[limma]{topTable}},
#' \code{\link[limma]{lmFit}}
#'
#' @examples
#' library("MSnSet.utils")
#' data("cptac_oca")
#'
#' # Filter to features detected in at least 30 samples
#' ee <- oca.set
#' ee <- ee[rowSums(!is.na(exprs(ee))) >= 30, ]
#'
#' # Note: only two levels are allowed in this case
#' res1 <- limma_a_b(ee,
#' model.str = "~ PLATINUM.STATUS + AGE",
#' coef.str = "PLATINUM.STATUS")
#' head(res1)
#'
#' # Linear regression
#' res2 <- limma_a_b(ee, model.str = "~ AGE", coef.str = "AGE")
#' head(res2)
#'
#' # Multiple levels - "Immunoreactive" is used as the reference level
#' res3 <- limma_gen(ee,
#' model.str = "~ SUBTYPE",
#' coef.str = "SUBTYPE")
#' head(res3)
#'
limma_a_b <- function(eset, model.str, coef.str, ...){
model.formula <- eval(parse(text=model.str), envir=pData(eset))
design <- model.matrix( model.formula)
coef.str <- grep(coef.str, colnames(design), value = TRUE)
if(length(coef.str) > 1){
stop("Multiple matches for coefficient of interest!")
stop("Most likely it is factor with > 2 levels.")
}
eset <- eset[,as.numeric(rownames(design))]
fit <- lmFit(exprs(eset), design, ...)
fit.smooth <- eBayes(fit)
sig <- topTable(fit.smooth, number=nrow(eset),
sort.by='none', coef=coef.str)
return(sig)
}
#' @export
#' @describeIn limma_a_b More generalized approach. The tested variable can have
#' more than 2 groups, unlike \code{limma_a_b}.
limma_gen <- function(eset, model.str, coef.str, ...){
model.formula <- eval(parse(text=model.str), envir=pData(eset))
design <- model.matrix( model.formula)
# this malfunctions if coefficient of interest coincided with
# suffix of some of the covariates. E.g. Plate and PlateCol.
# Testing for Plate is problematic
# coef.str <- grep(coef.str, colnames(design), value = TRUE)
# a new way
# If coef.str is a factor or character, do this
if (!(coef.str %in% colnames(design))) {
idx <- which(names(attr(design, "contrast")) == gsub("`","",coef.str))
idx <- attr(design, "assign") == idx
coef.str <- colnames(design)[idx]
}
eset <- eset[,as.numeric(rownames(design))]
fit <- lmFit(exprs(eset), design, ...)
fit.smooth <- eBayes(fit)
sig <- topTable(fit.smooth, number=nrow(eset),
sort.by='none', coef=coef.str)
se <- (sqrt(fit.smooth$s2.post) * fit.smooth$stdev.unscaled)[,coef.str,drop=FALSE]
colnames(se) <- paste("SE.", colnames(se), sep = "")
result <- cbind(sig, se)
return(result)
}