-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtest.R
61 lines (53 loc) · 2.43 KB
/
test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# This is a test script for EAGLE, the Environment-Ase Generalized Linear modEl.
# Data are sampled from the generative model, and then inference is run to try to
# recover the true "hits".
# Attach the eagle package. library() stops with an error right here if the
# package is not installed, whereas require() would only warn and return FALSE,
# deferring the failure to the first eagle.* call further down the script.
library(eagle)
set.seed(1) # for reproducibility
#-------------- generate some synthetic data -------------------------
# Outputs of this section (all indexed by locus): alt, n, xFull, xNull,
# plus environmentVar (one value per individual) and trueBeta (one per locus).
n.loci <- 100 # number of loci
n.samples <- 200 # number of individuals
min.depth <- 20 # floor applied to every sampled read depth
# Preallocate the per-locus containers rather than growing empty lists.
alt <- vector("list", n.loci) # list of alternative counts
n <- vector("list", n.loci) # list of total counts
xFull <- vector("list", n.loci) # list of design matrices for the alternative models
xNull <- vector("list", n.loci) # list of design matrices for the null models
environmentVar <- rnorm(n.samples) # values of the environment variable, e.g. age
# make "true" regression coefficients, most of which are 0:
# a locus is non-null when its uniform draw is below 0.05, in which case its
# coefficient is the matching gamma(shape = 2, rate = 1) draw.
trueBeta <- ifelse(runif(n.loci) < 0.05, rgamma(n.loci, shape = 2, rate = 1), 0.0)
# Inverse-logit: maps a real-valued linear predictor to a probability.
logistic <- function(x) 1 / (1 + exp(-x))
# NOTE: the order of the random draws inside the loop is significant; changing
# it changes the data produced for a given seed.
for (i in seq_len(n.loci)) {
  maf <- runif(1) * .4 + .1 # MAF between .1 and .5 for this locus
  # sample which individuals are heterozygous at this locus assuming HWE
  hap1 <- runif(n.samples) < maf
  hap2 <- runif(n.samples) < maf
  hets <- xor(hap1, hap2) # heterozygous = exactly one haplotype carries the allele
  numHets <- sum(hets)
  ones <- rep(1.0, numHets) # intercept column
  x <- environmentVar[hets] # environment values of the heterozygous individuals
  # Column names are spelled out so they do not depend on the variable names.
  xFull[[i]] <- cbind(x = x, ones = ones) # design matrix at this locus for alternative ("full") model
  xNull[[i]] <- cbind(ones = ones) # design matrix at this locus for null model
  # sample read depth from overdispersed Poisson (NB?): Poisson with a gamma-distributed mean
  depth <- rpois(numHets, 100 * rgamma(numHets, shape = 2, rate = 2))
  depth[depth < min.depth] <- min.depth # raise low depths to the floor
  n[[i]] <- depth
  # sample underlying probabilities, with overdispersion (Gaussian noise on the logit scale)
  p <- logistic(trueBeta[i] * x + .3 * rnorm(numHets))
  alt[[i]] <- rbinom(numHets, depth, p) # sample alternative counts
  # "min" model: doesn't work well with the way I sample betas (and having no intercept)
  # alt[[i]] <- pmin(alt[[i]], n[[i]] - alt[[i]])
}
#---------------- run the model --------------------------
# Fetch a settings object from eagle.settings() and override fields on it.
s <- eagle.settings()
s$debug <- FALSE
# s$rev.model <- as.integer(3) # local regression
s$rev.model <- 2
# Scaled log10 of the mean read depth at each locus; only required for rev.model=3.
# vapply() guarantees exactly one numeric value per locus.
s$normalised.depth <- scale(log10(vapply(n, mean, numeric(1))))
s$max.iterations <- 10000
s$convergence.tolerance <- .001
s$coeff.regulariser <- 0.0
s$learn.rev <- TRUE
# initial hyperparameters
s$rep.global.shape <- 1.0
s$rep.global.rate <- 0.0033
s$traceEvery <- 1
# Fit all loci and time the call; the result is kept in `res`.
system.time(res <- eagle.helper(alt, n, xFull, xNull, s)) # ~4s per the original author's note
# save(file="testNoRerun.RData",res,trueBeta)
# Report the loci simulated with a non-zero effect: their p-values, then their true coefficients.
is.hit <- trueBeta != 0
cat("p-values for true hits:", res$p.values[is.hit], "\n")
cat("true hit betas: ", trueBeta[is.hit], "\n")
# hist(res$p.values[trueBeta == 0.0], main = "p values for null sites")