-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtapnet_packaging.R
256 lines (198 loc) · 12.2 KB
/
tapnet_packaging.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
library(devtools)
setwd("~/Data/repositories/tapnet/tapnet")
document() # process R-functions into .RD files, change namespace
#tools::compactPDF("vignettes/", gs_quality = "ebook")
## Starting from here, the performance seems to vary:
setwd("..")
build("tapnet", args="--compact-vignettes=gs+qpdf", binary=F)
install("tapnet") # install on the computer
devtools::check("tapnet", args="--as-cran")
# I don't get why it returns a warning about compactable PDF: it IS compacted and there is nothing anyone can do about this in the build!
## Using winbuilder (https://win-builder.r-project.org/upload.aspx) to check before submission, I repeatedly get funny problems when using the devtools-developed package. I suspect that some funny temporary file is involved, as the local errors point to var/local/cc/temp...
## Command line building works (mostly) fine.
R CMD build tapnet --compact-vignettes=gs+qpdf
R CMD check tapnet_0.4.tar.gz --as-cran
# %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%
# %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%
# fix things:
# use NON-STANDARDISED trait-matching function, so that Pmax is always 1, no matter which value sigma takes; prevents function to have a maximum at sigma=0
# re-run Tinoco-analysis: can delta be estimated properly (i.e. not as random value, but as optimum)? Do different optimisers yield same value?
# %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%
# %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%
# prepare things:
#save(humm_traits, humm_tree, networks, plant_traits, plant_tree, file="data/Tinoco.rda")
# test things:
library(tapnet)
data(Tinoco)
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2:3], abun_high=humm_abun[2:3], npems_lat = 4)
system.time(fit <- fit_tapnet(tap, fit.delta = T)) # fits on network 2 and 3
system.time(fitBFGS <- fit_tapnet(tap, fit.delta = T, method="BFGS")) # I_mat ERROR!!!!
system.time(fitCG <- fit_tapnet(tap, fit.delta = T, method="CG")) # worse than default
system.time(fitSANN <- fit_tapnet(tap, fit.delta = T, method="SANN", maxit=1e5)) # 140s
# fitSANN substantially worse in objective than default
#fitDE <- fit_tapnetDE(tap, fit.delta = F) # fits on network 2 and 3
str(gof_tapnet(fit) )
pred1 <- predict_tapnet(fit, abuns=tap$networks[[1]]$abuns) # predict to forest network
cor(as.vector(pred1*sum(tap$networks[[1]]$web)), as.vector(tap$networks[[1]]$web)) # correlation with observed interactions
# slightly less bad with exp-link:
#cor(as.vector(exp(pred1)/sum(exp(pred1))*sum(tap$networks[[1]]$web)), as.vector(tap$networks[[1]]$web)) # correlation with observed interactions
fit
# test TmatchMatrixList option:
data(Tinoco)
tap2 <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2], abun_high=humm_abun[2], npems_lat = 4)
# make a mask:
Mask <- list(networks[[2]])
Mask[[1]][,] <- 1
Mask[[1]][10:11, 10:11] <- 0# 1e-8 # give unlikely events a VERY small value (less than 1/prod(dim(web)) )
fit2 <- fit_tapnet(tap2, TmatchMatrixList=Mask)
fit2 # different to without the mask
data(Tinoco)
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3],
traits_low = plant_traits, traits_high = humm_traits, npems_lat = 4)
system.time(fit <- fit_tapnet(tap, hessian=T, method="Nelder")) # fits to networks 2 and 3 only # with SANN: 252 s; with Nelder: s
str(fit)
(diag(solve(fit$opt$hessian)))
# test bjornloglik:
set.seed(2)
M <- matrix(rpois(80*100, 1), 80, 100)
P <- M/sum(M)
l <- sample.int(ncol(M)) # aims to test whether permutating entries makes difference to loglik (no!)
bjornloglik(M[,l], P[,l])
# test loglik_tapnet
fit_tapnet(tap, fit.delta=F, hessian=T)
fit_tapnet(tap, fit.delta=F, obj_function="least squares", hessian=T)
fit_tapnet(tap, fit.delta=F, obj_function="bjorn", hessian=T)
# all three yield rather different estimates; hessians look fine (for fitted parameters)
fit_tapnet(tap, fit.delta=F, obj_function="bjorn", hessian=T, tmatch_type_pem = "otto") # should step with specific error message: works!
# test simulate_tapnet
si <- simulate_tapnet(10, 15, ntraits_nopem = 0, ntraits_pem = 0)
A <- si$networks[[1]]$abuns$low %*% t(si$networks[[1]]$abuns$high)
A <- A/sum(A)
cor(as.vector(A), as.vector(si$networks[[1]]$I_mat)) # this should be 1, as we use neither traits nor PEMs
# shows that simulate_tapnet uses PEMs even when ntraits_pem=0!!
#### wishlist: ####
* add function similar to select_relevant_pems to cut phylotree to species present EVEN when calling "use.all.pems=T" in make_tapnet (wish by Amanda)
* add correlation method option to gof (not only Spearman)
* add DEoptim as optimiser (more robust but still fast, I hope): problem with parameters leading to NaN in I_mat!!
* find data suitable for analysis and include in the package, e.g. https://doi.org/10.5061/dryad.nk98sf7sc (Wang et al., 2020).
* add check for non-integer response?
* make a function for trait matching for phenology: give distribution for tmatch, e.g. Gaussian or uniform; or directly provide a phenology-match-matrix as input
#### use ranger for example ####
library(tapnet)
data(Tinoco)
# Note: use npems_lat = 105, use.all.pems=T to make sure ALL PEMs are being used; otherwise tapnet2df fails.
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2:3], abun_high=humm_abun[2:3], npems_lat = 105, use.all.pems=T)
tapdats <- tapnet2df(tapnetObject=tap)
head(tapdats, 3)
tap1 <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[1], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[1], abun_high=humm_abun[1], npems_lat = 105, use.all.pems=T)
tapdats1 <- tapnet2df(tapnetObject=tap1)
# ranger
library(ranger)
frf <- ranger(interactions ~., data=tapdats[, -c(1,2,4)])
plot(predict(frf, data=tapdats1)$predictions+1, tapdats1$interactions+1, log="xy")
abline(0,1)
cor(predict(frf, data=tapdats1)$predictions+1, tapdats1$interactions+1)
# mlp
library(RSNNS)
fmlp <- mlp(tapdats[, -c(1:4)], tapdats[, 3], size=c(10, 10, 10, 10))
predict(fmlp, newdata=tapdats1[, -c(1:3)])
#hm; try cito for better defaults?
#### modifying and trialling TAPNET ####
fit_tapnet -> optim -> logLik -> simnetfromtap
#### run without PEMs ####
library(tapnet)
data(Tinoco)
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[3], abun_high=humm_abun[3], npems_lat = 1)
# fit <- fit_tapnet(tap, fit.delta=F) # fits on network 1, only if npems_lat > 0!!
#fit <- fit_tapnet(tap, fit.delta=F, lambda=1) # fits on network 1
source("tapnet/R/helper_tapnet.R")
source("tapnet/R/simnetfromtap.R")
library(MPSEM)
source("tapnet/R/make_tapnet.R")
source("tapnet/R/fit_tapnet.R")
fit <- fit_tapnet(tap, fit.delta=F, tmatch_type_pem = "no") # fits on network 1, does not use PEMs
fit
tapnet=tap; fit.delta=F; lambda=0; tmatch_type_pem="normal"; ini = NULL; tmatch_type_obs = "normal"# for fit_tapnet
# now go through fit_tapnet up to line 92 (before optim) to produce all variables for use in simnetfromtap
networks <- tap$networks
# now everything is set up to run the loglik for different values of sigma (see below)!
#params = ini; method = "Nelder"; maxit=500; obj_function = "multinom"; hessian=T # now go through logLik, up to line 170 (which calls simnetfromtap), then switch to there
#networks <- tap$networks; traits = tap$networks[[i]]$traits; abuns = tap$networks[[i]]$abuns; pems = tap$networks[[i]]$pems
#tmatch_type_pem = tmatch_type_pem, tmatch_type_obs = tmatch_type_obs
# evaluate logLik for different values of tmatch_width_obs1:
trials <- ini
ell=0
j=1
sigma <- seq(0.1, 10, len=100)
for (i in sigma){
trials[2] <- i
ell[j] <- loglik_tapnet(params=trials, networks=networks, tmatch_type_pem = "no", tmatch_type_obs = "normal", fit.delta=F)
j = j + 1
}
plot(sigma, ell, type="l") # fine for Tinoco 1 and 2
plot(sigma, ell, type="l", ylim=c(2000, 4000)) # very weak optimum for network 3 !!
abline(v=2.77)
#### run fit_tapnet with different random starting values ####
3. make matrix with high matching: does it show a clear optimum when evaluating the loglik?
#### trial embedding ####
library(tapnet)
data(Tinoco)
# Note: use npems_lat = 105, use.all.pems=T to make sure ALL PEMs are being used; otherwise tapnet2df fails.
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2:3], abun_high=humm_abun[2:3], npems_lat = 105, use.all.pems=T)
tapdats <- tapnet2df(tapnetObject=tap)
head(tapdats, 3)
tap1 <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[1], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[1], abun_high=humm_abun[1], npems_lat = 105, use.all.pems=T)
tapdats1 <- tapnet2df(tapnetObject=tap1)
# using t-stochastic neighbourhood embedding:
library(Rtsne)
emb <- Rtsne(tapdats[,-c(1,2,4)])
plot(emb$Y, asp=1)
# hm; no way to predict from this
# setting input to NA leads to error
# using node2vec: network information only, no covariates!
library(node2vec)
# make edge-with-weights object, removing all 0 links:
x <- tapdats[which(tapdats[,3] > 0), c(1,2,3)]
unique(x[,2]) # 4 letters needed to uniquely identify species
x[, 1] <- sapply(x[,1], substr, 1, 4)
x[, 2] <- sapply(x[,2], substr, 1, 4)
x[, 3] <- log(x[,3] + 1)
head(x)
emb <- node2vecR(x, walk_length=10, p=2, dim=5)
emb
#### trial neural networks ####
library(tapnet)
data(Tinoco)
# Note: use npems_lat = 105, use.all.pems=T to make sure ALL PEMs are being used; otherwise tapnet2df fails.
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2:3], abun_high=humm_abun[2:3], npems_lat = 105, use.all.pems=T)
tapdats <- tapnet2df(tapnetObject=tap)
head(tapdats, 3)
tap1 <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[1], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[1], abun_high=humm_abun[1], npems_lat = 105, use.all.pems=T)
tapdats1 <- tapnet2df(tapnetObject=tap1)
library(nnet)
fnn <- nnet(x=tapdats[, -c(1:5)], y=tapdats$interactions, linout=T, decay=0.00001, size=20)
preds <- predict(fnn, newdata=tapdats1[, -c(1:5)])
plot(preds, tapdats1$interactions)
abline(0,1)
library(neuralnet)
fnet <- neuralnet(interactions ~ . , data=tapdats[, -c(1,2,4)], hidden=c(10, 7, 10), linear.output=T, stepmax=1e6)
str(fnet)
predict(fnet, newdata=tapdats1[, -c(1,2,4)])
#### trait matching as in Pichler et al. (2019) ####
# https://github.com/MaximilianPi/TraitMatching
library(tapnet)
data(Tinoco)
# Note: use npems_lat = 105, use.all.pems=T to make sure ALL PEMs are being used; otherwise tapnet2df fails.
tap <- make_tapnet(tree_low = plant_tree, tree_high = humm_tree, networks = networks[2:3], traits_low = plant_traits, traits_high = humm_traits, abun_low=plant_abun[2:3], abun_high=humm_abun[2:3], npems_lat = 105, use.all.pems=T)
devtools::install_github(repo = "https://github.com/MaximilianPi/TraitMatching", subdir = "TraitMatching")
library(TraitMatching)
#sim = simulateInteraction(weights = list(main = 0.1, inter = c(0.3,0.3,0.3)))
# sim$A, sim$B, sim$binar()
#community = createCommunity(A, B, Z)
# A and B are species groups with traits, respectively; Z is observed interactions
A <- cbind(abun=tap$networks[[1]]$abuns[[1]], tap$networks[[1]]$traits[[1]], tap$networks[[1]]$pems[[1]][names(tap$networks[[1]]$abuns[[1]]),])
B <- cbind(abun=tap$networks[[1]]$abuns[[2]], tap$networks[[1]]$traits[[2]], tap$networks[[1]]$pems[[2]][names(tap$networks[[1]]$abuns[[2]]),])
Z <- tap$networks[[1]]$web
comm <- createCommunity(a=A, b=B, z=Z, impute=F, log=T)
# does not work; creates empty list and I cannot be bothered to fix it
#fit = runTM(community = comm, method = "RF", iters = 20L)