Skip to content

Commit

Permalink
Add Feb 01 simulation branch
Browse files Browse the repository at this point in the history
  • Loading branch information
e3bo committed Feb 22, 2018
1 parent 6394f5c commit 7242121
Show file tree
Hide file tree
Showing 17 changed files with 455 additions and 0 deletions.
4 changes: 4 additions & 0 deletions simulation0201/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.git
./data/*
./src/*
./work*/*
4 changes: 4 additions & 0 deletions simulation0201/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*~
.Rhistory
work*
spaero_0.1.0*
48 changes: 48 additions & 0 deletions simulation0201/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
FROM r-base
MAINTAINER Eamon O'Dea <[last name without apostrophe]35@gmail.com>
RUN apt-get update && apt-get install -y -q --no-install-recommends \
openssh-server \
r-cran-coda \
r-cran-curl \
r-cran-desolve \
r-cran-doparallel \
r-cran-evaluate \
r-cran-foreach \
r-cran-formatr \
r-cran-ggplot2 \
r-cran-httr \
r-cran-igraph \
r-cran-iterators \
r-cran-jsonlite \
r-cran-lazyeval \
r-cran-memoise \
r-cran-mime \
r-cran-mvtnorm \
r-cran-nloptr \
r-cran-openssl \
r-cran-praise \
r-cran-rcpp \
r-cran-r6 \
r-cran-testthat \
r-cran-tgp \
r-cran-xtable \
r-cran-yaml
RUN install2.r --error \
earlywarnings \
knitr \
lintr \
np \
pomp \
rmarkdown
COPY ./spaero_0.2.0.1.tar.gz /home/docker/spaero_0.2.0.1.tar.gz
RUN /usr/bin/R CMD check --no-build-vignettes --no-manual /home/docker/spaero_*.tar.gz > /home/docker/check.so \
&& /usr/bin/R CMD INSTALL --build /home/docker/spaero_*.tar.gz
RUN mkdir /var/run/sshd && echo 'docker:screencast' | chpasswd \
&& sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
17 changes: 17 additions & 0 deletions simulation0201/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
The ``simulation`` subdirectory contains:
1) simulation results used to generate the figures in the paper contained in the zip archive.
2) All scripts used to generate the simulation results in the directory ``src``.
3) Docker files (see below)
4) A copy of the R package spaero
5) Some additional non-essential but related scripts in ``src``.

To facilitate reproducibility, the scripts were run within a Docker container based on an image that is available on Docker Hub. The file ``build-image`` was used to create the image based on
the ``Dockerfile``. To reproduce the results on systems with Docker installed, run the bash script ``run-scripts`` the ``simulation`` subdirectory. This script will:

1) Pull the Docker container down from Docker Hub and create a time-stamped child directory
2) Run the Makefile in ``src`` to reproduce the full simulation results in the time-stamped child directory

Reproducing the full output is computationally intensive and not recommended on a desktop computer. The full output comes to about 30GB and includes many intermediate results which are not needed to recreate the figures. The script ``src/make-light.sh`` is included to generate a lighter compressed zip version of the results and only includes the data necessary for figures.

It is also possible to run the scripts without Docker, although one might have to consult the Docker image to determine the correct software versions to use. One important dependency, the spaero
R package for calculating the moving window statistics, is included in this subdirectory. The right sequence in which the scripts must be run to produce the results can be seen in ``src/Makefile``.
1 change: 1 addition & 0 deletions simulation0201/build-image
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker build -t eamon/2016feasibility .
19 changes: 19 additions & 0 deletions simulation0201/run-scripts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

set -e

topd=$(pwd)
tstamp=$(date +%Y-%m-%d_%H-%M-%S)
wkdir="${topd}/work-${tstamp}"
ddir="${topd}/data"
sdir="${topd}/src"
hdir="/home/docker"

mkdir ${wkdir}
chmod 757 ${wkdir}
docker run -v ${ddir}:${hdir}/data:ro \
-v ${sdir}:${hdir}/src:ro \
-v ${wkdir}:${hdir}/work \
--user docker -w ${hdir}/work \
eamon/2016feasibility:v20170829 /bin/bash -c "make -f ../src/Makefile"
chown -R $(id -u):$(id -g) ${wkdir}
Binary file added simulation0201/spaero_0.2.0.1.tar.gz
Binary file not shown.
16 changes: 16 additions & 0 deletions simulation0201/src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
RS = '/usr/bin/Rscript'
WD = /home/docker/work
SD = /home/docker/src
DD = /home/docker/data
VPATH = $(SD):$(WD)

all: checkpoint-03.rda

checkpoint-03.rda: analyze_observations.R checkpoint-02.rda
$< >$(<F).so 2>$(<F).se

checkpoint-02.rda: simulate_observation.R checkpoint-01.rda
$< >$(<F).so 2>$(<F).se

checkpoint-01.rda: simulate_cases.R
$< >$(<F).so 2>$(<F).se
94 changes: 94 additions & 0 deletions simulation0201/src/analyze_observations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/Rscript

library(foreach)

doParallel::registerDoParallel(cores=5)
RNGkind("L'Ecuyer-CMRG")
set.seed(1234)
parallel::mc.reset.stream()

load("checkpoint-02.rda")

levs <- list()
levs$bandwidth <- c(36, 156)
levs$lag <- 1
analysis_des_mat <- do.call(expand.grid, levs)

calc_auc <- function(predictions, is_null){
test <- !is.na(predictions)
predictions <- predictions[test]
is_null <- is_null[test]
r <- rank(predictions)
r1 <- sum(r[!is_null])
n1 <- sum(!is_null)
n2 <- sum(is_null)
(r1 - n1 * (n1 + 1) / 2) / (n1 * n2)
}

analyze_observations <- function(bandwidth, lag, obs){
get_tau <- function(stat_ts, time){
stats::cor(stat_ts, time, use="pairwise.complete.obs", method="kendall")
}
get_ews_cor <- function(reports, bw=bandwidth, l=lag){
stats <- spaero::get_stats(reports,
center_bandwidth=bw, stat_bandwidth=bw,
center_trend="local_constant",
stat_trend="local_constant",
center_kernel="uniform",
stat_kernel="uniform", lag=l)$stats
taus <- sapply(stats, get_tau, time=seq_along(reports))
list(stats=stats, taus=taus)
}
loop_manip <- function(x){
loop_omult <- function(x) lapply(x, get_ews_cor)
sc <- lapply(x, loop_omult)
taus <- lapply(sc, function(x) sapply(x, "[[", "taus"))
taus <- do.call(cbind, taus)
list(sc=sc, taus=taus)
}
sct <- lapply(obs, loop_manip)
is_null <- c(rep(FALSE, ncol(sct$test$taus)), rep(TRUE, ncol(sct$null$taus)))
preds <- cbind(sct$test$taus, sct$null$taus)

df <- data.frame(is_null, t(preds), row.names=1:ncol(preds))
samp_stat <- function(x, w) {
apply(x[w, -1], 2, calc_auc, is_null=df$is_null[w])
}
bs <- boot::boot(data=df, statistic=samp_stat, R=300)
bssd <- apply(bs$t, 2, sd, na.rm=TRUE)
list(sct=sct, auc=bs$t0, auc_stderr=bssd)
}

analyzed_observations <-
foreach (i=seq(1, nrow(process_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %:%
foreach (j=seq(1, nrow(observation_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %:%
foreach (m=seq(1, nrow(analysis_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %dopar% {
do.call(analyze_observations,
c(analysis_des_mat[m, ],
list(obs=simulated_observations[[i]][[j]])))
}
warnings()

res <- list()
n <- 1
for (i in seq(1, nrow(process_des_mat))){
pvars <- process_des_mat[i, ]
for (j in seq(1, nrow(observation_des_mat))){
ovars <- observation_des_mat[j, ]
for (m in seq(1, nrow(analysis_des_mat))){
auc <- analyzed_observations[[i]][[j]][[m]]$auc
names(auc) <- paste("AUC", names(auc), sep="_")
aucse <- analyzed_observations[[i]][[j]][[m]]$auc_stderr
names(aucse) <- paste(names(auc), "stderr", sep="_")
avars <- analysis_des_mat[m, ]
res[[n]] <- data.frame(c(pvars, ovars, avars, auc, aucse))
n <- n + 1
}
}
}
res <- do.call(rbind, res)

save.image(file="checkpoint-03.rda")
43 changes: 43 additions & 0 deletions simulation0201/src/extract-observations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/Rscript

load("checkpoint-02.rda")
set.seed(1)

res <- list()
for (i in seq(1, nrow(process_des_mat))){
pvars <- process_des_mat[i, ]
jseq <- sample.int(nrow(observation_des_mat), 5) ## to reduce the size of the data
## for (j in seq(1, nrow(observation_des_mat))){
for (j in jseq){
ovars <- observation_des_mat[j, ]
sim_obs <- simulated_observations[[i]][[j]]
stopifnot(length(sim_obs$null) == 1L) ## To simplify code, we
## assume that there is only
## one realization of the
## observation model per
## realization of the
## process model.
stopifnot(length(sim_obs$test) == 1L)
tdata <- sim_obs$test[[1L]]
ndata <- sim_obs$null[[1L]]
make_df <- function(dat, repid, is_emergence){
t <- seq_along(dat)
data.frame(c(pvars, ovars), process_replicate_id = repid,
is_emergence = is_emergence,
cbind(time_index = t, reported_cases = dat))
}
tdf <- Map(make_df, dat = tdata, repid = seq_along(tdata),
is_emergence = TRUE)
tdn <- Map(make_df, dat = ndata, repid = seq_along(ndata),
is_emergence = FALSE)
res <- c(res, tdf, tdn)
}
}

reports_ts <- do.call(rbind, res)
## Delete potentially confusing column
reports_ts$scenario <- NULL
rownames(reports_ts) <- NULL

write.csv(reports_ts, file="reports_ts.csv")
save(reports_ts, file = "reports_ts.RData")
27 changes: 27 additions & 0 deletions simulation0201/src/get-cases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/Rscript

load("checkpoint-01.rda")

## We'll delete observations that do not occur at multiples of
## ``snapshots`` to save space because they are not used in any
## further analysis

test_snap <- function(x, period){
mod <- x %% period
abs(mod) < .Machine$double.eps^.5
}

get_snaps <- function(df) {
tms <- df$time
snapshots <- c(7, 30)
tests <- lapply(snapshots, function(per) test_snap(tms, period = per))
is_snap <- colSums(do.call(rbind, tests)) > 0
df[is_snap, ]
}

for(i in seq_len(nrow(process_des_mat))){
simulated_procs[[i]]$test <- get_snaps(simulated_procs[[i]]$test)
simulated_procs[[i]]$null <- get_snaps(simulated_procs[[i]]$null)
}

save(simulated_procs, process_des_mat, file="cases.RData")
3 changes: 3 additions & 0 deletions simulation0201/src/get-res.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env Rscript
load("checkpoint-03.rda")
save(res, file = "res.RData")
8 changes: 8 additions & 0 deletions simulation0201/src/make-light.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

cd $outputdir
../src/get-res.R
../src/get-cases.R
cd ..
lightname="${outputdir}-light"
zip -r $lightname ${outputdir}/ --exclude \*.rda
102 changes: 102 additions & 0 deletions simulation0201/src/simulate_cases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/Rscript

library(foreach)

doParallel::registerDoParallel(cores=30)
RNGkind("L'Ecuyer-CMRG")
set.seed(3234)
parallel::mc.reset.stream()

levs <- list()
levs$external_forcing <- c(1 / 7)
levs$host_lifetime <- c(70 * 365)


levs$infectious_days <- c(30, 7)
levs$observation_days <- c(20) * 365
levs$population_size <- 10^c(6)
levs$process_reps <- 1000

levs$scenario <- c("emergence")
process_des_mat <- do.call(expand.grid, levs)

EndemicEquilSIR <- function(beta=(R0 * (mu + gamma)), eta=17/5e4,
gamma=365/22, mu=1/50, p=0, R0=17, verbose=FALSE) {
## Computes the endemic equilibrium of an SIR model with immigration
##
## Args:
## beta: numeric. The transmission rate.
## eta: numeric. The rate of infection from outside.
## gamma: numeric. The recovery rate.
## mu: numeric. The birth rate.
## p: numeric. The vaccination uptake.
## R0: numeric. The basic reproduction number.
##
## Returns:
## A list with numeric elements S, I, and R, coresponding to the
## equilibrium fractions of the population in the
## susceptible, infected, and removed states.
stopifnot(c(beta, eta, gamma, p, R0) >= 0)
stopifnot(p <= 1)
a <- - beta * (gamma + mu)
b <- beta * mu * (1 - p) - (gamma + mu) * (eta + mu)
c <- mu * (1 - p) * eta
eq <- (-b - sqrt(b^2 - 4 * a * c)) / (2 * a)
i.star <- ifelse(p == 1, 0, eq)
s.star <- ifelse(p == 1, 0, mu * (1 - p)/ (beta * i.star + eta + mu))
if (verbose) {
ds.star <- mu *(1 - p) - beta * s.star * i.star - eta * s.star - mu * s.star
di.star <- beta * s.star * i.star + eta * s.star - (gamma + mu) * i.star
cat('dS = ', ds.star, '\n')
cat('dI = ', di.star, '\n')
}
return(list(S=s.star, I=i.star, R=1 - i.star - s.star))
}

sample_process <- function(external_forcing, host_lifetime, infectious_days,
observation_days, population_size, process_reps,
scenario){
stopifnot(scenario == "emergence")

times <- seq(0, observation_days)

params <- c(gamma=1 / infectious_days, mu=1 / host_lifetime,
d=1 / host_lifetime, eta=external_forcing / population_size,
beta=0, rho=0.1, S_0=1, I_0=0, R_0=0, N_0=population_size)

beta_critical <- (params["gamma"] + params["d"]) / population_size
initial_fraction_critical <- 0.5
equil <- EndemicEquilSIR(beta = beta_critical * initial_fraction_critical,
eta = params["eta"], gamma = params["gamma"],
mu = params["mu"], p = 0)
params["S_0"] <- equil[["S"]]
params["I_0"] <- equil[["I"]]
params["R_0"] <- equil[["R"]]

covnul <- data.frame(gamma_t=c(0, 0), mu_t=c(0, 0), d_t=c(0, 0),
eta_t=c(0, 0), beta_t=rep(beta_critical, 2) * 0.5,
time=c(0, observation_days))
covalt <- data.frame(gamma_t=c(0, 0), mu_t=c(0, 0), d_t=c(0, 0),
eta_t=c(0, 0),
beta_t=c(beta_critical * 0.5, beta_critical),
time=c(0, observation_days))

simtest <- spaero::create_simulator(times=times, params=params, covar=covalt)
simnull <- spaero::create_simulator(times=times, params=params, covar=covnul)

ret <- list()
do_sim <- function(obj, nsim=process_reps){
cols_to_delete <- c("reports", "gamma_t", "mu_t", "d_t", "eta_t")
ret <- pomp::simulate(obj, nsim=nsim, as.data.frame=TRUE)
ret[, !colnames(ret) %in% cols_to_delete]
}
ret$test <- do_sim(simtest)
ret$null <- do_sim(simnull)
ret
}

simulated_procs <- foreach(i=seq(1, nrow(process_des_mat)),
.options.multicore=list(set.seed=TRUE)) %dopar%
do.call(sample_process, process_des_mat[i, ])

save.image(file="checkpoint-01.rda")
Loading

0 comments on commit 7242121

Please sign in to comment.