Skip to content

Commit

Permalink
Add directory based on Feb 02 simulation branch
Browse files Browse the repository at this point in the history
  • Loading branch information
e3bo committed Feb 22, 2018
1 parent 7242121 commit 66f8fa7
Show file tree
Hide file tree
Showing 17 changed files with 459 additions and 0 deletions.
4 changes: 4 additions & 0 deletions simulation0202/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.git
./data/*
./src/*
./work*/*
4 changes: 4 additions & 0 deletions simulation0202/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*~
.Rhistory
work*
spaero_0.1.0*
48 changes: 48 additions & 0 deletions simulation0202/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
FROM r-base
MAINTAINER Eamon O'Dea <[last name without apostrophe]35@gmail.com>
RUN apt-get update && apt-get install -y -q --no-install-recommends \
openssh-server \
r-cran-coda \
r-cran-curl \
r-cran-desolve \
r-cran-doparallel \
r-cran-evaluate \
r-cran-foreach \
r-cran-formatr \
r-cran-ggplot2 \
r-cran-httr \
r-cran-igraph \
r-cran-iterators \
r-cran-jsonlite \
r-cran-lazyeval \
r-cran-memoise \
r-cran-mime \
r-cran-mvtnorm \
r-cran-nloptr \
r-cran-openssl \
r-cran-praise \
r-cran-rcpp \
r-cran-r6 \
r-cran-testthat \
r-cran-tgp \
r-cran-xtable \
r-cran-yaml
RUN install2.r --error \
earlywarnings \
knitr \
lintr \
np \
pomp \
rmarkdown
COPY ./spaero_0.2.0.1.tar.gz /home/docker/spaero_0.2.0.1.tar.gz
RUN /usr/bin/R CMD check --no-build-vignettes --no-manual /home/docker/spaero_*.tar.gz > /home/docker/check.so \
&& /usr/bin/R CMD INSTALL --build /home/docker/spaero_*.tar.gz
RUN mkdir /var/run/sshd && echo 'docker:screencast' | chpasswd \
&& sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
17 changes: 17 additions & 0 deletions simulation0202/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
The ``simulation`` subdirectory contains:
1) simulation results used to generate the figures in the paper contained in the zip archive.
2) All scripts used to generate the simulation results in the directory ``src``.
3) Docker files (see below)
4) A copy of the R package spaero
5) Some additional non-essential but related scripts in ``src``.

To facilitate reproducibility, the scripts were run within a Docker container based on an image that is available on Docker Hub. The file ``build-image`` was used to create the image based on
the ``Dockerfile``. To reproduce the results on systems with Docker installed, run the bash script ``run-scripts`` the ``simulation`` subdirectory. This script will:

1) Pull the Docker container down from Docker Hub and create a time-stamped child directory
2) Run the Makefile in ``src`` to reproduce the full simulation results in the time-stamped child directory

Reproducing the full output is computationally intensive and not recommended on a desktop computer. The full output comes to about 30GB and includes many intermediate results which are not needed to recreate the figures. The script ``src/make-light.sh`` is included to generate a lighter compressed zip version of the results and only includes the data necessary for figures.

It is also possible to run the scripts without Docker, although one might have to consult the Docker image to determine the correct software versions to use. One important dependency, the spaero
R package for calculating the moving window statistics, is included in this subdirectory. The right sequence in which the scripts must be run to produce the results can be seen in ``src/Makefile``.
1 change: 1 addition & 0 deletions simulation0202/build-image
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker build -t eamon/2016feasibility .
19 changes: 19 additions & 0 deletions simulation0202/run-scripts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

set -e

topd=$(pwd)
tstamp=$(date +%Y-%m-%d_%H-%M-%S)
wkdir="${topd}/work-${tstamp}"
ddir="${topd}/data"
sdir="${topd}/src"
hdir="/home/docker"

mkdir ${wkdir}
chmod 757 ${wkdir}
docker run -v ${ddir}:${hdir}/data:ro \
-v ${sdir}:${hdir}/src:ro \
-v ${wkdir}:${hdir}/work \
--user docker -w ${hdir}/work \
eamon/2016feasibility:v20170829 /bin/bash -c "make -f ../src/Makefile"
chown -R $(id -u):$(id -g) ${wkdir}
Binary file added simulation0202/spaero_0.2.0.1.tar.gz
Binary file not shown.
16 changes: 16 additions & 0 deletions simulation0202/src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
RS = '/usr/bin/Rscript'
WD = /home/docker/work
SD = /home/docker/src
DD = /home/docker/data
VPATH = $(SD):$(WD)

all: checkpoint-03.rda

checkpoint-03.rda: analyze_observations.R checkpoint-02.rda
$< >$(<F).so 2>$(<F).se

checkpoint-02.rda: simulate_observation.R checkpoint-01.rda
$< >$(<F).so 2>$(<F).se

checkpoint-01.rda: simulate_cases.R
$< >$(<F).so 2>$(<F).se
94 changes: 94 additions & 0 deletions simulation0202/src/analyze_observations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/Rscript

library(foreach)

doParallel::registerDoParallel(cores=5)
RNGkind("L'Ecuyer-CMRG")
set.seed(1234)
parallel::mc.reset.stream()

load("checkpoint-02.rda")

levs <- list()
levs$bandwidth <- c(36, 156)
levs$lag <- 1
analysis_des_mat <- do.call(expand.grid, levs)

calc_auc <- function(predictions, is_null){
test <- !is.na(predictions)
predictions <- predictions[test]
is_null <- is_null[test]
r <- rank(predictions)
r1 <- sum(r[!is_null])
n1 <- sum(!is_null)
n2 <- sum(is_null)
(r1 - n1 * (n1 + 1) / 2) / (n1 * n2)
}

analyze_observations <- function(bandwidth, lag, obs){
get_tau <- function(stat_ts, time){
stats::cor(stat_ts, time, use="pairwise.complete.obs", method="kendall")
}
get_ews_cor <- function(reports, bw=bandwidth, l=lag){
stats <- spaero::get_stats(reports,
center_bandwidth=bw, stat_bandwidth=bw,
center_trend="local_constant",
stat_trend="local_constant",
center_kernel="uniform",
stat_kernel="uniform", lag=l)$stats
taus <- sapply(stats, get_tau, time=seq_along(reports))
list(stats=stats, taus=taus)
}
loop_manip <- function(x){
loop_omult <- function(x) lapply(x, get_ews_cor)
sc <- lapply(x, loop_omult)
taus <- lapply(sc, function(x) sapply(x, "[[", "taus"))
taus <- do.call(cbind, taus)
list(sc=sc, taus=taus)
}
sct <- lapply(obs, loop_manip)
is_null <- c(rep(FALSE, ncol(sct$test$taus)), rep(TRUE, ncol(sct$null$taus)))
preds <- cbind(sct$test$taus, sct$null$taus)

df <- data.frame(is_null, t(preds), row.names=1:ncol(preds))
samp_stat <- function(x, w) {
apply(x[w, -1], 2, calc_auc, is_null=df$is_null[w])
}
bs <- boot::boot(data=df, statistic=samp_stat, R=300)
bssd <- apply(bs$t, 2, sd, na.rm=TRUE)
list(sct=sct, auc=bs$t0, auc_stderr=bssd)
}

analyzed_observations <-
foreach (i=seq(1, nrow(process_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %:%
foreach (j=seq(1, nrow(observation_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %:%
foreach (m=seq(1, nrow(analysis_des_mat)),
.options.multicore=list(set.seed=TRUE, preschedule=FALSE)) %dopar% {
do.call(analyze_observations,
c(analysis_des_mat[m, ],
list(obs=simulated_observations[[i]][[j]])))
}
warnings()

res <- list()
n <- 1
for (i in seq(1, nrow(process_des_mat))){
pvars <- process_des_mat[i, ]
for (j in seq(1, nrow(observation_des_mat))){
ovars <- observation_des_mat[j, ]
for (m in seq(1, nrow(analysis_des_mat))){
auc <- analyzed_observations[[i]][[j]][[m]]$auc
names(auc) <- paste("AUC", names(auc), sep="_")
aucse <- analyzed_observations[[i]][[j]][[m]]$auc_stderr
names(aucse) <- paste(names(auc), "stderr", sep="_")
avars <- analysis_des_mat[m, ]
res[[n]] <- data.frame(c(pvars, ovars, avars, auc, aucse))
n <- n + 1
}
}
}
res <- do.call(rbind, res)

save.image(file="checkpoint-03.rda")
43 changes: 43 additions & 0 deletions simulation0202/src/extract-observations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/Rscript

load("checkpoint-02.rda")
set.seed(1)

res <- list()
for (i in seq(1, nrow(process_des_mat))){
pvars <- process_des_mat[i, ]
jseq <- sample.int(nrow(observation_des_mat), 5) ## to reduce the size of the data
## for (j in seq(1, nrow(observation_des_mat))){
for (j in jseq){
ovars <- observation_des_mat[j, ]
sim_obs <- simulated_observations[[i]][[j]]
stopifnot(length(sim_obs$null) == 1L) ## To simplify code, we
## assume that there is only
## one realization of the
## observation model per
## realization of the
## process model.
stopifnot(length(sim_obs$test) == 1L)
tdata <- sim_obs$test[[1L]]
ndata <- sim_obs$null[[1L]]
make_df <- function(dat, repid, is_emergence){
t <- seq_along(dat)
data.frame(c(pvars, ovars), process_replicate_id = repid,
is_emergence = is_emergence,
cbind(time_index = t, reported_cases = dat))
}
tdf <- Map(make_df, dat = tdata, repid = seq_along(tdata),
is_emergence = TRUE)
tdn <- Map(make_df, dat = ndata, repid = seq_along(ndata),
is_emergence = FALSE)
res <- c(res, tdf, tdn)
}
}

reports_ts <- do.call(rbind, res)
## Delete potentially confusing column
reports_ts$scenario <- NULL
rownames(reports_ts) <- NULL

write.csv(reports_ts, file="reports_ts.csv")
save(reports_ts, file = "reports_ts.RData")
27 changes: 27 additions & 0 deletions simulation0202/src/get-cases.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/Rscript

load("checkpoint-01.rda")

## We'll delete observations that do not occur at multiples of
## ``snapshots`` to save space because they are not used in any
## further analysis

test_snap <- function(x, period){
mod <- x %% period
abs(mod) < .Machine$double.eps^.5
}

get_snaps <- function(df) {
tms <- df$time
snapshots <- c(7, 30)
tests <- lapply(snapshots, function(per) test_snap(tms, period = per))
is_snap <- colSums(do.call(rbind, tests)) > 0
df[is_snap, ]
}

for(i in seq_len(nrow(process_des_mat))){
simulated_procs[[i]]$test <- get_snaps(simulated_procs[[i]]$test)
simulated_procs[[i]]$null <- get_snaps(simulated_procs[[i]]$null)
}

save(simulated_procs, process_des_mat, file="cases.RData")
3 changes: 3 additions & 0 deletions simulation0202/src/get-res.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env Rscript
load("checkpoint-03.rda")
save(res, file = "res.RData")
8 changes: 8 additions & 0 deletions simulation0202/src/make-light.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

cd $outputdir
../src/get-res.R
../src/get-cases.R
cd ..
lightname="${outputdir}-light"
zip -r $lightname ${outputdir}/ --exclude \*.rda
Loading

0 comments on commit 66f8fa7

Please sign in to comment.