Skip to content

Commit

Permalink
this one is the Yang and Chen SimMSeq simulation method.
Browse files Browse the repository at this point in the history
  • Loading branch information
jakob-wirbel committed May 10, 2024
1 parent 6055aa1 commit 4db9761
Showing 1 changed file with 62 additions and 0 deletions.
62 changes: 62 additions & 0 deletions R/helper_yang_chen.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/Rscript
### SIMBA
### Simulation of Microbiome data with Biological Accuracy
### Morgan Essex - MDC Berlin
### Jakob Wirbel - EMBL Heidelberg
### 2020 GNU GPL 3.0

# Helper functions to simulate data according to Yang and Chen
# Based on the SimulateMSeq function in the GUniFrac package

#' # wrapper for the Yang and Chen
#' @keywords internal
simulate.SimMSeq <- function(feat, meta, sim.out, sim.params){

test.package("GUniFrac")

ab.scale <- sim.params$ab.scale
prop.markers <- sim.params$prop.markers
class.balance <- sim.params$class.balance
repeats <- sim.params$repeats

no.marker.feat <- round(prop.markers * nrow(feat))
el.feat.names <- rownames(feat)
num.sample <- ncol(feat)

libSizesOrig <- colSums(feat)

pb <- progress_bar$new(total = length(ab.scale)*repeats)
for (a in seq_along(ab.scale)){
for (r in seq_len(repeats)){

# create new dataset with their function
sim.chen <- SimulateMSeq(feat,
nSam=ncol(feat), nOTU=nrow(feat),
diff.otu.pct = prop.markers,
diff.otu.mode = 'mix',
diff.otu.direct = 'balanced',
covariate.type = 'binary',
grp.ratio = 0.5/class.balance,
covariate.eff.mean = a,
depth.mu = mean(libSizesOrig))
# get the info out of sim.chen
sim.feat <- sim.chen$otu.tab.sim[rownames(feat),]
marker.idx <- sim.chen$otu.names[which(sim.chen$diff.otu.ind)]
label <- sim.chen$covariate[,1]
label <- label[colnames(sim.feat)]
label[label==0] <- -1

# save data in H5-file
h5.subdir <- paste0('ab', a, '_rep', r)
stopifnot(h5createGroup(sim.out, h5.subdir))
h5write(label, sim.out, paste0(h5.subdir, '/labels'))
h5write(sim.feat, sim.out, paste0(h5.subdir, '/features'))
h5write(marker.idx, sim.out, paste0(h5.subdir, '/marker_idx'))
h5write(colnames(sim.feat), sim.out,
paste(h5.subdir, '/sample_names', sep=''))
h5write(rownames(sim.feat), sim.out,
paste(h5.subdir, '/feature_names', sep=''))
pb$tick()
}
}
}

0 comments on commit 4db9761

Please sign in to comment.