Skip to content

Commit

Permalink
WIP on (no branch): fb99d45 Merge pull request #71 from EPPIcenter/changes-for-v0.0.9
Browse files Browse the repository at this point in the history
  • Loading branch information
bgpalmer committed Mar 11, 2023
1 parent fb99d45 commit 3daf178
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
27 changes: 13 additions & 14 deletions R_code/postdada_rearrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ if (length(non_overlaps_idx) > 0) {

## Reference table for sequences - this is to reduce memory usage in intermediate files
df.sequences <- data.frame(
sid = sprintf("S%d", 1:length(sequences)),
seqid = sprintf("S%d", 1:length(sequences)),
sequences = sequences
)

Expand Down Expand Up @@ -213,7 +213,7 @@ if (!is.null(args$homopolymer_threshold) && args$homopolymer_threshold > 0) {
patt <- c(alignedPattern(aln[num]), alignedSubject(aln[num]))
ind <- sum(str_count(as.character(patt),"-"))
data.frame(
sid = df.sequences[seq1,]$sid,
seqid = df.sequences[seq1,]$seqid,
hapseq = as.character(patt)[2],
refseq = as.character(patt)[1],
refid = names(patt)[1],
Expand Down Expand Up @@ -328,46 +328,45 @@ if (!is.null(args$homopolymer_threshold) && args$homopolymer_threshold > 0) {
}

data.frame(
sid = df_aln[seq1, ]$sid,
seqid = df_aln[seq1, ]$seqid,
refid = df_aln[seq1, ]$refid,
asv_prime = as.character(asv_prime)
)
}

# df_seqs <- inner_join(df_aln, df_masked, by = c("original", "refid", "refseq", "hapseq"))

seqtab.nochim.df <- tibble::rownames_to_column(as.data.frame(t(seqtab.nochim)), "sequences") %>%
inner_join(df.sequences, by = c("sequences")) %>%
select(-c(sequences))

df_seqs <- df_aln %>%
ungroup() %>%
select(sid, refid) %>%
select(seqid, refid) %>%
distinct() %>%
inner_join(
df_masked %>%
select(sid, refid, asv_prime) %>%
select(seqid, refid, asv_prime) %>%
distinct()
, by = c("sid", "refid"))
, by = c("seqid", "refid")) %>%
select(-c(refid)) %>%
distinct()

seqtab.nochim.df <- df_seqs %>%
inner_join(seqtab.nochim.df, by = c("sid")) %>%
group_by(sid, refid, asv_prime) %>%
inner_join(seqtab.nochim.df, by = c("seqid")) %>%
select(-c(seqid)) %>%
group_by(asv_prime) %>%
summarise(across(everything(), sum)) %>%
ungroup() %>%
select(-c(sid,refid))
ungroup()

seqtab.nochim.df <- as.data.frame(seqtab.nochim.df)
rownames(seqtab.nochim.df) <- seqtab.nochim.df$asv_prime
seqtab.nochim.df <- seqtab.nochim.df %>% select(-c(asv_prime))
seqtab.nochim.df$asv_prime <- NULL

seqtab.nochim.df <- as.data.frame(t(seqtab.nochim.df))

seqtab.nochim.df <- tibble::rownames_to_column(seqtab.nochim.df, "sample")
seqtab.nochim.df <- seqtab.nochim.df %>% arrange(sample)
seqtab.nochim.df[seqtab.nochim.df==0]=NA


} else {
seqtab.nochim.df = as.data.frame(seqtab.nochim)
seqtab.nochim.df$sample = rownames(seqtab.nochim)
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ profiles {
apptainer.enabled = true
apptainer.autoMounts = true
docker.enabled = false
process.container = 'file://ampseq_workflow.sif'
process.container = "file://'mad4hatter.sif"
}
sge {
process {
Expand Down

0 comments on commit 3daf178

Please sign in to comment.