generated from OpenOmics/baseline
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: seperate paired and single ended rules, conditionally import
- Loading branch information
Showing
12 changed files
with
1,023 additions
and
691 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# Calling read coverage as peaks | ||
# ~~~~ | ||
# Paired-end peak calling rules | ||
|
||
|
||
rule MACS2_broad: | ||
input: | ||
chip = join(bam_dir, "{name}.Q5DD_tagAlign.gz"), | ||
txt = join(ppqt_dir, "{name}.Q5DD_tagAlign.ppqt.txt"), | ||
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD.bam"), | ||
output: | ||
join(macsB_dir, "{name}", "{name}_peaks.broadPeak"), | ||
params: | ||
rname = 'MACS2_broad', | ||
gsize = config['references'][genome]['EFFECTIVEGENOMESIZE'], | ||
macsver = config['tools']['MACSVER'], | ||
flag = lambda w: "-c" if chip2input[w.name] else "", | ||
shell: | ||
""" | ||
module load {params.macsver}; | ||
macs2 callpeak \\ | ||
-t {input.chip} {params.flag} {input.c_option} \\ | ||
-g {params.gsize} \\ | ||
-n {wildcards.name} \\ | ||
--outdir {macsB_dir}/{wildcards.name} \\ | ||
--broad \\ | ||
--broad-cutoff 0.01 \\ | ||
--keep-dup="all" \\ | ||
-f "BAMPE" | ||
""" | ||
|
||
|
||
rule SICER: | ||
input: | ||
chip = lambda w: join(bam_dir, w.name + ".Q5DD.bam"), | ||
fragLen = lambda w: join(qc_dir, name + ".Q5DD.insert_size_metrics.txt"), | ||
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD.bam"), | ||
output: | ||
bed = join(sicer_dir, "{name}", "{name}_broadpeaks.bed") if has_inputs else [], | ||
params: | ||
rname = 'SICER', | ||
name = "{name}", | ||
sicerver = config['tools']['SICERVER'], | ||
bedtoolsver = config['tools']['BEDTOOLSVER'], | ||
genomever = config['options']['genome'], | ||
this_sicer_dir = join(sicer_dir,"{name}"), | ||
frac = config['references'][genome]['FRAC'], | ||
flag = lambda w: "-c" if chip2input[w.name] else "", | ||
shell: | ||
""" | ||
module load {params.sicerver} | ||
module load {params.bedtoolsver} | ||
if [ ! -d \"""" + str(tmpdir) + """\" ]; then mkdir -p \"""" + str(tmpdir) + """\"; fi | ||
tmp=$(mktemp -d -p \"""" + str(tmpdir) + """\") | ||
trap 'rm -rf "${{tmp}}"' EXIT | ||
MEAN_INSERT_SIZE=$(cat {input.fragLen} | awk '/MEDIAN_INSERT_SIZE/{{f=1;next}} /## HISTOGRAM/{{f=0}} f' | cut -f 6) | ||
mean_insert_size=$(printf "%.0f" $MEAN_INSERT_SIZE) | ||
echo "printing out value of mean-insert-size ${{mean_insert_size}}" | ||
a={input.c_option} | ||
echo "Printing input.c_option ${{a}}" | ||
if [ -f "{input.c_option}" ]; then | ||
# Copying input to tmpdir due to SICER2 | ||
# bam2bed file conversion, if more than | ||
# one sample shares the same IP sample | ||
# than a race condition can occur where | ||
# two jobs can concurrent try to write | ||
# to the same BED file (during bedtools | ||
# bam2bed that sicer calls). | ||
input_bam="$(basename "{input.c_option}")" | ||
cp {input.c_option} ${{tmp}} | ||
echo "paired-end with input... ${{tmp}}/${{input_bam}}" | ||
sicer \\ | ||
-t {input.chip} \\ | ||
-c "${{tmp}}/${{input_bam}}" \\ | ||
-s {params.genomever} \\ | ||
-rt 100 \\ | ||
-w 300 \\ | ||
-f ${{mean_insert_size}} \\ | ||
-egf {params.frac} \\ | ||
-g 600 \\ | ||
-fdr 1E-2 \\ | ||
-cpu 30 \\ | ||
-o ${{tmp}} | ||
mv ${{tmp}}/{params.name}.Q5DD-W300-G600-FDR0.01-island.bed {output.bed}; | ||
mv ${{tmp}}/{params.name}.Q5DD-W300-G600-islands-summary {params.this_sicer_dir} | ||
else | ||
echo "paired-end without input" | ||
sicer \\ | ||
-t {input.chip} \\ | ||
-s {params.genomever} \\ | ||
-rt 100 \\ | ||
-w 300 \\ | ||
-f ${{mean_insert_size}} \\ | ||
-egf {params.frac} \\ | ||
-g 600 \\ | ||
-e 100 \\ | ||
-cpu 30 \\ | ||
-o ${{tmp}} | ||
mv ${{tmp}}/{params.name}.Q5DD-W300-G600.scoreisland {params.this_sicer_dir} | ||
fi | ||
""" | ||
|
||
|
||
rule MACS2_narrow: | ||
input: | ||
chip = join(bam_dir, "{name}.Q5DD_tagAlign.gz"), | ||
txt = join(ppqt_dir, "{name}.Q5DD_tagAlign.ppqt.txt"), | ||
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD.bam"), | ||
output: | ||
join(macsN_dir, "{name}", "{name}_peaks.narrowPeak"), | ||
params: | ||
rname = 'MACS2_narrow', | ||
gsize = config['references'][genome]['EFFECTIVEGENOMESIZE'], | ||
macsver = config['tools']['MACSVER'], | ||
flag = lambda w: "-c" if chip2input[w.name] else "", | ||
shell: | ||
""" | ||
module load {params.macsver}; | ||
macs2 callpeak \\ | ||
-t {input.chip} {params.flag} {input.c_option} \\ | ||
-g {params.gsize} \\ | ||
-n {wildcards.name} \\ | ||
--outdir {macsN_dir}/{wildcards.name} \\ | ||
-q 0.01 \\ | ||
--keep-dup="all" \\ | ||
-f "BAMPE" | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# Quality control metrics and reports | ||
# ~~~~ | ||
# Paired-end peak qc rules | ||
|
||
rule fastq_screen: | ||
""" | ||
Quality-control step to screen for different sources of contamination. | ||
FastQ Screen compares your sequencing data to a set of different reference | ||
genomes to determine if there is contamination. It allows a user to see if | ||
the composition of your library matches what you expect. | ||
@Input: | ||
Trimmed FastQ files (scatter) | ||
@Output: | ||
FastQ Screen report and logfiles | ||
""" | ||
input: | ||
expand(join(trim_dir, "{name}.R{rn}.trim.fastq.gz"), name=samples, rn=ends) | ||
output: | ||
first_txt = expand(join( | ||
qc_dir, "FQscreen", "{name}.R{rn}.trim_screen.txt"), name=samples, rn=[1, 2] | ||
), | ||
first_png = expand(join( | ||
qc_dir, "FQscreen", "{name}.R{rn}.trim_screen.png"), name=samples, rn=[1, 2] | ||
), | ||
second_txt = expand(join( | ||
qc_dir, "FQscreen2", "{name}.R{rn}.trim_screen.txt"), name=samples, rn=[1, 2] | ||
), | ||
second_png = expand(join( | ||
qc_dir, "FQscreen2", "{name}.R{rn}.trim_screen.png"), name=samples, rn=[1, 2] | ||
) | ||
params: | ||
rname = 'fqscreen', | ||
outdir = join(qc_dir, "FQscreen"), | ||
outdir2 = join(qc_dir, "FQscreen2"), | ||
fastq_screen = config['bin']['FASTQ_SCREEN'], | ||
fastq_screen_config1 = config['shared_resources']['FASTQ_SCREEN_CONFIG_P1'], | ||
fastq_screen_config2 = config['shared_resources']['FASTQ_SCREEN_CONFIG_P2'], | ||
envmodules: | ||
config['tools']['BOWTIE2VER'], | ||
config['tools']['PERLVER'], | ||
threads: | ||
int(allocated("threads", "fastq_screen", cluster)) | ||
shell: | ||
""" | ||
# First pass of contamination screening | ||
{params.fastq_screen} \\ | ||
--conf {params.fastq_screen_config1} \\ | ||
--outdir {params.outdir} \\ | ||
--threads {threads} \\ | ||
--subset 1000000 \\ | ||
--aligner bowtie2 \\ | ||
--force \\ | ||
{input} | ||
# Second pass of contamination screening | ||
{params.fastq_screen} \\ | ||
--conf {params.fastq_screen_config2} \\ | ||
--outdir {params.outdir2} \\ | ||
--threads {threads} \\ | ||
--subset 1000000 \\ | ||
--aligner bowtie2 \\ | ||
--force \\ | ||
{input} | ||
""" | ||
|
||
rule kraken: | ||
""" | ||
Quality-control step to assess for potential sources of microbial contamination. | ||
If there are high levels of microbial contamination, Kraken will provide an | ||
estimation of the taxonomic composition. Kraken is used in conjunction with | ||
Krona to produce an interactive reports. | ||
@Input: | ||
Trimmed FastQ files (scatter) | ||
@Output: | ||
Kraken logfile and interative krona report | ||
""" | ||
input: | ||
fq1 = join(trim_dir, "{name}.R1.trim.fastq.gz"), | ||
fq2 = join(trim_dir, "{name}.R2.trim.fastq.gz") | ||
output: | ||
krakenout = join(kraken_dir, "{name}.trim.kraken_bacteria.out.txt"), | ||
krakentaxa = join(kraken_dir, "{name}.trim.kraken_bacteria.taxa.txt"), | ||
kronahtml = join(kraken_dir, "{name}.trim.kraken_bacteria.krona.html"), | ||
params: | ||
rname = 'kraken', | ||
outdir = kraken_dir, | ||
bacdb = config['shared_resources']['KRAKENBACDB'], | ||
tmpdir = tmpdir | ||
threads: | ||
int(allocated("threads", "kraken_pe", cluster)), | ||
envmodules: | ||
config['tools']['KRAKENVER'], | ||
config['tools']['KRONATOOLSVER'], | ||
shell: | ||
""" | ||
# Setups temporary directory for | ||
# intermediate files with built-in | ||
# mechanism for deletion on exit | ||
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi | ||
tmp=$(mktemp -d -p "{params.tmpdir}") | ||
trap 'rm -rf "${{tmp}}"' EXIT | ||
# Copy kraken2 db to /lscratch or temp | ||
# location to reduce filesystem strain | ||
cp -rv {params.bacdb} ${{tmp}}/; | ||
kdb_base=$(basename {params.bacdb}) | ||
kraken2 --db ${{tmp}}/${{kdb_base}} \\ | ||
--threads {threads} --report {output.krakentaxa} \\ | ||
--output {output.krakenout} \\ | ||
--gzip-compressed \\ | ||
--paired {input.fq1} {input.fq2} | ||
# Generate Krona Report | ||
cut -f2,3 {output.krakenout} | \\ | ||
ktImportTaxonomy - -o {output.kronahtml} | ||
""" |
Oops, something went wrong.