Skip to content

Commit

Permalink
fix: ppqt routing and acceptance of tagalign
Browse files Browse the repository at this point in the history
  • Loading branch information
rroutsong committed Jan 2, 2025
1 parent f3bf548 commit 5d05803
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 46 deletions.
Empty file modified bin/DiffBind_v2_Deseq2_block.Rmd
100644 → 100755
Empty file.
Empty file modified bin/DiffBind_v2_EdgeR.Rmd
100644 → 100755
Empty file.
Empty file modified bin/DiffBind_v2_EdgeR_block.Rmd
100644 → 100755
Empty file.
43 changes: 24 additions & 19 deletions bin/ppqt_process.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
#!/usr/bin/env python3

#Purpose: To grab the estimated fragment length from the ppqt output and a small txt with that information. For input files, adding an extra value of 200bp as an alternative.
# Purpose:
# To grab the estimated fragment length from the ppqt output and a
# small txt with that information. For input files, adding an extra
# value of 200bp as an alternative.
import argparse
parser = argparse.ArgumentParser(description='Script to extract the the estimated fragment length from the ppqt output.')
parser.add_argument('-i', required=True,help='Name of the ppqt txt file')
parser.add_argument('-o', required=True,help='Name of the output file')
args = parser.parse_args()

output = args.o
inppqt = args.i

o=open(output,'w')
def main(args):
cln_file = lambda z: z.strip().split()
this_file = list(map(cln_file, open(args.ppqt, "r").readlines()))
ppqt_values = this_file[0][2].split(",")
frag_len = None
for ppqt_value in ppqt_values:
if int(ppqt_value) > 150:
frag_len = ppqt_value
break

file = list(map(lambda z:z.strip().split(),open(inppqt,'r').readlines()))
if frag_len is None:
frag_len = 200
print(frag_len)
return


ppqt_values = file[0][2].split(",")
extenders = []
for ppqt_value in ppqt_values:
if int(ppqt_value) > 150:
extenders.append(ppqt_value)
if len(extenders) > 0:
o.write(extenders[0])
else:
o.write("200")
o.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Script to extract the the estimated fragment length from the ppqt output."
)
parser.add_argument("ppqt", help="Name of the ppqt txt file")
args = parser.parse_args()
main(args)
5 changes: 5 additions & 0 deletions config/cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@
"mem": "96g",
"threads": "2"
},
"ppqt_tagalign": {
"threads": "16",
"mem": "24g"
},
"ppqt": {
"threads": "16",
"mem": "24g"
},
"bam2bw": {
Expand Down
2 changes: 1 addition & 1 deletion src/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ def runner(
threads=2,
jobname="pl:master",
submission_script="run.sh",
tmp_dir="/lscratch/$SLURM_JOBID/",
tmp_dir="/lscratch/$SLURM_JOB_ID/",
):
"""Runs the pipeline via selected executor: local or slurm.
If 'local' is selected, the pipeline is executed locally on a compute node/instance.
Expand Down
33 changes: 22 additions & 11 deletions workflow/rules/single/peakcall.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,20 @@ genome = config['options']['genome']
tmpdir = config['options']['tmp_dir']


wildcard_constraints:
# - regex to avoid backslashes in name wild cards
# this corrects routing for `ppqt` and
# `ppqt_tagalign` rules
# - also possible to use negative look around regex:
# "^((?!\/).)*$"
name = "[A-Za-z0-9_-]+"


rule MACS2_broad:
input:
chip = join(bam_dir, "{name}.Q5DD_tagAlign.gz"),
txt = join(ppqt_dir, "{name}.Q5DD_tagAlign.ppqt.txt"),
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD_tagAlign.gz")
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD.bam")
if w.name and chip2input[w.name] else [],
output:
join(macsB_dir, "{name}", "{name}_peaks.broadPeak"),
Expand All @@ -22,10 +31,11 @@ rule MACS2_broad:
gsize = config['references'][genome]['EFFECTIVEGENOMESIZE'],
macsver = config['tools']['MACSVER'],
flag = lambda w: "-c" if chip2input[w.name] else "",
frag_len_script = join(bin_path, "ppqt_process.py"),
shell:
"""
module load {params.macsver};
ppqt_len=$(awk '{{print $1}}' {input.txt})
ppqt_len=$({params.frag_len_script} {input.txt})
macs2 callpeak \\
-t {input.chip} {params.flag} {input.c_option} \\
-g {params.gsize} \\
Expand All @@ -41,21 +51,22 @@ rule MACS2_broad:

rule MACS2_narrow:
input:
chip = join(bam_dir, "{name}.Q5DD.bam"),
txt = join(ppqt_dir, "{name}.Q5DD.ppqt.txt"),
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD_tagAlign.gz")
if w.name and chip2input[w.name] else [],
chip = join(bam_dir, "{name}.Q5DD.bam"),
txt = join(ppqt_dir, "{name}.Q5DD.ppqt.txt"),
c_option = lambda w: join(bam_dir, f"{chip2input[w.name]}.Q5DD.bam")
if w.name and chip2input[w.name] else [],
output:
join(macsN_dir, "{name}", "{name}_peaks.narrowPeak"),
params:
rname = 'MACS2_narrow',
gsize = config['references'][genome]['EFFECTIVEGENOMESIZE'],
macsver = config['tools']['MACSVER'],
flag = lambda w: "-c" if chip2input[w.name] else "",
rname = 'MACS2_narrow',
gsize = config['references'][genome]['EFFECTIVEGENOMESIZE'],
macsver = config['tools']['MACSVER'],
flag = lambda w: "-c" if chip2input[w.name] else "",
frag_len_script = join(bin_path, "ppqt_process.py"),
shell:
"""
module load {params.macsver};
ppqt_len=$(awk '{{print $1}}' {input.txt})
ppqt_len=$({params.frag_len_script} {input.txt})
macs2 callpeak \\
-t {input.chip} {params.flag} {input.c_option} \\
-g {params.gsize} \\
Expand Down
47 changes: 32 additions & 15 deletions workflow/rules/single/trim_align_dedup.smk
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ bw_dir = join(workpath, "bigwig")
ppqt_dir = join(bam_dir, "ppqt")


wildcard_constraints:
# - regex to avoid backslashes in name wild cards
# this corrects routing for `ppqt` and
# `ppqt_tagalign` rules
# - also possible to use negative look around regex:
# "^((?!\/).)*$"
name = "[A-Za-z0-9_-]+"


rule trim:
"""
Data-processing step to remove adapter sequences and perform quality trimming
Expand Down Expand Up @@ -215,25 +224,27 @@ rule ppqt:
output:
ppqt = join(ppqt_dir, "{name}.{ext}.ppqt.txt"),
pdf = join(ppqt_dir, "{name}.{ext}.pdf"),
txt = join(ppqt_dir, "{name}.{ext}.txt"),
params:
rname = "ppqt",
samtoolsver = config['tools']['SAMTOOLSVER'],
rver = config['tools']['RVER'],
scriptPy = join(bin_path, "ppqt_process.py"),
tmpdir = tmpdir,
container:
config['images']['ppqt']
threads:
cluster['ppqt'].get('threads', cluster['__default__']['threads'])
shell:
"""
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
tmp=$(mktemp -d -p "{params.tmpdir}")
trap 'rm -rf "${{tmp}}"' EXIT
run_spp.R -c={input.bam} \
-savp={output.pdf} -out={output.ppqt} \
-tmpdir=/lscratch/$SLURM_JOBID -rf
python {params.scriptPy} -i {output.ppqt} -o {output.txt}
run_spp.R \\
-c={input.bam} \\
-savp={output.pdf} \\
-out={output.ppqt} \\
-p={threads} \\
-tmpdir=${{tmp}}
"""


Expand All @@ -245,32 +256,37 @@ rule ppqt_tagalign:
pdf = join(ppqt_dir, "{name}.Q5DD_tagAlign.pdf"),
txt = join(ppqt_dir, "{name}.Q5DD_tagAlign.txt"),
params:
rname = "ppqt",
rname = "ppqt_tagalign",
samtoolsver = config['tools']['SAMTOOLSVER'],
rver = config['tools']['RVER'],
scriptPy = join(bin_path, "ppqt_process.py"),
tmpdir = tmpdir,
container:
config['images']['ppqt']
# "docker://seqeralabs/phantompeakqualtools:latest"
threads:
cluster['ppqt_tagalign'].get('threads', cluster['__default__']['threads'])
shell:
"""
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
tmp=$(mktemp -d -p "{params.tmpdir}")
trap 'rm -rf "${{tmp}}"' EXIT
ln -s {input.bam} ${{tmp}}/{wildcards.name}.Q5DD_tagAlign.gz;
run_spp.R -c=${{tmp}}/{wildcards.name}.Q5DD_tagAlign.gz \
-savp={output.pdf} -out={output.ppqt} \
-tmpdir=/lscratch/$SLURM_JOBID -rf
python {params.scriptPy} -i {output.ppqt} -o {output.txt}
ln -s {input.bam} ${{tmp}}/{wildcards.name}.Q5DD_tagAlign.gz
zcat ${{tmp}}/{wildcards.name}.Q5DD_tagAlign.gz | head
run_spp.R \\
-c=${{tmp}}/{wildcards.name}.Q5DD_tagAlign.gz \\
-savp={output.pdf} \\
-out={output.ppqt} \\
-p={threads} \\
-tmpdir=${{tmp}}
"""


rule bam2bw:
"""
bamCoverage converts bams to bigwig files for read visialization
in UCSC Genome Browser or IGV.
@Input:
Files with extensions: sorted.bam and Q5DD.bam,
for all samples (treatment and input controls)
Expand All @@ -288,6 +304,7 @@ rule bam2bw:
name = "{name}",
effectivegenomesize = config['references'][genome]['EFFECTIVEGENOMESIZE'],
tmpdir = tmpdir,
frag_len_script = join(bin_path, "ppqt_process.py"),
threads:
int(allocated("threads", "bam2bw", cluster)),
envmodules:
Expand All @@ -298,7 +315,7 @@ rule bam2bw:
tmp=$(mktemp -d -p "{params.tmpdir}")
trap 'rm -rf "${{tmp}}"' EXIT
ppqt_len=$(awk '{{print $1}}' {input.ppqt})
ppqt_len=$({params.frag_len_script} {input.ppqt})
bamCoverage \\
--bam {input.bam} \\
Expand Down

0 comments on commit 5d05803

Please sign in to comment.