Skip to content

Commit

Permalink
adding chip motif analysis tool, MEME rule
Browse files Browse the repository at this point in the history
  • Loading branch information
asyakhl committed Nov 16, 2023
1 parent 6f9bf2d commit ebccfe5
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 5 deletions.
20 changes: 16 additions & 4 deletions config/genome.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
"GENOMECHR": "/data/OpenOmics/references/cfChIP-seek/hg19_basic/Chromsomes/",
"GTFFILE": "/data/OpenOmics/references/cfChIP-seek/hg19_basic/genes.gtf",
"REFLEN": "/data/OpenOmics/references/cfChIP-seek/hg19_basic/indexes/hg19.fa.sizes",
"FRAC": "0.8970265870106838"
"FRAC": "0.8970265870106838",
"MEME_VERTEBRATES_DB": "/fdb/meme/motif_databases/JASPAR/JASPAR2018_CORE_vertebrates_non-redundant.meme",
"MEME_EUKARYOTE_DB": "/fdb/meme/motif_databases/EUKARYOTE/jolma2013.meme",
"MEME_GENOME_DB": "/fdb/meme/motif_databases/HUMAN/HOCOMOCOv11_full_HUMAN_mono_meme_format.meme"
},
"hg38": {
"ALIAS": "hg38",
Expand All @@ -28,7 +31,10 @@
"GENOMECHR": "/data/OpenOmics/references/chrom-seek/hg38_basic/Chromsomes/",
"GTFFILE": "/data/OpenOmics/references/chrom-seek/hg38_basic/genes.gtf",
"REFLEN": "/data/OpenOmics/references/chrom-seek/hg38_basic/indexes/hg38.fa.sizes",
"FRAC": "0.9084767300375779"
"FRAC": "0.9084767300375779",
"MEME_VERTEBRATES_DB": "/fdb/meme/motif_databases/JASPAR/JASPAR2018_CORE_vertebrates_non-redundant.meme",
"MEME_EUKARYOTE_DB": "/fdb/meme/motif_databases/EUKARYOTE/jolma2013.meme",
"MEME_GENOME_DB": "/fdb/meme/motif_databases/HUMAN/HOCOMOCOv11_full_HUMAN_mono_meme_format.meme"
},
"mm10": {
"ALIAS": "mm10",
Expand All @@ -43,7 +49,10 @@
"GENOMECHR": "/data/OpenOmics/references/chrom-seek/mm10_basic/Chromsomes/",
"GTFFILE": "/data/OpenOmics/references/chrom-seek/mm10_basic/genes.gtf",
"REFLEN": "/data/OpenOmics/references/chrom-seek/mm10_basic/indexes/mm10.fa.sizes",
"FRAC": "0.9053190260640642"
"FRAC": "0.9053190260640642",
"MEME_VERTEBRATES_DB": "/fdb/meme/motif_databases/JASPAR/JASPAR2018_CORE_vertebrates_non-redundant.meme",
"MEME_EUKARYOTE_DB": "/fdb/meme/motif_databases/EUKARYOTE/jolma2013.meme",
"MEME_GENOME_DB": "/fdb/meme/motif_databases/MOUSE/HOCOMOCOv11_full_MOUSE_mono_meme_format.meme"
},
"rheMac10": {
"ALIAS": "rheMac10",
Expand All @@ -58,7 +67,10 @@
"GENOMECHR": "",
"GTFFILE": "/data/OpenOmics/references/chrom-seek/rheMac10/Macaca_mulatta.Mmul_10.110.chr.gtf",
"REFLEN": "/data/OpenOmics/references/chrom-seek/rheMac10/Macaca_mulatta.Mmul_10.dna.chr.sizes",
"FRAC": "0.9643910116868353"
"FRAC": "0.9643910116868353",
"MEME_VERTEBRATES_DB": "/fdb/meme/motif_databases/JASPAR/JASPAR2018_CORE_vertebrates_non-redundant.meme",
"MEME_EUKARYOTE_DB": "/fdb/meme/motif_databases/EUKARYOTE/jolma2013.meme",
"MEME_GENOME_DB": ""
}
}
}
6 changes: 5 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ if assay == "cfchip":
provided(expand(join(workpath,uropa_dir,diffbind_dir,'{name}_{PeakTool}_uropa_{type}_allhits.txt'),
PeakTool=['DiffbindEdgeR','DiffbindDeseq2'],name=contrasts,type=["protTSS"]), reps == "yes"),
provided(expand(join(workpath,uropa_dir,"promoterTable2",'DiffbindDeseq2_{PeakTool}_promoter_overlap_summaryTable.txt'),
PeakTool=PeakTools),reps == "yes" and contrast)
PeakTool=PeakTools),reps == "yes" and contrast),
expand(join(workpath, "MEME", "{PeakTool}", "{name}_meme", "meme-chip.html"), PeakTool=PeakTools, name=chips),
expand(join(workpath, "MEME", "{PeakTool}", "{name}_ame", "ame.html"), PeakTool=PeakTools, name=chips)

elif assay in ["atac", "chip"]:
rule all:
Expand Down Expand Up @@ -239,6 +241,8 @@ elif assay in ["atac", "chip"]:
provided(expand(join(workpath,bam_dir,ppqt_dir,"{name}.{ext}.ppqt"), name=samples, ext=["sorted", "Q5DD_tagAlign"]), paired_end == False and assay=="chip"),
provided(expand(join(workpath,bam_dir,ppqt_dir,"{name}.{ext}.pdf"), name=samples, ext=["sorted", "Q5DD_tagAlign"]), paired_end == False and assay=="chip"),
provided(expand(join(workpath,bam_dir,ppqt_dir,"{name}.{ext}.ppqt.txt"),name=samples, ext=["sorted", "Q5DD_tagAlign"]), paired_end == False and assay=="chip"),
expand(join(workpath, "MEME", "{PeakTool}", "{name}_meme", "meme-chip.html"), PeakTool=PeakTools, name=chips),
expand(join(workpath, "MEME", "{PeakTool}", "{name}_ame", "ame.html"), PeakTool=PeakTools, name=chips)


#############################
Expand Down
36 changes: 36 additions & 0 deletions workflow/rules/peakcall.smk
Original file line number Diff line number Diff line change
Expand Up @@ -286,4 +286,40 @@ rule SICER:
mv ${{tmp}}/{params.name}-W300-G600.scoreisland {output.bed}
fi
fi
"""

rule MEME:
input:
bed = lambda w: join(workpath, w.PeakTool, w.name, w.name + PeakExtensions[w.PeakTool])
output:
meme_out = join(workpath, "MEME", "{PeakTool}", "{name}_meme", "meme-chip.html"),
ame_out = join(workpath, "MEME", "{PeakTool}", "{name}_ame", "ame.html")
params:
rname='MEME',
ref_fa=config['references'][genome]['GENOME'],
meme_vertebrates_db=config['references'][genome]['MEME_VERTEBRATES_DB'],
meme_euk_db=config['references'][genome]['MEME_EUKARYOTE_DB'],
meme_genome_db=config['references'][genome]['MEME_GENOME_DB'],
oc=join(workpath, "MEME", "{PeakTool}", "{name}"),
tmpdir=tmpdir,
outfa="{name}.fa",
ntasks=int(28)
shell: """
module load meme
module load bedtools
if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi
tmp=$(mktemp -d -p "{params.tmpdir}")
trap 'rm -rf "${{tmp}}"' EXIT
bedtools getfasta -fi {params.ref_fa} -bed {input.bed} -fo ${{tmp}}/{params.outfa}
meme-chip \\
--oc {params.oc}_meme \\
-db {params.meme_vertebrates_db} \\
-meme-searchsize 34000000 \\
-meme-p {params.ntasks} \\
${{tmp}}/{params.outfa}
ame \\
--oc {params.oc}_ame ${{tmp}}/{params.outfa} \\
{params.meme_euk_db} {params.meme_vertebrates_db} {params.meme_genome_db}
"""

0 comments on commit ebccfe5

Please sign in to comment.