Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding new DiffBindQC for cfChIP #37

Merged
merged 1 commit into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,11 @@ if assay == "cfchip":
join(workpath,"QC","H3K4me3_cfChIP_signature.txt"),
expand(join(workpath,bw_dir,"{name}.{ext}.RPGC.bw"),name=samples, ext=["sorted", "Q5DD"]),
expand(join(workpath,bw_dir,"{name}.Q5DD.RPGC.inputnorm.bw"), name=sampleswinput),
expand(join(workpath,uropa_dir,'{PeakTool}','{name}_{PeakTool}_uropa_{type}_allhits.txt'),
expand(join(workpath,uropa_dir,'{PeakTool}','{name}_{PeakTool}_uropa_{type}_allhits.txt'),
PeakTool=PeakTools,name=chips,type=["protTSS"]),
expand(join(workpath, "QC", "AllSamples-{PeakTool}", "AllSamples-{PeakTool}_DiffBindQC_TMMcounts.bed"), PeakTool=PeakTools),
expand(join(workpath, uropa_dir, "QC", "AllSamples-macsNarrow_{PeakTool}_uropa_{type}_allhits.txt"),
PeakTool="DiffBindQC", type="protTSS"),
expand(join(workpath,uropa_dir,"promoterTable1",'{PeakTool}_promoter_overlap_summaryTable.txt'),PeakTool=PeakTools),
provided(expand(join(workpath,diffbind_dir,"{group1}_vs_{group2}-{PeakTool}","{group1}_vs_{group2}-{PeakTool}_Diffbind.html"),
zip,group1=zipGroup1,group2=zipGroup2,PeakTool=zipToolC), reps == "yes"),
Expand Down
29 changes: 28 additions & 1 deletion workflow/rules/cfChIP.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ rule cfChIPtool:
rver="R/4.1.0",
toolkit = config['references'][genome]['cfChIP_TOOLS_SRC'],
tmpfile = lambda w: join(workpath,cfTool_subdir2, w.name + ".Q5DD.tagAlign"),
tag=lambda w: temp(join(workpath,bam_dir, w.name+".Q5DD.tagAlign"))
tag=lambda w: temp(join(workpath,bam_dir, w.name+".Q5DD_tagAlign"))
container:
config['images']['cfchip']
shell: """
Expand Down Expand Up @@ -83,3 +83,30 @@ rule promoterTable2:
Rscript -e "source('{params.script2}'); promoterAnnotationWrapper('{output.txt}','{params.gtf}','Reactome')";
"""

rule diffbindQC:
input:
lambda w: [ join(workpath, w.PeakTool, chip, chip + PeakExtensions[w.PeakTool]) for chip in chips ]
output:
html = join(workpath, "QC", "AllSamples-{PeakTool}", "AllSamples-{PeakTool}_DiffBindQC.html"),
bed = join(workpath, "QC", "AllSamples-{PeakTool}", "AllSamples-{PeakTool}_DiffBindQC_TMMcounts.bed"),
params:
rname="diffbindQC",
rscript=join(workpath,"workflow","scripts","DiffBind_v2_cfChIP_QC.Rmd"),
outdir = join(workpath, "QC", "AllSamples-{PeakTool}"),
contrast = "AllSamples",
csvfile = join(workpath, "QC", "AllSamples-{PeakTool}", "AllSamples-{PeakTool}_DiffBind_prep.csv"),
pythonscript = join(workpath,"workflow","scripts","prep_diffbindQC.py"),
PeakExtension= lambda w: PeakExtensions[w.PeakTool],
PeakTool="{PeakTool}",
peakcaller= lambda w: FileTypesDiffBind[w.PeakTool],
container:
config['images']['cfchip']
shell: """
python {params.pythonscript} --wp {workpath} \
--pt {params.PeakTool} --pe {params.PeakExtension} --bd {bam_dir} \
--pc {params.peakcaller} --csv {params.csvfile}
cp {params.rscript} {params.outdir}
cd {params.outdir}
Rscript -e 'rmarkdown::render("DiffBind_v2_cfChIP_QC.Rmd", output_file= "{output.html}",
params=list(csvfile= "{params.csvfile}", contrasts= "{params.contrast}", peakcaller= "{params.PeakTool}"))'
"""
11 changes: 8 additions & 3 deletions workflow/rules/dba.smk
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ PeakExtensions = {
'DiffbindDeseq2': '_Diffbind_Deseq2.bed',
'DiffbindEdgeRBlock': '_Diffbind_EdgeR_block.bed',
'DiffbindDeseq2Block': '_Diffbind_Deseq2_block.bed',
'Genrich': '.narrowPeak'
'Genrich': '.narrowPeak',
'DiffBindQC': '_DiffBindQC_TMMcounts.bed'
}

FileTypesDiffBind = {
Expand Down Expand Up @@ -211,7 +212,9 @@ if assay == "cfchip":
input:
lambda w: [ join(workpath, w.PeakTool1, w.name, w.name + PeakExtensions[w.PeakTool2]) ]
output:
join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.txt')
txt=join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.txt'),
bed1=temp(join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.bed')),
bed2=temp(join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_finalhits.bed')),
params:
rname="uropa",
uropaver = config['tools']['UROPAVER'],
Expand Down Expand Up @@ -241,7 +244,9 @@ else:
input:
lambda w: [ join(workpath, w.PeakTool1, w.name, w.name + PeakExtensions[w.PeakTool2]) ]
output:
join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.txt')
txt=join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.txt'),
bed1=temp(join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_allhits.bed')),
bed2=temp(join(workpath, uropa_dir, '{PeakTool1}', '{name}_{PeakTool2}_uropa_{type}_finalhits.bed')),
params:
rname="uropa",
uropaver = config['tools']['UROPAVER'],
Expand Down
11 changes: 8 additions & 3 deletions workflow/scripts/DiffBind_v2_cfChIP_QC.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ if ( sum(samples$class["Condition",] != "") == ncol(samples$class) ) {
minOv <- floor(ncol(samples$class)/3)
}

print(paste0("The minimum number of overlaps is: ", minOv))

if (summits == TRUE) {
DBdataCounts <- dba.count(samples, summits=250, minOverlap = minOv)
} else {
Expand Down Expand Up @@ -147,9 +149,12 @@ consensus2 <- dba.peakset(DBdataCounts, bRetrieve=TRUE) %>% ##extracts TMM-norma
as.data.frame() %>% setNames(vec) %>% arrange(start, end) %>% mutate(Peaks = paste0("Peak",1:nrow(.))) %>%
dplyr::select(1:4, Peaks, samples$samples$SampleID)

outfile <- paste0("Count", "_", contrasts, "_", "TMM_min", minOv, ".csv")
write.csv(consensus2, outfile, row.names = F)
outfile1 <- paste0(contrasts, "-", peakcaller, "_DiffBindQC_TMMcounts.csv")
write.csv(consensus2, outfile1, row.names = F)

outfile2 <- paste0(contrasts, "-", peakcaller, "_DiffBindQC_TMMcounts.bed")
write.table(consensus2[,c("seqnames","start","end","Peaks")],
outfile2, quote=F, sep="\t", row.names=F, col.names=F)

counts_TMM_ALL <- consensus2
rownames(counts_TMM_ALL) <- counts_TMM_ALL$Peaks
Expand All @@ -166,7 +171,7 @@ if (nrow(samples$samples) < 16) {
}
umap_coord <-as.data.frame(umap_coord$layout) %>% setNames(c("UMAP1", "UMAP2"))

outfile <- paste0("UMAP", "_", contrasts, "_", "TMM_min", minOv, ".csv")
outfile <- paste0(contrasts, "-", peakcaller, "_DiffBindQC_UMAP.csv")
write.csv(umap_coord, outfile, row.names = F)
```

Expand Down
51 changes: 51 additions & 0 deletions workflow/scripts/prep_diffbindQC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3

import json
import argparse

parser = argparse.ArgumentParser(description='Script to prepare the DiffBind input csv')
parser.add_argument('--wp',dest='workpath',required=True,help='Full path of the working directory')
parser.add_argument('--pt',dest='peaktool',required=True,help='Name of the the peak calling tool, also the directory where the peak file will be located')
parser.add_argument('--pe',dest='peakextension',required=True,help='The file extension of the peakcall output')
parser.add_argument('--pc',dest='peakcaller',required=True,help='Value for the PeakCaller column of the DiffBind csv')
parser.add_argument('--bd',dest='bamdir',required=True,help='Name of the directory where the bam files are located')
parser.add_argument('--csv',dest='csvfile',required=True,help='Name of the output csv file')

args = parser.parse_args()

with open("config.json","r") as read_file:
config=json.load(read_file)

chip2input = config['project']['peaks']['inputs']
groupdata = config['project']['groups']

tmpIDs = [x for xs in groupdata.values() for x in xs]
Ncounts = [tmpIDs.count(tmp) for tmp in set(tmpIDs)]

samplesheet = [",".join(["SampleID","Condition", "Replicate", "bamReads",
"ControlID", "bamControl", "Peaks", "PeakCaller"])]

count = 1
for chip in chip2input.keys():
if set(Ncounts) == {1}: # if all samples only in one group
for key in groupdata.keys():
if chip in groupdata[key]:
condition = key
replicate = str([ i + 1 for i in range(len(groupdata[condition])) if groupdata[condition][i]== chip ][0])
else:
condition = ""
replicate = str(count)
count = count +1
bamReads = args.workpath + "/" + args.bamdir + "/" + chip + ".Q5DD.bam"
controlID = chip2input[chip]
if controlID != "":
bamControl = args.workpath + "/" + args.bamdir + "/" + controlID + ".Q5DD.bam"
else:
bamControl = ""
peaks = args.workpath + "/" + args.peaktool + "/" + chip + "/" + chip + args.peakextension
samplesheet.append(",".join([chip, condition, replicate, bamReads,
controlID, bamControl, peaks, args.peakcaller]))

f = open(args.csvfile, 'w')
f.write ("\n".join(samplesheet))
f.close()
Loading