Commit

fix: finalize validation fixes
rroutsong committed Sep 9, 2024
1 parent 74d241b commit 5e847c5
Showing 5 changed files with 55 additions and 27 deletions.
3 changes: 1 addition & 2 deletions bin/DiffBind_v2_Deseq2.Rmd
@@ -29,13 +29,12 @@ params:
# inputs
dateandtime <- format(Sys.time(), "%a %b %d %Y - %X")
csvfile <- params$csvfile
outbase <- dirname(csvfile)
contrasts <- params$contrasts
peakcaller <- params$peakcaller
peak_counts <- params$counts
up_file <- params$up_file
down_file <- params$down_file
list_file <- params$counts
list_file <- params$list_file
# knitr options
knitr::opts_chunk$set(echo = FALSE, include=TRUE, message=FALSE, warning=FALSE, error=FALSE)
10 changes: 5 additions & 5 deletions bin/DiffBind_v2_EdgeR.Rmd
@@ -145,30 +145,30 @@ DBReportEdgeR <- dba.report(DBAnalysisEdgeR, method = DBA_EDGER)
Variance of differential peaks only

```{r PCA3, fig.height=5,fig.width=5}
dba.plotPCA(DBAnalysisEdgeR, contrast=1, method=DBA_EdgeR)
dba.plotPCA(DBAnalysisEdgeR, contrast=1, method=DBA_EDGER)
```

## MA plot
"Log concentration" means average concentration across all samples.
Each dot is a consensus peak.

```{r MA_D}
dba.plotMA(DBAnalysisEdgeR, method=DBA_EdgeR)
dba.plotMA(DBAnalysisEdgeR, method=DBA_EDGER)
```

## Volcano plot
Each dot is a consensus peak.

```{r Volcano1}
dba.plotVolcano(DBAnalysisEdgeR, method=DBA_EdgeR)
dba.plotVolcano(DBAnalysisEdgeR, method=DBA_EDGER)
```


## Heatmap: Differential
1000 most significant differential peaks (EdgeR or EdgeR normalized)

```{r heatmap4D}
dba.plotHeatmap(DBAnalysisEdgeR, contrast=1, method=DBA_EdgeR, correlations=FALSE, margin=20, cexRow=1, cexCol=1)
dba.plotHeatmap(DBAnalysisEdgeR, contrast=1, method=DBA_EDGER, correlations=FALSE, margin=20, cexRow=1, cexCol=1)
```

## Top 500 or less differentially bound peaks
@@ -186,7 +186,7 @@ DT::datatable(data.frame(DBReportEdgeR)[1:i,], rownames=F)
report2 <- dba.report(
DBAnalysisEdgeR,
method = DBA_EdgeR,
method = DBA_EDGER,
th=100,
bNormalized=T,
bFlip=FALSE,
10 changes: 7 additions & 3 deletions bin/DiffBind_v2_EdgeR_block.Rmd
@@ -179,14 +179,18 @@ dba.plotHeatmap(DBAnalysisEdgeR, contrast=1, method=DBA_EDGER_BLOCK, correlation

```{r EdgeRReport}
UpPeaks <- DBReportEdgeR[which(DBReportEdgeR$Fold > 0)]
try(rtracklayer::export(UpPeaks, up_file),silent=TRUE)
print("up ")
print(dim(UpPeaks))
rtracklayer::export(UpPeaks, up_file)
DownPeaks <- DBReportEdgeR[which(DBReportEdgeR$Fold < 0)]
try(rtracklayer::export(DownPeaks, down_file),silent=TRUE)
print("down ")
print(dim(DownPeaks))
rtracklayer::export(DownPeaks, down_file)
D2i <- length(DBReportEdgeR)
i <- as.integer(min(c(500, as.integer(max(c(D2i, 1))))))
try(DT::datatable(data.frame(DBReportEdgeR)[1:i,], rownames=F),silent=TRUE)
DT::datatable(data.frame(DBReportEdgeR)[1:i,], rownames=F)
report2 <- dba.report(
DBAnalysisEdgeR,
5 changes: 0 additions & 5 deletions bin/DiffBind_v2_load.R
@@ -42,11 +42,6 @@ DBdataCounts <- dba.count(samples, summits=summits_arg, bParallel=T)

# save counts
saveRDS(DBdataCounts, counts)
cp_cmd <- paste("cp ", counts, " /data/OpenOmics/dev/datasets/outputs/w_blocking/")
print(cp_cmd)
system(cp_cmd, wait=TRUE)
head <- system(paste("head ", counts), wait=TRUE)
print(head)

# save peaklist
consensus <- dba.peakset(DBdataCounts, bRetrieve=T)
54 changes: 42 additions & 12 deletions workflow/rules/dba.smk
@@ -1,7 +1,6 @@
# Differential binding analysis rules
# ~~~~
import os
import json
from os.path import join
from textwrap import dedent
from scripts.common import allocated, mk_dir_if_not_exist
@@ -390,7 +389,7 @@ rule diffbind_edger_blocking:
#!/bin/bash
Rscript -e 'rmarkdown::render("{params.blocking_rscript}", output_file="{output.diffbind_block_report}",
params=list(csvfile="{input.csvfile}", peakcaller="{params.this_peaktool}", list_file="{output.full_list}",
contrasts="{params.this_contrast}", counts="{input.peak_counts}"))'
up_file="{output.up_file}", down_file="{output.down_file}", contrasts="{params.this_contrast}", counts="{input.peak_counts}"))'
EOF
chmod +x ${{tmp}}/rscript.sh
@@ -407,12 +406,25 @@ rule UROPA_prep_in:
lambda w: join(workpath, w.PeakTool1, w.name, w.name + PeakExtensions[w.PeakTool2])
params:
fldr = join(uropa_dir, "{PeakTool1}"),
log:
join(uropa_dir, "{PeakTool1}", "{name}.{PeakTool2}.log")
output:
json = [
join(uropa_dir, "{PeakTool1}", "{name}.{PeakTool2}."+pktype+".json") \
for pktype in peak_types
join(
uropa_dir,
"{PeakTool1}",
"{name}.{PeakTool2}."+pktype+".json"
) for pktype in peak_types
],
reformat_bed = [
join(
uropa_dir,
"{PeakTool1}",
"{name}_{PeakTool2}_uropa_"+pktype+"_input.bed"
) for pktype in peak_types
],
run:
import json, csv
# Dynamically creates UROPA config file
if not os.path.exists(params.fldr):
os.mkdir(params.fldr, mode=0o775)
@@ -426,13 +438,31 @@

assert len(input) == len(peak_types)

for peak_type in peak_types:
for i, peak_type in enumerate(peak_types):
json_construct = dict()
json_construct['queries'] = []
json_construct['show_attributes'] = ["gene_id", "gene_name", "gene_type"]
json_construct["priority"] = "Yes"
json_construct['gtf'] = gtf
json_construct['bed'] = input[0]

# reformat to standard bed
rdr = csv.DictReader(input[i], delimiter='\t')
csv_map = {
'seqnames': 'chr',
'start': 'start',
'end': 'stop',
'strand': 'strand'
}
newbed = []
for row in rdr:
row = {csv_map[k.lower()]: v for k, v in row.items() if k.lower() in csv_map}
newbed.append(row)
with open(output.reformat_bed[i], 'w') as fo:
wrt = csv.DictWriter(fo, fieldnames=csv_map.values())
# wrt.writeheader() # bed file wo header row
wrt.writerows(newbed)
json_construct['bed'] = output.reformat_bed[i]

if assay == 'cfchip':
if peak_type == 'protTSS':
for _d in (3000, 10000, 100000):
@@ -477,18 +507,18 @@
this_q['distance'] = _d
json_construct['queries'].append(this_q)

with open(output.json[peak_types.index(peak_type)], 'w') as jo:
with open(output.json[i], 'w') as jo:
json.dump(json_construct, jo, indent=4)
jo.close()

if not os.path.exists(output.json[peak_types.index(peak_type)]):
raise FileNotFoundError(output.json[peak_types.index(peak_type)] + " does not exist!")
if not os.path.exists(output.json[i]):
raise FileNotFoundError(output.json[i] + " does not exist!")



rule UROPA:
input:
json = join(uropa_dir, "{PeakTool1}", "{name}.{PeakTool2}.{_type}.json"),
this_json = join(uropa_dir, "{PeakTool1}", "{name}.{PeakTool2}.{_type}.json"),
output:
txt = join(
uropa_dir,
Expand All @@ -509,10 +539,10 @@ rule UROPA:
params:
rname = "uropa",
outroot = join(uropa_dir, "{PeakTool1}", "{name}_{PeakTool2}_uropa_{_type}"),
threads: int(allocated("threads", "UROPA", cluster))
threads: int(allocated("threads", "UROPA", cluster))
container:
config["images"]["uropa"]
shell:
"""
uropa -i {input.json} -p {params.outroot} -l {output.log} -t {threads} -s
uropa -i {input.this_json} -p {params.outroot} -l {output.log} -t {threads} -s
"""

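For context, each JSON that `UROPA_prep_in` writes is a small UROPA configuration: a `gtf`, a `bed` (now the reformatted file), a `priority` flag, `show_attributes`, and one query per annotation distance. The following is a rough sketch of that structure for the cfchip `protTSS` branch, with placeholder paths; the real rule also fills query fields (feature, filters, anchor) that sit outside this diff hunk, so only the distances appear here.

```python
import json

# Placeholder paths; the rule substitutes the workflow's GTF and the reformatted BED.
gtf = "annotation.gtf"
bed = "uropa_input.bed"

# Top-level layout mirrors json_construct in UROPA_prep_in.
config = {
    "queries": [],
    "show_attributes": ["gene_id", "gene_name", "gene_type"],
    "priority": "Yes",
    "gtf": gtf,
    "bed": bed,
}

# One query per distance, as in the protTSS branch; the remaining query fields
# are set elsewhere in the rule and are not shown in this excerpt.
for distance in (3000, 10000, 100000):
    config["queries"].append({"distance": distance})

# Write the per-peak-type config (the output name is a placeholder).
with open("protTSS.json", "w") as jo:
    json.dump(config, jo, indent=4)
```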