Skip to content

Commit

Permalink
Merge branch 'develop' into 'master'
Browse files Browse the repository at this point in the history
Release v1.0.0

See merge request tron/tronflow-mutect2!8
  • Loading branch information
Pablo Riesgo Ferreiro committed May 26, 2021
2 parents abee754 + 2320c1f commit 76b9a7d
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 83 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/shelf/
/workspace.xml
work
.nextflow.log
.nextflow.log*
report.html
timeline.html
trace.txt
Expand Down
16 changes: 13 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

all : clean test check

clean:
rm -rf output
rm -rf work
#rm -rf work
rm -f report.html*
rm -f timeline.html*
rm -f trace.txt*
Expand All @@ -10,5 +13,12 @@ clean:


test:
nextflow main.nf -profile test,conda
nextflow main.nf -profile test,conda --disable_common_germline_filter
nextflow main.nf -profile test,conda --output output/test1
nextflow main.nf -profile test,conda --disable_common_germline_filter --output output/test2
nextflow main.nf -profile test,conda --input_files test_data/test_input_with_replicates.txt --output output/test3


# Verify the results of the three runs launched by the `test` target: each run
# publishes into its own folder (output/test1..3), and `test -s` requires the
# final Mutect2 VCF to exist and be non-empty. On the first missing or empty
# file a message is printed and the recipe exits with status 1.
check:
test -s output/test1/sample_name/sample_name.mutect2.vcf || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/sample_name/sample_name.mutect2.vcf || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test3/sample_name_with_replicates/sample_name_with_replicates.mutect2.vcf || { echo "Missing test 3 output file!"; exit 1; }
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ A nextflow (Di Tommaso, 2017) pipeline implementing the Mutect2 (Benjamin, 2019)
```
$ nextflow run tron-bioinformatics/tronflow-mutect2 -profile conda --help
Usage:
nextflow main.nf --input_files input_files
nextflow run tron-bioinformatics/tronflow-mutect2 -profile conda --input_files input_files [--reference reference.fasta]
This workflow is based on the implementation at /code/iCaM/scripts/mutect2_ID.sh
Expand Down
78 changes: 23 additions & 55 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,7 @@ params.cpus = 2
params.disable_common_germline_filter = false

def helpMessage() {
log.info"""
Usage:
nextflow run tron-bioinformatics/tronflow-mutect2 -profile conda --input_files input_files [--reference reference.fasta]
This workflow is based on the implementation at /code/iCaM/scripts/mutect2_ID.sh
Input:
* input_files: the path to a tab-separated values file containing in each row the sample name, tumor bam and normal bam
The input file does not have header!
Example input file:
name1 tumor_bam1 normal_bam1
name2 tumor_bam2 normal_bam2
Optional input:
* reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
* intervals: path to a BED file containing the regions to analyse
* gnomad: path to the gnomad VCF
* NOTE: if any of the above parameters is not provided, default hg19 resources will be used
* output: the folder where to publish output
* memory: the amount of memory used by each job (default: 16g)
* cpus: the number of CPUs used by each job (default: 2)
* disable_common_germline_filter: disable the use of GnomAD to filter out common variants in the population
from the somatic calls. The GnomAD resource is still required though as these common SNPs are used elsewhere to
calculate the contamination (default: false)
Output:
* Output VCF
* Other intermediate files
"""
log.info params.help_message
}

if (params.help) {
Expand All @@ -56,19 +28,13 @@ if (params.input_files) {
Channel
.fromPath(params.input_files)
.splitCsv(header: ['name', 'tumor_bam', 'normal_bam'], sep: "\t")
.map{ row->
tuple(row.name,
file(row.tumor_bam), file(row.tumor_bam + ".bai"),
file(row.normal_bam), file(row.normal_bam + ".bai")
) }
.map{ row-> tuple(row.name, row.tumor_bam, row.normal_bam) }
.set { input_files }

Channel
.fromPath(params.input_files)
.splitCsv(header: ['name', 'tumor_bam', 'normal_bam'], sep: "\t")
.map{ row->
tuple(row.name,
file(row.tumor_bam), file(row.tumor_bam + ".bai")) }
.map{ row-> tuple(row.name, row.tumor_bam) }
.set { tumor_bams }
} else {
exit 1, "Input file not specified!"
Expand All @@ -78,38 +44,38 @@ process mutect2 {
cpus params.cpus
memory params.memory
tag "${name}"
publishDir "${params.output}", mode: "copy"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(tumor_bam), file(tumor_bai), file(normal_bam), file(normal_bai) from input_files
set name, tumor_bam, normal_bam from input_files

output:
set val("${name}"), file("${name}.unfiltered.vcf"), file("${name}.unfiltered.vcf.stats") into unfiltered_vcfs
set val("${name}"), file("${name}.f1r2.tar.gz") into f1r2_stats
set val("${name}"), file("${name}.mutect2.unfiltered.vcf"), file("${name}.mutect2.unfiltered.vcf.stats") into unfiltered_vcfs
set val("${name}"), file("${name}.f1r2.tar.gz") into f1r2_stats

script:
normal_panel_option = params.pon ? "--panel-of-normals ${params.pon}" : ""
germline_filter = params.disable_common_germline_filter ? "" : "--germline-resource ${params.gnomad}"
normal_inputs = normal_bam.split(",").collect({v -> "--input $v"}).join(" ")
tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
"""
gatk --java-options '-Xmx${params.memory}' Mutect2 \
--reference ${params.reference} \
--intervals ${params.intervals} \
${germline_filter} \
${normal_panel_option} \
--input ${normal_bam} \
--normal-sample normal \
--input ${tumor_bam} \
--tumor-sample tumor \
--output ${name}.unfiltered.vcf \
--f1r2-tar-gz ${name}.f1r2.tar.gz
${normal_inputs} --normal-sample normal \
${tumor_inputs} --tumor-sample tumor \
--output ${name}.mutect2.unfiltered.vcf \
--f1r2-tar-gz ${name}.f1r2.tar.gz
"""
}

process learnReadOrientationModel {
cpus params.cpus
memory params.memory
tag "${name}"
publishDir "${params.output}", mode: "copy"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(f1r2_stats) from f1r2_stats
Expand All @@ -128,20 +94,21 @@ process pileUpSummaries {
cpus params.cpus
memory params.memory
tag "${name}"
publishDir "${params.output}", mode: "copy"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(tumor_bam), file(tumor_bai) from tumor_bams
set name, tumor_bam from tumor_bams

output:
set val("${name}"), file("${name}.pileupsummaries.table") into pileupsummaries

script:
tumor_inputs = tumor_bam.split(",").collect({v -> "--input $v"}).join(" ")
"""
gatk --java-options '-Xmx${params.memory}' GetPileupSummaries \
--intervals ${params.gnomad} \
--variant ${params.gnomad} \
--input ${tumor_bam} \
${tumor_inputs} \
--output ${name}.pileupsummaries.table
"""
}
Expand All @@ -150,7 +117,7 @@ process calculateContamination {
cpus params.cpus
memory params.memory
tag "${name}"
publishDir "${params.output}", mode: "copy"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(table) from pileupsummaries
Expand All @@ -170,14 +137,15 @@ process filterCalls {
cpus params.cpus
memory params.memory
tag "${name}"
publishDir "${params.output}", mode: "copy"
publishDir "${params.output}/${name}", mode: "copy"

input:
set name, file(segments_table), file(contamination_table), file(model),
file(unfiltered_vcf), file(vcf_stats) from contaminationTables.join(read_orientation_model).join(unfiltered_vcfs)

output:
set name, file("${name}.vcf") into final_vcfs
set name, val("${params.output}/${name}/${name}.mutect2.vcf") into final_vcfs
file "${name}.mutect2.vcf"

"""
gatk --java-options '-Xmx${params.memory}' FilterMutectCalls \
Expand All @@ -186,7 +154,7 @@ process filterCalls {
--tumor-segmentation ${segments_table} \
--contamination-table ${contamination_table} \
--ob-priors ${model} \
--output ${name}.vcf
--output ${name}.mutect2.vcf
"""
}

Expand Down
78 changes: 56 additions & 22 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,35 @@
* Default config options for all environments.
*/

// Container slug. Stable releases should specify release tag!
// Developmental code should specify :dev
process.container = 'tron-bioinformatics/tronflow-mutect2:0.3.0'
// Generate the input tables used by the test runs instead of shipping them in
// the repository, so the BAM paths can be absolute and resolved from $baseDir.
// Each table is a single tab-separated row without header, in the column order
// main.nf reads it: sample name, tumor BAM, normal BAM.
// NOTE(review): these statements sit outside the `profiles` block, so they
// presumably run whenever this config is evaluated (not only with -profile test)
// and need write access to $baseDir/test_data -- confirm.
test_bams = [
"sample_name",
"$baseDir/test_data/TESTX_S1_L001.bam",
"$baseDir/test_data/TESTX_S1_L002.bam"]
test_input_file = new File("$baseDir/test_data/test_input.txt")
// File.write replaces any previous content of the file
test_input_file.write(test_bams.join("\t") + "\n")

// Same sample layout, but every BAM column lists the same file twice separated
// by a comma; main.nf splits these columns on "," and passes one --input per
// entry, so this row exercises the replicates code path.
test_bams_with_replicates = [
"sample_name_with_replicates",
"$baseDir/test_data/TESTX_S1_L001.bam,$baseDir/test_data/TESTX_S1_L001.bam",
"$baseDir/test_data/TESTX_S1_L002.bam,$baseDir/test_data/TESTX_S1_L002.bam"]
test_input_file_with_replicates = new File("$baseDir/test_data/test_input_with_replicates.txt")
test_input_file_with_replicates.write(test_bams_with_replicates.join("\t") + "\n")


profiles {
conda { process.conda = "$baseDir/environment.yml" }
debug { process.beforeScript = 'echo $HOSTNAME' }
test {
params.input_files = "test_data/test_input.txt"
params.input_files = test_input_file
params.reference = "$baseDir/test_data/ucsc.hg19.minimal.fasta"
params.intervals = "$baseDir/test_data/intervals.minimal.bed"
params.gnomad = "$baseDir/test_data/gnomad.minimal.vcf.gz"
params.cpus = 1
params.memory = "2g"
timeline.enabled = false
report.enabled = false
trace.enabled = false
dag.enabled = false
}
}

Expand All @@ -30,29 +45,48 @@ env {
// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']

timeline {
enabled = true
//file = "${params.output}/execution_timeline.html"
}
report {
enabled = true
//file = "${params.output}/execution_report.html"
}
trace {
enabled = true
//file = "${params.output}/execution_trace.txt"
}
dag {
enabled = true
//file = "${params.output}/pipeline_dag.svg"
}
VERSION = '1.0.0'
DOI = 'https://zenodo.org/badge/latestdoi/355860788'

manifest {
name = 'TRON-Bioinformatics/tronflow-mutect2'
author = 'Pablo Riesgo Ferreiro'
author = 'Pablo Riesgo-Ferreiro, Özlem Muslu, Luisa Bresadola'
homePage = 'https://github.com/TRON-Bioinformatics/tronflow-mutect2'
description = 'Mutect2 best practices workflow'
mainScript = 'main.nf'
nextflowVersion = '>=19.10.0'
version = '0.3.1'
version = VERSION
doi = DOI
}

// Help text printed by helpMessage() in main.nf (via `log.info params.help_message`).
// It is defined here so that the version and DOI declared in this config
// (VERSION, DOI) are interpolated into the banner line.
params.help_message = """
TronFlow Mutect2 v${VERSION} ${DOI}
Usage:
nextflow run tron-bioinformatics/tronflow-mutect2 -profile conda --input_files input_files [--reference reference.fasta]
This workflow is based on the implementation at /code/iCaM/scripts/mutect2_ID.sh
Input:
* input_files: the path to a tab-separated values file containing in each row the sample name, tumor bam and normal bam
The input file does not have a header!
Replicates can be provided as comma-separated BAM paths within the tumor and/or normal column
Example input file:
name1 tumor_bam1 normal_bam1
name2 tumor_bam2 normal_bam2
Optional input:
* reference: path to the FASTA genome reference (indexes expected *.fai, *.dict)
* intervals: path to a BED file containing the regions to analyse
* gnomad: path to the gnomad VCF
* NOTE: if any of the above parameters is not provided, default hg19 resources will be used
* output: the folder where to publish output
* memory: the amount of memory used by each job (default: 16g)
* cpus: the number of CPUs used by each job (default: 2)
* disable_common_germline_filter: disable the use of GnomAD to filter out common variants in the population
from the somatic calls. The GnomAD resource is still required though as these common SNPs are used elsewhere to
calculate the contamination (default: false)
Output:
* Output VCF
* Other intermediate files
"""
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion test_data/test_input.txt

This file was deleted.

0 comments on commit 76b9a7d

Please sign in to comment.