diff --git a/conf/modules/annotate_mobile_elements.config b/conf/modules/annotate_mobile_elements.config index aa119729..bed2c0a7 100644 --- a/conf/modules/annotate_mobile_elements.config +++ b/conf/modules/annotate_mobile_elements.config @@ -54,7 +54,7 @@ process { withName: '.*ANNOTATE_MOBILE_ELEMENTS:BCFTOOLS_VIEW_FILTER' { // extend filter with arguments such as --exclude 'INFO/swegen_sva_FRQ > 0.1' - ext.args = { "--apply-filters PASS" } + ext.args = { "--apply-filters PASS --output-type z" } ext.prefix = { "${meta.id}_filter" } } diff --git a/conf/modules/annotate_rhocallviz.config b/conf/modules/annotate_rhocallviz.config new file mode 100644 index 00000000..cfeb8d7e --- /dev/null +++ b/conf/modules/annotate_rhocallviz.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// rhocall viz options +// + +process { + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_VIEW' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--output-type z --min-ac 1 --samples ${meta.sample}" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_ROH' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--AF-tag GNOMADAF --skip-indels" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_VIEW_UNCOMPRESS' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--output-type v" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:RHOCALL_VIZ' { + ext.prefix = { "${meta.sample}_rhocallviz" } + ext.args = { "--aftag GNOMADAF --wig" } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:UCSC_WIGTOBIGWIG' { + ext.prefix = { "${meta.sample}_rhocallviz" } + ext.args = { "-clip" } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome/${meta.sample}_rhocallviz" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/docs/output.md b/docs/output.md index 37f7d4f8..2f782cf4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -52,6 +52,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [VEP](#vep) - [UPD](#upd) - [Chromograph](#chromograph) + - [Rhocall viz](#rhocall-viz) - [Annotation - SV](#annotation---sv) - [SVDB query](#svdb-query) - [VEP](#vep-1) @@ -403,6 +404,19 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e +#### Rhocall viz + +[Rhocall viz](https://github.com/dnil/rhocall) plots binned zygosity and RHO-regions. + +
+Output files + +- `annotate_snv/genome/<sampleid>_rhocallviz/<sampleid>_rhocallviz.bed`: file containing regions of homozygosity in bed format. +- `annotate_snv/genome/<sampleid>_rhocallviz/<sampleid>_rhocallviz.wig`: file containing the fraction of homozygous SNPs in wig format. +- `annotate_snv/genome/<sampleid>_rhocallviz/<sampleid>_rhocallviz.bw`: file containing the fraction of homozygous SNPs in bigwig format. + +
+ ### Annotation - SV #### SVDB query diff --git a/modules.json b/modules.json index ffdf8a5f..1076a6d6 100644 --- a/modules.json +++ b/modules.json @@ -47,7 +47,7 @@ }, "bcftools/view": { "branch": "master", - "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", "installed_by": ["modules"] }, "bedtools/genomecov": { @@ -358,6 +358,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "rhocall/viz": { + "branch": "master", + "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", + "installed_by": ["modules"] + }, "rtgtools/format": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf index a9d93fa6..5237adc8 100644 --- a/modules/nf-core/bcftools/view/main.nf +++ b/modules/nf-core/bcftools/view/main.nf @@ -14,8 +14,8 @@ process BCFTOOLS_VIEW { path(samples) output: - tuple val(meta), path("*.gz") , emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,9 +26,14 @@ process BCFTOOLS_VIEW { def regions_file = regions ? "--regions-file ${regions}" : "" def targets_file = targets ? "--targets-file ${targets}" : "" def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" """ bcftools view \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.${extension} \\ ${regions_file} \\ ${targets_file} \\ ${samples_file} \\ @@ -43,9 +48,15 @@ process BCFTOOLS_VIEW { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" """ - touch ${prefix}.vcf.gz + touch ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml index eaa12b56..6baa34a6 100644 --- a/modules/nf-core/bcftools/view/meta.yml +++ b/modules/nf-core/bcftools/view/meta.yml @@ -53,7 +53,7 @@ output: - vcf: type: file description: VCF normalized output file - pattern: "*.{vcf.gz}" + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap index 049ac252..b59be932 100644 --- a/modules/nf-core/bcftools/view/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -7,23 +7,23 @@ "id": "out", "single_end": false }, - "out.vcf.gz:md5,3c47ba1a6aa4ef9b3ad800175814d739" + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" ] ], [ "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" ] ], - "timestamp": "2023-11-29T14:27:10.724842996" + "timestamp": "2024-02-05T17:12:20.799849895" }, "sarscov2 - [vcf, tbi], [], [], [] - stub": { "content": [ - "out.vcf.gz", + "out.vcf", [ "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" ] ], - "timestamp": "2023-11-29T14:27:17.445846794" + "timestamp": "2024-02-05T16:53:34.652746985" }, "sarscov2 - [vcf, 
tbi], [], [], []": { "content": [ @@ -33,13 +33,13 @@ "id": "out", "single_end": false }, - "out.vcf.gz:md5,a1e45fe6d2b386fc2611766e5d2937ee" + "out.vcf:md5,8e722884ffb75155212a3fc053918766" ] ], [ "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" ] ], - "timestamp": "2023-11-29T14:27:03.328392594" + "timestamp": "2024-02-05T17:12:14.247465409" } } \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config index b05aa504..932e3ba6 100644 --- a/modules/nf-core/bcftools/view/tests/nextflow.config +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -1,3 +1,3 @@ process { - ext.args = '--no-version' -} \ No newline at end of file + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/rhocall/viz/environment.yml b/modules/nf-core/rhocall/viz/environment.yml new file mode 100644 index 00000000..6e5b352f --- /dev/null +++ b/modules/nf-core/rhocall/viz/environment.yml @@ -0,0 +1,7 @@ +name: "rhocall_viz" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::rhocall=0.5.1" diff --git a/modules/nf-core/rhocall/viz/main.nf b/modules/nf-core/rhocall/viz/main.nf new file mode 100644 index 00000000..4191ef3b --- /dev/null +++ b/modules/nf-core/rhocall/viz/main.nf @@ -0,0 +1,54 @@ +process RHOCALL_VIZ { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': + 'biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(roh) + + output: + tuple val(meta), path("${prefix}/${prefix}.bed"), emit: bed + tuple val(meta), path("${prefix}/${prefix}.wig"), emit: wig + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + rhocall \\ + viz \\ + $args \\ + -r $roh \\ + --out_dir ${prefix} \\ + $vcf + + mv ${prefix}/output.bed ${prefix}/${prefix}.bed + mv ${prefix}/output.wig ${prefix}/${prefix}.wig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}/${prefix}.bed + touch ${prefix}/${prefix}.wig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rhocall/viz/meta.yml b/modules/nf-core/rhocall/viz/meta.yml new file mode 100644 index 00000000..bbcad9ba --- /dev/null +++ b/modules/nf-core/rhocall/viz/meta.yml @@ -0,0 +1,52 @@ +name: "rhocall_viz" +description: Call regions of homozygosity and make tentative UPD calls +keywords: + - roh + - bcftools + - runs_of_homozygosity +tools: + - "rhocall": + description: "Call regions of homozygosity and make tentative UPD calls." + homepage: "https://github.com/dnil/rhocall" + documentation: "https://github.com/dnil/rhocall" + tool_dev_url: "https://github.com/dnil" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - vcf: + type: file + description: VCF file + pattern: "*.{vcf}" + - roh: + type: file + description: Input RHO file produced from rhocall + pattern: "*.{roh}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: Bed file containing roh calls + pattern: "*.{bed}" + - wig: + type: file + description: Wig file containing roh calls + pattern: "*.{wig}" + +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/rhocall/viz/tests/main.nf.test b/modules/nf-core/rhocall/viz/tests/main.nf.test new file mode 100644 index 00000000..094e7d07 --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process RHOCALL_VIZ" + script "../main.nf" + process "RHOCALL_VIZ" + + tag "modules" + tag "modules_nfcore" + tag "rhocall" + tag "rhocall/viz" + tag "bcftools/roh" + + config "./nextflow.config" + + test("sarscov2 - vcf, roh") { + + + setup { + run("BCFTOOLS_ROH") { + script "../../../bcftools/roh/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = [[],[]] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + } + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)] + input[1] = BCFTOOLS_ROH.out.roh + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/rhocall/viz/tests/main.nf.test.snap 
b/modules/nf-core/rhocall/viz/tests/main.nf.test.snap new file mode 100644 index 00000000..cba4067f --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - vcf, roh": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,4579710bbd8e1e4449274d261c439891" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.wig:md5,ab2d23269213f6331f18b7ad6ca94a5f" + ] + ], + "2": [ + "versions.yml:md5,079291120b14dd6b9368dd1cff72518e" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,4579710bbd8e1e4449274d261c439891" + ] + ], + "versions": [ + "versions.yml:md5,079291120b14dd6b9368dd1cff72518e" + ], + "wig": [ + [ + { + "id": "test" + }, + "test.wig:md5,ab2d23269213f6331f18b7ad6ca94a5f" + ] + ] + } + ], + "timestamp": "2024-02-05T17:57:24.70125206" + } +} \ No newline at end of file diff --git a/modules/nf-core/rhocall/viz/tests/nextflow.config b/modules/nf-core/rhocall/viz/tests/nextflow.config new file mode 100644 index 00000000..2217be8e --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/nextflow.config @@ -0,0 +1,3 @@ +env { + MPLCONFIGDIR = "/tmp" +} diff --git a/modules/nf-core/rhocall/viz/tests/tags.yml b/modules/nf-core/rhocall/viz/tests/tags.yml new file mode 100644 index 00000000..bc2d74a7 --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/tags.yml @@ -0,0 +1,2 @@ +rhocall/viz: + - "modules/nf-core/rhocall/viz/**" diff --git a/nextflow.config b/nextflow.config index 69dd2cae..72bb3f34 100644 --- a/nextflow.config +++ b/nextflow.config @@ -337,6 +337,7 @@ includeConfig 'conf/modules/annotate_mobile_elements.config' includeConfig 'conf/modules/generate_clinical_set.config' includeConfig 'conf/modules/variant_evaluation.config' includeConfig 'conf/modules/subsample_mt.config' +includeConfig 'conf/modules/annotate_rhocallviz.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/subworkflows/local/annotate_genome_snvs.nf 
b/subworkflows/local/annotate_genome_snvs.nf index 9e3d74d6..fe59303d 100644 --- a/subworkflows/local/annotate_genome_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -19,6 +19,7 @@ include { TABIX_TABIX as TABIX_BCFTOOLS_CONCAT } from '../../modules/nf-core/ta include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW } from '../../modules/nf-core/tabix/tabix/main' include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' include { ANNOTATE_CADD } from './annotation/annotate_cadd' +include { ANNOTATE_RHOCALLVIZ } from './annotation/annotate_rhocallviz' workflow ANNOTATE_GENOME_SNVS { @@ -35,8 +36,10 @@ workflow ANNOTATE_GENOME_SNVS { ch_vep_cache // channel: [mandatory] [ path(cache) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_gnomad_af // channel: [optional] [ path(tab), path(tbi) ] + ch_samples // channel: [mandatory] [ val(sample_meta) ] ch_split_intervals // channel: [mandatory] [ path(intervals) ] ch_vep_extra_files // channel: [mandatory] [ path(files) ] + ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ] main: ch_cadd_vcf = Channel.empty() @@ -77,6 +80,9 @@ workflow ANNOTATE_GENOME_SNVS { ZIP_TABIX_VCFANNO (VCFANNO.out.vcf) + //rhocall_viz + ANNOTATE_RHOCALLVIZ(ZIP_TABIX_VCFANNO.out.gz_tbi, ch_samples, ch_genome_chrsizes) + BCFTOOLS_VIEW(ZIP_TABIX_VCFANNO.out.gz_tbi, [], [], []) // filter on frequencies TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf) @@ -171,6 +177,7 @@ workflow ANNOTATE_GENOME_SNVS { ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) ch_versions = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions) + ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) emit: vcf_ann = ch_vep_ann // channel: [ val(meta), path(vcf) ] diff --git a/subworkflows/local/annotation/annotate_rhocallviz.nf b/subworkflows/local/annotation/annotate_rhocallviz.nf new file mode 100644 index 00000000..37aeb02d --- /dev/null +++ 
b/subworkflows/local/annotation/annotate_rhocallviz.nf
@@ -0,0 +1,64 @@
+//
+// A subworkflow to plot binned zygosity and RHO-regions.
+//
+
+include { BCFTOOLS_VIEW                             } from '../../../modules/nf-core/bcftools/view/main'
+include { TABIX_TABIX                               } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_ROH                              } from '../../../modules/nf-core/bcftools/roh/main'
+include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_UNCOMPRESS } from '../../../modules/nf-core/bcftools/view/main'
+include { RHOCALL_VIZ                               } from '../../../modules/nf-core/rhocall/viz/main'
+include { UCSC_WIGTOBIGWIG                          } from '../../../modules/nf-core/ucsc/wigtobigwig/main'
+
+workflow ANNOTATE_RHOCALLVIZ {
+
+    take:
+        ch_vcf_tbi         // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
+        ch_samples         // channel: [mandatory] [ val(sample_meta) ]
+        ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ]
+
+    main:
+        ch_versions = Channel.empty()
+
+        // Pair the VCF with every sample meta so each downstream task is keyed by one sample
+        ch_vcf_tbi
+            .combine(ch_samples)
+            .map {meta, vcf, tbi, meta2 -> return [meta2,vcf,tbi]}
+            .set { ch_rhocall_viz }
+
+        BCFTOOLS_VIEW(ch_rhocall_viz, [],[],[])
+
+        TABIX_TABIX(BCFTOOLS_VIEW.out.vcf)
+
+        BCFTOOLS_VIEW.out.vcf
+            .join(TABIX_TABIX.out.tbi)
+            .set {ch_roh_in }
+
+        BCFTOOLS_ROH(ch_roh_in, [[],[]], [], [], [], [])
+
+        BCFTOOLS_VIEW_UNCOMPRESS(ch_roh_in,[],[],[])
+
+        // The two processes above emit their per-sample results in task-completion order.
+        // Join on the sample meta before calling RHOCALL_VIZ so that a VCF is never paired
+        // with the ROH file of a different sample; multiMap keeps both inputs in step.
+        BCFTOOLS_VIEW_UNCOMPRESS.out.vcf
+            .join(BCFTOOLS_ROH.out.roh)
+            .multiMap { meta, vcf_file, roh_file ->
+                vcf: [ meta, vcf_file ]
+                roh: [ meta, roh_file ]
+            }
+            .set { ch_viz_in }
+
+        RHOCALL_VIZ(ch_viz_in.vcf, ch_viz_in.roh)
+
+        UCSC_WIGTOBIGWIG(RHOCALL_VIZ.out.wig, ch_genome_chrsizes)
+
+        ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())
+        ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first())
+        ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions.first())
+        ch_versions = ch_versions.mix(BCFTOOLS_VIEW_UNCOMPRESS.out.versions.first())
+        ch_versions = ch_versions.mix(RHOCALL_VIZ.out.versions.first())
+        ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions.first())
+
+    emit:
+        versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 
52293252..342812db 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -540,8 +540,10 @@ workflow RAREDISEASE { ch_vep_cache, ch_genome_fasta, ch_gnomad_af, + ch_samples, ch_scatter_split_intervals, - ch_vep_extra_files + ch_vep_extra_files, + ch_genome_chrsizes ).set { ch_snv_annotate } ch_versions = ch_versions.mix(ch_snv_annotate.versions)