From 1fb1d91c4511caa1f976710e110181acb7b22f42 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Mon, 29 Jan 2024 23:09:00 +0100
Subject: [PATCH 01/14] add genomecov
---
modules.json | 5 +
.../bedtools/genomecov/environment.yml | 7 ++
modules/nf-core/bedtools/genomecov/main.nf | 70 +++++++++++
modules/nf-core/bedtools/genomecov/meta.yml | 59 +++++++++
.../bedtools/genomecov/tests/main.nf.test | 118 ++++++++++++++++++
.../genomecov/tests/main.nf.test.snap | 95 ++++++++++++++
.../bedtools/genomecov/tests/nextflow.config | 7 ++
.../nf-core/bedtools/genomecov/tests/tags.yml | 2 +
8 files changed, 363 insertions(+)
create mode 100644 modules/nf-core/bedtools/genomecov/environment.yml
create mode 100644 modules/nf-core/bedtools/genomecov/main.nf
create mode 100644 modules/nf-core/bedtools/genomecov/meta.yml
create mode 100644 modules/nf-core/bedtools/genomecov/tests/main.nf.test
create mode 100644 modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap
create mode 100644 modules/nf-core/bedtools/genomecov/tests/nextflow.config
create mode 100644 modules/nf-core/bedtools/genomecov/tests/tags.yml
diff --git a/modules.json b/modules.json
index e33ea340..e86e65d7 100644
--- a/modules.json
+++ b/modules.json
@@ -50,6 +50,11 @@
"git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09",
"installed_by": ["modules"]
},
+ "bedtools/genomecov": {
+ "branch": "master",
+ "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",
+ "installed_by": ["modules"]
+ },
"bwa/index": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
diff --git a/modules/nf-core/bedtools/genomecov/environment.yml b/modules/nf-core/bedtools/genomecov/environment.yml
new file mode 100644
index 00000000..8fbe20c3
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/environment.yml
@@ -0,0 +1,7 @@
+name: bedtools_genomecov
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::bedtools=2.31.1
diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf
new file mode 100644
index 00000000..7a4d9c45
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/main.nf
@@ -0,0 +1,70 @@
+process BEDTOOLS_GENOMECOV {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
+ 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"
+
+ input:
+ tuple val(meta), path(intervals), val(scale)
+ path sizes
+ val extension
+
+ output:
+ tuple val(meta), path("*.${extension}"), emit: genomecov
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args_list = args.tokenize()
+ args += (scale > 0 && scale != 1) ? " -scale $scale" : ""
+ if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) {
+ args += " -bg"
+ }
+
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (intervals.name =~ /\.bam/) {
+ """
+ bedtools \\
+ genomecov \\
+ -ibam $intervals \\
+ $args \\
+ > ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+ } else {
+ """
+ bedtools \\
+ genomecov \\
+ -i $intervals \\
+ -g $sizes \\
+ $args \\
+ > ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml
new file mode 100644
index 00000000..2b2385e3
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/meta.yml
@@ -0,0 +1,59 @@
+name: bedtools_genomecov
+description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) summaries of feature coverage (e.g., aligned sequences) for a given genome.
+keywords:
+ - bed
+ - bam
+ - genomecov
+ - bedtools
+ - histogram
+tools:
+ - bedtools:
+ description: |
+ A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intervals:
+ type: file
+ description: BAM/BED/GFF/VCF
+ pattern: "*.{bam|bed|gff|vcf}"
+ - scale:
+ type: integer
+ description: Number containing the scale factor for the output. Set to 1 to disable. Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch
+ - sizes:
+ type: file
+ description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column
+ - extension:
+ type: string
+ description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - genomecov:
+ type: file
+ description: Computed genome coverage file
+ pattern: "*.${extension}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
+ - "@sidorov-si"
+ - "@chris-cheshire"
+maintainers:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
+ - "@sidorov-si"
+ - "@chris-cheshire"
diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test
new file mode 100644
index 00000000..21e69aed
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test
@@ -0,0 +1,118 @@
+nextflow_process {
+ name "Test Process BEDTOOLS_GENOMECOV"
+ script "../main.nf"
+ process "BEDTOOLS_GENOMECOV"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bedtools"
+ tag "bedtools/genomecov"
+
+ test("sarscov2 - no scale") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+ 1
+ ]
+ // sizes
+ input[1] = []
+ // extension
+ input[2] = "txt"
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match("no_scale") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - dummy sizes") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test'],
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+ 0.5
+ ]
+ // sizes
+ input[1] = file('dummy_chromosome_sizes')
+ // extension
+ input[2] = 'txt'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match("dummy_sizes") }
+ )
+ }
+
+ }
+
+ test("sarscov2 - scale") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test'],
+ file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true),
+ 0.5
+ ]
+ // sizes
+ input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+ // extension
+ input[2] = 'txt'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match("scale") }
+ )
+ }
+
+ }
+
+ test("stub") {
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+ 1
+ ]
+ // sizes
+ input[1] = []
+ // extension
+ input[2] = 'txt'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap
new file mode 100644
index 00000000..8f9191e4
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap
@@ -0,0 +1,95 @@
+{
+ "dummy_sizes": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ],
+ "genomecov": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-05T17:35:58.35232"
+ },
+ "no_scale": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ],
+ "genomecov": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-05T17:35:51.142496"
+ },
+ "stub": {
+ "content": [
+ "test.coverage.txt"
+ ],
+ "timestamp": "2023-12-05T17:36:13.084709"
+ },
+ "scale": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ],
+ "genomecov": [
+ [
+ {
+ "id": "test"
+ },
+ "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2"
+ ]
+ }
+ ],
+ "timestamp": "2023-12-05T17:36:05.962006"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/genomecov/tests/nextflow.config b/modules/nf-core/bedtools/genomecov/tests/nextflow.config
new file mode 100644
index 00000000..bdb74ae5
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: BEDTOOLS_GENOMECOV {
+ ext.prefix = { "${meta.id}.coverage" }
+ }
+
+}
diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml
new file mode 100644
index 00000000..55fce478
--- /dev/null
+++ b/modules/nf-core/bedtools/genomecov/tests/tags.yml
@@ -0,0 +1,2 @@
+bedtools/genomecov:
+ - "modules/nf-core/bedtools/genomecov/**"
From 8c32fe124df1a95c4ce857ad5cac262b4ec16d63 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Tue, 30 Jan 2024 01:20:32 +0100
Subject: [PATCH 02/14] add subsample_mt
---
conf/modules/subsample_mt.config | 23 +++++++++++++++++++++++
nextflow.config | 3 ++-
subworkflows/local/subsample_mt.nf | 22 ++++++++++++++++++++++
workflows/raredisease.nf | 3 +++
4 files changed, 50 insertions(+), 1 deletion(-)
create mode 100644 conf/modules/subsample_mt.config
create mode 100644 subworkflows/local/subsample_mt.nf
diff --git a/conf/modules/subsample_mt.config b/conf/modules/subsample_mt.config
new file mode 100644
index 00000000..079affe9
--- /dev/null
+++ b/conf/modules/subsample_mt.config
@@ -0,0 +1,23 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Subsample MT
+//
+
+process {
+ withName: '.*BEDTOOLS_GENOMECOV' {
+ ext.args = { "-dz" }
+ ext.prefix = { "${meta.id}" }
+ }
+}
diff --git a/nextflow.config b/nextflow.config
index 681972ca..c851ac89 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -243,7 +243,7 @@ if (!params.igenomes_ignore) {
// Load nf-core/raredisease custom config
try {
- includeConfig "${params.custom_config_base}/pipeline/raredisease.config"
+ includeConfig "https://raw.githubusercontent.com/nf-core/configs/683bae73d91856a97cd1dbbfe3e3dde4ae5373a1/pipeline/raredisease.config"
} catch (Exception e) {
System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config")
}
@@ -337,6 +337,7 @@ includeConfig 'conf/modules/call_mobile_elements.config'
includeConfig 'conf/modules/annotate_mobile_elements.config'
includeConfig 'conf/modules/generate_clinical_set.config'
includeConfig 'conf/modules/variant_evaluation.config'
+includeConfig 'conf/modules/subsample_mt.config'
// Function to ensure that resource requirements don't go beyond
// a maximum limit
diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf
new file mode 100644
index 00000000..6ffe8c98
--- /dev/null
+++ b/subworkflows/local/subsample_mt.nf
@@ -0,0 +1,22 @@
+//
+// A subworkflow to subsample MT alignments
+//
+
+include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main'
+
+workflow SUBSAMPLE_MT {
+
+ take:
+ ch_mt_marked_bam // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
+
+ main:
+
+ ch_mt_marked_bam.map {meta, bam -> return [meta, bam, []]}.set {ch_genomecov_in}
+
+ BEDTOOLS_GENOMECOV (ch_genomecov_in, [], [])
+
+ ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
+
+ emit:
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 9f402370..1ee3f1c9 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -140,6 +140,7 @@ include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkf
include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants'
include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants'
include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome'
+include { SUBSAMPLE_MT } from '../subworkflows/local/subsample_mt'
include { VARIANT_EVALUATION } from '../subworkflows/local/variant_evaluation'
/*
@@ -362,6 +363,8 @@ workflow RAREDISEASE {
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
+ SUBSAMPLE_MT(ch_mapped.mt_marked_bam)
+
//
// BAM QUALITY CHECK
//
From f814060f4f21761d58909d99a59fa6b20908efca Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 02:55:26 +0100
Subject: [PATCH 03/14] add subsample
---
conf/modules/subsample_mt.config | 21 ++++++++++-
modules/local/calculate_seed_fraction.nf | 45 ++++++++++++++++++++++++
subworkflows/local/subsample_mt.nf | 26 +++++++++++---
workflows/raredisease.nf | 3 +-
4 files changed, 89 insertions(+), 6 deletions(-)
create mode 100644 modules/local/calculate_seed_fraction.nf
diff --git a/conf/modules/subsample_mt.config b/conf/modules/subsample_mt.config
index 079affe9..7ce80bed 100644
--- a/conf/modules/subsample_mt.config
+++ b/conf/modules/subsample_mt.config
@@ -16,8 +16,27 @@
//
process {
- withName: '.*BEDTOOLS_GENOMECOV' {
+ withName: '.*SUBSAMPLE_MT:BEDTOOLS_GENOMECOV' {
ext.args = { "-dz" }
ext.prefix = { "${meta.id}" }
}
+
+ withName: '.*SUBSAMPLE_MT:SAMTOOLS_VIEW' {
+ ext.args = { "--output-fmt BAM -h -F 4 -s ${meta.seedfrac}" }
+ ext.prefix = { "${meta.id}_mt_subsample" }
+ publishDir = [
+ path: { "${params.outdir}/alignment" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: '.*SUBSAMPLE_MT:SAMTOOLS_INDEX' {
+ publishDir = [
+ path: { "${params.outdir}/alignment" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
}
diff --git a/modules/local/calculate_seed_fraction.nf b/modules/local/calculate_seed_fraction.nf
new file mode 100644
index 00000000..6d776e14
--- /dev/null
+++ b/modules/local/calculate_seed_fraction.nf
@@ -0,0 +1,45 @@
+process CALCULATE_SEED_FRACTION {
+ tag "$meta.id"
+ label 'process_low'
+
+ conda "conda-forge::python=3.8.3"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' :
+ 'biocontainers/python:3.8.3' }"
+
+ input:
+ tuple val(meta), path(cov)
+
+ output:
+ tuple val(meta), path("seedfrac.csv"), emit: csv
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ export MT_COVERAGE=`awk '{cov += \$3}END{ if (NR > 0) print cov / NR }' $cov`
+
+ python -c "import os;print('%0.6f' % (30+ 150/float(os.environ['MT_COVERAGE'])))" >seedfrac.csv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ calculate_seed_fraction: v1.0
+ python: \$(python --version | sed 's/Python //g')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch seedfrac.csv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ calculate_seed_fraction: v1.0
+ python: \$(python --version | sed 's/Python //g')
+ END_VERSIONS
+ """
+}
diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf
index 6ffe8c98..b8d3ff2f 100644
--- a/subworkflows/local/subsample_mt.nf
+++ b/subworkflows/local/subsample_mt.nf
@@ -2,20 +2,38 @@
// A subworkflow to subsample MT alignments
//
-include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main'
+include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main'
+include { CALCULATE_SEED_FRACTION } from '../../modules/local/calculate_seed_fraction'
+include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
workflow SUBSAMPLE_MT {
take:
- ch_mt_marked_bam // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
+ ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
main:
+ ch_versions = Channel.empty()
- ch_mt_marked_bam.map {meta, bam -> return [meta, bam, []]}.set {ch_genomecov_in}
+ ch_mt_bam_bai.map {meta, bam, bai -> return [meta, bam, -1]}.set {ch_genomecov_in}
- BEDTOOLS_GENOMECOV (ch_genomecov_in, [], [])
+ BEDTOOLS_GENOMECOV (ch_genomecov_in, [], "genomecov")
+
+ CALCULATE_SEED_FRACTION (BEDTOOLS_GENOMECOV.out.genomecov).csv
+ .join(ch_mt_bam_bai, failOnMismatch:true)
+ .map{meta, seedfrac, bam, bai ->
+ return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai]
+ }
+ .set { ch_subsample_in }
+
+ SAMTOOLS_VIEW(ch_subsample_in, [[:],[]], [])
+
+ SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam)
ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
+ ch_versions = ch_versions.mix(CALCULATE_SEED_FRACTION.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
emit:
versions = ch_versions // channel: [ path(versions.yml) ]
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 6884787d..eebb7886 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -363,8 +363,9 @@ workflow RAREDISEASE {
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
- SUBSAMPLE_MT(ch_mapped.mt_marked_bam)
+ SUBSAMPLE_MT(ch_mapped.mt_bam_bai)
+ return
//
// BAM QUALITY CHECK
//
From c05325a0d2d3f35db254a5f178e966839be5b312 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 02:55:54 +0100
Subject: [PATCH 04/14] fix return
---
workflows/raredisease.nf | 1 -
1 file changed, 1 deletion(-)
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index eebb7886..ed9f62a5 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -365,7 +365,6 @@ workflow RAREDISEASE {
SUBSAMPLE_MT(ch_mapped.mt_bam_bai)
- return
//
// BAM QUALITY CHECK
//
From 4aa778dd2d28a8e188bc47623369358745802343 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 10:56:02 +0100
Subject: [PATCH 05/14] add params
---
docs/output.md | 13 ++++++++++++
modules/local/calculate_seed_fraction.nf | 4 +++-
nextflow.config | 3 +++
nextflow_schema.json | 25 ++++++++++++++++++++++--
subworkflows/local/subsample_mt.nf | 11 +++++++++--
workflows/raredisease.nf | 9 ++++++++-
6 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/docs/output.md b/docs/output.md
index 6cc3ecf9..90ac6f51 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -21,6 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Duplicate marking](#duplicate-marking)
- [Picard's MarkDuplicates](#picards-markduplicates)
- [Sentieon Dedup](#sentieon-dedup)
+ - [Subsample mitochondrial alignments](#subsample-mitochondrial-alignments)
- [Quality control and reporting](#quality-control-and-reporting)
- [Quality control](#quality-control)
- [FastQC](#fastqc)
@@ -115,6 +116,18 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- `*.metrics`: Text file containing the dedup metrics.
+#### Subsample mitochondrial alignments
+
+[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user-specified coverage.
+
+
+Output files from Alignment
+
+- `{outputdir}/alignment/`
+ - `*_mt_subsample.bam`: Alignment file in bam format.
+ - `*_mt_subsample.bam.bai`: Index of the corresponding bam file.
+
+
### Quality control and reporting
#### Quality control
diff --git a/modules/local/calculate_seed_fraction.nf b/modules/local/calculate_seed_fraction.nf
index 6d776e14..1c367b38 100644
--- a/modules/local/calculate_seed_fraction.nf
+++ b/modules/local/calculate_seed_fraction.nf
@@ -9,6 +9,8 @@ process CALCULATE_SEED_FRACTION {
input:
tuple val(meta), path(cov)
+ val rd
+ val seed
output:
tuple val(meta), path("seedfrac.csv"), emit: csv
@@ -23,7 +25,7 @@ process CALCULATE_SEED_FRACTION {
"""
export MT_COVERAGE=`awk '{cov += \$3}END{ if (NR > 0) print cov / NR }' $cov`
- python -c "import os;print('%0.6f' % (30+ 150/float(os.environ['MT_COVERAGE'])))" >seedfrac.csv
+ python -c "import os;cov=float(os.environ['MT_COVERAGE'] or 0);frac=min($rd/cov,0.999999) if cov>0 else 0.999999;print('%0.6f' % ($seed+frac))" >seedfrac.csv # fraction is capped below 1 so the integer part stays the seed for samtools view -s; empty or zero coverage keeps (nearly) all reads
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/nextflow.config b/nextflow.config
index a18da1fb..69dd2cae 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -35,6 +35,7 @@ params {
skip_sv_annotation = false
skip_me_annotation = false
skip_mt_annotation = false
+ skip_mt_subsample = false
skip_vcf2cytosure = true
skip_vep_filter = false
gens_switch = false
@@ -53,6 +54,8 @@ params {
// Alignment
aligner = 'bwamem2'
min_trimmed_length = 40
+ mt_subsample_rd = 150
+ mt_subsample_seed = 30
rmdup = false
// Variant calling
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 74b35fd8..79045f6d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -27,7 +27,8 @@
"type": "string",
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
- "fa_icon": "fas fa-folder-open"
+ "fa_icon": "fas fa-folder-open",
+ "default": "results"
},
"email": {
"type": "string",
@@ -212,7 +213,8 @@
"type": "string",
"description": "Name of the mitochondrial contig in the reference fasta file",
"help_text": "Used to extract relevant information from the references to analyse mitochondria",
- "fa_icon": "fas fa-align-center"
+ "fa_icon": "fas fa-align-center",
+ "default": "chrM"
},
"mobile_element_references": {
"type": "string",
@@ -505,6 +507,11 @@
"description": "Specifies whether or not to skip annotation of mitochondrial variants.",
"fa_icon": "fas fa-toggle-on"
},
+ "skip_mt_subsample": {
+ "type": "boolean",
+ "description": "Specifies whether or not to skip the subsampling of mitochondrial alignments.",
+ "fa_icon": "fas fa-toggle-on"
+ },
"skip_snv_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip annotate SNV subworkflow.",
@@ -549,6 +556,20 @@
"help_text": "Minimum length of reads after adapter trimming. Shorter reads are discarded. The program default is 15 bp. ",
"fa_icon": "fas fa-less-than"
},
+ "mt_subsample_rd": {
+ "type": "integer",
+ "default": 150,
+ "description": "Expected coverage to subsample mt alignment to.",
+ "help_text": "The fraction of reads kept is this value divided by the mean mitochondrial coverage. For details, see the `-s` option in the samtools view documentation.",
+ "fa_icon": "fas fa-less-than"
+ },
+ "mt_subsample_seed": {
+ "type": "integer",
+ "default": 30,
+ "description": "Subsampling seed used to influence which subset of mitochondrial reads is kept.",
+ "help_text": "For details, see the `-s` option in the samtools view documentation.",
+ "fa_icon": "fas fa-less-than"
+ },
"rmdup": {
"type": "boolean",
"description": "Specifies whether duplicates reads should be removed prior to variant calling.",
diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf
index b8d3ff2f..dd6b87b0 100644
--- a/subworkflows/local/subsample_mt.nf
+++ b/subworkflows/local/subsample_mt.nf
@@ -10,7 +10,9 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/m
workflow SUBSAMPLE_MT {
take:
- ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ val_mt_subsample_rd // value: [mandatory] target read depth used to derive the subsampling fraction
+ val_mt_subsample_seed // value: [mandatory] seed forming the integer part of the samtools view -s argument
main:
ch_versions = Channel.empty()
@@ -19,7 +21,12 @@ workflow SUBSAMPLE_MT {
BEDTOOLS_GENOMECOV (ch_genomecov_in, [], "genomecov")
- CALCULATE_SEED_FRACTION (BEDTOOLS_GENOMECOV.out.genomecov).csv
+ CALCULATE_SEED_FRACTION (
+ BEDTOOLS_GENOMECOV.out.genomecov,
+ val_mt_subsample_rd,
+ val_mt_subsample_seed
+ )
+ .csv
.join(ch_mt_bam_bai, failOnMismatch:true)
.map{meta, seedfrac, bam, bai ->
return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai]
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index ed9f62a5..01a9bb88 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -363,7 +363,14 @@ workflow RAREDISEASE {
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
- SUBSAMPLE_MT(ch_mapped.mt_bam_bai)
+ if (params.skip_mt_subsample) {
+ SUBSAMPLE_MT(
+ ch_mapped.mt_bam_bai,
+ params.mt_subsample_rd,
+ params.mt_subsample_seed
+ )
+ ch_versions = ch_versions.mix(SUBSAMPLE_MT.out.versions)
+ }
//
// BAM QUALITY CHECK
From 05be462f9a4e2172563e162c6f4d6ede1f208e71 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 11:00:44 +0100
Subject: [PATCH 06/14] update conditional
---
workflows/raredisease.nf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 01a9bb88..650ca446 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -363,7 +363,7 @@ workflow RAREDISEASE {
.set { ch_mapped }
ch_versions = ch_versions.mix(ALIGN.out.versions)
- if (params.skip_mt_subsample) {
+ if (!params.skip_mt_subsample) {
SUBSAMPLE_MT(
ch_mapped.mt_bam_bai,
params.mt_subsample_rd,
From a722bc194df30865053a1824d8f3b153feb76dd9 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 11:04:59 +0100
Subject: [PATCH 07/14] update changelog
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55ea2691..17fc2667 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471)
- A new parameter `vep_plugin_files` to supply files required by vep plugins [#482](https://github.com/nf-core/raredisease/pull/482)
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)
+- Added functionality to subsample mitochondrial alignments, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
### `Changed`
From 0273a3757728a55791198729f2b46ac8368d4183 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Wed, 31 Jan 2024 21:45:56 +0100
Subject: [PATCH 08/14] chromograph viz
---
conf/modules/qc_bam.config | 5 +++++
subworkflows/local/qc_bam.nf | 5 ++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config
index 61daf4d6..93b0f2db 100644
--- a/conf/modules/qc_bam.config
+++ b/conf/modules/qc_bam.config
@@ -50,6 +50,11 @@ process {
ext.args = '-clip'
}
+ withName: '.*QC_BAM:CHROMOGRAPH_COV' {
+ ext.args = '--euploid --step 500'
+ ext.prefix = { "${meta2.id}_chromographcov" }
+ }
+
withName: '.*QC_BAM:MOSDEPTH' {
ext.args = '--d4'
ext.prefix = { "${meta.id}_mosdepth" }
diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf
index d4ae2842..9bb5a2a4 100644
--- a/subworkflows/local/qc_bam.nf
+++ b/subworkflows/local/qc_bam.nf
@@ -4,13 +4,14 @@
include { PICARD_COLLECTMULTIPLEMETRICS } from '../../modules/nf-core/picard/collectmultiplemetrics/main'
include { PICARD_COLLECTHSMETRICS } from '../../modules/nf-core/picard/collecthsmetrics/main'
+include { CHROMOGRAPH as CHROMOGRAPH_COV } from '../../modules/nf-core/chromograph/main'
include { QUALIMAP_BAMQC } from '../../modules/nf-core/qualimap/bamqc/main'
include { TIDDIT_COV } from '../../modules/nf-core/tiddit/cov/main'
include { MOSDEPTH } from '../../modules/nf-core/mosdepth/main'
include { UCSC_WIGTOBIGWIG } from '../../modules/nf-core/ucsc/wigtobigwig/main'
include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main'
include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main'
-include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main'
+include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main'
include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main'
include { NGSBITS_SAMPLEGENDER } from '../../modules/nf-core/ngsbits/samplegender/main'
@@ -51,6 +52,8 @@ workflow QC_BAM {
UCSC_WIGTOBIGWIG (TIDDIT_COV.out.wig, ch_chrom_sizes)
+ CHROMOGRAPH_COV([[:],[]], TIDDIT_COV.out.wig, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]])
+
ch_bam_bai.map{ meta, bam, bai -> [meta, bam, bai, []]}.set{ch_mosdepth_in}
MOSDEPTH (ch_mosdepth_in, ch_genome_fasta)
From 8e39cb8e2c0fc27a5356b6760604ae3afea3a90d Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Thu, 1 Feb 2024 17:06:20 +0100
Subject: [PATCH 09/14] update chromograph
---
modules.json | 2 +-
modules/nf-core/chromograph/main.nf | 4 +-
modules/nf-core/chromograph/meta.yml | 2 +-
.../nf-core/chromograph/tests/main.nf.test | 38 +++++++++++++++++++
modules/nf-core/chromograph/tests/tags.yml | 2 +
5 files changed, 44 insertions(+), 4 deletions(-)
create mode 100644 modules/nf-core/chromograph/tests/main.nf.test
create mode 100644 modules/nf-core/chromograph/tests/tags.yml
diff --git a/modules.json b/modules.json
index 2b008539..ffdf8a5f 100644
--- a/modules.json
+++ b/modules.json
@@ -87,7 +87,7 @@
},
"chromograph": {
"branch": "master",
- "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+ "git_sha": "f05512229a501df5d67273bb7358c5f69667f40d",
"installed_by": ["modules"]
},
"cnvnator/cnvnator": {
diff --git a/modules/nf-core/chromograph/main.nf b/modules/nf-core/chromograph/main.nf
index dd67e1ba..e1374e48 100644
--- a/modules/nf-core/chromograph/main.nf
+++ b/modules/nf-core/chromograph/main.nf
@@ -4,8 +4,8 @@ process CHROMOGRAPH {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_1':
- 'biocontainers/chromograph:1.3.1--pyhdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_2':
+ 'biocontainers/chromograph:1.3.1--pyhdfd78af_2' }"
input:
tuple val(meta), path(autozyg)
diff --git a/modules/nf-core/chromograph/meta.yml b/modules/nf-core/chromograph/meta.yml
index 0fd7250f..6540d9f0 100644
--- a/modules/nf-core/chromograph/meta.yml
+++ b/modules/nf-core/chromograph/meta.yml
@@ -12,7 +12,7 @@ tools:
description: "Chromograph is a python package to create PNG images from genetics data such as BED and WIG files."
homepage: "https://github.com/Clinical-Genomics/chromograph"
documentation: "https://github.com/Clinical-Genomics/chromograph/blob/master/README.md"
- licence: "['MIT']"
+ licence: ["MIT"]
input:
- meta:
type: map
diff --git a/modules/nf-core/chromograph/tests/main.nf.test b/modules/nf-core/chromograph/tests/main.nf.test
new file mode 100644
index 00000000..caba8829
--- /dev/null
+++ b/modules/nf-core/chromograph/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process { // nf-test suite for the CHROMOGRAPH module process
+
+ name "Test Process CHROMOGRAPH"
+ script "modules/nf-core/chromograph/main.nf"
+ process "CHROMOGRAPH"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "chromograph"
+
+ test("test_chromograph_sites") { // exercises only the seventh input (input[6])
+
+ when {
+ process { // inputs 0-5 are empty [[:],[]] placeholders; input[6] carries the meta map plus the updsites BED test file
+ """
+ input[0] = [[:],[]]
+ input[1] = [[:],[]]
+ input[2] = [[:],[]]
+ input[3] = [[:],[]]
+ input[4] = [[:],[]]
+ input[5] = [[:],[]]
+ input[6] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['updsites_bed'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success}, // the process must complete successfully
+ {assert process.out.plots.get(0).get(1) ==~ ".*/test"} // second element of the first `plots` emission must fully match .*/test
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/chromograph/tests/tags.yml b/modules/nf-core/chromograph/tests/tags.yml
new file mode 100644
index 00000000..e60ad9db
--- /dev/null
+++ b/modules/nf-core/chromograph/tests/tags.yml
@@ -0,0 +1,2 @@
+chromograph:
+ - "modules/nf-core/chromograph/**"
From c18ee8ef42c37eaa1496c4ea06130102974c8b73 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 11:06:56 +0100
Subject: [PATCH 10/14] update changelog
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 17fc2667..db07d7eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- A new parameter `vep_plugin_files` to supply files required by vep plugins [#482](https://github.com/nf-core/raredisease/pull/482)
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)
- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
+- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507)
### `Changed`
From e50e30c52fc5ea7f250d6d960b19cf8ce2a69258 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 14:16:50 +0100
Subject: [PATCH 11/14] update output docs
---
docs/output.md | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/docs/output.md b/docs/output.md
index 90ac6f51..7004fc1a 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -28,6 +28,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Mosdepth](#mosdepth)
- [Picard tools](#picard-tools)
- [Qualimap](#qualimap)
+ - [Chromograph coverage](#chromograph-coverage)
- [Sention WgsMetricsAlgo](#sention-wgsmetricsalgo)
- [TIDDIT's cov and UCSC WigToBigWig](#tiddits-cov-and-ucsc-wigtobigwig)
- [Reporting](#reporting)
@@ -196,6 +197,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- `{outputdir}/qc_bam/_qualimap/` this directory includes a qualimap report and associated raw statistic files. You can open the .html file in your internet browser to see the in-depth report.
+##### Chromograph coverage
+
+[Chromograph](https://github.com/Clinical-Genomics/chromograph) is a Python package to create PNG images from genetics data such as BED and WIG files.
+
+
+Output files
+
+- `{outputdir}/qc_bam/_chromographcov/*.png` plots showing the coverage across each chromosome.
+
+
##### Sention WgsMetricsAlgo
[Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/) is the Sentieon's equivalent of Picard's CollectWgsMetrics.
From a9c3d9cbe2d57972651f5bae2dfc7b8b47933e7c Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 11:04:59 +0100
Subject: [PATCH 12/14] update changelog
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index db07d7eb..d5135e7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)
- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507)
+- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
### `Changed`
From fca0ac638b1f8c4aed3b84614ced7521965e14e2 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:49:03 +0100
Subject: [PATCH 13/14] review suggestions
---
docs/output.md | 2 +-
subworkflows/local/subsample_mt.nf | 14 +++++++-------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/docs/output.md b/docs/output.md
index 7004fc1a..e6e231e2 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -119,7 +119,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
#### Subsample mitochondrial alignments
-[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage.
+[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
Output files from Alignment
diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf
index dd6b87b0..d8da5aae 100644
--- a/subworkflows/local/subsample_mt.nf
+++ b/subworkflows/local/subsample_mt.nf
@@ -25,13 +25,13 @@ workflow SUBSAMPLE_MT {
BEDTOOLS_GENOMECOV.out.genomecov,
val_mt_subsample_rd,
val_mt_subsample_seed
- )
- .csv
- .join(ch_mt_bam_bai, failOnMismatch:true)
- .map{meta, seedfrac, bam, bai ->
- return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai]
- }
- .set { ch_subsample_in }
+ )
+ .csv
+ .join(ch_mt_bam_bai, failOnMismatch:true)
+ .map{meta, seedfrac, bam, bai ->
+ return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai]
+ }
+ .set { ch_subsample_in }
SAMTOOLS_VIEW(ch_subsample_in, [[:],[]], [])
From cf6c313a9231c3c9695b443cf098d0ec6f5b9e52 Mon Sep 17 00:00:00 2001
From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:57:52 +0100
Subject: [PATCH 14/14] fix lint error
---
docs/output.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/output.md b/docs/output.md
index e6e231e2..37f7d4f8 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -119,7 +119,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
#### Subsample mitochondrial alignments
-[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
+[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
Output files from Alignment