From 1fb1d91c4511caa1f976710e110181acb7b22f42 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:09:00 +0100 Subject: [PATCH 01/14] add genomecov --- modules.json | 5 + .../bedtools/genomecov/environment.yml | 7 ++ modules/nf-core/bedtools/genomecov/main.nf | 70 +++++++++++ modules/nf-core/bedtools/genomecov/meta.yml | 59 +++++++++ .../bedtools/genomecov/tests/main.nf.test | 118 ++++++++++++++++++ .../genomecov/tests/main.nf.test.snap | 95 ++++++++++++++ .../bedtools/genomecov/tests/nextflow.config | 7 ++ .../nf-core/bedtools/genomecov/tests/tags.yml | 2 + 8 files changed, 363 insertions(+) create mode 100644 modules/nf-core/bedtools/genomecov/environment.yml create mode 100644 modules/nf-core/bedtools/genomecov/main.nf create mode 100644 modules/nf-core/bedtools/genomecov/meta.yml create mode 100644 modules/nf-core/bedtools/genomecov/tests/main.nf.test create mode 100644 modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap create mode 100644 modules/nf-core/bedtools/genomecov/tests/nextflow.config create mode 100644 modules/nf-core/bedtools/genomecov/tests/tags.yml diff --git a/modules.json b/modules.json index e33ea340..e86e65d7 100644 --- a/modules.json +++ b/modules.json @@ -50,6 +50,11 @@ "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, + "bedtools/genomecov": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "installed_by": ["modules"] + }, "bwa/index": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/bedtools/genomecov/environment.yml b/modules/nf-core/bedtools/genomecov/environment.yml new file mode 100644 index 00000000..8fbe20c3 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_genomecov +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git 
a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf new file mode 100644 index 00000000..7a4d9c45 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -0,0 +1,70 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals), val(scale) + path sizes + val extension + + output: + tuple val(meta), path("*.${extension}"), emit: genomecov + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + args += (scale > 0 && scale != 1) ? " -scale $scale" : "" + if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { + args += " -bg" + } + + def prefix = task.ext.prefix ?: "${meta.id}" + if (intervals.name =~ /\.bam/) { + """ + bedtools \\ + genomecov \\ + -ibam $intervals \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } else { + """ + bedtools \\ + genomecov \\ + -i $intervals \\ + -g $sizes \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml new file mode 100644 index 
00000000..2b2385e3 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -0,0 +1,59 @@ +name: bedtools_genomecov +description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) summaries of feature coverage (e.g., aligned sequences) for a given genome. +keywords: + - bed + - bam + - genomecov + - bedtools + - histogram +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - scale: + type: integer + description: Number containing the scale factor for the output. Set to 1 to disable. Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch + - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - genomecov: + type: file + description: Computed genome coverage file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test new file mode 100644 index 00000000..21e69aed --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_process { + name "Test Process BEDTOOLS_GENOMECOV" + script "../main.nf" + process "BEDTOOLS_GENOMECOV" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/genomecov" + + test("sarscov2 - no scale") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = "txt" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("no_scale") } + ) + } + + } + + test("sarscov2 - dummy sizes") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file('dummy_chromosome_sizes') + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("dummy_sizes") } + ) + } + + } + + test("sarscov2 - scale") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = 
file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("scale") } + ) + } + + } + + test("stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap new file mode 100644 index 00000000..8f9191e4 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "dummy_sizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:58.35232" + }, + "no_scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:51.142496" + }, + "stub": { + "content": [ + "test.coverage.txt" + ], + "timestamp": "2023-12-05T17:36:13.084709" + }, + "scale": { + "content": 
[ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:36:05.962006" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/tests/nextflow.config b/modules/nf-core/bedtools/genomecov/tests/nextflow.config new file mode 100644 index 00000000..bdb74ae5 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_GENOMECOV { + ext.prefix = { "${meta.id}.coverage" } + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml new file mode 100644 index 00000000..55fce478 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/genomecov: + - "modules/nf-core/bedtools/genomecov/**" From 8c32fe124df1a95c4ce857ad5cac262b4ec16d63 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 30 Jan 2024 01:20:32 +0100 Subject: [PATCH 02/14] add subsample_mt --- conf/modules/subsample_mt.config | 23 +++++++++++++++++++++++ nextflow.config | 3 ++- subworkflows/local/subsample_mt.nf | 22 ++++++++++++++++++++++ workflows/raredisease.nf | 3 +++ 4 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 conf/modules/subsample_mt.config create mode 100644 subworkflows/local/subsample_mt.nf diff --git a/conf/modules/subsample_mt.config b/conf/modules/subsample_mt.config new file mode 100644 index 00000000..079affe9 --- /dev/null +++ b/conf/modules/subsample_mt.config @@ -0,0 +1,23 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file 
for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Subsample MT +// + +process { + withName: '.*BEDTOOLS_GENOMECOV' { + ext.args = { "-dz" } + ext.prefix = { "${meta.id}" } + } +} diff --git a/nextflow.config b/nextflow.config index 681972ca..c851ac89 100644 --- a/nextflow.config +++ b/nextflow.config @@ -243,7 +243,7 @@ if (!params.igenomes_ignore) { // Load nf-core/raredisease custom config try { - includeConfig "${params.custom_config_base}/pipeline/raredisease.config" + includeConfig "https://raw.githubusercontent.com/nf-core/configs/683bae73d91856a97cd1dbbfe3e3dde4ae5373a1/pipeline/raredisease.config" } catch (Exception e) { System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config") } @@ -337,6 +337,7 @@ includeConfig 'conf/modules/call_mobile_elements.config' includeConfig 'conf/modules/annotate_mobile_elements.config' includeConfig 'conf/modules/generate_clinical_set.config' includeConfig 'conf/modules/variant_evaluation.config' +includeConfig 'conf/modules/subsample_mt.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf new file mode 100644 index 00000000..6ffe8c98 --- /dev/null +++ b/subworkflows/local/subsample_mt.nf @@ -0,0 +1,22 @@ +// +// A subworkflow to subsample MT alignments +// + 
+include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main' + +workflow SUBSAMPLE_MT { + + take: + ch_mt_marked_bam // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] + + main: + + ch_mt_marked_bam.map {meta, bam -> return [meta, bam, []]}.set {ch_genomecov_in} + + BEDTOOLS_GENOMECOV (ch_genomecov_in, [], []) + + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + + emit: + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 9f402370..1ee3f1c9 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -140,6 +140,7 @@ include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkf include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants' include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' +include { SUBSAMPLE_MT } from '../subworkflows/local/subsample_mt' include { VARIANT_EVALUATION } from '../subworkflows/local/variant_evaluation' /* @@ -362,6 +363,8 @@ workflow RAREDISEASE { .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) + SUBSAMPLE_MT(ch_mapped.mt_marked_bam) + // // BAM QUALITY CHECK // From f814060f4f21761d58909d99a59fa6b20908efca Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 02:55:26 +0100 Subject: [PATCH 03/14] add subsample --- conf/modules/subsample_mt.config | 21 ++++++++++- modules/local/calculate_seed_fraction.nf | 45 ++++++++++++++++++++++++ subworkflows/local/subsample_mt.nf | 26 +++++++++++--- workflows/raredisease.nf | 3 +- 4 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 modules/local/calculate_seed_fraction.nf diff --git a/conf/modules/subsample_mt.config b/conf/modules/subsample_mt.config index 079affe9..7ce80bed 100644 --- 
a/conf/modules/subsample_mt.config +++ b/conf/modules/subsample_mt.config @@ -16,8 +16,27 @@ // process { - withName: '.*BEDTOOLS_GENOMECOV' { + withName: '.*SUBSAMPLE_MT:BEDTOOLS_GENOMECOV' { ext.args = { "-dz" } ext.prefix = { "${meta.id}" } } + + withName: '.*SUBSAMPLE_MT:SAMTOOLS_VIEW' { + ext.args = { "--output-fmt BAM -h -F 4 -s ${meta.seedfrac}" } + ext.prefix = { "${meta.id}_mt_subsample" } + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SUBSAMPLE_MT:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } diff --git a/modules/local/calculate_seed_fraction.nf b/modules/local/calculate_seed_fraction.nf new file mode 100644 index 00000000..6d776e14 --- /dev/null +++ b/modules/local/calculate_seed_fraction.nf @@ -0,0 +1,45 @@ +process CALCULATE_SEED_FRACTION { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(meta), path(cov) + + output: + tuple val(meta), path("seedfrac.csv"), emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export MT_COVERAGE=`awk '{cov += \$3}END{ if (NR > 0) print cov / NR }' $cov` + + python -c "import os;print('%0.6f' % (30+ 150/float(os.environ['MT_COVERAGE'])))" >seedfrac.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_seed_fraction: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch seedfrac.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_seed_fraction: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf index 6ffe8c98..b8d3ff2f 100644 --- a/subworkflows/local/subsample_mt.nf +++ b/subworkflows/local/subsample_mt.nf @@ -2,20 +2,38 @@ // A subworkflow to subsample MT alignments // -include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main' +include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main' +include { CALCULATE_SEED_FRACTION } from '../../modules/local/calculate_seed_fraction' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow SUBSAMPLE_MT { take: - ch_mt_marked_bam // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] + ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] main: + ch_versions = Channel.empty() - ch_mt_marked_bam.map {meta, bam -> return [meta, bam, []]}.set {ch_genomecov_in} + ch_mt_bam_bai.map {meta, bam, bai -> return [meta, bam, -1]}.set 
{ch_genomecov_in} - BEDTOOLS_GENOMECOV (ch_genomecov_in, [], []) + BEDTOOLS_GENOMECOV (ch_genomecov_in, [], "genomecov") + + CALCULATE_SEED_FRACTION (BEDTOOLS_GENOMECOV.out.genomecov).csv + .join(ch_mt_bam_bai, failOnMismatch:true) + .map{meta, seedfrac, bam, bai -> + return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai] + } + .set { ch_subsample_in } + + SAMTOOLS_VIEW(ch_subsample_in, [[:],[]], []) + + SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam) ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + ch_versions = ch_versions.mix(CALCULATE_SEED_FRACTION.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) emit: versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 6884787d..eebb7886 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -363,8 +363,9 @@ workflow RAREDISEASE { .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) - SUBSAMPLE_MT(ch_mapped.mt_marked_bam) + SUBSAMPLE_MT(ch_mapped.mt_bam_bai) + return // // BAM QUALITY CHECK // From c05325a0d2d3f35db254a5f178e966839be5b312 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 02:55:54 +0100 Subject: [PATCH 04/14] fix return --- workflows/raredisease.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index eebb7886..ed9f62a5 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -365,7 +365,6 @@ workflow RAREDISEASE { SUBSAMPLE_MT(ch_mapped.mt_bam_bai) - return // // BAM QUALITY CHECK // From 4aa778dd2d28a8e188bc47623369358745802343 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 10:56:02 +0100 Subject: [PATCH 05/14] add params --- docs/output.md | 13 
++++++++++++ modules/local/calculate_seed_fraction.nf | 4 +++- nextflow.config | 3 +++ nextflow_schema.json | 25 ++++++++++++++++++++++-- subworkflows/local/subsample_mt.nf | 11 +++++++++-- workflows/raredisease.nf | 9 ++++++++- 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/docs/output.md b/docs/output.md index 6cc3ecf9..90ac6f51 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,6 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Duplicate marking](#duplicate-marking) - [Picard's MarkDuplicates](#picards-markduplicates) - [Sentieon Dedup](#sentieon-dedup) + - [Subsample mitochondrial alignments](#subsample-mitochondrial-alignments) - [Quality control and reporting](#quality-control-and-reporting) - [Quality control](#quality-control) - [FastQC](#fastqc) @@ -115,6 +116,18 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `*.metrics`: Text file containing the dedup metrics. +#### Subsample mitochondrial alignments + +[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user-specified coverage. + +<details markdown="1">
+<summary>Output files from Alignment</summary> + +- `{outputdir}/alignment/` + - `<sampleid>_mt_subsample.bam`: Alignment file in bam format. + - `<sampleid>_mt_subsample.bam.bai`: Index of the corresponding bam file. +</details>
+ ### Quality control and reporting #### Quality control diff --git a/modules/local/calculate_seed_fraction.nf b/modules/local/calculate_seed_fraction.nf index 6d776e14..1c367b38 100644 --- a/modules/local/calculate_seed_fraction.nf +++ b/modules/local/calculate_seed_fraction.nf @@ -9,6 +9,8 @@ process CALCULATE_SEED_FRACTION { input: tuple val(meta), path(cov) + val rd + val seed output: tuple val(meta), path("seedfrac.csv"), emit: csv @@ -23,7 +25,7 @@ process CALCULATE_SEED_FRACTION { """ export MT_COVERAGE=`awk '{cov += \$3}END{ if (NR > 0) print cov / NR }' $cov` - python -c "import os;print('%0.6f' % (30+ 150/float(os.environ['MT_COVERAGE'])))" >seedfrac.csv + python -c "import os;print('%0.6f' % ($seed+ min($rd/float(os.environ['MT_COVERAGE']), 0.999999)))" >seedfrac.csv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index a18da1fb..69dd2cae 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { skip_sv_annotation = false skip_me_annotation = false skip_mt_annotation = false + skip_mt_subsample = false skip_vcf2cytosure = true skip_vep_filter = false gens_switch = false @@ -53,6 +54,8 @@ params { // Alignment aligner = 'bwamem2' min_trimmed_length = 40 + mt_subsample_rd = 150 + mt_subsample_seed = 30 rmdup = false // Variant calling diff --git a/nextflow_schema.json b/nextflow_schema.json index 74b35fd8..79045f6d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -27,7 +27,8 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "results" }, "email": { "type": "string", @@ -212,7 +213,8 @@ "type": "string", "description": "Name of the mitochondrial contig in the reference fasta file", "help_text": "Used to extract relevant information from the references to analyse mitochondria", - "fa_icon": "fas fa-align-center" + "fa_icon": "fas fa-align-center", + "default": "chrM" }, "mobile_element_references": { "type": "string", @@ -505,6 +507,11 @@ "description": "Specifies whether or not to skip annotation of mitochondrial variants.", "fa_icon": "fas fa-toggle-on" }, + "skip_mt_subsample": { + "type": "boolean", + "description": "Specifies whether or not to skip the subsampling of mitochondrial alignments.", + "fa_icon": "fas fa-toggle-on" + }, "skip_snv_annotation": { "type": "boolean", "description": "Specifies whether or not to skip annotate SNV subworkflow.", @@ -549,6 +556,20 @@ "help_text": "Minimum length of reads after adapter trimming. Shorter reads are discarded. The program default is 15 bp. ", "fa_icon": "fas fa-less-than" }, + "mt_subsample_rd": { + "type": "integer", + "default": 150, + "description": "Expected coverage to subsample mt alignment to.", + "help_text": "To know more about this parameter check samtools' view documentation.", + "fa_icon": "fas fa-less-than" + }, + "mt_subsample_seed": { + "type": "integer", + "default": 30, + "description": "Subsampling seed used to influence which subset of mitochondrial reads is kept. 
", + "help_text": "To know more about this parameter check samtools' view documentation.", + "fa_icon": "fas fa-less-than" + }, "rmdup": { "type": "boolean", "description": "Specifies whether duplicates reads should be removed prior to variant calling.", diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf index b8d3ff2f..dd6b87b0 100644 --- a/subworkflows/local/subsample_mt.nf +++ b/subworkflows/local/subsample_mt.nf @@ -10,7 +10,9 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/m workflow SUBSAMPLE_MT { take: - ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + val_mt_subsample_rd // channel: [mandatory] [ val(read_dept) ] + val_mt_subsample_seed // channel: [mandatory] [ val(seed) ] main: ch_versions = Channel.empty() @@ -19,7 +21,12 @@ workflow SUBSAMPLE_MT { BEDTOOLS_GENOMECOV (ch_genomecov_in, [], "genomecov") - CALCULATE_SEED_FRACTION (BEDTOOLS_GENOMECOV.out.genomecov).csv + CALCULATE_SEED_FRACTION ( + BEDTOOLS_GENOMECOV.out.genomecov, + val_mt_subsample_rd, + val_mt_subsample_seed + ) + .csv .join(ch_mt_bam_bai, failOnMismatch:true) .map{meta, seedfrac, bam, bai -> return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai] diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index ed9f62a5..01a9bb88 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -363,7 +363,14 @@ workflow RAREDISEASE { .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) - SUBSAMPLE_MT(ch_mapped.mt_bam_bai) + if (params.skip_mt_subsample) { + SUBSAMPLE_MT( + ch_mapped.mt_bam_bai, + params.mt_subsample_rd, + params.mt_subsample_seed + ) + ch_versions = ch_versions.mix(SUBSAMPLE_MT.out.versions) + } // // BAM QUALITY CHECK From 05be462f9a4e2172563e162c6f4d6ede1f208e71 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: 
Fri, 2 Feb 2024 11:00:44 +0100 Subject: [PATCH 06/14] update conditional --- workflows/raredisease.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 01a9bb88..650ca446 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -363,7 +363,7 @@ workflow RAREDISEASE { .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) - if (params.skip_mt_subsample) { + if (!params.skip_mt_subsample) { SUBSAMPLE_MT( ch_mapped.mt_bam_bai, params.mt_subsample_rd, From a722bc194df30865053a1824d8f3b153feb76dd9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:04:59 +0100 Subject: [PATCH 07/14] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55ea2691..17fc2667 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471) - A new parameter `vep_plugin_files` to supply files required by vep plugins [#482](https://github.com/nf-core/raredisease/pull/482) - New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483) +- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). 
### `Changed` From 0273a3757728a55791198729f2b46ac8368d4183 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:45:56 +0100 Subject: [PATCH 08/14] chromograph viz --- conf/modules/qc_bam.config | 5 +++++ subworkflows/local/qc_bam.nf | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config index 61daf4d6..93b0f2db 100644 --- a/conf/modules/qc_bam.config +++ b/conf/modules/qc_bam.config @@ -50,6 +50,11 @@ process { ext.args = '-clip' } + withName: '.*QC_BAM:CHROMOGRAPH_COV' { + ext.args = '--euploid --step 500' + ext.prefix = { "${meta2.id}_chromographcov" } + } + withName: '.*QC_BAM:MOSDEPTH' { ext.args = '--d4' ext.prefix = { "${meta.id}_mosdepth" } diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index d4ae2842..9bb5a2a4 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -4,13 +4,14 @@ include { PICARD_COLLECTMULTIPLEMETRICS } from '../../modules/nf-core/picard/collectmultiplemetrics/main' include { PICARD_COLLECTHSMETRICS } from '../../modules/nf-core/picard/collecthsmetrics/main' +include { CHROMOGRAPH as CHROMOGRAPH_COV } from '../../modules/nf-core/chromograph/main' include { QUALIMAP_BAMQC } from '../../modules/nf-core/qualimap/bamqc/main' include { TIDDIT_COV } from '../../modules/nf-core/tiddit/cov/main' include { MOSDEPTH } from '../../modules/nf-core/mosdepth/main' include { UCSC_WIGTOBIGWIG } from '../../modules/nf-core/ucsc/wigtobigwig/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main' -include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' 
include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' include { NGSBITS_SAMPLEGENDER } from '../../modules/nf-core/ngsbits/samplegender/main' @@ -51,6 +52,8 @@ workflow QC_BAM { UCSC_WIGTOBIGWIG (TIDDIT_COV.out.wig, ch_chrom_sizes) + CHROMOGRAPH_COV([[:],[]], TIDDIT_COV.out.wig, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + ch_bam_bai.map{ meta, bam, bai -> [meta, bam, bai, []]}.set{ch_mosdepth_in} MOSDEPTH (ch_mosdepth_in, ch_genome_fasta) From 8e39cb8e2c0fc27a5356b6760604ae3afea3a90d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:06:20 +0100 Subject: [PATCH 09/14] update chromograph --- modules.json | 2 +- modules/nf-core/chromograph/main.nf | 4 +- modules/nf-core/chromograph/meta.yml | 2 +- .../nf-core/chromograph/tests/main.nf.test | 38 +++++++++++++++++++ modules/nf-core/chromograph/tests/tags.yml | 2 + 5 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/chromograph/tests/main.nf.test create mode 100644 modules/nf-core/chromograph/tests/tags.yml diff --git a/modules.json b/modules.json index 2b008539..ffdf8a5f 100644 --- a/modules.json +++ b/modules.json @@ -87,7 +87,7 @@ }, "chromograph": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "f05512229a501df5d67273bb7358c5f69667f40d", "installed_by": ["modules"] }, "cnvnator/cnvnator": { diff --git a/modules/nf-core/chromograph/main.nf b/modules/nf-core/chromograph/main.nf index dd67e1ba..e1374e48 100644 --- a/modules/nf-core/chromograph/main.nf +++ b/modules/nf-core/chromograph/main.nf @@ -4,8 +4,8 @@ process CHROMOGRAPH { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_1': - 'biocontainers/chromograph:1.3.1--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_2': + 'biocontainers/chromograph:1.3.1--pyhdfd78af_2' }" input: tuple val(meta), path(autozyg) diff --git a/modules/nf-core/chromograph/meta.yml b/modules/nf-core/chromograph/meta.yml index 0fd7250f..6540d9f0 100644 --- a/modules/nf-core/chromograph/meta.yml +++ b/modules/nf-core/chromograph/meta.yml @@ -12,7 +12,7 @@ tools: description: "Chromograph is a python package to create PNG images from genetics data such as BED and WIG files." homepage: "https://github.com/Clinical-Genomics/chromograph" documentation: "https://github.com/Clinical-Genomics/chromograph/blob/master/README.md" - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map diff --git a/modules/nf-core/chromograph/tests/main.nf.test b/modules/nf-core/chromograph/tests/main.nf.test new file mode 100644 index 00000000..caba8829 --- /dev/null +++ b/modules/nf-core/chromograph/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CHROMOGRAPH" + script "modules/nf-core/chromograph/main.nf" + process "CHROMOGRAPH" + tag "modules" + tag "modules_nfcore" + tag "chromograph" + + test("test_chromograph_sites") { + + when { + process { + """ + input[0] = [[:],[]] + input[1] = [[:],[]] + input[2] = [[:],[]] + input[3] = [[:],[]] + input[4] = [[:],[]] + input[5] = [[:],[]] + input[6] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['genome']['updsites_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert process.out.plots.get(0).get(1) ==~ ".*/test"} + ) + } + + } + +} diff --git a/modules/nf-core/chromograph/tests/tags.yml b/modules/nf-core/chromograph/tests/tags.yml new file mode 100644 index 00000000..e60ad9db --- /dev/null +++ b/modules/nf-core/chromograph/tests/tags.yml @@ 
-0,0 +1,2 @@ +chromograph: + - "modules/nf-core/chromograph/**" From c18ee8ef42c37eaa1496c4ea06130102974c8b73 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:06:56 +0100 Subject: [PATCH 10/14] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17fc2667..db07d7eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - A new parameter `vep_plugin_files` to supply files required by vep plugins [#482](https://github.com/nf-core/raredisease/pull/482) - New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483) - Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). +- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507) ### `Changed` From e50e30c52fc5ea7f250d6d960b19cf8ce2a69258 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:16:50 +0100 Subject: [PATCH 11/14] update output docs --- docs/output.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/output.md b/docs/output.md index 90ac6f51..7004fc1a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -28,6 +28,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Mosdepth](#mosdepth) - [Picard tools](#picard-tools) - [Qualimap](#qualimap) + - [Chromograph coverage](#chromograph-coverage) - [Sention WgsMetricsAlgo](#sention-wgsmetricsalgo) - [TIDDIT's cov and UCSC WigToBigWig](#tiddits-cov-and-ucsc-wigtobigwig) - [Reporting](#reporting) @@ -196,6 +197,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - 
`{outputdir}/qc_bam/_qualimap/` this directory includes a qualimap report and associated raw statistic files. You can open the .html file in your internet browser to see the in-depth report. +##### Chromograph coverage + +[Chromograph](https://github.com/Clinical-Genomics/chromograph) is a python package to create PNG images from genetics data such as BED and WIG files. + +
+Output files + +- `{outputdir}/qc_bam/_chromographcov/*.png` plots showing coverage across each chromosome. +
+ ##### Sention WgsMetricsAlgo [Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/) is the Sentieon's equivalent of Picard's CollectWgsMetrics. From a9c3d9cbe2d57972651f5bae2dfc7b8b47933e7c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:04:59 +0100 Subject: [PATCH 12/14] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index db07d7eb..d5135e7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483) - Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). - Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507) +- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). 
### `Changed` From fca0ac638b1f8c4aed3b84614ced7521965e14e2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 15:49:03 +0100 Subject: [PATCH 13/14] review suggestions --- docs/output.md | 2 +- subworkflows/local/subsample_mt.nf | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/output.md b/docs/output.md index 7004fc1a..e6e231e2 100644 --- a/docs/output.md +++ b/docs/output.md @@ -119,7 +119,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d #### Subsample mitochondrial alignments -[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. +[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
Output files from Alignment diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf index dd6b87b0..d8da5aae 100644 --- a/subworkflows/local/subsample_mt.nf +++ b/subworkflows/local/subsample_mt.nf @@ -25,13 +25,13 @@ workflow SUBSAMPLE_MT { BEDTOOLS_GENOMECOV.out.genomecov, val_mt_subsample_rd, val_mt_subsample_seed - ) - .csv - .join(ch_mt_bam_bai, failOnMismatch:true) - .map{meta, seedfrac, bam, bai -> - return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai] - } - .set { ch_subsample_in } + ) + .csv + .join(ch_mt_bam_bai, failOnMismatch:true) + .map{meta, seedfrac, bam, bai -> + return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai] + } + .set { ch_subsample_in } SAMTOOLS_VIEW(ch_subsample_in, [[:],[]], []) From cf6c313a9231c3c9695b443cf098d0ec6f5b9e52 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 2 Feb 2024 15:57:52 +0100 Subject: [PATCH 14/14] fix lint error --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index e6e231e2..37f7d4f8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -119,7 +119,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d #### Subsample mitochondrial alignments -[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps. +[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user specified coverage. The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
Output files from Alignment