Skip to content

Commit

Permalink
feat: filter manta calls (#1371)
Browse files Browse the repository at this point in the history
## Added
- bcftools filter to all Manta rules to remove all variants with support from fewer than 4 reads.
- "--exome" flag to Manta runs on TGA cases to remove MaxDepth filter

## Changed
None

## Fixed
None

## Removed
- MaxDepth from list of filters to keep in bcftools view commands
  • Loading branch information
mathiasbio authored Feb 10, 2024
1 parent 413189e commit 511d870
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 27 deletions.
13 changes: 13 additions & 0 deletions BALSAMIC/constants/variant_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,19 @@
"description": "General purpose filters used for filtering tnscope and tnhaplotyper",
}

# Manta bcftools filters
MANTA_FILTER_SETTINGS = {
"low_pr_sr_count": {
"tag_value": 4,
"filter_name": "low_pr_sr_count",
"field": "FORMAT",
},
"varcaller_name": "Manta",
"filter_type": "general",
"analysis_type": "tumor_only,tumor_normal",
"description": "Bcftools filters to set frequency and minimum read support for SV calls",
}

# Configuration for SVDB settings:
SVDB_FILTER_SETTINGS = {
"swegen_sv_freq": {
Expand Down
4 changes: 4 additions & 0 deletions BALSAMIC/constants/workflow_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@
]
),
},
"manta": {
"wgs_settings": "",
"tga_settings": "--exome",
},
"vardict": {
"allelic_frequency": "0.001",
"max_pval": "0.9",
Expand Down
26 changes: 26 additions & 0 deletions BALSAMIC/models/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Optional

from pydantic import BaseModel, ConfigDict
from BALSAMIC.constants.analysis import SequencingType


class ParamsCommon(BaseModel):
Expand All @@ -24,6 +25,18 @@ class ParamsCommon(BaseModel):
picard_RG_tumor: str


class ParamsManta(BaseModel):
"""This class defines the params settings used as constants in Manta rule.
Attributes:
wgs_settings: str(required). parameters for Manta analysis for WGS
tga_settings: str(required). parameters for Manta analysis for TGA
"""

wgs_settings: str
tga_settings: str


class ParamsVardict(BaseModel):
"""This class defines the params settings used as constants in vardict rule.
Expand Down Expand Up @@ -149,22 +162,33 @@ class BalsamicWorkflowConfig(BaseModel):
Attributes:
common: global params defined across all rules in balsamic workflow
manta: params used in the manta rules
umicommon: global params defined across specific rules in UMI workflow
vep: global params defined in the rule vep
vardict: params defined in the rule vardict
umiextract : params defined in the rule sentieon_umiextract
umiconsensuscall: params defined in the rule sentieon_consensuscall
tnscope_umi: params defined in the rule sentieon_tnscope_umi
Functions:
- get_manta_settings: Return setting for manta rule
"""

common: ParamsCommon
manta: ParamsManta
vardict: ParamsVardict
vep: ParamsVEP
umicommon: UMIParamsCommon
umiextract: UMIParamsUMIextract
umiconsensuscall: UMIParamsConsensuscall
tnscope_umi: UMIParamsTNscope

def get_manta_settings(self, sequencing_type) -> str:
"""Return correct setting for manta rules depending on sequencing type."""
if sequencing_type == SequencingType.WGS:
return self.manta.wgs_settings
return self.manta.tga_settings


class VCFAttributes(BaseModel):
"""General purpose filter to manage various VCF attributes
Expand Down Expand Up @@ -205,6 +229,7 @@ class VarCallerFilter(BaseModel):
swegen_sv_freq: VCFAttributes (optional); maximum swegen sv allele frequency
loqusdb_clinical_snv_freq: VCFAttributes (optional); maximum loqusdb clinical snv allele frequency
loqusdb_clinical_sv_freq: VCFAttributes (optional); maximum loqusdb clinical sv allele frequency
low_pr_sr_count: VCFAttributes (optional); minumum Manta variant read support
varcaller_name: str (required); variant caller name
filter_type: str (required); filter name for variant caller
analysis_type: str (required); analysis type e.g. tumor_normal or tumor_only
Expand All @@ -224,6 +249,7 @@ class VarCallerFilter(BaseModel):
swegen_sv_freq: Optional[VCFAttributes] = None
loqusdb_clinical_snv_freq: Optional[VCFAttributes] = None
loqusdb_clinical_sv_freq: Optional[VCFAttributes] = None
low_pr_sr_count: Optional[VCFAttributes] = None
varcaller_name: str
filter_type: str
analysis_type: str
Expand Down
8 changes: 3 additions & 5 deletions BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ rule bcftools_filter_sv_research:
"Filtering merged research structural and copy number variants using bcftools for {params.case_name}"
shell:
"""
bcftools view --threads {threads} -f .,PASS,MaxDepth {input.vcf_sv_research} |\
bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\
bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb};
bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' {input.vcf_sv_research} |\
bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb};
tabix -p vcf -f {output.vcf_pass_svdb};
Expand Down Expand Up @@ -56,10 +55,9 @@ rule bcftools_filter_sv_clinical:
shell:
"""
bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_sv_clinical} |\
bcftools view --threads {threads} -f .,PASS,MaxDepth |\
bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\
bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\
bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb};
bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb};
tabix -p vcf -f {output.vcf_pass_svdb};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

rule bcftools_quality_filter_svdb:
input:
vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
Expand All @@ -15,7 +16,7 @@ rule bcftools_quality_filter_svdb:
"Filtering merged research structural and copy number variants using bcftools for {params.case_name}"
shell:
"""
bcftools view --threads {threads} -f .,PASS,MaxDepth -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb};
bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb};

tabix -p vcf -f {output.vcf_pass_svdb_research};
"""
"""
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ rule manta_tumor_normal:
Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
settings = params.get_manta_settings(sequencing_type=sequencing_type),
runmode = "local",
tumor = config_model.get_sample_name_by_type(SampleType.TUMOR),
normal = config_model.get_sample_name_by_type(SampleType.NORMAL),
case_name = case_id,
manta_install_path = "/opt/conda/share/manta-1.6.0-2"
manta_install_path = "/opt/conda/share/manta-1.6.0-2",
low_pr_sr_count_value = MANTA_FILTERS.low_pr_sr_count.tag_value,
low_pr_sr_count_filter_name = MANTA_FILTERS.low_pr_sr_count.filter_name,
threads:
get_threads(cluster_config, "manta_tumor_normal")
message:
Expand All @@ -31,6 +34,7 @@ rule manta_tumor_normal:
samtools_path=$(readlink -f $(which samtools))
configManta.py \
{params.settings} \
--normalBam={input.bamN} \
--tumorBam={input.bamT} \
--referenceFasta={input.fa} \
Expand All @@ -43,7 +47,9 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};
{input.fa} \
{params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf;
bgzip -l 9 -c {params.tmpdir}/results/variants/somaticSV_converted.vcf > {output.final};
bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ;
bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count_value}' --soft-filter '{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz
tabix -p vcf -f {output.final};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ rule manta_tumor_only:
Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
settings = params.get_manta_settings(sequencing_type=sequencing_type),
runmode = "local",
tumor = config_model.get_sample_name_by_type(SampleType.TUMOR),
case_name = config["analysis"]["case_id"],
manta_install_path= "/opt/conda/share/manta-1.6.0-2"
manta_install_path= "/opt/conda/share/manta-1.6.0-2",
low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name],
threads:
get_threads(cluster_config, "manta_tumor_only")
message:
Expand All @@ -29,6 +31,7 @@ rule manta_tumor_only:
samtools_path=$(readlink -f $(which samtools))
configManta.py \
{params.settings} \
--tumorBam={input.bamT} \
--referenceFasta={input.fa} \
--runDir={params.tmpdir};
Expand All @@ -40,7 +43,9 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};
{input.fa} \
{params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf;
bgzip -l 9 -c {params.tmpdir}/results/variants/tumorSV_converted.vcf > {output.final};
bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf;
bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz
tabix -p vcf -f {output.final};
Expand Down
27 changes: 14 additions & 13 deletions BALSAMIC/workflows/balsamic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from BALSAMIC.constants.variant_filters import (
SENTIEON_VARCALL_SETTINGS,
SVDB_FILTER_SETTINGS,
VARDICT_SETTINGS,
MANTA_FILTER_SETTINGS,
)
from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS, SLEEP_BEFORE_START
from BALSAMIC.models.config import ConfigModel
Expand Down Expand Up @@ -112,6 +113,19 @@ else:
status_to_sample_id = "TUMOR" + "\\\\t" + tumor_sample


# Varcaller filter settings
COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS)
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS)
SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS)
SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS)
MANTA_FILTERS = VarCallerFilter.model_validate(MANTA_FILTER_SETTINGS)

# Fastp parameters
fastp_parameters: Dict = get_fastp_parameters(config_model)

# parse parameters as constants to workflows
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# vcfanno annotations
research_annotations.append( {
'annotation': [{
Expand Down Expand Up @@ -201,19 +215,6 @@ if "swegen_sv_frequency" in config["reference"]:
swegen_sv: str = get_swegen_sv(config)



# Varcaller filter settings
COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS)
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS)
SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS)
SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS)

# Fastp parameters
fastp_parameters: Dict = get_fastp_parameters(config_model)

# parse parameters as constants to workflows
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# Capture kit name
if config["analysis"]["sequencing_type"] != "wgs":
capture_kit = os.path.split(config["panel"]["capture_kit"])[1]
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[X.X.X]
-------

Added:
^^^^^^
* bcftools filters for PR:SR evidence in Manta calls
* "--exome" argument to Manta runs in TGA cases

Removed:
^^^^^^^^
* Extra bcftools filters that allows MaxDepth filtered variants in the final SV VCF

[13.0.1]
-------

Expand Down
6 changes: 5 additions & 1 deletion docs/balsamic_sv_cnv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The copy number variants, identified using ascatNgs and `dellycnv`, are converte
Tumor and normal calls in `TIDDIT` are merged using `SVDB` with `--bnd_distance 500` and `--overlap = 0.80`.
Using a custom made script "filter_SVs.py", soft-filters are added to the calls based on the presence of the variant in the normal, with the goal of retaining only somatic variants as PASS.

Manta calls are filtered using bcftools to only keep variants that have evidence from 3 or more reads.

.. list-table:: SV filters
:widths: 25 25 40
Expand All @@ -78,6 +79,9 @@ Using a custom made script "filter_SVs.py", soft-filters are added to the calls
* - TIDDIT
- in_normal
- ctg_n == True and AF_N_MAX == 0 and AF_T_MAX <= 0.25
* - Manta
- low_pr_sr_count
- SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < 4.0


Further information regarding the TIDDIT tumor normal filtration: As translocation variants are represented by 2 BNDs in the VCF which allows for mixed assignment of soft-filters, a requirement for assigning soft-filters to translocations is that neither BND is PASS.
Expand Down Expand Up @@ -138,7 +142,7 @@ The following filter applies for both tumor-normal and tumor-only samples in add

Frq <= 0.02 (or) Frq == "."

The variants scored as `PASS` or `MaxDepth` are included in the final vcf file (`SNV.somatic.<CASE_ID>.svdb.<research/clinical>.filtered.pass.vcf.gz`).
The variants scored as `PASS` are included in the final vcf file (`SNV.somatic.<CASE_ID>.svdb.<research/clinical>.filtered.pass.vcf.gz`).

The following command can be used to fetch the variants identified by a specific caller from merged structural and copy number variants.

Expand Down
43 changes: 43 additions & 0 deletions tests/models/test_params_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
import pytest
from pydantic import ValidationError

from BALSAMIC.constants.analysis import SequencingType
from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS
from BALSAMIC.models.params import BalsamicWorkflowConfig
from BALSAMIC.models.config import VarcallerAttribute
from BALSAMIC.models.params import (
ParamsManta,
ParamsVardict,
ParamsVEP,
QCModel,
Expand All @@ -18,6 +22,45 @@
)


def test_params_manta():
"""Test Manta settings model for correct validation."""

# GIVEN Manta params
test_manta_params = {"wgs_settings": "", "tga_settings": "--exome"}

# WHEN building the model
test_manta_built = ParamsManta(**test_manta_params)

# THEN string values should be correctly populated into the model
assert test_manta_built.tga_settings == "--exome"
assert test_manta_built.wgs_settings == ""


def test_get_manta_settings_tga():
"""Test get Manta settings based on sequencing type TGA."""

# GIVEN workflow params
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# WHEN getting manta settings for TGA
manta_settings = params.get_manta_settings(SequencingType.TARGETED)

# THEN manta setting should specify exome argument
assert manta_settings == "--exome"


def test_get_manta_settings_wgs():
"""Test get Manta settings based on sequencing type WGS."""
# GIVEN workflow params
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# WHEN getting manta settings for WGS
manta_settings = params.get_manta_settings(SequencingType.WGS)

# THEN manta setting should be empty
assert manta_settings == ""


def test_params_vardict():
"""test UMIParamsVardict model for correct validation"""

Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@
"single"
],
"sequencing_type": [
"targeted",
"wgs"
"wgs",
"targeted"
],
"workflow_solution": [
"BALSAMIC"
Expand Down

0 comments on commit 511d870

Please sign in to comment.