Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: filter manta calls #1371

Merged
merged 36 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8d348cf
add rule sleep before starting analysis
mathiasbio Nov 1, 2023
5b3c2bd
merge release 13
mathiasbio Jan 4, 2024
f184460
increase time to 200 sec
mathiasbio Jan 4, 2024
3ac67a5
add sleep requirement to concatenate umi
mathiasbio Jan 4, 2024
11064da
fix qc workflow
mathiasbio Jan 15, 2024
0c200e0
increase to 5 mins
mathiasbio Jan 22, 2024
6e4918c
Merge branch 'master' into add_sleep_rule_before_start
mathiasbio Jan 22, 2024
1054394
changelog
mathiasbio Jan 22, 2024
9e0a4bb
remove unnecessary variables in smks
mathiasbio Jan 22, 2024
259bbed
fix string
mathiasbio Jan 22, 2024
be3cf2c
changelog version bump
mathiasbio Jan 22, 2024
17e55fd
fix pr link
mathiasbio Jan 22, 2024
f65b07b
adding exome mode to targeted, and removing max depth for everything …
mathiasbio Jan 26, 2024
f55705d
Merge branch 'add_sleep_rule_before_start' into fix_max_depth
mathiasbio Jan 26, 2024
def3631
add new cluster config entries for new rules
mathiasbio Jan 26, 2024
3109e5b
add new rule to model
mathiasbio Jan 26, 2024
8613431
black and pytest
mathiasbio Jan 26, 2024
fe818cd
add new filters
mathiasbio Jan 26, 2024
bef4a28
fix af filter
mathiasbio Jan 26, 2024
0b4fcdb
fix bug
mathiasbio Feb 2, 2024
4039fb6
remove extra manta rule
mathiasbio Feb 2, 2024
6796193
lowered to 0.5% vaf
mathiasbio Feb 2, 2024
157cf5b
fix bug
mathiasbio Feb 2, 2024
2dc9487
fix
mathiasbio Feb 2, 2024
2dece5d
add changelog and merge from develop
mathiasbio Feb 6, 2024
65f029a
remove freq filter
mathiasbio Feb 7, 2024
b4ea679
fix bug
mathiasbio Feb 7, 2024
fb15ac5
fix bug
mathiasbio Feb 7, 2024
2873832
add new tests
mathiasbio Feb 7, 2024
24a96db
updated docs
mathiasbio Feb 7, 2024
5638f59
remove dsstore
mathiasbio Feb 7, 2024
d7e4e55
add to allow . in filters
mathiasbio Feb 8, 2024
b661656
add then
mathiasbio Feb 8, 2024
f2f12b4
fix bug
mathiasbio Feb 8, 2024
5c28c33
remove ds sneaky file and fix doc strings
mathiasbio Feb 8, 2024
2337ec0
split manta filter in two
mathiasbio Feb 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
13 changes: 13 additions & 0 deletions BALSAMIC/constants/variant_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,19 @@
"description": "General purpose filters used for filtering tnscope and tnhaplotyper",
}

# Manta bcftools filters
MANTA_FILTER_SETTINGS = {
"low_pr_sr_count": {
"tag_value": 4,
"filter_name": "low_pr_sr_count",
"field": "FORMAT",
},
"varcaller_name": "Manta",
"filter_type": "general",
"analysis_type": "tumor_only,tumor_normal",
"description": "Bcftools filters to set frequency and minimum read support for SV calls",
}

# Configuration for SVDB settings:
SVDB_FILTER_SETTINGS = {
"swegen_sv_freq": {
Expand Down
4 changes: 4 additions & 0 deletions BALSAMIC/constants/workflow_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@
]
),
},
"manta": {
"wgs_settings": "",
"tga_settings": "--exome",
},
"vardict": {
"allelic_frequency": "0.001",
"max_pval": "0.9",
Expand Down
27 changes: 27 additions & 0 deletions BALSAMIC/models/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Optional

from pydantic import BaseModel, ConfigDict
from BALSAMIC.constants.analysis import SequencingType


class ParamsCommon(BaseModel):
Expand All @@ -24,6 +25,18 @@ class ParamsCommon(BaseModel):
picard_RG_tumor: str


class ParamsManta(BaseModel):
"""This class defines the params settings used as constants in Manta rule.

Attributes:
wgs_settings: str(required). parameters for Manta analysis for WGS
tga_settings: str(required). parameters for Manta analysis for TGA
"""

wgs_settings: str
tga_settings: str


class ParamsVardict(BaseModel):
"""This class defines the params settings used as constants in vardict rule.

Expand Down Expand Up @@ -149,22 +162,34 @@ class BalsamicWorkflowConfig(BaseModel):

Attributes:
common: global params defined across all rules in balsamic workflow
manta: params used in the manta rules
umicommon: global params defined across specific rules in UMI workflow
vep: global params defined in the rule vep
vardict: params defined in the rule vardict
umiextract : params defined in the rule sentieon_umiextract
umiconsensuscall: params defined in the rule sentieon_consensuscall
tnscope_umi: params defined in the rule sentieon_tnscope_umi

Functions:
- get_manta_settings: Return setting for manta rule
"""

common: ParamsCommon
manta: ParamsManta
vardict: ParamsVardict
vep: ParamsVEP
umicommon: UMIParamsCommon
umiextract: UMIParamsUMIextract
umiconsensuscall: UMIParamsConsensuscall
tnscope_umi: UMIParamsTNscope

def get_manta_settings(self, sequencing_type) -> str:
"""Return correct setting for manta rules depending on sequencing type."""
if sequencing_type == SequencingType.WGS:
return self.manta.wgs_settings
else:
return self.manta.tga_settings


class VCFAttributes(BaseModel):
"""General purpose filter to manage various VCF attributes
Expand Down Expand Up @@ -205,6 +230,7 @@ class VarCallerFilter(BaseModel):
swegen_sv_freq: VCFAttributes (optional); maximum swegen sv allele frequency
loqusdb_clinical_snv_freq: VCFAttributes (optional); maximum loqusdb clinical snv allele frequency
loqusdb_clinical_sv_freq: VCFAttributes (optional); maximum loqusdb clinical sv allele frequency
low_pr_sr_count: VCFAttributes (optional); minumum Manta variant read support
varcaller_name: str (required); variant caller name
filter_type: str (required); filter name for variant caller
analysis_type: str (required); analysis type e.g. tumor_normal or tumor_only
Expand All @@ -224,6 +250,7 @@ class VarCallerFilter(BaseModel):
swegen_sv_freq: Optional[VCFAttributes] = None
loqusdb_clinical_snv_freq: Optional[VCFAttributes] = None
loqusdb_clinical_sv_freq: Optional[VCFAttributes] = None
low_pr_sr_count: Optional[VCFAttributes] = None
varcaller_name: str
filter_type: str
analysis_type: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ rule bcftools_filter_sv_research:
"Filtering merged research structural and copy number variants using bcftools for {params.case_name}"
shell:
"""
bcftools view --threads {threads} -f .,PASS,MaxDepth {input.vcf_sv_research} |\
bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\
bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb};
bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb};

tabix -p vcf -f {output.vcf_pass_svdb};

Expand Down Expand Up @@ -56,10 +55,9 @@ rule bcftools_filter_sv_clinical:
shell:
"""
bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_sv_clinical} |\
bcftools view --threads {threads} -f .,PASS,MaxDepth |\
bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\
bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\
bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb};
bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb};

tabix -p vcf -f {output.vcf_pass_svdb};

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

rule bcftools_quality_filter_svdb:
input:
vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
Expand All @@ -15,7 +16,7 @@ rule bcftools_quality_filter_svdb:
"Filtering merged research structural and copy number variants using bcftools for {params.case_name}"
shell:
"""
bcftools view --threads {threads} -f .,PASS,MaxDepth -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb};
bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb};

tabix -p vcf -f {output.vcf_pass_svdb_research};
"""
"""
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ rule manta_tumor_normal:
Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
settings = params.get_manta_settings(sequencing_type=sequencing_type),
runmode = "local",
tumor = config_model.get_sample_name_by_type(SampleType.TUMOR),
normal = config_model.get_sample_name_by_type(SampleType.NORMAL),
case_name = case_id,
manta_install_path = "/opt/conda/share/manta-1.6.0-2"
manta_install_path = "/opt/conda/share/manta-1.6.0-2",
low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name],
threads:
get_threads(cluster_config, "manta_tumor_normal")
message:
Expand All @@ -31,6 +33,7 @@ rule manta_tumor_normal:
samtools_path=$(readlink -f $(which samtools))

configManta.py \
{params.settings} \
--normalBam={input.bamN} \
--tumorBam={input.bamT} \
--referenceFasta={input.fa} \
Expand All @@ -43,7 +46,9 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};
{input.fa} \
{params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf;

bgzip -l 9 -c {params.tmpdir}/results/variants/somaticSV_converted.vcf > {output.final};
bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ;

bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz

tabix -p vcf -f {output.final};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ rule manta_tumor_only:
Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
settings = params.get_manta_settings(sequencing_type=sequencing_type),
runmode = "local",
tumor = config_model.get_sample_name_by_type(SampleType.TUMOR),
case_name = config["analysis"]["case_id"],
manta_install_path= "/opt/conda/share/manta-1.6.0-2"
manta_install_path= "/opt/conda/share/manta-1.6.0-2",
low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name],
threads:
get_threads(cluster_config, "manta_tumor_only")
message:
Expand All @@ -29,6 +31,7 @@ rule manta_tumor_only:
samtools_path=$(readlink -f $(which samtools))

configManta.py \
{params.settings} \
--tumorBam={input.bamT} \
--referenceFasta={input.fa} \
--runDir={params.tmpdir};
Expand All @@ -40,7 +43,9 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads};
{input.fa} \
{params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf;

bgzip -l 9 -c {params.tmpdir}/results/variants/tumorSV_converted.vcf > {output.final};
bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf;

bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz

tabix -p vcf -f {output.final};

Expand Down
27 changes: 14 additions & 13 deletions BALSAMIC/workflows/balsamic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from BALSAMIC.constants.variant_filters import (
SENTIEON_VARCALL_SETTINGS,
SVDB_FILTER_SETTINGS,
VARDICT_SETTINGS,
MANTA_FILTER_SETTINGS,
)
from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS, SLEEP_BEFORE_START
from BALSAMIC.models.config import ConfigModel
Expand Down Expand Up @@ -112,6 +113,19 @@ else:
status_to_sample_id = "TUMOR" + "\\\\t" + tumor_sample


# Varcaller filter settings
COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS)
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS)
SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS)
SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS)
MANTA_FILTERS = VarCallerFilter.model_validate(MANTA_FILTER_SETTINGS)

# Fastp parameters
fastp_parameters: Dict = get_fastp_parameters(config_model)

# parse parameters as constants to workflows
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# vcfanno annotations
research_annotations.append( {
'annotation': [{
Expand Down Expand Up @@ -201,19 +215,6 @@ if "swegen_sv_frequency" in config["reference"]:
swegen_sv: str = get_swegen_sv(config)



# Varcaller filter settings
COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS)
VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS)
SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS)
SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS)

# Fastp parameters
fastp_parameters: Dict = get_fastp_parameters(config_model)

# parse parameters as constants to workflows
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# Capture kit name
if config["analysis"]["sequencing_type"] != "wgs":
capture_kit = os.path.split(config["panel"]["capture_kit"])[1]
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[X.X.X]
-------

Added:
^^^^^^
* bcftools filters for PR:SR evidence in Manta calls
* "--exome" argument to Manta runs in TGA cases

Removed:
^^^^^^^^
* Extra bcftools filters that allows MaxDepth filtered variants in the final SV VCF

[13.0.1]
-------

Expand Down
6 changes: 5 additions & 1 deletion docs/balsamic_sv_cnv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The copy number variants, identified using ascatNgs and `dellycnv`, are converte
Tumor and normal calls in `TIDDIT` are merged using `SVDB` with `--bnd_distance 500` and `--overlap = 0.80`.
Using a custom made script "filter_SVs.py", soft-filters are added to the calls based on the presence of the variant in the normal, with the goal of retaining only somatic variants as PASS.

Manta calls are filtered using bcftools to only keep variants that have evidence from 3 or more reads.

.. list-table:: SV filters
:widths: 25 25 40
Expand All @@ -78,6 +79,9 @@ Using a custom made script "filter_SVs.py", soft-filters are added to the calls
* - TIDDIT
- in_normal
- ctg_n == True and AF_N_MAX == 0 and AF_T_MAX <= 0.25
* - Manta
- low_pr_sr_count
- SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < 4.0


Further information regarding the TIDDIT tumor normal filtration: As translocation variants are represented by 2 BNDs in the VCF which allows for mixed assignment of soft-filters, a requirement for assigning soft-filters to translocations is that neither BND is PASS.
Expand Down Expand Up @@ -138,7 +142,7 @@ The following filter applies for both tumor-normal and tumor-only samples in add

Frq <= 0.02 (or) Frq == "."

The variants scored as `PASS` or `MaxDepth` are included in the final vcf file (`SNV.somatic.<CASE_ID>.svdb.<research/clinical>.filtered.pass.vcf.gz`).
The variants scored as `PASS` are included in the final vcf file (`SNV.somatic.<CASE_ID>.svdb.<research/clinical>.filtered.pass.vcf.gz`).

The following command can be used to fetch the variants identified by a specific caller from merged structural and copy number variants.

Expand Down
43 changes: 43 additions & 0 deletions tests/models/test_params_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
import pytest
from pydantic import ValidationError

from BALSAMIC.constants.analysis import SequencingType
from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS
from BALSAMIC.models.params import BalsamicWorkflowConfig
from BALSAMIC.models.config import VarcallerAttribute
from BALSAMIC.models.params import (
ParamsManta,
ParamsVardict,
ParamsVEP,
QCModel,
Expand All @@ -18,6 +22,45 @@
)


def test_params_manta():
"""test Manta settings model for correct validation."""

# GIVEN Manta params
test_manta_params = {"wgs_settings": "", "tga_settings": "--exome"}

# WHEN building the model
test_manta_built = ParamsManta(**test_manta_params)

# THEN string values should be correctly populated into the model
assert test_manta_built.tga_settings == "--exome"
assert test_manta_built.wgs_settings == ""


def test_get_manta_settings_tga():
"""test get Manta settings based on sequencing type TGA."""

# GIVEN workflow params
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# WHEN getting manta settings for TGA
manta_settings = params.get_manta_settings(SequencingType.TARGETED)

# THEN manta setting should specify exome argument
assert manta_settings == "--exome"


def test_get_manta_settings_wgs():
"""test get Manta settings based on sequencing type WGS."""
# GIVEN workflow params
params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS)

# WHEN getting manta settings for WGS
manta_settings = params.get_manta_settings(SequencingType.WGS)

# THEN manta setting should be empty
assert manta_settings == ""


def test_params_vardict():
"""test UMIParamsVardict model for correct validation"""

Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@
"single"
],
"sequencing_type": [
"targeted",
"wgs"
"wgs",
"targeted"
],
"workflow_solution": [
"BALSAMIC"
Expand Down
Loading