From 8d348cf7f3a4f597e34736d047e6dd73250220ad Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 1 Nov 2023 15:11:15 +0100 Subject: [PATCH 01/32] add rule sleep before starting analysis --- BALSAMIC/constants/rules.py | 1 + BALSAMIC/constants/workflow_params.py | 2 ++ BALSAMIC/snakemake_rules/misc/sleep.rule | 15 +++++++++++++++ .../quality_control/fastp_tga.rule | 1 + .../quality_control/fastp_wgs.rule | 1 + .../snakemake_rules/quality_control/fastqc.rule | 1 + BALSAMIC/workflows/PON.smk | 6 +++++- BALSAMIC/workflows/QC.smk | 6 ++++-- BALSAMIC/workflows/balsamic.smk | 4 +++- CHANGELOG.rst | 2 ++ 10 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 BALSAMIC/snakemake_rules/misc/sleep.rule diff --git a/BALSAMIC/constants/rules.py b/BALSAMIC/constants/rules.py index a9d440b9c..2b96cf2d6 100644 --- a/BALSAMIC/constants/rules.py +++ b/BALSAMIC/constants/rules.py @@ -30,6 +30,7 @@ SNAKEMAKE_RULES: Dict[str, Dict[str, list]] = { "common": { + "misc": ["snakemake_rules/misc/sleep.rule"], "qc": [ "snakemake_rules/quality_control/fastqc.rule", "snakemake_rules/quality_control/multiqc.rule", diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 3c6d46614..fcf822b54 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,6 +108,8 @@ }, } +SLEEP_BEFORE_START = 120 + WORKFLOW_PARAMS = { "common": { "pcr_model": "NONE", diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule new file mode 100644 index 000000000..c97deb58d --- /dev/null +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -0,0 +1,15 @@ + +rule sleep_before_start: + """Wait 120s before starting any processing to avoid key_error issue.""" + output: + wake_up = result_dir + "start_analysis" + params: + sleep_seconds = seconds_before_start + threads: get_threads(cluster_config, "sleep_before_start") + message: + "Sleeping for {params.sleep_seconds} seconds before starting analysis." + shell: + """ +sleep {params.sleep_seconds} +echo "Waited: {params.sleep_seconds} seconds. Now starting analysis." >> {output.wake_up} + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule index a9abae72f..841eca850 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule @@ -3,6 +3,7 @@ rule fastp_umi_trim: """Fastq TGA data pre-processing to remove UMIs.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule index 743d50db3..0b4bba3dc 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule @@ -4,6 +4,7 @@ rule fastp_quality_trim_wgs: """Fastq data pre-processing for WGS.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index 493a892fd..4d1d895f5 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -4,6 +4,7 @@ rule fastqc: """Perform quality control checks on raw sequence data.""" input: + wake_up = result_dir + "start_analysis", fastq = input_fastq_dir + "{fastq_file_names}.fastq.gz" output: fastqc_zip = fastqc_dir + "{fastq_file_names}_fastqc.zip" diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index 134e5ff06..092029cb8 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -15,7 +15,7 @@ from BALSAMIC.constants.paths import BALSAMIC_DIR from BALSAMIC.constants.analysis import FastqName, SampleType, SequencingType, PONWorkflow, Gender from BALSAMIC.utils.io import write_finish_file from BALSAMIC.utils.rule import get_fastp_parameters, get_threads, get_result_dir -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.analysis import BalsamicWorkflowConfig, ConfigModel @@ -50,6 +50,9 @@ bam_dir: str = Path(result_dir, "bam", "").as_posix() + "/" cnv_dir: str = Path(result_dir, "cnv", "").as_posix() + "/" qc_dir: str = Path(result_dir, "qc", "").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START + # PON setting pon_workflow: PONWorkflow = config_model.analysis.pon_workflow @@ -83,6 +86,7 @@ if not Path(config["SENTIEON_EXEC"]).exists(): sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] +rules_to_include.append("snakemake_rules/misc/sleep.rule") if sequence_type == SequencingType.TARGETED: rules_to_include.append("snakemake_rules/quality_control/fastp_tga.rule") else: diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index c8bbb140f..49bba14fb 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -24,7 +24,7 @@ from BALSAMIC.utils.rule import (get_fastp_parameters, get_rule_output, get_resu get_script_path, get_threads, get_sequencing_type, get_capture_kit) -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START # Initialize ConfigModel config_model = ConfigModel.parse_obj(config) @@ -56,6 +56,8 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START # Run information singularity_image: str = config_model.singularity['image'] @@ -112,7 +114,7 @@ sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] for workflow_type, value in SNAKEMAKE_RULES.items(): if workflow_type in ["common", analysis_type + "_" + sequence_type]: - rules_to_include.extend(value.get("qc", []) + value.get("align", [])) + rules_to_include.extend(value.get("misc", []) + value.get("qc", []) + value.get("align", [])) rules_to_include = [rule for rule in rules_to_include if "umi" not in rule] if "snakemake_rules/quality_control/report.rule" in rules_to_include: rules_to_include = [rule for rule in rules_to_include if "quality_control/report.rule" not in rule] diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index eb7f4f4fc..8555b19c6 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -34,7 +34,7 @@ from BALSAMIC.utils.rule import (get_fastp_parameters, get_variant_callers, get_ from BALSAMIC.constants.analysis import MutationType, FastqName, SampleType from BALSAMIC.constants.variant_filters import (COMMON_SETTINGS, VARDICT_SETTINGS, SENTIEON_VARCALL_SETTINGS, SVDB_FILTER_SETTINGS) -from BALSAMIC.constants.workflow_params import (WORKFLOW_PARAMS, VARCALL_PARAMS) +from BALSAMIC.constants.workflow_params import (WORKFLOW_PARAMS, VARCALL_PARAMS, SLEEP_BEFORE_START) from BALSAMIC.constants.rules import SNAKEMAKE_RULES # Initialize ConfigModel @@ -71,6 +71,8 @@ delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" umi_dir: str = Path(result_dir, "umi").as_posix() + "/" umi_qc_dir: str = Path(qc_dir, "umi_qc").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START # Annotations research_annotations = [] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4a249a2c7..31c764fe0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -29,6 +29,8 @@ Added: * CNVs from PureCN to TGA workflow https://Clinical-Genomics/BALSAMIC/pull/1278 * CNVkit Panel of Normal for gmsmyeloid_5.3 to TGA workflow https://Clinical-Genomics/BALSAMIC/pull/1278 * Command-line arguments and rules for creation of GENS files https://github.com/Clinical-Genomics/BALSAMIC/pull/1279 +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 + Changed: From f18446069d1a0cfe5200c753053c44b9c1716791 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 4 Jan 2024 15:39:51 +0100 Subject: [PATCH 02/32] increase time to 200 sec --- BALSAMIC/constants/workflow_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 1d3988708..bfe5d26d4 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,7 +108,7 @@ }, } -SLEEP_BEFORE_START = 120 +SLEEP_BEFORE_START = 200 WORKFLOW_PARAMS = { "common": { From 3ac67a5ab1ceb7039ea716d02816b940722d8140 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 4 Jan 2024 15:50:36 +0100 Subject: [PATCH 03/32] add sleep requirement to concatenate umi --- BALSAMIC/snakemake_rules/umi/concatenation_umi.rule | 1 + 1 file changed, 1 insertion(+) diff --git a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule index 23de2c483..4f502e6dd 100644 --- a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule +++ b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule @@ -4,6 +4,7 @@ rule concatenate_umi_reads: input: + wake_up = result_dir + "start_analysis", fastqs_fwd=lambda wildcards: config_model.get_all_fastqs_for_sample( sample_name=wildcards.sample, fastq_types=[FastqName.FWD] ), From 11064da6985ba0784e05b1d556320ae1b27c5b73 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 15 Jan 2024 14:31:52 +0100 Subject: [PATCH 04/32] fix qc workflow --- BALSAMIC/workflows/QC.smk | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index df88e3b5b..6874ecf8b 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -57,12 +57,9 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" -<<<<<<< HEAD # Pre run parameters seconds_before_start: int = SLEEP_BEFORE_START -======= ->>>>>>> release_v13.0.0 # Run information singularity_image: str = config_model.singularity['image'] sample_names: List[str] = config_model.get_all_sample_names() @@ -118,15 +115,9 @@ sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] for workflow_type, value in SNAKEMAKE_RULES.items(): if workflow_type in ["common", analysis_type + "_" + sequence_type]: -<<<<<<< HEAD - rules_to_include.extend(value.get("misc", []) + value.get("qc", []) + value.get("align", [])) -rules_to_include = [rule for rule in rules_to_include if "umi" not in rule] -if "snakemake_rules/quality_control/report.rule" in rules_to_include: - rules_to_include = [rule for rule in rules_to_include if "quality_control/report.rule" not in rule] -======= - rules_to_include.extend(value.get("qc", []) + value.get("align", [])) + rules_to_include.extend(value.get("qc", []) + value.get("align", []) + value.get("misc", [])) rules_to_include = [rule for rule in rules_to_include if "umi" not in rule and "report" not in rule] ->>>>>>> release_v13.0.0 + # Somalier only implemented for hg38 and hg19 if "canfam3" in config["reference"]["reference_genome"]: From 0c200e06324f7b376911ae37ac51b4d50a170d3c Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 11:38:04 +0100 Subject: [PATCH 05/32] increase to 5 mins --- BALSAMIC/constants/workflow_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index bfe5d26d4..2c7f0b04a 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,7 +108,7 @@ }, } -SLEEP_BEFORE_START = 200 +SLEEP_BEFORE_START = 300 WORKFLOW_PARAMS = { "common": { From 105439450112f64dfa9293ad73b5d370744d4df6 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 11:41:47 +0100 Subject: [PATCH 06/32] changelog --- CHANGELOG.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 49f536249..7f81f168f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +[X.X.X] +------- + +Added: +^^^^^^ +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 + + [13.0.0] ------- @@ -34,7 +42,6 @@ Added: * CNV report for TGA workflow https://github.com/Clinical-Genomics/BALSAMIC/pull/1339 * `wkhtmltopdf` to system requirements https://github.com/Clinical-Genomics/BALSAMIC/pull/1339 * Store WGS CNV report plots https://github.com/Clinical-Genomics/BALSAMIC/pull/1347 -* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 Changed: From 9e0a4bb9c82e19aba23c28d2133b9d1dfbe79bfb Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 11:44:25 +0100 Subject: [PATCH 07/32] remove unnecessary variables in smks --- BALSAMIC/snakemake_rules/misc/sleep.rule | 2 +- BALSAMIC/workflows/PON.smk | 3 --- BALSAMIC/workflows/QC.smk | 3 --- BALSAMIC/workflows/balsamic.smk | 3 --- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule index c97deb58d..ba960cdab 100644 --- a/BALSAMIC/snakemake_rules/misc/sleep.rule +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -4,7 +4,7 @@ rule sleep_before_start: output: wake_up = result_dir + "start_analysis" params: - sleep_seconds = seconds_before_start + sleep_seconds = SLEEP_BEFORE_START threads: get_threads(cluster_config, "sleep_before_start") message: "Sleeping for {params.sleep_seconds} seconds before starting analysis." diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index d7f7827e1..63ae6472c 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -49,9 +49,6 @@ bam_dir: str = Path(result_dir, "bam", "").as_posix() + "/" cnv_dir: str = Path(result_dir, "cnv", "").as_posix() + "/" qc_dir: str = Path(result_dir, "qc", "").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # PON setting pon_workflow: PONWorkflow = config_model.analysis.pon_workflow diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index 6874ecf8b..b721be365 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -57,9 +57,6 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # Run information singularity_image: str = config_model.singularity['image'] sample_names: List[str] = config_model.get_all_sample_names() diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index e6b315eaa..25a1d6aee 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -83,9 +83,6 @@ delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" umi_dir: str = Path(result_dir, "umi").as_posix() + "/" umi_qc_dir: str = Path(qc_dir, "umi_qc").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # Annotations research_annotations = [] clinical_annotations = [] From 259bbed2ed950d627e2d799c5f36cc00c6c0201d Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 11:45:40 +0100 Subject: [PATCH 08/32] fix string --- BALSAMIC/snakemake_rules/misc/sleep.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule index ba960cdab..ddd79784e 100644 --- a/BALSAMIC/snakemake_rules/misc/sleep.rule +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -1,6 +1,6 @@ rule sleep_before_start: - """Wait 120s before starting any processing to avoid key_error issue.""" + """Wait the specified number of seconds before starting any processing to avoid key_error issue.""" output: wake_up = result_dir + "start_analysis" params: From be3cf2cac01e64d82838a824083a94e183dc18e4 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 12:18:05 +0100 Subject: [PATCH 09/32] changelog version bump --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7f81f168f..769753891 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,4 +1,4 @@ -[X.X.X] +[13.0.1] ------- Added: From 17e55fd0a18bafa216ea3194af788aa4a71554e0 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Mon, 22 Jan 2024 13:29:29 +0100 Subject: [PATCH 10/32] fix pr link --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 769753891..f1a26ba75 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,7 +3,7 @@ Added: ^^^^^^ -* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1311 [13.0.0] From f65b07bc6491dd4f1976b42a8d4665a2807f2fd8 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 11:48:48 +0100 Subject: [PATCH 11/32] adding exome mode to targeted, and removing max depth for everything except FLT3 --- BALSAMIC/constants/workflow_params.py | 12 +++++ BALSAMIC/models/params.py | 30 +++++++++++ .../annotation/varcaller_sv_filter.rule | 6 +-- .../somatic_sv_quality_filter.rule | 2 +- .../somatic_sv_tumor_normal.rule | 52 +++++++++++++++++++ .../somatic_sv_tumor_only.rule | 49 +++++++++++++++++ BALSAMIC/workflows/balsamic.smk | 25 +++++---- docs/balsamic_sv_cnv.rst | 2 +- 8 files changed, 159 insertions(+), 19 deletions(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index d3a2486b6..ff2a67f03 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -30,6 +30,13 @@ "workflow_solution": ["Sentieon"], }, "manta": { + "mutation": "somatic", + "mutation_type": "SV", + "analysis_type": ["paired", "single"], + "sequencing_type": ["wgs"], + "workflow_solution": ["BALSAMIC"], + }, + "mantatarget": { "mutation": "somatic", "mutation_type": "SV", "analysis_type": ["paired", "single"], @@ -146,6 +153,11 @@ ] ), }, + "manta_target": { + "wgs_hg19": "--exome --region=chr13:28525642-28726481", + "wgs_hg38": "--exome --region=chr13:27952208-28151643", + "targeted": "--exome", + }, "vardict": { "allelic_frequency": "0.001", "max_pval": "0.9", diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index e79d5d0ad..24c86bbcb 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -2,6 +2,8 @@ from typing import Optional from pydantic import BaseModel, ConfigDict +from BALSAMIC.constants.analysis import SequencingType +from BALSAMIC.constants.cache import GenomeVersion class ParamsCommon(BaseModel): @@ -24,6 +26,20 @@ class ParamsCommon(BaseModel): picard_RG_tumor: str +class ParamsMantaTarget(BaseModel): + """This class defines the params settings used as constants in vardict rule. + + Attributes: + wgs_hg19: str(required). parameters for targted Manta analysis for WGS with hg19 positions + wgs_hg38: str(required). parameters for targted Manta analysis for WGS with hg38 positions + targeted: str(required). parameters for targted Manta analysis for TGA + """ + + wgs_hg19: str + wgs_hg38: str + targeted: str + + class ParamsVardict(BaseModel): """This class defines the params settings used as constants in vardict rule. @@ -149,15 +165,20 @@ class BalsamicWorkflowConfig(BaseModel): Attributes: common: global params defined across all rules in balsamic workflow + manta_target: params used in the manta_target rules umicommon: global params defined across specific rules in UMI workflow vep: global params defined in the rule vep vardict: params defined in the rule vardict umiextract : params defined in the rule sentieon_umiextract umiconsensuscall: params defined in the rule sentieon_consensuscall tnscope_umi: params defined in the rule sentieon_tnscope_umi + + Functions: + - get_manta_target_settings: Return setting for manta_target rule """ common: ParamsCommon + manta_target: ParamsMantaTarget vardict: ParamsVardict vep: ParamsVEP umicommon: UMIParamsCommon @@ -165,6 +186,15 @@ class BalsamicWorkflowConfig(BaseModel): umiconsensuscall: UMIParamsConsensuscall tnscope_umi: UMIParamsTNscope + def get_manta_target_settings(self, genome_version, sequencing_type) -> str: + """Return correct setting for manta target rule.""" + if sequencing_type == SequencingType.WGS and genome_version == GenomeVersion.HG19: + return self.manta_target.wgs_hg19 + elif sequencing_type == SequencingType.WGS and genome_version == GenomeVersion.HG38: + return self.manta_target.wgs_hg38 + else: + return self.manta_target.targeted + class VCFAttributes(BaseModel): """General purpose filter to manage various VCF attributes diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule index 32d0ae2fb..1ea9617e0 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule @@ -23,9 +23,8 @@ rule bcftools_filter_sv_research: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} -f .,PASS,MaxDepth {input.vcf_sv_research} |\ bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb}; +bcftools view --threads {threads} -f PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; @@ -56,10 +55,9 @@ rule bcftools_filter_sv_clinical: shell: """ bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_sv_clinical} |\ -bcftools view --threads {threads} -f .,PASS,MaxDepth |\ bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} -f .,PASS,MaxDepth -O z -o {output.vcf_pass_svdb}; +bcftools view --threads {threads} -f PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule index d4cdcd5e1..141b15035 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule @@ -15,7 +15,7 @@ rule bcftools_quality_filter_svdb: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} -f .,PASS,MaxDepth -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; +bcftools view --threads {threads} -f PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; tabix -p vcf -f {output.vcf_pass_svdb_research}; """ \ No newline at end of file diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index ff79d5805..a5b5a86e4 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -49,6 +49,58 @@ tabix -p vcf -f {output.final}; echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; +rm -rf {params.tmpdir}; + """ + +rule manta_target_tumor_normal: + input: + fa = config["reference"]["reference_genome"], + bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), + bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) + output: + final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", + namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map", + benchmark: + Path(benchmark_dir, 'manta_tumor_normal_' + config["analysis"]["case_id"] + ".tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() + params: + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + runmode = "local", + settings = params.get_manta_target_settings(genome_version=config["reference"]["genome_version"],sequencing_type=sequencing_type), + tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), + normal = config_model.get_sample_name_by_type(SampleType.NORMAL), + case_name = case_id, + manta_install_path = "/opt/conda/share/manta-1.6.0-2" + threads: + get_threads(cluster_config, "manta_tumor_normal") + message: + ("Calling structural variants using manta for {params.case_name} and " + "index the compressed vcf file") + shell: + """ +samtools_path=$(readlink -f $(which samtools)) + +configManta.py \ +{params.settings} \ +--normalBam={input.bamN} \ +--tumorBam={input.bamT} \ +--referenceFasta={input.fa} \ +--runDir={params.tmpdir}; + +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; + +{params.manta_install_path}/libexec/convertInversion.py \ + $samtools_path \ + {input.fa} \ + {params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf; + +bgzip -l 9 -c {params.tmpdir}/results/variants/somaticSV_converted.vcf > {output.final}; + +tabix -p vcf -f {output.final}; + +echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; + rm -rf {params.tmpdir}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index cb5b860f7..a2a59b7d5 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -46,6 +46,55 @@ tabix -p vcf -f {output.final}; echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; +rm -rf {params.tmpdir}; + """ + +rule manta_target_tumor_only: + input: + fa = config["reference"]["reference_genome"], + bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) + output: + final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", + namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map" + benchmark: + benchmark_dir + 'manta_tumor_only_' + config["analysis"]["case_id"] + ".tsv" + singularity: + Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() + params: + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + runmode = "local", + settings = params.get_manta_target_settings(genome_version=config["reference"]["genome_version"], sequencing_type=sequencing_type), + tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), + case_name = config["analysis"]["case_id"], + manta_install_path= "/opt/conda/share/manta-1.6.0-2" + threads: + get_threads(cluster_config, "manta_tumor_only") + message: + ("Calling structural variants using manta for {params.case_name} and" + "index the compressed vcf file") + shell: + """ +samtools_path=$(readlink -f $(which samtools)) + +configManta.py \ +{params.settings} \ +--tumorBam={input.bamT} \ +--referenceFasta={input.fa} \ +--runDir={params.tmpdir}; + +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; + +{params.manta_install_path}/libexec/convertInversion.py \ + $samtools_path \ + {input.fa} \ + {params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf; + +bgzip -l 9 -c {params.tmpdir}/results/variants/tumorSV_converted.vcf > {output.final}; + +tabix -p vcf -f {output.final}; + +echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; + rm -rf {params.tmpdir}; """ diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index 8a3cd272a..3f2e48317 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -113,6 +113,18 @@ else: status_to_sample_id = "TUMOR" + "\\\\t" + tumor_sample +# Varcaller filter settings +COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS) +VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS) +SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS) +SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS) + +# Fastp parameters +fastp_parameters: Dict = get_fastp_parameters(config_model) + +# parse parameters as constants to workflows +params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) + # vcfanno annotations research_annotations.append( { 'annotation': [{ @@ -202,19 +214,6 @@ if "swegen_sv_frequency" in config["reference"]: swegen_sv: str = get_swegen_sv(config) - -# Varcaller filter settings -COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS) -VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS) -SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS) -SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS) - -# Fastp parameters -fastp_parameters: Dict = get_fastp_parameters(config_model) - -# parse parameters as constants to workflows -params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) - # Capture kit name if config["analysis"]["sequencing_type"] != "wgs": capture_kit = os.path.split(config["panel"]["capture_kit"])[1] diff --git a/docs/balsamic_sv_cnv.rst b/docs/balsamic_sv_cnv.rst index 2a14b35db..d51b8249e 100644 --- a/docs/balsamic_sv_cnv.rst +++ b/docs/balsamic_sv_cnv.rst @@ -138,7 +138,7 @@ The following filter applies for both tumor-normal and tumor-only samples in add Frq <= 0.02 (or) Frq == "." -The variants scored as `PASS` or `MaxDepth` are included in the final vcf file (`SNV.somatic..svdb..filtered.pass.vcf.gz`). +The variants scored as `PASS` are included in the final vcf file (`SNV.somatic..svdb..filtered.pass.vcf.gz`). The following command can be used to fetch the variants identified by a specific caller from merged structural and copy number variants. From def3631a9b4126781b904dc8177c371cc33c5f40 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 13:18:47 +0100 Subject: [PATCH 12/32] add new cluster config entries for new rules --- BALSAMIC/constants/cluster_analysis.json | 8 ++++++++ .../variant_calling/somatic_sv_tumor_normal.rule | 7 +++---- .../variant_calling/somatic_sv_tumor_only.rule | 7 +++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/BALSAMIC/constants/cluster_analysis.json b/BALSAMIC/constants/cluster_analysis.json index df3b90de4..c9cbc54f2 100644 --- a/BALSAMIC/constants/cluster_analysis.json +++ b/BALSAMIC/constants/cluster_analysis.json @@ -108,10 +108,18 @@ "time": "10:00:00", "n": 12 }, + "manta_target_tumor_only": { + "time": "10:00:00", + "n": 4 + }, "manta_tumor_normal": { "time": "10:00:00", "n": 12 }, + "manta_target_tumor_normal": { + "time": "10:00:00", + "n": 4 + }, "mergeBam_normal_gatk": { "time": "04:30:00", "n": 8 diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index a5b5a86e4..fe7e9636a 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -61,7 +61,7 @@ rule manta_target_tumor_normal: final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map", benchmark: - Path(benchmark_dir, 'manta_tumor_normal_' + config["analysis"]["case_id"] + ".tsv").as_posix() + Path(benchmark_dir, 'manta_target_tumor_normal_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() params: @@ -73,10 +73,9 @@ rule manta_target_tumor_normal: case_name = case_id, manta_install_path = "/opt/conda/share/manta-1.6.0-2" threads: - get_threads(cluster_config, "manta_tumor_normal") + get_threads(cluster_config, "manta_target_tumor_normal") message: - ("Calling structural variants using manta for {params.case_name} and " - "index the compressed vcf file") + ("Calling structural variants in FLT3 region using manta for {params.case_name}") shell: """ samtools_path=$(readlink -f $(which samtools)) diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index a2a59b7d5..58a8901f2 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -57,7 +57,7 @@ rule manta_target_tumor_only: final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map" benchmark: - benchmark_dir + 'manta_tumor_only_' + config["analysis"]["case_id"] + ".tsv" + benchmark_dir + 'manta_target_tumor_only_' + config["analysis"]["case_id"] + ".tsv" singularity: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() params: @@ -68,10 +68,9 @@ rule manta_target_tumor_only: case_name = config["analysis"]["case_id"], manta_install_path= "/opt/conda/share/manta-1.6.0-2" threads: - get_threads(cluster_config, "manta_tumor_only") + get_threads(cluster_config, "manta_target_tumor_only") message: - ("Calling structural variants using manta for {params.case_name} and" - "index the compressed vcf file") + ("Calling structural variants using manta for {params.case_name} using --exome mode") shell: """ samtools_path=$(readlink -f $(which samtools)) From 3109e5bf1d20a689b2739e3d4c01321fab8b8a94 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 13:42:40 +0100 Subject: [PATCH 13/32] add new rule to model --- BALSAMIC/models/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/BALSAMIC/models/config.py b/BALSAMIC/models/config.py index 7df595883..e14cbb910 100644 --- a/BALSAMIC/models/config.py +++ b/BALSAMIC/models/config.py @@ -95,6 +95,7 @@ class VCFModel(BaseModel): dnascope: VarcallerAttribute tnscope_umi: VarcallerAttribute manta_germline: VarcallerAttribute + mantatarget: VarcallerAttribute manta: VarcallerAttribute dellysv: VarcallerAttribute cnvkit: VarcallerAttribute From 8613431a1a8a1779a922ba355b7c9f52bbf8e9bf Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 14:03:11 +0100 Subject: [PATCH 14/32] black and pytest --- BALSAMIC/models/params.py | 10 ++++++++-- tests/test_data/config.json | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index 24c86bbcb..aa1a2f5d1 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -188,9 +188,15 @@ class BalsamicWorkflowConfig(BaseModel): def get_manta_target_settings(self, genome_version, sequencing_type) -> str: """Return correct setting for manta target rule.""" - if sequencing_type == SequencingType.WGS and genome_version == GenomeVersion.HG19: + if ( + sequencing_type == SequencingType.WGS + and genome_version == GenomeVersion.HG19 + ): return self.manta_target.wgs_hg19 - elif sequencing_type == SequencingType.WGS and genome_version == GenomeVersion.HG38: + elif ( + sequencing_type == SequencingType.WGS + and genome_version == GenomeVersion.HG38 + ): return self.manta_target.wgs_hg38 else: return self.manta_target.targeted diff --git a/tests/test_data/config.json b/tests/test_data/config.json index a63bff715..131ee793f 100644 --- a/tests/test_data/config.json +++ b/tests/test_data/config.json @@ -83,6 +83,20 @@ ] }, "manta": { + "mutation": "somatic", + "mutation_type": "SV", + "analysis_type": [ + "paired", + "single" + ], + "sequencing_type": [ + "wgs" + ], + "workflow_solution": [ + "BALSAMIC" + ] + }, + "mantatarget": { "mutation": "somatic", "mutation_type": "SV", "analysis_type": [ From fe818cdcf5fb77d686243947f378c0760fc9bd84 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 16:36:35 +0100 Subject: [PATCH 15/32] add new filters --- BALSAMIC/constants/variant_filters.py | 18 ++++++++++++++++++ BALSAMIC/models/params.py | 4 ++++ .../somatic_sv_tumor_normal.rule | 19 +++++++++++++++---- .../somatic_sv_tumor_only.rule | 18 ++++++++++++++---- BALSAMIC/workflows/balsamic.smk | 2 ++ 5 files changed, 53 insertions(+), 8 deletions(-) diff --git a/BALSAMIC/constants/variant_filters.py b/BALSAMIC/constants/variant_filters.py index e3ddab449..8cc89f1f0 100644 --- a/BALSAMIC/constants/variant_filters.py +++ b/BALSAMIC/constants/variant_filters.py @@ -91,6 +91,24 @@ "description": "General purpose filters used for filtering tnscope and tnhaplotyper", } +# Manta bcftools filters +MANTA_FILTER_SETTINGS = { + "low_pr_sr_freq": { + "tag_value": 0.03, + "filter_name": "low_pr_sr_freq", + "field": "FORMAT", + }, + "low_pr_sr_count": { + "tag_value": 5, + "filter_name": "low_pr_sr_count", + "field": "FORMAT", + }, + "varcaller_name": "Manta", + "filter_type": "general", + "analysis_type": "tumor_only,tumor_normal", + "description": "Bcftools filters to set frequency and minimum read support for SV calls", +} + # Configuration for SVDB settings: SVDB_FILTER_SETTINGS = { "swegen_sv_freq": { diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index aa1a2f5d1..d92af4f58 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -241,6 +241,8 @@ class VarCallerFilter(BaseModel): swegen_sv_freq: VCFAttributes (optional); maximum swegen sv allele frequency loqusdb_clinical_snv_freq: VCFAttributes (optional); maximum loqusdb clinical snv allele frequency loqusdb_clinical_sv_freq: VCFAttributes (optional); maximum loqusdb clinical sv allele frequency + low_pr_sr_freq: VCFAttributes (optional); minimum Manta variant frequency + low_pr_sr_count: VCFAttributes (optional); minumum Manta variant read support varcaller_name: str (required); variant caller name filter_type: str (required); filter name for variant caller analysis_type: str (required); analysis type e.g. tumor_normal or tumor_only @@ -260,6 +262,8 @@ class VarCallerFilter(BaseModel): swegen_sv_freq: Optional[VCFAttributes] = None loqusdb_clinical_snv_freq: Optional[VCFAttributes] = None loqusdb_clinical_sv_freq: Optional[VCFAttributes] = None + low_pr_sr_freq: Optional[VCFAttributes] = None + low_pr_sr_count: Optional[VCFAttributes] = None varcaller_name: str filter_type: str analysis_type: str diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index fe7e9636a..0fa111adc 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -20,7 +20,9 @@ rule manta_tumor_normal: tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), normal = config_model.get_sample_name_by_type(SampleType.NORMAL), case_name = case_id, - manta_install_path = "/opt/conda/share/manta-1.6.0-2" + manta_install_path = "/opt/conda/share/manta-1.6.0-2", + low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], + low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_tumor_normal") message: @@ -43,7 +45,10 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; {input.fa} \ {params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf; -bgzip -l 9 -c {params.tmpdir}/results/variants/somaticSV_converted.vcf > {output.final}; +bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; + +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; @@ -71,7 +76,9 @@ rule manta_target_tumor_normal: tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), normal = config_model.get_sample_name_by_type(SampleType.NORMAL), case_name = case_id, - manta_install_path = "/opt/conda/share/manta-1.6.0-2" + manta_install_path = "/opt/conda/share/manta-1.6.0-2", + low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], + low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_target_tumor_normal") message: @@ -94,7 +101,11 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; {input.fa} \ {params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf; -bgzip -l 9 -c {params.tmpdir}/results/variants/somaticSV_converted.vcf > {output.final}; +bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; + +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} + tabix -p vcf -f {output.final}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index 58a8901f2..0fc22b79a 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -18,7 +18,9 @@ rule manta_tumor_only: runmode = "local", tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], - manta_install_path= "/opt/conda/share/manta-1.6.0-2" + manta_install_path= "/opt/conda/share/manta-1.6.0-2", + low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], + low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_tumor_only") message: @@ -40,7 +42,10 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; {input.fa} \ {params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bgzip -l 9 -c {params.tmpdir}/results/variants/tumorSV_converted.vcf > {output.final}; +bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; + +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; @@ -66,7 +71,9 @@ rule manta_target_tumor_only: settings = params.get_manta_target_settings(genome_version=config["reference"]["genome_version"], sequencing_type=sequencing_type), tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], - manta_install_path= "/opt/conda/share/manta-1.6.0-2" + manta_install_path= "/opt/conda/share/manta-1.6.0-2", + low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value, MANTA_FILTERS.low_pr_sr_freq.filter_name], + low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value, MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_target_tumor_only") message: @@ -88,7 +95,10 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; {input.fa} \ {params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bgzip -l 9 -c {params.tmpdir}/results/variants/tumorSV_converted.vcf > {output.final}; +bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; + +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index dce8bb587..57c414ca1 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -17,6 +17,7 @@ from BALSAMIC.constants.variant_filters import ( SENTIEON_VARCALL_SETTINGS, SVDB_FILTER_SETTINGS, VARDICT_SETTINGS, + MANTA_FILTER_SETTINGS, ) from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.config import ConfigModel @@ -117,6 +118,7 @@ COMMON_FILTERS = VarCallerFilter.model_validate(COMMON_SETTINGS) VARDICT = VarCallerFilter.model_validate(VARDICT_SETTINGS) SENTIEON_CALLER = VarCallerFilter.model_validate(SENTIEON_VARCALL_SETTINGS) SVDB_FILTERS = VarCallerFilter.model_validate(SVDB_FILTER_SETTINGS) +MANTA_FILTERS = VarCallerFilter.model_validate(MANTA_FILTER_SETTINGS) # Fastp parameters fastp_parameters: Dict = get_fastp_parameters(config_model) From bef4a28d81a61e171d84801ede92dce433ffc9d1 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 26 Jan 2024 17:04:08 +0100 Subject: [PATCH 16/32] fix af filter --- BALSAMIC/constants/variant_filters.py | 2 +- .../variant_calling/somatic_sv_tumor_normal.rule | 4 ++-- .../variant_calling/somatic_sv_tumor_only.rule | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/BALSAMIC/constants/variant_filters.py b/BALSAMIC/constants/variant_filters.py index 8cc89f1f0..c2306efb5 100644 --- a/BALSAMIC/constants/variant_filters.py +++ b/BALSAMIC/constants/variant_filters.py @@ -94,7 +94,7 @@ # Manta bcftools filters MANTA_FILTER_SETTINGS = { "low_pr_sr_freq": { - "tag_value": 0.03, + "tag_value": 0.01, "filter_name": "low_pr_sr_freq", "field": "FORMAT", }, diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index 0fa111adc..cc62e456d 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -47,7 +47,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; @@ -103,7 +103,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index 0fc22b79a..440964aaf 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -44,7 +44,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; @@ -97,7 +97,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0]) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; From 0b4fcdb3f0cb7e1cc22496f56b84f85082c891e3 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 2 Feb 2024 13:51:04 +0100 Subject: [PATCH 17/32] fix bug --- BALSAMIC/constants/workflow_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 54827e6aa..e4f9dfc24 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -156,8 +156,8 @@ ), }, "manta_target": { - "wgs_hg19": "--exome --region=chr13:28525642-28726481", - "wgs_hg38": "--exome --region=chr13:27952208-28151643", + "wgs_hg19": "--exome --region=13:28525642-28726481", + "wgs_hg38": "--exome --region=13:27952208-28151643", "targeted": "--exome", }, "vardict": { From 4039fb6a3c9c67ea120a9922a4c566e7aa42cfa8 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 2 Feb 2024 15:48:33 +0100 Subject: [PATCH 18/32] remove extra manta rule --- BALSAMIC/constants/cluster_analysis.json | 8 --- BALSAMIC/constants/workflow_params.py | 14 +---- BALSAMIC/models/params.py | 40 +++++-------- .../somatic_sv_tumor_normal.rule | 57 +------------------ .../somatic_sv_tumor_only.rule | 53 +---------------- tests/test_data/config.json | 18 +----- 6 files changed, 21 insertions(+), 169 deletions(-) diff --git a/BALSAMIC/constants/cluster_analysis.json b/BALSAMIC/constants/cluster_analysis.json index c9cbc54f2..df3b90de4 100644 --- a/BALSAMIC/constants/cluster_analysis.json +++ b/BALSAMIC/constants/cluster_analysis.json @@ -108,18 +108,10 @@ "time": "10:00:00", "n": 12 }, - "manta_target_tumor_only": { - "time": "10:00:00", - "n": 4 - }, "manta_tumor_normal": { "time": "10:00:00", "n": 12 }, - "manta_target_tumor_normal": { - "time": "10:00:00", - "n": 4 - }, "mergeBam_normal_gatk": { "time": "04:30:00", "n": 8 diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index e4f9dfc24..396c92e73 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -30,13 +30,6 @@ "workflow_solution": ["Sentieon"], }, "manta": { - "mutation": "somatic", - "mutation_type": "SV", - "analysis_type": ["paired", "single"], - "sequencing_type": ["wgs"], - "workflow_solution": ["BALSAMIC"], - }, - "mantatarget": { "mutation": "somatic", "mutation_type": "SV", "analysis_type": ["paired", "single"], @@ -155,10 +148,9 @@ ] ), }, - "manta_target": { - "wgs_hg19": "--exome --region=13:28525642-28726481", - "wgs_hg38": "--exome --region=13:27952208-28151643", - "targeted": "--exome", + "manta": { + "wgs_settings": "", + "tga_settings": "--exome", }, "vardict": { "allelic_frequency": "0.001", diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index d92af4f58..864d41263 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -3,8 +3,6 @@ from pydantic import BaseModel, ConfigDict from BALSAMIC.constants.analysis import SequencingType -from BALSAMIC.constants.cache import GenomeVersion - class ParamsCommon(BaseModel): """This class defines the common params settings used as constants across various rules in balsamic workflow. @@ -26,18 +24,16 @@ class ParamsCommon(BaseModel): picard_RG_tumor: str -class ParamsMantaTarget(BaseModel): - """This class defines the params settings used as constants in vardict rule. +class ParamsManta(BaseModel): + """This class defines the params settings used as constants in Manta rule. Attributes: - wgs_hg19: str(required). parameters for targted Manta analysis for WGS with hg19 positions - wgs_hg38: str(required). parameters for targted Manta analysis for WGS with hg38 positions - targeted: str(required). parameters for targted Manta analysis for TGA + wgs_settings: str(required). parameters for Manta analysis for WGS + tga_settings: str(required). parameters for Manta analysis for TGA """ - wgs_hg19: str - wgs_hg38: str - targeted: str + wgs_settings: str + tga_settings: str class ParamsVardict(BaseModel): @@ -165,7 +161,7 @@ class BalsamicWorkflowConfig(BaseModel): Attributes: common: global params defined across all rules in balsamic workflow - manta_target: params used in the manta_target rules + manta: params used in the manta rules umicommon: global params defined across specific rules in UMI workflow vep: global params defined in the rule vep vardict: params defined in the rule vardict @@ -174,11 +170,11 @@ class BalsamicWorkflowConfig(BaseModel): tnscope_umi: params defined in the rule sentieon_tnscope_umi Functions: - - get_manta_target_settings: Return setting for manta_target rule + - get_manta_settings: Return setting for manta rule """ common: ParamsCommon - manta_target: ParamsMantaTarget + manta: ParamsManta vardict: ParamsVardict vep: ParamsVEP umicommon: UMIParamsCommon @@ -186,20 +182,12 @@ class BalsamicWorkflowConfig(BaseModel): umiconsensuscall: UMIParamsConsensuscall tnscope_umi: UMIParamsTNscope - def get_manta_target_settings(self, genome_version, sequencing_type) -> str: - """Return correct setting for manta target rule.""" - if ( - sequencing_type == SequencingType.WGS - and genome_version == GenomeVersion.HG19 - ): - return self.manta_target.wgs_hg19 - elif ( - sequencing_type == SequencingType.WGS - and genome_version == GenomeVersion.HG38 - ): - return self.manta_target.wgs_hg38 + def get_manta_settings(self, sequencing_type) -> str: + """Return correct setting for manta rules depending on sequencing type.""" + if sequencing_type == SequencingType.WGS: + return self.manta.wgs_settings else: - return self.manta_target.targeted + return self.manta.tga_settings class VCFAttributes(BaseModel): diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index cc62e456d..d9fa4034d 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -16,6 +16,7 @@ rule manta_tumor_normal: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + settings = params.get_manta_settings(sequencing_type=sequencing_type), runmode = "local", tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), normal = config_model.get_sample_name_by_type(SampleType.NORMAL), @@ -32,61 +33,6 @@ rule manta_tumor_normal: """ samtools_path=$(readlink -f $(which samtools)) -configManta.py \ ---normalBam={input.bamN} \ ---tumorBam={input.bamT} \ ---referenceFasta={input.fa} \ ---runDir={params.tmpdir}; - -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; - -{params.manta_install_path}/libexec/convertInversion.py \ - $samtools_path \ - {input.fa} \ - {params.tmpdir}/results/variants/somaticSV.vcf.gz > {params.tmpdir}/results/variants/somaticSV_converted.vcf; - -bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; - -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ -| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} - -tabix -p vcf -f {output.final}; - -echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; - -rm -rf {params.tmpdir}; - """ - -rule manta_target_tumor_normal: - input: - fa = config["reference"]["reference_genome"], - bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), - bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) - output: - final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", - namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map", - benchmark: - Path(benchmark_dir, 'manta_target_tumor_normal_' + config["analysis"]["case_id"] + ".tsv").as_posix() - singularity: - Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() - params: - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - runmode = "local", - settings = params.get_manta_target_settings(genome_version=config["reference"]["genome_version"],sequencing_type=sequencing_type), - tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), - normal = config_model.get_sample_name_by_type(SampleType.NORMAL), - case_name = case_id, - manta_install_path = "/opt/conda/share/manta-1.6.0-2", - low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], - low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], - threads: - get_threads(cluster_config, "manta_target_tumor_normal") - message: - ("Calling structural variants in FLT3 region using manta for {params.case_name}") - shell: - """ -samtools_path=$(readlink -f $(which samtools)) - configManta.py \ {params.settings} \ --normalBam={input.bamN} \ @@ -106,7 +52,6 @@ bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} - tabix -p vcf -f {output.final}; echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index 440964aaf..a28f60a0b 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -15,6 +15,7 @@ rule manta_tumor_only: Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + settings = params.get_manta_settings(sequencing_type=sequencing_type), runmode = "local", tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], @@ -30,58 +31,6 @@ rule manta_tumor_only: """ samtools_path=$(readlink -f $(which samtools)) -configManta.py \ ---tumorBam={input.bamT} \ ---referenceFasta={input.fa} \ ---runDir={params.tmpdir}; - -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; - -{params.manta_install_path}/libexec/convertInversion.py \ - $samtools_path \ - {input.fa} \ - {params.tmpdir}/results/variants/tumorSV.vcf.gz > {params.tmpdir}/results/variants/tumorSV_converted.vcf; - -bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; - -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ -| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} - -tabix -p vcf -f {output.final}; - -echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; - -rm -rf {params.tmpdir}; - """ - -rule manta_target_tumor_only: - input: - fa = config["reference"]["reference_genome"], - bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) - output: - final = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.vcf.gz", - namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".mantatarget.sample_name_map" - benchmark: - benchmark_dir + 'manta_target_tumor_only_' + config["analysis"]["case_id"] + ".tsv" - singularity: - Path(singularity_image, config["bioinfo_tools"].get("manta") + ".sif").as_posix() - params: - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - runmode = "local", - settings = params.get_manta_target_settings(genome_version=config["reference"]["genome_version"], sequencing_type=sequencing_type), - tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), - case_name = config["analysis"]["case_id"], - manta_install_path= "/opt/conda/share/manta-1.6.0-2", - low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value, MANTA_FILTERS.low_pr_sr_freq.filter_name], - low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value, MANTA_FILTERS.low_pr_sr_count.filter_name], - threads: - get_threads(cluster_config, "manta_target_tumor_only") - message: - ("Calling structural variants using manta for {params.case_name} using --exome mode") - shell: - """ -samtools_path=$(readlink -f $(which samtools)) - configManta.py \ {params.settings} \ --tumorBam={input.bamT} \ diff --git a/tests/test_data/config.json b/tests/test_data/config.json index 131ee793f..507aa8aef 100644 --- a/tests/test_data/config.json +++ b/tests/test_data/config.json @@ -90,22 +90,8 @@ "single" ], "sequencing_type": [ - "wgs" - ], - "workflow_solution": [ - "BALSAMIC" - ] - }, - "mantatarget": { - "mutation": "somatic", - "mutation_type": "SV", - "analysis_type": [ - "paired", - "single" - ], - "sequencing_type": [ - "targeted", - "wgs" + "wgs", + "targeted" ], "workflow_solution": [ "BALSAMIC" From 6796193a824a88112c016479a7701c50afbdfd93 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 2 Feb 2024 15:50:47 +0100 Subject: [PATCH 19/32] lowered to 0.5% vaf --- BALSAMIC/constants/variant_filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/constants/variant_filters.py b/BALSAMIC/constants/variant_filters.py index c2306efb5..135783691 100644 --- a/BALSAMIC/constants/variant_filters.py +++ b/BALSAMIC/constants/variant_filters.py @@ -94,7 +94,7 @@ # Manta bcftools filters MANTA_FILTER_SETTINGS = { "low_pr_sr_freq": { - "tag_value": 0.01, + "tag_value": 0.005, "filter_name": "low_pr_sr_freq", "field": "FORMAT", }, From 157cf5b251051d1d3d9e42484e75b54fa9bf91af Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 2 Feb 2024 15:54:18 +0100 Subject: [PATCH 20/32] fix bug --- BALSAMIC/models/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/BALSAMIC/models/config.py b/BALSAMIC/models/config.py index e14cbb910..7df595883 100644 --- a/BALSAMIC/models/config.py +++ b/BALSAMIC/models/config.py @@ -95,7 +95,6 @@ class VCFModel(BaseModel): dnascope: VarcallerAttribute tnscope_umi: VarcallerAttribute manta_germline: VarcallerAttribute - mantatarget: VarcallerAttribute manta: VarcallerAttribute dellysv: VarcallerAttribute cnvkit: VarcallerAttribute From 2dc94870780c2b6d8503e3205bb558c521d7b6d6 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Fri, 2 Feb 2024 18:40:18 +0100 Subject: [PATCH 21/32] fix --- .../variant_calling/somatic_sv_tumor_normal.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index d9fa4034d..940b606f8 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -49,7 +49,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz \ | bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} tabix -p vcf -f {output.final}; From 65f029a5a74c3a168f3b5b26764fdeea04f159bf Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 09:51:43 +0100 Subject: [PATCH 22/32] remove freq filter --- BALSAMIC/constants/variant_filters.py | 7 +------ BALSAMIC/models/params.py | 2 -- .../variant_calling/somatic_sv_tumor_normal.rule | 3 +-- .../variant_calling/somatic_sv_tumor_only.rule | 4 +--- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/BALSAMIC/constants/variant_filters.py b/BALSAMIC/constants/variant_filters.py index 135783691..df8fe2d40 100644 --- a/BALSAMIC/constants/variant_filters.py +++ b/BALSAMIC/constants/variant_filters.py @@ -93,13 +93,8 @@ # Manta bcftools filters MANTA_FILTER_SETTINGS = { - "low_pr_sr_freq": { - "tag_value": 0.005, - "filter_name": "low_pr_sr_freq", - "field": "FORMAT", - }, "low_pr_sr_count": { - "tag_value": 5, + "tag_value": 4, "filter_name": "low_pr_sr_count", "field": "FORMAT", }, diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index 864d41263..60813843f 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -229,7 +229,6 @@ class VarCallerFilter(BaseModel): swegen_sv_freq: VCFAttributes (optional); maximum swegen sv allele frequency loqusdb_clinical_snv_freq: VCFAttributes (optional); maximum loqusdb clinical snv allele frequency loqusdb_clinical_sv_freq: VCFAttributes (optional); maximum loqusdb clinical sv allele frequency - low_pr_sr_freq: VCFAttributes (optional); minimum Manta variant frequency low_pr_sr_count: VCFAttributes (optional); minumum Manta variant read support varcaller_name: str (required); variant caller name filter_type: str (required); filter name for variant caller @@ -250,7 +249,6 @@ class VarCallerFilter(BaseModel): swegen_sv_freq: Optional[VCFAttributes] = None loqusdb_clinical_snv_freq: Optional[VCFAttributes] = None loqusdb_clinical_sv_freq: Optional[VCFAttributes] = None - low_pr_sr_freq: Optional[VCFAttributes] = None low_pr_sr_count: Optional[VCFAttributes] = None varcaller_name: str filter_type: str diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index 940b606f8..fd1887559 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -49,8 +49,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz \ -| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz tabix -p vcf -f {output.final}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index a28f60a0b..5b3477fbd 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -20,7 +20,6 @@ rule manta_tumor_only: tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], manta_install_path= "/opt/conda/share/manta-1.6.0-2", - low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_tumor_only") @@ -46,8 +45,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])/SUM(SUM(FORMAT/PR[0:0]+FORMAT/SR[0:0])+SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1])) < {params.low_pr_sr_freq[0]}' --soft-filter '{params.low_pr_sr_freq[1]}' --mode '+' {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz \ -| bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz tabix -p vcf -f {output.final}; From b4ea6793b7bd6a52a0b7ecc7252b6a6e1e8b9880 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 09:57:33 +0100 Subject: [PATCH 23/32] fix bug --- .../snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule | 1 - 1 file changed, 1 deletion(-) diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index fd1887559..af77bae69 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -22,7 +22,6 @@ rule manta_tumor_normal: normal = config_model.get_sample_name_by_type(SampleType.NORMAL), case_name = case_id, manta_install_path = "/opt/conda/share/manta-1.6.0-2", - low_pr_sr_freq = [MANTA_FILTERS.low_pr_sr_freq.tag_value,MANTA_FILTERS.low_pr_sr_freq.filter_name], low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], threads: get_threads(cluster_config, "manta_tumor_normal") From fb15ac56778cf2211989d9a61bacdc602ad32a3d Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 11:48:41 +0100 Subject: [PATCH 24/32] fix bug --- BALSAMIC/models/params.py | 1 + .../variant_calling/somatic_sv_tumor_normal.rule | 2 +- .../snakemake_rules/variant_calling/somatic_sv_tumor_only.rule | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index 60813843f..8ac264c7b 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict from BALSAMIC.constants.analysis import SequencingType + class ParamsCommon(BaseModel): """This class defines the common params settings used as constants across various rules in balsamic workflow. diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index af77bae69..882035689 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -48,7 +48,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz tabix -p vcf -f {output.final}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index 5b3477fbd..fa2bc7c9b 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -45,7 +45,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz tabix -p vcf -f {output.final}; From 2873832e23006ef3e481bd5ce245bd611812f83e Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 18:28:57 +0100 Subject: [PATCH 25/32] add new tests --- tests/models/test_params_models.py | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/models/test_params_models.py b/tests/models/test_params_models.py index 50dd10b52..7c1a1c1b5 100644 --- a/tests/models/test_params_models.py +++ b/tests/models/test_params_models.py @@ -4,8 +4,12 @@ import pytest from pydantic import ValidationError +from BALSAMIC.constants.analysis import SequencingType +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.models.params import BalsamicWorkflowConfig from BALSAMIC.models.config import VarcallerAttribute from BALSAMIC.models.params import ( + ParamsManta, ParamsVardict, ParamsVEP, QCModel, @@ -18,6 +22,44 @@ ) +def test_params_manta(): + """test Manta settings model for correct validation.""" + + # GIVEN Manta params + test_manta_params = {"wgs_settings": "", "tga_settings": "--exome"} + + # WHEN building the model + test_manta_built = ParamsManta(**test_manta_params) + + assert test_manta_built.tga_settings == "--exome" + assert test_manta_built.wgs_settings == "" + + +def test_get_manta_settings_tga(): + """test get Manta settings based on sequencing type TGA.""" + + # GIVEN workflow params + params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) + + # WHEN getting manta settings for TGA + manta_settings = params.get_manta_settings(SequencingType.TARGETED) + + # THEN manta setting should specify exome argument + assert manta_settings == "--exome" + + +def test_get_manta_settings_wgs(): + """test get Manta settings based on sequencing type WGS.""" + # GIVEN workflow params + params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) + + # WHEN getting manta settings for WGS + manta_settings = params.get_manta_settings(SequencingType.WGS) + + # THEN manta setting should be empty + assert manta_settings == "" + + def test_params_vardict(): """test UMIParamsVardict model for correct validation""" From 24a96db041c83a1eaa0c3d91eba7a38b98197e0e Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 19:00:18 +0100 Subject: [PATCH 26/32] updated docs --- .DS_Store | Bin 0 -> 6148 bytes docs/.DS_Store | Bin 0 -> 8196 bytes docs/balsamic_sv_cnv.rst | 4 ++++ 3 files changed, 4 insertions(+) create mode 100644 .DS_Store create mode 100644 docs/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..73e7bd16458269ab6cf11c688c1b359368c840af GIT binary patch literal 6148 zcmeHK(M}UV6g>k)7An#QOn5Qb#8<2cgoOCg7OFN;NLZo|YHCXtXf z{(`T5iT~o0o;y2`?E;S&HFJ}hGdpw7-JLUcXLbRo+Mx3YC;@O-1PimQt}q#wa>*u4 z&pc7-7*)JL1DlN?+P~y&JFEg$fxk@wd3PPuP{nh2tS{c*b`Z5WCL2SQrTj}9TfiJP z@r=FOc#1lCJH#^jh>^&fpJ9H>F>jOEz#h4K#2rQ`V;=`N80V>bIbM-o5jyZS9+P!~ zBLq3tmmF(edxuESLKh*~eKdLH%abC`v47^+1>P@Wgq+{R`|JGL!?@Z3!71xw7Q|+*Mx4z zr^}mH-LMl4YtbO=#G31+1I~;y(=08Xp02DfFS)C08)r-I>Bb7%Ypd&LXEV;-`wt)Q zHa`SMVf2~LPE*{iwapnk2T|~1-p5`)45CoxEB@15Y3}V@@oi>GeOx`ABVRK)Qe-Ac zhBWYm43FzwmNm$+BtNpOS?togiY>D1SLc+@jVxz@tLoz;28=JQBNkcn7!P#DNVX*S zl+R5ibIrz?Cwo9<6UWS#fYCZc$(Ax>9gaV_l5EMc9s2n69-v469!|#nb=GA4r*z~i z@=%micQQ)yMmJ<83!LH&#kXOt0#<>4R{_}{To%EK!BL8O>tLmS0f+^5YhznJ8HD2+ ztQZ`n$Pt=Ksl=43%rAyY>FjrnU&Y`k#gq1%SU;d;3 literal 0 HcmV?d00001 diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2f4d8258ec3b4a25e09816fd57ed0714611e6e27 GIT binary patch literal 8196 zcmeI1!EVz)5Qb;l(mIV=BGIb7AX(yCl~Sro6_*fNBn}*C5F7xRI7wTpjvZ_#p{j~< zhIil+YlYFb0U5V(;9 z`2Ap`vaP3ftgXB{(8v}5b`GbdppSikXzbM1Q#;mHq!?509#kq-=@vt2Ij-C64qH#{ zSX(V8rRAj3nN_+&Q8GJpwz!k3Yin9Vz!11jfcNh6p69PHp}Ce0z6m!+yB4cd=og?`@;M z^Kkd#qGGMyyZ`9WIrV!%_*tSRmA*D;NgrReU(m~~Y1oVcKMam>o;7PA(*l6foZ{hZL>DbU9>1fw>X42z?fVYb;QRxt`w^q@I{br64uUP2=fj-egkn| zeo3BO0`IkL)c2@d*l(<@NYKc399FjDuvh;uMBj!hYwD>TYl|}|fBlC5^Z75_O>?se F{06Tr%hCV< literal 0 HcmV?d00001 diff --git a/docs/balsamic_sv_cnv.rst b/docs/balsamic_sv_cnv.rst index d51b8249e..c32a8b2e8 100644 --- a/docs/balsamic_sv_cnv.rst +++ b/docs/balsamic_sv_cnv.rst @@ -58,6 +58,7 @@ The copy number variants, identified using ascatNgs and `dellycnv`, are converte Tumor and normal calls in `TIDDIT` are merged using `SVDB` with `--bnd_distance 500` and `--overlap = 0.80`. Using a custom made script "filter_SVs.py", soft-filters are added to the calls based on the presence of the variant in the normal, with the goal of retaining only somatic variants as PASS. +Manta calls are filtered using bcftools to only keep variants that have evidence from 3 or more reads. .. list-table:: SV filters :widths: 25 25 40 @@ -78,6 +79,9 @@ Using a custom made script "filter_SVs.py", soft-filters are added to the calls * - TIDDIT - in_normal - ctg_n == True and AF_N_MAX == 0 and AF_T_MAX <= 0.25 + * - Manta + - low_pr_sr_count + - SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < 4.0 Further information regarding the TIDDIT tumor normal filtration: As translocation variants are represented by 2 BNDs in the VCF which allows for mixed assignment of soft-filters, a requirement for assigning soft-filters to translocations is that neither BND is PASS. From 5638f59f2ad8be9e7be99c48aa6f124c2100065e Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Wed, 7 Feb 2024 19:01:14 +0100 Subject: [PATCH 27/32] remove dsstore --- docs/.DS_Store | Bin 8196 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 docs/.DS_Store diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 2f4d8258ec3b4a25e09816fd57ed0714611e6e27..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeI1!EVz)5Qb;l(mIV=BGIb7AX(yCl~Sro6_*fNBn}*C5F7xRI7wTpjvZ_#p{j~< zhIil+YlYFb0U5V(;9 z`2Ap`vaP3ftgXB{(8v}5b`GbdppSikXzbM1Q#;mHq!?509#kq-=@vt2Ij-C64qH#{ zSX(V8rRAj3nN_+&Q8GJpwz!k3Yin9Vz!11jfcNh6p69PHp}Ce0z6m!+yB4cd=og?`@;M z^Kkd#qGGMyyZ`9WIrV!%_*tSRmA*D;NgrReU(m~~Y1oVcKMam>o;7PA(*l6foZ{hZL>DbU9>1fw>X42z?fVYb;QRxt`w^q@I{br64uUP2=fj-egkn| zeo3BO0`IkL)c2@d*l(<@NYKc399FjDuvh;uMBj!hYwD>TYl|}|fBlC5^Z75_O>?se F{06Tr%hCV< From d7e4e55de3bb57ff24a577df88025e0ff31b7386 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 8 Feb 2024 10:01:33 +0100 Subject: [PATCH 28/32] add to allow . in filters --- BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule | 4 ++-- .../variant_calling/somatic_sv_quality_filter.rule | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule index 1ea9617e0..f0e92939d 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule @@ -24,7 +24,7 @@ rule bcftools_filter_sv_research: shell: """ bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} -f PASS -O z -o {output.vcf_pass_svdb}; +bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; @@ -57,7 +57,7 @@ rule bcftools_filter_sv_clinical: bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_sv_clinical} |\ bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} -f PASS -O z -o {output.vcf_pass_svdb}; +bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule index 141b15035..dad55ffa8 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule @@ -15,7 +15,7 @@ rule bcftools_quality_filter_svdb: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} -f PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; +bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; tabix -p vcf -f {output.vcf_pass_svdb_research}; - """ \ No newline at end of file + """ From b6616565aa53dd3f17ba539802d33c6125db0795 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 8 Feb 2024 11:06:40 +0100 Subject: [PATCH 29/32] add then --- .../variant_calling/somatic_sv_quality_filter.rule | 1 + tests/models/test_params_models.py | 1 + 2 files changed, 2 insertions(+) diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule index dad55ffa8..586d44a71 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_quality_filter.rule @@ -1,3 +1,4 @@ + rule bcftools_quality_filter_svdb: input: vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz", diff --git a/tests/models/test_params_models.py b/tests/models/test_params_models.py index 7c1a1c1b5..18e5c25aa 100644 --- a/tests/models/test_params_models.py +++ b/tests/models/test_params_models.py @@ -31,6 +31,7 @@ def test_params_manta(): # WHEN building the model test_manta_built = ParamsManta(**test_manta_params) + # THEN string values should be correctly populated into the model assert test_manta_built.tga_settings == "--exome" assert test_manta_built.wgs_settings == "" From f2f12b4c6fc4611e4e5cbf39b6a078da8c212ca0 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 8 Feb 2024 13:19:37 +0100 Subject: [PATCH 30/32] fix bug --- BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule index f0e92939d..6215b5c16 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule @@ -23,7 +23,7 @@ rule bcftools_filter_sv_research: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' {input.vcf_sv_research} |\ bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; From 5c28c3390277f06e956e6d587c022cec27259bf6 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 8 Feb 2024 14:19:20 +0100 Subject: [PATCH 31/32] remove ds sneaky file and fix doc strings --- .DS_Store | Bin 6148 -> 0 bytes BALSAMIC/models/params.py | 3 +-- tests/models/test_params_models.py | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 73e7bd16458269ab6cf11c688c1b359368c840af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK(M}UV6g>k)7An#QOn5Qb#8<2cgoOCg7OFN;NLZo|YHCXtXf z{(`T5iT~o0o;y2`?E;S&HFJ}hGdpw7-JLUcXLbRo+Mx3YC;@O-1PimQt}q#wa>*u4 z&pc7-7*)JL1DlN?+P~y&JFEg$fxk@wd3PPuP{nh2tS{c*b`Z5WCL2SQrTj}9TfiJP z@r=FOc#1lCJH#^jh>^&fpJ9H>F>jOEz#h4K#2rQ`V;=`N80V>bIbM-o5jyZS9+P!~ zBLq3tmmF(edxuESLKh*~eKdLH%abC`v47^+1>P@Wgq+{R`|JGL!?@Z3!71xw7Q|+*Mx4z zr^}mH-LMl4YtbO=#G31+1I~;y(=08Xp02DfFS)C08)r-I>Bb7%Ypd&LXEV;-`wt)Q zHa`SMVf2~LPE*{iwapnk2T|~1-p5`)45CoxEB@15Y3}V@@oi>GeOx`ABVRK)Qe-Ac zhBWYm43FzwmNm$+BtNpOS?togiY>D1SLc+@jVxz@tLoz;28=JQBNkcn7!P#DNVX*S zl+R5ibIrz?Cwo9<6UWS#fYCZc$(Ax>9gaV_l5EMc9s2n69-v469!|#nb=GA4r*z~i z@=%micQQ)yMmJ<83!LH&#kXOt0#<>4R{_}{To%EK!BL8O>tLmS0f+^5YhznJ8HD2+ ztQZ`n$Pt=Ksl=43%rAyY>FjrnU&Y`k#gq1%SU;d;3 diff --git a/BALSAMIC/models/params.py b/BALSAMIC/models/params.py index 8ac264c7b..f7bbbe99b 100644 --- a/BALSAMIC/models/params.py +++ b/BALSAMIC/models/params.py @@ -187,8 +187,7 @@ def get_manta_settings(self, sequencing_type) -> str: """Return correct setting for manta rules depending on sequencing type.""" if sequencing_type == SequencingType.WGS: return self.manta.wgs_settings - else: - return self.manta.tga_settings + return self.manta.tga_settings class VCFAttributes(BaseModel): diff --git a/tests/models/test_params_models.py b/tests/models/test_params_models.py index 18e5c25aa..94244602e 100644 --- a/tests/models/test_params_models.py +++ b/tests/models/test_params_models.py @@ -23,7 +23,7 @@ def test_params_manta(): - """test Manta settings model for correct validation.""" + """Test Manta settings model for correct validation.""" # GIVEN Manta params test_manta_params = {"wgs_settings": "", "tga_settings": "--exome"} @@ -37,7 +37,7 @@ def test_params_manta(): def test_get_manta_settings_tga(): - """test get Manta settings based on sequencing type TGA.""" + """Test get Manta settings based on sequencing type TGA.""" # GIVEN workflow params params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) @@ -50,7 +50,7 @@ def test_get_manta_settings_tga(): def test_get_manta_settings_wgs(): - """test get Manta settings based on sequencing type WGS.""" + """Test get Manta settings based on sequencing type WGS.""" # GIVEN workflow params params = BalsamicWorkflowConfig.model_validate(WORKFLOW_PARAMS) From 2337ec0eea6357c2128a5c9b7c61f49d0f039a36 Mon Sep 17 00:00:00 2001 From: Mathias Johansson Date: Thu, 8 Feb 2024 15:42:31 +0100 Subject: [PATCH 32/32] split manta filter in two --- .../variant_calling/somatic_sv_tumor_normal.rule | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index 882035689..cab852304 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -22,7 +22,8 @@ rule manta_tumor_normal: normal = config_model.get_sample_name_by_type(SampleType.NORMAL), case_name = case_id, manta_install_path = "/opt/conda/share/manta-1.6.0-2", - low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], + low_pr_sr_count_value = MANTA_FILTERS.low_pr_sr_count.tag_value, + low_pr_sr_count_filter_name = MANTA_FILTERS.low_pr_sr_count.filter_name, threads: get_threads(cluster_config, "manta_tumor_normal") message: @@ -48,7 +49,7 @@ python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz +bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count_value}' --soft-filter '{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz tabix -p vcf -f {output.final};