From e0b9aa8b694518e776abebc87404c80664be9be6 Mon Sep 17 00:00:00 2001 From: Vadym Date: Fri, 2 Feb 2024 15:54:54 +0100 Subject: [PATCH 01/12] Remove unused delivery options --- BALSAMIC/commands/options.py | 31 ++++++++++--------------------- BALSAMIC/constants/analysis.py | 10 ---------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/BALSAMIC/commands/options.py b/BALSAMIC/commands/options.py index 02194ae7b..575cdae9b 100644 --- a/BALSAMIC/commands/options.py +++ b/BALSAMIC/commands/options.py @@ -3,30 +3,27 @@ from BALSAMIC import __version__ as balsamic_version from BALSAMIC.constants.analysis import ( - RunMode, - RUN_MODES, ANALYSIS_WORKFLOWS, + PON_WORKFLOWS, + RUN_MODES, AnalysisWorkflow, Gender, - RULE_DELIVERY_MODES, - RuleDeliveryMode, - PON_WORKFLOWS, PONWorkflow, + RunMode, ) -from BALSAMIC.constants.cache import GenomeVersion, CacheVersion, GENOME_VERSIONS +from BALSAMIC.constants.cache import GENOME_VERSIONS, CacheVersion, GenomeVersion from BALSAMIC.constants.cluster import ( - ClusterProfile, - QOS, + CLUSTER_MAIL_TYPES, CLUSTER_PROFILES, + QOS, QOS_OPTIONS, - CLUSTER_MAIL_TYPES, + ClusterProfile, ) -from BALSAMIC.constants.constants import LogLevel, LOG_LEVELS +from BALSAMIC.constants.constants import LOG_LEVELS, LogLevel from BALSAMIC.constants.rules import DELIVERY_RULES from BALSAMIC.constants.workflow_params import VCF_DICT from BALSAMIC.utils.cli import validate_cache_version - OPTION_ADAPTER_TRIM = click.option( "--adapter-trim/--no-adapter-trim", default=True, @@ -179,16 +176,6 @@ help="Cosmic DB authentication key", ) -OPTION_DELIVERY_MODE = click.option( - "-m", - "--delivery-mode", - type=click.Choice(RULE_DELIVERY_MODES), - default=RuleDeliveryMode.APPEND, - show_default=True, - help=f"Append rules to deliver to the current delivery option ({RuleDeliveryMode.APPEND}) or deliver only " - f"the ones specified ({RuleDeliveryMode.RESET})", -) - OPTION_DISABLE_VARIANT_CALLER = click.option( "--disable-variant-caller", help=f"Run workflow with selected variant caller(s) disable. Use comma to remove multiple variant callers. Valid " @@ -339,6 +326,8 @@ "-r", "--rules-to-deliver", multiple=True, + default=DELIVERY_RULES, + show_default=False, type=click.Choice(DELIVERY_RULES), help="Specify the rules to deliver. The delivery mode selected via the --delivery-mode option.", ) diff --git a/BALSAMIC/constants/analysis.py b/BALSAMIC/constants/analysis.py index 11d2ffd41..a0c852df1 100644 --- a/BALSAMIC/constants/analysis.py +++ b/BALSAMIC/constants/analysis.py @@ -90,16 +90,6 @@ class PONWorkflow(StrEnum): PON_WORKFLOWS: List[PONWorkflow] = [workflow for workflow in PONWorkflow] -class RuleDeliveryMode(StrEnum): - """Rules to deliver mode.""" - - APPEND: str = "append" - RESET: str = "reset" - - -RULE_DELIVERY_MODES: List[RuleDeliveryMode] = [mode for mode in RuleDeliveryMode] - - class BioinfoTools(StrEnum): """List of bioinformatics tools in Balsamic.""" From bbf67f11fe7b72762711014ac7cf0427722e5d72 Mon Sep 17 00:00:00 2001 From: Vadym Date: Fri, 2 Feb 2024 17:30:54 +0100 Subject: [PATCH 02/12] Write yaml method --- BALSAMIC/utils/io.py | 8 +++++++- tests/utils/test_utils.py | 27 ++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/utils/io.py b/BALSAMIC/utils/io.py index c990e114b..d4aa03844 100644 --- a/BALSAMIC/utils/io.py +++ b/BALSAMIC/utils/io.py @@ -4,7 +4,7 @@ import logging from datetime import datetime from pathlib import Path -from typing import List +from typing import List, Any import snakemake import yaml @@ -74,6 +74,12 @@ def read_yaml(yaml_path: str) -> dict: raise FileNotFoundError(f"The YAML file {yaml_path} was not found") +def write_yaml(data: Any, file_path: Path) -> None: + """Write data to a yaml file.""" + with open(file_path, "w") as file: + yaml.dump(data, file) + + def read_vcf_file(vcf_file_path: str) -> List[str]: """ Reads a VCF file and returns its contents as a list of lines. diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index ff249c123..f5fb24c7d 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -4,7 +4,7 @@ import subprocess import sys from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Any from unittest import mock import click @@ -46,6 +46,7 @@ read_yaml, write_finish_file, write_json, + write_yaml, ) from BALSAMIC.utils.rule import ( get_delivery_id, @@ -548,6 +549,30 @@ def test_read_yaml_error(): assert f"The YAML file {yaml_path} was not found" in str(file_exc) +def test_write_yaml(metrics_yaml_path: str, tmp_path: Path): + """Tests write yaml file.""" + + # GIVEN a yaml file + + # GIVEN a file path to write to + yaml_file: Path = Path(tmp_path, "write_yaml.yaml") + + # WHEN reading the yaml file + metrics_data: Dict[str, Any] = read_yaml(metrics_yaml_path) + + # WHEN writing the yaml file from dict + write_yaml(data=metrics_data, file_path=yaml_file.as_posix()) + + # THEN assert that a file was successfully created + assert Path.exists(yaml_file) + + # WHEN reading it as a yaml + written_metrics_data: dict = read_yaml(yaml_file.as_posix()) + + # THEN assert that all data is kept + assert written_metrics_data == metrics_data + + def test_get_threads(cluster_analysis_config_path: str): # GIVEN cluster config file and rule name cluster_config = json.load(open(cluster_analysis_config_path, "r")) From 7c71d3f5d5126b5c4d45b241721d3d1b9e9af0fa Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 09:32:48 +0100 Subject: [PATCH 03/12] Refactor deliver.py --- BALSAMIC/commands/report/deliver.py | 152 ++++++++++++---------------- BALSAMIC/constants/constants.py | 1 + BALSAMIC/utils/cli.py | 9 +- tests/utils/test_utils.py | 56 +++++----- 4 files changed, 97 insertions(+), 121 deletions(-) diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index 82d54a37f..b901d08a1 100644 --- a/BALSAMIC/commands/report/deliver.py +++ b/BALSAMIC/commands/report/deliver.py @@ -1,87 +1,69 @@ -"""Balsamic report delivery CLI.""" -import json +"""Report deliver CLI command.""" import logging -import os import subprocess import sys from pathlib import Path -from typing import List +from typing import Any, Dict, List, Optional import click import snakemake -import yaml +from BALSAMIC.constants.constants import FileType from BALSAMIC.commands.options import ( - OPTION_SAMPLE_CONFIG, OPTION_DISABLE_VARIANT_CALLER, - OPTION_DELIVERY_MODE, OPTION_RULES_TO_DELIVER, + OPTION_SAMPLE_CONFIG, ) -from BALSAMIC.constants.analysis import RunMode, RuleDeliveryMode -from BALSAMIC.constants.rules import DELIVERY_RULES +from BALSAMIC.constants.analysis import RunMode +from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.snakemake import SnakemakeExecutable -from BALSAMIC.utils.cli import convert_deliverables_tags -from BALSAMIC.utils.cli import get_file_extension -from BALSAMIC.utils.cli import get_snakefile -from BALSAMIC.utils.io import write_json -from BALSAMIC.utils.rule import get_result_dir +from BALSAMIC.utils.cli import ( + get_snakefile, + convert_deliverables_tags, + get_file_extension, +) +from BALSAMIC.utils.io import read_json, write_json, write_yaml LOG = logging.getLogger(__name__) -@click.command("deliver", short_help="Creates a report file with output files") -@OPTION_DELIVERY_MODE +@click.command( + "deliver", short_help="Create a .hk file with output analysis files" +) @OPTION_DISABLE_VARIANT_CALLER @OPTION_RULES_TO_DELIVER @OPTION_SAMPLE_CONFIG @click.pass_context def deliver( context: click.Context, - delivery_mode: RuleDeliveryMode, - disable_variant_caller: str, + disable_variant_caller: Optional[str], rules_to_deliver: List[str], sample_config: str, ): - """Deliver command to write .hk with the output analysis files.""" + """Report deliver command to generate output analysis files.""" LOG.info(f"BALSAMIC started with log level {context.obj['log_level']}.") - LOG.debug("Reading input sample config") - with open(sample_config, "r") as fn: - sample_config_dict = json.load(fn) - - default_rules_to_deliver = DELIVERY_RULES - - if not rules_to_deliver: - rules_to_deliver = default_rules_to_deliver - - rules_to_deliver = list(rules_to_deliver) - if delivery_mode == RuleDeliveryMode.APPEND: - rules_to_deliver.extend(default_rules_to_deliver) - - case_name = sample_config_dict["analysis"]["case_id"] - result_dir = get_result_dir(sample_config_dict) - dst_directory = os.path.join(result_dir, "delivery_report") - LOG.info("Creating delivery_report directory") - Path.mkdir(Path(dst_directory), parents=True, exist_ok=True) - - yaml_write_directory = os.path.join(result_dir, "delivery_report") - Path.mkdir(Path(yaml_write_directory), parents=True, exist_ok=True) - - analysis_type = sample_config_dict["analysis"]["analysis_type"] - analysis_workflow = sample_config_dict["analysis"]["analysis_workflow"] - snakefile = get_snakefile(analysis_type, analysis_workflow) - - report_path = Path(yaml_write_directory, f"{case_name}_report.html") - LOG.info(f"Creating report file {report_path.as_posix()}") - - LOG.info(f"Delivering {analysis_workflow} workflow...") - working_dir = Path( - sample_config_dict["analysis"]["analysis_dir"], case_name, "BALSAMIC_run" + LOG.info("Creating .hk deliverables file") + config: Dict[str, Any] = read_json(sample_config) + config_model: ConfigModel = ConfigModel(**config) + output_dir: Path = Path(config_model.analysis.result, "delivery_report") + output_dir.mkdir(exist_ok=True) + working_dir: Path = Path( + config_model.analysis.analysis_dir, + config_model.analysis.case_id, + "BALSAMIC_run", ) + html_report: Path = Path(output_dir, f"{config_model.analysis.case_id}_report.html") + snakefile: Path = get_snakefile( + analysis_type=config_model.analysis.analysis_type, + analysis_workflow=config_model.analysis.analysis_workflow, + ) + + LOG.info(f"Creating HTML report file: {html_report.as_posix()}") snakemake_executable: SnakemakeExecutable = SnakemakeExecutable( - case_id=case_name, + case_id=config_model.analysis.case_id, config_path=sample_config, disable_variant_caller=disable_variant_caller, - report_path=report_path, + report_path=html_report, run_analysis=True, run_mode=RunMode.LOCAL, snakefile=snakefile, @@ -89,11 +71,14 @@ def deliver( working_dir=working_dir, ) subprocess.check_output( - f"{sys.executable} -m {snakemake_executable.get_command()}".split(), - shell=False, + f"{sys.executable} -m {snakemake_executable.get_command()}".split(), shell=False ) - LOG.info(f"Workflow report file {report_path.as_posix()}") + LOG.info(f"Delivering analysis workflow: {config_model.analysis.analysis_workflow}") + hk_file: Path = Path(output_dir, f"{config_model.analysis.case_id}.hk") + delivery_ready_file: Path = Path( + output_dir, f"{config_model.analysis.case_id}_delivery_ready.hk" + ) snakemake.snakemake( snakefile=snakefile, config={"delivery": "True", "rules_to_deliver": ",".join(rules_to_deliver)}, @@ -101,56 +86,45 @@ def deliver( configfiles=[sample_config], quiet=True, ) - - delivery_file_name = os.path.join(yaml_write_directory, case_name + ".hk") - - delivery_file_ready = os.path.join( - yaml_write_directory, - case_name + "_delivery_ready.hk", + hk_deliverables: List[Dict[str, Any]] = read_json(delivery_ready_file.as_posix()) + hk_deliverables: List[Dict[str, Any]] = convert_deliverables_tags( + delivery_json=hk_deliverables, sample_config_dict=config ) - with open(delivery_file_ready, "r") as fn: - delivery_file_ready_dict = json.load(fn) - - delivery_json = dict() - delivery_json["files"] = delivery_file_ready_dict - delivery_json = convert_deliverables_tags( - delivery_json=delivery_json, sample_config_dict=sample_config_dict - ) - - # Add Housekeeper file to report - delivery_json["files"].append( + # HTML analysis report + hk_deliverables.append( { - "path": report_path.as_posix(), + "path": html_report.as_posix(), "step": "balsamic_delivery", - "format": get_file_extension(report_path.as_posix()), + "format": get_file_extension(html_report.as_posix()), "tag": ["balsamic-report"], - "id": case_name, + "id": config_model.analysis.case_id, } ) - # Add CASE_ID.JSON to report - delivery_json["files"].append( + + # Sample configuration file + hk_deliverables.append( { "path": Path(sample_config).resolve().as_posix(), "step": "case_config", "format": get_file_extension(sample_config), "tag": ["balsamic-config"], - "id": case_name, + "id": config_model.analysis.case_id, } ) - # Add DAG Graph to report - delivery_json["files"].append( + + # DAG + hk_deliverables.append( { - "path": sample_config_dict["analysis"]["dag"], + "path": config_model.analysis.dag, "step": "case_config", - "format": get_file_extension(sample_config_dict["analysis"]["dag"]), + "format": get_file_extension(config_model.analysis.dag), "tag": ["balsamic-dag"], - "id": case_name, + "id": config_model.analysis.case_id, } ) - write_json(delivery_json, delivery_file_name) - with open(delivery_file_name + ".yaml", "w") as fn: - yaml.dump(delivery_json, fn, default_flow_style=False) - - LOG.info(f"Housekeeper delivery file {delivery_file_name}") + hk_deliverables: Dict[str, Any] = {"files": hk_deliverables} + write_json(json_obj={"files": hk_deliverables}, path=hk_file.as_posix()) + write_yaml(data={"files": hk_deliverables}, file_path=f"{hk_file}.{FileType.YAML}") + LOG.info(f"Generated analysis deliverables: {hk_file.as_posix()}") diff --git a/BALSAMIC/constants/constants.py b/BALSAMIC/constants/constants.py index 13686a202..e3ae6243c 100644 --- a/BALSAMIC/constants/constants.py +++ b/BALSAMIC/constants/constants.py @@ -40,6 +40,7 @@ class FileType(StrEnum): TSV: str = "tsv" TXT: str = "txt" VCF: str = "vcf" + YAML: str = "yaml" class BwaIndexFileType(StrEnum): diff --git a/BALSAMIC/utils/cli.py b/BALSAMIC/utils/cli.py index 552619c19..b9779e938 100644 --- a/BALSAMIC/utils/cli.py +++ b/BALSAMIC/utils/cli.py @@ -6,7 +6,7 @@ from distutils.spawn import find_executable from io import StringIO from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import click import graphviz @@ -406,10 +406,13 @@ def generate_graph(config_collection_dict, config_path): graph_obj.render(cleanup=True) -def convert_deliverables_tags(delivery_json: dict, sample_config_dict: dict) -> dict: +def convert_deliverables_tags( + delivery_json: List[Dict[str, Any]], sample_config_dict: dict +) -> List[Dict[str, Any]]: """Replaces values of sample_type with sample_name in deliverables dict.""" - for delivery_file in delivery_json["files"]: + for delivery_file in delivery_json: + print(delivery_file) file_tags = delivery_file["tag"].split(",") sample_list = sample_config_dict["samples"] for sample_dict in sample_list: diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index f5fb24c7d..916c5fe20 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -628,34 +628,32 @@ def test_convert_deliverables_tags(tumor_normal_fastq_info_correct: List[Dict]): """Test generation of delivery tags.""" # GIVEN a deliverables dict and a sample config dict - delivery_json = { - "files": [ - { - "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/fastq/ACC1_R_1.fp.fastq.gz", - "path_index": [], - "step": "fastp", - "tag": "ACC1,read1,quality-trimmed-fastq-read1", - "id": "ACC1", - "format": "fastq.gz", - }, - { - "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/fastq/ACC1_R_2.fp.fastq.gz", - "path_index": [], - "step": "fastp", - "tag": "read2,quality-trimmed-fastq-read1", - "id": "ACC1", - "format": "fastq.gz", - }, - { - "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/qc/fastp/ACC1.fastp.json", - "path_index": [], - "step": "fastp", - "tag": "ACC1,json,quality-trimmed-fastq-json,tumor", - "id": "tumor", - "format": "json", - }, - ] - } + delivery_json = [ + { + "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/fastq/ACC1_R_1.fp.fastq.gz", + "path_index": [], + "step": "fastp", + "tag": "ACC1,read1,quality-trimmed-fastq-read1", + "id": "ACC1", + "format": "fastq.gz", + }, + { + "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/fastq/ACC1_R_2.fp.fastq.gz", + "path_index": [], + "step": "fastp", + "tag": "read2,quality-trimmed-fastq-read1", + "id": "ACC1", + "format": "fastq.gz", + }, + { + "path": "dummy_balsamic_run/run_tests/TN_WGS/analysis/qc/fastp/ACC1.fastp.json", + "path_index": [], + "step": "fastp", + "tag": "ACC1,json,quality-trimmed-fastq-json,tumor", + "id": "tumor", + "format": "json", + }, + ] sample_config_dict = {"samples": tumor_normal_fastq_info_correct} @@ -665,7 +663,7 @@ def test_convert_deliverables_tags(tumor_normal_fastq_info_correct: List[Dict]): ) # THEN prefix strings should be replaced with sample name - for delivery_file in delivery_json["files"]: + for delivery_file in delivery_json: assert "ACC1" in delivery_file["tag"] assert "tumor" not in delivery_file["tag"] assert delivery_file["id"] == "ACC1" From 9af5fb49fa65f1e669ea5a36f4e346431e80e02a Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 11:49:28 +0100 Subject: [PATCH 04/12] Delivery methods --- BALSAMIC/utils/delivery.py | 34 ++++++++++++++++++++++++++++++ tests/conftest.py | 34 +++++++++++++++++------------- tests/utils/test_delivery.py | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 BALSAMIC/utils/delivery.py create mode 100644 tests/utils/test_delivery.py diff --git a/BALSAMIC/utils/delivery.py b/BALSAMIC/utils/delivery.py new file mode 100644 index 000000000..c470bf1f8 --- /dev/null +++ b/BALSAMIC/utils/delivery.py @@ -0,0 +1,34 @@ +"""Utility methods for Balsamic delivery command.""" +import logging +from pathlib import Path +from typing import Dict, Any, List, Generator + +from BALSAMIC.utils.exc import BalsamicError + +from BALSAMIC.constants.constants import FileType + +LOG = logging.getLogger(__name__) + + +def get_multiqc_deliverables(case_id: str, multiqc_dir: Path) -> List[Dict[str, Any]]: + """Return a list of MultiQC deliverable files from a directory.""" + multiqc_deliverables: List[Dict[str, Any]] = [] + json_files: Generator[Path, None, None] = multiqc_dir.glob(f"*.{FileType.JSON}") + for file in json_files: + deliverable: Dict[str, Any] = { + "path": file.as_posix(), + "step": "multiqc", + "format": FileType.JSON.value, + "tag": get_file_tags_from_name(file), + "id": case_id, + } + multiqc_deliverables.append(deliverable) + if not multiqc_deliverables: + LOG.error(f"No MultiQC deliverable files found in {multiqc_dir.as_posix()}.") + raise BalsamicError + return multiqc_deliverables + + +def get_file_tags_from_name(file_path: Path) -> List[str]: + """Return Housekeeper tags from the file name after discarding the suffix.""" + return file_path.stem.split("_") diff --git a/tests/conftest.py b/tests/conftest.py index 8e3f9f19b..f53a0fbd2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1877,36 +1877,42 @@ def sample_config( @pytest.fixture(scope="session") def analysis_path(): - """Return path for test analysis""" + """Return path for test analysis.""" return "tests/test_data/qc_files/analysis" @pytest.fixture(scope="session") -def multiqc_data_path(analysis_path): - """Return path of JSON for MultiQC test data""" - return os.path.join(analysis_path, "qc", "multiqc_data", "multiqc_data.json") +def multiqc_data_dir(analysis_path: str) -> Path: + """Return path of tje MultiQC test data directory.""" + return Path(analysis_path, "qc", "multiqc_data") @pytest.fixture(scope="session") -def multiqc_data_dict(multiqc_data_path): - """Read and Return test data from JASON of MultiQC test data""" +def multiqc_data_path(multiqc_data_dir: Path) -> str: + """Return path of JSON for MultiQC test data.""" + return Path(multiqc_data_dir, "multiqc_data.json").as_posix() + + +@pytest.fixture(scope="session") +def multiqc_data_dict(multiqc_data_path: str) -> dict: + """Read and Return test data from JASON of MultiQC test data.""" return read_json(multiqc_data_path) @pytest.fixture(scope="session") -def metrics_yaml_path(analysis_path): - """Return path for Tumor-Only deliverable metrics from YAML""" - return os.path.join( +def metrics_yaml_path(analysis_path: str) -> str: + """Return path for Tumor-Only deliverable metrics from YAML.""" + return Path( analysis_path, "qc", "sample_tumor_only_metrics_deliverables.yaml" - ) + ).as_posix() @pytest.fixture(scope="session") -def bcftools_counts_path(analysis_path): - """Return path for svdb.clinical.filtered.pass.stats""" - return os.path.join( +def bcftools_counts_path(analysis_path: str) -> str: + """Return path for svdb.clinical.filtered.pass.stats.""" + return Path( analysis_path, "vep", "SNV.somatic.case.svdb.clinical.filtered.pass.stats" - ) + ).as_posix() @pytest.fixture(scope="session") diff --git a/tests/utils/test_delivery.py b/tests/utils/test_delivery.py new file mode 100644 index 000000000..6ae5c08de --- /dev/null +++ b/tests/utils/test_delivery.py @@ -0,0 +1,40 @@ +"""Test Balsamic delivery utility methods.""" +from pathlib import Path +from typing import Any, Dict, List + +from BALSAMIC.constants.constants import FileType +from BALSAMIC.utils.delivery import get_file_tags_from_name, get_multiqc_deliverables + + +def test_get_multiqc_deliverables(case_id_tumor_only: str, multiqc_data_dir: Path): + """Test MultiQC delivery files parsing.""" + + # GIVEN a case ID and a MultiQC data directory + + # WHEN extracting the deliverables + multiqc_deliverables: List[Dict[str, Any]] = get_multiqc_deliverables( + case_id=case_id_tumor_only, multiqc_dir=multiqc_data_dir + ) + + # THEN the correct number of deliverables should be returned with the expected structure + assert len(multiqc_deliverables) == 5 + assert all(isinstance(item["path"], str) for item in multiqc_deliverables) + assert all(item["step"] == "multiqc" for item in multiqc_deliverables) + assert all(item["format"] == FileType.JSON for item in multiqc_deliverables) + assert all(isinstance(item["tag"], list) for item in multiqc_deliverables) + assert all(item["id"] == case_id_tumor_only for item in multiqc_deliverables) + + +def test_get_file_tags_from_name(): + """Test tag extraction from a file name.""" + + # GIVEN a mock file object + file_path: Path = Path( + "/analysis/qc/multiqc_data/multiqc_picard_AlignmentSummaryMetrics.json" + ) + + # WHEN extracting the tags from the file name + tags: List[str] = get_file_tags_from_name(file_path) + + # THEN the correct tags are extracted from the file name + assert tags == ["multiqc", "picard", "AlignmentSummaryMetrics"] From 478a41de53873ede7a88760528e3fa082c4a8066 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 11:49:48 +0100 Subject: [PATCH 05/12] Deliver multiqc files --- BALSAMIC/commands/report/deliver.py | 10 ++++++++++ tests/utils/test_delivery.py | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index b901d08a1..37fcb935d 100644 --- a/BALSAMIC/commands/report/deliver.py +++ b/BALSAMIC/commands/report/deliver.py @@ -22,6 +22,7 @@ convert_deliverables_tags, get_file_extension, ) +from BALSAMIC.utils.delivery import get_multiqc_deliverables from BALSAMIC.utils.io import read_json, write_json, write_yaml LOG = logging.getLogger(__name__) @@ -124,6 +125,15 @@ def deliver( } ) + # MultiQC intermediate files + multiqc_deliverables: List[Dict[str, Any]] = get_multiqc_deliverables( + case_id=config_model.analysis.case_id, + multiqc_dir=Path( + config_model.analysis.result, "qc", "multiqc_data", "multiqc_data.json" + ), + ) + hk_deliverables.extend(multiqc_deliverables) + hk_deliverables: Dict[str, Any] = {"files": hk_deliverables} write_json(json_obj={"files": hk_deliverables}, path=hk_file.as_posix()) write_yaml(data={"files": hk_deliverables}, file_path=f"{hk_file}.{FileType.YAML}") diff --git a/tests/utils/test_delivery.py b/tests/utils/test_delivery.py index 6ae5c08de..0bba15832 100644 --- a/tests/utils/test_delivery.py +++ b/tests/utils/test_delivery.py @@ -2,6 +2,9 @@ from pathlib import Path from typing import Any, Dict, List +import pytest +from BALSAMIC.utils.exc import BalsamicError + from BALSAMIC.constants.constants import FileType from BALSAMIC.utils.delivery import get_file_tags_from_name, get_multiqc_deliverables @@ -25,6 +28,21 @@ def test_get_multiqc_deliverables(case_id_tumor_only: str, multiqc_data_dir: Pat assert all(item["id"] == case_id_tumor_only for item in multiqc_deliverables) +def test_get_multiqc_deliverables_error( + case_id_tumor_only: str, fastq_dir_tumor_only: str +): + """Test MultiQC delivery files parsing when incorrect path is provided.""" + + # GIVEN a case ID and an incorrect MultiQC data directory + + # WHEN extracting the deliverables + with pytest.raises(BalsamicError): + # THEN an exception should be raised + get_multiqc_deliverables( + case_id=case_id_tumor_only, multiqc_dir=Path(fastq_dir_tumor_only) + ) + + def test_get_file_tags_from_name(): """Test tag extraction from a file name.""" From 68ee60edc9c83732fda7fae6e6dfddd48131b860 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 14:20:04 +0100 Subject: [PATCH 06/12] Fix pytests --- BALSAMIC/commands/report/deliver.py | 4 +--- tests/commands/report/test_deliver.py | 8 ++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index 37fcb935d..61ce33c93 100644 --- a/BALSAMIC/commands/report/deliver.py +++ b/BALSAMIC/commands/report/deliver.py @@ -128,9 +128,7 @@ def deliver( # MultiQC intermediate files multiqc_deliverables: List[Dict[str, Any]] = get_multiqc_deliverables( case_id=config_model.analysis.case_id, - multiqc_dir=Path( - config_model.analysis.result, "qc", "multiqc_data", "multiqc_data.json" - ), + multiqc_dir=Path(config_model.analysis.result, "qc", "multiqc_data"), ) hk_deliverables.extend(multiqc_deliverables) diff --git a/tests/commands/report/test_deliver.py b/tests/commands/report/test_deliver.py index 6a5e52754..7e55385c6 100644 --- a/tests/commands/report/test_deliver.py +++ b/tests/commands/report/test_deliver.py @@ -16,6 +16,10 @@ def test_deliver_tumor_only_panel( # GIVEN a tumor-normal config file helpers.read_config(tumor_only_config) actual_delivery_report = Path(helpers.delivery_dir, helpers.case_id + ".hk") + multiqc_data_dir: Path = Path(helpers.result_dir, "qc", "multiqc_data") + multiqc_data_dir.mkdir(parents=True, exist_ok=True) + multiqc_data_json: Path = Path(multiqc_data_dir, "multiqc_data.json") + multiqc_data_json.touch() with mock.patch.dict( environ, @@ -71,6 +75,10 @@ def test_deliver_tumor_normal_panel( vep_result_dir, "SNV.somatic." + helpers.case_id + ".vardict.vcf.gz.tbi" ) touch_vcf_delivery_file_index.touch() + multiqc_data_dir: Path = Path(helpers.result_dir, "qc", "multiqc_data") + multiqc_data_dir.mkdir(parents=True, exist_ok=True) + multiqc_data_json: Path = Path(multiqc_data_dir, "multiqc_data.json") + multiqc_data_json.touch() # Temporary files to be ignored by delivery vcf_result_dir = Path(helpers.result_dir, "vcf") From 13f747df2a4658fa49964bdc1bd25bc1c8b97192 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 14:28:39 +0100 Subject: [PATCH 07/12] CHANGELOG --- CHANGELOG.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0accbe095..e71e43439 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,14 @@ +[X.X.X] +------- + +Added: +^^^^^^ +* Add MultiQC intermediate files to deliverables + +Changed: +^^^^^^^^ +* `delivery.py`, removing unused arguments + [13.0.1] ------- From 91173c6af94ef096e7044a82a42e090fe198864f Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 5 Feb 2024 14:48:32 +0100 Subject: [PATCH 08/12] Remove print --- BALSAMIC/utils/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/BALSAMIC/utils/cli.py b/BALSAMIC/utils/cli.py index b9779e938..11616e197 100644 --- a/BALSAMIC/utils/cli.py +++ b/BALSAMIC/utils/cli.py @@ -412,7 +412,6 @@ def convert_deliverables_tags( """Replaces values of sample_type with sample_name in deliverables dict.""" for delivery_file in delivery_json: - print(delivery_file) file_tags = delivery_file["tag"].split(",") sample_list = sample_config_dict["samples"] for sample_dict in sample_list: From 1bcf99b478fce10872325aececd3601b252c0235 Mon Sep 17 00:00:00 2001 From: Vadym Date: Tue, 6 Feb 2024 14:00:47 +0100 Subject: [PATCH 09/12] lower case tags --- BALSAMIC/utils/delivery.py | 2 +- tests/utils/test_delivery.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/utils/delivery.py b/BALSAMIC/utils/delivery.py index c470bf1f8..3c15386b5 100644 --- a/BALSAMIC/utils/delivery.py +++ b/BALSAMIC/utils/delivery.py @@ -31,4 +31,4 @@ def get_multiqc_deliverables(case_id: str, multiqc_dir: Path) -> List[Dict[str, def get_file_tags_from_name(file_path: Path) -> List[str]: """Return Housekeeper tags from the file name after discarding the suffix.""" - return file_path.stem.split("_") + return [tag.lower() for tag in file_path.stem.split("_")] diff --git a/tests/utils/test_delivery.py b/tests/utils/test_delivery.py index 0bba15832..49a146dfc 100644 --- a/tests/utils/test_delivery.py +++ b/tests/utils/test_delivery.py @@ -55,4 +55,4 @@ def test_get_file_tags_from_name(): tags: List[str] = get_file_tags_from_name(file_path) # THEN the correct tags are extracted from the file name - assert tags == ["multiqc", "picard", "AlignmentSummaryMetrics"] + assert tags == ["multiqc", "picard", "alignmentsummarymetrics"] From e1f30ffe128adb0e543864223231419994bd7b82 Mon Sep 17 00:00:00 2001 From: Vadym Date: Tue, 6 Feb 2024 15:45:46 +0100 Subject: [PATCH 10/12] Typo --- BALSAMIC/commands/report/deliver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index 61ce33c93..38f0ef122 100644 --- a/BALSAMIC/commands/report/deliver.py +++ b/BALSAMIC/commands/report/deliver.py @@ -133,6 +133,6 @@ def deliver( hk_deliverables.extend(multiqc_deliverables) hk_deliverables: Dict[str, Any] = {"files": hk_deliverables} - write_json(json_obj={"files": hk_deliverables}, path=hk_file.as_posix()) - write_yaml(data={"files": hk_deliverables}, file_path=f"{hk_file}.{FileType.YAML}") + write_json(json_obj=hk_deliverables, path=hk_file.as_posix()) + write_yaml(data=hk_deliverables, file_path=f"{hk_file}.{FileType.YAML}") LOG.info(f"Generated analysis deliverables: {hk_file.as_posix()}") From 1fbbd805707c18a83fa45d568363c3882de7315d Mon Sep 17 00:00:00 2001 From: Vadym Date: Sat, 10 Feb 2024 18:22:53 +0100 Subject: [PATCH 11/12] Fix CHANGELOG.rst --- CHANGELOG.rst | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5b5eb45a7..4ee3f79cc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,17 +3,14 @@ Added: ^^^^^^ -* Add MultiQC intermediate files to deliverables - -Changed: -^^^^^^^^ -* bcftools filters for PR:SR evidence in Manta calls -* "--exome" argument to Manta runs in TGA cases -* `delivery.py`, removing unused arguments +* bcftools filters for PR:SR evidence in Manta calls https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 +* "--exome" argument to Manta runs in TGA cases https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 +* MultiQC intermediate files to deliverables https://github.com/Clinical-Genomics/BALSAMIC/pull/1388 Removed: ^^^^^^^^ -* Extra bcftools filters that allows MaxDepth filtered variants in the final SV VCF +* Extra bcftools filters that allows MaxDepth filtered variants in the final SV VCF https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 +* Unused arguments from `delivery.py` https://github.com/Clinical-Genomics/BALSAMIC/pull/1388 [13.0.1] ------- From fe40771eccd9e8eddae2f0161825da1039b105ac Mon Sep 17 00:00:00 2001 From: Vadym Date: Sat, 10 Feb 2024 18:24:56 +0100 Subject: [PATCH 12/12] Update CHANGELOG.rst --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4ee3f79cc..bfabcca1e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,8 @@ Added: ^^^^^^ -* bcftools filters for PR:SR evidence in Manta calls https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 -* "--exome" argument to Manta runs in TGA cases https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 +* bcftools filters for `PR:SR` evidence in Manta calls https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 +* `--exome` argument to Manta runs in TGA cases https://github.com/Clinical-Genomics/BALSAMIC/pull/1371 * MultiQC intermediate files to deliverables https://github.com/Clinical-Genomics/BALSAMIC/pull/1388 Removed: