Skip to content

Commit

Permalink
Merge pull request #1388 from Clinical-Genomics/deliver-multiqc-files
Browse files Browse the repository at this point in the history
  • Loading branch information
ivadym authored Feb 10, 2024
2 parents 511d870 + fe40771 commit 2745116
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 170 deletions.
31 changes: 10 additions & 21 deletions BALSAMIC/commands/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,27 @@

from BALSAMIC import __version__ as balsamic_version
from BALSAMIC.constants.analysis import (
RunMode,
RUN_MODES,
ANALYSIS_WORKFLOWS,
PON_WORKFLOWS,
RUN_MODES,
AnalysisWorkflow,
Gender,
RULE_DELIVERY_MODES,
RuleDeliveryMode,
PON_WORKFLOWS,
PONWorkflow,
RunMode,
)
from BALSAMIC.constants.cache import GenomeVersion, CacheVersion, GENOME_VERSIONS
from BALSAMIC.constants.cache import GENOME_VERSIONS, CacheVersion, GenomeVersion
from BALSAMIC.constants.cluster import (
ClusterProfile,
QOS,
CLUSTER_MAIL_TYPES,
CLUSTER_PROFILES,
QOS,
QOS_OPTIONS,
CLUSTER_MAIL_TYPES,
ClusterProfile,
)
from BALSAMIC.constants.constants import LogLevel, LOG_LEVELS
from BALSAMIC.constants.constants import LOG_LEVELS, LogLevel
from BALSAMIC.constants.rules import DELIVERY_RULES
from BALSAMIC.constants.workflow_params import VCF_DICT
from BALSAMIC.utils.cli import validate_cache_version


OPTION_ADAPTER_TRIM = click.option(
"--adapter-trim/--no-adapter-trim",
default=True,
Expand Down Expand Up @@ -179,16 +176,6 @@
help="Cosmic DB authentication key",
)

OPTION_DELIVERY_MODE = click.option(
"-m",
"--delivery-mode",
type=click.Choice(RULE_DELIVERY_MODES),
default=RuleDeliveryMode.APPEND,
show_default=True,
help=f"Append rules to deliver to the current delivery option ({RuleDeliveryMode.APPEND}) or deliver only "
f"the ones specified ({RuleDeliveryMode.RESET})",
)

OPTION_DISABLE_VARIANT_CALLER = click.option(
"--disable-variant-caller",
help=f"Run workflow with selected variant caller(s) disable. Use comma to remove multiple variant callers. Valid "
Expand Down Expand Up @@ -339,6 +326,8 @@
"-r",
"--rules-to-deliver",
multiple=True,
default=DELIVERY_RULES,
show_default=False,
type=click.Choice(DELIVERY_RULES),
help="Specify the rules to deliver. The delivery mode selected via the --delivery-mode option.",
)
Expand Down
158 changes: 70 additions & 88 deletions BALSAMIC/commands/report/deliver.py
Original file line number Diff line number Diff line change
@@ -1,156 +1,138 @@
"""Balsamic report delivery CLI."""
import json
"""Report deliver CLI command."""
import logging
import os
import subprocess
import sys
from pathlib import Path
from typing import List
from typing import Any, Dict, List, Optional

import click
import snakemake
import yaml
from BALSAMIC.constants.constants import FileType

from BALSAMIC.commands.options import (
OPTION_SAMPLE_CONFIG,
OPTION_DISABLE_VARIANT_CALLER,
OPTION_DELIVERY_MODE,
OPTION_RULES_TO_DELIVER,
OPTION_SAMPLE_CONFIG,
)
from BALSAMIC.constants.analysis import RunMode, RuleDeliveryMode
from BALSAMIC.constants.rules import DELIVERY_RULES
from BALSAMIC.constants.analysis import RunMode
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.models.snakemake import SnakemakeExecutable
from BALSAMIC.utils.cli import convert_deliverables_tags
from BALSAMIC.utils.cli import get_file_extension
from BALSAMIC.utils.cli import get_snakefile
from BALSAMIC.utils.io import write_json
from BALSAMIC.utils.rule import get_result_dir
from BALSAMIC.utils.cli import (
get_snakefile,
convert_deliverables_tags,
get_file_extension,
)
from BALSAMIC.utils.delivery import get_multiqc_deliverables
from BALSAMIC.utils.io import read_json, write_json, write_yaml

LOG = logging.getLogger(__name__)


@click.command("deliver", short_help="Creates a report file with output files")
@OPTION_DELIVERY_MODE
@click.command(
"deliver", short_help="Create a <case_id>.hk file with output analysis files"
)
@OPTION_DISABLE_VARIANT_CALLER
@OPTION_RULES_TO_DELIVER
@OPTION_SAMPLE_CONFIG
@click.pass_context
def deliver(
context: click.Context,
delivery_mode: RuleDeliveryMode,
disable_variant_caller: str,
disable_variant_caller: Optional[str],
rules_to_deliver: List[str],
sample_config: str,
):
"""Deliver command to write <case_id>.hk with the output analysis files."""
"""Report deliver command to generate output analysis files."""
LOG.info(f"BALSAMIC started with log level {context.obj['log_level']}.")
LOG.debug("Reading input sample config")
with open(sample_config, "r") as fn:
sample_config_dict = json.load(fn)

default_rules_to_deliver = DELIVERY_RULES

if not rules_to_deliver:
rules_to_deliver = default_rules_to_deliver

rules_to_deliver = list(rules_to_deliver)
if delivery_mode == RuleDeliveryMode.APPEND:
rules_to_deliver.extend(default_rules_to_deliver)

case_name = sample_config_dict["analysis"]["case_id"]
result_dir = get_result_dir(sample_config_dict)
dst_directory = os.path.join(result_dir, "delivery_report")
LOG.info("Creating delivery_report directory")
Path.mkdir(Path(dst_directory), parents=True, exist_ok=True)

yaml_write_directory = os.path.join(result_dir, "delivery_report")
Path.mkdir(Path(yaml_write_directory), parents=True, exist_ok=True)

analysis_type = sample_config_dict["analysis"]["analysis_type"]
analysis_workflow = sample_config_dict["analysis"]["analysis_workflow"]
snakefile = get_snakefile(analysis_type, analysis_workflow)

report_path = Path(yaml_write_directory, f"{case_name}_report.html")
LOG.info(f"Creating report file {report_path.as_posix()}")

LOG.info(f"Delivering {analysis_workflow} workflow...")
working_dir = Path(
sample_config_dict["analysis"]["analysis_dir"], case_name, "BALSAMIC_run"
LOG.info("Creating <case_id>.hk deliverables file")
config: Dict[str, Any] = read_json(sample_config)
config_model: ConfigModel = ConfigModel(**config)
output_dir: Path = Path(config_model.analysis.result, "delivery_report")
output_dir.mkdir(exist_ok=True)
working_dir: Path = Path(
config_model.analysis.analysis_dir,
config_model.analysis.case_id,
"BALSAMIC_run",
)
html_report: Path = Path(output_dir, f"{config_model.analysis.case_id}_report.html")
snakefile: Path = get_snakefile(
analysis_type=config_model.analysis.analysis_type,
analysis_workflow=config_model.analysis.analysis_workflow,
)

LOG.info(f"Creating HTML report file: {html_report.as_posix()}")
snakemake_executable: SnakemakeExecutable = SnakemakeExecutable(
case_id=case_name,
case_id=config_model.analysis.case_id,
config_path=sample_config,
disable_variant_caller=disable_variant_caller,
report_path=report_path,
report_path=html_report,
run_analysis=True,
run_mode=RunMode.LOCAL,
snakefile=snakefile,
snakemake_options=["--quiet"],
working_dir=working_dir,
)
subprocess.check_output(
f"{sys.executable} -m {snakemake_executable.get_command()}".split(),
shell=False,
f"{sys.executable} -m {snakemake_executable.get_command()}".split(), shell=False
)
LOG.info(f"Workflow report file {report_path.as_posix()}")

LOG.info(f"Delivering analysis workflow: {config_model.analysis.analysis_workflow}")
hk_file: Path = Path(output_dir, f"{config_model.analysis.case_id}.hk")
delivery_ready_file: Path = Path(
output_dir, f"{config_model.analysis.case_id}_delivery_ready.hk"
)
snakemake.snakemake(
snakefile=snakefile,
config={"delivery": "True", "rules_to_deliver": ",".join(rules_to_deliver)},
dryrun=True,
configfiles=[sample_config],
quiet=True,
)

delivery_file_name = os.path.join(yaml_write_directory, case_name + ".hk")

delivery_file_ready = os.path.join(
yaml_write_directory,
case_name + "_delivery_ready.hk",
hk_deliverables: List[Dict[str, Any]] = read_json(delivery_ready_file.as_posix())
hk_deliverables: List[Dict[str, Any]] = convert_deliverables_tags(
delivery_json=hk_deliverables, sample_config_dict=config
)
with open(delivery_file_ready, "r") as fn:
delivery_file_ready_dict = json.load(fn)

delivery_json = dict()
delivery_json["files"] = delivery_file_ready_dict

delivery_json = convert_deliverables_tags(
delivery_json=delivery_json, sample_config_dict=sample_config_dict
)

# Add Housekeeper file to report
delivery_json["files"].append(
# HTML analysis report
hk_deliverables.append(
{
"path": report_path.as_posix(),
"path": html_report.as_posix(),
"step": "balsamic_delivery",
"format": get_file_extension(report_path.as_posix()),
"format": get_file_extension(html_report.as_posix()),
"tag": ["balsamic-report"],
"id": case_name,
"id": config_model.analysis.case_id,
}
)
# Add CASE_ID.JSON to report
delivery_json["files"].append(

# Sample configuration file
hk_deliverables.append(
{
"path": Path(sample_config).resolve().as_posix(),
"step": "case_config",
"format": get_file_extension(sample_config),
"tag": ["balsamic-config"],
"id": case_name,
"id": config_model.analysis.case_id,
}
)
# Add DAG Graph to report
delivery_json["files"].append(

# DAG
hk_deliverables.append(
{
"path": sample_config_dict["analysis"]["dag"],
"path": config_model.analysis.dag,
"step": "case_config",
"format": get_file_extension(sample_config_dict["analysis"]["dag"]),
"format": get_file_extension(config_model.analysis.dag),
"tag": ["balsamic-dag"],
"id": case_name,
"id": config_model.analysis.case_id,
}
)

write_json(delivery_json, delivery_file_name)
with open(delivery_file_name + ".yaml", "w") as fn:
yaml.dump(delivery_json, fn, default_flow_style=False)
# MultiQC intermediate files
multiqc_deliverables: List[Dict[str, Any]] = get_multiqc_deliverables(
case_id=config_model.analysis.case_id,
multiqc_dir=Path(config_model.analysis.result, "qc", "multiqc_data"),
)
hk_deliverables.extend(multiqc_deliverables)

LOG.info(f"Housekeeper delivery file {delivery_file_name}")
hk_deliverables: Dict[str, Any] = {"files": hk_deliverables}
write_json(json_obj=hk_deliverables, path=hk_file.as_posix())
write_yaml(data=hk_deliverables, file_path=f"{hk_file}.{FileType.YAML}")
LOG.info(f"Generated analysis deliverables: {hk_file.as_posix()}")
10 changes: 0 additions & 10 deletions BALSAMIC/constants/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,6 @@ class PONWorkflow(StrEnum):
PON_WORKFLOWS: List[PONWorkflow] = [workflow for workflow in PONWorkflow]


class RuleDeliveryMode(StrEnum):
"""Modes selecting how user-specified delivery rules are combined with the current delivery rules."""

# Add the specified rules to the current delivery option.
APPEND: str = "append"
# Deliver only the specified rules.
RESET: str = "reset"


RULE_DELIVERY_MODES: List[RuleDeliveryMode] = [mode for mode in RuleDeliveryMode]


class BioinfoTools(StrEnum):
"""List of bioinformatics tools in Balsamic."""

Expand Down
1 change: 1 addition & 0 deletions BALSAMIC/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class FileType(StrEnum):
TSV: str = "tsv"
TXT: str = "txt"
VCF: str = "vcf"
YAML: str = "yaml"


class BwaIndexFileType(StrEnum):
Expand Down
8 changes: 5 additions & 3 deletions BALSAMIC/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from distutils.spawn import find_executable
from io import StringIO
from pathlib import Path
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional

import click
import graphviz
Expand Down Expand Up @@ -406,10 +406,12 @@ def generate_graph(config_collection_dict, config_path):
graph_obj.render(cleanup=True)


def convert_deliverables_tags(delivery_json: dict, sample_config_dict: dict) -> dict:
def convert_deliverables_tags(
delivery_json: List[Dict[str, Any]], sample_config_dict: dict
) -> List[Dict[str, Any]]:
"""Replaces values of sample_type with sample_name in deliverables dict."""

for delivery_file in delivery_json["files"]:
for delivery_file in delivery_json:
file_tags = delivery_file["tag"].split(",")
sample_list = sample_config_dict["samples"]
for sample_dict in sample_list:
Expand Down
34 changes: 34 additions & 0 deletions BALSAMIC/utils/delivery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Utility methods for Balsamic delivery command."""
import logging
from pathlib import Path
from typing import Dict, Any, List, Generator

from BALSAMIC.utils.exc import BalsamicError

from BALSAMIC.constants.constants import FileType

LOG = logging.getLogger(__name__)


def get_multiqc_deliverables(case_id: str, multiqc_dir: Path) -> List[Dict[str, Any]]:
    """Return the MultiQC files found in a directory as deliverable entries.

    Args:
        case_id: Case identifier stored under the "id" key of every entry.
        multiqc_dir: Directory searched (non-recursively) for files matching
            ``*.{FileType.JSON}``.

    Returns:
        One dictionary per matching file with the keys "path", "step",
        "format", "tag" and "id", ordered by file path.

    Raises:
        BalsamicError: If no matching file is found in ``multiqc_dir``.
    """
    # Path.glob() yields entries in file-system order, which is not guaranteed
    # to be stable; sorting keeps the generated deliverables reproducible.
    json_files: List[Path] = sorted(multiqc_dir.glob(f"*.{FileType.JSON}"))
    if not json_files:
        error_message: str = (
            f"No MultiQC deliverable files found in {multiqc_dir.as_posix()}."
        )
        LOG.error(error_message)
        # Carry the reason in the exception as well, so callers that catch it
        # do not have to dig through the log to learn what went wrong.
        raise BalsamicError(error_message)
    return [
        {
            "path": file.as_posix(),
            "step": "multiqc",
            "format": FileType.JSON.value,
            "tag": get_file_tags_from_name(file),
            "id": case_id,
        }
        for file in json_files
    ]


def get_file_tags_from_name(file_path: Path) -> List[str]:
    """Return Housekeeper tags from the file name after discarding the suffix.

    The file stem is split on underscores and every piece is lower-cased.
    Empty pieces, produced by leading, trailing or repeated underscores, are
    skipped so that no empty-string tag is returned.

    Args:
        file_path: Path whose final component provides the tags.

    Returns:
        The lower-cased, non-empty underscore-separated parts of the stem.
    """
    return [tag.lower() for tag in file_path.stem.split("_") if tag]
8 changes: 7 additions & 1 deletion BALSAMIC/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
from datetime import datetime
from pathlib import Path
from typing import List
from typing import List, Any

import snakemake
import yaml
Expand Down Expand Up @@ -74,6 +74,12 @@ def read_yaml(yaml_path: str) -> dict:
raise FileNotFoundError(f"The YAML file {yaml_path} was not found")


def write_yaml(data: Any, file_path: Path) -> None:
    """Serialise ``data`` as YAML into ``file_path``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(file_path, mode="w") as yaml_handle:
        yaml.dump(data, stream=yaml_handle)


def read_vcf_file(vcf_file_path: str) -> List[str]:
"""
Reads a VCF file and returns its contents as a list of lines.
Expand Down
Loading

0 comments on commit 2745116

Please sign in to comment.