From 59755ac64ac3ad53d1d894d353c15b7045196897 Mon Sep 17 00:00:00 2001 From: Patrik Grenfeldt Date: Wed, 25 Mar 2020 16:58:30 +0100 Subject: [PATCH] Feat(store): Store balsamic analysis in housekeeper (#551) * store-like-mip * Test store * store balsamic files in meta-file * remove unused argument * add missing required parameter * move calls to store object * change black pre-commit line length to what we use * Remove Python2 charset declaration. Format code * Remove Python2 charset declaration * Capitalize in log message * add method description * output with click instead of print * remove unused imports * remove unnecessary whitespace * describe methods * use existing case instead of refetching * remove hard coded dummy values * remove unused assignment * add missing argument in call * make mock behave more like real thing * store root_dir in hk_api * fix string * handle directories and multiple tags * Merge with master * clarify the priority mapping * simplify test * remove debug print * move test helpers to a module * fix import of store_helpers * remove redundant test setup code * fix store_helper module path * remove redundant import * fix import order * Add docstring * linting * linting * skip conversion to path * use full path * rework the path and tag parsing * return actual compressed filename * fix broken test * fix path to generated .hk file * fix path to generated .hk file * fix deliverables file path * merge with master * Protect the store in housekeeper from direct usage * format code * linting * linting * check black with our decided line length * format code * call fixture what it is * test magic __getattr__ * Update cg/cli/workflow/balsamic/base.py * simplify creation of balsamic command * create the deliverables file path in one way only * simplify balsamic command * restore insecure call * remove erroneous usage of store on API * forward all arguments to wrapped add_commit * fix docstring * Wrap version method in Store * capture log at right level * more tests * fix store name according to real implementation --- .pre-commit-config.yaml | 1 + .travis.yml | 2 +- cg/__init__.py | 3 +- cg/apps/__init__.py | 1 - cg/apps/balsamic/fastq.py | 9 +- cg/apps/beacon.py | 31 +- cg/apps/gt.py | 37 +- cg/apps/hk.py | 93 ++- cg/apps/invoice/render.py | 5 +- cg/apps/lims/__init__.py | 1 - cg/apps/lims/order.py | 109 ++-- cg/apps/lims/orderform.py | 272 ++++---- cg/apps/loqus.py | 21 +- cg/apps/madeline.py | 5 +- cg/apps/mip/fastq.py | 1 - cg/apps/osticket.py | 5 +- cg/apps/pipelines/fastqhandler.py | 1 - cg/apps/scoutapi.py | 4 +- cg/apps/stats.py | 76 +-- cg/apps/tb/__init__.py | 1 - cg/apps/tb/api.py | 11 +- cg/apps/usalt/fastq.py | 5 +- cg/cli/__init__.py | 1 - cg/cli/add.py | 169 +++-- cg/cli/clean.py | 22 +- cg/cli/status.py | 359 ++++++----- cg/cli/transfer.py | 38 +- cg/cli/workflow/balsamic/base.py | 101 +-- cg/cli/workflow/balsamic/store.py | 160 ++--- cg/cli/workflow/microsalt/store.py | 22 +- cg/cli/workflow/mip_dna/store.py | 24 +- cg/cli/workflow/mip_rna/store.py | 24 +- cg/constants.py | 8 +- cg/exc.py | 3 - cg/meta/__init__.py | 1 - cg/meta/invoice.py | 26 +- cg/meta/orders/__init__.py | 1 - cg/meta/orders/api.py | 208 ++++--- cg/meta/report/api.py | 45 +- cg/meta/store/balsamic.py | 108 ++++ cg/meta/transfer/__init__.py | 1 - cg/meta/transfer/flowcell.py | 51 +- cg/meta/transfer/lims.py | 63 +- cg/meta/upload/gt.py | 34 +- cg/meta/upload/observations.py | 55 +- cg/meta/upload/scoutapi.py | 20 +- cg/meta/upload/vogue.py | 10 +- cg/meta/workflow/balsamic.py | 37 +-
cg/meta/workflow/microsalt.py | 1 - cg/meta/workflow/mip_dna.py | 36 +- cg/meta/workflow/mip_rna.py | 18 +- cg/server/__init__.py | 1 - cg/server/app.py | 41 +- cg/server/auto.py | 1 - cg/server/config.py | 4 +- cg/server/ext.py | 14 +- cg/store/__init__.py | 1 - cg/store/api/__init__.py | 1 - cg/store/api/add.py | 32 +- cg/store/api/core.py | 13 +- cg/store/api/status.py | 580 +++++++++--------- cg/store/utils.py | 1 - .../balsamic/test_fastqfileconcatenator.py | 1 - .../balsamic/test_fastqfilenamecreator.py | 7 +- tests/apps/balsamic/test_fastqhandler.py | 37 +- tests/apps/hk/conftest.py | 23 +- tests/apps/hk/test__getattr__.py | 18 + tests/apps/hk/test_add_file.py | 12 +- tests/apps/lims/test_apps_lims_api.py | 1 - tests/apps/lims/test_apps_lims_orderform.py | 1 - .../mip/snapshots/snap_test_apps_mip_files.py | 5 +- tests/apps/mip/test_apps_mip_files.py | 1 - tests/apps/mip/test_mip_fastqhandler.py | 10 +- tests/apps/tb/mip/test_get_files.py | 1 - .../usalt/test_usalt_fastqfilenamecreator.py | 1 - tests/apps/usalt/test_usalt_fastqhandler.py | 5 +- tests/cli/workflow/balsamic/conftest.py | 134 +--- tests/cli/workflow/balsamic/store/conftest.py | 157 +++++ .../store/test_generate_deliverables_file.py | 69 +++ .../cli/workflow/balsamic/store/test_store.py | 126 ++++ .../{test_cli_balsamic_run.py => test_run.py} | 16 +- ...st_cli_balsamic_start.py => test_start.py} | 0 .../cli/workflow/mip_dna/test_cli_mip_dna.py | 6 +- tests/conftest.py | 16 +- tests/delivery/conftest.py | 1 - tests/delivery/test_delivery.py | 1 - .../fixtures/apps/balsamic/case/metadata.yml | 50 ++ .../apps/balsamic/case/metadata_directory.yml | 3 + .../apps/balsamic/case/metadata_file_tags.yml | 5 + tests/meta/conftest.py | 46 +- tests/meta/report/test_status_helper.py | 63 +- .../snap_test_mip_rna_build_bundle.py | 9 +- tests/meta/store/test_mip_rna_build_bundle.py | 1 - tests/meta/transfer/conftest.py | 27 +- .../transfer/test_meta_transfer_flowcell.py | 6 +- tests/store_helpers.py | 128 ++++ 96 files changed, 2158 insertions(+), 1858 deletions(-) create mode 100644 cg/meta/store/balsamic.py create mode 100644 tests/apps/hk/test__getattr__.py create mode 100644 tests/cli/workflow/balsamic/store/conftest.py create mode 100644 tests/cli/workflow/balsamic/store/test_generate_deliverables_file.py create mode 100644 tests/cli/workflow/balsamic/store/test_store.py rename tests/cli/workflow/balsamic/{test_cli_balsamic_run.py => test_run.py} (87%) rename tests/cli/workflow/balsamic/{test_cli_balsamic_start.py => test_start.py} (100%) create mode 100644 tests/fixtures/apps/balsamic/case/metadata.yml create mode 100644 tests/fixtures/apps/balsamic/case/metadata_directory.yml create mode 100644 tests/fixtures/apps/balsamic/case/metadata_file_tags.yml create mode 100644 tests/store_helpers.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c62832fd9e..31893d172b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,3 +3,4 @@ repos: rev: 19.3b0 hooks: - id: black + args: [--line-length=100] diff --git a/.travis.yml b/.travis.yml index d9c480120a..174ea63ac0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,7 +60,7 @@ jobs: - name: "Code formatting" if: type = pull_request install: pip install black - script: git --no-pager diff --name-only --diff-filter=AM $TRAVIS_COMMIT_RANGE | grep -F ".py" | xargs black --check --diff + script: git --no-pager diff --name-only --diff-filter=AM $TRAVIS_COMMIT_RANGE | grep -F ".py" | xargs black --check -l 100 - name: "Pylint score" if: type = pull_request 
diff --git a/cg/__init__.py b/cg/__init__.py index a667878f3b..865795c887 100644 --- a/cg/__init__.py +++ b/cg/__init__.py @@ -1,5 +1,4 @@ -# -*- coding: utf-8 -*- import pkg_resources -__title__ = 'cg' +__title__ = "cg" __version__ = pkg_resources.get_distribution(__title__).version diff --git a/cg/apps/__init__.py b/cg/apps/__init__.py index b956b9b44d..118895acf7 100644 --- a/cg/apps/__init__.py +++ b/cg/apps/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ These are independent interfaces to tools outside the package. diff --git a/cg/apps/balsamic/fastq.py b/cg/apps/balsamic/fastq.py index c81a894238..b46d342624 100644 --- a/cg/apps/balsamic/fastq.py +++ b/cg/apps/balsamic/fastq.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ This module handles concatenation of balsamic fastq files. @@ -133,9 +132,7 @@ def link(self, case: str, sample: str, files: List): linked_fastq_path = wrk_dir / linked_fastq_name linked_reads_paths[fastq_data["read"]].append(linked_fastq_path) - concatenated_paths[ - fastq_data["read"] - ] = f"{wrk_dir}/{concatenated_fastq_name}" + concatenated_paths[fastq_data["read"]] = f"{wrk_dir}/{concatenated_fastq_name}" if not linked_fastq_path.exists(): LOGGER.info("linking: %s -> %s", original_fastq_path, linked_fastq_path) @@ -145,9 +142,7 @@ def link(self, case: str, sample: str, files: List): LOGGER.info("Concatenation in progress for sample %s.", sample) for read in linked_reads_paths: - FastqFileConcatenator().concatenate( - linked_reads_paths[read], concatenated_paths[read] - ) + FastqFileConcatenator().concatenate(linked_reads_paths[read], concatenated_paths[read]) self._remove_files(linked_reads_paths[read]) @staticmethod diff --git a/cg/apps/beacon.py b/cg/apps/beacon.py index 7be1608b64..8eda84a158 100644 --- a/cg/apps/beacon.py +++ b/cg/apps/beacon.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from typing import List import datetime as dt import logging @@ -7,7 +6,8 @@ LOG = logging.getLogger(__name__) -class BeaconApi(): + +class BeaconApi: """ Interface with Beacon importer (github.com/Clinical-Genomics/cgbeacon) Inserts variants from a VCF file inside a Beacon server. 
@@ -15,16 +15,35 @@ class BeaconApi(): def __init__(self, config: dict): super(BeaconApi, self).__init__() - self.connection = use_mysqlalchemy(config['cgbeacon']['database']) - + self.connection = use_mysqlalchemy(config["cgbeacon"]["database"]) - def upload(self, vcf_path: str, panel_path: str, dataset: str, outfile: str, customer: str, samples: List[str], quality: int, genome_reference: str): + def upload( + self, + vcf_path: str, + panel_path: str, + dataset: str, + outfile: str, + customer: str, + samples: List[str], + quality: int, + genome_reference: str, + ): """ Uploads variants from a VCF file to a MySQL Beacon database Returns: number of new variants in the Beacon """ LOG.info("Uploading variants to beacon db.") - upload_result = Utility.beacon_upload(self.connection, vcf_path, panel_path, dataset, outfile, customer, samples, quality, genome_reference) + upload_result = Utility.beacon_upload( + self.connection, + vcf_path, + panel_path, + dataset, + outfile, + customer, + samples, + quality, + genome_reference, + ) LOG.info("Upload complete!") def remove_vars(self, sample, vcf_path, panel_path=None, qual=20): diff --git a/cg/apps/gt.py b/cg/apps/gt.py index 3015649bf3..3eddabbb45 100644 --- a/cg/apps/gt.py +++ b/cg/apps/gt.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging from subprocess import CalledProcessError @@ -22,42 +21,42 @@ class GenotypeAPI(Manager): """ def __init__(self, config: dict): - alchy_config = dict(SQLALCHEMY_DATABASE_URI=config['genotype']['database']) + alchy_config = dict(SQLALCHEMY_DATABASE_URI=config["genotype"]["database"]) super(GenotypeAPI, self).__init__(config=alchy_config, Model=models.Model) - self.genotype_config = config['genotype']['config_path'] - self.genotype_binary = config['genotype']['binary_path'] - self.base_call = [self.genotype_binary, '--config', self.genotype_config] + self.genotype_config = config["genotype"]["config_path"] + self.genotype_binary = config["genotype"]["binary_path"] + self.base_call = [self.genotype_binary, "--config", self.genotype_config] - def upload(self, bcf_path: str, samples_sex: dict, force: bool=False): + def upload(self, bcf_path: str, samples_sex: dict, force: bool = False): """Upload genotypes for a family of samples.""" snps = api.snps() analyses = load_vcf(bcf_path, snps) for analysis_obj in analyses: - LOG.debug('loading VCF genotypes for sample: %s', analysis_obj.sample_id) + LOG.debug("loading VCF genotypes for sample: %s", analysis_obj.sample_id) is_saved = api.add_analysis(self, analysis_obj, replace=force) if is_saved: - LOG.info('loaded VCF genotypes for sample: %s', analysis_obj.sample_id) + LOG.info("loaded VCF genotypes for sample: %s", analysis_obj.sample_id) else: - LOG.warning('skipped, found previous analysis: %s', analysis_obj.sample_id) + LOG.warning("skipped, found previous analysis: %s", analysis_obj.sample_id) if is_saved or force: - analysis_obj.sex = samples_sex[analysis_obj.sample_id]['analysis'] - analysis_obj.sample.sex = samples_sex[analysis_obj.sample_id]['pedigree'] + analysis_obj.sex = samples_sex[analysis_obj.sample_id]["analysis"] + analysis_obj.sample.sex = samples_sex[analysis_obj.sample_id]["pedigree"] self.commit() def export_sample(self, days: int = 0) -> str: """Export sample info.""" trending_call = self.base_call[:] - trending_call.extend(['export-sample', '-d', days]) + trending_call.extend(["export-sample", "-d", days]) try: - LOG.info('Running Genotype API to get data.') + LOG.info("Running Genotype API to get data.") LOG.debug(trending_call) output = 
subprocess.check_output(trending_call) except CalledProcessError as error: - LOG.critical("Could not run command: %s", ' '.join(trending_call)) + LOG.critical("Could not run command: %s", " ".join(trending_call)) raise error - output = output.decode('utf-8') + output = output.decode("utf-8") # If sample not in genotype db, stdout of genotype command will be empty. if not output: raise CaseNotFoundError("samples not found in genotype db") @@ -66,15 +65,15 @@ def export_sample(self, days: int = 0) -> str: def export_sample_analysis(self, days: int = 0) -> str: """Export analysis.""" trending_call = self.base_call[:] - trending_call.extend(['export-sample-analysis', '-d', days]) + trending_call.extend(["export-sample-analysis", "-d", days]) try: - LOG.info('Running Genotype API to get data.') + LOG.info("Running Genotype API to get data.") LOG.debug(trending_call) output = subprocess.check_output(trending_call) except CalledProcessError as error: - LOG.critical("Could not run command: %s", ' '.join(trending_call)) + LOG.critical("Could not run command: %s", " ".join(trending_call)) raise error - output = output.decode('utf-8') + output = output.decode("utf-8") # If sample not in genotype db, stdout of genotype command will be empty. if not output: raise CaseNotFoundError("samples not found in genotype db") diff --git a/cg/apps/hk.py b/cg/apps/hk.py index ab4869039c..f6a1bdf8e7 100644 --- a/cg/apps/hk.py +++ b/cg/apps/hk.py @@ -1,23 +1,83 @@ -# -*- coding: utf-8 -*- +""" Module to decouple cg code from Housekeeper code """ import datetime as dt import logging import os from pathlib import Path +from typing import List -from housekeeper.exc import VersionIncludedError from housekeeper.include import include_version, checksum as hk_checksum from housekeeper.store import Store, models -log = logging.getLogger(__name__) +LOG = logging.getLogger(__name__) -class HousekeeperAPI(Store): +class HousekeeperAPI: + """ API to decouple cg code from Housekeeper """ + def __init__(self, config): - super(HousekeeperAPI, self).__init__( - config["housekeeper"]["database"], config["housekeeper"]["root"] - ) + self._store = Store(config["housekeeper"]["database"], config["housekeeper"]["root"]) self.root_dir = config["housekeeper"]["root"] + def __getattr__(self, name): + LOG.warning("Called undefined %s on %s, please wrap", name, self.__class__.__name__) + return getattr(self._store, name) + + def add_bundle(self, bundle_data): + """ Build a new bundle version of files """ + return self._store.add_bundle(bundle_data) + + def new_file( + self, path: str, checksum: str = None, to_archive: bool = False, tags: list = None + ): + """ Create a new file """ + return self._store.new_file(path, checksum, to_archive, tags) + + def tag(self, name: str): + """ Fetch a tag """ + return self._store.tag(name) + + def bundle(self, name: str): + """ Fetch a bundle """ + return self._store.bundle(name) + + def bundles(self): + """ Fetch bundles """ + return self._store.bundles() + + def version(self, bundle: str, date: dt.datetime): + """ Fetch a version """ + return self._store.version(bundle, date) + + def files( + self, *, bundle: str = None, tags: List[str] = None, version: int = None, path: str = None + ): + """ Fetch files """ + return self._store.files(bundle=bundle, tags=tags, version=version, path=path) + + def new_tag(self, name: str, category: str = None): + """ Create a new tag """ + return self._store.new_tag(name, category) + + def new_bundle(self, name: str, created_at: dt.datetime = None): + """ Create a new 
file bundle """ + return self._store.new_bundle(name, created_at) + + def new_version(self, created_at: dt.datetime, expires_at: dt.datetime = None): + """ Create a new bundle version """ + return self._store.new_version(created_at, expires_at) + + def add_commit(self, *args, **kwargs): + """ Wrap method in Housekeeper Store """ + return self._store.add_commit(*args, **kwargs) + + def commit(self): + """ Wrap method in Housekeeper Store """ + return self._store.commit() + + def session_no_autoflush(self): + """ Wrap property in Housekeeper Store """ + return self._store.session.no_autoflush + def include(self, version_obj: models.Version): """Call the include version function to import related assets.""" include_version(self.get_root_dir(), version_obj) @@ -30,7 +90,7 @@ def include_file(self, file_obj: models.File, version_obj: models.Version): # generate root directory version_root_dir = global_root_dir / version_obj.relative_root_dir version_root_dir.mkdir(parents=True, exist_ok=True) - log.info(f"created new bundle version dir: {version_root_dir}") + LOG.info("Created new bundle version dir: %s", version_root_dir) if file_obj.to_archive: # calculate sha1 checksum if file is to be archived @@ -38,18 +98,20 @@ def include_file(self, file_obj: models.File, version_obj: models.Version): # hardlink file to the internal structure new_path = version_root_dir / Path(file_obj.path).name os.link(file_obj.path, new_path) - log.info(f"linked file: {file_obj.path} -> {new_path}") + LOG.info("Linked file: %s -> %s", file_obj.path, new_path) file_obj.path = str(new_path).replace(f"{global_root_dir}/", "", 1) def last_version(self, bundle: str) -> models.Version: + """Gets the latest version of a bundle""" return ( - self.Version.query.join(models.Version.bundle) + self._store.Version.query.join(models.Version.bundle) .filter(models.Bundle.name == bundle) .order_by(models.Version.created_at.desc()) .first() ) def get_root_dir(self): + """Returns the root dir of Housekeeper""" return self.root_dir def get_files(self, bundle: str, tags: list, version: int = None): @@ -59,7 +121,7 @@ def get_files(self, bundle: str, tags: list, version: int = None): Returns: iterable(hk.Models.File) """ - return self.files(bundle=bundle, tags=tags, version=version) + return self._store.files(bundle=bundle, tags=tags, version=version) def add_file(self, file, version_obj: models.Version, tags, to_archive=False): """Add a file to housekeeper.""" @@ -77,4 +139,13 @@ def add_file(self, file, version_obj: models.Version, tags, to_archive=False): @staticmethod def checksum(path): + """Calculate the checksum""" return hk_checksum(path) + + def initialise_db(self): + """Create all tables in the store.""" + self._store.create_all() + + def destroy_db(self): + """Drop all tables in the store""" + self._store.drop_all() diff --git a/cg/apps/invoice/render.py b/cg/apps/invoice/render.py index 09c292f0e1..1ae9996e81 100644 --- a/cg/apps/invoice/render.py +++ b/cg/apps/invoice/render.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import datetime as dt from pkg_resources import resource_filename @@ -38,9 +37,7 @@ def render_xlsx(data: dict) -> Workbook: pkg_dir = __name__.rpartition(".")[0] sample_type = "pool" if data["pooled_samples"] else "sample" costcenter = data["costcenter"] - template_path = resource_filename( - pkg_dir, f"templates/{costcenter}_{sample_type}_invoice.xlsx" - ) + template_path = resource_filename(pkg_dir, f"templates/{costcenter}_{sample_type}_invoice.xlsx") workbook = load_workbook(template_path) if 
data["pooled_samples"]: worksheet = workbook["Bilaga Prover"] diff --git a/cg/apps/lims/__init__.py b/cg/apps/lims/__init__.py index 460aaee1fc..5448dbcd69 100644 --- a/cg/apps/lims/__init__.py +++ b/cg/apps/lims/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from .api import LimsAPI from .orderform import parse_orderform from .limsjson import parse_json diff --git a/cg/apps/lims/order.py b/cg/apps/lims/order.py index 0ba2e45b90..fbd9d95149 100644 --- a/cg/apps/lims/order.py +++ b/cg/apps/lims/order.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from typing import List import logging @@ -11,11 +10,10 @@ from . import batch LOG = logging.getLogger(__name__) -CONTAINER_TYPE_MAP = {'Tube': 2, '96 well plate': 1} +CONTAINER_TYPE_MAP = {"Tube": 2, "96 well plate": 1} class OrderHandler: - def save_xml(self, uri: str, document: ObjectifiedElement): """Post the data to the server.""" data = etree.tostring(document, xml_declaration=True) @@ -28,8 +26,8 @@ def save_containers(self, container_details: ObjectifiedElement): container_uri = f"{self.get_uri()}/containers/batch/create" results = self.save_xml(container_uri, container_details) container_map = {} - for link in results.findall('link'): - lims_container = Container(self, uri=link.attrib['uri']) + for link in results.findall("link"): + lims_container = Container(self, uri=link.attrib["uri"]) container_map[lims_container.name] = lims_container return container_map @@ -39,8 +37,8 @@ def save_samples(self, sample_details: ObjectifiedElement, map_samples=False): results = self.save_xml(sample_uri, sample_details) if map_samples: sample_map = {} - for link in results.findall('link'): - lims_sample = Sample(self, uri=link.attrib['uri']) + for link in results.findall("link"): + lims_sample = Sample(self, uri=link.attrib["uri"]) sample_map[lims_sample.name] = lims_sample return sample_map return results @@ -51,41 +49,43 @@ def update_artifacts(self, artifact_details: ObjectifiedElement): results = self.save_xml(artifact_uri, artifact_details) return results - def submit_project(self, project_name: str, samples: List[dict], researcher_id: str='3'): + def submit_project(self, project_name: str, samples: List[dict], researcher_id: str = "3"): """Parse Scout project.""" containers = self.prepare(samples) lims_project = Project.create( - self, - researcher=Researcher(self, id=researcher_id), - name=project_name, + self, researcher=Researcher(self, id=researcher_id), name=project_name ) LOG.info("%s: created new LIMS project", lims_project.id) - containers_data = [batch.build_container( - name=container['name'], - con_type=Containertype(lims=self, id=container['type']), - ) for container in containers] + containers_data = [ + batch.build_container( + name=container["name"], con_type=Containertype(lims=self, id=container["type"]) + ) + for container in containers + ] container_details = batch.build_container_batch(containers_data) LOG.debug("%s: saving containers", lims_project.name) container_map = self.save_containers(container_details) - reagentlabel_samples = [sample - for container in containers - for sample in container['samples'] - if sample['index_sequence']] + reagentlabel_samples = [ + sample + for container in containers + for sample in container["samples"] + if sample["index_sequence"] + ] samples_data = [] for container in containers: - for sample in container['samples']: - LOG.debug("%s: adding sample to container: %s", sample['name'], container['name']) - lims_container = container_map[container['name']] + for sample in 
container["samples"]: + LOG.debug("%s: adding sample to container: %s", sample["name"], container["name"]) + lims_container = container_map[container["name"]] sample_data = batch.build_sample( - name=sample['name'], + name=sample["name"], project=lims_project, container=lims_container, - location=sample['location'], - udfs=sample['udfs'], + location=sample["location"], + udfs=sample["udfs"], ) samples_data.append(sample_data) sample_details = batch.build_sample_batch(samples_data) @@ -93,10 +93,13 @@ def submit_project(self, project_name: str, samples: List[dict], researcher_id: sample_map = self.save_samples(sample_details, map_samples=process_reagentlabels) if process_reagentlabels: - artifacts_data = [batch.build_artifact( - artifact=sample_map[sample['name']].artifact, - reagent_label=sample['index_sequence'], - ) for sample in reagentlabel_samples] + artifacts_data = [ + batch.build_artifact( + artifact=sample_map[sample["name"]].artifact, + reagent_label=sample["index_sequence"], + ) + for sample in reagentlabel_samples + ] artifact_details = batch.build_artifact_batch(artifacts_data) self.update_artifacts(artifact_details) @@ -109,45 +112,43 @@ def prepare(cls, samples): lims_containers = [] tubes, plates = cls.group_containers(samples) # "96 well plate" = container type "1"; Tube = container type "2" - for container_type, containers in [('1', plates), ('2', tubes)]: + for container_type, containers in [("1", plates), ("2", tubes)]: for container_name, samples in containers.items(): - new_container = { - 'name': container_name, - 'type': container_type, - 'samples': [], - } + new_container = {"name": container_name, "type": container_type, "samples": []} # check that positions in plate are unique well_positions = {} for sample_data in samples: - location = sample_data['well_position'] or None + location = sample_data["well_position"] or None if location: if location in well_positions: first_sample = well_positions[location] - message = (f"duplicate well position: {location} | {first_sample}" - f" - {sample_data['name']}") + message = ( + f"duplicate well position: {location} | {first_sample}" + f" - {sample_data['name']}" + ) raise OrderError(message) - well_positions[location] = sample_data['name'] - if sample_data['container'] == '96 well plate' and location is None: + well_positions[location] = sample_data["name"] + if sample_data["container"] == "96 well plate" and location is None: message = f"missing 'well_position' for sample: {sample_data['name']}" raise ValueError(message) new_sample = { - 'name': sample_data['name'], - 'location': location or '1:1', - 'index_sequence': sample_data['index_sequence'], - 'udfs': {} + "name": sample_data["name"], + "location": location or "1:1", + "index_sequence": sample_data["index_sequence"], + "udfs": {}, } - for key, value in sample_data['udfs'].items(): + for key, value in sample_data["udfs"].items(): if value is None: LOG.debug(f"{key}: skipping null value UDF") continue if key in PROP2UDF: if isinstance(value, bool): - value = 'yes' if value else 'no' - new_sample['udfs'][PROP2UDF[key]] = value + value = "yes" if value else "no" + new_sample["udfs"][PROP2UDF[key]] = value else: LOG.debug(f"UDF not found: {key} - {value}") - new_container['samples'].append(new_sample) + new_container["samples"].append(new_sample) lims_containers.append(new_container) return lims_containers @@ -157,17 +158,17 @@ def group_containers(samples): tubes = {} plates = {} for sample_data in samples: - if sample_data['container'] == 'Tube': + if 
sample_data["container"] == "Tube": # detected tube: name after sample unless specified - container_name = sample_data.get('container_name') or sample_data['name'] + container_name = sample_data.get("container_name") or sample_data["name"] if container_name in tubes: raise OrderError(f"{container_name}: conflicting sample/tube name") tubes[container_name] = [sample_data] - elif sample_data['container'] == '96 well plate': + elif sample_data["container"] == "96 well plate": # detected plate: require container name - if sample_data['container_name'] not in plates: - plates[sample_data['container_name']] = [] - plates[sample_data['container_name']].append(sample_data) + if sample_data["container_name"] not in plates: + plates[sample_data["container_name"]] = [] + plates[sample_data["container_name"]].append(sample_data) else: raise ValueError(f"unknown container type: {sample_data['container']}") return tubes, plates diff --git a/cg/apps/lims/orderform.py b/cg/apps/lims/orderform.py index c47721819e..38698c2a51 100644 --- a/cg/apps/lims/orderform.py +++ b/cg/apps/lims/orderform.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from typing import List import xlrd @@ -6,18 +5,18 @@ from cg.exc import OrderFormError -SEX_MAP = {'male': 'M', 'female': 'F', 'unknown': 'unknown'} +SEX_MAP = {"male": "M", "female": "F", "unknown": "unknown"} REV_SEX_MAP = {value: key for key, value in SEX_MAP.items()} -CONTAINER_TYPES = ['Tube', '96 well plate'] +CONTAINER_TYPES = ["Tube", "96 well plate"] SOURCE_TYPES = set().union(METAGENOME_SOURCES, ANALYSIS_SOURCES) -VALID_ORDERFORMS=[ - '1508:19', # Orderform MIP, Balsamic, sequencing only, MIP RNA - '1541:6', # Orderform Externally sequenced samples - '1603:9', # Microbial WGS - '1604:9', # Orderform Ready made libraries (RML) - '1605:8', # Microbial metagenomes +VALID_ORDERFORMS = [ + "1508:19", # Orderform MIP, Balsamic, sequencing only, MIP RNA + "1541:6", # Orderform Externally sequenced samples + "1603:9", # Microbial WGS + "1604:9", # Orderform Ready made libraries (RML) + "1605:8", # Microbial metagenomes ] -CASE_PROJECT_TYPES = ['mip', 'external', 'balsamic', 'mip_balsamic', 'mip_rna'] +CASE_PROJECT_TYPES = ["mip", "external", "balsamic", "mip_balsamic", "mip_rna"] def check_orderform_version(document_title): @@ -34,7 +33,7 @@ def parse_orderform(excel_path: str) -> dict: sheet_name = None sheet_names = workbook.sheet_names() - for name in ['orderform', 'order form']: + for name in ["orderform", "order form"]: if name in sheet_names: sheet_name = name break @@ -61,28 +60,24 @@ def parse_orderform(excel_path: str) -> dict: customer_ids.add(customer_id) items.append(case_data) else: - customer_ids = set(sample['customer'] for sample in parsed_samples) + customer_ids = set(sample["customer"] for sample in parsed_samples) items = parsed_samples customer_options = len(customer_ids) if customer_options == 0: - raise OrderFormError('Customer information is missing') + raise OrderFormError("Customer information is missing") elif customer_options != 1: raise OrderFormError(f"Samples have different customers: {customer_ids}") - data = { - 'customer': customer_ids.pop(), - 'items': items, - 'project_type': project_type, - } + data = {"customer": customer_ids.pop(), "items": items, "project_type": project_type} return data def get_document_title(workbook: xlrd.book.Book, orderform_sheet: xlrd.sheet.Sheet) -> str: """Get the document title for the order form.""" - if 'information' in workbook.sheet_names(): - information_sheet = workbook.sheet_by_name('information') + if 
"information" in workbook.sheet_names(): + information_sheet = workbook.sheet_by_name("information") document_title = information_sheet.row(0)[2].value return document_title @@ -95,16 +90,16 @@ def get_project_type(document_title: str, parsed_samples: List) -> str: project_type = None - if '1541' in document_title: - project_type = 'external' - elif '1604' in document_title: - project_type = 'rml' - elif '1603' in document_title: - project_type = 'microbial' - elif '1605' in document_title: - project_type = 'metagenome' - elif '1508' in document_title: - analyses = set(sample['analysis'].lower() for sample in parsed_samples) + if "1541" in document_title: + project_type = "external" + elif "1604" in document_title: + project_type = "rml" + elif "1603" in document_title: + project_type = "microbial" + elif "1605" in document_title: + project_type = "metagenome" + elif "1508" in document_title: + analyses = set(sample["analysis"].lower() for sample in parsed_samples) if len(analyses) == 1: project_type = analyses.pop() else: @@ -115,48 +110,61 @@ def get_project_type(document_title: str, parsed_samples: List) -> str: def expand_case(case_id, parsed_case): """Fill-in information about families.""" - new_case = {'name': case_id, 'samples': []} - samples = parsed_case['samples'] + new_case = {"name": case_id, "samples": []} + samples = parsed_case["samples"] - require_qcoks = set(raw_sample['require_qcok'] for raw_sample in samples) - new_case['require_qcok'] = True in require_qcoks + require_qcoks = set(raw_sample["require_qcok"] for raw_sample in samples) + new_case["require_qcok"] = True in require_qcoks - priorities = set(raw_sample['priority'] for raw_sample in samples) + priorities = set(raw_sample["priority"] for raw_sample in samples) if len(priorities) == 1: - new_case['priority'] = priorities.pop() + new_case["priority"] = priorities.pop() else: raise OrderFormError(f"multiple values for 'Priority' for case: {case_id}") - customers = set(raw_sample['customer'] for raw_sample in samples) + customers = set(raw_sample["customer"] for raw_sample in samples) if len(customers) != 1: raise OrderFormError("Invalid customer information: {}".format(customers)) customer = customers.pop() gene_panels = set() for raw_sample in samples: - if raw_sample['panels']: - gene_panels.update(raw_sample['panels']) + if raw_sample["panels"]: + gene_panels.update(raw_sample["panels"]) new_sample = { - 'name': raw_sample['name'], - 'sex': raw_sample['sex'], - 'application': raw_sample['application'], - 'source': raw_sample['source'], + "name": raw_sample["name"], + "sex": raw_sample["sex"], + "application": raw_sample["application"], + "source": raw_sample["source"], } - if raw_sample.get('container') in CONTAINER_TYPES: - new_sample['container'] = raw_sample['container'] - - for key in ('capture_kit', 'comment', 'container_name', 'data_analysis', 'elution_buffer', - 'formalin_fixation_time', 'from_sample', 'post_formalin_fixation_time', - 'quantity', 'status', 'time_point', 'tissue_block_size', 'tumour', - 'tumour_purity', 'well_position'): + if raw_sample.get("container") in CONTAINER_TYPES: + new_sample["container"] = raw_sample["container"] + + for key in ( + "capture_kit", + "comment", + "container_name", + "data_analysis", + "elution_buffer", + "formalin_fixation_time", + "from_sample", + "post_formalin_fixation_time", + "quantity", + "status", + "time_point", + "tissue_block_size", + "tumour", + "tumour_purity", + "well_position", + ): if raw_sample.get(key): new_sample[key] = raw_sample[key] - for 
parent_id in ('mother', 'father'): + for parent_id in ("mother", "father"): if raw_sample[parent_id]: new_sample[parent_id] = raw_sample[parent_id] - new_case['samples'].append(new_sample) + new_case["samples"].append(new_sample) - new_case['panels'] = list(gene_panels) + new_case["panels"] = list(gene_panels) return customer, new_case @@ -165,90 +173,94 @@ def group_cases(parsed_samples): """Group samples on case.""" raw_cases = {} for sample in parsed_samples: - case_id = sample['case'] + case_id = sample["case"] if case_id not in raw_cases: - raw_cases[case_id] = { - 'samples': [], - } - raw_cases[case_id]['samples'].append(sample) + raw_cases[case_id] = {"samples": []} + raw_cases[case_id]["samples"].append(sample) return raw_cases def parse_sample(raw_sample): """Parse a raw sample row from order form sheet.""" - if ':' in raw_sample.get('UDF/Gene List', ''): - raw_sample['UDF/Gene List'] = raw_sample['UDF/Gene List'].replace(':', ';') + if ":" in raw_sample.get("UDF/Gene List", ""): + raw_sample["UDF/Gene List"] = raw_sample["UDF/Gene List"].replace(":", ";") - if raw_sample['UDF/priority'].lower() == 'förtur': - raw_sample['UDF/priority'] = 'priority' - raw_source = raw_sample.get('UDF/Source') + if raw_sample["UDF/priority"].lower() == "förtur": + raw_sample["UDF/priority"] = "priority" + raw_source = raw_sample.get("UDF/Source") sample = { - 'application': raw_sample['UDF/Sequencing Analysis'], - 'capture_kit': raw_sample.get('UDF/Capture Library version'), - 'case': raw_sample.get('UDF/familyID'), - 'comment': raw_sample.get('UDF/Comment'), - 'container': raw_sample.get('Container/Type'), - 'container_name': raw_sample.get('Container/Name'), - 'custom_index': raw_sample.get('UDF/Custom index'), - 'customer': raw_sample['UDF/customer'], - 'data_analysis': raw_sample['UDF/Data Analysis'], - 'elution_buffer': raw_sample.get('UDF/Sample Buffer'), - 'extraction_method': raw_sample.get('UDF/Extraction method'), - 'formalin_fixation_time': raw_sample.get('UDF/Formalin Fixation Time'), - 'index': raw_sample.get('UDF/Index type'), - 'from_sample': raw_sample.get('UDF/is_for_sample'), - 'name': raw_sample['Sample/Name'], - 'organism': raw_sample.get('UDF/Strain'), - 'organism_other': raw_sample.get('UDF/Other species'), - 'panels': (raw_sample['UDF/Gene List'].split(';') if - raw_sample.get('UDF/Gene List') else None), - 'pool': raw_sample.get('UDF/pool name'), - 'post_formalin_fixation_time': raw_sample.get('UDF/Post Formalin Fixation Time'), - 'priority': raw_sample['UDF/priority'].lower() if raw_sample.get('UDF/priority') else None, - 'reagent_label': raw_sample.get('Sample/Reagent Label'), - 'reference_genome': raw_sample.get('UDF/Reference Genome Microbial'), - 'require_qcok': raw_sample.get('UDF/Process only if QC OK') == 'yes', - 'rml_plate_name': raw_sample.get('UDF/RML plate name'), - 'sex': REV_SEX_MAP.get(raw_sample.get('UDF/Gender', '').strip()), - 'source': raw_source if raw_source in SOURCE_TYPES else None, - 'status': raw_sample['UDF/Status'].lower() if raw_sample.get('UDF/Status') else None, - 'tissue_block_size': raw_sample.get('UDF/Tissue Block Size'), - 'tumour': raw_sample.get('UDF/tumor') == 'yes', - 'tumour_purity': raw_sample.get('UDF/tumour purity'), - 'well_position': raw_sample.get('Sample/Well Location'), - 'well_position_rml': raw_sample.get('UDF/RML well position'), + "application": raw_sample["UDF/Sequencing Analysis"], + "capture_kit": raw_sample.get("UDF/Capture Library version"), + "case": raw_sample.get("UDF/familyID"), + "comment": 
raw_sample.get("UDF/Comment"), + "container": raw_sample.get("Container/Type"), + "container_name": raw_sample.get("Container/Name"), + "custom_index": raw_sample.get("UDF/Custom index"), + "customer": raw_sample["UDF/customer"], + "data_analysis": raw_sample["UDF/Data Analysis"], + "elution_buffer": raw_sample.get("UDF/Sample Buffer"), + "extraction_method": raw_sample.get("UDF/Extraction method"), + "formalin_fixation_time": raw_sample.get("UDF/Formalin Fixation Time"), + "index": raw_sample.get("UDF/Index type"), + "from_sample": raw_sample.get("UDF/is_for_sample"), + "name": raw_sample["Sample/Name"], + "organism": raw_sample.get("UDF/Strain"), + "organism_other": raw_sample.get("UDF/Other species"), + "panels": ( + raw_sample["UDF/Gene List"].split(";") if raw_sample.get("UDF/Gene List") else None + ), + "pool": raw_sample.get("UDF/pool name"), + "post_formalin_fixation_time": raw_sample.get("UDF/Post Formalin Fixation Time"), + "priority": raw_sample["UDF/priority"].lower() if raw_sample.get("UDF/priority") else None, + "reagent_label": raw_sample.get("Sample/Reagent Label"), + "reference_genome": raw_sample.get("UDF/Reference Genome Microbial"), + "require_qcok": raw_sample.get("UDF/Process only if QC OK") == "yes", + "rml_plate_name": raw_sample.get("UDF/RML plate name"), + "sex": REV_SEX_MAP.get(raw_sample.get("UDF/Gender", "").strip()), + "source": raw_source if raw_source in SOURCE_TYPES else None, + "status": raw_sample["UDF/Status"].lower() if raw_sample.get("UDF/Status") else None, + "tissue_block_size": raw_sample.get("UDF/Tissue Block Size"), + "tumour": raw_sample.get("UDF/tumor") == "yes", + "tumour_purity": raw_sample.get("UDF/tumour purity"), + "well_position": raw_sample.get("Sample/Well Location"), + "well_position_rml": raw_sample.get("UDF/RML well position"), } - data_analysis = raw_sample.get('UDF/Data Analysis').lower() - - if 'mip' in data_analysis and data_analysis and 'balsamic' in data_analysis: - sample['analysis'] = 'mip_balsamic' - elif data_analysis and 'balsamic' in data_analysis: - sample['analysis'] = 'balsamic' - elif data_analysis and 'mip rna' in data_analysis: - sample['analysis'] = 'mip_rna' - elif data_analysis and 'mip' in data_analysis or 'scout' in data_analysis: - sample['analysis'] = 'mip' - elif data_analysis and ('fastq' in data_analysis or data_analysis == 'custom'): - sample['analysis'] = 'fastq' + data_analysis = raw_sample.get("UDF/Data Analysis").lower() + + if "mip" in data_analysis and data_analysis and "balsamic" in data_analysis: + sample["analysis"] = "mip_balsamic" + elif data_analysis and "balsamic" in data_analysis: + sample["analysis"] = "balsamic" + elif data_analysis and "mip rna" in data_analysis: + sample["analysis"] = "mip_rna" + elif data_analysis and "mip" in data_analysis or "scout" in data_analysis: + sample["analysis"] = "mip" + elif data_analysis and ("fastq" in data_analysis or data_analysis == "custom"): + sample["analysis"] = "fastq" else: raise OrderFormError(f"unknown 'Data Analysis' for order: {data_analysis}") - numeric_values = [('index_number', 'UDF/Index number'), - ('volume', 'UDF/Volume (uL)'), ('quantity', 'UDF/Quantity'), - ('concentration', 'UDF/Concentration (nM)'), - ('concentration_weight', 'UDF/Sample Conc.'), - ('time_point', 'UDF/time_point')] + numeric_values = [ + ("index_number", "UDF/Index number"), + ("volume", "UDF/Volume (uL)"), + ("quantity", "UDF/Quantity"), + ("concentration", "UDF/Concentration (nM)"), + ("concentration_weight", "UDF/Sample Conc."), + ("time_point", "UDF/time_point"), 
+ ] for json_key, excel_key in numeric_values: - str_value = raw_sample.get(excel_key, '').rsplit('.0')[0] - if str_value.replace('.', '').isnumeric(): + str_value = raw_sample.get(excel_key, "").rsplit(".0")[0] + if str_value.replace(".", "").isnumeric(): sample[json_key] = str_value - for parent in ['mother', 'father']: + for parent in ["mother", "father"]: parent_key = f"UDF/{parent}ID" - sample[parent] = (raw_sample[parent_key] if - raw_sample.get(parent_key) and (raw_sample[parent_key] != '0.0') - else None) + sample[parent] = ( + raw_sample[parent_key] + if raw_sample.get(parent_key) and (raw_sample[parent_key] != "0.0") + else None + ) return sample @@ -259,20 +271,22 @@ def relevant_rows(orderform_sheet): current_row = None empty_row_found = False for row in orderform_sheet.get_rows(): - if row[0].value == '</SAMPLE ENTRIES>': + if row[0].value == "</SAMPLE ENTRIES>": break - if current_row == 'header': + if current_row == "header": header_row = [cell.value for cell in row] current_row = None - elif current_row == 'samples': + elif current_row == "samples": values = [str(cell.value) for cell in row] # skip empty rows if values[0]: if empty_row_found: - raise OrderFormError(f"Found data after empty lines. Please delete any " - f"non-sample data rows in between the samples") + raise OrderFormError( + f"Found data after empty lines. Please delete any " + f"non-sample data rows in between the samples" + ) sample_dict = dict(zip(header_row, values)) @@ -280,8 +294,8 @@ else: empty_row_found = True - if row[0].value == '<TABLE HEADER>': - current_row = 'header' - elif row[0].value == '<SAMPLE ENTRIES>': - current_row = 'samples' + if row[0].value == "<TABLE HEADER>":
+ current_row = "header" + elif row[0].value == "<SAMPLE ENTRIES>": + current_row = "samples" return raw_samples diff --git a/cg/apps/loqus.py b/cg/apps/loqus.py index 402570e159..5ce24730c0 100644 --- a/cg/apps/loqus.py +++ b/cg/apps/loqus.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Module for loqusdb API """ @@ -35,12 +33,7 @@ def __init__(self, config: dict, analysis_type: str = "wgs"): self.process = Process(self.loqusdb_binary, self.loqusdb_config) def load( - self, - family_id: str, - ped_path: str, - vcf_path: str, - gbcf_path: str, - vcf_sv_path: str = None, + self, family_id: str, ped_path: str, vcf_path: str, gbcf_path: str, vcf_sv_path: str = None ) -> dict: """Add observations from a VCF.""" load_call_parameters = [ @@ -90,13 +83,7 @@ def get_case(self, case_id: str) -> dict: def get_duplicate(self, vcf_file: str) -> dict: """Find matching profiles in loqusdb""" ind_obj = {} - duplicates_params = [ - "profile", - "--check-vcf", - vcf_file, - "--profile-threshold", - "0.95", - ] + duplicates_params = ["profile", "--check-vcf", vcf_file, "--profile-threshold", "0.95"] try: self.process.run_command(duplicates_params) @@ -117,6 +104,4 @@ def get_duplicate(self, vcf_file: str) -> dict: def __repr__(self): - return ( - f"LoqusdbAPI(binary={self.loqusdb_binary}," f"config={self.loqusdb_config})" - ) + return f"LoqusdbAPI(binary={self.loqusdb_binary}," f"config={self.loqusdb_config})" diff --git a/cg/apps/madeline.py b/cg/apps/madeline.py index 8c2e834461..38d66f297f 100644 --- a/cg/apps/madeline.py +++ b/cg/apps/madeline.py @@ -64,10 +64,7 @@ def run(madeline_process: Process, ped_stream: List[str]): svg_content = output.read() # strip away the script tag - script_tag = ( - '<script type="text/javascript" xlink:href=' - '"javascript/madeline.js"></script>' - ) + script_tag = '<script type="text/javascript" xlink:href=' '"javascript/madeline.js"></script>' svg_content.replace(script_tag, "") with open(out_path, "w") as out_handle: diff --git a/cg/apps/mip/fastq.py b/cg/apps/mip/fastq.py index ea253e44ff..1e48d56a7d 100644 --- a/cg/apps/mip/fastq.py +++ b/cg/apps/mip/fastq.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ This module handles concatenation of usalt fastq files. diff --git a/cg/apps/osticket.py b/cg/apps/osticket.py index 904aa9c01e..eb542c746d 100644 --- a/cg/apps/osticket.py +++ b/cg/apps/osticket.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging import os.path @@ -20,9 +19,7 @@ def __init__(self): def init_app(self, app: Flask): """Initialize the API in Flask.""" - self.setup( - api_key=app.config["OSTICKET_API_KEY"], domain=app.config["OSTICKET_DOMAIN"] - ) + self.setup(api_key=app.config["OSTICKET_API_KEY"], domain=app.config["OSTICKET_DOMAIN"]) def setup(self, api_key: str = None, domain: str = None): """Initialize the API.""" diff --git a/cg/apps/pipelines/fastqhandler.py b/cg/apps/pipelines/fastqhandler.py index 52c0616196..cb466e9ca3 100644 --- a/cg/apps/pipelines/fastqhandler.py +++ b/cg/apps/pipelines/fastqhandler.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ This module handles concatenation of usalt fastq files. diff --git a/cg/apps/scoutapi.py b/cg/apps/scoutapi.py index 96a862676f..66407e8284 100644 --- a/cg/apps/scoutapi.py +++ b/cg/apps/scoutapi.py @@ -151,9 +151,7 @@ def get_solved_cases(self, days_ago): return cases - def upload_delivery_report( - self, report_path: str, case_id: str, update: bool = False - ): + def upload_delivery_report(self, report_path: str, case_id: str, update: bool = False): """ Load a delivery report into a case in the database If the report already exists the function will exit.
diff --git a/cg/apps/stats.py b/cg/apps/stats.py index 72092a1880..fcf71726e4 100644 --- a/cg/apps/stats.py +++ b/cg/apps/stats.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging from pathlib import Path from typing import Iterator @@ -22,65 +21,57 @@ class StatsAPI(alchy.Manager): Flowcell = models.Flowcell def __init__(self, config: dict): - alchy_config = dict(SQLALCHEMY_DATABASE_URI=config['cgstats']['database']) + alchy_config = dict(SQLALCHEMY_DATABASE_URI=config["cgstats"]["database"]) super(StatsAPI, self).__init__(config=alchy_config, Model=models.Model) - self.root_dir = Path(config['cgstats']['root']) + self.root_dir = Path(config["cgstats"]["root"]) def flowcell(self, flowcell_name: str) -> dict: """Fetch information about a flowcell.""" record = self.Flowcell.query.filter_by(flowcellname=flowcell_name).first() data = { - 'name': record.flowcellname, - 'sequencer': record.demux[0].datasource.machine, - 'sequencer_type': record.hiseqtype, - 'date': record.time, - 'samples': [] + "name": record.flowcellname, + "sequencer": record.demux[0].datasource.machine, + "sequencer_type": record.hiseqtype, + "date": record.time, + "samples": [], } for sample_obj in self.flowcell_samples(record): - raw_samplename = sample_obj.samplename.split('_', 1)[0] - curated_samplename = raw_samplename.rstrip('AB') - sample_data = { - 'name': curated_samplename, - 'reads': 0, - 'fastqs': [], - } + raw_samplename = sample_obj.samplename.split("_", 1)[0] + curated_samplename = raw_samplename.rstrip("AB") + sample_data = {"name": curated_samplename, "reads": 0, "fastqs": []} for fc_data in self.sample_reads(sample_obj): - if fc_data.type == 'hiseqga' and fc_data.q30 >= 80: - sample_data['reads'] += fc_data.reads - elif fc_data.type == 'hiseqx' and fc_data.q30 >= 75: - sample_data['reads'] += fc_data.reads - elif fc_data.type == 'novaseq' and fc_data.q30 >= 75: - sample_data['reads'] += fc_data.reads + if fc_data.type == "hiseqga" and fc_data.q30 >= 80: + sample_data["reads"] += fc_data.reads + elif fc_data.type == "hiseqx" and fc_data.q30 >= 75: + sample_data["reads"] += fc_data.reads + elif fc_data.type == "novaseq" and fc_data.q30 >= 75: + sample_data["reads"] += fc_data.reads else: - LOG.warning(f"q30 too low for {curated_samplename} on {fc_data.name}:" - f"{fc_data.q30} < {80 if fc_data.type == 'hiseqga' else 75}%") + LOG.warning( + f"q30 too low for {curated_samplename} on {fc_data.name}:" + f"{fc_data.q30} < {80 if fc_data.type == 'hiseqga' else 75}%" + ) continue for fastq_path in self.fastqs(fc_data.name, sample_obj): if self.is_lane_pooled(flowcell_obj=record, lane=fc_data.lane): - if 'Undetermined' in str(fastq_path): + if "Undetermined" in str(fastq_path): continue - sample_data['fastqs'].append(str(fastq_path)) - data['samples'].append(sample_data) + sample_data["fastqs"].append(str(fastq_path)) + data["samples"].append(sample_data) return data def flowcell_samples(self, flowcell_obj: models.Flowcell) -> Iterator[models.Sample]: """Fetch all the samples from a flowcell.""" - return ( - self.Sample.query - .join(models.Sample.unaligned, models.Unaligned.demux) - .filter(models.Demux.flowcell == flowcell_obj) + return self.Sample.query.join(models.Sample.unaligned, models.Unaligned.demux).filter( + models.Demux.flowcell == flowcell_obj ) def is_lane_pooled(self, flowcell_obj: models.Flowcell, lane: str) -> bool: """Check whether a lane is pooled or not.""" query = ( - self.session.query( - sqa.func.count(models.Unaligned.sample_id).label('sample_count') - ) - .join( - 
models.Unaligned.demux, - ) + self.session.query(sqa.func.count(models.Unaligned.sample_id).label("sample_count")) + .join(models.Unaligned.demux) .filter(models.Demux.flowcell == flowcell_obj) .filter(models.Unaligned.lane == lane) ) @@ -90,16 +81,13 @@ def sample_reads(self, sample_obj: models.Sample) -> Iterator: """Calculate reads for a sample.""" query = ( self.session.query( - models.Flowcell.flowcellname.label('name'), - models.Flowcell.hiseqtype.label('type'), + models.Flowcell.flowcellname.label("name"), + models.Flowcell.hiseqtype.label("type"), models.Unaligned.lane, - sqa.func.sum(models.Unaligned.readcounts).label('reads'), - sqa.func.min(models.Unaligned.q30_bases_pct).label('q30'), - ) - .join( - models.Flowcell.demux, - models.Demux.unaligned + sqa.func.sum(models.Unaligned.readcounts).label("reads"), + sqa.func.min(models.Unaligned.q30_bases_pct).label("q30"), ) + .join(models.Flowcell.demux, models.Demux.unaligned) .filter(models.Unaligned.sample == sample_obj) .group_by(models.Flowcell.flowcellname) ) diff --git a/cg/apps/tb/__init__.py b/cg/apps/tb/__init__.py index 75ec54e208..755234ef65 100644 --- a/cg/apps/tb/__init__.py +++ b/cg/apps/tb/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from trailblazer.exc import MipStartError, ConfigError from .api import TrailblazerAPI diff --git a/cg/apps/tb/api.py b/cg/apps/tb/api.py index 4cc9f3f454..161d6718cf 100644 --- a/cg/apps/tb/api.py +++ b/cg/apps/tb/api.py @@ -21,12 +21,9 @@ class TrailblazerAPI(Store, AddHandler, fastq.FastqHandler): def __init__(self, config: dict): super(TrailblazerAPI, self).__init__( - config["trailblazer"]["database"], - families_dir=config["trailblazer"]["root"], - ) - self.mip_cli = MipCli( - config["trailblazer"]["script"], config["trailblazer"]["pipeline"] + config["trailblazer"]["database"], families_dir=config["trailblazer"]["root"] ) + self.mip_cli = MipCli(config["trailblazer"]["script"], config["trailblazer"]["pipeline"]) self.mip_config = config["trailblazer"]["mip_config"] def run( @@ -99,9 +96,7 @@ def delete_analysis( self.commit() @staticmethod - def get_trending( - mip_config_raw: str, qcmetrics_raw: str, sampleinfo_raw: dict - ) -> dict: + def get_trending(mip_config_raw: str, qcmetrics_raw: str, sampleinfo_raw: dict) -> dict: """Get trending data for a MIP analysis""" return trending.parse_mip_analysis( mip_config_raw=mip_config_raw, diff --git a/cg/apps/usalt/fastq.py b/cg/apps/usalt/fastq.py index 0f8a414718..1123e7ecd6 100644 --- a/cg/apps/usalt/fastq.py +++ b/cg/apps/usalt/fastq.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ This module handles concatenation of usalt fastq files. @@ -23,9 +22,7 @@ class FastqFileNameCreator(BaseFastqHandler.BaseFastqFileNameCreator): """Creates valid usalt filename from the parameters""" @staticmethod - def create( - lane: str, flowcell: str, sample: str, read: str, more: dict = None - ) -> str: + def create(lane: str, flowcell: str, sample: str, read: str, more: dict = None) -> str: """Name a FASTQ file following usalt conventions. 
Naming must be xxx_R_1.fastq.gz and xxx_R_2.fastq.gz""" diff --git a/cg/cli/__init__.py b/cg/cli/__init__.py index aab9aa6fe8..58aaa45d33 100644 --- a/cg/cli/__init__.py +++ b/cg/cli/__init__.py @@ -1,2 +1 @@ -# -*- coding: utf-8 -*- from .base import base diff --git a/cg/cli/add.py b/cg/cli/add.py index be486b9d08..f7a435a514 100644 --- a/cg/cli/add.py +++ b/cg/cli/add.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging import click @@ -12,82 +11,121 @@ @click.pass_context def add(context): """Add new things to the database.""" - context.obj['db'] = Store(context.obj['database']) + context.obj["db"] = Store(context.obj["database"]) @add.command() -@click.argument('internal_id') -@click.argument('name') -@click.option('-cg', '--customer-group', 'customer_group_id', required=False, - help='internal ID for the customer group of the customer, a new group will be ' - 'created if left out') -@click.option('-ia', '--invoice-address', 'invoice_address', required=True, - help='Street adress, Post code, City') -@click.option('-ir', '--invoice-reference', 'invoice_reference', required=True, - help='Invoice reference (text)') +@click.argument("internal_id") +@click.argument("name") +@click.option( + "-cg", + "--customer-group", + "customer_group_id", + required=False, + help="internal ID for the customer group of the customer, a new group will be " + "created if left out", +) +@click.option( + "-ia", + "--invoice-address", + "invoice_address", + required=True, + help="Street address, Post code, City", +) +@click.option( + "-ir", + "--invoice-reference", + "invoice_reference", + required=True, + help="Invoice reference (text)", +) @click.pass_context -def customer(context, internal_id: str, name: str, customer_group_id: str, invoice_address: str, - invoice_reference: str): +def customer( + context, + internal_id: str, + name: str, + customer_group_id: str, + invoice_address: str, + invoice_reference: str, +): """Add a new customer with a unique INTERNAL_ID and NAME.""" - existing = context.obj['db'].customer(internal_id) + existing = context.obj["db"].customer(internal_id) if existing: LOG.error(f"{existing.name}: customer already added") context.abort() - customer_group = context.obj['db'].customer_group(customer_group_id) + customer_group = context.obj["db"].customer_group(customer_group_id) if not customer_group: - customer_group = context.obj['db'].add_customer_group(internal_id=internal_id, name=name) + customer_group = context.obj["db"].add_customer_group(internal_id=internal_id, name=name) - new_customer = context.obj['db'].add_customer(internal_id=internal_id, name=name, - customer_group=customer_group, - invoice_address=invoice_address, - invoice_reference=invoice_reference) - context.obj['db'].add_commit(new_customer) + new_customer = context.obj["db"].add_customer( + internal_id=internal_id, + name=name, + customer_group=customer_group, + invoice_address=invoice_address, + invoice_reference=invoice_reference, + ) + context.obj["db"].add_commit(new_customer) message = f"customer added: {new_customer.internal_id} ({new_customer.id})" LOG.info(message) @add.command() -@click.option('-a', '--admin', is_flag=True, help='make the user an admin') -@click.option('-c', '--customer', 'customer_id', required=True, - help='internal ID for the customer of the user') -@click.argument('email') -@click.argument('name') +@click.option("-a", "--admin", is_flag=True, help="make the user an admin") +@click.option( + "-c", + "--customer", + "customer_id", + required=True, + help="internal ID for the
customer of the user", +) +@click.argument("email") +@click.argument("name") @click.pass_context def user(context, admin, customer_id, email, name): """Add a new user with an EMAIL (login) and a NAME (full).""" - customer_obj = context.obj['db'].customer(customer_id) - existing = context.obj['db'].user(email) + customer_obj = context.obj["db"].customer(customer_id) + existing = context.obj["db"].user(email) if existing: LOG.error(f"{existing.name}: user already added") context.abort() - new_user = context.obj['db'].add_user(customer_obj, email, name, is_admin=admin) - context.obj['db'].add_commit(new_user) + new_user = context.obj["db"].add_user(customer_obj, email, name, is_admin=admin) + context.obj["db"].add_commit(new_user) LOG.info(f"user added: {new_user.email} ({new_user.id})") @add.command() -@click.option('-l', '--lims', 'lims_id', help='LIMS id for the sample') -@click.option('-d', '--downsampled', type=int, help='how many reads is the sample downsampled to?') -@click.option('-o', '--order', help='name of the order the sample belongs to') -@click.option('-s', '--sex', type=click.Choice(['male', 'female', 'unknown']), required=True, - help='sample pedigree sex') -@click.option('-a', '--application', required=True, help='application tag name') -@click.option('-p', '--priority', type=click.Choice(PRIORITY_OPTIONS), default='standard', - help='set the priority for the samples') -@click.argument('customer_id') -@click.argument('name') +@click.option("-l", "--lims", "lims_id", help="LIMS id for the sample") +@click.option("-d", "--downsampled", type=int, help="how many reads is the sample downsampled to?") +@click.option("-o", "--order", help="name of the order the sample belongs to") +@click.option( + "-s", + "--sex", + type=click.Choice(["male", "female", "unknown"]), + required=True, + help="sample pedigree sex", +) +@click.option("-a", "--application", required=True, help="application tag name") +@click.option( + "-p", + "--priority", + type=click.Choice(PRIORITY_OPTIONS), + default="standard", + help="set the priority for the samples", +) +@click.argument("customer_id") +@click.argument("name") @click.pass_context def sample(context, lims_id, downsampled, sex, order, application, priority, customer_id, name): """Add a sample for CUSTOMER_ID with a NAME (display).""" - status = context.obj['db'] + status = context.obj["db"] customer_obj = status.customer(customer_id) if customer_obj is None: - LOG.error('customer not found') + LOG.error("customer not found") context.abort() application_obj = status.application(application) if application_obj is None: - LOG.error('application not found') + LOG.error("application not found") context.abort() new_record = status.add_sample( name=name, @@ -104,15 +142,16 @@ def sample(context, lims_id, downsampled, sex, order, application, priority, cus @add.command() -@click.option('--priority', type=click.Choice(PRIORITY_OPTIONS), default='standard', - help='analysis priority') -@click.option('-p', '--panel', 'panels', multiple=True, required=True, help='default gene panels') -@click.argument('customer_id') -@click.argument('name') +@click.option( + "--priority", type=click.Choice(PRIORITY_OPTIONS), default="standard", help="analysis priority" +) +@click.option("-p", "--panel", "panels", multiple=True, required=True, help="default gene panels") +@click.argument("customer_id") +@click.argument("name") @click.pass_context def family(context, priority, panels, customer_id, name): """Add a family to CUSTOMER_ID with a NAME.""" - status = context.obj['db'] 
+    status = context.obj["db"]
     customer_obj = status.customer(customer_id)
     if customer_obj is None:
         LOG.error(f"{customer_id}: customer not found")
@@ -124,52 +163,48 @@ def family(context, priority, panels, customer_id, name):
             LOG.error(f"{panel_id}: panel not found")
             context.abort()
 
-    new_family = status.add_family(
-        name=name,
-        panels=panels,
-        priority=priority,
-    )
+    new_family = status.add_family(name=name, panels=panels, priority=priority)
     new_family.customer = customer_obj
     status.add_commit(new_family)
     LOG.info(f"{new_family.internal_id}: new family added")
 
 
 @add.command()
-@click.option('-m', '--mother', help='sample ID for mother of sample')
-@click.option('-f', '--father', help='sample ID for father of sample')
-@click.option('-s', '--status', type=click.Choice(STATUS_OPTIONS),
-              required=True)
-@click.argument('family_id')
-@click.argument('sample_id')
+@click.option("-m", "--mother", help="sample ID for mother of sample")
+@click.option("-f", "--father", help="sample ID for father of sample")
+@click.option("-s", "--status", type=click.Choice(STATUS_OPTIONS), required=True)
+@click.argument("family_id")
+@click.argument("sample_id")
 @click.pass_context
 def relationship(context, mother, father, status, family_id, sample_id):
     """Create a link between a FAMILY_ID and a SAMPLE_ID."""
-    status_db = context.obj['db']
+    status_db = context.obj["db"]
     mother_obj = None
     father_obj = None
     family_obj = status_db.family(family_id)
     if family_obj is None:
-        LOG.error('%s: family not found', family_id)
+        LOG.error("%s: family not found", family_id)
         context.abort()
     sample_obj = status_db.sample(sample_id)
     if sample_obj is None:
-        LOG.error('%s: sample not found', sample_id)
+        LOG.error("%s: sample not found", sample_id)
         context.abort()
 
     if mother:
         mother_obj = status_db.sample(mother)
         if mother_obj is None:
-            LOG.error('%s: mother not found', mother)
+            LOG.error("%s: mother not found", mother)
             context.abort()
 
     if father:
         father_obj = status_db.sample(father)
         if father_obj is None:
-            LOG.error('%s: father not found', father)
+            LOG.error("%s: father not found", father)
            context.abort()
 
-    new_record = status_db.relate_sample(family_obj, sample_obj, status, mother=mother_obj,
-                                         father=father_obj)
+    new_record = status_db.relate_sample(
+        family_obj, sample_obj, status, mother=mother_obj, father=father_obj
+    )
     status_db.add_commit(new_record)
     LOG.info(f"related {family_obj.internal_id} to {sample_obj.internal_id}")
diff --git a/cg/cli/clean.py b/cg/cli/clean.py
index 84680ea46e..4e20792355 100644
--- a/cg/cli/clean.py
+++ b/cg/cli/clean.py
@@ -51,9 +51,7 @@ def beacon(context: click.Context, item_type, item_id):
 
 @clean.command()
 @click.option("-y", "--yes", is_flag=True, help="skip confirmation")
-@click.option(
-    "-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned"
-)
+@click.option("-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned")
 @click.argument("case_id")
 @click.argument("sample_info", type=click.File("r"))
 @click.pass_context
@@ -116,9 +114,7 @@ def scout(context, bundle, yes: bool = False, dry_run: bool = False):
     help="Clean alignment files with analysis dates older than given number of days",
 )
 @click.option("-y", "--yes", is_flag=True, help="skip checks")
-@click.option(
-    "-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned"
-)
+@click.option("-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned")
 @click.pass_context
def scoutauto(context, days_old: int, yes: bool = 
False, dry_run: bool = False): """Automatically clean up solved and archived scout cases""" @@ -139,14 +135,10 @@ def scoutauto(context, days_old: int, yes: bool = False, dry_run: bool = False): @clean.command() @click.option("-y", "--yes", is_flag=True, help="skip confirmation") -@click.option( - "-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned" -) +@click.option("-d", "--dry-run", is_flag=True, help="Shows cases and files that would be cleaned") @click.argument("before_str") @click.pass_context -def mipauto( - context: click.Context, before_str: str, yes: bool = False, dry_run: bool = False -): +def mipauto(context: click.Context, before_str: str, yes: bool = False, dry_run: bool = False): """Automatically clean up "old" analyses.""" before = parse_date(before_str) old_analyses = context.obj["db"].analyses(before=before) @@ -172,11 +164,7 @@ def mipauto( LOG.info("%s: cleaning MIP output", case_id) with open(sampleinfo_path, "r") as sampleinfo_file: context.invoke( - mip, - yes=yes, - case_id=case_id, - sample_info=sampleinfo_file, - dry_run=dry_run, + mip, yes=yes, case_id=case_id, sample_info=sampleinfo_file, dry_run=dry_run ) except FileNotFoundError: LOG.error( diff --git a/cg/cli/status.py b/cg/cli/status.py index 9143d63c3d..04d2778292 100644 --- a/cg/cli/status.py +++ b/cg/cli/status.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import click from cg.apps import tb from tabulate import tabulate @@ -8,11 +6,27 @@ from cg.store import Store from cg.constants import FAMILY_ACTIONS, PRIORITY_OPTIONS -STATUS_OPTIONS = ['pending', 'running', 'completed', 'failed', 'error'] -CASE_HEADERS_LONG = ['Case', 'Ordered', 'Received', 'Prepared', 'Sequenced', 'Flowcells', - 'Analysed', 'Uploaded', 'Delivered', 'Delivery Reported', 'Invoiced', 'TAT'] -ALWAYS_LONG_HEADERS = [CASE_HEADERS_LONG[0], CASE_HEADERS_LONG[1], - CASE_HEADERS_LONG[6], CASE_HEADERS_LONG[7]] +STATUS_OPTIONS = ["pending", "running", "completed", "failed", "error"] +CASE_HEADERS_LONG = [ + "Case", + "Ordered", + "Received", + "Prepared", + "Sequenced", + "Flowcells", + "Analysed", + "Uploaded", + "Delivered", + "Delivery Reported", + "Invoiced", + "TAT", +] +ALWAYS_LONG_HEADERS = [ + CASE_HEADERS_LONG[0], + CASE_HEADERS_LONG[1], + CASE_HEADERS_LONG[6], + CASE_HEADERS_LONG[7], +] CASE_HEADERS_MEDIUM = [] CASE_HEADERS_SHORT = [] @@ -33,16 +47,16 @@ @click.pass_context def status(context): """View status of things.""" - context.obj['db'] = Store(context.obj['database']) - if context.obj.get('trailblazer'): - context.obj['tb'] = tb.TrailblazerAPI(context.obj) + context.obj["db"] = Store(context.obj["database"]) + if context.obj.get("trailblazer"): + context.obj["tb"] = tb.TrailblazerAPI(context.obj) @status.command() @click.pass_context def analysis(context): """Which families will be analyzed?""" - records = context.obj['db'].cases_to_mip_analyze() + records = context.obj["db"].cases_to_mip_analyze() for family_obj in records: click.echo(family_obj) @@ -52,15 +66,17 @@ def present_bool(a_dict, param, show_false=False): value = a_dict.get(param) if show_false: - return ('-' if value is None else - '✓' if value is True else - '✗' if value is False else - str(value)) + return ( + "-" + if value is None + else "✓" + if value is True + else "✗" + if value is False + else str(value) + ) - return ('-' if value is None else - '✓' if value is True else - '' if value is False else - str(value)) + return "-" if value is None else "✓" if value is True else "" if value is False else str(value) def 
present_date(a_dict, param, show_negative, show_time): @@ -73,9 +89,7 @@ def present_date(a_dict, param, show_negative, show_time): if show_negative: return str(value) - return ('' if not value else - value if value else - str(value)) + return "" if not value else value if value else str(value) def present_string(a_dict, param, show_negative): @@ -85,69 +99,84 @@ def present_string(a_dict, param, show_negative): if show_negative: return str(value) - return ('' if not value else - value if value else - str(value)) + return "" if not value else value if value else str(value) @status.command() @click.pass_context -@click.option('-o', '--output', 'output_type', type=click.Choice(['bool', 'count', 'date', - 'datetime']), default='bool', - help='how to display status') -@click.option('--verbose', is_flag=True, help='show status information otherwise left out') -@click.option('--days', default=31, help='days to go back') -@click.option('--internal-id', help='search by internal id') -@click.option('--name', help='search by name given by customer') -@click.option('--case-action', type=click.Choice(FAMILY_ACTIONS), help='filter by case action') -@click.option('--progress-status', type=click.Choice(STATUS_OPTIONS), help='filter by progress ' - 'status') -@click.option('--priority', type=click.Choice(PRIORITY_OPTIONS), help='filter by priority') -@click.option('--data-analysis', help='filter on data_analysis') -@click.option('--sample-id', help='filter by sample id') -@click.option('-c', '--customer-id', help='filter by customer') -@click.option('-C', '--exclude-customer-id', help='exclude customer') -@click.option('-r', '--only-received', is_flag=True, help='only completely received cases') -@click.option('-R', '--exclude-received', is_flag=True, help='exclude completely received cases') -@click.option('-p', '--only-prepared', is_flag=True, help='only completely prepared cases') -@click.option('-P', '--exclude-prepared', is_flag=True, help='exclude completely prepared cases') -@click.option('-s', '--only-sequenced', is_flag=True, help='only completely sequenced cases') -@click.option('-S', '--exclude-sequenced', is_flag=True, help='exclude completely sequenced cases') -@click.option('-a', '--only-analysed', is_flag=True, help='only analysed cases') -@click.option('-A', '--exclude-analysed', is_flag=True, help='exclude analysed cases') -@click.option('-u', '--only-uploaded', is_flag=True, help='only uploaded cases') -@click.option('-U', '--exclude-uploaded', is_flag=True, help='exclude uploaded cases') -@click.option('-d', '--only-delivered', is_flag=True, help='only LIMS delivered cases') -@click.option('-D', '--exclude-delivered', is_flag=True, help='exclude LIMS delivered cases') -@click.option('--dr', '--only-delivery-reported', is_flag=True, help='only delivery reported cases') -@click.option('--DR', '--exclude-delivery-reported', is_flag=True, help='exclude delivery ' - 'reported cases') -@click.option('-i', '--only-invoiced', is_flag=True, help='only completely invoiced cases') -@click.option('-I', '--exclude-invoiced', is_flag=True, help='exclude completely invoiced cases') -def cases(context, output_type, verbose, days, internal_id, name, case_action, - progress_status, priority, - customer_id, data_analysis, sample_id, - only_received, - only_prepared, - only_sequenced, - only_analysed, - only_uploaded, - only_delivered, - only_delivery_reported, - only_invoiced, - exclude_customer_id, - exclude_received, - exclude_prepared, - exclude_sequenced, - exclude_analysed, - exclude_uploaded, - 
exclude_delivered, - exclude_delivery_reported, - exclude_invoiced, - ): +@click.option( + "-o", + "--output", + "output_type", + type=click.Choice(["bool", "count", "date", "datetime"]), + default="bool", + help="how to display status", +) +@click.option("--verbose", is_flag=True, help="show status information otherwise left out") +@click.option("--days", default=31, help="days to go back") +@click.option("--internal-id", help="search by internal id") +@click.option("--name", help="search by name given by customer") +@click.option("--case-action", type=click.Choice(FAMILY_ACTIONS), help="filter by case action") +@click.option( + "--progress-status", type=click.Choice(STATUS_OPTIONS), help="filter by progress " "status" +) +@click.option("--priority", type=click.Choice(PRIORITY_OPTIONS), help="filter by priority") +@click.option("--data-analysis", help="filter on data_analysis") +@click.option("--sample-id", help="filter by sample id") +@click.option("-c", "--customer-id", help="filter by customer") +@click.option("-C", "--exclude-customer-id", help="exclude customer") +@click.option("-r", "--only-received", is_flag=True, help="only completely received cases") +@click.option("-R", "--exclude-received", is_flag=True, help="exclude completely received cases") +@click.option("-p", "--only-prepared", is_flag=True, help="only completely prepared cases") +@click.option("-P", "--exclude-prepared", is_flag=True, help="exclude completely prepared cases") +@click.option("-s", "--only-sequenced", is_flag=True, help="only completely sequenced cases") +@click.option("-S", "--exclude-sequenced", is_flag=True, help="exclude completely sequenced cases") +@click.option("-a", "--only-analysed", is_flag=True, help="only analysed cases") +@click.option("-A", "--exclude-analysed", is_flag=True, help="exclude analysed cases") +@click.option("-u", "--only-uploaded", is_flag=True, help="only uploaded cases") +@click.option("-U", "--exclude-uploaded", is_flag=True, help="exclude uploaded cases") +@click.option("-d", "--only-delivered", is_flag=True, help="only LIMS delivered cases") +@click.option("-D", "--exclude-delivered", is_flag=True, help="exclude LIMS delivered cases") +@click.option("--dr", "--only-delivery-reported", is_flag=True, help="only delivery reported cases") +@click.option( + "--DR", "--exclude-delivery-reported", is_flag=True, help="exclude delivery " "reported cases" +) +@click.option("-i", "--only-invoiced", is_flag=True, help="only completely invoiced cases") +@click.option("-I", "--exclude-invoiced", is_flag=True, help="exclude completely invoiced cases") +def cases( + context, + output_type, + verbose, + days, + internal_id, + name, + case_action, + progress_status, + priority, + customer_id, + data_analysis, + sample_id, + only_received, + only_prepared, + only_sequenced, + only_analysed, + only_uploaded, + only_delivered, + only_delivery_reported, + only_invoiced, + exclude_customer_id, + exclude_received, + exclude_prepared, + exclude_sequenced, + exclude_analysed, + exclude_uploaded, + exclude_delivered, + exclude_delivery_reported, + exclude_invoiced, +): """progress of each case""" - records = context.obj['db'].cases( - progress_tracker=context.obj.get('tb'), + records = context.obj["db"].cases( + progress_tracker=context.obj.get("tb"), days=days, internal_id=internal_id, name=name, @@ -177,37 +206,43 @@ def cases(context, output_type, verbose, days, internal_id, name, case_action, ) case_rows = [] - if output_type == 'bool': + if output_type == "bool": case_header = 
CASE_HEADERS_SHORT - elif output_type == 'count': + elif output_type == "count": case_header = CASE_HEADERS_MEDIUM - elif output_type in ('date', 'datetime'): + elif output_type in ("date", "datetime"): case_header = CASE_HEADERS_LONG for case in records: - tat_number = case.get('tat') - max_tat = case.get('max_tat') + tat_number = case.get("tat") + max_tat = case.get("max_tat") - if case.get('samples_received_bool') and case.get('samples_delivered_bool') and \ - tat_number <= max_tat: - tat_color = 'green' + if ( + case.get("samples_received_bool") + and case.get("samples_delivered_bool") + and tat_number <= max_tat + ): + tat_color = "green" elif tat_number == max_tat: - tat_color = 'yellow' + tat_color = "yellow" elif tat_number > max_tat: - tat_color = 'red' + tat_color = "red" else: - tat_color = 'white' + tat_color = "white" - color_start = Color(u"{" + f"{tat_color}" + "}") - color_end = Color(u"{/" + f"{tat_color}" + "}") + color_start = Color("{" + f"{tat_color}" + "}") + color_end = Color("{/" + f"{tat_color}" + "}") - if not case.get('case_external_bool') and case.get('samples_received_bool') and case.get(\ - 'samples_delivered_bool'): + if ( + not case.get("case_external_bool") + and case.get("samples_received_bool") + and case.get("samples_delivered_bool") + ): tat = f"{tat_number}/{max_tat}" + color_end - elif case.get('case_external_bool') and case.get('analysis_uploaded_bool'): + elif case.get("case_external_bool") and case.get("analysis_uploaded_bool"): tat = f"{tat_number}/{max_tat}" + color_end else: tat = f"({tat_number})/{max_tat}" + color_end @@ -218,77 +253,97 @@ def cases(context, output_type, verbose, days, internal_id, name, case_action, if data_analysis: title = f"{title} {case.get('samples_data_analyses')}" - show_time = output_type == 'datetime' + show_time = output_type == "datetime" - ordered = present_date(case, 'ordered_at', verbose, show_time) + ordered = present_date(case, "ordered_at", verbose, show_time) - if output_type == 'bool': - received = present_bool(case, 'samples_received_bool', verbose) - prepared = present_bool(case, 'samples_prepared_bool', verbose) - sequenced = present_bool(case, 'samples_sequenced_bool', verbose) - flowcell = present_bool(case, 'flowcells_on_disk_bool', verbose) - analysed_bool = present_bool(case, 'analysis_completed_bool', verbose) + if output_type == "bool": + received = present_bool(case, "samples_received_bool", verbose) + prepared = present_bool(case, "samples_prepared_bool", verbose) + sequenced = present_bool(case, "samples_sequenced_bool", verbose) + flowcell = present_bool(case, "flowcells_on_disk_bool", verbose) + analysed_bool = present_bool(case, "analysis_completed_bool", verbose) - if case.get('analysis_completed_at'): + if case.get("analysis_completed_at"): analysed = f"{analysed_bool}" - elif case.get('analysis_status'): - analysed = f"{analysed_bool}{present_string(case, 'analysis_status',verbose)}" \ - f" {case.get('analysis_completion')}%" + elif case.get("analysis_status"): + analysed = ( + f"{analysed_bool}{present_string(case, 'analysis_status',verbose)}" + f" {case.get('analysis_completion')}%" + ) else: analysed = f"{analysed_bool}{present_string(case, 'case_action', verbose)}" - uploaded = present_bool(case, 'analysis_uploaded_bool', verbose) - delivered = present_bool(case, 'samples_delivered_bool', verbose) - delivery_reported = present_bool(case, 'analysis_delivery_reported_bool', verbose) - invoiced = present_bool(case, 'samples_invoiced_bool', verbose) + uploaded = present_bool(case, 
"analysis_uploaded_bool", verbose) + delivered = present_bool(case, "samples_delivered_bool", verbose) + delivery_reported = present_bool(case, "analysis_delivery_reported_bool", verbose) + invoiced = present_bool(case, "samples_invoiced_bool", verbose) - elif output_type == 'count': + elif output_type == "count": received = f"{case.get('samples_received')}/{case.get('samples_to_receive')}" prepared = f"{case.get('samples_prepared')}/{case.get('samples_to_prepare')}" sequenced = f"{case.get('samples_sequenced')}/{case.get('samples_to_sequence')}" flowcell = f"{case.get('flowcells_on_disk')}/{case.get('total_samples')}" - if case.get('analysis_completed_at'): + if case.get("analysis_completed_at"): analysed = f"{present_date(case, 'analysis_completed_at', verbose, show_time)}" - elif case.get('analysis_status'): - analysed = f"{present_string(case, 'analysis_status', verbose)}" \ - f" {case.get('analysis_completion')}%" + elif case.get("analysis_status"): + analysed = ( + f"{present_string(case, 'analysis_status', verbose)}" + f" {case.get('analysis_completion')}%" + ) else: analysed = f"{present_string(case, 'case_action', verbose)}" - uploaded = present_date(case, 'analysis_uploaded_at', verbose, show_time) + uploaded = present_date(case, "analysis_uploaded_at", verbose, show_time) delivered = f"{case.get('samples_delivered')}/{case.get('samples_to_deliver')}" - delivery_reported = present_date(case, 'analysis_delivery_reported_at', verbose, - show_time) + delivery_reported = present_date( + case, "analysis_delivery_reported_at", verbose, show_time + ) invoiced = f"{case.get('samples_invoiced')}/{case.get('samples_to_invoice')}" - elif output_type in ('date', 'datetime'): - received = present_date(case, 'samples_received_at', verbose, show_time) - prepared = present_date(case, 'samples_prepared_at', verbose, show_time) - sequenced = present_date(case, 'samples_sequenced_at', verbose, show_time) - flowcell = present_string(case, 'flowcells_status', verbose) + elif output_type in ("date", "datetime"): + received = present_date(case, "samples_received_at", verbose, show_time) + prepared = present_date(case, "samples_prepared_at", verbose, show_time) + sequenced = present_date(case, "samples_sequenced_at", verbose, show_time) + flowcell = present_string(case, "flowcells_status", verbose) - if case.get('analysis_completed_at'): + if case.get("analysis_completed_at"): analysed = f"{present_date(case, 'analysis_completed_at', verbose, show_time)}" - elif case.get('analysis_status'): - analysed = f"{present_string(case, 'analysis_status', verbose)}" \ - f" {case.get('analysis_completion')}%" + elif case.get("analysis_status"): + analysed = ( + f"{present_string(case, 'analysis_status', verbose)}" + f" {case.get('analysis_completion')}%" + ) else: analysed = f"{present_string(case, 'case_action', verbose)}" - uploaded = present_date(case, 'analysis_uploaded_at', verbose, show_time) - delivered = present_date(case, 'samples_delivered_at', verbose, show_time) - delivery_reported = present_date(case, 'analysis_delivery_reported_at', verbose, - show_time) - invoiced = present_date(case, 'samples_invoiced_at', verbose, show_time) - - case_row = [title, ordered, received, prepared, sequenced, flowcell, analysed, uploaded, - delivered, delivery_reported, invoiced, tat] + uploaded = present_date(case, "analysis_uploaded_at", verbose, show_time) + delivered = present_date(case, "samples_delivered_at", verbose, show_time) + delivery_reported = present_date( + case, "analysis_delivery_reported_at", verbose, 
show_time + ) + invoiced = present_date(case, "samples_invoiced_at", verbose, show_time) + + case_row = [ + title, + ordered, + received, + prepared, + sequenced, + flowcell, + analysed, + uploaded, + delivered, + delivery_reported, + invoiced, + tat, + ] case_rows.append(case_row) - click.echo(tabulate(case_rows, headers=case_header, tablefmt='psql')) + click.echo(tabulate(case_rows, headers=case_header, tablefmt="psql")) - header_description = '' + header_description = "" for i, _ in enumerate(case_header): if case_header[i] != CASE_HEADERS_LONG[i]: header_description = f"{header_description} {case_header[i]}={CASE_HEADERS_LONG[i]}" @@ -296,42 +351,42 @@ def cases(context, output_type, verbose, days, internal_id, name, case_action, @status.command() -@click.option('-s', '--skip', default=0, help='skip initial records') +@click.option("-s", "--skip", default=0, help="skip initial records") @click.pass_context def samples(context, skip): """View status of samples.""" - records = context.obj['db'].samples().offset(skip).limit(30) + records = context.obj["db"].samples().offset(skip).limit(30) for record in records: message = f"{record.internal_id} ({record.customer.internal_id})" if record.sequenced_at: - color = 'green' + color = "green" message += f" [SEQUENCED: {record.sequenced_at.date()}]" elif record.received_at and record.reads: - color = 'orange' + color = "orange" message += f" [READS: {record.reads}]" elif record.received_at: - color = 'blue' + color = "blue" message += f" [RECEIVED: {record.received_at.date()}]" else: - color = 'white' - message += ' [NOT RECEIVED]' + color = "white" + message += " [NOT RECEIVED]" click.echo(click.style(message, fg=color)) @status.command() -@click.option('-s', '--skip', default=0, help='skip initial records') +@click.option("-s", "--skip", default=0, help="skip initial records") @click.pass_context def families(context, skip): """View status of families.""" - click.echo('red: prio > 1, blue: prio = 1, green: completed, yellow: action') - records = context.obj['db'].families().offset(skip).limit(30) + click.echo("red: prio > 1, blue: prio = 1, green: completed, yellow: action") + records = context.obj["db"].families().offset(skip).limit(30) for family_obj in records: - color = 'red' if family_obj.priority > 1 else 'blue' + color = "red" if family_obj.priority > 1 else "blue" message = f"{family_obj.internal_id} ({family_obj.priority})" if family_obj.analyses: message += f" {family_obj.analyses[0].completed_at.date()}" - color = 'green' + color = "green" if family_obj.action: message += f" [{family_obj.action.upper()}]" - color = 'yellow' + color = "yellow" click.echo(click.style(message, fg=color)) diff --git a/cg/cli/transfer.py b/cg/cli/transfer.py index af6fccbb2f..674284a24d 100644 --- a/cg/cli/transfer.py +++ b/cg/cli/transfer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging import click @@ -14,38 +13,39 @@ @click.pass_context def transfer(context): """Transfer results to the status interface.""" - context.obj['db'] = Store(context.obj['database']) + context.obj["db"] = Store(context.obj["database"]) @transfer.command() -@click.argument('flowcell_name') +@click.argument("flowcell_name") @click.pass_context def flowcell(context, flowcell_name): """Populate results from a flowcell.""" stats_api = stats.StatsAPI(context.obj) hk_api = hk.HousekeeperAPI(context.obj) - transfer_api = transfer_app.TransferFlowcell(context.obj['db'], stats_api, hk_api) + transfer_api = transfer_app.TransferFlowcell(context.obj["db"], stats_api, hk_api) 
new_record = transfer_api.transfer(flowcell_name) - context.obj['db'].add_commit(new_record) - click.echo(click.style(f"flowcell added: {new_record}", fg='green')) + context.obj["db"].add_commit(new_record) + click.echo(click.style(f"flowcell added: {new_record}", fg="green")) @transfer.command() -@click.option('-s', '--status', type=click.Choice(['received', 'prepared', 'delivered']), - default='received') -@click.option('-i', '--include', type=click.Choice(['unset', 'not-invoiced', 'all']), - default='unset') +@click.option( + "-s", "--status", type=click.Choice(["received", "prepared", "delivered"]), default="received" +) +@click.option( + "-i", "--include", type=click.Choice(["unset", "not-invoiced", "all"]), default="unset" +) @click.pass_context def lims(context, status, include): """Check if samples have been updated in LIMS.""" lims_api = lims_app.LimsAPI(context.obj) - transfer_api = transfer_app.TransferLims(context.obj['db'], lims_api) + transfer_api = transfer_app.TransferLims(context.obj["db"], lims_api) transfer_api.transfer_samples(transfer_app.SampleState[status.upper()], include) @transfer.command() -@click.option('-s', '--status', type=click.Choice(['received', 'delivered']), - default='delivered') +@click.option("-s", "--status", type=click.Choice(["received", "delivered"]), default="delivered") @click.pass_context def pools(context, status): """ @@ -53,13 +53,17 @@ def pools(context, status): option is provided. """ lims_api = lims_app.LimsAPI(context.obj) - transfer_api = transfer_app.TransferLims(context.obj['db'], lims_api) + transfer_api = transfer_app.TransferLims(context.obj["db"], lims_api) transfer_api.transfer_pools(transfer_app.PoolState[status.upper()]) @transfer.command() -@click.option('-s', '--status', type=click.Choice(['received', 'prepared', 'sequenced', - 'delivered']), default='delivered') +@click.option( + "-s", + "--status", + type=click.Choice(["received", "prepared", "sequenced", "delivered"]), + default="delivered", +) @click.pass_context def microbials(context, status): """ @@ -67,5 +71,5 @@ def microbials(context, status): from LIMS. Defaults to delivered if no option is provided. 
""" lims_api = lims_app.LimsAPI(context.obj) - transfer_api = transfer_app.TransferLims(context.obj['db'], lims_api) + transfer_api = transfer_app.TransferLims(context.obj["db"], lims_api) transfer_api.transfer_microbial_samples(transfer_app.MicrobialState[status.upper()]) diff --git a/cg/cli/workflow/balsamic/base.py b/cg/cli/workflow/balsamic/base.py index 289adeb02b..39ece5c387 100644 --- a/cg/cli/workflow/balsamic/base.py +++ b/cg/cli/workflow/balsamic/base.py @@ -11,11 +11,7 @@ from cg.apps import hk, scoutapi, lims, tb from cg.apps.balsamic.fastq import FastqHandler from cg.cli.workflow.balsamic.store import store as store_cmd -from cg.cli.workflow.balsamic.deliver import ( - deliver as deliver_cmd, - CASE_TAGS, - SAMPLE_TAGS, -) +from cg.cli.workflow.balsamic.deliver import deliver as deliver_cmd, CASE_TAGS, SAMPLE_TAGS from cg.cli.workflow.get_links import get_links from cg.exc import LimsDataError, BalsamicStartError, CgError from cg.meta.deliver import DeliverAPI @@ -23,9 +19,7 @@ from cg.store import Store LOG = logging.getLogger(__name__) -PRIORITY_OPTION = click.option( - "-p", "--priority", type=click.Choice(["low", "normal", "high"]) -) +PRIORITY_OPTION = click.option("-p", "--priority", type=click.Choice(["low", "normal", "high"])) EMAIL_OPTION = click.option("-e", "--email", help="email to send errors to") SUCCESS = 0 FAIL = 1 @@ -34,9 +28,7 @@ @click.group(invoke_without_command=True) @PRIORITY_OPTION @EMAIL_OPTION -@click.option( - "-c", "--case-id", "case_id", help="case to prepare and start an analysis for" -) +@click.option("-c", "--case-id", "case_id", help="case to prepare and start an analysis for") @click.option("--target-bed", required=False, help="Optional") @click.pass_context def balsamic(context, case_id, priority, email, target_bed): @@ -73,9 +65,7 @@ def balsamic(context, case_id, priority, email, target_bed): # execute the analysis! 
     context.invoke(link, case_id=case_id)
     context.invoke(config_case, case_id=case_id, target_bed=target_bed)
-    context.invoke(
-        run, run_analysis=True, case_id=case_id, priority=priority, email=email
-    )
+    context.invoke(run, run_analysis=True, case_id=case_id, priority=priority, email=email)
@@ -89,18 +79,10 @@ def link(context, case_id, sample_id):
     for link_obj in link_objs:
         LOG.info(
-            "%s: %s link FASTQ files",
-            link_obj.sample.internal_id,
-            link_obj.sample.data_analysis,
+            "%s: %s link FASTQ files", link_obj.sample.internal_id, link_obj.sample.data_analysis
         )
-        if (
-            link_obj.sample.data_analysis
-            and "balsamic" in link_obj.sample.data_analysis.lower()
-        ):
-            LOG.info(
-                "%s has balsamic as data analysis, linking.",
-                link_obj.sample.internal_id,
-            )
+        if link_obj.sample.data_analysis and "balsamic" in link_obj.sample.data_analysis.lower():
+            LOG.info("%s has balsamic as data analysis, linking.", link_obj.sample.internal_id)
             context.obj["analysis_api"].link_sample(
                 fastq_handler=FastqHandler(context.obj),
                 case=link_obj.family.internal_id,
@@ -108,8 +90,7 @@ def link(context, case_id, sample_id):
             )
         else:
             LOG.warning(
-                "%s does not have blasamic as data analysis, skipping.",
-                link_obj.sample.internal_id,
+                "%s does not have balsamic as data analysis, skipping.", link_obj.sample.internal_id
             )
@@ -153,9 +134,7 @@ def config_case(
                 link_obj.sample.internal_id,
                 link_obj.sample.application_version.application.prep_category,
             )
-            application_types.add(
-                link_obj.sample.application_version.application.prep_category
-            )
+            application_types.add(link_obj.sample.application_version.application.prep_category)
 
             LOG.info("%s: config FASTQ file", link_obj.sample.internal_id)
 
@@ -186,9 +165,7 @@ def config_case(
             for fastq_data in sorted_files:
                 original_fastq_path = Path(fastq_data["path"])
-                linked_fastq_name = context.obj[
-                    "fastq_handler"
-                ].FastqFileNameCreator.create(
+                linked_fastq_name = context.obj["fastq_handler"].FastqFileNameCreator.create(
                     lane=fastq_data["lane"],
                     flowcell=fastq_data["flowcell"],
                     sample=link_obj.sample.internal_id,
@@ -200,9 +177,7 @@ def config_case(
                 ].FastqFileNameCreator.get_concatenated_name(linked_fastq_name)
                 linked_fastq_path = wrk_dir / linked_fastq_name
                 linked_reads_paths[fastq_data["read"]].append(linked_fastq_path)
-                concatenated_paths[
-                    fastq_data["read"]
-                ] = f"{wrk_dir}/{concatenated_fastq_name}"
+                concatenated_paths[fastq_data["read"]] = f"{wrk_dir}/{concatenated_fastq_name}"
 
                 if linked_fastq_path.exists():
                     LOG.info("found: %s -> %s", original_fastq_path, linked_fastq_path)
@@ -215,36 +190,27 @@ def config_case(
                 normal_paths.add(concatenated_paths[1])
 
         if not target_bed:
-            target_bed_shortname = context.obj["lims_api"].capture_kit(
-                link_obj.sample.internal_id
-            )
+            target_bed_shortname = context.obj["lims_api"].capture_kit(link_obj.sample.internal_id)
             if target_bed_shortname:
                 bed_version_obj = context.obj["db"].bed_version(target_bed_shortname)
                 if not bed_version_obj:
-                    raise CgError(
-                        "Bed-version %s does not exist" % target_bed_shortname
-                    )
+                    raise CgError("Bed-version %s does not exist" % target_bed_shortname)
                 target_beds.add(bed_version_obj.filename)
 
     if len(application_types) != 1:
         raise BalsamicStartError(
-            "More than one application found for this case: %s"
-            % ", ".join(application_types)
+            "More than one application found for this case: %s" % ", ".join(application_types)
         )
 
     if not application_types.issubset(acceptable_applications):
-        raise BalsamicStartError(
-            "Improper application for this case: %s" % application_types
-        )
+        raise BalsamicStartError("Improper application for this case: %s" % application_types)
 
     nr_paths = len(tumor_paths) if tumor_paths else 0
     if nr_paths != 1:
-        raise BalsamicStartError(
-            "Must have exactly one tumor sample! Found %s samples." % nr_paths
-        )
+        raise BalsamicStartError("Must have exactly one tumor sample! Found %s samples." % nr_paths)
 
     tumor_path = tumor_paths.pop()
@@ -265,9 +231,7 @@ def config_case(
     if len(target_beds) == 1:
         target_bed = Path(context.obj["bed_path"]) / target_beds.pop()
     elif len(target_beds) > 1:
-        raise BalsamicStartError(
-            "To many target beds specified: %s" % ", ".join(target_beds)
-        )
+        raise BalsamicStartError("Too many target beds specified: %s" % ", ".join(target_beds))
     else:
         raise BalsamicStartError("No target bed specified!")
@@ -308,12 +272,7 @@ def config_case(
 @balsamic.command()
 @click.option("-d", "--dry-run", "dry", is_flag=True, help="print command to console")
 @click.option(
-    "-r",
-    "--run-analysis",
-    "run_analysis",
-    is_flag=True,
-    default=False,
-    help="start " "analysis",
+    "-r", "--run-analysis", "run_analysis", is_flag=True, default=False, help="start " "analysis"
 )
 @click.option("--config", "config_path", required=False, help="Optional")
 @PRIORITY_OPTION
@@ -355,11 +314,7 @@ def run(context, dry, run_analysis, config_path, priority, email, case_id):
 @balsamic.command()
 @click.option(
-    "-d",
-    "--dry-run",
-    "dry_run",
-    is_flag=True,
-    help="print to console, " "without actualising",
+    "-d", "--dry-run", "dry_run", is_flag=True, help="print to console without actualising"
 )
 @click.pass_context
 def start(context: click.Context, dry_run):
@@ -369,11 +324,7 @@ def start(context: click.Context, dry_run):
 
     LOG.info("%s: start analysis", case_obj.internal_id)
 
-    priority = (
-        "high"
-        if case_obj.high_priority
-        else ("low" if case_obj.low_priority else "normal")
-    )
+    priority = get_priority_as_text(case_obj)
 
     if dry_run:
         continue
@@ -387,6 +338,18 @@ def start(context: click.Context, dry_run):
     sys.exit(exit_code)
 
 
+def get_priority_as_text(case_obj):
+    """Get priority as text for a case"""
+
+    if case_obj.high_priority:
+        return "high"
+
+    if case_obj.low_priority:
+        return "low"
+
+    return "normal"
+
+
 @balsamic.command("remove-fastq")
 @click.option("-c", "--case", "case_id", help="remove fastq folder for a case")
 @click.pass_context
diff --git a/cg/cli/workflow/balsamic/store.py b/cg/cli/workflow/balsamic/store.py
index 94561fc14f..f65417529d 100644
--- a/cg/cli/workflow/balsamic/store.py
+++ b/cg/cli/workflow/balsamic/store.py
@@ -1,17 +1,22 @@
-""" CLI for storing information and data """
-import datetime as dt
+"""Click commands to store balsamic analyses"""
+
 import logging
+import os
+import subprocess
 from pathlib import Path
-import sys
+
 import click
+from housekeeper.exc import VersionIncludedError
 
 from cg.apps import hk, tb
-from cg.exc import AnalysisNotFinishedError, AnalysisDuplicationError
+from cg.meta.store.balsamic import gather_files_and_bundle_in_housekeeper
 from cg.store import Store
+from cg.exc import AnalysisNotFinishedError, AnalysisDuplicationError
+
 
 LOG = logging.getLogger(__name__)
-FAIL = 1
 SUCCESS = 0
+FAIL = 1
@@ -24,123 +29,82 @@ def store(context):
 
 
 @store.command()
-@click.argument("config-stream", type=click.File("r"), required=False)
+@click.argument("case_id")
+@click.option("--deliverables-file", "deliverables_file_path", required=False, help="Optional")
 @click.pass_context
-def analysis(context, config_stream):
+def analysis(context, case_id, deliverables_file_path):
     """Store a finished analysis in Housekeeper."""
+    status = context.obj["db"]
-    tb_api = context.obj["tb_api"]
-    hk_api = context.obj["hk_api"]
+    case_obj = status.family(case_id)
 
-    if not config_stream:
-        LOG.error("provide a config, suggestions:")
-        for analysis_obj in tb_api.analyses(status="completed", deleted=False)[:25]:
-            click.echo(analysis_obj.config_path)
+    if not case_obj:
+        click.echo(click.style(f"Case {case_id} not found", fg="red"))
         context.abort()
 
-    new_analysis = _gather_files_and_bundle_in_housekeeper(
-        config_stream, context, hk_api, status, tb_api
-    )
-
-    status.add_commit(new_analysis)
-    click.echo(click.style("included files in Housekeeper", fg="green"))
-
+    if not deliverables_file_path:
+        root_dir = Path(context.obj["balsamic"]["root"])
+        deliverables_file_path = Path.joinpath(
+            root_dir, case_id, "analysis/delivery_report", case_id + ".hk"
+        )
+        if not os.path.isfile(deliverables_file_path):
+            context.invoke(generate_deliverables_file, case_id=case_id)
 
-def _gather_files_and_bundle_in_housekeeper(
-    config_stream, context, hk_api, status, tb_api
-):
-    """Function to gather files and bundle in housekeeper"""
+    hk_api = context.obj["hk_api"]
 
     try:
-        bundle_data = tb_api.add_analysis(config_stream)
+        new_analysis = gather_files_and_bundle_in_housekeeper(
+            deliverables_file_path, hk_api, status, case_obj
+        )
     except AnalysisNotFinishedError as error:
         click.echo(click.style(error.message, fg="red"))
         context.abort()
-
-    try:
-        results = hk_api.add_bundle(bundle_data)
-        if results is None:
-            print(click.style("analysis version already added", fg="yellow"))
-            context.abort()
-        bundle_obj, version_obj = results
     except FileNotFoundError as error:
-        click.echo(click.style(f"missing file: {error.args[0]}", fg="red"))
+        click.echo(click.style(f"missing file: {error.filename}", fg="red"))
         context.abort()
-
-    family_obj = _add_new_analysis_to_the_status_api(bundle_obj, status)
-    _reset_action_from_running_on_family(family_obj)
-    new_analysis = _add_new_complete_analysis_record(
-        bundle_data, family_obj, status, version_obj
-    )
-    version_date = version_obj.created_at.date()
-    click.echo(f"new bundle added: {bundle_obj.name}, version {version_date}")
-    _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj)
-
-    return new_analysis
-
-
-def _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj):
-    """Function to include files in housekeeper"""
-    try:
-        hk_api.include(version_obj)
-    except hk.VersionIncludedError as error:
+    except AnalysisDuplicationError:
+        click.echo(click.style("analysis version already added", fg="yellow"))
+        context.abort()
+    except VersionIncludedError as error:
         click.echo(click.style(error.message, fg="red"))
         context.abort()
-    hk_api.add_commit(bundle_obj, version_obj)
+
+    status.add_commit(new_analysis)
+    click.echo(click.style("included files in Housekeeper", fg="green"))
 
 
-def _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj):
-    """Function to create and return a new analysis database record"""
-    pipeline = family_obj.links[0].sample.data_analysis
-    pipeline = pipeline if pipeline else "balsamic"
+@store.command("generate-deliverables-file")
+@click.option("-d", "--dry-run", "dry", is_flag=True, help="print command to console")
+@click.option("--config", "config_path", required=False, help="Optional")
+@click.argument("case_id")
+@click.pass_context
+def generate_deliverables_file(context, dry, config_path, case_id):
+    """Generate a deliverables file for the case_id."""
 
-    if status.analysis(family=family_obj, started_at=version_obj.created_at):
-        raise AnalysisDuplicationError(
-            f"Analysis object already exists for {family_obj.internal_id}{version_obj.created_at}"
-        )
+    conda_env = context.obj["balsamic"]["conda_env"]
+    root_dir = Path(context.obj["balsamic"]["root"])
 
-    new_analysis = status.add_analysis(
-        pipeline=pipeline,
-        version=bundle_data["pipeline_version"],
-        started_at=version_obj.created_at,
-        completed_at=dt.datetime.now(),
-        primary=(len(family_obj.analyses) == 0),
-    )
-    new_analysis.family = family_obj
-    return new_analysis
+    case_obj = context.obj["db"].family(case_id)
 
+    if not case_obj:
+        click.echo(click.style(f"Case {case_id} not found", fg="yellow"))
 
-def _reset_action_from_running_on_family(family_obj):
-    family_obj.action = None
+    if not config_path:
+        config_path = Path.joinpath(root_dir, case_id, case_id + ".json")
 
+    # Call Balsamic
+    command_str = f" plugins deliver" f" --sample-config {config_path}'"
 
-def _add_new_analysis_to_the_status_api(bundle_obj, status):
-    family_obj = status.family(bundle_obj.name)
-    return family_obj
+    command = [f"bash -c 'source activate {conda_env}; balsamic"]
+    command.extend(command_str.split(" "))
 
+    if dry:
+        click.echo(" ".join(command))
+        return SUCCESS
 
-@store.command()
-@click.pass_context
-def completed(context):
-    """Store all completed analyses."""
-    hk_api = context.obj["hk_api"]
+    process = subprocess.run(" ".join(command), shell=True)
+
+    if process.returncode == SUCCESS:
+        click.echo(click.style("created deliverables file", fg="green"))
 
-    exit_code = SUCCESS
-    for analysis_obj in context.obj["tb_api"].analyses(
-        status="completed", deleted=False
-    ):
-        existing_record = hk_api.version(analysis_obj.family, analysis_obj.started_at)
-        if existing_record:
-            LOG.debug(
-                "analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at
-            )
-            continue
-        click.echo(click.style(f"storing family: {analysis_obj.family}", fg="blue"))
-        with Path(analysis_obj.config_path).open() as config_stream:
-            try:
-                context.invoke(analysis, config_stream=config_stream)
-            except Exception:
-                LOG.error("case storage failed: %s", analysis_obj.family, exc_info=True)
-                exit_code = FAIL
-
-    sys.exit(exit_code)
+    return process.returncode
diff --git a/cg/cli/workflow/microsalt/store.py b/cg/cli/workflow/microsalt/store.py
index 849d059717..e46c31a78e 100644
--- a/cg/cli/workflow/microsalt/store.py
+++ b/cg/cli/workflow/microsalt/store.py
@@ -32,9 +32,7 @@ def analysis(context, config_stream):
 
     if not config_stream:
         LOG.error("provide a config, suggestions:")
-        for analysis_obj in context.obj["tb_api"].analyses(
-            status="completed", deleted=False
-        )[:25]:
+        for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False)[:25]:
             click.echo(analysis_obj.config_path)
         context.abort()
 
@@ -46,9 +44,7 @@ def analysis(context, config_stream):
     click.echo(click.style("included files in Housekeeper", fg="green"))
 
 
-def _gather_files_and_bundle_in_housekeeper(
-    config_stream, context, hk_api, status, tb_api
-):
+def _gather_files_and_bundle_in_housekeeper(config_stream, context, hk_api, status, tb_api):
     """Function to gather files and bundle in housekeeper"""
     try:
         bundle_data = tb_api.add_analysis(config_stream)
@@ -59,7 +55,7 @@ def _gather_files_and_bundle_in_housekeeper(
     try:
         results = hk_api.add_bundle(bundle_data)
         if results is None:
-            print(click.style("analysis version already added", fg="yellow"))
+            click.echo(click.style("analysis version already added", fg="yellow"))
             context.abort()
         bundle_obj, version_obj = results
except FileNotFoundError as error: @@ -68,9 +64,7 @@ def _gather_files_and_bundle_in_housekeeper( family_obj = _add_new_analysis_to_the_status_api(bundle_obj, status) _reset_action_from_running_on_family(family_obj) - new_analysis = _add_new_complete_analysis_record( - bundle_data, family_obj, status, version_obj - ) + new_analysis = _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj) version_date = version_obj.created_at.date() click.echo(f"new bundle added: {bundle_obj.name}, version {version_date}") _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj) @@ -123,14 +117,10 @@ def _add_new_analysis_to_the_status_api(bundle_obj, status): def completed(context): """Store all completed analyses.""" hk_api = context.obj["hk_api"] - for analysis_obj in context.obj["tb_api"].analyses( - status="completed", deleted=False - ): + for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False): existing_record = hk_api.version(analysis_obj.family, analysis_obj.started_at) if existing_record: - LOG.debug( - "analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at - ) + LOG.debug("analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at) continue click.echo(click.style(f"storing family: {analysis_obj.family}", fg="blue")) with Path(analysis_obj.config_path).open() as config_stream: diff --git a/cg/cli/workflow/mip_dna/store.py b/cg/cli/workflow/mip_dna/store.py index 795bee625b..2d9830369d 100644 --- a/cg/cli/workflow/mip_dna/store.py +++ b/cg/cli/workflow/mip_dna/store.py @@ -34,9 +34,7 @@ def analysis(context, config_stream): if not config_stream: LOG.error("provide a config, suggestions:") - for analysis_obj in context.obj["tb_api"].analyses( - status="completed", deleted=False - )[:25]: + for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False)[:25]: click.echo(analysis_obj.config_path) context.abort() @@ -47,9 +45,7 @@ def analysis(context, config_stream): click.echo(click.style("included files in Housekeeper", fg="green")) -def _gather_files_and_bundle_in_housekeeper( - config_stream, context, hk_api, status, tb_api -): +def _gather_files_and_bundle_in_housekeeper(config_stream, context, hk_api, status, tb_api): """Function to gather files and bundle in housekeeper""" try: bundle_data = tb_api.add_analysis(config_stream) @@ -59,7 +55,7 @@ def _gather_files_and_bundle_in_housekeeper( try: results = hk_api.add_bundle(bundle_data) if results is None: - print(click.style("analysis version already added", fg="yellow")) + click.echo(click.style("analysis version already added", fg="yellow")) context.abort() bundle_obj, version_obj = results except FileNotFoundError as error: @@ -68,9 +64,7 @@ def _gather_files_and_bundle_in_housekeeper( family_obj = _add_new_analysis_to_the_status_api(bundle_obj, status) _reset_action_from_running_on_family(family_obj) - new_analysis = _add_new_complete_analysis_record( - bundle_data, family_obj, status, version_obj - ) + new_analysis = _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj) version_date = version_obj.created_at.date() click.echo(f"new bundle added: {bundle_obj.name}, version {version_date}") _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj) @@ -91,7 +85,7 @@ def _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj): def _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj): """Function to create and return a new analysis database 
record""" pipeline = family_obj.links[0].sample.data_analysis - pipeline = pipeline if pipeline else "mip-rna" + pipeline = pipeline if pipeline else "mip" if status.analysis(family=family_obj, started_at=version_obj.created_at): raise AnalysisDuplicationError( @@ -125,14 +119,10 @@ def completed(context): hk_api = context.obj["hk_api"] exit_code = SUCCESS - for analysis_obj in context.obj["tb_api"].analyses( - status="completed", deleted=False - ): + for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False): existing_record = hk_api.version(analysis_obj.family, analysis_obj.started_at) if existing_record: - LOG.debug( - "analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at - ) + LOG.debug("analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at) continue click.echo(click.style(f"storing family: {analysis_obj.family}", fg="blue")) with Path(analysis_obj.config_path).open() as config_stream: diff --git a/cg/cli/workflow/mip_rna/store.py b/cg/cli/workflow/mip_rna/store.py index 965bdacb8a..29315168dd 100644 --- a/cg/cli/workflow/mip_rna/store.py +++ b/cg/cli/workflow/mip_rna/store.py @@ -34,9 +34,7 @@ def analysis(context, config_stream): if not config_stream: LOG.error("provide a config, suggestions:") - for analysis_obj in context.obj["tb_api"].analyses( - status="completed", deleted=False - )[:25]: + for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False)[:25]: click.echo(analysis_obj.config_path) context.abort() @@ -48,9 +46,7 @@ def analysis(context, config_stream): click.echo(click.style("included files in Housekeeper", fg="green")) -def _gather_files_and_bundle_in_housekeeper( - config_stream, context, hk_api, status, tb_api -): +def _gather_files_and_bundle_in_housekeeper(config_stream, context, hk_api, status, tb_api): """Function to gather files and bundle in housekeeper""" try: bundle_data = tb_api.add_analysis(config_stream) @@ -61,7 +57,7 @@ def _gather_files_and_bundle_in_housekeeper( try: results = hk_api.add_bundle(bundle_data) if results is None: - print(click.style("analysis version already added", fg="yellow")) + click.echo(click.style("analysis version already added", fg="yellow")) context.abort() bundle_obj, version_obj = results except FileNotFoundError as error: @@ -70,9 +66,7 @@ def _gather_files_and_bundle_in_housekeeper( family_obj = _add_new_analysis_to_the_status_api(bundle_obj, status) _reset_action_from_running_on_family(family_obj) - new_analysis = _add_new_complete_analysis_record( - bundle_data, family_obj, status, version_obj - ) + new_analysis = _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj) version_date = version_obj.created_at.date() click.echo(f"new bundle added: {bundle_obj.name}, version {version_date}") _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj) @@ -93,7 +87,7 @@ def _include_files_in_housekeeper(bundle_obj, context, hk_api, version_obj): def _add_new_complete_analysis_record(bundle_data, family_obj, status, version_obj): """Function to create and return a new analysis database record""" pipeline = family_obj.links[0].sample.data_analysis - pipeline = pipeline if pipeline else "mip" + pipeline = pipeline if pipeline else "mip-rna" if status.analysis(family=family_obj, started_at=version_obj.created_at): raise AnalysisDuplicationError( @@ -127,14 +121,10 @@ def completed(context): hk_api = context.obj["hk_api"] exit_code = SUCCESS - for analysis_obj in context.obj["tb_api"].analyses( - 
status="completed", deleted=False - ): + for analysis_obj in context.obj["tb_api"].analyses(status="completed", deleted=False): existing_record = hk_api.version(analysis_obj.family, analysis_obj.started_at) if existing_record: - LOG.debug( - "analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at - ) + LOG.debug("analysis stored: %s - %s", analysis_obj.family, analysis_obj.started_at) continue click.echo(click.style(f"storing family: {analysis_obj.family}", fg="blue")) with Path(analysis_obj.config_path).open() as config_stream: diff --git a/cg/constants.py b/cg/constants.py index 6e983755a8..5e75539ab4 100644 --- a/cg/constants.py +++ b/cg/constants.py @@ -1,12 +1,6 @@ """Constans for cg""" -PRIORITY_MAP = { - "research": 0, - "standard": 1, - "priority": 2, - "express": 3, - "clinical trials": 4, -} +PRIORITY_MAP = {"research": 0, "standard": 1, "priority": 2, "express": 3, "clinical trials": 4} REV_PRIORITY_MAP = {value: key for key, value in PRIORITY_MAP.items()} PRIORITY_OPTIONS = list(PRIORITY_MAP.keys()) FAMILY_ACTIONS = ("analyze", "running", "hold") diff --git a/cg/exc.py b/cg/exc.py index c53d63b5b1..55a38e8893 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- - - class CgError(Exception): """Base exception for the package.""" diff --git a/cg/meta/__init__.py b/cg/meta/__init__.py index 3d8a7026a0..388551fe20 100644 --- a/cg/meta/__init__.py +++ b/cg/meta/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ These are wrapper interfaces the link outside apps and/or the main package together. diff --git a/cg/meta/invoice.py b/cg/meta/invoice.py index 882f9a18bd..b201c50cce 100644 --- a/cg/meta/invoice.py +++ b/cg/meta/invoice.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - from cg.apps import lims from cg.server.ext import lims as genologics_lims from cg.store import Store, models @@ -38,11 +36,7 @@ def prepare_contact_info(self, costcenter): f"customer {self.customer_obj.internal_id}. See log files." ) - customer = ( - self.db.customer("cust999") - if costcenter.lower() == "kth" - else self.customer_obj - ) + customer = self.db.customer("cust999") if costcenter.lower() == "kth" else self.customer_obj user = customer.invoice_contact if not user: @@ -75,9 +69,7 @@ def prepare(self, costcenter: str) -> dict: raw_record.name, raw_record.ticket_number ) record = self.prepare_record( - costcenter=costcenter.lower(), - discount=self.invoice_obj.discount, - record=raw_record, + costcenter=costcenter.lower(), discount=self.invoice_obj.discount, record=raw_record ) if record: records.append(record) @@ -90,9 +82,7 @@ def prepare(self, costcenter: str) -> dict: return None return { "costcenter": costcenter, - "project_number": getattr( - customer_obj, f"project_account_{costcenter.lower()}" - ), + "project_number": getattr(customer_obj, f"project_account_{costcenter.lower()}"), "customer_id": customer_obj.internal_id, "customer_name": customer_obj.name, "agreement": customer_obj.agreement_registration, @@ -149,20 +139,14 @@ def prepare_record(self, costcenter: str, discount: int, record): percent_kth = record.application_version.application.percent_kth discounted_price = self._discount_price(record, discount) except ValueError: - self.log.append( - f"Application tag/version seems to be missing for sample {record.id}." 
-            )
+            self.log.append(f"Application tag/version seems to be missing for sample {record.id}.")
             return None
 
         split_discounted_price = self._cost_center_split_factor(
             discounted_price, costcenter, percent_kth, tag, version
         )
 
-        order = (
-            record.microbial_order.id
-            if self.record_type == "Microbial"
-            else record.order
-        )
+        order = record.microbial_order.id if self.record_type == "Microbial" else record.order
 
         ticket_number = (
             record.microbial_order.ticket_number
             if self.record_type == "Microbial"
diff --git a/cg/meta/orders/__init__.py b/cg/meta/orders/__init__.py
index 4d249f18fe..971ea2be14 100644
--- a/cg/meta/orders/__init__.py
+++ b/cg/meta/orders/__init__.py
@@ -1,3 +1,2 @@
-# -*- coding: utf-8 -*-
 from .api import OrdersAPI
 from .schema import OrderType
diff --git a/cg/meta/orders/api.py b/cg/meta/orders/api.py
index bc21018497..485955e066 100644
--- a/cg/meta/orders/api.py
+++ b/cg/meta/orders/api.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Unified interface to handle sample submissions.
 
 This interface will update information in Status and/or LIMS as required.
@@ -45,57 +44,57 @@ def submit(self, project: OrderType, data: dict, ticket: dict) -> dict:
         self._validate_customer_on_imported_samples(project, data)
 
         # detect manual ticket assignment
-        ticket_match = re.fullmatch(r'#([0-9]{6})', data['name'])
+        ticket_match = re.fullmatch(r"#([0-9]{6})", data["name"])
         if ticket_match:
             ticket_number = int(ticket_match.group(1))
             LOG.info(f"{ticket_number}: detected ticket in order name")
-            data['ticket'] = ticket_number
+            data["ticket"] = ticket_number
         else:
             # open and assign ticket to order
             try:
                 if self.osticket:
                     message = f"data:text/html;charset=utf-8,New incoming samples: "
-                    for sample in data.get('samples'):
-                        message += '<br />' + sample.get('name')
+                    for sample in data.get("samples"):
+                        message += "<br />" + sample.get("name")
 
-                        if sample.get('family_name'):
+                        if sample.get("family_name"):
                             message += f", family: {sample.get('family_name')}"
 
-                        if sample.get('internal_id'):
+                        if sample.get("internal_id"):
 
-                            existing_sample = self.status.sample(sample.get('internal_id'))
-                            sample_customer = ''
-                            if existing_sample.customer_id != data['customer']:
-                                sample_customer = ' from ' + existing_sample.customer.internal_id
+                            existing_sample = self.status.sample(sample.get("internal_id"))
+                            sample_customer = ""
+                            if existing_sample.customer_id != data["customer"]:
+                                sample_customer = " from " + existing_sample.customer.internal_id
 
                             message += f" (already existing sample{sample_customer})"
 
-                        if sample.get('comment'):
-                            message += ', ' + sample.get('comment')
+                        if sample.get("comment"):
+                            message += ", " + sample.get("comment")
 
                         message += f"<br />"
 
-                    if data.get('comment'):
+                    if data.get("comment"):
                         message += f"<br />{data.get('comment')}."
 
-                    if ticket.get('name'):
+                    if ticket.get("name"):
                         message += f"<br />{ticket.get('name')}"
 
-                    data['ticket'] = self.osticket.open_ticket(
-                        name=ticket['name'],
-                        email=ticket['email'],
-                        subject=data['name'],
+                    data["ticket"] = self.osticket.open_ticket(
+                        name=ticket["name"],
+                        email=ticket["email"],
+                        subject=data["name"],
                         message=message,
                     )
                     LOG.info(f"{data['ticket']}: opened new ticket")
                 else:
-                    data['ticket'] = None
+                    data["ticket"] = None
             except TicketCreationError as error:
                 LOG.warning(error.message)
-                data['ticket'] = None
+                data["ticket"] = None
 
         order_func = getattr(self, f"submit_{project.value}")
         result = order_func(data)
         return result
@@ -103,45 +102,45 @@ def submit(self, project: OrderType, data: dict, ticket: dict) -> dict:
     def submit_rml(self, data: dict) -> dict:
         """Submit a batch of ready made libraries."""
         status_data = self.pools_to_status(data)
-        project_data, _ = self.process_lims(data, data['samples'])
+        project_data, _ = self.process_lims(data, data["samples"])
         new_records = self.store_pools(
-            customer=status_data['customer'],
-            order=status_data['order'],
-            ordered=project_data['date'],
-            ticket=data['ticket'],
-            pools=status_data['pools'],
+            customer=status_data["customer"],
+            order=status_data["order"],
+            ordered=project_data["date"],
+            ticket=data["ticket"],
+            pools=status_data["pools"],
         )
-        return {'project': project_data, 'records': new_records}
+        return {"project": project_data, "records": new_records}
 
     def submit_fastq(self, data: dict) -> dict:
         """Submit a batch of samples for FASTQ delivery."""
         status_data = self.samples_to_status(data)
-        project_data, lims_map = self.process_lims(data, data['samples'])
-        self.fill_in_sample_ids(status_data['samples'], lims_map)
+        project_data, lims_map = self.process_lims(data, data["samples"])
+        self.fill_in_sample_ids(status_data["samples"], lims_map)
         new_samples = self.store_fastq_samples(
-            customer=status_data['customer'],
-            order=status_data['order'],
-            ordered=project_data['date'],
-            ticket=data['ticket'],
-            samples=status_data['samples'],
+            customer=status_data["customer"],
+            order=status_data["order"],
+            ordered=project_data["date"],
+            ticket=data["ticket"],
+            samples=status_data["samples"],
         )
         self.add_missing_reads(new_samples)
-        return {'project': project_data, 'records': new_samples}
+        return {"project": project_data, "records": new_samples}
 
     def submit_metagenome(self, data: dict) -> dict:
         """Submit a batch of metagenome samples."""
         status_data = self.samples_to_status(data)
-        project_data, lims_map = self.process_lims(data, data['samples'])
-        self.fill_in_sample_ids(status_data['samples'], lims_map)
+        project_data, lims_map = self.process_lims(data, data["samples"])
+        self.fill_in_sample_ids(status_data["samples"], lims_map)
         new_samples = self.store_samples(
-            customer=status_data['customer'],
-            order=status_data['order'],
-            ordered=project_data['date'],
-            ticket=data['ticket'],
-            samples=status_data['samples'],
+            customer=status_data["customer"],
+            order=status_data["order"],
+            ordered=project_data["date"],
+            ticket=data["ticket"],
+            samples=status_data["samples"],
         )
         self.add_missing_reads(new_samples)
-        return {'project': project_data, 'records': new_samples}
+        return {"project": project_data, "records": new_samples}
 
     def submit_external(self, data: dict) -> dict:
         """Submit a batch of externally sequenced samples for analysis."""
@@ -151,12 +150,15 @@ def submit_external(self, data: dict) -> dict:
     def submit_case_samples(self, data: dict) -> dict:
         """Submit a batch of samples for sequencing and analysis."""
         result = self.process_family_samples(data)
-        for family_obj in 
result['records']: + for family_obj in result["records"]: LOG.info(f"{family_obj.name}: submit family samples") - status_samples = [link_obj.sample for link_obj in family_obj.links if - link_obj.sample.ticket_number == data['ticket']] + status_samples = [ + link_obj.sample + for link_obj in family_obj.links + if link_obj.sample.ticket_number == data["ticket"] + ] self.add_missing_reads(status_samples) - self.update_application(data['ticket'], result['records']) + self.update_application(data["ticket"], result["records"]) return result def submit_mip(self, data: dict) -> dict: @@ -179,56 +181,61 @@ def submit_microbial(self, data: dict) -> dict: """Submit a batch of microbial samples.""" # prepare data for status database status_data = self.microbial_samples_to_status(data) - self.fill_in_sample_verified_organism(data['samples']) + self.fill_in_sample_verified_organism(data["samples"]) # submit samples to LIMS - project_data, lims_map = self.process_lims(data, data['samples']) + project_data, lims_map = self.process_lims(data, data["samples"]) # submit samples to Status - self.fill_in_sample_ids(status_data['samples'], lims_map, id_key='internal_id') + self.fill_in_sample_ids(status_data["samples"], lims_map, id_key="internal_id") order_obj = self.store_microbial_order( - customer=status_data['customer'], - order=status_data['order'], + customer=status_data["customer"], + order=status_data["order"], ordered=dt.datetime.now(), - ticket=data['ticket'], - lims_project=project_data['id'], - samples=status_data['samples'], - comment=status_data['comment'], + ticket=data["ticket"], + lims_project=project_data["id"], + samples=status_data["samples"], + comment=status_data["comment"], ) - return {'project': project_data, 'records': order_obj.microbial_samples} + return {"project": project_data, "records": order_obj.microbial_samples} def process_family_samples(self, data: dict) -> dict: """Process samples to be analyzed.""" # filter out only new samples status_data = self.cases_to_status(data) - new_samples = [sample for sample in data['samples'] if sample.get('internal_id') is None] + new_samples = [sample for sample in data["samples"] if sample.get("internal_id") is None] if new_samples: project_data, lims_map = self.process_lims(data, new_samples) else: project_data = lims_map = None - samples = [sample - for family in status_data['families'] - for sample in family['samples']] + samples = [sample for family in status_data["families"] for sample in family["samples"]] if lims_map: self.fill_in_sample_ids(samples, lims_map) new_families = self.store_cases( - customer=status_data['customer'], - order=status_data['order'], - ordered=project_data['date'] if project_data else dt.datetime.now(), - ticket=data['ticket'], - cases=status_data['families'], + customer=status_data["customer"], + order=status_data["order"], + ordered=project_data["date"] if project_data else dt.datetime.now(), + ticket=data["ticket"], + cases=status_data["families"], ) - return {'project': project_data, 'records': new_families} + return {"project": project_data, "records": new_families} def update_application(self, ticket_number: int, families: List[models.Family]): """Update application for trios if relevant.""" - reduced_map = {'EXOSXTR100': 'EXTSXTR100', 'WGSPCFC030': 'WGTPCFC030', - 'WGSPCFC060': 'WGTPCFC060'} + reduced_map = { + "EXOSXTR100": "EXTSXTR100", + "WGSPCFC030": "WGTPCFC030", + "WGSPCFC060": "WGTPCFC060", + } for family_obj in families: LOG.debug(f"{family_obj.name}: update application for trios") - 
order_samples = [link_obj.sample for link_obj in family_obj.links if - link_obj.sample.ticket_number == ticket_number] + order_samples = [ + link_obj.sample + for link_obj in family_obj.links + if link_obj.sample.ticket_number == ticket_number + ] if len(order_samples) >= 3: - applications = [sample_obj.application_version.application for sample_obj in - order_samples] + applications = [ + sample_obj.application_version.application for sample_obj in order_samples + ] prep_categories = set(application.prep_category for application in applications) if len(prep_categories) == 1: for sample_obj in order_samples: @@ -236,10 +243,13 @@ def update_application(self, ticket_number: int, families: List[models.Family]): application_tag = sample_obj.application_version.application.tag if application_tag in reduced_map: reduced_tag = reduced_map[application_tag] - LOG.info(f"{sample_obj.internal_id}: update application tag - " - f"{reduced_tag}") + LOG.info( + f"{sample_obj.internal_id}: update application tag - " + f"{reduced_tag}" + ) reduced_version = self.status.current_application_version( - reduced_tag) + reduced_tag + ) sample_obj.application_version = reduced_version def add_missing_reads(self, samples: List[models.Sample]): @@ -250,37 +260,45 @@ def add_missing_reads(self, samples: List[models.Sample]): self.lims.update_sample(sample_obj.internal_id, target_reads=target_reads) @staticmethod - def fill_in_sample_ids(samples: List[dict], lims_map: dict, id_key: str = 'internal_id'): + def fill_in_sample_ids(samples: List[dict], lims_map: dict, id_key: str = "internal_id"): """Fill in LIMS sample ids.""" for sample in samples: LOG.debug(f"{sample['name']}: link sample to LIMS") if not sample.get(id_key): - internal_id = lims_map[sample['name']] + internal_id = lims_map[sample["name"]] LOG.info(f"{sample['name']} -> {internal_id}: connect sample to LIMS") sample[id_key] = internal_id def fill_in_sample_verified_organism(self, samples: List[dict]): for sample in samples: - organism_id = sample['organism'] - reference_genome = sample['reference_genome'] + organism_id = sample["organism"] + reference_genome = sample["reference_genome"] organism = self.status.organism(internal_id=organism_id) - is_verified = organism and organism.reference_genome == reference_genome and \ - organism.verified - sample['verified_organism'] = is_verified + is_verified = ( + organism and organism.reference_genome == reference_genome and organism.verified + ) + sample["verified_organism"] = is_verified def _validate_customer_on_imported_samples(self, project, data): - for sample in data.get('samples'): - - if sample.get('internal_id'): - - if project not in (OrderType.MIP, OrderType.EXTERNAL, OrderType.BALSAMIC, - OrderType.MIP_BALSAMIC, OrderType.MIP_RNA): - raise OrderError(f"Only MIP, Balsamic and external orders can have imported " - f"samples: " - f"{sample.get('name')}") + for sample in data.get("samples"): + + if sample.get("internal_id"): + + if project not in ( + OrderType.MIP, + OrderType.EXTERNAL, + OrderType.BALSAMIC, + OrderType.MIP_BALSAMIC, + OrderType.MIP_RNA, + ): + raise OrderError( + f"Only MIP, Balsamic and external orders can have imported " + f"samples: " + f"{sample.get('name')}" + ) - existing_sample = self.status.sample(sample.get('internal_id')) - data_customer = self.status.customer(data['customer']) + existing_sample = self.status.sample(sample.get("internal_id")) + data_customer = self.status.customer(data["customer"]) if existing_sample.customer.customer_group_id != 
data_customer.customer_group_id: raise OrderError(f"Sample not available: {sample.get('name')}") diff --git a/cg/meta/report/api.py b/cg/meta/report/api.py index 710aefffdb..051c9820cc 100644 --- a/cg/meta/report/api.py +++ b/cg/meta/report/api.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging from datetime import datetime from pathlib import Path @@ -140,12 +139,8 @@ def _incorporate_trending_data(self, report_data: dict, family_id: str): duplicates = duplicates_all_samples.get(lims_id) sample["duplicates"] = Presenter.process_float_string(duplicates, 1) - report_data["genome_build"] = Presenter.process_string( - trending_data.get("genome_build") - ) - report_data["mip_version"] = Presenter.process_string( - trending_data.get("mip_version") - ) + report_data["genome_build"] = Presenter.process_string(trending_data.get("genome_build")) + report_data["mip_version"] = Presenter.process_string(trending_data.get("mip_version")) def _incorporate_coverage_data(self, samples: list, panels: list): """Incorporate coverage data from Chanjo for each sample .""" @@ -159,9 +154,7 @@ def _incorporate_coverage_data(self, samples: list, panels: list): if sample_coverage: target_coverage = sample_coverage.get("mean_coverage") - sample["target_coverage"] = Presenter.process_float_string( - target_coverage, 1 - ) + sample["target_coverage"] = Presenter.process_float_string(target_coverage, 1) target_completeness = sample_coverage.get("mean_completeness") sample["target_completeness"] = Presenter.process_float_string( target_completeness, 2 @@ -180,35 +173,23 @@ def _fetch_family_samples_from_status_db(self, family_id: str) -> list: delivery_data_sample = dict() delivery_data_sample["id"] = sample.internal_id delivery_data_sample["ticket"] = Presenter.process_int(sample.ticket_number) - delivery_data_sample["status"] = Presenter.process_string( - family_sample.status - ) - delivery_data_sample["received"] = Presenter.process_datetime( - sample.received_at - ) - delivery_data_sample["prep_date"] = Presenter.process_datetime( - sample.prepared_at - ) + delivery_data_sample["status"] = Presenter.process_string(family_sample.status) + delivery_data_sample["received"] = Presenter.process_datetime(sample.received_at) + delivery_data_sample["prep_date"] = Presenter.process_datetime(sample.prepared_at) delivery_data_sample["sequencing_date"] = Presenter.process_datetime( sample.sequenced_at ) - delivery_data_sample["delivery_date"] = Presenter.process_datetime( - sample.delivered_at - ) + delivery_data_sample["delivery_date"] = Presenter.process_datetime(sample.delivered_at) delivery_data_sample["processing_time"] = Presenter.process_int( SampleCalculator.calculate_processing_days(sample) ) - delivery_data_sample["order_date"] = Presenter.process_datetime( - sample.ordered_at - ) + delivery_data_sample["order_date"] = Presenter.process_datetime(sample.ordered_at) delivery_data_sample["million_read_pairs"] = Presenter.process_int( round(sample.reads / 2000000, 1) if sample.reads else None ) - delivery_data_sample["capture_kit"] = Presenter.process_string( - sample.capture_kit - ) + delivery_data_sample["capture_kit"] = Presenter.process_string(sample.capture_kit) delivery_data_sample["bioinformatic_analysis"] = Presenter.process_string( sample.data_analysis ) @@ -223,9 +204,7 @@ def _get_application_data_from_status_db(self, samples: list) -> dict: used_applications = set() for sample in samples: - used_applications.add( - (sample["application"], sample["application_version"]) - ) + 
used_applications.add((sample["application"], sample["application_version"])) applications = [] accreditations = [] @@ -265,9 +244,7 @@ def _incorporate_lims_data(self, report_data: dict): sample["name"] = Presenter.process_string(lims_sample.get("name")) sample["sex"] = Presenter.process_string(lims_sample.get("sex")) sample["source"] = Presenter.process_string(lims_sample.get("source")) - sample["application"] = Presenter.process_string( - lims_sample.get("application") - ) + sample["application"] = Presenter.process_string(lims_sample.get("application")) sample["application_version"] = lims_sample.get("application_version") def _get_genes_from_scout(self, panels: list) -> list: diff --git a/cg/meta/store/balsamic.py b/cg/meta/store/balsamic.py new file mode 100644 index 0000000000..796a6de4d5 --- /dev/null +++ b/cg/meta/store/balsamic.py @@ -0,0 +1,109 @@ +"""Build a balsamic bundle for linking in Housekeeper.""" +import datetime as dt +import logging +import os +import shutil +from pathlib import Path + +import ruamel.yaml +from cg.exc import AnalysisDuplicationError + +LOG = logging.getLogger(__name__) + + +def gather_files_and_bundle_in_housekeeper(config_stream, hk_api, status, case_obj): + """Gather analysis files and bundle them in Housekeeper.""" + + bundle_data = _add_analysis(config_stream, case_obj) + + results = hk_api.add_bundle(bundle_data) + if not results: + raise AnalysisDuplicationError("analysis version already added") + bundle_obj, version_obj = results + + _reset_analysis_action(case_obj) + new_analysis = _create_analysis(bundle_data, case_obj, status, version_obj) + version_date = version_obj.created_at.date() + LOG.info(f"new bundle added: {bundle_obj.name}, version {version_date}") + _include_files_in_housekeeper(bundle_obj, hk_api, version_obj) + return new_analysis + + +def _include_files_in_housekeeper(bundle_obj, hk_api, version_obj): + """Include the files of a bundle version in Housekeeper.""" + hk_api.include(version_obj) + hk_api.add_commit(bundle_obj, version_obj) + + +def _create_analysis(bundle_data, case_obj, status, version_obj): + """Create and return a new analysis database record.""" + pipeline = case_obj.links[0].sample.data_analysis + pipeline = pipeline if pipeline else "balsamic" + + if status.analysis(family=case_obj, started_at=version_obj.created_at): + raise AnalysisDuplicationError( + f"Analysis object already exists for {case_obj.internal_id} {version_obj.created_at}" + ) + + new_analysis = status.add_analysis( + pipeline=pipeline, + version=bundle_data["pipeline_version"], + started_at=version_obj.created_at, + completed_at=dt.datetime.now(), + primary=(len(case_obj.analyses) == 0), + ) + new_analysis.family = case_obj + return new_analysis + + +def _reset_analysis_action(case_obj): + case_obj.action = None + + +def _add_analysis(config_stream, case_obj): + """Gather information from balsamic analysis to store.""" + with Path(config_stream).open() as in_stream: + meta_raw = ruamel.yaml.safe_load(in_stream) + new_bundle = _build_bundle( + meta_raw, name=case_obj.internal_id, created=dt.datetime.now(), version="1" + ) + return new_bundle + + +def _build_bundle(meta_data: dict, name: str, created: dt.datetime, version: str) -> dict: + """Create a new bundle.""" + data = { + "name": name, + "created": created, + "pipeline_version": version, + "files": _get_files(meta_data), + } + return data + + +def _get_files(meta_data: dict) -> list: + """Get all the files from the balsamic deliverables.""" + + paths = {} + for tag in meta_data["files"]: + for path_str
in meta_data["files"][tag]: + path = Path(path_str).name + if path in paths: + paths[path]["tags"].append(tag) + else: + paths[path] = {"tags": [tag], "full_path": path_str} + + data = [] + for path_item in paths.values(): + path = path_item["full_path"] + tags = path_item["tags"] + if os.path.isdir(path): + path = compress_directory(path) + + data.append({"path": path, "tags": tags, "archive": False}) + return data + + +def compress_directory(path): + """Compress a directory into a gzipped tar archive and return the archive path.""" + return shutil.make_archive(path, "gztar", path, logger=LOG) diff --git a/cg/meta/transfer/__init__.py b/cg/meta/transfer/__init__.py index 2405a11edd..0ca87219f2 100644 --- a/cg/meta/transfer/__init__.py +++ b/cg/meta/transfer/__init__.py @@ -1,3 +1,2 @@ -# -*- coding: utf-8 -*- from .flowcell import TransferFlowcell from .lims import TransferLims, SampleState, PoolState, MicrobialState diff --git a/cg/meta/transfer/flowcell.py b/cg/meta/transfer/flowcell.py index 30f4d9e936..dc918589ab 100644 --- a/cg/meta/transfer/flowcell.py +++ b/cg/meta/transfer/flowcell.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging from typing import List @@ -9,17 +8,16 @@ LOG = logging.getLogger(__name__) -class TransferFlowcell(): - - def __init__(self, db: Store, stats_api: StatsAPI, hk_api: HousekeeperAPI=None): +class TransferFlowcell: + def __init__(self, db: Store, stats_api: StatsAPI, hk_api: HousekeeperAPI = None): self.db = db self.stats = stats_api self.hk = hk_api - def transfer(self, flowcell_name: str, store: bool=True) -> models.Flowcell: + def transfer(self, flowcell_name: str, store: bool = True) -> models.Flowcell: """Populate the database with information about a flowcell.""" - if store and self.hk.tag('fastq') is None: - self.hk.add_commit(self.hk.new_tag('fastq')) + if store and self.hk.tag("fastq") is None: + self.hk.add_commit(self.hk.new_tag("fastq")) if store and self.hk.tag(flowcell_name) is None: self.hk.add_commit(self.hk.new_tag(flowcell_name)) stats_data = self.stats.flowcell(flowcell_name) @@ -27,15 +25,16 @@ def transfer(self, flowcell_name: str, store: bool=True) -> models.Flowcell: if flowcell_obj is None: flowcell_obj = self.db.add_flowcell( name=flowcell_name, - sequencer=stats_data['sequencer'], - sequencer_type=stats_data['sequencer_type'], - date=stats_data['date'], + sequencer=stats_data["sequencer"], + sequencer_type=stats_data["sequencer_type"], + date=stats_data["date"], ) - flowcell_obj.status = 'ondisk' - for sample_data in stats_data['samples']: + flowcell_obj.status = "ondisk" + for sample_data in stats_data["samples"]: LOG.debug(f"adding reads/fastqs to sample: {sample_data['name']}") - sample_obj = (self.db.sample(sample_data['name']) or - self.db.microbial_sample(sample_data['name'])) + sample_obj = self.db.sample(sample_data["name"]) or self.db.microbial_sample( + sample_data["name"] + ) if sample_obj is None: LOG.warning(f"unable to find sample: {sample_data['name']}") continue @@ -44,14 +43,16 @@ def transfer(self, flowcell_name: str, store: bool=True) -> models.Flowcell: self.store_fastqs( sample=sample_obj.internal_id, flowcell=flowcell_name, - fastq_files=sample_data['fastqs'] + fastq_files=sample_data["fastqs"], ) - sample_obj.reads = sample_data['reads'] - enough_reads = (sample_obj.reads > - sample_obj.application_version.application.expected_reads) - newest_date = ((sample_obj.sequenced_at is None) or - (flowcell_obj.sequenced_at > sample_obj.sequenced_at)) + sample_obj.reads = sample_data["reads"] + enough_reads = ( + sample_obj.reads > sample_obj.application_version.application.expected_reads + ) +
newest_date = (sample_obj.sequenced_at is None) or ( + flowcell_obj.sequenced_at > sample_obj.sequenced_at + ) if enough_reads and newest_date: sample_obj.sequenced_at = flowcell_obj.sequenced_at @@ -60,8 +61,10 @@ def transfer(self, flowcell_name: str, store: bool=True) -> models.Flowcell: if isinstance(sample_obj, models.MicrobialSample): flowcell_obj.microbial_samples.append(sample_obj) - LOG.info(f"added reads to sample: {sample_data['name']} - {sample_data['reads']} " - f"[{'DONE' if enough_reads else 'NOT DONE'}]") + LOG.info( + f"added reads to sample: {sample_data['name']} - {sample_data['reads']} " + f"[{'DONE' if enough_reads else 'NOT DONE'}]" + ) return flowcell_obj @@ -76,12 +79,12 @@ def store_fastqs(self, sample: str, flowcell: str, fastq_files: List[str]): self.hk.commit() LOG.info(f"added new Housekeeper bundle: {hk_bundle.name}") - with self.hk.session.no_autoflush: + with self.hk.session_no_autoflush(): hk_version = hk_bundle.versions[0] for fastq_file in fastq_files: if self.hk.files(path=fastq_file).first() is None: LOG.info(f"found FASTQ file: {fastq_file}") - tags = [self.hk.tag('fastq'), self.hk.tag(flowcell)] + tags = [self.hk.tag("fastq"), self.hk.tag(flowcell)] new_file = self.hk.new_file(path=fastq_file, tags=tags) hk_version.files.append(new_file) self.hk.commit() diff --git a/cg/meta/transfer/lims.py b/cg/meta/transfer/lims.py index 78c68f8549..8dd9273e30 100644 --- a/cg/meta/transfer/lims.py +++ b/cg/meta/transfer/lims.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from enum import Enum import logging @@ -9,31 +8,30 @@ class SampleState(Enum): - RECEIVED = 'received' - PREPARED = 'prepared' - DELIVERED = 'delivered' + RECEIVED = "received" + PREPARED = "prepared" + DELIVERED = "delivered" class PoolState(Enum): - RECEIVED = 'received' - DELIVERED = 'delivered' + RECEIVED = "received" + DELIVERED = "delivered" class MicrobialState(Enum): - RECEIVED = 'received' - PREPARED = 'prepared' - SEQUENCED = 'sequenced' - DELIVERED = 'delivered' + RECEIVED = "received" + PREPARED = "prepared" + SEQUENCED = "sequenced" + DELIVERED = "delivered" class IncludeOptions(Enum): - UNSET = 'unset' - NOTINVOICED = 'not-invoiced' - ALL = 'all' + UNSET = "unset" + NOTINVOICED = "not-invoiced" + ALL = "all" class TransferLims(object): - def __init__(self, status: Store, lims: LimsAPI): self.status = status self.lims = lims @@ -71,7 +69,7 @@ def __init__(self, status: Store, lims: LimsAPI): def _get_all_samples_not_yet_delivered(self): return self.status.samples_not_delivered() - def transfer_samples(self, status_type: SampleState, include='unset'): + def transfer_samples(self, status_type: SampleState, include="unset"): """Transfer information about samples.""" samples = self._get_samples_to_include(include, status_type) @@ -84,14 +82,16 @@ def transfer_samples(self, status_type: SampleState, include='unset'): for sample_obj in samples: lims_date = self._date_functions[status_type](sample_obj.internal_id) - statusdb_date = getattr(sample_obj, f'{status_type.value}_at') + statusdb_date = getattr(sample_obj, f"{status_type.value}_at") if lims_date: if statusdb_date and statusdb_date.date() == lims_date: continue - LOG.info(f"Found new {status_type.value} date for {sample_obj.internal_id}: " \ - f"{lims_date}, old value: {statusdb_date} ") + LOG.info( + f"Found new {status_type.value} date for {sample_obj.internal_id}: " + f"{lims_date}, old value: {statusdb_date} " + ) setattr(sample_obj, f"{status_type.value}_at", lims_date) self.status.commit() @@ -124,8 +124,13 @@ def 
transfer_pools(self, status_type: PoolState): samples_in_pool = self.lims.get_samples(projectname=ticket_number) for sample_obj in samples_in_pool: status_date = self._date_functions[status_type](sample_obj.id) - if sample_obj.udf['pool name'] == pool_obj.name and status_date is not None: - LOG.info(f"Found {status_type.value} date for pool id {pool_obj.id}: {status_date}.") + if sample_obj.udf["pool name"] == pool_obj.name and status_date is not None: + LOG.info( + "Found %s date for pool id %s: %s", + status_type.value, + pool_obj.id, + status_date, + ) setattr(pool_obj, f"{status_type.value}_at", status_date) self.status.commit() break @@ -136,7 +141,7 @@ def transfer_microbial_samples(self, status_type: MicrobialState): """Transfer information about microbial samples.""" microbial_samples = self._microbial_samples_functions[status_type]() - + if microbial_samples is None: LOG.info(f"No microbial samples found with {status_type.value}") return @@ -147,20 +152,26 @@ def transfer_microbial_samples(self, status_type: MicrobialState): internal_id = microbial_sample_obj.internal_id lims_date = self._date_functions[status_type](microbial_sample_obj.internal_id) - statusdb_date = getattr(microbial_sample_obj, f'{status_type.value}_at') + statusdb_date = getattr(microbial_sample_obj, f"{status_type.value}_at") if lims_date: if statusdb_date and statusdb_date.date() == lims_date: continue - LOG.info(f"Found new {status_type.value} date for {microbial_sample_obj.internal_id}: " \ - f"{lims_date}, old value: {statusdb_date} ") + LOG.info( + f"Found new {status_type.value} date for {microbial_sample_obj.internal_id}: " + f"{lims_date}, old value: {statusdb_date} " + ) setattr(microbial_sample_obj, f"{status_type.value}_at", lims_date) self.status.commit() else: - LOG.debug(f"no {status_type.value} date found for {microbial_sample_obj.internal_id}") - LOG.info(f"no {status_type.value} date found for {microbial_sample_obj.internal_id}") + LOG.debug( + f"no {status_type.value} date found for {microbial_sample_obj.internal_id}" + ) + LOG.info( + f"no {status_type.value} date found for {microbial_sample_obj.internal_id}" + ) def _get_samples_in_step(self, status_type): return self._sample_functions[status_type]() diff --git a/cg/meta/upload/gt.py b/cg/meta/upload/gt.py index aaebeafa04..0ec1ccc665 100644 --- a/cg/meta/upload/gt.py +++ b/cg/meta/upload/gt.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging from pathlib import Path @@ -11,9 +10,13 @@ class UploadGenotypesAPI(object): - - def __init__(self, status_api: Store, hk_api: hk.HousekeeperAPI, tb_api: tb.TrailblazerAPI, - gt_api: gt.GenotypeAPI): + def __init__( + self, + status_api: Store, + hk_api: hk.HousekeeperAPI, + tb_api: tb.TrailblazerAPI, + gt_api: gt.GenotypeAPI, + ): self.status = status_api self.hk = hk_api self.tb = tb_api @@ -23,33 +26,30 @@ def data(self, analysis_obj: models.Analysis) -> dict: """Fetch data about an analysis to load genotypes.""" analysis_date = analysis_obj.started_at or analysis_obj.completed_at hk_version = self.hk.version(analysis_obj.family.internal_id, analysis_date) - hk_bcf = self.hk.files(version=hk_version.id, tags=['snv-gbcf']).first() + hk_bcf = self.hk.files(version=hk_version.id, tags=["snv-gbcf"]).first() if hk_bcf is None: LOG.warning("unable to find GBCF for genotype upload") return None - data = { - 'bcf': hk_bcf.full_path, - 'samples_sex': {}, - } + data = {"bcf": hk_bcf.full_path, "samples_sex": {}} analysis_sexes = self._analysis_sex(hk_version) for link_obj in analysis_obj.family.links: 
- data['samples_sex'][link_obj.sample.internal_id] = { - 'pedigree': link_obj.sample.sex, - 'analysis': analysis_sexes[link_obj.sample.internal_id], + data["samples_sex"][link_obj.sample.internal_id] = { + "pedigree": link_obj.sample.sex, + "analysis": analysis_sexes[link_obj.sample.internal_id], } return data def _analysis_sex(self, hk_version: hk.models.Version) -> dict: """Fetch analysis sex for each sample of an analysis.""" - hk_qcmetrics = self.hk.files(version=hk_version.id, tags=['qcmetrics']).first() + hk_qcmetrics = self.hk.files(version=hk_version.id, tags=["qcmetrics"]).first() with Path(hk_qcmetrics.full_path).open() as in_stream: qcmetrics_raw = ruamel.yaml.safe_load(in_stream) qcmetrics_data = self.tb.parse_qcmetrics(qcmetrics_raw) data = {} - for sample_data in qcmetrics_data['samples']: - data[sample_data['id']] = sample_data['predicted_sex'] + for sample_data in qcmetrics_data["samples"]: + data[sample_data["id"]] = sample_data["predicted_sex"] return data - def upload(self, data: dict, replace: bool=False): + def upload(self, data: dict, replace: bool = False): """Upload data about genotypes for a family of samples.""" - self.gt.upload(str(data['bcf']), data['samples_sex'], force=replace) + self.gt.upload(str(data["bcf"]), data["samples_sex"], force=replace) diff --git a/cg/meta/upload/observations.py b/cg/meta/upload/observations.py index 0e22ae6700..e3c4c4d04c 100644 --- a/cg/meta/upload/observations.py +++ b/cg/meta/upload/observations.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ API for uploading observations """ @@ -15,7 +13,7 @@ LOG = logging.getLogger(__name__) -class UploadObservationsAPI(): +class UploadObservationsAPI: """API to upload observations to LoqusDB.""" @@ -31,25 +29,22 @@ def get_input(self, analysis_obj: models.Analysis) -> dict: analysis_date = analysis_obj.started_at or analysis_obj.completed_at hk_version = self.housekeeper.version(analysis_obj.family.internal_id, analysis_date) - hk_vcf = self.housekeeper.files(version=hk_version.id, tags=['vcf-snv-research']).first() - hk_snv_gbcf = self.housekeeper.files(version=hk_version.id, tags=['snv-gbcf']).first() - hk_pedigree = self.housekeeper.files(version=hk_version.id, tags=['pedigree']).first() - - input_data['sv_vcf'] = None - if self.loqusdb.analysis_type == 'wgs': - hk_sv_vcf = self.housekeeper.files(version=hk_version.id, - tags=['vcf-sv-research']).first() + hk_vcf = self.housekeeper.files(version=hk_version.id, tags=["vcf-snv-research"]).first() + hk_snv_gbcf = self.housekeeper.files(version=hk_version.id, tags=["snv-gbcf"]).first() + hk_pedigree = self.housekeeper.files(version=hk_version.id, tags=["pedigree"]).first() + + input_data["sv_vcf"] = None + if self.loqusdb.analysis_type == "wgs": + hk_sv_vcf = self.housekeeper.files( + version=hk_version.id, tags=["vcf-sv-research"] + ).first() if hk_sv_vcf is None: raise FileNotFoundError("No file with vcf-sv-research tag in housekeeper") if not Path(hk_sv_vcf.full_path).exists(): raise FileNotFoundError(f"{hk_sv_vcf.full_path} does not exist") - input_data['sv_vcf'] = hk_sv_vcf.full_path + input_data["sv_vcf"] = hk_sv_vcf.full_path - hk_files = { - 'vcf-snv-research': hk_vcf, - 'snv-gbcf': hk_snv_gbcf, - 'pedigree': hk_pedigree - } + hk_files = {"vcf-snv-research": hk_vcf, "snv-gbcf": hk_snv_gbcf, "pedigree": hk_pedigree} # Check that the files exist.
If hk returns None, or the path does not exist, # a FileNotFoundError is raised @@ -59,10 +54,10 @@ def get_input(self, analysis_obj: models.Analysis) -> dict: if not Path(file.full_path).exists(): raise FileNotFoundError(f"{file.full_path} does not exist") - input_data['family'] = analysis_obj.family.internal_id - input_data['vcf'] = hk_vcf.full_path - input_data['snv_gbcf'] = hk_snv_gbcf.full_path - input_data['pedigree'] = hk_pedigree.full_path + input_data["family"] = analysis_obj.family.internal_id + input_data["vcf"] = hk_vcf.full_path + input_data["snv_gbcf"] = hk_snv_gbcf.full_path + input_data["pedigree"] = hk_pedigree.full_path return input_data @@ -70,20 +65,24 @@ def upload(self, input_data: dict): """Upload observations for a family of samples.""" try: - existing_case = self.loqusdb.get_case(case_id=input_data['family']) + existing_case = self.loqusdb.get_case(case_id=input_data["family"]) # If CaseNotFoundError is raised, this should trigger the load method of loqusdb except CaseNotFoundError: - duplicate = self.loqusdb.get_duplicate(input_data['snv_gbcf']) + duplicate = self.loqusdb.get_duplicate(input_data["snv_gbcf"]) if duplicate: err_msg = f"Found duplicate {duplicate['ind_id']} in case {duplicate['case_id']}" raise DuplicateSampleError(err_msg) - results = self.loqusdb.load(input_data['family'], input_data['pedigree'], - input_data['vcf'], input_data['snv_gbcf'], - vcf_sv_path=input_data['sv_vcf']) - LOG.info("parsed %s variants", results['variants']) + results = self.loqusdb.load( + input_data["family"], + input_data["pedigree"], + input_data["vcf"], + input_data["snv_gbcf"], + vcf_sv_path=input_data["sv_vcf"], + ) + LOG.info("parsed %s variants", results["variants"]) else: log_msg = f"found existing family {existing_case['case_id']}, skipping observations" @@ -99,7 +98,7 @@ def process(self, analysis_obj: models.Analysis): self.upload(results) case_obj = self.loqusdb.get_case(analysis_obj.family.internal_id) for link in analysis_obj.family.links: - link.sample.loqusdb_id = str(case_obj['_id']) + link.sample.loqusdb_id = str(case_obj["_id"]) self.status.commit() @staticmethod diff --git a/cg/meta/upload/scoutapi.py b/cg/meta/upload/scoutapi.py index 05147793da..9a8177b0f3 100644 --- a/cg/meta/upload/scoutapi.py +++ b/cg/meta/upload/scoutapi.py @@ -49,13 +49,9 @@ def build_samples(self, analysis_obj: models.Analysis, hk_version_id: int = None sample_id = link_obj.sample.internal_id bam_path = self.fetch_file_path("bam", sample_id, hk_version_id) alignment_file_path = self.fetch_file_path("cram", sample_id, hk_version_id) - chromograph_path = self.fetch_file_path( - "chromograph", sample_id, hk_version_id - ) + chromograph_path = self.fetch_file_path("chromograph", sample_id, hk_version_id) mt_bam_path = self.fetch_file_path("bam-mt", sample_id, hk_version_id) - vcf2cytosure_path = self.fetch_file_path( - "vcf2cytosure", sample_id, hk_version_id - ) + vcf2cytosure_path = self.fetch_file_path("vcf2cytosure", sample_id, hk_version_id) lims_sample = dict() try: @@ -84,12 +80,8 @@ def build_samples(self, analysis_obj: models.Analysis, hk_version_id: int = None def generate_config(self, analysis_obj: models.Analysis) -> dict: """Fetch data about an analysis to load Scout.""" analysis_date = analysis_obj.started_at or analysis_obj.completed_at - hk_version = self.housekeeper.version( - analysis_obj.family.internal_id, analysis_date - ) - analysis_data = self.analysis.get_latest_metadata( - analysis_obj.family.internal_id - ) + hk_version =
self.housekeeper.version(analysis_obj.family.internal_id, analysis_date) + analysis_data = self.analysis.get_latest_metadata(analysis_obj.family.internal_id) data = { "analysis_date": analysis_obj.completed_at, @@ -133,9 +125,7 @@ def save_config_file(upload_config: dict, file_path: Path): yml.dump(upload_config, file_path) @staticmethod - def add_scout_config_to_hk( - config_file_path: Path, hk_api: hk.HousekeeperAPI, case_id: str - ): + def add_scout_config_to_hk(config_file_path: Path, hk_api: hk.HousekeeperAPI, case_id: str): """Add scout load config to hk bundle""" tag_name = "scout-load-config" version_obj = hk_api.last_version(bundle=case_id) diff --git a/cg/meta/upload/vogue.py b/cg/meta/upload/vogue.py index a9233896f8..55afec9d0f 100644 --- a/cg/meta/upload/vogue.py +++ b/cg/meta/upload/vogue.py @@ -1,5 +1,5 @@ """API to run Vogue""" -# -*- coding: utf-8 -*- + import json from cg.apps.gt import GenotypeAPI @@ -10,9 +10,7 @@ class UploadVogueAPI: """API to load data into Vogue""" - def __init__( - self, genotype_api: GenotypeAPI, vogue_api: VogueAPI, store: Store, - ): + def __init__(self, genotype_api: GenotypeAPI, vogue_api: VogueAPI, store: Store): self.genotype_api = genotype_api self.vogue_api = vogue_api self.store = store @@ -36,9 +34,7 @@ def load_apptags(self): apptags = self.store.applications() apptags_for_vogue = [] for tag in apptags.all(): - apptags_for_vogue.append( - {"tag": tag.tag, "prep_category": tag.prep_category} - ) + apptags_for_vogue.append({"tag": tag.tag, "prep_category": tag.prep_category}) self.vogue_api.load_apptags(apptags_for_vogue) diff --git a/cg/meta/workflow/balsamic.py b/cg/meta/workflow/balsamic.py index 8e44bf18c9..ed3bfba24f 100644 --- a/cg/meta/workflow/balsamic.py +++ b/cg/meta/workflow/balsamic.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import gzip import logging import re @@ -104,8 +103,7 @@ def run(self, family_obj: models.Family, **kwargs): external = link_obj.sample.application_version.application.is_external if downsampled or external: self.LOG.info( - "%s: downsampled/external - skip evaluation", - link_obj.sample.internal_id, + "%s: downsampled/external - skip evaluation", link_obj.sample.internal_id ) kwargs["skip_evaluation"] = True break @@ -153,28 +151,21 @@ def build_config(self, family_obj: models.Family) -> dict: if link.sample.capture_kit: # set the capture kit from status: key or custom file name mip_capturekit = CAPTUREKIT_MAP.get(link.sample.capture_kit) - sample_data["capture_kit"] = ( - mip_capturekit or link.sample.capture_kit - ) + sample_data["capture_kit"] = mip_capturekit or link.sample.capture_kit else: if link.sample.downsampled_to: - self.LOG.debug( - f"{link.sample.name}: downsampled sample, skipping" - ) + self.LOG.debug(f"{link.sample.name}: downsampled sample, skipping") else: try: capture_kit = self.lims.capture_kit(link.sample.internal_id) if capture_kit is None or capture_kit == "NA": self.LOG.warning( - f"%s: capture kit not found", - link.sample.internal_id, + f"%s: capture kit not found", link.sample.internal_id ) else: sample_data["capture_kit"] = CAPTUREKIT_MAP[capture_kit] except HTTPError: - self.LOG.warning( - f"{link.sample.internal_id}: not found (LIMS)" - ) + self.LOG.warning(f"{link.sample.internal_id}: not found (LIMS)") if link.mother: sample_data["mother"] = link.mother.internal_id if link.father: @@ -267,9 +258,7 @@ def link_sample(self, fastq_handler: BaseFastqHandler, sample: str, case: str): def panel(self, family_obj: models.Family) -> List[str]: """Create the aggregated panel 
file.""" - all_panels = self.convert_panels( - family_obj.customer.internal_id, family_obj.panels - ) + all_panels = self.convert_panels(family_obj.customer.internal_id, family_obj.panels) bed_lines = self.scout.export_panels(all_panels) return bed_lines @@ -313,24 +302,20 @@ def _get_latest_raw_file(self, family_id: str, tag: str) -> Any: def _open_bundle_file(self, relative_file_path: str) -> Any: """Open a bundle file and return it as a Python object.""" - full_file_path = self.pather( - self.deliver.get_post_analysis_files_root_dir() - ).joinpath(relative_file_path) + full_file_path = self.pather(self.deliver.get_post_analysis_files_root_dir()).joinpath( + relative_file_path + ) open_file = self.yaml_loader(self.pather(full_file_path).open()) return open_file def get_latest_metadata(self, family_id: str) -> dict: """Get the latest trending data for a family.""" - mip_config_raw = self._get_latest_raw_file( - family_id=family_id, tag="mip-config" - ) + mip_config_raw = self._get_latest_raw_file(family_id=family_id, tag="mip-config") qcmetrics_raw = self._get_latest_raw_file(family_id=family_id, tag="qcmetrics") - sampleinfo_raw = self._get_latest_raw_file( - family_id=family_id, tag="sampleinfo" - ) + sampleinfo_raw = self._get_latest_raw_file(family_id=family_id, tag="sampleinfo") trending = dict() diff --git a/cg/meta/workflow/microsalt.py b/cg/meta/workflow/microsalt.py index 80889a692a..f84085eb33 100644 --- a/cg/meta/workflow/microsalt.py +++ b/cg/meta/workflow/microsalt.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import gzip import logging import re diff --git a/cg/meta/workflow/mip_dna.py b/cg/meta/workflow/mip_dna.py index 906c6323d9..09f1723a9c 100644 --- a/cg/meta/workflow/mip_dna.py +++ b/cg/meta/workflow/mip_dna.py @@ -106,8 +105,7 @@ def run(self, family_obj: models.Family, **kwargs): external = link_obj.sample.application_version.application.is_external if downsampled or external: self.LOG.info( - "%s: downsampled/external - skip evaluation", - link_obj.sample.internal_id, + "%s: downsampled/external - skip evaluation", link_obj.sample.internal_id ) kwargs["skip_evaluation"] = True break @@ -149,28 +148,21 @@ def build_config(self, family_obj: models.Family) -> dict: if link.sample.capture_kit: # set the capture kit from status: key or custom file name mip_capturekit = CAPTUREKIT_MAP.get(link.sample.capture_kit) - sample_data["capture_kit"] = ( - mip_capturekit or link.sample.capture_kit - ) + sample_data["capture_kit"] = mip_capturekit or link.sample.capture_kit else: if link.sample.downsampled_to: - self.LOG.debug( - "%s: downsampled sample, skipping", link.sample.name - ) + self.LOG.debug("%s: downsampled sample, skipping", link.sample.name) else: try: capture_kit = self.lims.capture_kit(link.sample.internal_id) if capture_kit is None or capture_kit == "NA": self.LOG.warning( - "%s: capture kit not found", - link.sample.internal_id, + "%s: capture kit not found", link.sample.internal_id ) else: sample_data["capture_kit"] = CAPTUREKIT_MAP[capture_kit] except HTTPError: - self.LOG.warning( - "%s: not found (LIMS)", link.sample.internal_id - ) + self.LOG.warning("%s: not found (LIMS)", link.sample.internal_id) if link.mother: sample_data["mother"] = link.mother.internal_id if link.father: @@ -276,9 +268,7 @@ def link_sample(self, fastq_handler: BaseFastqHandler, sample: str, case: str): def panel(self, family_obj: models.Family) -> List[str]: """Create the aggregated panel file.""" - all_panels = self.convert_panels( - family_obj.customer.internal_id, family_obj.panels -
) + all_panels = self.convert_panels(family_obj.customer.internal_id, family_obj.panels) bed_lines = self.scout.export_panels(all_panels) return bed_lines @@ -322,24 +312,20 @@ def _get_latest_raw_file(self, family_id: str, tag: str) -> Any: def _open_bundle_file(self, relative_file_path: str) -> Any: """Open a bundle file and return it as a Python object.""" - full_file_path = self.pather( - self.deliver.get_post_analysis_files_root_dir() - ).joinpath(relative_file_path) + full_file_path = self.pather(self.deliver.get_post_analysis_files_root_dir()).joinpath( + relative_file_path + ) open_file = self.yaml_loader(self.pather(full_file_path).open()) return open_file def get_latest_metadata(self, family_id: str) -> dict: """Get the latest trending data for a family.""" - mip_config_raw = self._get_latest_raw_file( - family_id=family_id, tag="mip-config" - ) + mip_config_raw = self._get_latest_raw_file(family_id=family_id, tag="mip-config") qcmetrics_raw = self._get_latest_raw_file(family_id=family_id, tag="qcmetrics") - sampleinfo_raw = self._get_latest_raw_file( - family_id=family_id, tag="sampleinfo" - ) + sampleinfo_raw = self._get_latest_raw_file(family_id=family_id, tag="sampleinfo") trending = dict() diff --git a/cg/meta/workflow/mip_rna.py b/cg/meta/workflow/mip_rna.py index b296416070..0d9189cd51 100644 --- a/cg/meta/workflow/mip_rna.py +++ b/cg/meta/workflow/mip_rna.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import gzip import logging import re @@ -66,8 +65,7 @@ def run(self, family_obj: models.Family, **kwargs): external = link_obj.sample.application_version.application.is_external if downsampled or external: self.LOG.info( - "%s: downsampled/external - skip evaluation", - link_obj.sample.internal_id, + "%s: downsampled/external - skip evaluation", link_obj.sample.internal_id ) kwargs["skip_evaluation"] = True break @@ -226,24 +224,20 @@ def _get_latest_raw_file(self, family_id: str, tag: str) -> Any: def _open_bundle_file(self, relative_file_path: str) -> Any: """Open a bundle file and return it as a Python object.""" - full_file_path = self.pather( - self.deliver.get_post_analysis_files_root_dir() - ).joinpath(relative_file_path) + full_file_path = self.pather(self.deliver.get_post_analysis_files_root_dir()).joinpath( + relative_file_path + ) open_file = self.yaml_loader(self.pather(full_file_path).open()) return open_file def get_latest_metadata(self, family_id: str) -> dict: """Get the latest trending data for a family.""" - mip_config_raw = self._get_latest_raw_file( - family_id=family_id, tag="mip-config" - ) + mip_config_raw = self._get_latest_raw_file(family_id=family_id, tag="mip-config") qcmetrics_raw = self._get_latest_raw_file(family_id=family_id, tag="qcmetrics") - sampleinfo_raw = self._get_latest_raw_file( - family_id=family_id, tag="sampleinfo" - ) + sampleinfo_raw = self._get_latest_raw_file(family_id=family_id, tag="sampleinfo") trending = dict() diff --git a/cg/server/__init__.py b/cg/server/__init__.py index 40a96afc6f..e69de29bb2 100644 --- a/cg/server/__init__.py +++ b/cg/server/__init__.py @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/cg/server/app.py b/cg/server/app.py index b835e8145e..36eec0490f 100644 --- a/cg/server/app.py +++ b/cg/server/app.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import coloredlogs from flask import Flask, redirect, url_for, session from flask_admin.base import AdminIndexView @@ -12,7 +11,7 @@ def create_app(): """Generate a Flask application.""" - app = Flask(__name__, template_folder='templates') + app = Flask(__name__,
template_folder="templates") _load_config(app) _configure_extensions(app) _register_blueprints(app) @@ -21,61 +20,61 @@ def create_app(): def _load_config(app): - app.config.from_object(__name__.replace('app', 'config')) + app.config.from_object(__name__.replace("app", "config")) def _configure_extensions(app: Flask): _initialize_logging(app) - certs_resp = requests.get('https://www.googleapis.com/oauth2/v1/certs') - app.config['GOOGLE_OAUTH_CERTS'] = certs_resp.json() + certs_resp = requests.get("https://www.googleapis.com/oauth2/v1/certs") + app.config["GOOGLE_OAUTH_CERTS"] = certs_resp.json() ext.cors.init_app(app) ext.db.init_app(app) ext.lims.init_app(app) - if app.config['OSTICKET_API_KEY']: + if app.config["OSTICKET_API_KEY"]: ext.osticket.init_app(app) - ext.admin.init_app(app, index_view=AdminIndexView(endpoint='admin')) + ext.admin.init_app(app, index_view=AdminIndexView(endpoint="admin")) def _initialize_logging(app): - coloredlogs.install(level='DEBUG' if app.debug else 'INFO') + coloredlogs.install(level="DEBUG" if app.debug else "INFO") def _register_blueprints(app: Flask): - if not app.config['CG_ENABLE_ADMIN']: + if not app.config["CG_ENABLE_ADMIN"]: return oauth_bp = make_google_blueprint( - client_id=app.config['GOOGLE_OAUTH_CLIENT_ID'], - client_secret=app.config['GOOGLE_OAUTH_CLIENT_SECRET'], - scope=['openid', 'https://www.googleapis.com/auth/userinfo.email'], + client_id=app.config["GOOGLE_OAUTH_CLIENT_ID"], + client_secret=app.config["GOOGLE_OAUTH_CLIENT_SECRET"], + scope=["openid", "https://www.googleapis.com/auth/userinfo.email"], ) @oauth_authorized.connect_via(oauth_bp) def logged_in(blueprint, token): """Called when the user logs in via Google OAuth.""" - resp = google.get('/oauth2/v1/userinfo?alt=json') + resp = google.get("/oauth2/v1/userinfo?alt=json") assert resp.ok, resp.text user_data = resp.json() - session['user_email'] = user_data['email'] + session["user_email"] = user_data["email"] app.register_blueprint(api.BLUEPRINT) _register_admin_views() - app.register_blueprint(invoices.BLUEPRINT, url_prefix='/invoices') + app.register_blueprint(invoices.BLUEPRINT, url_prefix="/invoices") - app.register_blueprint(oauth_bp, url_prefix='/login') + app.register_blueprint(oauth_bp, url_prefix="/login") - @app.route('/') + @app.route("/") def index(): - return redirect(url_for('admin.index')) + return redirect(url_for("admin.index")) - @app.route('/logout') + @app.route("/logout") def logout(): """Log out the user.""" - session['user_email'] = None - return redirect(url_for('index')) + session["user_email"] = None + return redirect(url_for("index")) def _register_admin_views(): diff --git a/cg/server/auto.py b/cg/server/auto.py index a10116e93e..793f52dc26 100644 --- a/cg/server/auto.py +++ b/cg/server/auto.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from .app import create_app app = create_app() diff --git a/cg/server/config.py b/cg/server/config.py index 714cf749f0..cd2c54034c 100644 --- a/cg/server/config.py +++ b/cg/server/config.py @@ -11,9 +11,7 @@ SQLALCHEMY_TRACK_MODIFICATIONS = "FLASK_DEBUG" in os.environ # server -CG_ENABLE_ADMIN = ("FLASK_DEBUG" in os.environ) or ( - os.environ.get("CG_ENABLE_ADMIN") == "1" -) +CG_ENABLE_ADMIN = ("FLASK_DEBUG" in os.environ) or (os.environ.get("CG_ENABLE_ADMIN") == "1") # lims LIMS_HOST = os.environ["LIMS_HOST"] diff --git a/cg/server/ext.py b/cg/server/ext.py index 46f6ba6bed..cef7c97e2c 100644 --- a/cg/server/ext.py +++ b/cg/server/ext.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from flask_admin import Admin from 
flask_alchy import Alchy from flask_cors import CORS @@ -13,24 +12,23 @@ class CgAlchy(Alchy, api.CoreHandler): class FlaskLims(LimsAPI): - def __init__(self, app=None): if app: self.init_app(app) def init_app(self, app): config = { - 'lims': { - 'host': app.config['LIMS_HOST'], - 'username': app.config['LIMS_USERNAME'], - 'password': app.config['LIMS_PASSWORD'], + "lims": { + "host": app.config["LIMS_HOST"], + "username": app.config["LIMS_USERNAME"], + "password": app.config["LIMS_PASSWORD"], } } super(FlaskLims, self).__init__(config) -cors = CORS(resources={r"/api/*": {'origins': '*'}}, supports_credentials=True) +cors = CORS(resources={r"/api/*": {"origins": "*"}}, supports_credentials=True) db = CgAlchy(Model=models.Model) -admin = Admin(name='Clinical Genomics') +admin = Admin(name="Clinical Genomics") lims = FlaskLims() osticket = OsTicket() diff --git a/cg/store/__init__.py b/cg/store/__init__.py index 47234daf44..714994e4c7 100644 --- a/cg/store/__init__.py +++ b/cg/store/__init__.py @@ -1,2 +1 @@ -# -*- coding: utf-8 -*- from .api import Store diff --git a/cg/store/api/__init__.py b/cg/store/api/__init__.py index 3b6379c71d..57949d724d 100644 --- a/cg/store/api/__init__.py +++ b/cg/store/api/__init__.py @@ -1,2 +1 @@ -# -*- coding: utf-8 -*- from .core import CoreHandler, Store diff --git a/cg/store/api/add.py b/cg/store/api/add.py index f02aff1a4d..43af4b280d 100644 --- a/cg/store/api/add.py +++ b/cg/store/api/add.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging import datetime as dt from typing import List @@ -38,14 +37,10 @@ def add_customer( ) return new_customer - def add_customer_group( - self, internal_id: str, name: str, **kwargs - ) -> models.CustomerGroup: + def add_customer_group(self, internal_id: str, name: str, **kwargs) -> models.CustomerGroup: """Build a new customer group record.""" - new_customer_group = self.CustomerGroup( - internal_id=internal_id, name=name, **kwargs - ) + new_customer_group = self.CustomerGroup(internal_id=internal_id, name=name, **kwargs) return new_customer_group def add_user( @@ -88,9 +83,7 @@ def add_version( ) -> models.ApplicationVersion: """Build a new application version record.""" - new_record = self.ApplicationVersion( - version=version, valid_from=valid_from, **kwargs - ) + new_record = self.ApplicationVersion(version=version, valid_from=valid_from, **kwargs) for price_key in ["standard", "priority", "express", "research"]: setattr(new_record, f"price_{price_key}", prices[price_key]) new_record.application = application @@ -147,9 +140,7 @@ def add_sample( ) return new_sample - def add_family( - self, name: str, panels: List[str], priority: str = "standard" - ) -> models.Family: + def add_family(self, name: str, panels: List[str], priority: str = "standard") -> models.Family: """Build a new Family record.""" # generate a unique family id @@ -161,9 +152,7 @@ def add_family( LOG.debug(f"{internal_id} already used - trying another id") priority_db = PRIORITY_MAP[priority] - new_family = self.Family( - internal_id=internal_id, name=name, priority=priority_db - ) + new_family = self.Family(internal_id=internal_id, name=name, priority=priority_db) new_family.panels = panels return new_family @@ -190,10 +179,7 @@ def add_flowcell( """Build a new Flowcell record.""" new_record = self.Flowcell( - name=name, - sequencer_name=sequencer, - sequencer_type=sequencer_type, - sequenced_at=date, + name=name, sequencer_name=sequencer, sequencer_type=sequencer_type, sequenced_at=date ) return new_record @@ -232,11 +218,7 @@ def add_panel( 
"""Build a new panel record.""" new_record = self.Panel( - name=name, - abbrev=abbrev, - current_version=version, - date=date, - gene_count=genes, + name=name, abbrev=abbrev, current_version=version, date=date, gene_count=genes ) new_record.customer = customer return new_record diff --git a/cg/store/api/core.py b/cg/store/api/core.py index fd95438959..59b78370c3 100644 --- a/cg/store/api/core.py +++ b/cg/store/api/core.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import logging import alchy @@ -14,14 +13,20 @@ LOG = logging.getLogger(__name__) -class CoreHandler(AddHandler, FindBasicDataHandler, FindBusinessDataHandler, ResetHandler, - StatusHandler, TrendsHandler): +class CoreHandler( + AddHandler, + FindBasicDataHandler, + FindBusinessDataHandler, + ResetHandler, + StatusHandler, + TrendsHandler, +): """Aggregating class for the store api handlers""" + pass class Store(alchy.Manager, CoreHandler): - def __init__(self, uri): self.uri = uri super(Store, self).__init__(config=dict(SQLALCHEMY_DATABASE_URI=uri), Model=models.Model) diff --git a/cg/store/api/status.py b/cg/store/api/status.py index b0d29e7b00..a66c3bbca7 100644 --- a/cg/store/api/status.py +++ b/cg/store/api/status.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from datetime import datetime, timedelta from typing import List from sqlalchemy import or_, and_ @@ -14,10 +13,8 @@ class StatusHandler(BaseHandler): def samples_to_recieve(self, external=False): """Fetch incoming samples.""" records = ( - self.Sample.query - .join( - models.Sample.application_version, - models.ApplicationVersion.application, + self.Sample.query.join( + models.Sample.application_version, models.ApplicationVersion.application ) .filter( models.Sample.received_at == None, @@ -31,10 +28,8 @@ def samples_to_recieve(self, external=False): def samples_to_prepare(self): """Fetch samples in lab prep queue.""" records = ( - self.Sample.query - .join( - models.Sample.application_version, - models.ApplicationVersion.application, + self.Sample.query.join( + models.Sample.application_version, models.ApplicationVersion.application ) .filter( models.Sample.received_at != None, @@ -50,10 +45,8 @@ def samples_to_prepare(self): def samples_to_sequence(self): """Fetch samples in sequencing.""" records = ( - self.Sample.query - .join( - models.Sample.application_version, - models.ApplicationVersion.application, + self.Sample.query.join( + models.Sample.application_version, models.ApplicationVersion.application ) .filter( models.Sample.prepared_at != None, @@ -69,35 +62,26 @@ def cases_to_mip_analyze(self, limit: int = 50): """Fetch families without analyses where all samples are sequenced.""" families_q = ( - self.Family.query - .outerjoin(models.Analysis) + self.Family.query.outerjoin(models.Analysis) .join(models.Family.links, models.FamilySample.sample) + .filter(or_(models.Sample.is_external, models.Sample.sequenced_at.isnot(None))) .filter( or_( - models.Sample.is_external, - models.Sample.sequenced_at.isnot(None), + models.Sample.data_analysis.is_(None), models.Sample.data_analysis != "Balsamic" ) ) .filter( or_( - models.Sample.data_analysis.is_(None), - models.Sample.data_analysis != 'Balsamic' - ) - ) - .filter( - or_( - models.Family.action == 'analyze', - and_( - models.Family.action.is_(None), - models.Analysis.created_at.is_(None), - ), + models.Family.action == "analyze", + and_(models.Family.action.is_(None), models.Analysis.created_at.is_(None)), ) ) .order_by(models.Family.priority.desc(), models.Family.ordered_at) ) - families = [record for record in 
families_q if self._all_samples_have_sequence_data( - record.links)] + families = [ + record for record in families_q if self._all_samples_have_sequence_data(record.links) + ] return families[:limit] @@ -106,68 +90,59 @@ def cases_to_balsamic_analyze(self, limit: int = 50): # there are two cases when a sample should be analysed: families_q = ( - self.Family.query - .outerjoin(models.Analysis) + self.Family.query.outerjoin(models.Analysis) .join(models.Family.links, models.FamilySample.sample) # the samples must be external or sequenced to be analysed - .filter( - or_( - models.Sample.is_external, - models.Sample.sequenced_at.isnot(None), - ) - ) + .filter(or_(models.Sample.is_external, models.Sample.sequenced_at.isnot(None))) # The data_analysis includes Balsamic - .filter( - models.Sample.data_analysis.like('%Balsamic%') - ) + .filter(models.Sample.data_analysis.like("%Balsamic%")) # 1. family that has been analysed but is now requested for re-analysis # 2. new family that hasn't been analysed yet .filter( or_( - models.Family.action == 'analyze', - and_( - models.Family.action.is_(None), - models.Analysis.created_at.is_(None), - ), + models.Family.action == "analyze", + and_(models.Family.action.is_(None), models.Analysis.created_at.is_(None)), ) ) .order_by(models.Family.priority.desc(), models.Family.ordered_at) ) - families = [record for record in families_q if self._all_samples_have_sequence_data( - record.links)] + families = [ + record for record in families_q if self._all_samples_have_sequence_data(record.links) + ] return families[:limit] - def cases(self, - progress_tracker=None, - internal_id=None, - name=None, - days=0, - case_action=None, - progress_status=None, - priority=None, - customer_id=None, - exclude_customer_id=None, - data_analysis=None, - sample_id=None, - only_received=False, - only_prepared=False, - only_sequenced=False, - only_analysed=False, - only_uploaded=False, - only_delivered=False, - only_delivery_reported=False, - only_invoiced=False, - exclude_received=False, - exclude_prepared=False, - exclude_sequenced=False, - exclude_analysed=False, - exclude_uploaded=False, - exclude_delivered=False, - exclude_delivery_reported=False, - exclude_invoiced=False, - ): + def cases( + self, + progress_tracker=None, + internal_id=None, + name=None, + days=0, + case_action=None, + progress_status=None, + priority=None, + customer_id=None, + exclude_customer_id=None, + data_analysis=None, + sample_id=None, + only_received=False, + only_prepared=False, + only_sequenced=False, + only_analysed=False, + only_uploaded=False, + only_delivered=False, + only_delivery_reported=False, + only_invoiced=False, + exclude_received=False, + exclude_prepared=False, + exclude_sequenced=False, + exclude_analysed=False, + exclude_uploaded=False, + exclude_delivered=False, + exclude_delivery_reported=False, + exclude_invoiced=False, + ): """Fetch cases with and without analyses.""" families_q = self.Family.query @@ -184,10 +159,10 @@ def cases(self, if internal_id: - families_q = families_q.filter(models.Family.internal_id.like('%' + internal_id + '%')) + families_q = families_q.filter(models.Family.internal_id.like("%" + internal_id + "%")) if name: - families_q = families_q.filter(models.Family.name.like('%' + name + '%')) + families_q = families_q.filter(models.Family.name.like("%" + name + "%")) # customer filters if customer_id or exclude_customer_id: @@ -203,9 +178,9 @@ def cases(self, if data_analysis or sample_id:
families_q = families_q.join(models.Family.links, models.FamilySample.sample) if data_analysis: - families_q = families_q.filter(models.Sample.data_analysis.like('%' + - data_analysis + - '%')) + families_q = families_q.filter( + models.Sample.data_analysis.like("%" + data_analysis + "%") + ) if sample_id: families_q = families_q.filter(models.Sample.internal_id.like(sample_id)) @@ -213,8 +188,9 @@ def cases(self, families_q = families_q.outerjoin(models.Family.links, models.FamilySample.sample) # other joins - families_q = families_q.outerjoin(models.Family.analyses, models.Sample.invoice, - models.Sample.flowcells) + families_q = families_q.outerjoin( + models.Family.analyses, models.Sample.invoice, models.Sample.flowcells + ) cases = [] @@ -260,76 +236,139 @@ def cases(self, case_action = record.action total_samples = len(record.links) - total_external_samples = len([link.sample.application_version.application.is_external - for link in record.links if - link.sample.application_version.application.is_external]) + total_external_samples = len( + [ + link.sample.application_version.application.is_external + for link in record.links + if link.sample.application_version.application.is_external + ] + ) total_internal_samples = total_samples - total_external_samples case_external_bool = total_external_samples == total_samples if total_samples > 0: - samples_received = len([link.sample.received_at for link in record.links if - link.sample.received_at is not None]) - samples_prepared = len([link.sample.prepared_at for link in record.links if - link.sample.prepared_at is not None]) - samples_sequenced = len([link.sample.sequenced_at for link in record.links if - link.sample.sequenced_at is not None]) - samples_delivered = len([link.sample.delivered_at for link in record.links if - link.sample.delivered_at is not None]) - samples_invoiced = len([link.sample.invoice.invoiced_at for link in record.links if - link.sample.invoice and link.sample.invoice.invoiced_at]) + samples_received = len( + [ + link.sample.received_at + for link in record.links + if link.sample.received_at is not None + ] + ) + samples_prepared = len( + [ + link.sample.prepared_at + for link in record.links + if link.sample.prepared_at is not None + ] + ) + samples_sequenced = len( + [ + link.sample.sequenced_at + for link in record.links + if link.sample.sequenced_at is not None + ] + ) + samples_delivered = len( + [ + link.sample.delivered_at + for link in record.links + if link.sample.delivered_at is not None + ] + ) + samples_invoiced = len( + [ + link.sample.invoice.invoiced_at + for link in record.links + if link.sample.invoice and link.sample.invoice.invoiced_at + ] + ) samples_to_receive = total_internal_samples samples_to_prepare = total_internal_samples samples_to_sequence = total_internal_samples samples_to_deliver = total_internal_samples - samples_to_invoice = total_samples - len([link.sample.no_invoice for link in - record.links if link.sample.no_invoice]) + samples_to_invoice = total_samples - len( + [link.sample.no_invoice for link in record.links if link.sample.no_invoice] + ) samples_received_bool = samples_received == samples_to_receive samples_prepared_bool = samples_prepared == samples_to_prepare samples_sequenced_bool = samples_sequenced == samples_to_sequence samples_delivered_bool = samples_delivered == samples_to_deliver samples_invoiced_bool = samples_invoiced == samples_to_invoice - samples_data_analyses = list(set(link.sample.data_analysis for link in - record.links)) + samples_data_analyses = list( + 
set(link.sample.data_analysis for link in record.links) + ) if samples_to_receive > 0 and samples_received_bool: - samples_received_at = max([link.sample.received_at for link in record.links if - link.sample.received_at is not None]) + samples_received_at = max( + [ + link.sample.received_at + for link in record.links + if link.sample.received_at is not None + ] + ) if samples_to_prepare > 0 and samples_prepared_bool: - samples_prepared_at = max([link.sample.prepared_at for link in record.links if - link.sample.prepared_at is not None]) + samples_prepared_at = max( + [ + link.sample.prepared_at + for link in record.links + if link.sample.prepared_at is not None + ] + ) if samples_to_sequence > 0 and samples_sequenced_bool: - samples_sequenced_at = max([link.sample.sequenced_at for link in record.links if - link.sample.sequenced_at is not None]) + samples_sequenced_at = max( + [ + link.sample.sequenced_at + for link in record.links + if link.sample.sequenced_at is not None + ] + ) if samples_to_deliver > 0 and samples_delivered_bool: - samples_delivered_at = max([link.sample.delivered_at for link in record.links if - link.sample.delivered_at is not None]) + samples_delivered_at = max( + [ + link.sample.delivered_at + for link in record.links + if link.sample.delivered_at is not None + ] + ) if samples_to_invoice > 0 and samples_invoiced_bool: - samples_invoiced_at = max([link.sample.invoice.invoiced_at for link in - record.links if link.sample.invoice and - link.sample.invoice.invoiced_at]) + samples_invoiced_at = max( + [ + link.sample.invoice.invoiced_at + for link in record.links + if link.sample.invoice and link.sample.invoice.invoiced_at + ] + ) - flowcells = len([flowcell.status - for link in record.links - for flowcell in link.sample.flowcells]) + flowcells = len( + [flowcell.status for link in record.links for flowcell in link.sample.flowcells] + ) - flowcells_status = list(set(flowcell.status - for link in record.links - for flowcell in link.sample.flowcells)) + flowcells_status = list( + set( + flowcell.status + for link in record.links + for flowcell in link.sample.flowcells + ) + ) if flowcells < total_samples: - flowcells_status.append('new') + flowcells_status.append("new") - flowcells_status = ', '.join(flowcells_status) + flowcells_status = ", ".join(flowcells_status) - flowcells_on_disk = len([flowcell.status - for link in record.links - for flowcell in link.sample.flowcells - if flowcell.status == 'ondisk']) + flowcells_on_disk = len( + [ + flowcell.status + for link in record.links + for flowcell in link.sample.flowcells + if flowcell.status == "ondisk" + ] + ) flowcells_on_disk_bool = flowcells_on_disk == total_samples @@ -396,7 +435,8 @@ def cases(self, if progress_tracker: for analysis_obj in progress_tracker.get_latest_logged_analysis( - case_id=record.internal_id): + case_id=record.internal_id + ): if not analysis_status: analysis_completion = round(analysis_obj.progress * 100) @@ -406,8 +446,9 @@ def cases(self, if progress_status and progress_status != analysis_status: continue - is_rerun = self._is_rerun(record, samples_received_at, samples_prepared_at, - samples_sequenced_at) + is_rerun = self._is_rerun( + record, samples_received_at, samples_prepared_at, samples_sequenced_at + ) tat = self._calculate_estimated_turnaround_time( is_rerun, @@ -418,71 +459,73 @@ def cases(self, samples_sequenced_at, analysis_completed_at, analysis_uploaded_at, - samples_delivered_at + samples_delivered_at, ) max_tat = self._get_max_tat(links=record.links) case = { - 'internal_id': 
record.internal_id, - 'name': record.name, - 'ordered_at': record.ordered_at, - 'total_samples': total_samples, - 'total_external_samples': total_external_samples, - 'total_internal_samples': total_internal_samples, - 'case_external_bool': case_external_bool, - 'samples_to_receive': samples_to_receive, - 'samples_to_prepare': samples_to_prepare, - 'samples_to_sequence': samples_to_sequence, - 'samples_to_deliver': samples_to_deliver, - 'samples_to_invoice': samples_to_invoice, - 'samples_data_analyses': samples_data_analyses, - 'samples_received': samples_received, - 'samples_prepared': samples_prepared, - 'samples_sequenced': samples_sequenced, - 'samples_received_at': samples_received_at, - 'samples_prepared_at': samples_prepared_at, - 'samples_sequenced_at': samples_sequenced_at, - 'samples_delivered_at': samples_delivered_at, - 'samples_invoiced_at': samples_invoiced_at, - 'case_action': case_action, - 'analysis_status': analysis_status, - 'analysis_completion': analysis_completion, - 'analysis_completed_at': analysis_completed_at, - 'analysis_uploaded_at': analysis_uploaded_at, - 'samples_delivered': samples_delivered, - 'analysis_delivery_reported_at': analysis_delivery_reported_at, - 'samples_invoiced': samples_invoiced, - 'analysis_pipeline': analysis_pipeline, - 'samples_received_bool': samples_received_bool, - 'samples_prepared_bool': samples_prepared_bool, - 'samples_sequenced_bool': samples_sequenced_bool, - 'analysis_completed_bool': analysis_completed_bool, - 'analysis_uploaded_bool': analysis_uploaded_bool, - 'samples_delivered_bool': samples_delivered_bool, - 'analysis_delivery_reported_bool': analysis_delivery_reported_bool, - 'samples_invoiced_bool': samples_invoiced_bool, - 'flowcells_status': flowcells_status, - 'flowcells_on_disk': flowcells_on_disk, - 'flowcells_on_disk_bool': flowcells_on_disk_bool, - 'tat': tat, - 'is_rerun': is_rerun, - 'max_tat': max_tat + "internal_id": record.internal_id, + "name": record.name, + "ordered_at": record.ordered_at, + "total_samples": total_samples, + "total_external_samples": total_external_samples, + "total_internal_samples": total_internal_samples, + "case_external_bool": case_external_bool, + "samples_to_receive": samples_to_receive, + "samples_to_prepare": samples_to_prepare, + "samples_to_sequence": samples_to_sequence, + "samples_to_deliver": samples_to_deliver, + "samples_to_invoice": samples_to_invoice, + "samples_data_analyses": samples_data_analyses, + "samples_received": samples_received, + "samples_prepared": samples_prepared, + "samples_sequenced": samples_sequenced, + "samples_received_at": samples_received_at, + "samples_prepared_at": samples_prepared_at, + "samples_sequenced_at": samples_sequenced_at, + "samples_delivered_at": samples_delivered_at, + "samples_invoiced_at": samples_invoiced_at, + "case_action": case_action, + "analysis_status": analysis_status, + "analysis_completion": analysis_completion, + "analysis_completed_at": analysis_completed_at, + "analysis_uploaded_at": analysis_uploaded_at, + "samples_delivered": samples_delivered, + "analysis_delivery_reported_at": analysis_delivery_reported_at, + "samples_invoiced": samples_invoiced, + "analysis_pipeline": analysis_pipeline, + "samples_received_bool": samples_received_bool, + "samples_prepared_bool": samples_prepared_bool, + "samples_sequenced_bool": samples_sequenced_bool, + "analysis_completed_bool": analysis_completed_bool, + "analysis_uploaded_bool": analysis_uploaded_bool, + "samples_delivered_bool": samples_delivered_bool, + 
"analysis_delivery_reported_bool": analysis_delivery_reported_bool, + "samples_invoiced_bool": samples_invoiced_bool, + "flowcells_status": flowcells_status, + "flowcells_on_disk": flowcells_on_disk, + "flowcells_on_disk_bool": flowcells_on_disk_bool, + "tat": tat, + "is_rerun": is_rerun, + "max_tat": max_tat, } cases.append(case) - cases_sorted = sorted(cases, key=lambda k: k['tat'], reverse=True) + cases_sorted = sorted(cases, key=lambda k: k["tat"], reverse=True) return cases_sorted @staticmethod def _is_rerun(record, samples_received_at, samples_prepared_at, samples_sequenced_at): - return (len(record.analyses) > 0) or \ - (samples_received_at and samples_received_at < record.ordered_at) or \ - (samples_prepared_at and samples_prepared_at < record.ordered_at) or \ - (samples_sequenced_at and samples_sequenced_at < record.ordered_at) + return ( + (len(record.analyses) > 0) + or (samples_received_at and samples_received_at < record.ordered_at) + or (samples_prepared_at and samples_prepared_at < record.ordered_at) + or (samples_sequenced_at and samples_sequenced_at < record.ordered_at) + ) @staticmethod def _all_samples_have_sequence_data(links: List[models.FamilySample]) -> bool: @@ -491,43 +534,34 @@ def _all_samples_have_sequence_data(links: List[models.FamilySample]) -> bool: def analyses_to_upload(self): """Fetch analyses that haven't been uploaded.""" - records = self.Analysis.query.filter(models.Analysis.completed_at != None, - models.Analysis.uploaded_at == None) + records = self.Analysis.query.filter( + models.Analysis.completed_at != None, models.Analysis.uploaded_at == None + ) return records def observations_to_upload(self): """Fetch observations that haven't been uploaded.""" - families_q = \ - ( - self.Family.query - .join(models.Analysis, models.Family.links, models.FamilySample.sample) - .filter(models.Sample.loqusdb_id.is_(None)) - ) + families_q = self.Family.query.join( + models.Analysis, models.Family.links, models.FamilySample.sample + ).filter(models.Sample.loqusdb_id.is_(None)) return families_q def observations_uploaded(self): """Fetch observations that have been uploaded.""" - families_q = \ - ( - self.Family.query - .join(models.Family.links, models.FamilySample.sample) - .filter(models.Sample.loqusdb_id.isnot(None)) - ) + families_q = self.Family.query.join(models.Family.links, models.FamilySample.sample).filter( + models.Sample.loqusdb_id.isnot(None) + ) return families_q def analyses_to_deliver(self): """Fetch analyses that have been uploaded but not delivered.""" records = ( - self.Analysis.query - .join(models.Family, models.Family.links, models.FamilySample.sample) - .filter( - models.Analysis.uploaded_at.isnot(None), - models.Sample.delivered_at.is_(None) - ) + self.Analysis.query.join(models.Family, models.Family.links, models.FamilySample.sample) + .filter(models.Analysis.uploaded_at.isnot(None), models.Sample.delivered_at.is_(None)) .order_by(models.Analysis.uploaded_at.desc()) ) @@ -536,8 +570,7 @@ def analyses_to_deliver(self): def analyses_to_delivery_report(self): """Fetch analyses that needs the delivery report to be regenerated.""" records = ( - self.Analysis.query - .filter(models.Analysis.uploaded_at) + self.Analysis.query.filter(models.Analysis.uploaded_at) .join(models.Family, models.Family.links, models.FamilySample.sample) .filter( models.Sample.delivered_at.isnot(None), @@ -545,9 +578,9 @@ def analyses_to_delivery_report(self): models.Analysis.delivery_report_created_at.is_(None), and_( 
models.Analysis.delivery_report_created_at.isnot(None),
-                    models.Analysis.delivery_report_created_at < models.Sample.delivered_at
-                )
-            )
+                    models.Analysis.delivery_report_created_at < models.Sample.delivered_at,
+                ),
+            ),
             )
             .order_by(models.Analysis.uploaded_at.desc())
         )

@@ -555,46 +588,30 @@ def samples_to_deliver(self):
         """Fetch samples that have been sequenced but not delivered."""
-        records = (
-            self.Sample.query
-            .filter(
-                models.Sample.sequenced_at != None,
-                models.Sample.delivered_at == None,
-                models.Sample.downsampled_to == None
-            )
+        records = self.Sample.query.filter(
+            models.Sample.sequenced_at != None,
+            models.Sample.delivered_at == None,
+            models.Sample.downsampled_to == None,
         )

         return records

     def samples_not_delivered(self):
         """Fetch samples not delivered."""
-        records = (
-            self.Sample.query
-            .filter(
-                models.Sample.delivered_at == None,
-                models.Sample.downsampled_to == None
-            )
+        records = self.Sample.query.filter(
+            models.Sample.delivered_at == None, models.Sample.downsampled_to == None
         )

         return records

     def samples_not_invoiced(self):
         """Fetch all samples that are not invoiced."""
-        records = (
-            self.Sample.query
-            .filter(
-                models.Sample.downsampled_to == None,
-                models.Sample.invoice_id == None,
-            )
+        records = self.Sample.query.filter(
+            models.Sample.downsampled_to == None, models.Sample.invoice_id == None
         )

         return records

     def samples_not_downsampled(self):
         """Fetch all samples that are not downsampled."""
-        records = (
-            self.Sample.query
-            .filter(
-                models.Sample.downsampled_to == None
-            )
-        )
+        records = self.Sample.query.filter(models.Sample.downsampled_to == None)

         return records

     def microbial_samples_to_invoice(self, customer: models.Customer = None):
@@ -602,17 +619,16 @@

         Returns microbial samples that have been delivered but not invoiced.
         """
-        records = (
-            self.MicrobialSample.query.filter(
-                models.MicrobialSample.delivered_at is not None,
-                models.MicrobialSample.invoice_id == None
-            )
+        records = self.MicrobialSample.query.filter(
+            models.MicrobialSample.delivered_at.isnot(None),
+            models.MicrobialSample.invoice_id == None,
         )
         customers_to_invoice = [record.microbial_order.customer for record in records.all()]
         customers_to_invoice = list(set(customers_to_invoice))
         if customer:
             records = records.join(models.MicrobialOrder).filter(
-                models.MicrobialOrder.customer_id == customer.id)
+                models.MicrobialOrder.customer_id == customer.id
+            )
         return records, customers_to_invoice

     def samples_to_invoice(self, customer: models.Customer = None):
@@ -621,16 +637,17 @@

        Returns samples that have been delivered but not invoiced,
        excluding those that have been marked to skip invoicing.
        """
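        # billable means delivered, not yet invoiced, not marked no_invoice, and not downsampled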
""" - records = ( - self.Sample.query.filter( - models.Sample.delivered_at != None, - models.Sample.invoice_id == None, - models.Sample.no_invoice == False, - models.Sample.downsampled_to == None - ) + records = self.Sample.query.filter( + models.Sample.delivered_at != None, + models.Sample.invoice_id == None, + models.Sample.no_invoice == False, + models.Sample.downsampled_to == None, ) - customers_to_invoice = [record.customer for record in records.all() if - not record.customer.internal_id == 'cust000'] + customers_to_invoice = [ + record.customer + for record in records.all() + if not record.customer.internal_id == "cust000" + ] customers_to_invoice = list(set(customers_to_invoice)) records = records.filter(models.Sample.customer == customer) if customer else records return records, customers_to_invoice @@ -639,48 +656,38 @@ def pools_to_invoice(self, customer: models.Customer = None): """ Fetch pools that should be invoiced. """ - records = ( - self.Pool.query.filter( - models.Pool.invoice_id == None, - models.Pool.no_invoice == False, - models.Pool.delivered_at != None - ) + records = self.Pool.query.filter( + models.Pool.invoice_id == None, + models.Pool.no_invoice == False, + models.Pool.delivered_at != None, ) - customers_to_invoice = [record.customer for record in records.all() if - not record.customer.internal_id == 'cust000'] + customers_to_invoice = [ + record.customer + for record in records.all() + if not record.customer.internal_id == "cust000" + ] customers_to_invoice = list(set(customers_to_invoice)) records = records.filter(models.Pool.customer_id == customer.id) if customer else records return records, customers_to_invoice def pools_to_receive(self): """Fetch pools that have been not yet been received.""" - records = ( - self.Pool.query - .filter( - models.Pool.received_at == None - ) - ) + records = self.Pool.query.filter(models.Pool.received_at == None) return records def pools_to_deliver(self): """Fetch pools that have been not yet been delivered.""" - records = ( - self.Pool.query - .filter( - models.Pool.received_at != None, - models.Pool.delivered_at == None - ) + records = self.Pool.query.filter( + models.Pool.received_at != None, models.Pool.delivered_at == None ) return records def microbial_samples_to_receive(self, external=False): """Fetch microbial samples from statusdb that have no received_at date.""" records = ( - self.MicrobialSample.query - .join( - models.MicrobialSample.application_version, - models.ApplicationVersion.application, + self.MicrobialSample.query.join( + models.MicrobialSample.application_version, models.ApplicationVersion.application ) .filter( models.MicrobialSample.received_at == None, @@ -693,10 +700,8 @@ def microbial_samples_to_receive(self, external=False): def microbial_samples_to_prepare(self, external=False): """Fetch microbial samples from statusdb that have no prepared_at date.""" records = ( - self.MicrobialSample.query - .join( - models.MicrobialSample.application_version, - models.ApplicationVersion.application, + self.MicrobialSample.query.join( + models.MicrobialSample.application_version, models.ApplicationVersion.application ) .filter( models.MicrobialSample.prepared_at == None, @@ -709,10 +714,8 @@ def microbial_samples_to_prepare(self, external=False): def microbial_samples_to_sequence(self, external=False): """Fetch microbial samples from statusdb that have no sequenced_at date.""" records = ( - self.MicrobialSample.query - .join( - models.MicrobialSample.application_version, - models.ApplicationVersion.application, 
+            self.MicrobialSample.query.join(
+                models.MicrobialSample.application_version, models.ApplicationVersion.application
             )
             .filter(
                 models.MicrobialSample.sequenced_at == None,
@@ -725,10 +728,8 @@ def microbial_samples_to_deliver(self, external=False):
         """Fetch microbial samples from statusdb that have no delivered_at date."""
         records = (
-            self.MicrobialSample.query
-            .join(
-                models.MicrobialSample.application_version,
-                models.ApplicationVersion.application,
+            self.MicrobialSample.query.join(
+                models.MicrobialSample.application_version, models.ApplicationVersion.application
             )
             .filter(
                 models.MicrobialSample.delivered_at == None,
@@ -738,17 +739,18 @@
         )
         return records

-    def _calculate_estimated_turnaround_time(self,
-                                             is_rerun,
-                                             external_case_bool,
-                                             analysis_ordered_at,
-                                             samples_received_at,
-                                             samples_prepared_at,
-                                             samples_sequenced_at,
-                                             analysis_completed_at,
-                                             analysis_uploaded_at,
-                                             samples_delivered_at
-                                             ):
+    def _calculate_estimated_turnaround_time(
+        self,
+        is_rerun,
+        external_case_bool,
+        analysis_ordered_at,
+        samples_received_at,
+        samples_prepared_at,
+        samples_sequenced_at,
+        analysis_completed_at,
+        analysis_uploaded_at,
+        samples_delivered_at,
+    ):
         """Calculate estimated turnaround time"""
         if samples_received_at and samples_delivered_at:
             return self._calculate_date_delta(None, samples_received_at, samples_delivered_at)
diff --git a/cg/store/utils.py b/cg/store/utils.py
index 8882c89b95..fbaebeff72 100644
--- a/cg/store/utils.py
+++ b/cg/store/utils.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import logging
 from typing import Callable
diff --git a/tests/apps/balsamic/test_fastqfileconcatenator.py b/tests/apps/balsamic/test_fastqfileconcatenator.py
index 88042b7bd9..f89f2202d6 100644
--- a/tests/apps/balsamic/test_fastqfileconcatenator.py
+++ b/tests/apps/balsamic/test_fastqfileconcatenator.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Test FastqFileConcatenator"""
 from cg.apps.balsamic.fastq import FastqFileConcatenator
diff --git a/tests/apps/balsamic/test_fastqfilenamecreator.py b/tests/apps/balsamic/test_fastqfilenamecreator.py
index 9ce0069766..8afafdf660 100644
--- a/tests/apps/balsamic/test_fastqfilenamecreator.py
+++ b/tests/apps/balsamic/test_fastqfilenamecreator.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Test FastqHandlerBalsamic"""
 import datetime as dt
 import re
@@ -17,11 +16,7 @@ def test_create(valid_fastq_filename_pattern) -> dict:
     undetermined = "u"
     optional_date = dt.datetime.now()
     optional_index = "abcdef"
-    more = {
-        "undetermined": undetermined,
-        "date": optional_date,
-        "index": optional_index,
-    }
+    more = {"undetermined": undetermined, "date": optional_date, "index": optional_index}

     # when calling the method to create a valid filename
     result_filename = FastqHandler.FastqFileNameCreator.create(
diff --git a/tests/apps/balsamic/test_fastqhandler.py b/tests/apps/balsamic/test_fastqhandler.py
index e5c412f45e..f299572333 100644
--- a/tests/apps/balsamic/test_fastqhandler.py
+++ b/tests/apps/balsamic/test_fastqhandler.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Test FastqHandler"""
 import os
 from pathlib import Path
@@ -6,9 +5,7 @@ from cg.apps.balsamic.fastq import FastqHandler

-def test_link_file_count(
-    tmpdir, cg_config, link_family, link_sample, simple_files_data
-):
+def test_link_file_count(tmpdir, cg_config, link_family, link_sample, simple_files_data):
     """Test that linking creates the right number of files"""
linking""" # given some fastq-files belonging to family and sample @@ -26,25 +23,13 @@ def test_link_file_count( # then we should have a new directory with one concatenated file per read direction assert ( - len( - [ - name - for name in os.listdir(link_dir) - if os.path.isfile(os.path.join(link_dir, name)) - ] - ) + len([name for name in os.listdir(link_dir) if os.path.isfile(os.path.join(link_dir, name))]) == 2 ) def test_link_file_content( - tmpdir, - cg_config, - link_family, - link_sample, - simple_files_data, - content_r1, - content_r2, + tmpdir, cg_config, link_family, link_sample, simple_files_data, content_r1, content_r2 ): """Test method to test that balsamic files are linked properly""" @@ -57,9 +42,7 @@ def test_link_file_content( # then the first concatenated file should contain 'ABCD' and the other 'DEFG' linked_files = [ - name - for name in os.listdir(link_dir) - if os.path.isfile(os.path.join(link_dir, name)) + name for name in os.listdir(link_dir) if os.path.isfile(os.path.join(link_dir, name)) ] file_contents = [] @@ -75,13 +58,7 @@ def test_link_file_content( def test_link_file_content_reversed( - tmpdir, - cg_config, - link_family, - link_sample, - simple_files_data_reversed, - content_r1, - content_r2, + tmpdir, cg_config, link_family, link_sample, simple_files_data_reversed, content_r1, content_r2 ): """Test method to test that balsamic files are linked properly""" @@ -94,9 +71,7 @@ def test_link_file_content_reversed( # then the first concatenated file should contain 'ABCD' and the other 'DEFG' linked_files = [ - name - for name in os.listdir(link_dir) - if os.path.isfile(os.path.join(link_dir, name)) + name for name in os.listdir(link_dir) if os.path.isfile(os.path.join(link_dir, name)) ] file_contents = [] diff --git a/tests/apps/hk/conftest.py b/tests/apps/hk/conftest.py index a702c495a2..29422d7c4e 100644 --- a/tests/apps/hk/conftest.py +++ b/tests/apps/hk/conftest.py @@ -6,15 +6,13 @@ @pytest.yield_fixture(scope="function") -def store_housekeeper(tmpdir): +def housekeeper_api(tmpdir): """Setup Housekeeper store.""" root_path = tmpdir.mkdir("bundles") - _store = HousekeeperAPI( - {"housekeeper": {"database": "sqlite://", "root": str(root_path)}} - ) - _store.create_all() - yield _store - _store.drop_all() + _api = HousekeeperAPI({"housekeeper": {"database": "sqlite://", "root": str(root_path)}}) + _api.initialise_db() + yield _api + _api.destroy_db() @pytest.yield_fixture(scope="function") @@ -41,27 +39,20 @@ def bundle_data(): def ensure_bundle(store, bundle_data): _bundle = store.bundle(bundle_data["name"]) - print(3) if not _bundle: _bundle, _version = store.add_bundle(bundle_data) store.add_commit(_bundle, _version) - print(3.1) return _bundle def ensure_version(store: HousekeeperAPI, bundle_data): """utility function to return existing or create an version for tests""" - print(2) _bundle = ensure_bundle(store, bundle_data) - print(2.1) _version = store.last_version(_bundle) - print(2.2) return _version @pytest.yield_fixture(scope="function") -def version_obj(store_housekeeper, bundle_data): - print(1) - _version = ensure_version(store_housekeeper, bundle_data) - print(1.1) +def version_obj(housekeeper_api, bundle_data): + _version = ensure_version(housekeeper_api, bundle_data) return _version.ver diff --git a/tests/apps/hk/test__getattr__.py b/tests/apps/hk/test__getattr__.py new file mode 100644 index 0000000000..a3a9bdb779 --- /dev/null +++ b/tests/apps/hk/test__getattr__.py @@ -0,0 +1,18 @@ +""" Test the __getattr__ override when calling private _store 
""" + +import logging + + +def test_calling_method_on_private_store_give_warning(housekeeper_api, caplog): + """Test that we get a log warning for unwrapped methods""" + + # GIVEN an hk api and a method that is not wrapped + caplog.set_level(logging.WARNING) + + # WHEN we call add_file + housekeeper_api.files_before() + + # THEN the log should contain a warning that we have called something non-wrapped + with caplog.at_level(logging.WARNING): + assert "files_before" in caplog.text + assert "HousekeeperAPI" in caplog.text diff --git a/tests/apps/hk/test_add_file.py b/tests/apps/hk/test_add_file.py index 7d7bce2ede..6b5adac596 100644 --- a/tests/apps/hk/test_add_file.py +++ b/tests/apps/hk/test_add_file.py @@ -8,7 +8,7 @@ from cg.exc import TicketCreationError -def test_add_file_with_flat_tag(store_housekeeper, mocker): +def test_add_file_with_flat_tag(housekeeper_api, mocker): """Test that we can call hk with one tag""" # GIVEN an hk api with a mocked store backing it and a string tag @@ -19,24 +19,24 @@ def test_add_file_with_flat_tag(store_housekeeper, mocker): mocker.patch.object(HousekeeperAPI, "add_commit") # WHEN we call add_file - new_file = store_housekeeper.add_file(file, version_obj, tag) + new_file = housekeeper_api.add_file(file, version_obj, tag) # THEN the file should have been added to hk assert new_file -def test_add_file_with_list_of_tags(store_housekeeper, mocker): - """Test that we can call hk with one tag""" +def test_add_file_with_list_of_tags(housekeeper_api, mocker): + """Test that we can call hk with more than one tags""" # GIVEN an hk api with a mocked store backing it and a string tag version_obj = "version_obj" file = "file" - tag = ["tag1", "tag2"] + tags = ["tag1", "tag2"] mocker.patch.object(HousekeeperAPI, "new_file") mocker.patch.object(HousekeeperAPI, "add_commit") # WHEN we call add_file - new_file = store_housekeeper.add_file(file, version_obj, tag) + new_file = housekeeper_api.add_file(file, version_obj, tags) # THEN the file should have been added to hk assert new_file diff --git a/tests/apps/lims/test_apps_lims_api.py b/tests/apps/lims/test_apps_lims_api.py index 8d57f47a62..3c7fb65ad6 100644 --- a/tests/apps/lims/test_apps_lims_api.py +++ b/tests/apps/lims/test_apps_lims_api.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import datetime as dt diff --git a/tests/apps/lims/test_apps_lims_orderform.py b/tests/apps/lims/test_apps_lims_orderform.py index a16f30a66a..1a5990753e 100644 --- a/tests/apps/lims/test_apps_lims_orderform.py +++ b/tests/apps/lims/test_apps_lims_orderform.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from cg.apps.lims import orderform diff --git a/tests/apps/mip/snapshots/snap_test_apps_mip_files.py b/tests/apps/mip/snapshots/snap_test_apps_mip_files.py index 05839b1716..e2c054c6f6 100644 --- a/tests/apps/mip/snapshots/snap_test_apps_mip_files.py +++ b/tests/apps/mip/snapshots/snap_test_apps_mip_files.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # snapshottest: v1 - https://goo.gl/zC4yUc from __future__ import unicode_literals @@ -37,9 +36,7 @@ "mark_duplicates": [ "/path/to/stuff/rare-disease/cases/case_id/analysis/files/sample_id_1_lanes_1234_trim_star_sorted_merged_md_metric" ], - "salmon_quant": [ - "/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf" - ], + "salmon_quant": ["/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf"], "star_fusion": "/path/to/stuff/rare-disease/cases/case_id/analysis/files/star-fusion.fusion_predictions.tsv", "stringtie_ar": [ 
"/path/to/stuff/rare-disease/cases/case_id/analysis/files/sample_id_1_lanes_1234_trim_star_sorted_merged_strg.gtf" diff --git a/tests/apps/mip/test_apps_mip_files.py b/tests/apps/mip/test_apps_mip_files.py index 1fcd3d2de5..36526d96d8 100644 --- a/tests/apps/mip/test_apps_mip_files.py +++ b/tests/apps/mip/test_apps_mip_files.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Test MIP RNA files""" from snapshottest import Snapshot diff --git a/tests/apps/mip/test_mip_fastqhandler.py b/tests/apps/mip/test_mip_fastqhandler.py index ec5342e98b..b0572de68d 100644 --- a/tests/apps/mip/test_mip_fastqhandler.py +++ b/tests/apps/mip/test_mip_fastqhandler.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Test FastqHandler""" from datetime import datetime @@ -29,19 +28,14 @@ def ensure_application_version(disk_store, application_tag="dummy_tag"): application = disk_store.application(tag=application_tag) if not application: application = disk_store.add_application( - tag=application_tag, - category="wgs", - percent_kth=80, - description="dummy_description", + tag=application_tag, category="wgs", percent_kth=80, description="dummy_description" ) disk_store.add_commit(application) prices = {"standard": 10, "priority": 20, "express": 30, "research": 5} version = disk_store.application_version(application, 1) if not version: - version = disk_store.add_version( - application, 1, valid_from=datetime.now(), prices=prices - ) + version = disk_store.add_version(application, 1, valid_from=datetime.now(), prices=prices) disk_store.add_commit(version) return version.id diff --git a/tests/apps/tb/mip/test_get_files.py b/tests/apps/tb/mip/test_get_files.py index 0439caf1af..dc39785876 100644 --- a/tests/apps/tb/mip/test_get_files.py +++ b/tests/apps/tb/mip/test_get_files.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Test get MIP files""" from pathlib import Path diff --git a/tests/apps/usalt/test_usalt_fastqfilenamecreator.py b/tests/apps/usalt/test_usalt_fastqfilenamecreator.py index bce0e2c767..e3f53cde9a 100644 --- a/tests/apps/usalt/test_usalt_fastqfilenamecreator.py +++ b/tests/apps/usalt/test_usalt_fastqfilenamecreator.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Test FastqHandlerUsalt""" import re diff --git a/tests/apps/usalt/test_usalt_fastqhandler.py b/tests/apps/usalt/test_usalt_fastqhandler.py index 10195617dc..7692019de6 100644 --- a/tests/apps/usalt/test_usalt_fastqhandler.py +++ b/tests/apps/usalt/test_usalt_fastqhandler.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Test FastqHandler""" import os from pathlib import Path @@ -6,9 +5,7 @@ from cg.apps.usalt.fastq import FastqHandler -def test_fastq_link_file_count( - tmpdir, cg_config, link_case, link_sample, simple_files_data -): +def test_fastq_link_file_count(tmpdir, cg_config, link_case, link_sample, simple_files_data): """Test method to test that the right number of files are created by linking""" # given some fastq-files belonging to family and sample diff --git a/tests/cli/workflow/balsamic/conftest.py b/tests/cli/workflow/balsamic/conftest.py index f1636dca2f..abee5d4cbc 100644 --- a/tests/cli/workflow/balsamic/conftest.py +++ b/tests/cli/workflow/balsamic/conftest.py @@ -1,5 +1,4 @@ """Fixtures for cli balsamic tests""" -from datetime import datetime import pytest from cg.apps.balsamic.fastq import FastqHandler @@ -8,6 +7,8 @@ from cg.meta.workflow.balsamic import AnalysisAPI from cg.store import Store, models +from tests.store_helpers import ensure_bed_version, ensure_customer, add_sample, add_family + @pytest.fixture def 
balsamic_context(balsamic_store) -> dict:
@@ -124,19 +125,13 @@ def balsamic_store(base_store: Store, lims_api) -> Store:
     _store = base_store

     case = add_family(_store, "balsamic_case")
-    tumour_sample = add_sample(
-        _store, "tumour_sample", is_tumour=True, application_type="tgs"
-    )
-    normal_sample = add_sample(
-        _store, "normal_sample", is_tumour=False, application_type="tgs"
-    )
+    tumour_sample = add_sample(_store, "tumour_sample", is_tumour=True, application_type="tgs")
+    normal_sample = add_sample(_store, "normal_sample", is_tumour=False, application_type="tgs")
     _store.relate_sample(case, tumour_sample, status="unknown")
     _store.relate_sample(case, normal_sample, status="unknown")

     case = add_family(_store, "mip_case")
-    normal_sample = add_sample(
-        _store, "normal_sample", is_tumour=False, data_analysis="mip"
-    )
+    normal_sample = add_sample(_store, "normal_sample", is_tumour=False, data_analysis="mip")
     _store.relate_sample(case, normal_sample, status="unknown")

     bed_name = lims_api.capture_kit(tumour_sample.internal_id)
@@ -174,127 +169,10 @@ def balsamic_case(analysis_store) -> models.Family:

 @pytest.fixture(scope="function")
 def balsamic_case_wgs(analysis_store) -> models.Family:
     """case with balsamic data_type"""
-    return analysis_store.find_family(
-        ensure_customer(analysis_store), "balsamic_case_wgs"
-    )
+    return analysis_store.find_family(ensure_customer(analysis_store), "balsamic_case_wgs")


 @pytest.fixture(scope="function")
 def mip_case(analysis_store) -> models.Family:
     """case with mip data_type"""
     return analysis_store.find_family(ensure_customer(analysis_store), "mip_case")
-
-
-def ensure_application_version(
-    disk_store, application_tag="dummy_tag", application_type="tgs"
-):
-    """utility function to return existing or create application version for tests"""
-    application = disk_store.application(tag=application_tag)
-    if not application:
-        application = disk_store.add_application(
-            tag=application_tag,
-            category=application_type,
-            percent_kth=80,
-            description="dummy_description",
-        )
-        disk_store.add_commit(application)
-
-    prices = {"standard": 10, "priority": 20, "express": 30, "research": 5}
-    version = disk_store.application_version(application, 1)
-    if not version:
-        version = disk_store.add_version(
-            application, 1, valid_from=datetime.now(), prices=prices
-        )
-
-    disk_store.add_commit(version)
-    return version
-
-
-def ensure_bed_version(disk_store, bed_name="dummy_bed"):
-    """utility function to return existing or create bed version for tests"""
-    bed = disk_store.bed(name=bed_name)
-    if not bed:
-        bed = disk_store.add_bed(name=bed_name)
-        disk_store.add_commit(bed)
-
-    version = disk_store.latest_bed_version(bed_name)
-    if not version:
-        version = disk_store.add_bed_version(
-            bed, 1, "dummy_filename", shortname=bed_name
-        )
-        disk_store.add_commit(version)
-    return version
-
-
-def ensure_customer(disk_store, customer_id="cust_test"):
-    """utility function to return existing or create customer for tests"""
-    customer_group = disk_store.customer_group("dummy_group")
-    if not customer_group:
-        customer_group = disk_store.add_customer_group("dummy_group", "dummy group")
-
-    customer = disk_store.add_customer(
-        internal_id=customer_id,
-        name="Test Customer",
-        scout_access=False,
-        customer_group=customer_group,
-        invoice_address="dummy_address",
-        invoice_reference="dummy_reference",
-    )
-    disk_store.add_commit(customer)
-    customer = disk_store.customer(customer_id)
-    return customer
-
-
-def add_sample(
-    store,
-    sample_id="sample_test",
-    
gender="female", - is_tumour=False, - data_analysis="balsamic", - application_tag="dummy_tag", - application_type="tgs", -): - """utility function to add a sample to use in tests""" - customer = ensure_customer(store) - application_version_id = ensure_application_version( - store, application_tag=application_tag, application_type=application_type - ).id - sample = store.add_sample( - name=sample_id, - sex=gender, - tumour=is_tumour, - sequenced_at=datetime.now(), - data_analysis=data_analysis, - ) - - sample.application_version_id = application_version_id - sample.customer = customer - store.add_commit(sample) - return sample - - -def ensure_panel(disk_store, panel_id="panel_test", customer_id="cust_test"): - """utility function to add a panel to use in tests""" - customer = ensure_customer(disk_store, customer_id) - panel = disk_store.panel(panel_id) - if not panel: - panel = disk_store.add_panel( - customer=customer, - name=panel_id, - abbrev=panel_id, - version=1.0, - date=datetime.now(), - genes=1, - ) - disk_store.add_commit(panel) - return panel - - -def add_family(disk_store, family_id="family_test", customer_id="cust_test"): - """utility function to add a family to use in tests""" - panel = ensure_panel(disk_store) - customer = ensure_customer(disk_store, customer_id) - family = disk_store.add_family(name=family_id, panels=panel.name) - family.customer = customer - disk_store.add_commit(family) - return family diff --git a/tests/cli/workflow/balsamic/store/conftest.py b/tests/cli/workflow/balsamic/store/conftest.py new file mode 100644 index 0000000000..c1fc36356c --- /dev/null +++ b/tests/cli/workflow/balsamic/store/conftest.py @@ -0,0 +1,157 @@ +"""Fixtures for cli balsamic tests""" +from datetime import datetime +import pytest + +from cg.apps.hk import HousekeeperAPI +from cg.store import Store, models +from cg.apps.tb import TrailblazerAPI + +from tests.store_helpers import ensure_customer, add_family, add_sample + + +@pytest.fixture +def balsamic_store_context(balsamic_store, balsamic_case) -> dict: + """context to use in cli""" + return { + "hk_api": MockHouseKeeper(balsamic_case.internal_id), + "db": balsamic_store, + "tb_api": MockTB(), + "balsamic": {"root": "root", "conda_env": "conda_env"}, + } + + +class MockTB(TrailblazerAPI): + """Mock of trailblazer """ + + def __init__(self): + """Override TrailblazerAPI __init__ to avoid default behaviour""" + + def analyses( + self, + *, + family: str = None, + query: str = None, + status: str = None, + deleted: bool = None, + temp: bool = False, + before: datetime = None, + is_visible: bool = None, + workflow=None + ): + """Override TrailblazerAPI analyses method to avoid default behaviour""" + return [] + + +class MockHouseKeeper(HousekeeperAPI): + """Mock HousekeeperAPI""" + + def __init__(self, bundle_name): + """Override HousekeeperAPI method to avoid default behaviour""" + self._store = MockHousekeeperStore() + self.bundle_name = bundle_name + self.bundle_data = None + self.root_dir = "root_dir" + + def get_files(self, bundle: str, tags: list, version: int = None): + """return a list of mocked files""" + del tags, bundle, version + return [MockFile()] + + def add_bundle(self, data: dict): + """fake adding a bundle in housekeeper""" + + if not self.bundle_data or self.bundle_data["name"] != data["name"]: + self.bundle_data = data + return MockBundle(data=data, name=self.bundle_name), MockVersion() + + return None + + +class MockHousekeeperStore: + """Mock Store of Housekeeper""" + + def __init__(self): + """Override __init__ 
to avoid default behaviour"""
+        self.root_dir = ""
+
+    def add_commit(self, *pargs, **kwargs):
+        """Implements add_commit to allow it to be used in HousekeeperAPI"""
+
+
+class MockBundle:
+    """Mock Bundle"""
+
+    def __init__(self, data, name):
+        """Implement minimal set of properties to allow it to be used in test"""
+        self.name = name
+        self._data = data
+
+
+class MockVersion:
+    """Mock Version"""
+
+    def __init__(self):
+        """Implement minimal set of properties to allow it to be used in test"""
+        self.created_at = datetime.now()
+        self.included_at = None
+        self.relative_root_dir = ""
+        self.files = []
+
+
+class MockFile:
+    """Mock File"""
+
+    def __init__(self, path=""):
+        """Implement minimal set of properties to allow it to be used in test"""
+        self.path = path
+        self.full_path = path
+
+
+@pytest.fixture(scope="function")
+def balsamic_store(base_store: Store) -> Store:
+    """real store to be used in tests"""
+    _store = base_store
+
+    case = add_family(_store, "balsamic_case")
+    tumour_sample = add_sample(_store, "tumour_sample", is_tumour=True)
+    normal_sample = add_sample(_store, "normal_sample", is_tumour=False)
+    _store.relate_sample(case, tumour_sample, status="unknown")
+    _store.relate_sample(case, normal_sample, status="unknown")
+
+    case = add_family(_store, "mip_case")
+    normal_sample = add_sample(_store, "normal_sample", is_tumour=False, data_analysis="mip")
+    _store.relate_sample(case, normal_sample, status="unknown")
+
+    _store.commit()
+
+    return _store
+
+
+@pytest.fixture(scope="function")
+def deliverables_file():
+    """Return a balsamic deliverables file"""
+    return "tests/fixtures/apps/balsamic/case/metadata.yml"
+
+
+@pytest.fixture(scope="function")
+def deliverables_file_directory():
+    """Return a balsamic deliverables file containing a directory"""
+    return "tests/fixtures/apps/balsamic/case/metadata_directory.yml"
+
+
+@pytest.fixture(scope="function")
+def deliverables_file_tags():
+    """Return a balsamic deliverables file containing one file with two tags"""
+    return "tests/fixtures/apps/balsamic/case/metadata_file_tags.yml"
+
+
+@pytest.fixture(scope="function")
+def balsamic_case(analysis_store) -> models.Family:
+    """case with balsamic data_type"""
+    return analysis_store.find_family(ensure_customer(analysis_store), "balsamic_case")
+
+
+@pytest.fixture(scope="function")
+def mip_case(analysis_store) -> models.Family:
+    """case with mip data_type"""
+    return analysis_store.find_family(ensure_customer(analysis_store), "mip_case")
diff --git a/tests/cli/workflow/balsamic/store/test_generate_deliverables_file.py b/tests/cli/workflow/balsamic/store/test_generate_deliverables_file.py
new file mode 100644
index 0000000000..f26bd643f7
--- /dev/null
+++ b/tests/cli/workflow/balsamic/store/test_generate_deliverables_file.py
@@ -0,0 +1,69 @@
+"""Tests for the cli method that generates the balsamic deliverables file"""
+
+from cg.cli.workflow.balsamic.store import generate_deliverables_file
+
+EXIT_SUCCESS = 0
+
+
+def test_without_options(cli_runner, balsamic_context):
+    """Test command without any options"""
+
+    # GIVEN
+
+    # WHEN dry running without anything specified
+    result = cli_runner.invoke(generate_deliverables_file, obj=balsamic_context)
+
+    # THEN command should mention the missing argument
+    assert result.exit_code != EXIT_SUCCESS
+    assert "Missing argument" in result.output
+
+
+def test_dry(cli_runner, balsamic_context, balsamic_case):
+    """Test command with --dry-run option"""
+
+    # GIVEN case-id
+    case_id = balsamic_case.internal_id
+
+    # WHEN running with --dry-run specified
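+    # (--dry-run prints the balsamic command instead of executing it)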
+    result = cli_runner.invoke(
+        generate_deliverables_file, [case_id, "--dry-run"], obj=balsamic_context
+    )
+
+    # THEN command should print the balsamic command-string to generate the deliverables file
+    assert result.exit_code == EXIT_SUCCESS
+    assert "plugins deliver" in result.output
+    assert case_id in result.output
+
+
+def test_without_file(cli_runner, balsamic_context, balsamic_case):
+    """Test command to generate deliverables file without supplying the config"""
+
+    # GIVEN no meta file for a balsamic analysis
+
+    # WHEN calling generate deliverables file
+    result = cli_runner.invoke(
+        generate_deliverables_file, [balsamic_case.internal_id, "--dry-run"], obj=balsamic_context
+    )
+
+    # THEN the command-string to create the deliverables file should be printed
+    assert result.exit_code == EXIT_SUCCESS
+    assert "plugins deliver" in result.output
+    assert "--sample-config" in result.output
+    assert ".json" in result.output
+
+
+def test_with_missing_case(cli_runner, balsamic_context):
+    """Test command with a case id that is not in the database"""
+
+    # GIVEN case-id not in database
+    case_id = "soberelephant"
+
+    # WHEN running the command
+    result = cli_runner.invoke(
+        generate_deliverables_file, [case_id, "--dry-run"], obj=balsamic_context
+    )
+
+    # THEN the command should print that the case could not be found
+    assert result.exit_code == EXIT_SUCCESS
+
+    assert f"Case {case_id} not found" in result.output
diff --git a/tests/cli/workflow/balsamic/store/test_store.py b/tests/cli/workflow/balsamic/store/test_store.py
new file mode 100644
index 0000000000..0a89bef46f
--- /dev/null
+++ b/tests/cli/workflow/balsamic/store/test_store.py
@@ -0,0 +1,126 @@
+"""Tests for cg.cli.store.balsamic"""
+
+from cg.cli.workflow.balsamic.store import analysis
+
+EXIT_SUCCESS = 0
+
+
+def test_without_options(cli_runner, balsamic_context):
+    """Test command without any options"""
+
+    # GIVEN
+
+    # WHEN dry running without anything specified
+    result = cli_runner.invoke(analysis, obj=balsamic_context)
+
+    # THEN command should mention the missing argument
+    assert result.exit_code != EXIT_SUCCESS
+    assert "Missing argument" in result.output
+
+
+def test_store_analysis_with_empty_file_parameter(
+    cli_runner, balsamic_store_context, balsamic_case
+):
+    """Test store with an empty deliverables-file parameter"""
+
+    # GIVEN
+
+    # WHEN calling store with empty string for meta file path
+    result = cli_runner.invoke(
+        analysis, [balsamic_case.internal_id, "--deliverables-file", ""], obj=balsamic_store_context
+    )
+
+    # THEN the process should not exit ok
+    assert result.exit_code != EXIT_SUCCESS
+    assert "missing file" in result.output
+
+
+def test_store_analysis_with_ok_file_parameter(
+    cli_runner, balsamic_store_context, balsamic_case, deliverables_file
+):
+    """Test store with an existing analysis file"""
+
+    # GIVEN a meta file for a balsamic analysis
+
+    # WHEN calling store with meta file
+    result = cli_runner.invoke(
+        analysis,
+        [balsamic_case.internal_id, "--deliverables-file", deliverables_file],
+        obj=balsamic_store_context,
+    )
+
+    # THEN we should get a message that the files were included in Housekeeper
+    assert result.exit_code == EXIT_SUCCESS
+    assert "included files in Housekeeper" in result.output
+
+
+def test_already_stored_analysis(
+    cli_runner, balsamic_store_context, balsamic_case, deliverables_file
+):
+    """Test store analysis command twice"""
+
+    # GIVEN the analysis has already been stored
+    cli_runner.invoke(
+        analysis,
+        [balsamic_case.internal_id, "--deliverables-file", deliverables_file],
+        obj=balsamic_store_context,
+    )
+
+    # WHEN calling store again for the same case
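+    # (MockHouseKeeper.add_bundle returns None for a bundle name it has already seen)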
+    result = cli_runner.invoke(
+        analysis,
+        [balsamic_case.internal_id, "--deliverables-file", deliverables_file],
+        obj=balsamic_store_context,
+    )
+
+    # THEN we should get a message that the analysis has previously been stored
+    assert result.exit_code != EXIT_SUCCESS
+    assert "analysis version already added" in result.output
+
+
+def test_store_analysis_generates_file_from_directory(
+    cli_runner, balsamic_store_context, balsamic_case, deliverables_file_directory, mocker
+):
+    """Test store analysis with metadata that contains one directory"""
+
+    # GIVEN a meta file for a balsamic analysis containing a directory that should be included
+    mocked_is_dir = mocker.patch("os.path.isdir")
+    mocked_is_dir.return_value = True
+    mock_make_archive = mocker.patch("shutil.make_archive")
+    mock_make_archive.return_value = "file.tar.gz"
+
+    # WHEN calling store with meta file
+    result = cli_runner.invoke(
+        analysis,
+        [balsamic_case.internal_id, "--deliverables-file", deliverables_file_directory],
+        obj=balsamic_store_context,
+    )
+
+    # THEN there should be a file representing the directory in the included bundle
+    assert result.exit_code == EXIT_SUCCESS
+    assert (
+        mock_make_archive.return_value
+        in balsamic_store_context["hk_api"].bundle_data["files"][0]["path"]
+    )
+
+
+def test_store_analysis_includes_file_once(
+    cli_runner, balsamic_store_context, balsamic_case, deliverables_file_tags
+):
+    """Test store analysis with metadata that lists the same file under multiple tags"""
+
+    # GIVEN a meta file for a balsamic analysis containing one file with two tags
+
+    # WHEN calling store with meta file
+    result = cli_runner.invoke(
+        analysis,
+        [balsamic_case.internal_id, "--deliverables-file", deliverables_file_tags],
+        obj=balsamic_store_context,
+    )
+
+    # THEN there should be one file with two tags in the included bundle
+    assert result.exit_code == EXIT_SUCCESS
+    assert len(balsamic_store_context["hk_api"].bundle_data["files"]) == 1
+    assert set(balsamic_store_context["hk_api"].bundle_data["files"][0]["tags"]) == set(
+        ["vcf", "vep"]
+    )
diff --git a/tests/cli/workflow/balsamic/test_cli_balsamic_run.py b/tests/cli/workflow/balsamic/test_run.py
similarity index 87%
rename from tests/cli/workflow/balsamic/test_cli_balsamic_run.py
rename to tests/cli/workflow/balsamic/test_run.py
index d31f033364..3d8f87a317 100644
--- a/tests/cli/workflow/balsamic/test_cli_balsamic_run.py
+++ b/tests/cli/workflow/balsamic/test_run.py
@@ -59,9 +59,7 @@ def test_run_analysis(cli_runner, balsamic_context):
     context = balsamic_context

     # WHEN dry running with option specified
-    result = cli_runner.invoke(
-        run, [case_id, "--dry-run", "--run-analysis"], obj=context
-    )
+    result = cli_runner.invoke(run, [case_id, "--dry-run", "--run-analysis"], obj=context)

     # THEN dry-print should include the option
     assert result.exit_code == EXIT_SUCCESS
@@ -79,9 +77,7 @@ def test_config(cli_runner, balsamic_context):
     context = balsamic_context

     # WHEN dry running with option specified
-    result = cli_runner.invoke(
-        run, [case_id, "--dry-run", option_key, option_value], obj=context
-    )
+    result = cli_runner.invoke(run, [case_id, "--dry-run", option_key, option_value], obj=context)

     # THEN dry-print should include the option-value but not the case-id
     assert result.exit_code == EXIT_SUCCESS
@@ -100,9 +96,7 @@ def test_email(cli_runner, balsamic_context):
     context = balsamic_context

     # WHEN dry running with option specified
-    result = cli_runner.invoke(
-        run, [case_id, "--dry-run", option_key, option_value], obj=context
-    )
+    result = cli_runner.invoke(run, [case_id, "--dry-run", option_key, option_value], obj=context)

     # THEN dry-print should include the option-value but not the case-id
     assert result.exit_code == EXIT_SUCCESS
@@ -121,9 +115,7 @@ def test_priority(cli_runner, balsamic_context):
     context = balsamic_context

     # WHEN dry running with option specified
-    result = cli_runner.invoke(
-        run, [case_id, "--dry-run", option_key, option_value], obj=context
-    )
+    result = cli_runner.invoke(run, [case_id, "--dry-run", option_key, option_value], obj=context)

     # THEN dry-print should include the option-value
     assert result.exit_code == EXIT_SUCCESS
diff --git a/tests/cli/workflow/balsamic/test_cli_balsamic_start.py b/tests/cli/workflow/balsamic/test_start.py
similarity index 100%
rename from tests/cli/workflow/balsamic/test_cli_balsamic_start.py
rename to tests/cli/workflow/balsamic/test_start.py
diff --git a/tests/cli/workflow/mip_dna/test_cli_mip_dna.py b/tests/cli/workflow/mip_dna/test_cli_mip_dna.py
index 1531381463..99898e958c 100644
--- a/tests/cli/workflow/mip_dna/test_cli_mip_dna.py
+++ b/tests/cli/workflow/mip_dna/test_cli_mip_dna.py
@@ -1,4 +1,4 @@
-"""Tests for cg.cli.store._add_new_complete_analysis_record"""
+"""Tests for cg.cli.store._create_analysis"""
 from datetime import datetime

 import pytest
@@ -25,9 +25,7 @@ def test_add_new_complete_analysis_record(analysis_store: Store, hk_version_obj)
     assert new_analysis["pipeline"] == pipeline


-def test_duplicate_add_new_complete_analysis_record_raises(
-    analysis_store: Store, hk_version_obj
-):
+def test_duplicate_add_new_complete_analysis_record_raises(analysis_store: Store, hk_version_obj):

     # GIVEN a family has samples with data_analysis on them
     pipeline = "test-pipeline"
diff --git a/tests/conftest.py b/tests/conftest.py
index 3253c2a767..aedfa5c047 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -33,11 +33,7 @@
 CRUNCHY_CONFIG = {
     "crunchy": {
         "cram_reference": "/path/to/fasta",
-        "slurm": {
-            "account": "mock_account",
-            "mail_user": "mock_mail",
-            "conda_env": "mock_env",
-        },
+        "slurm": {"account": "mock_account", "mail_user": "mock_mail", "conda_env": "mock_env"},
     }
 }
@@ -78,7 +74,7 @@ def fixture_madeline_output():

 @pytest.yield_fixture(scope="function")
 def madeline_api(madeline_output):
-    """housekeeper_api fixture"""
+    """madeline_api fixture"""
     _api = MockMadelineAPI()
     _api._madeline_outpath = madeline_output
@@ -177,9 +173,7 @@ def files_data(files_raw):
         "sampleinfo": mip_dna_files_api.parse_sampleinfo(files_raw["sampleinfo"]),
         "qcmetrics": mip_dna_files_api.parse_qcmetrics(files_raw["qcmetrics"]),
         "rna_config": mip_dna_files_api.parse_config(files_raw["rna_config"]),
-        "rna_sampleinfo": mip_rna_files_api.parse_sampleinfo_rna(
-            files_raw["rna_sampleinfo"]
-        ),
+        "rna_sampleinfo": mip_rna_files_api.parse_sampleinfo_rna(files_raw["rna_sampleinfo"]),
     }
@@ -365,9 +359,7 @@ def sample_store(base_store) -> Store:
     wgs_app = base_store.application("WGTPCFC030").versions[0]
     for sample in new_samples:
         sample.customer = customer
-        sample.application_version = (
-            external_app if "external" in sample.name else wgs_app
-        )
+        sample.application_version = external_app if "external" in sample.name else wgs_app
     base_store.add_commit(new_samples)
     return base_store
diff --git a/tests/delivery/conftest.py b/tests/delivery/conftest.py
index 9039baecb2..bd0d7fb4b7 100644
--- a/tests/delivery/conftest.py
+++ b/tests/delivery/conftest.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import datetime as dt

 import pytest
diff --git 
a/tests/delivery/test_delivery.py b/tests/delivery/test_delivery.py index 98621c3493..3849272062 100644 --- a/tests/delivery/test_delivery.py +++ b/tests/delivery/test_delivery.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import datetime as dt diff --git a/tests/fixtures/apps/balsamic/case/metadata.yml b/tests/fixtures/apps/balsamic/case/metadata.yml new file mode 100644 index 0000000000..df164e4e59 --- /dev/null +++ b/tests/fixtures/apps/balsamic/case/metadata.yml @@ -0,0 +1,50 @@ +files: + bam: + - /path_to/tinywhale/analysis/bam/tumor.sorted.mrkdup.ralgn.bsrcl.merged.bam + - /path_to/tinywhale/analysis/bam/tumor.merged.bam + - /path_to/tinywhale/analysis/bam/concatenated_ACC1234A1_XXXXXX_R.sorted.mrkdup.exon.cov.bed + - /path_to/tinywhale/analysis/bam/concatenated_ACC1234A1_XXXXXX_R.sorted.mrkdup.cov.bed + cnv: + - /path_to/tinywhale/analysis/cnv/tumor.merged-scatter.pdf + - /path_to/tinywhale/analysis/cnv/tumor.merged-diagram.pdf + - /path_to/tinywhale/analysis/cnv/tumor.merged.cnr + - /path_to/tinywhale/analysis/cnv/tumor.merged.cns + qc: + - /path_to/tinywhale/analysis/qc/multiqc_data + - /path_to/tinywhale/analysis/qc/multiqc_report.html + scout: + - /path_to/tinywhale/analysis/scout/tinywhale.scout.yaml + vcf: + - /path_to/tinywhale/analysis/vcf/SV.somatic.tinywhale.manta.vcf.gz + - /path_to/tinywhale/analysis/vcf/SNV.somatic.tinywhale.vcfmerge.vcf.gz + - /path_to/tinywhale/analysis/vcf/SNV.somatic.tinywhale.vardict.vcf.gz + - /path_to/tinywhale/analysis/vcf/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.haplotypecaller.vcf.gz + - /path_to/tinywhale/analysis/vcf/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.strelka_germline.vcf.gz + - /path_to/tinywhale/analysis/vcf/SNV.somatic.tinywhale.mutect.vcf.gz + - /path_to/tinywhale/analysis/vcf/SV.germline.concatenated_ACC1234A1_XXXXXX_R.manta_germline.vcf.gz + vep: + - /path_to/tinywhale/analysis/vep/SV.somatic.tinywhale.manta.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vcfmerge.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vardict.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.haplotypecaller.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vardict.filterd.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.strelka_germline.vcf.gz + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.mutect.vcf.gz + - /path_to/tinywhale/analysis/vep/SV.germline.concatenated_ACC1234A1_XXXXXX_R.manta_germline.vcf.gz + - /path_to/tinywhale/analysis/vep/SV.somatic.tinywhale.manta.tsv + - /path_to/tinywhale/analysis/vep/SV.germline.concatenated_ACC1234A1_XXXXXX_R.manta_germline.tsv + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.strelka_germline.tsv + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vardict.tsv + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.haplotypecaller.tsv + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.mutect.tsv + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vcfmerge.tsv + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vcfmerge.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vardict.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.strelka_germline.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SV.somatic.tinywhale.manta.tsv_summary.html + - 
/path_to/tinywhale/analysis/vep/SV.germline.concatenated_ACC1234A1_XXXXXX_R.manta_germline.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SNV.germline.concatenated_ACC1234A1_XXXXXX_R.haplotypecaller.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.mutect.tsv_summary.html + - /path_to/tinywhale/analysis/vep/SNV.somatic.tinywhale.vcfmerge.balsamic_stat +meta: + tmb: 0 diff --git a/tests/fixtures/apps/balsamic/case/metadata_directory.yml b/tests/fixtures/apps/balsamic/case/metadata_directory.yml new file mode 100644 index 0000000000..d01692b710 --- /dev/null +++ b/tests/fixtures/apps/balsamic/case/metadata_directory.yml @@ -0,0 +1,3 @@ +files: + qc: + - /path_to/tinywhale/analysis/qc/multiqc_data diff --git a/tests/fixtures/apps/balsamic/case/metadata_file_tags.yml b/tests/fixtures/apps/balsamic/case/metadata_file_tags.yml new file mode 100644 index 0000000000..246de3efd0 --- /dev/null +++ b/tests/fixtures/apps/balsamic/case/metadata_file_tags.yml @@ -0,0 +1,5 @@ +files: + vcf: + - /path_to/tinywhale/analysis/vcf/SV.somatic.tinywhale.manta.vcf.gz + vep: + - /path_to/tinywhale/analysis/vep/SV.somatic.tinywhale.manta.vcf.gz diff --git a/tests/meta/conftest.py b/tests/meta/conftest.py index 7e52f86444..90eff0d9d0 100644 --- a/tests/meta/conftest.py +++ b/tests/meta/conftest.py @@ -38,15 +38,13 @@ def trailblazer_api(tmpdir): @pytest.yield_fixture(scope="function") -def store_housekeeper(tmpdir): +def housekeeper_api(tmpdir): """Setup Housekeeper store.""" root_path = tmpdir.mkdir("bundles") - _store = HousekeeperAPI( - {"housekeeper": {"database": "sqlite://", "root": str(root_path)}} - ) - _store.create_all() - yield _store - _store.drop_all() + _api = HousekeeperAPI({"housekeeper": {"database": "sqlite://", "root": str(root_path)}}) + _api.initialise_db() + yield _api + _api.destroy_db() @pytest.fixture @@ -128,12 +126,8 @@ def analysis_store(base_store, analysis_family): family=family, sample=sample_obj, status=sample_data["status"], - father=base_store.sample(sample_data["father"]) - if sample_data.get("father") - else None, - mother=base_store.sample(sample_data["mother"]) - if sample_data.get("mother") - else None, + father=base_store.sample(sample_data["father"]) if sample_data.get("father") else None, + mother=base_store.sample(sample_data["mother"]) if sample_data.get("mother") else None, ) base_store.add(link) base_store.commit() @@ -277,13 +271,7 @@ def __init__(self): def get_post_analysis_files(self, case: str, version, tags): if tags[0] == "mip-config": - path = ( - "/mnt/hds/proj/bioinfo/bundles/" - + case - + "/2018-01-30/" - + case - + "_config.yaml" - ) + path = "/mnt/hds/proj/bioinfo/bundles/" + case + "/2018-01-30/" + case + "_config.yaml" elif tags[0] == "sampleinfo": path = ( "/mnt/hds/proj/bioinfo/bundles/" @@ -294,11 +282,7 @@ def get_post_analysis_files(self, case: str, version, tags): ) if tags[0] == "qcmetrics": path = ( - "/mnt/hds/proj/bioinfo/bundles/" - + case - + "/2018-01-30/" - + case - + "_qc_metrics.yaml" + "/mnt/hds/proj/bioinfo/bundles/" + case + "/2018-01-30/" + case + "_qc_metrics.yaml" ) return [MockFile(path=path)] @@ -341,9 +325,7 @@ def __init__(self): """Needed to initialise mock variables""" self._make_config_was_called = False - def get_trending( - self, mip_config_raw: dict, qcmetrics_raw: dict, sampleinfo_raw: dict - ) -> dict: + def get_trending(self, mip_config_raw: dict, qcmetrics_raw: dict, sampleinfo_raw: dict) -> dict: if self._get_trending_raises_keyerror: raise KeyError("mockmessage") @@ -400,14 +382,14 @@ 
diff --git a/tests/meta/conftest.py b/tests/meta/conftest.py
index 7e52f86444..90eff0d9d0 100644
--- a/tests/meta/conftest.py
+++ b/tests/meta/conftest.py
@@ -38,15 +38,13 @@ def trailblazer_api(tmpdir):
 
 
 @pytest.yield_fixture(scope="function")
-def store_housekeeper(tmpdir):
+def housekeeper_api(tmpdir):
     """Setup Housekeeper store."""
     root_path = tmpdir.mkdir("bundles")
-    _store = HousekeeperAPI(
-        {"housekeeper": {"database": "sqlite://", "root": str(root_path)}}
-    )
-    _store.create_all()
-    yield _store
-    _store.drop_all()
+    _api = HousekeeperAPI({"housekeeper": {"database": "sqlite://", "root": str(root_path)}})
+    _api.initialise_db()
+    yield _api
+    _api.destroy_db()
 
 
 @pytest.fixture
@@ -128,12 +126,8 @@ def analysis_store(base_store, analysis_family):
             family=family,
             sample=sample_obj,
             status=sample_data["status"],
-            father=base_store.sample(sample_data["father"])
-            if sample_data.get("father")
-            else None,
-            mother=base_store.sample(sample_data["mother"])
-            if sample_data.get("mother")
-            else None,
+            father=base_store.sample(sample_data["father"]) if sample_data.get("father") else None,
+            mother=base_store.sample(sample_data["mother"]) if sample_data.get("mother") else None,
         )
         base_store.add(link)
         base_store.commit()
@@ -277,13 +271,7 @@ def __init__(self):
 
     def get_post_analysis_files(self, case: str, version, tags):
         if tags[0] == "mip-config":
-            path = (
-                "/mnt/hds/proj/bioinfo/bundles/"
-                + case
-                + "/2018-01-30/"
-                + case
-                + "_config.yaml"
-            )
+            path = "/mnt/hds/proj/bioinfo/bundles/" + case + "/2018-01-30/" + case + "_config.yaml"
         elif tags[0] == "sampleinfo":
             path = (
                 "/mnt/hds/proj/bioinfo/bundles/"
                 + case
                 + "/2018-01-30/"
                 + case
                 + "_qc_sample_info.yaml"
             )
         if tags[0] == "qcmetrics":
             path = (
-                "/mnt/hds/proj/bioinfo/bundles/"
-                + case
-                + "/2018-01-30/"
-                + case
-                + "_qc_metrics.yaml"
+                "/mnt/hds/proj/bioinfo/bundles/" + case + "/2018-01-30/" + case + "_qc_metrics.yaml"
             )
 
         return [MockFile(path=path)]
@@ -341,9 +325,7 @@ def __init__(self):
         """Needed to initialise mock variables"""
         self._make_config_was_called = False
 
-    def get_trending(
-        self, mip_config_raw: dict, qcmetrics_raw: dict, sampleinfo_raw: dict
-    ) -> dict:
+    def get_trending(self, mip_config_raw: dict, qcmetrics_raw: dict, sampleinfo_raw: dict) -> dict:
         if self._get_trending_raises_keyerror:
             raise KeyError("mockmessage")
 
@@ -400,14 +382,14 @@ def safe_loader(path):
 
 
 @pytest.yield_fixture(scope="function")
-def analysis_api(analysis_store, store_housekeeper, scout_store):
+def analysis_api(analysis_store, housekeeper_api, scout_store):
     """Setup an analysis API."""
     Path_mock = MockPath("")
     tb_mock = MockTB()
 
     _analysis_api = AnalysisAPI(
         db=analysis_store,
-        hk_api=store_housekeeper,
+        hk_api=housekeeper_api,
         scout_api=scout_store,
         tb_api=tb_mock,
         lims_api=None,
@@ -425,9 +407,7 @@ def deliver_api(analysis_store):
     lims_mock = MockLims()
     hk_mock = MockHouseKeeper()
     hk_mock.add_file(file="/mock/path", version_obj="", tag_name="")
-    hk_mock._files = MockFiles(
-        [MockFile(tags=["case-tag"]), MockFile(tags=["sample-tag", "ADM1"])]
-    )
+    hk_mock._files = MockFiles([MockFile(tags=["case-tag"]), MockFile(tags=["sample-tag", "ADM1"])])
 
     _api = DeliverAPI(
         db=analysis_store,
diff --git a/tests/meta/report/test_status_helper.py b/tests/meta/report/test_status_helper.py
index c03ec33485..1cd903c20d 100644
--- a/tests/meta/report/test_status_helper.py
+++ b/tests/meta/report/test_status_helper.py
@@ -2,6 +2,7 @@
 from datetime import datetime, timedelta
 
 from cg.meta.report.status_helper import StatusHelper
+from tests.store_helpers import add_analysis
 
 
 def test_get_previous_report_version_when_only_one(store):
@@ -71,65 +72,3 @@ def test_second_analysis_when_two(store):
 
     # THEN the version should be 1
     assert report_version == 1
-
-
-def ensure_customer(disk_store, customer_id="cust_test"):
-    """utility function to return existing or create customer for tests"""
-    customer_group = disk_store.customer_group("dummy_group")
-    if not customer_group:
-        customer_group = disk_store.add_customer_group("dummy_group", "dummy group")
-
-    customer = disk_store.add_customer(
-        internal_id=customer_id,
-        name="Test Customer",
-        scout_access=False,
-        customer_group=customer_group,
-        invoice_address="dummy_address",
-        invoice_reference="dummy_reference",
-    )
-    disk_store.add_commit(customer)
-    customer = disk_store.customer(customer_id)
-    return customer
-
-
-def ensure_panel(disk_store, panel_id="panel_test", customer_id="cust_test"):
-    """utility function to add a panel to use in tests"""
-    customer = ensure_customer(disk_store, customer_id)
-    panel = disk_store.panel(panel_id)
-    if not panel:
-        panel = disk_store.add_panel(
-            customer=customer,
-            name=panel_id,
-            abbrev=panel_id,
-            version=1.0,
-            date=datetime.now(),
-            genes=1,
-        )
-        disk_store.add_commit(panel)
-    return panel
-
-
-def add_family(disk_store, family_id="family_test", customer_id="cust_test"):
-    """utility function to add a family to use in tests"""
-    panel = ensure_panel(disk_store)
-    customer = ensure_customer(disk_store, customer_id)
-    family = disk_store.add_family(name=family_id, panels=panel.name)
-    family.customer = customer
-    disk_store.add_commit(family)
-    return family
-
-
-def add_analysis(store, family=None, completed_at=None):
-    """Utility function to add an analysis for tests"""
-
-    if not family:
-        family = add_family(store)
-
-    analysis = store.add_analysis(pipeline="", version="")
-
-    if completed_at:
-        analysis.completed_at = completed_at
-
-    analysis.family = family
-    store.add_commit(analysis)
-    return analysis
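With the helpers moved out of test_status_helper.py into the shared tests/store_helpers.py module (added at the end of this patch), a report test can seed its own analyses in a couple of lines. A hypothetical sketch, where the test name and assertions are invented for illustration:

    from datetime import datetime, timedelta

    from tests.store_helpers import add_analysis, add_family

    def test_family_carries_both_analyses(store):
        # GIVEN a family with two analyses completed a day apart
        family = add_family(store)
        first = add_analysis(store, family=family, completed_at=datetime.now() - timedelta(days=1))
        second = add_analysis(store, family=family, completed_at=datetime.now())

        # THEN both analyses should point at the same family
        assert first.family == second.family == family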
diff --git a/tests/meta/store/snapshots/snap_test_mip_rna_build_bundle.py b/tests/meta/store/snapshots/snap_test_mip_rna_build_bundle.py
index 0b28fb0459..6cd4d84f4b 100644
--- a/tests/meta/store/snapshots/snap_test_mip_rna_build_bundle.py
+++ b/tests/meta/store/snapshots/snap_test_mip_rna_build_bundle.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # snapshottest: v1 - https://goo.gl/zC4yUc
 from __future__ import unicode_literals
 
@@ -98,9 +97,7 @@
         },
         {
             "archive": False,
-            "path": [
-                "/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf"
-            ],
+            "path": ["/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf"],
             "tags": ["salmon-quant", "sample_id_1", "rd-rna"],
         },
         {
@@ -317,9 +314,7 @@
         },
         {
             "archive": False,
-            "path": [
-                "/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf"
-            ],
+            "path": ["/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf"],
             "tags": ["salmon-quant", "sample_id_1", "rd-rna"],
         },
         {
diff --git a/tests/meta/store/test_mip_rna_build_bundle.py b/tests/meta/store/test_mip_rna_build_bundle.py
index e0f61fc7cd..5404b1f05e 100644
--- a/tests/meta/store/test_mip_rna_build_bundle.py
+++ b/tests/meta/store/test_mip_rna_build_bundle.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Test MIP RNA get files and build bundle"""
 
 from snapshottest import Snapshot
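For context, each entry in the snapshot above is a Housekeeper-style bundle file: a dict with an archive flag, a one-element path list, and a list of tags. A small illustrative sketch of composing such an entry; the helper name bundle_file is invented here and is not part of the cg API:

    def bundle_file(path, tags, archive=False):
        """Shape one file entry the way the snapshot above expects it."""
        return {"archive": archive, "path": [path], "tags": tags}

    entry = bundle_file(
        "/path/to/stuff/rare-disease/cases/case_id/analysis/files/quant.sf",
        tags=["salmon-quant", "sample_id_1", "rd-rna"],
    )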
diff --git a/tests/meta/transfer/conftest.py b/tests/meta/transfer/conftest.py
index 0281e3d473..202be66c43 100644
--- a/tests/meta/transfer/conftest.py
+++ b/tests/meta/transfer/conftest.py
@@ -12,12 +12,7 @@ def data():
     return {
         "samples": [
-            {
-                "name": "ADM1136A3",
-                "index": "ACGTACAT",
-                "flowcell": "HJKMYBCXX",
-                "type": "hiseqx",
-            }
+            {"name": "ADM1136A3", "index": "ACGTACAT", "flowcell": "HJKMYBCXX", "type": "hiseqx"}
         ]
     }
 
@@ -25,9 +20,7 @@ def data():
 @pytest.yield_fixture(scope="function")
 def store_stats():
     """Setup base CGStats store."""
-    _store = StatsAPI(
-        {"cgstats": {"database": "sqlite://", "root": "tests/fixtures/DEMUX"}}
-    )
+    _store = StatsAPI({"cgstats": {"database": "sqlite://", "root": "tests/fixtures/DEMUX"}})
     _store.create_all()
     yield _store
     _store.drop_all()
@@ -40,9 +33,7 @@ def base_store_stats(store_stats, data):
     for sample_data in data["samples"]:
         project = store_stats.Project(projectname="test", time=dt.datetime.now())
         sample = store_stats.Sample(
-            samplename=sample_data["name"],
-            barcode=sample_data["index"],
-            limsid=sample_data["name"],
+            samplename=sample_data["name"], barcode=sample_data["index"], limsid=sample_data["name"]
         )
         sample.project = project
         unaligned = store_stats.Unaligned(readcounts=300000000, q30_bases_pct=85)
@@ -58,9 +49,7 @@ def base_store_stats(store_stats, data):
             time=dt.datetime.now(),
         )
         supportparams = store_stats.Supportparams(document_path="NA", idstring="NA")
-        datasource = store_stats.Datasource(
-            document_path="NA", document_type="html"
-        )
+        datasource = store_stats.Datasource(document_path="NA", document_type="html")
         datasource.supportparams = supportparams
         demux = store_stats.Demux()
         demux.flowcell = flowcell
@@ -79,9 +68,7 @@ def flowcell_store(base_store, data):
     for sample_data in data["samples"]:
         customer_obj = base_store.customers().first()
         application_version = base_store.application("WGTPCFC030").versions[0]
-        sample = base_store.add_sample(
-            name="NA", sex="male", internal_id=sample_data["name"]
-        )
+        sample = base_store.add_sample(name="NA", sex="male", internal_id=sample_data["name"])
         sample.customer = customer_obj
         sample.application_version = application_version
         sample.received_at = dt.datetime.now()
@@ -91,9 +78,9 @@ def flowcell_store(base_store, data):
 
 
 @pytest.yield_fixture(scope="function")
-def transfer_flowcell_api(flowcell_store, store_housekeeper, base_store_stats):
+def transfer_flowcell_api(flowcell_store, housekeeper_api, base_store_stats):
     """Setup flowcell transfer API."""
-    transfer_api = TransferFlowcell(flowcell_store, base_store_stats, store_housekeeper)
+    transfer_api = TransferFlowcell(flowcell_store, base_store_stats, housekeeper_api)
 
     yield transfer_api
diff --git a/tests/meta/transfer/test_meta_transfer_flowcell.py b/tests/meta/transfer/test_meta_transfer_flowcell.py
index 9f844265bd..513078bbfe 100644
--- a/tests/meta/transfer/test_meta_transfer_flowcell.py
+++ b/tests/meta/transfer/test_meta_transfer_flowcell.py
@@ -3,13 +3,13 @@
 from sqlalchemy import exc as sa_exc
 
 
-def test_transfer_flowcell(flowcell_store, store_housekeeper, transfer_flowcell_api):
+def test_transfer_flowcell(flowcell_store, housekeeper_api, transfer_flowcell_api):
     # GIVEN a store with a received but not sequenced sample
     flowcell_id = "HJKMYBCXX"
 
     assert flowcell_store.samples().count() == 1
     assert flowcell_store.flowcells().count() == 0
-    assert store_housekeeper.bundles().count() == 0
+    assert housekeeper_api.bundles().count() == 0
 
     # WHEN transferring the flowcell containing the sample
     with warnings.catch_warnings():
@@ -24,7 +24,7 @@ def test_transfer_flowcell(flowcell_store, store_housekeeper, transfer_flowcell_
     assert isinstance(status_sample.sequenced_at, dt.datetime)
 
     # ... and it should store the fastq files for the sample in housekeeper
-    hk_bundle = store_housekeeper.bundle(status_sample.internal_id)
+    hk_bundle = housekeeper_api.bundle(status_sample.internal_id)
     assert len(hk_bundle.versions[0].files) > 0
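Both conftest modules now build the Housekeeper API the same way, going through initialise_db and destroy_db instead of poking create_all and drop_all on the store directly. Outside of pytest, the same life cycle looks roughly like this; a minimal sketch assuming only the constructor and methods used in the fixtures above:

    import tempfile

    from cg.apps.hk import HousekeeperAPI

    root_path = tempfile.mkdtemp()
    hk_api = HousekeeperAPI({"housekeeper": {"database": "sqlite://", "root": root_path}})
    hk_api.initialise_db()
    try:
        # A freshly initialised database should contain no bundles
        assert hk_api.bundles().count() == 0
    finally:
        hk_api.destroy_db()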
diff --git a/tests/store_helpers.py b/tests/store_helpers.py
new file mode 100644
index 0000000000..68c2e17258
--- /dev/null
+++ b/tests/store_helpers.py
@@ -0,0 +1,128 @@
+"""Utility functions to simplify adding test data to a cg store"""
+
+from datetime import datetime
+
+
+def ensure_application_version(disk_store, application_tag="dummy_tag", application_type="tgs"):
+    """Utility function to return an existing application version or create one for tests"""
+    application = disk_store.application(tag=application_tag)
+    if not application:
+        application = disk_store.add_application(
+            tag=application_tag,
+            category=application_type,
+            percent_kth=80,
+            description="dummy_description",
+        )
+        disk_store.add_commit(application)
+
+    prices = {"standard": 10, "priority": 20, "express": 30, "research": 5}
+    version = disk_store.application_version(application, 1)
+    if not version:
+        version = disk_store.add_version(application, 1, valid_from=datetime.now(), prices=prices)
+
+        disk_store.add_commit(version)
+    return version
+
+
+def ensure_bed_version(disk_store, bed_name="dummy_bed"):
+    """Utility function to return an existing bed version or create one for tests"""
+    bed = disk_store.bed(name=bed_name)
+    if not bed:
+        bed = disk_store.add_bed(name=bed_name)
+        disk_store.add_commit(bed)
+
+    version = disk_store.latest_bed_version(bed_name)
+    if not version:
+        version = disk_store.add_bed_version(bed, 1, "dummy_filename", shortname=bed_name)
+        disk_store.add_commit(version)
+    return version
+
+
+def ensure_customer(disk_store, customer_id="cust_test"):
+    """Utility function to return an existing customer or create one for tests"""
+    customer_group = disk_store.customer_group("dummy_group")
+    if not customer_group:
+        customer_group = disk_store.add_customer_group("dummy_group", "dummy group")
+
+    customer = disk_store.add_customer(
+        internal_id=customer_id,
+        name="Test Customer",
+        scout_access=False,
+        customer_group=customer_group,
+        invoice_address="dummy_address",
+        invoice_reference="dummy_reference",
+    )
+    disk_store.add_commit(customer)
+    customer = disk_store.customer(customer_id)
+    return customer
+
+
+def add_analysis(store, family=None, completed_at=None):
+    """Utility function to add an analysis for tests"""
+
+    if not family:
+        family = add_family(store)
+
+    analysis = store.add_analysis(pipeline="", version="")
+
+    if completed_at:
+        analysis.completed_at = completed_at
+
+    analysis.family = family
+    store.add_commit(analysis)
+    return analysis
+
+
+def add_sample(
+    store,
+    sample_id="sample_test",
+    gender="female",
+    is_tumour=False,
+    data_analysis="balsamic",
+    application_tag="dummy_tag",
+    application_type="tgs",
+):
+    """Utility function to add a sample to use in tests"""
+    customer = ensure_customer(store)
+    application_version_id = ensure_application_version(
+        store, application_tag=application_tag, application_type=application_type
+    ).id
+    sample = store.add_sample(
+        name=sample_id,
+        sex=gender,
+        tumour=is_tumour,
+        sequenced_at=datetime.now(),
+        data_analysis=data_analysis,
+    )
+
+    sample.application_version_id = application_version_id
+    sample.customer = customer
+    store.add_commit(sample)
+    return sample
+
+
+def ensure_panel(disk_store, panel_id="panel_test", customer_id="cust_test"):
+    """Utility function to add a panel to use in tests"""
+    customer = ensure_customer(disk_store, customer_id)
+    panel = disk_store.panel(panel_id)
+    if not panel:
+        panel = disk_store.add_panel(
+            customer=customer,
+            name=panel_id,
+            abbrev=panel_id,
+            version=1.0,
+            date=datetime.now(),
+            genes=1,
+        )
+        disk_store.add_commit(panel)
+    return panel
+
+
+def add_family(disk_store, family_id="family_test", customer_id="cust_test"):
+    """Utility function to add a family to use in tests"""
+    panel = ensure_panel(disk_store)
+    customer = ensure_customer(disk_store, customer_id)
+    family = disk_store.add_family(name=family_id, panels=panel.name)
+    family.customer = customer
+    disk_store.add_commit(family)
+    return family
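As a final illustration, the helpers above compose: add_sample seeds its own customer and application version through the ensure_* functions, and a repeated ensure call hands back the existing record rather than creating a new one. A hedged sketch of relying on that in a new test, with the test name and assertions invented for the example:

    from tests.store_helpers import add_sample, ensure_application_version

    def test_add_sample_reuses_application_version(store):
        # GIVEN a sample seeded through the helper with the default dummy application
        sample = add_sample(store, sample_id="sample1", is_tumour=True)

        # THEN a second ensure call should return the same application version
        assert sample.application_version_id == ensure_application_version(store).id
        assert sample.customer.internal_id == "cust_test"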