Feat(store): Store balsamic analysis in housekeeper (#551)
* store-like-mip

* Test store

* store balsamic files in meta-file

* remove unused argument

* add missing required parameter

* move calls to store object

* change black pre-commit line length to what we use

* Remove Python2 charset declaration. Format code

* Remove Python2 charset declaration

* Capitalize in log message

* add method description

* output with click instead of print

* remove unused imports

* remove unnecessary whitespace

* describe methods

* use existing case instead of refetching

* remove hard coded dummy values

* remove unused assignment

* add missing argument in call

* make mock behave more like real thing

* store root_dir in hk_api

* fix string

* handle directories and multiple tags

* Merge with master

* clarify the priority mapping

* simplify test

* remove debug print

* move test helpers to a module

* fix import of store_helpers

* remove redundant test setup code

* fix store_helper module path

* remove redundant import

* fix import order

* Add docstring

* linting

* linting

* skip conversion to path

* use full path

* rework the path and tag parsing

* return actual compressed filename

* fix broken test

* fix path to generated .hk file

* fix path to generated .hk file

* fix deliverables file path

* merge with master

* Protect the store in housekeeper from direct usage

* format code

* linting

* linting

* check black with our decided line length

* format code

* call fixture what it is

* test magic __getattr__

* Update cg/cli/workflow/balsamic/base.py

* simplify creation of balsamic command

* create the deliverables file path in one way only

* simplify balsamic command

* restore unsecured call

* remove erroneous usage of store on API

* forward all arguments to wrapped add_commit

* fix docstring

* Wrap version method in Store

* capture log at right level

* more tests

* fix store name according to real implementation
patrikgrenfeldt authored Mar 25, 2020
1 parent dbea3ce commit 59755ac
Showing 96 changed files with 2,158 additions and 1,858 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -3,3 +3,4 @@ repos:
rev: 19.3b0
hooks:
- id: black
args: [--line-length=100]
2 changes: 1 addition & 1 deletion .travis.yml
@@ -60,7 +60,7 @@ jobs:
- name: "Code formatting"
if: type = pull_request
install: pip install black
script: git --no-pager diff --name-only --diff-filter=AM $TRAVIS_COMMIT_RANGE | grep -F ".py" | xargs black --check --diff
script: git --no-pager diff --name-only --diff-filter=AM $TRAVIS_COMMIT_RANGE | grep -F ".py" | xargs black --check -l 100

- name: "Pylint score"
if: type = pull_request
3 changes: 1 addition & 2 deletions cg/__init__.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
import pkg_resources

__title__ = 'cg'
__title__ = "cg"
__version__ = pkg_resources.get_distribution(__title__).version
1 change: 0 additions & 1 deletion cg/apps/__init__.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
These are independent interfaces to tools outside the package.
9 changes: 2 additions & 7 deletions cg/apps/balsamic/fastq.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
"""
This module handles concatenation of balsamic fastq files.
@@ -133,9 +132,7 @@ def link(self, case: str, sample: str, files: List):
linked_fastq_path = wrk_dir / linked_fastq_name

linked_reads_paths[fastq_data["read"]].append(linked_fastq_path)
concatenated_paths[
fastq_data["read"]
] = f"{wrk_dir}/{concatenated_fastq_name}"
concatenated_paths[fastq_data["read"]] = f"{wrk_dir}/{concatenated_fastq_name}"

if not linked_fastq_path.exists():
LOGGER.info("linking: %s -> %s", original_fastq_path, linked_fastq_path)
@@ -145,9 +142,7 @@ def link(self, case: str, sample: str, files: List):

LOGGER.info("Concatenation in progress for sample %s.", sample)
for read in linked_reads_paths:
FastqFileConcatenator().concatenate(
linked_reads_paths[read], concatenated_paths[read]
)
FastqFileConcatenator().concatenate(linked_reads_paths[read], concatenated_paths[read])
self._remove_files(linked_reads_paths[read])

@staticmethod
31 changes: 25 additions & 6 deletions cg/apps/beacon.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
from typing import List
import datetime as dt
import logging
@@ -7,24 +6,44 @@

LOG = logging.getLogger(__name__)

class BeaconApi():

class BeaconApi:
"""
Interface with Beacon importer (github.com/Clinical-Genomics/cgbeacon)
Inserts variants from a VCF file inside a Beacon server.
"""

def __init__(self, config: dict):
super(BeaconApi, self).__init__()
self.connection = use_mysqlalchemy(config['cgbeacon']['database'])

self.connection = use_mysqlalchemy(config["cgbeacon"]["database"])

def upload(self, vcf_path: str, panel_path: str, dataset: str, outfile: str, customer: str, samples: List[str], quality: int, genome_reference: str):
def upload(
self,
vcf_path: str,
panel_path: str,
dataset: str,
outfile: str,
customer: str,
samples: List[str],
quality: int,
genome_reference: str,
):
""" Uploads variants from a VCF file to a MySQL Beacon database
Returns: number of new variants in the Beacon
"""

LOG.info("Uploading variants to beacon db.")
upload_result = Utility.beacon_upload(self.connection, vcf_path, panel_path, dataset, outfile, customer, samples, quality, genome_reference)
upload_result = Utility.beacon_upload(
self.connection,
vcf_path,
panel_path,
dataset,
outfile,
customer,
samples,
quality,
genome_reference,
)
LOG.info("Upload complete!")

def remove_vars(self, sample, vcf_path, panel_path=None, qual=20):
37 changes: 18 additions & 19 deletions cg/apps/gt.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
import logging

from subprocess import CalledProcessError
@@ -22,42 +21,42 @@ class GenotypeAPI(Manager):
"""

def __init__(self, config: dict):
alchy_config = dict(SQLALCHEMY_DATABASE_URI=config['genotype']['database'])
alchy_config = dict(SQLALCHEMY_DATABASE_URI=config["genotype"]["database"])
super(GenotypeAPI, self).__init__(config=alchy_config, Model=models.Model)

self.genotype_config = config['genotype']['config_path']
self.genotype_binary = config['genotype']['binary_path']
self.base_call = [self.genotype_binary, '--config', self.genotype_config]
self.genotype_config = config["genotype"]["config_path"]
self.genotype_binary = config["genotype"]["binary_path"]
self.base_call = [self.genotype_binary, "--config", self.genotype_config]

def upload(self, bcf_path: str, samples_sex: dict, force: bool=False):
def upload(self, bcf_path: str, samples_sex: dict, force: bool = False):
"""Upload genotypes for a family of samples."""
snps = api.snps()
analyses = load_vcf(bcf_path, snps)
for analysis_obj in analyses:
LOG.debug('loading VCF genotypes for sample: %s', analysis_obj.sample_id)
LOG.debug("loading VCF genotypes for sample: %s", analysis_obj.sample_id)
is_saved = api.add_analysis(self, analysis_obj, replace=force)
if is_saved:
LOG.info('loaded VCF genotypes for sample: %s', analysis_obj.sample_id)
LOG.info("loaded VCF genotypes for sample: %s", analysis_obj.sample_id)
else:
LOG.warning('skipped, found previous analysis: %s', analysis_obj.sample_id)
LOG.warning("skipped, found previous analysis: %s", analysis_obj.sample_id)

if is_saved or force:
analysis_obj.sex = samples_sex[analysis_obj.sample_id]['analysis']
analysis_obj.sample.sex = samples_sex[analysis_obj.sample_id]['pedigree']
analysis_obj.sex = samples_sex[analysis_obj.sample_id]["analysis"]
analysis_obj.sample.sex = samples_sex[analysis_obj.sample_id]["pedigree"]
self.commit()

def export_sample(self, days: int = 0) -> str:
"""Export sample info."""
trending_call = self.base_call[:]
trending_call.extend(['export-sample', '-d', days])
trending_call.extend(["export-sample", "-d", days])
try:
LOG.info('Running Genotype API to get data.')
LOG.info("Running Genotype API to get data.")
LOG.debug(trending_call)
output = subprocess.check_output(trending_call)
except CalledProcessError as error:
LOG.critical("Could not run command: %s", ' '.join(trending_call))
LOG.critical("Could not run command: %s", " ".join(trending_call))
raise error
output = output.decode('utf-8')
output = output.decode("utf-8")
# If sample not in genotype db, stdout of genotype command will be empty.
if not output:
raise CaseNotFoundError("samples not found in genotype db")
@@ -66,15 +65,15 @@ def export_sample(self, days: int = 0) -> str:
def export_sample_analysis(self, days: int = 0) -> str:
"""Export analysis."""
trending_call = self.base_call[:]
trending_call.extend(['export-sample-analysis', '-d', days])
trending_call.extend(["export-sample-analysis", "-d", days])
try:
LOG.info('Running Genotype API to get data.')
LOG.info("Running Genotype API to get data.")
LOG.debug(trending_call)
output = subprocess.check_output(trending_call)
except CalledProcessError as error:
LOG.critical("Could not run command: %s", ' '.join(trending_call))
LOG.critical("Could not run command: %s", " ".join(trending_call))
raise error
output = output.decode('utf-8')
output = output.decode("utf-8")
# If sample not in genotype db, stdout of genotype command will be empty.
if not output:
raise CaseNotFoundError("samples not found in genotype db")
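
The two `export_*` methods above share one shell-out pattern: copy the base CLI call, append a subcommand and its flags, run it with `subprocess.check_output`, and treat empty stdout as "sample not found". A minimal, self-contained sketch of that pattern follows; the function name, the `echo` command, and the error type are illustrative stand-ins, not the actual genotype CLI:

```python
import logging
import subprocess
from subprocess import CalledProcessError

LOG = logging.getLogger(__name__)


def run_export(base_call: list, subcommand: str, days: int) -> str:
    """Run an external CLI and return its decoded stdout (illustrative sketch)."""
    call = base_call[:]  # copy so the shared base command is not mutated
    call.extend([subcommand, "-d", str(days)])  # subprocess expects string arguments
    try:
        LOG.debug(call)
        output = subprocess.check_output(call)
    except CalledProcessError as error:
        LOG.critical("Could not run command: %s", " ".join(call))
        raise error
    decoded = output.decode("utf-8")
    if not decoded:
        # The real API raises CaseNotFoundError here; RuntimeError keeps the sketch standalone.
        raise RuntimeError("no records returned by command")
    return decoded


if __name__ == "__main__":
    # Hypothetical usage; 'echo' stands in for the genotype binary and its config flags.
    print(run_export(["echo"], "export-sample", 30))
```
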
93 changes: 82 additions & 11 deletions cg/apps/hk.py
@@ -1,23 +1,83 @@
# -*- coding: utf-8 -*-
""" Module to decouple cg code from Housekeeper code """
import datetime as dt
import logging
import os
from pathlib import Path
from typing import List

from housekeeper.exc import VersionIncludedError
from housekeeper.include import include_version, checksum as hk_checksum
from housekeeper.store import Store, models

log = logging.getLogger(__name__)
LOG = logging.getLogger(__name__)


class HousekeeperAPI(Store):
class HousekeeperAPI:
""" API to decouple cg code from Housekeeper """

def __init__(self, config):
super(HousekeeperAPI, self).__init__(
config["housekeeper"]["database"], config["housekeeper"]["root"]
)
self._store = Store(config["housekeeper"]["database"], config["housekeeper"]["root"])
self.root_dir = config["housekeeper"]["root"]

def __getattr__(self, name):
LOG.warning("Called undefined %s on %s, please wrap", name, self.__class__.__name__)
return getattr(self._store, name)

def add_bundle(self, bundle_data):
""" Build a new bundle version of files """
return self._store.add_bundle(bundle_data)

def new_file(
self, path: str, checksum: str = None, to_archive: bool = False, tags: list = None
):
""" Create a new file """
return self._store.new_file(path, checksum, to_archive, tags)

def tag(self, name: str):
""" Fetch a tag """
return self._store.tag(name)

def bundle(self, name: str):
""" Fetch a bundle """
return self._store.bundle(name)

def bundles(self):
""" Fetch bundles """
return self._store.bundles()

def version(self, bundle: str, date: dt.datetime):
""" Fetch a version """
return self._store.version(bundle, date)

def files(
self, *, bundle: str = None, tags: List[str] = None, version: int = None, path: str = None
):
""" Fetch files """
return self._store.files(bundle=bundle, tags=tags, version=version, path=path)

def new_tag(self, name: str, category: str = None):
""" Create a new tag """
return self._store.new_tag(name, category)

def new_bundle(self, name: str, created_at: dt.datetime = None):
""" Create a new file bundle """
return self._store.new_bundle(name, created_at)

def new_version(self, created_at: dt.datetime, expires_at: dt.datetime = None):
""" Create a new bundle version """
return self._store.new_version(created_at, expires_at)

def add_commit(self, *args, **kwargs):
""" Wrap method in Housekeeper Store """
return self._store.add_commit(*args, **kwargs)

def commit(self):
""" Wrap method in Housekeeper Store """
return self._store.commit()

def session_no_autoflush(self):
""" Wrap property in Housekeeper Store """
return self._store.session.no_autoflush

def include(self, version_obj: models.Version):
"""Call the include version function to import related assets."""
include_version(self.get_root_dir(), version_obj)
@@ -30,26 +90,28 @@ def include_file(self, file_obj: models.File, version_obj: models.Version):
# generate root directory
version_root_dir = global_root_dir / version_obj.relative_root_dir
version_root_dir.mkdir(parents=True, exist_ok=True)
log.info(f"created new bundle version dir: {version_root_dir}")
LOG.info("Created new bundle version dir: %s", version_root_dir)

if file_obj.to_archive:
# calculate sha1 checksum if file is to be archived
file_obj.checksum = HousekeeperAPI.checksum(file_obj.path)
# hardlink file to the internal structure
new_path = version_root_dir / Path(file_obj.path).name
os.link(file_obj.path, new_path)
log.info(f"linked file: {file_obj.path} -> {new_path}")
LOG.info("Linked file: %s -> %s", file_obj.path, new_path)
file_obj.path = str(new_path).replace(f"{global_root_dir}/", "", 1)

def last_version(self, bundle: str) -> models.Version:
"""Gets the latest version of a bundle"""
return (
self.Version.query.join(models.Version.bundle)
self._store.Version.query.join(models.Version.bundle)
.filter(models.Bundle.name == bundle)
.order_by(models.Version.created_at.desc())
.first()
)

def get_root_dir(self):
"""Returns the root dir of Housekeeper"""
return self.root_dir

def get_files(self, bundle: str, tags: list, version: int = None):
@@ -59,7 +121,7 @@ def get_files(self, bundle: str, tags: list, version: int = None):
Returns:
iterable(hk.Models.File)
"""
return self.files(bundle=bundle, tags=tags, version=version)
return self._store.files(bundle=bundle, tags=tags, version=version)

def add_file(self, file, version_obj: models.Version, tags, to_archive=False):
"""Add a file to housekeeper."""
@@ -77,4 +139,13 @@ def add_file(self, file, version_obj: models.Version, tags, to_archive=False):

@staticmethod
def checksum(path):
"""Calculate the checksum"""
return hk_checksum(path)

def initialise_db(self):
"""Create all tables in the store."""
self._store.create_all()

def destroy_db(self):
"""Drop all tables in the store"""
self._store.drop_all()
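
The rewritten `HousekeeperAPI` above keeps calling code working while the Housekeeper `Store` is moved behind an explicit facade: known methods delegate directly, and anything not yet wrapped falls through `__getattr__` with a "please wrap" warning so missing wrappers show up in the logs. A minimal, self-contained sketch of that pattern (the `_Backend` class and its methods are stand-ins, not the real Housekeeper store):

```python
import logging

LOG = logging.getLogger(__name__)


class _Backend:
    """Stand-in for the wrapped store (illustrative only)."""

    def commit(self):
        return "committed"

    def drop_all(self):
        return "dropped"


class BackendFacade:
    """Facade that logs whenever an un-wrapped attribute is accessed directly."""

    def __init__(self):
        self._store = _Backend()

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails, i.e. for methods
        # that have not been given an explicit wrapper yet.
        LOG.warning("Called undefined %s on %s, please wrap", name, self.__class__.__name__)
        return getattr(self._store, name)

    def commit(self):
        """Explicitly wrapped: delegates silently, no warning."""
        return self._store.commit()


if __name__ == "__main__":
    logging.basicConfig(level=logging.WARNING)
    api = BackendFacade()
    api.commit()    # wrapped call, silent
    api.drop_all()  # un-wrapped call, logs a "please wrap" warning first
```

Delegating via `__getattr__` keeps existing call sites functional during the migration while the log surfaces which methods still need explicit wrappers.
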
5 changes: 1 addition & 4 deletions cg/apps/invoice/render.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
import datetime as dt
from pkg_resources import resource_filename

@@ -38,9 +37,7 @@ def render_xlsx(data: dict) -> Workbook:
pkg_dir = __name__.rpartition(".")[0]
sample_type = "pool" if data["pooled_samples"] else "sample"
costcenter = data["costcenter"]
template_path = resource_filename(
pkg_dir, f"templates/{costcenter}_{sample_type}_invoice.xlsx"
)
template_path = resource_filename(pkg_dir, f"templates/{costcenter}_{sample_type}_invoice.xlsx")
workbook = load_workbook(template_path)
if data["pooled_samples"]:
worksheet = workbook["Bilaga Prover"]
1 change: 0 additions & 1 deletion cg/apps/lims/__init__.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
from .api import LimsAPI
from .orderform import parse_orderform
from .limsjson import parse_json