-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
pm storage cleanup and pm storage archive-to-swestore #34
Changes from 20 commits
f971677
955e5b3
76ba272
da5cbf3
72bee69
7d4d16c
b455c11
d3b404f
f157467
d7ac3d4
f91a359
7469a75
bdbdf5f
c432dc2
2c2e50a
299e552
f8c394f
c850c02
e35ca3b
fb85539
d803d81
74f0e2e
df91247
6359cef
f64cc4c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,6 @@ pm.egg-info | |
dist | ||
build | ||
_build | ||
*log | ||
*.log | ||
.DS* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Remove building leftovers
rm -rf build dist *egg-info

# Remove compiled Python files.
# The pattern is quoted so that find receives it verbatim; unquoted, the shell
# would expand it first whenever a matching file exists in the current directory.
find . -type f -name '*.pyc' -exec rm {} +
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
""" Core module. | ||
|
||
Place for controllers and other structural stuff. | ||
""" | ||
from cement.core import controller | ||
|
||
class BaseController(controller.CementBaseController):
    """ Define an application BaseController

    The most basic controller. To be used as a template for new and more complex
    controllers.
    """
    class Meta:
        label = 'base'
        description = "Project Management - A tool for miscellaneous tasks at NGI"

    @controller.expose(hide=True)
    def default(self):
        """ Default command: point the user to the help output.
        """
        # Single-argument call form: prints the same text on Python 2 and also
        # parses on Python 3, unlike the bare print statement.
        print("Execute pm --help to display available commands")

    @controller.expose(hide=True, help="Prints a hello message")
    def hello(self):
        """ Testing method that just logs a hello message.

        Exposed with hide=True, so it is not meant to be listed as an
        available option.
        """
        self.app.log.info("Welcome to Project Management tools!")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
""" PM controllers | ||
""" | ||
import os | ||
import re | ||
import shutil | ||
|
||
from cement.core import controller | ||
|
||
from pm.controllers import BaseController | ||
from pm.utils import filesystem, misc | ||
|
||
class StorageController(BaseController):
    """ Storage Controller

    Entry point for all functionalities related to storage
    """
    class Meta:
        label = 'storage'
        description = "Entry point for all functionalities related to storage"
        stacked_on = 'base'
        stacked_type = 'nested'
        arguments = [
            (['-r', '--run'], dict(type=str, help="Work with a specific run")),
            (['-d', '--days'], dict(type=int, default=10, help="Days to consider a run \"old\""))
        ]

    #######################
    # Storage subcommands #
    #######################

    @controller.expose(help="Move old runs to nosync directory so they're not synced to the processing server")
    def cleanup(self):
        """ Move finished, old runs to the ``nosync`` directory of every data dir.

        A run is moved when its name matches ``filesystem.RUN_RE``, it contains
        an ``RTAComplete.txt`` file, and that file was last modified at least
        ``--days`` days ago.
        """
        # Function-scope import so this block does not depend on the file header.
        import time

        max_age = self.app.pargs.days * 24 * 60 * 60
        now = time.time()
        for data_dir in self.app.config.get('storage', 'data_dirs'):
            with filesystem.chdir(data_dir):
                # We are already inside data_dir, so list the current directory;
                # listing data_dir again would fail for relative paths.
                for run in [r for r in os.listdir(os.curdir) if re.match(filesystem.RUN_RE, r)]:
                    rta_file = os.path.join(run, 'RTAComplete.txt')
                    if not os.path.exists(rta_file):
                        continue
                    if now - os.path.getmtime(rta_file) < max_age:
                        # Not old enough yet: leave the run where it is.
                        continue
                    # shutil.move renames the source to the destination when
                    # the destination does not exist, so make sure it does.
                    if not os.path.isdir('nosync'):
                        os.makedirs('nosync')
                    self.app.log.info('Moving run {} to nosync directory'.format(os.path.basename(run)))
                    shutil.move(run, 'nosync')

    @controller.expose(help="Archive old runs to SWESTORE")
    def archive_to_swestore(self):
        """ Archive runs to SWESTORE.

        With ``--run``, archive only that run (after checking that its name
        matches ``filesystem.RUN_RE`` and that it exists). Otherwise archive
        every matching entry found in the ``nosync`` directory of each data dir.
        """
        # If the run is specified in the command line, check that exists and archive
        if self.app.pargs.run:
            # Drop a trailing separator, otherwise basename() would be empty and
            # the archive name would be built inside the run directory.
            run = os.path.normpath(self.app.pargs.run)
            run_name = os.path.basename(run)
            if re.match(filesystem.RUN_RE, run_name):
                if not os.path.exists(run):
                    self.app.log.error(("Run {} not found. Please make sure to specify "
                        "the absolute path or relative path being in the correct directory.".format(run)))
                else:
                    self._archive_run(run)
            else:
                self.app.log.error("The name {} doesn't look like an Illumina run".format(run_name))
        # Otherwise find all runs in every data dir on the nosync partition
        else:
            self.app.log.info("Archiving old runs to SWESTORE")
            for data_dir in self.app.config.get('storage', 'data_dirs'):
                to_send_dir = os.path.join(data_dir, 'nosync')
                self.app.log.info('Checking {} directory'.format(to_send_dir))
                with filesystem.chdir(to_send_dir):
                    # Already inside to_send_dir: list the current directory.
                    for run in [r for r in os.listdir(os.curdir) if re.match(filesystem.RUN_RE, r)]:
                        self._archive_run(run)

    #############################################################
    # Class helper methods, not exposed as commands/subcommands #
    #############################################################
    def _archive_run(self, run):
        """ Archive a specific run to swestore

        A name ending in ``bz2`` is sent as is. Anything else is first packed
        with tar/pbzip2 into ``<run>.tar.bz2``, the original directory is
        removed, and the resulting archive is sent.

        :param str run: Run directory (or already compressed run archive)
        """
        def _send_to_swestore(f, dest, remove=True):
            """ Send file to swestore checking adler32 on destination and eventually
            removing the file from disk

            :param str f: File to send
            :param str dest: Destination directory in Swestore
            :param bool remove: If True, remove original file from source
            """
            self.app.log.info("Sending {} to swestore".format(f))
            # Arguments are passed as a list so that paths containing
            # whitespace are not split into several arguments.
            misc.call_external_command(['iput', '-K', '-P', f, dest],
                    with_log_files=True)
            self.app.log.info('Run {} sent correctly and checksum was okay.'.format(f))
            if remove:
                self.app.log.info('Removing run {}'.format(f))
                os.remove(f)

        destination = self.app.config.get('storage', 'irods').get('irodsHome')
        if run.endswith('bz2'):
            _send_to_swestore(run, destination)
        else:
            archive = '{}.tar.bz2'.format(run)
            self.app.log.info("Compressing run {}".format(run))
            # Compress with pbzip2
            misc.call_external_command(['tar', '--use-compress-program=pbzip2', '-cf', archive, run])
            # Never delete the original data unless the archive is really there.
            if not os.path.isfile(archive):
                self.app.log.error("Archive {} was not created, run {} will not be removed "
                                   "nor sent to swestore".format(archive, run))
                return
            self.app.log.info('Run {} successfully compressed! Removing from disk...'.format(run))
            shutil.rmtree(run)
            _send_to_swestore(archive, destination)
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
""" Project Management logging module | ||
""" PM logging module for external scripts | ||
""" |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ | |
import contextlib | ||
import os | ||
|
||
# Pattern for run folder names: a six digit field, an alphanumeric/dash field,
# a four digit field, then A or B followed by exactly nine uppercase letters
# or digits. Written as a raw string so the backslashes reach the regex engine
# without relying on invalid string escapes.
# NOTE(review): the final field presumably excludes MiSeq run names - confirm
# before using this pattern for MiSeq data.
RUN_RE = r'\d{6}_[a-zA-Z\d\-]+_\d{4}_[AB][A-Z\d]{9}'
|
||
@contextlib.contextmanager | ||
def chdir(new_dir): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nicely done 👍 :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tack :-) |
||
"""Context manager to temporarily change to a new directory. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
""" Miscellaneous or general-use methods | ||
""" | ||
import os | ||
import subprocess | ||
import sys | ||
|
||
from datetime import datetime | ||
|
||
def call_external_command(cl, with_log_files=False):
    """ Executes an external command

    :param string cl: Command line to be executed (command + options and parameters).
        Either a single string, which is split into arguments, or an already
        split list of arguments.
    :param bool with_log_files: Create log files for stdout and stderr. The files
        are named after the command (``<command>.out`` / ``<command>.err``) and
        are created in the current working directory.
    :raises subprocess.CalledProcessError: If the command exits with a non-zero
        status. The exception carries a ``message`` attribute naming the command.
    """
    # Function-scope import so this block does not depend on the file header.
    import shlex

    if isinstance(cl, str):
        # shlex honours quoting and does not produce empty arguments on
        # repeated spaces, unlike a plain split(' ').
        cl = shlex.split(cl)
    command = os.path.basename(cl[0])
    stdout = sys.stdout
    stderr = sys.stderr
    opened = []

    try:
        if with_log_files:
            # Append, so that consecutive calls of the same command keep their
            # logs; every entry starts with its own timestamped header.
            stdout = open(command + '.out', 'a')
            opened.append(stdout)
            stderr = open(command + '.err', 'a')
            opened.append(stderr)
            started = "Started command {} on {}".format(' '.join(cl), datetime.now())
            stdout.write(started + '\n')
            stdout.write('=' * len(started) + '\n')
            # The child process writes straight to the file descriptor, so the
            # header must leave our buffer before the command starts.
            stdout.flush()

        try:
            subprocess.check_call(cl, stdout=stdout, stderr=stderr)
        except subprocess.CalledProcessError as e:
            e.message = "The command {} failed.".format(' '.join(cl))
            raise
    finally:
        # Close only what was opened here, never sys.stdout / sys.stderr.
        for handle in opened:
            handle.close()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this only meant for HiSeq runs? You defined
RUN_RE = '\d{6}_[a-zA-Z\d\-]+_\d{4}_[AB][A-Z\d]{9}'
so it would not work for MiSeq, would it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Damn! No, it will not work on MiSeq because of the last part of the regexp. I'll have to change it. Well spotted, thanks!