From 241e664c1a772eac3f46d761146296a2d85f7229 Mon Sep 17 00:00:00 2001 From: Boris Dimitrov Date: Thu, 10 Jan 2019 19:44:27 +0000 Subject: [PATCH] more comments on perf --- midas/utility.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/midas/utility.py b/midas/utility.py index 2cfbfd1..873d5fc 100755 --- a/midas/utility.py +++ b/midas/utility.py @@ -269,6 +269,14 @@ def iopen(inpath, mode='r'): # Python2 if sys.version_info[0] == 2: if ext == 'gz': return gzip.open(inpath, mode) + # I usually recommend lz4 over gz and bz2, but if we must use one + # of these CPU-intensive algorithms, it's best not to have + # it run on the same core as the Python script. This can be achieved + # using the approach of the function smarter_open in this example: + # https://github.com/chanzuckerberg/idseq-bench/blob/master/util.py + # In addition, that approach can stream files from AWS S3 or another + # machine without having to copy them to an attached filesystem, + # which avoids further performance problems. elif ext == 'bz2': return bz2.BZ2File(inpath, mode) else: return open(inpath, mode) # Python3