From a4656a60186a02c8db6c719840b32a3dfb13d1d1 Mon Sep 17 00:00:00 2001 From: Robert Stein Date: Wed, 14 Dec 2022 16:12:24 -0800 Subject: [PATCH] Better temp cache --- docs/source/about.rst | 2 +- winterdrp/__main__.py | 11 +++--- winterdrp/data/__init__.py | 3 +- winterdrp/data/cache.py | 74 ++++++++++++++++++++++++++++++++++++ winterdrp/data/image_data.py | 33 ++++++---------- winterdrp/paths.py | 9 ----- winterdrp/testing.py | 9 ++++- 7 files changed, 100 insertions(+), 41 deletions(-) create mode 100644 winterdrp/data/cache.py diff --git a/docs/source/about.rst b/docs/source/about.rst index fd3799e74..66ed1e141 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -20,4 +20,4 @@ How does the code actually work? .. include:: ../../winterdrp/data/image_data.py :start-line: 5 - :end-line: 54 + :end-line: 55 diff --git a/winterdrp/__main__.py b/winterdrp/__main__.py index ed4825ac5..346fac382 100644 --- a/winterdrp/__main__.py +++ b/winterdrp/__main__.py @@ -7,11 +7,12 @@ import argparse import logging import sys +import tempfile from astropy import units as u from astropy.time import Time -from winterdrp.data import Dataset, ImageBatch, clean_cache +from winterdrp.data import Dataset, ImageBatch, cache from winterdrp.monitor.base_monitor import Monitor from winterdrp.paths import RAW_IMG_SUB_DIR, package_name from winterdrp.pipelines import Pipeline, get_pipeline @@ -90,7 +91,9 @@ night = str(ln).split(" ", maxsplit=1)[0].replace("-", "") -try: +with tempfile.TemporaryDirectory() as temp_dir: + + cache.set_cache_dir(temp_dir) if args.monitor: @@ -177,7 +180,3 @@ ) logger.info("End of winterdrp execution") - -finally: - # Delete everything added to the cache - clean_cache() diff --git a/winterdrp/data/__init__.py b/winterdrp/data/__init__.py index 074cd7965..a386f1ad2 100644 --- a/winterdrp/data/__init__.py +++ b/winterdrp/data/__init__.py @@ -1,5 +1,6 @@ """Module to specify the input data classes for :module:`wintedrp.processors` """ from 
winterdrp.data.base_data import DataBatch, DataBlock, Dataset -from winterdrp.data.image_data import Image, ImageBatch, clean_cache +from winterdrp.data.cache import cache +from winterdrp.data.image_data import Image, ImageBatch from winterdrp.data.source_data import SourceBatch, SourceTable diff --git a/winterdrp/data/cache.py b/winterdrp/data/cache.py new file mode 100644 index 000000000..49e1a7ac3 --- /dev/null +++ b/winterdrp/data/cache.py @@ -0,0 +1,74 @@ +""" +Central module for handling the cache, currently used only for storing image data. +""" + +import logging +import os +from pathlib import Path +from tempfile import TemporaryDirectory + +import numpy as np + +logger = logging.getLogger(__name__) + +USE_CACHE: bool = bool(os.getenv("USE_WINTER_CACHE", "true")) + + +class CacheError(Exception): + """Error Relating to cache""" + + +class Cache: + """ + A cache object for storing temporary data + """ + + cache_dir: Path | None = None + + def get_cache_dir(self) -> Path: + """ + Returns the current cache dir + + :return: Cache dir + """ + if np.logical_and(self.cache_dir is not None, USE_CACHE): + return self.cache_dir + + if USE_CACHE: + err = ( + "The code has been configured to not use a cache, " + "but is now trying to access a cache." + ) + logger.error(err) + raise CacheError(err) + + err = ( + "No cache dir has been set. " + "Please set that before trying to use the cache." + ) + logger.error(err) + raise CacheError(err) + + def set_cache_dir(self, cache_dir: TemporaryDirectory | Path | str): + """ + Function to set the cache directory + + :param cache_dir: Cache dir to set + :return: None + """ + if isinstance(cache_dir, TemporaryDirectory): + cache_dir = cache_dir.name + else: + logger.warning( + f"Setting the cache to directory {cache_dir}, " + f"rather than using a {TemporaryDirectory}. " + f"Remember to clean this cache yourself after you are done!" 
+ ) + self.cache_dir = Path(cache_dir) + self.cache_dir.mkdir(parents=True, exist_ok=True) + + def __str__(self): + return f"A cache, with path {self.cache_dir}" + + +cache = Cache() diff --git a/winterdrp/data/image_data.py b/winterdrp/data/image_data.py index 224aad280..3fb8af0d5 100644 --- a/winterdrp/data/image_data.py +++ b/winterdrp/data/image_data.py @@ -32,20 +32,19 @@ for more info), and it is not guaranteed that Image objects will clean themselves. -As a fallback, we provide the helper function to delete all cache files created -during a session. When you run the code from the command line (and therefore call -__main__), we automatically run the cleanup before exiting, -even if the code crashes/raises errors. This is also true for the unit tests, -as provided by the base test class. **If you try to interact with the code in -any other way, please be mindful of this behaviour, and ensure that you clean your -cache in a responsible way!** +As a fallback, when you run the code from the command line (and therefore call +__main__), we use the standard Python +`tempfile library <https://docs.python.org/3/library/tempfile.html>`_ to create a +temporary directory, and set this as the cache. We create the directory using a `with` +context manager, ensuring that cleanup runs automatically before exiting, +even if the code crashes/raises errors. We also use `tempfile` and careful cleaning +for the unit tests, as provided by the base test class. +**If you try to interact with the code in any other way, please be mindful of this +behaviour, and ensure that you clean your cache in a responsible way!** If you don't like this feature, you don't need to use it. Cache mode is entirely optional, and can be disabled by setting the environment variable to false. -.. literalinclude:: ../../winterdrp/paths.py - :lines: 29 - You can change this via an environment variable. .. 
code-block:: bash @@ -65,7 +64,7 @@ from astropy.time import Time from winterdrp.data.base_data import DataBatch, DataBlock -from winterdrp.paths import CACHE_DIR, USE_CACHE +from winterdrp.data.cache import USE_CACHE, cache logger = logging.getLogger(__name__) @@ -101,7 +100,7 @@ def get_cache_path(self) -> Path: """ base = "".join([str(Time.now()), self.get_name()]) name = f"{hashlib.sha1(base.encode()).hexdigest()}.npy" - return CACHE_DIR.joinpath(name) + return cache.get_cache_dir().joinpath(name) def __str__(self): return f"" @@ -226,13 +225,3 @@ def get_batch(self) -> list[Image]: :return: list of :class:`~winterdrp.data.image_data.Image` objects """ return self.get_data_list() - - -def clean_cache(): - """Function to clear all created cache files - - :return: None - """ - for path in Image.cache_files: - path.unlink(missing_ok=True) - Image.cache_files = [] diff --git a/winterdrp/paths.py b/winterdrp/paths.py index 6b554b004..0cd95eef1 100644 --- a/winterdrp/paths.py +++ b/winterdrp/paths.py @@ -60,15 +60,6 @@ else: base_output_dir = Path(_base_output_dir) -# Set up cache for image data - -USE_CACHE: bool = bool(os.getenv("USE_WINTER_CACHE", "true")) - -CACHE_DIR = base_output_dir.joinpath(f"{package_name}_cache") - -if not CACHE_DIR.exists(): - CACHE_DIR.mkdir(parents=True) - # Set up special directories diff --git a/winterdrp/testing.py b/winterdrp/testing.py index e11626e7c..4839e36c1 100644 --- a/winterdrp/testing.py +++ b/winterdrp/testing.py @@ -1,9 +1,10 @@ """ Base class for unit testing, with common cleanup method """ +import tempfile import unittest -from winterdrp.data.image_data import clean_cache +from winterdrp.data.cache import cache class BaseTestCase(unittest.TestCase): @@ -11,4 +12,8 @@ class BaseTestCase(unittest.TestCase): def __init__(self, *arg, **kwargs): super().__init__(*arg, **kwargs) - self.addCleanup(clean_cache) + self.temp_dir = ( + tempfile.TemporaryDirectory() # pylint: disable=consider-using-with + ) + 
cache.set_cache_dir(self.temp_dir) + self.addCleanup(self.temp_dir.cleanup)