Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use tempfile for cache #242

Merged
merged 1 commit into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/about.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ How does the code actually work?

.. include:: ../../winterdrp/data/image_data.py
:start-line: 5
:end-line: 54
:end-line: 55
11 changes: 5 additions & 6 deletions winterdrp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import argparse
import logging
import sys
import tempfile

from astropy import units as u
from astropy.time import Time

from winterdrp.data import Dataset, ImageBatch, clean_cache
from winterdrp.data import Dataset, ImageBatch, cache
from winterdrp.monitor.base_monitor import Monitor
from winterdrp.paths import RAW_IMG_SUB_DIR, package_name
from winterdrp.pipelines import Pipeline, get_pipeline
Expand Down Expand Up @@ -90,7 +91,9 @@
night = str(ln).split(" ", maxsplit=1)[0].replace("-", "")


try:
with tempfile.TemporaryDirectory() as temp_dir:

cache.set_cache_dir(temp_dir)

if args.monitor:

Expand Down Expand Up @@ -177,7 +180,3 @@
)

logger.info("End of winterdrp execution")

finally:
# Delete everything added to the cache
clean_cache()
3 changes: 2 additions & 1 deletion winterdrp/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module to specify the input data classes for :mod:`winterdrp.processors`
"""
from winterdrp.data.base_data import DataBatch, DataBlock, Dataset
from winterdrp.data.image_data import Image, ImageBatch, clean_cache
from winterdrp.data.cache import cache
from winterdrp.data.image_data import Image, ImageBatch
from winterdrp.data.source_data import SourceBatch, SourceTable
74 changes: 74 additions & 0 deletions winterdrp/data/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Central module for handling the cache, currently used only for storing image data.
"""

import logging
import os
from pathlib import Path
from tempfile import TemporaryDirectory

import numpy as np

logger = logging.getLogger(__name__)

# Whether image data may be written to the cache, controlled by the
# USE_WINTER_CACHE environment variable (enabled when the variable is unset).
# The value is compared against explicit "off" spellings because bool() of any
# non-empty string is True, so bool("false") would leave the cache enabled.
USE_CACHE: bool = os.getenv("USE_WINTER_CACHE", "true").strip().lower() not in (
    "",
    "0",
    "false",
    "no",
    "off",
)


class CacheError(Exception):
    """Raised when the cache dir is requested but cannot be provided"""


class Cache:
    """
    A cache object for storing temporary data

    The object only tracks which directory acts as the cache. It never deletes
    that directory itself, so cleanup remains the responsibility of whoever
    created the directory.
    """

    # Directory currently used as the cache; None until set_cache_dir is called
    cache_dir: Path | None = None

    def get_cache_dir(self) -> Path:
        """
        Returns the current cache dir

        :return: Cache dir
        :raises CacheError: if caching is disabled via USE_CACHE,
            or if no cache dir has been set yet
        """
        # Check the global switch first, so that each failure mode
        # is reported with the message that actually describes it
        if not USE_CACHE:
            err = (
                "The code has been configured to not use a cache, "
                "but is now trying to access a cache."
            )
            logger.error(err)
            raise CacheError(err)

        if self.cache_dir is None:
            err = (
                "No cache dir has been set. "
                "Please set that before trying to use the cache."
            )
            logger.error(err)
            raise CacheError(err)

        return self.cache_dir

    def set_cache_dir(self, cache_dir: TemporaryDirectory | Path | str) -> None:
        """
        Function to set the cache directory

        A TemporaryDirectory object is unwrapped to its path. Any other value
        is used as a path directly, and a warning is logged because this class
        will not remove that directory for you.

        :param cache_dir: Cache dir to set
        :return: None
        """
        if isinstance(cache_dir, TemporaryDirectory):
            cache_dir = cache_dir.name
        else:
            logger.warning(
                f"Setting the cache to directory {cache_dir}, "
                f"rather than using a {TemporaryDirectory}. "
                f"Remember to clean this cache yourself after you are done!"
            )
        self.cache_dir = Path(cache_dir)
        # Make sure the directory exists before anything tries to write into it
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def __str__(self):
        return f"A cache, with path {self.cache_dir}"


cache = Cache()
33 changes: 11 additions & 22 deletions winterdrp/data/image_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,19 @@
for more info), and it is not guaranteed that Image objects will
clean themselves.

As a fallback, we provide the helper function to delete all cache files created
during a session. When you run the code from the command line (and therefore call
__main__), we automatically run the cleanup before exiting,
even if the code crashes/raises errors. This is also true for the unit tests,
as provided by the base test class. **If you try to interact with the code in
any other way, please be mindful of this behaviour, and ensure that you clean your
cache in a responsible way!**
As a fallback, when you run the code from the command line (and therefore call
__main__), we use the standard python
`tempfile library <https://docs.python.org/3/library/tempfile.html>`_ to create a
temporary directory, and set this as the cache. We create the directory using a `with`
context manager, ensuring that cleanup runs automatically before exiting,
even if the code crashes/raises errors. We also use `tempfile` and careful cleaning
for the unit tests, as provided by the base test class.
**If you try to interact with the code in any other way, please be mindful of this
behaviour, and ensure that you clean your cache in a responsible way!**

If you don't like this feature, you don't need to use it. Cache mode is entirely
optional, and can be disabled by setting the environment variable to false.

.. literalinclude:: ../../winterdrp/paths.py
:lines: 29

You can change this via an environment variable.

.. code-block:: bash
Expand All @@ -65,7 +64,7 @@
from astropy.time import Time

from winterdrp.data.base_data import DataBatch, DataBlock
from winterdrp.paths import CACHE_DIR, USE_CACHE
from winterdrp.data.cache import USE_CACHE, cache

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -101,7 +100,7 @@ def get_cache_path(self) -> Path:
"""
base = "".join([str(Time.now()), self.get_name()])
name = f"{hashlib.sha1(base.encode()).hexdigest()}.npy"
return CACHE_DIR.joinpath(name)
return cache.get_cache_dir().joinpath(name)

def __str__(self) -> str:
    """Describe the object by its concrete class name and the value of get_name()"""
    return f"<An {self.__class__.__name__} object, built from {self.get_name()}>"
Expand Down Expand Up @@ -226,13 +225,3 @@ def get_batch(self) -> list[Image]:
:return: list of :class:`~winterdrp.data.image_data.Image` objects
"""
return self.get_data_list()


def clean_cache():
    """Delete every file tracked in ``Image.cache_files``, then reset the list

    Files that are already gone are silently ignored.

    :return: None
    """
    tracked_files = Image.cache_files
    for cached_file in tracked_files:
        cached_file.unlink(missing_ok=True)
    Image.cache_files = []
9 changes: 0 additions & 9 deletions winterdrp/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,6 @@
else:
base_output_dir = Path(_base_output_dir)

# Set up cache for image data

# Whether image data may be cached, controlled by the USE_WINTER_CACHE
# environment variable (enabled when the variable is unset). The value is
# compared against explicit "off" spellings because bool() of any non-empty
# string is True, so bool("false") would leave the cache enabled.
USE_CACHE: bool = os.getenv("USE_WINTER_CACHE", "true").strip().lower() not in (
    "",
    "0",
    "false",
    "no",
    "off",
)

CACHE_DIR = base_output_dir.joinpath(f"{package_name}_cache")

# exist_ok avoids the race between a separate exists() check and the creation
CACHE_DIR.mkdir(parents=True, exist_ok=True)


# Set up special directories

Expand Down
9 changes: 7 additions & 2 deletions winterdrp/testing.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
"""
Base class for unit testing, with common cleanup method
"""
import tempfile
import unittest

from winterdrp.data.image_data import clean_cache
from winterdrp.data.cache import cache


class BaseTestCase(unittest.TestCase):
    """Base TestCase object with additional cleanup

    Each instance registers its cleanup callbacks at construction time and
    points the shared ``cache`` object at a temporary directory of its own.
    """

    def __init__(self, *arg, **kwargs):
        super().__init__(*arg, **kwargs)
        # Register clean_cache as a cleanup callback for this test case
        self.addCleanup(clean_cache)
        # Not used as a context manager: the directory has to outlive __init__,
        # and its removal is handled by the cleanup registered at the end
        self.temp_dir = (
            tempfile.TemporaryDirectory()  # pylint: disable=consider-using-with
        )
        # NOTE(review): set_cache_dir is called on the imported `cache` object,
        # so the most recently constructed test case appears to decide which
        # directory is active - confirm this is intended
        cache.set_cache_dir(self.temp_dir)
        self.addCleanup(self.temp_dir.cleanup)