From 8aceeea1bd1549008f404e674288e1c11e7971c1 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 6 Feb 2025 17:31:25 +0100 Subject: [PATCH] WIP: try to catch racing conditions --- src/f3dasm/_src/_io.py | 2 +- src/f3dasm/_src/design/domain.py | 10 +++-- src/f3dasm/_src/errors.py | 19 +++++++++ src/f3dasm/_src/experimentdata.py | 44 ++++++++++++-------- tests/experimentdata/test_experimentdata2.py | 23 ++++++---- 5 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 src/f3dasm/_src/errors.py diff --git a/src/f3dasm/_src/_io.py b/src/f3dasm/_src/_io.py index 0f36ab14..e011be60 100644 --- a/src/f3dasm/_src/_io.py +++ b/src/f3dasm/_src/_io.py @@ -42,7 +42,7 @@ JOBS_FILENAME = "jobs" RESOLUTION_MATPLOTLIB_FIGURE = 300 -MAX_TRIES = 10 +MAX_TRIES = 20 # Storing methods # ============================================================================= diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index c08f4419..cff6b844 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -20,6 +20,7 @@ from omegaconf import DictConfig, OmegaConf # Local +from ..errors import EmptyFileError from .parameter import (CategoricalParameter, CategoricalType, ConstantParameter, ContinuousParameter, DiscreteParameter, LoadFunction, Parameter, @@ -193,13 +194,16 @@ def from_file(cls: Type[Domain], filename: Path | str) -> Domain: >>> domain = Domain.from_json('domain.json') """ # convert filename to Path object - filename = Path(filename) + filename = Path(filename).with_suffix('.json') # Check if filename exists - if not filename.with_suffix('.json').exists(): + if not filename.exists(): raise FileNotFoundError(f"Domain file {filename} does not exist.") - with open(filename.with_suffix('.json'), 'r') as f: + if filename.stat().st_size == 0: + raise EmptyFileError(filename) + + with open(filename, 'r') as f: domain_dict = json.load(f) input_space = {k: Parameter.from_dict( diff --git a/src/f3dasm/_src/errors.py b/src/f3dasm/_src/errors.py new file mode 100644 index 00000000..371030fd --- /dev/null +++ b/src/f3dasm/_src/errors.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from pathlib import Path + + +class EmptyFileError(Exception): + """Exception raised when a file exists but is empty.""" + + def __init__(self, file_path: str | Path, message: str = "File is empty"): + """ + Initializes the EmptyFileError. + + Args: + file_path (str | Path): The path to the empty file. + message (str): A custom error message. + """ + self.file_path = Path(file_path) # Ensure it's a Path object + self.message = f"{message}: {self.file_path}" + super().__init__(self.message) diff --git a/src/f3dasm/_src/experimentdata.py b/src/f3dasm/_src/experimentdata.py index 7e29e88f..6a8ec68f 100644 --- a/src/f3dasm/_src/experimentdata.py +++ b/src/f3dasm/_src/experimentdata.py @@ -11,6 +11,7 @@ # Standard import functools +import random from collections import defaultdict from copy import copy from functools import partial @@ -35,6 +36,7 @@ from .core import Block, DataGenerator from .datageneration import _datagenerator_factory from .design import Domain, _domain_factory, _sampler_factory +from .errors import EmptyFileError from .experimentsample import ExperimentSample from .logger import logger from .optimization import _optimizer_factory @@ -299,17 +301,10 @@ def wrapper_func(project_dir: Path, *args, **kwargs) -> None: with lock: tries = 0 while tries < MAX_TRIES: - # try: - # print(f"{args=}, {kwargs=}") - # self = ExperimentData.from_file(project_dir) - # value = operation(*args, **kwargs) - # self.store() - # break try: # Load a fresh instance of ExperimentData from file loaded_self = ExperimentData.from_file( self.project_dir) - # Call the operation with the loaded instance # Replace the self in args with the loaded instance # Modify the first argument @@ -317,17 +312,16 @@ def wrapper_func(project_dir: Path, *args, **kwargs) -> None: value = operation(*args, **kwargs) loaded_self.store() break - # Racing conditions can occur when the file is empty # and the file is being read at the same time - except pd.errors.EmptyDataError: + except EmptyFileError: tries += 1 logger.debug(( f"EmptyDataError occurred, retrying" f" {tries+1}/{MAX_TRIES}")) - sleep(1) + sleep(random.uniform(0.5, 2.5)) - raise pd.errors.EmptyDataError() + raise EmptyFileError(self.project_dir) return value @@ -1662,9 +1656,17 @@ def _dict_factory(data: pd.DataFrame | List[Dict[str, Any]] | None | Path | str return [] elif isinstance(data, (Path, str)): - return _dict_factory(pd.read_csv( - Path(data).with_suffix('.csv'), - header=0, index_col=0)) + filepath = Path(data).with_suffix('.csv') + + if not filepath.exists(): + raise FileNotFoundError(f"File {filepath} not found") + + if filepath.stat().st_size == 0: + raise EmptyFileError(filepath) + + df = pd.read_csv(filepath, header=0, index_col=0) + + return _dict_factory(df) # check if data is already a list of dicts elif isinstance(data, list) and all(isinstance(d, dict) for d in data): @@ -1741,9 +1743,17 @@ def jobs_factory(jobs: pd.Series | str | Path | None) -> pd.Series: return pd.Series() elif isinstance(jobs, (Path, str)): - df = pd.read_csv( - Path(jobs).with_suffix('.csv'), - header=0, index_col=0).squeeze() + + filepath = Path(jobs).with_suffix('.csv') + + if not filepath.exists(): + raise FileNotFoundError(f"File {filepath} not found") + + if filepath.stat().st_size == 0: + raise EmptyFileError(filepath) + + df = pd.read_csv(filepath, + header=0, index_col=0).squeeze() # If the jobs is jut one value, it is parsed as a string # So, make sure that we return a pd.Series either way! if not isinstance(df, pd.Series): diff --git a/tests/experimentdata/test_experimentdata2.py b/tests/experimentdata/test_experimentdata2.py index 4fecc56b..28dda60e 100644 --- a/tests/experimentdata/test_experimentdata2.py +++ b/tests/experimentdata/test_experimentdata2.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest.mock import MagicMock, patch import numpy as np import pandas as pd @@ -245,10 +246,14 @@ def mock_domain_from_file(path, *args, **kwargs): monkeypatch.setattr(pd, 'read_csv', mock_read_csv) monkeypatch.setattr(Domain, 'from_file', mock_domain_from_file) + mock_stat = MagicMock() + mock_stat.st_size = 10 # Non Empty file - experiment_data = ExperimentData(domain=domain, input_data=input_data, - output_data=output_data, jobs=jobs, - project_dir=project_dir) + with patch.object(Path, "exists", return_value=True), \ + patch.object(Path, "stat", return_value=mock_stat): + experiment_data = ExperimentData(domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs, + project_dir=project_dir) if domain is None: experiment_data.domain = edata_domain_with_output() @@ -280,10 +285,14 @@ def mock_domain_from_file(path, *args, **kwargs): monkeypatch.setattr(pd, 'read_csv', mock_read_csv) monkeypatch.setattr(Domain, 'from_file', mock_domain_from_file) - - experiment_data = ExperimentData(domain=domain, input_data=input_data, - jobs=jobs, - project_dir=project_dir) + mock_stat = MagicMock() + mock_stat.st_size = 10 # Non Empty file + + with patch.object(Path, "exists", return_value=True), \ + patch.object(Path, "stat", return_value=mock_stat): + experiment_data = ExperimentData(domain=domain, input_data=input_data, + jobs=jobs, + project_dir=project_dir) if domain is None: experiment_data.domain = edata_domain_without_output()