Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvement of the data management and naming of containers files #181

Merged
merged 5 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions notebooks/tool_implementation/tuto_photostat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,18 @@
import os
import pathlib

import matplotlib.pyplot as plt

from nectarchain.data.management import DataManagement
from nectarchain.makers.calibration import PhotoStatisticNectarCAMCalibrationTool
from nectarchain.makers.extractor.utils import CtapipeExtractor

logging.basicConfig(
format="%(asctime)s %(name)s %(levelname)s %(message)s", level=logging.INFO
)
log = logging.getLogger(__name__)
log.handlers = logging.getLogger("__main__").handlers

import matplotlib.pyplot as plt

from nectarchain.data.management import DataManagement
from nectarchain.makers.calibration import PhotoStatisticNectarCAMCalibrationTool
from nectarchain.makers.extractor.utils import CtapipeExtractor

# %%
extractor_kwargs = {"window_width": 12, "window_shift": 4}
Expand All @@ -42,23 +43,31 @@
FF_run_number = 3937

# %%
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(extractor_kwargs)
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
method=method, extractor_kwargs=extractor_kwargs
)
path = DataManagement.find_SPE_HHV(
run_number=HHV_run_number,
method=method,
str_extractor_kwargs=str_extractor_kwargs,
)
if len(path) == 1:
log.info(
f"{path[0]} found associated to HHV run {HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
f"{path[0]} found associated to HHV run {HHV_run_number},"
f"method {method} and extractor kwargs {str_extractor_kwargs}"
)
else:
_text = f"no file found in $NECTARCAM_DATA/../SPEfit associated to HHV run {HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
_text = (
f"no file found in $NECTARCAM_DATA/../SPEfit associated to HHV run"
f"{HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
)
log.error(_text)
raise FileNotFoundError(_text)

# %% [markdown]
# WARNING : for now you can't split the event loop into slices for the Photo-statistic method; however, if the charges haven't been computed on disk, the loop over events will only store the charge, so memory errors should happen rarely
# WARNING : for now you can't split the event loop into slices for the Photo-statistic
# method; however, if the charges haven't been computed on disk, the loop over
# events will only store the charge, so memory errors should happen rarely

# %%
tool = PhotoStatisticNectarCAMCalibrationTool(
Expand Down
142 changes: 87 additions & 55 deletions src/nectarchain/data/management.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
break

if i == len(lines) - 1:
e = Exception("lfns not found on GRID")
e = FileNotFoundError("lfns not found on GRID")

Check warning on line 233 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L233

Added line #L233 was not covered by tests
log.error(e, exc_info=True)
log.debug(lines)
raise e
Expand Down Expand Up @@ -283,16 +283,17 @@
ped_method="FullWaveformSum",
str_extractor_kwargs="",
):
full_file = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/PhotoStat/"
f"PhotoStatisticNectarCAM_FFrun{FF_run_number}_{FF_method}"
f"_{str_extractor_kwargs}_Pedrun{ped_run_number}_{ped_method}.h5"
).__str__()
path = pathlib.Path(

Check warning on line 286 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L286

Added line #L286 was not covered by tests
f"{os.environ.get('NECTARCAMDATA','/tmp')}/PhotoStat/"
f"PhotoStatisticNectarCAM_FFrun{FF_run_number}_{FF_method}"
f"_{str_extractor_kwargs}_Pedrun{ped_run_number}_{ped_method}.h5"
)
full_file = glob.glob(str(path))

Check warning on line 291 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L291

Added line #L291 was not covered by tests
log.debug("for now it does not check if there are files with max events")
if len(full_file) != 1:
raise Exception(f"the files is {full_file}")
raise FileNotFoundError(

Check warning on line 294 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L294

Added line #L294 was not covered by tests
f"When looking for {str(path)} : the found files are {full_file}"
)
return full_file

@staticmethod
Expand Down Expand Up @@ -328,67 +329,98 @@
):
keyword = kwargs.get("keyword", "FlatFieldSPEHHV")
std_key = "" if free_pp_n else "Std"
full_file = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}*_{method}"
f"_{str_extractor_kwargs}.h5"
).__str__()
path = pathlib.Path(

Check warning on line 332 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L332

Added line #L332 was not covered by tests
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}*_{method}"
f"_{str_extractor_kwargs}.h5"
)
# need to improve the file search !!
# -> unstable behavior with SPE results computed
# with maxevents not set to None
if len(full_file) != 1:
all_files = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"FlatFieldSPEHHVStdNectarCAM_run{run_number}_maxevents*_"
f"{method}_{str_extractor_kwargs}.h5"
).__str__()
full_file = glob.glob(str(path))
if len(full_file) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")
elif len(full_file) > 1:
log.debug(f"Several files found for {str(path)} : {full_file}")
for file in full_file:
if "maxevents" not in file:
log.debug(

Check warning on line 344 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L337-L344

Added lines #L337 - L344 were not covered by tests
f"File found with the most important"
f"number of events for {str(path)} : {file}"
)
return file
path = pathlib.Path(

Check warning on line 349 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L348-L349

Added lines #L348 - L349 were not covered by tests
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}_maxevents*_"
f"{method}_{str_extractor_kwargs}.h5"
)
max_events = 0
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
max_events = _max_events
index = i
return [all_files[index]]
all_files = glob.glob(str(path))
if len(all_files) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")

Check warning on line 356 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L354-L356

Added lines #L354 - L356 were not covered by tests
else:
log.debug(f"Files found for {str(path)} : {all_files}")
max_events = 0
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
max_events = _max_events
index = i
log.debug(f"Best file found : {all_files[index]}")
return [all_files[index]]

Check warning on line 370 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L358-L370

Added lines #L358 - L370 were not covered by tests
else:
log.debug(f"File found for {str(path)} : {full_file}")

Check warning on line 372 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L372

Added line #L372 was not covered by tests
return full_file

@staticmethod
def __find_computed_data(
run_number, max_events=None, ext=".h5", data_type="waveforms"
):
out = glob.glob(
pathlib.Path(
if max_events is not None:
path = pathlib.Path(

Check warning on line 380 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L380

Added line #L380 was not covered by tests
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}_maxevents*{ext}"
)
else:
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}{ext}"
).__str__()
)
if not (max_events is None):
all_files = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}_maxevents*{ext}"
).__str__()
)
best_max_events = np.inf
best_index = None
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
out = glob.glob(str(path))
if len(out) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")
elif len(out) > 1:
if max_events is None:
raise FileExistsError(f"Several files found for {str(path)} : {out}")

Check warning on line 394 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L392-L394

Added lines #L392 - L394 were not covered by tests
else:
log.debug(

Check warning on line 396 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L396

Added line #L396 was not covered by tests
f"Several files found for {str(path)} : {out},"
f"will look for the most complete one"
)
best_max_events = np.inf
best_index = None
for i, file in enumerate(out):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
if _max_events < best_max_events:
best_max_events = _max_events
best_index = i
if best_index is not None:
out = [out[best_index]]

Check warning on line 413 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L400-L413

Added lines #L400 - L413 were not covered by tests
else:
if max_events is not None:
data = out[0].split("/")[-1].split(".h5")[0].split("_")

Check warning on line 416 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L415-L416

Added lines #L415 - L416 were not covered by tests
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
if _max_events < best_max_events:
best_max_events = _max_events
best_index = i
if not (best_index is None):
out = [all_files[best_index]]
if _max_events < max_events:
raise FileNotFoundError(

Check warning on line 422 in src/nectarchain/data/management.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/data/management.py#L421-L422

Added lines #L421 - L422 were not covered by tests
f"File found for {str(path)} : {out[0]} has less events "
f"than max_events asked {max_events}"
)
return out
62 changes: 37 additions & 25 deletions src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ....data.container import ChargesContainer, ChargesContainers
from ....data.container.core import merge_map_ArrayDataContainer
from ....data.management import DataManagement
from ....utils.error import TooMuchFileException
from ...component import ArrayDataComponent, NectarCAMComponent
from ...extractor.utils import CtapipeExtractor
from .core import GainNectarCAMCalibrationTool
Expand Down Expand Up @@ -46,25 +47,37 @@
super().__init__(*args, **kwargs)

str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if not (self.reload_events):
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
if len(files) == 1:
try:
files = DataManagement.find_charges(

Check warning on line 55 in src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py#L54-L55

Added lines #L54 - L55 were not covered by tests
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
if len(files) == 1:
log.warning(

Check warning on line 62 in src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py#L61-L62

Added lines #L61 - L62 were not covered by tests
"You asked events_per_slice but you don't want to\
reload events and a charges file is on disk, \
then events_per_slice is set to None"
)
self.events_per_slice = None

Check warning on line 67 in src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py#L67

Added line #L67 was not covered by tests
else:
raise TooMuchFileException("No single charges file found")
except (FileNotFoundError, TooMuchFileException) as e:
log.warning(e)

Check warning on line 71 in src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py#L69-L71

Added lines #L69 - L71 were not covered by tests
log.warning(
"You asked events_per_slice but you don't want to reload events and\
a charges file is on disk, then events_per_slice is set to None"
"You will not be able to reload charges from\
disk when start() call"
)
self.events_per_slice = None

def _init_output_path(self):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if self.events_per_slice is None:
ext = ".h5"
Expand Down Expand Up @@ -94,14 +107,19 @@
**kwargs,
):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
)
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
extractor_kwargs=self.extractor_kwargs,
)
try:
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
except Exception as e:
log.warning(e)
files = []
if self.reload_events or len(files) != 1:
if len(files) != 1:
self.log.info(
Expand Down Expand Up @@ -135,7 +153,7 @@
self.components[
0
]._chargesContainers = merge_map_ArrayDataContainer(
chargesContainers
next(chargesContainers)
)
else:
self.log.info("merging along slices")
Expand All @@ -152,12 +170,6 @@
)

def _write_container(self, container: Container, index_component: int = 0) -> None:
# if isinstance(container,SPEfitContainer) :
# self.writer.write(table_name = f"{self.method}_
# {CtapipeExtractor.get_extractor_kwargs_str(self.extractor_kwargs)}",
# containers = container,
# )
# else :
super()._write_container(container=container, index_component=index_component)


Expand Down
32 changes: 21 additions & 11 deletions src/nectarchain/makers/calibration/gain/photostat_makers.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@

def _init_output_path(self):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if self.max_events is None:
filename = (
Expand Down Expand Up @@ -96,18 +97,27 @@
**kwargs,
):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
)
FF_files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
Ped_files = DataManagement.find_charges(
run_number=self.Ped_run_number,
max_events=self.max_events,
extractor_kwargs=self.extractor_kwargs,
)
try:
FF_files = DataManagement.find_charges(

Check warning on line 104 in src/nectarchain/makers/calibration/gain/photostat_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/photostat_makers.py#L103-L104

Added lines #L103 - L104 were not covered by tests
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
except Exception as e:
self.log.warning(e)
FF_files = []
try:
Ped_files = DataManagement.find_charges(

Check warning on line 114 in src/nectarchain/makers/calibration/gain/photostat_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/photostat_makers.py#L110-L114

Added lines #L110 - L114 were not covered by tests
run_number=self.Ped_run_number,
max_events=self.max_events,
)
except Exception as e:
self.log.warning(e)
Ped_files = []

Check warning on line 120 in src/nectarchain/makers/calibration/gain/photostat_makers.py

View check run for this annotation

Codecov / codecov/patch

src/nectarchain/makers/calibration/gain/photostat_makers.py#L118-L120

Added lines #L118 - L120 were not covered by tests
if self.reload_events or len(FF_files) != 1 or len(Ped_files) != 1:
if len(FF_files) != 1 or len(Ped_files) != 1:
self.log.info(
Expand Down
Loading