diff --git a/src/faim_hcs/io/ImageXpress.py b/src/faim_hcs/io/ImageXpress.py index 0c1409b0..b7db6d00 100644 --- a/src/faim_hcs/io/ImageXpress.py +++ b/src/faim_hcs/io/ImageXpress.py @@ -1,19 +1,17 @@ -import os import re from pathlib import Path from typing import Union import pandas as pd -from faim_hcs.io.acquisition import Plate_Acquisition, Well_Acquisition -from faim_hcs.io.MetaSeriesTiff import ( - load_metaseries_tiff, - load_metaseries_tiff_metadata, -) +from faim_hcs.io.acquisition import ChannelMetadata, PlateAcquisition, WellAcquisition +from faim_hcs.io.MetaSeriesTiff import load_metaseries_tiff_metadata from faim_hcs.MetaSeriesUtils import _build_ch_metadata +from faim_hcs.stitching import Tile +from faim_hcs.stitching.Tile import TilePosition -class ImageXpress_Plate_Acquisition(Plate_Acquisition): +class ImageXpressPlateAcquisition(PlateAcquisition): _METASERIES_FILENAME_PATTERN = r"(?P<name>.*)_(?P<well>[A-Z]+\d{2})_(?P<field>s\d+)_(?P<channel>w[1-9]{1})(?!_thumb)(?P<md_id>.*)(?P<ext>.tif)" _METASERIES_FOLDER_PATTERN = r".*[\/\\](?P<date>\d{4}-\d{2}-\d{2})[\/\\](?P<acq_id>\d+)(?:[\/\\]ZStep_(?P<z>\d+))?.*" _METASERIES_MAIN_FOLDER_PATTERN = ( @@ -21,14 +19,27 @@ class ImageXpress_Plate_Acquisition(Plate_Acquisition): ) _METASERIES_ZSTEP_FOLDER_PATTERN = r".*[\/\\](?P<date>\d{4}-\d{2}-\d{2})[\/\\](?P<acq_id>\d+)[\/\\]ZStep_(?P<z>\d+).*" - def wells(self): - if self._wells is None: - self._populate_wells() - return self._wells + def __init__( + self, acquisition_dir: Union[Path, str], mode: str = "top-level" + ) -> None: + self.mode = mode + super().__init__(acquisition_dir=acquisition_dir) - def well_acquisitions(self): # TODO consider moving this logic in parent class - for well in self.wells(): - yield ImageXpress_Well_Acquisition( + def _get_root_re(self) -> re.Pattern: + if self.mode == "top-level": + root_pattern = self._METASERIES_MAIN_FOLDER_PATTERN + elif self.mode == "z-steps": + root_pattern = self._METASERIES_ZSTEP_FOLDER_PATTERN + else: + root_pattern = self._METASERIES_FOLDER_PATTERN + return
re.compile(root_pattern) + + def _get_filename_re(self) -> re.Pattern: + return re.compile(self._METASERIES_FILENAME_PATTERN) + + def well_acquisitions(self): + for well in self.wells: + yield ImageXpressWellAcquisition( files=self._files[self._files["well"] == well], ch_metadata=self.channels(), ) @@ -37,109 +48,55 @@ def channels(self): ch_metadata = [] for ch in self._files["channel"].unique(): ch_metadata.append(self._ch_metadata(ch)) - # also include tile size (ny, nx) return ch_metadata def _ch_metadata(self, channel): # Read first image of channel path = self._files[self._files["channel"] == channel]["path"].iloc[0] - data, metadata = load_metaseries_tiff(path=path) + metadata = load_metaseries_tiff_metadata(path=path) _channel_metadata = _build_ch_metadata(metadata) - return { - "channel-index": None, - "channel-name": channel, - "display-color": _channel_metadata["display-color"], - "pixel-type": metadata["PixelType"], - "spatial-calibration-x": metadata["spatial-calibration-x"], - "spatial-calibration-y": metadata["spatial-calibration-y"], - "spatial-calibration-units": metadata["spatial-calibration-units"], - "z-scaling": None, - "unit": None, - "wavelength": _channel_metadata["wavelength"], - "exposure-time": _channel_metadata["exposure-time"], - "exposure-time-unit": _channel_metadata["exposure-time-unit"], - "objective": metadata["_MagSetting_"], - "tile-size-x": data.shape[-1], - "tile-size-y": data.shape[-2], - } - - def _populate_wells(self): - if self._files is None: - self._parse_files() - self._wells = sorted(self._files["well"].unique()) - - def _parse_files(self): - if self.mode == "top-level": - root_pattern = self._METASERIES_MAIN_FOLDER_PATTERN - elif self.mode == "z-steps": - root_pattern = self._METASERIES_ZSTEP_FOLDER_PATTERN - else: - root_pattern = self._METASERIES_FOLDER_PATTERN - self._files = pd.DataFrame( - self._list_dataset_files( - root_dir=self.acquisition_dir, - root_re=re.compile(root_pattern), - 
filename_re=re.compile(self._METASERIES_FILENAME_PATTERN), - ) + return ChannelMetadata( + channel_index=None, + channel_name=channel, + display_color=_channel_metadata["display-color"], + spatial_calibration_x=metadata["spatial-calibration-x"], + spatial_calibration_y=metadata["spatial-calibration-y"], + spatial_calibration_units=metadata["spatial-calibration-units"], + z_scaling=None, + unit=None, + wavelength=_channel_metadata["wavelength"], + exposure_time=_channel_metadata["exposure-time"], + exposure_time_unit=_channel_metadata["exposure-time-unit"], + objective=metadata["_MagSetting_"], ) - def _list_dataset_files( - self, - root_dir: Union[Path, str], - root_re: re.Pattern, - filename_re: re.Pattern, - ) -> list[str]: - files = [] - for root, _, filenames in os.walk(root_dir): - m_root = root_re.fullmatch(root) - if m_root: - for f in filenames: - m_filename = filename_re.fullmatch(f) - if m_filename: - row = m_root.groupdict() - row |= m_filename.groupdict() - row["path"] = str(Path(root).joinpath(f)) - files.append(row) - return files - - -class ImageXpress_Well_Acquisition(Well_Acquisition): - def files(self) -> pd.DataFrame: - return self._files - def positions(self) -> pd.DataFrame: - if self._positions is None: - self._parse_positions() - return self._positions - - def roi_tables(self) -> list[dict]: - pass # TODO - - def _parse_positions(self): - path = [] - pos_x = [] - pos_y = [] - pos_z = [] - for file in self.files()["path"]: - path.append(file) - x, y, z = self._get_position(file) - pos_x.append(x) - pos_y.append(y) - pos_z.append(z) - self._positions = pd.DataFrame( - { - "path": path, - "pos_x": pos_x, - "pos_y": pos_y, - "pos_z": pos_z, - } - ) - - def _get_position(self, file): - metadata = load_metaseries_tiff_metadata(file) - return ( - metadata["stage-position-x"], - metadata["stage-position-y"], - metadata["z-position"], - ) +class ImageXpressWellAcquisition(WellAcquisition): + _tiles: list[Tile] = None + + def __init__(self, files: 
pd.DataFrame, ch_metadata: pd.DataFrame) -> None: + super().__init__(files=files, ch_metadata=ch_metadata) + self._tiles = self._parse_tiles() + + def _parse_tiles(self) -> list[Tile]: + tiles = [] + for i, row in self._files.iterrows(): + file = row["path"] + channel_index = row["channel"] + time_point = 0 + metadata = load_metaseries_tiff_metadata(file) + tiles.append( + Tile( + path=file, + shape=(metadata["pixel-size-y"], metadata["pixel-size-x"]), + position=TilePosition( + time=time_point, + channel=channel_index, + z=metadata["z-position"], + y=metadata["stage-position-y"], + x=metadata["stage-position-x"], + ), + ) + ) + return tiles diff --git a/src/faim_hcs/io/MetaSeriesTiff.py b/src/faim_hcs/io/MetaSeriesTiff.py index 478f4db3..08beb2c0 100644 --- a/src/faim_hcs/io/MetaSeriesTiff.py +++ b/src/faim_hcs/io/MetaSeriesTiff.py @@ -8,6 +8,8 @@ def load_metaseries_tiff_metadata(path: Path) -> tuple[ArrayLike, dict]: """Load parts of the metadata of a metaseries tiff file. The following metadata is collected: + * pixel-size-x + * pixel-size-y * _IllumSetting_ * spatial-calibration-x * spatial-calibration-y @@ -38,6 +40,8 @@ def load_metaseries_tiff_metadata(path: Path) -> tuple[ArrayLike, dict]: with tifffile.TiffFile(path) as tiff: assert tiff.is_metaseries, f"{path} is not a metamorph file." 
selected_keys = [ + "pixel-size-x", + "pixel-size-y", "_IllumSetting_", "spatial-calibration-x", "spatial-calibration-y", diff --git a/src/faim_hcs/io/acquisition.py b/src/faim_hcs/io/acquisition.py index 60e5cfb4..8fa33cba 100644 --- a/src/faim_hcs/io/acquisition.py +++ b/src/faim_hcs/io/acquisition.py @@ -1,52 +1,84 @@ +import os +import re from abc import ABC, abstractmethod from pathlib import Path -from typing import Union +from typing import Optional, Union import dask.array as da import numpy as np import pandas as pd +from pydantic import BaseModel, NonNegativeInt, PositiveFloat, PositiveInt +from faim_hcs.stitching import Tile -class Plate_Acquisition(ABC): + +class PlateAcquisition(ABC): _acquisition_dir = None _files = None - _wells = None - _channels = None + wells = None - def __init__(self, acquisition_dir: Union[Path, str], mode: str = None) -> None: - self.acquisition_dir = acquisition_dir - self.mode = mode + def __init__(self, acquisition_dir: Union[Path, str]) -> None: + self._acquisition_dir = acquisition_dir + self._files = self._parse_files() + self.wells = self._get_wells() super().__init__() + def _parse_files(self) -> pd.DataFrame: + """Parse all files in the acquisition directory. + + Returns + ------- + DataFrame + Table of all files in the acquisition. 
+ """ + return pd.DataFrame( + self._list_and_match_files( + root_dir=self._acquisition_dir, + root_re=self._get_root_re(), + filename_re=self._get_filename_re(), + ) + ) + + def _list_and_match_files( + self, + root_dir: Union[Path, str], + root_re: re.Pattern, + filename_re: re.Pattern, + ) -> list[str]: + files = [] + for root, _, filenames in os.walk(root_dir): + m_root = root_re.fullmatch(root) + if m_root: + for f in filenames: + m_filename = filename_re.fullmatch(f) + if m_filename: + row = m_root.groupdict() + row |= m_filename.groupdict() + row["path"] = str(Path(root).joinpath(f)) + files.append(row) + return files + + @abstractmethod + def _get_root_re(self) -> re.Pattern: + """Regular expression for matching the root directory of the acquisition.""" + raise NotImplementedError() + @abstractmethod - def wells(self): + def _get_filename_re(self) -> re.Pattern: + """Regular expression for matching the filename of the acquisition.""" + raise NotImplementedError() + + def _get_wells(self) -> list["WellAcquisition"]: """List of wells.""" + return sorted(self._files["well"].unique()) @abstractmethod def well_acquisitions(self): """Iterator over Well_Acquisition objects.""" - - @abstractmethod - def channels(self) -> pd.DataFrame: - """Table of channels with their metadata. - - Dataframe columns: - * channel-index - * channel-name - * display-color - * pixel-type - * spatial-calibration-x - * spatial-calibration-y - * [z-scaling] - * [unit] - * [wavelength] - * [exposure-time] - * [exposure-time-unit] - * [objective] - """ + raise NotImplementedError() -class Well_Acquisition(ABC): +class WellAcquisition(ABC): _files = None _positions: pd.DataFrame = None _channel_metadata = None @@ -57,19 +89,9 @@ def __init__(self, files: pd.DataFrame, ch_metadata: pd.DataFrame) -> None: super().__init__() @abstractmethod - def files(self) -> pd.DataFrame: - """Table of all files contained in the acquisition. - - Subsets of the acquisition files depending on 'mode'. 
- - Dataframe columns: - * index - * path - * channel - * well - * field - * *(more optional) - """ + def _parse_tiles(self) -> list[Tile]: + """Parse all tiles in the well.""" + raise NotImplementedError() @abstractmethod def positions(self) -> pd.DataFrame: @@ -118,3 +140,52 @@ def roi_tables(self) -> list[dict]: * len_y_micrometer * len_z_micrometer """ + + +class TileMetadata(BaseModel): + tile_size_x: PositiveInt + tile_size_y: PositiveInt + + +class ChannelMetadata(BaseModel): + channel_index: Optional[NonNegativeInt] + channel_name: str + display_color: str + spatial_calibration_x: float + spatial_calibration_y: float + spatial_calibration_units: str + z_scaling: Optional[PositiveFloat] + unit: Optional[str] + wavelength: PositiveInt + exposure_time: PositiveFloat + exposure_time_unit: str + objective: str + + def __init__( + self, + channel_index: Optional[NonNegativeInt], + channel_name: str, + display_color: str, + spatial_calibration_x: float, + spatial_calibration_y: float, + spatial_calibration_units: str, + z_scaling: Optional[PositiveFloat], + unit: Optional[str], + wavelength: PositiveInt, + exposure_time: PositiveFloat, + exposure_time_unit: str, + objective: str, + ): + super().__init__() + self.channel_index = channel_index + self.channel_name = channel_name + self.display_color = display_color + self.spatial_calibration_x = spatial_calibration_x + self.spatial_calibration_y = spatial_calibration_y + self.spatial_calibration_units = spatial_calibration_units + self.z_scaling = z_scaling + self.unit = unit + self.wavelength = wavelength + self.exposure_time = exposure_time + self.exposure_time_unit = exposure_time_unit + self.objective = objective diff --git a/tests/io/test_ImageXpress.py b/tests/io/test_ImageXpress.py index eff2223e..83609394 100644 --- a/tests/io/test_ImageXpress.py +++ b/tests/io/test_ImageXpress.py @@ -2,8 +2,8 @@ import pytest -from faim_hcs.io.acquisition import Plate_Acquisition, Well_Acquisition -from 
faim_hcs.io.ImageXpress import ImageXpress_Plate_Acquisition +from faim_hcs.io.acquisition import PlateAcquisition, WellAcquisition +from faim_hcs.io.ImageXpress import ImageXpressPlateAcquisition @pytest.fixture @@ -13,11 +13,11 @@ def acquisition_dir(): @pytest.fixture def acquisition(acquisition_dir): - return ImageXpress_Plate_Acquisition(acquisition_dir) + return ImageXpressPlateAcquisition(acquisition_dir) -def test_default(acquisition: Plate_Acquisition): - wells = acquisition.wells() +def test_default(acquisition: PlateAcquisition): + wells = acquisition._get_wells() assert wells is not None assert len(wells) == 2 @@ -32,7 +32,7 @@ def test_default(acquisition: Plate_Acquisition): y_spacing = channels[0]["spatial-calibration-y"] for well_acquisition in well_acquisitions: - assert isinstance(well_acquisition, Well_Acquisition) + assert isinstance(well_acquisition, WellAcquisition) assert len(well_acquisition.files()) == 48 files = well_acquisition.files()