Merge pull request #151 from ral-facilities/DSEGOG-325-Update-ingestion-to-support-epac_ops_data_version-1.1

Handle epac_ops_data_version 1.1: parse bit_depth and shift image bits
ral-facilities · Feb 11, 2025 · 58661a4 · 58661a4
2 parents 6888a97 + 9d7c470
commit 58661a4
Show file tree

Hide file tree

Showing 22 changed files with 756 additions and 169 deletions.
diff --git a/operationsgateway_api/src/models.py b/operationsgateway_api/src/models.py
@@ -46,6 +46,7 @@ def validate_from_str(input_value: str) -> ObjectId:
 class ImageModel(BaseModel):
     path: Optional[Union[str, Any]]
     data: Optional[Union[np.ndarray, Any]]
+    bit_depth: Optional[Union[int, Any]] = None
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
@@ -75,6 +76,29 @@ class ImageChannelMetadataModel(BaseModel):
     x_pixel_units: Optional[Union[str, Any]] = None
     y_pixel_size: Optional[Union[float, Any]] = None
     y_pixel_units: Optional[Union[str, Any]] = None
+    bit_depth: Optional[Union[int, Any]] = None
+
+    @field_validator("bit_depth")
+    @classmethod
+    def validate_bit_depth(cls, bit_depth: "int | Any | None") -> "int | Any | None":
+        """
+        Ensure that we do not attempt to persist a np.integer by the use of Any.
+        While the value from the hdf5 file will (at time of writing) be this type, it
+        cannot be sent to Mongo in the model_dump as it is not a valid JSON type.
+
+        Oddly, this only needs to be done for integers - floats behave as expected, and
+        Pydantic casts np.floating to float upon __init__.
+
+        Args:
+            bit_depth (int | Any | None): Value for bit depth, possibly a np.integer
+
+        Returns:
+            int | Any | None: Value for bit depth, definitely not a np.integer
+        """
+        if isinstance(bit_depth, np.integer):
+            return int(bit_depth)
+        else:
+            return bit_depth
 
 
 class ImageChannelModel(BaseModel):

diff --git a/operationsgateway_api/src/records/export_handler.py b/operationsgateway_api/src/records/export_handler.py
@@ -25,6 +25,7 @@ def __init__(
         projection: List[str],
         lower_level: int,
         upper_level: int,
+        limit_bit_depth: int,
         colourmap_name: str,
         functions: "list[dict[str, str]]",
         export_scalars: bool,
@@ -40,6 +41,7 @@ def __init__(
         self.projection = projection
         self.lower_level = lower_level
         self.upper_level = upper_level
+        self.limit_bit_depth = limit_bit_depth
         self.colourmap_name = colourmap_name
         self.export_scalars = export_scalars
         self.export_images = export_images
@@ -68,6 +70,7 @@ def original_image(self) -> bool:
         return (
             self.lower_level == 0
             and self.upper_level == 255
+            and self.limit_bit_depth == 8
             and self.colourmap_name is None
         )
 
@@ -92,12 +95,13 @@ async def process_records(self) -> None:
 
             if self.functions:
                 await Record.apply_functions(
-                    record_data,
-                    self.functions,
-                    self.original_image,
-                    self.lower_level,
-                    self.upper_level,
-                    self.colourmap_name,
+                    record=record_data,
+                    functions=self.functions,
+                    original_image=self.original_image,
+                    lower_level=self.lower_level,
+                    upper_level=self.upper_level,
+                    limit_bit_depth=self.limit_bit_depth,
+                    colourmap_name=self.colourmap_name,
                     return_thumbnails=False,
                 )
 
@@ -264,12 +268,13 @@ async def _add_image_to_zip(
                 image_bytes = channel["data"]
             else:
                 image_bytes = await Image.get_image(
-                    record_id,
-                    channel_name,
-                    self.original_image,
-                    self.lower_level,
-                    self.upper_level,
-                    self.colourmap_name,
+                    record_id=record_id,
+                    channel_name=channel_name,
+                    original_image=self.original_image,
+                    lower_level=self.lower_level,
+                    upper_level=self.upper_level,
+                    limit_bit_depth=self.limit_bit_depth,
+                    colourmap_name=self.colourmap_name,
                 )
             self.zip_file.writestr(
                 f"{record_id}_{channel_name}.png",

diff --git a/operationsgateway_api/src/records/false_colour_handler.py b/operationsgateway_api/src/records/false_colour_handler.py
@@ -45,6 +45,7 @@ async def get_preferred_colourmap(
     def create_colourbar(
         lower_level: int,
         upper_level: int,
+        limit_bit_depth: int,
         colourmap_name: str,
     ) -> BytesIO:
         """
@@ -60,11 +61,12 @@ def create_colourbar(
             range(256) for _ in range(FalseColourHandler.colourbar_height_pixels)
         ]
         return FalseColourHandler.apply_false_colour(
-            image_array,
-            8,
-            lower_level,
-            upper_level,
-            colourmap_name,
+            image_array=image_array,
+            storage_bit_depth=8,
+            lower_level=lower_level,
+            upper_level=upper_level,
+            limit_bit_depth=limit_bit_depth,
+            colourmap_name=colourmap_name,
         )
 
     @staticmethod
@@ -83,17 +85,19 @@ def apply_false_colour_to_b64_img(
         return FalseColourHandler.apply_false_colour(
             image_array,
             FalseColourHandler.get_pixel_depth(img_src),
-            lower_level,
-            upper_level,
-            colourmap_name,
+            lower_level=lower_level,
+            upper_level=upper_level,
+            limit_bit_depth=8,  # All thumbnails are 8 bit, so limits should be too
+            colourmap_name=colourmap_name,
         )
 
     @staticmethod
     def apply_false_colour(
         image_array: np.ndarray,
-        bits_per_pixel: int,
+        storage_bit_depth: int,
         lower_level: int,
         upper_level: int,
+        limit_bit_depth: int,
         colourmap_name: str,
     ) -> BytesIO:
         """
@@ -102,9 +106,10 @@ def apply_false_colour(
         retrieved as base 64 from the database, or from an image stored on disk.
         """
         vmin, vmax = FalseColourHandler.pixel_limits(
-            bits_per_pixel,
-            lower_level,
-            upper_level,
+            storage_bit_depth=storage_bit_depth,
+            lower_level=lower_level,
+            upper_level=upper_level,
+            limit_bit_depth=limit_bit_depth,
         )
         if colourmap_name is None:
             colourmap_name = FalseColourHandler.default_colour_map_name
@@ -128,39 +133,46 @@ def apply_false_colour(
 
     @staticmethod
     def pixel_limits(
-        bits_per_pixel: int,
+        storage_bit_depth: int,
         lower_level: int,
         upper_level: int,
+        limit_bit_depth: int,
     ) -> "tuple[int, int]":
-        """Adjusts pixel limits to account for the number of `bits_per_pixel`.
+        """Adjusts pixel limits to account for the bit depth the image was actually
+        saved with.
 
         Args:
-            bits_per_pixel (int):
-                Bits of depth to each pixel, such that the max value is
-                `2**bits_per_pixel - 1`
-            lower_level (int): Low pixel value in 8 bit depth
-            upper_level (int): High pixel value in 8 bit depth
+            storage_bit_depth (int):
+                Bit depth of each pixel in the stored format, such that the max value is
+                `2**storage_bit_depth - 1`
+            lower_level (int): Low pixel value in `limit_bit_depth`
+            upper_level (int): High pixel value in `limit_bit_depth`
+            limit_bit_depth (int): The bit depth used for the limit levels provided
 
         Raises:
-            ImageError: If `bits_per_pixel` is neither `8` nor `16`
-            QueryParameterError: If `lower_level` is greater than `upper_level`
+            QueryParameterError:
+                If `lower_level` is greater than `upper_level` or `upper_level` is
+                greater than or equal to 2**`limit_bit_depth`.
 
         Returns:
             tuple[int, int]: The scaled limits
         """
-        if bits_per_pixel != 8 and bits_per_pixel != 16:
-            raise ImageError(f"{bits_per_pixel} bits per pixel is not supported")
 
         if lower_level is None:
             lower_level = 0
+
         if upper_level is None:
-            upper_level = 255
+            upper_level = 2**limit_bit_depth - 1
+        elif upper_level >= 2**limit_bit_depth:
+            msg = "upper_level must be less than 2**limit_bit_depth"
+            raise QueryParameterError(msg)
+
         if upper_level < lower_level:
             raise QueryParameterError(
                 "lower_level must be less than or equal to upperlevel",
             )
 
-        pixel_multiplier = 2 ** (bits_per_pixel - 8)
+        pixel_multiplier = 2 ** (storage_bit_depth - limit_bit_depth)
         vmin = lower_level * pixel_multiplier
         vmax = (upper_level + 1) * pixel_multiplier - 1
         return vmin, vmax

diff --git a/operationsgateway_api/src/records/image.py b/operationsgateway_api/src/records/image.py
@@ -6,7 +6,7 @@
 
 from botocore.exceptions import ClientError
 import numpy as np
-from PIL import Image as PILImage
+from PIL import Image as PILImage, PngImagePlugin
 from pydantic import Json
 
 from operationsgateway_api.src.auth.jwt_handler import JwtHandler
@@ -37,6 +37,40 @@ class Image:
     def __init__(self, image: ImageModel) -> None:
         self.image = image
 
+        if isinstance(self.image.bit_depth, int):
+            bit_depth = self.image.bit_depth
+            if bit_depth <= 8 and self.image.data.dtype != np.uint8:
+                msg = (
+                    "Specified bit depth is lower than actual bit depth with dtype "
+                    "of %s, only data in the %s least significant bits will be kept"
+                )
+                log.warning(msg, self.image.data.dtype, bit_depth)
+            elif bit_depth > 16:
+                log.warning(
+                    "Specified bit depth is higher than the max supported depth of 16, "
+                    "only data in the 16 most significant bits will be kept",
+                )
+        else:
+            if self.image.data.dtype == np.uint8:
+                bit_depth = 8
+            else:
+                # In principle this could be data with any number of bits, but we have
+                # no way of knowing without an explicit bit_depth so default to 16
+                bit_depth = 16
+
+        # We only store data as either 8 or 16 bit PNGs, scale to most significant bits
+        if bit_depth <= 8:
+            target_bit_depth = 8
+            target_dtype = np.uint8
+        else:
+            target_bit_depth = 16
+            target_dtype = np.uint16
+
+        self.image.data = self.image.data.astype(target_dtype)
+        shifted_data = self.image.data * 2 ** (target_bit_depth - bit_depth)
+        # Negative shifts may result in a float output, so cast the type again
+        self.image.data = shifted_data.astype(target_dtype)
+
     def create_thumbnail(self) -> None:
         """
         Using the object's image data, create a thumbnail of the image and store it as
@@ -88,7 +122,14 @@ def upload_image(input_image: Image) -> Optional[str]:
         image_bytes = BytesIO()
         try:
             image = PILImage.fromarray(input_image.image.data)
-            image.save(image_bytes, format="PNG")
+            bit_depth = input_image.image.bit_depth
+            if bit_depth is not None and 0 < bit_depth <= 16:
+                info = PngImagePlugin.PngInfo()
+                sbit = bit_depth.to_bytes(1, byteorder="big")
+                info.add(b"sBIT", sbit)
+                image.save(image_bytes, format="PNG", pnginfo=info)
+            else:
+                image.save(image_bytes, format="PNG")
         except TypeError as exc:
             log.exception(msg=exc)
             raise ImageError("Image data is not in correct format to be read") from exc
@@ -113,6 +154,7 @@ async def get_image(
         original_image: bool,
         lower_level: int,
         upper_level: int,
+        limit_bit_depth: int,
         colourmap_name: str,
     ) -> BytesIO:
         """
@@ -150,13 +192,14 @@ async def get_image(
                 )
                 img_src = PILImage.open(image_bytes)
                 orig_img_array = np.array(img_src)
-
+                storage_bit_depth = FalseColourHandler.get_pixel_depth(img_src)
                 false_colour_image = FalseColourHandler.apply_false_colour(
-                    orig_img_array,
-                    FalseColourHandler.get_pixel_depth(img_src),
-                    lower_level,
-                    upper_level,
-                    colourmap_name,
+                    image_array=orig_img_array,
+                    storage_bit_depth=storage_bit_depth,
+                    lower_level=lower_level,
+                    upper_level=upper_level,
+                    limit_bit_depth=limit_bit_depth,
+                    colourmap_name=colourmap_name,
                 )
                 img_src.close()
                 return false_colour_image

diff --git a/operationsgateway_api/src/records/ingestion/channel_checks.py b/operationsgateway_api/src/records/ingestion/channel_checks.py
@@ -249,6 +249,14 @@ def image_metadata_checks(cls, key, value_dict, rejected_channels):
             rejected_channels.append(
                 {key: "y_pixel_units attribute has wrong datatype"},
             )
+
+        if (
+            "bit_depth" in value_dict
+            and value_dict["bit_depth"] is not None
+            and not isinstance(value_dict["bit_depth"], (int, np.integer))
+        ):
+            rejected_channels.append({key: "bit_depth attribute has wrong datatype"})
+
         return rejected_channels
 
     def optional_dtype_checks(self):

diff --git a/operationsgateway_api/src/records/ingestion/file_checks.py b/operationsgateway_api/src/records/ingestion/file_checks.py
@@ -47,11 +47,11 @@ def epac_data_version_checks(self):
                     raise RejectFileError(
                         "epac_ops_data_version major version was not 1",
                     )
-                if int(epac_numbers[1]) > 0:
+                if int(epac_numbers[1]) > 1:
                     log.warning(
                         "epac_ops_data_version minor version: %s",
                         epac_numbers[1],
                     )
-                    return "File minor version number too high (expected 0)"
+                    return "File minor version number too high (expected <=1)"
         else:
             raise RejectFileError("epac_ops_data_version does not exist")
diff --git a/operationsgateway_api/src/records/ingestion/hdf_handler.py b/operationsgateway_api/src/records/ingestion/hdf_handler.py
@@ -122,14 +122,15 @@ def _extract_image(
             return None, internal_failed_channel
 
         try:
-            self.images.append(
-                ImageModel(path=image_path, data=value["data"][()]),
+            metadata = ImageChannelMetadataModel(**channel_metadata)
+            channel = ImageChannelModel(metadata=metadata, image_path=image_path)
+            image_model = ImageModel(
+                path=image_path,
+                data=value["data"][()],
+                bit_depth=metadata.bit_depth,
             )
+            self.images.append(image_model)
 
-            channel = ImageChannelModel(
-                metadata=ImageChannelMetadataModel(**channel_metadata),
-                image_path=image_path,
-            )
             return channel, False
         except KeyError:
             internal_failed_channel.append(