Skip to content

Commit

Permalink
Merge pull request #5265 from voxel51/merge/release/v1.2.0
Browse files Browse the repository at this point in the history
Merge `release/v1.2.0` to `develop`
  • Loading branch information
findtopher authored Dec 13, 2024
2 parents 79b8395 + 81336a0 commit d58f23d
Show file tree
Hide file tree
Showing 13 changed files with 450 additions and 128 deletions.
19 changes: 19 additions & 0 deletions app/packages/looker/src/lookers/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { describe, expect, it } from "vitest";
import type { Buffers } from "../state";
import { hasFrame } from "./utils";

describe("looker utilities", () => {
it("determines frame availability given a buffer list", () => {
const BUFFERS: Buffers = [
[1, 3],
[5, 25],
];
for (const frameNumber of [1, 10, 25]) {
expect(hasFrame(BUFFERS, frameNumber)).toBe(true);
}

for (const frameNumber of [0, 4, 26]) {
expect(hasFrame(BUFFERS, frameNumber)).toBe(false);
}
});
});
7 changes: 7 additions & 0 deletions app/packages/looker/src/lookers/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import type { Buffers } from "../state";

/**
 * Returns whether `frameNumber` falls within any of the buffered
 * [start, end] ranges (bounds inclusive).
 */
export const hasFrame = (buffers: Buffers, frameNumber: number) => {
  for (const [start, end] of buffers) {
    if (start <= frameNumber && frameNumber <= end) {
      return true;
    }
  }
  return false;
};
9 changes: 2 additions & 7 deletions app/packages/looker/src/lookers/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { addToBuffers, removeFromBuffers } from "../util";
import { AbstractLooker } from "./abstract";
import { type Frame, acquireReader, clearReader } from "./frame-reader";
import { LookerUtils, withFrames } from "./shared";
import { hasFrame } from "./utils";

let LOOKER_WITH_READER: VideoLooker | null = null;

Expand Down Expand Up @@ -394,13 +395,7 @@ export class VideoLooker extends AbstractLooker<VideoState, VideoSample> {
}

private hasFrame(frameNumber: number) {
if (frameNumber === this.firstFrameNumber) {
return this.firstFrame;
}
return (
this.frames.has(frameNumber) &&
this.frames.get(frameNumber)?.deref() !== undefined
);
return hasFrame(this.state.buffers, frameNumber);
}

private getFrame(frameNumber: number) {
Expand Down
43 changes: 43 additions & 0 deletions app/packages/looker/src/worker/canvas-decoder.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, it } from "vitest";
import { isGrayscale } from "./canvas-decoder";

// Flattens a list of RGBA tuples into a contiguous pixel buffer,
// matching the layout produced by canvas ImageData.
const createData = (
  pixels: Array<[number, number, number, number]>
): Uint8ClampedArray => {
  const flat: number[] = [];
  for (const pixel of pixels) {
    flat.push(...pixel);
  }
  return new Uint8ClampedArray(flat);
};

describe("isGrayscale", () => {
it("should return true for a perfectly grayscale image", () => {
const data = createData(Array(100).fill([100, 100, 100, 255]));
expect(isGrayscale(data)).toBe(true);
});

it("should return false if alpha is not 255", () => {
const data = createData([
[100, 100, 100, 255],
[100, 100, 100, 254],
...Array(98).fill([100, 100, 100, 255]),
]);
expect(isGrayscale(data)).toBe(false);
});

it("should return false if any pixel is not grayscale", () => {
const data = createData([
[100, 100, 100, 255],
[100, 101, 100, 255],
...Array(98).fill([100, 100, 100, 255]),
]);
expect(isGrayscale(data)).toBe(false);
});

it("should detect a non-grayscale pixel placed deep enough to ensure at least 1% of pixels are checked", () => {
// large image: 100,000 pixels. 1% of 100,000 is 1,000.
// the function will check at least 1,000 pixels.
// place a non-grayscale pixel after 800 pixels.
const pixels = Array(100000).fill([50, 50, 50, 255]);
pixels[800] = [50, 51, 50, 255]; // this is within the first 1% of pixels
const data = createData(pixels);
expect(isGrayscale(data)).toBe(false);
});
});
50 changes: 36 additions & 14 deletions app/packages/looker/src/worker/canvas-decoder.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
import { OverlayMask } from "../numpy";

/**
 * Checks whether the given RGBA pixel data is grayscale by sampling a
 * subset of pixels. At least 500 pixels — or 1% of all pixels, whichever
 * is larger — are inspected. A pixel qualifies as grayscale when its
 * R, G, and B channels are equal and its alpha channel is exactly 255.
 */
export const isGrayscale = (data: Uint8ClampedArray): boolean => {
  const totalPixels = data.length / 4;
  // number of samples we want, and the resulting stride between them
  const sampleTarget = Math.max(500, Math.floor(totalPixels * 0.01));
  const stride = Math.max(1, Math.floor(totalPixels / sampleTarget));

  for (let pixel = 0; pixel < totalPixels; pixel += stride) {
    const offset = pixel * 4;
    // reject on a non-opaque alpha or any channel mismatch
    if (
      data[offset + 3] !== 255 ||
      data[offset] !== data[offset + 1] ||
      data[offset + 1] !== data[offset + 2]
    ) {
      return false;
    }
  }

  return true;
};

/**
* Decodes a given image source into an OverlayMask using an OffscreenCanvas
*/
Expand All @@ -12,25 +33,26 @@ export const decodeWithCanvas = async (blob: ImageBitmapSource) => {
const ctx = canvas.getContext("2d");

ctx.drawImage(imageBitmap, 0, 0);
imageBitmap.close();

const imageData = ctx.getImageData(0, 0, width, height);

const numChannels = imageData.data.length / (width * height);

const overlayData = {
width,
height,
data: imageData.data,
channels: numChannels,
};
// for nongrayscale images, channel is guaranteed to be 4 (RGBA)
const channels = isGrayscale(imageData.data) ? 1 : 4;

// dispose
imageBitmap.close();
if (channels === 1) {
// get rid of the G, B, and A channels, new buffer will be 1/4 the size
const data = new Uint8ClampedArray(width * height);
for (let i = 0; i < data.length; i++) {
data[i] = imageData.data[i * 4];
}
imageData.data.set(data);
}

return {
buffer: overlayData.data.buffer,
channels: numChannels,
arrayType: overlayData.data.constructor.name as OverlayMask["arrayType"],
shape: [overlayData.height, overlayData.width],
buffer: imageData.data.buffer,
channels,
arrayType: "Uint8ClampedArray",
shape: [height, width],
} as OverlayMask;
};
9 changes: 8 additions & 1 deletion app/packages/looker/src/worker/painter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,14 @@ export const PainterFactory = (requestColor) => ({

const isRgbMaskTargets_ = isRgbMaskTargets(maskTargets);

if (maskData.channels > 2) {
// we have an additional guard for targets length = new image buffer byte length
// because we reduce the RGBA mask into a grayscale mask in first load for
// performance reasons
// For subsequent mask updates, the maskData.buffer is already a single channel
if (
maskData.channels === 4 &&
targets.length === label.mask.image.byteLength
) {
for (let i = 0; i < overlay.length; i++) {
const [r, g, b] = getRgbFromMaskData(targets, maskData.channels, i);

Expand Down
8 changes: 5 additions & 3 deletions docs/source/user_guide/using_datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ Object detections stored in |Detections| may also have instance segmentation
masks.

These masks can be stored in one of two ways: either directly in the database
via the :attr:`mask<fiftyone.core.labels.Detection.mask>` attribute, or on
via the :attr:`mask <fiftyone.core.labels.Detection.mask>` attribute, or on
disk referenced by the
:attr:`mask_path <fiftyone.core.labels.Detection.mask_path>` attribute.

Expand Down Expand Up @@ -2605,8 +2605,10 @@ object's bounding box when visualizing in the App.
<Detection: {
'id': '5f8709282018186b6ef6682b',
'attributes': {},
'tags': [],
'label': 'cat',
'bounding_box': [0.48, 0.513, 0.397, 0.288],
'mask': None,
'mask_path': '/path/to/mask.png',
'confidence': 0.96,
'index': None,
Expand All @@ -2615,8 +2617,8 @@ object's bounding box when visualizing in the App.
}>,
}>
Like all |Label| types, you can also add custom attributes to your detections
by dynamically adding new fields to each |Detection| instance:
Like all |Label| types, you can also add custom attributes to your instance
segmentations by dynamically adding new fields to each |Detection| instance:

.. code-block:: python
:linenos:
Expand Down
46 changes: 33 additions & 13 deletions fiftyone/core/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -10662,9 +10662,7 @@ def _handle_db_fields(self, paths, frames=False):
db_fields_map = self._get_db_fields_map(frames=frames)
return [db_fields_map.get(p, p) for p in paths]

def _get_media_fields(
self, include_filepath=True, whitelist=None, frames=False
):
def _get_media_fields(self, whitelist=None, blacklist=None, frames=False):
media_fields = {}

if frames:
Expand All @@ -10674,13 +10672,13 @@ def _get_media_fields(
schema = self.get_field_schema(flat=True)
app_media_fields = set(self._dataset.app_config.media_fields)

if include_filepath:
# 'filepath' should already be in set, but add it just in case
app_media_fields.add("filepath")
else:
app_media_fields.discard("filepath")
# 'filepath' should already be in set, but add it just in case
app_media_fields.add("filepath")

for field_name, field in schema.items():
while isinstance(field, fof.ListField):
field = field.field

if field_name in app_media_fields:
media_fields[field_name] = None
elif isinstance(field, fof.EmbeddedDocumentField) and issubclass(
Expand All @@ -10695,14 +10693,28 @@ def _get_media_fields(
whitelist = {whitelist}

media_fields = {
k: v for k, v in media_fields.items() if k in whitelist
k: v
for k, v in media_fields.items()
if any(w == k or k.startswith(w + ".") for w in whitelist)
}

if blacklist is not None:
if etau.is_container(blacklist):
blacklist = set(blacklist)
else:
blacklist = {blacklist}

media_fields = {
k: v
for k, v in media_fields.items()
if not any(w == k or k.startswith(w + ".") for w in blacklist)
}

return media_fields

def _resolve_media_field(self, media_field):
def _parse_media_field(self, media_field):
if media_field in self._dataset.app_config.media_fields:
return media_field
return media_field, None

_media_field, is_frame_field = self._handle_frame_field(media_field)

Expand All @@ -10711,12 +10723,20 @@ def _resolve_media_field(self, media_field):
if leaf is not None:
leaf = root + "." + leaf

if _media_field in (root, leaf):
if _media_field in (root, leaf) or root.startswith(
_media_field + "."
):
_resolved_field = leaf if leaf is not None else root
if is_frame_field:
_resolved_field = self._FRAMES_PREFIX + _resolved_field

return _resolved_field
_list_fields = self._parse_field_name(
_resolved_field, auto_unwind=False
)[-2]
if _list_fields:
return _resolved_field, _list_fields[0]

return _resolved_field, None

raise ValueError("'%s' is not a valid media field" % media_field)

Expand Down
7 changes: 4 additions & 3 deletions fiftyone/core/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ class Detection(_HasAttributesDict, _HasID, _HasMedia, Label):
its bounding box, which should be a 2D binary or 0/1 integer numpy
array
mask_path (None): the absolute path to the instance segmentation image
on disk
on disk, which should be a single-channel PNG image where any
non-zero values represent the instance's extent
confidence (None): a confidence in ``[0, 1]`` for the detection
index (None): an index for the object
attributes ({}): a dict mapping attribute names to :class:`Attribute`
Expand Down Expand Up @@ -532,8 +533,8 @@ def to_segmentation(self, mask=None, frame_size=None, target=255):
"""
if not self.has_mask:
raise ValueError(
"Only detections with their `mask` attributes populated can "
"be converted to segmentations"
"Only detections with their `mask` or `mask_path` attribute "
"populated can be converted to segmentations"
)

mask, target = _parse_segmentation_target(mask, frame_size, target)
Expand Down
Loading

0 comments on commit d58f23d

Please sign in to comment.