Skip to content

Commit

Permalink
ObjectId Sidebar Filter Fixes (#5415)
Browse files Browse the repository at this point in the history
* improve object id filtering for qp

* allow non hex search

* bug, add frontend check
  • Loading branch information
benjaminpkane authored Feb 4, 2025
1 parent b6a395d commit 274d9f6
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as fos from "@fiftyone/state";
import { isMatchingAtom, stringExcludeAtom } from "@fiftyone/state";
import { getFetchFunction } from "@fiftyone/utilities";
import { getFetchFunction, isObjectIdString } from "@fiftyone/utilities";
import { atomFamily, selectorFamily } from "recoil";
import { labelTagsCount } from "../../Sidebar/Entries/EntryCounts";
import { nullSort } from "../utils";
Expand Down Expand Up @@ -57,6 +57,11 @@ export const stringSearchResults = selectorFamily<
({ path, modal, filter }) =>
async ({ get }) => {
const search = filter ? "" : get(stringSearch({ modal, path }));
      // for ObjectId searches, skip the request when the search string cannot be an ObjectId prefix (longer than 24 characters)
if (get(fos.isObjectIdField(path)) && !isObjectIdString(search, false)) {
return { values: [] };
}

const sorting = get(fos.sortFilterResults(modal));
const mixed = get(fos.groupStatistics(modal)) === "group";
const selected = get(fos.stringSelectedValuesAtom({ path, modal }));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { SelectorValidationError } from "@fiftyone/components";
import { isObjectIdField, snackbarErrors } from "@fiftyone/state";
import { isObjectIdString } from "@fiftyone/utilities";
import { RecoilState, useRecoilCallback } from "recoil";
import { Result } from "./Result";
import type { RecoilState } from "recoil";
import { useRecoilCallback } from "recoil";
import type { Result } from "./Result";

export default function (
modal: boolean,
Expand Down
4 changes: 2 additions & 2 deletions app/packages/utilities/src/type-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export function isHex(value: string) {
return /[0-9a-f]{24}/g.test(value);
}

export function isObjectIdString(value: string) {
return isHex(value) && value.length === 24;
/**
 * Whether `value` is a valid ObjectId hex string.
 *
 * @param value - the string to test
 * @param strict - when `true` (default), require exactly 24 hex characters;
 *   when `false`, accept any string of at most 24 characters (a potential
 *   ObjectId prefix — the server pads/validates it)
 */
export function isObjectIdString(value: string, strict = true) {
  if (strict) {
    // NOTE: the original `isHex(value) && strict ? A : B` parsed as
    // `(isHex(value) && strict) ? A : B`, so strict mode accepted short
    // non-hex strings; parenthesizing restores the intended semantics
    return isHex(value) && value.length === 24;
  }
  return value.length <= 24;
}

export type NumberKeyObjectType<V = unknown> = {
Expand Down
51 changes: 35 additions & 16 deletions fiftyone/server/lightning.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,13 @@ async def _do_distinct_query(
query: DistinctQuery,
):
match = None
matcher = lambda v: False
if query.search:
match = query.search
matcher = lambda v: match not in v
if query.is_object_id_field:
match = match[:_TWENTY_FOUR]
matcher = lambda v: v < match

try:
result = await collection.distinct(query.path)
Expand All @@ -358,10 +363,13 @@ async def _do_distinct_query(
exclude = set(query.exclude or [])

for value in result:
if query.is_object_id_field:
value = str(value)

if value in exclude:
continue

if not value or (match and match not in value):
if not value or matcher(value):
continue

values.append(value)
Expand All @@ -386,25 +394,19 @@ async def _do_distinct_pipeline(

pipeline.append({"$sort": {query.path: 1}})

match_search = None
if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
match_search = _add_search(query)
pipeline.append(match_search)

pipeline += _match_arrays(dataset, query.path, False) + _unwind(
match_arrays = _match_arrays(dataset, query.path, False) + _unwind(
dataset, query.path, False
)

if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
if match_arrays:
pipeline += match_arrays
if match_search:
# match again after unwinding list fields
pipeline.append(match_search)

pipeline += [{"$group": {"_id": f"${query.path}"}}]

Expand All @@ -423,6 +425,23 @@ async def _do_distinct_pipeline(
return values


def _add_search(query: DistinctQuery):
    """Build a ``$match`` pipeline stage filtering ``query.path`` by
    ``query.search``.

    ObjectId fields are matched by truncating the search to 24 characters,
    zero-padding it into a full 24-char hex id, and using it as a ``$gte``
    lower bound; other fields use a case-sensitive prefix regex.
    """
    if query.is_object_id_field:
        # ObjectIds are 24 hex chars; strip anything beyond that
        search = query.search[:_TWENTY_FOUR]
        # zero-pad so the prefix becomes a full id usable as a lower bound
        add = (_TWENTY_FOUR - len(search)) * "0"
        if add:
            search = f"{search}{add}"
        try:
            value = {"$gte": ObjectId(search)}
        except Exception:
            # search is not valid hex: match nothing (no id sorts below the
            # all-zeros id)
            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
    else:
        # bug fix: the else branch referenced the local `search`, which is
        # only bound in the ObjectId branch above (NameError); use the
        # query's search string directly
        value = Regex(f"^{query.search}")
    return {"$match": {query.path: value}}


def _first(
path: str,
dataset: fo.Dataset,
Expand Down
54 changes: 52 additions & 2 deletions tests/unittests/lightning_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,51 @@ async def test_group_dataset(self, dataset: fo.Dataset):
)


class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase):
    """Lightning (sidebar) distinct-value queries against ObjectId fields."""

    @drop_async_dataset
    async def test_object_ids(self, dataset: fo.Dataset):
        # add sample data keyed "id" whose value is the all-zeros 24-char
        # hex string, so ObjectId paths have a known searchable value
        keys = _add_samples(dataset, dict(id="0" * 24))
        query = """
            query Query($input: LightningInput!) {
                lightning(input: $input) {
                    ... on ObjectIdLightningResult {
                        path
                        values
                    }
                }
            }
        """

        # a 25-char hex search: the server truncates to 24 chars, so the
        # all-zeros value should still match
        result = await _execute(
            query,
            dataset,
            fo.ObjectIdField,
            keys,
            frames=False,
            search="0" * 25,
        )

        for path in result.data["lightning"]:
            if path["path"] == "id":
                # NOTE(review): presumably the built-in sample id, whose
                # actual value is random — only the count is asserted
                self.assertEqual(len(path["values"]), 1)
            else:
                self.assertListEqual(
                    path["values"], ["000000000000000000000000"]
                )

        # a non-hex search can never match an ObjectId: expect no values
        result = await _execute(
            query,
            dataset,
            fo.ObjectIdField,
            keys,
            frames=False,
            search="Z" * 25,
        )

        for path in result.data["lightning"]:
            self.assertListEqual(path["values"], [])


def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]):
samples = []
keys = set()
Expand All @@ -1191,6 +1236,7 @@ async def _execute(
field: fo.Field,
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
slice: t.Optional[str] = None,
):
return await execute(
Expand All @@ -1200,7 +1246,9 @@ async def _execute(
"input": asdict(
LightningInput(
dataset=dataset.name,
paths=_get_paths(dataset, field, keys, frames=frames),
paths=_get_paths(
dataset, field, keys, frames=frames, search=search
),
slice=slice,
)
)
Expand All @@ -1213,6 +1261,7 @@ def _get_paths(
field_type: t.Type[fo.Field],
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
):
field_dict = dataset.get_field_schema(flat=True)

Expand All @@ -1239,7 +1288,8 @@ def _get_paths(
continue

dataset.create_index(path)
paths.append(LightningPathInput(path=path))
paths.append(LightningPathInput(path=path, search=search))

return paths


Expand Down

0 comments on commit 274d9f6

Please sign in to comment.