# ObjectId Sidebar Filter Fixes #5415

Merged: 3 commits, Feb 4, 2025
Changes from 2 commits
## fiftyone/server/lightning.py (35 additions & 16 deletions)
```diff
@@ -345,8 +345,13 @@ async def _do_distinct_query(
     query: DistinctQuery,
 ):
     match = None
+    matcher = lambda v: False
     if query.search:
         match = query.search
+        matcher = lambda v: match not in v
+        if query.is_object_id_field:
+            match = match[:_TWENTY_FOUR]
+            matcher = lambda v: v < match
 
     try:
         result = await collection.distinct(query.path)
```
```diff
@@ -358,10 +363,13 @@ async def _do_distinct_query(
     exclude = set(query.exclude or [])
 
     for value in result:
+        if query.is_object_id_field:
+            value = str(value)
+
         if value in exclude:
             continue
 
-        if not value or (match and match not in value):
+        if not value or matcher(value):
             continue
 
         values.append(value)
```
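For readers skimming the diff, here is a small standalone sketch of the matcher logic above (simplified names, not the library's API). The predicate returns `True` for values the query should skip: substring matching for plain string fields, a lower-bound comparison for ObjectId fields.

```python
from bson import ObjectId

_TWENTY_FOUR = 24  # an ObjectId is a 24-char hex string


def make_matcher(search, is_object_id_field):
    # Mirrors the lambdas in _do_distinct_query: True means "skip this value"
    if not search:
        return lambda v: False  # no search term: keep everything
    if is_object_id_field:
        match = search[:_TWENTY_FOUR]
        return lambda v: v < match  # skip stringified ids below the bound
    return lambda v: search not in v  # skip values missing the substring


matcher = make_matcher("6f" + "0" * 22, True)
print(matcher(str(ObjectId("5" * 24))))  # True: below the bound, skipped
print(matcher(str(ObjectId("7" * 24))))  # False: kept
```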
```diff
@@ -386,25 +394,19 @@ async def _do_distinct_pipeline(
 
     pipeline.append({"$sort": {query.path: 1}})
 
+    match_search = None
     if query.search:
-        if query.is_object_id_field:
-            add = (_TWENTY_FOUR - len(query.search)) * "0"
-            value = {"$gte": ObjectId(f"{query.search}{add}")}
-        else:
-            value = Regex(f"^{query.search}")
-        pipeline.append({"$match": {query.path: value}})
+        match_search = _add_search(query)
+        pipeline.append(match_search)
 
-    pipeline += _match_arrays(dataset, query.path, False) + _unwind(
+    match_arrays = _match_arrays(dataset, query.path, False) + _unwind(
         dataset, query.path, False
     )
 
-    if query.search:
-        if query.is_object_id_field:
-            add = (_TWENTY_FOUR - len(query.search)) * "0"
-            value = {"$gte": ObjectId(f"{query.search}{add}")}
-        else:
-            value = Regex(f"^{query.search}")
-        pipeline.append({"$match": {query.path: value}})
+    if match_arrays:
+        pipeline += match_arrays
+        if match_search:
+            # match again after unwinding list fields
+            pipeline.append(match_search)
 
     pipeline += [{"$group": {"_id": f"${query.path}"}}]
```
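To make the new control flow concrete, here is a sketch of the pipeline shape this now builds for an ObjectId search on a hypothetical list field. The path, search value, and `$unwind` stage are illustrative stand-ins for what `_match_arrays`/`_unwind` actually emit; the point is that the same `$match` stage is applied once before and once after unwinding.

```python
from bson import ObjectId

# Hypothetical query.path="detections.id", query.search="6f" (zero-padded)
match_search = {
    "$match": {"detections.id": {"$gte": ObjectId("6f" + "0" * 22)}}
}
pipeline = [
    {"$sort": {"detections.id": 1}},
    match_search,  # prune documents before unwinding
    {"$unwind": "$detections"},  # stand-in for the _match_arrays/_unwind stages
    match_search,  # match again after unwinding list fields
    {"$group": {"_id": "$detections.id"}},
]
```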

```diff
@@ -423,6 +425,23 @@ async def _do_distinct_pipeline(
     return values
 
 
+def _add_search(query: DistinctQuery):
+    # strip chars after 24
+    search = query.search[:_TWENTY_FOUR]
+    if query.is_object_id_field:
+        add = (_TWENTY_FOUR - len(search)) * "0"
+        if add:
+            search = f"{search}{add}"
+        try:
+            value = {"$gte": ObjectId(search)}
+        except:
+            # search is not valid
+            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
+    else:
+        value = Regex(f"^{search}")
+
+    return {"$match": {query.path: value}}
+
+
 def _first(
     path: str,
     dataset: fo.Dataset,
```

Review thread on the `# strip chars after 24` line:

**Contributor:** Does this mean the sidebar only filters by the first 24 chars? So long file path filters could potentially not include the file name? What's the purpose of limiting to the first 24?

**Contributor (author):** Good comment. I refactored this incorrectly; it should only apply to ObjectId fields. I think I will add frontend validation to omit these queries completely. That is a more holistic approach. An ObjectId is always a 24-char hexadecimal string, so the search results can be omitted when the search does not comply.

**Contributor (author):** I will undraft when I'm happy with the solution.
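The ObjectId branch pads a short search out to a full 24-char hex string and uses it as a lower bound; an unparseable search falls back to a predicate that can never match. A standalone sketch of the same logic, assuming `bson` and plain arguments instead of a `DistinctQuery`:

```python
from bson import ObjectId
from bson.regex import Regex

_TWENTY_FOUR = 24


def add_search(path, search, is_object_id_field):
    # Sketch of _add_search with plain arguments (not the PR's exact API)
    search = search[:_TWENTY_FOUR]  # strip chars after 24
    if is_object_id_field:
        search += (_TWENTY_FOUR - len(search)) * "0"  # zero-pad to 24 chars
        try:
            value = {"$gte": ObjectId(search)}
        except Exception:
            # not valid hex: nothing is < the all-zero minimum ObjectId
            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
    else:
        value = Regex(f"^{search}")
    return {"$match": {path: value}}


print(add_search("id", "6f", True))
# {'$match': {'id': {'$gte': ObjectId('6f0000000000000000000000')}}}
print(add_search("id", "Z" * 25, True))
# {'$match': {'id': {'$lt': ObjectId('000000000000000000000000')}}}
```

Note that the zero-padded value is the smallest ObjectId with the typed prefix, so `$gte` acts as a seek rather than a strict prefix match; combined with the ascending `$sort`, matching ids surface first.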
## tests/unittests/lightning_tests.py (52 additions & 2 deletions)
```diff
@@ -1172,6 +1172,51 @@ async def test_group_dataset(self, dataset: fo.Dataset):
         )
 
 
+class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase):
+    @drop_async_dataset
+    async def test_object_ids(self, dataset: fo.Dataset):
+        keys = _add_samples(dataset, dict(id="0" * 24))
+        query = """
+            query Query($input: LightningInput!) {
+                lightning(input: $input) {
+                    ... on ObjectIdLightningResult {
+                        path
+                        values
+                    }
+                }
+            }
+        """
+
+        result = await _execute(
+            query,
+            dataset,
+            fo.ObjectIdField,
+            keys,
+            frames=False,
+            search="0" * 25,
+        )
+
+        for path in result.data["lightning"]:
+            if path["path"] == "id":
+                self.assertEqual(len(path["values"]), 1)
+            else:
+                self.assertListEqual(
+                    path["values"], ["000000000000000000000000"]
+                )
+
+        result = await _execute(
+            query,
+            dataset,
+            fo.ObjectIdField,
+            keys,
+            frames=False,
+            search="Z" * 25,
+        )
+
+        for path in result.data["lightning"]:
+            self.assertListEqual(path["values"], [])
+
+
 def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]):
     samples = []
     keys = set()
```
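The two searches are chosen to hit both fallbacks in `_add_search`: 25 zeros are truncated to a valid 24-char hex id, while 25 `Z`s can never form a valid ObjectId, so that query matches nothing. A quick sanity check with `bson` illustrating the assumption:

```python
from bson import ObjectId
from bson.errors import InvalidId

print(ObjectId(("0" * 25)[:24]))  # 000000000000000000000000: matches the sample

try:
    ObjectId(("Z" * 25)[:24])
except InvalidId:
    print("not hex: the query matches nothing")
```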
```diff
@@ -1191,6 +1236,7 @@ async def _execute(
     field: fo.Field,
     keys: t.Set[str],
     frames=True,
+    search: t.Optional[str] = None,
     slice: t.Optional[str] = None,
 ):
     return await execute(
```

```diff
@@ -1200,7 +1246,9 @@ async def _execute(
         "input": asdict(
             LightningInput(
                 dataset=dataset.name,
-                paths=_get_paths(dataset, field, keys, frames=frames),
+                paths=_get_paths(
+                    dataset, field, keys, frames=frames, search=search
+                ),
                 slice=slice,
             )
         )
```

```diff
@@ -1213,6 +1261,7 @@ def _get_paths(
     field_type: t.Type[fo.Field],
     keys: t.Set[str],
     frames=True,
+    search: t.Optional[str] = None,
 ):
     field_dict = dataset.get_field_schema(flat=True)
 
```

```diff
@@ -1239,7 +1288,8 @@ def _get_paths(
             continue
 
         dataset.create_index(path)
-        paths.append(LightningPathInput(path=path))
+        paths.append(LightningPathInput(path=path, search=search))
+
     return paths
```