From 5505649f45aa5ea52af81a96ef1d856ea804d295 Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Tue, 21 Jan 2025 10:06:55 -0500 Subject: [PATCH 1/3] improve object id filtering for qp --- fiftyone/server/lightning.py | 47 ++++++++++++++++++++---------- tests/unittests/lightning_tests.py | 42 ++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index fe0501191e6..72af1ab099d 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -345,8 +345,13 @@ async def _do_distinct_query( query: DistinctQuery, ): match = None + matcher = lambda v: False if query.search: match = query.search + matcher = lambda v: match not in v + if query.is_object_id_field: + match = match[:_TWENTY_FOUR] + matcher = lambda v: v < match try: result = await collection.distinct(query.path) @@ -358,10 +363,13 @@ async def _do_distinct_query( exclude = set(query.exclude or []) for value in result: + if query.is_object_id_field: + value = str(value) + if value in exclude: continue - if not value or (match and match not in value): + if not value or matcher(value): continue values.append(value) @@ -386,25 +394,19 @@ async def _do_distinct_pipeline( pipeline.append({"$sort": {query.path: 1}}) + match_search = None if query.search: - if query.is_object_id_field: - add = (_TWENTY_FOUR - len(query.search)) * "0" - value = {"$gte": ObjectId(f"{query.search}{add}")} - else: - value = Regex(f"^{query.search}") - pipeline.append({"$match": {query.path: value}}) + match_search = _add_search(query) + pipeline.append(match_search) - pipeline += _match_arrays(dataset, query.path, False) + _unwind( + match_arrays = _match_arrays(dataset, query.path, False) + _unwind( dataset, query.path, False ) - - if query.search: - if query.is_object_id_field: - add = (_TWENTY_FOUR - len(query.search)) * "0" - value = {"$gte": ObjectId(f"{query.search}{add}")} - else: - value = Regex(f"^{query.search}") - pipeline.append({"$match": {query.path: value}}) + if match_arrays: + pipeline += match_arrays + if match_search: + # match again after unwinding list fields + pipeline.append(match_search) pipeline += [{"$group": {"_id": f"${query.path}"}}] @@ -423,6 +425,19 @@ async def _do_distinct_pipeline( return values +def _add_search(query: DistinctQuery): + # strip chars after 24 + search = query.search[:_TWENTY_FOUR] + if query.is_object_id_field: + add = (_TWENTY_FOUR - len(search)) * "0" + if add: + search = f"{search}{add}" + value = {"$gte": ObjectId(search)} + else: + value = Regex(f"^{search}") + return {"$match": {query.path: value}} + + def _first( path: str, dataset: fo.Dataset, diff --git a/tests/unittests/lightning_tests.py b/tests/unittests/lightning_tests.py index 7ea8b7efe5e..6922de86e92 100644 --- a/tests/unittests/lightning_tests.py +++ b/tests/unittests/lightning_tests.py @@ -1172,6 +1172,39 @@ async def test_group_dataset(self, dataset: fo.Dataset): ) +class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase): + @drop_async_dataset + async def test_object_ids(self, dataset: fo.Dataset): + keys = _add_samples(dataset, dict(id="0" * 24)) + query = """ + query Query($input: LightningInput!) { + lightning(input: $input) { + ... on ObjectIdLightningResult { + path + values + } + } + } + """ + + result = await _execute( + query, + dataset, + fo.ObjectIdField, + keys, + frames=False, + search="0" * 25, + ) + + for path in result.data["lightning"]: + if path["path"] == "id": + self.assertEqual(len(path["values"]), 1) + else: + self.assertListEqual( + path["values"], ["000000000000000000000000"] + ) + + def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]): samples = [] keys = set() @@ -1191,6 +1224,7 @@ async def _execute( field: fo.Field, keys: t.Set[str], frames=True, + search: t.Optional[str] = None, slice: t.Optional[str] = None, ): return await execute( @@ -1200,7 +1234,9 @@ async def _execute( "input": asdict( LightningInput( dataset=dataset.name, - paths=_get_paths(dataset, field, keys, frames=frames), + paths=_get_paths( + dataset, field, keys, frames=frames, search=search + ), slice=slice, ) ) @@ -1213,6 +1249,7 @@ def _get_paths( field_type: t.Type[fo.Field], keys: t.Set[str], frames=True, + search: t.Optional[str] = None, ): field_dict = dataset.get_field_schema(flat=True) @@ -1239,7 +1276,8 @@ def _get_paths( continue dataset.create_index(path) - paths.append(LightningPathInput(path=path)) + paths.append(LightningPathInput(path=path, search=search)) + return paths From 9ddc487ea11c96343cf1af1f61cfc1cc00de6dbd Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Tue, 21 Jan 2025 10:11:57 -0500 Subject: [PATCH 2/3] allow non hex search --- fiftyone/server/lightning.py | 6 +++++- tests/unittests/lightning_tests.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index 72af1ab099d..2588321dd82 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -432,7 +432,11 @@ def _add_search(query: DistinctQuery): add = (_TWENTY_FOUR - len(search)) * "0" if add: search = f"{search}{add}" - value = {"$gte": ObjectId(search)} + try: + value = {"$gte": ObjectId(search)} + except: + # search is not valid + value = {"$lt": ObjectId("0" * _TWENTY_FOUR)} else: value = Regex(f"^{search}") return {"$match": {query.path: value}} diff --git a/tests/unittests/lightning_tests.py b/tests/unittests/lightning_tests.py index 6922de86e92..e834621aeff 100644 --- a/tests/unittests/lightning_tests.py +++ b/tests/unittests/lightning_tests.py @@ -1204,6 +1204,18 @@ async def test_object_ids(self, dataset: fo.Dataset): path["values"], ["000000000000000000000000"] ) + result = await _execute( + query, + dataset, + fo.ObjectIdField, + keys, + frames=False, + search="Z" * 25, + ) + + for path in result.data["lightning"]: + self.assertListEqual(path["values"], []) + def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]): samples = [] From 8b2848050aaac87eeceed7d283a1cc11d8268f83 Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Thu, 30 Jan 2025 15:34:24 -0500 Subject: [PATCH 3/3] bug, add frontend check --- .../core/src/components/Filters/StringFilter/state.ts | 7 ++++++- .../src/components/Filters/StringFilter/useOnSelect.ts | 5 +++-- app/packages/utilities/src/type-check.ts | 4 ++-- fiftyone/server/lightning.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/app/packages/core/src/components/Filters/StringFilter/state.ts b/app/packages/core/src/components/Filters/StringFilter/state.ts index ff1babaf069..309ed28f1a1 100644 --- a/app/packages/core/src/components/Filters/StringFilter/state.ts +++ b/app/packages/core/src/components/Filters/StringFilter/state.ts @@ -1,6 +1,6 @@ import * as fos from "@fiftyone/state"; import { isMatchingAtom, stringExcludeAtom } from "@fiftyone/state"; -import { getFetchFunction } from "@fiftyone/utilities"; +import { getFetchFunction, isObjectIdString } from "@fiftyone/utilities"; import { atomFamily, selectorFamily } from "recoil"; import { labelTagsCount } from "../../Sidebar/Entries/EntryCounts"; import { nullSort } from "../utils"; @@ -57,6 +57,11 @@ export const stringSearchResults = selectorFamily< ({ path, modal, filter }) => async ({ get }) => { const search = filter ? "" : get(stringSearch({ modal, path })); + // for object id searches, skip request when the string is not <= 24 hex + if (get(fos.isObjectIdField(path)) && !isObjectIdString(search, false)) { + return { values: [] }; + } + const sorting = get(fos.sortFilterResults(modal)); const mixed = get(fos.groupStatistics(modal)) === "group"; const selected = get(fos.stringSelectedValuesAtom({ path, modal })); diff --git a/app/packages/core/src/components/Filters/StringFilter/useOnSelect.ts b/app/packages/core/src/components/Filters/StringFilter/useOnSelect.ts index c6d1c5ae76b..bf645271c40 100644 --- a/app/packages/core/src/components/Filters/StringFilter/useOnSelect.ts +++ b/app/packages/core/src/components/Filters/StringFilter/useOnSelect.ts @@ -1,8 +1,9 @@ import { SelectorValidationError } from "@fiftyone/components"; import { isObjectIdField, snackbarErrors } from "@fiftyone/state"; import { isObjectIdString } from "@fiftyone/utilities"; -import { RecoilState, useRecoilCallback } from "recoil"; -import { Result } from "./Result"; +import type { RecoilState } from "recoil"; +import { useRecoilCallback } from "recoil"; +import type { Result } from "./Result"; export default function ( modal: boolean, diff --git a/app/packages/utilities/src/type-check.ts b/app/packages/utilities/src/type-check.ts index b818f64cad6..49f6d0d5008 100644 --- a/app/packages/utilities/src/type-check.ts +++ b/app/packages/utilities/src/type-check.ts @@ -14,8 +14,8 @@ export function isHex(value: string) { return /[0-9a-f]{24}/g.test(value); } -export function isObjectIdString(value: string) { - return isHex(value) && value.length === 24; +export function isObjectIdString(value: string, strict = true) { + return isHex(value) && strict ? value.length === 24 : value.length <= 24; } export type NumberKeyObjectType = { diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index 2588321dd82..a5329b31ff1 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -427,8 +427,8 @@ async def _do_distinct_pipeline( def _add_search(query: DistinctQuery): # strip chars after 24 - search = query.search[:_TWENTY_FOUR] if query.is_object_id_field: + search = query.search[:_TWENTY_FOUR] add = (_TWENTY_FOUR - len(search)) * "0" if add: search = f"{search}{add}"