Skip to content

Commit

Permalink
ObjectId Sidebar Filter Fixes (#5415)
Browse files Browse the repository at this point in the history
* improve object id filtering for qp

* allow non hex search

* bug, add frontend check
  • Loading branch information
benjaminpkane authored Feb 4, 2025
1 parent b6a395d commit 274d9f6
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as fos from "@fiftyone/state";
import { isMatchingAtom, stringExcludeAtom } from "@fiftyone/state";
import { getFetchFunction } from "@fiftyone/utilities";
import { getFetchFunction, isObjectIdString } from "@fiftyone/utilities";
import { atomFamily, selectorFamily } from "recoil";
import { labelTagsCount } from "../../Sidebar/Entries/EntryCounts";
import { nullSort } from "../utils";
Expand Down Expand Up @@ -57,6 +57,11 @@ export const stringSearchResults = selectorFamily<
({ path, modal, filter }) =>
async ({ get }) => {
const search = filter ? "" : get(stringSearch({ modal, path }));
      // for ObjectId searches, skip the request when the search string cannot be an ObjectId prefix (longer than 24 characters)
if (get(fos.isObjectIdField(path)) && !isObjectIdString(search, false)) {
return { values: [] };
}

const sorting = get(fos.sortFilterResults(modal));
const mixed = get(fos.groupStatistics(modal)) === "group";
const selected = get(fos.stringSelectedValuesAtom({ path, modal }));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { SelectorValidationError } from "@fiftyone/components";
import { isObjectIdField, snackbarErrors } from "@fiftyone/state";
import { isObjectIdString } from "@fiftyone/utilities";
import { RecoilState, useRecoilCallback } from "recoil";
import { Result } from "./Result";
import type { RecoilState } from "recoil";
import { useRecoilCallback } from "recoil";
import type { Result } from "./Result";

export default function (
modal: boolean,
Expand Down
4 changes: 2 additions & 2 deletions app/packages/utilities/src/type-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export function isHex(value: string) {
return /[0-9a-f]{24}/g.test(value);
}

export function isObjectIdString(value: string) {
return isHex(value) && value.length === 24;
/**
 * Whether `value` is a valid ObjectId hex string.
 *
 * @param value - the string to test
 * @param strict - when `true` (default), require exactly 24 hex characters;
 *   when `false`, accept any string of at most 24 characters (a potential
 *   ObjectId prefix — the server pads/validates it)
 */
export function isObjectIdString(value: string, strict = true) {
  if (strict) {
    // NOTE: the original `isHex(value) && strict ? A : B` parsed as
    // `(isHex(value) && strict) ? A : B`, so strict mode accepted short
    // non-hex strings; parenthesizing restores the intended semantics
    return isHex(value) && value.length === 24;
  }
  return value.length <= 24;
}

export type NumberKeyObjectType<V = unknown> = {
Expand Down
51 changes: 35 additions & 16 deletions fiftyone/server/lightning.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,13 @@ async def _do_distinct_query(
query: DistinctQuery,
):
match = None
matcher = lambda v: False
if query.search:
match = query.search
matcher = lambda v: match not in v
if query.is_object_id_field:
match = match[:_TWENTY_FOUR]
matcher = lambda v: v < match

try:
result = await collection.distinct(query.path)
Expand All @@ -358,10 +363,13 @@ async def _do_distinct_query(
exclude = set(query.exclude or [])

for value in result:
if query.is_object_id_field:
value = str(value)

if value in exclude:
continue

if not value or (match and match not in value):
if not value or matcher(value):
continue

values.append(value)
Expand All @@ -386,25 +394,19 @@ async def _do_distinct_pipeline(

pipeline.append({"$sort": {query.path: 1}})

match_search = None
if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
match_search = _add_search(query)
pipeline.append(match_search)

pipeline += _match_arrays(dataset, query.path, False) + _unwind(
match_arrays = _match_arrays(dataset, query.path, False) + _unwind(
dataset, query.path, False
)

if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
if match_arrays:
pipeline += match_arrays
if match_search:
# match again after unwinding list fields
pipeline.append(match_search)

pipeline += [{"$group": {"_id": f"${query.path}"}}]

Expand All @@ -423,6 +425,23 @@ async def _do_distinct_pipeline(
return values


def _add_search(query: DistinctQuery):
    """Build a ``$match`` pipeline stage filtering ``query.path`` by
    ``query.search``.

    ObjectId fields are matched by truncating the search to 24 characters,
    zero-padding it into a full 24-char hex id, and using it as a ``$gte``
    lower bound; other fields use a case-sensitive prefix regex.
    """
    if query.is_object_id_field:
        # ObjectIds are 24 hex chars; strip anything beyond that
        search = query.search[:_TWENTY_FOUR]
        # zero-pad so the prefix becomes a full id usable as a lower bound
        add = (_TWENTY_FOUR - len(search)) * "0"
        if add:
            search = f"{search}{add}"
        try:
            value = {"$gte": ObjectId(search)}
        except Exception:
            # search is not valid hex: match nothing (no id sorts below the
            # all-zeros id)
            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
    else:
        # bug fix: the else branch referenced the local `search`, which is
        # only bound in the ObjectId branch above (NameError); use the
        # query's search string directly
        value = Regex(f"^{query.search}")
    return {"$match": {query.path: value}}


def _first(
path: str,
dataset: fo.Dataset,
Expand Down
54 changes: 52 additions & 2 deletions tests/unittests/lightning_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,51 @@ async def test_group_dataset(self, dataset: fo.Dataset):
)


class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase):
    """Lightning (sidebar) distinct-value queries against ObjectId fields."""

    @drop_async_dataset
    async def test_object_ids(self, dataset: fo.Dataset):
        # add sample data keyed "id" whose value is the all-zeros 24-char
        # hex string, so ObjectId paths have a known searchable value
        keys = _add_samples(dataset, dict(id="0" * 24))
        query = """
            query Query($input: LightningInput!) {
                lightning(input: $input) {
                    ... on ObjectIdLightningResult {
                        path
                        values
                    }
                }
            }
        """

        # a 25-char hex search: the server truncates to 24 chars, so the
        # all-zeros value should still match
        result = await _execute(
            query,
            dataset,
            fo.ObjectIdField,
            keys,
            frames=False,
            search="0" * 25,
        )

        for path in result.data["lightning"]:
            if path["path"] == "id":
                # NOTE(review): presumably the built-in sample id, whose
                # actual value is random — only the count is asserted
                self.assertEqual(len(path["values"]), 1)
            else:
                self.assertListEqual(
                    path["values"], ["000000000000000000000000"]
                )

        # a non-hex search can never match an ObjectId: expect no values
        result = await _execute(
            query,
            dataset,
            fo.ObjectIdField,
            keys,
            frames=False,
            search="Z" * 25,
        )

        for path in result.data["lightning"]:
            self.assertListEqual(path["values"], [])


def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]):
samples = []
keys = set()
Expand All @@ -1191,6 +1236,7 @@ async def _execute(
field: fo.Field,
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
slice: t.Optional[str] = None,
):
return await execute(
Expand All @@ -1200,7 +1246,9 @@ async def _execute(
"input": asdict(
LightningInput(
dataset=dataset.name,
paths=_get_paths(dataset, field, keys, frames=frames),
paths=_get_paths(
dataset, field, keys, frames=frames, search=search
),
slice=slice,
)
)
Expand All @@ -1213,6 +1261,7 @@ def _get_paths(
field_type: t.Type[fo.Field],
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
):
field_dict = dataset.get_field_schema(flat=True)

Expand All @@ -1239,7 +1288,8 @@ def _get_paths(
continue

dataset.create_index(path)
paths.append(LightningPathInput(path=path))
paths.append(LightningPathInput(path=path, search=search))

return paths


Expand Down

0 comments on commit 274d9f6

Please sign in to comment.