Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ObjectId Sidebar Filter Fixes #5415

Merged
merged 3 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as fos from "@fiftyone/state";
import { isMatchingAtom, stringExcludeAtom } from "@fiftyone/state";
import { getFetchFunction } from "@fiftyone/utilities";
import { getFetchFunction, isObjectIdString } from "@fiftyone/utilities";
import { atomFamily, selectorFamily } from "recoil";
import { labelTagsCount } from "../../Sidebar/Entries/EntryCounts";
import { nullSort } from "../utils";
Expand Down Expand Up @@ -57,6 +57,11 @@ export const stringSearchResults = selectorFamily<
({ path, modal, filter }) =>
async ({ get }) => {
const search = filter ? "" : get(stringSearch({ modal, path }));
// for object id searches, skip request when the string is not <= 24 hex
if (get(fos.isObjectIdField(path)) && !isObjectIdString(search, false)) {
return { values: [] };
}

const sorting = get(fos.sortFilterResults(modal));
const mixed = get(fos.groupStatistics(modal)) === "group";
const selected = get(fos.stringSelectedValuesAtom({ path, modal }));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { SelectorValidationError } from "@fiftyone/components";
import { isObjectIdField, snackbarErrors } from "@fiftyone/state";
import { isObjectIdString } from "@fiftyone/utilities";
import { RecoilState, useRecoilCallback } from "recoil";
import { Result } from "./Result";
import type { RecoilState } from "recoil";
import { useRecoilCallback } from "recoil";
import type { Result } from "./Result";

export default function (
modal: boolean,
Expand Down
4 changes: 2 additions & 2 deletions app/packages/utilities/src/type-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export function isHex(value: string) {
return /[0-9a-f]{24}/g.test(value);
}

export function isObjectIdString(value: string) {
return isHex(value) && value.length === 24;
export function isObjectIdString(value: string, strict = true) {
return isHex(value) && strict ? value.length === 24 : value.length <= 24;
Comment on lines +17 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve the ObjectId validation logic.

The current implementation has a few issues that could be improved:

  1. The regex pattern should be anchored and shouldn't use the global flag
  2. The function logic can be simplified

Apply this diff to improve the implementation:

-export function isObjectIdString(value: string, strict = true) {
-  return isHex(value) && strict ? value.length === 24 : value.length <= 24;
+export function isObjectIdString(value: string, strict = true) {
+  if (!value || (strict ? value.length !== 24 : value.length > 24)) {
+    return false;
+  }
+  return /^[0-9a-f]+$/.test(value);
+}

This implementation:

  • Validates the length first (fail fast)
  • Uses an anchored regex without global flag
  • Handles empty/null values
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
export function isObjectIdString(value: string, strict = true) {
return isHex(value) && strict ? value.length === 24 : value.length <= 24;
export function isObjectIdString(value: string, strict = true) {
if (!value || (strict ? value.length !== 24 : value.length > 24)) {
return false;
}
return /^[0-9a-f]+$/.test(value);
}

}

export type NumberKeyObjectType<V = unknown> = {
Expand Down
51 changes: 35 additions & 16 deletions fiftyone/server/lightning.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,13 @@ async def _do_distinct_query(
query: DistinctQuery,
):
match = None
matcher = lambda v: False
if query.search:
match = query.search
matcher = lambda v: match not in v
if query.is_object_id_field:
match = match[:_TWENTY_FOUR]
matcher = lambda v: v < match

try:
result = await collection.distinct(query.path)
Expand All @@ -358,10 +363,13 @@ async def _do_distinct_query(
exclude = set(query.exclude or [])

for value in result:
if query.is_object_id_field:
value = str(value)

if value in exclude:
continue

if not value or (match and match not in value):
if not value or matcher(value):
continue

values.append(value)
Expand All @@ -386,25 +394,19 @@ async def _do_distinct_pipeline(

pipeline.append({"$sort": {query.path: 1}})

match_search = None
if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
match_search = _add_search(query)
pipeline.append(match_search)

pipeline += _match_arrays(dataset, query.path, False) + _unwind(
match_arrays = _match_arrays(dataset, query.path, False) + _unwind(
dataset, query.path, False
)

if query.search:
if query.is_object_id_field:
add = (_TWENTY_FOUR - len(query.search)) * "0"
value = {"$gte": ObjectId(f"{query.search}{add}")}
else:
value = Regex(f"^{query.search}")
pipeline.append({"$match": {query.path: value}})
if match_arrays:
pipeline += match_arrays
if match_search:
# match again after unwinding list fields
pipeline.append(match_search)

pipeline += [{"$group": {"_id": f"${query.path}"}}]

Expand All @@ -423,6 +425,23 @@ async def _do_distinct_pipeline(
return values


def _add_search(query: DistinctQuery):
# strip chars after 24
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean sidebar only filters by first 24 chars? So long file path filters could potentially not include the file name?

What's the purpose of limiting to the first 24

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good comment. I refactored this incorrectly. It should only apply to ObjectId fields

I think I will add frontend validation to omit these queries completely. That is a more holistic approach.

An object id is always a 24 char hexadecimal string. The search results can be omitted when the search does not comply

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will undraft when I'm happy with the solution

if query.is_object_id_field:
search = query.search[:_TWENTY_FOUR]
add = (_TWENTY_FOUR - len(search)) * "0"
if add:
search = f"{search}{add}"
try:
value = {"$gte": ObjectId(search)}
except:
# search is not valid
value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
else:
value = Regex(f"^{search}")
return {"$match": {query.path: value}}


def _first(
path: str,
dataset: fo.Dataset,
Expand Down
54 changes: 52 additions & 2 deletions tests/unittests/lightning_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,51 @@ async def test_group_dataset(self, dataset: fo.Dataset):
)


class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase):
@drop_async_dataset
async def test_object_ids(self, dataset: fo.Dataset):
keys = _add_samples(dataset, dict(id="0" * 24))
query = """
query Query($input: LightningInput!) {
lightning(input: $input) {
... on ObjectIdLightningResult {
path
values
}
}
}
"""

result = await _execute(
query,
dataset,
fo.ObjectIdField,
keys,
frames=False,
search="0" * 25,
)

for path in result.data["lightning"]:
if path["path"] == "id":
self.assertEqual(len(path["values"]), 1)
else:
self.assertListEqual(
path["values"], ["000000000000000000000000"]
)

result = await _execute(
query,
dataset,
fo.ObjectIdField,
keys,
frames=False,
search="Z" * 25,
)

for path in result.data["lightning"]:
self.assertListEqual(path["values"], [])


def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]):
samples = []
keys = set()
Expand All @@ -1191,6 +1236,7 @@ async def _execute(
field: fo.Field,
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
slice: t.Optional[str] = None,
):
return await execute(
Expand All @@ -1200,7 +1246,9 @@ async def _execute(
"input": asdict(
LightningInput(
dataset=dataset.name,
paths=_get_paths(dataset, field, keys, frames=frames),
paths=_get_paths(
dataset, field, keys, frames=frames, search=search
),
slice=slice,
)
)
Expand All @@ -1213,6 +1261,7 @@ def _get_paths(
field_type: t.Type[fo.Field],
keys: t.Set[str],
frames=True,
search: t.Optional[str] = None,
):
field_dict = dataset.get_field_schema(flat=True)

Expand All @@ -1239,7 +1288,8 @@ def _get_paths(
continue

dataset.create_index(path)
paths.append(LightningPathInput(path=path))
paths.append(LightningPathInput(path=path, search=search))

return paths


Expand Down
Loading