# ObjectId Sidebar Filter Fixes #5415

Merged: 3 commits, Feb 4, 2025
Changes from 2 commits
## fiftyone/server/lightning.py (35 additions & 16 deletions)
```diff
@@ -345,8 +345,13 @@ async def _do_distinct_query(
     query: DistinctQuery,
 ):
     match = None
+    matcher = lambda v: False
     if query.search:
         match = query.search
+        matcher = lambda v: match not in v
+        if query.is_object_id_field:
+            match = match[:_TWENTY_FOUR]
+            matcher = lambda v: v < match
 
     try:
         result = await collection.distinct(query.path)
```
```diff
@@ -358,10 +363,13 @@ async def _do_distinct_query(
     exclude = set(query.exclude or [])
 
     for value in result:
+        if query.is_object_id_field:
+            value = str(value)
+
         if value in exclude:
             continue
 
-        if not value or (match and match not in value):
+        if not value or matcher(value):
             continue
 
         values.append(value)
```
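For readers skimming the diff, here is a small standalone sketch of the matcher logic above (simplified names, not the library's API). The predicate returns `True` for values the query should skip: substring matching for plain string fields, a lower-bound comparison for ObjectId fields.

```python
from bson import ObjectId

_TWENTY_FOUR = 24  # an ObjectId is a 24-char hex string


def make_matcher(search, is_object_id_field):
    # Mirrors the lambdas in _do_distinct_query: True means "skip this value"
    if not search:
        return lambda v: False  # no search term: keep everything
    if is_object_id_field:
        match = search[:_TWENTY_FOUR]
        return lambda v: v < match  # skip stringified ids below the bound
    return lambda v: search not in v  # skip values missing the substring


matcher = make_matcher("6f" + "0" * 22, True)
print(matcher(str(ObjectId("5" * 24))))  # True: below the bound, skipped
print(matcher(str(ObjectId("7" * 24))))  # False: kept
```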
```diff
@@ -386,25 +394,19 @@ async def _do_distinct_pipeline(
 
     pipeline.append({"$sort": {query.path: 1}})
 
+    match_search = None
     if query.search:
-        if query.is_object_id_field:
-            add = (_TWENTY_FOUR - len(query.search)) * "0"
-            value = {"$gte": ObjectId(f"{query.search}{add}")}
-        else:
-            value = Regex(f"^{query.search}")
-        pipeline.append({"$match": {query.path: value}})
+        match_search = _add_search(query)
+        pipeline.append(match_search)
 
-    pipeline += _match_arrays(dataset, query.path, False) + _unwind(
+    match_arrays = _match_arrays(dataset, query.path, False) + _unwind(
         dataset, query.path, False
     )
 
-    if query.search:
-        if query.is_object_id_field:
-            add = (_TWENTY_FOUR - len(query.search)) * "0"
-            value = {"$gte": ObjectId(f"{query.search}{add}")}
-        else:
-            value = Regex(f"^{query.search}")
-        pipeline.append({"$match": {query.path: value}})
+    if match_arrays:
+        pipeline += match_arrays
+        if match_search:
+            # match again after unwinding list fields
+            pipeline.append(match_search)
 
     pipeline += [{"$group": {"_id": f"${query.path}"}}]
```
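To make the new control flow concrete, here is a sketch of the pipeline shape this now builds for an ObjectId search on a hypothetical list field. The path, search value, and `$unwind` stage are illustrative stand-ins for what `_match_arrays`/`_unwind` actually emit; the point is that the same `$match` stage is applied once before and once after unwinding.

```python
from bson import ObjectId

# Hypothetical query.path="detections.id", query.search="6f" (zero-padded)
match_search = {
    "$match": {"detections.id": {"$gte": ObjectId("6f" + "0" * 22)}}
}
pipeline = [
    {"$sort": {"detections.id": 1}},
    match_search,  # prune documents before unwinding
    {"$unwind": "$detections"},  # stand-in for the _match_arrays/_unwind stages
    match_search,  # match again after unwinding list fields
    {"$group": {"_id": "$detections.id"}},
]
```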

```diff
@@ -423,6 +425,23 @@ async def _do_distinct_pipeline(
     return values
 
 
+def _add_search(query: DistinctQuery):
+    # strip chars after 24
+    search = query.search[:_TWENTY_FOUR]
+    if query.is_object_id_field:
+        add = (_TWENTY_FOUR - len(search)) * "0"
+        if add:
+            search = f"{search}{add}"
+        try:
+            value = {"$gte": ObjectId(search)}
+        except:
+            # search is not valid
+            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
+    else:
+        value = Regex(f"^{search}")
+
+    return {"$match": {query.path: value}}
+
+
 def _first(
     path: str,
     dataset: fo.Dataset,
```

Review thread on the `# strip chars after 24` line:

**Contributor:** Does this mean the sidebar only filters by the first 24 chars? So long file path filters could potentially not include the file name? What's the purpose of limiting to the first 24?

**Contributor (author):** Good comment. I refactored this incorrectly; it should only apply to ObjectId fields. I think I will add frontend validation to omit these queries completely. That is a more holistic approach. An ObjectId is always a 24-char hexadecimal string, so the search results can be omitted when the search does not comply.

**Contributor (author):** I will undraft when I'm happy with the solution.
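The ObjectId branch pads a short search out to a full 24-char hex string and uses it as a lower bound; an unparseable search falls back to a predicate that can never match. A standalone sketch of the same logic, assuming `bson` and plain arguments instead of a `DistinctQuery`:

```python
from bson import ObjectId
from bson.regex import Regex

_TWENTY_FOUR = 24


def add_search(path, search, is_object_id_field):
    # Sketch of _add_search with plain arguments (not the PR's exact API)
    search = search[:_TWENTY_FOUR]  # strip chars after 24
    if is_object_id_field:
        search += (_TWENTY_FOUR - len(search)) * "0"  # zero-pad to 24 chars
        try:
            value = {"$gte": ObjectId(search)}
        except Exception:
            # not valid hex: nothing is < the all-zero minimum ObjectId
            value = {"$lt": ObjectId("0" * _TWENTY_FOUR)}
    else:
        value = Regex(f"^{search}")
    return {"$match": {path: value}}


print(add_search("id", "6f", True))
# {'$match': {'id': {'$gte': ObjectId('6f0000000000000000000000')}}}
print(add_search("id", "Z" * 25, True))
# {'$match': {'id': {'$lt': ObjectId('000000000000000000000000')}}}
```

Note that the zero-padded value is the smallest ObjectId with the typed prefix, so `$gte` acts as a seek rather than a strict prefix match; combined with the ascending `$sort`, matching ids surface first.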
## tests/unittests/lightning_tests.py (52 additions & 2 deletions)
```diff
@@ -1172,6 +1172,51 @@ async def test_group_dataset(self, dataset: fo.Dataset):
         )
 
 
+class TestObjectIdLightningQueries(unittest.IsolatedAsyncioTestCase):
+    @drop_async_dataset
+    async def test_object_ids(self, dataset: fo.Dataset):
+        keys = _add_samples(dataset, dict(id="0" * 24))
+        query = """
+            query Query($input: LightningInput!) {
+                lightning(input: $input) {
+                    ... on ObjectIdLightningResult {
+                        path
+                        values
+                    }
+                }
+            }
+        """
+
+        result = await _execute(
+            query,
+            dataset,
+            fo.ObjectIdField,
+            keys,
+            frames=False,
+            search="0" * 25,
+        )
+
+        for path in result.data["lightning"]:
+            if path["path"] == "id":
+                self.assertEqual(len(path["values"]), 1)
+            else:
+                self.assertListEqual(
+                    path["values"], ["000000000000000000000000"]
+                )
+
+        result = await _execute(
+            query,
+            dataset,
+            fo.ObjectIdField,
+            keys,
+            frames=False,
+            search="Z" * 25,
+        )
+
+        for path in result.data["lightning"]:
+            self.assertListEqual(path["values"], [])
+
+
 def _add_samples(dataset: fo.Dataset, *sample_data: t.List[t.Dict]):
     samples = []
     keys = set()
```
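The two searches are chosen to hit both fallbacks in `_add_search`: 25 zeros are truncated to a valid 24-char hex id, while 25 `Z`s can never form a valid ObjectId, so that query matches nothing. A quick sanity check with `bson` illustrating the assumption:

```python
from bson import ObjectId
from bson.errors import InvalidId

print(ObjectId(("0" * 25)[:24]))  # 000000000000000000000000: matches the sample

try:
    ObjectId(("Z" * 25)[:24])
except InvalidId:
    print("not hex: the query matches nothing")
```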
```diff
@@ -1191,6 +1236,7 @@ async def _execute(
     field: fo.Field,
     keys: t.Set[str],
     frames=True,
+    search: t.Optional[str] = None,
     slice: t.Optional[str] = None,
 ):
     return await execute(
```

```diff
@@ -1200,7 +1246,9 @@ async def _execute(
         "input": asdict(
             LightningInput(
                 dataset=dataset.name,
-                paths=_get_paths(dataset, field, keys, frames=frames),
+                paths=_get_paths(
+                    dataset, field, keys, frames=frames, search=search
+                ),
                 slice=slice,
             )
         )
```

```diff
@@ -1213,6 +1261,7 @@ def _get_paths(
     field_type: t.Type[fo.Field],
     keys: t.Set[str],
     frames=True,
+    search: t.Optional[str] = None,
 ):
     field_dict = dataset.get_field_schema(flat=True)
 
```

```diff
@@ -1239,7 +1288,8 @@ def _get_paths(
             continue
 
         dataset.create_index(path)
-        paths.append(LightningPathInput(path=path))
+        paths.append(LightningPathInput(path=path, search=search))
+
     return paths
```