Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bounds for embedded lists in QP #5202

Merged
merged 10 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion app/packages/core/src/components/Common/utils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,14 @@ export const getFormatter = (fieldType: string, timeZone: string, bounds) => {
);
}

return numeral(v).format(
const str = numeral(v).format(
[INT_FIELD, FRAME_NUMBER_FIELD, FRAME_SUPPORT_FIELD].includes(fieldType)
? "0a"
: bounds[1] - bounds[0] < 0.1
? "0.0000a"
: "0.00a"
);
return str === "NaN" ? v.toString() : str;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If str is not a number (null?), do we just show it as null?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The formatter (numeral().format) returns the string "NaN" when it can't meaningfully format the value at the precision provided ("0.00a"). I'm punting on a better solution for now; this at least shows the raw value as opposed to "NaN".

},
};
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@ const RangeSlider = ({
const one = useRecoilValue(state.oneBound({ path, modal }));
const timeZone = useRecoilValue(fos.timeZone);
const hasBounds = useRecoilValue(state.hasBounds({ path, modal }));
const nonfinitesText = useRecoilValue(state.nonfinitesText({ path, modal }));

if (!hasBounds) {
return <Box text="No results" />;
return (
<Box text={nonfinitesText ? `${nonfinitesText} present` : "No results"} />
);
}

const showSlider = hasBounds && !(excluded && defaultRange);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { Nonfinite } from "@fiftyone/state";
import { boundsAtom, nonfiniteAtom, rangeAtom } from "@fiftyone/state";
import { boundsAtom, nonfiniteData, rangeAtom } from "@fiftyone/state";
import { selectorFamily } from "recoil";

export const FLOAT_NONFINITES: Nonfinite[] = ["inf", "ninf", "nan"];
Expand All @@ -25,14 +25,17 @@ export const hasDefaultRange = selectorFamily({
},
});

export const hasNonfinites = selectorFamily({
key: "hasNonfinites",
export const nonfinitesText = selectorFamily({
key: "nonfinitesText",
get:
(params: { path: string; modal: boolean }) =>
({ get }) => {
return FLOAT_NONFINITES.every((key) =>
get(nonfiniteAtom({ key, ...params }))
const data = get(nonfiniteData({ ...params, extended: false }));
const result = Object.entries(data).filter(
([k, v]) => k !== "none" && Boolean(v)
);

return result.length ? result.map(([key]) => key).join(", ") : null;
},
});

Expand Down
42 changes: 20 additions & 22 deletions fiftyone/server/lightning.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from bson import ObjectId
from dataclasses import asdict, dataclass
from datetime import date, datetime
import math
import typing as t

import asyncio
Expand Down Expand Up @@ -139,11 +140,11 @@ async def lightning_resolver(
for item in sublist
]

filter = (
{f"{dataset.group_field}.name": input.slice}
if dataset.group_field and input.slice
else None
)
if dataset.group_field and input.slice:
filter = {f"{dataset.group_field}.name": input.slice}
dataset.group_slice = input.slice
else:
filter = {}
result = await _do_async_pooled_queries(dataset, flattened, filter)

results = []
Expand Down Expand Up @@ -316,13 +317,15 @@ async def _do_async_query(
filter: t.Optional[t.Mapping[str, str]],
):
if isinstance(query, DistinctQuery):
if query.has_list and not query.filters:
if query.has_list:
return await _do_distinct_query(collection, query, filter)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about MongoDB, but distinct scans in most databases are really expensive and slow. Have you confirmed that relying more on distinct improves performance?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, a distinct scan over a high-cardinality field is not good. However, I am not aware of an aggregation pipeline that can return the first N sorted results using a multikey index. We should investigate this further.


return await _do_distinct_pipeline(dataset, collection, query, filter)

if filter:
query.insert(0, {"$match": filter})
for k, v in filter.items():
query.insert(0, {"$match": {k: v}})
query.insert(0, {"$sort": {k: 1}})

return [i async for i in collection.aggregate(query)]

Expand Down Expand Up @@ -420,29 +423,19 @@ def _first(
):
pipeline = [{"$sort": {path: sort}}]

if floats:
pipeline.extend(_handle_nonfinites(path, sort))

if sort:
pipeline.append({"$match": {path: {"$ne": None}}})

matched_arrays = _match_arrays(dataset, path, is_frame_field)
if matched_arrays:
pipeline += matched_arrays
elif floats:
pipeline.extend(_handle_nonfinites(path, sort))

pipeline.append({"$limit": 1})

pipeline.extend([{"$match": {path: {"$exists": True}}}, {"$limit": 1}])
unwound = _unwind(dataset, path, is_frame_field)
if unwound:
pipeline += unwound
if floats:
pipeline.extend(_handle_nonfinites(path, sort))

if sort:
pipeline.append({"$match": {path: {"$ne": None}}})

pipeline.append({"$sort": {path: sort}})

return pipeline + [
{
"$group": {
Expand Down Expand Up @@ -513,8 +506,13 @@ def _match_arrays(dataset: fo.Dataset, path: str, is_frame_field: bool):
def _parse_result(data):
if data and data[0]:
value = data[0]
if value.get("value", None) is not None:
return value["value"]
if "value" in value:
value = value["value"]
return (
value
if not isinstance(value, float) or math.isfinite(value)
else None
)

return value.get("_id", None)

Expand Down
17 changes: 10 additions & 7 deletions fiftyone/server/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,8 +614,9 @@ def _make_range_query(path: str, field: fof.Field, args):
def _make_scalar_expression(f, args, field, list_field=None, is_label=False):
expr = None
if _is_support(field):
mn, mx = args["range"]
expr = (f[0] >= mn) & (f[1] <= mx)
if "range" in args:
mn, mx = args["range"]
expr = (f[0] >= mn) & (f[1] <= mx)
elif isinstance(field, fof.ListField):
if isinstance(list_field, str):
return f.filter(
Expand All @@ -640,12 +641,14 @@ def _make_scalar_expression(f, args, field, list_field=None, is_label=False):
if not true and not false:
expr = (f != True) & (f != False)
elif _is_datetime(field):
mn, mx = args["range"]
p = fou.timestamp_to_datetime
expr = (f >= p(mn)) & (f <= p(mx))
if "range" in args:
mn, mx = args["range"]
p = fou.timestamp_to_datetime
expr = (f >= p(mn)) & (f <= p(mx))
elif isinstance(field, (fof.FloatField, fof.IntField)):
mn, mx = args["range"]
expr = (f >= mn) & (f <= mx)
if "range" in args:
mn, mx = args["range"]
expr = (f >= mn) & (f <= mx)
else:
values = args["values"]
if not values:
Expand Down
Loading
Loading