Skip to content

Commit

Permalink
Optimize catalog query: do not order when computing totals (#2959)
Browse files: browse the repository at this point in the history
  • Loading branch information
javitonino authored Mar 10, 2025
1 parent ccd53e6 commit 5642aaf
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 19 deletions.
42 changes: 24 additions & 18 deletions nucliadb/src/nucliadb/search/search/pgcatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _convert_date_filter(date: CatalogExpression.Date, filter_params: dict[str,
raise ValueError(f"Invalid date operator")


def _prepare_query(catalog_query: CatalogQuery):
def _prepare_query_filters(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
filter_sql = ["kbid = %(kbid)s"]
filter_params: dict[str, Any] = {"kbid": catalog_query.kbid}

Expand All @@ -127,7 +127,17 @@ def _prepare_query(catalog_query: CatalogQuery):
if catalog_query.filters:
filter_sql.append(_convert_filter(catalog_query.filters, filter_params))

order_sql = ""
return (
f"SELECT * FROM catalog WHERE {' AND '.join(filter_sql)}",
filter_params,
)


def _prepare_query(catalog_query: CatalogQuery) -> tuple[str, dict[str, Any]]:
# Base query with all the filters
query, filter_params = _prepare_query_filters(catalog_query)

# Sort
if catalog_query.sort:
if catalog_query.sort.field == SortField.CREATED:
order_field = "created_at"
Expand All @@ -144,12 +154,15 @@ def _prepare_query(catalog_query: CatalogQuery):
else:
order_dir = "DESC"

order_sql = f" ORDER BY {order_field} {order_dir}"
query += f" ORDER BY {order_field} {order_dir}"

return (
f"SELECT * FROM catalog WHERE {' AND '.join(filter_sql)}{order_sql}",
filter_params,
)
# Pagination
offset = catalog_query.page_size * catalog_query.page_number
query += f" LIMIT %(page_size)s OFFSET %(offset)s"
filter_params["page_size"] = catalog_query.page_size
filter_params["offset"] = offset

return query, filter_params


def _pg_driver() -> PGDriver:
Expand All @@ -159,7 +172,7 @@ def _pg_driver() -> PGDriver:
@observer.wrap({"op": "search"})
async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
# Prepare SQL query
query, query_params = _prepare_query(catalog_query)
query, query_params = _prepare_query_filters(catalog_query)

async with _pg_driver()._get_connection() as conn, conn.cursor(row_factory=dict_row) as cur:
facets = {}
Expand Down Expand Up @@ -210,15 +223,8 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:

# Query
with observer({"op": "query"}):
offset = catalog_query.page_size * catalog_query.page_number
await cur.execute(
f"{query} LIMIT %(page_size)s OFFSET %(offset)s",
{
**query_params,
"page_size": catalog_query.page_size,
"offset": offset,
},
)
query, query_params = _prepare_query(catalog_query)
await cur.execute(query, query_params)
data = await cur.fetchall()

return Resources(
Expand All @@ -237,6 +243,6 @@ async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
total=total,
page_number=catalog_query.page_number,
page_size=catalog_query.page_size,
next_page=(offset + len(data) < total),
next_page=(catalog_query.page_size * catalog_query.page_number + len(data) < total),
min_score=0,
)
5 changes: 4 additions & 1 deletion nucliadb/tests/search/unit/search/search/test_pgcatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,15 @@ async def test_filter_expression():
"(extract_facets(labels) @> %(param2)s AND (NOT modified_at > %(param3)s)) "
"OR rid = %(param4)s"
") "
"ORDER BY created_at DESC"
"ORDER BY created_at DESC "
"LIMIT %(page_size)s OFFSET %(offset)s"
)
assert params == {
"kbid": "84ed9257-04ef-41d1-b1d2-26286b92777f",
"param2": ["/l/topic/boats", "/p/folder"],
"param3": datetime(2019, 1, 1, 11, 0),
"param4": ["00112233445566778899aabbccddeeff"],
"query": "This is my query",
"page_size": 25,
"offset": 0,
}

0 comments on commit 5642aaf

Please sign in to comment.