Skip to content

Commit

Permalink
⚡ [#538] Optimize objects list performance
Browse files Browse the repository at this point in the history
The filter that keeps only the object records with the highest index per object was causing major performance degradation, especially in combination with filters on data_attrs.
    Instead of using `Max(...)` together with GROUP BY, we now use a `Window` expression to determine the max index per object, which is more efficient for larger datasets.
  • Loading branch information
stevenbal committed Mar 4, 2025
1 parent 9c8d3db commit b29a2ca
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 7 deletions.
22 changes: 15 additions & 7 deletions src/objects/core/query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from django.db import models
from django.db.models import F, Window
from django.db.models.functions import RowNumber

from vng_api_common.utils import get_uuid_from_path
from zgw_consumers.models import Service
Expand Down Expand Up @@ -43,14 +45,20 @@ def keep_max_record_per_object(self):
"""
Return records with the largest index for the object
"""
filtered_records = self.order_by()
grouped_records = (
filtered_records.filter(object=models.OuterRef("object"))
.values("object")
.annotate(max_index=models.Max("index"))
.values("max_index")
filtered_records = (
self.filter(object=models.OuterRef("object"))
.annotate(
row_number=Window(
expression=RowNumber(),
partition_by=[F("object")],
order_by=F("index").desc(),
)
)
.filter(row_number=1)
.values("index")
)
return self.filter(index=models.Subquery(grouped_records))

return self.filter(index__in=filtered_records)

def filter_for_date(self, date):
"""
Expand Down
38 changes: 38 additions & 0 deletions src/objects/tests/v2/test_object_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,44 @@ def test_list_available_today(self):
self.assertEqual(object_data["uuid"], str(self.object.uuid))
self.assertEqual(object_data["record"]["data"], {"name": "new"})

@freeze_time("2024-08-31")
def test_only_show_latest_index(self):
    """
    The list endpoint must return a single record per object: the one with
    the latest index among those that existed at the requested date.
    """
    first_url = reverse("object-detail", kwargs={"uuid": self.object.uuid})
    second_object = ObjectFactory.create(object_type=self.object_type)
    second_url = reverse("object-detail", kwargs={"uuid": second_object.uuid})

    # Two consecutive records for the second object. The first one ends on
    # the day the second one starts, so only index 2 is current on the
    # requested date.
    record_specs = (
        (1, "old", "2024-08-01", "2024-08-28"),
        (2, "new", "2024-08-28", "2024-09-30"),
    )
    for index, name, start_at, end_at in record_specs:
        ObjectRecordFactory.create(
            object=second_object,
            index=index,
            data={"name": name},
            start_at=start_at,
            end_at=end_at,
            registration_at="2024-08-02",
        )

    response = self.client.get(self.url, {"date": "2024-08-30"})

    self.assertEqual(response.status_code, status.HTTP_200_OK)

    payload = response.json()

    self.assertEqual(payload["count"], 2)

    # Expected result order: the second object (record index 2) first, then
    # self.object (record index 1).
    # NOTE(review): self.object's record is presumably created in setUp,
    # which is not visible here - confirm.
    expected_results = ((2, second_url), (1, first_url))
    for position, (expected_index, expected_path) in enumerate(expected_results):
        self.assertEqual(
            payload["results"][position]["record"]["index"], expected_index
        )
        self.assertEqual(
            payload["results"][position]["url"],
            f"http://testserver{expected_path}",
        )

def test_list_available_for_date(self):
with self.subTest("filter on old name"):
response = self.client.get(
Expand Down

0 comments on commit b29a2ca

Please sign in to comment.