feat(person-on-events): Revamp pagination to handle 'lost' personIDs #11500

Merged 15 commits on Aug 31, 2022
@@ -43,7 +43,7 @@ def actor_query(self, limit_actors: Optional[bool] = True):

def get_actors(
self,
) -> Tuple[Union[QuerySet[Person], QuerySet[Group]], Union[List[SerializedGroup], List[SerializedPerson]]]:
) -> Tuple[Union[QuerySet[Person], QuerySet[Group]], Union[List[SerializedGroup], List[SerializedPerson]], int]:
if self._filter.correlation_type == FunnelCorrelationType.PROPERTIES:
return _FunnelPropertyCorrelationActors(self._filter, self._team, self._base_uri).get_actors()
else:
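For orientation, a minimal sketch (not part of the diff) of how the widened get_actors() return value can be consumed: it now yields the raw ClickHouse row count as a third element, and the gap between that count and the serialized actors is the number of 'lost' person IDs. The wrapper name below is hypothetical; only FunnelCorrelationActors and its import path come from this PR.

from typing import List, Tuple

from ee.clickhouse.queries.funnels.funnel_correlation_persons import FunnelCorrelationActors


def correlation_actors_with_lost_count(filter, team, base_uri: str) -> Tuple[List[dict], int]:
    # Illustrative wrapper, not part of the change: unpack the new three-element
    # return value and report how many person IDs failed to resolve to persons.
    _, serialized_actors, raw_count = FunnelCorrelationActors(
        filter=filter, team=team, base_uri=base_uri
    ).get_actors()
    return serialized_actors, raw_count - len(serialized_actors)
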
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/test/breakdown_cases.py
@@ -13,7 +13,7 @@ def funnel_breakdown_group_test_factory(Funnel, FunnelPerson, _create_event, _cr
class TestFunnelBreakdownGroup(APIBaseTest):
def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None):
person_filter = filter.with_data({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value})
_, serialized_result = FunnelPerson(person_filter, self.team).get_actors()
_, serialized_result, _ = FunnelPerson(person_filter, self.team).get_actors()

return [val["id"] for val in serialized_result]

4 changes: 2 additions & 2 deletions ee/clickhouse/queries/funnels/test/test_funnel_correlation.py
@@ -44,7 +44,7 @@ def _get_actors_for_event(self, filter: Filter, event_name: str, properties=None
}
)

_, serialized_actors = FunnelCorrelationActors(actor_filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors()
return [str(row["id"]) for row in serialized_actors]

def _get_actors_for_property(self, filter: Filter, property_values: list, success=True):
@@ -57,7 +57,7 @@ def _get_actors_for_property(self, filter: Filter, property_values: list, succes
"funnel_correlation_person_converted": "TrUe" if success else "falSE",
}
)
_, serialized_actors = FunnelCorrelationActors(actor_filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors()
return [str(row["id"]) for row in serialized_actors]

def test_basic_funnel_correlation_with_events(self):
@@ -114,7 +114,7 @@ def test_basic_funnel_correlation_with_events(self):
"funnel_correlation_person_converted": "TrUe",
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual([str(val["id"]) for val in serialized_actors], success_target_persons)

@@ -126,7 +126,7 @@ def test_basic_funnel_correlation_with_events(self):
}
)

_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual([str(val["id"]) for val in serialized_actors], failure_target_persons)

@@ -137,7 +137,7 @@ def test_basic_funnel_correlation_with_events(self):
"funnel_correlation_person_converted": "False",
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(person_fail.uuid)])

@@ -148,7 +148,7 @@ def test_basic_funnel_correlation_with_events(self):
"funnel_correlation_person_converted": "trUE",
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(person_succ.uuid)])

@@ -159,7 +159,7 @@ def test_basic_funnel_correlation_with_events(self):
"funnel_correlation_person_converted": None,
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual(
[str(val["id"]) for val in serialized_actors], [*success_target_persons, str(person_fail.uuid)]
@@ -172,7 +172,7 @@ def test_basic_funnel_correlation_with_events(self):
"funnel_correlation_person_converted": None,
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual(
[str(val["id"]) for val in serialized_actors], [*failure_target_persons, str(person_succ.uuid)]
@@ -253,7 +253,7 @@ def test_people_arent_returned_multiple_times(self):
"funnel_correlation_person_converted": "TrUe",
}
)
_, serialized_actors = FunnelCorrelationActors(filter, self.team).get_actors()
_, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(people["user_1"].uuid)])

@@ -301,7 +301,7 @@ def test_funnel_correlation_on_event_with_recordings(self):
"funnel_correlation_person_converted": "True",
}
)
_, results = FunnelCorrelationActors(filter, self.team).get_actors()
_, results, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertEqual(results[0]["id"], p1.uuid)
self.assertEqual(
@@ -337,7 +337,7 @@ def test_funnel_correlation_on_event_with_recordings(self):
"funnel_correlation_person_converted": "False",
}
)
_, results = FunnelCorrelationActors(filter, self.team).get_actors()
_, results, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertEqual(results[0]["id"], p1.uuid)
self.assertEqual(
@@ -394,7 +394,7 @@ def test_funnel_correlation_on_properties_with_recordings(self):
"funnel_correlation_person_converted": "True",
}
)
_, results = FunnelCorrelationActors(filter, self.team).get_actors()
_, results, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertEqual(results[0]["id"], p1.uuid)
self.assertEqual(
@@ -489,7 +489,7 @@ def test_strict_funnel_correlation_with_recordings(self):
"funnel_correlation_person_converted": "True",
}
)
_, results = FunnelCorrelationActors(filter, self.team).get_actors()
_, results, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], p1.uuid)
@@ -525,7 +525,7 @@ def test_strict_funnel_correlation_with_recordings(self):
"funnel_correlation_person_converted": "False",
}
)
_, results = FunnelCorrelationActors(filter, self.team).get_actors()
_, results, _ = FunnelCorrelationActors(filter, self.team).get_actors()

self.assertEqual(results[0]["id"], p2.uuid)
self.assertEqual(
4 changes: 2 additions & 2 deletions ee/clickhouse/queries/test/__snapshots__/test_trends.ambr
@@ -247,7 +247,7 @@
AND timestamp <= toDateTime('2020-01-02 23:59:59')
AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -983,7 +983,7 @@
AND timestamp >= toDateTime('2020-01-05 15:00:00')
AND timestamp <= toDateTime('2020-01-05 16:00:00') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
12 changes: 6 additions & 6 deletions ee/clickhouse/queries/test/test_paths.py
@@ -61,7 +61,7 @@ def _get_people_at_path(self, filter, path_start=None, path_end=None, funnel_fil
person_filter = filter.with_data(
{"path_start_key": path_start, "path_end_key": path_end, "path_dropoff_key": path_dropoff}
)
_, serialized_actors = PathsActors(person_filter, self.team, funnel_filter).get_actors()
_, serialized_actors, _ = PathsActors(person_filter, self.team, funnel_filter).get_actors()
return [row["id"] for row in serialized_actors]

def test_step_limit(self):
@@ -2842,7 +2842,7 @@ def test_path_recording(self):
"include_recordings": "true",
}
)
_, serialized_actors = PathsActors(filter, self.team).get_actors()
_, serialized_actors, _ = PathsActors(filter, self.team).get_actors()
self.assertCountEqual([p1.uuid, p2.uuid], [actor["id"] for actor in serialized_actors])
matched_recordings = [actor["matched_recordings"] for actor in serialized_actors]

@@ -2904,7 +2904,7 @@ def test_path_recording_with_no_window_or_session_id(self):
"include_recordings": "true",
}
)
_, serialized_actors = PathsActors(filter, self.team).get_actors()
_, serialized_actors, _ = PathsActors(filter, self.team).get_actors()
self.assertEqual([p1.uuid], [actor["id"] for actor in serialized_actors])
self.assertEqual(
[[]], [actor["matched_recordings"] for actor in serialized_actors],
@@ -2953,7 +2953,7 @@ def test_path_recording_with_start_and_end(self):
"include_recordings": "true",
}
)
_, serialized_actors = PathsActors(filter, self.team).get_actors()
_, serialized_actors, _ = PathsActors(filter, self.team).get_actors()
self.assertEqual([p1.uuid], [actor["id"] for actor in serialized_actors])
self.assertEqual(
[
@@ -3015,7 +3015,7 @@ def test_path_recording_for_dropoff(self):
"include_recordings": "true",
}
)
_, serialized_actors = PathsActors(filter, self.team).get_actors()
_, serialized_actors, _ = PathsActors(filter, self.team).get_actors()
self.assertEqual([], [actor["id"] for actor in serialized_actors])
self.assertEqual(
[], [actor["matched_recordings"] for actor in serialized_actors],
@@ -3031,7 +3031,7 @@ def test_path_recording_for_dropoff(self):
"include_recordings": "true",
}
)
_, serialized_actors = PathsActors(filter, self.team).get_actors()
_, serialized_actors, _ = PathsActors(filter, self.team).get_actors()
self.assertEqual([p1.uuid], [actor["id"] for actor in serialized_actors])
self.assertEqual(
[
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/test/test_trends.py
@@ -17,7 +17,7 @@ class TestClickhouseTrends(trend_test_factory(Trends)): # type: ignore
maxDiff = None

def _get_trend_people(self, filter, entity):
_, serialized_actors = TrendsActors(filter=filter, entity=entity, team=self.team).get_actors()
_, serialized_actors, _ = TrendsActors(filter=filter, entity=entity, team=self.team).get_actors()
return serialized_actors

def _create_groups(self):
15 changes: 8 additions & 7 deletions ee/clickhouse/views/person.py
@@ -4,7 +4,7 @@
from rest_framework.decorators import action

from ee.clickhouse.queries.funnels.funnel_correlation_persons import FunnelCorrelationActors
from posthog.api.person import PersonViewSet, should_paginate
from posthog.api.person import PersonViewSet
from posthog.constants import FUNNEL_CORRELATION_PERSON_LIMIT, FUNNEL_CORRELATION_PERSON_OFFSET, INSIGHT_FUNNELS
from posthog.decorators import cached_function
from posthog.models import Filter
@@ -22,7 +22,7 @@ def funnel_correlation(self, request: request.Request, **kwargs) -> response.Res
if not results_package:
return response.Response(data=[])

people, next_url, initial_url = results_package["result"]
people, next_url, initial_url, missing_persons = results_package["result"]

return response.Response(
data={
@@ -31,24 +31,25 @@ def funnel_correlation(self, request: request.Request, **kwargs) -> response.Res
"initial": initial_url,
"is_cached": results_package.get("is_cached"),
"last_refresh": results_package.get("last_refresh"),
"missing_persons": missing_persons,
}
)

@cached_function
def calculate_funnel_correlation_persons(
self, request: request.Request
) -> Dict[str, Tuple[list, Optional[str], Optional[str]]]:
) -> Dict[str, Tuple[list, Optional[str], Optional[str], int]]:
if request.user.is_anonymous or not self.team:
return {"result": ([], None, None)}
return {"result": ([], None, None, False)}

filter = Filter(request=request, data={"insight": INSIGHT_FUNNELS}, team=self.team)
if not filter.correlation_person_limit:
filter = filter.with_data({FUNNEL_CORRELATION_PERSON_LIMIT: 100})
base_uri = request.build_absolute_uri("/")
actors, serialized_actors = FunnelCorrelationActors(
actors, serialized_actors, raw_count = FunnelCorrelationActors(
filter=filter, team=self.team, base_uri=base_uri
).get_actors()
_should_paginate = should_paginate(actors, filter.correlation_person_limit)
_should_paginate = raw_count >= filter.correlation_person_limit

next_url = (
format_query_params_absolute_url(
@@ -63,7 +64,7 @@ def calculate_funnel_correlation_persons(
initial_url = format_query_params_absolute_url(request, 0)

# cached_function expects a dict with the key result
return {"result": (serialized_actors, next_url, initial_url)}
return {"result": (serialized_actors, next_url, initial_url, raw_count - len(serialized_actors))}


class LegacyEnterprisePersonViewSet(EnterprisePersonViewSet):
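The view change above boils down to one rule: paginate on the raw ClickHouse count, not on how many persons survived serialization, and report the difference as missing_persons. A self-contained sketch of just that arithmetic (plain Python, no PostHog imports; the function name and the next-offset calculation are assumptions, the rest mirrors the diff):

from typing import List, Optional, Tuple


def paginate_correlation_actors(
    serialized_actors: List[dict], raw_count: int, limit: int, offset: int
) -> Tuple[Optional[int], int]:
    # raw_count is the number of actor rows ClickHouse matched; it can exceed
    # len(serialized_actors) when person IDs seen in events no longer exist in
    # Postgres ("lost" persons). Driving pagination off raw_count keeps pages
    # advancing past those gaps instead of stopping early.
    should_paginate = raw_count >= limit  # replaces should_paginate(actors, limit)
    next_offset = offset + limit if should_paginate else None
    missing_persons = raw_count - len(serialized_actors)
    return next_offset, missing_persons


# Example: 100 raw rows matched, but only 97 resolved to persons.
assert paginate_correlation_actors([{}] * 97, raw_count=100, limit=100, offset=0) == (100, 3)
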
20 changes: 10 additions & 10 deletions ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr
@@ -33,7 +33,7 @@
AND timestamp >= toTimezone(toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00')), 'UTC'), 'UTC')
AND timestamp <= toDateTime('2012-01-15 23:59:59') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -95,7 +95,7 @@
AND timestamp >= toDateTime('2012-01-14 00:00:00')
AND timestamp <= toDateTime('2012-01-14 23:59:59') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -153,7 +153,7 @@
AND timestamp <= toDateTime('2012-01-14 23:59:59')
AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -207,7 +207,7 @@
AND timestamp >= toTimezone(toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00')), 'UTC'), 'UTC')
AND timestamp <= toDateTime('2012-01-14 23:59:59') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -316,7 +316,7 @@
AND timestamp <= toDateTime('2012-01-14 23:59:59')
AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -382,7 +382,7 @@
AND timestamp >= toTimezone(toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00')), 'UTC'), 'UTC')
AND timestamp <= toDateTime('2012-01-14 23:59:59') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -474,7 +474,7 @@
AND timestamp <= toDateTime('2012-01-14 23:59:59')
AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -645,7 +645,7 @@
AND timestamp <= toDateTime('2012-01-14 23:59:59')
AND (((has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))))) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -838,7 +838,7 @@
AND (NOT has([''], "$group_0")
AND NOT has([''], "$group_0")) )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---
@@ -894,7 +894,7 @@
AND timestamp >= toDateTime('2020-01-02 00:00:00')
AND timestamp <= toDateTime('2020-01-02 23:59:59') )
GROUP BY actor_id
LIMIT 200
LIMIT 100
OFFSET 0
'
---