From 90e87baa4c288d0d313e6482fa868efaeec02ddb Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 22 Jul 2021 11:03:08 +0100 Subject: [PATCH 1/3] resolve #5275 --- .../queries/funnels/funnel_trends.py | 9 +- .../funnels/test/test_funnel_trends.py | 112 ++++++++++++------ 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index f0c3b74643232..ce3e52492cbbe 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -3,12 +3,13 @@ from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel -from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch +from ee.clickhouse.queries.util import PERIOD_TRUNC_MONTH, PERIOD_TRUNC_WEEK, get_time_diff, get_trunc_func_ch from posthog.constants import BREAKDOWN from posthog.models.filters.filter import Filter from posthog.models.team import Team TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S" +DATE_FORMAT = "%Y-%m-%d" HUMAN_READABLE_TIMESTAMP_FORMAT = "%a. %-d %b" @@ -68,7 +69,11 @@ def get_step_counts_without_aggregation_query( # This is used by funnel trends when we only need data for one period, e.g. person per data point if specific_entrance_period_start: - self.params["entrance_period_start"] = specific_entrance_period_start.strftime(TIMESTAMP_FORMAT) + self.params["entrance_period_start"] = ( + specific_entrance_period_start.strftime(DATE_FORMAT) + if interval_method in [PERIOD_TRUNC_WEEK, PERIOD_TRUNC_MONTH] + else specific_entrance_period_start.strftime(TIMESTAMP_FORMAT) + ) return f""" SELECT diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py index 714879d4044a6..030e5d5a14add 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py @@ -30,6 +30,10 @@ def _create_event(**kwargs): class TestFunnelTrends(ClickhouseTestMixin, APIBaseTest): maxDiff = None + def _get_people_at_step(self, filter, entrance_period_start, drop_off, funnel_class=ClickhouseFunnel): + person_filter = filter.with_data({"entrance_period_start": entrance_period_start, "drop_off": drop_off}) + return ClickhouseFunnelTrendsPersons(person_filter, self.team, funnel_class).run() + def _create_sample_data(self): # five people, three steps _create_person(distinct_ids=["user_one"], team=self.team) @@ -177,11 +181,9 @@ def test_only_one_user_reached_one_step(self): ) # 1 user who dropped off starting 2021-06-07 - funnel_trends_persons_existent_dropped_off_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-06-07 00:00:00", "drop_off": True}), - self.team, - ClickhouseFunnel, - ).run() + funnel_trends_persons_existent_dropped_off_results, _ = self._get_people_at_step( + filter, "2021-06-07 00:00:00", True + ) self.assertEqual( len(funnel_trends_persons_existent_dropped_off_results), 1, @@ -191,22 +193,18 @@ def test_only_one_user_reached_one_step(self): ) # No users converted 2021-06-07 - funnel_trends_persons_nonexistent_converted_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-06-07 00:00:00", "drop_off": False}), - self.team, - ClickhouseFunnel, - ).run() + funnel_trends_persons_nonexistent_converted_results, _ = self._get_people_at_step( + filter, "2021-06-07 00:00:00", False + ) self.assertEqual( len(funnel_trends_persons_nonexistent_converted_results), 0, ) # No users dropped off 2021-06-08 - funnel_trends_persons_nonexistent_converted_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-06-08 00:00:00", "drop_off": True}), - self.team, - ClickhouseFunnel, - ).run() + funnel_trends_persons_nonexistent_converted_results, _ = self._get_people_at_step( + filter, "2021-06-08 00:00:00", True + ) self.assertEqual( len(funnel_trends_persons_nonexistent_converted_results), 0, @@ -248,8 +246,24 @@ def test_day_interval(self): ], } ) + _create_person(distinct_ids=["user_one"], team=self.team) + + # full run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-01 02:00:00") + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnel)._exec_query() - self.assertEqual(len(results), 7) + self.assertEqual(7, len(results)) + + persons, _ = self._get_people_at_step(filter, "2021-05-01 00:00:00", False) + + self.assertEqual( + len(persons), 1, + ) + self.assertEqual( + [person["distinct_ids"] for person in persons], [["user_one"]], + ) def test_week_interval(self): filter = Filter( @@ -267,8 +281,24 @@ def test_week_interval(self): ], } ) + + _create_person(distinct_ids=["user_one"], team=self.team) + + # full run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-01 02:00:00") + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnel)._exec_query() + persons, _ = self._get_people_at_step(filter, "2021-04-25 00:00:00", False) + self.assertEqual(2, len(results)) + self.assertEqual( + len(persons), 1, + ) + self.assertEqual( + [person["distinct_ids"] for person in persons], [["user_one"]], + ) def test_month_interval(self): filter = Filter( @@ -286,8 +316,24 @@ def test_month_interval(self): ], } ) + _create_person(distinct_ids=["user_one"], team=self.team) + + # full run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-01 02:00:00") + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnel)._exec_query() - self.assertEqual(len(results), 1) + self.assertEqual(1, len(results)) + + persons, _ = self._get_people_at_step(filter, "2021-05-01 00:00:00", False) + + self.assertEqual( + len(persons), 1, + ) + self.assertEqual( + [person["distinct_ids"] for person in persons], [["user_one"]], + ) def test_all_results_for_day_interval(self): self._create_sample_data() @@ -604,11 +650,9 @@ def test_one_person_in_multiple_periods_and_windows(self): self.assertEqual(day_4["is_period_final"], True) # 1 user who dropped off starting # 2021-05-04 - funnel_trends_persons_existent_dropped_off_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-05-04 00:00:00", "drop_off": True}), - self.team, - ClickhouseFunnel, - ).run() + funnel_trends_persons_existent_dropped_off_results, _ = self._get_people_at_step( + filter, "2021-05-04 00:00:00", True + ) self.assertEqual( len(funnel_trends_persons_existent_dropped_off_results), 1, @@ -618,11 +662,9 @@ def test_one_person_in_multiple_periods_and_windows(self): ) # 1 user who converted starting # 2021-05-04 - funnel_trends_persons_existent_dropped_off_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-05-04 00:00:00", "drop_off": False}), - self.team, - ClickhouseFunnel, - ).run() + funnel_trends_persons_existent_dropped_off_results, _ = self._get_people_at_step( + filter, "2021-05-04 00:00:00", False + ) self.assertEqual( len(funnel_trends_persons_existent_dropped_off_results), 1, @@ -845,11 +887,9 @@ def test_one_person_in_multiple_periods_and_windows_in_unordered_funnel(self): self.assertEqual(day_4["is_period_final"], True) # 1 user who dropped off starting # 2021-05-04 - funnel_trends_persons_existent_dropped_off_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-05-04 00:00:00", "drop_off": True}), - self.team, - ClickhouseFunnelUnordered, - ).run() + funnel_trends_persons_existent_dropped_off_results, _ = self._get_people_at_step( + filter, "2021-05-04 00:00:00", True, ClickhouseFunnelUnordered + ) self.assertEqual( len(funnel_trends_persons_existent_dropped_off_results), 1, @@ -859,11 +899,9 @@ def test_one_person_in_multiple_periods_and_windows_in_unordered_funnel(self): ) # 1 user who converted starting # 2021-05-04 - funnel_trends_persons_existent_dropped_off_results, _ = ClickhouseFunnelTrendsPersons( - Filter({**filter._data, "entrance_period_start": "2021-05-04 00:00:00", "drop_off": False}), - self.team, - ClickhouseFunnelUnordered, - ).run() + funnel_trends_persons_existent_dropped_off_results, _ = self._get_people_at_step( + filter, "2021-05-04 00:00:00", False, ClickhouseFunnelUnordered + ) self.assertEqual( len(funnel_trends_persons_existent_dropped_off_results), 1, From e3b887cdbaccc70fd1fe0b6fc7c2e727575a97ca Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 22 Jul 2021 11:18:38 +0100 Subject: [PATCH 2/3] address comment --- ee/clickhouse/queries/funnels/funnel_trends.py | 10 +++------- .../queries/funnels/test/test_funnel_trends.py | 9 --------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index ce3e52492cbbe..f1ad9998ac188 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -3,7 +3,7 @@ from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel -from ee.clickhouse.queries.util import PERIOD_TRUNC_MONTH, PERIOD_TRUNC_WEEK, get_time_diff, get_trunc_func_ch +from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch from posthog.constants import BREAKDOWN from posthog.models.filters.filter import Filter from posthog.models.team import Team @@ -69,11 +69,7 @@ def get_step_counts_without_aggregation_query( # This is used by funnel trends when we only need data for one period, e.g. person per data point if specific_entrance_period_start: - self.params["entrance_period_start"] = ( - specific_entrance_period_start.strftime(DATE_FORMAT) - if interval_method in [PERIOD_TRUNC_WEEK, PERIOD_TRUNC_MONTH] - else specific_entrance_period_start.strftime(TIMESTAMP_FORMAT) - ) + self.params["entrance_period_start"] = specific_entrance_period_start.strftime(TIMESTAMP_FORMAT) return f""" SELECT @@ -83,7 +79,7 @@ def get_step_counts_without_aggregation_query( FROM ( {steps_per_person_query} ) - {"WHERE entrance_period_start = %(entrance_period_start)s" if specific_entrance_period_start else ""} + {"WHERE toDateTime(entrance_period_start) = %(entrance_period_start)s" if specific_entrance_period_start else ""} GROUP BY person_id, entrance_period_start""" def get_query(self) -> str: diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py index 030e5d5a14add..3f58d37cb8d26 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py @@ -258,9 +258,6 @@ def test_day_interval(self): persons, _ = self._get_people_at_step(filter, "2021-05-01 00:00:00", False) - self.assertEqual( - len(persons), 1, - ) self.assertEqual( [person["distinct_ids"] for person in persons], [["user_one"]], ) @@ -293,9 +290,6 @@ def test_week_interval(self): persons, _ = self._get_people_at_step(filter, "2021-04-25 00:00:00", False) self.assertEqual(2, len(results)) - self.assertEqual( - len(persons), 1, - ) self.assertEqual( [person["distinct_ids"] for person in persons], [["user_one"]], ) @@ -328,9 +322,6 @@ def test_month_interval(self): persons, _ = self._get_people_at_step(filter, "2021-05-01 00:00:00", False) - self.assertEqual( - len(persons), 1, - ) self.assertEqual( [person["distinct_ids"] for person in persons], [["user_one"]], ) From 5f9d6653e5a5a9d97d5e84ea3c5b1604bc03f008 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 22 Jul 2021 12:01:27 +0100 Subject: [PATCH 3/3] rm unused piece --- ee/clickhouse/queries/funnels/funnel_trends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index f1ad9998ac188..6b0c025b00c21 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -9,7 +9,6 @@ from posthog.models.team import Team TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S" -DATE_FORMAT = "%Y-%m-%d" HUMAN_READABLE_TIMESTAMP_FORMAT = "%a. %-d %b"