Skip to content

Commit

Permalink
feat(trends): add moving average smoothing_intervals param (#6435)
Browse files Browse the repository at this point in the history
* feat(trends): add moving average `smoothing_intervals` param

This will average over the preceding `smoothing_intervals` intervals.
The driver for this is to remove either weekly or monthly trends.

Limitations at the moment are that it only works for total volume
queries, as it relies on the results from clickhouse being from a single
timeline series.

* WIP UI for selecting smoothing

* WIP: Fix typing and appearance logic

* Made smoothing extensible beyond day

* Test now considers a non-trivial case

* chore(smoothing): use url params for state storage of smoothing value

This should allow for url navigation to work, although I haven't quite
figured out how we then use this to make the insights/trends POST
request

* Added further tests for smoothing intervals

* fix test_to_dict

* fix trend tests

* Added test to validate preceding limit

* Fixed test for 2 interval

* clear django redis cache before each test

* add explicit ordering to dashboard items in refresh test

Otherwise the test is not deterministic.

* Revert "clear django redis cache before each test"

This is a good thing to do, but I don't want to open up a discussion on
this PR. It was not a fix for the immediate intermittent
test_refresh_cache test

This reverts commit aa52f33.

* format

* clickhouse trends: SET allow_experimental_window_functions = 1

* fix setting of allow_experimental_window_functions

* add cache clears for requests

* Delete SmoothingFilterLogic.ts

Removed smoothing filter logic file

* Use `SMOOTHING_INTERVALS` instead of string literal

This should ensure that if we update the url param key somewhere, we don't have to remember to change it here as well.

Co-authored-by: Neil Kakkar <neilkakkar@gmail.com>

* move test_trends to ee

* actually delete test_trends.py

* Transitioned smoothing logic to use Logic

* WIP: Moving away from url based logic

* WIP: Refactored logic on listeners

* fix storybook

* refactor

* set redis url

* remove keastory

* Move interval filter to setFilters insight logic

* update trend snapshots

* run prettier

* Add stickiness test deps that were in test_trends

* Fix storybook

* fix stickiness tests

* update snapshot

* fix cohort test

* only display smoothing on linear

* fix tests

* Add some tests

* added box to make dropdown consistent

* Floor

* fix tests

* fix tests

* fix test

* Add feature flag

Co-authored-by: Marcus Hyett <marcus@posthog.com>
Co-authored-by: Marcus Hyett (PostHog) <85295485+marcushyett-ph@users.noreply.github.com>
Co-authored-by: Neil Kakkar <neilkakkar@gmail.com>
Co-authored-by: Tim Glaser <tim@glsr.nl>
  • Loading branch information
5 people authored Mar 23, 2022
1 parent 7759947 commit fbdefda
Show file tree
Hide file tree
Showing 22 changed files with 712 additions and 235 deletions.
3 changes: 3 additions & 0 deletions .devcontainer/.env
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ REDIS_URL=redis://redis:6379
CLICKHOUSE_HOST=clickhouse
CLICKHOUSE_DATABASE=posthog_test
CLICKHOUSE_VERIFY=False

# Setup redis config
REDIS_URL=redis://redis:6379/
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/test/__snapshots__/test_trends.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@
AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: TestClickhouseTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns
Expand Down
22 changes: 21 additions & 1 deletion ee/clickhouse/queries/trends/total_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,27 @@ def _total_volume_query(self, entity: Entity, filter: Filter, team: Team) -> Tup

null_sql = NULL_SQL.format(trunc_func=trunc_func, interval_func=interval_func)
params["interval"] = filter.interval
final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)

# If we have a smoothing interval > 1 then add in the sql to
# handle the rolling average. Else just do a sum. This is possibly an
# unnecessary optimization.
if filter.smoothing_intervals > 1:
smoothing_operation = f"""
AVG(SUM(total))
OVER (
ORDER BY day_start
ROWS BETWEEN {filter.smoothing_intervals - 1} PRECEDING
AND CURRENT ROW
)"""
else:
smoothing_operation = "SUM(total)"

final_query = AGGREGATE_SQL.format(
null_sql=null_sql,
content_sql=content_sql,
smoothing_operation=smoothing_operation,
aggregate="count" if filter.smoothing_intervals < 2 else "floor(count)",
)
return final_query, params, self._parse_total_volume_result(filter, entity, team.id)

def _parse_total_volume_result(self, filter: Filter, entity: Entity, team_id: int) -> Callable:
Expand Down
12 changes: 10 additions & 2 deletions ee/clickhouse/sql/trends/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@
"""

AGGREGATE_SQL = """
SELECT groupArray(day_start) as date, groupArray(count) as data FROM (
SELECT SUM(total) AS count, day_start from ({null_sql} UNION ALL {content_sql}) group by day_start order by day_start
SELECT groupArray(day_start) as date, groupArray({aggregate}) as data FROM (
SELECT {smoothing_operation} AS count, day_start
from (
{null_sql}
UNION ALL
{content_sql}
)
group by day_start
order by day_start
SETTINGS allow_experimental_window_functions = 1
)
SETTINGS timeout_before_checking_execution_speed = 60
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
AND timestamp <= '2012-01-15 23:59:59' )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: ClickhouseTestTrends.test_insight_trends_basic.1
Expand Down Expand Up @@ -138,7 +138,7 @@
AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: ClickhouseTestTrends.test_insight_trends_clean_arg.1
Expand Down Expand Up @@ -200,7 +200,7 @@
AND timestamp <= '2012-01-15 23:59:59' )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: ClickhouseTestTrends.test_insight_trends_cumulative.1
Expand Down Expand Up @@ -272,7 +272,7 @@
GROUP BY person_id)
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: ClickhouseTestTrends.test_insight_trends_cumulative.3
Expand Down Expand Up @@ -547,7 +547,7 @@
AND (NOT has([''], "$group_0")) )
GROUP BY toStartOfDay(timestamp))
group by day_start
order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
'
---
# name: ClickhouseTestTrendsGroups.test_aggregating_by_group.1
Expand Down
8 changes: 7 additions & 1 deletion ee/clickhouse/views/test/test_clickhouse_stickiness.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from datetime import timedelta
from uuid import uuid4

from django.test.client import Client
from freezegun.api import freeze_time

from ee.clickhouse.models.event import create_event
from ee.clickhouse.models.group import create_group
from ee.clickhouse.queries.stickiness.clickhouse_stickiness import ClickhouseStickiness
from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
from posthog.api.test.test_stickiness import get_stickiness_time_series_ok, stickiness_test_factory
from posthog.api.test.test_trends import get_people_from_url_ok
from posthog.models.action import Action
from posthog.models.action_step import ActionStep
from posthog.models.person import Person
Expand All @@ -34,6 +34,12 @@ def _create_person(**kwargs):
return Person(id=person.uuid)


def get_people_from_url_ok(client: Client, url: str):
    """Fetch *url* through the test client and return the people of the first result.

    Prepends a leading slash to *url*, asserts the response is HTTP 200
    (failing with the raw response body otherwise), and extracts
    ``results[0]["people"]`` from the JSON payload.
    """
    response = client.get(f"/{url}")
    assert response.status_code == 200, response.content
    payload = response.json()
    return payload["results"][0]["people"]


class TestClickhouseStickiness(ClickhouseTestMixin, stickiness_test_factory(ClickhouseStickiness, _create_event, _create_person, _create_action, get_earliest_timestamp)): # type: ignore
@snapshot_clickhouse_queries
def test_filter_by_group_properties(self):
Expand Down
Loading

0 comments on commit fbdefda

Please sign in to comment.