diff --git a/.devcontainer/.env b/.devcontainer/.env
index e6a6dab458b63..4ab94d042a33e 100644
--- a/.devcontainer/.env
+++ b/.devcontainer/.env
@@ -21,3 +21,6 @@ REDIS_URL=redis://redis:6379
 CLICKHOUSE_HOST=clickhouse
 CLICKHOUSE_DATABASE=posthog_test
 CLICKHOUSE_VERIFY=False
+
+# Set up redis config
+REDIS_URL=redis://redis:6379/
\ No newline at end of file
diff --git a/ee/clickhouse/queries/test/__snapshots__/test_trends.ambr b/ee/clickhouse/queries/test/__snapshots__/test_trends.ambr
index 372b7d1654e6a..0fad7375eb2be 100644
--- a/ee/clickhouse/queries/test/__snapshots__/test_trends.ambr
+++ b/ee/clickhouse/queries/test/__snapshots__/test_trends.ambr
@@ -449,7 +449,7 @@
                AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) )
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
   '
 ---
 # name: TestClickhouseTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns
diff --git a/ee/clickhouse/queries/trends/total_volume.py b/ee/clickhouse/queries/trends/total_volume.py
index 99233d2f9e0fc..38a8987a0dc51 100644
--- a/ee/clickhouse/queries/trends/total_volume.py
+++ b/ee/clickhouse/queries/trends/total_volume.py
@@ -67,7 +67,27 @@ def _total_volume_query(self, entity: Entity, filter: Filter, team: Team) -> Tup
         null_sql = NULL_SQL.format(trunc_func=trunc_func, interval_func=interval_func)
         params["interval"] = filter.interval
 
-        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
+
+        # If we have a smoothing interval > 1 then add in the SQL to
+        # handle the rolling average. Else just do a sum. This is possibly an
+        # unnecessary optimization.
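+        # For illustration (not executed): with smoothing_intervals = 7 the
+        # window built below becomes
+        #   AVG(SUM(total)) OVER (ORDER BY day_start ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
+        # i.e. a trailing seven-interval rolling average, while the outer query
+        # reads floor(count) so the smoothed series stays integral.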
+        if filter.smoothing_intervals > 1:
+            smoothing_operation = f"""
+            AVG(SUM(total))
+            OVER (
+                ORDER BY day_start
+                ROWS BETWEEN {filter.smoothing_intervals - 1} PRECEDING
+                AND CURRENT ROW
+            )"""
+        else:
+            smoothing_operation = "SUM(total)"
+
+        final_query = AGGREGATE_SQL.format(
+            null_sql=null_sql,
+            content_sql=content_sql,
+            smoothing_operation=smoothing_operation,
+            aggregate="count" if filter.smoothing_intervals < 2 else "floor(count)",
+        )
         return final_query, params, self._parse_total_volume_result(filter, entity, team.id)
 
     def _parse_total_volume_result(self, filter: Filter, entity: Entity, team_id: int) -> Callable:
diff --git a/ee/clickhouse/sql/trends/volume.py b/ee/clickhouse/sql/trends/volume.py
index b08b03dd80841..82e522b4963ee 100644
--- a/ee/clickhouse/sql/trends/volume.py
+++ b/ee/clickhouse/sql/trends/volume.py
@@ -20,8 +20,16 @@
 """
 
 AGGREGATE_SQL = """
-SELECT groupArray(day_start) as date, groupArray(count) as data FROM (
-    SELECT SUM(total) AS count, day_start from ({null_sql} UNION ALL {content_sql}) group by day_start order by day_start
+SELECT groupArray(day_start) as date, groupArray({aggregate}) as data FROM (
+    SELECT {smoothing_operation} AS count, day_start
+    from (
+        {null_sql}
+        UNION ALL
+        {content_sql}
+    )
+    group by day_start
+    order by day_start
+    SETTINGS allow_experimental_window_functions = 1
 ) SETTINGS timeout_before_checking_execution_speed = 60
 """
diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr
index b98b7b27293e9..40563ea20c010 100644
--- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr
+++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr
@@ -76,7 +76,7 @@
                AND timestamp <= '2012-01-15 23:59:59' )
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
   '
 ---
 # name: ClickhouseTestTrends.test_insight_trends_basic.1
@@ -138,7 +138,7 @@
                AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) )
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
   '
 ---
 # name: ClickhouseTestTrends.test_insight_trends_clean_arg.1
@@ -200,7 +200,7 @@
                AND timestamp <= '2012-01-15 23:59:59' )
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
   '
 ---
 # name: ClickhouseTestTrends.test_insight_trends_cumulative.1
@@ -272,7 +272,7 @@
           GROUP BY person_id)
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
   '
 ---
 # name: ClickhouseTestTrends.test_insight_trends_cumulative.3
@@ -547,7 +547,7 @@
                AND (NOT has([''], "$group_0")) )
       GROUP BY toStartOfDay(timestamp))
    group by day_start
-   order by day_start) SETTINGS timeout_before_checking_execution_speed = 60
+   order by day_start SETTINGS allow_experimental_window_functions = 1) SETTINGS timeout_before_checking_execution_speed = 60
  '
 ---
 # name: ClickhouseTestTrendsGroups.test_aggregating_by_group.1
diff --git a/ee/clickhouse/views/test/test_clickhouse_stickiness.py b/ee/clickhouse/views/test/test_clickhouse_stickiness.py
index d5a7a83d638aa..7f2f33c3ec249 100644
--- a/ee/clickhouse/views/test/test_clickhouse_stickiness.py
+++ b/ee/clickhouse/views/test/test_clickhouse_stickiness.py
@@ -1,6 +1,7 @@
 from datetime import timedelta
 from uuid import uuid4
 
+from django.test.client import Client
 from freezegun.api import freeze_time
 
 from ee.clickhouse.models.event import create_event
@@ -8,7 +9,6 @@
 from ee.clickhouse.queries.stickiness.clickhouse_stickiness import ClickhouseStickiness
 from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
 from posthog.api.test.test_stickiness import get_stickiness_time_series_ok, stickiness_test_factory
-from posthog.api.test.test_trends import get_people_from_url_ok
 from posthog.models.action import Action
 from posthog.models.action_step import ActionStep
 from posthog.models.person import Person
@@ -34,6 +34,12 @@ def _create_person(**kwargs):
     return Person(id=person.uuid)
 
 
+def get_people_from_url_ok(client: Client, url: str):
+    response = client.get("/" + url)
+    assert response.status_code == 200, response.content
+    return response.json()["results"][0]["people"]
+
+
 class TestClickhouseStickiness(ClickhouseTestMixin, stickiness_test_factory(ClickhouseStickiness, _create_event, _create_person, _create_action, get_earliest_timestamp)):  # type: ignore
     @snapshot_clickhouse_queries
     def test_filter_by_group_properties(self):
diff --git a/ee/clickhouse/views/test/test_clickhouse_trends.py b/ee/clickhouse/views/test/test_clickhouse_trends.py
index 1dccb0f3f82a3..e46db4d48dd1a 100644
--- a/ee/clickhouse/views/test/test_clickhouse_trends.py
+++ b/ee/clickhouse/views/test/test_clickhouse_trends.py
@@ -1,22 +1,494 @@
+import json
+from dataclasses import dataclass, field
 from datetime import datetime
+from typing import Any, Dict, List, Optional, Union
+from unittest.mock import ANY
 
-from freezegun.api import freeze_time
+import pytest
+from django.core.cache import cache
+from django.test import Client
+from freezegun import freeze_time
 
 from ee.api.test.base import LicensedTestMixin
 from ee.clickhouse.models.group import create_group
-from ee.clickhouse.test.test_journeys import journeys_for
+from ee.clickhouse.test.test_journeys import journeys_for, update_or_create_person
 from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
-from posthog.api.test.test_trends import (
-    TrendsRequest,
-    TrendsRequestBreakdown,
-    get_people_from_url_ok,
-    get_trends_aggregate_ok,
-    get_trends_time_series_ok,
-)
+from posthog.api.test.test_cohort import create_cohort_ok
+from posthog.api.test.test_event_definition import create_organization, create_team, create_user
 from posthog.models.group_type_mapping import GroupTypeMapping
+from posthog.models.team import Team
 from posthog.test.base import APIBaseTest, test_with_materialized_columns
 
 
+@pytest.mark.django_db
+@pytest.mark.ee
+def test_includes_only_intervals_within_range(client: Client):
+    """
+    This is the case highlighted by https://github.com/PostHog/posthog/issues/2675
+
+    Here the issue is that we request, for instance, 14 days as the
+    date_from and display at weekly intervals, but previously we were
+    displaying 4 ticks on the date axis. If we were exactly on the
+    beginning of the week for two weeks then we'd want 2 ticks.
+    Otherwise we would have 3 ticks, as the range would be intersecting
+    with three weeks. We should never need to display 4 ticks.
+    """
+    organization = create_organization(name="test org")
+    team = create_team(organization=organization)
+    user = create_user("user", "pass", organization)
+
+    client.force_login(user)
+    cache.clear()
+
+    # I'm creating a cohort here so that I can use it as a breakdown, just
+    # because this is what was demonstrated in
+    # https://github.com/PostHog/posthog/issues/2675, but it might not be the
+    # simplest way to reproduce the issue.
+
+    # "2021-09-19" is a sunday, i.e. beginning of week
+    with freeze_time("2021-09-20T16:00:00"):
+        # First identify as a member of the cohort
+        distinct_id = "abc"
+        update_or_create_person(distinct_ids=[distinct_id], team_id=team.id, properties={"cohort_identifier": 1})
+        cohort = create_cohort_ok(
+            client=client, team_id=team.id, name="test cohort", groups=[{"properties": {"cohort_identifier": 1}}]
+        )
+
+        journeys_for(
+            events_by_person={
+                distinct_id: [
+                    {"event": "$pageview", "timestamp": "2021-09-04"},
+                    {"event": "$pageview", "timestamp": "2021-09-05"},
+                    {"event": "$pageview", "timestamp": "2021-09-12"},
+                    {"event": "$pageview", "timestamp": "2021-09-19"},
+                ]
+            },
+            team=team,
+        )
+
+        trends = get_trends_ok(
+            client,
+            team=team,
+            request=TrendsRequestBreakdown(
+                date_from="-14days",
+                date_to="2021-09-21",
+                interval="week",
+                insight="TRENDS",
+                breakdown=json.dumps([cohort["id"]]),
+                breakdown_type="cohort",
+                display="ActionsLineGraph",
+                events=[
+                    {
+                        "id": "$pageview",
+                        "math": "dau",
+                        "name": "$pageview",
+                        "custom_name": None,
+                        "type": "events",
+                        "order": 0,
+                        "properties": [],
+                        "math_property": None,
+                    }
+                ],
+            ),
+        )
+        assert trends == {
+            "is_cached": False,
+            "last_refresh": "2021-09-20T16:00:00Z",
+            "next": None,
+            "result": [
+                {
+                    "action": ANY,
+                    "breakdown_value": cohort["id"],
+                    "label": "$pageview - test cohort",
+                    "count": 3.0,
+                    "data": [1.0, 1.0, 1.0],
+                    # Prior to the fix this would also include '29-Aug-2021'
+                    "labels": ["5-Sep-2021", "12-Sep-2021", "19-Sep-2021"],
+                    "days": ["2021-09-05", "2021-09-12", "2021-09-19"],
+                    "persons_urls": ANY,
+                    "filter": ANY,
+                }
+            ],
+        }
+
+
+@pytest.mark.django_db
+@pytest.mark.ee
+def test_can_specify_number_of_smoothing_intervals(client: Client):
+    """
+    The Smoothing feature should allow specifying a number of intervals over
+    which we will provide smoothing of the aggregated trend data.
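+
+    Worked example (mirroring the assertions below): with smoothing_intervals=3
+    and daily counts [2, 1, 3], each day is averaged with up to two preceding
+    days and floored, giving [2, floor(3/2), floor(6/3)] = [2, 1, 2].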
+ """ + organization = create_organization(name="test org") + team = create_team(organization=organization) + user = create_user("user", "pass", organization) + + client.force_login(user) + + with freeze_time("2021-09-20T16:00:00"): + journeys_for( + events_by_person={ + "abc": [ + {"event": "$pageview", "timestamp": "2021-09-01"}, + {"event": "$pageview", "timestamp": "2021-09-01"}, + {"event": "$pageview", "timestamp": "2021-09-02"}, + {"event": "$pageview", "timestamp": "2021-09-03"}, + {"event": "$pageview", "timestamp": "2021-09-03"}, + {"event": "$pageview", "timestamp": "2021-09-03"}, + ] + }, + team=team, + ) + + interval_3_trend = get_trends_ok( + client, + team=team, + request=TrendsRequest( + date_from="2021-09-01", + date_to="2021-09-03", + interval="day", + insight="TRENDS", + display="ActionsLineGraph", + smoothing_intervals=3, + events=[ + { + "id": "$pageview", + "name": "$pageview", + "custom_name": None, + "type": "events", + "order": 0, + "properties": [], + } + ], + ), + ) + + assert interval_3_trend == { + "is_cached": False, + "last_refresh": "2021-09-20T16:00:00Z", + "next": None, + "result": [ + { + "action": ANY, + "label": "$pageview", + "count": 5, + "data": [2.0, 1, 2.0], + "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], + "days": ["2021-09-01", "2021-09-02", "2021-09-03"], + "persons_urls": ANY, + "filter": ANY, + } + ], + } + + interval_2_trend = get_trends_ok( + client, + team=team, + request=TrendsRequest( + date_from="2021-09-01", + date_to="2021-09-03", + interval="day", + insight="TRENDS", + display="ActionsLineGraph", + smoothing_intervals=2, + events=[ + { + "id": "$pageview", + "name": "$pageview", + "custom_name": None, + "type": "events", + "order": 0, + "properties": [], + } + ], + ), + ) + + assert interval_2_trend == { + "is_cached": False, + "last_refresh": "2021-09-20T16:00:00Z", + "next": None, + "result": [ + { + "action": ANY, + "label": "$pageview", + "count": 5, + "data": [2.0, 1, 2.0], + "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], + "days": ["2021-09-01", "2021-09-02", "2021-09-03"], + "persons_urls": ANY, + "filter": ANY, + } + ], + } + + interval_1_trend = get_trends_ok( + client, + team=team, + request=TrendsRequest( + date_from="2021-09-01", + date_to="2021-09-03", + interval="day", + insight="TRENDS", + display="ActionsLineGraph", + smoothing_intervals=1, + events=[ + { + "id": "$pageview", + "name": "$pageview", + "custom_name": None, + "type": "events", + "order": 0, + "properties": [], + } + ], + ), + ) + + assert interval_1_trend == { + "is_cached": False, + "last_refresh": "2021-09-20T16:00:00Z", + "next": None, + "result": [ + { + "action": { + "id": "$pageview", + "type": "events", + "order": 0, + "name": "$pageview", + "custom_name": None, + "math": None, + "math_property": None, + "math_group_type_index": ANY, + "properties": {}, + }, + "label": "$pageview", + "count": 6.0, + "data": [2.0, 1.0, 3.0], + "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], + "days": ["2021-09-01", "2021-09-02", "2021-09-03"], + "persons_urls": ANY, + "filter": ANY, + } + ], + } + + +@pytest.mark.django_db +@pytest.mark.ee +def test_smoothing_intervals_copes_with_null_values(client: Client): + """ + The Smoothing feature should allow specifying a number of intervals over + which we will provide smoothing of the aggregated trend data. 
+ """ + organization = create_organization(name="test org") + team = create_team(organization=organization) + user = create_user("user", "pass", organization) + + client.force_login(user) + cache.clear() + + with freeze_time("2021-09-20T16:00:00"): + journeys_for( + events_by_person={ + "abc": [ + {"event": "$pageview", "timestamp": "2021-09-01"}, + {"event": "$pageview", "timestamp": "2021-09-01"}, + {"event": "$pageview", "timestamp": "2021-09-01"}, + # No events on 2 Sept + {"event": "$pageview", "timestamp": "2021-09-03"}, + {"event": "$pageview", "timestamp": "2021-09-03"}, + {"event": "$pageview", "timestamp": "2021-09-03"}, + ] + }, + team=team, + ) + + interval_3_trend = get_trends_ok( + client, + team=team, + request=TrendsRequest( + date_from="2021-09-01", + date_to="2021-09-03", + interval="day", + insight="TRENDS", + display="ActionsLineGraph", + smoothing_intervals=3, + events=[ + { + "id": "$pageview", + "name": "$pageview", + "custom_name": None, + "type": "events", + "order": 0, + "properties": [], + } + ], + ), + ) + + assert interval_3_trend == { + "is_cached": False, + "last_refresh": "2021-09-20T16:00:00Z", + "next": None, + "result": [ + { + "action": ANY, + "label": "$pageview", + "count": 6.0, + "data": [3.0, 1.0, 2.0], + "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], + "days": ["2021-09-01", "2021-09-02", "2021-09-03"], + "persons_urls": ANY, + "filter": ANY, + } + ], + } + + interval_1_trend = get_trends_ok( + client, + team=team, + request=TrendsRequest( + date_from="2021-09-01", + date_to="2021-09-03", + interval="day", + insight="TRENDS", + display="ActionsLineGraph", + smoothing_intervals=1, + events=[ + { + "id": "$pageview", + "name": "$pageview", + "custom_name": None, + "type": "events", + "order": 0, + "properties": [], + } + ], + ), + ) + + assert interval_1_trend == { + "is_cached": False, + "last_refresh": "2021-09-20T16:00:00Z", + "next": None, + "result": [ + { + "action": ANY, + "label": "$pageview", + "count": 6.0, + "data": [3.0, 0.0, 3.0], + "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], + "days": ["2021-09-01", "2021-09-02", "2021-09-03"], + "persons_urls": ANY, + "filter": ANY, + } + ], + } + + +@dataclass +class TrendsRequest: + date_from: Optional[str] = None + date_to: Optional[str] = None + interval: Optional[str] = None + insight: Optional[str] = None + display: Optional[str] = None + compare: Optional[bool] = None + events: List[Dict[str, Any]] = field(default_factory=list) + properties: List[Dict[str, Any]] = field(default_factory=list) + smoothing_intervals: Optional[int] = 1 + + +@dataclass +class TrendsRequestBreakdown(TrendsRequest): + breakdown: Optional[Union[List[int], str]] = None + breakdown_type: Optional[str] = None + + +def get_trends(client, request: Union[TrendsRequestBreakdown, TrendsRequest], team: Team): + data: Dict[str, Any] = { + "date_from": request.date_from, + "date_to": request.date_to, + "interval": request.interval, + "insight": request.insight, + "display": request.display, + "compare": request.compare, + "events": json.dumps(request.events), + "properties": json.dumps(request.properties), + "smoothing_intervals": request.smoothing_intervals, + } + + if isinstance(request, TrendsRequestBreakdown): + data["breakdown"] = request.breakdown + data["breakdown_type"] = request.breakdown_type + + filtered_data = {k: v for k, v in data.items() if v is not None} + + return client.get(f"/api/projects/{team.id}/insights/trend/", data=filtered_data,) + + +def get_trends_ok(client: Client, request: 
+def get_trends_ok(client: Client, request: TrendsRequest, team: Team):
+    response = get_trends(client=client, request=request, team=team)
+    assert response.status_code == 200, response.content
+    return response.json()
+
+
+@dataclass
+class NormalizedTrendResult:
+    value: float
+    label: str
+    person_url: str
+    breakdown_value: Optional[Union[str, int]]
+
+
+def get_trends_time_series_ok(
+    client: Client, request: TrendsRequest, team: Team
+) -> Dict[str, Dict[str, NormalizedTrendResult]]:
+    data = get_trends_ok(client=client, request=request, team=team)
+    res = {}
+    for item in data["result"]:
+        collect_dates = {}
+        for idx, date in enumerate(item["days"]):
+            collect_dates[date] = NormalizedTrendResult(
+                value=item["data"][idx],
+                label=item["labels"][idx],
+                person_url=item["persons_urls"][idx]["url"],
+                breakdown_value=item.get("breakdown_value", None),
+            )
+        res[
+            "{}{}".format(item["label"], " - {}".format(item["compare_label"]) if item.get("compare_label") else "")
+        ] = collect_dates
+
+    return res
+
+
+def get_trends_aggregate_ok(client: Client, request: TrendsRequest, team: Team) -> Dict[str, NormalizedTrendResult]:
+    data = get_trends_ok(client=client, request=request, team=team)
+    res = {}
+    for item in data["result"]:
+        res[item["label"]] = NormalizedTrendResult(
+            value=item["aggregated_value"],
+            label=item["action"]["name"],
+            person_url=item["persons"]["url"],
+            breakdown_value=item.get("breakdown_value", None),
+        )
+
+    return res
+
+
+def get_trends_people_ok(client: Client, url: str):
+    response = client.get("/" + url)
+    assert response.status_code == 200, response.content
+    return response.json()["results"][0]["people"]
+
+
+def get_people_from_url_ok(client: Client, url: str):
+    response = client.get("/" + url)
+    assert response.status_code == 200, response.content
+    return response.json()["results"][0]["people"]
+
+
 class ClickhouseTestTrends(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest):
     maxDiff = None
     CLASS_DATA_LEVEL_SETUP = False
diff --git a/frontend/src/lib/components/SmoothingFilter/SmoothingFilter.tsx b/frontend/src/lib/components/SmoothingFilter/SmoothingFilter.tsx
new file mode 100644
index 0000000000000..7a1f7d1cefec0
--- /dev/null
+++ b/frontend/src/lib/components/SmoothingFilter/SmoothingFilter.tsx
@@ -0,0 +1,45 @@
+import React from 'react'
+import { Select } from 'antd'
+import { FundOutlined } from '@ant-design/icons'
+import { smoothingOptions } from './smoothings'
+import { useActions, useValues } from 'kea'
+import { insightLogic } from 'scenes/insights/insightLogic'
+
+export function SmoothingFilter(): JSX.Element | null {
+    const { filters } = useValues(insightLogic)
+    const { setFilters } = useActions(insightLogic)
+    const { interval, smoothing_intervals } = filters
+
+    if (!interval) {
+        return null
+    }
+
+    // Put a little icon next to the selected item
+    const options = smoothingOptions[interval].map(({ value, label }) => ({
+        value,
+        label:
+            value === smoothing_intervals ? (
+                <>
+                    {label} <FundOutlined />
+                </>
+            ) : (
+                label
+            ),
+    }))
+
+    return options.length ? (