diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 5746fa1d8a15e..7c746ecf2e73a 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -328,14 +328,14 @@ def create_hogql_database( ) cast(LazyJoin, raw_replay_events.fields["events"]).join_table = events - with timings.measure("initial_domain_type"): - database.persons.fields["$virt_initial_referring_domain_type"] = create_initial_domain_type( - "$virt_initial_referring_domain_type" - ) - with timings.measure("initial_channel_type"): - database.persons.fields["$virt_initial_channel_type"] = create_initial_channel_type( - "$virt_initial_channel_type", modifiers.customChannelTypeRules - ) + with timings.measure("initial_domain_type"): + database.persons.fields["$virt_initial_referring_domain_type"] = create_initial_domain_type( + "$virt_initial_referring_domain_type", timings=timings + ) + with timings.measure("initial_channel_type"): + database.persons.fields["$virt_initial_channel_type"] = create_initial_channel_type( + "$virt_initial_channel_type", modifiers.customChannelTypeRules, timings=timings + ) with timings.measure("group_type_mapping"): for mapping in GroupTypeMapping.objects.filter(project_id=team.project_id): diff --git a/posthog/hogql/database/schema/channel_type.py b/posthog/hogql/database/schema/channel_type.py index 56c8a753875ba..8fe13ee425fb9 100644 --- a/posthog/hogql/database/schema/channel_type.py +++ b/posthog/hogql/database/schema/channel_type.py @@ -1,9 +1,13 @@ from dataclasses import dataclass +from functools import cache from typing import Optional, Union + from posthog.hogql import ast from posthog.hogql.database.models import ExpressionField from posthog.hogql.parser import parse_expr +from posthog.hogql.placeholders import replace_placeholders +from posthog.hogql.timings import HogQLTimings from posthog.schema import ( CustomChannelRule, CustomChannelOperator, @@ -41,17 +45,17 @@ class ChannelTypeExprs: gad_source: ast.Expr -def create_initial_domain_type(name: str): +def create_initial_domain_type(name: str, timings: Optional[HogQLTimings] = None): + if timings is None: + timings = HogQLTimings() + + with timings.measure("initial_domain_type_expr"): + expr = _initial_domain_type_expr() + return ExpressionField( name=name, - expr=parse_expr( - """ -if( - {referring_domain} = '$direct', - '$direct', - hogql_lookupDomainType({referring_domain}) -) -""", + expr=replace_placeholders( + expr, { "referring_domain": ast.Call( name="toString", args=[ast.Field(chain=["properties", "$initial_referring_domain"])] @@ -61,7 +65,22 @@ def create_initial_domain_type(name: str): ) -def create_initial_channel_type(name: str, custom_rules: Optional[list[CustomChannelRule]] = None): +@cache +def _initial_domain_type_expr(): + return parse_expr( + """ +if( + {referring_domain} = '$direct', + '$direct', + hogql_lookupDomainType({referring_domain}) +) +""" + ) + + +def create_initial_channel_type( + name: str, custom_rules: Optional[list[CustomChannelRule]] = None, timings: Optional[HogQLTimings] = None +): return ExpressionField( name=name, expr=create_channel_type_expr( @@ -89,6 +108,7 @@ def create_initial_channel_type(name: str, custom_rules: Optional[list[CustomCha gad_source=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_gad_source"])]), ), custom_rules=custom_rules, + timings=timings, ), ) @@ -203,109 +223,124 @@ def custom_rule_to_expr(custom_rule: CustomChannelRule, source_exprs: ChannelTyp def create_channel_type_expr( - custom_rules: Optional[list[CustomChannelRule]], source_exprs: ChannelTypeExprs + custom_rules: Optional[list[CustomChannelRule]], + source_exprs: ChannelTypeExprs, + timings: Optional[HogQLTimings] = None, ) -> ast.Expr: - custom_rule_expr: Optional[ast.Expr] = None - if custom_rules: - if_args = [] - for rule in custom_rules: - if_args.append(custom_rule_to_expr(rule, source_exprs)) - if_args.append(ast.Constant(value=rule.channel_type)) - if_args.append(ast.Constant(value=None)) - custom_rule_expr = ast.Call(name="multiIf", args=if_args) + if timings is None: + timings = HogQLTimings() + + with timings.measure("custom_channel_rules"): + custom_rule_expr: Optional[ast.Expr] = None + if custom_rules: + if_args = [] + for rule in custom_rules: + if_args.append(custom_rule_to_expr(rule, source_exprs)) + if_args.append(ast.Constant(value=rule.channel_type)) + if_args.append(ast.Constant(value=None)) + custom_rule_expr = ast.Call(name="multiIf", args=if_args) + + with timings.measure("default_channel_rules_parse"): + builtin_rules_expr = _initial_default_channel_rules_expr() + with timings.measure("default_channel_rules_replace"): + builtin_rules = replace_placeholders( + builtin_rules_expr, + placeholders={ + "campaign": wrap_with_lower(wrap_with_null_if_empty(source_exprs.campaign)), + "medium": wrap_with_lower(wrap_with_null_if_empty(source_exprs.medium)), + "source": wrap_with_lower(wrap_with_null_if_empty(source_exprs.source)), + "referring_domain": source_exprs.referring_domain, + "has_gclid": source_exprs.has_gclid, + "has_fbclid": source_exprs.has_fbclid, + "gad_source": wrap_with_null_if_empty(source_exprs.gad_source), + }, + ) + if custom_rule_expr: + return ast.Call( + name="coalesce", + args=[custom_rule_expr, builtin_rules], + ) + else: + return builtin_rules + +@cache +def _initial_default_channel_rules_expr(): # This logic is referenced in our docs https://posthog.com/docs/data/channel-type, be sure to update both if you # update either. - builtin_rules = parse_expr( + return parse_expr( """ -multiIf( - match({campaign}, 'cross-network'), - 'Cross Network', - - ( - {medium} IN ('cpc', 'cpm', 'cpv', 'cpa', 'ppc', 'retargeting') OR - startsWith({medium}, 'paid') OR - {has_gclid} OR - {gad_source} IS NOT NULL - ), - coalesce( - hogql_lookupPaidSourceType({source}), - if( - match({campaign}, '^(.*(([^a-df-z]|^)shop|shopping).*)$'), - 'Paid Shopping', - NULL - ), - hogql_lookupPaidMediumType({medium}), - hogql_lookupPaidSourceType({referring_domain}), - multiIf ( - {gad_source} = '1', - 'Paid Search', + multiIf( + match({campaign}, 'cross-network'), + 'Cross Network', + + ( + {medium} IN ('cpc', 'cpm', 'cpv', 'cpa', 'ppc', 'retargeting') OR + startsWith({medium}, 'paid') OR + {has_gclid} OR + {gad_source} IS NOT NULL + ), + coalesce( + hogql_lookupPaidSourceType({source}), + if( + match({campaign}, '^(.*(([^a-df-z]|^)shop|shopping).*)$'), + 'Paid Shopping', + NULL + ), + hogql_lookupPaidMediumType({medium}), + hogql_lookupPaidSourceType({referring_domain}), + multiIf ( + {gad_source} = '1', + 'Paid Search', - match({campaign}, '^(.*video.*)$'), - 'Paid Video', + match({campaign}, '^(.*video.*)$'), + 'Paid Video', - {has_fbclid}, - 'Paid Social', + {has_fbclid}, + 'Paid Social', - 'Paid Unknown' - ) - ), - - ( - {referring_domain} = '$direct' - AND ({medium} IS NULL) - AND ({source} IS NULL OR {source} IN ('(direct)', 'direct', '$direct')) - AND NOT {has_fbclid} - ), - 'Direct', - - coalesce( - hogql_lookupOrganicSourceType({source}), - if( - match({campaign}, '^(.*(([^a-df-z]|^)shop|shopping).*)$'), - 'Organic Shopping', - NULL - ), - hogql_lookupOrganicMediumType({medium}), - hogql_lookupOrganicSourceType({referring_domain}), - multiIf( - match({campaign}, '^(.*video.*)$'), - 'Organic Video', + 'Paid Unknown' + ) + ), - match({medium}, 'push$'), - 'Push', + ( + {referring_domain} = '$direct' + AND ({medium} IS NULL) + AND ({source} IS NULL OR {source} IN ('(direct)', 'direct', '$direct')) + AND NOT {has_fbclid} + ), + 'Direct', - {has_fbclid}, - 'Organic Social', + coalesce( + hogql_lookupOrganicSourceType({source}), + if( + match({campaign}, '^(.*(([^a-df-z]|^)shop|shopping).*)$'), + 'Organic Shopping', + NULL + ), + hogql_lookupOrganicMediumType({medium}), + hogql_lookupOrganicSourceType({referring_domain}), + multiIf( + match({campaign}, '^(.*video.*)$'), + 'Organic Video', - {referring_domain} == '$direct', - 'Direct', + match({medium}, 'push$'), + 'Push', - {referring_domain} IS NOT NULL, - 'Referral', + {has_fbclid}, + 'Organic Social', - 'Unknown' - ) - ) -)""", - start=None, - placeholders={ - "campaign": wrap_with_lower(wrap_with_null_if_empty(source_exprs.campaign)), - "medium": wrap_with_lower(wrap_with_null_if_empty(source_exprs.medium)), - "source": wrap_with_lower(wrap_with_null_if_empty(source_exprs.source)), - "referring_domain": source_exprs.referring_domain, - "has_gclid": source_exprs.has_gclid, - "has_fbclid": source_exprs.has_fbclid, - "gad_source": wrap_with_null_if_empty(source_exprs.gad_source), - }, + {referring_domain} == '$direct', + 'Direct', + + {referring_domain} IS NOT NULL, + 'Referral', + + 'Unknown' + ) + ) + )""" ) - if custom_rule_expr: - return ast.Call( - name="coalesce", - args=[custom_rule_expr, builtin_rules], - ) - else: - return builtin_rules def wrap_with_null_if_empty(expr: ast.Expr) -> ast.Expr: diff --git a/posthog/hogql/database/schema/sessions_v1.py b/posthog/hogql/database/schema/sessions_v1.py index 425f43bf4c305..c6f381be4b940 100644 --- a/posthog/hogql/database/schema/sessions_v1.py +++ b/posthog/hogql/database/schema/sessions_v1.py @@ -282,6 +282,7 @@ def arg_max_merge_field(field_name: str) -> ast.Call: ), gad_source=aggregate_fields["$entry_gad_source"], ), + timings=context.timings, ) # aliases for people reverting from v2 to v1 diff --git a/posthog/hogql/database/schema/sessions_v2.py b/posthog/hogql/database/schema/sessions_v2.py index 6a418834accb2..36b97be216830 100644 --- a/posthog/hogql/database/schema/sessions_v2.py +++ b/posthog/hogql/database/schema/sessions_v2.py @@ -356,6 +356,7 @@ def arg_max_merge_field(field_name: str) -> ast.Call: ), gad_source=aggregate_fields["$entry_gad_source"], ), + timings=context.timings, ) # some aliases for people upgrading from v1 to v2 aggregate_fields["$exit_current_url"] = aggregate_fields["$end_current_url"]