From 16f830087c60572ecdc61c767e44ec012621d5a1 Mon Sep 17 00:00:00 2001 From: dgitis Date: Mon, 18 Nov 2024 16:13:13 -0800 Subject: [PATCH 1/4] single-site user tables --- README.md | 142 +++++++++++++----- macros/base_select.sql | 38 ++++- .../base/base_ga4__pseudonymous_users.sql | 29 ++++ .../base/base_ga4__pseudonymous_users.yml | 85 +++++++++++ models/staging/base/base_ga4__users.sql | 29 ++++ models/staging/base/base_ga4__users.yml | 83 ++++++++++ models/staging/src_ga4.yml | 8 +- models/staging/stg_ga4__client_keys.sql | 23 +++ models/staging/stg_ga4__client_keys.yml | 15 ++ models/staging/stg_ga4__users.sql | 21 +++ models/staging/stg_ga4__users.yml | 9 ++ 11 files changed, 439 insertions(+), 43 deletions(-) create mode 100644 models/staging/base/base_ga4__pseudonymous_users.sql create mode 100644 models/staging/base/base_ga4__pseudonymous_users.yml create mode 100644 models/staging/base/base_ga4__users.sql create mode 100644 models/staging/base/base_ga4__users.yml create mode 100644 models/staging/stg_ga4__client_keys.sql create mode 100644 models/staging/stg_ga4__client_keys.yml create mode 100644 models/staging/stg_ga4__users.sql create mode 100644 models/staging/stg_ga4__users.yml diff --git a/README.md b/README.md index 16ecbbda..016127a5 100644 --- a/README.md +++ b/README.md @@ -173,47 +173,6 @@ vars: value_type: "int_value" ``` -### User Properties - -User properties are provided by GA4 in the `user_properties` repeated field. The most recent user property for each user will be extracted and included in the `dim_ga4__users` model by configuring the `user_properties` variable in your project as follows: - -``` -vars: - ga4: - user_properties: - - user_property_name: "membership_level" - value_type: "int_value" - - user_property_name: "account_status" - value_type: "string_value" -``` - -### Derived User Properties - -Derived user properties are different from "User Properties" in that they are derived from event parameters. 
This provides additional flexibility in allowing users to turn any event parameter into a user property. - -Derived User Properties are included in the `dim_ga4__users` model and contain the latest event parameter value per user. - -``` -derived_user_properties: - - event_parameter: "[your event parameter]" - user_property_name: "[a unique name for the derived user property]" - value_type: "[string_value|int_value|float_value|double_value]" -``` - -For example: - -``` -vars: - ga4: - derived_user_properties: - - event_parameter: "page_location" - user_property_name: "most_recent_page_location" - value_type: "string_value" - - event_parameter: "another_event_param" - user_property_name: "most_recent_param" - value_type: "string_value" -``` - ### Derived Session Properties Derived session properties are similar to derived user properties, but on a per-session basis, for properties that change slowly over time. This provides additional flexibility in allowing users to turn any event parameter into a session property. @@ -290,6 +249,107 @@ vars: - name: "some_other_parameter" value_type: "string_value" ``` + +# User Tables + +This package contains two sets of user tables: an original set of user tables implemented from the inception of this package and a new set of user tables designed to use the GA4 BigQuery user export tables that were released after this package was first launched. + +The original user tables build one-row-per-user tables and include data like first and last device, first and last geo, user properties, and derived user properties. To build them, they need to process all-time data. Large sites might want to consider disabling these tables to save costs. + +The newer user tables leverage the GA4 user export setting. They are partitioned tables so they are more appropriate for high-traffic sites. They lose the first and last columns and derived user properties, but include user properties, audiences, user LTV, and predictive data. 
+ +The GA4 user export tables do not currently support multi-site. There is a multi-site branch that needs testing. If you have a multi-site implementation and wish to use the GA4 user export tables, then please install the [user branch](https://github.com/Velir/dbt-ga4/tree/user) in your development environment, configure the various user-specific settings, run dbt, and report any issues or successes on this [draft PR](https://github.com/Velir/dbt-ga4/pull/317). Reach out on the draft PR if you need help with any of this. + +## Settings Common to Both Sets of User Tables + +### User Properties + +User properties are provided by GA4 in the `user_properties` repeated field. The most recent user property for each user will be extracted and included in the `dim_ga4__users` model by configuring the `user_properties` variable in your project as follows: + +``` +vars: + ga4: + user_properties: + - user_property_name: "membership_level" + value_type: "int_value" + - user_property_name: "account_status" + value_type: "string_value" +``` + +## dbt-GA4 Original User Table Settings + +### Derived User Properties + +Derived user properties are different from "User Properties" in that they are derived from event parameters. This provides additional flexibility in allowing users to turn any event parameter into a user property. + +Derived User Properties are included in the `dim_ga4__users` model and contain the latest event parameter value per user. 
+ +``` +derived_user_properties: + - event_parameter: "[your event parameter]" + user_property_name: "[a unique name for the derived user property]" + value_type: "[string_value|int_value|float_value|double_value]" +``` + +For example: + +``` +vars: + ga4: + derived_user_properties: + - event_parameter: "page_location" + user_property_name: "most_recent_page_location" + value_type: "string_value" + - event_parameter: "another_event_param" + user_property_name: "most_recent_param" + value_type: "string_value" +``` + +## GA4 User Export Settings + +The GA4 user export models are disabled by default. + +Enable them by adding the following model configs: + +``` +models: + ga4: + staging: + base: + base_ga4__pseudonymous_users: + +enabled: true + base_ga4__users: + +enabled: true + stg_ga4__client_keys: + +enabled: true + stg_ga4__users: + +enabled: true +``` + +### Audiences + +The GA4 User Export includes an Audiences repeated record that stores the audience membership details. Audiences are enabled by adding a list of audience names that match values in the `audiences.name` fields of your `psuedonymous_users_` and `users__` tables as shown below. + +``` +vars: + ga4: + audiences: ['Purchases', 'All Users'] +``` + +This example will add the following columns to the relevant dbt-GA4 models: + +- audience_purchases_id +- audience_purchases_name +- audience_purchases_membership_start_timestamp_micros +- audience_purchases_membership_expiry_timestamp_micros +- audience_purchases_npa +- audience_all_users_id +- audience_all_users_name +- audience_all_users_membership_start_timestamp_micros +- audience_all_users_membership_expiry_timestamp_micros +- audience_all_users_npa + + # Connecting to BigQuery This package assumes that BigQuery is the source of your GA4 data. 
Full instructions for connecting DBT to BigQuery are here: https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile diff --git a/macros/base_select.sql b/macros/base_select.sql index 17663aef..250d8e33 100644 --- a/macros/base_select.sql +++ b/macros/base_select.sql @@ -176,4 +176,40 @@ WHEN event_name = 'purchase' THEN 1 ELSE 0 END AS is_purchase -{% endmacro %} \ No newline at end of file +{% endmacro %} + + +{% macro base_select_usr_source() %} + {{ return(adapter.dispatch('base_select_usr_source', 'ga4')()) }} +{% endmacro %} + +{% macro default__base_select_usr_source() %} + , user_info.last_active_timestamp_micros as user_info_last_active_timestamp_micros + , user_info.user_first_touch_timestamp_micros as user_info_user_first_touch_timestamp_micros + , user_info.first_purchase_date as user_info_first_purchase_date + , device.operating_system as device_operating_system + , device.category as device_category + , device.mobile_brand_name as device_mobile_brand_name + , device.mobile_model_name as device_mobile_model_name + , device.unified_screen_name as device_unified_sceen_name + , geo.city as geo_city + , geo.country as geo_country + , geo.continent as geo_continent + , geo.region as geo_region + , user_ltv.revenue_in_usd as user_ltv_revenue_in_usd + , user_ltv.sessions as user_ltv_sessions + , user_ltv.engagement_time_millis as user_ltv_engagement_time_millis + , user_ltv.purchases as user_ltv_purchases + , user_ltv.engaged_sessions as user_ltv_engaged_sessions + , user_ltv.session_duration_micros as user_ltv_session_duration_micros + , predictions.in_app_purchase_score_7d as predictions_in_app_purchase_score_7d + , predictions.purchase_score_7d as predictions_purchase_score_7d + , predictions.churn_score_7d as predictions_churn_score_7d + , predictions.revenue_28d_in_usd as predictions_revenue_28d_in_usd + , privacy_info.is_limited_ad_tracking as privacy_info_is_limited_ad_tracking + , privacy_info.is_ads_personalization_allowed as 
privacy_info_is_ads_personalization_allowed + , parse_date('%Y%m%d' , occurrence_date) as occurrence_date + , parse_date('%Y%m%d' , last_updated_date) as last_updated_date + , user_properties + , audiences +{% endmacro %} diff --git a/models/staging/base/base_ga4__pseudonymous_users.sql b/models/staging/base/base_ga4__pseudonymous_users.sql new file mode 100644 index 00000000..44b424b1 --- /dev/null +++ b/models/staging/base/base_ga4__pseudonymous_users.sql @@ -0,0 +1,29 @@ +{% set partitions_to_replace = ['current_date'] %} +{% for i in range(var('static_incremental_days')) %} + {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} +{% endfor %} +{{ + config( + materialized = 'incremental', + incremental_strategy = 'insert_overwrite', + enabled=false, + partition_by={ + "field": "occurrence_date", + "data_type": "date", + }, + partitions = partitions_to_replace, + ) +}} + +with source as ( + select + pseudo_user_id + , stream_id + {{ ga4.base_select_usr_source() }} + from {{ source('ga4', 'pseudonymous_users') }} + {% if is_incremental() %} + where parse_date('%Y%m%d', left(_table_suffix, 8)) in ({{ partitions_to_replace | join(',') }}) + {% endif %} +) + +select * from source diff --git a/models/staging/base/base_ga4__pseudonymous_users.yml b/models/staging/base/base_ga4__pseudonymous_users.yml new file mode 100644 index 00000000..baa92ff9 --- /dev/null +++ b/models/staging/base/base_ga4__pseudonymous_users.yml @@ -0,0 +1,85 @@ +version: 2 + +models: + - name: base_ga4__pseudonymous_users + description: > + Base pseudo-user (client) model that pulls all fields from the pseudonymous user table of the user export. The pseudonymous user table is keyed on + the user_pseudo_id which is the cid parameter in Gtag calls and is the main parameter in the from which the dbt-GA4 client_id is + created. The table is partitioned by occurence_date. This model also flattens some fields. 
+ columns: + - name: pseudo_user_id + description: > + The pseudo_user_id is a unique identifier for a user that is not tied to any personal information. This is the main identifier + used in the GA4 property. This is the cid parameter in Gtag calls and is the main parameter from which the dbt-GA4 client_key is + created. + - name: stream_id + description: The numeric ID of the data stream from which the event originated. + - name: user_info_last_active_timestamp_micros + description: Date of the user's last activity (timestamp in microseconds). Flattened version of user_info.last_active_timestamp_micros. + - name: user_info_user_first_touch_timestamp_micros + description: Date of the user's first_open or first_visit event, whichever is earlier (timestamp in microseconds). Flattened version of user_info.user_first_touch_timestamp_micros. + - name: user_info_first_purchase_date + description: Date of the user's first purchase (YYYYMMDD). Flattened version of user_info.first_purchase_date. + - name: device_operating_system + description: Flattened version of device.operating_system. + - name: device_category + description: Category of the device (mobile, tablet, desktop). Flattened version of device.category. + - name: device_mobile_brand_name + description: Flattened version of device.mobile_brand_name. + - name: device_mobile_model_name + description: Flattened version of device.mobile_model_name. + - name: device_unified_sceen_name + description: Flattened version of device.unified_screen_name. + - name: geo_city + description: Flattened version of geo.city. + - name: geo_country + description: Flattened version of geo.country. + - name: geo_continent + description: Flattened version of geo.continent. + - name: geo_region + description: Flattened version of geo.region. + - name: user_ltv_revenue_in_usd + description: Flattened version of user_ltv.revenue_in_usd. 
+ - name: user_ltv_sessions + description: Flattened version of user_ltv.sessions. + - name: user_ltv_engagement_time_millis + description: Flattened version of user_ltv.engagement_time_millis. + - name: user_ltv_purchases + description: Flattened version of user_ltv.purchases. + - name: user_ltv_engaged_sessions + description: Flattened version of user_ltv.engaged_sessions. + - name: user_ltv_session_duration_micros + description: Flattened version of user_ltv.session_duration_micros. + - name: predictions_in_app_purchase_score_7d + description: > + Probability that a user who was active in the last 28 days will log an in_app_purchase event within the next 7 days. + Flattened version of predictions.in_app_purchase_score_7d. + - name: predictions_purchase_score_7d + description: > + Probability that a user who was active in the last 28 days will log a purchase event within the next 7 days. + Flattened version of predictions.purchase_score_7d. + - name: predictions_churn_score_7d + description: > + Probability that a user who was active on your app or site within the last 7 days will not be active within the next 7 days. + Flattened version of predictions.churn_score_7d. + - name: predictions_revenue_28d_in_usd + description: > + Revenue expected (in USD) from all purchase events within the next 28 days from a user who was active in the last 28 days. + Flattened version of predictions.revenue_28d_in_usd. + - name: privacy_info_is_limited_ad_tracking + description: > + The device's Limit Ad Tracking setting. Possible values include: 'true', 'false', and '(not set)'. isLimitedAdTracking returns '(not set)' if Google Analytics is not + currently able to return this device's Limit Ad Tracking setting. Flattened version of privacy_info.is_limited_ad_tracking. + - name: privacy_info_is_ads_personalization_allowed + description: > + If a user is eligible for ads personalization, isAdsPersonalizationAllowed returns 'true'. 
If a user is not eligible for ads personalization, + isAdsPersonalizationAllowed returns 'false'. isAdsPersonalizationAllowed returns '(not set)' if Google Analytics is not currently able to + return whether this user is eligible for ads personalization; users where isAdsPersonalizationAllowed returns '(not set)' may or may not be + eligible for personalized ads. For personalized ads, you should treat users where isAdsPersonalizationAllowed = '(not set)' as isAdsPersonalizationAllowed = 'false' + because, in the most general case, some of the '(not set)' rows will include users that are not eligible for ads personalization. Users where + isAdsPersonalizationAllowed = 'false' may still be used for non-advertising use cases like A/B testing & data explorations. Flattened version of + privacy_info.is_ads_personalization_allowed. + - name: occurrence_date + description: Date when the record change was triggered. This is the partitioning column. + - name: last_updated_date + description: Date when the record was updated in the table. 
diff --git a/models/staging/base/base_ga4__users.sql b/models/staging/base/base_ga4__users.sql new file mode 100644 index 00000000..00cda7d9 --- /dev/null +++ b/models/staging/base/base_ga4__users.sql @@ -0,0 +1,29 @@ +{% set partitions_to_replace = ['current_date'] %} +{% for i in range(var('static_incremental_days')) %} + {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} +{% endfor %} +{{ + config( + pre_hook="{{ ga4.combine_property_data() }}" if var('combined_dataset', false) else "", + materialized = 'incremental', + incremental_strategy = 'insert_overwrite', + enabled=false, + partition_by={ + "field": "occurrence_date", + "data_type": "date", + }, + partitions = partitions_to_replace, + ) +}} + +with source as ( + select + user_id + {{ ga4.base_select_usr_source() }} + from {{ source('ga4', 'users') }} + {% if is_incremental() %} + where parse_date('%Y%m%d', left(_table_suffix, 8)) in ({{ partitions_to_replace | join(',') }}) + {% endif %} +) + +select * from source diff --git a/models/staging/base/base_ga4__users.yml b/models/staging/base/base_ga4__users.yml new file mode 100644 index 00000000..f4d14a29 --- /dev/null +++ b/models/staging/base/base_ga4__users.yml @@ -0,0 +1,83 @@ +version: 2 + +models: + - name: base_ga4__users + description: > + Base user model that pulls all fields from the pseudonymous user table of the user export. The pseudonymous user table is keyed on + the user_pseudo_id which is the cid parameter in Gtag calls and is the main parameter in the from which the dbt-GA4 client_key is + created. The table is partitioned by occurence_date. This model also flattens some fields. + columns: + - name: pseudo_user_id + description: > + The pseudo_user_id is a unique identifier for a user that is not tied to any personal information. This is the main identifier + used in the GA4 property. 
This is the cid parameter in Gtag calls and is the main parameter from which the dbt-GA4 client_key is + created. This field matches the user_pseudo_id in the events_ tables but you should use the client_key. + - name: user_info_last_active_timestamp_micros + description: Date of the user's last activity (timestamp in microseconds). Flattened version of user_info.last_active_timestamp_micros. + - name: user_info_user_first_touch_timestamp_micros + description: Date of the user's first_open or first_visit event, whichever is earlier (timestamp in microseconds). Flattened version of user_info.user_first_touch_timestamp_micros. + - name: user_info_first_purchase_date + description: Date of the user's first purchase (YYYYMMDD). Flattened version of user_info.first_purchase_date. + - name: device_operating_system + description: Flattened version of device.operating_system. + - name: device_category + description: Category of the device (mobile, tablet, desktop). Flattened version of device.category. + - name: device_mobile_brand_name + description: Flattened version of device.mobile_brand_name. + - name: device_mobile_model_name + description: Flattened version of device.mobile_model_name. + - name: device_unified_sceen_name + description: Flattened version of device.unified_screen_name. + - name: geo_city + description: Flattened version of geo.city. + - name: geo_country + description: Flattened version of geo.country. + - name: geo_continent + description: Flattened version of geo.continent. + - name: geo_region + description: Flattened version of geo.region. + - name: user_ltv_revenue_in_usd + description: Flattened version of user_ltv.revenue_in_usd. 
+ - name: user_ltv_sessions + description: Flattened version of user_ltv.sessions. + - name: user_ltv_engagement_time_millis + description: Flattened version of user_ltv.engagement_time_millis. + - name: user_ltv_purchases + description: Flattened version of user_ltv.purchases. + - name: user_ltv_engaged_sessions + description: Flattened version of user_ltv.engaged_sessions. + - name: user_ltv_session_duration_micros + description: Flattened version of user_ltv.session_duration_micros. + - name: predictions_in_app_purchase_score_7d + description: > + Probability that a user who was active in the last 28 days will log an in_app_purchase event within the next 7 days. + Flattened version of predictions.in_app_purchase_score_7d. + - name: predictions_purchase_score_7d + description: > + Probability that a user who was active in the last 28 days will log a purchase event within the next 7 days. + Flattened version of predictions.purchase_score_7d. + - name: predictions_churn_score_7d + description: > + Probability that a user who was active on your app or site within the last 7 days will not be active within the next 7 days. + Flattened version of predictions.churn_score_7d. + - name: predictions_revenue_28d_in_usd + description: > + Revenue expected (in USD) from all purchase events within the next 28 days from a user who was active in the last 28 days. + Flattened version of predictions.revenue_28d_in_usd. + - name: privacy_info_is_limited_ad_tracking + description: > + The device's Limit Ad Tracking setting. Possible values include: 'true', 'false', and '(not set)'. isLimitedAdTracking returns '(not set)' if Google Analytics is not + currently able to return this device's Limit Ad Tracking setting. Flattened version of privacy_info.is_limited_ad_tracking. + - name: privacy_info_is_ads_personalization_allowed + description: > + If a user is eligible for ads personalization, isAdsPersonalizationAllowed returns 'true'. 
If a user is not eligible for ads personalization, + isAdsPersonalizationAllowed returns 'false'. isAdsPersonalizationAllowed returns '(not set)' if Google Analytics is not currently able to + return whether this user is eligible for ads personalization; users where isAdsPersonalizationAllowed returns '(not set)' may or may not be + eligible for personalized ads. For personalized ads, you should treat users where isAdsPersonalizationAllowed = '(not set)' as isAdsPersonalizationAllowed = 'false' + because, in the most general case, some of the '(not set)' rows will include users that are not eligible for ads personalization. Users where + isAdsPersonalizationAllowed = 'false' can still be used for non-advertising use cases like A/B testing & data explorations. Flattened version of + privacy_info.is_ads_personalization_allowed. + - name: occurrence_date + description: Date when the record change was triggered. This is the partitioning column. + - name: last_updated_date + description: Date when the record was updated in the table. diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 29104767..e3eb3059 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -13,4 +13,10 @@ sources: tables: - name: events identifier: events_* # Scan across all sharded event tables. Use the 'start_date' variable to limit this scan - description: Main events table exported by GA4. Sharded by date. \ No newline at end of file + description: Main events table exported by GA4. Sharded by date. 
+ - name: pseudonymous_users + identifier: pseudonymous_users_* + description: Daily sharded pseudonymous_users (client_id) table exported by GA4 + - name: users + identifier: users_* + description: Daily sharded users (user_id) table exported by GA4 \ No newline at end of file diff --git a/models/staging/stg_ga4__client_keys.sql b/models/staging/stg_ga4__client_keys.sql new file mode 100644 index 00000000..ba9aa5de --- /dev/null +++ b/models/staging/stg_ga4__client_keys.sql @@ -0,0 +1,23 @@ +{{ + config( + materialized='view', + enabled=false + + ) +}} +select + * + , to_base64(md5(concat(pseudo_user_id, stream_id))) as client_key + {% for up in var('user_properties', []) %} + , (select value.string_value from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value + , (select value.set_timestamp_micros from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros + , (select value.user_property_name from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name + {% endfor %} + {% for aud in var('audiences', []) %} + , (select id from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_id + , (select name from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_name + , (select membership_start_timestamp_micros from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_membership_start_timestamp_micros + , (select membership_expiry_timestamp_micros from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_membership_expiry_timestamp_micros + , (select npa from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_npa + {% endfor %} +from {{ref('base_ga4__pseudonymous_users')}} diff --git a/models/staging/stg_ga4__client_keys.yml 
b/models/staging/stg_ga4__client_keys.yml new file mode 100644 index 00000000..32bb12f4 --- /dev/null +++ b/models/staging/stg_ga4__client_keys.yml @@ -0,0 +1,15 @@ +version: 2 + +models: + - name: stg_ga4__client_keys + description: > + Staging model for the base_ga4__pseudonymous_users table which pulls data from the user export. + This model is keyed on client_key which is the key used by the package for joining with client (browser/app). + This model unnests user properties and audience using variables of the same name that match the user property + or audience name and prefixing the unnested fields with those names. + tests: + - unique: + column_name: "(client_key || occurrence_date)" + columns: + - name: client_key + description: Hashed combination of user_pseudo_id and stream_id \ No newline at end of file diff --git a/models/staging/stg_ga4__users.sql b/models/staging/stg_ga4__users.sql new file mode 100644 index 00000000..5a3807c0 --- /dev/null +++ b/models/staging/stg_ga4__users.sql @@ -0,0 +1,21 @@ +{{ + config( + materialized='view', + enabled=false + ) +}} +select + * + {% for up in var('user_properties', []) %} + , (select value.string_value from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value + , (select value.set_timestamp_micros from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros + , (select value.user_property_name from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name + {% endfor %} + {% for aud in var('audiences', []) %} + , (select id from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_id + , (select name from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_name + , (select membership_start_timestamp_micros from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", 
"_")}}_membership_start_timestamp_micros + , (select membership_expiry_timestamp_micros from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_membership_expiry_timestamp_micros + , (select npa from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_npa + {% endfor %} +from {{ref('base_ga4__users')}} diff --git a/models/staging/stg_ga4__users.yml b/models/staging/stg_ga4__users.yml new file mode 100644 index 00000000..841c7e09 --- /dev/null +++ b/models/staging/stg_ga4__users.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: stg_ga4__users + description: > + Staging model for the base_ga4__users table which pulls data from the user export. + This model is keyed on user_id field. Unlike most keys in the package, this key is not hashed. + This model unnests user properties and audience using variables of the same name that match the user property + or audience name and prefixing the unnested fields with those names. 
From da5a14d4ea11a7282faf886c7527c3d1b077d71e Mon Sep 17 00:00:00 2001 From: dgitis Date: Tue, 24 Dec 2024 14:01:46 -0800 Subject: [PATCH 2/4] Unit test for sharing --- .../base/base_ga4__pseudonymous_users.sql | 2 +- models/staging/base/base_ga4__users.sql | 2 +- models/staging/stg_ga4__client_keys.yml | 20 ++++++++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/models/staging/base/base_ga4__pseudonymous_users.sql b/models/staging/base/base_ga4__pseudonymous_users.sql index 44b424b1..880b0ed9 100644 --- a/models/staging/base/base_ga4__pseudonymous_users.sql +++ b/models/staging/base/base_ga4__pseudonymous_users.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/base/base_ga4__users.sql b/models/staging/base/base_ga4__users.sql index 00cda7d9..de4b78d5 100644 --- a/models/staging/base/base_ga4__users.sql +++ b/models/staging/base/base_ga4__users.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/stg_ga4__client_keys.yml b/models/staging/stg_ga4__client_keys.yml index 32bb12f4..ffd62ca9 100644 --- a/models/staging/stg_ga4__client_keys.yml +++ b/models/staging/stg_ga4__client_keys.yml @@ -12,4 +12,22 @@ models: column_name: "(client_key || occurrence_date)" columns: - name: client_key - 
description: Hashed combination of user_pseudo_id and stream_id \ No newline at end of file + description: Hashed combination of user_pseudo_id and stream_id +unit_tests: + - name: test_stg_ga4__client_keys_audiences + description: > + Testing that audiences process properly when configured. + model: stg_ga4__client_keys + given: + - input: ref('base_ga4__pseudonymous_users') + rows: + - audiences: ['struct(111111111 as id, "my_test_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 + as membership_expiry_timestamp_micros, false as npa)', 'struct(222222222 as id, "my_second_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 + as membership_expiry_timestamp_micros, true as npa)'] + overrides: + vars: + audiences: ['my_test_audience', 'my_second_audience'] + expect: + rows: + - {audience_my_test_audience_id: 111111111, audience_my_test_audience_name: 'my_test_audience', audience_my_test_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_test_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_test_audience_npa: False} + - {audience_my_second_audience_id: 222222222, audience_my_second_audience_name: 'my_second_audience', audience_my_second_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_second_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_second_audience_npa: True} \ No newline at end of file From bf3457308790db3342d0dfb169db65d768f9724b Mon Sep 17 00:00:00 2001 From: dgitis Date: Thu, 26 Dec 2024 14:16:41 -0800 Subject: [PATCH 3/4] add tests --- README.md | 25 ++++- models/staging/stg_ga4__client_keys.sql | 6 +- models/staging/stg_ga4__client_keys.yml | 127 ++++++++++++++++++++- models/staging/stg_ga4__users.sql | 6 +- models/staging/stg_ga4__users.yml | 140 ++++++++++++++++++++++++ 5 files changed, 290 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 
016127a5..fae65095 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ Features include: | stg_ga4__event_* | 1 model per event (ex: page_view, purchase) which flattens event parameters specific to that event | | stg_ga4__event_items | Contains item data associated with e-commerce events (Purchase, add to cart, etc) | | stg_ga4__event_to_query_string_params | Mapping between each event and any query parameters & values that were contained in the event's `page_location` field | +| stg_ga4__users | User ID table built from the GA4 User export table. Flattens user properties and audiences using the `user_properties` and `audiences` variables in your `dbt_project.yml` file. Disabled by default. | +| stg_ga4__client_keys | Client key table built from the GA4 User export pseudonymous users table. Flattens user properties and audiences using the `user_properties` and `audiences` variables in your `dbt_project.yml` file. Disabled by default. | | stg_ga4__user_properties | Finds the most recent occurance of specified user_properties for each user | | stg_ga4__derived_user_properties | Finds the most recent occurance of specific event_params value and assigns them to a client_key. Derived user properties are specified as variables (see documentation below) | | stg_ga4__derived_session_properties | Finds the most recent occurance of specific event_params or user_properties value and assigns them to a session's session_key. Derived session properties are specified as variables (see documentation below) | @@ -254,7 +256,7 @@ vars: This package contains two sets of user tables: an original set of user tables implemented from the inception of this package and a new set of user tables designed to use the GA4 BigQuery user export tables that were released after this package was first launched. -The original user tables build one-row-per-user tables and include data like first and last device, first and last geo, user properties, and derived user properties. 
To build them, they need to process all-time data. Large sites might want to consider disabling these tables to save costs. +The original user tables build one-row-per-user tables and include data like first and last device, first and last geo, user properties, and derived user properties. They need to process all-time data to build these tables. Large sites might want to consider disabling these tables to save costs. The newer user tables leverage the GA4 user export setting. They are partitioned tables so they are more appropriate for high-traffic sites. They lose the first and last columns and derived user properties, but include user properties, audiences, user LTV, and predictive data. @@ -262,9 +264,13 @@ The GA4 user export tables do not currently support multi-site. There is a multi ## Settings Common to Both Sets of User Tables +The `user_properties` fields in the `events_*` and `events_intraday_*` tables, and the `users_*` and `pseudonymous_users_*` tables are in different formats. No settings are shared between the two sets of user tables. + +## dbt-GA4 Original User Table Settings + ### User Properties -User properties are provided by GA4 in the `user_properties` repeated field. The most recent user property for each user will be extracted and included in the `dim_ga4__users` model by configuring the `user_properties` variable in your project as follows: +User properties are provided by GA4 in the `user_properties` repeated field at the event-level in the `events_*` and `events_intraday_*` tables. The most recent user property for each user will be extracted and included in the `dim_ga4__users` model by configuring the `user_properties` variable in your project as follows: ``` vars: @@ -276,8 +282,6 @@ vars: value_type: "string_value" ``` -## dbt-GA4 Original User Table Settings - ### Derived User Properties Derived user properties are different from "User Properties" in that they are derived from event parameters. 
This provides additional flexibility in allowing users to turn any event parameter into a user property. @@ -326,6 +330,19 @@ models: +enabled: true ``` +### User Properties + +The GA4 User Export includes a user properties repeated record that stores the user property details. User properties are enabled by adding a list of user property names that match values in the `user_properties.value.user_property_name` fields of your `pseudonymous_users_` and `users__` tables as shown below. + +``` +vars: + ga4: + user_export_user_properties: ['All Users', 'Purchasers'] +``` + +Unlike the `event_params` and `user_properties` event-level fields, the user-level user properties are keyed off of `user_properties.value.user_property_name` rather than `user_properties.key`. The `user_properties.key` in the user tables is the slot that GA4 uses, `slot_01` for example, rather than the name. As a result, `user_properties.value.user_property_name` in the user tables should be the same as `user_properties.key` in the event tables. + + ### Audiences The GA4 User Export includes an Audiences repeated record that stores the audience membership details. Audiences are enabled by adding a list of audience names that match values in the `audiences.name` fields of your `psuedonymous_users_` and `users__` tables as shown below. 
diff --git a/models/staging/stg_ga4__client_keys.sql b/models/staging/stg_ga4__client_keys.sql index ba9aa5de..34c43d30 100644 --- a/models/staging/stg_ga4__client_keys.sql +++ b/models/staging/stg_ga4__client_keys.sql @@ -9,9 +9,9 @@ select * , to_base64(md5(concat(pseudo_user_id, stream_id))) as client_key {% for up in var('user_properties', []) %} - , (select value.string_value from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value - , (select value.set_timestamp_micros from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros - , (select value.user_property_name from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name + , (select value.string_value from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value + , (select value.set_timestamp_micros from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros + , (select value.user_property_name from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name {% endfor %} {% for aud in var('audiences', []) %} , (select id from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_id diff --git a/models/staging/stg_ga4__client_keys.yml b/models/staging/stg_ga4__client_keys.yml index ffd62ca9..1fa0d22a 100644 --- a/models/staging/stg_ga4__client_keys.yml +++ b/models/staging/stg_ga4__client_keys.yml @@ -22,12 +22,131 @@ unit_tests: - input: ref('base_ga4__pseudonymous_users') rows: - audiences: ['struct(111111111 as id, "my_test_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 - as membership_expiry_timestamp_micros, false as npa)', 'struct(222222222 as id, "my_second_audience" as name, 
1731573754000000 as membership_start_timestamp_micros, 1731998727000000 - as membership_expiry_timestamp_micros, true as npa)'] +as membership_expiry_timestamp_micros, false as npa)', 'struct(222222222 as id, "my_second_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 +as membership_expiry_timestamp_micros, true as npa)'] overrides: vars: audiences: ['my_test_audience', 'my_second_audience'] expect: rows: - - {audience_my_test_audience_id: 111111111, audience_my_test_audience_name: 'my_test_audience', audience_my_test_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_test_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_test_audience_npa: False} - - {audience_my_second_audience_id: 222222222, audience_my_second_audience_name: 'my_second_audience', audience_my_second_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_second_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_second_audience_npa: True} \ No newline at end of file + - {audience_my_test_audience_id: 111111111, audience_my_test_audience_name: 'my_test_audience', audience_my_test_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_test_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_test_audience_npa: False, audience_my_second_audience_id: 222222222, audience_my_second_audience_name: 'my_second_audience', audience_my_second_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_second_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_second_audience_npa: True} + - name: test_base_to_stg_ga4__client_keys + description: > + Testing that a given row of base_ga4__pseudonymous_users produces the expected output in stg_ga4__client_keys. 
+ model: stg_ga4__client_keys + given: + - input: ref('base_ga4__pseudonymous_users') + format: sql + rows: | + select + '1664444444.1694444444' as pseudo_user_id + , '1234567890' as stream_id + , 1694444444444444 as user_info_last_active_timestamp_micros + , 1664444444444444 as user_info_user_first_touch_timestamp_micros + , 20241201 as user_info_first_purchase_date + , 'web' as device_operating_system + , 'mobile' as device_category + , 'Samsung' as device_mobile_brand_name + , 'SM-J337V' as device_mobile_model_name + , 'My page title' as device_unified_screen_name + , 'Vancouver' as geo_city + , 'Canada' as geo_country + , 'Americas' as geo_continent + , 'British Columbia' as geo_region + , 200.0 as user_ltv_revenue_in_usd + , 3 as user_ltv_sessions + , 346517 as user_ltv_engagement_time_millis + , 1 as user_ltv_purchases + , 3 as user_ltv_engaged_sessions + , 6582608513 as user_ltv_session_duration_micros + , cast(null as float64) as predictions_in_app_purchase_score_7d + , 0.4 as predictions_purchase_score_7d + , 0.08 as predictions_churn_score_7d + , 321.0 as predictions_revenue_28d_in_usd + , false as privacy_info_is_limited_ad_tracking + , false as privacy_info_is_ads_personalization_allowed + , date('2024-12-10') as occurence_date + , date('2024-12-12') as last_updated_date + , array[ + struct( + 'slot_01' as key + , struct( + 'first_prop_val' as string_value + , 1695183380000000 as set_timestamp_micros + , 'First Prop Name' as user_property_name + ) as value + ) + ] as user_properties + , array[ + struct( + 2366216494 as id + , 'All Users' as name + , 1695183380000000 as membership_start_timestamp_micros + , 1715183380000000 as membership_expiry_timestamp_micros + , false as npa + ) + ] as audiences + overrides: + vars: + user_properties: ['First Prop Name'] + audiences: ['All Users'] + expect: + format: sql + rows: | + select + '1664444444.1694444444' as pseudo_user_id + , '1234567890' as stream_id + , 1694444444444444 as 
user_info_last_active_timestamp_micros + , 1664444444444444 as user_info_user_first_touch_timestamp_micros + , 20241201 as user_info_first_purchase_date + , 'web' as device_operating_system + , 'mobile' as device_category + , 'Samsung' as device_mobile_brand_name + , 'SM-J337V' as device_mobile_model_name + , 'My page title' as device_unified_screen_name + , 'Vancouver' as geo_city + , 'Canada' as geo_country + , 'Americas' as geo_continent + , 'British Columbia' as geo_region + , 200.0 as user_ltv_revenue_in_usd + , 3 as user_ltv_sessions + , 346517 as user_ltv_engagement_time_millis + , 1 as user_ltv_purchases + , 3 as user_ltv_engaged_sessions + , 6582608513 as user_ltv_session_duration_micros + , cast(null as float64) as predictions_in_app_purchase_score_7d + , 0.4 as predictions_purchase_score_7d + , 0.08 as predictions_churn_score_7d + , 321.0 as predictions_revenue_28d_in_usd + , false as privacy_info_is_limited_ad_tracking + , false as privacy_info_is_ads_personalization_allowed + , date('2024-12-10') as occurence_date + , date('2024-12-12') as last_updated_date + , array[ + struct( + 'slot_01' as key + , struct( + 'first_prop_val' as string_value + , 1695183380000000 as set_timestamp_micros + , 'First Prop Name' as user_property_name + ) as value + ) + ] as user_properties + , array[ + struct( + 2366216494 as id + , 'All Users' as name + , 1695183380000000 as membership_start_timestamp_micros + , 1715183380000000 as membership_expiry_timestamp_micros + , false as npa + ) + ] as audiences + , 'hhcn7XB3QFPLFh3tf5sZzQ==' as client_key + , 'first_prop_val' as first_prop_name_string_value + , 1695183380000000 as first_prop_name_set_timestamp_micros + , 'First Prop Name' as first_prop_name_user_property_name + , 2366216494 as audience_all_users_id + , 'All Users' as audience_all_users_name + , 1695183380000000 as audience_all_users_membership_start_timestamp_micros + , 1715183380000000 as audience_all_users_membership_expiry_timestamp_micros + , false as 
audience_all_users_npa diff --git a/models/staging/stg_ga4__users.sql b/models/staging/stg_ga4__users.sql index 5a3807c0..61cce82b 100644 --- a/models/staging/stg_ga4__users.sql +++ b/models/staging/stg_ga4__users.sql @@ -7,9 +7,9 @@ select * {% for up in var('user_properties', []) %} - , (select value.string_value from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value - , (select value.set_timestamp_micros from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros - , (select value.user_property_name from unnest(user_properties) where key = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name + , (select value.string_value from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value + , (select value.set_timestamp_micros from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros + , (select value.user_property_name from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name {% endfor %} {% for aud in var('audiences', []) %} , (select id from unnest(audiences) where name = '{{aud}}') as audience_{{aud | lower | replace(" ", "_")}}_id diff --git a/models/staging/stg_ga4__users.yml b/models/staging/stg_ga4__users.yml index 841c7e09..fda60fe7 100644 --- a/models/staging/stg_ga4__users.yml +++ b/models/staging/stg_ga4__users.yml @@ -7,3 +7,143 @@ models: This model is keyed on user_id field. Unlike most keys in the package, this key is not hashed. This model unnests user properties and audience using variables of the same name that match the user property or audience name and prefixing the unnested fields with those names. 
+ tests: + - unique: + column_name: "(user_id || occurrence_date)" + columns: + - name: user_id + description: The user_id as passed to Google Analytics using the native user ID feature. +unit_tests: + - name: test_stg_ga4__users_audiences + description: > + Testing that audiences process properly when configured. + model: stg_ga4__users + given: + - input: ref('base_ga4__users') + rows: + - audiences: ['struct(111111111 as id, "my_test_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 +as membership_expiry_timestamp_micros, false as npa)', 'struct(222222222 as id, "my_second_audience" as name, 1731573754000000 as membership_start_timestamp_micros, 1731998727000000 +as membership_expiry_timestamp_micros, true as npa)'] + overrides: + vars: + audiences: ['my_test_audience', 'my_second_audience'] + expect: + rows: + - {audience_my_test_audience_id: 111111111, audience_my_test_audience_name: 'my_test_audience', audience_my_test_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_test_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_test_audience_npa: False, audience_my_second_audience_id: 222222222, audience_my_second_audience_name: 'my_second_audience', audience_my_second_audience_membership_start_timestamp_micros: 1731573754000000, audience_my_second_audience_membership_expiry_timestamp_micros: 1731998727000000, audience_my_second_audience_npa: True} + - name: test_base_to_stg_ga4__users + description: > + Testing that a given row of base_ga4__users produces the expected output in stg_ga4__users. 
+ model: stg_ga4__users + given: + - input: ref('base_ga4__users') + format: sql + rows: | + select + 'mycustomuserID' as user_id + , 1694444444444444 as user_info_last_active_timestamp_micros + , 1664444444444444 as user_info_user_first_touch_timestamp_micros + , 20241201 as user_info_first_purchase_date + , 'web' as device_operating_system + , 'mobile' as device_category + , 'Samsung' as device_mobile_brand_name + , 'SM-J337V' as device_mobile_model_name + , 'My page title' as device_unified_screen_name + , 'Vancouver' as geo_city + , 'Canada' as geo_country + , 'Americas' as geo_continent + , 'British Columbia' as geo_region + , 200.0 as user_ltv_revenue_in_usd + , 3 as user_ltv_sessions + , 346517 as user_ltv_engagement_time_millis + , 1 as user_ltv_purchases + , 3 as user_ltv_engaged_sessions + , 6582608513 as user_ltv_session_duration_micros + , cast(null as float64) as predictions_in_app_purchase_score_7d + , 0.4 as predictions_purchase_score_7d + , 0.08 as predictions_churn_score_7d + , 321.0 as predictions_revenue_28d_in_usd + , false as privacy_info_is_limited_ad_tracking + , false as privacy_info_is_ads_personalization_allowed + , date('2024-12-10') as occurence_date + , date('2024-12-12') as last_updated_date + , array[ + struct( + 'slot_01' as key + , struct( + 'first_prop_val' as string_value + , 1695183380000000 as set_timestamp_micros + , 'First Prop Name' as user_property_name + ) as value + ) + ] as user_properties + , array[ + struct( + 2366216494 as id + , 'All Users' as name + , 1695183380000000 as membership_start_timestamp_micros + , 1715183380000000 as membership_expiry_timestamp_micros + , false as npa + ) + ] as audiences + overrides: + vars: + user_properties: ['First Prop Name'] + audiences: ['All Users'] + expect: + format: sql + rows: | + select + 'mycustomuserID' as user_id + , 1694444444444444 as user_info_last_active_timestamp_micros + , 1664444444444444 as user_info_user_first_touch_timestamp_micros + , 20241201 as 
user_info_first_purchase_date + , 'web' as device_operating_system + , 'mobile' as device_category + , 'Samsung' as device_mobile_brand_name + , 'SM-J337V' as device_mobile_model_name + , 'My page title' as device_unified_screen_name + , 'Vancouver' as geo_city + , 'Canada' as geo_country + , 'Americas' as geo_continent + , 'British Columbia' as geo_region + , 200.0 as user_ltv_revenue_in_usd + , 3 as user_ltv_sessions + , 346517 as user_ltv_engagement_time_millis + , 1 as user_ltv_purchases + , 3 as user_ltv_engaged_sessions + , 6582608513 as user_ltv_session_duration_micros + , cast(null as float64) as predictions_in_app_purchase_score_7d + , 0.4 as predictions_purchase_score_7d + , 0.08 as predictions_churn_score_7d + , 321.0 as predictions_revenue_28d_in_usd + , false as privacy_info_is_limited_ad_tracking + , false as privacy_info_is_ads_personalization_allowed + , date('2024-12-10') as occurence_date + , date('2024-12-12') as last_updated_date + , array[ + struct( + 'slot_01' as key + , struct( + 'first_prop_val' as string_value + , 1695183380000000 as set_timestamp_micros + , 'First Prop Name' as user_property_name + ) as value + ) + ] as user_properties + , array[ + struct( + 2366216494 as id + , 'All Users' as name + , 1695183380000000 as membership_start_timestamp_micros + , 1715183380000000 as membership_expiry_timestamp_micros + , false as npa + ) + ] as audiences + , 'first_prop_val' as first_prop_name_string_value + , 1695183380000000 as first_prop_name_set_timestamp_micros + , 'First Prop Name' as first_prop_name_user_property_name + , 2366216494 as audience_all_users_id + , 'All Users' as audience_all_users_name + , 1695183380000000 as audience_all_users_membership_start_timestamp_micros + , 1715183380000000 as audience_all_users_membership_expiry_timestamp_micros + , false as audience_all_users_npa \ No newline at end of file From a8daea004a5b5f61d21738bb022b0e428eae984f Mon Sep 17 00:00:00 2001 From: dgitis Date: Thu, 26 Dec 2024 14:27:23 -0800 
Subject: [PATCH 4/4] Rename variable to prevent naming collisions --- models/staging/stg_ga4__client_keys.sql | 2 +- models/staging/stg_ga4__client_keys.yml | 2 +- models/staging/stg_ga4__users.sql | 2 +- models/staging/stg_ga4__users.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/staging/stg_ga4__client_keys.sql b/models/staging/stg_ga4__client_keys.sql index 34c43d30..f26bbf28 100644 --- a/models/staging/stg_ga4__client_keys.sql +++ b/models/staging/stg_ga4__client_keys.sql @@ -8,7 +8,7 @@ select * , to_base64(md5(concat(pseudo_user_id, stream_id))) as client_key - {% for up in var('user_properties', []) %} + {% for up in var('user_export_user_properties', []) %} , (select value.string_value from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value , (select value.set_timestamp_micros from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros , (select value.user_property_name from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name diff --git a/models/staging/stg_ga4__client_keys.yml b/models/staging/stg_ga4__client_keys.yml index 1fa0d22a..70850d80 100644 --- a/models/staging/stg_ga4__client_keys.yml +++ b/models/staging/stg_ga4__client_keys.yml @@ -88,7 +88,7 @@ as membership_expiry_timestamp_micros, true as npa)'] ] as audiences overrides: vars: - user_properties: ['First Prop Name'] + user_export_user_properties: ['First Prop Name'] audiences: ['All Users'] expect: format: sql diff --git a/models/staging/stg_ga4__users.sql b/models/staging/stg_ga4__users.sql index 61cce82b..91ed3f99 100644 --- a/models/staging/stg_ga4__users.sql +++ b/models/staging/stg_ga4__users.sql @@ -6,7 +6,7 @@ }} select * - {% for up in var('user_properties', []) %} + {% for up in var('user_export_user_properties', []) %} , (select 
value.string_value from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_string_value , (select value.set_timestamp_micros from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_set_timestamp_micros , (select value.user_property_name from unnest(user_properties) where value.user_property_name = '{{up}}') as {{up | lower | replace(" ", "_")}}_user_property_name diff --git a/models/staging/stg_ga4__users.yml b/models/staging/stg_ga4__users.yml index fda60fe7..a88485f5 100644 --- a/models/staging/stg_ga4__users.yml +++ b/models/staging/stg_ga4__users.yml @@ -87,7 +87,7 @@ as membership_expiry_timestamp_micros, true as npa)'] ] as audiences overrides: vars: - user_properties: ['First Prop Name'] + user_export_user_properties: ['First Prop Name'] audiences: ['All Users'] expect: format: sql