Skip to content

Commit

Permalink
Add support for Redshift (Close #2)
Browse files Browse the repository at this point in the history
  • Loading branch information
rahul-snowplow authored and emielver committed Feb 3, 2022
1 parent 61fa622 commit 0694ff8
Show file tree
Hide file tree
Showing 31 changed files with 388 additions and 334 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

This dbt package:

- Transforms and aggregates raw web event data collected from the Snowplow [iOS tracker][ios-tracker] or [Android tracker][android-tracker] into a set of derived tables: screen views, sessions, and users.
- Transforms and aggregates raw mobile event data collected from the Snowplow [iOS tracker][ios-tracker] or [Android tracker][android-tracker] (up to v3) into a set of derived tables: screen views, sessions, users, and optionally app errors.
- Processes **all mobile events incrementally**. It is not just constrained to screen view events - any custom events you are tracking will also be incrementally processed.
- Is designed in a modular manner, allowing you to easily integrate your own custom SQL into the incremental framework provided by the package.

Expand All @@ -23,7 +23,7 @@ The snowplow-mobile v0.1.0 package currently supports Redshift & Postgres.
### Requirements

- A dataset of mobile events from the Snowplow [iOS tracker][ios-tracker] or [Android tracker][android-tracker] must be available in the database.
- Have the [session context (iOS)][ios-session-context] or [session context (Android)][android-session-context] and [screen view events (iOS)][ios-screen-views] or [screen context (Android)][android-screen-views] enabled.
- Have the [session context (iOS)][ios-session-context] or [session context (Android)][android-session-context] and [screen view events (iOS)][ios-screen-views] or [screen view events (Android)][android-screen-views] enabled.

### Installation

Expand Down
6 changes: 2 additions & 4 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,10 @@ on-run-end:
# and add their last successful collector_tstamp to the manifest.
models:
snowplow_mobile:
+materialized: view
+bind: false
+materialized: table
base:
manifest:
+schema: "snowplow_manifest"
default:
enabled: "{{ target.type in ['bigquery','snowflake'] | as_bool() }}"
redshift_postgres:
enabled: "{{ target.type in ['redshift','postgres'] | as_bool() }}"
scratch:
Expand All @@ -91,6 +88,7 @@ models:
enabled: "{{ target.type == 'snowflake' | as_bool() }}"
optional_modules:
app_errors:
+schema: "derived"
+tags: "snowplow_mobile_incremental"
scratch:
+schema: "scratch"
Expand Down
2 changes: 1 addition & 1 deletion docs/catalog.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/run_results.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ with session_context as (
, session_lifecycle as (
select
ns.session_id,
ns.device_user_id as device_user_id,
ns.device_user_id,
least(ns.start_tstamp, coalesce(self.start_tstamp, ns.start_tstamp)) as start_tstamp,
greatest(ns.end_tstamp, coalesce(self.end_tstamp, ns.end_tstamp)) as end_tstamp -- In BigQuery, least/greatest return null if any argument is null, hence the coalesce

Expand Down
47 changes: 24 additions & 23 deletions models/base/scratch/base_scratch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,15 +261,16 @@ models:
columns:
- name: root_id
description: '{{ doc("col_root_id") }}'
tags:
- primary-key
tests:
- unique
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_application_context", false)}}'
- name: root_tstamp
description: '{{ doc("col_root_tstamp") }}'
tests:
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_application_context", false)}}'
- name: build
description: '{{ doc("col_build") }}'
- name: version
Expand All @@ -290,15 +291,16 @@ models:
columns:
- name: root_id
description: '{{ doc("col_root_id") }}'
tags:
- primary-key
tests:
- unique
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_geolocation_context", false)}}'
- name: root_tstamp
description: '{{ doc("col_root_tstamp") }}'
tests:
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_geolocation_context", false)}}'
- name: device_latitude
description: '{{ doc("col_device_latitude") }}'
- name: device_longitude
Expand All @@ -318,15 +320,16 @@ models:
columns:
- name: root_id
description: '{{ doc("col_root_id") }}'
tags:
- primary-key
tests:
- unique
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_mobile_context", false)}}'
- name: root_tstamp
description: '{{ doc("col_root_tstamp") }}'
tests:
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_mobile_context", false)}}'
- name: device_manufacturer
description: '{{ doc("col_device_manufacturer") }}'
- name: device_model
Expand Down Expand Up @@ -354,15 +357,16 @@ models:
columns:
- name: root_id
description: '{{ doc("col_root_id") }}'
tags:
- primary-key
tests:
- unique
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_screen_context", false)}}'
- name: root_tstamp
description: '{{ doc("col_root_tstamp") }}'
tests:
- not_null
- not_null:
config:
enabled: '{{var("snowplow__enable_screen_context", false)}}'
- name: screen_id
description: '{{ doc("col_screen_id") }}'
- name: screen_name
Expand All @@ -382,10 +386,7 @@ models:
columns:
- name: root_id
description: '{{ doc("col_root_id") }}'
tags:
- primary-key
tests:
- unique
- not_null
- name: root_tstamp
description: '{{ doc("col_root_tstamp") }}'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{{
config(materialized='view',
enabled=(var('snowplow__enable_application_context')
and target.type in ['redshift','postgres'] | as_bool())
)
config(
enabled=(var("snowplow__enable_application_context", false)
and target.type in ['redshift','postgres'] | as_bool()),
dist='root_id',
sort='root_tstamp'
)
}}

{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{{
config(materialized='view',
enabled=(var('snowplow__enable_geolocation_context')
and target.type in ['redshift','postgres'] | as_bool())
)
config(
enabled=(var("snowplow__enable_geolocation_context", false)
and target.type in ['redshift','postgres'] | as_bool()),
dist='root_id',
sort='root_tstamp'
)
}}

{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{{
config(materialized='view',
enabled=(var('snowplow__enable_mobile_context')
and target.type in ['redshift','postgres'] | as_bool())
)
config(
enabled=(var("snowplow__enable_mobile_context", false)
and target.type in ['redshift','postgres'] | as_bool()),
dist='root_id',
sort='root_tstamp'
)
}}

{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{{
config(materialized='view',
enabled=(var('snowplow__enable_screen_context')
and target.type in ['redshift','postgres'] | as_bool())
)
config(
enabled=(var("snowplow__enable_screen_context", false)
and target.type in ['redshift','postgres'] | as_bool()),
dist='root_id',
sort='root_tstamp'
)
}}

{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{{
config(materialized='view')
{{
config(
enabled=(target.type in ['redshift','postgres'] | as_bool()),
dist='root_id',
sort='root_tstamp'
)
}}

{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{{
config(
materialized='table',
sort='collector_tstamp',
dist='event_id',
tags=["this_run"]
Expand All @@ -21,10 +20,8 @@ with events_this_run AS (
sc.previous_session_id,
sc.device_user_id,
sc.session_first_event_id,
-- select all from events except non-mobile fields.
{{ dbt_utils.star(from=source('atomic','events'),
relation_alias='e',
except=events_columns_to_remove()) }},

e.*,
dense_rank() over (partition by e.event_id order by e.collector_tstamp) as event_id_dedupe_index --dense_rank so rows with equal tstamps assigned same #

from {{ var('snowplow__events') }} e
Expand Down Expand Up @@ -60,7 +57,7 @@ with events_this_run AS (

select
-- screen context
{% if var('snowplow__enable_screen_context') %}
{% if var("snowplow__enable_screen_context", false) %}
sc.screen_id,
sc.screen_name,
sc.screen_activity,
Expand All @@ -78,7 +75,7 @@ select
cast(null as {{ dbt_utils.type_string() }}) as screen_view_controller,
{% endif %}
-- mobile context
{% if var('snowplow__enable_mobile_context') %}
{% if var("snowplow__enable_mobile_context", false) %}
mc.device_manufacturer,
mc.device_model,
mc.os_type,
Expand All @@ -104,7 +101,7 @@ select
cast(null as {{ dbt_utils.type_string() }}) as network_type,
{% endif %}
-- geo context
{% if var('snowplow__enable_geolocation_context') %}
{% if var("snowplow__enable_geolocation_context", false) %}
gc.device_latitude,
gc.device_longitude,
gc.device_latitude_longitude_accuracy,
Expand All @@ -122,7 +119,7 @@ select
cast(null as {{ dbt_utils.type_float() }}) as device_speed,
{% endif %}
-- app context
{% if var('snowplow__enable_application_context') %}
{% if var("snowplow__enable_application_context", false) %}
ac.build,
ac.version,
{% else %}
Expand All @@ -134,25 +131,25 @@ select

from cleaned_events e

{% if var('snowplow__enable_screen_context') %}
{% if var("snowplow__enable_screen_context", false) %}
left join {{ ref('snowplow_mobile_base_screen_context') }} sc
on e.event_id = sc.root_id
and e.collector_tstamp = sc.root_tstamp
{% endif %}

{% if var('snowplow__enable_mobile_context') %}
{% if var("snowplow__enable_mobile_context", false) %}
left join {{ ref('snowplow_mobile_base_mobile_context') }} mc
on e.event_id = mc.root_id
and e.collector_tstamp = mc.root_tstamp
{% endif %}

{% if var('snowplow__enable_geolocation_context') %}
{% if var("snowplow__enable_geolocation_context", false) %}
left join {{ ref('snowplow_mobile_base_geo_context') }} gc
on e.event_id = gc.root_id
and e.collector_tstamp = gc.root_tstamp
{% endif %}

{% if var('snowplow__enable_application_context') %}
{% if var("snowplow__enable_application_context", false) %}
left join {{ ref('snowplow_mobile_base_app_context') }} ac
on e.event_id = ac.root_id
and e.collector_tstamp = ac.root_tstamp
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
{{
config(materialized='table')
}}

select
min(s.start_tstamp) as lower_limit,
max(s.end_tstamp) as upper_limit
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(
materialized='table',
post_hook=["{{snowplow_utils.print_run_limits(this)}}"]
)
}}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{{
config(
materialized='table',
sort='start_tstamp',
dist='session_id',
partition_by = {
Expand Down
Loading

0 comments on commit 0694ff8

Please sign in to comment.