diff --git a/data/transform/models/marts/telemetry/base/projects.sql b/data/transform/models/marts/telemetry/base/projects.sql index 73afea1c..ad84f03b 100644 --- a/data/transform/models/marts/telemetry/base/projects.sql +++ b/data/transform/models/marts/telemetry/base/projects.sql @@ -1,6 +1,8 @@ -WITH active_projects AS ( - - SELECT structured_events.project_id +WITH active_events AS ( + SELECT + structured_events.project_id, + structured_events.event_created_at, + structured_events.event_count FROM {{ ref('structured_events') }} LEFT JOIN {{ ref('cmd_parsed_all') }} ON @@ -13,25 +15,43 @@ WITH active_projects AS ( 'meltano test', 'meltano schedule run' ) - AND structured_events.event_created_at >= DATEADD( - 'month', -1, CURRENT_DATE() - ) +), + +active_projects AS ( + + SELECT + project_id, + DATE_TRUNC('month', event_created_at) AS month_start, + SUM(event_count) AS active_events_count + FROM active_events + GROUP BY 1, 2 + +), + +active_profile AS ( + + SELECT + structured_events.project_id, + MIN(structured_events.event_created_at) AS activation_at, + MAX(structured_events.event_created_at) AS last_active_at + FROM {{ ref('structured_events') }} + INNER JOIN active_projects + ON structured_events.project_id = active_projects.project_id + AND DATE_TRUNC( + 'month', structured_events.event_created_at + ) = active_projects.month_start + WHERE active_projects.active_events_count > 1 GROUP BY 1 - HAVING SUM(structured_events.event_count) > 1 ) SELECT structured_events.project_id, - MAX( - CASE WHEN active_projects.project_id IS NOT NULL THEN TRUE END - ) AS is_active, + MAX(active_profile.activation_at) AS activation_date, + MAX(active_profile.last_active_at) AS last_activate_at, MIN(structured_events.event_created_at) AS first_event_at, MAX(structured_events.event_created_at) AS last_event_at FROM {{ ref('structured_events') }} LEFT JOIN - {{ ref('cmd_parsed_all') }} ON - structured_events.command = cmd_parsed_all.command -LEFT JOIN - active_projects ON structured_events.project_id = active_projects.project_id + active_profile ON structured_events.project_id = active_profile.project_id GROUP BY 1 diff --git a/data/transform/models/marts/telemetry/cohorts.sql b/data/transform/models/marts/telemetry/cohorts.sql new file mode 100644 index 00000000..bdb968d8 --- /dev/null +++ b/data/transform/models/marts/telemetry/cohorts.sql @@ -0,0 +1,260 @@ +WITH base AS ( + SELECT + structured_events.project_id, + structured_events.event_id, + structured_events.event_count, + structured_events.event_created_at, + projects.first_event_at, + projects.activation_date, + projects.last_activate_at, + COALESCE(structured_events.event_created_at >= projects.activation_date + AND structured_events.event_created_at <= projects.last_activate_at, + FALSE) AS is_active + FROM {{ ref('structured_events') }} + LEFT JOIN + {{ ref('projects') }} ON + structured_events.project_id = projects.project_id +), + +plugin_categories_agg AS ( + + SELECT + 'week' AS period_grain, + 'active' AS agg_type, + DATE_TRUNC( + 'week', base.event_created_at + ) AS period_date, + base.project_id, + COUNT(DISTINCT fact_plugin_usage.plugin_category) AS plugin_distinct_cnt + FROM base + INNER JOIN {{ ref('fact_plugin_usage') }} + ON base.event_id = fact_plugin_usage.event_id + -- Active + WHERE base.is_active + GROUP BY 1, 2, 3, 4 + + UNION ALL + + SELECT + 'week' AS period_grain, + 'active_prod' AS agg_type, + DATE_TRUNC( + 'week', base.event_created_at + ) AS period_date, + base.project_id, + COUNT( + DISTINCT CASE + WHEN + environments.env_name = 'prod' + THEN fact_plugin_usage.plugin_category + END + ) AS plugin_distinct_cnt + FROM base + INNER JOIN {{ ref('fact_plugin_usage') }} + ON base.event_id = fact_plugin_usage.event_id + LEFT JOIN + {{ ref('environments') }} ON + base.event_id = environments.event_id + -- Active + WHERE base.is_active + GROUP BY 1, 2, 3, 4 + + UNION ALL + + SELECT + 'week' AS period_grain, + 'active_dev' AS agg_type, + DATE_TRUNC( + 'week', base.event_created_at + ) AS period_date, + base.project_id, + COUNT( + DISTINCT CASE + WHEN + environments.env_name = 'dev' + THEN fact_plugin_usage.plugin_category + END + ) AS plugin_distinct_cnt + FROM base + INNER JOIN {{ ref('fact_plugin_usage') }} + ON base.event_id = fact_plugin_usage.event_id + LEFT JOIN + {{ ref('environments') }} ON + base.event_id = environments.event_id + WHERE base.is_active + GROUP BY 1, 2, 3, 4 + +), + +cohort_snapshots AS ( + + -- Event usage for all projects at the month grain + SELECT + 'month' AS period_grain, + DATE_TRUNC( + 'month', event_created_at + ) AS snapshot_period, + DATE_TRUNC('month', first_event_at) AS cohort_id, + DATEDIFF( + MONTH, + DATE_TRUNC('month', first_event_at), + DATE_TRUNC('month', event_created_at) + ) AS cohort_number, + 'event_volume' AS cohort_type, + SUM(event_count) AS snapshot_value + FROM base + GROUP BY 1, 2, 3, 4, 5 + + UNION ALL + + -- Active projects at the month grain + SELECT + 'month' AS period_grain, + DATE_TRUNC( + 'month', event_created_at + ) AS snapshot_period, + DATE_TRUNC('month', first_event_at) AS cohort_id, + DATEDIFF( + MONTH, + DATE_TRUNC('month', first_event_at), + DATE_TRUNC('month', event_created_at) + ) AS cohort_number, + 'active_projects' AS cohort_type, + COUNT(DISTINCT project_id) AS snapshot_value + FROM base + WHERE is_active + GROUP BY 1, 2, 3, 4, 5 + + UNION ALL + + -- APP for active projects at the week grain + SELECT + 'week' AS period_grain, + DATE_TRUNC( + 'week', base.event_created_at + ) AS snapshot_period, + DATE_TRUNC('week', base.first_event_at) AS cohort_id, + DATEDIFF( + WEEK, + DATE_TRUNC('week', base.first_event_at), + DATE_TRUNC('week', base.event_created_at) + ) AS cohort_number, + 'APP_active' AS cohort_type, + AVG(plugin_categories_agg.plugin_distinct_cnt) AS snapshot_value + FROM base + LEFT JOIN + plugin_categories_agg ON + base.project_id = plugin_categories_agg.project_id + AND DATE_TRUNC( + 'week', base.event_created_at + ) = plugin_categories_agg.period_date + WHERE base.is_active + AND plugin_categories_agg.period_grain = 'week' + AND plugin_categories_agg.agg_type = 'active' + GROUP BY 1, 2, 3, 4, 5 + + UNION ALL + + -- APP for active projects at the month grain + SELECT + 'month' AS period_grain, + DATE_TRUNC( + 'month', base.event_created_at + ) AS snapshot_period, + DATE_TRUNC('month', base.first_event_at) AS cohort_id, + DATEDIFF( + MONTH, + DATE_TRUNC('month', base.first_event_at), + DATE_TRUNC('month', base.event_created_at) + ) AS cohort_number, + 'APP_active' AS cohort_type, + AVG(plugin_categories_agg.plugin_distinct_cnt) AS snapshot_value + FROM base + LEFT JOIN + plugin_categories_agg ON + base.project_id = plugin_categories_agg.project_id + AND DATE_TRUNC( + 'month', base.event_created_at + ) = plugin_categories_agg.period_date + WHERE base.is_active + AND plugin_categories_agg.period_grain = 'month' + AND plugin_categories_agg.agg_type = 'active' + GROUP BY 1, 2, 3, 4, 5 + + UNION ALL + + -- APP for active projects using the prod environment at the week grain + SELECT + 'week' AS period_grain, + DATE_TRUNC( + 'week', base.event_created_at + ) AS snapshot_period, + DATE_TRUNC('week', base.first_event_at) AS cohort_id, + DATEDIFF( + WEEK, + DATE_TRUNC('week', base.first_event_at), + DATE_TRUNC('week', base.event_created_at) + ) AS cohort_number, + 'APP_active_prod' AS cohort_type, + AVG(plugin_categories_agg.plugin_distinct_cnt) AS snapshot_value + FROM base + LEFT JOIN + plugin_categories_agg ON + base.project_id = plugin_categories_agg.project_id + AND DATE_TRUNC( + 'week', base.event_created_at + ) = plugin_categories_agg.period_date + WHERE base.is_active + AND plugin_categories_agg.period_grain = 'week' + AND plugin_categories_agg.agg_type = 'active_prod' + GROUP BY 1, 2, 3, 4, 5 + + UNION ALL + + -- APP for active projects using the dev environment at the week grain + SELECT + 'week' AS period_grain, + DATE_TRUNC( + 'week', base.event_created_at + ) AS snapshot_period, + DATE_TRUNC('week', base.first_event_at) AS cohort_id, + DATEDIFF( + WEEK, + DATE_TRUNC('week', base.first_event_at), + DATE_TRUNC('week', base.event_created_at) + ) AS cohort_number, + 'APP_active_dev' AS cohort_type, + AVG(plugin_categories_agg.plugin_distinct_cnt) AS snapshot_value + FROM base + LEFT JOIN + plugin_categories_agg ON + base.project_id = plugin_categories_agg.project_id + AND DATE_TRUNC( + 'week', base.event_created_at + ) = plugin_categories_agg.period_date + WHERE base.is_active + AND plugin_categories_agg.period_grain = 'week' + AND plugin_categories_agg.agg_type = 'active_dev' + GROUP BY 1, 2, 3, 4, 5 + +), + +originals AS ( + SELECT + cohort_id, + snapshot_value, + cohort_type + FROM cohort_snapshots + WHERE cohort_id = snapshot_period +) + +SELECT + cohort_snapshots.period_grain, + cohort_snapshots.cohort_id, + cohort_snapshots.snapshot_period, + cohort_snapshots.cohort_type, + cohort_snapshots.snapshot_value, + cohort_snapshots.cohort_number, + originals.snapshot_value AS original_snapshot_value +FROM cohort_snapshots +LEFT JOIN originals ON cohort_snapshots.cohort_id = originals.cohort_id diff --git a/data/transform/models/marts/telemetry/fact_plugin_usage.sql b/data/transform/models/marts/telemetry/fact_plugin_usage.sql index f8304390..2485ee0e 100644 --- a/data/transform/models/marts/telemetry/fact_plugin_usage.sql +++ b/data/transform/models/marts/telemetry/fact_plugin_usage.sql @@ -22,7 +22,9 @@ SELECT -- projects structured_events.project_id, projects.first_event_at AS project_created_at, - projects.is_active AS project_is_active, + COALESCE(projects.last_activate_at >= DATEADD( + 'month', -1, CURRENT_DATE() + ), FALSE) AS project_is_active, -- environments cmd_parsed_all.environment AS env_id, environments.env_name,