Merge pull request #2870 from umami-software/feat/clickhouse-performance
Feat/clickhouse performance
franciscao633 authored Aug 2, 2024
2 parents 3494ae4 + d02dc6f commit 2c18cca
Showing 9 changed files with 254 additions and 101 deletions.
55 changes: 36 additions & 19 deletions db/clickhouse/schema.sql
@@ -29,9 +29,11 @@ CREATE TABLE umami.website_event
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
-engine = MergeTree
-ORDER BY (website_id, session_id, created_at)
-SETTINGS index_granularity = 8192;
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(created_at)
+ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at)
+PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id)
+SETTINGS index_granularity = 8192;

CREATE TABLE umami.event_data
(
@@ -48,9 +50,9 @@ CREATE TABLE umami.event_data
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
-engine = MergeTree
-ORDER BY (website_id, event_id, data_key, created_at)
-SETTINGS index_granularity = 8192;
+ENGINE = MergeTree
+ORDER BY (website_id, event_id, data_key, created_at)
+SETTINGS index_granularity = 8192;

CREATE TABLE umami.session_data
(
@@ -64,9 +66,9 @@ CREATE TABLE umami.session_data
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
-engine = MergeTree
-ORDER BY (website_id, session_id, data_key, created_at)
-SETTINGS index_granularity = 8192;
+ENGINE = MergeTree
+ORDER BY (website_id, session_id, data_key, created_at)
+SETTINGS index_granularity = 8192;

-- stats hourly
CREATE TABLE umami.website_event_stats_hourly
@@ -97,15 +99,15 @@ CREATE TABLE umami.website_event_stats_hourly
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(created_at)
ORDER BY (
website_id,
event_type,
toStartOfHour(created_at),
cityHash64(visit_id),
visit_id
)
SAMPLE BY cityHash64(visit_id);

CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly
@@ -174,4 +176,19 @@ GROUP BY website_id,
subdivision1,
city,
event_type,
timestamp);

+-- projections
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_url_path_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection;
+
+ALTER TABLE umami.website_event
+ADD PROJECTION website_event_referrer_domain_projection (
+SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at
+);
+
+ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection;
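Note on the schema changes above: the website_event sort key now leads with toStartOfHour(created_at), with an explicit PRIMARY KEY that is a prefix of the ORDER BY, and the table is partitioned by month. The ADD PROJECTION statements store per-part copies of the data re-sorted for url_path and referrer_domain lookups; MATERIALIZE PROJECTION kicks off a background mutation that builds the projection for parts written before it existed. A minimal sketch of the kind of query the url_path projection is meant to serve; the website_id value and date range are hypothetical:

-- ClickHouse can transparently read website_event_url_path_projection here,
-- since its order (toStartOfDay(created_at), website_id, url_path, created_at)
-- matches the filter columns; the chosen read path can be inspected with EXPLAIN.
select toStartOfDay(created_at) as day, count(*) as views
from umami.website_event
where website_id = toUUID('11111111-1111-1111-1111-111111111111')
  and url_path = '/pricing'
  and created_at between toDateTime('2024-07-01 00:00:00', 'UTC')
                     and toDateTime('2024-08-01 00:00:00', 'UTC')
group by day
order by day;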
4 changes: 2 additions & 2 deletions src/lib/clickhouse.ts
@@ -2,11 +2,11 @@ import { ClickHouseClient, createClient } from '@clickhouse/client';
import dateFormat from 'dateformat';
import debug from 'debug';
import { CLICKHOUSE } from 'lib/db';
-import { PageParams, QueryFilters, QueryOptions } from './types';
import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants';
-import { fetchWebsite } from './load';
import { maxDate } from './date';
+import { fetchWebsite } from './load';
import { filtersToArray } from './params';
+import { PageParams, QueryFilters, QueryOptions } from './types';

export const CLICKHOUSE_DATE_FORMATS = {
second: '%Y-%m-%dT%H:%i:%S',
15 changes: 3 additions & 12 deletions src/lib/constants.ts
@@ -33,16 +33,7 @@ export const FILTER_REFERRERS = 'filter-referrers';
export const FILTER_PAGES = 'filter-pages';

export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute'];
-export const EVENT_COLUMNS = [
-'url',
-'entry',
-'exit',
-'referrer',
-'title',
-'query',
-'event',
-'host',
-];
+export const EVENT_COLUMNS = ['url', 'entry', 'exit', 'referrer', 'title', 'query', 'event'];

export const SESSION_COLUMNS = [
'browser',
@@ -58,8 +49,8 @@ export const SESSION_COLUMNS = [

export const FILTER_COLUMNS = {
url: 'url_path',
-entry: 'entry_url',
-exit: 'exit_url',
+entry: 'url_path',
+exit: 'url_path',
referrer: 'referrer_domain',
host: 'hostname',
title: 'page_title',
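Note on the constants change: entry and exit filters now map to url_path (and 'host' is dropped from EVENT_COLUMNS), matching the query changes below, where entry/exit pages are resolved from raw website_event rows rather than from the entry_url/exit_url aggregate-state columns. A minimal sketch of what an entry-page filter now renders to on ClickHouse, with hypothetical values:

select count(*) as matched_events
from umami.website_event
where website_id = toUUID('11111111-1111-1111-1111-111111111111') -- hypothetical
  and url_path = '/docs/getting-started';                         -- hypothetical filter value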
34 changes: 25 additions & 9 deletions src/queries/analytics/events/getEventMetrics.ts
@@ -1,8 +1,8 @@
-import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
-import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
-import { WebsiteEventMetric, QueryFilters } from 'lib/types';
import { EVENT_TYPE } from 'lib/constants';
+import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
+import prisma from 'lib/prisma';
+import { QueryFilters, WebsiteEventMetric } from 'lib/types';

export async function getEventMetrics(
...args: [websiteId: string, filters: QueryFilters]
@@ -51,8 +51,24 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.customEvent,
});

-return rawQuery(
-`
+let sql = '';

+if (filterQuery) {
+sql = `
+select
+event_name x,
+${getDateSQL('created_at', unit, timezone)} t,
+count(*) y
+from website_event
+where website_id = {websiteId:UUID}
+and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+and event_type = {eventType:UInt32}
+${filterQuery}
+group by x, t
+order by t
+`;
+} else {
+sql = `
select
event_name x,
${getDateSQL('created_at', unit, timezone)} t,
@@ -64,13 +80,13 @@ async function clickhouseQuery(
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
-${filterQuery}
) as g
group by x, t
order by t
-`,
-params,
-).then(a => {
+`;
+}

+return rawQuery(sql, params).then(a => {
return Object.values(a).map(a => {
return { x: a.x, t: a.t, y: Number(a.y) };
});
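Note on the change above: when any filter is present, the metrics query now scans website_event directly; the unfiltered path keeps the old subselect shape. A minimal sketch of the filtered branch with the {param:Type} placeholders kept, toStartOfHour standing in for the ${getDateSQL('created_at', unit, timezone)} interpolation, and a hypothetical rendered filter predicate:

select
  event_name x,
  toStartOfHour(created_at) t, -- stand-in for the ${getDateSQL(...)} output
  count(*) y
from website_event
where website_id = {websiteId:UUID}
  and created_at between {startDate:DateTime64} and {endDate:DateTime64}
  and event_type = {eventType:UInt32}
  and url_path = {url:String} -- hypothetical expansion of ${filterQuery}
group by x, t
order by t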
56 changes: 45 additions & 11 deletions src/queries/analytics/getWebsiteStats.ts
@@ -1,9 +1,9 @@
/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */
import clickhouse from 'lib/clickhouse';
import { EVENT_TYPE } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types';
+import { EVENT_COLUMNS } from 'lib/constants';

export async function getWebsiteStats(
...args: [websiteId: string, filters: QueryFilters]
@@ -68,22 +68,56 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.pageView,
});

-return rawQuery(
-`
-select
-sum(views) as "pageviews",
+let sql = '';

+if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
+sql = `
+select
+sum(t.c) as "pageviews",
+uniq(t.session_id) as "visitors",
+uniq(t.visit_id) as "visits",
+sum(if(t.c = 1, 1, 0)) as "bounces",
+sum(max_time-min_time) as "totaltime"
+from (
+select
+session_id,
+visit_id,
+count(*) c,
+min(created_at) min_time,
+max(created_at) max_time
+from website_event
+where website_id = {websiteId:UUID}
+and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+and event_type = {eventType:UInt32}
+${filterQuery}
+group by session_id, visit_id
+) as t;
+`;
+} else {
+sql = `
+select
+sum(t.c) as "pageviews",
uniq(session_id) as "visitors",
uniq(visit_id) as "visits",
-sumIf(1, views = 1) as "bounces",
+sumIf(1, t.c = 1) as "bounces",
sum(max_time-min_time) as "totaltime"
-from website_event_stats_hourly "website_event"
+from (select
+session_id,
+visit_id,
+sum(views) c,
+min(min_time) min_time,
+max(max_time) max_time
+from umami.website_event_stats_hourly "website_event"
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
-${filterQuery};
-`,
-params,
-).then(result => {
+${filterQuery}
+group by session_id, visit_id
+) as t;
+`;
+}

+return rawQuery(sql, params).then(result => {
return Object.values(result).map((a: any) => {
return {
pageviews: Number(a.pageviews),
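Note on the two branches above: with filters present the stats come from raw website_event rows, otherwise from the hourly rollup; in both cases the inner query collapses events to one row per (session_id, visit_id), so t.c is that visit's pageview count, sumIf(1, t.c = 1) counts single-page visits as bounces, and sum(max_time - min_time) accumulates per-visit duration in seconds. A minimal standalone sketch of the unfiltered branch, with identifiers from the diff and a hypothetical website_id and date range:

select
  sum(t.c) as "pageviews",
  uniq(t.session_id) as "visitors",
  uniq(t.visit_id) as "visits",
  sumIf(1, t.c = 1) as "bounces",             -- a bounce = a visit with exactly one pageview
  sum(t.max_time - t.min_time) as "totaltime" -- seconds between first and last event of each visit
from (
  select
    session_id,
    visit_id,
    sum(views) c,           -- hourly view counts rolled up to a per-visit total
    min(min_time) min_time, -- first event in the visit
    max(max_time) max_time  -- last event in the visit
  from umami.website_event_stats_hourly
  where website_id = toUUID('11111111-1111-1111-1111-111111111111')
    and created_at between toDateTime('2024-07-01 00:00:00', 'UTC')
                       and toDateTime('2024-08-01 00:00:00', 'UTC')
    and event_type = 1 -- EVENT_TYPE.pageView
  group by session_id, visit_id
) as t;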
79 changes: 59 additions & 20 deletions src/queries/analytics/pageviews/getPageviewMetrics.ts
@@ -1,5 +1,5 @@
import clickhouse from 'lib/clickhouse';
-import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
+import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types';
@@ -91,28 +91,66 @@ async function clickhouseQuery(
});

let excludeDomain = '';
-let groupByQuery = '';
+let sql = '';

-if (column === 'referrer_domain') {
-excludeDomain = `and t != {websiteDomain:String} and t != ''`;
-}
+if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
+let entryExitQuery = '';

-let columnQuery = `arrayJoin(${column})`;
+if (column === 'referrer_domain') {
+excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`;
+}

-if (type === 'entry') {
-columnQuery = `visit_id x, argMinMerge(${column})`;
-}
+if (type === 'entry' || type === 'exit') {
+const aggregrate = type === 'entry' ? 'min' : 'max';

-if (type === 'exit') {
-columnQuery = `visit_id x, argMaxMerge(${column})`;
-}
+entryExitQuery = `
+JOIN (select visit_id,
+${aggregrate}(created_at) target_created_at
+from website_event
+where website_id = {websiteId:UUID}
+and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+and event_type = {eventType:UInt32}
+group by visit_id) x
+ON x.visit_id = website_event.visit_id
+and x.target_created_at = website_event.created_at`;
+}

-if (type === 'entry' || type === 'exit') {
-groupByQuery = 'group by x';
-}
+sql = `
+select ${column} x, count(*) y
+from website_event
+${entryExitQuery}
+where website_id = {websiteId:UUID}
+and created_at between {startDate:DateTime64} and {endDate:DateTime64}
+and event_type = {eventType:UInt32}
+${excludeDomain}
+${filterQuery}
+group by x
+order by y desc
+limit ${limit}
+offset ${offset}
+`;
+} else {
+let groupByQuery = '';

-return rawQuery(
-`
+if (column === 'referrer_domain') {
+excludeDomain = `and t != {websiteDomain:String} and t != ''`;
+}

+let columnQuery = `arrayJoin(${column})`;

+if (type === 'entry') {
+columnQuery = `visit_id x, argMinMerge(entry_url)`;
+}

+if (type === 'exit') {
+columnQuery = `visit_id x, argMaxMerge(exit_url)`;
+}

+if (type === 'entry' || type === 'exit') {
+groupByQuery = 'group by x';
+}

+sql = `
select g.t as x,
count(*) as y
from (
@@ -128,9 +166,10 @@ async function clickhouseQuery(
order by y desc
limit ${limit}
offset ${offset}
-`,
-params,
-).then((result: any) => {
+`;
+}

+return rawQuery(sql, params).then((result: any) => {
return Object.values(result).map((a: any) => {
return { x: a.x, y: Number(a.y) };
});
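Note on the filtered entry/exit path above: instead of reading argMinMerge(entry_url)/argMaxMerge(exit_url) states from the hourly table, each visit's first (or last) event timestamp is found with min(created_at) (or max) and joined back onto website_event, so the matching row's column can be counted. A sketch of the 'entry' case with the ${...} interpolations expanded; the rendered column and the limit/offset values are hypothetical:

select url_path x, count(*) y -- ${column} rendered as url_path
from website_event
join (
  select visit_id,
    min(created_at) target_created_at -- min() for entry, max() for exit
  from website_event
  where website_id = {websiteId:UUID}
    and created_at between {startDate:DateTime64} and {endDate:DateTime64}
    and event_type = {eventType:UInt32}
  group by visit_id
) x
on x.visit_id = website_event.visit_id
  and x.target_created_at = website_event.created_at
where website_id = {websiteId:UUID}
  and created_at between {startDate:DateTime64} and {endDate:DateTime64}
  and event_type = {eventType:UInt32}
group by x
order by y desc
limit 10 offset 0 -- hypothetical ${limit} / ${offset}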