Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(schema-compiler): Fix sql generation for rolling_window queries with multiple time dimensions #9124

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
97 changes: 71 additions & 26 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -934,8 +934,8 @@ export class BaseQuery {
).concat(multiStageMembers.map(m => `SELECT * FROM ${m.alias}`));
}

// Move regular measures to multiplied ones if there're same
// cubes to calculate. Most of the times it'll be much faster to
// Move regular measures to multiplied ones if there are same
// cubes to calculate. Most of the time it'll be much faster to
// calculate as there will be only single scan per cube.
if (
regularMeasures.length &&
Expand Down Expand Up @@ -1413,18 +1413,63 @@ export class BaseQuery {

overTimeSeriesQuery(baseQueryFn, cumulativeMeasure, fromRollup) {
const dateJoinCondition = cumulativeMeasure.dateJoinCondition();
const uniqDateJoinCondition = R.uniqBy(djc => djc[0].dimension, dateJoinCondition);
const cumulativeMeasures = [cumulativeMeasure];
if (!this.timeDimensions.find(d => d.granularity)) {
const filters = this.segments.concat(this.filters).concat(this.dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, false));
const filters = this.segments
.concat(this.filters)
.concat(this.dateFromStartToEndConditionSql(
// If the same time dimension is passed more than once, no need to build the same
// filter condition again and again. Different granularities don't play role here,
// as rollingWindow.granularity is used for filtering.
uniqDateJoinCondition,
fromRollup,
false
));
return baseQueryFn(cumulativeMeasures, filters, false);
}
const dateSeriesSql = this.timeDimensions.map(d => this.dateSeriesSql(d)).join(', ');
const filters = this.segments.concat(this.filters).concat(this.dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, true));

if (this.timeDimensions.filter(d => !d.dateRange && d.granularity).length > 0) {
throw new UserError('Time series queries without dateRange aren\'t supported');
}

// We can't do meaningful query if few time dimensions with different ranges passed,
// it won't be possible to join them together without loosing some rows.
const rangedTimeDimensions = this.timeDimensions.filter(d => d.dateRange && d.granularity);
const uniqTimeDimensionWithRanges = R.uniqBy(d => d.dateRange, rangedTimeDimensions);
if (uniqTimeDimensionWithRanges.length > 1) {
throw new Error('Can\'t build query for time dimensions with different date ranges');
}

// We need to generate time series table for the lowest granularity among all time dimensions
let dateSeriesDimension;
const dateSeriesGranularity = this.timeDimensions.filter(d => d.granularity)
.reduce((acc, d) => {
const mg = this.minGranularity(acc, d.resolvedGranularity());
if (mg === d.resolvedGranularity()) {
dateSeriesDimension = d;
}
return mg;
}, undefined);

const dateSeriesSql = this.dateSeriesSql(dateSeriesDimension);

// If the same time dimension is passed more than once, no need to build the same
// filter condition again and again. Different granularities don't play role here,
// as rollingWindow.granularity is used for filtering.
const filters = this.segments
.concat(this.filters)
.concat(this.dateFromStartToEndConditionSql(
uniqDateJoinCondition,
fromRollup,
true
));
const baseQuery = this.groupedUngroupedSelect(
() => baseQueryFn(cumulativeMeasures, filters),
cumulativeMeasure.shouldUngroupForCumulative(),
!cumulativeMeasure.shouldUngroupForCumulative() && this.minGranularity(
cumulativeMeasure.windowGranularity(), this.timeDimensions.find(d => d.granularity).resolvedGranularity()
cumulativeMeasure.windowGranularity(),
dateSeriesGranularity
) || undefined
);
const baseQueryAlias = this.cubeAlias('base');
Expand All @@ -1444,10 +1489,27 @@ export class BaseQuery {
dateSeriesSql,
baseQuery,
dateJoinConditionSql,
baseQueryAlias
baseQueryAlias,
dateSeriesDimension.granularity,
);
}

overTimeSeriesSelect(cumulativeMeasures, dateSeriesSql, baseQuery, dateJoinConditionSql, baseQueryAlias, dateSeriesGranularity) {
const forSelect = this.overTimeSeriesForSelect(cumulativeMeasures, dateSeriesGranularity);
return `SELECT ${forSelect} FROM ${dateSeriesSql}` +
` LEFT JOIN (${baseQuery}) ${this.asSyntaxJoin} ${baseQueryAlias} ON ${dateJoinConditionSql}` +
this.groupByClause();
}

overTimeSeriesForSelect(cumulativeMeasures, dateSeriesGranularity) {
return this.dimensions
.map(s => s.cumulativeSelectColumns())
.concat(this.timeDimensions.map(d => d.dateSeriesSelectColumn(null, dateSeriesGranularity)))
.concat(cumulativeMeasures.map(s => s.cumulativeSelectColumns()))
.filter(c => !!c)
.join(', ');
}

dateFromStartToEndConditionSql(dateJoinCondition, fromRollup, isFromStartToEnd) {
return dateJoinCondition.map(
// TODO these weird conversions to be strict typed for big query.
Expand All @@ -1472,24 +1534,6 @@ export class BaseQuery {
);
}

overTimeSeriesSelect(cumulativeMeasures, dateSeriesSql, baseQuery, dateJoinConditionSql, baseQueryAlias) {
const forSelect = this.overTimeSeriesForSelect(cumulativeMeasures);
return `SELECT ${forSelect} FROM ${dateSeriesSql}` +
` LEFT JOIN (${baseQuery}) ${this.asSyntaxJoin} ${baseQueryAlias} ON ${dateJoinConditionSql}` +
this.groupByClause();
}

overTimeSeriesForSelect(cumulativeMeasures) {
return this.dimensions.map(s => s.cumulativeSelectColumns()).concat(this.dateSeriesSelect()).concat(
cumulativeMeasures.map(s => s.cumulativeSelectColumns()),
).filter(c => !!c)
.join(', ');
}

dateSeriesSelect() {
return this.timeDimensions.map(d => d.dateSeriesSelectColumn());
}

/**
* @param {import('./BaseTimeDimension').BaseTimeDimension} timeDimension
* @return {string}
Expand Down Expand Up @@ -1646,7 +1690,8 @@ export class BaseQuery {

/**
*
* @param {{sql: string, on: {cubeName: string, expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}} customJoin
* @param {{sql: string, on: {cubeName: string, expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}}
* customJoin
* @returns {JoinItem}
*/
customSubQueryJoin(customJoin) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,18 @@ export class BaseTimeDimension extends BaseFilter {
return this.query.escapeColumnName(`${this.dimension}_series`);
}

public dateSeriesSelectColumn(dateSeriesAliasName) {
public dateSeriesSelectColumn(dateSeriesAliasName: string, dateSeriesGranularity: string) {
if (!this.granularityObj) {
return null;
}

// In case of query with more than one granularity, the time series table was generated
// with the minimal granularity among all. If this is our granularity, we can save
// some cpu cycles without 'date_from' truncation. But if this is not our granularity,
// we need to truncate it to desired.
if (dateSeriesGranularity && this.granularityObj?.granularity !== dateSeriesGranularity) {
return `${this.query.dimensionTimeGroupedColumn(`${dateSeriesAliasName || this.dateSeriesAliasName()}.${this.query.escapeColumnName('date_from')}`, this.granularityObj)} ${this.aliasName()}`;
}
return `${dateSeriesAliasName || this.dateSeriesAliasName()}.${this.query.escapeColumnName('date_from')} ${this.aliasName()}`;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@ describe('SQL Generation', () => {
offset: 'start'
}
},
countRollingUnbounded: {
type: 'count',
rollingWindow: {
trailing: 'unbounded'
}
},
countRollingWeekToDate: {
type: 'count',
rollingWindow: {
type: 'to_date',
granularity: 'week'
}
},
revenue_qtd: {
type: 'sum',
sql: 'amount',
Expand Down Expand Up @@ -220,7 +233,14 @@ describe('SQL Generation', () => {
},
created_at: {
type: 'time',
sql: 'created_at'
sql: 'created_at',
granularities: {
three_days: {
interval: '3 days',
title: '3 days',
origin: '2017-01-01'
}
}
},
updated_at: {
type: 'time',
Expand Down Expand Up @@ -778,6 +798,163 @@ describe('SQL Generation', () => {
}
]));

it('rolling window with two time dimension granularities', async () => runQueryTest({
measures: [
'visitors.countRollingWeekToDate'
],
timeDimensions: [
{
dimension: 'visitors.created_at',
granularity: 'three_days',
dateRange: ['2017-01-01', '2017-01-10']
},
{
dimension: 'visitors.created_at',
granularity: 'day',
dateRange: ['2017-01-01', '2017-01-10']
}
],
order: [{
id: 'visitors.created_at'
}],
timezone: 'America/Los_Angeles'
}, [
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-01T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-02T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-03T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '2',
visitors__created_at_day: '2017-01-04T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '3',
visitors__created_at_day: '2017-01-05T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-06T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-07T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-08T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-09T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-10T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-10T00:00:00.000Z',
}
]));

it('two rolling windows with two time dimension granularities', async () => runQueryTest({
measures: [
'visitors.countRollingUnbounded',
'visitors.countRollingWeekToDate'
],
timeDimensions: [
{
dimension: 'visitors.created_at',
granularity: 'three_days',
dateRange: ['2017-01-01', '2017-01-10']
},
{
dimension: 'visitors.created_at',
granularity: 'day',
dateRange: ['2017-01-01', '2017-01-10']
}
],
order: [{
id: 'visitors.created_at'
}],
timezone: 'America/Los_Angeles'
}, [
{
visitors__count_rolling_unbounded: '1',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-01T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '2',
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-03T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '2',
visitors__count_rolling_week_to_date: '1',
visitors__created_at_day: '2017-01-02T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-01T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '3',
visitors__count_rolling_week_to_date: '2',
visitors__created_at_day: '2017-01-04T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '4',
visitors__count_rolling_week_to_date: '3',
visitors__created_at_day: '2017-01-05T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-06T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-04T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-08T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: '5',
visitors__created_at_day: '2017-01-07T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-09T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-07T00:00:00.000Z',
},
{
visitors__count_rolling_unbounded: '6',
visitors__count_rolling_week_to_date: null,
visitors__created_at_day: '2017-01-10T00:00:00.000Z',
visitors__created_at_three_days: '2017-01-10T00:00:00.000Z',
}
]));

it('rolling month', async () => runQueryTest({
measures: [
'visitors.revenueRollingThreeDay'
Expand Down
5 changes: 4 additions & 1 deletion rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,10 @@ limit
V1LoadRequestQueryTimeDimension {
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
granularity: Some("month".to_string()),
date_range: None,
date_range: Some(json!(vec![
"2023-07-08T00:00:00.000Z".to_string(),
"2023-10-07T23:59:59.999Z".to_string()
])),
}
]),
order: Some(vec![]),
Expand Down
10 changes: 6 additions & 4 deletions rust/cubesql/cubesql/src/compile/rewrite/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1825,16 +1825,18 @@ impl LanguageToLogicalPlanConverter {
let values =
match_data_node!(node_by_id, params[2], FilterMemberValues);
if !is_in_or && op == "inDateRange" {
let existing_time_dimension =
query_time_dimensions.iter_mut().find_map(|td| {
let existing_time_dimensions: Vec<_> = query_time_dimensions
.iter_mut()
.filter_map(|td| {
if td.dimension == member && td.date_range.is_none() {
td.date_range = Some(json!(values));
Some(td)
} else {
None
}
});
if existing_time_dimension.is_none() {
})
.collect();
if existing_time_dimensions.len() == 0 {
let dimension = V1LoadRequestQueryTimeDimension {
dimension: member.to_string(),
granularity: None,
Expand Down
Loading